lib/unicode/ArabLink.pl Unicode character database
lib/unicode/ArabLnkGrp.pl Unicode character database
lib/unicode/ArabShap.txt Unicode character database
+lib/unicode/BidiMirr.txt Unicode character database
lib/unicode/Bidirectional.pl Unicode character database
lib/unicode/Block.pl Unicode character database
lib/unicode/Blocks.txt Unicode character database
+lib/unicode/CaseFold.txt Unicode character database
lib/unicode/Category.pl Unicode character database
lib/unicode/CombiningClass.pl Unicode character database
lib/unicode/CompExcl.txt Unicode character database
lib/unicode/Names.txt Unicode character database
lib/unicode/NamesList.html Unicode character database
lib/unicode/Number.pl Unicode character database
-lib/unicode/Props.txt Unicode character database
+lib/unicode/PropList.txt Unicode character database
lib/unicode/README.Ethiopic Unicode character database
+lib/unicode/README.perl Unicode character database
lib/unicode/ReadMe.txt Unicode character database info
lib/unicode/SpecCase.txt Unicode character database
lib/unicode/To/Digit.pl Unicode character database
lib/unicode/To/Lower.pl Unicode character database
lib/unicode/To/Title.pl Unicode character database
lib/unicode/To/Upper.pl Unicode character database
-lib/unicode/UCD300.html Unicode character database
-lib/unicode/Unicode.300 Unicode character database
-lib/unicode/Unicode3.html Unicode character database
+lib/unicode/UCD301.html Unicode character database
+lib/unicode/UCDFF301.html Unicode character database
+lib/unicode/Unicode.301 Unicode character database
lib/unicode/mktables.PL Unicode character database generator
lib/unicode/syllables.txt Unicode character database
lib/utf8.pm Pragma to control Unicode support
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
0622 0625 R
0633 063a D
0640 C
0641 0647 D
-0648 0649 R
-064a D
-0671 U
-0672 0673 R
+0648 R
+0649 064a D
+0671 0673 R
0674 U
0675 0677 R
0678 0687 D
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
0622 0623 ALEF
0647 HEH
0648 WAW
0649 064a YEH
-0671 <no shaping>
-0672 0673 ALEF
+0671 0673 ALEF
0674 <no shaping>
0675 ALEF
0676 0677 WAW
-# Unicode; Schematic Name; Link; Link Group
+# ArabicShaping-3.txt
+#
+# This file is a normative contributory data file in the
+# Unicode Character Database.
+#
+# This file defines the shaping classes for Arabic and Syriac
+# positional shaping, repeating in machine readable form the
+# information printed in Tables 8-6, 8-7, 8-8, 8-10, 8-11, and
+# 8-13 of The Unicode Standard, Version 3.0.
+#
+# See sections 8.2 and 8.3 of The Unicode Standard, Version 3.0
+# for more information.
+#
+# Each line contains four fields, separated by a semicolon.
+#
+# The first field gives the code point, in 4-digit hexadecimal
+# form, of an Arabic or Syriac character.
+# The second field gives a short schematic name for that character,
+# abbreviated from the normative Unicode character name.
+# The third field defines the joining type: R right-joining,
+# D dual-joining, U non-joining
+# The fourth field defines the joining group.
+#
+# #############################################################
+
+# Unicode; Schematic Name; Joining Type; Joining Group
+
# Arabic characters
+
0622; MADDA ON ALEF; R; ALEF
0623; HAMZA ON ALEF; R; ALEF
0624; HAMZA ON WAW; R; WAW
0646; NOON; D; NOON
0647; HEH; D; HEH
0648; WAW; R; WAW
-0649; ALEF MAKSURA; R; YEH
+0649; ALEF MAKSURA; D; YEH
064A; YEH; D; YEH
-0671; HAMZAT WASL ON ALEF; U; <no shaping>
+0671; HAMZAT WASL ON ALEF; R; ALEF
0672; WAVY HAMZA ON ALEF; R; ALEF
0673; WAVY HAMZA UNDER ALEF; R; ALEF
0674; HIGH HAMZA; U; <no shaping>
06FA; SEEN WITH DOT BELOW AND 3 DOTS ABOVE; D; SEEN
06FB; DAD WITH DOT BELOW; D; SAD
06FC; GHAIN WITH DOT BELOW; D; AIN
+
# Syriac characters
+
0710; ALAPH; R; ALAPH
0712; BETH; D; BETH
0713; GAMAL; D; GAMAL
--- /dev/null
+# BidiMirroring-1.txt
+#
+# This file is an informative supplement to the UnicodeData file. It
+# lists characters that have the mirrored property
+# where there is another Unicode character that typically has a glyph
+# that is the mirror image of the original character's glyph.
+# The repertoire covered by the file is Unicode 3.0.1.
+#
+# The file contains a list of lines with mappings from one code point
+# to another one for character-based mirroring.
+# Note that for "real" mirroring, a rendering engine needs to select
+# appropriate alternative glyphs, and that many Unicode characters do not
+# have a mirror-image Unicode character.
+#
+# Each mapping line contains two fields, separated by a semicolon (';').
+# Each of the two fields contains a code point represented as a
+# variable-length hexadecimal value with 4 to 6 digits.
+# A comment indicates where the characters are "BEST FIT" mirroring.
+#
+# Code points with the "mirrored" property but no appropriate mirrors are
+# listed as comments at the end of the file.
+#
+# For information on bidi mirroring, see UTR #9: Bidirectional Algorithm,
+# at http://www.unicode.org/unicode/reports/tr9/
+#
+# Please address any comments to <errata@unicode.org>.
+# Note that this is an archival address: messages will be checked,
+# but do not expect an immediate response.
+#
+# This file was originally created by Markus Scherer
+#
+# ############################################################
+
+0028; 0029 # LEFT PARENTHESIS
+0029; 0028 # RIGHT PARENTHESIS
+003C; 003E # LESS-THAN SIGN
+003E; 003C # GREATER-THAN SIGN
+005B; 005D # LEFT SQUARE BRACKET
+005D; 005B # RIGHT SQUARE BRACKET
+007B; 007D # LEFT CURLY BRACKET
+007D; 007B # RIGHT CURLY BRACKET
+00AB; 00BB # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+00BB; 00AB # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+2039; 203A # SINGLE LEFT-POINTING ANGLE QUOTATION MARK
+203A; 2039 # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
+2045; 2046 # LEFT SQUARE BRACKET WITH QUILL
+2046; 2045 # RIGHT SQUARE BRACKET WITH QUILL
+207D; 207E # SUPERSCRIPT LEFT PARENTHESIS
+207E; 207D # SUPERSCRIPT RIGHT PARENTHESIS
+208D; 208E # SUBSCRIPT LEFT PARENTHESIS
+208E; 208D # SUBSCRIPT RIGHT PARENTHESIS
+2208; 220B # ELEMENT OF
+2209; 220C # NOT AN ELEMENT OF
+220A; 220D # SMALL ELEMENT OF
+220B; 2208 # CONTAINS AS MEMBER
+220C; 2209 # DOES NOT CONTAIN AS MEMBER
+220D; 220A # SMALL CONTAINS AS MEMBER
+223C; 223D # TILDE OPERATOR
+223D; 223C # REVERSED TILDE
+2243; 22CD # ASYMPTOTICALLY EQUAL TO
+2252; 2253 # APPROXIMATELY EQUAL TO OR THE IMAGE OF
+2253; 2252 # IMAGE OF OR APPROXIMATELY EQUAL TO
+2254; 2255 # COLON EQUALS
+2255; 2254 # EQUALS COLON
+2264; 2265 # LESS-THAN OR EQUAL TO
+2265; 2264 # GREATER-THAN OR EQUAL TO
+2266; 2267 # LESS-THAN OVER EQUAL TO
+2267; 2266 # GREATER-THAN OVER EQUAL TO
+2268; 2269 # [BEST FIT] LESS-THAN BUT NOT EQUAL TO
+2269; 2268 # [BEST FIT] GREATER-THAN BUT NOT EQUAL TO
+226A; 226B # MUCH LESS-THAN
+226B; 226A # MUCH GREATER-THAN
+226E; 226F # [BEST FIT] NOT LESS-THAN
+226F; 226E # [BEST FIT] NOT GREATER-THAN
+2270; 2271 # [BEST FIT] NEITHER LESS-THAN NOR EQUAL TO
+2271; 2270 # [BEST FIT] NEITHER GREATER-THAN NOR EQUAL TO
+2272; 2273 # [BEST FIT] LESS-THAN OR EQUIVALENT TO
+2273; 2272 # [BEST FIT] GREATER-THAN OR EQUIVALENT TO
+2274; 2275 # [BEST FIT] NEITHER LESS-THAN NOR EQUIVALENT TO
+2275; 2274 # [BEST FIT] NEITHER GREATER-THAN NOR EQUIVALENT TO
+2276; 2277 # LESS-THAN OR GREATER-THAN
+2277; 2276 # GREATER-THAN OR LESS-THAN
+2278; 2279 # NEITHER LESS-THAN NOR GREATER-THAN
+2279; 2278 # NEITHER GREATER-THAN NOR LESS-THAN
+227A; 227B # PRECEDES
+227B; 227A # SUCCEEDS
+227C; 227D # PRECEDES OR EQUAL TO
+227D; 227C # SUCCEEDS OR EQUAL TO
+227E; 227F # [BEST FIT] PRECEDES OR EQUIVALENT TO
+227F; 227E # [BEST FIT] SUCCEEDS OR EQUIVALENT TO
+2280; 2281 # [BEST FIT] DOES NOT PRECEDE
+2281; 2280 # [BEST FIT] DOES NOT SUCCEED
+2282; 2283 # SUBSET OF
+2283; 2282 # SUPERSET OF
+2284; 2285 # [BEST FIT] NOT A SUBSET OF
+2285; 2284 # [BEST FIT] NOT A SUPERSET OF
+2286; 2287 # SUBSET OF OR EQUAL TO
+2287; 2286 # SUPERSET OF OR EQUAL TO
+2288; 2289 # [BEST FIT] NEITHER A SUBSET OF NOR EQUAL TO
+2289; 2288 # [BEST FIT] NEITHER A SUPERSET OF NOR EQUAL TO
+228A; 228B # [BEST FIT] SUBSET OF WITH NOT EQUAL TO
+228B; 228A # [BEST FIT] SUPERSET OF WITH NOT EQUAL TO
+228F; 2290 # SQUARE IMAGE OF
+2290; 228F # SQUARE ORIGINAL OF
+2291; 2292 # SQUARE IMAGE OF OR EQUAL TO
+2292; 2291 # SQUARE ORIGINAL OF OR EQUAL TO
+22A2; 22A3 # RIGHT TACK
+22A3; 22A2 # LEFT TACK
+22B0; 22B1 # PRECEDES UNDER RELATION
+22B1; 22B0 # SUCCEEDS UNDER RELATION
+22B2; 22B3 # NORMAL SUBGROUP OF
+22B3; 22B2 # CONTAINS AS NORMAL SUBGROUP
+22B4; 22B5 # NORMAL SUBGROUP OF OR EQUAL TO
+22B5; 22B4 # CONTAINS AS NORMAL SUBGROUP OR EQUAL TO
+22B6; 22B7 # ORIGINAL OF
+22B7; 22B6 # IMAGE OF
+22C9; 22CA # LEFT NORMAL FACTOR SEMIDIRECT PRODUCT
+22CA; 22C9 # RIGHT NORMAL FACTOR SEMIDIRECT PRODUCT
+22CB; 22CC # LEFT SEMIDIRECT PRODUCT
+22CC; 22CB # RIGHT SEMIDIRECT PRODUCT
+22CD; 2243 # REVERSED TILDE EQUALS
+22D0; 22D1 # DOUBLE SUBSET
+22D1; 22D0 # DOUBLE SUPERSET
+22D6; 22D7 # LESS-THAN WITH DOT
+22D7; 22D6 # GREATER-THAN WITH DOT
+22D8; 22D9 # VERY MUCH LESS-THAN
+22D9; 22D8 # VERY MUCH GREATER-THAN
+22DA; 22DB # LESS-THAN EQUAL TO OR GREATER-THAN
+22DB; 22DA # GREATER-THAN EQUAL TO OR LESS-THAN
+22DC; 22DD # EQUAL TO OR LESS-THAN
+22DD; 22DC # EQUAL TO OR GREATER-THAN
+22DE; 22DF # EQUAL TO OR PRECEDES
+22DF; 22DE # EQUAL TO OR SUCCEEDS
+22E0; 22E1 # [BEST FIT] DOES NOT PRECEDE OR EQUAL
+22E1; 22E0 # [BEST FIT] DOES NOT SUCCEED OR EQUAL
+22E2; 22E3 # [BEST FIT] NOT SQUARE IMAGE OF OR EQUAL TO
+22E3; 22E2 # [BEST FIT] NOT SQUARE ORIGINAL OF OR EQUAL TO
+22E4; 22E5 # [BEST FIT] SQUARE IMAGE OF OR NOT EQUAL TO
+22E5; 22E4 # [BEST FIT] SQUARE ORIGINAL OF OR NOT EQUAL TO
+22E6; 22E7 # [BEST FIT] LESS-THAN BUT NOT EQUIVALENT TO
+22E7; 22E6 # [BEST FIT] GREATER-THAN BUT NOT EQUIVALENT TO
+22E8; 22E9 # [BEST FIT] PRECEDES BUT NOT EQUIVALENT TO
+22E9; 22E8 # [BEST FIT] SUCCEEDS BUT NOT EQUIVALENT TO
+22EA; 22EB # [BEST FIT] NOT NORMAL SUBGROUP OF
+22EB; 22EA # [BEST FIT] DOES NOT CONTAIN AS NORMAL SUBGROUP
+22EC; 22ED # [BEST FIT] NOT NORMAL SUBGROUP OF OR EQUAL TO
+22ED; 22EC # [BEST FIT] DOES NOT CONTAIN AS NORMAL SUBGROUP OR EQUAL
+22F0; 22F1 # UP RIGHT DIAGONAL ELLIPSIS
+22F1; 22F0 # DOWN RIGHT DIAGONAL ELLIPSIS
+2308; 2309 # LEFT CEILING
+2309; 2308 # RIGHT CEILING
+230A; 230B # LEFT FLOOR
+230B; 230A # RIGHT FLOOR
+2329; 232A # LEFT-POINTING ANGLE BRACKET
+232A; 2329 # RIGHT-POINTING ANGLE BRACKET
+3008; 3009 # LEFT ANGLE BRACKET
+3009; 3008 # RIGHT ANGLE BRACKET
+300A; 300B # LEFT DOUBLE ANGLE BRACKET
+300B; 300A # RIGHT DOUBLE ANGLE BRACKET
+300C; 300D # [BEST FIT] LEFT CORNER BRACKET
+300D; 300C # [BEST FIT] RIGHT CORNER BRACKET
+300E; 300F # [BEST FIT] LEFT WHITE CORNER BRACKET
+300F; 300E # [BEST FIT] RIGHT WHITE CORNER BRACKET
+3010; 3011 # LEFT BLACK LENTICULAR BRACKET
+3011; 3010 # RIGHT BLACK LENTICULAR BRACKET
+3014; 3015 # [BEST FIT] LEFT TORTOISE SHELL BRACKET
+3015; 3014 # [BEST FIT] RIGHT TORTOISE SHELL BRACKET
+3016; 3017 # LEFT WHITE LENTICULAR BRACKET
+3017; 3016 # RIGHT WHITE LENTICULAR BRACKET
+3018; 3019 # LEFT WHITE TORTOISE SHELL BRACKET
+3019; 3018 # RIGHT WHITE TORTOISE SHELL BRACKET
+301A; 301B # LEFT WHITE SQUARE BRACKET
+301B; 301A # RIGHT WHITE SQUARE BRACKET
+
+# The following characters have no appropriate mirroring character
+
+# 2201; COMPLEMENT
+# 2202; PARTIAL DIFFERENTIAL
+# 2203; THERE EXISTS
+# 2204; THERE DOES NOT EXIST
+# 2211; N-ARY SUMMATION
+# 2215; DIVISION SLASH
+# 2216; SET MINUS
+# 221A; SQUARE ROOT
+# 221B; CUBE ROOT
+# 221C; FOURTH ROOT
+# 221D; PROPORTIONAL TO
+# 221F; RIGHT ANGLE
+# 2220; ANGLE
+# 2221; MEASURED ANGLE
+# 2222; SPHERICAL ANGLE
+# 2224; DOES NOT DIVIDE
+# 2226; NOT PARALLEL TO
+# 222B; INTEGRAL
+# 222C; DOUBLE INTEGRAL
+# 222D; TRIPLE INTEGRAL
+# 222E; CONTOUR INTEGRAL
+# 222F; SURFACE INTEGRAL
+# 2230; VOLUME INTEGRAL
+# 2231; CLOCKWISE INTEGRAL
+# 2232; CLOCKWISE CONTOUR INTEGRAL
+# 2233; ANTICLOCKWISE CONTOUR INTEGRAL
+# 2239; EXCESS
+# 223B; HOMOTHETIC
+# 223E; INVERTED LAZY S
+# 223F; SINE WAVE
+# 2240; WREATH PRODUCT
+# 2241; NOT TILDE
+# 2242; MINUS TILDE
+# 2244; NOT ASYMPTOTICALLY EQUAL TO
+# 2245; APPROXIMATELY EQUAL TO
+# 2246; APPROXIMATELY BUT NOT ACTUALLY EQUAL TO
+# 2247; NEITHER APPROXIMATELY NOR ACTUALLY EQUAL TO
+# 2248; ALMOST EQUAL TO
+# 2249; NOT ALMOST EQUAL TO
+# 224A; ALMOST EQUAL OR EQUAL TO
+# 224B; TRIPLE TILDE
+# 224C; ALL EQUAL TO
+# 225F; QUESTIONED EQUAL TO
+# 2260; NOT EQUAL TO
+# 2262; NOT IDENTICAL TO
+# 228C; MULTISET
+# 2298; CIRCLED DIVISION SLASH
+# 22A6; ASSERTION
+# 22A7; MODELS
+# 22A8; TRUE
+# 22A9; FORCES
+# 22AA; TRIPLE VERTICAL BAR RIGHT TURNSTILE
+# 22AB; DOUBLE VERTICAL BAR DOUBLE RIGHT TURNSTILE
+# 22AC; DOES NOT PROVE
+# 22AD; NOT TRUE
+# 22AE; DOES NOT FORCE
+# 22AF; NEGATED DOUBLE VERTICAL BAR DOUBLE RIGHT TURNSTILE
+# 22B8; MULTIMAP
+# 22BE; RIGHT ANGLE WITH ARC
+# 22BF; RIGHT TRIANGLE
+# 2320; TOP HALF INTEGRAL
+# 2321; BOTTOM HALF INTEGRAL
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
0000 0008 BN
ffe8 ffee ON
fff9 fffb BN
fffc fffd ON
+f0000 ffffd L
+100000 10fffd L
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
0000 007F Basic Latin
--- /dev/null
+# CaseFolding-2.txt
+#
+# Case Folding Properties
+#
+# This file is a supplement to the UnicodeData file.
+# It provides a case folding mapping generated from the Unicode Character Database.
+# If all characters are mapped according to this mapping, then
+# case differences (according to UnicodeData.txt and SpecialCasing.txt)
+# are eliminated.
+#
+# For information on case folding, see
+# UTR #21 Case Mappings, at http://www.unicode.org/unicode/reports/tr21/
+#
+# These are informative character properties.
+#
+# Send comments to mark@unicode.org
+#
+# ================================================================================
+# Format
+# ================================================================================
+# The entries in this file are in the following machine-readable format:
+#
+# <code>; <status>; <mapping>; # <name>
+#
+# The status is:
+# L (for Lowercase) if the case mapping matches the standard 1-1 lowercase mapping
+# E (for exception) if it does not.
+#
+# The mapping may consist of multiple characters.
+# If so, they are separated by spaces.
+#
+# =================================================================
+
+0041; L; 0061; #LATIN CAPITAL LETTER A
+0042; L; 0062; #LATIN CAPITAL LETTER B
+0043; L; 0063; #LATIN CAPITAL LETTER C
+0044; L; 0064; #LATIN CAPITAL LETTER D
+0045; L; 0065; #LATIN CAPITAL LETTER E
+0046; L; 0066; #LATIN CAPITAL LETTER F
+0047; L; 0067; #LATIN CAPITAL LETTER G
+0048; L; 0068; #LATIN CAPITAL LETTER H
+0049; L; 0069; #LATIN CAPITAL LETTER I
+004A; L; 006A; #LATIN CAPITAL LETTER J
+004B; L; 006B; #LATIN CAPITAL LETTER K
+004C; L; 006C; #LATIN CAPITAL LETTER L
+004D; L; 006D; #LATIN CAPITAL LETTER M
+004E; L; 006E; #LATIN CAPITAL LETTER N
+004F; L; 006F; #LATIN CAPITAL LETTER O
+0050; L; 0070; #LATIN CAPITAL LETTER P
+0051; L; 0071; #LATIN CAPITAL LETTER Q
+0052; L; 0072; #LATIN CAPITAL LETTER R
+0053; L; 0073; #LATIN CAPITAL LETTER S
+0054; L; 0074; #LATIN CAPITAL LETTER T
+0055; L; 0075; #LATIN CAPITAL LETTER U
+0056; L; 0076; #LATIN CAPITAL LETTER V
+0057; L; 0077; #LATIN CAPITAL LETTER W
+0058; L; 0078; #LATIN CAPITAL LETTER X
+0059; L; 0079; #LATIN CAPITAL LETTER Y
+005A; L; 007A; #LATIN CAPITAL LETTER Z
+00B5; E; 03BC; #MICRO SIGN
+00C0; L; 00E0; #LATIN CAPITAL LETTER A WITH GRAVE
+00C1; L; 00E1; #LATIN CAPITAL LETTER A WITH ACUTE
+00C2; L; 00E2; #LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+00C3; L; 00E3; #LATIN CAPITAL LETTER A WITH TILDE
+00C4; L; 00E4; #LATIN CAPITAL LETTER A WITH DIAERESIS
+00C5; L; 00E5; #LATIN CAPITAL LETTER A WITH RING ABOVE
+00C6; L; 00E6; #LATIN CAPITAL LETTER AE
+00C7; L; 00E7; #LATIN CAPITAL LETTER C WITH CEDILLA
+00C8; L; 00E8; #LATIN CAPITAL LETTER E WITH GRAVE
+00C9; L; 00E9; #LATIN CAPITAL LETTER E WITH ACUTE
+00CA; L; 00EA; #LATIN CAPITAL LETTER E WITH CIRCUMFLEX
+00CB; L; 00EB; #LATIN CAPITAL LETTER E WITH DIAERESIS
+00CC; L; 00EC; #LATIN CAPITAL LETTER I WITH GRAVE
+00CD; L; 00ED; #LATIN CAPITAL LETTER I WITH ACUTE
+00CE; L; 00EE; #LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+00CF; L; 00EF; #LATIN CAPITAL LETTER I WITH DIAERESIS
+00D0; L; 00F0; #LATIN CAPITAL LETTER ETH
+00D1; L; 00F1; #LATIN CAPITAL LETTER N WITH TILDE
+00D2; L; 00F2; #LATIN CAPITAL LETTER O WITH GRAVE
+00D3; L; 00F3; #LATIN CAPITAL LETTER O WITH ACUTE
+00D4; L; 00F4; #LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+00D5; L; 00F5; #LATIN CAPITAL LETTER O WITH TILDE
+00D6; L; 00F6; #LATIN CAPITAL LETTER O WITH DIAERESIS
+00D8; L; 00F8; #LATIN CAPITAL LETTER O WITH STROKE
+00D9; L; 00F9; #LATIN CAPITAL LETTER U WITH GRAVE
+00DA; L; 00FA; #LATIN CAPITAL LETTER U WITH ACUTE
+00DB; L; 00FB; #LATIN CAPITAL LETTER U WITH CIRCUMFLEX
+00DC; L; 00FC; #LATIN CAPITAL LETTER U WITH DIAERESIS
+00DD; L; 00FD; #LATIN CAPITAL LETTER Y WITH ACUTE
+00DE; L; 00FE; #LATIN CAPITAL LETTER THORN
+00DF; E; 0073 0073; #LATIN SMALL LETTER SHARP S
+0100; L; 0101; #LATIN CAPITAL LETTER A WITH MACRON
+0102; L; 0103; #LATIN CAPITAL LETTER A WITH BREVE
+0104; L; 0105; #LATIN CAPITAL LETTER A WITH OGONEK
+0106; L; 0107; #LATIN CAPITAL LETTER C WITH ACUTE
+0108; L; 0109; #LATIN CAPITAL LETTER C WITH CIRCUMFLEX
+010A; L; 010B; #LATIN CAPITAL LETTER C WITH DOT ABOVE
+010C; L; 010D; #LATIN CAPITAL LETTER C WITH CARON
+010E; L; 010F; #LATIN CAPITAL LETTER D WITH CARON
+0110; L; 0111; #LATIN CAPITAL LETTER D WITH STROKE
+0112; L; 0113; #LATIN CAPITAL LETTER E WITH MACRON
+0114; L; 0115; #LATIN CAPITAL LETTER E WITH BREVE
+0116; L; 0117; #LATIN CAPITAL LETTER E WITH DOT ABOVE
+0118; L; 0119; #LATIN CAPITAL LETTER E WITH OGONEK
+011A; L; 011B; #LATIN CAPITAL LETTER E WITH CARON
+011C; L; 011D; #LATIN CAPITAL LETTER G WITH CIRCUMFLEX
+011E; L; 011F; #LATIN CAPITAL LETTER G WITH BREVE
+0120; L; 0121; #LATIN CAPITAL LETTER G WITH DOT ABOVE
+0122; L; 0123; #LATIN CAPITAL LETTER G WITH CEDILLA
+0124; L; 0125; #LATIN CAPITAL LETTER H WITH CIRCUMFLEX
+0126; L; 0127; #LATIN CAPITAL LETTER H WITH STROKE
+0128; L; 0129; #LATIN CAPITAL LETTER I WITH TILDE
+012A; L; 012B; #LATIN CAPITAL LETTER I WITH MACRON
+012C; L; 012D; #LATIN CAPITAL LETTER I WITH BREVE
+012E; L; 012F; #LATIN CAPITAL LETTER I WITH OGONEK
+0130; L; 0069; #LATIN CAPITAL LETTER I WITH DOT ABOVE
+0131; E; 0069; #LATIN SMALL LETTER DOTLESS I
+0132; L; 0133; #LATIN CAPITAL LIGATURE IJ
+0134; L; 0135; #LATIN CAPITAL LETTER J WITH CIRCUMFLEX
+0136; L; 0137; #LATIN CAPITAL LETTER K WITH CEDILLA
+0139; L; 013A; #LATIN CAPITAL LETTER L WITH ACUTE
+013B; L; 013C; #LATIN CAPITAL LETTER L WITH CEDILLA
+013D; L; 013E; #LATIN CAPITAL LETTER L WITH CARON
+013F; L; 0140; #LATIN CAPITAL LETTER L WITH MIDDLE DOT
+0141; L; 0142; #LATIN CAPITAL LETTER L WITH STROKE
+0143; L; 0144; #LATIN CAPITAL LETTER N WITH ACUTE
+0145; L; 0146; #LATIN CAPITAL LETTER N WITH CEDILLA
+0147; L; 0148; #LATIN CAPITAL LETTER N WITH CARON
+0149; E; 02BC 006E; #LATIN SMALL LETTER N PRECEDED BY APOSTROPHE
+014A; L; 014B; #LATIN CAPITAL LETTER ENG
+014C; L; 014D; #LATIN CAPITAL LETTER O WITH MACRON
+014E; L; 014F; #LATIN CAPITAL LETTER O WITH BREVE
+0150; L; 0151; #LATIN CAPITAL LETTER O WITH DOUBLE ACUTE
+0152; L; 0153; #LATIN CAPITAL LIGATURE OE
+0154; L; 0155; #LATIN CAPITAL LETTER R WITH ACUTE
+0156; L; 0157; #LATIN CAPITAL LETTER R WITH CEDILLA
+0158; L; 0159; #LATIN CAPITAL LETTER R WITH CARON
+015A; L; 015B; #LATIN CAPITAL LETTER S WITH ACUTE
+015C; L; 015D; #LATIN CAPITAL LETTER S WITH CIRCUMFLEX
+015E; L; 015F; #LATIN CAPITAL LETTER S WITH CEDILLA
+0160; L; 0161; #LATIN CAPITAL LETTER S WITH CARON
+0162; L; 0163; #LATIN CAPITAL LETTER T WITH CEDILLA
+0164; L; 0165; #LATIN CAPITAL LETTER T WITH CARON
+0166; L; 0167; #LATIN CAPITAL LETTER T WITH STROKE
+0168; L; 0169; #LATIN CAPITAL LETTER U WITH TILDE
+016A; L; 016B; #LATIN CAPITAL LETTER U WITH MACRON
+016C; L; 016D; #LATIN CAPITAL LETTER U WITH BREVE
+016E; L; 016F; #LATIN CAPITAL LETTER U WITH RING ABOVE
+0170; L; 0171; #LATIN CAPITAL LETTER U WITH DOUBLE ACUTE
+0172; L; 0173; #LATIN CAPITAL LETTER U WITH OGONEK
+0174; L; 0175; #LATIN CAPITAL LETTER W WITH CIRCUMFLEX
+0176; L; 0177; #LATIN CAPITAL LETTER Y WITH CIRCUMFLEX
+0178; L; 00FF; #LATIN CAPITAL LETTER Y WITH DIAERESIS
+0179; L; 017A; #LATIN CAPITAL LETTER Z WITH ACUTE
+017B; L; 017C; #LATIN CAPITAL LETTER Z WITH DOT ABOVE
+017D; L; 017E; #LATIN CAPITAL LETTER Z WITH CARON
+017F; E; 0073; #LATIN SMALL LETTER LONG S
+0181; L; 0253; #LATIN CAPITAL LETTER B WITH HOOK
+0182; L; 0183; #LATIN CAPITAL LETTER B WITH TOPBAR
+0184; L; 0185; #LATIN CAPITAL LETTER TONE SIX
+0186; L; 0254; #LATIN CAPITAL LETTER OPEN O
+0187; L; 0188; #LATIN CAPITAL LETTER C WITH HOOK
+0189; L; 0256; #LATIN CAPITAL LETTER AFRICAN D
+018A; L; 0257; #LATIN CAPITAL LETTER D WITH HOOK
+018B; L; 018C; #LATIN CAPITAL LETTER D WITH TOPBAR
+018E; L; 01DD; #LATIN CAPITAL LETTER REVERSED E
+018F; L; 0259; #LATIN CAPITAL LETTER SCHWA
+0190; L; 025B; #LATIN CAPITAL LETTER OPEN E
+0191; L; 0192; #LATIN CAPITAL LETTER F WITH HOOK
+0193; L; 0260; #LATIN CAPITAL LETTER G WITH HOOK
+0194; L; 0263; #LATIN CAPITAL LETTER GAMMA
+0196; L; 0269; #LATIN CAPITAL LETTER IOTA
+0197; L; 0268; #LATIN CAPITAL LETTER I WITH STROKE
+0198; L; 0199; #LATIN CAPITAL LETTER K WITH HOOK
+019C; L; 026F; #LATIN CAPITAL LETTER TURNED M
+019D; L; 0272; #LATIN CAPITAL LETTER N WITH LEFT HOOK
+019F; L; 0275; #LATIN CAPITAL LETTER O WITH MIDDLE TILDE
+01A0; L; 01A1; #LATIN CAPITAL LETTER O WITH HORN
+01A2; L; 01A3; #LATIN CAPITAL LETTER OI
+01A4; L; 01A5; #LATIN CAPITAL LETTER P WITH HOOK
+01A6; L; 0280; #LATIN LETTER YR
+01A7; L; 01A8; #LATIN CAPITAL LETTER TONE TWO
+01A9; L; 0283; #LATIN CAPITAL LETTER ESH
+01AC; L; 01AD; #LATIN CAPITAL LETTER T WITH HOOK
+01AE; L; 0288; #LATIN CAPITAL LETTER T WITH RETROFLEX HOOK
+01AF; L; 01B0; #LATIN CAPITAL LETTER U WITH HORN
+01B1; L; 028A; #LATIN CAPITAL LETTER UPSILON
+01B2; L; 028B; #LATIN CAPITAL LETTER V WITH HOOK
+01B3; L; 01B4; #LATIN CAPITAL LETTER Y WITH HOOK
+01B5; L; 01B6; #LATIN CAPITAL LETTER Z WITH STROKE
+01B7; L; 0292; #LATIN CAPITAL LETTER EZH
+01B8; L; 01B9; #LATIN CAPITAL LETTER EZH REVERSED
+01BC; L; 01BD; #LATIN CAPITAL LETTER TONE FIVE
+01C4; L; 01C6; #LATIN CAPITAL LETTER DZ WITH CARON
+01C5; L; 01C6; #LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON
+01C7; L; 01C9; #LATIN CAPITAL LETTER LJ
+01C8; L; 01C9; #LATIN CAPITAL LETTER L WITH SMALL LETTER J
+01CA; L; 01CC; #LATIN CAPITAL LETTER NJ
+01CB; L; 01CC; #LATIN CAPITAL LETTER N WITH SMALL LETTER J
+01CD; L; 01CE; #LATIN CAPITAL LETTER A WITH CARON
+01CF; L; 01D0; #LATIN CAPITAL LETTER I WITH CARON
+01D1; L; 01D2; #LATIN CAPITAL LETTER O WITH CARON
+01D3; L; 01D4; #LATIN CAPITAL LETTER U WITH CARON
+01D5; L; 01D6; #LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
+01D7; L; 01D8; #LATIN CAPITAL LETTER U WITH DIAERESIS AND ACUTE
+01D9; L; 01DA; #LATIN CAPITAL LETTER U WITH DIAERESIS AND CARON
+01DB; L; 01DC; #LATIN CAPITAL LETTER U WITH DIAERESIS AND GRAVE
+01DE; L; 01DF; #LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON
+01E0; L; 01E1; #LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON
+01E2; L; 01E3; #LATIN CAPITAL LETTER AE WITH MACRON
+01E4; L; 01E5; #LATIN CAPITAL LETTER G WITH STROKE
+01E6; L; 01E7; #LATIN CAPITAL LETTER G WITH CARON
+01E8; L; 01E9; #LATIN CAPITAL LETTER K WITH CARON
+01EA; L; 01EB; #LATIN CAPITAL LETTER O WITH OGONEK
+01EC; L; 01ED; #LATIN CAPITAL LETTER O WITH OGONEK AND MACRON
+01EE; L; 01EF; #LATIN CAPITAL LETTER EZH WITH CARON
+01F0; E; 006A 030C; #LATIN SMALL LETTER J WITH CARON
+01F1; L; 01F3; #LATIN CAPITAL LETTER DZ
+01F2; L; 01F3; #LATIN CAPITAL LETTER D WITH SMALL LETTER Z
+01F4; L; 01F5; #LATIN CAPITAL LETTER G WITH ACUTE
+01F6; L; 0195; #LATIN CAPITAL LETTER HWAIR
+01F7; L; 01BF; #LATIN CAPITAL LETTER WYNN
+01F8; L; 01F9; #LATIN CAPITAL LETTER N WITH GRAVE
+01FA; L; 01FB; #LATIN CAPITAL LETTER A WITH RING ABOVE AND ACUTE
+01FC; L; 01FD; #LATIN CAPITAL LETTER AE WITH ACUTE
+01FE; L; 01FF; #LATIN CAPITAL LETTER O WITH STROKE AND ACUTE
+0200; L; 0201; #LATIN CAPITAL LETTER A WITH DOUBLE GRAVE
+0202; L; 0203; #LATIN CAPITAL LETTER A WITH INVERTED BREVE
+0204; L; 0205; #LATIN CAPITAL LETTER E WITH DOUBLE GRAVE
+0206; L; 0207; #LATIN CAPITAL LETTER E WITH INVERTED BREVE
+0208; L; 0209; #LATIN CAPITAL LETTER I WITH DOUBLE GRAVE
+020A; L; 020B; #LATIN CAPITAL LETTER I WITH INVERTED BREVE
+020C; L; 020D; #LATIN CAPITAL LETTER O WITH DOUBLE GRAVE
+020E; L; 020F; #LATIN CAPITAL LETTER O WITH INVERTED BREVE
+0210; L; 0211; #LATIN CAPITAL LETTER R WITH DOUBLE GRAVE
+0212; L; 0213; #LATIN CAPITAL LETTER R WITH INVERTED BREVE
+0214; L; 0215; #LATIN CAPITAL LETTER U WITH DOUBLE GRAVE
+0216; L; 0217; #LATIN CAPITAL LETTER U WITH INVERTED BREVE
+0218; L; 0219; #LATIN CAPITAL LETTER S WITH COMMA BELOW
+021A; L; 021B; #LATIN CAPITAL LETTER T WITH COMMA BELOW
+021C; L; 021D; #LATIN CAPITAL LETTER YOGH
+021E; L; 021F; #LATIN CAPITAL LETTER H WITH CARON
+0222; L; 0223; #LATIN CAPITAL LETTER OU
+0224; L; 0225; #LATIN CAPITAL LETTER Z WITH HOOK
+0226; L; 0227; #LATIN CAPITAL LETTER A WITH DOT ABOVE
+0228; L; 0229; #LATIN CAPITAL LETTER E WITH CEDILLA
+022A; L; 022B; #LATIN CAPITAL LETTER O WITH DIAERESIS AND MACRON
+022C; L; 022D; #LATIN CAPITAL LETTER O WITH TILDE AND MACRON
+022E; L; 022F; #LATIN CAPITAL LETTER O WITH DOT ABOVE
+0230; L; 0231; #LATIN CAPITAL LETTER O WITH DOT ABOVE AND MACRON
+0232; L; 0233; #LATIN CAPITAL LETTER Y WITH MACRON
+0345; E; 03B9; #COMBINING GREEK YPOGEGRAMMENI
+0386; L; 03AC; #GREEK CAPITAL LETTER ALPHA WITH TONOS
+0388; L; 03AD; #GREEK CAPITAL LETTER EPSILON WITH TONOS
+0389; L; 03AE; #GREEK CAPITAL LETTER ETA WITH TONOS
+038A; L; 03AF; #GREEK CAPITAL LETTER IOTA WITH TONOS
+038C; L; 03CC; #GREEK CAPITAL LETTER OMICRON WITH TONOS
+038E; L; 03CD; #GREEK CAPITAL LETTER UPSILON WITH TONOS
+038F; L; 03CE; #GREEK CAPITAL LETTER OMEGA WITH TONOS
+0390; E; 03B9 0308 0301; #GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
+0391; L; 03B1; #GREEK CAPITAL LETTER ALPHA
+0392; L; 03B2; #GREEK CAPITAL LETTER BETA
+0393; L; 03B3; #GREEK CAPITAL LETTER GAMMA
+0394; L; 03B4; #GREEK CAPITAL LETTER DELTA
+0395; L; 03B5; #GREEK CAPITAL LETTER EPSILON
+0396; L; 03B6; #GREEK CAPITAL LETTER ZETA
+0397; L; 03B7; #GREEK CAPITAL LETTER ETA
+0398; L; 03B8; #GREEK CAPITAL LETTER THETA
+0399; L; 03B9; #GREEK CAPITAL LETTER IOTA
+039A; L; 03BA; #GREEK CAPITAL LETTER KAPPA
+039B; L; 03BB; #GREEK CAPITAL LETTER LAMDA
+039C; L; 03BC; #GREEK CAPITAL LETTER MU
+039D; L; 03BD; #GREEK CAPITAL LETTER NU
+039E; L; 03BE; #GREEK CAPITAL LETTER XI
+039F; L; 03BF; #GREEK CAPITAL LETTER OMICRON
+03A0; L; 03C0; #GREEK CAPITAL LETTER PI
+03A1; L; 03C1; #GREEK CAPITAL LETTER RHO
+03A3; E; 03C2; #GREEK CAPITAL LETTER SIGMA
+03A4; L; 03C4; #GREEK CAPITAL LETTER TAU
+03A5; L; 03C5; #GREEK CAPITAL LETTER UPSILON
+03A6; L; 03C6; #GREEK CAPITAL LETTER PHI
+03A7; L; 03C7; #GREEK CAPITAL LETTER CHI
+03A8; L; 03C8; #GREEK CAPITAL LETTER PSI
+03A9; L; 03C9; #GREEK CAPITAL LETTER OMEGA
+03AA; L; 03CA; #GREEK CAPITAL LETTER IOTA WITH DIALYTIKA
+03AB; L; 03CB; #GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA
+03B0; E; 03C5 0308 0301; #GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
+03C2; L; 03C2; #GREEK SMALL LETTER FINAL SIGMA
+03C3; E; 03C2; #GREEK SMALL LETTER SIGMA
+03D0; E; 03B2; #GREEK BETA SYMBOL
+03D1; E; 03B8; #GREEK THETA SYMBOL
+03D5; E; 03C6; #GREEK PHI SYMBOL
+03D6; E; 03C0; #GREEK PI SYMBOL
+03DA; L; 03DB; #GREEK LETTER STIGMA
+03DC; L; 03DD; #GREEK LETTER DIGAMMA
+03DE; L; 03DF; #GREEK LETTER KOPPA
+03E0; L; 03E1; #GREEK LETTER SAMPI
+03E2; L; 03E3; #COPTIC CAPITAL LETTER SHEI
+03E4; L; 03E5; #COPTIC CAPITAL LETTER FEI
+03E6; L; 03E7; #COPTIC CAPITAL LETTER KHEI
+03E8; L; 03E9; #COPTIC CAPITAL LETTER HORI
+03EA; L; 03EB; #COPTIC CAPITAL LETTER GANGIA
+03EC; L; 03ED; #COPTIC CAPITAL LETTER SHIMA
+03EE; L; 03EF; #COPTIC CAPITAL LETTER DEI
+03F0; E; 03BA; #GREEK KAPPA SYMBOL
+03F1; E; 03C1; #GREEK RHO SYMBOL
+03F2; E; 03C2; #GREEK LUNATE SIGMA SYMBOL
+0400; L; 0450; #CYRILLIC CAPITAL LETTER IE WITH GRAVE
+0401; L; 0451; #CYRILLIC CAPITAL LETTER IO
+0402; L; 0452; #CYRILLIC CAPITAL LETTER DJE
+0403; L; 0453; #CYRILLIC CAPITAL LETTER GJE
+0404; L; 0454; #CYRILLIC CAPITAL LETTER UKRAINIAN IE
+0405; L; 0455; #CYRILLIC CAPITAL LETTER DZE
+0406; L; 0456; #CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
+0407; L; 0457; #CYRILLIC CAPITAL LETTER YI
+0408; L; 0458; #CYRILLIC CAPITAL LETTER JE
+0409; L; 0459; #CYRILLIC CAPITAL LETTER LJE
+040A; L; 045A; #CYRILLIC CAPITAL LETTER NJE
+040B; L; 045B; #CYRILLIC CAPITAL LETTER TSHE
+040C; L; 045C; #CYRILLIC CAPITAL LETTER KJE
+040D; L; 045D; #CYRILLIC CAPITAL LETTER I WITH GRAVE
+040E; L; 045E; #CYRILLIC CAPITAL LETTER SHORT U
+040F; L; 045F; #CYRILLIC CAPITAL LETTER DZHE
+0410; L; 0430; #CYRILLIC CAPITAL LETTER A
+0411; L; 0431; #CYRILLIC CAPITAL LETTER BE
+0412; L; 0432; #CYRILLIC CAPITAL LETTER VE
+0413; L; 0433; #CYRILLIC CAPITAL LETTER GHE
+0414; L; 0434; #CYRILLIC CAPITAL LETTER DE
+0415; L; 0435; #CYRILLIC CAPITAL LETTER IE
+0416; L; 0436; #CYRILLIC CAPITAL LETTER ZHE
+0417; L; 0437; #CYRILLIC CAPITAL LETTER ZE
+0418; L; 0438; #CYRILLIC CAPITAL LETTER I
+0419; L; 0439; #CYRILLIC CAPITAL LETTER SHORT I
+041A; L; 043A; #CYRILLIC CAPITAL LETTER KA
+041B; L; 043B; #CYRILLIC CAPITAL LETTER EL
+041C; L; 043C; #CYRILLIC CAPITAL LETTER EM
+041D; L; 043D; #CYRILLIC CAPITAL LETTER EN
+041E; L; 043E; #CYRILLIC CAPITAL LETTER O
+041F; L; 043F; #CYRILLIC CAPITAL LETTER PE
+0420; L; 0440; #CYRILLIC CAPITAL LETTER ER
+0421; L; 0441; #CYRILLIC CAPITAL LETTER ES
+0422; L; 0442; #CYRILLIC CAPITAL LETTER TE
+0423; L; 0443; #CYRILLIC CAPITAL LETTER U
+0424; L; 0444; #CYRILLIC CAPITAL LETTER EF
+0425; L; 0445; #CYRILLIC CAPITAL LETTER HA
+0426; L; 0446; #CYRILLIC CAPITAL LETTER TSE
+0427; L; 0447; #CYRILLIC CAPITAL LETTER CHE
+0428; L; 0448; #CYRILLIC CAPITAL LETTER SHA
+0429; L; 0449; #CYRILLIC CAPITAL LETTER SHCHA
+042A; L; 044A; #CYRILLIC CAPITAL LETTER HARD SIGN
+042B; L; 044B; #CYRILLIC CAPITAL LETTER YERU
+042C; L; 044C; #CYRILLIC CAPITAL LETTER SOFT SIGN
+042D; L; 044D; #CYRILLIC CAPITAL LETTER E
+042E; L; 044E; #CYRILLIC CAPITAL LETTER YU
+042F; L; 044F; #CYRILLIC CAPITAL LETTER YA
+0460; L; 0461; #CYRILLIC CAPITAL LETTER OMEGA
+0462; L; 0463; #CYRILLIC CAPITAL LETTER YAT
+0464; L; 0465; #CYRILLIC CAPITAL LETTER IOTIFIED E
+0466; L; 0467; #CYRILLIC CAPITAL LETTER LITTLE YUS
+0468; L; 0469; #CYRILLIC CAPITAL LETTER IOTIFIED LITTLE YUS
+046A; L; 046B; #CYRILLIC CAPITAL LETTER BIG YUS
+046C; L; 046D; #CYRILLIC CAPITAL LETTER IOTIFIED BIG YUS
+046E; L; 046F; #CYRILLIC CAPITAL LETTER KSI
+0470; L; 0471; #CYRILLIC CAPITAL LETTER PSI
+0472; L; 0473; #CYRILLIC CAPITAL LETTER FITA
+0474; L; 0475; #CYRILLIC CAPITAL LETTER IZHITSA
+0476; L; 0477; #CYRILLIC CAPITAL LETTER IZHITSA WITH DOUBLE GRAVE ACCENT
+0478; L; 0479; #CYRILLIC CAPITAL LETTER UK
+047A; L; 047B; #CYRILLIC CAPITAL LETTER ROUND OMEGA
+047C; L; 047D; #CYRILLIC CAPITAL LETTER OMEGA WITH TITLO
+047E; L; 047F; #CYRILLIC CAPITAL LETTER OT
+0480; L; 0481; #CYRILLIC CAPITAL LETTER KOPPA
+048C; L; 048D; #CYRILLIC CAPITAL LETTER SEMISOFT SIGN
+048E; L; 048F; #CYRILLIC CAPITAL LETTER ER WITH TICK
+0490; L; 0491; #CYRILLIC CAPITAL LETTER GHE WITH UPTURN
+0492; L; 0493; #CYRILLIC CAPITAL LETTER GHE WITH STROKE
+0494; L; 0495; #CYRILLIC CAPITAL LETTER GHE WITH MIDDLE HOOK
+0496; L; 0497; #CYRILLIC CAPITAL LETTER ZHE WITH DESCENDER
+0498; L; 0499; #CYRILLIC CAPITAL LETTER ZE WITH DESCENDER
+049A; L; 049B; #CYRILLIC CAPITAL LETTER KA WITH DESCENDER
+049C; L; 049D; #CYRILLIC CAPITAL LETTER KA WITH VERTICAL STROKE
+049E; L; 049F; #CYRILLIC CAPITAL LETTER KA WITH STROKE
+04A0; L; 04A1; #CYRILLIC CAPITAL LETTER BASHKIR KA
+04A2; L; 04A3; #CYRILLIC CAPITAL LETTER EN WITH DESCENDER
+04A4; L; 04A5; #CYRILLIC CAPITAL LIGATURE EN GHE
+04A6; L; 04A7; #CYRILLIC CAPITAL LETTER PE WITH MIDDLE HOOK
+04A8; L; 04A9; #CYRILLIC CAPITAL LETTER ABKHASIAN HA
+04AA; L; 04AB; #CYRILLIC CAPITAL LETTER ES WITH DESCENDER
+04AC; L; 04AD; #CYRILLIC CAPITAL LETTER TE WITH DESCENDER
+04AE; L; 04AF; #CYRILLIC CAPITAL LETTER STRAIGHT U
+04B0; L; 04B1; #CYRILLIC CAPITAL LETTER STRAIGHT U WITH STROKE
+04B2; L; 04B3; #CYRILLIC CAPITAL LETTER HA WITH DESCENDER
+04B4; L; 04B5; #CYRILLIC CAPITAL LIGATURE TE TSE
+04B6; L; 04B7; #CYRILLIC CAPITAL LETTER CHE WITH DESCENDER
+04B8; L; 04B9; #CYRILLIC CAPITAL LETTER CHE WITH VERTICAL STROKE
+04BA; L; 04BB; #CYRILLIC CAPITAL LETTER SHHA
+04BC; L; 04BD; #CYRILLIC CAPITAL LETTER ABKHASIAN CHE
+04BE; L; 04BF; #CYRILLIC CAPITAL LETTER ABKHASIAN CHE WITH DESCENDER
+04C1; L; 04C2; #CYRILLIC CAPITAL LETTER ZHE WITH BREVE
+04C3; L; 04C4; #CYRILLIC CAPITAL LETTER KA WITH HOOK
+04C7; L; 04C8; #CYRILLIC CAPITAL LETTER EN WITH HOOK
+04CB; L; 04CC; #CYRILLIC CAPITAL LETTER KHAKASSIAN CHE
+04D0; L; 04D1; #CYRILLIC CAPITAL LETTER A WITH BREVE
+04D2; L; 04D3; #CYRILLIC CAPITAL LETTER A WITH DIAERESIS
+04D4; L; 04D5; #CYRILLIC CAPITAL LIGATURE A IE
+04D6; L; 04D7; #CYRILLIC CAPITAL LETTER IE WITH BREVE
+04D8; L; 04D9; #CYRILLIC CAPITAL LETTER SCHWA
+04DA; L; 04DB; #CYRILLIC CAPITAL LETTER SCHWA WITH DIAERESIS
+04DC; L; 04DD; #CYRILLIC CAPITAL LETTER ZHE WITH DIAERESIS
+04DE; L; 04DF; #CYRILLIC CAPITAL LETTER ZE WITH DIAERESIS
+04E0; L; 04E1; #CYRILLIC CAPITAL LETTER ABKHASIAN DZE
+04E2; L; 04E3; #CYRILLIC CAPITAL LETTER I WITH MACRON
+04E4; L; 04E5; #CYRILLIC CAPITAL LETTER I WITH DIAERESIS
+04E6; L; 04E7; #CYRILLIC CAPITAL LETTER O WITH DIAERESIS
+04E8; L; 04E9; #CYRILLIC CAPITAL LETTER BARRED O
+04EA; L; 04EB; #CYRILLIC CAPITAL LETTER BARRED O WITH DIAERESIS
+04EC; L; 04ED; #CYRILLIC CAPITAL LETTER E WITH DIAERESIS
+04EE; L; 04EF; #CYRILLIC CAPITAL LETTER U WITH MACRON
+04F0; L; 04F1; #CYRILLIC CAPITAL LETTER U WITH DIAERESIS
+04F2; L; 04F3; #CYRILLIC CAPITAL LETTER U WITH DOUBLE ACUTE
+04F4; L; 04F5; #CYRILLIC CAPITAL LETTER CHE WITH DIAERESIS
+04F8; L; 04F9; #CYRILLIC CAPITAL LETTER YERU WITH DIAERESIS
+0531; L; 0561; #ARMENIAN CAPITAL LETTER AYB
+0532; L; 0562; #ARMENIAN CAPITAL LETTER BEN
+0533; L; 0563; #ARMENIAN CAPITAL LETTER GIM
+0534; L; 0564; #ARMENIAN CAPITAL LETTER DA
+0535; L; 0565; #ARMENIAN CAPITAL LETTER ECH
+0536; L; 0566; #ARMENIAN CAPITAL LETTER ZA
+0537; L; 0567; #ARMENIAN CAPITAL LETTER EH
+0538; L; 0568; #ARMENIAN CAPITAL LETTER ET
+0539; L; 0569; #ARMENIAN CAPITAL LETTER TO
+053A; L; 056A; #ARMENIAN CAPITAL LETTER ZHE
+053B; L; 056B; #ARMENIAN CAPITAL LETTER INI
+053C; L; 056C; #ARMENIAN CAPITAL LETTER LIWN
+053D; L; 056D; #ARMENIAN CAPITAL LETTER XEH
+053E; L; 056E; #ARMENIAN CAPITAL LETTER CA
+053F; L; 056F; #ARMENIAN CAPITAL LETTER KEN
+0540; L; 0570; #ARMENIAN CAPITAL LETTER HO
+0541; L; 0571; #ARMENIAN CAPITAL LETTER JA
+0542; L; 0572; #ARMENIAN CAPITAL LETTER GHAD
+0543; L; 0573; #ARMENIAN CAPITAL LETTER CHEH
+0544; L; 0574; #ARMENIAN CAPITAL LETTER MEN
+0545; L; 0575; #ARMENIAN CAPITAL LETTER YI
+0546; L; 0576; #ARMENIAN CAPITAL LETTER NOW
+0547; L; 0577; #ARMENIAN CAPITAL LETTER SHA
+0548; L; 0578; #ARMENIAN CAPITAL LETTER VO
+0549; L; 0579; #ARMENIAN CAPITAL LETTER CHA
+054A; L; 057A; #ARMENIAN CAPITAL LETTER PEH
+054B; L; 057B; #ARMENIAN CAPITAL LETTER JHEH
+054C; L; 057C; #ARMENIAN CAPITAL LETTER RA
+054D; L; 057D; #ARMENIAN CAPITAL LETTER SEH
+054E; L; 057E; #ARMENIAN CAPITAL LETTER VEW
+054F; L; 057F; #ARMENIAN CAPITAL LETTER TIWN
+0550; L; 0580; #ARMENIAN CAPITAL LETTER REH
+0551; L; 0581; #ARMENIAN CAPITAL LETTER CO
+0552; L; 0582; #ARMENIAN CAPITAL LETTER YIWN
+0553; L; 0583; #ARMENIAN CAPITAL LETTER PIWR
+0554; L; 0584; #ARMENIAN CAPITAL LETTER KEH
+0555; L; 0585; #ARMENIAN CAPITAL LETTER OH
+0556; L; 0586; #ARMENIAN CAPITAL LETTER FEH
+0587; E; 0565 0582; #ARMENIAN SMALL LIGATURE ECH YIWN
+1E00; L; 1E01; #LATIN CAPITAL LETTER A WITH RING BELOW
+1E02; L; 1E03; #LATIN CAPITAL LETTER B WITH DOT ABOVE
+1E04; L; 1E05; #LATIN CAPITAL LETTER B WITH DOT BELOW
+1E06; L; 1E07; #LATIN CAPITAL LETTER B WITH LINE BELOW
+1E08; L; 1E09; #LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE
+1E0A; L; 1E0B; #LATIN CAPITAL LETTER D WITH DOT ABOVE
+1E0C; L; 1E0D; #LATIN CAPITAL LETTER D WITH DOT BELOW
+1E0E; L; 1E0F; #LATIN CAPITAL LETTER D WITH LINE BELOW
+1E10; L; 1E11; #LATIN CAPITAL LETTER D WITH CEDILLA
+1E12; L; 1E13; #LATIN CAPITAL LETTER D WITH CIRCUMFLEX BELOW
+1E14; L; 1E15; #LATIN CAPITAL LETTER E WITH MACRON AND GRAVE
+1E16; L; 1E17; #LATIN CAPITAL LETTER E WITH MACRON AND ACUTE
+1E18; L; 1E19; #LATIN CAPITAL LETTER E WITH CIRCUMFLEX BELOW
+1E1A; L; 1E1B; #LATIN CAPITAL LETTER E WITH TILDE BELOW
+1E1C; L; 1E1D; #LATIN CAPITAL LETTER E WITH CEDILLA AND BREVE
+1E1E; L; 1E1F; #LATIN CAPITAL LETTER F WITH DOT ABOVE
+1E20; L; 1E21; #LATIN CAPITAL LETTER G WITH MACRON
+1E22; L; 1E23; #LATIN CAPITAL LETTER H WITH DOT ABOVE
+1E24; L; 1E25; #LATIN CAPITAL LETTER H WITH DOT BELOW
+1E26; L; 1E27; #LATIN CAPITAL LETTER H WITH DIAERESIS
+1E28; L; 1E29; #LATIN CAPITAL LETTER H WITH CEDILLA
+1E2A; L; 1E2B; #LATIN CAPITAL LETTER H WITH BREVE BELOW
+1E2C; L; 1E2D; #LATIN CAPITAL LETTER I WITH TILDE BELOW
+1E2E; L; 1E2F; #LATIN CAPITAL LETTER I WITH DIAERESIS AND ACUTE
+1E30; L; 1E31; #LATIN CAPITAL LETTER K WITH ACUTE
+1E32; L; 1E33; #LATIN CAPITAL LETTER K WITH DOT BELOW
+1E34; L; 1E35; #LATIN CAPITAL LETTER K WITH LINE BELOW
+1E36; L; 1E37; #LATIN CAPITAL LETTER L WITH DOT BELOW
+1E38; L; 1E39; #LATIN CAPITAL LETTER L WITH DOT BELOW AND MACRON
+1E3A; L; 1E3B; #LATIN CAPITAL LETTER L WITH LINE BELOW
+1E3C; L; 1E3D; #LATIN CAPITAL LETTER L WITH CIRCUMFLEX BELOW
+1E3E; L; 1E3F; #LATIN CAPITAL LETTER M WITH ACUTE
+1E40; L; 1E41; #LATIN CAPITAL LETTER M WITH DOT ABOVE
+1E42; L; 1E43; #LATIN CAPITAL LETTER M WITH DOT BELOW
+1E44; L; 1E45; #LATIN CAPITAL LETTER N WITH DOT ABOVE
+1E46; L; 1E47; #LATIN CAPITAL LETTER N WITH DOT BELOW
+1E48; L; 1E49; #LATIN CAPITAL LETTER N WITH LINE BELOW
+1E4A; L; 1E4B; #LATIN CAPITAL LETTER N WITH CIRCUMFLEX BELOW
+1E4C; L; 1E4D; #LATIN CAPITAL LETTER O WITH TILDE AND ACUTE
+1E4E; L; 1E4F; #LATIN CAPITAL LETTER O WITH TILDE AND DIAERESIS
+1E50; L; 1E51; #LATIN CAPITAL LETTER O WITH MACRON AND GRAVE
+1E52; L; 1E53; #LATIN CAPITAL LETTER O WITH MACRON AND ACUTE
+1E54; L; 1E55; #LATIN CAPITAL LETTER P WITH ACUTE
+1E56; L; 1E57; #LATIN CAPITAL LETTER P WITH DOT ABOVE
+1E58; L; 1E59; #LATIN CAPITAL LETTER R WITH DOT ABOVE
+1E5A; L; 1E5B; #LATIN CAPITAL LETTER R WITH DOT BELOW
+1E5C; L; 1E5D; #LATIN CAPITAL LETTER R WITH DOT BELOW AND MACRON
+1E5E; L; 1E5F; #LATIN CAPITAL LETTER R WITH LINE BELOW
+1E60; L; 1E61; #LATIN CAPITAL LETTER S WITH DOT ABOVE
+1E62; L; 1E63; #LATIN CAPITAL LETTER S WITH DOT BELOW
+1E64; L; 1E65; #LATIN CAPITAL LETTER S WITH ACUTE AND DOT ABOVE
+1E66; L; 1E67; #LATIN CAPITAL LETTER S WITH CARON AND DOT ABOVE
+1E68; L; 1E69; #LATIN CAPITAL LETTER S WITH DOT BELOW AND DOT ABOVE
+1E6A; L; 1E6B; #LATIN CAPITAL LETTER T WITH DOT ABOVE
+1E6C; L; 1E6D; #LATIN CAPITAL LETTER T WITH DOT BELOW
+1E6E; L; 1E6F; #LATIN CAPITAL LETTER T WITH LINE BELOW
+1E70; L; 1E71; #LATIN CAPITAL LETTER T WITH CIRCUMFLEX BELOW
+1E72; L; 1E73; #LATIN CAPITAL LETTER U WITH DIAERESIS BELOW
+1E74; L; 1E75; #LATIN CAPITAL LETTER U WITH TILDE BELOW
+1E76; L; 1E77; #LATIN CAPITAL LETTER U WITH CIRCUMFLEX BELOW
+1E78; L; 1E79; #LATIN CAPITAL LETTER U WITH TILDE AND ACUTE
+1E7A; L; 1E7B; #LATIN CAPITAL LETTER U WITH MACRON AND DIAERESIS
+1E7C; L; 1E7D; #LATIN CAPITAL LETTER V WITH TILDE
+1E7E; L; 1E7F; #LATIN CAPITAL LETTER V WITH DOT BELOW
+1E80; L; 1E81; #LATIN CAPITAL LETTER W WITH GRAVE
+1E82; L; 1E83; #LATIN CAPITAL LETTER W WITH ACUTE
+1E84; L; 1E85; #LATIN CAPITAL LETTER W WITH DIAERESIS
+1E86; L; 1E87; #LATIN CAPITAL LETTER W WITH DOT ABOVE
+1E88; L; 1E89; #LATIN CAPITAL LETTER W WITH DOT BELOW
+1E8A; L; 1E8B; #LATIN CAPITAL LETTER X WITH DOT ABOVE
+1E8C; L; 1E8D; #LATIN CAPITAL LETTER X WITH DIAERESIS
+1E8E; L; 1E8F; #LATIN CAPITAL LETTER Y WITH DOT ABOVE
+1E90; L; 1E91; #LATIN CAPITAL LETTER Z WITH CIRCUMFLEX
+1E92; L; 1E93; #LATIN CAPITAL LETTER Z WITH DOT BELOW
+1E94; L; 1E95; #LATIN CAPITAL LETTER Z WITH LINE BELOW
+1E96; E; 0068 0331; #LATIN SMALL LETTER H WITH LINE BELOW
+1E97; E; 0074 0308; #LATIN SMALL LETTER T WITH DIAERESIS
+1E98; E; 0077 030A; #LATIN SMALL LETTER W WITH RING ABOVE
+1E99; E; 0079 030A; #LATIN SMALL LETTER Y WITH RING ABOVE
+1E9A; E; 0061 02BE; #LATIN SMALL LETTER A WITH RIGHT HALF RING
+1E9B; E; 1E61; #LATIN SMALL LETTER LONG S WITH DOT ABOVE
+1EA0; L; 1EA1; #LATIN CAPITAL LETTER A WITH DOT BELOW
+1EA2; L; 1EA3; #LATIN CAPITAL LETTER A WITH HOOK ABOVE
+1EA4; L; 1EA5; #LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND ACUTE
+1EA6; L; 1EA7; #LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND GRAVE
+1EA8; L; 1EA9; #LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE
+1EAA; L; 1EAB; #LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND TILDE
+1EAC; L; 1EAD; #LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND DOT BELOW
+1EAE; L; 1EAF; #LATIN CAPITAL LETTER A WITH BREVE AND ACUTE
+1EB0; L; 1EB1; #LATIN CAPITAL LETTER A WITH BREVE AND GRAVE
+1EB2; L; 1EB3; #LATIN CAPITAL LETTER A WITH BREVE AND HOOK ABOVE
+1EB4; L; 1EB5; #LATIN CAPITAL LETTER A WITH BREVE AND TILDE
+1EB6; L; 1EB7; #LATIN CAPITAL LETTER A WITH BREVE AND DOT BELOW
+1EB8; L; 1EB9; #LATIN CAPITAL LETTER E WITH DOT BELOW
+1EBA; L; 1EBB; #LATIN CAPITAL LETTER E WITH HOOK ABOVE
+1EBC; L; 1EBD; #LATIN CAPITAL LETTER E WITH TILDE
+1EBE; L; 1EBF; #LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND ACUTE
+1EC0; L; 1EC1; #LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND GRAVE
+1EC2; L; 1EC3; #LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE
+1EC4; L; 1EC5; #LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE
+1EC6; L; 1EC7; #LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND DOT BELOW
+1EC8; L; 1EC9; #LATIN CAPITAL LETTER I WITH HOOK ABOVE
+1ECA; L; 1ECB; #LATIN CAPITAL LETTER I WITH DOT BELOW
+1ECC; L; 1ECD; #LATIN CAPITAL LETTER O WITH DOT BELOW
+1ECE; L; 1ECF; #LATIN CAPITAL LETTER O WITH HOOK ABOVE
+1ED0; L; 1ED1; #LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE
+1ED2; L; 1ED3; #LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND GRAVE
+1ED4; L; 1ED5; #LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE
+1ED6; L; 1ED7; #LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND TILDE
+1ED8; L; 1ED9; #LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND DOT BELOW
+1EDA; L; 1EDB; #LATIN CAPITAL LETTER O WITH HORN AND ACUTE
+1EDC; L; 1EDD; #LATIN CAPITAL LETTER O WITH HORN AND GRAVE
+1EDE; L; 1EDF; #LATIN CAPITAL LETTER O WITH HORN AND HOOK ABOVE
+1EE0; L; 1EE1; #LATIN CAPITAL LETTER O WITH HORN AND TILDE
+1EE2; L; 1EE3; #LATIN CAPITAL LETTER O WITH HORN AND DOT BELOW
+1EE4; L; 1EE5; #LATIN CAPITAL LETTER U WITH DOT BELOW
+1EE6; L; 1EE7; #LATIN CAPITAL LETTER U WITH HOOK ABOVE
+1EE8; L; 1EE9; #LATIN CAPITAL LETTER U WITH HORN AND ACUTE
+1EEA; L; 1EEB; #LATIN CAPITAL LETTER U WITH HORN AND GRAVE
+1EEC; L; 1EED; #LATIN CAPITAL LETTER U WITH HORN AND HOOK ABOVE
+1EEE; L; 1EEF; #LATIN CAPITAL LETTER U WITH HORN AND TILDE
+1EF0; L; 1EF1; #LATIN CAPITAL LETTER U WITH HORN AND DOT BELOW
+1EF2; L; 1EF3; #LATIN CAPITAL LETTER Y WITH GRAVE
+1EF4; L; 1EF5; #LATIN CAPITAL LETTER Y WITH DOT BELOW
+1EF6; L; 1EF7; #LATIN CAPITAL LETTER Y WITH HOOK ABOVE
+1EF8; L; 1EF9; #LATIN CAPITAL LETTER Y WITH TILDE
+1F08; L; 1F00; #GREEK CAPITAL LETTER ALPHA WITH PSILI
+1F09; L; 1F01; #GREEK CAPITAL LETTER ALPHA WITH DASIA
+1F0A; L; 1F02; #GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA
+1F0B; L; 1F03; #GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA
+1F0C; L; 1F04; #GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA
+1F0D; L; 1F05; #GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA
+1F0E; L; 1F06; #GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI
+1F0F; L; 1F07; #GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI
+1F18; L; 1F10; #GREEK CAPITAL LETTER EPSILON WITH PSILI
+1F19; L; 1F11; #GREEK CAPITAL LETTER EPSILON WITH DASIA
+1F1A; L; 1F12; #GREEK CAPITAL LETTER EPSILON WITH PSILI AND VARIA
+1F1B; L; 1F13; #GREEK CAPITAL LETTER EPSILON WITH DASIA AND VARIA
+1F1C; L; 1F14; #GREEK CAPITAL LETTER EPSILON WITH PSILI AND OXIA
+1F1D; L; 1F15; #GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA
+1F28; L; 1F20; #GREEK CAPITAL LETTER ETA WITH PSILI
+1F29; L; 1F21; #GREEK CAPITAL LETTER ETA WITH DASIA
+1F2A; L; 1F22; #GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA
+1F2B; L; 1F23; #GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA
+1F2C; L; 1F24; #GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA
+1F2D; L; 1F25; #GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA
+1F2E; L; 1F26; #GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI
+1F2F; L; 1F27; #GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI
+1F38; L; 1F30; #GREEK CAPITAL LETTER IOTA WITH PSILI
+1F39; L; 1F31; #GREEK CAPITAL LETTER IOTA WITH DASIA
+1F3A; L; 1F32; #GREEK CAPITAL LETTER IOTA WITH PSILI AND VARIA
+1F3B; L; 1F33; #GREEK CAPITAL LETTER IOTA WITH DASIA AND VARIA
+1F3C; L; 1F34; #GREEK CAPITAL LETTER IOTA WITH PSILI AND OXIA
+1F3D; L; 1F35; #GREEK CAPITAL LETTER IOTA WITH DASIA AND OXIA
+1F3E; L; 1F36; #GREEK CAPITAL LETTER IOTA WITH PSILI AND PERISPOMENI
+1F3F; L; 1F37; #GREEK CAPITAL LETTER IOTA WITH DASIA AND PERISPOMENI
+1F48; L; 1F40; #GREEK CAPITAL LETTER OMICRON WITH PSILI
+1F49; L; 1F41; #GREEK CAPITAL LETTER OMICRON WITH DASIA
+1F4A; L; 1F42; #GREEK CAPITAL LETTER OMICRON WITH PSILI AND VARIA
+1F4B; L; 1F43; #GREEK CAPITAL LETTER OMICRON WITH DASIA AND VARIA
+1F4C; L; 1F44; #GREEK CAPITAL LETTER OMICRON WITH PSILI AND OXIA
+1F4D; L; 1F45; #GREEK CAPITAL LETTER OMICRON WITH DASIA AND OXIA
+1F50; E; 03C5 0313; #GREEK SMALL LETTER UPSILON WITH PSILI
+1F52; E; 03C5 0313 0300; #GREEK SMALL LETTER UPSILON WITH PSILI AND VARIA
+1F54; E; 03C5 0313 0301; #GREEK SMALL LETTER UPSILON WITH PSILI AND OXIA
+1F56; E; 03C5 0313 0342; #GREEK SMALL LETTER UPSILON WITH PSILI AND PERISPOMENI
+1F59; L; 1F51; #GREEK CAPITAL LETTER UPSILON WITH DASIA
+1F5B; L; 1F53; #GREEK CAPITAL LETTER UPSILON WITH DASIA AND VARIA
+1F5D; L; 1F55; #GREEK CAPITAL LETTER UPSILON WITH DASIA AND OXIA
+1F5F; L; 1F57; #GREEK CAPITAL LETTER UPSILON WITH DASIA AND PERISPOMENI
+1F68; L; 1F60; #GREEK CAPITAL LETTER OMEGA WITH PSILI
+1F69; L; 1F61; #GREEK CAPITAL LETTER OMEGA WITH DASIA
+1F6A; L; 1F62; #GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA
+1F6B; L; 1F63; #GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA
+1F6C; L; 1F64; #GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA
+1F6D; L; 1F65; #GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA
+1F6E; L; 1F66; #GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI
+1F6F; L; 1F67; #GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI
+1F80; E; 1F00 03B9; #GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI
+1F81; E; 1F01 03B9; #GREEK SMALL LETTER ALPHA WITH DASIA AND YPOGEGRAMMENI
+1F82; E; 1F02 03B9; #GREEK SMALL LETTER ALPHA WITH PSILI AND VARIA AND YPOGEGRAMMENI
+1F83; E; 1F03 03B9; #GREEK SMALL LETTER ALPHA WITH DASIA AND VARIA AND YPOGEGRAMMENI
+1F84; E; 1F04 03B9; #GREEK SMALL LETTER ALPHA WITH PSILI AND OXIA AND YPOGEGRAMMENI
+1F85; E; 1F05 03B9; #GREEK SMALL LETTER ALPHA WITH DASIA AND OXIA AND YPOGEGRAMMENI
+1F86; E; 1F06 03B9; #GREEK SMALL LETTER ALPHA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI
+1F87; E; 1F07 03B9; #GREEK SMALL LETTER ALPHA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI
+1F88; E; 1F00 03B9; #GREEK CAPITAL LETTER ALPHA WITH PSILI AND PROSGEGRAMMENI
+1F89; E; 1F01 03B9; #GREEK CAPITAL LETTER ALPHA WITH DASIA AND PROSGEGRAMMENI
+1F8A; E; 1F02 03B9; #GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA AND PROSGEGRAMMENI
+1F8B; E; 1F03 03B9; #GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA AND PROSGEGRAMMENI
+1F8C; E; 1F04 03B9; #GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA AND PROSGEGRAMMENI
+1F8D; E; 1F05 03B9; #GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA AND PROSGEGRAMMENI
+1F8E; E; 1F06 03B9; #GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
+1F8F; E; 1F07 03B9; #GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
+1F90; E; 1F20 03B9; #GREEK SMALL LETTER ETA WITH PSILI AND YPOGEGRAMMENI
+1F91; E; 1F21 03B9; #GREEK SMALL LETTER ETA WITH DASIA AND YPOGEGRAMMENI
+1F92; E; 1F22 03B9; #GREEK SMALL LETTER ETA WITH PSILI AND VARIA AND YPOGEGRAMMENI
+1F93; E; 1F23 03B9; #GREEK SMALL LETTER ETA WITH DASIA AND VARIA AND YPOGEGRAMMENI
+1F94; E; 1F24 03B9; #GREEK SMALL LETTER ETA WITH PSILI AND OXIA AND YPOGEGRAMMENI
+1F95; E; 1F25 03B9; #GREEK SMALL LETTER ETA WITH DASIA AND OXIA AND YPOGEGRAMMENI
+1F96; E; 1F26 03B9; #GREEK SMALL LETTER ETA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI
+1F97; E; 1F27 03B9; #GREEK SMALL LETTER ETA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI
+1F98; E; 1F20 03B9; #GREEK CAPITAL LETTER ETA WITH PSILI AND PROSGEGRAMMENI
+1F99; E; 1F21 03B9; #GREEK CAPITAL LETTER ETA WITH DASIA AND PROSGEGRAMMENI
+1F9A; E; 1F22 03B9; #GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA AND PROSGEGRAMMENI
+1F9B; E; 1F23 03B9; #GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA AND PROSGEGRAMMENI
+1F9C; E; 1F24 03B9; #GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA AND PROSGEGRAMMENI
+1F9D; E; 1F25 03B9; #GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA AND PROSGEGRAMMENI
+1F9E; E; 1F26 03B9; #GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
+1F9F; E; 1F27 03B9; #GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
+1FA0; E; 1F60 03B9; #GREEK SMALL LETTER OMEGA WITH PSILI AND YPOGEGRAMMENI
+1FA1; E; 1F61 03B9; #GREEK SMALL LETTER OMEGA WITH DASIA AND YPOGEGRAMMENI
+1FA2; E; 1F62 03B9; #GREEK SMALL LETTER OMEGA WITH PSILI AND VARIA AND YPOGEGRAMMENI
+1FA3; E; 1F63 03B9; #GREEK SMALL LETTER OMEGA WITH DASIA AND VARIA AND YPOGEGRAMMENI
+1FA4; E; 1F64 03B9; #GREEK SMALL LETTER OMEGA WITH PSILI AND OXIA AND YPOGEGRAMMENI
+1FA5; E; 1F65 03B9; #GREEK SMALL LETTER OMEGA WITH DASIA AND OXIA AND YPOGEGRAMMENI
+1FA6; E; 1F66 03B9; #GREEK SMALL LETTER OMEGA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI
+1FA7; E; 1F67 03B9; #GREEK SMALL LETTER OMEGA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI
+1FA8; E; 1F60 03B9; #GREEK CAPITAL LETTER OMEGA WITH PSILI AND PROSGEGRAMMENI
+1FA9; E; 1F61 03B9; #GREEK CAPITAL LETTER OMEGA WITH DASIA AND PROSGEGRAMMENI
+1FAA; E; 1F62 03B9; #GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA AND PROSGEGRAMMENI
+1FAB; E; 1F63 03B9; #GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA AND PROSGEGRAMMENI
+1FAC; E; 1F64 03B9; #GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA AND PROSGEGRAMMENI
+1FAD; E; 1F65 03B9; #GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA AND PROSGEGRAMMENI
+1FAE; E; 1F66 03B9; #GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
+1FAF; E; 1F67 03B9; #GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
+1FB2; E; 1F70 03B9; #GREEK SMALL LETTER ALPHA WITH VARIA AND YPOGEGRAMMENI
+1FB3; E; 03B1 03B9; #GREEK SMALL LETTER ALPHA WITH YPOGEGRAMMENI
+1FB4; E; 03AC 03B9; #GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI
+1FB6; E; 03B1 0342; #GREEK SMALL LETTER ALPHA WITH PERISPOMENI
+1FB7; E; 03B1 0342 03B9; #GREEK SMALL LETTER ALPHA WITH PERISPOMENI AND YPOGEGRAMMENI
+1FB8; L; 1FB0; #GREEK CAPITAL LETTER ALPHA WITH VRACHY
+1FB9; L; 1FB1; #GREEK CAPITAL LETTER ALPHA WITH MACRON
+1FBA; L; 1F70; #GREEK CAPITAL LETTER ALPHA WITH VARIA
+1FBB; L; 1F71; #GREEK CAPITAL LETTER ALPHA WITH OXIA
+1FBC; E; 03B1 03B9; #GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI
+1FBE; E; 03B9; #GREEK PROSGEGRAMMENI
+1FC2; E; 1F74 03B9; #GREEK SMALL LETTER ETA WITH VARIA AND YPOGEGRAMMENI
+1FC3; E; 03B7 03B9; #GREEK SMALL LETTER ETA WITH YPOGEGRAMMENI
+1FC4; E; 03AE 03B9; #GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI
+1FC6; E; 03B7 0342; #GREEK SMALL LETTER ETA WITH PERISPOMENI
+1FC7; E; 03B7 0342 03B9; #GREEK SMALL LETTER ETA WITH PERISPOMENI AND YPOGEGRAMMENI
+1FC8; L; 1F72; #GREEK CAPITAL LETTER EPSILON WITH VARIA
+1FC9; L; 1F73; #GREEK CAPITAL LETTER EPSILON WITH OXIA
+1FCA; L; 1F74; #GREEK CAPITAL LETTER ETA WITH VARIA
+1FCB; L; 1F75; #GREEK CAPITAL LETTER ETA WITH OXIA
+1FCC; E; 03B7 03B9; #GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI
+1FD2; E; 03B9 0308 0300; #GREEK SMALL LETTER IOTA WITH DIALYTIKA AND VARIA
+1FD3; E; 03B9 0308 0301; #GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA
+1FD6; E; 03B9 0342; #GREEK SMALL LETTER IOTA WITH PERISPOMENI
+1FD7; E; 03B9 0308 0342; #GREEK SMALL LETTER IOTA WITH DIALYTIKA AND PERISPOMENI
+1FD8; L; 1FD0; #GREEK CAPITAL LETTER IOTA WITH VRACHY
+1FD9; L; 1FD1; #GREEK CAPITAL LETTER IOTA WITH MACRON
+1FDA; L; 1F76; #GREEK CAPITAL LETTER IOTA WITH VARIA
+1FDB; L; 1F77; #GREEK CAPITAL LETTER IOTA WITH OXIA
+1FE2; E; 03C5 0308 0300; #GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND VARIA
+1FE3; E; 03C5 0308 0301; #GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND OXIA
+1FE4; E; 03C1 0313; #GREEK SMALL LETTER RHO WITH PSILI
+1FE6; E; 03C5 0342; #GREEK SMALL LETTER UPSILON WITH PERISPOMENI
+1FE7; E; 03C5 0308 0342; #GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND PERISPOMENI
+1FE8; L; 1FE0; #GREEK CAPITAL LETTER UPSILON WITH VRACHY
+1FE9; L; 1FE1; #GREEK CAPITAL LETTER UPSILON WITH MACRON
+1FEA; L; 1F7A; #GREEK CAPITAL LETTER UPSILON WITH VARIA
+1FEB; L; 1F7B; #GREEK CAPITAL LETTER UPSILON WITH OXIA
+1FEC; L; 1FE5; #GREEK CAPITAL LETTER RHO WITH DASIA
+1FF2; E; 1F7C 03B9; #GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI
+1FF3; E; 03C9 03B9; #GREEK SMALL LETTER OMEGA WITH YPOGEGRAMMENI
+1FF4; E; 03CE 03B9; #GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI
+1FF6; E; 03C9 0342; #GREEK SMALL LETTER OMEGA WITH PERISPOMENI
+1FF7; E; 03C9 0342 03B9; #GREEK SMALL LETTER OMEGA WITH PERISPOMENI AND YPOGEGRAMMENI
+1FF8; L; 1F78; #GREEK CAPITAL LETTER OMICRON WITH VARIA
+1FF9; L; 1F79; #GREEK CAPITAL LETTER OMICRON WITH OXIA
+1FFA; L; 1F7C; #GREEK CAPITAL LETTER OMEGA WITH VARIA
+1FFB; L; 1F7D; #GREEK CAPITAL LETTER OMEGA WITH OXIA
+1FFC; E; 03C9 03B9; #GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI
+2126; L; 03C9; #OHM SIGN
+212A; L; 006B; #KELVIN SIGN
+212B; L; 00E5; #ANGSTROM SIGN
+2160; L; 2170; #ROMAN NUMERAL ONE
+2161; L; 2171; #ROMAN NUMERAL TWO
+2162; L; 2172; #ROMAN NUMERAL THREE
+2163; L; 2173; #ROMAN NUMERAL FOUR
+2164; L; 2174; #ROMAN NUMERAL FIVE
+2165; L; 2175; #ROMAN NUMERAL SIX
+2166; L; 2176; #ROMAN NUMERAL SEVEN
+2167; L; 2177; #ROMAN NUMERAL EIGHT
+2168; L; 2178; #ROMAN NUMERAL NINE
+2169; L; 2179; #ROMAN NUMERAL TEN
+216A; L; 217A; #ROMAN NUMERAL ELEVEN
+216B; L; 217B; #ROMAN NUMERAL TWELVE
+216C; L; 217C; #ROMAN NUMERAL FIFTY
+216D; L; 217D; #ROMAN NUMERAL ONE HUNDRED
+216E; L; 217E; #ROMAN NUMERAL FIVE HUNDRED
+216F; L; 217F; #ROMAN NUMERAL ONE THOUSAND
+24B6; L; 24D0; #CIRCLED LATIN CAPITAL LETTER A
+24B7; L; 24D1; #CIRCLED LATIN CAPITAL LETTER B
+24B8; L; 24D2; #CIRCLED LATIN CAPITAL LETTER C
+24B9; L; 24D3; #CIRCLED LATIN CAPITAL LETTER D
+24BA; L; 24D4; #CIRCLED LATIN CAPITAL LETTER E
+24BB; L; 24D5; #CIRCLED LATIN CAPITAL LETTER F
+24BC; L; 24D6; #CIRCLED LATIN CAPITAL LETTER G
+24BD; L; 24D7; #CIRCLED LATIN CAPITAL LETTER H
+24BE; L; 24D8; #CIRCLED LATIN CAPITAL LETTER I
+24BF; L; 24D9; #CIRCLED LATIN CAPITAL LETTER J
+24C0; L; 24DA; #CIRCLED LATIN CAPITAL LETTER K
+24C1; L; 24DB; #CIRCLED LATIN CAPITAL LETTER L
+24C2; L; 24DC; #CIRCLED LATIN CAPITAL LETTER M
+24C3; L; 24DD; #CIRCLED LATIN CAPITAL LETTER N
+24C4; L; 24DE; #CIRCLED LATIN CAPITAL LETTER O
+24C5; L; 24DF; #CIRCLED LATIN CAPITAL LETTER P
+24C6; L; 24E0; #CIRCLED LATIN CAPITAL LETTER Q
+24C7; L; 24E1; #CIRCLED LATIN CAPITAL LETTER R
+24C8; L; 24E2; #CIRCLED LATIN CAPITAL LETTER S
+24C9; L; 24E3; #CIRCLED LATIN CAPITAL LETTER T
+24CA; L; 24E4; #CIRCLED LATIN CAPITAL LETTER U
+24CB; L; 24E5; #CIRCLED LATIN CAPITAL LETTER V
+24CC; L; 24E6; #CIRCLED LATIN CAPITAL LETTER W
+24CD; L; 24E7; #CIRCLED LATIN CAPITAL LETTER X
+24CE; L; 24E8; #CIRCLED LATIN CAPITAL LETTER Y
+24CF; L; 24E9; #CIRCLED LATIN CAPITAL LETTER Z
+FB00; E; 0066 0066; #LATIN SMALL LIGATURE FF
+FB01; E; 0066 0069; #LATIN SMALL LIGATURE FI
+FB02; E; 0066 006C; #LATIN SMALL LIGATURE FL
+FB03; E; 0066 0066 0069; #LATIN SMALL LIGATURE FFI
+FB04; E; 0066 0066 006C; #LATIN SMALL LIGATURE FFL
+FB05; E; 0073 0074; #LATIN SMALL LIGATURE LONG S T
+FB06; E; 0073 0074; #LATIN SMALL LIGATURE ST
+FB13; E; 0574 0576; #ARMENIAN SMALL LIGATURE MEN NOW
+FB14; E; 0574 0565; #ARMENIAN SMALL LIGATURE MEN ECH
+FB15; E; 0574 056B; #ARMENIAN SMALL LIGATURE MEN INI
+FB16; E; 057E 0576; #ARMENIAN SMALL LIGATURE VEW NOW
+FB17; E; 0574 056D; #ARMENIAN SMALL LIGATURE MEN XEH
+FF21; L; FF41; #FULLWIDTH LATIN CAPITAL LETTER A
+FF22; L; FF42; #FULLWIDTH LATIN CAPITAL LETTER B
+FF23; L; FF43; #FULLWIDTH LATIN CAPITAL LETTER C
+FF24; L; FF44; #FULLWIDTH LATIN CAPITAL LETTER D
+FF25; L; FF45; #FULLWIDTH LATIN CAPITAL LETTER E
+FF26; L; FF46; #FULLWIDTH LATIN CAPITAL LETTER F
+FF27; L; FF47; #FULLWIDTH LATIN CAPITAL LETTER G
+FF28; L; FF48; #FULLWIDTH LATIN CAPITAL LETTER H
+FF29; L; FF49; #FULLWIDTH LATIN CAPITAL LETTER I
+FF2A; L; FF4A; #FULLWIDTH LATIN CAPITAL LETTER J
+FF2B; L; FF4B; #FULLWIDTH LATIN CAPITAL LETTER K
+FF2C; L; FF4C; #FULLWIDTH LATIN CAPITAL LETTER L
+FF2D; L; FF4D; #FULLWIDTH LATIN CAPITAL LETTER M
+FF2E; L; FF4E; #FULLWIDTH LATIN CAPITAL LETTER N
+FF2F; L; FF4F; #FULLWIDTH LATIN CAPITAL LETTER O
+FF30; L; FF50; #FULLWIDTH LATIN CAPITAL LETTER P
+FF31; L; FF51; #FULLWIDTH LATIN CAPITAL LETTER Q
+FF32; L; FF52; #FULLWIDTH LATIN CAPITAL LETTER R
+FF33; L; FF53; #FULLWIDTH LATIN CAPITAL LETTER S
+FF34; L; FF54; #FULLWIDTH LATIN CAPITAL LETTER T
+FF35; L; FF55; #FULLWIDTH LATIN CAPITAL LETTER U
+FF36; L; FF56; #FULLWIDTH LATIN CAPITAL LETTER V
+FF37; L; FF57; #FULLWIDTH LATIN CAPITAL LETTER W
+FF38; L; FF58; #FULLWIDTH LATIN CAPITAL LETTER X
+FF39; L; FF59; #FULLWIDTH LATIN CAPITAL LETTER Y
+FF3A; L; FF5A; #FULLWIDTH LATIN CAPITAL LETTER Z
+
+
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
0000 001f Cc
ffed ffee So
fff9 fffb Cf
fffc fffd So
+f0000 ffffd Co
+100000 10fffd Co
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
0300 0314 230
+# CompositionExclusions-2.txt
+#
# Composition Exclusions
# This file lists the characters from the UTR #15 Composition Exclusion Table.
#
# (4) Non-Starter Decompositions
# These characters can be derived from the UnicodeData file
# by including all characters whose canonical decomposition consists
-# of a sequence of characters, the first of which has a canonical
-# class of zero.
+# of a sequence of characters, the first of which has a non-zero
+# combining class.
# These characters are simply quoted here for reference.
# 0344 COMBINING GREEK DIALYTIKA TONOS
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
00a0 <noBreak> 0020
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
FB00 FB4F
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
0600 06FF
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
FB50 FDFF
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
FE70 FEFE
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
0530 058F
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
2190 21FF
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
0000 007F
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
0980 09FF
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
2580 259F
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
3100 312F
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
31A0 31BF
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
2500 257F
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
2800 28FF
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
3300 33FF
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
FE30 FE4F
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
F900 FAFF
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
2E80 2EFF
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
3000 303F
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
4E00 9FFF
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
3400 4DB5
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
13A0 13FF
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
0300 036F
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
FE20 FE2F
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
20D0 20FF
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
2400 243F
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
20A0 20CF
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
0400 04FF
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
0900 097F
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
2700 27BF
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
2460 24FF
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
3200 32FF
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
1200 137F
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
2000 206F
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
25A0 25FF
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
10A0 10FF
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
0370 03FF
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
1F00 1FFF
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
0A80 0AFF
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
0A00 0A7F
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
FF00 FFEF
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
3130 318F
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
1100 11FF
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
AC00 D7A3
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
0590 05FF
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
DB80 DBFF
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
D800 DB7F
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
3040 309F
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
0250 02AF
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
2FF0 2FFF
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
3190 319F
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
2F00 2FDF
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
0C80 0CFF
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
30A0 30FF
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
1780 17FF
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
0E80 0EFF
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
0080 00FF
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
0100 017F
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
0180 024F
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
1E00 1EFF
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
2100 214F
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
DC00 DFFF
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
0D00 0D7F
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
2200 22FF
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
2600 26FF
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
2300 23FF
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
1800 18AF
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
1000 109F
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
2150 218F
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
1680 169F
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
2440 245F
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
0B00 0B7F
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
E000 F8FF
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
16A0 16FF
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
0D80 0DFF
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
FE50 FE6F
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
02B0 02FF
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
FFF0 FFFD
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
2070 209F
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
0700 074F
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
0B80 0BFF
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
0C00 0C7F
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
0780 07BF
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
0E00 0E7F
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
0F00 0FFF
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
1400 167F
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
A490 A4CF
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
A000 A48F
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
0000 007f
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
0030 0039
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
0041 005a
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
061b
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
0660 0669
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
000a
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
0000 0008
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
002c
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
0030 0039
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
002f
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
0023 0025
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
0041 005a
ffca ffcf
ffd2 ffd7
ffda ffdc
+f0000 ffffd
+100000 10fffd
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
202a
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
202d
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
0300 034e
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
0021 0022
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
202c
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
05be
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
202b
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
202e
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
0009
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
000c
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
0000 001f
e000 f8ff
feff
fff9 fffb
+f0000 ffffd
+100000 10fffd
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
0000 001f
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
070f
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
0220 0221
ffdd ffdf
ffe7
ffef fff8
+10000 1fffd
+20000 2fffd
+30000 3fffd
+40000 4fffd
+50000 5fffd
+60000 6fffd
+70000 7fffd
+80000 8fffd
+90000 9fffd
+a0000 afffd
+b0000 bfffd
+c0000 cfffd
+d0000 dfffd
+e0000 efffd
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
0000 001f
e000 f8ff
feff
fff9 fffb
+f0000 ffffd
+100000 10fffd
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
e000 f8ff
+f0000 ffffd
+100000 10fffd
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
d800 db7f
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
2460 2473
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
00a8
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
fb51
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
2102
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
00bc 00be
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
fb55
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
fb54
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
fb50
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
ff61 ffbe
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
00a0
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
fe50 fe52
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
3300 3357
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
2080 208e
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
00aa
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
fe30 fe44
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
3000
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
00c0 00c5
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
00a0
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
0030 0039
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
0021 007e
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
0041 005a
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
00a1
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
0023
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
2014
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
0009
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
02c8
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
000c
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
fffc
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
0029
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
0000 0008
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
000d
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
0021
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
00a0
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
002d
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
1100 1159
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
2024 2026
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
002c
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
000a
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
0e5a 0e5b
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
0030 0039
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
0028
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
0025
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
0024
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
0022
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
0e01 0e30
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
d800 db7f
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
0020
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
002f
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
200b
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
0061 007a
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
02b0 02b8
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
01bb
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
0061 007a
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
01c5
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
0041 005a
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
0300 034e
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
0903
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
0488 0489
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
0028 0029
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
0300 034e
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
0030 0039
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
0030 0039
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
2160 2183
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
00b2 00b3
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
0021 0023
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
005f
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
002d
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
0029
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
00bb
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
00ab
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
0021 0023
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
0020 007e
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
0028
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
0021 0023
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
0024
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
0024
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
005e
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
002b
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
00a6 00a7
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
0009 000d
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
1203
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
140b
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
1402
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
141c
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
1205
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
1204
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
1408
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
1202
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
1404
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
3093
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
1206
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
1406 1407
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
1201
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
1200
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
120f
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
1419 141b
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
124d
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
124c
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
15d9
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
124a
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
1410 1411
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
1412 1413
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
1414 1416
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
15d6
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
1248
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
0041 005a
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
0030 0039
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
0030 0039
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
0020
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
2028
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
2029
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
0020
-#Value; Short Name; Unicode Name
-U+1100; G; HANGUL CHOSEONG KIYEOK
-U+1101; GG; HANGUL CHOSEONG SSANGKIYEOK
-U+1102; N; HANGUL CHOSEONG NIEUN
-U+1103; D; HANGUL CHOSEONG TIKEUT
-U+1104; DD; HANGUL CHOSEONG SSANGTIKEUT
-U+1105; R; HANGUL CHOSEONG RIEUL
-U+1106; M; HANGUL CHOSEONG MIEUM
-U+1107; B; HANGUL CHOSEONG PIEUP
-U+1108; BB; HANGUL CHOSEONG SSANGPIEUP
-U+1109; S; HANGUL CHOSEONG SIOS
-U+110A; SS; HANGUL CHOSEONG SSANGSIOS
-U+110B; ; HANGUL CHOSEONG IEUNG
-U+110C; J; HANGUL CHOSEONG CIEUC
-U+110D; JJ; HANGUL CHOSEONG SSANGCIEUC
-U+110E; C; HANGUL CHOSEONG CHIEUCH
-U+110F; K; HANGUL CHOSEONG KHIEUKH
-U+1110; T; HANGUL CHOSEONG THIEUTH
-U+1111; P; HANGUL CHOSEONG PHIEUPH
-U+1112; H; HANGUL CHOSEONG HIEUH
-U+1161; A; HANGUL JUNGSEONG A
-U+1162; AE; HANGUL JUNGSEONG AE
-U+1163; YA; HANGUL JUNGSEONG YA
-U+1164; YAE; HANGUL JUNGSEONG YAE
-U+1165; EO; HANGUL JUNGSEONG EO
-U+1166; E; HANGUL JUNGSEONG E
-U+1167; YEO; HANGUL JUNGSEONG YEO
-U+1168; YE; HANGUL JUNGSEONG YE
-U+1169; O; HANGUL JUNGSEONG O
-U+116A; WA; HANGUL JUNGSEONG WA
-U+116B; WAE; HANGUL JUNGSEONG WAE
-U+116C; OE; HANGUL JUNGSEONG OE
-U+116D; YO; HANGUL JUNGSEONG YO
-U+116E; U; HANGUL JUNGSEONG U
-U+116F; WEO; HANGUL JUNGSEONG WEO
-U+1170; WE; HANGUL JUNGSEONG WE
-U+1171; WI; HANGUL JUNGSEONG WI
-U+1172; YU; HANGUL JUNGSEONG YU
-U+1173; EU; HANGUL JUNGSEONG EU
-U+1174; YI; HANGUL JUNGSEONG YI
-U+1175; I; HANGUL JUNGSEONG I
-U+11A8; G; HANGUL JONGSEONG KIYEOK
-U+11A9; GG; HANGUL JONGSEONG SSANGKIYEOK
-U+11AA; GS; HANGUL JONGSEONG KIYEOK-SIOS
-U+11AB; N; HANGUL JONGSEONG NIEUN
-U+11AC; NJ; HANGUL JONGSEONG NIEUN-CIEUC
-U+11AD; NH; HANGUL JONGSEONG NIEUN-HIEUH
-U+11AE; D; HANGUL JONGSEONG TIKEUT
-U+11AF; L; HANGUL JONGSEONG RIEUL
-U+11B0; LG; HANGUL JONGSEONG RIEUL-KIYEOK
-U+11B1; LM; HANGUL JONGSEONG RIEUL-MIEUM
-U+11B2; LB; HANGUL JONGSEONG RIEUL-PIEUP
-U+11B3; LS; HANGUL JONGSEONG RIEUL-SIOS
-U+11B4; LT; HANGUL JONGSEONG RIEUL-THIEUTH
-U+11B5; LP; HANGUL JONGSEONG RIEUL-PHIEUPH
-U+11B6; LH; HANGUL JONGSEONG RIEUL-HIEUH
-U+11B7; M; HANGUL JONGSEONG MIEUM
-U+11B8; B; HANGUL JONGSEONG PIEUP
-U+11B9; BS; HANGUL JONGSEONG PIEUP-SIOS
-U+11BA; S; HANGUL JONGSEONG SIOS
-U+11BB; SS; HANGUL JONGSEONG SSANGSIOS
-U+11BC; NG; HANGUL JONGSEONG IEUNG
-U+11BD; J; HANGUL JONGSEONG CIEUC
-U+11BE; C; HANGUL JONGSEONG CHIEUCH
-U+11BF; K; HANGUL JONGSEONG KHIEUKH
-U+11C0; T; HANGUL JONGSEONG THIEUTH
-U+11C1; P; HANGUL JONGSEONG PHIEUPH
-U+11C2; H; HANGUL JONGSEONG HIEUH
+# Jamo-3.txt
+#
+# This file is a normative contributory data file in the
+# Unicode Character Database.
+#
+# This file defines the Jamo Short Name property, repeating
+# in machine readable form the information printed in Table 4-4
+# of The Unicode Standard, Version 3.0.
+#
+# See sections 3.11 and 4.4 of The Unicode Standard, Version 3.0
+# for more information.
+#
+# Each line contains two fields, separated by a semicolon.
+#
+# The first field gives the code point, in 4-digit hexadecimal
+# form, of a combining jamo character that participates in
+# the algorithmic determination of Hangul syllable character names.
+# The second field gives the Jamo Short Name as a one-, two-,
+# or three-character ASCII string (or in one case, for U+110B,
+# the null string).
+#
+# #############################################################
+
+1100; G # HANGUL CHOSEONG KIYEOK
+1101; GG # HANGUL CHOSEONG SSANGKIYEOK
+1102; N # HANGUL CHOSEONG NIEUN
+1103; D # HANGUL CHOSEONG TIKEUT
+1104; DD # HANGUL CHOSEONG SSANGTIKEUT
+1105; R # HANGUL CHOSEONG RIEUL
+1106; M # HANGUL CHOSEONG MIEUM
+1107; B # HANGUL CHOSEONG PIEUP
+1108; BB # HANGUL CHOSEONG SSANGPIEUP
+1109; S # HANGUL CHOSEONG SIOS
+110A; SS # HANGUL CHOSEONG SSANGSIOS
+110B; # HANGUL CHOSEONG IEUNG
+110C; J # HANGUL CHOSEONG CIEUC
+110D; JJ # HANGUL CHOSEONG SSANGCIEUC
+110E; C # HANGUL CHOSEONG CHIEUCH
+110F; K # HANGUL CHOSEONG KHIEUKH
+1110; T # HANGUL CHOSEONG THIEUTH
+1111; P # HANGUL CHOSEONG PHIEUPH
+1112; H # HANGUL CHOSEONG HIEUH
+1161; A # HANGUL JUNGSEONG A
+1162; AE # HANGUL JUNGSEONG AE
+1163; YA # HANGUL JUNGSEONG YA
+1164; YAE # HANGUL JUNGSEONG YAE
+1165; EO # HANGUL JUNGSEONG EO
+1166; E # HANGUL JUNGSEONG E
+1167; YEO # HANGUL JUNGSEONG YEO
+1168; YE # HANGUL JUNGSEONG YE
+1169; O # HANGUL JUNGSEONG O
+116A; WA # HANGUL JUNGSEONG WA
+116B; WAE # HANGUL JUNGSEONG WAE
+116C; OE # HANGUL JUNGSEONG OE
+116D; YO # HANGUL JUNGSEONG YO
+116E; U # HANGUL JUNGSEONG U
+116F; WEO # HANGUL JUNGSEONG WEO
+1170; WE # HANGUL JUNGSEONG WE
+1171; WI # HANGUL JUNGSEONG WI
+1172; YU # HANGUL JUNGSEONG YU
+1173; EU # HANGUL JUNGSEONG EU
+1174; YI # HANGUL JUNGSEONG YI
+1175; I # HANGUL JUNGSEONG I
+11A8; G # HANGUL JONGSEONG KIYEOK
+11A9; GG # HANGUL JONGSEONG SSANGKIYEOK
+11AA; GS # HANGUL JONGSEONG KIYEOK-SIOS
+11AB; N # HANGUL JONGSEONG NIEUN
+11AC; NJ # HANGUL JONGSEONG NIEUN-CIEUC
+11AD; NH # HANGUL JONGSEONG NIEUN-HIEUH
+11AE; D # HANGUL JONGSEONG TIKEUT
+11AF; L # HANGUL JONGSEONG RIEUL
+11B0; LG # HANGUL JONGSEONG RIEUL-KIYEOK
+11B1; LM # HANGUL JONGSEONG RIEUL-MIEUM
+11B2; LB # HANGUL JONGSEONG RIEUL-PIEUP
+11B3; LS # HANGUL JONGSEONG RIEUL-SIOS
+11B4; LT # HANGUL JONGSEONG RIEUL-THIEUTH
+11B5; LP # HANGUL JONGSEONG RIEUL-PHIEUPH
+11B6; LH # HANGUL JONGSEONG RIEUL-HIEUH
+11B7; M # HANGUL JONGSEONG MIEUM
+11B8; B # HANGUL JONGSEONG PIEUP
+11B9; BS # HANGUL JONGSEONG PIEUP-SIOS
+11BA; S # HANGUL JONGSEONG SIOS
+11BB; SS # HANGUL JONGSEONG SSANGSIOS
+11BC; NG # HANGUL JONGSEONG IEUNG
+11BD; J # HANGUL JONGSEONG CIEUC
+11BE; C # HANGUL JONGSEONG CHIEUCH
+11BF; K # HANGUL JONGSEONG KHIEUKH
+11C0; T # HANGUL JONGSEONG THIEUTH
+11C1; P # HANGUL JONGSEONG PHIEUPH
+11C2; H # HANGUL JONGSEONG HIEUH
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
-1100 G
-1101 GG
-1102 N
-1103 D
-1104 DD
-1105 R
-1106 M
-1107 B
-1108 BB
-1109 S
-110a SS
-110b
-110c J
-110d JJ
-110e C
-110f K
-1110 T
-1111 P
-1112 H
-1161 A
-1162 AE
-1163 YA
-1164 YAE
-1165 EO
-1166 E
-1167 YEO
-1168 YE
-1169 O
-116a WA
-116b WAE
-116c OE
-116d YO
-116e U
-116f WEO
-1170 WE
-1171 WI
-1172 YU
-1173 EU
-1174 YI
-1175 I
-11a8 G
-11a9 GG
-11aa GS
-11ab N
-11ac NJ
-11ad NH
-11ae D
-11af L
-11b0 LG
-11b1 LM
-11b2 LB
-11b3 LS
-11b4 LT
-11b5 LP
-11b6 LH
-11b7 M
-11b8 B
-11b9 BS
-11ba S
-11bb SS
-11bc NG
-11bd J
-11be C
-11bf K
-11c0 T
-11c1 P
-11c2 H
+1100 G # HANGUL CHOSEONG KIYEOK
+1101 GG # HANGUL CHOSEONG SSANGKIYEOK
+1102 N # HANGUL CHOSEONG NIEUN
+1103 D # HANGUL CHOSEONG TIKEUT
+1104 DD # HANGUL CHOSEONG SSANGTIKEUT
+1105 R # HANGUL CHOSEONG RIEUL
+1106 M # HANGUL CHOSEONG MIEUM
+1107 B # HANGUL CHOSEONG PIEUP
+1108 BB # HANGUL CHOSEONG SSANGPIEUP
+1109 S # HANGUL CHOSEONG SIOS
+110a SS # HANGUL CHOSEONG SSANGSIOS
+110b # HANGUL CHOSEONG IEUNG
+110c J # HANGUL CHOSEONG CIEUC
+110d JJ # HANGUL CHOSEONG SSANGCIEUC
+110e C # HANGUL CHOSEONG CHIEUCH
+110f K # HANGUL CHOSEONG KHIEUKH
+1110 T # HANGUL CHOSEONG THIEUTH
+1111 P # HANGUL CHOSEONG PHIEUPH
+1112 H # HANGUL CHOSEONG HIEUH
+1161 A # HANGUL JUNGSEONG A
+1162 AE # HANGUL JUNGSEONG AE
+1163 YA # HANGUL JUNGSEONG YA
+1164 YAE # HANGUL JUNGSEONG YAE
+1165 EO # HANGUL JUNGSEONG EO
+1166 E # HANGUL JUNGSEONG E
+1167 YEO # HANGUL JUNGSEONG YEO
+1168 YE # HANGUL JUNGSEONG YE
+1169 O # HANGUL JUNGSEONG O
+116a WA # HANGUL JUNGSEONG WA
+116b WAE # HANGUL JUNGSEONG WAE
+116c OE # HANGUL JUNGSEONG OE
+116d YO # HANGUL JUNGSEONG YO
+116e U # HANGUL JUNGSEONG U
+116f WEO # HANGUL JUNGSEONG WEO
+1170 WE # HANGUL JUNGSEONG WE
+1171 WI # HANGUL JUNGSEONG WI
+1172 YU # HANGUL JUNGSEONG YU
+1173 EU # HANGUL JUNGSEONG EU
+1174 YI # HANGUL JUNGSEONG YI
+1175 I # HANGUL JUNGSEONG I
+11a8 G # HANGUL JONGSEONG KIYEOK
+11a9 GG # HANGUL JONGSEONG SSANGKIYEOK
+11aa GS # HANGUL JONGSEONG KIYEOK-SIOS
+11ab N # HANGUL JONGSEONG NIEUN
+11ac NJ # HANGUL JONGSEONG NIEUN-CIEUC
+11ad NH # HANGUL JONGSEONG NIEUN-HIEUH
+11ae D # HANGUL JONGSEONG TIKEUT
+11af L # HANGUL JONGSEONG RIEUL
+11b0 LG # HANGUL JONGSEONG RIEUL-KIYEOK
+11b1 LM # HANGUL JONGSEONG RIEUL-MIEUM
+11b2 LB # HANGUL JONGSEONG RIEUL-PIEUP
+11b3 LS # HANGUL JONGSEONG RIEUL-SIOS
+11b4 LT # HANGUL JONGSEONG RIEUL-THIEUTH
+11b5 LP # HANGUL JONGSEONG RIEUL-PHIEUPH
+11b6 LH # HANGUL JONGSEONG RIEUL-HIEUH
+11b7 M # HANGUL JONGSEONG MIEUM
+11b8 B # HANGUL JONGSEONG PIEUP
+11b9 BS # HANGUL JONGSEONG PIEUP-SIOS
+11ba S # HANGUL JONGSEONG SIOS
+11bb SS # HANGUL JONGSEONG SSANGSIOS
+11bc NG # HANGUL JONGSEONG IEUNG
+11bd J # HANGUL JONGSEONG CIEUC
+11be C # HANGUL JONGSEONG CHIEUCH
+11bf K # HANGUL JONGSEONG KHIEUKH
+11c0 T # HANGUL JONGSEONG THIEUTH
+11c1 P # HANGUL JONGSEONG PHIEUPH
+11c2 H # HANGUL JONGSEONG HIEUH
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
0000 001f <control>
fffb INTERLINEAR ANNOTATION TERMINATOR
fffc OBJECT REPLACEMENT CHARACTER
fffd REPLACEMENT CHARACTER
+f0000 ffffd <Plane 15 Private Use, First>
+100000 10fffd <Plane 16 Private Use, First>
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
0030 0
0f27 7
0f28 8
0f29 9
+0f2a 1/2
+0f2b 3/2
+0f2c 5/2
+0f2d 7/2
+0f2e 9/2
+0f2f 11/2
+0f30 13/2
+0f31 15/2
+0f32 17/2
+0f33 -1/2
1040 0
1041 1
1042 2
3038 10
3039 20
303a 30
+3192 1
+3193 2
+3194 3
+3195 4
+3220 1
+3221 2
+3222 3
+3223 4
+3224 5
+3225 6
+3226 7
+3227 8
+3228 9
+3229 10
3280 1
3281 2
3282 3
-Property dump: UnicodeData-3.0.0.txt
+Property dump: UnicodeData-3.0.1.txt
*******************************************
*******************************************
-Property dump for: 0x00800000 (Delimiter)
-
-0000
-0009..000D (5 chars)
-001C..0023 (8 chars)
-0028..0029 (2 chars)
-002C
-002E..002F (2 chars)
-003A..003B (2 chars)
-003F
-005B..005D (3 chars)
-007B..007D (3 chars)
-0085
-00A0..00A1 (2 chars)
-00A6..00A7 (2 chars)
-00AB
-00B6
-00BB
-00BF
-037E
-0387
-055C..055E (3 chars)
-0589
-060C
-061B
-061F
-066B..066C (2 chars)
-06D4
-0700..0709 (10 chars)
-070B..070D (3 chars)
-0964..0965 (2 chars)
-0DF4
-0E5A..0E5B (2 chars)
-0F0B
-0F0D..0F12 (6 chars)
-0F3A..0F3D (4 chars)
-104A..104B (2 chars)
-10FB
-1361..1368 (8 chars)
-166D..166E (2 chars)
-1680
-169B..169C (2 chars)
-16EB..16ED (3 chars)
-17D4..17D6 (3 chars)
-17DA
-1802..1805 (4 chars)
-1808..1809 (2 chars)
-2000..200B (12 chars)
-2016
-2018..201F (8 chars)
-2028..2029 (2 chars)
-202F
-2039..203A (2 chars)
-203C..203D (2 chars)
-2045..2046 (2 chars)
-2048..2049 (2 chars)
-207D..207E (2 chars)
-208D..208E (2 chars)
-2329..232A (2 chars)
-3000..3002 (3 chars)
-3008..3011 (10 chars)
-3014..301B (8 chars)
-301D..301F (3 chars)
-30FB
-FD3E..FD3F (2 chars)
-FE35..FE44 (16 chars)
-FE50..FE52 (3 chars)
-FE54..FE57 (4 chars)
-FE59..FE5F (7 chars)
-FE68
-FF01..FF03 (3 chars)
-FF08..FF09 (2 chars)
-FF0C
-FF0E..FF0F (2 chars)
-FF1A..FF1B (2 chars)
-FF1F
-FF3B..FF3D (3 chars)
-FF5B..FF5D (3 chars)
-FF61..FF65 (5 chars)
-FFE4
-
-*******************************************
-
Property dump for: 0x80000003 (Line Separator)
2028
20D0..20E3 (20 chars)
302A..302F (6 chars)
3099..309A (2 chars)
-F8F0..F8FF (16 chars)
FB1E
FE20..FE23 (4 chars)
20D0..20E3 (20 chars)
302A..302F (6 chars)
3099..309A (2 chars)
-F8F0..F8FF (16 chars)
FB1E
FE20..FE23 (4 chars)
0DF2..0DF3 (2 chars)
0E01..0E3A (58 chars)
0E40..0E45 (6 chars)
-0E47
0E4D
0E81..0E82 (2 chars)
0E84
0CCD
0D4D
0DCA
-0E48..0E4C (5 chars)
+0E47..0E4C (6 chars)
0E4E
0EC8..0ECC (5 chars)
0F18..0F19 (2 chars)
4E00..9FA5 (20902 chars)
A000..A48C (1165 chars)
AC00..D7A3 (11172 chars)
-D800..F7FF (8192 chars)
-F900..FA2D (302 chars)
+D800..FA2D (8750 chars)
FB00..FB06 (7 chars)
FB13..FB17 (5 chars)
FF21..FF3A (26 chars)
FFCA..FFCF (6 chars)
FFD2..FFD7 (6 chars)
FFDA..FFDC (3 chars)
+F0000..FFFFD (65534 chars)
+100000..10FFFD (65534 chars)
*******************************************
20D0..20E3 (20 chars)
302A..302F (6 chars)
3099..309A (2 chars)
-F8F0..F8FF (16 chars)
FB1E
FE20..FE23 (4 chars)
Property dump for: 0x80000005 (Private Use)
E000..F8FF (6400 chars)
+F0000..FFFFD (65534 chars)
+100000..10FFFD (65534 chars)
*******************************************
*******************************************
+Property dump for: 0x8000000A (Not a Character)
+
+FFFE..FFFF (2 chars)
+1FFFE..1FFFF (2 chars)
+2FFFE..2FFFF (2 chars)
+3FFFE..3FFFF (2 chars)
+4FFFE..4FFFF (2 chars)
+5FFFE..5FFFF (2 chars)
+6FFFE..6FFFF (2 chars)
+7FFFE..7FFFF (2 chars)
+8FFFE..8FFFF (2 chars)
+9FFFE..9FFFF (2 chars)
+AFFFE..AFFFF (2 chars)
+BFFFE..BFFFF (2 chars)
+CFFFE..CFFFF (2 chars)
+DFFFE..DFFFF (2 chars)
+EFFFE..EFFFF (2 chars)
+FFFFE..FFFFF (2 chars)
+10FFFE..10FFFF (2 chars)
+
+*******************************************
+
Property dump for: 0x00000000 (Unassigned Code Value)
0220..0221 (2 chars)
FFDD..FFDF (3 chars)
FFE7
FFEF..FFF8 (10 chars)
+10000..1FFFD (65534 chars)
+20000..2FFFD (65534 chars)
+30000..3FFFD (65534 chars)
+40000..4FFFD (65534 chars)
+50000..5FFFD (65534 chars)
+60000..6FFFD (65534 chars)
+70000..7FFFD (65534 chars)
+80000..8FFFD (65534 chars)
+90000..9FFFD (65534 chars)
+A0000..AFFFD (65534 chars)
+B0000..BFFFD (65534 chars)
+C0000..CFFFD (65534 chars)
+D0000..DFFFD (65534 chars)
+E0000..EFFFD (65534 chars)
--- /dev/null
+The *.txt files were copied 30 Aug 2000 from
+
+ http://www.unicode.org/Public/UNIDATA/
+
+and most of them were renamed to better fit 8.3 filename limitations,
+by which the Perl distribution tries to live.
+
+ www.unicode.org Perl distribution
+
+ ArabicShaping.txt ArabShap.txt
+ BidiMirroring.txt BidiMirr.txt
+ Blocks.txt Blocks.txt
+ CaseFolding.txt CaseFold.txt
+ CompositionExclusions.txt CompExcl.txt
+ EastAsianWidth.txt EAWidth.txt (0)
+ Index.txt Index.txt
+ Jamo.txt Jamo.txt
+ LineBreak.txt LineBrk.txt (0)
+ NamesList.html NamesList.html (0)
+ NamesList.txt Names.txt
+ PropList.txt PropList.txt
+ ReadMe.txt ReadMe.txt
+ SpecialCasing.txt SpecCase.txt
+ UnicodeCharacterDatabase.html UCD301.html
+ UnicodeData.html UCDFF301.html
+ UnicodeData.txt Unicode.301
+
+The two big files, NormalizationTest.txt (1.7MB) and Unihan.txt (15.8MB)
+were not copied for space considerations. The files marked with (0) had
+not been updated since Unicode 3.0.0 (10 Sep 1999).
+
+The *.pl files are generated from these files by the 'mktables.PL' script.
+
+While the files have been renamed, the links in the html files haven't.
+
+--
+jhi@iki.fi
-June 23, 1999
+August 30, 2000
-This directory contains the initial release for Unicode 3.0.
+This directory contains the first update release for Unicode 3.0.
This release consists of corrections and additions to the
-Unicode Character Database, to match the publication of
-The Unicode Standard, Version 3.0.
+Unicode Character Database for the Unicode Standard,
+Version 3.0.1.
Detailed documentation of the files constituting the
Unicode Character Database (contributory data files for
the standard itself) can now be found in
UnicodeCharacterDatabase.html.
---------------------------------------------------------------------------
-NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE
-
-The files have been copied from
-
- ftp://ftp.unicode.org/Public/3.0-Update/
-
-and most of them have been renamed to better fit 8.3 filename limitations.
-
-long name at unicode.org short name latest '#'
------------------------- ---------- ----------
-ArabicShaping-#.txt ArabShap.txt 2
-Blocks-#.txt Blocks.txt 3
-CompositionExclusions-#.txt CompExcl.txt 1
-EastAsianWidth-#.txt EAWidth.txt 3
-Index-#.txt Index.txt 3.0.0
-Jamo-#.txt Jamo.txt 2
-LineBreak-#.txt LineBrk.txt 5
-NamesList-#.txt Names.txt 3.0.0
-NamesList-#.html NamesList.html 1
-PropList-#.txt Props.txt 3.0.0
-SpecialCasing-#.txt SpecCase.txt 2
-UnicodeData-#.txt Unicode.300 3.0.0
-UnicodeData-#.html Unicode3.html 3.0.0
-UnicodeCharacterDatabase-#.html UCD300.html 3.0.0
-
-The *.pl files are generated from these files by the 'mktables.PL' script.
-
-While the files have been renamed the links in the html files haven't.
-
---
-jhi@iki.fi
-# SpecialCasing-2.txt
+# SpecialCasing-3.txt
#
# Special Casing Properties
#
# <upper> := <code_point_list>
# <code_point_list> := <code_point> (<s>+ <code_point>)*
# <code_point> := <hex><hex><hex><hex>
-# <hex> := [0-1A-Fa-f]
+# <hex> := [0-9A-Fa-f]
# <s> := <space>
#
-# <condition_list> := <locale>? (<s>+ <context>)*
-# <locale> := <ISO_3166_code> ( "_" <ISO_639_code> )? ( "_" <variant> )?
+# <condition_list> := <locale>? (<s>+ <context>)* <sep>
+# <locale> := <ISO_639_code> ( "_" <ISO_3166_code> )? ( "_" <variant> )?
# <ISO_3166_code> := 2-letter country code,
# as in http://www.unicode.org/unicode/onlinedat/countries.html
# <ISO_639_code> := 2-letter code,
# as in http://www.unicode.org/unicode/onlinedat/languages.html
-# <context> := "FINAL" | "NON_FINAL" | "MODERN" | "NON_MODERN"
+# <context> := "FINAL" | "NON_FINAL" | "MODERN" | "NON_MODERN" | "AFTER_i"
+#
+# A condition list overrides the normal behavior if all of the listed conditions are true.
+# Case distinctions in the condition list are not significant.
#
-# A condition list overrides the normal behavior if any of the listed conditions is true.
# FINAL: The letter is not followed by a letter of category L* (e.g. Ll, Lt, Lu, Lm, or Lo).
# MODERN: The mapping is only used for modern text.
+# AFTER_i: The last base character was "i" 0069
+#
# Conditions preceded by "NON_" represent the negation of the condition
#
# New contexts may be added in the future.
-# Parsers of this file must be prepared to deal with that situation.
# Additional whitespace around elements is optional. Blank lines are ignored in parsing.
# On any line, all text following "#" is a comment, and are ignored in parsing.
+#
+# Parsers of this file must be prepared to deal with future additions to this format:
+# * Additional contexts
+# * Additional fields
# ================================================================================
# ================================================================================
# No corresponding uppercase precomposed character
-0149; 0149; 02BC 006E; 02BC 004E; # LATIN SMALL LETTER N PRECEDED BY APOSTROPHE
+0149; 0149; 02BC 004E; 02BC 004E; # LATIN SMALL LETTER N PRECEDED BY APOSTROPHE
0390; 0390; 0399 0308 0301; 0399 0308 0301; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
03B0; 03B0; 03A5 0308 0301; 03A5 0308 0301; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
01F0; 01F0; 004A 030C; 004A 030C; # LATIN SMALL LETTER J WITH CARON
# 03C3; 03C3; 03A3; 03A3; # GREEK SMALL LETTER SIGMA
# 03C2; 03C2; 03A3; 03A3; # GREEK SMALL LETTER FINAL SIGMA
-# Note: the following cases are not included, since they would normalize in lowercasing
+# Note: the following cases are not included, since they would case-fold in lowercasing
# 03C3; 03C2; 03A3; 03A3; FINAL; # GREEK SMALL LETTER SIGMA
# 03C2; 03C3; 03A3; 03A3; NON_FINAL; # GREEK SMALL LETTER FINAL SIGMA
# Locale-sensitive mappings
# ================================================================================
+# Lithuanian
+
+0307; 0307; ; ; lt AFTER_i; # Remove DOT ABOVE after "i" with upper or titlecase
+
# Turkish
-0049; 0131; 0049; 0049; TR; # LATIN CAPITAL LETTER I
-0069; 0069; 0130; 0130; TR; # LATIN SMALL LETTER I
+0049; 0131; 0049; 0049; tr; # LATIN CAPITAL LETTER I
+0069; 0069; 0130; 0130; tr; # LATIN SMALL LETTER I
# Note: the following cases are already in the UnicodeData file.
-# 0131; 0131; 0049; 0049; TR; # LATIN SMALL LETTER DOTLESS I
-# 0130; 0069; 0130; 0130; TR; # LATIN CAPITAL LETTER I WITH DOT ABOVE
+# 0131; 0131; 0049; 0049; tr; # LATIN SMALL LETTER DOTLESS I
+# 0130; 0069; 0130; 0130; tr; # LATIN CAPITAL LETTER I WITH DOT ABOVE
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
0030 0039 0000
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
0041 005a 0061
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
0061 007a 0041
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.300.
+# This file is built by mktables.PL from e.g. Unicode.301.
# Any changes made here will be lost!
return <<'END';
0061 007a 0041
<html>
-
-
<head>
-
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
-
<meta http-equiv="Content-Language" content="en-us">
-
<meta name="GENERATOR" content="Microsoft FrontPage 4.0">
-
<meta name="ProgId" content="FrontPage.Editor.Document">
-
<link rel="stylesheet" href="http://www.unicode.org/unicode.css" type="text/css">
-
<title>Unicode Character Database</title>
-
</head>
-
-
<body>
-
-
-<h1>UNICODE CHARACTER DATABASE<br>
-Version 3.0.0</h1>
-
+<h1>UNICODE CHARACTER DATABASE<br>
+Version 3.0.1</h1>
<table border="1" cellspacing="2" cellpadding="0" height="87" width="100%">
-
<tr>
-
<td valign="TOP" width="144">Revision</td>
-
- <td valign="TOP">3.0.0</td>
-
+ <td valign="TOP">3.0.1</td>
</tr>
-
<tr>
-
<td valign="TOP" width="144">Authors</td>
-
<td valign="TOP">Mark Davis and Ken Whistler</td>
-
</tr>
-
<tr>
-
<td valign="TOP" width="144">Date</td>
-
- <td valign="TOP">1999-09-11</td>
-
+ <td valign="TOP">2000-08-17</td>
</tr>
-
<tr>
-
<td valign="TOP" width="144">This Version</td>
-
- <td valign="TOP"><a href="ftp://ftp.unicode.org/Public/3.0-Update/UnicodeCharacterDatabase-3.0.0.html">ftp://ftp.unicode.org/Public/3.0-Update/UnicodeCharacterDatabase-3.0.0.html</a></td>
-
+ <td valign="TOP"><a
+ href="http://www.unicode.org/Public/3.0-Update1/UnicodeCharacterDatabase-3.0.1.html">http://www.unicode.org/Public/3.0-Update1/UnicodeCharacterDatabase-3.0.1.html</a></td>
</tr>
-
<tr>
-
<td valign="TOP" width="144">Previous Version</td>
-
- <td valign="TOP">n/a</td>
-
+ <td valign="TOP"><a
+ href="http://www.unicode.org/Public/3.0-Update/UnicodeCharacterDatabase-3.0.0.html">http://www.unicode.org/Public/3.0-Update/UnicodeCharacterDatabase-3.0.0.html</a></td>
</tr>
-
<tr>
-
<td valign="TOP" width="144">Latest Version</td>
-
- <td valign="TOP"><a href="ftp://ftp.unicode.org/Public/3.0-Update/UnicodeCharacterDatabase-3.0.0.html">ftp://ftp.unicode.org/Public/3.0-Update/UnicodeCharacterDatabase-3.0.0.html</a></td>
-
+ <td valign="TOP"><a
+ href="http://www.unicode.org/Public/UNIDATA/UnicodeCharacterDatabase.html">http://www.unicode.org/Public/UNIDATA/UnicodeCharacterDatabase.html</a></td>
</tr>
-
</table>
-
-<p align="center">Copyright © 1995-1999 Unicode, Inc. All Rights reserved.</p>
-
-<h2>Disclaimer</h2>
-
-<p>The Unicode Character Database is provided as is by Unicode, Inc. No claims
-
-are made as to fitness for any particular purpose. No warranties of any kind are
-
-expressed or implied. The recipient agrees to determine applicability of
-
-information provided. If this file has been purchased on magnetic or optical
-
-media from Unicode, Inc., the sole remedy for any claim will be exchange of
-
-defective media within 90 days of receipt.</p>
-
-<p>This disclaimer is applicable for all other data files accompanying the
-
-Unicode Character Database, some of which have been compiled by the Unicode
-
-Consortium, and some of which have been supplied by other sources.</p>
-
-<h2>Limitations on Rights to Redistribute This Data</h2>
-
-<p>Recipient is granted the right to make copies in any form for internal
-
-distribution and to freely use the information supplied in the creation of
-
-products supporting the Unicode<sup>TM</sup> Standard. The files in the Unicode
-
-Character Database can be redistributed to third parties or other organizations
-
-(whether for profit or not) as long as this notice and the disclaimer notice are
-
-retained. Information can be extracted from these files and used in
-
-documentation or programs, as long as there is an accompanying notice indicating
-
-the source.</p>
-
-<h2>Introduction</h2>
-
-<p>The Unicode Character Database is a set of files that define the Unicode
-
-character properties and internal mappings. For more information about character
-
-properties and mappings, see <i><a href="http://www.unicode.org/unicode/uni2book/u2.html">The
-
-Unicode Standard</a></i>.</p>
-
-<p>The Unicode Character Database has been updated to reflect Version 3.0 of the
-
-Unicode Standard, with many characters added to those published in Version 2.0.
-
-A number of corrections have also been made to case mappings or other errors in
-
-the database noted since the publication of Version 2.0. Normative bidirectional
-
-properties have also been modified to reflect decisions of the Unicode Technical
-
-Committee.</p>
-
-<p>For more information on versions of the Unicode Standard and how to reference
-
-them, see <a href="http://www.unicode.org/unicode/standard/versions/">http://www.unicode.org/unicode/standard/versions/</a>.</p>
-
-<h2>Conformance</h2>
-
-<p>Character properties may be either normative or informative. <i>Normative</i>
-
-means that implementations that claim conformance to the Unicode Standard (at a
-
-particular version) and which make use of a particular property or field must
-
-follow the specifications of the standard for that property or field in order to
-
-be conformant. The term <i>normative</i> when applied to a property or field of
-
-the Unicode Character Database, does <i>not</i> mean that the value of that
-
-field will never change. Corrections and extensions to the standard in the
-
-future may require minor changes to normative values, even though the Unicode
-
-Technical Committee strives to minimize such changes. An<i> informative </i>property
-
-or field is strongly recommended, but a conformant implementation is free to use
-
-or change such values as it may require while still being conformant to the
-
-standard. Particular implementations may choose to override the properties and
-
-mappings that are not normative. In that case, it is up to the implementer to
-
-establish a protocol to convey that information.</p>
-
-<h2>Files</h2>
-
-<p>The following summarizes the files in the Unicode Character Database. For
-
-more information about these files, see the referenced technical report or
-
-section of Unicode Standard, Version 3.0.</p>
-
-<p><b>UnicodeData.txt (Chapter 4)</b>
-
-<ul>
-
- <li>The main file in the Unicode Character Database.</li>
-
- <li>For detailed information on the format, see <a href="UnicodeData.html">UnicodeData.html</a>.
-
- This file also characterizes which properties are normative and which are
-
- informative.</li>
-
-</ul>
-
-<p><b>PropList.txt (Chapter 4)</b>
-
-<ul>
-
- <li>Additional informative properties list: <i>Alphabetic, Ideographic,</i>
-
- and <i>Mathematical</i>, among others.</li>
-
-</ul>
-
-<p><b>SpecialCasing.txt (Chapter 4)</b>
-
-<ul>
-
- <li>List of informative special casing properties, including one-to-many
-
- mappings such as SHARP S => "SS", and locale-specific mappings,
-
- such as for Turkish <i>dotless i</i>.</li>
-
-</ul>
-
-<p><b>Blocks.txt (Chapter 14)</b>
-
-<ul>
-
- <li>List of normative block names.</li>
-
-</ul>
-
-<p><b>Jamo.txt (Chapter 4)</b>
-
-<ul>
-
- <li>List of normative Jamo short names, used in deriving HANGUL SYLLABLE names
-
- algorithmically.</li>
-
-</ul>
-
-<p><b>ArabicShaping.txt (Section 8.2)</b>
-
-<ul>
-
- <li>Basic Arabic and Syriac character shaping properties, such as initial,
-
- medial and final shapes. These properties are normative for minimal shaping
-
- of Arabic and Syriac. </li>
-
-</ul>
-
-<p><b>NamesList.txt (Chapter 14)</b>
-
-<ul>
-
- <li>This file duplicates some of the material in the UnicodeData file, and
-
- adds informative annotations uses in the character charts, as printed in the
-
- Unicode Standard. </li>
-
- <li><b>Note: </b>The information in NamesList.txt and Index.txt files matches
-
- the appropriate version of the book. Changes in the Unicode Character
-
- Database since then may not be reflected in these files, since they are
-
- primarily of archival interest.</li>
-
-</ul>
-
-<p><b>Index.txt (Chapter 14)</b>
-
-<ul>
-
- <li>Informative index to Unicode characters, as printed in the Unicode
-
- Standard</li>
-
- <li><b>Note: </b>The information in NamesList.txt and Index.txt files matches
-
- the appropriate version of the book. Changes in the Unicode Character
-
- Database since then may not be reflected in these files, since they are
-
- primarily of archival interest.</li>
-
-</ul>
-
-<p><b>CompositionExclusions.txt (<a href="http://www.unicode.org/unicode/reports/tr15/">UTR#15
-
-Unicode Normalization Forms</a>)</b>
-
-<ul>
-
- <li>Normative properties for normalization.</li>
-
-</ul>
-
-<p><b>LineBreak.txt (<a href="http://www.unicode.org/unicode/reports/tr14/">UTR
-
-#14: Line Breaking Properties</a>)</b>
-
-<ul>
-
- <li>Normative and informative properties for line breaking. To see which
-
- properties are informative and which are normative, consult UTR#14.</li>
-
-</ul>
-
-<p><b>EastAsianWidth.txt (<a href="http://www.unicode.org/unicode/reports/tr11/">UTR
-
-#11: East Asian Character Width</a>)</b>
-
-<ul>
-
- <li>Informative properties for determining the choice of wide vs. narrow
-
- glyphs in East Asian contexts.</li>
-
-</ul>
-
-<p><b>diffXvY.txt</b>
-
-<ul>
-
- <li>Mechanically-generated informative files containing accumulated
-
- differences between successive versions of UnicodeData.txt</li>
-
-</ul>
-
-
-
-</body>
-
-
-
-</html>
-
+<p align="center">Copyright © 1995-2000 Unicode, Inc. All Rights reserved.</p>
+<h2>Disclaimer</h2>
+<p>The Unicode Character Database is provided as is by Unicode, Inc. No claims
+are made as to fitness for any particular purpose. No warranties of any kind are
+expressed or implied. The recipient agrees to determine applicability of
+information provided. If this file has been purchased on magnetic or optical
+media from Unicode, Inc., the sole remedy for any claim will be exchange of
+defective media within 90 days of receipt.</p>
+<p>This disclaimer is applicable for all other data files accompanying the
+Unicode Character Database, some of which have been compiled by the Unicode
+Consortium, and some of which have been supplied by other sources.</p>
+<h2>Limitations on Rights to Redistribute This Data</h2>
+<p>Recipient is granted the right to make copies in any form for internal
+distribution and to freely use the information supplied in the creation of
+products supporting the Unicode<sup>TM</sup> Standard. The files in the Unicode
+Character Database can be redistributed to third parties or other organizations
+(whether for profit or not) as long as this notice and the disclaimer notice are
+retained. Information can be extracted from these files and used in
+documentation or programs, as long as there is an accompanying notice indicating
+the source.</p>
+<h2>Introduction</h2>
+<p>The Unicode Character Database is a set of files that define the Unicode
+character properties and internal mappings. For more information about character
+properties and mappings, see <i><a
+href="http://www.unicode.org/unicode/uni2book/u2.html">The Unicode Standard</a></i>.</p>
+<p>The Unicode Character Database has been updated to reflect Version 3.0 of the
+Unicode Standard, with many characters added to those published in Version 2.0.
+A number of corrections have also been made to case mappings or other errors in
+the database noted since the publication of Version 2.0. Normative bidirectional
+properties have also been modified to reflect decisions of the Unicode Technical
+Committee.</p>
+<p>For more information on versions of the Unicode Standard and how to reference
+them, see <a href="http://www.unicode.org/unicode/standard/versions/">http://www.unicode.org/unicode/standard/versions/</a>.</p>
+<h2>Conformance</h2>
+<p>Character properties may be either normative or informative. <i>Normative</i>
+means that implementations that claim conformance to the Unicode Standard (at a
+particular version) and which make use of a particular property or field must
+follow the specifications of the standard for that property or field in order to
+be conformant. The term <i>normative</i> when applied to a property or field of
+the Unicode Character Database, does <i>not</i> mean that the value of that
+field will never change. Corrections and extensions to the standard in the
+future may require minor changes to normative values, even though the Unicode
+Technical Committee strives to minimize such changes. An<i> informative </i>property
+or field is strongly recommended, but a conformant implementation is free to use
+or change such values as it may require while still being conformant to the
+standard. Particular implementations may choose to override the properties and
+mappings that are not normative. In that case, it is up to the implementer to
+establish a protocol to convey that information.</p>
+<h2>Files</h2>
+<p>The following summarizes the files in the Unicode Character Database. For
+more information about these files, see the referenced technical report(s) or
+section of Unicode Standard, Version 3.0.</p>
+<p><b>UnicodeData.txt (Chapter 4, <a
+href="http://www.unicode.org/unicode/reports/tr21/">UTR #21: Case Mappings</a>, <a
+href="http://www.unicode.org/unicode/reports/tr15/">UAX #15 Unicode Normalization
+Forms</a>)</b>
+<ul>
+ <li>The main file in the Unicode Character Database.</li>
+ <li>For detailed information on the format, see <a href="UnicodeData.html">UnicodeData.html</a>.
+ This file also characterizes which properties are normative and which are
+ informative.</li>
+</ul>
+<p><b>PropList.txt (Chapter 4)</b>
+<ul>
+ <li>Additional informative properties list: <i>Alphabetic, Ideographic,</i>
+ and <i>Mathematical</i>, among others.</li>
+</ul>
+<p><b>SpecialCasing.txt (Chapter 4, <a
+href="http://www.unicode.org/unicode/reports/tr21/">UTR #21: Case Mappings</a>)</b>
+<ul>
+ <li>List of informative special casing properties, including one-to-many
+ mappings such as SHARP S => "SS", and locale-specific mappings,
+ such as for Turkish <i>dotless i</i>.</li>
+</ul>
+<p><b>Blocks.txt (Chapter 14)</b>
+<ul>
+ <li>List of normative block names.</li>
+</ul>
+<p><b>Jamo.txt (Chapter 4)</b>
+<ul>
+ <li>List of normative Jamo short names, used in deriving HANGUL SYLLABLE names
+ algorithmically.</li>
+</ul>
+<p><b>ArabicShaping.txt (Section 8.2)</b>
+<ul>
+ <li>Basic Arabic and Syriac character shaping properties, such as initial,
+ medial and final shapes. These properties are normative for minimal shaping
+ of Arabic and Syriac.</li>
+</ul>
+<p><b>NamesList.txt (Chapter 14)</b>
+<ul>
+ <li>This file duplicates some of the material in the UnicodeData file, and
+ adds informative annotations used in the character charts, as printed in the
+ Unicode Standard.</li>
+ <li><b>Note: </b>The information in NamesList.txt and Index.txt files matches
+ the appropriate version of the book. Changes in the Unicode Character
+ Database since then may not be reflected in these files, since they are
+ primarily of archival interest.</li>
+</ul>
+<p><b>Index.txt (Chapter 14)</b>
+<ul>
+ <li>Informative index to Unicode characters, as printed in the Unicode
+ Standard</li>
+ <li><b>Note: </b>The information in NamesList.txt and Index.txt files matches
+ the appropriate version of the book. Changes in the Unicode Character
+ Database since then may not be reflected in these files, since they are
+ primarily of archival interest.</li>
+</ul>
+<p><b>CompositionExclusions.txt (<a
+href="http://www.unicode.org/unicode/reports/tr15/">UAX #15 Unicode Normalization
+Forms</a>)</b>
+<ul>
+ <li>Normative properties for normalization.</li>
+</ul>
+<p><b>LineBreak.txt (<a href="http://www.unicode.org/unicode/reports/tr14/">UAX
+#14: Line Breaking Properties</a>)</b>
+<ul>
+ <li>Normative and informative properties for line breaking. To see which
+ properties are informative and which are normative, consult UAX #14.</li>
+</ul>
+<p><b>EastAsianWidth.txt (<a href="http://www.unicode.org/unicode/reports/tr11/">UAX
+#11: East Asian Character Width</a>)</b>
+<ul>
+ <li>Informative properties for determining the choice of wide vs. narrow
+ glyphs in East Asian contexts.</li>
+</ul>
+<p><b>BidiMirroring.txt</b><b> (<a
+href="http://www.unicode.org/unicode/reports/tr9/">UAX #9: The
+Bidirectional Algorithm</a>)</b></p>
+<ul>
+ <li>Informative properties for substituting characters in an implementation of
+ bidirectional mirroring.</li>
+</ul>
+<p><b>CaseFolding.txt (<a href="http://www.unicode.org/unicode/reports/tr21/">UTR
+#21: Case Mappings</a>)</b></p>
+<ul>
+ <li>Informative file mapping characters to their case-folded form.</li>
+</ul>
+<p><b>NormalizationTest.txt (<a
+href="http://www.unicode.org/unicode/reports/tr15/">UAX #15 Unicode Normalization
+Forms</a>)</b></p>
+<ul>
+ <li>Normative test file for conformance to Unicode Normalization Forms.</li>
+</ul>
+<p><b>diffXvY.txt</b>
+<ul>
+ <li>Mechanically-generated informative files containing accumulated
+ differences between successive versions of UnicodeData.txt</li>
+</ul>
+
+</body>
+
+</html>
0081;<control>;Cc;0;BN;;;;;N;;;;;
0082;<control>;Cc;0;BN;;;;;N;BREAK PERMITTED HERE;;;;
0083;<control>;Cc;0;BN;;;;;N;NO BREAK HERE;;;;
-0084;<control>;Cc;0;BN;;;;;N;INDEX;;;;
+0084;<control>;Cc;0;BN;;;;;N;;;;;
0085;<control>;Cc;0;B;;;;;N;NEXT LINE;;;;
0086;<control>;Cc;0;BN;;;;;N;START OF SELECTED AREA;;;;
0087;<control>;Cc;0;BN;;;;;N;END OF SELECTED AREA;;;;
01A3;LATIN SMALL LETTER OI;Ll;0;L;;;;;N;LATIN SMALL LETTER O I;gha;01A2;;01A2
01A4;LATIN CAPITAL LETTER P WITH HOOK;Lu;0;L;;;;;N;LATIN CAPITAL LETTER P HOOK;;;01A5;
01A5;LATIN SMALL LETTER P WITH HOOK;Ll;0;L;;;;;N;LATIN SMALL LETTER P HOOK;;01A4;;01A4
-01A6;LATIN LETTER YR;Lu;0;L;;;;;N;LATIN LETTER Y R;;;0280;
+01A6;LATIN LETTER YR;Lu;0;L;;;;;N;LATIN LETTER Y R;*;;0280;
01A7;LATIN CAPITAL LETTER TONE TWO;Lu;0;L;;;;;N;;;;01A8;
01A8;LATIN SMALL LETTER TONE TWO;Ll;0;L;;;;;N;;;01A7;;01A7
01A9;LATIN CAPITAL LETTER ESH;Lu;0;L;;;;;N;;;;0283;
027D;LATIN SMALL LETTER R WITH TAIL;Ll;0;L;;;;;N;LATIN SMALL LETTER R HOOK;;;;
027E;LATIN SMALL LETTER R WITH FISHHOOK;Ll;0;L;;;;;N;LATIN SMALL LETTER FISHHOOK R;;;;
027F;LATIN SMALL LETTER REVERSED R WITH FISHHOOK;Ll;0;L;;;;;N;LATIN SMALL LETTER REVERSED FISHHOOK R;;;;
-0280;LATIN LETTER SMALL CAPITAL R;Ll;0;L;;;;;N;;;01A6;;01A6
+0280;LATIN LETTER SMALL CAPITAL R;Ll;0;L;;;;;N;;*;01A6;;01A6
0281;LATIN LETTER SMALL CAPITAL INVERTED R;Ll;0;L;;;;;N;;;;;
0282;LATIN SMALL LETTER S WITH HOOK;Ll;0;L;;;;;N;LATIN SMALL LETTER S HOOK;;;;
0283;LATIN SMALL LETTER ESH;Ll;0;L;;;;;N;;;01A9;;01A9
0F27;TIBETAN DIGIT SEVEN;Nd;0;L;;7;7;7;N;;;;;
0F28;TIBETAN DIGIT EIGHT;Nd;0;L;;8;8;8;N;;;;;
0F29;TIBETAN DIGIT NINE;Nd;0;L;;9;9;9;N;;;;;
-0F2A;TIBETAN DIGIT HALF ONE;No;0;L;;;;;N;;;;;
-0F2B;TIBETAN DIGIT HALF TWO;No;0;L;;;;;N;;;;;
-0F2C;TIBETAN DIGIT HALF THREE;No;0;L;;;;;N;;;;;
-0F2D;TIBETAN DIGIT HALF FOUR;No;0;L;;;;;N;;;;;
-0F2E;TIBETAN DIGIT HALF FIVE;No;0;L;;;;;N;;;;;
-0F2F;TIBETAN DIGIT HALF SIX;No;0;L;;;;;N;;;;;
-0F30;TIBETAN DIGIT HALF SEVEN;No;0;L;;;;;N;;;;;
-0F31;TIBETAN DIGIT HALF EIGHT;No;0;L;;;;;N;;;;;
-0F32;TIBETAN DIGIT HALF NINE;No;0;L;;;;;N;;;;;
-0F33;TIBETAN DIGIT HALF ZERO;No;0;L;;;;;N;;;;;
+0F2A;TIBETAN DIGIT HALF ONE;No;0;L;;;;1/2;N;;;;;
+0F2B;TIBETAN DIGIT HALF TWO;No;0;L;;;;3/2;N;;;;;
+0F2C;TIBETAN DIGIT HALF THREE;No;0;L;;;;5/2;N;;;;;
+0F2D;TIBETAN DIGIT HALF FOUR;No;0;L;;;;7/2;N;;;;;
+0F2E;TIBETAN DIGIT HALF FIVE;No;0;L;;;;9/2;N;;;;;
+0F2F;TIBETAN DIGIT HALF SIX;No;0;L;;;;11/2;N;;;;;
+0F30;TIBETAN DIGIT HALF SEVEN;No;0;L;;;;13/2;N;;;;;
+0F31;TIBETAN DIGIT HALF EIGHT;No;0;L;;;;15/2;N;;;;;
+0F32;TIBETAN DIGIT HALF NINE;No;0;L;;;;17/2;N;;;;;
+0F33;TIBETAN DIGIT HALF ZERO;No;0;L;;;;-1/2;N;;;;;
0F34;TIBETAN MARK BSDUS RTAGS;So;0;L;;;;;N;;du ta;;;
0F35;TIBETAN MARK NGAS BZUNG NYI ZLA;Mn;220;NSM;;;;;N;TIBETAN HONORIFIC UNDER RING;nge zung nyi da;;;
0F36;TIBETAN MARK CARET -DZUD RTAGS BZHI MIG CAN;So;0;L;;;;;N;;dzu ta shi mig chen;;;
0FCA;TIBETAN SYMBOL NOR BU NYIS -KHYIL;So;0;L;;;;;N;;norbu nyi khyi;;;
0FCB;TIBETAN SYMBOL NOR BU GSUM -KHYIL;So;0;L;;;;;N;;norbu sum khyi;;;
0FCC;TIBETAN SYMBOL NOR BU BZHI -KHYIL;So;0;L;;;;;N;;norbu shi khyi;;;
-0FCF;TIBETAN SIGN RDEL NAG GSUM;So;0;L;;;;;N;;;;;
+0FCF;TIBETAN SIGN RDEL NAG GSUM;So;0;L;;;;;N;;dena sum;;;
1000;MYANMAR LETTER KA;Lo;0;L;;;;;N;;;;;
1001;MYANMAR LETTER KHA;Lo;0;L;;;;;N;;;;;
1002;MYANMAR LETTER GA;Lo;0;L;;;;;N;;;;;
318E;HANGUL LETTER ARAEAE;Lo;0;L;<compat> 11A1;;;;N;HANGUL LETTER ALAE AE;;;;
3190;IDEOGRAPHIC ANNOTATION LINKING MARK;So;0;L;;;;;N;KANBUN TATETEN;Kanbun Tateten;;;
3191;IDEOGRAPHIC ANNOTATION REVERSE MARK;So;0;L;;;;;N;KAERITEN RE;Kaeriten;;;
-3192;IDEOGRAPHIC ANNOTATION ONE MARK;No;0;L;<super> 4E00;;;;N;KAERITEN ITI;Kaeriten;;;
-3193;IDEOGRAPHIC ANNOTATION TWO MARK;No;0;L;<super> 4E8C;;;;N;KAERITEN NI;Kaeriten;;;
-3194;IDEOGRAPHIC ANNOTATION THREE MARK;No;0;L;<super> 4E09;;;;N;KAERITEN SAN;Kaeriten;;;
-3195;IDEOGRAPHIC ANNOTATION FOUR MARK;No;0;L;<super> 56DB;;;;N;KAERITEN SI;Kaeriten;;;
+3192;IDEOGRAPHIC ANNOTATION ONE MARK;No;0;L;<super> 4E00;;;1;N;KAERITEN ITI;Kaeriten;;;
+3193;IDEOGRAPHIC ANNOTATION TWO MARK;No;0;L;<super> 4E8C;;;2;N;KAERITEN NI;Kaeriten;;;
+3194;IDEOGRAPHIC ANNOTATION THREE MARK;No;0;L;<super> 4E09;;;3;N;KAERITEN SAN;Kaeriten;;;
+3195;IDEOGRAPHIC ANNOTATION FOUR MARK;No;0;L;<super> 56DB;;;4;N;KAERITEN SI;Kaeriten;;;
3196;IDEOGRAPHIC ANNOTATION TOP MARK;So;0;L;<super> 4E0A;;;;N;KAERITEN ZYOU;Kaeriten;;;
3197;IDEOGRAPHIC ANNOTATION MIDDLE MARK;So;0;L;<super> 4E2D;;;;N;KAERITEN TYUU;Kaeriten;;;
3198;IDEOGRAPHIC ANNOTATION BOTTOM MARK;So;0;L;<super> 4E0B;;;;N;KAERITEN GE;Kaeriten;;;
321A;PARENTHESIZED HANGUL PHIEUPH A;So;0;L;<compat> 0028 1111 1161 0029;;;;N;PARENTHESIZED HANGUL PA;;;;
321B;PARENTHESIZED HANGUL HIEUH A;So;0;L;<compat> 0028 1112 1161 0029;;;;N;PARENTHESIZED HANGUL HA;;;;
321C;PARENTHESIZED HANGUL CIEUC U;So;0;L;<compat> 0028 110C 116E 0029;;;;N;PARENTHESIZED HANGUL JU;;;;
-3220;PARENTHESIZED IDEOGRAPH ONE;No;0;L;<compat> 0028 4E00 0029;;;;N;;;;;
-3221;PARENTHESIZED IDEOGRAPH TWO;No;0;L;<compat> 0028 4E8C 0029;;;;N;;;;;
-3222;PARENTHESIZED IDEOGRAPH THREE;No;0;L;<compat> 0028 4E09 0029;;;;N;;;;;
-3223;PARENTHESIZED IDEOGRAPH FOUR;No;0;L;<compat> 0028 56DB 0029;;;;N;;;;;
-3224;PARENTHESIZED IDEOGRAPH FIVE;No;0;L;<compat> 0028 4E94 0029;;;;N;;;;;
-3225;PARENTHESIZED IDEOGRAPH SIX;No;0;L;<compat> 0028 516D 0029;;;;N;;;;;
-3226;PARENTHESIZED IDEOGRAPH SEVEN;No;0;L;<compat> 0028 4E03 0029;;;;N;;;;;
-3227;PARENTHESIZED IDEOGRAPH EIGHT;No;0;L;<compat> 0028 516B 0029;;;;N;;;;;
-3228;PARENTHESIZED IDEOGRAPH NINE;No;0;L;<compat> 0028 4E5D 0029;;;;N;;;;;
-3229;PARENTHESIZED IDEOGRAPH TEN;No;0;L;<compat> 0028 5341 0029;;;;N;;;;;
+3220;PARENTHESIZED IDEOGRAPH ONE;No;0;L;<compat> 0028 4E00 0029;;;1;N;;;;;
+3221;PARENTHESIZED IDEOGRAPH TWO;No;0;L;<compat> 0028 4E8C 0029;;;2;N;;;;;
+3222;PARENTHESIZED IDEOGRAPH THREE;No;0;L;<compat> 0028 4E09 0029;;;3;N;;;;;
+3223;PARENTHESIZED IDEOGRAPH FOUR;No;0;L;<compat> 0028 56DB 0029;;;4;N;;;;;
+3224;PARENTHESIZED IDEOGRAPH FIVE;No;0;L;<compat> 0028 4E94 0029;;;5;N;;;;;
+3225;PARENTHESIZED IDEOGRAPH SIX;No;0;L;<compat> 0028 516D 0029;;;6;N;;;;;
+3226;PARENTHESIZED IDEOGRAPH SEVEN;No;0;L;<compat> 0028 4E03 0029;;;7;N;;;;;
+3227;PARENTHESIZED IDEOGRAPH EIGHT;No;0;L;<compat> 0028 516B 0029;;;8;N;;;;;
+3228;PARENTHESIZED IDEOGRAPH NINE;No;0;L;<compat> 0028 4E5D 0029;;;9;N;;;;;
+3229;PARENTHESIZED IDEOGRAPH TEN;No;0;L;<compat> 0028 5341 0029;;;10;N;;;;;
322A;PARENTHESIZED IDEOGRAPH MOON;So;0;L;<compat> 0028 6708 0029;;;;N;;;;;
322B;PARENTHESIZED IDEOGRAPH FIRE;So;0;L;<compat> 0028 706B 0029;;;;N;;;;;
322C;PARENTHESIZED IDEOGRAPH WATER;So;0;L;<compat> 0028 6C34 0029;;;;N;;;;;
FFFB;INTERLINEAR ANNOTATION TERMINATOR;Cf;0;BN;;;;;N;;;;;
FFFC;OBJECT REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;;
FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;;
+F0000;<Plane 15 Private Use, First>;Co;0;L;;;;;N;;;;;
+FFFFD;<Plane 15 Private Use, Last>;Co;0;L;;;;;N;;;;;
+100000;<Plane 16 Private Use, First>;Co;0;L;;;;;N;;;;;
+10FFFD;<Plane 16 Private Use, Last>;Co;0;L;;;;;N;;;;;
+++ /dev/null
-<html>
-
-
-
-<head>
-
-<meta NAME="GENERATOR" CONTENT="Microsoft FrontPage 4.0">
-
-<meta HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=UTF-8">
-
-<link REL="stylesheet" HREF="http://www.unicode.org/unicode.css" TYPE="text/css">
-
-<title>UnicodeData File Format</title>
-
-</head>
-
-
-
-<body>
-
-
-
-<h1>UnicodeData File Format<br>
-Version 3.0.0</h1>
-
-
-
-<table BORDER="1" CELLSPACING="2" CELLPADDING="0" HEIGHT="87" WIDTH="100%">
-
- <tr>
-
- <td VALIGN="TOP" width="144">Revision</td>
-
- <td VALIGN="TOP">3.0.0</td>
-
- </tr>
-
- <tr>
-
- <td VALIGN="TOP" width="144">Authors</td>
-
- <td VALIGN="TOP">Mark Davis and Ken Whistler</td>
-
- </tr>
-
- <tr>
-
- <td VALIGN="TOP" width="144">Date</td>
-
- <td VALIGN="TOP">1999-09-12</td>
-
- </tr>
-
- <tr>
-
- <td VALIGN="TOP" width="144">This Version</td>
-
- <td VALIGN="TOP"><a href="ftp://ftp.unicode.org/Public/3.0-Update/UnicodeData-3.0.0.html">ftp://ftp.unicode.org/Public/3.0-Update/UnicodeData-3.0.0.html</a></td>
-
- </tr>
-
- <tr>
-
- <td VALIGN="TOP" width="144">Previous Version</td>
-
- <td VALIGN="TOP">n/a</td>
-
- </tr>
-
- <tr>
-
- <td VALIGN="TOP" width="144">Latest Version</td>
-
- <td VALIGN="TOP"><a href="ftp://ftp.unicode.org/Public/3.0-Update/UnicodeData-3.0.0.html">ftp://ftp.unicode.org/Public/3.0-Update/UnicodeData-3.0.0.html</a></td>
-
- </tr>
-
-</table>
-
-
-
-<p align="center">Copyright © 1995-1999 Unicode, Inc. All Rights reserved.<br>
-
-<i>For more information, including Disclamer and Limitations, see <a HREF="UnicodeCharacterDatabase-3.0.0.html">UnicodeCharacterDatabase-3.0.0.html</a> </i></p>
-
-
-
-<p>This document describes the format of the UnicodeData.txt file, which is one of the
-
-files in the Unicode Character Database. The document is divided into the following
-
-sections:
-
-
-
-<ul>
-
- <li><a HREF="#Field Formats">Field Formats</a> <ul>
-
- <li><a HREF="#General Category">General Category</a> </li>
-
- <li><a HREF="#Bidirectional Category">Bidirectional Category</a> </li>
-
- <li><a HREF="#Character Decomposition">Character Decomposition Mapping</a> </li>
-
- <li><a HREF="#Canonical Combining Classes">Canonical Combining Classes</a> </li>
-
- <li><a HREF="#Decompositions and Normalization">Decompositions and Normalization</a> </li>
-
- <li><a HREF="#Case Mappings">Case Mappings</a> </li>
-
- </ul>
-
- </li>
-
- <li><a HREF="#Property Invariants">Property Invariants</a> </li>
-
- <li><a HREF="#Modification History">Modification History</a> </li>
-
-</ul>
-
-
-
-<p><b>Warning: </b>the information in this file does not completely describe the use and
-
-interpretation of Unicode character properties and behavior. It must be used in
-
-conjunction with the data in the other files in the Unicode Character Database, and relies
-
-on the notation and definitions supplied in <i><a href="http://www.unicode.org/unicode/standard/versions/Unicode3.0.html"> The Unicode
-Standard</a></i>. All chapter references
-
-are to Version 3.0 of the standard.</p>
-
-
-
-<h2><a NAME="Field Formats"></a>Field Formats</h2>
-
-
-
-<p>The file consists of lines containing fields terminated by semicolons. Each line
-
-represents the data for one encoded character in the Unicode Standard. Every encoded
-
-character has a data entry, with the exception of certain special ranges, as detailed
-
-below.
-
-
-
-<ul>
-
- <li>There are six special ranges of characters that are represented only by their start and
-
- end characters, since the properties in the file are uniform, except for code values
-
- (which are all sequential and assigned). </li>
-
- <li>The names of CJK ideograph characters and the names and decompositions of Hangul
-
- syllable characters are algorithmically derivable. (See the Unicode Standard and <a
-
- HREF="http://www.unicode.org/unicode/reports/tr15/">Unicode Technical Report #15</a> for
-
- more information). </li>
-
- <li>Surrogate code values and private use characters have no names. </li>
-
- <li>The Private Use character outside of the BMP (U+F0000..U+FFFFD, U+100000..U+10FFFD) are
-
- not listed. These correspond to surrogate pairs where the first surrogate is in the High
-
- Surrogate Private Use section. </li>
-
-</ul>
-
-
-
-<p>The exact ranges represented by start and end characters are:
-
-
-
-<ul>
-
- <li>CJK Ideographs Extension A (U+3400 - U+4DB5) </li>
-
- <li>CJK Ideographs (U+4E00 - U+9FA5) </li>
-
- <li>Hangul Syllables (U+AC00 - U+D7A3) </li>
-
- <li>Non-Private Use High Surrogates (U+D800 - U+DB7F) </li>
-
- <li>Private Use High Surrogates (U+DB80 - U+DBFF) </li>
-
- <li>Low Surrogates (U+DC00 - U+DFFF) </li>
-
- <li>The Private Use Area (U+E000 - U+F8FF) </li>
-
-</ul>
-
-
-
-<p>The following table describes the format and meaning of each field in a data entry in
-
-the UnicodeData file. Fields which contain normative information are so indicated.</p>
-
-
-
-<table BORDER="1" CELLSPACING="2" CELLPADDING="2">
-
- <tr>
-
- <th VALIGN="top" ALIGN="LEFT"><p ALIGN="LEFT">Field</th>
-
- <th VALIGN="top" ALIGN="LEFT"><p ALIGN="LEFT">Name</th>
-
- <th VALIGN="top" ALIGN="LEFT"><p ALIGN="LEFT">Status</th>
-
- <th VALIGN="top" ALIGN="LEFT"><p ALIGN="LEFT">Explanation</th>
-
- </tr>
-
- <tr>
-
- <th VALIGN="top">0</th>
-
- <td VALIGN="top">Code value</td>
-
- <td VALIGN="top">normative</td>
-
- <td VALIGN="top">Code value in 4-digit hexadecimal format.</td>
-
- </tr>
-
- <tr>
-
- <th VALIGN="top">1</th>
-
- <td VALIGN="top">Character name</td>
-
- <td VALIGN="top">normative</td>
-
- <td VALIGN="top">These names match exactly the names published in Chapter 14 of the
-
- Unicode Standard, Version 3.0.</td>
-
- </tr>
-
- <tr>
-
- <th VALIGN="top">2</th>
-
- <td VALIGN="top"><a HREF="#General Category">General Category</a> </td>
-
- <td VALIGN="top">normative / informative<br>
-
- (see below)</td>
-
- <td VALIGN="top">This is a useful breakdown into various "character types" which
-
- can be used as a default categorization in implementations. See below for a brief
-
- explanation.</td>
-
- </tr>
-
- <tr>
-
- <th VALIGN="top">3</th>
-
- <td VALIGN="top"><a HREF="#Canonical Combining Classes">Canonical Combining Classes</a> </td>
-
- <td VALIGN="top">normative</td>
-
- <td VALIGN="top">The classes used for the Canonical Ordering Algorithm in the Unicode
-
- Standard. These classes are also printed in Chapter 4 of the Unicode Standard.</td>
-
- </tr>
-
- <tr>
-
- <th VALIGN="top">4</th>
-
- <td VALIGN="top"><a HREF="#Bidirectional Category">Bidirectional Category</a> </td>
-
- <td VALIGN="top">normative</td>
-
- <td VALIGN="top">See the list below for an explanation of the abbreviations used in this
-
- field. These are the categories required by the Bidirectional Behavior Algorithm in the
-
- Unicode Standard. These categories are summarized in Chapter 3 of the Unicode Standard.</td>
-
- </tr>
-
- <tr>
-
- <th VALIGN="top">5</th>
-
- <td VALIGN="top"><a HREF="#Character Decomposition">Character Decomposition
- Mapping</a></td>
-
- <td VALIGN="top">normative</td>
-
- <td VALIGN="top">In the Unicode Standard, not all of the mappings are full (maximal)
-
- decompositions. Recursive application of look-up for decompositions will, in all cases,
-
- lead to a maximal decomposition. The decomposition mappings match exactly the
-
- decomposition mappings published with the character names in the Unicode Standard.</td>
-
- </tr>
-
- <tr>
-
- <th VALIGN="top">6</th>
-
- <td VALIGN="top">Decimal digit value</td>
-
- <td VALIGN="top">normative</td>
-
- <td VALIGN="top">This is a numeric field. If the character has the decimal digit property,
-
- as specified in Chapter 4 of the Unicode Standard, the value of that digit is represented
-
- with an integer value in this field</td>
-
- </tr>
-
- <tr>
-
- <th VALIGN="top">7</th>
-
- <td VALIGN="top">Digit value</td>
-
- <td VALIGN="top">normative</td>
-
- <td VALIGN="top">This is a numeric field. If the character represents a digit, not
-
- necessarily a decimal digit, the value is here. This covers digits which do not form
-
- decimal radix forms, such as the compatibility superscript digits</td>
-
- </tr>
-
- <tr>
-
- <th VALIGN="top">8</th>
-
- <td VALIGN="top">Numeric value</td>
-
- <td VALIGN="top">normative</td>
-
- <td VALIGN="top">This is a numeric field. If the character has the numeric property, as
-
- specified in Chapter 4 of the Unicode Standard, the value of that character is represented
-
- with an integer or rational number in this field. This includes fractions as, e.g.,
-
- "1/5" for U+2155 VULGAR FRACTION ONE FIFTH. Also included are numerical values
-
- for compatibility characters such as circled numbers.</td>
-
- </tr>
-
- <tr>
-
- <th VALIGN="top">9</th>
-
- <td VALIGN="top">Mirrored</td>
-
- <td VALIGN="top">normative</td>
-
- <td VALIGN="top">If the character has been identified as a "mirrored" character
-
- in bidirectional text, this field has the value "Y"; otherwise "N".
-
- The list of mirrored characters is also printed in Chapter 4 of the Unicode Standard.</td>
-
- </tr>
-
- <tr>
-
- <th VALIGN="top">10</th>
-
- <td VALIGN="top">Unicode 1.0 Name</td>
-
- <td VALIGN="top">informative</td>
-
- <td VALIGN="top">This is the old name as published in Unicode 1.0. This name is only
-
- provided when it is significantly different from the Unicode 3.0 name for the character.</td>
-
- </tr>
-
- <tr>
-
- <th VALIGN="top">11</th>
-
- <td VALIGN="top">10646 comment field</td>
-
- <td VALIGN="top">informative</td>
-
- <td VALIGN="top">This is the ISO 10646 comment field. It is in parentheses in the 10646
-
- names list.</td>
-
- </tr>
-
- <tr>
-
- <th VALIGN="top">12</th>
-
- <td VALIGN="top"><a HREF="#Case Mappings">Uppercase Mapping</a></td>
-
- <td VALIGN="top">informative</td>
-
- <td VALIGN="top">Upper case equivalent mapping. If a character is part of an alphabet with
-
- case distinctions, and has an upper case equivalent, then the upper case equivalent is in
-
- this field. See the explanation below on case distinctions. These mappings are always
-
- one-to-one, not one-to-many or many-to-one. This field is informative.</td>
-
- </tr>
-
- <tr>
-
- <th VALIGN="top">13</th>
-
- <td VALIGN="top"><a HREF="#Case Mappings">Lowercase Mapping</a></td>
-
- <td VALIGN="top">informative</td>
-
- <td VALIGN="top">Similar to Uppercase mapping</td>
-
- </tr>
-
- <tr>
-
- <th VALIGN="top">14</th>
-
- <td VALIGN="top"><a HREF="#Case Mappings">Titlecase Mapping</a></td>
-
- <td VALIGN="top">informative</td>
-
- <td VALIGN="top">Similar to Uppercase mapping</td>
-
- </tr>
-
-</table>
-
-
-
-<h3><a NAME="General Category"></a>General Category</h3>
-
-
-
-<p>The values in this field are abbreviations for the following. Some of the values are
-
-normative, and some are informative. For more information, see the Unicode Standard.</p>
-
-
-
-<p><b>Note:</b> the standard does not assign information to control characters (except for
-
-certain cases in the Bidirectional Algorithm). Implementations will generally also assign
-
-categories to certain control characters, notably CR and LF, according to platform
-
-conventions.</p>
-
-
-
-<h4>Normative Categories</h4>
-
-
-
-<table BORDER="0" CELLSPACING="2" CELLPADDING="0">
-
- <tr>
-
- <th><p ALIGN="LEFT">Abbr.</th>
-
- <th><p ALIGN="LEFT">Description</th>
-
- </tr>
-
- <tr>
-
- <td ALIGN="CENTER">Lu</td>
-
- <td>Letter, Uppercase</td>
-
- </tr>
-
- <tr>
-
- <td ALIGN="CENTER">Ll</td>
-
- <td>Letter, Lowercase</td>
-
- </tr>
-
- <tr>
-
- <td ALIGN="CENTER">Lt</td>
-
- <td>Letter, Titlecase</td>
-
- </tr>
-
- <tr>
-
- <td ALIGN="CENTER">Mn</td>
-
- <td>Mark, Non-Spacing</td>
-
- </tr>
-
- <tr>
-
- <td ALIGN="CENTER">Mc</td>
-
- <td>Mark, Spacing Combining</td>
-
- </tr>
-
- <tr>
-
- <td ALIGN="CENTER">Me</td>
-
- <td>Mark, Enclosing</td>
-
- </tr>
-
- <tr>
-
- <td ALIGN="CENTER">Nd</td>
-
- <td>Number, Decimal Digit</td>
-
- </tr>
-
- <tr>
-
- <td ALIGN="CENTER">Nl</td>
-
- <td>Number, Letter</td>
-
- </tr>
-
- <tr>
-
- <td ALIGN="CENTER">No</td>
-
- <td>Number, Other</td>
-
- </tr>
-
- <tr>
-
- <td ALIGN="CENTER">Zs</td>
-
- <td>Separator, Space</td>
-
- </tr>
-
- <tr>
-
- <td ALIGN="CENTER">Zl</td>
-
- <td>Separator, Line</td>
-
- </tr>
-
- <tr>
-
- <td ALIGN="CENTER">Zp</td>
-
- <td>Separator, Paragraph</td>
-
- </tr>
-
- <tr>
-
- <td ALIGN="CENTER">Cc</td>
-
- <td>Other, Control</td>
-
- </tr>
-
- <tr>
-
- <td ALIGN="CENTER">Cf</td>
-
- <td>Other, Format</td>
-
- </tr>
-
- <tr>
-
- <td ALIGN="CENTER">Cs</td>
-
- <td>Other, Surrogate</td>
-
- </tr>
-
- <tr>
-
- <td ALIGN="CENTER">Co</td>
-
- <td>Other, Private Use</td>
-
- </tr>
-
- <tr>
-
- <td ALIGN="CENTER">Cn</td>
-
- <td>Other, Not Assigned (no characters in the file have this property)</td>
-
- </tr>
-
-</table>
-
-
-
-<h4>Informative Categories</h4>
-
-
-
-<table BORDER="0" CELLSPACING="2" CELLPADDING="0">
-
- <tr>
-
- <th><p ALIGN="LEFT">Abbr.</th>
-
- <th><p ALIGN="LEFT">Description</th>
-
- </tr>
-
- <tr>
-
- <td ALIGN="CENTER">Lm</td>
-
- <td>Letter, Modifier</td>
-
- </tr>
-
- <tr>
-
- <td ALIGN="CENTER">Lo</td>
-
- <td>Letter, Other</td>
-
- </tr>
-
- <tr>
-
- <td ALIGN="CENTER">Pc</td>
-
- <td>Punctuation, Connector</td>
-
- </tr>
-
- <tr>
-
- <td ALIGN="CENTER">Pd</td>
-
- <td>Punctuation, Dash</td>
-
- </tr>
-
- <tr>
-
- <td ALIGN="CENTER">Ps</td>
-
- <td>Punctuation, Open</td>
-
- </tr>
-
- <tr>
-
- <td ALIGN="CENTER">Pe</td>
-
- <td>Punctuation, Close</td>
-
- </tr>
-
- <tr>
-
- <td ALIGN="CENTER">Pi</td>
-
- <td>Punctuation, Initial quote (may behave like Ps or Pe depending on usage)</td>
-
- </tr>
-
- <tr>
-
- <td ALIGN="CENTER">Pf</td>
-
- <td>Punctuation, Final quote (may behave like Ps or Pe depending on usage)</td>
-
- </tr>
-
- <tr>
-
- <td ALIGN="CENTER">Po</td>
-
- <td>Punctuation, Other</td>
-
- </tr>
-
- <tr>
-
- <td ALIGN="CENTER">Sm</td>
-
- <td>Symbol, Math</td>
-
- </tr>
-
- <tr>
-
- <td ALIGN="CENTER">Sc</td>
-
- <td>Symbol, Currency</td>
-
- </tr>
-
- <tr>
-
- <td ALIGN="CENTER">Sk</td>
-
- <td>Symbol, Modifier</td>
-
- </tr>
-
- <tr>
-
- <td ALIGN="CENTER">So</td>
-
- <td>Symbol, Other</td>
-
- </tr>
-
-</table>
-
-
-
-<h3><a NAME="Bidirectional Category"></a>Bidirectional Category</h3>
-
-
-
-<p>Please refer to Chapter 3 for an explanation of the algorithm for Bidirectional
-
-Behavior and an explanation of the significance of these categories. An up-to-date version
-
-can be found on <a HREF="http://www.unicode.org/unicode/reports/tr9/">Unicode Technical
-
-Report #9: The Bidirectional Algorithm</a>. These values are normative.</p>
-
-
-
-<table BORDER="0" CELLPADDING="2">
-
- <tr>
-
- <th VALIGN="TOP" ALIGN="LEFT"><p ALIGN="LEFT">Type</th>
-
- <th VALIGN="TOP" ALIGN="LEFT"><p ALIGN="LEFT">Description</th>
-
- </tr>
-
- <tr>
-
- <td VALIGN="TOP"><b>L</b></td>
-
- <td VALIGN="TOP">Left-to-Right</td>
-
- </tr>
-
- <tr>
-
- <td VALIGN="TOP"><b>LRE</b></td>
-
- <td VALIGN="TOP">Left-to-Right Embedding</td>
-
- </tr>
-
- <tr>
-
- <td VALIGN="TOP"><b>LRO</b></td>
-
- <td VALIGN="TOP">Left-to-Right Override</td>
-
- </tr>
-
- <tr>
-
- <td VALIGN="TOP"><b>R</b></td>
-
- <td VALIGN="TOP">Right-to-Left</td>
-
- </tr>
-
- <tr>
-
- <td VALIGN="TOP"><b>AL</b></td>
-
- <td VALIGN="TOP">Right-to-Left Arabic</td>
-
- </tr>
-
- <tr>
-
- <td VALIGN="TOP"><b>RLE</b></td>
-
- <td VALIGN="TOP">Right-to-Left Embedding</td>
-
- </tr>
-
- <tr>
-
- <td VALIGN="TOP"><b>RLO</b></td>
-
- <td VALIGN="TOP">Right-to-Left Override</td>
-
- </tr>
-
- <tr>
-
- <td VALIGN="TOP"><b>PDF</b></td>
-
- <td VALIGN="TOP">Pop Directional Format</td>
-
- </tr>
-
- <tr>
-
- <td VALIGN="TOP"><b>EN</b></td>
-
- <td VALIGN="TOP">European Number</td>
-
- </tr>
-
- <tr>
-
- <td VALIGN="TOP"><b>ES</b></td>
-
- <td VALIGN="TOP">European Number Separator</td>
-
- </tr>
-
- <tr>
-
- <td VALIGN="TOP"><b>ET</b></td>
-
- <td VALIGN="TOP">European Number Terminator</td>
-
- </tr>
-
- <tr>
-
- <td VALIGN="TOP"><b>AN</b></td>
-
- <td VALIGN="TOP">Arabic Number</td>
-
- </tr>
-
- <tr>
-
- <td VALIGN="TOP"><b>CS</b></td>
-
- <td VALIGN="TOP">Common Number Separator</td>
-
- </tr>
-
- <tr>
-
- <td VALIGN="TOP"><b>NSM</b></td>
-
- <td VALIGN="TOP">Non-Spacing Mark</td>
-
- </tr>
-
- <tr>
-
- <td VALIGN="TOP"><b>BN</b></td>
-
- <td VALIGN="TOP">Boundary Neutral</td>
-
- </tr>
-
- <tr>
-
- <td VALIGN="TOP"><b>B</b></td>
-
- <td VALIGN="TOP">Paragraph Separator</td>
-
- </tr>
-
- <tr>
-
- <td VALIGN="TOP"><b>S</b></td>
-
- <td VALIGN="TOP">Segment Separator</td>
-
- </tr>
-
- <tr>
-
- <td VALIGN="TOP"><b>WS</b></td>
-
- <td VALIGN="TOP">Whitespace</td>
-
- </tr>
-
- <tr>
-
- <td VALIGN="TOP"><b>ON</b></td>
-
- <td VALIGN="TOP">Other Neutrals</td>
-
- </tr>
-
-</table>
-
-
-
-<h3><a NAME="Character Decomposition"></a>Character Decomposition Mapping</h3>
-
-
-
-<p>The decomposition is a normative property of a character. The tags supplied with
-
-certain decomposition mappings generally indicate formatting information. Where no such
-
-tag is given, the mapping is designated as canonical. Conversely, the presence of a
-
-formatting tag also indicates that the mapping is a compatibility mapping and not a
-
-canonical mapping. In the absence of other formatting information in a compatibility
-
-mapping, the tag is used to distinguish it from canonical mappings.</p>
-
-
-
-<p>In some instances a canonical mapping or a compatibility mapping may consist of a
-
-single character. For a canonical mapping, this indicates that the character is a
-
-canonical equivalent of another single character. For a compatibility mapping, this
-
-indicates that the character is a compatibility equivalent of another single character.
-
-The compatibility formatting tags used are:</p>
-
-
-
-<table BORDER="0" CELLSPACING="2" CELLPADDING="0">
-
- <tr>
-
- <th>Tag</th>
-
- <th><p ALIGN="LEFT">Description</th>
-
- </tr>
-
- <tr>
-
- <td ALIGN="CENTER">&lt;font&gt; </td>
-
- <td>A font variant (e.g. a blackletter form).</td>
-
- </tr>
-
- <tr>
-
- <td ALIGN="CENTER">&lt;noBreak&gt; </td>
-
- <td>A no-break version of a space or hyphen.</td>
-
- </tr>
-
- <tr>
-
- <td ALIGN="CENTER">&lt;initial&gt; </td>
-
- <td>An initial presentation form (Arabic).</td>
-
- </tr>
-
- <tr>
-
- <td ALIGN="CENTER">&lt;medial&gt; </td>
-
- <td>A medial presentation form (Arabic).</td>
-
- </tr>
-
- <tr>
-
- <td ALIGN="CENTER">&lt;final&gt; </td>
-
- <td>A final presentation form (Arabic).</td>
-
- </tr>
-
- <tr>
-
- <td ALIGN="CENTER">&lt;isolated&gt; </td>
-
- <td>An isolated presentation form (Arabic).</td>
-
- </tr>
-
- <tr>
-
- <td ALIGN="CENTER">&lt;circle&gt; </td>
-
- <td>An encircled form.</td>
-
- </tr>
-
- <tr>
-
- <td ALIGN="CENTER">&lt;super&gt; </td>
-
- <td>A superscript form.</td>
-
- </tr>
-
- <tr>
-
- <td ALIGN="CENTER">&lt;sub&gt; </td>
-
- <td>A subscript form.</td>
-
- </tr>
-
- <tr>
-
- <td ALIGN="CENTER">&lt;vertical&gt; </td>
-
- <td>A vertical layout presentation form.</td>
-
- </tr>
-
- <tr>
-
- <td ALIGN="CENTER">&lt;wide&gt; </td>
-
- <td>A wide (or zenkaku) compatibility character.</td>
-
- </tr>
-
- <tr>
-
- <td ALIGN="CENTER">&lt;narrow&gt; </td>
-
- <td>A narrow (or hankaku) compatibility character.</td>
-
- </tr>
-
- <tr>
-
- <td ALIGN="CENTER">&lt;small&gt; </td>
-
- <td>A small variant form (CNS compatibility).</td>
-
- </tr>
-
- <tr>
-
- <td ALIGN="CENTER">&lt;square&gt; </td>
-
- <td>A CJK squared font variant.</td>
-
- </tr>
-
- <tr>
-
- <td ALIGN="CENTER">&lt;fraction&gt; </td>
-
- <td>A vulgar fraction form.</td>
-
- </tr>
-
- <tr>
-
- <td ALIGN="CENTER">&lt;compat&gt; </td>
-
- <td>Otherwise unspecified compatibility character.</td>
-
- </tr>
-
-</table>
-
-
-
-<p><b>Reminder: </b>There is a difference between decomposition and decomposition mapping.
-
-The decomposition mappings are defined in the UnicodeData, while the decomposition (also
-
-termed "full decomposition") is defined in Chapter 3 to use those mappings
-<i>
-
-recursively.</i>
-
-
-
-<ul>
-
- <li>The canonical decomposition is formed by recursively applying the canonical mappings,
-
- then applying the canonical reordering algorithm. </li>
-
- <li>The compatibility decomposition is formed by recursively applying the canonical <em>and</em>
-
- compatibility mappings, then applying the canonical reordering algorithm. </li>
-
-</ul>
-
-
-
-<h3><a NAME="Canonical Combining Classes"></a>Canonical Combining Classes</h3>
-
-
-
-<table BORDER="0" CELLSPACING="2" CELLPADDING="0">
-
- <tr>
-
- <th><p ALIGN="LEFT">Value</th>
-
- <th><p ALIGN="LEFT">Description</th>
-
- </tr>
-
- <tr>
-
- <td ALIGN="RIGHT">0:</td>
-
- <td>Spacing, split, enclosing, reordrant, and Tibetan subjoined</td>
-
- </tr>
-
- <tr>
-
- <td ALIGN="RIGHT">1:</td>
-
- <td>Overlays and interior</td>
-
- </tr>
-
- <tr>
-
- <td ALIGN="RIGHT">7:</td>
-
- <td>Nuktas</td>
-
- </tr>
-
- <tr>
-
- <td ALIGN="RIGHT">8:</td>
-
- <td>Hiragana/Katakana voicing marks</td>
-
- </tr>
-
- <tr>
-
- <td ALIGN="RIGHT">9:</td>
-
- <td>Viramas</td>
-
- </tr>
-
- <tr>
-
- <td ALIGN="RIGHT">10:</td>
-
- <td>Start of fixed position classes</td>
-
- </tr>
-
- <tr>
-
- <td ALIGN="RIGHT">199:</td>
-
- <td>End of fixed position classes</td>
-
- </tr>
-
- <tr>
-
- <td ALIGN="RIGHT">200:</td>
-
- <td>Below left attached</td>
-
- </tr>
-
- <tr>
-
- <td ALIGN="RIGHT">202:</td>
-
- <td>Below attached</td>
-
- </tr>
-
- <tr>
-
- <td ALIGN="RIGHT">204:</td>
-
- <td>Below right attached</td>
-
- </tr>
-
- <tr>
-
- <td ALIGN="RIGHT">208:</td>
-
- <td>Left attached (reordrant around single base character)</td>
-
- </tr>
-
- <tr>
-
- <td ALIGN="RIGHT">210:</td>
-
- <td>Right attached</td>
-
- </tr>
-
- <tr>
-
- <td ALIGN="RIGHT">212:</td>
-
- <td>Above left attached</td>
-
- </tr>
-
- <tr>
-
- <td ALIGN="RIGHT">214:</td>
-
- <td>Above attached</td>
-
- </tr>
-
- <tr>
-
- <td ALIGN="RIGHT">216:</td>
-
- <td>Above right attached</td>
-
- </tr>
-
- <tr>
-
- <td ALIGN="RIGHT">218:</td>
-
- <td>Below left</td>
-
- </tr>
-
- <tr>
-
- <td ALIGN="RIGHT">220:</td>
-
- <td>Below</td>
-
- </tr>
-
- <tr>
-
- <td ALIGN="RIGHT">222:</td>
-
- <td>Below right</td>
-
- </tr>
-
- <tr>
-
- <td ALIGN="RIGHT">224:</td>
-
- <td>Left (reordrant around single base character)</td>
-
- </tr>
-
- <tr>
-
- <td ALIGN="RIGHT">226:</td>
-
- <td>Right</td>
-
- </tr>
-
- <tr>
-
- <td ALIGN="RIGHT">228:</td>
-
- <td>Above left</td>
-
- </tr>
-
- <tr>
-
- <td ALIGN="RIGHT">230:</td>
-
- <td>Above</td>
-
- </tr>
-
- <tr>
-
- <td ALIGN="RIGHT">232:</td>
-
- <td>Above right</td>
-
- </tr>
-
- <tr>
-
- <td ALIGN="RIGHT">233:</td>
-
- <td>Double below</td>
-
- </tr>
-
- <tr>
-
- <td ALIGN="RIGHT">234:</td>
-
- <td>Double above</td>
-
- </tr>
-
- <tr>
-
- <td ALIGN="RIGHT">240:</td>
-
- <td>Below (iota subscript)</td>
-
- </tr>
-
-</table>
-
-
-
-<p><strong>Note: </strong>some of the combining classes in this list do not currently have
-
-members but are specified here for completeness.</p>
-
-
-
-<h3><a NAME="Decompositions and Normalization"></a>Decompositions and Normalization</h3>
-
-
-
-<p>Decomposition is specified in Chapter 3. <a href="http://www.unicode.org/unicode/reports/tr15/"><i>Unicode Technical Report #15:
-
-Normalization Forms</i></a> specifies the interaction between decomposition and normalization. The
-
-most up-to-date version is found on <a HREF="http://www.unicode.org/unicode/reports/tr15/">http://www.unicode.org/unicode/reports/tr15/</a>.
-
-That report specifies how the decompositions defined in UnicodeData.txt are used to derive
-
-normalized forms of Unicode text.</p>
-
-
-
-<p>Note that as of the 2.1.9 update of the Unicode Character Database, the decompositions
-
-in the UnicodeData.txt file can be used to recursively derive the full decomposition in
-
-canonical order, without the need to separately apply canonical reordering. However,
-
-canonical reordering of combining character sequences must still be applied in
-
-decomposition when normalizing source text which contains any combining marks.</p>
-
-
-
-<h3><a NAME="Case Mappings"></a>Case Mappings</h3>
-
-
-
-<p>The case mapping is an informative, default mapping. Case itself, on the other hand,
-
-has normative status. Thus, for example, 0041 LATIN CAPITAL LETTER A is normatively
-
-uppercase, but its lowercase mapping to 0061 LATIN SMALL LETTER A is informative. The
-
-reason for this is that case can be considered to be an inherent property of a particular
-
-character (and is usually, but not always, derivable from the presence of the terms
-
-"CAPITAL" or "SMALL" in the character name), but case mappings between
-
-characters are occasionally influenced by local conventions. For example, certain
-
-languages, such as Turkish, German, French, or Greek may have small deviations from the
-
-default mappings listed in UnicodeData.</p>
-
-
-
-<p>In addition to uppercase and lowercase, because of the inclusion of certain composite
-
-characters for compatibility, such as 01F1 LATIN CAPITAL LETTER DZ, there is a third case,
-
-called <i>titlecase</i>, which is used where the first letter of a word is to be
-
-capitalized (e.g. UPPERCASE, Titlecase, lowercase). An example of such a titlecase letter
-
-is 01F2 LATIN CAPITAL LETTER D WITH SMALL LETTER Z.</p>
-
-
-
-<p>The uppercase, titlecase and lowercase fields are only included for characters that
-
-have a single corresponding character of that type. Composite characters (such as
-
-"339D SQUARE CM") that do not have a single corresponding character of that type
-
-can be cased by decomposition.</p>
-
-
-
-<p>For compatibility with existing parsers, UnicodeData only contains case mappings for
-
-characters where they are one-to-one mappings; it also omits information about
-
-context-sensitive case mappings. Information about these special cases can be found in a
-
-separate data file, SpecialCasing.txt,
-
-which has been added starting with the 2.1.8 update to the Unicode data files.
-
-SpecialCasing.txt contains additional informative case mappings that are either not
-
-one-to-one or which are context-sensitive.</p>
-
-
-
-<h2><a NAME="Property Invariants"></a>Property Invariants</h2>
-
-
-
-<p>Values in UnicodeData.txt are subject to correction as errors are found; however, some
-
-characteristics of the categories themselves can be considered invariants. Applications
-
-may wish to take these invariants into account when choosing how to implement character
-
-properties. The following is a partial list of known invariants for the Unicode Character
-
-Database.</p>
-
-
-
-<h4>Database Fields</h4>
-
-
-
-<ul>
-
- <li>The number of fields in UnicodeData.txt is fixed. </li>
-
- <li>The order of the fields is also fixed. <ul>
-
- <li>Any additional information about character properties to be added in the future will
-
- appear in separate data tables, rather than being added on to the existing table or by
-
- subdivision or reinterpretation of existing fields. </li>
-
- </ul>
-
- </li>
-
-</ul>
-
-
-
-<h4>General Category</h4>
-
-
-
-<ul>
-
- <li>There will never be more than 32 General Category values. <ul>
-
- <li>It is very unlikely that the Unicode Technical Committee will subdivide the General
-
- Category partition any further, since that can cause implementations to misbehave. Because
-
- the General Category is limited to 32 values, 5 bits can be used to represent the
-
- information, and a 32-bit integer can be used as a bitmask to represent arbitrary sets of
-
- categories. </li>
-
- </ul>
-
- </li>
-
-</ul>
-
-
-
-<h4>Combining Classes</h4>
-
-
-
-<ul>
-
- <li>Combining classes are limited to the values 0 to 255. <ul>
-
- <li>In practice, there are far fewer than 256 values used. Implementations may take
-
- advantage of this fact for compression, since only the ordering of the non-zero values
-
- matters for the Canonical Reordering Algorithm. It is possible for up to 256 values to be
-
- used in the future; however, UTC decisions in the future may restrict the number of values
-
- to 128, since this has implementation advantages. [Signed bytes can be used without
-
- widening to ints in Java, for example.] </li>
-
- </ul>
-
- </li>
-
- <li>All characters other than those of General Category M* have the combining class 0. <ul>
-
- <li>Currently, all characters other than those of General Category Mn have the value 0.
-
- However, some characters of General Category Me or Mc may be given non-zero values in the
-
- future. </li>
-
- <li>The precise values above the value 0 are not invariant--only the relative ordering is
-
- considered normative. For example, it is not guaranteed in future versions that the class
-
- of U+05B4 will be precisely 14. </li>
-
- </ul>
-
- </li>
-
-</ul>
-
-
-
-<h4>Case</h4>
-
-
-
-<ul>
-
- <li>Characters of type Lu, Lt, or Ll are called <i>cased</i>. All characters with an Upper,
-
- Lower, or Titlecase mapping are cased characters. <ul>
-
- <li>However, characters with the General Categories of Lu, Ll, or Lt may not always have
-
- case mappings, and case mappings may vary by locale. (See
-
- ftp://ftp.unicode.org/Public/UNIDATA/SpecialCasing.txt). </li>
-
- </ul>
-
- </li>
-
-</ul>
-
-
-
-<h4>Canonical Decomposition</h4>
-
-
-
-<ul>
-
- <li>Canonical mappings are always in canonical order. </li>
-
- <li>Canonical mappings have only the first of a pair possibly further decomposing. </li>
-
- <li>Canonical decompositions are "transparent" to other character data: <ul>
-
- <li><tt>BIDI(a) = BIDI(principal(canonicalDecomposition(a)))</tt> </li>
-
- <li><tt>Category(a) = Category(principal(canonicalDecomposition(a)))</tt> </li>
-
- <li><tt>CombiningClass(a) = CombiningClass(principal(canonicalDecomposition(a)))</tt><br>
-
- where principal(a) is the first character not of type Mn, or the first character if all
-
- characters are of type Mn. </li>
-
- </ul>
-
- </li>
-
- <li>However, because there are sometimes missing case pairs, and because of some legacy
-
- characters, it is only generally true that: <ul>
-
- <li><tt>upper(canonicalDecomposition(a)) = canonicalDecomposition(upper(a))</tt> </li>
-
- <li><tt>lower(canonicalDecomposition(a)) = canonicalDecomposition(lower(a))</tt> </li>
-
- <li><tt>title(canonicalDecomposition(a)) = canonicalDecomposition(title(a))</tt> </li>
-
- </ul>
-
- </li>
-
-</ul>
-
-
-
-<h2><a NAME="Modification History"></a>Modification History</h2>
-
-
-
-<p>This section provides a summary of the changes between update versions of the Unicode
-
-Standard.</p>
-
-
-
-<h3><a href="http://www.unicode.org/unicode/standard/versions/enumeratedversions.html#Unicode 3.0.0"> Unicode 3.0.0</a></h3>
-
-
-
-<p>Modifications made for Version 3.0.0 of UnicodeData.txt include many new characters and
-
-a number of property changes. These are summarized in Appendix D of <em>The Unicode
-
-Standard, Version 3.0.</em></p>
-
-
-
-<h3><a HREF="http://www.unicode.org/unicode/standard/versions/enumeratedversions.html#Unicode 2.1.9">Unicode 2.1.9</a> </h3>
-
-
-
-<p>Modifications made for Version 2.1.9 of UnicodeData.txt include:
-
-
-
-<ul>
-
- <li>Corrected combining class for U+05AE HEBREW ACCENT ZINOR. </li>
-
- <li>Corrected combining class for U+20E1 COMBINING LEFT RIGHT ARROW ABOVE </li>
-
- <li>Corrected combining class for U+0F35 and U+0F37 to 220. </li>
-
- <li>Corrected combining class for U+0F71 to 129. </li>
-
- <li>Added a decomposition for U+0F0C TIBETAN MARK DELIMITER TSHEG BSTAR. </li>
-
- <li>Added decompositions for several Greek symbol letters: U+03D0..U+03D2, U+03D5,
-
- U+03D6, U+03F0..U+03F2. </li>
-
- <li>Removed decompositions from the conjoining jamo block: U+1100..U+11F8. </li>
-
- <li>Changes to decomposition mappings for some Tibetan vowels for consistency in
-
- normalization. (U+0F71, U+0F73, U+0F77, U+0F79, U+0F81) </li>
-
- <li>Updated the decomposition mappings for several Vietnamese characters with two diacritics
-
- (U+1EAC, U+1EAD, U+1EB6, U+1EB7, U+1EC6, U+1EC7, U+1ED8, U+1ED9), so that the recursive
-
- decomposition can be generated directly in canonically reordered form (not a normative
-
- change). </li>
-
- <li>Updated the decomposition mappings for several Arabic compatibility characters involving
-
- shadda (U+FC5E..U+FC62, U+FCF2..U+FCF4), and two Latin characters (U+1E1C, U+1E1D), so
-
- that the decompositions are generated directly in canonically reordered form (not a
-
- normative change). </li>
-
- <li>Changed BIDI category for: U+00A0 NO-BREAK SPACE, U+2007 FIGURE SPACE, U+2028 LINE
-
- SEPARATOR. </li>
-
- <li>Changed BIDI category for extenders of General Category Lm: U+3005, U+3021..U+3035,
-
- U+FF9E, U+FF9F. </li>
-
- <li>Changed General Category and BIDI category for the Greek numeral signs: U+0374, U+0375. </li>
-
- <li>Corrected General Category for U+FFE8 HALFWIDTH FORMS LIGHT VERTICAL. </li>
-
- <li>Added Unicode 1.0 names for many Tibetan characters (informative). </li>
-
-</ul>
-
-
-
-<h3><a HREF="http://www.unicode.org/unicode/standard/versions/enumeratedversions.html#Unicode 2.1.8">Unicode 2.1.8</a> </h3>
-
-
-
-<p>Modifications made for Version 2.1.8 of UnicodeData.txt include:
-
-
-
-<ul>
-
- <li>Added combining class 240 for U+0345 COMBINING GREEK YPOGEGRAMMENI so that
-
- decompositions involving iota subscript are derivable directly in canonically reordered
-
- form; this also has a bearing on simplification of casing of polytonic Greek. </li>
-
- <li>Changes in decompositions related to Greek tonos. These result from the clarification
-
- that monotonic Greek "tonos" should be equated with U+0301 COMBINING ACUTE,
-
- rather than with U+030D COMBINING VERTICAL LINE ABOVE. (All Greek characters in the Greek
-
- block involving "tonos"; some Greek characters in the polytonic Greek in the
-
- 1FXX block.) </li>
-
- <li>Changed decompositions involving dialytika tonos. (U+0390, U+03B0) </li>
-
- <li>Changed ternary decompositions to binary. (U+0CCB, U+FB2C, U+FB2D) These changes
-
- simplify normalization. </li>
-
- <li>Removed canonical decomposition for Latin Candrabindu. (U+0310) </li>
-
- <li>Corrected error in canonical decomposition for U+1FF4. </li>
-
- <li>Added compatibility decompositions to clarify collation tables. (U+2100, U+2101, U+2105,
-
- U+2106, U+1E9A) </li>
-
- <li>A series of general category changes to assist the convergence of the Unicode definition
-
- of identifier with ISO TR 10176: <ul>
-
- <li>So > Lo: U+0950, U+0AD0, U+0F00, U+0F88..U+0F8B </li>
-
- <li>Po > Lo: U+0E2F, U+0EAF, U+3006 </li>
-
- <li>Lm > Sk: U+309B, U+309C </li>
-
- <li>Po > Pc: U+30FB, U+FF65 </li>
-
- <li>Ps/Pe > Mn: U+0F3E, U+0F3F </li>
-
- </ul>
-
- </li>
-
- <li>A series of bidi property changes for consistency. <ul>
-
- <li>L > ET: U+09F2, U+09F3 </li>
-
- <li>ON > L: U+3007 </li>
-
- <li>L > ON: U+0F3A..U+0F3D, U+037E, U+0387 </li>
-
- </ul>
-
- </li>
-
- <li>Add case mapping: U+01A6 <-> U+0280 </li>
-
- <li>Updated symmetric swapping value for guillemets: U+00AB, U+00BB, U+2039, U+203A. </li>
-
- <li>Changes to combining class values. Most Indic fixed position class non-spacing marks
-
- were changed to combining class 0. This fixes some inconsistencies in how canonical
-
- reordering would apply to Indic scripts, including Tibetan. Indic interacting top/bottom
-
- fixed position classes were merged into single (non-zero) classes as part of this change.
-
- Tibetan subjoined consonants are changed from combining class 6 to combining class 0. Thai
-
- pinthu (U+0E3A) moved to combining class 9. Moved two Devanagari stress marks into generic
-
- above and below combining classes (U+0951, U+0952). </li>
-
- <li>Corrected placement of semicolon near symmetric swapping field. (U+FA0E, etc., scattered
-
- positions to U+FA29) </li>
-
-</ul>
-
-
-
-<h3>Version 2.1.7</h3>
-
-
-
-<p><i>This version was for internal change tracking only, and never publicly released.</i></p>
-
-
-
-<h3>Version 2.1.6</h3>
-
-
-
-<p><i>This version was for internal change tracking only, and never publicly released.</i></p>
-
-
-
-<h3><a HREF="http://www.unicode.org/unicode/standard/versions/enumeratedversions.html#Unicode 2.1.5">Unicode 2.1.5</a> </h3>
-
-
-
-<p>Modifications made for Version 2.1.5 of UnicodeData.txt include:
-
-
-
-<ul>
-
- <li>Changed decomposition for U+FF9E and U+FF9F so that correct collation weighting will
-
- automatically result from the canonical equivalences. </li>
-
- <li>Removed canonical decompositions for U+04D4, U+04D5, U+04D8, U+04D9, U+04E0, U+04E1,
-
- U+04E8, U+04E9 (the implication being that no canonical equivalence is claimed between
-
- these 8 characters and similar Latin letters), and updated 4 canonical decompositions for
-
- U+04DB, U+04DC, U+04EA, U+04EB to reflect the implied difference in the base character. </li>
-
- <li>Added Pi, and Pf categories and assigned the relevant quotation marks to those
-
- categories, based on the Unicode Technical Corrigendum on Quotation Characters. </li>
-
- <li>Updating of many bidi properties, following the advice of the ad hoc committee on bidi,
-
- and to make the bidi properties of compatibility characters more consistent. </li>
-
- <li>Changed category of several Tibetan characters: U+0F3E, U+0F3F, U+0F88..U+0F8B to make
-
- them non-combining, reflecting the combined opinion of Tibetan experts. </li>
-
- <li>Added case mapping for U+03F2. </li>
-
- <li>Corrected case mapping for U+0275. </li>
-
- <li>Added titlecase mappings for U+03D0, U+03D1, U+03D5, U+03D6, U+03F0.. U+03F2. </li>
-
- <li>Corrected compatibility label for U+2121. </li>
-
- <li>Add specific entries for all the CJK compatibility ideographs, U+F900..U+FA2D, so the
-
- canonical decomposition for each (the URO character it is equivalent to) can be carried in
-
- the database. </li>
-
-</ul>
-
-
-
-<h3>Version 2.1.4</h3>
-
-
-
-<p><i>This version was for internal change tracking only, and never publicly released.</i></p>
-
-
-
-<h3>Version 2.1.3</h3>
-
-
-
-<p><i>This version was for internal change tracking only, and never publicly released.</i></p>
-
-
-
-<h3><a HREF="http://www.unicode.org/unicode/standard/versions/enumeratedversions.html#Unicode 2.1.2">Unicode 2.1.2</a> </h3>
-
-
-
-<p>Modifications made in updating UnicodeData.txt to Version 2.1.2 for the Unicode
-
-Standard, Version 2.1 (from Version 2.0) include:
-
-
-
-<ul>
-
- <li>Added two characters (U+20AC and U+FFFC). </li>
-
- <li>Amended bidi properties for U+0026, U+002E, U+0040, U+2007. </li>
-
- <li>Corrected case mappings for U+018E, U+019F, U+01DD, U+0258, U+0275, U+03C2, U+1E9B. </li>
-
- <li>Changed combining order class for U+0F71. </li>
-
- <li>Corrected canonical decompositions for U+0F73, U+1FBE. </li>
-
- <li>Changed decomposition for U+FB1F from compatibility to canonical. </li>
-
- <li>Added compatibility decompositions for U+FBE8, U+FBE9, U+FBF9..U+FBFB. </li>
-
- <li>Corrected compatibility decompositions for U+2469, U+246A, U+3358. </li>
-
-</ul>
-
-
-
-<h3>Version 2.1.1</h3>
-
-
-
-<p><i>This version was for internal change tracking only, and never publicly released.</i></p>
-
-
-
-<h3><a HREF="http://www.unicode.org/unicode/standard/versions/enumeratedversions.html#Unicode 2.0.0">Unicode 2.0.0</a> </h3>
-
-
-
-<p>The modifications made in updating UnicodeData.txt for the Unicode
-
-Standard, Version 2.0 include:
-
-
-
-<ul>
-
- <li>Fixed decompositions with TONOS to use correct NSM: 030D. </li>
-
- <li>Removed old Hangul Syllables; mapping to new characters are in a separate table. </li>
-
- <li>Marked compatibility decompositions with additional tags. </li>
-
- <li>Changed old tag names for clarity. </li>
-
- <li>Revision of decompositions to use first-level decomposition, instead of maximal
-
- decomposition. </li>
-
- <li>Correction of all known errors in decompositions from earlier versions. </li>
-
- <li>Added control code names (as old Unicode names). </li>
-
- <li>Added Hangul Jamo decompositions. </li>
-
- <li>Added Number category to match properties list in book. </li>
-
- <li>Fixed categories of Koranic Arabic marks. </li>
-
- <li>Fixed categories of precomposed characters to match decomposition where possible. </li>
-
- <li>Added Hebrew cantillation marks and the Tibetan script. </li>
-
- <li>Added place holders for ranges such as CJK Ideographic Area and the Private Use Area. </li>
-
- <li>Added categories Me, Sk, Pc, Nl, Cs, Cf, and rectified a number of mistakes in the
-
- database. </li>
-
-</ul>
-
-</body>
-
-</html>
-
use bytes;
-$UnicodeData = "Unicode.300";
+$UnicodeData = "Unicode.301";
$SyllableData = "syllables.txt";
-$PropData = "Props.txt";
+$PropData = "PropList.txt";
# Note: we try to keep filenames unique within first 8 chars. Using