Commit | Line | Data |
20e8a3a3 |
1 | # CompositionExclusions-5.1.0.txt |
2 | # Date: 2008-03-20, 17:45:00 PDT [KW] |
8836d2a5 |
3 | # |
98fbe989 |
4 | # This file lists the characters for the Composition Exclusion Table |
5 | # defined in UAX #15, Unicode Normalization Forms. |
d357d9fe |
6 | # |
a2bd7410 |
7 | # This file is a normative contributory data file in the |
8 | # Unicode Character Database. |
9 | # |
20e8a3a3 |
10 | # Copyright (c) 1991-2008 Unicode, Inc. |
a2bd7410 |
11 | # For terms of use, see http://www.unicode.org/terms_of_use.html |
12 | # |
d357d9fe |
13 | # For more information, see |
14 | # http://www.unicode.org/unicode/reports/tr15/#Primary_Exclusion_List_Table |
98fbe989 |
15 | # |
16 | # For a full derivation of composition exclusions, see the derived property |
17 | # Full_Composition_Exclusion in DerivedNormalizationProps.txt |
18 | # |
d357d9fe |
19 | |
98fbe989 |
20 | # ================================================ |
d357d9fe |
21 | # (1) Script Specifics |
98fbe989 |
22 | # |
23 | # This list of characters cannot be derived from the UnicodeData.txt file. |
822ebcc8 |
24 | # ================================================ |
25 | |
26 | 0958 # DEVANAGARI LETTER QA |
27 | 0959 # DEVANAGARI LETTER KHHA |
28 | 095A # DEVANAGARI LETTER GHHA |
29 | 095B # DEVANAGARI LETTER ZA |
30 | 095C # DEVANAGARI LETTER DDDHA |
31 | 095D # DEVANAGARI LETTER RHA |
32 | 095E # DEVANAGARI LETTER FA |
33 | 095F # DEVANAGARI LETTER YYA |
34 | 09DC # BENGALI LETTER RRA |
35 | 09DD # BENGALI LETTER RHA |
36 | 09DF # BENGALI LETTER YYA |
37 | 0A33 # GURMUKHI LETTER LLA |
38 | 0A36 # GURMUKHI LETTER SHA |
39 | 0A59 # GURMUKHI LETTER KHHA |
40 | 0A5A # GURMUKHI LETTER GHHA |
41 | 0A5B # GURMUKHI LETTER ZA |
42 | 0A5E # GURMUKHI LETTER FA |
43 | 0B5C # ORIYA LETTER RRA |
44 | 0B5D # ORIYA LETTER RHA |
45 | 0F43 # TIBETAN LETTER GHA |
46 | 0F4D # TIBETAN LETTER DDHA |
47 | 0F52 # TIBETAN LETTER DHA |
48 | 0F57 # TIBETAN LETTER BHA |
49 | 0F5C # TIBETAN LETTER DZHA |
50 | 0F69 # TIBETAN LETTER KSSA |
51 | 0F76 # TIBETAN VOWEL SIGN VOCALIC R |
52 | 0F78 # TIBETAN VOWEL SIGN VOCALIC L |
53 | 0F93 # TIBETAN SUBJOINED LETTER GHA |
54 | 0F9D # TIBETAN SUBJOINED LETTER DDHA |
55 | 0FA2 # TIBETAN SUBJOINED LETTER DHA |
56 | 0FA7 # TIBETAN SUBJOINED LETTER BHA |
57 | 0FAC # TIBETAN SUBJOINED LETTER DZHA |
58 | 0FB9 # TIBETAN SUBJOINED LETTER KSSA |
59 | FB1D # HEBREW LETTER YOD WITH HIRIQ |
60 | FB1F # HEBREW LIGATURE YIDDISH YOD YOD PATAH |
61 | FB2A # HEBREW LETTER SHIN WITH SHIN DOT |
62 | FB2B # HEBREW LETTER SHIN WITH SIN DOT |
63 | FB2C # HEBREW LETTER SHIN WITH DAGESH AND SHIN DOT |
64 | FB2D # HEBREW LETTER SHIN WITH DAGESH AND SIN DOT |
65 | FB2E # HEBREW LETTER ALEF WITH PATAH |
66 | FB2F # HEBREW LETTER ALEF WITH QAMATS |
67 | FB30 # HEBREW LETTER ALEF WITH MAPIQ |
68 | FB31 # HEBREW LETTER BET WITH DAGESH |
69 | FB32 # HEBREW LETTER GIMEL WITH DAGESH |
70 | FB33 # HEBREW LETTER DALET WITH DAGESH |
71 | FB34 # HEBREW LETTER HE WITH MAPIQ |
72 | FB35 # HEBREW LETTER VAV WITH DAGESH |
73 | FB36 # HEBREW LETTER ZAYIN WITH DAGESH |
74 | FB38 # HEBREW LETTER TET WITH DAGESH |
75 | FB39 # HEBREW LETTER YOD WITH DAGESH |
76 | FB3A # HEBREW LETTER FINAL KAF WITH DAGESH |
77 | FB3B # HEBREW LETTER KAF WITH DAGESH |
78 | FB3C # HEBREW LETTER LAMED WITH DAGESH |
79 | FB3E # HEBREW LETTER MEM WITH DAGESH |
80 | FB40 # HEBREW LETTER NUN WITH DAGESH |
81 | FB41 # HEBREW LETTER SAMEKH WITH DAGESH |
82 | FB43 # HEBREW LETTER FINAL PE WITH DAGESH |
83 | FB44 # HEBREW LETTER PE WITH DAGESH |
84 | FB46 # HEBREW LETTER TSADI WITH DAGESH |
85 | FB47 # HEBREW LETTER QOF WITH DAGESH |
86 | FB48 # HEBREW LETTER RESH WITH DAGESH |
87 | FB49 # HEBREW LETTER SHIN WITH DAGESH |
88 | FB4A # HEBREW LETTER TAV WITH DAGESH |
89 | FB4B # HEBREW LETTER VAV WITH HOLAM |
90 | FB4C # HEBREW LETTER BET WITH RAFE |
91 | FB4D # HEBREW LETTER KAF WITH RAFE |
92 | FB4E # HEBREW LETTER PE WITH RAFE |
d357d9fe |
93 | |
822ebcc8 |
94 | # Total code points: 67 |
d357d9fe |
95 | |
822ebcc8 |
96 | # ================================================ |
190eec7c |
97 | # (2) Post Composition Version precomposed characters |
98fbe989 |
98 | # |
822ebcc8 |
99 | # These characters cannot be derived solely from the UnicodeData.txt file |
100 | # in this version of Unicode. |
98fbe989 |
101 | # |
102 | # Note that characters added to the standard after the |
103 | # Composition Version and which have canonical decomposition mappings |
104 | # are not automatically added to this list of Post Composition |
105 | # Version precomposed characters. |
822ebcc8 |
106 | # ================================================ |
107 | |
108 | 2ADC # FORKING |
109 | 1D15E # MUSICAL SYMBOL HALF NOTE |
110 | 1D15F # MUSICAL SYMBOL QUARTER NOTE |
111 | 1D160 # MUSICAL SYMBOL EIGHTH NOTE |
112 | 1D161 # MUSICAL SYMBOL SIXTEENTH NOTE |
113 | 1D162 # MUSICAL SYMBOL THIRTY-SECOND NOTE |
114 | 1D163 # MUSICAL SYMBOL SIXTY-FOURTH NOTE |
115 | 1D164 # MUSICAL SYMBOL ONE HUNDRED TWENTY-EIGHTH NOTE |
116 | 1D1BB # MUSICAL SYMBOL MINIMA |
117 | 1D1BC # MUSICAL SYMBOL MINIMA BLACK |
118 | 1D1BD # MUSICAL SYMBOL SEMIMINIMA WHITE |
119 | 1D1BE # MUSICAL SYMBOL SEMIMINIMA BLACK |
120 | 1D1BF # MUSICAL SYMBOL FUSA WHITE |
121 | 1D1C0 # MUSICAL SYMBOL FUSA BLACK |
190eec7c |
122 | |
822ebcc8 |
123 | # Total code points: 14 |
d357d9fe |
124 | |
822ebcc8 |
125 | # ================================================ |
d357d9fe |
126 | # (3) Singleton Decompositions |
98fbe989 |
127 | # |
128 | # These characters can be derived from the UnicodeData.txt file |
d357d9fe |
129 | # by including all characters whose canonical decomposition |
130 | # consists of a single character. |
98fbe989 |
131 | # |
d357d9fe |
132 | # These characters are simply quoted here for reference. |
98fbe989 |
133 | # See also Full_Composition_Exclusion in DerivedNormalizationProps.txt |
822ebcc8 |
134 | # ================================================ |
d357d9fe |
135 | |
822ebcc8 |
136 | # 0340..0341 [2] COMBINING GRAVE TONE MARK..COMBINING ACUTE TONE MARK |
137 | # 0343 COMBINING GREEK KORONIS |
138 | # 0374 GREEK NUMERAL SIGN |
139 | # 037E GREEK QUESTION MARK |
140 | # 0387 GREEK ANO TELEIA |
141 | # 1F71 GREEK SMALL LETTER ALPHA WITH OXIA |
142 | # 1F73 GREEK SMALL LETTER EPSILON WITH OXIA |
143 | # 1F75 GREEK SMALL LETTER ETA WITH OXIA |
144 | # 1F77 GREEK SMALL LETTER IOTA WITH OXIA |
145 | # 1F79 GREEK SMALL LETTER OMICRON WITH OXIA |
146 | # 1F7B GREEK SMALL LETTER UPSILON WITH OXIA |
147 | # 1F7D GREEK SMALL LETTER OMEGA WITH OXIA |
148 | # 1FBB GREEK CAPITAL LETTER ALPHA WITH OXIA |
149 | # 1FBE GREEK PROSGEGRAMMENI |
150 | # 1FC9 GREEK CAPITAL LETTER EPSILON WITH OXIA |
151 | # 1FCB GREEK CAPITAL LETTER ETA WITH OXIA |
152 | # 1FD3 GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA |
153 | # 1FDB GREEK CAPITAL LETTER IOTA WITH OXIA |
154 | # 1FE3 GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND OXIA |
155 | # 1FEB GREEK CAPITAL LETTER UPSILON WITH OXIA |
156 | # 1FEE..1FEF [2] GREEK DIALYTIKA AND OXIA..GREEK VARIA |
157 | # 1FF9 GREEK CAPITAL LETTER OMICRON WITH OXIA |
158 | # 1FFB GREEK CAPITAL LETTER OMEGA WITH OXIA |
159 | # 1FFD GREEK OXIA |
160 | # 2000..2001 [2] EN QUAD..EM QUAD |
161 | # 2126 OHM SIGN |
162 | # 212A..212B [2] KELVIN SIGN..ANGSTROM SIGN |
163 | # 2329 LEFT-POINTING ANGLE BRACKET |
164 | # 232A RIGHT-POINTING ANGLE BRACKET |
165 | # F900..FA0D [270] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA0D |
166 | # FA10 CJK COMPATIBILITY IDEOGRAPH-FA10 |
167 | # FA12 CJK COMPATIBILITY IDEOGRAPH-FA12 |
168 | # FA15..FA1E [10] CJK COMPATIBILITY IDEOGRAPH-FA15..CJK COMPATIBILITY IDEOGRAPH-FA1E |
169 | # FA20 CJK COMPATIBILITY IDEOGRAPH-FA20 |
170 | # FA22 CJK COMPATIBILITY IDEOGRAPH-FA22 |
171 | # FA25..FA26 [2] CJK COMPATIBILITY IDEOGRAPH-FA25..CJK COMPATIBILITY IDEOGRAPH-FA26 |
172 | # FA2A..FA2D [4] CJK COMPATIBILITY IDEOGRAPH-FA2A..CJK COMPATIBILITY IDEOGRAPH-FA2D |
173 | # FA30..FA6A [59] CJK COMPATIBILITY IDEOGRAPH-FA30..CJK COMPATIBILITY IDEOGRAPH-FA6A |
a2bd7410 |
174 | # FA70..FAD9 [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9 |
822ebcc8 |
175 | # 2F800..2FA1D [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D |
d357d9fe |
176 | |
20e8a3a3 |
177 | # Total code points: 1030 |
822ebcc8 |
178 | |
179 | # ================================================ |
d357d9fe |
180 | # (4) Non-Starter Decompositions |
98fbe989 |
181 | # |
d357d9fe |
182 | # These characters can be derived from the UnicodeData.txt file |
183 | # by including all characters whose canonical decomposition consists |
8836d2a5 |
184 | # of a sequence of characters, the first of which has a non-zero |
185 | # combining class. |
98fbe989 |
186 | # |
d357d9fe |
187 | # These characters are simply quoted here for reference. |
98fbe989 |
188 | # See also Full_Composition_Exclusion in DerivedNormalizationProps.txt |
822ebcc8 |
189 | # ================================================ |
190 | |
191 | # 0344 COMBINING GREEK DIALYTIKA TONOS |
192 | # 0F73 TIBETAN VOWEL SIGN II |
193 | # 0F75 TIBETAN VOWEL SIGN UU |
194 | # 0F81 TIBETAN VOWEL SIGN REVERSED II |
195 | |
196 | # Total code points: 4 |
d357d9fe |
197 | |