1 # PropertyValueAliases-5.0.0.txt
2 # Date: 2006-03-03, 08:23:34 GMT [MD]
4 # Unicode Character Database
5 # Copyright (c) 1991-2006 Unicode, Inc.
6 # For terms of use, see http://www.unicode.org/terms_of_use.html
7 # For documentation, see UCD.html
9 # This file contains aliases for property values used in the UCD.
10 # These names can be used for XML formats of UCD data, for regular-expression
11 # property tests, and other programmatic textual descriptions of Unicode data.
12 # For information on which properties are normative, see UCD.html.
14 # The names may be translated in appropriate environments, and additional
15 # aliases may be useful.
19 # Each line describes a property value name.
20 # This consists of three or more fields, separated by semicolons.
22 # First Field: The first field describes the property for which that
23 # property value name is used.
25 # Second Field: The second field is an abbreviated name.
26 # If there is no abbreviated name available, the field is marked with "n/a".
28 # Third Field: The third field is a long name.
30 # In the case of ccc, there are 4 fields. The second field is numeric, third
31 # is abbreviated, and fourth is long.
33 # The above are the preferred aliases. Other aliases may be listed in additional fields.
35 # Loose matching should be applied to all property names and property values, with
36 # the exception of String Property values. With loose matching of property names and
37 # values, the case distinctions, whitespace, and '_' are ignored. For Numeric Property
38 # values, numeric equivalencies are applied: thus "01.00" is equivalent to "1".
40 # NOTE: Property value names are NOT unique across properties. For example:
42 # AL means Arabic Letter for the Bidi_Class property, and
43 # AL means Above_Left for the Combining_Class property, and
44 # AL means Alphabetic for the Line_Break property.
46 # In addition, some property names may be the same as some property value names. For example:
49 # sc means the Script property, and
50 # Sc means the General_Category property value Currency_Symbol (Sc)
52 # The combination of property value and property name is, however, unique.
54 # For more information, see UTS #18: Regular Expression Guidelines
55 # ================================================
73 bc ; AL ; Arabic_Letter
74 bc ; AN ; Arabic_Number
75 bc ; B ; Paragraph_Separator
76 bc ; BN ; Boundary_Neutral
77 bc ; CS ; Common_Separator
78 bc ; EN ; European_Number
79 bc ; ES ; European_Separator
80 bc ; ET ; European_Terminator
81 bc ; L ; Left_To_Right
82 bc ; LRE ; Left_To_Right_Embedding
83 bc ; LRO ; Left_To_Right_Override
84 bc ; NSM ; Nonspacing_Mark
85 bc ; ON ; Other_Neutral
86 bc ; PDF ; Pop_Directional_Format
87 bc ; R ; Right_To_Left
88 bc ; RLE ; Right_To_Left_Embedding
89 bc ; RLO ; Right_To_Left_Override
90 bc ; S ; Segment_Separator
95 blk; n/a ; Aegean_Numbers
96 blk; n/a ; Alphabetic_Presentation_Forms
97 blk; n/a ; Ancient_Greek_Musical_Notation
98 blk; n/a ; Ancient_Greek_Numbers
100 blk; n/a ; Arabic_Presentation_Forms-A
101 blk; n/a ; Arabic_Presentation_Forms-B
102 blk; n/a ; Arabic_Supplement
106 blk; n/a ; Basic_Latin
108 blk; n/a ; Block_Elements
110 blk; n/a ; Bopomofo_Extended
111 blk; n/a ; Box_Drawing
112 blk; n/a ; Braille_Patterns
115 blk; n/a ; Byzantine_Musical_Symbols
117 blk; n/a ; CJK_Compatibility
118 blk; n/a ; CJK_Compatibility_Forms
119 blk; n/a ; CJK_Compatibility_Ideographs
120 blk; n/a ; CJK_Compatibility_Ideographs_Supplement
121 blk; n/a ; CJK_Radicals_Supplement
122 blk; n/a ; CJK_Strokes
123 blk; n/a ; CJK_Symbols_and_Punctuation
124 blk; n/a ; CJK_Unified_Ideographs
125 blk; n/a ; CJK_Unified_Ideographs_Extension_A
126 blk; n/a ; CJK_Unified_Ideographs_Extension_B
127 blk; n/a ; Combining_Diacritical_Marks
128 blk; n/a ; Combining_Diacritical_Marks_for_Symbols
129 blk; n/a ; Combining_Diacritical_Marks_Supplement
130 blk; n/a ; Combining_Half_Marks
131 blk; n/a ; Control_Pictures
133 blk; n/a ; Counting_Rod_Numerals
135 blk; n/a ; Cuneiform_Numbers_and_Punctuation
136 blk; n/a ; Currency_Symbols
137 blk; n/a ; Cypriot_Syllabary
139 blk; n/a ; Cyrillic_Supplement ; Cyrillic_Supplementary
141 blk; n/a ; Devanagari
143 blk; n/a ; Enclosed_Alphanumerics
144 blk; n/a ; Enclosed_CJK_Letters_and_Months
146 blk; n/a ; Ethiopic_Extended
147 blk; n/a ; Ethiopic_Supplement
148 blk; n/a ; General_Punctuation
149 blk; n/a ; Geometric_Shapes
151 blk; n/a ; Georgian_Supplement
152 blk; n/a ; Glagolitic
154 blk; n/a ; Greek_and_Coptic
155 blk; n/a ; Greek_Extended
158 blk; n/a ; Halfwidth_and_Fullwidth_Forms
159 blk; n/a ; Hangul_Compatibility_Jamo
160 blk; n/a ; Hangul_Jamo
161 blk; n/a ; Hangul_Syllables
164 blk; n/a ; High_Private_Use_Surrogates
165 blk; n/a ; High_Surrogates
167 blk; n/a ; Ideographic_Description_Characters
168 blk; n/a ; IPA_Extensions
170 blk; n/a ; Kangxi_Radicals
173 blk; n/a ; Katakana_Phonetic_Extensions
174 blk; n/a ; Kharoshthi
176 blk; n/a ; Khmer_Symbols
178 blk; n/a ; Latin-1_Supplement
179 blk; n/a ; Latin_Extended-A
180 blk; n/a ; Latin_Extended-B
181 blk; n/a ; Latin_Extended-C
182 blk; n/a ; Latin_Extended-D
183 blk; n/a ; Latin_Extended_Additional
184 blk; n/a ; Letterlike_Symbols
186 blk; n/a ; Linear_B_Ideograms
187 blk; n/a ; Linear_B_Syllabary
188 blk; n/a ; Low_Surrogates
190 blk; n/a ; Mathematical_Alphanumeric_Symbols
191 blk; n/a ; Mathematical_Operators
192 blk; n/a ; Miscellaneous_Mathematical_Symbols-A
193 blk; n/a ; Miscellaneous_Mathematical_Symbols-B
194 blk; n/a ; Miscellaneous_Symbols
195 blk; n/a ; Miscellaneous_Symbols_and_Arrows
196 blk; n/a ; Miscellaneous_Technical
197 blk; n/a ; Modifier_Tone_Letters
199 blk; n/a ; Musical_Symbols
201 blk; n/a ; New_Tai_Lue
204 blk; n/a ; Number_Forms
206 blk; n/a ; Old_Italic
207 blk; n/a ; Old_Persian
208 blk; n/a ; Optical_Character_Recognition
212 blk; n/a ; Phoenician
213 blk; n/a ; Phonetic_Extensions
214 blk; n/a ; Phonetic_Extensions_Supplement
215 blk; n/a ; Private_Use_Area
219 blk; n/a ; Small_Form_Variants
220 blk; n/a ; Spacing_Modifier_Letters
222 blk; n/a ; Superscripts_and_Subscripts
223 blk; n/a ; Supplemental_Arrows-A
224 blk; n/a ; Supplemental_Arrows-B
225 blk; n/a ; Supplemental_Mathematical_Operators
226 blk; n/a ; Supplemental_Punctuation
227 blk; n/a ; Supplementary_Private_Use_Area-A
228 blk; n/a ; Supplementary_Private_Use_Area-B
229 blk; n/a ; Syloti_Nagri
235 blk; n/a ; Tai_Xuan_Jing_Symbols
243 blk; n/a ; Unified_Canadian_Aboriginal_Syllabics
244 blk; n/a ; Variation_Selectors
245 blk; n/a ; Variation_Selectors_Supplement
246 blk; n/a ; Vertical_Forms
247 blk; n/a ; Yi_Radicals
248 blk; n/a ; Yi_Syllables
249 blk; n/a ; Yijing_Hexagram_Symbols
251 # Canonical_Combining_Class (ccc)
253 ccc; 0; NR ; Not_Reordered
256 ccc; 8; KV ; Kana_Voicing
258 ccc; 200; ATBL ; Attached_Below_Left
259 ccc; 202; ATB ; Attached_Below
260 ccc; 216; ATAR ; Attached_Above_Right
261 ccc; 218; BL ; Below_Left
263 ccc; 222; BR ; Below_Right
266 ccc; 228; AL ; Above_Left
268 ccc; 232; AR ; Above_Right
269 ccc; 233; DB ; Double_Below
270 ccc; 234; DA ; Double_Above
271 ccc; 240; IS ; Iota_Subscript
273 # Decomposition_Type (dt)
294 # East_Asian_Width (ea)
303 # General_Category (gc)
305 gc ; C ; Other # Cc | Cf | Cn | Co | Cs
306 gc ; Cc ; Control ; cntrl
309 gc ; Co ; Private_Use
311 gc ; L ; Letter # Ll | Lm | Lo | Lt | Lu
312 gc ; LC ; Cased_Letter # Ll | Lt | Lu
313 gc ; Ll ; Lowercase_Letter
314 gc ; Lm ; Modifier_Letter
315 gc ; Lo ; Other_Letter
316 gc ; Lt ; Titlecase_Letter
317 gc ; Lu ; Uppercase_Letter
318 gc ; M ; Mark # Mc | Me | Mn
319 gc ; Mc ; Spacing_Mark
320 gc ; Me ; Enclosing_Mark
321 gc ; Mn ; Nonspacing_Mark
322 gc ; N ; Number # Nd | Nl | No
323 gc ; Nd ; Decimal_Number ; digit
324 gc ; Nl ; Letter_Number
325 gc ; No ; Other_Number
326 gc ; P ; Punctuation ; punct # Pc | Pd | Pe | Pf | Pi | Po | Ps
327 gc ; Pc ; Connector_Punctuation
328 gc ; Pd ; Dash_Punctuation
329 gc ; Pe ; Close_Punctuation
330 gc ; Pf ; Final_Punctuation
331 gc ; Pi ; Initial_Punctuation
332 gc ; Po ; Other_Punctuation
333 gc ; Ps ; Open_Punctuation
334 gc ; S ; Symbol # Sc | Sk | Sm | So
335 gc ; Sc ; Currency_Symbol
336 gc ; Sk ; Modifier_Symbol
337 gc ; Sm ; Math_Symbol
338 gc ; So ; Other_Symbol
339 gc ; Z ; Separator # Zl | Zp | Zs
340 gc ; Zl ; Line_Separator
341 gc ; Zp ; Paragraph_Separator
342 gc ; Zs ; Space_Separator
344 # Grapheme_Cluster_Break (GCB)
357 # Hangul_Syllable_Type (hst)
359 hst; L ; Leading_Jamo
360 hst; LV ; LV_Syllable
361 hst; LVT ; LVT_Syllable
362 hst; NA ; Not_Applicable
363 hst; T ; Trailing_Jamo
374 jg ; n/a ; Dalath_Rish
378 jg ; n/a ; Final_Semkath
382 jg ; n/a ; Hamza_On_Heh_Goal
390 jg ; n/a ; Knotted_Heh
395 jg ; n/a ; No_Joining_Group
402 jg ; n/a ; Reversed_Pe
409 jg ; n/a ; Syriac_Waw
412 jg ; n/a ; Teh_Marbuta
416 jg ; n/a ; Yeh_Barree
417 jg ; n/a ; Yeh_With_Tail
425 jt ; C ; Join_Causing
426 jt ; D ; Dual_Joining
427 jt ; L ; Left_Joining
428 jt ; R ; Right_Joining
437 lb ; BA ; Break_After
438 lb ; BB ; Break_Before
439 lb ; BK ; Mandatory_Break
440 lb ; CB ; Contingent_Break
441 lb ; CL ; Close_Punctuation
442 lb ; CM ; Combining_Mark
443 lb ; CR ; Carriage_Return
444 lb ; EX ; Exclamation
449 lb ; ID ; Ideographic
450 lb ; IN ; Inseparable ; Inseperable
451 lb ; IS ; Infix_Numeric
459 lb ; OP ; Open_Punctuation
460 lb ; PO ; Postfix_Numeric
461 lb ; PR ; Prefix_Numeric
463 lb ; SA ; Complex_Context
466 lb ; SY ; Break_Symbols
467 lb ; WJ ; Word_Joiner
471 # NFC_Quick_Check (NFC_QC)
477 # NFD_Quick_Check (NFD_QC)
482 # NFKC_Quick_Check (NFKC_QC)
488 # NFKD_Quick_Check (NFKD_QC)
510 sc ; Cans ; Canadian_Aboriginal
512 sc ; Copt ; Coptic ; Qaac
515 sc ; Deva ; Devanagari
519 sc ; Glag ; Glagolitic
529 sc ; Hrkt ; Katakana_Or_Hiragana
530 sc ; Ital ; Old_Italic
532 sc ; Khar ; Kharoshthi
539 sc ; Mlym ; Malayalam
540 sc ; Mong ; Mongolian
547 sc ; Phnx ; Phoenician
548 sc ; Qaai ; Inherited
552 sc ; Sylo ; Syloti_Nagri
556 sc ; Talu ; New_Tai_Lue
565 sc ; Xpeo ; Old_Persian
566 sc ; Xsux ; Cuneiform
571 # Sentence_Break (SB)
587 WB ; EX ; ExtendNumLet