[p5sagit/p5-mst-13.2.git] / lib / unicore / PropValueAliases.txt

# PropertyValueAliases-4.0.1.txt
# Date: 2004-03-02, 19:46:47 GMT [MD]
#
# Unicode Character Database
# Copyright (c) 1991-2004 Unicode, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
# For documentation, see UCD.html
# This file contains aliases for property values used in the UCD.
# These names can be used for XML formats of UCD data, for regular-expression
# property tests, and other programmatic textual descriptions of Unicode data.
# For information on which properties are normative, see UCD.html.
#
# The names may be translated in appropriate environments, and additional
# aliases may be useful.
#
# FORMAT
#
# Each line describes a property value name.
# This consists of three or more fields, separated by semicolons.
#
# First Field: The first field describes the property for which that
# property value name is used.
#
# Second Field: The second field is an abbreviated name.
# If there is no abbreviated name available, the field is marked with "n/a".
#
# Third Field: The third field is a long name.
#
# In the case of ccc, there are 4 fields. The second field is numeric, third
# is abbreviated, and fourth is long.
#
# The above are the preferred aliases. Other aliases may be listed in additional fields.
#
# Loose matching should be applied to all property names and property values, with
# the exception of String Property values. With loose matching of property names and
# values, the case distinctions, whitespace, and '_' are ignored. For Numeric Property
# values, numeric equivalencies are applied: thus "01.00" is equivalent to "1".
#
# NOTE: Property value names are NOT unique across properties. For example:
#
#   AL means Arabic Letter for the Bidi_Class property, and
#   AL means Above_Left for the Canonical_Combining_Class property, and
#   AL means Alphabetic for the Line_Break property.
#
# In addition, some property names may be the same as some property value names.
# For example:
#
#   sc means the Script property, and
#   Sc means the General_Category property value Currency_Symbol (Sc)
#
# The combination of property value and property name is, however, unique.
#
# For more information, see UTS #18: Regular Expression Guidelines
# ================================================


# Age (age)

age; n/a       ; 1.1
age; n/a       ; 2.0
age; n/a       ; 2.1
age; n/a       ; 3.0
age; n/a       ; 3.1
age; n/a       ; 3.2
age; n/a       ; 4.0
age; n/a       ; unassigned

# Bidi_Class (bc)

bc ; AL        ; Arabic_Letter
bc ; AN        ; Arabic_Number
bc ; B         ; Paragraph_Separator
bc ; BN        ; Boundary_Neutral
bc ; CS        ; Common_Separator
bc ; EN        ; European_Number
bc ; ES        ; European_Separator
bc ; ET        ; European_Terminator
bc ; L         ; Left_To_Right
bc ; LRE       ; Left_To_Right_Embedding
bc ; LRO       ; Left_To_Right_Override
bc ; NSM       ; Nonspacing_Mark
bc ; ON        ; Other_Neutral
bc ; PDF       ; Pop_Directional_Format
bc ; R         ; Right_To_Left
bc ; RLE       ; Right_To_Left_Embedding
bc ; RLO       ; Right_To_Left_Override
bc ; S         ; Segment_Separator
bc ; WS        ; White_Space

# Block (blk)

blk; n/a       ; Aegean_Numbers
blk; n/a       ; Alphabetic_Presentation_Forms
blk; n/a       ; Arabic
blk; n/a       ; Arabic_Presentation_Forms-A
blk; n/a       ; Arabic_Presentation_Forms-B
blk; n/a       ; Armenian
blk; n/a       ; Arrows
blk; n/a       ; Basic_Latin
blk; n/a       ; Bengali
blk; n/a       ; Block_Elements
blk; n/a       ; Bopomofo
blk; n/a       ; Bopomofo_Extended
blk; n/a       ; Box_Drawing
blk; n/a       ; Braille_Patterns
blk; n/a       ; Buhid
blk; n/a       ; Byzantine_Musical_Symbols
blk; n/a       ; Cherokee
blk; n/a       ; CJK_Compatibility
blk; n/a       ; CJK_Compatibility_Forms
blk; n/a       ; CJK_Compatibility_Ideographs
blk; n/a       ; CJK_Compatibility_Ideographs_Supplement
blk; n/a       ; CJK_Radicals_Supplement
blk; n/a       ; CJK_Symbols_and_Punctuation
blk; n/a       ; CJK_Unified_Ideographs
blk; n/a       ; CJK_Unified_Ideographs_Extension_A
blk; n/a       ; CJK_Unified_Ideographs_Extension_B
blk; n/a       ; Combining_Diacritical_Marks
blk; n/a       ; Combining_Diacritical_Marks_for_Symbols
blk; n/a       ; Combining_Half_Marks
blk; n/a       ; Control_Pictures
blk; n/a       ; Currency_Symbols
blk; n/a       ; Cypriot_Syllabary
blk; n/a       ; Cyrillic
blk; n/a       ; Cyrillic_Supplement              ; Cyrillic_Supplementary
blk; n/a       ; Deseret
blk; n/a       ; Devanagari
blk; n/a       ; Dingbats
blk; n/a       ; Enclosed_Alphanumerics
blk; n/a       ; Enclosed_CJK_Letters_and_Months
blk; n/a       ; Ethiopic
blk; n/a       ; General_Punctuation
blk; n/a       ; Geometric_Shapes
blk; n/a       ; Georgian
blk; n/a       ; Gothic
blk; n/a       ; Greek_and_Coptic
blk; n/a       ; Greek_Extended
blk; n/a       ; Gujarati
blk; n/a       ; Gurmukhi
blk; n/a       ; Halfwidth_and_Fullwidth_Forms
blk; n/a       ; Hangul_Compatibility_Jamo
blk; n/a       ; Hangul_Jamo
blk; n/a       ; Hangul_Syllables
blk; n/a       ; Hanunoo
blk; n/a       ; Hebrew
blk; n/a       ; High_Private_Use_Surrogates
blk; n/a       ; High_Surrogates
blk; n/a       ; Hiragana
blk; n/a       ; Ideographic_Description_Characters
blk; n/a       ; IPA_Extensions
blk; n/a       ; Kanbun
blk; n/a       ; Kangxi_Radicals
blk; n/a       ; Kannada
blk; n/a       ; Katakana
blk; n/a       ; Katakana_Phonetic_Extensions
blk; n/a       ; Khmer
blk; n/a       ; Khmer_Symbols
blk; n/a       ; Lao
blk; n/a       ; Latin-1_Supplement
blk; n/a       ; Latin_Extended-A
blk; n/a       ; Latin_Extended-B
blk; n/a       ; Latin_Extended_Additional
blk; n/a       ; Letterlike_Symbols
blk; n/a       ; Limbu
blk; n/a       ; Linear_B_Ideograms
blk; n/a       ; Linear_B_Syllabary
blk; n/a       ; Low_Surrogates
blk; n/a       ; Malayalam
blk; n/a       ; Mathematical_Alphanumeric_Symbols
blk; n/a       ; Mathematical_Operators
blk; n/a       ; Miscellaneous_Mathematical_Symbols-A
blk; n/a       ; Miscellaneous_Mathematical_Symbols-B
blk; n/a       ; Miscellaneous_Symbols
blk; n/a       ; Miscellaneous_Symbols_and_Arrows
blk; n/a       ; Miscellaneous_Technical
blk; n/a       ; Mongolian
blk; n/a       ; Musical_Symbols
blk; n/a       ; Myanmar
blk; n/a       ; No_Block
blk; n/a       ; Number_Forms
blk; n/a       ; Ogham
blk; n/a       ; Old_Italic
blk; n/a       ; Optical_Character_Recognition
blk; n/a       ; Oriya
blk; n/a       ; Osmanya
blk; n/a       ; Phonetic_Extensions
blk; n/a       ; Private_Use_Area
blk; n/a       ; Runic
blk; n/a       ; Shavian
blk; n/a       ; Sinhala
blk; n/a       ; Small_Form_Variants
blk; n/a       ; Spacing_Modifier_Letters
blk; n/a       ; Specials
blk; n/a       ; Superscripts_and_Subscripts
blk; n/a       ; Supplemental_Arrows-A
blk; n/a       ; Supplemental_Arrows-B
blk; n/a       ; Supplemental_Mathematical_Operators
blk; n/a       ; Supplementary_Private_Use_Area-A
blk; n/a       ; Supplementary_Private_Use_Area-B
blk; n/a       ; Syriac
blk; n/a       ; Tagalog
blk; n/a       ; Tagbanwa
blk; n/a       ; Tags
blk; n/a       ; Tai_Le
blk; n/a       ; Tai_Xuan_Jing_Symbols
blk; n/a       ; Tamil
blk; n/a       ; Telugu
blk; n/a       ; Thaana
blk; n/a       ; Thai
blk; n/a       ; Tibetan
blk; n/a       ; Ugaritic
blk; n/a       ; Unified_Canadian_Aboriginal_Syllabics
blk; n/a       ; Variation_Selectors
blk; n/a       ; Variation_Selectors_Supplement
blk; n/a       ; Yi_Radicals
blk; n/a       ; Yi_Syllables
blk; n/a       ; Yijing_Hexagram_Symbols

# Canonical_Combining_Class (ccc)

ccc;   0; NR   ; Not_Reordered
ccc;   1; OV   ; Overlay
ccc;   7; NK   ; Nukta
ccc;   8; KV   ; Kana_Voicing
ccc;   9; VR   ; Virama
ccc; 200; ATBL ; Attached_Below_Left
ccc; 202; ATB  ; Attached_Below
ccc; 216; ATAR ; Attached_Above_Right
ccc; 218; BL   ; Below_Left
ccc; 220; B    ; Below
ccc; 222; BR   ; Below_Right
ccc; 224; L    ; Left
ccc; 226; R    ; Right
ccc; 228; AL   ; Above_Left
ccc; 230; A    ; Above
ccc; 232; AR   ; Above_Right
ccc; 233; DB   ; Double_Below
ccc; 234; DA   ; Double_Above
ccc; 240; IS   ; Iota_Subscript

# Decomposition_Type (dt)

dt ; can       ; Canonical
dt ; com       ; Compat
dt ; enc       ; Circle
dt ; fin       ; Final
dt ; font      ; Font
dt ; fra       ; Fraction
dt ; init      ; Initial
dt ; iso       ; Isolated
dt ; med       ; Medial
dt ; nar       ; Narrow
dt ; nb        ; Nobreak
dt ; none      ; None
dt ; sml       ; Small
dt ; sqr       ; Square
dt ; sub       ; Sub
dt ; sup       ; Super
dt ; vert      ; Vertical
dt ; wide      ; Wide

# East_Asian_Width (ea)

ea ; A         ; Ambiguous
ea ; F         ; Fullwidth
ea ; H         ; Halfwidth
ea ; N         ; Neutral
ea ; Na        ; Narrow
ea ; W         ; Wide

# General_Category (gc)

gc ; C         ; Other                            # Cc | Cf | Cn | Co | Cs
gc ; Cc        ; Control
gc ; Cf        ; Format
gc ; Cn        ; Unassigned
gc ; Co        ; Private_Use
gc ; Cs        ; Surrogate
gc ; L         ; Letter                           # Ll | Lm | Lo | Lt | Lu
gc ; LC        ; Cased_Letter                     # Ll | Lt | Lu
gc ; Ll        ; Lowercase_Letter
gc ; Lm        ; Modifier_Letter
gc ; Lo        ; Other_Letter
gc ; Lt        ; Titlecase_Letter
gc ; Lu        ; Uppercase_Letter
gc ; M         ; Mark                             # Mc | Me | Mn
gc ; Mc        ; Spacing_Mark
gc ; Me        ; Enclosing_Mark
gc ; Mn        ; Nonspacing_Mark
gc ; N         ; Number                           # Nd | Nl | No
gc ; Nd        ; Decimal_Number
gc ; Nl        ; Letter_Number
gc ; No        ; Other_Number
gc ; P         ; Punctuation                      # Pc | Pd | Pe | Pf | Pi | Po | Ps
gc ; Pc        ; Connector_Punctuation
gc ; Pd        ; Dash_Punctuation
gc ; Pe        ; Close_Punctuation
gc ; Pf        ; Final_Punctuation
gc ; Pi        ; Initial_Punctuation
gc ; Po        ; Other_Punctuation
gc ; Ps        ; Open_Punctuation
gc ; S         ; Symbol                           # Sc | Sk | Sm | So
gc ; Sc        ; Currency_Symbol
gc ; Sk        ; Modifier_Symbol
gc ; Sm        ; Math_Symbol
gc ; So        ; Other_Symbol
gc ; Z         ; Separator                        # Zl | Zp | Zs
gc ; Zl        ; Line_Separator
gc ; Zp        ; Paragraph_Separator
gc ; Zs        ; Space_Separator

# Hangul_Syllable_Type (hst)

hst; L         ; Leading_Jamo
hst; LV        ; LV_Syllable
hst; LVT       ; LVT_Syllable
hst; NA        ; Not_Applicable
hst; T         ; Trailing_Jamo
hst; V         ; Vowel_Jamo

# Joining_Group (jg)

jg ; n/a       ; Ain
jg ; n/a       ; Alaph
jg ; n/a       ; Alef
jg ; n/a       ; Beh
jg ; n/a       ; Beth
jg ; n/a       ; Dal
jg ; n/a       ; Dalath_Rish
jg ; n/a       ; E
jg ; n/a       ; Fe
jg ; n/a       ; Feh
jg ; n/a       ; Final_Semkath
jg ; n/a       ; Gaf
jg ; n/a       ; Gamal
jg ; n/a       ; Hah
jg ; n/a       ; Hamza_On_Heh_Goal
jg ; n/a       ; He
jg ; n/a       ; Heh
jg ; n/a       ; Heh_Goal
jg ; n/a       ; Heth
jg ; n/a       ; Kaf
jg ; n/a       ; Kaph
jg ; n/a       ; Khaph
jg ; n/a       ; Knotted_Heh
jg ; n/a       ; Lam
jg ; n/a       ; Lamadh
jg ; n/a       ; Meem
jg ; n/a       ; Mim
jg ; n/a       ; No_Joining_Group
jg ; n/a       ; Noon
jg ; n/a       ; Nun
jg ; n/a       ; Pe
jg ; n/a       ; Qaf
jg ; n/a       ; Qaph
jg ; n/a       ; Reh
jg ; n/a       ; Reversed_Pe
jg ; n/a       ; Sad
jg ; n/a       ; Sadhe
jg ; n/a       ; Seen
jg ; n/a       ; Semkath
jg ; n/a       ; Shin
jg ; n/a       ; Swash_Kaf
jg ; n/a       ; Syriac_Waw
jg ; n/a       ; Tah
jg ; n/a       ; Taw
jg ; n/a       ; Teh_Marbuta
jg ; n/a       ; Teth
jg ; n/a       ; Waw
jg ; n/a       ; Yeh
jg ; n/a       ; Yeh_Barree
jg ; n/a       ; Yeh_With_Tail
jg ; n/a       ; Yudh
jg ; n/a       ; Yudh_He
jg ; n/a       ; Zain
jg ; n/a       ; Zhain

# Joining_Type (jt)

jt ; C         ; Join_Causing
jt ; D         ; Dual_Joining
jt ; L         ; Left_Joining
jt ; R         ; Right_Joining
jt ; T         ; Transparent
jt ; U         ; Non_Joining

# Line_Break (lb)

lb ; AI        ; Ambiguous
lb ; AL        ; Alphabetic
lb ; B2        ; Break_Both
lb ; BA        ; Break_After
lb ; BB        ; Break_Before
lb ; BK        ; Mandatory_Break
lb ; CB        ; Contingent_Break
lb ; CL        ; Close_Punctuation
lb ; CM        ; Combining_Mark
lb ; CR        ; Carriage_Return
lb ; EX        ; Exclamation
lb ; GL        ; Glue
lb ; HY        ; Hyphen
lb ; ID        ; Ideographic
lb ; IN        ; Inseparable                      ; Inseperable
lb ; IS        ; Infix_Numeric
lb ; LF        ; Line_Feed
lb ; NL        ; Next_Line
lb ; NS        ; Nonstarter
lb ; NU        ; Numeric
lb ; OP        ; Open_Punctuation
lb ; PO        ; Postfix_Numeric
lb ; PR        ; Prefix_Numeric
lb ; QU        ; Quotation
lb ; SA        ; Complex_Context
lb ; SG        ; Surrogate
lb ; SP        ; Space
lb ; SY        ; Break_Symbols
lb ; WJ        ; Word_Joiner
lb ; XX        ; Unknown
lb ; ZW        ; ZWSpace

# NFC_Quick_Check (NFC_QC)

NFC_QC; M      ; Maybe
NFC_QC; N      ; No
NFC_QC; Y      ; Yes

# NFD_Quick_Check (NFD_QC)

NFD_QC; N      ; No
NFD_QC; Y      ; Yes

# NFKC_Quick_Check (NFKC_QC)

NFKC_QC; M     ; Maybe
NFKC_QC; N     ; No
NFKC_QC; Y     ; Yes

# NFKD_Quick_Check (NFKD_QC)

NFKD_QC; N     ; No
NFKD_QC; Y     ; Yes

# Numeric_Type (nt)

nt ; De        ; Decimal
nt ; Di        ; Digit
nt ; None      ; None
nt ; Nu        ; Numeric

# Script (sc)

sc ; Arab      ; Arabic
sc ; Armn      ; Armenian
sc ; Beng      ; Bengali
sc ; Bopo      ; Bopomofo
sc ; Brai      ; Braille
sc ; Buhd      ; Buhid
sc ; Cans      ; Canadian_Aboriginal
sc ; Cher      ; Cherokee
sc ; Cprt      ; Cypriot
sc ; Cyrl      ; Cyrillic
sc ; Deva      ; Devanagari
sc ; Dsrt      ; Deseret
sc ; Ethi      ; Ethiopic
sc ; Geor      ; Georgian
sc ; Goth      ; Gothic
sc ; Grek      ; Greek
sc ; Gujr      ; Gujarati
sc ; Guru      ; Gurmukhi
sc ; Hang      ; Hangul
sc ; Hani      ; Han
sc ; Hano      ; Hanunoo
sc ; Hebr      ; Hebrew
sc ; Hira      ; Hiragana
sc ; Hrkt      ; Katakana_Or_Hiragana
sc ; Ital      ; Old_Italic
sc ; Kana      ; Katakana
sc ; Khmr      ; Khmer
sc ; Knda      ; Kannada
sc ; Laoo      ; Lao
sc ; Latn      ; Latin
sc ; Limb      ; Limbu
sc ; Linb      ; Linear_B
sc ; Mlym      ; Malayalam
sc ; Mong      ; Mongolian
sc ; Mymr      ; Myanmar
sc ; Ogam      ; Ogham
sc ; Orya      ; Oriya
sc ; Osma      ; Osmanya
sc ; Qaai      ; Inherited
sc ; Runr      ; Runic
sc ; Shaw      ; Shavian
sc ; Sinh      ; Sinhala
sc ; Syrc      ; Syriac
sc ; Tagb      ; Tagbanwa
sc ; Tale      ; Tai_Le
sc ; Taml      ; Tamil
sc ; Telu      ; Telugu
sc ; Tglg      ; Tagalog
sc ; Thaa      ; Thaana
sc ; Thai      ; Thai
sc ; Tibt      ; Tibetan
sc ; Ugar      ; Ugaritic
sc ; Yiii      ; Yi
sc ; Zyyy      ; Common
Commit	Line	Data
7be0dac3	1	# PropertyValueAliases-4.0.1.txt
7be0dac3	2	# Date: 2004-03-02, 19:46:47 GMT [MD]
822ebcc8	3	#
7be0dac3	4	# Unicode Character Database
	5	# Copyright (c) 1991-2004 Unicode, Inc.
	6	# For terms of use, see http://www.unicode.org/terms_of_use.html
	7	# For documentation, see UCD.html
822ebcc8	8	# This file contains aliases for property values used in the UCD.
	9	# These names can be used for XML formats of UCD data, for regular-expression
	10	# property tests, and other programmatic textual descriptions of Unicode data.
1911be83	11	# For information on which properties are normative, see UCD.html.
822ebcc8	12	#
	13	# The names may be translated in appropriate environments, and additional
	14	# aliases may be useful.
	15	#
	16	# FORMAT
	17	#
	18	# Each line describes a property value name.
7be0dac3	19	# This consists of three or more fields, separated by semicolons.
822ebcc8	20	#
	21	# First Field: The first field describes the property for which that
	22	# property value name is used.
822ebcc8	23	#
	24	# Second Field: The second field is an abbreviated name.
	25	# If there is no abbreviated name available, the field is marked with "n/a".
	26	#
	27	# Third Field: The third field is a long name.
	28	#
1911be83	29	# In the case of ccc, there are 4 fields. The second field is numeric, third
822ebcc8	30	# is abbreviated, and fourth is long.
822ebcc8	31	#
7be0dac3	32	# The above are the preferred aliases. Other aliases may be listed in additional fields.
822ebcc8	33	#
7be0dac3	34	# Loose matching should be applied to all property names and property values, with
	35	# the exception of String Property values. With loose matching of property names and
	36	# values, the case distinctions, whitespace, and '_' are ignored. For Numeric Property
	37	# values, numeric equivalencies are applied: thus "01.00" is equivalent to "1".
822ebcc8	38	#
7be0dac3	39	# NOTE: Property value names are NOT unique across properties. For example:
1911be83	40	#
7be0dac3	41	# AL means Arabic Letter for the Bidi_Class property, and
	42	# AL means Alpha_Left for the Combining_Class property, and
	43	# AL means Alphabetic for the Line_Break property.
822ebcc8	44	#
1911be83	45	# In addition, some property names may be the same as some property value names.
	46	# For example:
	47	#
7be0dac3	48	# sc means the Script property, and
7be0dac3	49	# Sc means the General_Category property value Currency_Symbol (Sc)
822ebcc8	50	#
822ebcc8	51	# The combination of property value and property name is, however, unique.
7be0dac3	52	#
7be0dac3	53	# For more information, see UTS #18: Regular Expression Guidelines
822ebcc8	54	# ================================================
	55
	56
7be0dac3	57	# Age (age)
	58
	59	age; n/a ; 1.1
	60	age; n/a ; 2.0
	61	age; n/a ; 2.1
	62	age; n/a ; 3.0
	63	age; n/a ; 3.1
	64	age; n/a ; 3.2
	65	age; n/a ; 4.0
	66	age; n/a ; unassigned
	67
	68	# Bidi_Class (bc)
	69
822ebcc8	70	bc ; AL ; Arabic_Letter
	71	bc ; AN ; Arabic_Number
	72	bc ; B ; Paragraph_Separator
	73	bc ; BN ; Boundary_Neutral
	74	bc ; CS ; Common_Separator
	75	bc ; EN ; European_Number
	76	bc ; ES ; European_Separator
	77	bc ; ET ; European_Terminator
	78	bc ; L ; Left_To_Right
	79	bc ; LRE ; Left_To_Right_Embedding
	80	bc ; LRO ; Left_To_Right_Override
	81	bc ; NSM ; Nonspacing_Mark
	82	bc ; ON ; Other_Neutral
	83	bc ; PDF ; Pop_Directional_Format
	84	bc ; R ; Right_To_Left
	85	bc ; RLE ; Right_To_Left_Embedding
	86	bc ; RLO ; Right_To_Left_Override
	87	bc ; S ; Segment_Separator
	88	bc ; WS ; White_Space
	89
7be0dac3	90	# Block (blk)
7be0dac3	91
1911be83	92	blk; n/a ; Aegean_Numbers
	93	blk; n/a ; Alphabetic_Presentation_Forms
	94	blk; n/a ; Arabic
	95	blk; n/a ; Arabic_Presentation_Forms-A
	96	blk; n/a ; Arabic_Presentation_Forms-B
	97	blk; n/a ; Armenian
	98	blk; n/a ; Arrows
	99	blk; n/a ; Basic_Latin
	100	blk; n/a ; Bengali
	101	blk; n/a ; Block_Elements
	102	blk; n/a ; Bopomofo
	103	blk; n/a ; Bopomofo_Extended
	104	blk; n/a ; Box_Drawing
	105	blk; n/a ; Braille_Patterns
	106	blk; n/a ; Buhid
	107	blk; n/a ; Byzantine_Musical_Symbols
	108	blk; n/a ; Cherokee
	109	blk; n/a ; CJK_Compatibility
	110	blk; n/a ; CJK_Compatibility_Forms
	111	blk; n/a ; CJK_Compatibility_Ideographs
	112	blk; n/a ; CJK_Compatibility_Ideographs_Supplement
	113	blk; n/a ; CJK_Radicals_Supplement
	114	blk; n/a ; CJK_Symbols_and_Punctuation
	115	blk; n/a ; CJK_Unified_Ideographs
	116	blk; n/a ; CJK_Unified_Ideographs_Extension_A
	117	blk; n/a ; CJK_Unified_Ideographs_Extension_B
	118	blk; n/a ; Combining_Diacritical_Marks
	119	blk; n/a ; Combining_Diacritical_Marks_for_Symbols
	120	blk; n/a ; Combining_Half_Marks
	121	blk; n/a ; Control_Pictures
	122	blk; n/a ; Currency_Symbols
	123	blk; n/a ; Cypriot_Syllabary
	124	blk; n/a ; Cyrillic
7be0dac3	125	blk; n/a ; Cyrillic_Supplement ; Cyrillic_Supplementary
1911be83	126	blk; n/a ; Deseret
	127	blk; n/a ; Devanagari
	128	blk; n/a ; Dingbats
	129	blk; n/a ; Enclosed_Alphanumerics
	130	blk; n/a ; Enclosed_CJK_Letters_and_Months
	131	blk; n/a ; Ethiopic
	132	blk; n/a ; General_Punctuation
	133	blk; n/a ; Geometric_Shapes
	134	blk; n/a ; Georgian
	135	blk; n/a ; Gothic
	136	blk; n/a ; Greek_and_Coptic
	137	blk; n/a ; Greek_Extended
	138	blk; n/a ; Gujarati
	139	blk; n/a ; Gurmukhi
	140	blk; n/a ; Halfwidth_and_Fullwidth_Forms
	141	blk; n/a ; Hangul_Compatibility_Jamo
	142	blk; n/a ; Hangul_Jamo
	143	blk; n/a ; Hangul_Syllables
	144	blk; n/a ; Hanunoo
	145	blk; n/a ; Hebrew
	146	blk; n/a ; High_Private_Use_Surrogates
	147	blk; n/a ; High_Surrogates
	148	blk; n/a ; Hiragana
	149	blk; n/a ; Ideographic_Description_Characters
	150	blk; n/a ; IPA_Extensions
	151	blk; n/a ; Kanbun
	152	blk; n/a ; Kangxi_Radicals
	153	blk; n/a ; Kannada
	154	blk; n/a ; Katakana
	155	blk; n/a ; Katakana_Phonetic_Extensions
	156	blk; n/a ; Khmer
	157	blk; n/a ; Khmer_Symbols
	158	blk; n/a ; Lao
7be0dac3	159	blk; n/a ; Latin-1_Supplement
1911be83	160	blk; n/a ; Latin_Extended-A
1911be83	161	blk; n/a ; Latin_Extended-B
7be0dac3	162	blk; n/a ; Latin_Extended_Additional
1911be83	163	blk; n/a ; Letterlike_Symbols
	164	blk; n/a ; Limbu
	165	blk; n/a ; Linear_B_Ideograms
	166	blk; n/a ; Linear_B_Syllabary
	167	blk; n/a ; Low_Surrogates
	168	blk; n/a ; Malayalam
	169	blk; n/a ; Mathematical_Alphanumeric_Symbols
	170	blk; n/a ; Mathematical_Operators
	171	blk; n/a ; Miscellaneous_Mathematical_Symbols-A
	172	blk; n/a ; Miscellaneous_Mathematical_Symbols-B
	173	blk; n/a ; Miscellaneous_Symbols
	174	blk; n/a ; Miscellaneous_Symbols_and_Arrows
	175	blk; n/a ; Miscellaneous_Technical
	176	blk; n/a ; Mongolian
	177	blk; n/a ; Musical_Symbols
	178	blk; n/a ; Myanmar
	179	blk; n/a ; No_Block
	180	blk; n/a ; Number_Forms
	181	blk; n/a ; Ogham
	182	blk; n/a ; Old_Italic
	183	blk; n/a ; Optical_Character_Recognition
	184	blk; n/a ; Oriya
	185	blk; n/a ; Osmanya
	186	blk; n/a ; Phonetic_Extensions
	187	blk; n/a ; Private_Use_Area
	188	blk; n/a ; Runic
	189	blk; n/a ; Shavian
	190	blk; n/a ; Sinhala
	191	blk; n/a ; Small_Form_Variants
	192	blk; n/a ; Spacing_Modifier_Letters
	193	blk; n/a ; Specials
	194	blk; n/a ; Superscripts_and_Subscripts
	195	blk; n/a ; Supplemental_Arrows-A
	196	blk; n/a ; Supplemental_Arrows-B
	197	blk; n/a ; Supplemental_Mathematical_Operators
	198	blk; n/a ; Supplementary_Private_Use_Area-A
	199	blk; n/a ; Supplementary_Private_Use_Area-B
	200	blk; n/a ; Syriac
	201	blk; n/a ; Tagalog
	202	blk; n/a ; Tagbanwa
	203	blk; n/a ; Tags
	204	blk; n/a ; Tai_Le
	205	blk; n/a ; Tai_Xuan_Jing_Symbols
	206	blk; n/a ; Tamil
	207	blk; n/a ; Telugu
	208	blk; n/a ; Thaana
	209	blk; n/a ; Thai
	210	blk; n/a ; Tibetan
	211	blk; n/a ; Ugaritic
	212	blk; n/a ; Unified_Canadian_Aboriginal_Syllabics
	213	blk; n/a ; Variation_Selectors
	214	blk; n/a ; Variation_Selectors_Supplement
	215	blk; n/a ; Yi_Radicals
	216	blk; n/a ; Yi_Syllables
	217	blk; n/a ; Yijing_Hexagram_Symbols
	218
7be0dac3	219	# Canonical_Combining_Class (ccc)
7be0dac3	220
822ebcc8	221	ccc; 0; NR ; Not_Reordered
822ebcc8	222	ccc; 1; OV ; Overlay
7be0dac3	223	ccc; 7; NK ; Nukta
	224	ccc; 8; KV ; Kana_Voicing
	225	ccc; 9; VR ; Virama
	226	ccc; 200; ATBL ; Attached_Below_Left
1911be83	227	ccc; 202; ATB ; Attached_Below
822ebcc8	228	ccc; 216; ATAR ; Attached_Above_Right
	229	ccc; 218; BL ; Below_Left
	230	ccc; 220; B ; Below
	231	ccc; 222; BR ; Below_Right
	232	ccc; 224; L ; Left
	233	ccc; 226; R ; Right
	234	ccc; 228; AL ; Above_Left
	235	ccc; 230; A ; Above
	236	ccc; 232; AR ; Above_Right
	237	ccc; 233; DB ; Double_Below
	238	ccc; 234; DA ; Double_Above
	239	ccc; 240; IS ; Iota_Subscript
822ebcc8	240
7be0dac3	241	# Decomposition_Type (dt)
	242
	243	dt ; can ; Canonical
	244	dt ; com ; Compat
	245	dt ; enc ; Circle
	246	dt ; fin ; Final
	247	dt ; font ; Font
	248	dt ; fra ; Fraction
	249	dt ; init ; Initial
	250	dt ; iso ; Isolated
	251	dt ; med ; Medial
	252	dt ; nar ; Narrow
	253	dt ; nb ; Nobreak
	254	dt ; none ; None
	255	dt ; sml ; Small
	256	dt ; sqr ; Square
	257	dt ; sub ; Sub
	258	dt ; sup ; Super
	259	dt ; vert ; Vertical
	260	dt ; wide ; Wide
	261
	262	# East_Asian_Width (ea)
822ebcc8	263
	264	ea ; A ; Ambiguous
	265	ea ; F ; Fullwidth
	266	ea ; H ; Halfwidth
	267	ea ; N ; Neutral
	268	ea ; Na ; Narrow
	269	ea ; W ; Wide
	270
7be0dac3	271	# General_Category (gc)
7be0dac3	272
822ebcc8	273	gc ; C ; Other # Cc \| Cf \| Cn \| Co \| Cs
	274	gc ; Cc ; Control
	275	gc ; Cf ; Format
	276	gc ; Cn ; Unassigned
	277	gc ; Co ; Private_Use
	278	gc ; Cs ; Surrogate
	279	gc ; L ; Letter # Ll \| Lm \| Lo \| Lt \| Lu
	280	gc ; LC ; Cased_Letter # Ll \| Lt \| Lu
	281	gc ; Ll ; Lowercase_Letter
	282	gc ; Lm ; Modifier_Letter
	283	gc ; Lo ; Other_Letter
	284	gc ; Lt ; Titlecase_Letter
	285	gc ; Lu ; Uppercase_Letter
	286	gc ; M ; Mark # Mc \| Me \| Mn
	287	gc ; Mc ; Spacing_Mark
	288	gc ; Me ; Enclosing_Mark
	289	gc ; Mn ; Nonspacing_Mark
	290	gc ; N ; Number # Nd \| Nl \| No
	291	gc ; Nd ; Decimal_Number
	292	gc ; Nl ; Letter_Number
	293	gc ; No ; Other_Number
	294	gc ; P ; Punctuation # Pc \| Pd \| Pe \| Pf \| Pi \| Po \| Ps
	295	gc ; Pc ; Connector_Punctuation
	296	gc ; Pd ; Dash_Punctuation
	297	gc ; Pe ; Close_Punctuation
	298	gc ; Pf ; Final_Punctuation
	299	gc ; Pi ; Initial_Punctuation
	300	gc ; Po ; Other_Punctuation
	301	gc ; Ps ; Open_Punctuation
	302	gc ; S ; Symbol # Sc \| Sk \| Sm \| So
	303	gc ; Sc ; Currency_Symbol
	304	gc ; Sk ; Modifier_Symbol
	305	gc ; Sm ; Math_Symbol
	306	gc ; So ; Other_Symbol
	307	gc ; Z ; Separator # Zl \| Zp \| Zs
	308	gc ; Zl ; Line_Separator
	309	gc ; Zp ; Paragraph_Separator
	310	gc ; Zs ; Space_Separator
	311
7be0dac3	312	# Hangul_Syllable_Type (hst)
7be0dac3	313
1911be83	314	hst; L ; Leading_Jamo
	315	hst; LV ; LV_Syllable
	316	hst; LVT ; LVT_Syllable
	317	hst; NA ; Not_Applicable
	318	hst; T ; Trailing_Jamo
	319	hst; V ; Vowel_Jamo
	320
7be0dac3	321	# Joining_Group (jg)
	322
	323	jg ; n/a ; Ain
	324	jg ; n/a ; Alaph
	325	jg ; n/a ; Alef
	326	jg ; n/a ; Beh
	327	jg ; n/a ; Beth
	328	jg ; n/a ; Dal
	329	jg ; n/a ; Dalath_Rish
822ebcc8	330	jg ; n/a ; E
7be0dac3	331	jg ; n/a ; Fe
	332	jg ; n/a ; Feh
	333	jg ; n/a ; Final_Semkath
	334	jg ; n/a ; Gaf
	335	jg ; n/a ; Gamal
	336	jg ; n/a ; Hah
	337	jg ; n/a ; Hamza_On_Heh_Goal
	338	jg ; n/a ; He
	339	jg ; n/a ; Heh
	340	jg ; n/a ; Heh_Goal
	341	jg ; n/a ; Heth
	342	jg ; n/a ; Kaf
	343	jg ; n/a ; Kaph
	344	jg ; n/a ; Khaph
	345	jg ; n/a ; Knotted_Heh
	346	jg ; n/a ; Lam
	347	jg ; n/a ; Lamadh
	348	jg ; n/a ; Meem
	349	jg ; n/a ; Mim
	350	jg ; n/a ; No_Joining_Group
	351	jg ; n/a ; Noon
	352	jg ; n/a ; Nun
	353	jg ; n/a ; Pe
	354	jg ; n/a ; Qaf
	355	jg ; n/a ; Qaph
	356	jg ; n/a ; Reh
	357	jg ; n/a ; Reversed_Pe
	358	jg ; n/a ; Sad
	359	jg ; n/a ; Sadhe
	360	jg ; n/a ; Seen
	361	jg ; n/a ; Semkath
	362	jg ; n/a ; Shin
	363	jg ; n/a ; Swash_Kaf
	364	jg ; n/a ; Syriac_Waw
	365	jg ; n/a ; Tah
	366	jg ; n/a ; Taw
	367	jg ; n/a ; Teh_Marbuta
	368	jg ; n/a ; Teth
	369	jg ; n/a ; Waw
	370	jg ; n/a ; Yeh
	371	jg ; n/a ; Yeh_Barree
	372	jg ; n/a ; Yeh_With_Tail
	373	jg ; n/a ; Yudh
	374	jg ; n/a ; Yudh_He
	375	jg ; n/a ; Zain
	376	jg ; n/a ; Zhain
	377
	378	# Joining_Type (jt)
822ebcc8	379
	380	jt ; C ; Join_Causing
	381	jt ; D ; Dual_Joining
	382	jt ; L ; Left_Joining
	383	jt ; R ; Right_Joining
	384	jt ; T ; Transparent
	385	jt ; U ; Non_Joining
	386
7be0dac3	387	# Line_Break (lb)
7be0dac3	388
822ebcc8	389	lb ; AI ; Ambiguous
	390	lb ; AL ; Alphabetic
	391	lb ; B2 ; Break_Both
	392	lb ; BA ; Break_After
	393	lb ; BB ; Break_Before
	394	lb ; BK ; Mandatory_Break
	395	lb ; CB ; Contingent_Break
	396	lb ; CL ; Close_Punctuation
	397	lb ; CM ; Combining_Mark
	398	lb ; CR ; Carriage_Return
	399	lb ; EX ; Exclamation
	400	lb ; GL ; Glue
	401	lb ; HY ; Hyphen
	402	lb ; ID ; Ideographic
7be0dac3	403	lb ; IN ; Inseparable ; Inseperable
822ebcc8	404	lb ; IS ; Infix_Numeric
822ebcc8	405	lb ; LF ; Line_Feed
1911be83	406	lb ; NL ; Next_Line
822ebcc8	407	lb ; NS ; Nonstarter
	408	lb ; NU ; Numeric
	409	lb ; OP ; Open_Punctuation
	410	lb ; PO ; Postfix_Numeric
	411	lb ; PR ; Prefix_Numeric
	412	lb ; QU ; Quotation
	413	lb ; SA ; Complex_Context
	414	lb ; SG ; Surrogate
	415	lb ; SP ; Space
	416	lb ; SY ; Break_Symbols
1911be83	417	lb ; WJ ; Word_Joiner
822ebcc8	418	lb ; XX ; Unknown
	419	lb ; ZW ; ZWSpace
	420
7be0dac3	421	# NFC_Quick_Check (NFC_QC)
	422
	423	NFC_QC; M ; Maybe
	424	NFC_QC; N ; No
	425	NFC_QC; Y ; Yes
	426
	427	# NFD_Quick_Check (NFD_QC)
	428
	429	NFD_QC; N ; No
	430	NFD_QC; Y ; Yes
	431
	432	# NFKC_Quick_Check (NFKC_QC)
	433
	434	NFKC_QC; M ; Maybe
	435	NFKC_QC; N ; No
	436	NFKC_QC; Y ; Yes
	437
	438	# NFKD_Quick_Check (NFKD_QC)
	439
	440	NFKD_QC; N ; No
	441	NFKD_QC; Y ; Yes
	442
	443	# Numeric_Type (nt)
	444
	445	nt ; De ; Decimal
	446	nt ; Di ; Digit
	447	nt ; None ; None
	448	nt ; Nu ; Numeric
822ebcc8	449
7be0dac3	450	# Script (sc)
822ebcc8	451
	452	sc ; Arab ; Arabic
	453	sc ; Armn ; Armenian
	454	sc ; Beng ; Bengali
	455	sc ; Bopo ; Bopomofo
1911be83	456	sc ; Brai ; Braille
822ebcc8	457	sc ; Buhd ; Buhid
	458	sc ; Cans ; Canadian_Aboriginal
	459	sc ; Cher ; Cherokee
1911be83	460	sc ; Cprt ; Cypriot
822ebcc8	461	sc ; Cyrl ; Cyrillic
	462	sc ; Deva ; Devanagari
	463	sc ; Dsrt ; Deseret
	464	sc ; Ethi ; Ethiopic
	465	sc ; Geor ; Georgian
	466	sc ; Goth ; Gothic
	467	sc ; Grek ; Greek
	468	sc ; Gujr ; Gujarati
	469	sc ; Guru ; Gurmukhi
	470	sc ; Hang ; Hangul
	471	sc ; Hani ; Han
	472	sc ; Hano ; Hanunoo
	473	sc ; Hebr ; Hebrew
	474	sc ; Hira ; Hiragana
7be0dac3	475	sc ; Hrkt ; Katakana_Or_Hiragana
822ebcc8	476	sc ; Ital ; Old_Italic
	477	sc ; Kana ; Katakana
	478	sc ; Khmr ; Khmer
	479	sc ; Knda ; Kannada
	480	sc ; Laoo ; Lao
	481	sc ; Latn ; Latin
1911be83	482	sc ; Limb ; Limbu
1911be83	483	sc ; Linb ; Linear_B
822ebcc8	484	sc ; Mlym ; Malayalam
	485	sc ; Mong ; Mongolian
	486	sc ; Mymr ; Myanmar
	487	sc ; Ogam ; Ogham
	488	sc ; Orya ; Oriya
1911be83	489	sc ; Osma ; Osmanya
822ebcc8	490	sc ; Qaai ; Inherited
822ebcc8	491	sc ; Runr ; Runic
1911be83	492	sc ; Shaw ; Shavian
822ebcc8	493	sc ; Sinh ; Sinhala
	494	sc ; Syrc ; Syriac
	495	sc ; Tagb ; Tagbanwa
1911be83	496	sc ; Tale ; Tai_Le
822ebcc8	497	sc ; Taml ; Tamil
	498	sc ; Telu ; Telugu
	499	sc ; Tglg ; Tagalog
	500	sc ; Thaa ; Thaana
	501	sc ; Thai ; Thai
	502	sc ; Tibt ; Tibetan
1911be83	503	sc ; Ugar ; Ugaritic
822ebcc8	504	sc ; Yiii ; Yi
822ebcc8	505	sc ; Zyyy ; Common