1 # !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
2 # This file is built by mktables.PL from e.g. Unicode.txt.
3 # Any changes made here will be lost!
31 'CANADIAN-ABORIGINAL' => 26,
46 'Latin-1 Supplement' => 41,
47 'Latin Extended-A' => 42,
48 'Latin Extended-B' => 43,
49 'IPA Extensions' => 44,
50 'Spacing Modifier Letters' => 45,
51 'Combining Diacritical Marks' => 46,
53 'Cyrillic Block' => 48,
54 'Armenian Block' => 49,
59 'Devanagari Block' => 54,
60 'Bengali Block' => 55,
61 'Gurmukhi Block' => 56,
62 'Gujarati Block' => 57,
66 'Kannada Block' => 61,
67 'Malayalam Block' => 62,
68 'Sinhala Block' => 63,
71 'Tibetan Block' => 66,
72 'Myanmar Block' => 67,
73 'Georgian Block' => 68,
75 'Ethiopic Block' => 70,
76 'Cherokee Block' => 71,
77 'Unified Canadian Aboriginal Syllabics' => 72,
81 'Mongolian Block' => 76,
82 'Latin Extended Additional' => 77,
83 'Greek Extended' => 78,
84 'General Punctuation' => 79,
85 'Superscripts and Subscripts' => 80,
86 'Currency Symbols' => 81,
87 'Combining Marks for Symbols' => 82,
88 'Letterlike Symbols' => 83,
91 'Mathematical Operators' => 86,
92 'Miscellaneous Technical' => 87,
93 'Control Pictures' => 88,
94 'Optical Character Recognition' => 89,
95 'Enclosed Alphanumerics' => 90,
97 'Block Elements' => 92,
98 'Geometric Shapes' => 93,
99 'Miscellaneous Symbols' => 94,
101 'Braille Patterns' => 96,
102 'CJK Radicals Supplement' => 97,
103 'Kangxi Radicals' => 98,
104 'Ideographic Description Characters' => 99,
105 'CJK Symbols and Punctuation' => 100,
106 'Hiragana Block' => 101,
107 'Katakana Block' => 102,
108 'Bopomofo Block' => 103,
109 'Hangul Compatibility Jamo' => 104,
111 'Bopomofo Extended' => 106,
112 'Enclosed CJK Letters and Months' => 107,
113 'CJK Compatibility' => 108,
114 'CJK Unified Ideographs Extension A' => 109,
115 'CJK Unified Ideographs' => 110,
116 'Yi Syllables' => 111,
117 'Yi Radicals' => 112,
118 'Hangul Syllables' => 113,
119 'High Surrogates' => 114,
120 'High Private Use Surrogates' => 115,
121 'Low Surrogates' => 116,
122 'Private Use' => 117,
123 'CJK Compatibility Ideographs' => 118,
124 'Alphabetic Presentation Forms' => 119,
125 'Arabic Presentation Forms-A' => 120,
126 'Combining Half Marks' => 121,
127 'CJK Compatibility Forms' => 122,
128 'Small Form Variants' => 123,
129 'Arabic Presentation Forms-B' => 124,
131 'Halfwidth and Fullwidth Forms' => 126,
133 'Gothic Block' => 128,
134 'Deseret Block' => 129,
135 'Byzantine Musical Symbols' => 130,
136 'Musical Symbols' => 131,
137 'Mathematical Alphanumeric Symbols' => 132,
138 'CJK Unified Ideographs Extension B' => 133,
139 'CJK Compatibility Ideographs Supplement' => 134,
143 'White_space' => 138,
144 'Bidi_Control' => 139,
145 'Join_Control' => 140,
148 'Quotation_Mark' => 143,
149 'Terminal_Punctuation' => 144,
152 'ASCII_Hex_Digit' => 147,
153 'Other_Alphabetic' => 148,
154 'Ideographic' => 149,
157 'Other_Lowercase' => 152,
158 'Other_Uppercase' => 153,
159 'Noncharacter_Code_Point' => 154,
167 'ID_Continue' => 162,
171 'Alphabetic(?:[-_]|\s+)?Presentation(?:[-_]|\s+)?Forms' => 'Alphabetic Presentation Forms',
172 'Alphabetic' => 'Alphabetic',
178 'ARABIC' => 'ARABIC',
179 'Arabic(?:[-_]|\s+)?Block' => 'Arabic Block',
180 'Arabic(?:[-_]|\s+)?Presentation(?:[-_]|\s+)?Forms(?:[-_]|\s+)?A' => 'Arabic Presentation Forms-A',
181 'Arabic(?:[-_]|\s+)?Presentation(?:[-_]|\s+)?Forms(?:[-_]|\s+)?B' => 'Arabic Presentation Forms-B',
184 'ARMENIAN' => 'ARMENIAN',
185 'Armenian(?:[-_]|\s+)?Block' => 'Armenian Block',
188 'Arrows' => 'Arrows',
191 'ASCII(?:[-_]|\s+)?Hex(?:[-_]|\s+)?Digit' => 'ASCII_Hex_Digit',
194 'Assigned' => 'Assigned',
197 'Basic(?:[-_]|\s+)?Latin' => 'Basic Latin',
200 'BENGALI' => 'BENGALI',
201 'Bengali(?:[-_]|\s+)?Block' => 'Bengali Block',
204 'Bidi(?:[-_]|\s+)?Control' => 'Bidi_Control',
207 'Block(?:[-_]|\s+)?Elements' => 'Block Elements',
210 'BOPOMOFO' => 'BOPOMOFO',
211 'Bopomofo(?:[-_]|\s+)?Block' => 'Bopomofo Block',
212 'Bopomofo(?:[-_]|\s+)?Extended' => 'Bopomofo Extended',
215 'Box(?:[-_]|\s+)?Drawing' => 'Box Drawing',
218 'Braille(?:[-_]|\s+)?Patterns' => 'Braille Patterns',
221 'Byzantine(?:[-_]|\s+)?Musical(?:[-_]|\s+)?Symbols' => 'Byzantine Musical Symbols',
224 'CANADIAN(?:[-_]|\s+)?ABORIGINAL' => 'CANADIAN-ABORIGINAL',
227 'CHEROKEE' => 'CHEROKEE',
228 'Cherokee(?:[-_]|\s+)?Block' => 'Cherokee Block',
231 'CJK(?:[-_]|\s+)?Radicals(?:[-_]|\s+)?Supplement' => 'CJK Radicals Supplement',
232 'CJK(?:[-_]|\s+)?Symbols(?:[-_]|\s+)?and(?:[-_]|\s+)?Punctuation' => 'CJK Symbols and Punctuation',
233 'CJK(?:[-_]|\s+)?Compatibility' => 'CJK Compatibility',
234 'CJK(?:[-_]|\s+)?Unified(?:[-_]|\s+)?Ideographs(?:[-_]|\s+)?Extension(?:[-_]|\s+)?A' => 'CJK Unified Ideographs Extension A',
235 'CJK(?:[-_]|\s+)?Unified(?:[-_]|\s+)?Ideographs' => 'CJK Unified Ideographs',
236 'CJK(?:[-_]|\s+)?Compatibility(?:[-_]|\s+)?Ideographs' => 'CJK Compatibility Ideographs',
237 'CJK(?:[-_]|\s+)?Compatibility(?:[-_]|\s+)?Forms' => 'CJK Compatibility Forms',
238 'CJK(?:[-_]|\s+)?Unified(?:[-_]|\s+)?Ideographs(?:[-_]|\s+)?Extension(?:[-_]|\s+)?B' => 'CJK Unified Ideographs Extension B',
239 'CJK(?:[-_]|\s+)?Compatibility(?:[-_]|\s+)?Ideographs(?:[-_]|\s+)?Supplement' => 'CJK Compatibility Ideographs Supplement',
242 'Combining(?:[-_]|\s+)?Diacritical(?:[-_]|\s+)?Marks' => 'Combining Diacritical Marks',
243 'Combining(?:[-_]|\s+)?Marks(?:[-_]|\s+)?for(?:[-_]|\s+)?Symbols' => 'Combining Marks for Symbols',
244 'Combining(?:[-_]|\s+)?Half(?:[-_]|\s+)?Marks' => 'Combining Half Marks',
245 'Common' => 'Common',
248 'Control(?:[-_]|\s+)?Pictures' => 'Control Pictures',
251 'Currency(?:[-_]|\s+)?Symbols' => 'Currency Symbols',
254 'CYRILLIC' => 'CYRILLIC',
255 'Cyrillic(?:[-_]|\s+)?Block' => 'Cyrillic Block',
261 'DESERET' => 'DESERET',
262 'Deseret(?:[-_]|\s+)?Block' => 'Deseret Block',
265 'DEVANAGARI' => 'DEVANAGARI',
266 'Devanagari(?:[-_]|\s+)?Block' => 'Devanagari Block',
269 'Diacritic' => 'Diacritic',
272 'Dingbats' => 'Dingbats',
275 'Enclosed(?:[-_]|\s+)?Alphanumerics' => 'Enclosed Alphanumerics',
276 'Enclosed(?:[-_]|\s+)?CJK(?:[-_]|\s+)?Letters(?:[-_]|\s+)?and(?:[-_]|\s+)?Months' => 'Enclosed CJK Letters and Months',
279 'ETHIOPIC' => 'ETHIOPIC',
280 'Ethiopic(?:[-_]|\s+)?Block' => 'Ethiopic Block',
283 'Extender' => 'Extender',
286 'General(?:[-_]|\s+)?Punctuation' => 'General Punctuation',
289 'GEORGIAN' => 'GEORGIAN',
290 'Georgian(?:[-_]|\s+)?Block' => 'Georgian Block',
291 'Geometric(?:[-_]|\s+)?Shapes' => 'Geometric Shapes',
294 'GOTHIC' => 'GOTHIC',
295 'Gothic(?:[-_]|\s+)?Block' => 'Gothic Block',
299 'Greek(?:[-_]|\s+)?Block' => 'Greek Block',
300 'Greek(?:[-_]|\s+)?Extended' => 'Greek Extended',
303 'GUJARATI' => 'GUJARATI',
304 'Gujarati(?:[-_]|\s+)?Block' => 'Gujarati Block',
307 'GURMUKHI' => 'GURMUKHI',
308 'Gurmukhi(?:[-_]|\s+)?Block' => 'Gurmukhi Block',
311 'Halfwidth(?:[-_]|\s+)?and(?:[-_]|\s+)?Fullwidth(?:[-_]|\s+)?Forms' => 'Halfwidth and Fullwidth Forms',
314 'HANGUL' => 'HANGUL',
316 'Hangul(?:[-_]|\s+)?Jamo' => 'Hangul Jamo',
317 'Hangul(?:[-_]|\s+)?Compatibility(?:[-_]|\s+)?Jamo' => 'Hangul Compatibility Jamo',
318 'Hangul(?:[-_]|\s+)?Syllables' => 'Hangul Syllables',
321 'HEBREW' => 'HEBREW',
322 'Hebrew(?:[-_]|\s+)?Block' => 'Hebrew Block',
325 'Hex(?:[-_]|\s+)?Digit' => 'Hex_Digit',
328 'High(?:[-_]|\s+)?Surrogates' => 'High Surrogates',
329 'High(?:[-_]|\s+)?Private(?:[-_]|\s+)?Use(?:[-_]|\s+)?Surrogates' => 'High Private Use Surrogates',
332 'HIRAGANA' => 'HIRAGANA',
333 'Hiragana(?:[-_]|\s+)?Block' => 'Hiragana Block',
336 'Hyphen' => 'Hyphen',
339 'ID(?:[-_]|\s+)?Start' => 'ID_Start',
340 'ID(?:[-_]|\s+)?Continue' => 'ID_Continue',
343 'Ideographic(?:[-_]|\s+)?Description(?:[-_]|\s+)?Characters' => 'Ideographic Description Characters',
344 'Ideographic' => 'Ideographic',
347 'INHERITED' => 'INHERITED',
350 'IPA(?:[-_]|\s+)?Extensions' => 'IPA Extensions',
353 'Join(?:[-_]|\s+)?Control' => 'Join_Control',
356 'KANNADA' => 'KANNADA',
357 'Kannada(?:[-_]|\s+)?Block' => 'Kannada Block',
358 'Kangxi(?:[-_]|\s+)?Radicals' => 'Kangxi Radicals',
359 'Kanbun' => 'Kanbun',
362 'KATAKANA' => 'KATAKANA',
363 'Katakana(?:[-_]|\s+)?Block' => 'Katakana Block',
367 'Khmer(?:[-_]|\s+)?Block' => 'Khmer Block',
370 'Lampersand' => 'Lampersand',
374 'Lao(?:[-_]|\s+)?Block' => 'Lao Block',
378 'Latin(?:[-_]|\s+)?1(?:[-_]|\s+)?Supplement' => 'Latin-1 Supplement',
379 'Latin(?:[-_]|\s+)?Extended(?:[-_]|\s+)?A' => 'Latin Extended-A',
380 'Latin(?:[-_]|\s+)?Extended(?:[-_]|\s+)?B' => 'Latin Extended-B',
381 'Latin(?:[-_]|\s+)?Extended(?:[-_]|\s+)?Additional' => 'Latin Extended Additional',
384 'Letterlike(?:[-_]|\s+)?Symbols' => 'Letterlike Symbols',
387 'Low(?:[-_]|\s+)?Surrogates' => 'Low Surrogates',
388 'Lowercase' => 'Lowercase',
391 'MALAYALAM' => 'MALAYALAM',
392 'Malayalam(?:[-_]|\s+)?Block' => 'Malayalam Block',
395 'Mathematical(?:[-_]|\s+)?Operators' => 'Mathematical Operators',
396 'Mathematical(?:[-_]|\s+)?Alphanumeric(?:[-_]|\s+)?Symbols' => 'Mathematical Alphanumeric Symbols',
400 'Miscellaneous(?:[-_]|\s+)?Technical' => 'Miscellaneous Technical',
401 'Miscellaneous(?:[-_]|\s+)?Symbols' => 'Miscellaneous Symbols',
404 'MONGOLIAN' => 'MONGOLIAN',
405 'Mongolian(?:[-_]|\s+)?Block' => 'Mongolian Block',
408 'Musical(?:[-_]|\s+)?Symbols' => 'Musical Symbols',
411 'MYANMAR' => 'MYANMAR',
412 'Myanmar(?:[-_]|\s+)?Block' => 'Myanmar Block',
415 'Noncharacter(?:[-_]|\s+)?Code(?:[-_]|\s+)?Point' => 'Noncharacter_Code_Point',
418 'Number(?:[-_]|\s+)?Forms' => 'Number Forms',
422 'Ogham(?:[-_]|\s+)?Block' => 'Ogham Block',
425 'OLD(?:[-_]|\s+)?ITALIC' => 'OLD-ITALIC',
426 'Old(?:[-_]|\s+)?Italic' => 'Old Italic',
429 'Optical(?:[-_]|\s+)?Character(?:[-_]|\s+)?Recognition' => 'Optical Character Recognition',
433 'Oriya(?:[-_]|\s+)?Block' => 'Oriya Block',
436 'Other(?:[-_]|\s+)?Math' => 'Other_Math',
437 'Other(?:[-_]|\s+)?Alphabetic' => 'Other_Alphabetic',
438 'Other(?:[-_]|\s+)?Lowercase' => 'Other_Lowercase',
439 'Other(?:[-_]|\s+)?Uppercase' => 'Other_Uppercase',
442 'Private(?:[-_]|\s+)?Use' => 'Private Use',
445 'Quotation(?:[-_]|\s+)?Mark' => 'Quotation_Mark',
449 'Runic(?:[-_]|\s+)?Block' => 'Runic Block',
452 'SINHALA' => 'SINHALA',
453 'Sinhala(?:[-_]|\s+)?Block' => 'Sinhala Block',
456 'Small(?:[-_]|\s+)?Form(?:[-_]|\s+)?Variants' => 'Small Form Variants',
459 'Spacing(?:[-_]|\s+)?Modifier(?:[-_]|\s+)?Letters' => 'Spacing Modifier Letters',
462 'Specials' => 'Specials',
465 'Superscripts(?:[-_]|\s+)?and(?:[-_]|\s+)?Subscripts' => 'Superscripts and Subscripts',
468 'SYRIAC' => 'SYRIAC',
469 'Syriac(?:[-_]|\s+)?Block' => 'Syriac Block',
476 'Tamil(?:[-_]|\s+)?Block' => 'Tamil Block',
479 'TELUGU' => 'TELUGU',
480 'Telugu(?:[-_]|\s+)?Block' => 'Telugu Block',
483 'Terminal(?:[-_]|\s+)?Punctuation' => 'Terminal_Punctuation',
486 'THAANA' => 'THAANA',
488 'Thaana(?:[-_]|\s+)?Block' => 'Thaana Block',
489 'Thai(?:[-_]|\s+)?Block' => 'Thai Block',
492 'TIBETAN' => 'TIBETAN',
493 'Tibetan(?:[-_]|\s+)?Block' => 'Tibetan Block',
496 'Unified(?:[-_]|\s+)?Canadian(?:[-_]|\s+)?Aboriginal(?:[-_]|\s+)?Syllabics' => 'Unified Canadian Aboriginal Syllabics',
499 'Uppercase' => 'Uppercase',
502 'White(?:[-_]|\s+)?space' => 'White_space',
508 'Yi(?:[-_]|\s+)?Syllables' => 'Yi Syllables',
509 'Yi(?:[-_]|\s+)?Radicals' => 'Yi Radicals',