Add the special casing mappings (from SpecCase.txt)
[p5sagit/p5-mst-13.2.git] / lib / unicore / In.pl
CommitLineData
9fdf68be 1# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
d73e5302 2# This file is built by mktables from e.g. Unicode.txt.
9fdf68be 3# Any changes made here will be lost!
d73e5302 4%utf8::In =
5(
6'ARABIC' => '16',
7'ARMENIAN' => '14',
8'ASCII_Hex_Digit' => '152',
9'Alphabetic' => '164',
10'Alphabetic Presentation Forms' => '129',
11'Any' => '171',
12'Arabic Block' => '62',
13'Arabic Presentation Forms-A' => '130',
14'Arabic Presentation Forms-B' => '134',
15'Armenian Block' => '60',
16'Arrows' => '96',
17'Assigned' => '163',
18'BENGALI' => '20',
19'BOPOMOFO' => '45',
20'Basic Latin' => '51',
21'Bengali Block' => '66',
22'Bidi_Control' => '159',
23'Block Elements' => '103',
24'Bopomofo Block' => '114',
25'Bopomofo Extended' => '117',
26'Box Drawing' => '102',
27'Braille Patterns' => '107',
28'Byzantine Musical Symbols' => '140',
29'CANADIAN-ABORIGINAL' => '37',
30'CHEROKEE' => '36',
31'CJK Compatibility' => '119',
32'CJK Compatibility Forms' => '132',
33'CJK Compatibility Ideographs' => '128',
34'CJK Compatibility Ideographs Supplement' => '144',
35'CJK Ideograph' => '1',
36'CJK Ideograph Extension A' => '0',
37'CJK Ideograph Extension B' => '7',
38'CJK Radicals Supplement' => '108',
39'CJK Symbols and Punctuation' => '111',
40'CJK Unified Ideographs' => '121',
41'CJK Unified Ideographs Extension A' => '120',
42'CJK Unified Ideographs Extension B' => '143',
43'CYRILLIC' => '13',
44'Cherokee Block' => '82',
45'Combining Diacritical Marks' => '57',
46'Combining Half Marks' => '131',
47'Combining Marks for Symbols' => '93',
48'Common' => '50',
49'Control Pictures' => '99',
50'Currency Symbols' => '92',
51'Cyrillic Block' => '59',
52'DESERET' => '49',
53'DEVANAGARI' => '19',
54'Dash' => '151',
55'Deseret Block' => '139',
56'Devanagari Block' => '65',
57'Diacritic' => '154',
58'Dingbats' => '106',
59'ETHIOPIC' => '35',
60'Enclosed Alphanumerics' => '101',
61'Enclosed CJK Letters and Months' => '118',
62'Ethiopic Block' => '81',
63'Extender' => '155',
64'GEORGIAN' => '33',
65'GOTHIC' => '48',
66'GREEK' => '11',
67'GUJARATI' => '22',
68'GURMUKHI' => '21',
69'General Punctuation' => '90',
70'Geometric Shapes' => '104',
71'Georgian Block' => '79',
72'Gothic Block' => '138',
73'Greek Block' => '58',
74'Greek Extended' => '89',
75'Gujarati Block' => '68',
76'Gurmukhi Block' => '67',
77'HAN' => '42',
78'HANGUL' => '34',
79'HEBREW' => '15',
80'HIRAGANA' => '43',
81'Halfwidth and Fullwidth Forms' => '136',
82'Hangul Compatibility Jamo' => '115',
83'Hangul Jamo' => '80',
84'Hangul Syllable' => '2',
85'Hangul Syllables' => '124',
86'Hebrew Block' => '61',
87'Hex_Digit' => '153',
88'High Private Use Surrogates' => '126',
89'High Surrogates' => '125',
90'Hiragana Block' => '112',
91'Hyphen' => '150',
92'ID_Continue' => '170',
93'ID_Start' => '169',
94'INHERITED' => '12',
95'IPA Extensions' => '55',
96'Ideographic' => '161',
97'Ideographic Description Characters' => '110',
98'Join_Control' => '158',
99'KANNADA' => '26',
100'KATAKANA' => '44',
101'KHMER' => '40',
102'Kanbun' => '116',
103'Kangxi Radicals' => '109',
104'Kannada Block' => '72',
105'Katakana Block' => '113',
106'Khmer Block' => '86',
107'LAO' => '30',
108'LATIN' => '10',
109'Lampersand' => '168',
110'Lao Block' => '76',
111'Latin Extended Additional' => '88',
112'Latin Extended-A' => '53',
113'Latin Extended-B' => '54',
114'Latin-1 Supplement' => '52',
115'Letterlike Symbols' => '94',
116'Low Surrogate' => '5',
117'Low Surrogates' => '127',
118'Lowercase' => '165',
119'MALAYALAM' => '27',
120'MONGOLIAN' => '41',
121'MYANMAR' => '32',
122'Malayalam Block' => '73',
123'Math' => '167',
124'Mathematical Alphanumeric Symbols' => '142',
125'Mathematical Operators' => '97',
126'Miscellaneous Symbols' => '105',
127'Miscellaneous Technical' => '98',
128'Mongolian Block' => '87',
129'Musical Symbols' => '141',
130'Myanmar Block' => '78',
131'Non Private Use High Surrogate' => '3',
132'Noncharacter_Code_Point' => '162',
133'Number Forms' => '95',
134'OGHAM' => '38',
135'OLD-ITALIC' => '47',
136'ORIYA' => '23',
137'Ogham Block' => '84',
138'Old Italic' => '137',
139'Optical Character Recognition' => '100',
140'Oriya Block' => '69',
141'Other_Alphabetic' => '157',
142'Other_Lowercase' => '156',
143'Other_Math' => '149',
144'Other_Uppercase' => '160',
145'Plane 15 Private Use' => '8',
146'Plane 16 Private Use' => '9',
147'Private Use' => '6',
148'Private Use High Surrogate' => '4',
149'Quotation_Mark' => '148',
150'RUNIC' => '39',
151'Runic Block' => '85',
152'SINHALA' => '28',
153'SYRIAC' => '17',
154'Sinhala Block' => '74',
155'Small Form Variants' => '133',
156'Spacing Modifier Letters' => '56',
157'Specials' => '135',
158'Superscripts and Subscripts' => '91',
159'Syriac Block' => '63',
160'TAMIL' => '24',
161'TELUGU' => '25',
162'THAANA' => '18',
163'THAI' => '29',
164'TIBETAN' => '31',
165'Tags' => '145',
166'Tamil Block' => '70',
167'Telugu Block' => '71',
168'Terminal_Punctuation' => '147',
169'Thaana Block' => '64',
170'Thai Block' => '75',
171'Tibetan Block' => '77',
172'Unified Canadian Aboriginal Syllabics' => '83',
173'Uppercase' => '166',
174'White_space' => '146',
175'YI' => '46',
176'Yi Radicals' => '123',
177'Yi Syllables' => '122',
d9efae67 178);
d73e5302 179%utf8::InPat =
180(
c8b5a1e3 181'al' => {
d73e5302 182 'Alphabetic' => '164',
183 'Alphabetic(?:[-_]|\s+)?Presentation(?:[-_]|\s+)?Forms' => '129',
1ac13f9a 184},
c8b5a1e3 185'an' => {
d73e5302 186 'Any' => '171',
d9efae67 187},
c8b5a1e3 188'ar' => {
d73e5302 189 'ARABIC' => '16',
190 'ARMENIAN' => '14',
191 'Arabic(?:[-_]|\s+)?Block' => '62',
192 'Arabic(?:[-_]|\s+)?Presentation(?:[-_]|\s+)?Forms(?:[-_]|\s+)?A' => '130',
193 'Arabic(?:[-_]|\s+)?Presentation(?:[-_]|\s+)?Forms(?:[-_]|\s+)?B' => '134',
194 'Armenian(?:[-_]|\s+)?Block' => '60',
195 'Arrows' => '96',
d9efae67 196},
c8b5a1e3 197'as' => {
d73e5302 198 'ASCII(?:[-_]|\s+)?Hex(?:[-_]|\s+)?Digit' => '152',
199 'Assigned' => '163',
1ac13f9a 200},
c8b5a1e3 201'ba' => {
d73e5302 202 'Basic(?:[-_]|\s+)?Latin' => '51',
d9efae67 203},
c8b5a1e3 204'be' => {
d73e5302 205 'BENGALI' => '20',
206 'Bengali(?:[-_]|\s+)?Block' => '66',
d9efae67 207},
c8b5a1e3 208'bi' => {
d73e5302 209 'Bidi(?:[-_]|\s+)?Control' => '159',
1ac13f9a 210},
c8b5a1e3 211'bl' => {
d73e5302 212 'Block(?:[-_]|\s+)?Elements' => '103',
d9efae67 213},
c8b5a1e3 214'bo' => {
d73e5302 215 'BOPOMOFO' => '45',
216 'Bopomofo(?:[-_]|\s+)?Block' => '114',
217 'Bopomofo(?:[-_]|\s+)?Extended' => '117',
218 'Box(?:[-_]|\s+)?Drawing' => '102',
d9efae67 219},
c8b5a1e3 220'br' => {
d73e5302 221 'Braille(?:[-_]|\s+)?Patterns' => '107',
d9efae67 222},
c8b5a1e3 223'by' => {
d73e5302 224 'Byzantine(?:[-_]|\s+)?Musical(?:[-_]|\s+)?Symbols' => '140',
d9efae67 225},
c8b5a1e3 226'ca' => {
d73e5302 227 'CANADIAN(?:[-_]|\s+)?ABORIGINAL' => '37',
d9efae67 228},
c8b5a1e3 229'ch' => {
d73e5302 230 'CHEROKEE' => '36',
231 'Cherokee(?:[-_]|\s+)?Block' => '82',
d9efae67 232},
c8b5a1e3 233'cj' => {
d73e5302 234 'CJK(?:[-_]|\s+)?Compatibility' => '119',
235 'CJK(?:[-_]|\s+)?Compatibility(?:[-_]|\s+)?Forms' => '132',
236 'CJK(?:[-_]|\s+)?Compatibility(?:[-_]|\s+)?Ideographs' => '128',
237 'CJK(?:[-_]|\s+)?Compatibility(?:[-_]|\s+)?Ideographs(?:[-_]|\s+)?Supplement' => '144',
238 'CJK(?:[-_]|\s+)?Ideograph' => '1',
239 'CJK(?:[-_]|\s+)?Ideograph(?:[-_]|\s+)?Extension(?:[-_]|\s+)?A' => '0',
240 'CJK(?:[-_]|\s+)?Ideograph(?:[-_]|\s+)?Extension(?:[-_]|\s+)?B' => '7',
241 'CJK(?:[-_]|\s+)?Radicals(?:[-_]|\s+)?Supplement' => '108',
242 'CJK(?:[-_]|\s+)?Symbols(?:[-_]|\s+)?and(?:[-_]|\s+)?Punctuation' => '111',
243 'CJK(?:[-_]|\s+)?Unified(?:[-_]|\s+)?Ideographs' => '121',
244 'CJK(?:[-_]|\s+)?Unified(?:[-_]|\s+)?Ideographs(?:[-_]|\s+)?Extension(?:[-_]|\s+)?A' => '120',
245 'CJK(?:[-_]|\s+)?Unified(?:[-_]|\s+)?Ideographs(?:[-_]|\s+)?Extension(?:[-_]|\s+)?B' => '143',
d9efae67 246},
c8b5a1e3 247'co' => {
d73e5302 248 'Combining(?:[-_]|\s+)?Diacritical(?:[-_]|\s+)?Marks' => '57',
249 'Combining(?:[-_]|\s+)?Half(?:[-_]|\s+)?Marks' => '131',
250 'Combining(?:[-_]|\s+)?Marks(?:[-_]|\s+)?for(?:[-_]|\s+)?Symbols' => '93',
251 'Common' => '50',
252 'Control(?:[-_]|\s+)?Pictures' => '99',
d9efae67 253},
c8b5a1e3 254'cu' => {
d73e5302 255 'Currency(?:[-_]|\s+)?Symbols' => '92',
d9efae67 256},
c8b5a1e3 257'cy' => {
d73e5302 258 'CYRILLIC' => '13',
259 'Cyrillic(?:[-_]|\s+)?Block' => '59',
d9efae67 260},
c8b5a1e3 261'da' => {
d73e5302 262 'Dash' => '151',
1ac13f9a 263},
c8b5a1e3 264'de' => {
d73e5302 265 'DESERET' => '49',
266 'DEVANAGARI' => '19',
267 'Deseret(?:[-_]|\s+)?Block' => '139',
268 'Devanagari(?:[-_]|\s+)?Block' => '65',
d9efae67 269},
c8b5a1e3 270'di' => {
d73e5302 271 'Diacritic' => '154',
272 'Dingbats' => '106',
d9efae67 273},
c8b5a1e3 274'en' => {
d73e5302 275 'Enclosed(?:[-_]|\s+)?Alphanumerics' => '101',
276 'Enclosed(?:[-_]|\s+)?CJK(?:[-_]|\s+)?Letters(?:[-_]|\s+)?and(?:[-_]|\s+)?Months' => '118',
d9efae67 277},
c8b5a1e3 278'et' => {
d73e5302 279 'ETHIOPIC' => '35',
280 'Ethiopic(?:[-_]|\s+)?Block' => '81',
d9efae67 281},
c8b5a1e3 282'ex' => {
d73e5302 283 'Extender' => '155',
1ac13f9a 284},
c8b5a1e3 285'ge' => {
d73e5302 286 'GEORGIAN' => '33',
287 'General(?:[-_]|\s+)?Punctuation' => '90',
288 'Geometric(?:[-_]|\s+)?Shapes' => '104',
289 'Georgian(?:[-_]|\s+)?Block' => '79',
d9efae67 290},
c8b5a1e3 291'go' => {
d73e5302 292 'GOTHIC' => '48',
293 'Gothic(?:[-_]|\s+)?Block' => '138',
d9efae67 294},
c8b5a1e3 295'gr' => {
d73e5302 296 'GREEK' => '11',
297 'Greek(?:[-_]|\s+)?Block' => '58',
298 'Greek(?:[-_]|\s+)?Extended' => '89',
d9efae67 299},
c8b5a1e3 300'gu' => {
d73e5302 301 'GUJARATI' => '22',
302 'GURMUKHI' => '21',
303 'Gujarati(?:[-_]|\s+)?Block' => '68',
304 'Gurmukhi(?:[-_]|\s+)?Block' => '67',
d9efae67 305},
c8b5a1e3 306'ha' => {
d73e5302 307 'HAN' => '42',
308 'HANGUL' => '34',
309 'Halfwidth(?:[-_]|\s+)?and(?:[-_]|\s+)?Fullwidth(?:[-_]|\s+)?Forms' => '136',
310 'Hangul(?:[-_]|\s+)?Compatibility(?:[-_]|\s+)?Jamo' => '115',
311 'Hangul(?:[-_]|\s+)?Jamo' => '80',
312 'Hangul(?:[-_]|\s+)?Syllable' => '2',
313 'Hangul(?:[-_]|\s+)?Syllables' => '124',
d9efae67 314},
c8b5a1e3 315'he' => {
d73e5302 316 'HEBREW' => '15',
317 'Hebrew(?:[-_]|\s+)?Block' => '61',
318 'Hex(?:[-_]|\s+)?Digit' => '153',
1ac13f9a 319},
c8b5a1e3 320'hi' => {
d73e5302 321 'HIRAGANA' => '43',
322 'High(?:[-_]|\s+)?Private(?:[-_]|\s+)?Use(?:[-_]|\s+)?Surrogates' => '126',
323 'High(?:[-_]|\s+)?Surrogates' => '125',
324 'Hiragana(?:[-_]|\s+)?Block' => '112',
d9efae67 325},
c8b5a1e3 326'hy' => {
d73e5302 327 'Hyphen' => '150',
1ac13f9a 328},
c8b5a1e3 329'id' => {
d73e5302 330 'ID(?:[-_]|\s+)?Continue' => '170',
331 'ID(?:[-_]|\s+)?Start' => '169',
332 'Ideographic' => '161',
333 'Ideographic(?:[-_]|\s+)?Description(?:[-_]|\s+)?Characters' => '110',
d9efae67 334},
c8b5a1e3 335'in' => {
d73e5302 336 'INHERITED' => '12',
d9efae67 337},
c8b5a1e3 338'ip' => {
d73e5302 339 'IPA(?:[-_]|\s+)?Extensions' => '55',
d9efae67 340},
c8b5a1e3 341'jo' => {
d73e5302 342 'Join(?:[-_]|\s+)?Control' => '158',
1ac13f9a 343},
c8b5a1e3 344'ka' => {
d73e5302 345 'KANNADA' => '26',
346 'KATAKANA' => '44',
347 'Kanbun' => '116',
348 'Kangxi(?:[-_]|\s+)?Radicals' => '109',
349 'Kannada(?:[-_]|\s+)?Block' => '72',
350 'Katakana(?:[-_]|\s+)?Block' => '113',
d9efae67 351},
c8b5a1e3 352'kh' => {
d73e5302 353 'KHMER' => '40',
354 'Khmer(?:[-_]|\s+)?Block' => '86',
d9efae67 355},
c8b5a1e3 356'la' => {
d73e5302 357 'LAO' => '30',
358 'LATIN' => '10',
359 'Lampersand' => '168',
360 'Lao(?:[-_]|\s+)?Block' => '76',
361 'Latin(?:[-_]|\s+)?Extended(?:[-_]|\s+)?Additional' => '88',
362 'Latin(?:[-_]|\s+)?Extended(?:[-_]|\s+)?A' => '53',
363 'Latin(?:[-_]|\s+)?Extended(?:[-_]|\s+)?B' => '54',
364 'Latin(?:[-_]|\s+)?1(?:[-_]|\s+)?Supplement' => '52',
d9efae67 365},
c8b5a1e3 366'le' => {
d73e5302 367 'Letterlike(?:[-_]|\s+)?Symbols' => '94',
d9efae67 368},
c8b5a1e3 369'lo' => {
d73e5302 370 'Low(?:[-_]|\s+)?Surrogate' => '5',
371 'Low(?:[-_]|\s+)?Surrogates' => '127',
372 'Lowercase' => '165',
d9efae67 373},
c8b5a1e3 374'ma' => {
d73e5302 375 'MALAYALAM' => '27',
376 'Malayalam(?:[-_]|\s+)?Block' => '73',
377 'Math' => '167',
378 'Mathematical(?:[-_]|\s+)?Alphanumeric(?:[-_]|\s+)?Symbols' => '142',
379 'Mathematical(?:[-_]|\s+)?Operators' => '97',
d9efae67 380},
c8b5a1e3 381'mi' => {
d73e5302 382 'Miscellaneous(?:[-_]|\s+)?Symbols' => '105',
383 'Miscellaneous(?:[-_]|\s+)?Technical' => '98',
d9efae67 384},
c8b5a1e3 385'mo' => {
d73e5302 386 'MONGOLIAN' => '41',
387 'Mongolian(?:[-_]|\s+)?Block' => '87',
d9efae67 388},
c8b5a1e3 389'mu' => {
d73e5302 390 'Musical(?:[-_]|\s+)?Symbols' => '141',
d9efae67 391},
c8b5a1e3 392'my' => {
d73e5302 393 'MYANMAR' => '32',
394 'Myanmar(?:[-_]|\s+)?Block' => '78',
d9efae67 395},
c8b5a1e3 396'no' => {
d73e5302 397 'Non(?:[-_]|\s+)?Private(?:[-_]|\s+)?Use(?:[-_]|\s+)?High(?:[-_]|\s+)?Surrogate' => '3',
398 'Noncharacter(?:[-_]|\s+)?Code(?:[-_]|\s+)?Point' => '162',
1ac13f9a 399},
c8b5a1e3 400'nu' => {
d73e5302 401 'Number(?:[-_]|\s+)?Forms' => '95',
d9efae67 402},
c8b5a1e3 403'og' => {
d73e5302 404 'OGHAM' => '38',
405 'Ogham(?:[-_]|\s+)?Block' => '84',
d9efae67 406},
c8b5a1e3 407'ol' => {
d73e5302 408 'OLD(?:[-_]|\s+)?ITALIC' => '47',
409 'Old(?:[-_]|\s+)?Italic' => '137',
d9efae67 410},
c8b5a1e3 411'op' => {
d73e5302 412 'Optical(?:[-_]|\s+)?Character(?:[-_]|\s+)?Recognition' => '100',
d9efae67 413},
c8b5a1e3 414'or' => {
d73e5302 415 'ORIYA' => '23',
416 'Oriya(?:[-_]|\s+)?Block' => '69',
d9efae67 417},
c8b5a1e3 418'ot' => {
d73e5302 419 'Other(?:[-_]|\s+)?Alphabetic' => '157',
420 'Other(?:[-_]|\s+)?Lowercase' => '156',
421 'Other(?:[-_]|\s+)?Math' => '149',
422 'Other(?:[-_]|\s+)?Uppercase' => '160',
423},
424'pl' => {
425 'Plane(?:[-_]|\s+)?15(?:[-_]|\s+)?Private(?:[-_]|\s+)?Use' => '8',
426 'Plane(?:[-_]|\s+)?16(?:[-_]|\s+)?Private(?:[-_]|\s+)?Use' => '9',
1ac13f9a 427},
c8b5a1e3 428'pr' => {
d73e5302 429 'Private(?:[-_]|\s+)?Use' => '6',
430 'Private(?:[-_]|\s+)?Use(?:[-_]|\s+)?High(?:[-_]|\s+)?Surrogate' => '4',
d9efae67 431},
c8b5a1e3 432'qu' => {
d73e5302 433 'Quotation(?:[-_]|\s+)?Mark' => '148',
1ac13f9a 434},
c8b5a1e3 435'ru' => {
d73e5302 436 'RUNIC' => '39',
437 'Runic(?:[-_]|\s+)?Block' => '85',
d9efae67 438},
c8b5a1e3 439'si' => {
d73e5302 440 'SINHALA' => '28',
441 'Sinhala(?:[-_]|\s+)?Block' => '74',
d9efae67 442},
c8b5a1e3 443'sm' => {
d73e5302 444 'Small(?:[-_]|\s+)?Form(?:[-_]|\s+)?Variants' => '133',
d9efae67 445},
c8b5a1e3 446'sp' => {
d73e5302 447 'Spacing(?:[-_]|\s+)?Modifier(?:[-_]|\s+)?Letters' => '56',
448 'Specials' => '135',
d9efae67 449},
c8b5a1e3 450'su' => {
d73e5302 451 'Superscripts(?:[-_]|\s+)?and(?:[-_]|\s+)?Subscripts' => '91',
d9efae67 452},
c8b5a1e3 453'sy' => {
d73e5302 454 'SYRIAC' => '17',
455 'Syriac(?:[-_]|\s+)?Block' => '63',
d9efae67 456},
c8b5a1e3 457'ta' => {
d73e5302 458 'TAMIL' => '24',
459 'Tags' => '145',
460 'Tamil(?:[-_]|\s+)?Block' => '70',
d9efae67 461},
c8b5a1e3 462'te' => {
d73e5302 463 'TELUGU' => '25',
464 'Telugu(?:[-_]|\s+)?Block' => '71',
465 'Terminal(?:[-_]|\s+)?Punctuation' => '147',
1ac13f9a 466},
c8b5a1e3 467'th' => {
d73e5302 468 'THAANA' => '18',
469 'THAI' => '29',
470 'Thaana(?:[-_]|\s+)?Block' => '64',
471 'Thai(?:[-_]|\s+)?Block' => '75',
d9efae67 472},
c8b5a1e3 473'ti' => {
d73e5302 474 'TIBETAN' => '31',
475 'Tibetan(?:[-_]|\s+)?Block' => '77',
d9efae67 476},
c8b5a1e3 477'un' => {
d73e5302 478 'Unified(?:[-_]|\s+)?Canadian(?:[-_]|\s+)?Aboriginal(?:[-_]|\s+)?Syllabics' => '83',
d9efae67 479},
c8b5a1e3 480'up' => {
d73e5302 481 'Uppercase' => '166',
1ac13f9a 482},
c8b5a1e3 483'wh' => {
d73e5302 484 'White(?:[-_]|\s+)?space' => '146',
1ac13f9a 485},
d9efae67 486'yi' => {
d73e5302 487 'YI' => '46',
488 'Yi(?:[-_]|\s+)?Radicals' => '123',
489 'Yi(?:[-_]|\s+)?Syllables' => '122',
d9efae67 490},
9fdf68be 491);