Yet more Unicode properties.
[p5sagit/p5-mst-13.2.git] / lib / unicore / In.pl
1 # !!!!!!!   DO NOT EDIT THIS FILE   !!!!!!! 
2 # This file is built by mktables.PL from e.g. Unicode.txt.
3 # Any changes made here will be lost!
4 %utf8::In = (
5 'LATIN'                                       =>   0,
6 'GREEK'                                       =>   1,
7 'CYRILLIC'                                    =>   2,
8 'ARMENIAN'                                    =>   3,
9 'HEBREW'                                      =>   4,
10 'ARABIC'                                      =>   5,
11 'SYRIAC'                                      =>   6,
12 'THAANA'                                      =>   7,
13 'DEVANAGARI'                                  =>   8,
14 'BENGALI'                                     =>   9,
15 'GURMUKHI'                                    =>  10,
16 'GUJARATI'                                    =>  11,
17 'ORIYA'                                       =>  12,
18 'TAMIL'                                       =>  13,
19 'TELUGU'                                      =>  14,
20 'KANNADA'                                     =>  15,
21 'MALAYALAM'                                   =>  16,
22 'SINHALA'                                     =>  17,
23 'THAI'                                        =>  18,
24 'LAO'                                         =>  19,
25 'TIBETAN'                                     =>  20,
26 'MYANMAR'                                     =>  21,
27 'GEORGIAN'                                    =>  22,
28 'HANGUL'                                      =>  23,
29 'ETHIOPIC'                                    =>  24,
30 'CHEROKEE'                                    =>  25,
31 'CANADIAN-ABORIGINAL'                         =>  26,
32 'OGHAM'                                       =>  27,
33 'RUNIC'                                       =>  28,
34 'KHMER'                                       =>  29,
35 'MONGOLIAN'                                   =>  30,
36 'HIRAGANA'                                    =>  31,
37 'KATAKANA'                                    =>  32,
38 'BOPOMOFO'                                    =>  33,
39 'HAN'                                         =>  34,
40 'YI'                                          =>  35,
41 'OLD-ITALIC'                                  =>  36,
42 'GOTHIC'                                      =>  37,
43 'DESERET'                                     =>  38,
44 'INHERITED'                                   =>  39,
45 'Basic Latin'                                 =>  40,
46 'Latin-1 Supplement'                          =>  41,
47 'Latin Extended-A'                            =>  42,
48 'Latin Extended-B'                            =>  43,
49 'IPA Extensions'                              =>  44,
50 'Spacing Modifier Letters'                    =>  45,
51 'Combining Diacritical Marks'                 =>  46,
52 'Greek Block'                                 =>  47,
53 'Cyrillic Block'                              =>  48,
54 'Armenian Block'                              =>  49,
55 'Hebrew Block'                                =>  50,
56 'Arabic Block'                                =>  51,
57 'Syriac Block'                                =>  52,
58 'Thaana Block'                                =>  53,
59 'Devanagari Block'                            =>  54,
60 'Bengali Block'                               =>  55,
61 'Gurmukhi Block'                              =>  56,
62 'Gujarati Block'                              =>  57,
63 'Oriya Block'                                 =>  58,
64 'Tamil Block'                                 =>  59,
65 'Telugu Block'                                =>  60,
66 'Kannada Block'                               =>  61,
67 'Malayalam Block'                             =>  62,
68 'Sinhala Block'                               =>  63,
69 'Thai Block'                                  =>  64,
70 'Lao Block'                                   =>  65,
71 'Tibetan Block'                               =>  66,
72 'Myanmar Block'                               =>  67,
73 'Georgian Block'                              =>  68,
74 'Hangul Jamo'                                 =>  69,
75 'Ethiopic Block'                              =>  70,
76 'Cherokee Block'                              =>  71,
77 'Unified Canadian Aboriginal Syllabics'       =>  72,
78 'Ogham Block'                                 =>  73,
79 'Runic Block'                                 =>  74,
80 'Khmer Block'                                 =>  75,
81 'Mongolian Block'                             =>  76,
82 'Latin Extended Additional'                   =>  77,
83 'Greek Extended'                              =>  78,
84 'General Punctuation'                         =>  79,
85 'Superscripts and Subscripts'                 =>  80,
86 'Currency Symbols'                            =>  81,
87 'Combining Marks for Symbols'                 =>  82,
88 'Letterlike Symbols'                          =>  83,
89 'Number Forms'                                =>  84,
90 'Arrows'                                      =>  85,
91 'Mathematical Operators'                      =>  86,
92 'Miscellaneous Technical'                     =>  87,
93 'Control Pictures'                            =>  88,
94 'Optical Character Recognition'               =>  89,
95 'Enclosed Alphanumerics'                      =>  90,
96 'Box Drawing'                                 =>  91,
97 'Block Elements'                              =>  92,
98 'Geometric Shapes'                            =>  93,
99 'Miscellaneous Symbols'                       =>  94,
100 'Dingbats'                                    =>  95,
101 'Braille Patterns'                            =>  96,
102 'CJK Radicals Supplement'                     =>  97,
103 'Kangxi Radicals'                             =>  98,
104 'Ideographic Description Characters'          =>  99,
105 'CJK Symbols and Punctuation'                 => 100,
106 'Hiragana Block'                              => 101,
107 'Katakana Block'                              => 102,
108 'Bopomofo Block'                              => 103,
109 'Hangul Compatibility Jamo'                   => 104,
110 'Kanbun'                                      => 105,
111 'Bopomofo Extended'                           => 106,
112 'Enclosed CJK Letters and Months'             => 107,
113 'CJK Compatibility'                           => 108,
114 'CJK Unified Ideographs Extension A'          => 109,
115 'CJK Unified Ideographs'                      => 110,
116 'Yi Syllables'                                => 111,
117 'Yi Radicals'                                 => 112,
118 'Hangul Syllables'                            => 113,
119 'High Surrogates'                             => 114,
120 'High Private Use Surrogates'                 => 115,
121 'Low Surrogates'                              => 116,
122 'Private Use'                                 => 117,
123 'CJK Compatibility Ideographs'                => 118,
124 'Alphabetic Presentation Forms'               => 119,
125 'Arabic Presentation Forms-A'                 => 120,
126 'Combining Half Marks'                        => 121,
127 'CJK Compatibility Forms'                     => 122,
128 'Small Form Variants'                         => 123,
129 'Arabic Presentation Forms-B'                 => 124,
130 'Specials'                                    => 125,
131 'Halfwidth and Fullwidth Forms'               => 126,
132 'Old Italic'                                  => 127,
133 'Gothic Block'                                => 128,
134 'Deseret Block'                               => 129,
135 'Byzantine Musical Symbols'                   => 130,
136 'Musical Symbols'                             => 131,
137 'Mathematical Alphanumeric Symbols'           => 132,
138 'CJK Unified Ideographs Extension B'          => 133,
139 'CJK Compatibility Ideographs Supplement'     => 134,
140 'Tags'                                        => 135,
141 'Common'                                      => 136,
142 'Any'                                         => 137,
143 'White_space'                                 => 138,
144 'Bidi_Control'                                => 139,
145 'Join_Control'                                => 140,
146 'Dash'                                        => 141,
147 'Hyphen'                                      => 142,
148 'Quotation_Mark'                              => 143,
149 'Terminal_Punctuation'                        => 144,
150 'Other_Math'                                  => 145,
151 'Hex_Digit'                                   => 146,
152 'ASCII_Hex_Digit'                             => 147,
153 'Other_Alphabetic'                            => 148,
154 'Ideographic'                                 => 149,
155 'Diacritic'                                   => 150,
156 'Extender'                                    => 151,
157 'Other_Lowercase'                             => 152,
158 'Other_Uppercase'                             => 153,
159 'Noncharacter_Code_Point'                     => 154,
160 'Assigned'                                    => 155,
161 'Alphabetic'                                  => 156,
162 'Lowercase'                                   => 157,
163 'Uppercase'                                   => 158,
164 'Math'                                        => 159,
165 'Lampersand'                                  => 160,
166 'ID_Start'                                    => 161,
167 'ID_Continue'                                 => 162,
168 );
169 %utf8::InPat = (
170 'alp' => {
171         'Alphabetic(?:[-_]|\s+)?Presentation(?:[-_]|\s+)?Forms' => 'Alphabetic Presentation Forms',
172         'Alphabetic' => 'Alphabetic',
173 },
174 'any' => {
175         'Any' => 'Any',
176 },
177 'ara' => {
178         'ARABIC' => 'ARABIC',
179         'Arabic(?:[-_]|\s+)?Block' => 'Arabic Block',
180         'Arabic(?:[-_]|\s+)?Presentation(?:[-_]|\s+)?Forms(?:[-_]|\s+)?A' => 'Arabic Presentation Forms-A',
181         'Arabic(?:[-_]|\s+)?Presentation(?:[-_]|\s+)?Forms(?:[-_]|\s+)?B' => 'Arabic Presentation Forms-B',
182 },
183 'arm' => {
184         'ARMENIAN' => 'ARMENIAN',
185         'Armenian(?:[-_]|\s+)?Block' => 'Armenian Block',
186 },
187 'arr' => {
188         'Arrows' => 'Arrows',
189 },
190 'asc' => {
191         'ASCII(?:[-_]|\s+)?Hex(?:[-_]|\s+)?Digit' => 'ASCII_Hex_Digit',
192 },
193 'ass' => {
194         'Assigned' => 'Assigned',
195 },
196 'bas' => {
197         'Basic(?:[-_]|\s+)?Latin' => 'Basic Latin',
198 },
199 'ben' => {
200         'BENGALI' => 'BENGALI',
201         'Bengali(?:[-_]|\s+)?Block' => 'Bengali Block',
202 },
203 'bid' => {
204         'Bidi(?:[-_]|\s+)?Control' => 'Bidi_Control',
205 },
206 'blo' => {
207         'Block(?:[-_]|\s+)?Elements' => 'Block Elements',
208 },
209 'bop' => {
210         'BOPOMOFO' => 'BOPOMOFO',
211         'Bopomofo(?:[-_]|\s+)?Block' => 'Bopomofo Block',
212         'Bopomofo(?:[-_]|\s+)?Extended' => 'Bopomofo Extended',
213 },
214 'box' => {
215         'Box(?:[-_]|\s+)?Drawing' => 'Box Drawing',
216 },
217 'bra' => {
218         'Braille(?:[-_]|\s+)?Patterns' => 'Braille Patterns',
219 },
220 'byz' => {
221         'Byzantine(?:[-_]|\s+)?Musical(?:[-_]|\s+)?Symbols' => 'Byzantine Musical Symbols',
222 },
223 'can' => {
224         'CANADIAN(?:[-_]|\s+)?ABORIGINAL' => 'CANADIAN-ABORIGINAL',
225 },
226 'che' => {
227         'CHEROKEE' => 'CHEROKEE',
228         'Cherokee(?:[-_]|\s+)?Block' => 'Cherokee Block',
229 },
230 'cjk' => {
231         'CJK(?:[-_]|\s+)?Radicals(?:[-_]|\s+)?Supplement' => 'CJK Radicals Supplement',
232         'CJK(?:[-_]|\s+)?Symbols(?:[-_]|\s+)?and(?:[-_]|\s+)?Punctuation' => 'CJK Symbols and Punctuation',
233         'CJK(?:[-_]|\s+)?Compatibility' => 'CJK Compatibility',
234         'CJK(?:[-_]|\s+)?Unified(?:[-_]|\s+)?Ideographs(?:[-_]|\s+)?Extension(?:[-_]|\s+)?A' => 'CJK Unified Ideographs Extension A',
235         'CJK(?:[-_]|\s+)?Unified(?:[-_]|\s+)?Ideographs' => 'CJK Unified Ideographs',
236         'CJK(?:[-_]|\s+)?Compatibility(?:[-_]|\s+)?Ideographs' => 'CJK Compatibility Ideographs',
237         'CJK(?:[-_]|\s+)?Compatibility(?:[-_]|\s+)?Forms' => 'CJK Compatibility Forms',
238         'CJK(?:[-_]|\s+)?Unified(?:[-_]|\s+)?Ideographs(?:[-_]|\s+)?Extension(?:[-_]|\s+)?B' => 'CJK Unified Ideographs Extension B',
239         'CJK(?:[-_]|\s+)?Compatibility(?:[-_]|\s+)?Ideographs(?:[-_]|\s+)?Supplement' => 'CJK Compatibility Ideographs Supplement',
240 },
241 'com' => {
242         'Combining(?:[-_]|\s+)?Diacritical(?:[-_]|\s+)?Marks' => 'Combining Diacritical Marks',
243         'Combining(?:[-_]|\s+)?Marks(?:[-_]|\s+)?for(?:[-_]|\s+)?Symbols' => 'Combining Marks for Symbols',
244         'Combining(?:[-_]|\s+)?Half(?:[-_]|\s+)?Marks' => 'Combining Half Marks',
245         'Common' => 'Common',
246 },
247 'con' => {
248         'Control(?:[-_]|\s+)?Pictures' => 'Control Pictures',
249 },
250 'cur' => {
251         'Currency(?:[-_]|\s+)?Symbols' => 'Currency Symbols',
252 },
253 'cyr' => {
254         'CYRILLIC' => 'CYRILLIC',
255         'Cyrillic(?:[-_]|\s+)?Block' => 'Cyrillic Block',
256 },
257 'das' => {
258         'Dash' => 'Dash',
259 },
260 'des' => {
261         'DESERET' => 'DESERET',
262         'Deseret(?:[-_]|\s+)?Block' => 'Deseret Block',
263 },
264 'dev' => {
265         'DEVANAGARI' => 'DEVANAGARI',
266         'Devanagari(?:[-_]|\s+)?Block' => 'Devanagari Block',
267 },
268 'dia' => {
269         'Diacritic' => 'Diacritic',
270 },
271 'din' => {
272         'Dingbats' => 'Dingbats',
273 },
274 'enc' => {
275         'Enclosed(?:[-_]|\s+)?Alphanumerics' => 'Enclosed Alphanumerics',
276         'Enclosed(?:[-_]|\s+)?CJK(?:[-_]|\s+)?Letters(?:[-_]|\s+)?and(?:[-_]|\s+)?Months' => 'Enclosed CJK Letters and Months',
277 },
278 'eth' => {
279         'ETHIOPIC' => 'ETHIOPIC',
280         'Ethiopic(?:[-_]|\s+)?Block' => 'Ethiopic Block',
281 },
282 'ext' => {
283         'Extender' => 'Extender',
284 },
285 'gen' => {
286         'General(?:[-_]|\s+)?Punctuation' => 'General Punctuation',
287 },
288 'geo' => {
289         'GEORGIAN' => 'GEORGIAN',
290         'Georgian(?:[-_]|\s+)?Block' => 'Georgian Block',
291         'Geometric(?:[-_]|\s+)?Shapes' => 'Geometric Shapes',
292 },
293 'got' => {
294         'GOTHIC' => 'GOTHIC',
295         'Gothic(?:[-_]|\s+)?Block' => 'Gothic Block',
296 },
297 'gre' => {
298         'GREEK' => 'GREEK',
299         'Greek(?:[-_]|\s+)?Block' => 'Greek Block',
300         'Greek(?:[-_]|\s+)?Extended' => 'Greek Extended',
301 },
302 'guj' => {
303         'GUJARATI' => 'GUJARATI',
304         'Gujarati(?:[-_]|\s+)?Block' => 'Gujarati Block',
305 },
306 'gur' => {
307         'GURMUKHI' => 'GURMUKHI',
308         'Gurmukhi(?:[-_]|\s+)?Block' => 'Gurmukhi Block',
309 },
310 'hal' => {
311         'Halfwidth(?:[-_]|\s+)?and(?:[-_]|\s+)?Fullwidth(?:[-_]|\s+)?Forms' => 'Halfwidth and Fullwidth Forms',
312 },
313 'han' => {
314         'HANGUL' => 'HANGUL',
315         'HAN' => 'HAN',
316         'Hangul(?:[-_]|\s+)?Jamo' => 'Hangul Jamo',
317         'Hangul(?:[-_]|\s+)?Compatibility(?:[-_]|\s+)?Jamo' => 'Hangul Compatibility Jamo',
318         'Hangul(?:[-_]|\s+)?Syllables' => 'Hangul Syllables',
319 },
320 'heb' => {
321         'HEBREW' => 'HEBREW',
322         'Hebrew(?:[-_]|\s+)?Block' => 'Hebrew Block',
323 },
324 'hex' => {
325         'Hex(?:[-_]|\s+)?Digit' => 'Hex_Digit',
326 },
327 'hig' => {
328         'High(?:[-_]|\s+)?Surrogates' => 'High Surrogates',
329         'High(?:[-_]|\s+)?Private(?:[-_]|\s+)?Use(?:[-_]|\s+)?Surrogates' => 'High Private Use Surrogates',
330 },
331 'hir' => {
332         'HIRAGANA' => 'HIRAGANA',
333         'Hiragana(?:[-_]|\s+)?Block' => 'Hiragana Block',
334 },
335 'hyp' => {
336         'Hyphen' => 'Hyphen',
337 },
338 'id_' => {
339         'ID(?:[-_]|\s+)?Start' => 'ID_Start',
340         'ID(?:[-_]|\s+)?Continue' => 'ID_Continue',
341 },
342 'ide' => {
343         'Ideographic(?:[-_]|\s+)?Description(?:[-_]|\s+)?Characters' => 'Ideographic Description Characters',
344         'Ideographic' => 'Ideographic',
345 },
346 'inh' => {
347         'INHERITED' => 'INHERITED',
348 },
349 'ipa' => {
350         'IPA(?:[-_]|\s+)?Extensions' => 'IPA Extensions',
351 },
352 'joi' => {
353         'Join(?:[-_]|\s+)?Control' => 'Join_Control',
354 },
355 'kan' => {
356         'KANNADA' => 'KANNADA',
357         'Kannada(?:[-_]|\s+)?Block' => 'Kannada Block',
358         'Kangxi(?:[-_]|\s+)?Radicals' => 'Kangxi Radicals',
359         'Kanbun' => 'Kanbun',
360 },
361 'kat' => {
362         'KATAKANA' => 'KATAKANA',
363         'Katakana(?:[-_]|\s+)?Block' => 'Katakana Block',
364 },
365 'khm' => {
366         'KHMER' => 'KHMER',
367         'Khmer(?:[-_]|\s+)?Block' => 'Khmer Block',
368 },
369 'lam' => {
370         'Lampersand' => 'Lampersand',
371 },
372 'lao' => {
373         'LAO' => 'LAO',
374         'Lao(?:[-_]|\s+)?Block' => 'Lao Block',
375 },
376 'lat' => {
377         'LATIN' => 'LATIN',
378         'Latin(?:[-_]|\s+)?1(?:[-_]|\s+)?Supplement' => 'Latin-1 Supplement',
379         'Latin(?:[-_]|\s+)?Extended(?:[-_]|\s+)?A' => 'Latin Extended-A',
380         'Latin(?:[-_]|\s+)?Extended(?:[-_]|\s+)?B' => 'Latin Extended-B',
381         'Latin(?:[-_]|\s+)?Extended(?:[-_]|\s+)?Additional' => 'Latin Extended Additional',
382 },
383 'let' => {
384         'Letterlike(?:[-_]|\s+)?Symbols' => 'Letterlike Symbols',
385 },
386 'low' => {
387         'Low(?:[-_]|\s+)?Surrogates' => 'Low Surrogates',
388         'Lowercase' => 'Lowercase',
389 },
390 'mal' => {
391         'MALAYALAM' => 'MALAYALAM',
392         'Malayalam(?:[-_]|\s+)?Block' => 'Malayalam Block',
393 },
394 'mat' => {
395         'Mathematical(?:[-_]|\s+)?Operators' => 'Mathematical Operators',
396         'Mathematical(?:[-_]|\s+)?Alphanumeric(?:[-_]|\s+)?Symbols' => 'Mathematical Alphanumeric Symbols',
397         'Math' => 'Math',
398 },
399 'mis' => {
400         'Miscellaneous(?:[-_]|\s+)?Technical' => 'Miscellaneous Technical',
401         'Miscellaneous(?:[-_]|\s+)?Symbols' => 'Miscellaneous Symbols',
402 },
403 'mon' => {
404         'MONGOLIAN' => 'MONGOLIAN',
405         'Mongolian(?:[-_]|\s+)?Block' => 'Mongolian Block',
406 },
407 'mus' => {
408         'Musical(?:[-_]|\s+)?Symbols' => 'Musical Symbols',
409 },
410 'mya' => {
411         'MYANMAR' => 'MYANMAR',
412         'Myanmar(?:[-_]|\s+)?Block' => 'Myanmar Block',
413 },
414 'non' => {
415         'Noncharacter(?:[-_]|\s+)?Code(?:[-_]|\s+)?Point' => 'Noncharacter_Code_Point',
416 },
417 'num' => {
418         'Number(?:[-_]|\s+)?Forms' => 'Number Forms',
419 },
420 'ogh' => {
421         'OGHAM' => 'OGHAM',
422         'Ogham(?:[-_]|\s+)?Block' => 'Ogham Block',
423 },
424 'old' => {
425         'OLD(?:[-_]|\s+)?ITALIC' => 'OLD-ITALIC',
426         'Old(?:[-_]|\s+)?Italic' => 'Old Italic',
427 },
428 'opt' => {
429         'Optical(?:[-_]|\s+)?Character(?:[-_]|\s+)?Recognition' => 'Optical Character Recognition',
430 },
431 'ori' => {
432         'ORIYA' => 'ORIYA',
433         'Oriya(?:[-_]|\s+)?Block' => 'Oriya Block',
434 },
435 'oth' => {
436         'Other(?:[-_]|\s+)?Math' => 'Other_Math',
437         'Other(?:[-_]|\s+)?Alphabetic' => 'Other_Alphabetic',
438         'Other(?:[-_]|\s+)?Lowercase' => 'Other_Lowercase',
439         'Other(?:[-_]|\s+)?Uppercase' => 'Other_Uppercase',
440 },
441 'pri' => {
442         'Private(?:[-_]|\s+)?Use' => 'Private Use',
443 },
444 'quo' => {
445         'Quotation(?:[-_]|\s+)?Mark' => 'Quotation_Mark',
446 },
447 'run' => {
448         'RUNIC' => 'RUNIC',
449         'Runic(?:[-_]|\s+)?Block' => 'Runic Block',
450 },
451 'sin' => {
452         'SINHALA' => 'SINHALA',
453         'Sinhala(?:[-_]|\s+)?Block' => 'Sinhala Block',
454 },
455 'sma' => {
456         'Small(?:[-_]|\s+)?Form(?:[-_]|\s+)?Variants' => 'Small Form Variants',
457 },
458 'spa' => {
459         'Spacing(?:[-_]|\s+)?Modifier(?:[-_]|\s+)?Letters' => 'Spacing Modifier Letters',
460 },
461 'spe' => {
462         'Specials' => 'Specials',
463 },
464 'sup' => {
465         'Superscripts(?:[-_]|\s+)?and(?:[-_]|\s+)?Subscripts' => 'Superscripts and Subscripts',
466 },
467 'syr' => {
468         'SYRIAC' => 'SYRIAC',
469         'Syriac(?:[-_]|\s+)?Block' => 'Syriac Block',
470 },
471 'tag' => {
472         'Tags' => 'Tags',
473 },
474 'tam' => {
475         'TAMIL' => 'TAMIL',
476         'Tamil(?:[-_]|\s+)?Block' => 'Tamil Block',
477 },
478 'tel' => {
479         'TELUGU' => 'TELUGU',
480         'Telugu(?:[-_]|\s+)?Block' => 'Telugu Block',
481 },
482 'ter' => {
483         'Terminal(?:[-_]|\s+)?Punctuation' => 'Terminal_Punctuation',
484 },
485 'tha' => {
486         'THAANA' => 'THAANA',
487         'THAI' => 'THAI',
488         'Thaana(?:[-_]|\s+)?Block' => 'Thaana Block',
489         'Thai(?:[-_]|\s+)?Block' => 'Thai Block',
490 },
491 'tib' => {
492         'TIBETAN' => 'TIBETAN',
493         'Tibetan(?:[-_]|\s+)?Block' => 'Tibetan Block',
494 },
495 'uni' => {
496         'Unified(?:[-_]|\s+)?Canadian(?:[-_]|\s+)?Aboriginal(?:[-_]|\s+)?Syllabics' => 'Unified Canadian Aboriginal Syllabics',
497 },
498 'upp' => {
499         'Uppercase' => 'Uppercase',
500 },
501 'whi' => {
502         'White(?:[-_]|\s+)?space' => 'White_space',
503 },
504 'yi' => {
505         'YI' => 'YI',
506 },
507 'yi ' => {
508         'Yi(?:[-_]|\s+)?Syllables' => 'Yi Syllables',
509         'Yi(?:[-_]|\s+)?Radicals' => 'Yi Radicals',
510 },
511 );