Unicode properties saga continues.
[p5sagit/p5-mst-13.2.git] / lib / unicore / In.pl
1 # !!!!!!!   DO NOT EDIT THIS FILE   !!!!!!! 
2 # This file is built by mktables.PL from e.g. Unicode.txt.
3 # Any changes made here will be lost!
4 %utf8::In = (
5 'LATIN'                                       =>   0,
6 'GREEK'                                       =>   1,
7 'CYRILLIC'                                    =>   2,
8 'ARMENIAN'                                    =>   3,
9 'HEBREW'                                      =>   4,
10 'ARABIC'                                      =>   5,
11 'SYRIAC'                                      =>   6,
12 'THAANA'                                      =>   7,
13 'DEVANAGARI'                                  =>   8,
14 'BENGALI'                                     =>   9,
15 'GURMUKHI'                                    =>  10,
16 'GUJARATI'                                    =>  11,
17 'ORIYA'                                       =>  12,
18 'TAMIL'                                       =>  13,
19 'TELUGU'                                      =>  14,
20 'KANNADA'                                     =>  15,
21 'MALAYALAM'                                   =>  16,
22 'SINHALA'                                     =>  17,
23 'THAI'                                        =>  18,
24 'LAO'                                         =>  19,
25 'TIBETAN'                                     =>  20,
26 'MYANMAR'                                     =>  21,
27 'GEORGIAN'                                    =>  22,
28 'HANGUL'                                      =>  23,
29 'ETHIOPIC'                                    =>  24,
30 'CHEROKEE'                                    =>  25,
31 'CANADIAN-ABORIGINAL'                         =>  26,
32 'OGHAM'                                       =>  27,
33 'RUNIC'                                       =>  28,
34 'KHMER'                                       =>  29,
35 'MONGOLIAN'                                   =>  30,
36 'HIRAGANA'                                    =>  31,
37 'KATAKANA'                                    =>  32,
38 'BOPOMOFO'                                    =>  33,
39 'HAN'                                         =>  34,
40 'YI'                                          =>  35,
41 'OLD-ITALIC'                                  =>  36,
42 'GOTHIC'                                      =>  37,
43 'DESERET'                                     =>  38,
44 'INHERITED'                                   =>  39,
45 'Basic Latin'                                 =>  40,
46 'Latin-1 Supplement'                          =>  41,
47 'Latin Extended-A'                            =>  42,
48 'Latin Extended-B'                            =>  43,
49 'IPA Extensions'                              =>  44,
50 'Spacing Modifier Letters'                    =>  45,
51 'Combining Diacritical Marks'                 =>  46,
52 'Greek Block'                                 =>  47,
53 'Cyrillic Block'                              =>  48,
54 'Armenian Block'                              =>  49,
55 'Hebrew Block'                                =>  50,
56 'Arabic Block'                                =>  51,
57 'Syriac Block'                                =>  52,
58 'Thaana Block'                                =>  53,
59 'Devanagari Block'                            =>  54,
60 'Bengali Block'                               =>  55,
61 'Gurmukhi Block'                              =>  56,
62 'Gujarati Block'                              =>  57,
63 'Oriya Block'                                 =>  58,
64 'Tamil Block'                                 =>  59,
65 'Telugu Block'                                =>  60,
66 'Kannada Block'                               =>  61,
67 'Malayalam Block'                             =>  62,
68 'Sinhala Block'                               =>  63,
69 'Thai Block'                                  =>  64,
70 'Lao Block'                                   =>  65,
71 'Tibetan Block'                               =>  66,
72 'Myanmar Block'                               =>  67,
73 'Georgian Block'                              =>  68,
74 'Hangul Jamo'                                 =>  69,
75 'Ethiopic Block'                              =>  70,
76 'Cherokee Block'                              =>  71,
77 'Unified Canadian Aboriginal Syllabics'       =>  72,
78 'Ogham Block'                                 =>  73,
79 'Runic Block'                                 =>  74,
80 'Khmer Block'                                 =>  75,
81 'Mongolian Block'                             =>  76,
82 'Latin Extended Additional'                   =>  77,
83 'Greek Extended'                              =>  78,
84 'General Punctuation'                         =>  79,
85 'Superscripts and Subscripts'                 =>  80,
86 'Currency Symbols'                            =>  81,
87 'Combining Marks for Symbols'                 =>  82,
88 'Letterlike Symbols'                          =>  83,
89 'Number Forms'                                =>  84,
90 'Arrows'                                      =>  85,
91 'Mathematical Operators'                      =>  86,
92 'Miscellaneous Technical'                     =>  87,
93 'Control Pictures'                            =>  88,
94 'Optical Character Recognition'               =>  89,
95 'Enclosed Alphanumerics'                      =>  90,
96 'Box Drawing'                                 =>  91,
97 'Block Elements'                              =>  92,
98 'Geometric Shapes'                            =>  93,
99 'Miscellaneous Symbols'                       =>  94,
100 'Dingbats'                                    =>  95,
101 'Braille Patterns'                            =>  96,
102 'CJK Radicals Supplement'                     =>  97,
103 'Kangxi Radicals'                             =>  98,
104 'Ideographic Description Characters'          =>  99,
105 'CJK Symbols and Punctuation'                 => 100,
106 'Hiragana Block'                              => 101,
107 'Katakana Block'                              => 102,
108 'Bopomofo Block'                              => 103,
109 'Hangul Compatibility Jamo'                   => 104,
110 'Kanbun'                                      => 105,
111 'Bopomofo Extended'                           => 106,
112 'Enclosed CJK Letters and Months'             => 107,
113 'CJK Compatibility'                           => 108,
114 'CJK Unified Ideographs Extension A'          => 109,
115 'CJK Unified Ideographs'                      => 110,
116 'Yi Syllables'                                => 111,
117 'Yi Radicals'                                 => 112,
118 'Hangul Syllables'                            => 113,
119 'High Surrogates'                             => 114,
120 'High Private Use Surrogates'                 => 115,
121 'Low Surrogates'                              => 116,
122 'Private Use'                                 => 117,
123 'CJK Compatibility Ideographs'                => 118,
124 'Alphabetic Presentation Forms'               => 119,
125 'Arabic Presentation Forms-A'                 => 120,
126 'Combining Half Marks'                        => 121,
127 'CJK Compatibility Forms'                     => 122,
128 'Small Form Variants'                         => 123,
129 'Arabic Presentation Forms-B'                 => 124,
130 'Specials'                                    => 125,
131 'Halfwidth and Fullwidth Forms'               => 126,
132 'Old Italic'                                  => 127,
133 'Gothic Block'                                => 128,
134 'Deseret Block'                               => 129,
135 'Byzantine Musical Symbols'                   => 130,
136 'Musical Symbols'                             => 131,
137 'Mathematical Alphanumeric Symbols'           => 132,
138 'CJK Unified Ideographs Extension B'          => 133,
139 'CJK Compatibility Ideographs Supplement'     => 134,
140 'Tags'                                        => 135,
141 'Common'                                      => 136,
142 'Any'                                         => 137,
143 'White_space'                                 => 138,
144 'Bidi_Control'                                => 139,
145 'Join_Control'                                => 140,
146 'Dash'                                        => 141,
147 'Hyphen'                                      => 142,
148 'Quotation_Mark'                              => 143,
149 'Terminal_Punctuation'                        => 144,
150 'Other_Math'                                  => 145,
151 'Hex_Digit'                                   => 146,
152 'ASCII_Hex_Digit'                             => 147,
153 'Other_Alphabetic'                            => 148,
154 'Ideographic'                                 => 149,
155 'Diacritic'                                   => 150,
156 'Extender'                                    => 151,
157 'Other_Lowercase'                             => 152,
158 'Other_Uppercase'                             => 153,
159 'Noncharacter_Code_Point'                     => 154,
160 'Assigned'                                    => 155,
161 'Alphabetic'                                  => 156,
162 'Lowercase'                                   => 157,
163 'Uppercase'                                   => 158,
164 'Math'                                        => 159,
165 'Lampersand'                                  => 160,
166 'ID_Start'                                    => 161,
167 'ID_Continue'                                 => 162,
168 );
169 %utf8::InPat = (
170 'al' => {
171         'Alphabetic(?:[-_]|\s+)?Presentation(?:[-_]|\s+)?Forms' => 'Alphabetic Presentation Forms',
172         'Alphabetic' => 'Alphabetic',
173 },
174 'an' => {
175         'Any' => 'Any',
176 },
177 'ar' => {
178         'ARMENIAN' => 'ARMENIAN',
179         'ARABIC' => 'ARABIC',
180         'Armenian(?:[-_]|\s+)?Block' => 'Armenian Block',
181         'Arabic(?:[-_]|\s+)?Block' => 'Arabic Block',
182         'Arrows' => 'Arrows',
183         'Arabic(?:[-_]|\s+)?Presentation(?:[-_]|\s+)?Forms(?:[-_]|\s+)?A' => 'Arabic Presentation Forms-A',
184         'Arabic(?:[-_]|\s+)?Presentation(?:[-_]|\s+)?Forms(?:[-_]|\s+)?B' => 'Arabic Presentation Forms-B',
185 },
186 'as' => {
187         'ASCII(?:[-_]|\s+)?Hex(?:[-_]|\s+)?Digit' => 'ASCII_Hex_Digit',
188         'Assigned' => 'Assigned',
189 },
190 'ba' => {
191         'Basic(?:[-_]|\s+)?Latin' => 'Basic Latin',
192 },
193 'be' => {
194         'BENGALI' => 'BENGALI',
195         'Bengali(?:[-_]|\s+)?Block' => 'Bengali Block',
196 },
197 'bi' => {
198         'Bidi(?:[-_]|\s+)?Control' => 'Bidi_Control',
199 },
200 'bl' => {
201         'Block(?:[-_]|\s+)?Elements' => 'Block Elements',
202 },
203 'bo' => {
204         'BOPOMOFO' => 'BOPOMOFO',
205         'Box(?:[-_]|\s+)?Drawing' => 'Box Drawing',
206         'Bopomofo(?:[-_]|\s+)?Block' => 'Bopomofo Block',
207         'Bopomofo(?:[-_]|\s+)?Extended' => 'Bopomofo Extended',
208 },
209 'br' => {
210         'Braille(?:[-_]|\s+)?Patterns' => 'Braille Patterns',
211 },
212 'by' => {
213         'Byzantine(?:[-_]|\s+)?Musical(?:[-_]|\s+)?Symbols' => 'Byzantine Musical Symbols',
214 },
215 'ca' => {
216         'CANADIAN(?:[-_]|\s+)?ABORIGINAL' => 'CANADIAN-ABORIGINAL',
217 },
218 'ch' => {
219         'CHEROKEE' => 'CHEROKEE',
220         'Cherokee(?:[-_]|\s+)?Block' => 'Cherokee Block',
221 },
222 'cj' => {
223         'CJK(?:[-_]|\s+)?Radicals(?:[-_]|\s+)?Supplement' => 'CJK Radicals Supplement',
224         'CJK(?:[-_]|\s+)?Symbols(?:[-_]|\s+)?and(?:[-_]|\s+)?Punctuation' => 'CJK Symbols and Punctuation',
225         'CJK(?:[-_]|\s+)?Compatibility' => 'CJK Compatibility',
226         'CJK(?:[-_]|\s+)?Unified(?:[-_]|\s+)?Ideographs(?:[-_]|\s+)?Extension(?:[-_]|\s+)?A' => 'CJK Unified Ideographs Extension A',
227         'CJK(?:[-_]|\s+)?Unified(?:[-_]|\s+)?Ideographs' => 'CJK Unified Ideographs',
228         'CJK(?:[-_]|\s+)?Compatibility(?:[-_]|\s+)?Ideographs' => 'CJK Compatibility Ideographs',
229         'CJK(?:[-_]|\s+)?Compatibility(?:[-_]|\s+)?Forms' => 'CJK Compatibility Forms',
230         'CJK(?:[-_]|\s+)?Unified(?:[-_]|\s+)?Ideographs(?:[-_]|\s+)?Extension(?:[-_]|\s+)?B' => 'CJK Unified Ideographs Extension B',
231         'CJK(?:[-_]|\s+)?Compatibility(?:[-_]|\s+)?Ideographs(?:[-_]|\s+)?Supplement' => 'CJK Compatibility Ideographs Supplement',
232 },
233 'co' => {
234         'Combining(?:[-_]|\s+)?Diacritical(?:[-_]|\s+)?Marks' => 'Combining Diacritical Marks',
235         'Combining(?:[-_]|\s+)?Marks(?:[-_]|\s+)?for(?:[-_]|\s+)?Symbols' => 'Combining Marks for Symbols',
236         'Control(?:[-_]|\s+)?Pictures' => 'Control Pictures',
237         'Combining(?:[-_]|\s+)?Half(?:[-_]|\s+)?Marks' => 'Combining Half Marks',
238         'Common' => 'Common',
239 },
240 'cu' => {
241         'Currency(?:[-_]|\s+)?Symbols' => 'Currency Symbols',
242 },
243 'cy' => {
244         'CYRILLIC' => 'CYRILLIC',
245         'Cyrillic(?:[-_]|\s+)?Block' => 'Cyrillic Block',
246 },
247 'da' => {
248         'Dash' => 'Dash',
249 },
250 'de' => {
251         'DEVANAGARI' => 'DEVANAGARI',
252         'DESERET' => 'DESERET',
253         'Devanagari(?:[-_]|\s+)?Block' => 'Devanagari Block',
254         'Deseret(?:[-_]|\s+)?Block' => 'Deseret Block',
255 },
256 'di' => {
257         'Dingbats' => 'Dingbats',
258         'Diacritic' => 'Diacritic',
259 },
260 'en' => {
261         'Enclosed(?:[-_]|\s+)?Alphanumerics' => 'Enclosed Alphanumerics',
262         'Enclosed(?:[-_]|\s+)?CJK(?:[-_]|\s+)?Letters(?:[-_]|\s+)?and(?:[-_]|\s+)?Months' => 'Enclosed CJK Letters and Months',
263 },
264 'et' => {
265         'ETHIOPIC' => 'ETHIOPIC',
266         'Ethiopic(?:[-_]|\s+)?Block' => 'Ethiopic Block',
267 },
268 'ex' => {
269         'Extender' => 'Extender',
270 },
271 'ge' => {
272         'GEORGIAN' => 'GEORGIAN',
273         'Georgian(?:[-_]|\s+)?Block' => 'Georgian Block',
274         'General(?:[-_]|\s+)?Punctuation' => 'General Punctuation',
275         'Geometric(?:[-_]|\s+)?Shapes' => 'Geometric Shapes',
276 },
277 'go' => {
278         'GOTHIC' => 'GOTHIC',
279         'Gothic(?:[-_]|\s+)?Block' => 'Gothic Block',
280 },
281 'gr' => {
282         'GREEK' => 'GREEK',
283         'Greek(?:[-_]|\s+)?Block' => 'Greek Block',
284         'Greek(?:[-_]|\s+)?Extended' => 'Greek Extended',
285 },
286 'gu' => {
287         'GURMUKHI' => 'GURMUKHI',
288         'GUJARATI' => 'GUJARATI',
289         'Gurmukhi(?:[-_]|\s+)?Block' => 'Gurmukhi Block',
290         'Gujarati(?:[-_]|\s+)?Block' => 'Gujarati Block',
291 },
292 'ha' => {
293         'HANGUL' => 'HANGUL',
294         'HAN' => 'HAN',
295         'Hangul(?:[-_]|\s+)?Jamo' => 'Hangul Jamo',
296         'Hangul(?:[-_]|\s+)?Compatibility(?:[-_]|\s+)?Jamo' => 'Hangul Compatibility Jamo',
297         'Hangul(?:[-_]|\s+)?Syllables' => 'Hangul Syllables',
298         'Halfwidth(?:[-_]|\s+)?and(?:[-_]|\s+)?Fullwidth(?:[-_]|\s+)?Forms' => 'Halfwidth and Fullwidth Forms',
299 },
300 'he' => {
301         'HEBREW' => 'HEBREW',
302         'Hebrew(?:[-_]|\s+)?Block' => 'Hebrew Block',
303         'Hex(?:[-_]|\s+)?Digit' => 'Hex_Digit',
304 },
305 'hi' => {
306         'HIRAGANA' => 'HIRAGANA',
307         'Hiragana(?:[-_]|\s+)?Block' => 'Hiragana Block',
308         'High(?:[-_]|\s+)?Surrogates' => 'High Surrogates',
309         'High(?:[-_]|\s+)?Private(?:[-_]|\s+)?Use(?:[-_]|\s+)?Surrogates' => 'High Private Use Surrogates',
310 },
311 'hy' => {
312         'Hyphen' => 'Hyphen',
313 },
314 'id' => {
315         'Ideographic(?:[-_]|\s+)?Description(?:[-_]|\s+)?Characters' => 'Ideographic Description Characters',
316         'Ideographic' => 'Ideographic',
317         'ID(?:[-_]|\s+)?Start' => 'ID_Start',
318         'ID(?:[-_]|\s+)?Continue' => 'ID_Continue',
319 },
320 'in' => {
321         'INHERITED' => 'INHERITED',
322 },
323 'ip' => {
324         'IPA(?:[-_]|\s+)?Extensions' => 'IPA Extensions',
325 },
326 'jo' => {
327         'Join(?:[-_]|\s+)?Control' => 'Join_Control',
328 },
329 'ka' => {
330         'KANNADA' => 'KANNADA',
331         'KATAKANA' => 'KATAKANA',
332         'Kannada(?:[-_]|\s+)?Block' => 'Kannada Block',
333         'Kangxi(?:[-_]|\s+)?Radicals' => 'Kangxi Radicals',
334         'Katakana(?:[-_]|\s+)?Block' => 'Katakana Block',
335         'Kanbun' => 'Kanbun',
336 },
337 'kh' => {
338         'KHMER' => 'KHMER',
339         'Khmer(?:[-_]|\s+)?Block' => 'Khmer Block',
340 },
341 'la' => {
342         'LATIN' => 'LATIN',
343         'LAO' => 'LAO',
344         'Latin(?:[-_]|\s+)?1(?:[-_]|\s+)?Supplement' => 'Latin-1 Supplement',
345         'Latin(?:[-_]|\s+)?Extended(?:[-_]|\s+)?A' => 'Latin Extended-A',
346         'Latin(?:[-_]|\s+)?Extended(?:[-_]|\s+)?B' => 'Latin Extended-B',
347         'Lao(?:[-_]|\s+)?Block' => 'Lao Block',
348         'Latin(?:[-_]|\s+)?Extended(?:[-_]|\s+)?Additional' => 'Latin Extended Additional',
349         'Lampersand' => 'Lampersand',
350 },
351 'le' => {
352         'Letterlike(?:[-_]|\s+)?Symbols' => 'Letterlike Symbols',
353 },
354 'lo' => {
355         'Low(?:[-_]|\s+)?Surrogates' => 'Low Surrogates',
356         'Lowercase' => 'Lowercase',
357 },
358 'ma' => {
359         'MALAYALAM' => 'MALAYALAM',
360         'Malayalam(?:[-_]|\s+)?Block' => 'Malayalam Block',
361         'Mathematical(?:[-_]|\s+)?Operators' => 'Mathematical Operators',
362         'Mathematical(?:[-_]|\s+)?Alphanumeric(?:[-_]|\s+)?Symbols' => 'Mathematical Alphanumeric Symbols',
363         'Math' => 'Math',
364 },
365 'mi' => {
366         'Miscellaneous(?:[-_]|\s+)?Technical' => 'Miscellaneous Technical',
367         'Miscellaneous(?:[-_]|\s+)?Symbols' => 'Miscellaneous Symbols',
368 },
369 'mo' => {
370         'MONGOLIAN' => 'MONGOLIAN',
371         'Mongolian(?:[-_]|\s+)?Block' => 'Mongolian Block',
372 },
373 'mu' => {
374         'Musical(?:[-_]|\s+)?Symbols' => 'Musical Symbols',
375 },
376 'my' => {
377         'MYANMAR' => 'MYANMAR',
378         'Myanmar(?:[-_]|\s+)?Block' => 'Myanmar Block',
379 },
380 'no' => {
381         'Noncharacter(?:[-_]|\s+)?Code(?:[-_]|\s+)?Point' => 'Noncharacter_Code_Point',
382 },
383 'nu' => {
384         'Number(?:[-_]|\s+)?Forms' => 'Number Forms',
385 },
386 'og' => {
387         'OGHAM' => 'OGHAM',
388         'Ogham(?:[-_]|\s+)?Block' => 'Ogham Block',
389 },
390 'ol' => {
391         'OLD(?:[-_]|\s+)?ITALIC' => 'OLD-ITALIC',
392         'Old(?:[-_]|\s+)?Italic' => 'Old Italic',
393 },
394 'op' => {
395         'Optical(?:[-_]|\s+)?Character(?:[-_]|\s+)?Recognition' => 'Optical Character Recognition',
396 },
397 'or' => {
398         'ORIYA' => 'ORIYA',
399         'Oriya(?:[-_]|\s+)?Block' => 'Oriya Block',
400 },
401 'ot' => {
402         'Other(?:[-_]|\s+)?Math' => 'Other_Math',
403         'Other(?:[-_]|\s+)?Alphabetic' => 'Other_Alphabetic',
404         'Other(?:[-_]|\s+)?Lowercase' => 'Other_Lowercase',
405         'Other(?:[-_]|\s+)?Uppercase' => 'Other_Uppercase',
406 },
407 'pr' => {
408         'Private(?:[-_]|\s+)?Use' => 'Private Use',
409 },
410 'qu' => {
411         'Quotation(?:[-_]|\s+)?Mark' => 'Quotation_Mark',
412 },
413 'ru' => {
414         'RUNIC' => 'RUNIC',
415         'Runic(?:[-_]|\s+)?Block' => 'Runic Block',
416 },
417 'si' => {
418         'SINHALA' => 'SINHALA',
419         'Sinhala(?:[-_]|\s+)?Block' => 'Sinhala Block',
420 },
421 'sm' => {
422         'Small(?:[-_]|\s+)?Form(?:[-_]|\s+)?Variants' => 'Small Form Variants',
423 },
424 'sp' => {
425         'Spacing(?:[-_]|\s+)?Modifier(?:[-_]|\s+)?Letters' => 'Spacing Modifier Letters',
426         'Specials' => 'Specials',
427 },
428 'su' => {
429         'Superscripts(?:[-_]|\s+)?and(?:[-_]|\s+)?Subscripts' => 'Superscripts and Subscripts',
430 },
431 'sy' => {
432         'SYRIAC' => 'SYRIAC',
433         'Syriac(?:[-_]|\s+)?Block' => 'Syriac Block',
434 },
435 'ta' => {
436         'TAMIL' => 'TAMIL',
437         'Tamil(?:[-_]|\s+)?Block' => 'Tamil Block',
438         'Tags' => 'Tags',
439 },
440 'te' => {
441         'TELUGU' => 'TELUGU',
442         'Telugu(?:[-_]|\s+)?Block' => 'Telugu Block',
443         'Terminal(?:[-_]|\s+)?Punctuation' => 'Terminal_Punctuation',
444 },
445 'th' => {
446         'THAANA' => 'THAANA',
447         'THAI' => 'THAI',
448         'Thaana(?:[-_]|\s+)?Block' => 'Thaana Block',
449         'Thai(?:[-_]|\s+)?Block' => 'Thai Block',
450 },
451 'ti' => {
452         'TIBETAN' => 'TIBETAN',
453         'Tibetan(?:[-_]|\s+)?Block' => 'Tibetan Block',
454 },
455 'un' => {
456         'Unified(?:[-_]|\s+)?Canadian(?:[-_]|\s+)?Aboriginal(?:[-_]|\s+)?Syllabics' => 'Unified Canadian Aboriginal Syllabics',
457 },
458 'up' => {
459         'Uppercase' => 'Uppercase',
460 },
461 'wh' => {
462         'White(?:[-_]|\s+)?space' => 'White_space',
463 },
464 'yi' => {
465         'YI' => 'YI',
466         'Yi(?:[-_]|\s+)?Syllables' => 'Yi Syllables',
467         'Yi(?:[-_]|\s+)?Radicals' => 'Yi Radicals',
468 },
469 );