Add the special casing mappings (from SpecCase.txt)
[p5sagit/p5-mst-13.2.git] / lib / unicore / In.pl
index a6c2419..cd872fa 100644 (file)
 # !!!!!!!   DO NOT EDIT THIS FILE   !!!!!!! 
-# This file is built by mktables.PL from e.g. Unicode.txt.
+# This file is built by mktables from e.g. Unicode.txt.
 # Any changes made here will be lost!
-%utf8::In = (
-'Latin'                                  =>   0,
-'Greek'                                  =>   1,
-'Cyrillic'                               =>   2,
-'Armenian'                               =>   3,
-'Hebrew'                                 =>   4,
-'Arabic'                                 =>   5,
-'Syriac'                                 =>   6,
-'Thaana'                                 =>   7,
-'Devanagari'                             =>   8,
-'Bengali'                                =>   9,
-'Gurmukhi'                               =>  10,
-'Gujarati'                               =>  11,
-'Oriya'                                  =>  12,
-'Tamil'                                  =>  13,
-'Telugu'                                 =>  14,
-'Kannada'                                =>  15,
-'Malayalam'                              =>  16,
-'Sinhala'                                =>  17,
-'Thai'                                   =>  18,
-'Lao'                                    =>  19,
-'Tibetan'                                =>  20,
-'Myanmar'                                =>  21,
-'Georgian'                               =>  22,
-'Hangul'                                 =>  23,
-'Ethiopic'                               =>  24,
-'Cherokee'                               =>  25,
-'CanadianAboriginal'                     =>  26,
-'Ogham'                                  =>  27,
-'Runic'                                  =>  28,
-'Khmer'                                  =>  29,
-'Mongolian'                              =>  30,
-'Hiragana'                               =>  31,
-'Katakana'                               =>  32,
-'Bopomofo'                               =>  33,
-'Han'                                    =>  34,
-'Yi'                                     =>  35,
-'OldItalic'                              =>  36,
-'Gothic'                                 =>  37,
-'Deseret'                                =>  38,
-'Inherited'                              =>  39,
-'BasicLatin'                             =>  40,
-'Latin1Supplement'                       =>  41,
-'LatinExtendedA'                         =>  42,
-'LatinExtendedB'                         =>  43,
-'IPAExtensions'                          =>  44,
-'SpacingModifierLetters'                 =>  45,
-'CombiningDiacriticalMarks'              =>  46,
-'GreekBlock'                             =>  47,
-'CyrillicBlock'                          =>  48,
-'ArmenianBlock'                          =>  49,
-'HebrewBlock'                            =>  50,
-'ArabicBlock'                            =>  51,
-'SyriacBlock'                            =>  52,
-'ThaanaBlock'                            =>  53,
-'DevanagariBlock'                        =>  54,
-'BengaliBlock'                           =>  55,
-'GurmukhiBlock'                          =>  56,
-'GujaratiBlock'                          =>  57,
-'OriyaBlock'                             =>  58,
-'TamilBlock'                             =>  59,
-'TeluguBlock'                            =>  60,
-'KannadaBlock'                           =>  61,
-'MalayalamBlock'                         =>  62,
-'SinhalaBlock'                           =>  63,
-'ThaiBlock'                              =>  64,
-'LaoBlock'                               =>  65,
-'TibetanBlock'                           =>  66,
-'MyanmarBlock'                           =>  67,
-'GeorgianBlock'                          =>  68,
-'HangulJamo'                             =>  69,
-'EthiopicBlock'                          =>  70,
-'CherokeeBlock'                          =>  71,
-'UnifiedCanadianAboriginalSyllabics'     =>  72,
-'OghamBlock'                             =>  73,
-'RunicBlock'                             =>  74,
-'KhmerBlock'                             =>  75,
-'MongolianBlock'                         =>  76,
-'LatinExtendedAdditional'                =>  77,
-'GreekExtended'                          =>  78,
-'GeneralPunctuation'                     =>  79,
-'SuperscriptsandSubscripts'              =>  80,
-'CurrencySymbols'                        =>  81,
-'CombiningMarksforSymbols'               =>  82,
-'LetterlikeSymbols'                      =>  83,
-'NumberForms'                            =>  84,
-'Arrows'                                 =>  85,
-'MathematicalOperators'                  =>  86,
-'MiscellaneousTechnical'                 =>  87,
-'ControlPictures'                        =>  88,
-'OpticalCharacterRecognition'            =>  89,
-'EnclosedAlphanumerics'                  =>  90,
-'BoxDrawing'                             =>  91,
-'BlockElements'                          =>  92,
-'GeometricShapes'                        =>  93,
-'MiscellaneousSymbols'                   =>  94,
-'Dingbats'                               =>  95,
-'BraillePatterns'                        =>  96,
-'CJKRadicalsSupplement'                  =>  97,
-'KangxiRadicals'                         =>  98,
-'IdeographicDescriptionCharacters'       =>  99,
-'CJKSymbolsandPunctuation'               => 100,
-'HiraganaBlock'                          => 101,
-'KatakanaBlock'                          => 102,
-'BopomofoBlock'                          => 103,
-'HangulCompatibilityJamo'                => 104,
-'Kanbun'                                 => 105,
-'BopomofoExtended'                       => 106,
-'EnclosedCJKLettersandMonths'            => 107,
-'CJKCompatibility'                       => 108,
-'CJKUnifiedIdeographsExtensionA'         => 109,
-'CJKUnifiedIdeographs'                   => 110,
-'YiSyllables'                            => 111,
-'YiRadicals'                             => 112,
-'HangulSyllables'                        => 113,
-'HighSurrogates'                         => 114,
-'HighPrivateUseSurrogates'               => 115,
-'LowSurrogates'                          => 116,
-'PrivateUse'                             => 117,
-'CJKCompatibilityIdeographs'             => 118,
-'AlphabeticPresentationForms'            => 119,
-'ArabicPresentationFormsA'               => 120,
-'CombiningHalfMarks'                     => 121,
-'CJKCompatibilityForms'                  => 122,
-'SmallFormVariants'                      => 123,
-'ArabicPresentationFormsB'               => 124,
-'Specials'                               => 125,
-'HalfwidthandFullwidthForms'             => 126,
-'OldItalicBlock'                         => 127,
-'GothicBlock'                            => 128,
-'DeseretBlock'                           => 129,
-'ByzantineMusicalSymbols'                => 130,
-'MusicalSymbols'                         => 131,
-'MathematicalAlphanumericSymbols'        => 132,
-'CJKUnifiedIdeographsExtensionB'         => 133,
-'CJKCompatibilityIdeographsSupplement'   => 134,
-'Tags'                                   => 135,
+%utf8::In =
+(
+'ARABIC' => '16',
+'ARMENIAN' => '14',
+'ASCII_Hex_Digit' => '152',
+'Alphabetic' => '164',
+'Alphabetic Presentation Forms' => '129',
+'Any' => '171',
+'Arabic Block' => '62',
+'Arabic Presentation Forms-A' => '130',
+'Arabic Presentation Forms-B' => '134',
+'Armenian Block' => '60',
+'Arrows' => '96',
+'Assigned' => '163',
+'BENGALI' => '20',
+'BOPOMOFO' => '45',
+'Basic Latin' => '51',
+'Bengali Block' => '66',
+'Bidi_Control' => '159',
+'Block Elements' => '103',
+'Bopomofo Block' => '114',
+'Bopomofo Extended' => '117',
+'Box Drawing' => '102',
+'Braille Patterns' => '107',
+'Byzantine Musical Symbols' => '140',
+'CANADIAN-ABORIGINAL' => '37',
+'CHEROKEE' => '36',
+'CJK Compatibility' => '119',
+'CJK Compatibility Forms' => '132',
+'CJK Compatibility Ideographs' => '128',
+'CJK Compatibility Ideographs Supplement' => '144',
+'CJK Ideograph' => '1',
+'CJK Ideograph Extension A' => '0',
+'CJK Ideograph Extension B' => '7',
+'CJK Radicals Supplement' => '108',
+'CJK Symbols and Punctuation' => '111',
+'CJK Unified Ideographs' => '121',
+'CJK Unified Ideographs Extension A' => '120',
+'CJK Unified Ideographs Extension B' => '143',
+'CYRILLIC' => '13',
+'Cherokee Block' => '82',
+'Combining Diacritical Marks' => '57',
+'Combining Half Marks' => '131',
+'Combining Marks for Symbols' => '93',
+'Common' => '50',
+'Control Pictures' => '99',
+'Currency Symbols' => '92',
+'Cyrillic Block' => '59',
+'DESERET' => '49',
+'DEVANAGARI' => '19',
+'Dash' => '151',
+'Deseret Block' => '139',
+'Devanagari Block' => '65',
+'Diacritic' => '154',
+'Dingbats' => '106',
+'ETHIOPIC' => '35',
+'Enclosed Alphanumerics' => '101',
+'Enclosed CJK Letters and Months' => '118',
+'Ethiopic Block' => '81',
+'Extender' => '155',
+'GEORGIAN' => '33',
+'GOTHIC' => '48',
+'GREEK' => '11',
+'GUJARATI' => '22',
+'GURMUKHI' => '21',
+'General Punctuation' => '90',
+'Geometric Shapes' => '104',
+'Georgian Block' => '79',
+'Gothic Block' => '138',
+'Greek Block' => '58',
+'Greek Extended' => '89',
+'Gujarati Block' => '68',
+'Gurmukhi Block' => '67',
+'HAN' => '42',
+'HANGUL' => '34',
+'HEBREW' => '15',
+'HIRAGANA' => '43',
+'Halfwidth and Fullwidth Forms' => '136',
+'Hangul Compatibility Jamo' => '115',
+'Hangul Jamo' => '80',
+'Hangul Syllable' => '2',
+'Hangul Syllables' => '124',
+'Hebrew Block' => '61',
+'Hex_Digit' => '153',
+'High Private Use Surrogates' => '126',
+'High Surrogates' => '125',
+'Hiragana Block' => '112',
+'Hyphen' => '150',
+'ID_Continue' => '170',
+'ID_Start' => '169',
+'INHERITED' => '12',
+'IPA Extensions' => '55',
+'Ideographic' => '161',
+'Ideographic Description Characters' => '110',
+'Join_Control' => '158',
+'KANNADA' => '26',
+'KATAKANA' => '44',
+'KHMER' => '40',
+'Kanbun' => '116',
+'Kangxi Radicals' => '109',
+'Kannada Block' => '72',
+'Katakana Block' => '113',
+'Khmer Block' => '86',
+'LAO' => '30',
+'LATIN' => '10',
+'Lampersand' => '168',
+'Lao Block' => '76',
+'Latin Extended Additional' => '88',
+'Latin Extended-A' => '53',
+'Latin Extended-B' => '54',
+'Latin-1 Supplement' => '52',
+'Letterlike Symbols' => '94',
+'Low Surrogate' => '5',
+'Low Surrogates' => '127',
+'Lowercase' => '165',
+'MALAYALAM' => '27',
+'MONGOLIAN' => '41',
+'MYANMAR' => '32',
+'Malayalam Block' => '73',
+'Math' => '167',
+'Mathematical Alphanumeric Symbols' => '142',
+'Mathematical Operators' => '97',
+'Miscellaneous Symbols' => '105',
+'Miscellaneous Technical' => '98',
+'Mongolian Block' => '87',
+'Musical Symbols' => '141',
+'Myanmar Block' => '78',
+'Non Private Use High Surrogate' => '3',
+'Noncharacter_Code_Point' => '162',
+'Number Forms' => '95',
+'OGHAM' => '38',
+'OLD-ITALIC' => '47',
+'ORIYA' => '23',
+'Ogham Block' => '84',
+'Old Italic' => '137',
+'Optical Character Recognition' => '100',
+'Oriya Block' => '69',
+'Other_Alphabetic' => '157',
+'Other_Lowercase' => '156',
+'Other_Math' => '149',
+'Other_Uppercase' => '160',
+'Plane 15 Private Use' => '8',
+'Plane 16 Private Use' => '9',
+'Private Use' => '6',
+'Private Use High Surrogate' => '4',
+'Quotation_Mark' => '148',
+'RUNIC' => '39',
+'Runic Block' => '85',
+'SINHALA' => '28',
+'SYRIAC' => '17',
+'Sinhala Block' => '74',
+'Small Form Variants' => '133',
+'Spacing Modifier Letters' => '56',
+'Specials' => '135',
+'Superscripts and Subscripts' => '91',
+'Syriac Block' => '63',
+'TAMIL' => '24',
+'TELUGU' => '25',
+'THAANA' => '18',
+'THAI' => '29',
+'TIBETAN' => '31',
+'Tags' => '145',
+'Tamil Block' => '70',
+'Telugu Block' => '71',
+'Terminal_Punctuation' => '147',
+'Thaana Block' => '64',
+'Thai Block' => '75',
+'Tibetan Block' => '77',
+'Unified Canadian Aboriginal Syllabics' => '83',
+'Uppercase' => '166',
+'White_space' => '146',
+'YI' => '46',
+'Yi Radicals' => '123',
+'Yi Syllables' => '122',
+);
+%utf8::InPat =
+(
+'al' => {
+       'Alphabetic' => '164',
+       'Alphabetic(?:[-_]|\s+)?Presentation(?:[-_]|\s+)?Forms' => '129',
+},
+'an' => {
+       'Any' => '171',
+},
+'ar' => {
+       'ARABIC' => '16',
+       'ARMENIAN' => '14',
+       'Arabic(?:[-_]|\s+)?Block' => '62',
+       'Arabic(?:[-_]|\s+)?Presentation(?:[-_]|\s+)?Forms(?:[-_]|\s+)?A' => '130',
+       'Arabic(?:[-_]|\s+)?Presentation(?:[-_]|\s+)?Forms(?:[-_]|\s+)?B' => '134',
+       'Armenian(?:[-_]|\s+)?Block' => '60',
+       'Arrows' => '96',
+},
+'as' => {
+       'ASCII(?:[-_]|\s+)?Hex(?:[-_]|\s+)?Digit' => '152',
+       'Assigned' => '163',
+},
+'ba' => {
+       'Basic(?:[-_]|\s+)?Latin' => '51',
+},
+'be' => {
+       'BENGALI' => '20',
+       'Bengali(?:[-_]|\s+)?Block' => '66',
+},
+'bi' => {
+       'Bidi(?:[-_]|\s+)?Control' => '159',
+},
+'bl' => {
+       'Block(?:[-_]|\s+)?Elements' => '103',
+},
+'bo' => {
+       'BOPOMOFO' => '45',
+       'Bopomofo(?:[-_]|\s+)?Block' => '114',
+       'Bopomofo(?:[-_]|\s+)?Extended' => '117',
+       'Box(?:[-_]|\s+)?Drawing' => '102',
+},
+'br' => {
+       'Braille(?:[-_]|\s+)?Patterns' => '107',
+},
+'by' => {
+       'Byzantine(?:[-_]|\s+)?Musical(?:[-_]|\s+)?Symbols' => '140',
+},
+'ca' => {
+       'CANADIAN(?:[-_]|\s+)?ABORIGINAL' => '37',
+},
+'ch' => {
+       'CHEROKEE' => '36',
+       'Cherokee(?:[-_]|\s+)?Block' => '82',
+},
+'cj' => {
+       'CJK(?:[-_]|\s+)?Compatibility' => '119',
+       'CJK(?:[-_]|\s+)?Compatibility(?:[-_]|\s+)?Forms' => '132',
+       'CJK(?:[-_]|\s+)?Compatibility(?:[-_]|\s+)?Ideographs' => '128',
+       'CJK(?:[-_]|\s+)?Compatibility(?:[-_]|\s+)?Ideographs(?:[-_]|\s+)?Supplement' => '144',
+       'CJK(?:[-_]|\s+)?Ideograph' => '1',
+       'CJK(?:[-_]|\s+)?Ideograph(?:[-_]|\s+)?Extension(?:[-_]|\s+)?A' => '0',
+       'CJK(?:[-_]|\s+)?Ideograph(?:[-_]|\s+)?Extension(?:[-_]|\s+)?B' => '7',
+       'CJK(?:[-_]|\s+)?Radicals(?:[-_]|\s+)?Supplement' => '108',
+       'CJK(?:[-_]|\s+)?Symbols(?:[-_]|\s+)?and(?:[-_]|\s+)?Punctuation' => '111',
+       'CJK(?:[-_]|\s+)?Unified(?:[-_]|\s+)?Ideographs' => '121',
+       'CJK(?:[-_]|\s+)?Unified(?:[-_]|\s+)?Ideographs(?:[-_]|\s+)?Extension(?:[-_]|\s+)?A' => '120',
+       'CJK(?:[-_]|\s+)?Unified(?:[-_]|\s+)?Ideographs(?:[-_]|\s+)?Extension(?:[-_]|\s+)?B' => '143',
+},
+'co' => {
+       'Combining(?:[-_]|\s+)?Diacritical(?:[-_]|\s+)?Marks' => '57',
+       'Combining(?:[-_]|\s+)?Half(?:[-_]|\s+)?Marks' => '131',
+       'Combining(?:[-_]|\s+)?Marks(?:[-_]|\s+)?for(?:[-_]|\s+)?Symbols' => '93',
+       'Common' => '50',
+       'Control(?:[-_]|\s+)?Pictures' => '99',
+},
+'cu' => {
+       'Currency(?:[-_]|\s+)?Symbols' => '92',
+},
+'cy' => {
+       'CYRILLIC' => '13',
+       'Cyrillic(?:[-_]|\s+)?Block' => '59',
+},
+'da' => {
+       'Dash' => '151',
+},
+'de' => {
+       'DESERET' => '49',
+       'DEVANAGARI' => '19',
+       'Deseret(?:[-_]|\s+)?Block' => '139',
+       'Devanagari(?:[-_]|\s+)?Block' => '65',
+},
+'di' => {
+       'Diacritic' => '154',
+       'Dingbats' => '106',
+},
+'en' => {
+       'Enclosed(?:[-_]|\s+)?Alphanumerics' => '101',
+       'Enclosed(?:[-_]|\s+)?CJK(?:[-_]|\s+)?Letters(?:[-_]|\s+)?and(?:[-_]|\s+)?Months' => '118',
+},
+'et' => {
+       'ETHIOPIC' => '35',
+       'Ethiopic(?:[-_]|\s+)?Block' => '81',
+},
+'ex' => {
+       'Extender' => '155',
+},
+'ge' => {
+       'GEORGIAN' => '33',
+       'General(?:[-_]|\s+)?Punctuation' => '90',
+       'Geometric(?:[-_]|\s+)?Shapes' => '104',
+       'Georgian(?:[-_]|\s+)?Block' => '79',
+},
+'go' => {
+       'GOTHIC' => '48',
+       'Gothic(?:[-_]|\s+)?Block' => '138',
+},
+'gr' => {
+       'GREEK' => '11',
+       'Greek(?:[-_]|\s+)?Block' => '58',
+       'Greek(?:[-_]|\s+)?Extended' => '89',
+},
+'gu' => {
+       'GUJARATI' => '22',
+       'GURMUKHI' => '21',
+       'Gujarati(?:[-_]|\s+)?Block' => '68',
+       'Gurmukhi(?:[-_]|\s+)?Block' => '67',
+},
+'ha' => {
+       'HAN' => '42',
+       'HANGUL' => '34',
+       'Halfwidth(?:[-_]|\s+)?and(?:[-_]|\s+)?Fullwidth(?:[-_]|\s+)?Forms' => '136',
+       'Hangul(?:[-_]|\s+)?Compatibility(?:[-_]|\s+)?Jamo' => '115',
+       'Hangul(?:[-_]|\s+)?Jamo' => '80',
+       'Hangul(?:[-_]|\s+)?Syllable' => '2',
+       'Hangul(?:[-_]|\s+)?Syllables' => '124',
+},
+'he' => {
+       'HEBREW' => '15',
+       'Hebrew(?:[-_]|\s+)?Block' => '61',
+       'Hex(?:[-_]|\s+)?Digit' => '153',
+},
+'hi' => {
+       'HIRAGANA' => '43',
+       'High(?:[-_]|\s+)?Private(?:[-_]|\s+)?Use(?:[-_]|\s+)?Surrogates' => '126',
+       'High(?:[-_]|\s+)?Surrogates' => '125',
+       'Hiragana(?:[-_]|\s+)?Block' => '112',
+},
+'hy' => {
+       'Hyphen' => '150',
+},
+'id' => {
+       'ID(?:[-_]|\s+)?Continue' => '170',
+       'ID(?:[-_]|\s+)?Start' => '169',
+       'Ideographic' => '161',
+       'Ideographic(?:[-_]|\s+)?Description(?:[-_]|\s+)?Characters' => '110',
+},
+'in' => {
+       'INHERITED' => '12',
+},
+'ip' => {
+       'IPA(?:[-_]|\s+)?Extensions' => '55',
+},
+'jo' => {
+       'Join(?:[-_]|\s+)?Control' => '158',
+},
+'ka' => {
+       'KANNADA' => '26',
+       'KATAKANA' => '44',
+       'Kanbun' => '116',
+       'Kangxi(?:[-_]|\s+)?Radicals' => '109',
+       'Kannada(?:[-_]|\s+)?Block' => '72',
+       'Katakana(?:[-_]|\s+)?Block' => '113',
+},
+'kh' => {
+       'KHMER' => '40',
+       'Khmer(?:[-_]|\s+)?Block' => '86',
+},
+'la' => {
+       'LAO' => '30',
+       'LATIN' => '10',
+       'Lampersand' => '168',
+       'Lao(?:[-_]|\s+)?Block' => '76',
+       'Latin(?:[-_]|\s+)?Extended(?:[-_]|\s+)?Additional' => '88',
+       'Latin(?:[-_]|\s+)?Extended(?:[-_]|\s+)?A' => '53',
+       'Latin(?:[-_]|\s+)?Extended(?:[-_]|\s+)?B' => '54',
+       'Latin(?:[-_]|\s+)?1(?:[-_]|\s+)?Supplement' => '52',
+},
+'le' => {
+       'Letterlike(?:[-_]|\s+)?Symbols' => '94',
+},
+'lo' => {
+       'Low(?:[-_]|\s+)?Surrogate' => '5',
+       'Low(?:[-_]|\s+)?Surrogates' => '127',
+       'Lowercase' => '165',
+},
+'ma' => {
+       'MALAYALAM' => '27',
+       'Malayalam(?:[-_]|\s+)?Block' => '73',
+       'Math' => '167',
+       'Mathematical(?:[-_]|\s+)?Alphanumeric(?:[-_]|\s+)?Symbols' => '142',
+       'Mathematical(?:[-_]|\s+)?Operators' => '97',
+},
+'mi' => {
+       'Miscellaneous(?:[-_]|\s+)?Symbols' => '105',
+       'Miscellaneous(?:[-_]|\s+)?Technical' => '98',
+},
+'mo' => {
+       'MONGOLIAN' => '41',
+       'Mongolian(?:[-_]|\s+)?Block' => '87',
+},
+'mu' => {
+       'Musical(?:[-_]|\s+)?Symbols' => '141',
+},
+'my' => {
+       'MYANMAR' => '32',
+       'Myanmar(?:[-_]|\s+)?Block' => '78',
+},
+'no' => {
+       'Non(?:[-_]|\s+)?Private(?:[-_]|\s+)?Use(?:[-_]|\s+)?High(?:[-_]|\s+)?Surrogate' => '3',
+       'Noncharacter(?:[-_]|\s+)?Code(?:[-_]|\s+)?Point' => '162',
+},
+'nu' => {
+       'Number(?:[-_]|\s+)?Forms' => '95',
+},
+'og' => {
+       'OGHAM' => '38',
+       'Ogham(?:[-_]|\s+)?Block' => '84',
+},
+'ol' => {
+       'OLD(?:[-_]|\s+)?ITALIC' => '47',
+       'Old(?:[-_]|\s+)?Italic' => '137',
+},
+'op' => {
+       'Optical(?:[-_]|\s+)?Character(?:[-_]|\s+)?Recognition' => '100',
+},
+'or' => {
+       'ORIYA' => '23',
+       'Oriya(?:[-_]|\s+)?Block' => '69',
+},
+'ot' => {
+       'Other(?:[-_]|\s+)?Alphabetic' => '157',
+       'Other(?:[-_]|\s+)?Lowercase' => '156',
+       'Other(?:[-_]|\s+)?Math' => '149',
+       'Other(?:[-_]|\s+)?Uppercase' => '160',
+},
+'pl' => {
+       'Plane(?:[-_]|\s+)?15(?:[-_]|\s+)?Private(?:[-_]|\s+)?Use' => '8',
+       'Plane(?:[-_]|\s+)?16(?:[-_]|\s+)?Private(?:[-_]|\s+)?Use' => '9',
+},
+'pr' => {
+       'Private(?:[-_]|\s+)?Use' => '6',
+       'Private(?:[-_]|\s+)?Use(?:[-_]|\s+)?High(?:[-_]|\s+)?Surrogate' => '4',
+},
+'qu' => {
+       'Quotation(?:[-_]|\s+)?Mark' => '148',
+},
+'ru' => {
+       'RUNIC' => '39',
+       'Runic(?:[-_]|\s+)?Block' => '85',
+},
+'si' => {
+       'SINHALA' => '28',
+       'Sinhala(?:[-_]|\s+)?Block' => '74',
+},
+'sm' => {
+       'Small(?:[-_]|\s+)?Form(?:[-_]|\s+)?Variants' => '133',
+},
+'sp' => {
+       'Spacing(?:[-_]|\s+)?Modifier(?:[-_]|\s+)?Letters' => '56',
+       'Specials' => '135',
+},
+'su' => {
+       'Superscripts(?:[-_]|\s+)?and(?:[-_]|\s+)?Subscripts' => '91',
+},
+'sy' => {
+       'SYRIAC' => '17',
+       'Syriac(?:[-_]|\s+)?Block' => '63',
+},
+'ta' => {
+       'TAMIL' => '24',
+       'Tags' => '145',
+       'Tamil(?:[-_]|\s+)?Block' => '70',
+},
+'te' => {
+       'TELUGU' => '25',
+       'Telugu(?:[-_]|\s+)?Block' => '71',
+       'Terminal(?:[-_]|\s+)?Punctuation' => '147',
+},
+'th' => {
+       'THAANA' => '18',
+       'THAI' => '29',
+       'Thaana(?:[-_]|\s+)?Block' => '64',
+       'Thai(?:[-_]|\s+)?Block' => '75',
+},
+'ti' => {
+       'TIBETAN' => '31',
+       'Tibetan(?:[-_]|\s+)?Block' => '77',
+},
+'un' => {
+       'Unified(?:[-_]|\s+)?Canadian(?:[-_]|\s+)?Aboriginal(?:[-_]|\s+)?Syllabics' => '83',
+},
+'up' => {
+       'Uppercase' => '166',
+},
+'wh' => {
+       'White(?:[-_]|\s+)?space' => '146',
+},
+'yi' => {
+       'YI' => '46',
+       'Yi(?:[-_]|\s+)?Radicals' => '123',
+       'Yi(?:[-_]|\s+)?Syllables' => '122',
+},
 );