X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=lib%2Funicore%2FIn.pl;h=cd872faac120dc4f94831c207f9803ffa14810e5;hb=d2d499f5a831730fa4ee7eedade0afc419d869bc;hp=2a82359701da0d5313ea03435400368b904056fb;hpb=c602af677d8c7bd977ba491d5b450211d4807d32;p=p5sagit%2Fp5-mst-13.2.git diff --git a/lib/unicore/In.pl b/lib/unicore/In.pl index 2a82359..cd872fa 100644 --- a/lib/unicore/In.pl +++ b/lib/unicore/In.pl @@ -1,423 +1,491 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.txt. +# This file is built by mktables from e.g. Unicode.txt. # Any changes made here will be lost! -%utf8::In = ( -'LATIN' => 0, -'GREEK' => 1, -'CYRILLIC' => 2, -'ARMENIAN' => 3, -'HEBREW' => 4, -'ARABIC' => 5, -'SYRIAC' => 6, -'THAANA' => 7, -'DEVANAGARI' => 8, -'BENGALI' => 9, -'GURMUKHI' => 10, -'GUJARATI' => 11, -'ORIYA' => 12, -'TAMIL' => 13, -'TELUGU' => 14, -'KANNADA' => 15, -'MALAYALAM' => 16, -'SINHALA' => 17, -'THAI' => 18, -'LAO' => 19, -'TIBETAN' => 20, -'MYANMAR' => 21, -'GEORGIAN' => 22, -'HANGUL' => 23, -'ETHIOPIC' => 24, -'CHEROKEE' => 25, -'CANADIAN-ABORIGINAL' => 26, -'OGHAM' => 27, -'RUNIC' => 28, -'KHMER' => 29, -'MONGOLIAN' => 30, -'HIRAGANA' => 31, -'KATAKANA' => 32, -'BOPOMOFO' => 33, -'HAN' => 34, -'YI' => 35, -'OLD-ITALIC' => 36, -'GOTHIC' => 37, -'DESERET' => 38, -'INHERITED' => 39, -'Basic Latin' => 40, -'Latin-1 Supplement' => 41, -'Latin Extended-A' => 42, -'Latin Extended-B' => 43, -'IPA Extensions' => 44, -'Spacing Modifier Letters' => 45, -'Combining Diacritical Marks' => 46, -'Greek Block' => 47, -'Cyrillic Block' => 48, -'Armenian Block' => 49, -'Hebrew Block' => 50, -'Arabic Block' => 51, -'Syriac Block' => 52, -'Thaana Block' => 53, -'Devanagari Block' => 54, -'Bengali Block' => 55, -'Gurmukhi Block' => 56, -'Gujarati Block' => 57, -'Oriya Block' => 58, -'Tamil Block' => 59, -'Telugu Block' => 60, -'Kannada Block' => 61, -'Malayalam Block' => 62, -'Sinhala Block' => 63, -'Thai Block' => 64, -'Lao Block' => 65, -'Tibetan Block' => 66, -'Myanmar Block' => 67, -'Georgian Block' => 68, -'Hangul Jamo' => 69, -'Ethiopic Block' => 70, -'Cherokee Block' => 71, -'Unified Canadian Aboriginal Syllabics' => 72, -'Ogham Block' => 73, -'Runic Block' => 74, -'Khmer Block' => 75, -'Mongolian Block' => 76, -'Latin Extended Additional' => 77, -'Greek Extended' => 78, -'General Punctuation' => 79, -'Superscripts and Subscripts' => 80, -'Currency Symbols' => 81, -'Combining Marks for Symbols' => 82, -'Letterlike Symbols' => 83, -'Number Forms' => 84, -'Arrows' => 85, -'Mathematical Operators' => 86, -'Miscellaneous Technical' => 87, -'Control Pictures' => 88, -'Optical Character Recognition' => 89, -'Enclosed Alphanumerics' => 90, -'Box Drawing' => 91, -'Block Elements' => 92, -'Geometric Shapes' => 93, -'Miscellaneous Symbols' => 94, -'Dingbats' => 95, -'Braille Patterns' => 96, -'CJK Radicals Supplement' => 97, -'Kangxi Radicals' => 98, -'Ideographic Description Characters' => 99, -'CJK Symbols and Punctuation' => 100, -'Hiragana Block' => 101, -'Katakana Block' => 102, -'Bopomofo Block' => 103, -'Hangul Compatibility Jamo' => 104, -'Kanbun' => 105, -'Bopomofo Extended' => 106, -'Enclosed CJK Letters and Months' => 107, -'CJK Compatibility' => 108, -'CJK Unified Ideographs Extension A' => 109, -'CJK Unified Ideographs' => 110, -'Yi Syllables' => 111, -'Yi Radicals' => 112, -'Hangul Syllables' => 113, -'High Surrogates' => 114, -'High Private Use Surrogates' => 115, -'Low Surrogates' => 116, -'Private Use' => 117, -'CJK Compatibility Ideographs' => 118, -'Alphabetic Presentation Forms' => 119, -'Arabic Presentation Forms-A' => 120, -'Combining Half Marks' => 121, -'CJK Compatibility Forms' => 122, -'Small Form Variants' => 123, -'Arabic Presentation Forms-B' => 124, -'Specials' => 125, -'Halfwidth and Fullwidth Forms' => 126, -'Old Italic' => 127, -'Gothic Block' => 128, -'Deseret Block' => 129, -'Byzantine Musical Symbols' => 130, -'Musical Symbols' => 131, -'Mathematical Alphanumeric Symbols' => 132, -'CJK Unified Ideographs Extension B' => 133, -'CJK Compatibility Ideographs Supplement' => 134, -'Tags' => 135, -'Common' => 136, +%utf8::In = +( +'ARABIC' => '16', +'ARMENIAN' => '14', +'ASCII_Hex_Digit' => '152', +'Alphabetic' => '164', +'Alphabetic Presentation Forms' => '129', +'Any' => '171', +'Arabic Block' => '62', +'Arabic Presentation Forms-A' => '130', +'Arabic Presentation Forms-B' => '134', +'Armenian Block' => '60', +'Arrows' => '96', +'Assigned' => '163', +'BENGALI' => '20', +'BOPOMOFO' => '45', +'Basic Latin' => '51', +'Bengali Block' => '66', +'Bidi_Control' => '159', +'Block Elements' => '103', +'Bopomofo Block' => '114', +'Bopomofo Extended' => '117', +'Box Drawing' => '102', +'Braille Patterns' => '107', +'Byzantine Musical Symbols' => '140', +'CANADIAN-ABORIGINAL' => '37', +'CHEROKEE' => '36', +'CJK Compatibility' => '119', +'CJK Compatibility Forms' => '132', +'CJK Compatibility Ideographs' => '128', +'CJK Compatibility Ideographs Supplement' => '144', +'CJK Ideograph' => '1', +'CJK Ideograph Extension A' => '0', +'CJK Ideograph Extension B' => '7', +'CJK Radicals Supplement' => '108', +'CJK Symbols and Punctuation' => '111', +'CJK Unified Ideographs' => '121', +'CJK Unified Ideographs Extension A' => '120', +'CJK Unified Ideographs Extension B' => '143', +'CYRILLIC' => '13', +'Cherokee Block' => '82', +'Combining Diacritical Marks' => '57', +'Combining Half Marks' => '131', +'Combining Marks for Symbols' => '93', +'Common' => '50', +'Control Pictures' => '99', +'Currency Symbols' => '92', +'Cyrillic Block' => '59', +'DESERET' => '49', +'DEVANAGARI' => '19', +'Dash' => '151', +'Deseret Block' => '139', +'Devanagari Block' => '65', +'Diacritic' => '154', +'Dingbats' => '106', +'ETHIOPIC' => '35', +'Enclosed Alphanumerics' => '101', +'Enclosed CJK Letters and Months' => '118', +'Ethiopic Block' => '81', +'Extender' => '155', +'GEORGIAN' => '33', +'GOTHIC' => '48', +'GREEK' => '11', +'GUJARATI' => '22', +'GURMUKHI' => '21', +'General Punctuation' => '90', +'Geometric Shapes' => '104', +'Georgian Block' => '79', +'Gothic Block' => '138', +'Greek Block' => '58', +'Greek Extended' => '89', +'Gujarati Block' => '68', +'Gurmukhi Block' => '67', +'HAN' => '42', +'HANGUL' => '34', +'HEBREW' => '15', +'HIRAGANA' => '43', +'Halfwidth and Fullwidth Forms' => '136', +'Hangul Compatibility Jamo' => '115', +'Hangul Jamo' => '80', +'Hangul Syllable' => '2', +'Hangul Syllables' => '124', +'Hebrew Block' => '61', +'Hex_Digit' => '153', +'High Private Use Surrogates' => '126', +'High Surrogates' => '125', +'Hiragana Block' => '112', +'Hyphen' => '150', +'ID_Continue' => '170', +'ID_Start' => '169', +'INHERITED' => '12', +'IPA Extensions' => '55', +'Ideographic' => '161', +'Ideographic Description Characters' => '110', +'Join_Control' => '158', +'KANNADA' => '26', +'KATAKANA' => '44', +'KHMER' => '40', +'Kanbun' => '116', +'Kangxi Radicals' => '109', +'Kannada Block' => '72', +'Katakana Block' => '113', +'Khmer Block' => '86', +'LAO' => '30', +'LATIN' => '10', +'Lampersand' => '168', +'Lao Block' => '76', +'Latin Extended Additional' => '88', +'Latin Extended-A' => '53', +'Latin Extended-B' => '54', +'Latin-1 Supplement' => '52', +'Letterlike Symbols' => '94', +'Low Surrogate' => '5', +'Low Surrogates' => '127', +'Lowercase' => '165', +'MALAYALAM' => '27', +'MONGOLIAN' => '41', +'MYANMAR' => '32', +'Malayalam Block' => '73', +'Math' => '167', +'Mathematical Alphanumeric Symbols' => '142', +'Mathematical Operators' => '97', +'Miscellaneous Symbols' => '105', +'Miscellaneous Technical' => '98', +'Mongolian Block' => '87', +'Musical Symbols' => '141', +'Myanmar Block' => '78', +'Non Private Use High Surrogate' => '3', +'Noncharacter_Code_Point' => '162', +'Number Forms' => '95', +'OGHAM' => '38', +'OLD-ITALIC' => '47', +'ORIYA' => '23', +'Ogham Block' => '84', +'Old Italic' => '137', +'Optical Character Recognition' => '100', +'Oriya Block' => '69', +'Other_Alphabetic' => '157', +'Other_Lowercase' => '156', +'Other_Math' => '149', +'Other_Uppercase' => '160', +'Plane 15 Private Use' => '8', +'Plane 16 Private Use' => '9', +'Private Use' => '6', +'Private Use High Surrogate' => '4', +'Quotation_Mark' => '148', +'RUNIC' => '39', +'Runic Block' => '85', +'SINHALA' => '28', +'SYRIAC' => '17', +'Sinhala Block' => '74', +'Small Form Variants' => '133', +'Spacing Modifier Letters' => '56', +'Specials' => '135', +'Superscripts and Subscripts' => '91', +'Syriac Block' => '63', +'TAMIL' => '24', +'TELUGU' => '25', +'THAANA' => '18', +'THAI' => '29', +'TIBETAN' => '31', +'Tags' => '145', +'Tamil Block' => '70', +'Telugu Block' => '71', +'Terminal_Punctuation' => '147', +'Thaana Block' => '64', +'Thai Block' => '75', +'Tibetan Block' => '77', +'Unified Canadian Aboriginal Syllabics' => '83', +'Uppercase' => '166', +'White_space' => '146', +'YI' => '46', +'Yi Radicals' => '123', +'Yi Syllables' => '122', ); -%utf8::InPat = ( -'alp' => { - 'Alphabetic(?:[-_]|\s+)?Presentation(?:[-_]|\s+)?Forms' => 'Alphabetic Presentation Forms', -}, -'ara' => { - 'ARABIC' => 'ARABIC', - 'Arabic(?:[-_]|\s+)?Block' => 'Arabic Block', - 'Arabic(?:[-_]|\s+)?Presentation(?:[-_]|\s+)?Forms(?:[-_]|\s+)?A' => 'Arabic Presentation Forms-A', - 'Arabic(?:[-_]|\s+)?Presentation(?:[-_]|\s+)?Forms(?:[-_]|\s+)?B' => 'Arabic Presentation Forms-B', -}, -'arm' => { - 'ARMENIAN' => 'ARMENIAN', - 'Armenian(?:[-_]|\s+)?Block' => 'Armenian Block', -}, -'arr' => { - 'Arrows' => 'Arrows', -}, -'bas' => { - 'Basic(?:[-_]|\s+)?Latin' => 'Basic Latin', -}, -'ben' => { - 'BENGALI' => 'BENGALI', - 'Bengali(?:[-_]|\s+)?Block' => 'Bengali Block', -}, -'blo' => { - 'Block(?:[-_]|\s+)?Elements' => 'Block Elements', -}, -'bop' => { - 'BOPOMOFO' => 'BOPOMOFO', - 'Bopomofo(?:[-_]|\s+)?Block' => 'Bopomofo Block', - 'Bopomofo(?:[-_]|\s+)?Extended' => 'Bopomofo Extended', -}, -'box' => { - 'Box(?:[-_]|\s+)?Drawing' => 'Box Drawing', -}, -'bra' => { - 'Braille(?:[-_]|\s+)?Patterns' => 'Braille Patterns', -}, -'byz' => { - 'Byzantine(?:[-_]|\s+)?Musical(?:[-_]|\s+)?Symbols' => 'Byzantine Musical Symbols', -}, -'can' => { - 'CANADIAN(?:[-_]|\s+)?ABORIGINAL' => 'CANADIAN-ABORIGINAL', -}, -'che' => { - 'CHEROKEE' => 'CHEROKEE', - 'Cherokee(?:[-_]|\s+)?Block' => 'Cherokee Block', -}, -'cjk' => { - 'CJK(?:[-_]|\s+)?Radicals(?:[-_]|\s+)?Supplement' => 'CJK Radicals Supplement', - 'CJK(?:[-_]|\s+)?Symbols(?:[-_]|\s+)?and(?:[-_]|\s+)?Punctuation' => 'CJK Symbols and Punctuation', - 'CJK(?:[-_]|\s+)?Compatibility' => 'CJK Compatibility', - 'CJK(?:[-_]|\s+)?Unified(?:[-_]|\s+)?Ideographs(?:[-_]|\s+)?Extension(?:[-_]|\s+)?A' => 'CJK Unified Ideographs Extension A', - 'CJK(?:[-_]|\s+)?Unified(?:[-_]|\s+)?Ideographs' => 'CJK Unified Ideographs', - 'CJK(?:[-_]|\s+)?Compatibility(?:[-_]|\s+)?Ideographs' => 'CJK Compatibility Ideographs', - 'CJK(?:[-_]|\s+)?Compatibility(?:[-_]|\s+)?Forms' => 'CJK Compatibility Forms', - 'CJK(?:[-_]|\s+)?Unified(?:[-_]|\s+)?Ideographs(?:[-_]|\s+)?Extension(?:[-_]|\s+)?B' => 'CJK Unified Ideographs Extension B', - 'CJK(?:[-_]|\s+)?Compatibility(?:[-_]|\s+)?Ideographs(?:[-_]|\s+)?Supplement' => 'CJK Compatibility Ideographs Supplement', -}, -'com' => { - 'Combining(?:[-_]|\s+)?Diacritical(?:[-_]|\s+)?Marks' => 'Combining Diacritical Marks', - 'Combining(?:[-_]|\s+)?Marks(?:[-_]|\s+)?for(?:[-_]|\s+)?Symbols' => 'Combining Marks for Symbols', - 'Combining(?:[-_]|\s+)?Half(?:[-_]|\s+)?Marks' => 'Combining Half Marks', - 'Common' => 'Common', -}, -'con' => { - 'Control(?:[-_]|\s+)?Pictures' => 'Control Pictures', -}, -'cur' => { - 'Currency(?:[-_]|\s+)?Symbols' => 'Currency Symbols', -}, -'cyr' => { - 'CYRILLIC' => 'CYRILLIC', - 'Cyrillic(?:[-_]|\s+)?Block' => 'Cyrillic Block', -}, -'des' => { - 'DESERET' => 'DESERET', - 'Deseret(?:[-_]|\s+)?Block' => 'Deseret Block', -}, -'dev' => { - 'DEVANAGARI' => 'DEVANAGARI', - 'Devanagari(?:[-_]|\s+)?Block' => 'Devanagari Block', -}, -'din' => { - 'Dingbats' => 'Dingbats', -}, -'enc' => { - 'Enclosed(?:[-_]|\s+)?Alphanumerics' => 'Enclosed Alphanumerics', - 'Enclosed(?:[-_]|\s+)?CJK(?:[-_]|\s+)?Letters(?:[-_]|\s+)?and(?:[-_]|\s+)?Months' => 'Enclosed CJK Letters and Months', -}, -'eth' => { - 'ETHIOPIC' => 'ETHIOPIC', - 'Ethiopic(?:[-_]|\s+)?Block' => 'Ethiopic Block', -}, -'gen' => { - 'General(?:[-_]|\s+)?Punctuation' => 'General Punctuation', -}, -'geo' => { - 'GEORGIAN' => 'GEORGIAN', - 'Georgian(?:[-_]|\s+)?Block' => 'Georgian Block', - 'Geometric(?:[-_]|\s+)?Shapes' => 'Geometric Shapes', -}, -'got' => { - 'GOTHIC' => 'GOTHIC', - 'Gothic(?:[-_]|\s+)?Block' => 'Gothic Block', -}, -'gre' => { - 'GREEK' => 'GREEK', - 'Greek(?:[-_]|\s+)?Block' => 'Greek Block', - 'Greek(?:[-_]|\s+)?Extended' => 'Greek Extended', +%utf8::InPat = +( +'al' => { + 'Alphabetic' => '164', + 'Alphabetic(?:[-_]|\s+)?Presentation(?:[-_]|\s+)?Forms' => '129', +}, +'an' => { + 'Any' => '171', +}, +'ar' => { + 'ARABIC' => '16', + 'ARMENIAN' => '14', + 'Arabic(?:[-_]|\s+)?Block' => '62', + 'Arabic(?:[-_]|\s+)?Presentation(?:[-_]|\s+)?Forms(?:[-_]|\s+)?A' => '130', + 'Arabic(?:[-_]|\s+)?Presentation(?:[-_]|\s+)?Forms(?:[-_]|\s+)?B' => '134', + 'Armenian(?:[-_]|\s+)?Block' => '60', + 'Arrows' => '96', +}, +'as' => { + 'ASCII(?:[-_]|\s+)?Hex(?:[-_]|\s+)?Digit' => '152', + 'Assigned' => '163', +}, +'ba' => { + 'Basic(?:[-_]|\s+)?Latin' => '51', +}, +'be' => { + 'BENGALI' => '20', + 'Bengali(?:[-_]|\s+)?Block' => '66', +}, +'bi' => { + 'Bidi(?:[-_]|\s+)?Control' => '159', +}, +'bl' => { + 'Block(?:[-_]|\s+)?Elements' => '103', +}, +'bo' => { + 'BOPOMOFO' => '45', + 'Bopomofo(?:[-_]|\s+)?Block' => '114', + 'Bopomofo(?:[-_]|\s+)?Extended' => '117', + 'Box(?:[-_]|\s+)?Drawing' => '102', +}, +'br' => { + 'Braille(?:[-_]|\s+)?Patterns' => '107', +}, +'by' => { + 'Byzantine(?:[-_]|\s+)?Musical(?:[-_]|\s+)?Symbols' => '140', +}, +'ca' => { + 'CANADIAN(?:[-_]|\s+)?ABORIGINAL' => '37', +}, +'ch' => { + 'CHEROKEE' => '36', + 'Cherokee(?:[-_]|\s+)?Block' => '82', +}, +'cj' => { + 'CJK(?:[-_]|\s+)?Compatibility' => '119', + 'CJK(?:[-_]|\s+)?Compatibility(?:[-_]|\s+)?Forms' => '132', + 'CJK(?:[-_]|\s+)?Compatibility(?:[-_]|\s+)?Ideographs' => '128', + 'CJK(?:[-_]|\s+)?Compatibility(?:[-_]|\s+)?Ideographs(?:[-_]|\s+)?Supplement' => '144', + 'CJK(?:[-_]|\s+)?Ideograph' => '1', + 'CJK(?:[-_]|\s+)?Ideograph(?:[-_]|\s+)?Extension(?:[-_]|\s+)?A' => '0', + 'CJK(?:[-_]|\s+)?Ideograph(?:[-_]|\s+)?Extension(?:[-_]|\s+)?B' => '7', + 'CJK(?:[-_]|\s+)?Radicals(?:[-_]|\s+)?Supplement' => '108', + 'CJK(?:[-_]|\s+)?Symbols(?:[-_]|\s+)?and(?:[-_]|\s+)?Punctuation' => '111', + 'CJK(?:[-_]|\s+)?Unified(?:[-_]|\s+)?Ideographs' => '121', + 'CJK(?:[-_]|\s+)?Unified(?:[-_]|\s+)?Ideographs(?:[-_]|\s+)?Extension(?:[-_]|\s+)?A' => '120', + 'CJK(?:[-_]|\s+)?Unified(?:[-_]|\s+)?Ideographs(?:[-_]|\s+)?Extension(?:[-_]|\s+)?B' => '143', +}, +'co' => { + 'Combining(?:[-_]|\s+)?Diacritical(?:[-_]|\s+)?Marks' => '57', + 'Combining(?:[-_]|\s+)?Half(?:[-_]|\s+)?Marks' => '131', + 'Combining(?:[-_]|\s+)?Marks(?:[-_]|\s+)?for(?:[-_]|\s+)?Symbols' => '93', + 'Common' => '50', + 'Control(?:[-_]|\s+)?Pictures' => '99', +}, +'cu' => { + 'Currency(?:[-_]|\s+)?Symbols' => '92', +}, +'cy' => { + 'CYRILLIC' => '13', + 'Cyrillic(?:[-_]|\s+)?Block' => '59', +}, +'da' => { + 'Dash' => '151', +}, +'de' => { + 'DESERET' => '49', + 'DEVANAGARI' => '19', + 'Deseret(?:[-_]|\s+)?Block' => '139', + 'Devanagari(?:[-_]|\s+)?Block' => '65', +}, +'di' => { + 'Diacritic' => '154', + 'Dingbats' => '106', +}, +'en' => { + 'Enclosed(?:[-_]|\s+)?Alphanumerics' => '101', + 'Enclosed(?:[-_]|\s+)?CJK(?:[-_]|\s+)?Letters(?:[-_]|\s+)?and(?:[-_]|\s+)?Months' => '118', +}, +'et' => { + 'ETHIOPIC' => '35', + 'Ethiopic(?:[-_]|\s+)?Block' => '81', +}, +'ex' => { + 'Extender' => '155', +}, +'ge' => { + 'GEORGIAN' => '33', + 'General(?:[-_]|\s+)?Punctuation' => '90', + 'Geometric(?:[-_]|\s+)?Shapes' => '104', + 'Georgian(?:[-_]|\s+)?Block' => '79', +}, +'go' => { + 'GOTHIC' => '48', + 'Gothic(?:[-_]|\s+)?Block' => '138', +}, +'gr' => { + 'GREEK' => '11', + 'Greek(?:[-_]|\s+)?Block' => '58', + 'Greek(?:[-_]|\s+)?Extended' => '89', +}, +'gu' => { + 'GUJARATI' => '22', + 'GURMUKHI' => '21', + 'Gujarati(?:[-_]|\s+)?Block' => '68', + 'Gurmukhi(?:[-_]|\s+)?Block' => '67', +}, +'ha' => { + 'HAN' => '42', + 'HANGUL' => '34', + 'Halfwidth(?:[-_]|\s+)?and(?:[-_]|\s+)?Fullwidth(?:[-_]|\s+)?Forms' => '136', + 'Hangul(?:[-_]|\s+)?Compatibility(?:[-_]|\s+)?Jamo' => '115', + 'Hangul(?:[-_]|\s+)?Jamo' => '80', + 'Hangul(?:[-_]|\s+)?Syllable' => '2', + 'Hangul(?:[-_]|\s+)?Syllables' => '124', +}, +'he' => { + 'HEBREW' => '15', + 'Hebrew(?:[-_]|\s+)?Block' => '61', + 'Hex(?:[-_]|\s+)?Digit' => '153', +}, +'hi' => { + 'HIRAGANA' => '43', + 'High(?:[-_]|\s+)?Private(?:[-_]|\s+)?Use(?:[-_]|\s+)?Surrogates' => '126', + 'High(?:[-_]|\s+)?Surrogates' => '125', + 'Hiragana(?:[-_]|\s+)?Block' => '112', +}, +'hy' => { + 'Hyphen' => '150', +}, +'id' => { + 'ID(?:[-_]|\s+)?Continue' => '170', + 'ID(?:[-_]|\s+)?Start' => '169', + 'Ideographic' => '161', + 'Ideographic(?:[-_]|\s+)?Description(?:[-_]|\s+)?Characters' => '110', +}, +'in' => { + 'INHERITED' => '12', +}, +'ip' => { + 'IPA(?:[-_]|\s+)?Extensions' => '55', +}, +'jo' => { + 'Join(?:[-_]|\s+)?Control' => '158', +}, +'ka' => { + 'KANNADA' => '26', + 'KATAKANA' => '44', + 'Kanbun' => '116', + 'Kangxi(?:[-_]|\s+)?Radicals' => '109', + 'Kannada(?:[-_]|\s+)?Block' => '72', + 'Katakana(?:[-_]|\s+)?Block' => '113', +}, +'kh' => { + 'KHMER' => '40', + 'Khmer(?:[-_]|\s+)?Block' => '86', +}, +'la' => { + 'LAO' => '30', + 'LATIN' => '10', + 'Lampersand' => '168', + 'Lao(?:[-_]|\s+)?Block' => '76', + 'Latin(?:[-_]|\s+)?Extended(?:[-_]|\s+)?Additional' => '88', + 'Latin(?:[-_]|\s+)?Extended(?:[-_]|\s+)?A' => '53', + 'Latin(?:[-_]|\s+)?Extended(?:[-_]|\s+)?B' => '54', + 'Latin(?:[-_]|\s+)?1(?:[-_]|\s+)?Supplement' => '52', +}, +'le' => { + 'Letterlike(?:[-_]|\s+)?Symbols' => '94', +}, +'lo' => { + 'Low(?:[-_]|\s+)?Surrogate' => '5', + 'Low(?:[-_]|\s+)?Surrogates' => '127', + 'Lowercase' => '165', +}, +'ma' => { + 'MALAYALAM' => '27', + 'Malayalam(?:[-_]|\s+)?Block' => '73', + 'Math' => '167', + 'Mathematical(?:[-_]|\s+)?Alphanumeric(?:[-_]|\s+)?Symbols' => '142', + 'Mathematical(?:[-_]|\s+)?Operators' => '97', +}, +'mi' => { + 'Miscellaneous(?:[-_]|\s+)?Symbols' => '105', + 'Miscellaneous(?:[-_]|\s+)?Technical' => '98', +}, +'mo' => { + 'MONGOLIAN' => '41', + 'Mongolian(?:[-_]|\s+)?Block' => '87', +}, +'mu' => { + 'Musical(?:[-_]|\s+)?Symbols' => '141', +}, +'my' => { + 'MYANMAR' => '32', + 'Myanmar(?:[-_]|\s+)?Block' => '78', +}, +'no' => { + 'Non(?:[-_]|\s+)?Private(?:[-_]|\s+)?Use(?:[-_]|\s+)?High(?:[-_]|\s+)?Surrogate' => '3', + 'Noncharacter(?:[-_]|\s+)?Code(?:[-_]|\s+)?Point' => '162', +}, +'nu' => { + 'Number(?:[-_]|\s+)?Forms' => '95', +}, +'og' => { + 'OGHAM' => '38', + 'Ogham(?:[-_]|\s+)?Block' => '84', +}, +'ol' => { + 'OLD(?:[-_]|\s+)?ITALIC' => '47', + 'Old(?:[-_]|\s+)?Italic' => '137', +}, +'op' => { + 'Optical(?:[-_]|\s+)?Character(?:[-_]|\s+)?Recognition' => '100', +}, +'or' => { + 'ORIYA' => '23', + 'Oriya(?:[-_]|\s+)?Block' => '69', +}, +'ot' => { + 'Other(?:[-_]|\s+)?Alphabetic' => '157', + 'Other(?:[-_]|\s+)?Lowercase' => '156', + 'Other(?:[-_]|\s+)?Math' => '149', + 'Other(?:[-_]|\s+)?Uppercase' => '160', +}, +'pl' => { + 'Plane(?:[-_]|\s+)?15(?:[-_]|\s+)?Private(?:[-_]|\s+)?Use' => '8', + 'Plane(?:[-_]|\s+)?16(?:[-_]|\s+)?Private(?:[-_]|\s+)?Use' => '9', +}, +'pr' => { + 'Private(?:[-_]|\s+)?Use' => '6', + 'Private(?:[-_]|\s+)?Use(?:[-_]|\s+)?High(?:[-_]|\s+)?Surrogate' => '4', +}, +'qu' => { + 'Quotation(?:[-_]|\s+)?Mark' => '148', +}, +'ru' => { + 'RUNIC' => '39', + 'Runic(?:[-_]|\s+)?Block' => '85', +}, +'si' => { + 'SINHALA' => '28', + 'Sinhala(?:[-_]|\s+)?Block' => '74', +}, +'sm' => { + 'Small(?:[-_]|\s+)?Form(?:[-_]|\s+)?Variants' => '133', +}, +'sp' => { + 'Spacing(?:[-_]|\s+)?Modifier(?:[-_]|\s+)?Letters' => '56', + 'Specials' => '135', +}, +'su' => { + 'Superscripts(?:[-_]|\s+)?and(?:[-_]|\s+)?Subscripts' => '91', +}, +'sy' => { + 'SYRIAC' => '17', + 'Syriac(?:[-_]|\s+)?Block' => '63', +}, +'ta' => { + 'TAMIL' => '24', + 'Tags' => '145', + 'Tamil(?:[-_]|\s+)?Block' => '70', }, -'guj' => { - 'GUJARATI' => 'GUJARATI', - 'Gujarati(?:[-_]|\s+)?Block' => 'Gujarati Block', +'te' => { + 'TELUGU' => '25', + 'Telugu(?:[-_]|\s+)?Block' => '71', + 'Terminal(?:[-_]|\s+)?Punctuation' => '147', }, -'gur' => { - 'GURMUKHI' => 'GURMUKHI', - 'Gurmukhi(?:[-_]|\s+)?Block' => 'Gurmukhi Block', +'th' => { + 'THAANA' => '18', + 'THAI' => '29', + 'Thaana(?:[-_]|\s+)?Block' => '64', + 'Thai(?:[-_]|\s+)?Block' => '75', }, -'hal' => { - 'Halfwidth(?:[-_]|\s+)?and(?:[-_]|\s+)?Fullwidth(?:[-_]|\s+)?Forms' => 'Halfwidth and Fullwidth Forms', +'ti' => { + 'TIBETAN' => '31', + 'Tibetan(?:[-_]|\s+)?Block' => '77', }, -'han' => { - 'HANGUL' => 'HANGUL', - 'HAN' => 'HAN', - 'Hangul(?:[-_]|\s+)?Jamo' => 'Hangul Jamo', - 'Hangul(?:[-_]|\s+)?Compatibility(?:[-_]|\s+)?Jamo' => 'Hangul Compatibility Jamo', - 'Hangul(?:[-_]|\s+)?Syllables' => 'Hangul Syllables', +'un' => { + 'Unified(?:[-_]|\s+)?Canadian(?:[-_]|\s+)?Aboriginal(?:[-_]|\s+)?Syllabics' => '83', }, -'heb' => { - 'HEBREW' => 'HEBREW', - 'Hebrew(?:[-_]|\s+)?Block' => 'Hebrew Block', +'up' => { + 'Uppercase' => '166', }, -'hig' => { - 'High(?:[-_]|\s+)?Surrogates' => 'High Surrogates', - 'High(?:[-_]|\s+)?Private(?:[-_]|\s+)?Use(?:[-_]|\s+)?Surrogates' => 'High Private Use Surrogates', -}, -'hir' => { - 'HIRAGANA' => 'HIRAGANA', - 'Hiragana(?:[-_]|\s+)?Block' => 'Hiragana Block', -}, -'ide' => { - 'Ideographic(?:[-_]|\s+)?Description(?:[-_]|\s+)?Characters' => 'Ideographic Description Characters', -}, -'inh' => { - 'INHERITED' => 'INHERITED', -}, -'ipa' => { - 'IPA(?:[-_]|\s+)?Extensions' => 'IPA Extensions', -}, -'kan' => { - 'KANNADA' => 'KANNADA', - 'Kannada(?:[-_]|\s+)?Block' => 'Kannada Block', - 'Kangxi(?:[-_]|\s+)?Radicals' => 'Kangxi Radicals', - 'Kanbun' => 'Kanbun', -}, -'kat' => { - 'KATAKANA' => 'KATAKANA', - 'Katakana(?:[-_]|\s+)?Block' => 'Katakana Block', -}, -'khm' => { - 'KHMER' => 'KHMER', - 'Khmer(?:[-_]|\s+)?Block' => 'Khmer Block', -}, -'lao' => { - 'LAO' => 'LAO', - 'Lao(?:[-_]|\s+)?Block' => 'Lao Block', -}, -'lat' => { - 'LATIN' => 'LATIN', - 'Latin(?:[-_]|\s+)?1(?:[-_]|\s+)?Supplement' => 'Latin-1 Supplement', - 'Latin(?:[-_]|\s+)?Extended(?:[-_]|\s+)?A' => 'Latin Extended-A', - 'Latin(?:[-_]|\s+)?Extended(?:[-_]|\s+)?B' => 'Latin Extended-B', - 'Latin(?:[-_]|\s+)?Extended(?:[-_]|\s+)?Additional' => 'Latin Extended Additional', -}, -'let' => { - 'Letterlike(?:[-_]|\s+)?Symbols' => 'Letterlike Symbols', -}, -'low' => { - 'Low(?:[-_]|\s+)?Surrogates' => 'Low Surrogates', -}, -'mal' => { - 'MALAYALAM' => 'MALAYALAM', - 'Malayalam(?:[-_]|\s+)?Block' => 'Malayalam Block', -}, -'mat' => { - 'Mathematical(?:[-_]|\s+)?Operators' => 'Mathematical Operators', - 'Mathematical(?:[-_]|\s+)?Alphanumeric(?:[-_]|\s+)?Symbols' => 'Mathematical Alphanumeric Symbols', -}, -'mis' => { - 'Miscellaneous(?:[-_]|\s+)?Technical' => 'Miscellaneous Technical', - 'Miscellaneous(?:[-_]|\s+)?Symbols' => 'Miscellaneous Symbols', -}, -'mon' => { - 'MONGOLIAN' => 'MONGOLIAN', - 'Mongolian(?:[-_]|\s+)?Block' => 'Mongolian Block', -}, -'mus' => { - 'Musical(?:[-_]|\s+)?Symbols' => 'Musical Symbols', -}, -'mya' => { - 'MYANMAR' => 'MYANMAR', - 'Myanmar(?:[-_]|\s+)?Block' => 'Myanmar Block', -}, -'num' => { - 'Number(?:[-_]|\s+)?Forms' => 'Number Forms', -}, -'ogh' => { - 'OGHAM' => 'OGHAM', - 'Ogham(?:[-_]|\s+)?Block' => 'Ogham Block', -}, -'old' => { - 'OLD(?:[-_]|\s+)?ITALIC' => 'OLD-ITALIC', - 'Old(?:[-_]|\s+)?Italic' => 'Old Italic', -}, -'opt' => { - 'Optical(?:[-_]|\s+)?Character(?:[-_]|\s+)?Recognition' => 'Optical Character Recognition', -}, -'ori' => { - 'ORIYA' => 'ORIYA', - 'Oriya(?:[-_]|\s+)?Block' => 'Oriya Block', -}, -'pri' => { - 'Private(?:[-_]|\s+)?Use' => 'Private Use', -}, -'run' => { - 'RUNIC' => 'RUNIC', - 'Runic(?:[-_]|\s+)?Block' => 'Runic Block', -}, -'sin' => { - 'SINHALA' => 'SINHALA', - 'Sinhala(?:[-_]|\s+)?Block' => 'Sinhala Block', -}, -'sma' => { - 'Small(?:[-_]|\s+)?Form(?:[-_]|\s+)?Variants' => 'Small Form Variants', -}, -'spa' => { - 'Spacing(?:[-_]|\s+)?Modifier(?:[-_]|\s+)?Letters' => 'Spacing Modifier Letters', -}, -'spe' => { - 'Specials' => 'Specials', -}, -'sup' => { - 'Superscripts(?:[-_]|\s+)?and(?:[-_]|\s+)?Subscripts' => 'Superscripts and Subscripts', -}, -'syr' => { - 'SYRIAC' => 'SYRIAC', - 'Syriac(?:[-_]|\s+)?Block' => 'Syriac Block', -}, -'tag' => { - 'Tags' => 'Tags', -}, -'tam' => { - 'TAMIL' => 'TAMIL', - 'Tamil(?:[-_]|\s+)?Block' => 'Tamil Block', -}, -'tel' => { - 'TELUGU' => 'TELUGU', - 'Telugu(?:[-_]|\s+)?Block' => 'Telugu Block', -}, -'tha' => { - 'THAANA' => 'THAANA', - 'THAI' => 'THAI', - 'Thaana(?:[-_]|\s+)?Block' => 'Thaana Block', - 'Thai(?:[-_]|\s+)?Block' => 'Thai Block', -}, -'tib' => { - 'TIBETAN' => 'TIBETAN', - 'Tibetan(?:[-_]|\s+)?Block' => 'Tibetan Block', -}, -'uni' => { - 'Unified(?:[-_]|\s+)?Canadian(?:[-_]|\s+)?Aboriginal(?:[-_]|\s+)?Syllabics' => 'Unified Canadian Aboriginal Syllabics', +'wh' => { + 'White(?:[-_]|\s+)?space' => '146', }, 'yi' => { - 'YI' => 'YI', -}, -'yi ' => { - 'Yi(?:[-_]|\s+)?Syllables' => 'Yi Syllables', - 'Yi(?:[-_]|\s+)?Radicals' => 'Yi Radicals', + 'YI' => '46', + 'Yi(?:[-_]|\s+)?Radicals' => '123', + 'Yi(?:[-_]|\s+)?Syllables' => '122', }, );