# Any changes made here will be lost!
%utf8::In =
(
-'ARABIC' => '16',
-'ARMENIAN' => '14',
-'ASCII_Hex_Digit' => '152',
-'Alphabetic' => '164',
-'Alphabetic Presentation Forms' => '129',
-'Any' => '171',
-'Arabic Block' => '62',
-'Arabic Presentation Forms-A' => '130',
-'Arabic Presentation Forms-B' => '134',
-'Armenian Block' => '60',
-'Arrows' => '96',
-'Assigned' => '163',
-'BENGALI' => '20',
-'BOPOMOFO' => '45',
-'Basic Latin' => '51',
-'Bengali Block' => '66',
-'Bidi_Control' => '159',
-'Block Elements' => '103',
-'Bopomofo Block' => '114',
-'Bopomofo Extended' => '117',
-'Box Drawing' => '102',
-'Braille Patterns' => '107',
-'Byzantine Musical Symbols' => '140',
-'CANADIAN-ABORIGINAL' => '37',
-'CHEROKEE' => '36',
-'CJK Compatibility' => '119',
-'CJK Compatibility Forms' => '132',
-'CJK Compatibility Ideographs' => '128',
-'CJK Compatibility Ideographs Supplement' => '144',
-'CJK Ideograph' => '1',
-'CJK Ideograph Extension A' => '0',
-'CJK Ideograph Extension B' => '7',
-'CJK Radicals Supplement' => '108',
-'CJK Symbols and Punctuation' => '111',
-'CJK Unified Ideographs' => '121',
-'CJK Unified Ideographs Extension A' => '120',
-'CJK Unified Ideographs Extension B' => '143',
-'CYRILLIC' => '13',
-'Cherokee Block' => '82',
-'Combining Diacritical Marks' => '57',
-'Combining Half Marks' => '131',
-'Combining Marks for Symbols' => '93',
-'Common' => '50',
-'Control Pictures' => '99',
-'Currency Symbols' => '92',
-'Cyrillic Block' => '59',
-'DESERET' => '49',
-'DEVANAGARI' => '19',
-'Dash' => '151',
-'Deseret Block' => '139',
-'Devanagari Block' => '65',
-'Diacritic' => '154',
-'Dingbats' => '106',
-'ETHIOPIC' => '35',
-'Enclosed Alphanumerics' => '101',
-'Enclosed CJK Letters and Months' => '118',
-'Ethiopic Block' => '81',
-'Extender' => '155',
-'GEORGIAN' => '33',
-'GOTHIC' => '48',
-'GREEK' => '11',
-'GUJARATI' => '22',
-'GURMUKHI' => '21',
-'General Punctuation' => '90',
-'Geometric Shapes' => '104',
-'Georgian Block' => '79',
-'Gothic Block' => '138',
-'Greek Block' => '58',
-'Greek Extended' => '89',
-'Gujarati Block' => '68',
-'Gurmukhi Block' => '67',
-'HAN' => '42',
-'HANGUL' => '34',
-'HEBREW' => '15',
-'HIRAGANA' => '43',
-'Halfwidth and Fullwidth Forms' => '136',
-'Hangul Compatibility Jamo' => '115',
-'Hangul Jamo' => '80',
-'Hangul Syllable' => '2',
-'Hangul Syllables' => '124',
-'Hebrew Block' => '61',
-'Hex_Digit' => '153',
-'High Private Use Surrogates' => '126',
-'High Surrogates' => '125',
-'Hiragana Block' => '112',
-'Hyphen' => '150',
-'ID_Continue' => '170',
-'ID_Start' => '169',
-'INHERITED' => '12',
-'IPA Extensions' => '55',
-'Ideographic' => '161',
-'Ideographic Description Characters' => '110',
-'Join_Control' => '158',
-'KANNADA' => '26',
-'KATAKANA' => '44',
-'KHMER' => '40',
-'Kanbun' => '116',
-'Kangxi Radicals' => '109',
-'Kannada Block' => '72',
-'Katakana Block' => '113',
-'Khmer Block' => '86',
-'LAO' => '30',
-'LATIN' => '10',
-'Lampersand' => '168',
-'Lao Block' => '76',
-'Latin Extended Additional' => '88',
-'Latin Extended-A' => '53',
-'Latin Extended-B' => '54',
-'Latin-1 Supplement' => '52',
-'Letterlike Symbols' => '94',
-'Low Surrogate' => '5',
-'Low Surrogates' => '127',
-'Lowercase' => '165',
-'MALAYALAM' => '27',
-'MONGOLIAN' => '41',
-'MYANMAR' => '32',
-'Malayalam Block' => '73',
-'Math' => '167',
-'Mathematical Alphanumeric Symbols' => '142',
-'Mathematical Operators' => '97',
-'Miscellaneous Symbols' => '105',
-'Miscellaneous Technical' => '98',
-'Mongolian Block' => '87',
-'Musical Symbols' => '141',
-'Myanmar Block' => '78',
-'Non Private Use High Surrogate' => '3',
-'Noncharacter_Code_Point' => '162',
-'Number Forms' => '95',
-'OGHAM' => '38',
-'OLD-ITALIC' => '47',
-'ORIYA' => '23',
-'Ogham Block' => '84',
-'Old Italic' => '137',
-'Optical Character Recognition' => '100',
-'Oriya Block' => '69',
-'Other_Alphabetic' => '157',
-'Other_Lowercase' => '156',
-'Other_Math' => '149',
-'Other_Uppercase' => '160',
-'Plane 15 Private Use' => '8',
-'Plane 16 Private Use' => '9',
-'Private Use' => '6',
-'Private Use High Surrogate' => '4',
-'Quotation_Mark' => '148',
-'RUNIC' => '39',
-'Runic Block' => '85',
-'SINHALA' => '28',
-'SYRIAC' => '17',
-'Sinhala Block' => '74',
-'Small Form Variants' => '133',
-'Spacing Modifier Letters' => '56',
-'Specials' => '135',
-'Superscripts and Subscripts' => '91',
-'Syriac Block' => '63',
-'TAMIL' => '24',
-'TELUGU' => '25',
-'THAANA' => '18',
-'THAI' => '29',
-'TIBETAN' => '31',
-'Tags' => '145',
-'Tamil Block' => '70',
-'Telugu Block' => '71',
-'Terminal_Punctuation' => '147',
-'Thaana Block' => '64',
-'Thai Block' => '75',
-'Tibetan Block' => '77',
-'Unified Canadian Aboriginal Syllabics' => '83',
-'Uppercase' => '166',
-'White_space' => '146',
-'YI' => '46',
-'Yi Radicals' => '123',
-'Yi Syllables' => '122',
+'Alphabetic' => '164',
+'Alphabetic Presentation Forms' => '129',
+'Any' => '171',
+'ARABIC' => '16',
+'Arabic Block' => '62',
+'Arabic Presentation Forms-A' => '130',
+'Arabic Presentation Forms-B' => '134',
+'ARMENIAN' => '14',
+'Armenian Block' => '60',
+'Arrows' => '96',
+'ASCII_Hex_Digit' => '152',
+'Assigned' => '163',
+'Basic Latin' => '51',
+'BENGALI' => '20',
+'Bengali Block' => '66',
+'Bidi_Control' => '159',
+'Block Elements' => '103',
+'BOPOMOFO' => '45',
+'Bopomofo Block' => '114',
+'Bopomofo Extended' => '117',
+'Box Drawing' => '102',
+'Braille Patterns' => '107',
+'Byzantine Musical Symbols' => '140',
+'CANADIAN-ABORIGINAL' => '37',
+'CHEROKEE' => '36',
+'Cherokee Block' => '82',
+'CJK Compatibility' => '119',
+'CJK Compatibility Forms' => '132',
+'CJK Compatibility Ideographs' => '128',
+'CJK Compatibility Ideographs Supplement' => '144',
+'CJK Ideograph' => '1',
+'CJK Ideograph Extension A' => '0',
+'CJK Ideograph Extension B' => '7',
+'CJK Radicals Supplement' => '108',
+'CJK Symbols and Punctuation' => '111',
+'CJK Unified Ideographs' => '121',
+'CJK Unified Ideographs Extension A' => '120',
+'CJK Unified Ideographs Extension B' => '143',
+'Combining Diacritical Marks' => '57',
+'Combining Half Marks' => '131',
+'Combining Marks for Symbols' => '93',
+'Common' => '50',
+'Control Pictures' => '99',
+'Currency Symbols' => '92',
+'CYRILLIC' => '13',
+'Cyrillic Block' => '59',
+'Dash' => '151',
+'DESERET' => '49',
+'Deseret Block' => '139',
+'DEVANAGARI' => '19',
+'Devanagari Block' => '65',
+'Diacritic' => '154',
+'Dingbats' => '106',
+'Enclosed Alphanumerics' => '101',
+'Enclosed CJK Letters and Months' => '118',
+'ETHIOPIC' => '35',
+'Ethiopic Block' => '81',
+'Extender' => '155',
+'General Punctuation' => '90',
+'Geometric Shapes' => '104',
+'GEORGIAN' => '33',
+'Georgian Block' => '79',
+'GOTHIC' => '48',
+'Gothic Block' => '138',
+'GREEK' => '11',
+'Greek Block' => '58',
+'Greek Extended' => '89',
+'GUJARATI' => '22',
+'Gujarati Block' => '68',
+'GURMUKHI' => '21',
+'Gurmukhi Block' => '67',
+'Halfwidth and Fullwidth Forms' => '136',
+'HAN' => '42',
+'HANGUL' => '34',
+'Hangul Compatibility Jamo' => '115',
+'Hangul Jamo' => '80',
+'Hangul Syllable' => '2',
+'Hangul Syllables' => '124',
+'HEBREW' => '15',
+'Hebrew Block' => '61',
+'Hex_Digit' => '153',
+'High Private Use Surrogates' => '126',
+'High Surrogates' => '125',
+'HIRAGANA' => '43',
+'Hiragana Block' => '112',
+'Hyphen' => '150',
+'ID_Continue' => '170',
+'ID_Start' => '169',
+'Ideographic' => '161',
+'Ideographic Description Characters' => '110',
+'INHERITED' => '12',
+'IPA Extensions' => '55',
+'Join_Control' => '158',
+'Kanbun' => '116',
+'Kangxi Radicals' => '109',
+'KANNADA' => '26',
+'Kannada Block' => '72',
+'KATAKANA' => '44',
+'Katakana Block' => '113',
+'KHMER' => '40',
+'Khmer Block' => '86',
+'Lampersand' => '168',
+'LAO' => '30',
+'Lao Block' => '76',
+'LATIN' => '10',
+'Latin Extended Additional' => '88',
+'Latin Extended-A' => '53',
+'Latin Extended-B' => '54',
+'Latin-1 Supplement' => '52',
+'Letterlike Symbols' => '94',
+'Low Surrogate' => '5',
+'Low Surrogates' => '127',
+'Lowercase' => '165',
+'MALAYALAM' => '27',
+'Malayalam Block' => '73',
+'Math' => '167',
+'Mathematical Alphanumeric Symbols' => '142',
+'Mathematical Operators' => '97',
+'Miscellaneous Symbols' => '105',
+'Miscellaneous Technical' => '98',
+'MONGOLIAN' => '41',
+'Mongolian Block' => '87',
+'Musical Symbols' => '141',
+'MYANMAR' => '32',
+'Myanmar Block' => '78',
+'Non Private Use High Surrogate' => '3',
+'Noncharacter_Code_Point' => '162',
+'Number Forms' => '95',
+'OGHAM' => '38',
+'Ogham Block' => '84',
+'Old Italic' => '137',
+'OLD-ITALIC' => '47',
+'Optical Character Recognition' => '100',
+'ORIYA' => '23',
+'Oriya Block' => '69',
+'Other_Alphabetic' => '157',
+'Other_Lowercase' => '156',
+'Other_Math' => '149',
+'Other_Uppercase' => '160',
+'Plane 15 Private Use' => '8',
+'Plane 16 Private Use' => '9',
+'Private Use' => '6',
+'Private Use High Surrogate' => '4',
+'Quotation_Mark' => '148',
+'RUNIC' => '39',
+'Runic Block' => '85',
+'SINHALA' => '28',
+'Sinhala Block' => '74',
+'Small Form Variants' => '133',
+'Spacing Modifier Letters' => '56',
+'Specials' => '135',
+'Superscripts and Subscripts' => '91',
+'SYRIAC' => '17',
+'Syriac Block' => '63',
+'Tags' => '145',
+'TAMIL' => '24',
+'Tamil Block' => '70',
+'TELUGU' => '25',
+'Telugu Block' => '71',
+'Terminal_Punctuation' => '147',
+'THAANA' => '18',
+'Thaana Block' => '64',
+'THAI' => '29',
+'Thai Block' => '75',
+'TIBETAN' => '31',
+'Tibetan Block' => '77',
+'Unified Canadian Aboriginal Syllabics' => '83',
+'Uppercase' => '166',
+'White_space' => '146',
+'YI' => '46',
+'Yi Radicals' => '123',
+'Yi Syllables' => '122',
);
%utf8::InPat =
(
},
'ar' => {
'ARABIC' => '16',
- 'ARMENIAN' => '14',
'Arabic(?:[-_]|\s+)?Block' => '62',
'Arabic(?:[-_]|\s+)?Presentation(?:[-_]|\s+)?Forms(?:[-_]|\s+)?A' => '130',
'Arabic(?:[-_]|\s+)?Presentation(?:[-_]|\s+)?Forms(?:[-_]|\s+)?B' => '134',
+ 'ARMENIAN' => '14',
'Armenian(?:[-_]|\s+)?Block' => '60',
'Arrows' => '96',
},
},
'de' => {
'DESERET' => '49',
- 'DEVANAGARI' => '19',
'Deseret(?:[-_]|\s+)?Block' => '139',
+ 'DEVANAGARI' => '19',
'Devanagari(?:[-_]|\s+)?Block' => '65',
},
'di' => {
'Extender' => '155',
},
'ge' => {
- 'GEORGIAN' => '33',
'General(?:[-_]|\s+)?Punctuation' => '90',
'Geometric(?:[-_]|\s+)?Shapes' => '104',
+ 'GEORGIAN' => '33',
'Georgian(?:[-_]|\s+)?Block' => '79',
},
'go' => {
},
'gu' => {
'GUJARATI' => '22',
- 'GURMUKHI' => '21',
'Gujarati(?:[-_]|\s+)?Block' => '68',
+ 'GURMUKHI' => '21',
'Gurmukhi(?:[-_]|\s+)?Block' => '67',
},
'ha' => {
+ 'Halfwidth(?:[-_]|\s+)?and(?:[-_]|\s+)?Fullwidth(?:[-_]|\s+)?Forms' => '136',
'HAN' => '42',
'HANGUL' => '34',
- 'Halfwidth(?:[-_]|\s+)?and(?:[-_]|\s+)?Fullwidth(?:[-_]|\s+)?Forms' => '136',
'Hangul(?:[-_]|\s+)?Compatibility(?:[-_]|\s+)?Jamo' => '115',
'Hangul(?:[-_]|\s+)?Jamo' => '80',
'Hangul(?:[-_]|\s+)?Syllable' => '2',
'Hex(?:[-_]|\s+)?Digit' => '153',
},
'hi' => {
- 'HIRAGANA' => '43',
'High(?:[-_]|\s+)?Private(?:[-_]|\s+)?Use(?:[-_]|\s+)?Surrogates' => '126',
'High(?:[-_]|\s+)?Surrogates' => '125',
+ 'HIRAGANA' => '43',
'Hiragana(?:[-_]|\s+)?Block' => '112',
},
'hy' => {
'Join(?:[-_]|\s+)?Control' => '158',
},
'ka' => {
- 'KANNADA' => '26',
- 'KATAKANA' => '44',
'Kanbun' => '116',
'Kangxi(?:[-_]|\s+)?Radicals' => '109',
+ 'KANNADA' => '26',
'Kannada(?:[-_]|\s+)?Block' => '72',
+ 'KATAKANA' => '44',
'Katakana(?:[-_]|\s+)?Block' => '113',
},
'kh' => {
'Khmer(?:[-_]|\s+)?Block' => '86',
},
'la' => {
- 'LAO' => '30',
- 'LATIN' => '10',
'Lampersand' => '168',
+ 'LAO' => '30',
'Lao(?:[-_]|\s+)?Block' => '76',
+ 'LATIN' => '10',
'Latin(?:[-_]|\s+)?Extended(?:[-_]|\s+)?Additional' => '88',
'Latin(?:[-_]|\s+)?Extended(?:[-_]|\s+)?A' => '53',
'Latin(?:[-_]|\s+)?Extended(?:[-_]|\s+)?B' => '54',
'Ogham(?:[-_]|\s+)?Block' => '84',
},
'ol' => {
- 'OLD(?:[-_]|\s+)?ITALIC' => '47',
'Old(?:[-_]|\s+)?Italic' => '137',
+ 'OLD(?:[-_]|\s+)?ITALIC' => '47',
},
'op' => {
'Optical(?:[-_]|\s+)?Character(?:[-_]|\s+)?Recognition' => '100',
'Syriac(?:[-_]|\s+)?Block' => '63',
},
'ta' => {
- 'TAMIL' => '24',
'Tags' => '145',
+ 'TAMIL' => '24',
'Tamil(?:[-_]|\s+)?Block' => '70',
},
'te' => {
},
'th' => {
'THAANA' => '18',
- 'THAI' => '29',
'Thaana(?:[-_]|\s+)?Block' => '64',
+ 'THAI' => '29',
'Thai(?:[-_]|\s+)?Block' => '75',
},
'ti' => {
# Any changes made here will be lost!
%utf8::Is =
(
-'Close Punctuation' => 'Pe',
-'Connector Punctuation' => 'Pc',
-'Control' => 'Cc',
-'Currency Symbol' => 'Sc',
-'Dash Punctuation' => 'Pd',
-'Decimal Digit Number' => 'Nd',
-'Enclosing Mark' => 'Me',
-'Final Punctuation' => 'Pf',
-'Format' => 'Cf',
-'Initial Punctuation' => 'Pi',
-'Letter' => 'L',
-'Letter Number' => 'Nl',
-'Line Separator' => 'Zl',
-'Lowercase Letter' => 'Ll',
-'Mark' => 'M',
-'Math Symbol' => 'Sm',
-'Modifier Letter' => 'Lm',
-'Modifier Symbol' => 'Sk',
-'Non-Spacing Mark' => 'Mn',
-'Not Assigned' => 'Cn',
-'Number' => 'N',
-'Open Punctuation' => 'Ps',
-'Other' => 'C',
-'Other Control' => 'Cc',
-'Other Format' => 'Cf',
-'Other Letter' => 'Lo',
-'Other Not Assigned' => 'Cn',
-'Other Number' => 'No',
-'Other Private Use' => 'Co',
-'Other Punctuation' => 'Po',
-'Other Surrogate' => 'Cs',
-'Other Symbol' => 'So',
-'Paragraph Separator' => 'Zp',
-'Private Use' => 'Co',
-'Punctuation' => 'P',
-'Separator' => 'Z',
-'Space Separator' => 'Zs',
-'Spacing Combining Mark' => 'Mc',
-'Surrogate' => 'Cs',
-'Symbol' => 'S',
-'Titlecase Letter' => 'Lt',
-'Uppercase Letter' => 'Lu',
+'Close Punctuation' => 'Pe',
+'Connector Punctuation' => 'Pc',
+'Control' => 'Cc',
+'Currency Symbol' => 'Sc',
+'Dash Punctuation' => 'Pd',
+'Decimal Digit Number' => 'Nd',
+'Enclosing Mark' => 'Me',
+'Final Punctuation' => 'Pf',
+'Format' => 'Cf',
+'Initial Punctuation' => 'Pi',
+'Letter' => 'L',
+'Letter Number' => 'Nl',
+'Line Separator' => 'Zl',
+'Lowercase Letter' => 'Ll',
+'Mark' => 'M',
+'Math Symbol' => 'Sm',
+'Modifier Letter' => 'Lm',
+'Modifier Symbol' => 'Sk',
+'Non-Spacing Mark' => 'Mn',
+'Not Assigned' => 'Cn',
+'Number' => 'N',
+'Open Punctuation' => 'Ps',
+'Other' => 'C',
+'Other Control' => 'Cc',
+'Other Format' => 'Cf',
+'Other Letter' => 'Lo',
+'Other Not Assigned' => 'Cn',
+'Other Number' => 'No',
+'Other Private Use' => 'Co',
+'Other Punctuation' => 'Po',
+'Other Surrogate' => 'Cs',
+'Other Symbol' => 'So',
+'Paragraph Separator' => 'Zp',
+'Private Use' => 'Co',
+'Punctuation' => 'P',
+'Separator' => 'Z',
+'Space Separator' => 'Zs',
+'Spacing Combining Mark' => 'Mc',
+'Surrogate' => 'Cs',
+'Symbol' => 'S',
+'Titlecase Letter' => 'Lt',
+'Uppercase Letter' => 'Lu',
);
%utf8::IsPat =
(