Allow for more flexibility in the \p{In...} names
Jarkko Hietaniemi [Sat, 29 Sep 2001 04:57:42 +0000 (04:57 +0000)]

Allow for more flexibility in the \p{In...} names: case no longer
matters, and any space or dash in a name can be matched by any
space, dash, underbar, or nothing at all.
(This may be going too far on leniency.)

p4raw-id: //depot/perl@12264

lib/unicore/Blocks.pl
lib/unicore/In.pl
lib/unicore/Scripts.pl
lib/unicore/mktables.PL
lib/utf8_heavy.pl
pod/perlunicode.pod
t/op/pat.t

index e45026a..83c2757 100644 (file)
 # This file is built by mktables.PL from e.g. Unicode.txt.
 # Any changes made here will be lost!
 return <<'END';
-0000   007F    Basic Latin     # BasicLatin In/40.pl
-0080   00FF    Latin-1 Supplement      # Latin1Supplement In/41.pl
-0100   017F    Latin Extended-A        # LatinExtendedA In/42.pl
-0180   024F    Latin Extended-B        # LatinExtendedB In/43.pl
-0250   02AF    IPA Extensions  # IPAExtensions In/44.pl
-02B0   02FF    Spacing Modifier Letters        # SpacingModifierLetters In/45.pl
-0300   036F    Combining Diacritical Marks     # CombiningDiacriticalMarks In/46.pl
-0370   03FF    Greek   # GreekBlock In/47.pl
-0400   04FF    Cyrillic        # CyrillicBlock In/48.pl
-0530   058F    Armenian        # ArmenianBlock In/49.pl
-0590   05FF    Hebrew  # HebrewBlock In/50.pl
-0600   06FF    Arabic  # ArabicBlock In/51.pl
-0700   074F    Syriac          # SyriacBlock In/52.pl
-0780   07BF    Thaana  # ThaanaBlock In/53.pl
-0900   097F    Devanagari      # DevanagariBlock In/54.pl
-0980   09FF    Bengali # BengaliBlock In/55.pl
-0A00   0A7F    Gurmukhi        # GurmukhiBlock In/56.pl
-0A80   0AFF    Gujarati        # GujaratiBlock In/57.pl
-0B00   0B7F    Oriya   # OriyaBlock In/58.pl
-0B80   0BFF    Tamil   # TamilBlock In/59.pl
-0C00   0C7F    Telugu  # TeluguBlock In/60.pl
-0C80   0CFF    Kannada # KannadaBlock In/61.pl
-0D00   0D7F    Malayalam       # MalayalamBlock In/62.pl
-0D80   0DFF    Sinhala # SinhalaBlock In/63.pl
-0E00   0E7F    Thai    # ThaiBlock In/64.pl
-0E80   0EFF    Lao     # LaoBlock In/65.pl
-0F00   0FFF    Tibetan # TibetanBlock In/66.pl
-1000   109F    Myanmar         # MyanmarBlock In/67.pl
-10A0   10FF    Georgian        # GeorgianBlock In/68.pl
-1100   11FF    Hangul Jamo     # HangulJamo In/69.pl
-1200   137F    Ethiopic        # EthiopicBlock In/70.pl
-13A0   13FF    Cherokee        # CherokeeBlock In/71.pl
-1400   167F    Unified Canadian Aboriginal Syllabics   # UnifiedCanadianAboriginalSyllabics In/72.pl
-1680   169F    Ogham   # OghamBlock In/73.pl
-16A0   16FF    Runic   # RunicBlock In/74.pl
-1780   17FF    Khmer   # KhmerBlock In/75.pl
-1800   18AF    Mongolian       # MongolianBlock In/76.pl
-1E00   1EFF    Latin Extended Additional       # LatinExtendedAdditional In/77.pl
-1F00   1FFF    Greek Extended  # GreekExtended In/78.pl
-2000   206F    General Punctuation     # GeneralPunctuation In/79.pl
-2070   209F    Superscripts and Subscripts     # SuperscriptsandSubscripts In/80.pl
-20A0   20CF    Currency Symbols        # CurrencySymbols In/81.pl
-20D0   20FF    Combining Marks for Symbols     # CombiningMarksforSymbols In/82.pl
-2100   214F    Letterlike Symbols      # LetterlikeSymbols In/83.pl
-2150   218F    Number Forms    # NumberForms In/84.pl
-2190   21FF    Arrows  # Arrows In/85.pl
-2200   22FF    Mathematical Operators  # MathematicalOperators In/86.pl
-2300   23FF    Miscellaneous Technical # MiscellaneousTechnical In/87.pl
-2400   243F    Control Pictures        # ControlPictures In/88.pl
-2440   245F    Optical Character Recognition   # OpticalCharacterRecognition In/89.pl
-2460   24FF    Enclosed Alphanumerics  # EnclosedAlphanumerics In/90.pl
-2500   257F    Box Drawing     # BoxDrawing In/91.pl
-2580   259F    Block Elements  # BlockElements In/92.pl
-25A0   25FF    Geometric Shapes        # GeometricShapes In/93.pl
-2600   26FF    Miscellaneous Symbols   # MiscellaneousSymbols In/94.pl
-2700   27BF    Dingbats        # Dingbats In/95.pl
-2800   28FF    Braille Patterns        # BraillePatterns In/96.pl
-2E80   2EFF    CJK Radicals Supplement # CJKRadicalsSupplement In/97.pl
-2F00   2FDF    Kangxi Radicals # KangxiRadicals In/98.pl
-2FF0   2FFF    Ideographic Description Characters      # IdeographicDescriptionCharacters In/99.pl
-3000   303F    CJK Symbols and Punctuation     # CJKSymbolsandPunctuation In/100.pl
-3040   309F    Hiragana        # HiraganaBlock In/101.pl
-30A0   30FF    Katakana        # KatakanaBlock In/102.pl
-3100   312F    Bopomofo        # BopomofoBlock In/103.pl
-3130   318F    Hangul Compatibility Jamo       # HangulCompatibilityJamo In/104.pl
-3190   319F    Kanbun  # Kanbun In/105.pl
-31A0   31BF    Bopomofo Extended       # BopomofoExtended In/106.pl
-3200   32FF    Enclosed CJK Letters and Months # EnclosedCJKLettersandMonths In/107.pl
-3300   33FF    CJK Compatibility       # CJKCompatibility In/108.pl
-3400   4DB5    CJK Unified Ideographs Extension A      # CJKUnifiedIdeographsExtensionA In/109.pl
-4E00   9FFF    CJK Unified Ideographs  # CJKUnifiedIdeographs In/110.pl
-A000   A48F    Yi Syllables    # YiSyllables In/111.pl
-A490   A4CF    Yi Radicals     # YiRadicals In/112.pl
-AC00   D7A3    Hangul Syllables        # HangulSyllables In/113.pl
-D800   DB7F    High Surrogates # HighSurrogates In/114.pl
-DB80   DBFF    High Private Use Surrogates     # HighPrivateUseSurrogates In/115.pl
-DC00   DFFF    Low Surrogates  # LowSurrogates In/116.pl
-E000   F8FF    Private Use     # PrivateUse In/117.pl
-F900   FAFF    CJK Compatibility Ideographs    # CJKCompatibilityIdeographs In/118.pl
-FB00   FB4F    Alphabetic Presentation Forms   # AlphabeticPresentationForms In/119.pl
-FB50   FDFF    Arabic Presentation Forms-A     # ArabicPresentationFormsA In/120.pl
-FE20   FE2F    Combining Half Marks    # CombiningHalfMarks In/121.pl
-FE30   FE4F    CJK Compatibility Forms # CJKCompatibilityForms In/122.pl
-FE50   FE6F    Small Form Variants     # SmallFormVariants In/123.pl
-FE70   FEFE    Arabic Presentation Forms-B     # ArabicPresentationFormsB In/124.pl
-FEFF   FEFF    Specials        # Specials In/125.pl
-FF00   FFEF    Halfwidth and Fullwidth Forms   # HalfwidthandFullwidthForms In/126.pl
-FFF0   FFFD    Specials        # Specials In/125.pl
-10300  1032F   Old Italic      # OldItalicBlock In/127.pl
-10330  1034F   Gothic  # GothicBlock In/128.pl
-10400  1044F   Deseret # DeseretBlock In/129.pl
-1D000  1D0FF   Byzantine Musical Symbols       # ByzantineMusicalSymbols In/130.pl
-1D100  1D1FF   Musical Symbols # MusicalSymbols In/131.pl
-1D400  1D7FF   Mathematical Alphanumeric Symbols       # MathematicalAlphanumericSymbols In/132.pl
-20000  2A6D6   CJK Unified Ideographs Extension B      # CJKUnifiedIdeographsExtensionB In/133.pl
-2F800  2FA1F   CJK Compatibility Ideographs Supplement # CJKCompatibilityIdeographsSupplement In/134.pl
-E0000  E007F   Tags    # Tags In/135.pl
-F0000  FFFFD   Private Use     # PrivateUse In/117.pl
-100000 10FFFD  Private Use     # PrivateUse In/117.pl
+0000   007F    Basic Latin     # In/40.pl
+0080   00FF    Latin-1 Supplement      # In/41.pl
+0100   017F    Latin Extended-A        # In/42.pl
+0180   024F    Latin Extended-B        # In/43.pl
+0250   02AF    IPA Extensions  # In/44.pl
+02B0   02FF    Spacing Modifier Letters        # In/45.pl
+0300   036F    Combining Diacritical Marks     # In/46.pl
+0370   03FF    Greek   # In/47.pl
+0400   04FF    Cyrillic        # In/48.pl
+0530   058F    Armenian        # In/49.pl
+0590   05FF    Hebrew  # In/50.pl
+0600   06FF    Arabic  # In/51.pl
+0700   074F    Syriac  # In/52.pl
+0780   07BF    Thaana  # In/53.pl
+0900   097F    Devanagari      # In/54.pl
+0980   09FF    Bengali # In/55.pl
+0A00   0A7F    Gurmukhi        # In/56.pl
+0A80   0AFF    Gujarati        # In/57.pl
+0B00   0B7F    Oriya   # In/58.pl
+0B80   0BFF    Tamil   # In/59.pl
+0C00   0C7F    Telugu  # In/60.pl
+0C80   0CFF    Kannada # In/61.pl
+0D00   0D7F    Malayalam       # In/62.pl
+0D80   0DFF    Sinhala # In/63.pl
+0E00   0E7F    Thai    # In/64.pl
+0E80   0EFF    Lao     # In/65.pl
+0F00   0FFF    Tibetan # In/66.pl
+1000   109F    Myanmar # In/67.pl
+10A0   10FF    Georgian        # In/68.pl
+1100   11FF    Hangul Jamo     # In/69.pl
+1200   137F    Ethiopic        # In/70.pl
+13A0   13FF    Cherokee        # In/71.pl
+1400   167F    Unified Canadian Aboriginal Syllabics   # In/72.pl
+1680   169F    Ogham   # In/73.pl
+16A0   16FF    Runic   # In/74.pl
+1780   17FF    Khmer   # In/75.pl
+1800   18AF    Mongolian       # In/76.pl
+1E00   1EFF    Latin Extended Additional       # In/77.pl
+1F00   1FFF    Greek Extended  # In/78.pl
+2000   206F    General Punctuation     # In/79.pl
+2070   209F    Superscripts and Subscripts     # In/80.pl
+20A0   20CF    Currency Symbols        # In/81.pl
+20D0   20FF    Combining Marks for Symbols     # In/82.pl
+2100   214F    Letterlike Symbols      # In/83.pl
+2150   218F    Number Forms    # In/84.pl
+2190   21FF    Arrows  # In/85.pl
+2200   22FF    Mathematical Operators  # In/86.pl
+2300   23FF    Miscellaneous Technical # In/87.pl
+2400   243F    Control Pictures        # In/88.pl
+2440   245F    Optical Character Recognition   # In/89.pl
+2460   24FF    Enclosed Alphanumerics  # In/90.pl
+2500   257F    Box Drawing     # In/91.pl
+2580   259F    Block Elements  # In/92.pl
+25A0   25FF    Geometric Shapes        # In/93.pl
+2600   26FF    Miscellaneous Symbols   # In/94.pl
+2700   27BF    Dingbats        # In/95.pl
+2800   28FF    Braille Patterns        # In/96.pl
+2E80   2EFF    CJK Radicals Supplement # In/97.pl
+2F00   2FDF    Kangxi Radicals # In/98.pl
+2FF0   2FFF    Ideographic Description Characters      # In/99.pl
+3000   303F    CJK Symbols and Punctuation     # In/100.pl
+3040   309F    Hiragana        # In/101.pl
+30A0   30FF    Katakana        # In/102.pl
+3100   312F    Bopomofo        # In/103.pl
+3130   318F    Hangul Compatibility Jamo       # In/104.pl
+3190   319F    Kanbun  # In/105.pl
+31A0   31BF    Bopomofo Extended       # In/106.pl
+3200   32FF    Enclosed CJK Letters and Months # In/107.pl
+3300   33FF    CJK Compatibility       # In/108.pl
+3400   4DB5    CJK Unified Ideographs Extension A      # In/109.pl
+4E00   9FFF    CJK Unified Ideographs  # In/110.pl
+A000   A48F    Yi Syllables    # In/111.pl
+A490   A4CF    Yi Radicals     # In/112.pl
+AC00   D7A3    Hangul Syllables        # In/113.pl
+D800   DB7F    High Surrogates # In/114.pl
+DB80   DBFF    High Private Use Surrogates     # In/115.pl
+DC00   DFFF    Low Surrogates  # In/116.pl
+E000   F8FF    Private Use     # In/117.pl
+F900   FAFF    CJK Compatibility Ideographs    # In/118.pl
+FB00   FB4F    Alphabetic Presentation Forms   # In/119.pl
+FB50   FDFF    Arabic Presentation Forms-A     # In/120.pl
+FE20   FE2F    Combining Half Marks    # In/121.pl
+FE30   FE4F    CJK Compatibility Forms # In/122.pl
+FE50   FE6F    Small Form Variants     # In/123.pl
+FE70   FEFE    Arabic Presentation Forms-B     # In/124.pl
+FEFF   FEFF    Specials        # In/125.pl
+FF00   FFEF    Halfwidth and Fullwidth Forms   # In/126.pl
+FFF0   FFFD    Specials        # In/125.pl
+10300  1032F   Old Italic      # In/127.pl
+10330  1034F   Gothic  # In/128.pl
+10400  1044F   Deseret # In/129.pl
+1D000  1D0FF   Byzantine Musical Symbols       # In/130.pl
+1D100  1D1FF   Musical Symbols # In/131.pl
+1D400  1D7FF   Mathematical Alphanumeric Symbols       # In/132.pl
+20000  2A6D6   CJK Unified Ideographs Extension B      # In/133.pl
+2F800  2FA1F   CJK Compatibility Ideographs Supplement # In/134.pl
+E0000  E007F   Tags    # In/135.pl
+F0000  FFFFD   Private Use     # In/117.pl
+100000 10FFFD  Private Use     # In/117.pl
 END
index a6c2419..c11445c 100644 (file)
 # This file is built by mktables.PL from e.g. Unicode.txt.
 # Any changes made here will be lost!
 %utf8::In = (
-'Latin'                                  =>   0,
-'Greek'                                  =>   1,
-'Cyrillic'                               =>   2,
-'Armenian'                               =>   3,
-'Hebrew'                                 =>   4,
-'Arabic'                                 =>   5,
-'Syriac'                                 =>   6,
-'Thaana'                                 =>   7,
-'Devanagari'                             =>   8,
-'Bengali'                                =>   9,
-'Gurmukhi'                               =>  10,
-'Gujarati'                               =>  11,
-'Oriya'                                  =>  12,
-'Tamil'                                  =>  13,
-'Telugu'                                 =>  14,
-'Kannada'                                =>  15,
-'Malayalam'                              =>  16,
-'Sinhala'                                =>  17,
-'Thai'                                   =>  18,
-'Lao'                                    =>  19,
-'Tibetan'                                =>  20,
-'Myanmar'                                =>  21,
-'Georgian'                               =>  22,
-'Hangul'                                 =>  23,
-'Ethiopic'                               =>  24,
-'Cherokee'                               =>  25,
-'CanadianAboriginal'                     =>  26,
-'Ogham'                                  =>  27,
-'Runic'                                  =>  28,
-'Khmer'                                  =>  29,
-'Mongolian'                              =>  30,
-'Hiragana'                               =>  31,
-'Katakana'                               =>  32,
-'Bopomofo'                               =>  33,
-'Han'                                    =>  34,
-'Yi'                                     =>  35,
-'OldItalic'                              =>  36,
-'Gothic'                                 =>  37,
-'Deseret'                                =>  38,
-'Inherited'                              =>  39,
-'BasicLatin'                             =>  40,
-'Latin1Supplement'                       =>  41,
-'LatinExtendedA'                         =>  42,
-'LatinExtendedB'                         =>  43,
-'IPAExtensions'                          =>  44,
-'SpacingModifierLetters'                 =>  45,
-'CombiningDiacriticalMarks'              =>  46,
-'GreekBlock'                             =>  47,
-'CyrillicBlock'                          =>  48,
-'ArmenianBlock'                          =>  49,
-'HebrewBlock'                            =>  50,
-'ArabicBlock'                            =>  51,
-'SyriacBlock'                            =>  52,
-'ThaanaBlock'                            =>  53,
-'DevanagariBlock'                        =>  54,
-'BengaliBlock'                           =>  55,
-'GurmukhiBlock'                          =>  56,
-'GujaratiBlock'                          =>  57,
-'OriyaBlock'                             =>  58,
-'TamilBlock'                             =>  59,
-'TeluguBlock'                            =>  60,
-'KannadaBlock'                           =>  61,
-'MalayalamBlock'                         =>  62,
-'SinhalaBlock'                           =>  63,
-'ThaiBlock'                              =>  64,
-'LaoBlock'                               =>  65,
-'TibetanBlock'                           =>  66,
-'MyanmarBlock'                           =>  67,
-'GeorgianBlock'                          =>  68,
-'HangulJamo'                             =>  69,
-'EthiopicBlock'                          =>  70,
-'CherokeeBlock'                          =>  71,
-'UnifiedCanadianAboriginalSyllabics'     =>  72,
-'OghamBlock'                             =>  73,
-'RunicBlock'                             =>  74,
-'KhmerBlock'                             =>  75,
-'MongolianBlock'                         =>  76,
-'LatinExtendedAdditional'                =>  77,
-'GreekExtended'                          =>  78,
-'GeneralPunctuation'                     =>  79,
-'SuperscriptsandSubscripts'              =>  80,
-'CurrencySymbols'                        =>  81,
-'CombiningMarksforSymbols'               =>  82,
-'LetterlikeSymbols'                      =>  83,
-'NumberForms'                            =>  84,
-'Arrows'                                 =>  85,
-'MathematicalOperators'                  =>  86,
-'MiscellaneousTechnical'                 =>  87,
-'ControlPictures'                        =>  88,
-'OpticalCharacterRecognition'            =>  89,
-'EnclosedAlphanumerics'                  =>  90,
-'BoxDrawing'                             =>  91,
-'BlockElements'                          =>  92,
-'GeometricShapes'                        =>  93,
-'MiscellaneousSymbols'                   =>  94,
-'Dingbats'                               =>  95,
-'BraillePatterns'                        =>  96,
-'CJKRadicalsSupplement'                  =>  97,
-'KangxiRadicals'                         =>  98,
-'IdeographicDescriptionCharacters'       =>  99,
-'CJKSymbolsandPunctuation'               => 100,
-'HiraganaBlock'                          => 101,
-'KatakanaBlock'                          => 102,
-'BopomofoBlock'                          => 103,
-'HangulCompatibilityJamo'                => 104,
-'Kanbun'                                 => 105,
-'BopomofoExtended'                       => 106,
-'EnclosedCJKLettersandMonths'            => 107,
-'CJKCompatibility'                       => 108,
-'CJKUnifiedIdeographsExtensionA'         => 109,
-'CJKUnifiedIdeographs'                   => 110,
-'YiSyllables'                            => 111,
-'YiRadicals'                             => 112,
-'HangulSyllables'                        => 113,
-'HighSurrogates'                         => 114,
-'HighPrivateUseSurrogates'               => 115,
-'LowSurrogates'                          => 116,
-'PrivateUse'                             => 117,
-'CJKCompatibilityIdeographs'             => 118,
-'AlphabeticPresentationForms'            => 119,
-'ArabicPresentationFormsA'               => 120,
-'CombiningHalfMarks'                     => 121,
-'CJKCompatibilityForms'                  => 122,
-'SmallFormVariants'                      => 123,
-'ArabicPresentationFormsB'               => 124,
-'Specials'                               => 125,
-'HalfwidthandFullwidthForms'             => 126,
-'OldItalicBlock'                         => 127,
-'GothicBlock'                            => 128,
-'DeseretBlock'                           => 129,
-'ByzantineMusicalSymbols'                => 130,
-'MusicalSymbols'                         => 131,
-'MathematicalAlphanumericSymbols'        => 132,
-'CJKUnifiedIdeographsExtensionB'         => 133,
-'CJKCompatibilityIdeographsSupplement'   => 134,
-'Tags'                                   => 135,
+'LATIN'                                       =>   0,
+'GREEK'                                       =>   1,
+'CYRILLIC'                                    =>   2,
+'ARMENIAN'                                    =>   3,
+'HEBREW'                                      =>   4,
+'ARABIC'                                      =>   5,
+'SYRIAC'                                      =>   6,
+'THAANA'                                      =>   7,
+'DEVANAGARI'                                  =>   8,
+'BENGALI'                                     =>   9,
+'GURMUKHI'                                    =>  10,
+'GUJARATI'                                    =>  11,
+'ORIYA'                                       =>  12,
+'TAMIL'                                       =>  13,
+'TELUGU'                                      =>  14,
+'KANNADA'                                     =>  15,
+'MALAYALAM'                                   =>  16,
+'SINHALA'                                     =>  17,
+'THAI'                                        =>  18,
+'LAO'                                         =>  19,
+'TIBETAN'                                     =>  20,
+'MYANMAR'                                     =>  21,
+'GEORGIAN'                                    =>  22,
+'HANGUL'                                      =>  23,
+'ETHIOPIC'                                    =>  24,
+'CHEROKEE'                                    =>  25,
+'CANADIAN-ABORIGINAL'                         =>  26,
+'OGHAM'                                       =>  27,
+'RUNIC'                                       =>  28,
+'KHMER'                                       =>  29,
+'MONGOLIAN'                                   =>  30,
+'HIRAGANA'                                    =>  31,
+'KATAKANA'                                    =>  32,
+'BOPOMOFO'                                    =>  33,
+'HAN'                                         =>  34,
+'YI'                                          =>  35,
+'OLD-ITALIC'                                  =>  36,
+'GOTHIC'                                      =>  37,
+'DESERET'                                     =>  38,
+'INHERITED'                                   =>  39,
+'Basic Latin'                                 =>  40,
+'Latin-1 Supplement'                          =>  41,
+'Latin Extended-A'                            =>  42,
+'Latin Extended-B'                            =>  43,
+'IPA Extensions'                              =>  44,
+'Spacing Modifier Letters'                    =>  45,
+'Combining Diacritical Marks'                 =>  46,
+'Greek Block'                                 =>  47,
+'Cyrillic Block'                              =>  48,
+'Armenian Block'                              =>  49,
+'Hebrew Block'                                =>  50,
+'Arabic Block'                                =>  51,
+'Syriac Block'                                =>  52,
+'Thaana Block'                                =>  53,
+'Devanagari Block'                            =>  54,
+'Bengali Block'                               =>  55,
+'Gurmukhi Block'                              =>  56,
+'Gujarati Block'                              =>  57,
+'Oriya Block'                                 =>  58,
+'Tamil Block'                                 =>  59,
+'Telugu Block'                                =>  60,
+'Kannada Block'                               =>  61,
+'Malayalam Block'                             =>  62,
+'Sinhala Block'                               =>  63,
+'Thai Block'                                  =>  64,
+'Lao Block'                                   =>  65,
+'Tibetan Block'                               =>  66,
+'Myanmar Block'                               =>  67,
+'Georgian Block'                              =>  68,
+'Hangul Jamo'                                 =>  69,
+'Ethiopic Block'                              =>  70,
+'Cherokee Block'                              =>  71,
+'Unified Canadian Aboriginal Syllabics'       =>  72,
+'Ogham Block'                                 =>  73,
+'Runic Block'                                 =>  74,
+'Khmer Block'                                 =>  75,
+'Mongolian Block'                             =>  76,
+'Latin Extended Additional'                   =>  77,
+'Greek Extended'                              =>  78,
+'General Punctuation'                         =>  79,
+'Superscripts and Subscripts'                 =>  80,
+'Currency Symbols'                            =>  81,
+'Combining Marks for Symbols'                 =>  82,
+'Letterlike Symbols'                          =>  83,
+'Number Forms'                                =>  84,
+'Arrows'                                      =>  85,
+'Mathematical Operators'                      =>  86,
+'Miscellaneous Technical'                     =>  87,
+'Control Pictures'                            =>  88,
+'Optical Character Recognition'               =>  89,
+'Enclosed Alphanumerics'                      =>  90,
+'Box Drawing'                                 =>  91,
+'Block Elements'                              =>  92,
+'Geometric Shapes'                            =>  93,
+'Miscellaneous Symbols'                       =>  94,
+'Dingbats'                                    =>  95,
+'Braille Patterns'                            =>  96,
+'CJK Radicals Supplement'                     =>  97,
+'Kangxi Radicals'                             =>  98,
+'Ideographic Description Characters'          =>  99,
+'CJK Symbols and Punctuation'                 => 100,
+'Hiragana Block'                              => 101,
+'Katakana Block'                              => 102,
+'Bopomofo Block'                              => 103,
+'Hangul Compatibility Jamo'                   => 104,
+'Kanbun'                                      => 105,
+'Bopomofo Extended'                           => 106,
+'Enclosed CJK Letters and Months'             => 107,
+'CJK Compatibility'                           => 108,
+'CJK Unified Ideographs Extension A'          => 109,
+'CJK Unified Ideographs'                      => 110,
+'Yi Syllables'                                => 111,
+'Yi Radicals'                                 => 112,
+'Hangul Syllables'                            => 113,
+'High Surrogates'                             => 114,
+'High Private Use Surrogates'                 => 115,
+'Low Surrogates'                              => 116,
+'Private Use'                                 => 117,
+'CJK Compatibility Ideographs'                => 118,
+'Alphabetic Presentation Forms'               => 119,
+'Arabic Presentation Forms-A'                 => 120,
+'Combining Half Marks'                        => 121,
+'CJK Compatibility Forms'                     => 122,
+'Small Form Variants'                         => 123,
+'Arabic Presentation Forms-B'                 => 124,
+'Specials'                                    => 125,
+'Halfwidth and Fullwidth Forms'               => 126,
+'Old Italic'                                  => 127,
+'Gothic Block'                                => 128,
+'Deseret Block'                               => 129,
+'Byzantine Musical Symbols'                   => 130,
+'Musical Symbols'                             => 131,
+'Mathematical Alphanumeric Symbols'           => 132,
+'CJK Unified Ideographs Extension B'          => 133,
+'CJK Compatibility Ideographs Supplement'     => 134,
+'Tags'                                        => 135,
+);
+%utf8::InPat = (
+'alp' => {
+       'Alphabetic[- _]?Presentation[- _]?Forms' => 'Alphabetic Presentation Forms',
+},
+'ara' => {
+       'ARABIC' => 'ARABIC',
+       'Arabic[- _]?Block' => 'Arabic Block',
+       'Arabic[- _]?Presentation[- _]?Forms[- _]?A' => 'Arabic Presentation Forms-A',
+       'Arabic[- _]?Presentation[- _]?Forms[- _]?B' => 'Arabic Presentation Forms-B',
+},
+'arm' => {
+       'ARMENIAN' => 'ARMENIAN',
+       'Armenian[- _]?Block' => 'Armenian Block',
+},
+'arr' => {
+       'Arrows' => 'Arrows',
+},
+'bas' => {
+       'Basic[- _]?Latin' => 'Basic Latin',
+},
+'ben' => {
+       'BENGALI' => 'BENGALI',
+       'Bengali[- _]?Block' => 'Bengali Block',
+},
+'blo' => {
+       'Block[- _]?Elements' => 'Block Elements',
+},
+'bop' => {
+       'BOPOMOFO' => 'BOPOMOFO',
+       'Bopomofo[- _]?Block' => 'Bopomofo Block',
+       'Bopomofo[- _]?Extended' => 'Bopomofo Extended',
+},
+'box' => {
+       'Box[- _]?Drawing' => 'Box Drawing',
+},
+'bra' => {
+       'Braille[- _]?Patterns' => 'Braille Patterns',
+},
+'byz' => {
+       'Byzantine[- _]?Musical[- _]?Symbols' => 'Byzantine Musical Symbols',
+},
+'can' => {
+       'CANADIAN[- _]?ABORIGINAL' => 'CANADIAN-ABORIGINAL',
+},
+'che' => {
+       'CHEROKEE' => 'CHEROKEE',
+       'Cherokee[- _]?Block' => 'Cherokee Block',
+},
+'cjk' => {
+       'CJK[- _]?Radicals[- _]?Supplement' => 'CJK Radicals Supplement',
+       'CJK[- _]?Symbols[- _]?and[- _]?Punctuation' => 'CJK Symbols and Punctuation',
+       'CJK[- _]?Compatibility' => 'CJK Compatibility',
+       'CJK[- _]?Unified[- _]?Ideographs[- _]?Extension[- _]?A' => 'CJK Unified Ideographs Extension A',
+       'CJK[- _]?Unified[- _]?Ideographs' => 'CJK Unified Ideographs',
+       'CJK[- _]?Compatibility[- _]?Ideographs' => 'CJK Compatibility Ideographs',
+       'CJK[- _]?Compatibility[- _]?Forms' => 'CJK Compatibility Forms',
+       'CJK[- _]?Unified[- _]?Ideographs[- _]?Extension[- _]?B' => 'CJK Unified Ideographs Extension B',
+       'CJK[- _]?Compatibility[- _]?Ideographs[- _]?Supplement' => 'CJK Compatibility Ideographs Supplement',
+},
+'com' => {
+       'Combining[- _]?Diacritical[- _]?Marks' => 'Combining Diacritical Marks',
+       'Combining[- _]?Marks[- _]?for[- _]?Symbols' => 'Combining Marks for Symbols',
+       'Combining[- _]?Half[- _]?Marks' => 'Combining Half Marks',
+},
+'con' => {
+       'Control[- _]?Pictures' => 'Control Pictures',
+},
+'cur' => {
+       'Currency[- _]?Symbols' => 'Currency Symbols',
+},
+'cyr' => {
+       'CYRILLIC' => 'CYRILLIC',
+       'Cyrillic[- _]?Block' => 'Cyrillic Block',
+},
+'des' => {
+       'DESERET' => 'DESERET',
+       'Deseret[- _]?Block' => 'Deseret Block',
+},
+'dev' => {
+       'DEVANAGARI' => 'DEVANAGARI',
+       'Devanagari[- _]?Block' => 'Devanagari Block',
+},
+'din' => {
+       'Dingbats' => 'Dingbats',
+},
+'enc' => {
+       'Enclosed[- _]?Alphanumerics' => 'Enclosed Alphanumerics',
+       'Enclosed[- _]?CJK[- _]?Letters[- _]?and[- _]?Months' => 'Enclosed CJK Letters and Months',
+},
+'eth' => {
+       'ETHIOPIC' => 'ETHIOPIC',
+       'Ethiopic[- _]?Block' => 'Ethiopic Block',
+},
+'gen' => {
+       'General[- _]?Punctuation' => 'General Punctuation',
+},
+'geo' => {
+       'GEORGIAN' => 'GEORGIAN',
+       'Georgian[- _]?Block' => 'Georgian Block',
+       'Geometric[- _]?Shapes' => 'Geometric Shapes',
+},
+'got' => {
+       'GOTHIC' => 'GOTHIC',
+       'Gothic[- _]?Block' => 'Gothic Block',
+},
+'gre' => {
+       'GREEK' => 'GREEK',
+       'Greek[- _]?Block' => 'Greek Block',
+       'Greek[- _]?Extended' => 'Greek Extended',
+},
+'guj' => {
+       'GUJARATI' => 'GUJARATI',
+       'Gujarati[- _]?Block' => 'Gujarati Block',
+},
+'gur' => {
+       'GURMUKHI' => 'GURMUKHI',
+       'Gurmukhi[- _]?Block' => 'Gurmukhi Block',
+},
+'hal' => {
+       'Halfwidth[- _]?and[- _]?Fullwidth[- _]?Forms' => 'Halfwidth and Fullwidth Forms',
+},
+'han' => {
+       'HANGUL' => 'HANGUL',
+       'HAN' => 'HAN',
+       'Hangul[- _]?Jamo' => 'Hangul Jamo',
+       'Hangul[- _]?Compatibility[- _]?Jamo' => 'Hangul Compatibility Jamo',
+       'Hangul[- _]?Syllables' => 'Hangul Syllables',
+},
+'heb' => {
+       'HEBREW' => 'HEBREW',
+       'Hebrew[- _]?Block' => 'Hebrew Block',
+},
+'hig' => {
+       'High[- _]?Surrogates' => 'High Surrogates',
+       'High[- _]?Private[- _]?Use[- _]?Surrogates' => 'High Private Use Surrogates',
+},
+'hir' => {
+       'HIRAGANA' => 'HIRAGANA',
+       'Hiragana[- _]?Block' => 'Hiragana Block',
+},
+'ide' => {
+       'Ideographic[- _]?Description[- _]?Characters' => 'Ideographic Description Characters',
+},
+'inh' => {
+       'INHERITED' => 'INHERITED',
+},
+'ipa' => {
+       'IPA[- _]?Extensions' => 'IPA Extensions',
+},
+'kan' => {
+       'KANNADA' => 'KANNADA',
+       'Kannada[- _]?Block' => 'Kannada Block',
+       'Kangxi[- _]?Radicals' => 'Kangxi Radicals',
+       'Kanbun' => 'Kanbun',
+},
+'kat' => {
+       'KATAKANA' => 'KATAKANA',
+       'Katakana[- _]?Block' => 'Katakana Block',
+},
+'khm' => {
+       'KHMER' => 'KHMER',
+       'Khmer[- _]?Block' => 'Khmer Block',
+},
+'lao' => {
+       'LAO' => 'LAO',
+       'Lao[- _]?Block' => 'Lao Block',
+},
+'lat' => {
+       'LATIN' => 'LATIN',
+       'Latin[- _]?1[- _]?Supplement' => 'Latin-1 Supplement',
+       'Latin[- _]?Extended[- _]?A' => 'Latin Extended-A',
+       'Latin[- _]?Extended[- _]?B' => 'Latin Extended-B',
+       'Latin[- _]?Extended[- _]?Additional' => 'Latin Extended Additional',
+},
+'let' => {
+       'Letterlike[- _]?Symbols' => 'Letterlike Symbols',
+},
+'low' => {
+       'Low[- _]?Surrogates' => 'Low Surrogates',
+},
+'mal' => {
+       'MALAYALAM' => 'MALAYALAM',
+       'Malayalam[- _]?Block' => 'Malayalam Block',
+},
+'mat' => {
+       'Mathematical[- _]?Operators' => 'Mathematical Operators',
+       'Mathematical[- _]?Alphanumeric[- _]?Symbols' => 'Mathematical Alphanumeric Symbols',
+},
+'mis' => {
+       'Miscellaneous[- _]?Technical' => 'Miscellaneous Technical',
+       'Miscellaneous[- _]?Symbols' => 'Miscellaneous Symbols',
+},
+'mon' => {
+       'MONGOLIAN' => 'MONGOLIAN',
+       'Mongolian[- _]?Block' => 'Mongolian Block',
+},
+'mus' => {
+       'Musical[- _]?Symbols' => 'Musical Symbols',
+},
+'mya' => {
+       'MYANMAR' => 'MYANMAR',
+       'Myanmar[- _]?Block' => 'Myanmar Block',
+},
+'num' => {
+       'Number[- _]?Forms' => 'Number Forms',
+},
+'ogh' => {
+       'OGHAM' => 'OGHAM',
+       'Ogham[- _]?Block' => 'Ogham Block',
+},
+'old' => {
+       'OLD[- _]?ITALIC' => 'OLD-ITALIC',
+       'Old[- _]?Italic' => 'Old Italic',
+},
+'opt' => {
+       'Optical[- _]?Character[- _]?Recognition' => 'Optical Character Recognition',
+},
+'ori' => {
+       'ORIYA' => 'ORIYA',
+       'Oriya[- _]?Block' => 'Oriya Block',
+},
+'pri' => {
+       'Private[- _]?Use' => 'Private Use',
+},
+'run' => {
+       'RUNIC' => 'RUNIC',
+       'Runic[- _]?Block' => 'Runic Block',
+},
+'sin' => {
+       'SINHALA' => 'SINHALA',
+       'Sinhala[- _]?Block' => 'Sinhala Block',
+},
+'sma' => {
+       'Small[- _]?Form[- _]?Variants' => 'Small Form Variants',
+},
+'spa' => {
+       'Spacing[- _]?Modifier[- _]?Letters' => 'Spacing Modifier Letters',
+},
+'spe' => {
+       'Specials' => 'Specials',
+},
+'sup' => {
+       'Superscripts[- _]?and[- _]?Subscripts' => 'Superscripts and Subscripts',
+},
+'syr' => {
+       'SYRIAC' => 'SYRIAC',
+       'Syriac[- _]?Block' => 'Syriac Block',
+},
+'tag' => {
+       'Tags' => 'Tags',
+},
+'tam' => {
+       'TAMIL' => 'TAMIL',
+       'Tamil[- _]?Block' => 'Tamil Block',
+},
+'tel' => {
+       'TELUGU' => 'TELUGU',
+       'Telugu[- _]?Block' => 'Telugu Block',
+},
+'tha' => {
+       'THAANA' => 'THAANA',
+       'THAI' => 'THAI',
+       'Thaana[- _]?Block' => 'Thaana Block',
+       'Thai[- _]?Block' => 'Thai Block',
+},
+'tib' => {
+       'TIBETAN' => 'TIBETAN',
+       'Tibetan[- _]?Block' => 'Tibetan Block',
+},
+'uni' => {
+       'Unified[- _]?Canadian[- _]?Aboriginal[- _]?Syllabics' => 'Unified Canadian Aboriginal Syllabics',
+},
+'yi' => {
+       'YI' => 'YI',
+},
+'yi ' => {
+       'Yi[- _]?Syllables' => 'Yi Syllables',
+       'Yi[- _]?Radicals' => 'Yi Radicals',
+},
 );
index ed0168e..b924f3a 100644 (file)
 # This file is built by mktables.PL from e.g. Unicode.txt.
 # Any changes made here will be lost!
 return <<'END';
-0041   005A    LATIN   # Latin In/0.pl
-0061   007A    LATIN   # Latin In/0.pl
-00AA           LATIN   # Latin In/0.pl
-00BA           LATIN   # Latin In/0.pl
-00C0   00D6    LATIN   # Latin In/0.pl
-00D8   00F6    LATIN   # Latin In/0.pl
-00F8   01BA    LATIN   # Latin In/0.pl
-01BB           LATIN   # Latin In/0.pl
-01BC   01BF    LATIN   # Latin In/0.pl
-01C0   01C3    LATIN   # Latin In/0.pl
-01C4   021F    LATIN   # Latin In/0.pl
-0222   0233    LATIN   # Latin In/0.pl
-0250   02AD    LATIN   # Latin In/0.pl
-02B0   02B8    LATIN   # Latin In/0.pl
-02E0   02E4    LATIN   # Latin In/0.pl
-1E00   1E9B    LATIN   # Latin In/0.pl
-1EA0   1EF9    LATIN   # Latin In/0.pl
-207F           LATIN   # Latin In/0.pl
-212A   212B    LATIN   # Latin In/0.pl
-FB00   FB06    LATIN   # Latin In/0.pl
-FF21   FF3A    LATIN   # Latin In/0.pl
-FF41   FF5A    LATIN   # Latin In/0.pl
-00B5           GREEK   # Greek In/1.pl
-037A           GREEK   # Greek In/1.pl
-0386           GREEK   # Greek In/1.pl
-0388   038A    GREEK   # Greek In/1.pl
-038C           GREEK   # Greek In/1.pl
-038E   03A1    GREEK   # Greek In/1.pl
-03A3   03CE    GREEK   # Greek In/1.pl
-03D0   03D7    GREEK   # Greek In/1.pl
-03DA   03F5    GREEK   # Greek In/1.pl
-1F00   1F15    GREEK   # Greek In/1.pl
-1F18   1F1D    GREEK   # Greek In/1.pl
-1F20   1F45    GREEK   # Greek In/1.pl
-1F48   1F4D    GREEK   # Greek In/1.pl
-1F50   1F57    GREEK   # Greek In/1.pl
-1F59           GREEK   # Greek In/1.pl
-1F5B           GREEK   # Greek In/1.pl
-1F5D           GREEK   # Greek In/1.pl
-1F5F   1F7D    GREEK   # Greek In/1.pl
-1F80   1FB4    GREEK   # Greek In/1.pl
-1FB6   1FBC    GREEK   # Greek In/1.pl
-1FBE           GREEK   # Greek In/1.pl
-1FC2   1FC4    GREEK   # Greek In/1.pl
-1FC6   1FCC    GREEK   # Greek In/1.pl
-1FD0   1FD3    GREEK   # Greek In/1.pl
-1FD6   1FDB    GREEK   # Greek In/1.pl
-1FE0   1FEC    GREEK   # Greek In/1.pl
-1FF2   1FF4    GREEK   # Greek In/1.pl
-1FF6   1FFC    GREEK   # Greek In/1.pl
-2126           GREEK   # Greek In/1.pl
-0400   0481    CYRILLIC        # Cyrillic In/2.pl
-0483   0486    CYRILLIC        # Cyrillic In/2.pl
-048C   04C4    CYRILLIC        # Cyrillic In/2.pl
-04C7   04C8    CYRILLIC        # Cyrillic In/2.pl
-04CB   04CC    CYRILLIC        # Cyrillic In/2.pl
-04D0   04F5    CYRILLIC        # Cyrillic In/2.pl
-04F8   04F9    CYRILLIC        # Cyrillic In/2.pl
-0531   0556    ARMENIAN        # Armenian In/3.pl
-0559           ARMENIAN        # Armenian In/3.pl
-0561   0587    ARMENIAN        # Armenian In/3.pl
-FB13   FB17    ARMENIAN        # Armenian In/3.pl
-05D0   05EA    HEBREW  # Hebrew In/4.pl
-05F0   05F2    HEBREW  # Hebrew In/4.pl
-FB1D           HEBREW  # Hebrew In/4.pl
-FB1F   FB28    HEBREW  # Hebrew In/4.pl
-FB2A   FB36    HEBREW  # Hebrew In/4.pl
-FB38   FB3C    HEBREW  # Hebrew In/4.pl
-FB3E           HEBREW  # Hebrew In/4.pl
-FB40   FB41    HEBREW  # Hebrew In/4.pl
-FB43   FB44    HEBREW  # Hebrew In/4.pl
-FB46   FB4F    HEBREW  # Hebrew In/4.pl
-0621   063A    ARABIC  # Arabic In/5.pl
-0641   064A    ARABIC  # Arabic In/5.pl
-0671   06D3    ARABIC  # Arabic In/5.pl
-06D5           ARABIC  # Arabic In/5.pl
-06E5   06E6    ARABIC  # Arabic In/5.pl
-06FA   06FC    ARABIC  # Arabic In/5.pl
-FB50   FBB1    ARABIC  # Arabic In/5.pl
-FBD3   FD3D    ARABIC  # Arabic In/5.pl
-FD50   FD8F    ARABIC  # Arabic In/5.pl
-FD92   FDC7    ARABIC  # Arabic In/5.pl
-FDF0   FDFB    ARABIC  # Arabic In/5.pl
-FE70   FE72    ARABIC  # Arabic In/5.pl
-FE74           ARABIC  # Arabic In/5.pl
-FE76   FEFC    ARABIC  # Arabic In/5.pl
-0710           SYRIAC  # Syriac In/6.pl
-0711           SYRIAC  # Syriac In/6.pl
-0712   072C    SYRIAC  # Syriac In/6.pl
-0730   074A    SYRIAC  # Syriac In/6.pl
-0780   07A5    THAANA  # Thaana In/7.pl
-07A6   07B0    THAANA  # Thaana In/7.pl
-0901   0902    DEVANAGARI      # Devanagari In/8.pl
-0903           DEVANAGARI      # Devanagari In/8.pl
-0905   0939    DEVANAGARI      # Devanagari In/8.pl
-093C           DEVANAGARI      # Devanagari In/8.pl
-093D           DEVANAGARI      # Devanagari In/8.pl
-093E   0940    DEVANAGARI      # Devanagari In/8.pl
-0941   0948    DEVANAGARI      # Devanagari In/8.pl
-0949   094C    DEVANAGARI      # Devanagari In/8.pl
-094D           DEVANAGARI      # Devanagari In/8.pl
-0950           DEVANAGARI      # Devanagari In/8.pl
-0951   0954    DEVANAGARI      # Devanagari In/8.pl
-0958   0961    DEVANAGARI      # Devanagari In/8.pl
-0962   0963    DEVANAGARI      # Devanagari In/8.pl
-0966   096F    DEVANAGARI      # Devanagari In/8.pl
-0981           BENGALI # Bengali In/9.pl
-0985   098C    BENGALI # Bengali In/9.pl
-098F   0990    BENGALI # Bengali In/9.pl
-0993   09A8    BENGALI # Bengali In/9.pl
-09AA   09B0    BENGALI # Bengali In/9.pl
-09B2           BENGALI # Bengali In/9.pl
-09B6   09B9    BENGALI # Bengali In/9.pl
-09BC           BENGALI # Bengali In/9.pl
-09BE   09C0    BENGALI # Bengali In/9.pl
-09C1   09C4    BENGALI # Bengali In/9.pl
-09C7   09C8    BENGALI # Bengali In/9.pl
-09CB   09CC    BENGALI # Bengali In/9.pl
-09CD           BENGALI # Bengali In/9.pl
-09D7           BENGALI # Bengali In/9.pl
-09DC   09DD    BENGALI # Bengali In/9.pl
-09DF   09E1    BENGALI # Bengali In/9.pl
-09E2   09E3    BENGALI # Bengali In/9.pl
-09E6   09EF    BENGALI # Bengali In/9.pl
-09F0   09F1    BENGALI # Bengali In/9.pl
-0A02           GURMUKHI        # Gurmukhi In/10.pl
-0A05   0A0A    GURMUKHI        # Gurmukhi In/10.pl
-0A0F   0A10    GURMUKHI        # Gurmukhi In/10.pl
-0A13   0A28    GURMUKHI        # Gurmukhi In/10.pl
-0A2A   0A30    GURMUKHI        # Gurmukhi In/10.pl
-0A32   0A33    GURMUKHI        # Gurmukhi In/10.pl
-0A35   0A36    GURMUKHI        # Gurmukhi In/10.pl
-0A38   0A39    GURMUKHI        # Gurmukhi In/10.pl
-0A3C           GURMUKHI        # Gurmukhi In/10.pl
-0A3E   0A40    GURMUKHI        # Gurmukhi In/10.pl
-0A41   0A42    GURMUKHI        # Gurmukhi In/10.pl
-0A47   0A48    GURMUKHI        # Gurmukhi In/10.pl
-0A4B   0A4D    GURMUKHI        # Gurmukhi In/10.pl
-0A59   0A5C    GURMUKHI        # Gurmukhi In/10.pl
-0A5E           GURMUKHI        # Gurmukhi In/10.pl
-0A66   0A6F    GURMUKHI        # Gurmukhi In/10.pl
-0A70   0A71    GURMUKHI        # Gurmukhi In/10.pl
-0A72   0A74    GURMUKHI        # Gurmukhi In/10.pl
-0A81   0A82    GUJARATI        # Gujarati In/11.pl
-0A83           GUJARATI        # Gujarati In/11.pl
-0A85   0A8B    GUJARATI        # Gujarati In/11.pl
-0A8D           GUJARATI        # Gujarati In/11.pl
-0A8F   0A91    GUJARATI        # Gujarati In/11.pl
-0A93   0AA8    GUJARATI        # Gujarati In/11.pl
-0AAA   0AB0    GUJARATI        # Gujarati In/11.pl
-0AB2   0AB3    GUJARATI        # Gujarati In/11.pl
-0AB5   0AB9    GUJARATI        # Gujarati In/11.pl
-0ABC           GUJARATI        # Gujarati In/11.pl
-0ABD           GUJARATI        # Gujarati In/11.pl
-0ABE   0AC0    GUJARATI        # Gujarati In/11.pl
-0AC1   0AC5    GUJARATI        # Gujarati In/11.pl
-0AC7   0AC8    GUJARATI        # Gujarati In/11.pl
-0AC9           GUJARATI        # Gujarati In/11.pl
-0ACB   0ACC    GUJARATI        # Gujarati In/11.pl
-0ACD           GUJARATI        # Gujarati In/11.pl
-0AD0           GUJARATI        # Gujarati In/11.pl
-0AE0           GUJARATI        # Gujarati In/11.pl
-0AE6   0AEF    GUJARATI        # Gujarati In/11.pl
-0B01           ORIYA   # Oriya In/12.pl
-0B02   0B03    ORIYA   # Oriya In/12.pl
-0B05   0B0C    ORIYA   # Oriya In/12.pl
-0B0F   0B10    ORIYA   # Oriya In/12.pl
-0B13   0B28    ORIYA   # Oriya In/12.pl
-0B2A   0B30    ORIYA   # Oriya In/12.pl
-0B32   0B33    ORIYA   # Oriya In/12.pl
-0B36   0B39    ORIYA   # Oriya In/12.pl
-0B3C           ORIYA   # Oriya In/12.pl
-0B3D           ORIYA   # Oriya In/12.pl
-0B3E           ORIYA   # Oriya In/12.pl
-0B3F           ORIYA   # Oriya In/12.pl
-0B40           ORIYA   # Oriya In/12.pl
-0B41   0B43    ORIYA   # Oriya In/12.pl
-0B47   0B48    ORIYA   # Oriya In/12.pl
-0B4B   0B4C    ORIYA   # Oriya In/12.pl
-0B4D           ORIYA   # Oriya In/12.pl
-0B56           ORIYA   # Oriya In/12.pl
-0B57           ORIYA   # Oriya In/12.pl
-0B5C   0B5D    ORIYA   # Oriya In/12.pl
-0B5F   0B61    ORIYA   # Oriya In/12.pl
-0B66   0B6F    ORIYA   # Oriya In/12.pl
-0B82           TAMIL   # Tamil In/13.pl
-0B83           TAMIL   # Tamil In/13.pl
-0B85   0B8A    TAMIL   # Tamil In/13.pl
-0B8E   0B90    TAMIL   # Tamil In/13.pl
-0B92   0B95    TAMIL   # Tamil In/13.pl
-0B99   0B9A    TAMIL   # Tamil In/13.pl
-0B9C           TAMIL   # Tamil In/13.pl
-0B9E   0B9F    TAMIL   # Tamil In/13.pl
-0BA3   0BA4    TAMIL   # Tamil In/13.pl
-0BA8   0BAA    TAMIL   # Tamil In/13.pl
-0BAE   0BB5    TAMIL   # Tamil In/13.pl
-0BB7   0BB9    TAMIL   # Tamil In/13.pl
-0BBE   0BBF    TAMIL   # Tamil In/13.pl
-0BC0           TAMIL   # Tamil In/13.pl
-0BC1   0BC2    TAMIL   # Tamil In/13.pl
-0BC6   0BC8    TAMIL   # Tamil In/13.pl
-0BCA   0BCC    TAMIL   # Tamil In/13.pl
-0BCD           TAMIL   # Tamil In/13.pl
-0BD7           TAMIL   # Tamil In/13.pl
-0BE7   0BEF    TAMIL   # Tamil In/13.pl
-0BF0   0BF2    TAMIL   # Tamil In/13.pl
-0C01   0C03    TELUGU  # Telugu In/14.pl
-0C05   0C0C    TELUGU  # Telugu In/14.pl
-0C0E   0C10    TELUGU  # Telugu In/14.pl
-0C12   0C28    TELUGU  # Telugu In/14.pl
-0C2A   0C33    TELUGU  # Telugu In/14.pl
-0C35   0C39    TELUGU  # Telugu In/14.pl
-0C3E   0C40    TELUGU  # Telugu In/14.pl
-0C41   0C44    TELUGU  # Telugu In/14.pl
-0C46   0C48    TELUGU  # Telugu In/14.pl
-0C4A   0C4D    TELUGU  # Telugu In/14.pl
-0C55   0C56    TELUGU  # Telugu In/14.pl
-0C60   0C61    TELUGU  # Telugu In/14.pl
-0C66   0C6F    TELUGU  # Telugu In/14.pl
-0C82   0C83    KANNADA # Kannada In/15.pl
-0C85   0C8C    KANNADA # Kannada In/15.pl
-0C8E   0C90    KANNADA # Kannada In/15.pl
-0C92   0CA8    KANNADA # Kannada In/15.pl
-0CAA   0CB3    KANNADA # Kannada In/15.pl
-0CB5   0CB9    KANNADA # Kannada In/15.pl
-0CBE           KANNADA # Kannada In/15.pl
-0CBF           KANNADA # Kannada In/15.pl
-0CC0   0CC4    KANNADA # Kannada In/15.pl
-0CC6           KANNADA # Kannada In/15.pl
-0CC7   0CC8    KANNADA # Kannada In/15.pl
-0CCA   0CCB    KANNADA # Kannada In/15.pl
-0CCC   0CCD    KANNADA # Kannada In/15.pl
-0CD5   0CD6    KANNADA # Kannada In/15.pl
-0CDE           KANNADA # Kannada In/15.pl
-0CE0   0CE1    KANNADA # Kannada In/15.pl
-0CE6   0CEF    KANNADA # Kannada In/15.pl
-0D02   0D03    MALAYALAM       # Malayalam In/16.pl
-0D05   0D0C    MALAYALAM       # Malayalam In/16.pl
-0D0E   0D10    MALAYALAM       # Malayalam In/16.pl
-0D12   0D28    MALAYALAM       # Malayalam In/16.pl
-0D2A   0D39    MALAYALAM       # Malayalam In/16.pl
-0D3E   0D40    MALAYALAM       # Malayalam In/16.pl
-0D41   0D43    MALAYALAM       # Malayalam In/16.pl
-0D46   0D48    MALAYALAM       # Malayalam In/16.pl
-0D4A   0D4C    MALAYALAM       # Malayalam In/16.pl
-0D4D           MALAYALAM       # Malayalam In/16.pl
-0D57           MALAYALAM       # Malayalam In/16.pl
-0D60   0D61    MALAYALAM       # Malayalam In/16.pl
-0D66   0D6F    MALAYALAM       # Malayalam In/16.pl
-0D82   0D83    SINHALA # Sinhala In/17.pl
-0D85   0D96    SINHALA # Sinhala In/17.pl
-0D9A   0DB1    SINHALA # Sinhala In/17.pl
-0DB3   0DBB    SINHALA # Sinhala In/17.pl
-0DBD           SINHALA # Sinhala In/17.pl
-0DC0   0DC6    SINHALA # Sinhala In/17.pl
-0DCA           SINHALA # Sinhala In/17.pl
-0DCF   0DD1    SINHALA # Sinhala In/17.pl
-0DD2   0DD4    SINHALA # Sinhala In/17.pl
-0DD6           SINHALA # Sinhala In/17.pl
-0DD8   0DDF    SINHALA # Sinhala In/17.pl
-0DF2   0DF3    SINHALA # Sinhala In/17.pl
-0E01   0E30    THAI    # Thai In/18.pl
-0E31           THAI    # Thai In/18.pl
-0E32   0E33    THAI    # Thai In/18.pl
-0E34   0E3A    THAI    # Thai In/18.pl
-0E40   0E45    THAI    # Thai In/18.pl
-0E46           THAI    # Thai In/18.pl
-0E47   0E4E    THAI    # Thai In/18.pl
-0E50   0E59    THAI    # Thai In/18.pl
-0E81   0E82    LAO     # Lao In/19.pl
-0E84           LAO     # Lao In/19.pl
-0E87   0E88    LAO     # Lao In/19.pl
-0E8A           LAO     # Lao In/19.pl
-0E8D           LAO     # Lao In/19.pl
-0E94   0E97    LAO     # Lao In/19.pl
-0E99   0E9F    LAO     # Lao In/19.pl
-0EA1   0EA3    LAO     # Lao In/19.pl
-0EA5           LAO     # Lao In/19.pl
-0EA7           LAO     # Lao In/19.pl
-0EAA   0EAB    LAO     # Lao In/19.pl
-0EAD   0EB0    LAO     # Lao In/19.pl
-0EB1           LAO     # Lao In/19.pl
-0EB2   0EB3    LAO     # Lao In/19.pl
-0EB4   0EB9    LAO     # Lao In/19.pl
-0EBB   0EBC    LAO     # Lao In/19.pl
-0EBD           LAO     # Lao In/19.pl
-0EC0   0EC4    LAO     # Lao In/19.pl
-0EC6           LAO     # Lao In/19.pl
-0EC8   0ECD    LAO     # Lao In/19.pl
-0ED0   0ED9    LAO     # Lao In/19.pl
-0EDC   0EDD    LAO     # Lao In/19.pl
-0F00           TIBETAN # Tibetan In/20.pl
-0F18   0F19    TIBETAN # Tibetan In/20.pl
-0F20   0F29    TIBETAN # Tibetan In/20.pl
-0F2A   0F33    TIBETAN # Tibetan In/20.pl
-0F35           TIBETAN # Tibetan In/20.pl
-0F37           TIBETAN # Tibetan In/20.pl
-0F39           TIBETAN # Tibetan In/20.pl
-0F40   0F47    TIBETAN # Tibetan In/20.pl
-0F49   0F6A    TIBETAN # Tibetan In/20.pl
-0F71   0F7E    TIBETAN # Tibetan In/20.pl
-0F7F           TIBETAN # Tibetan In/20.pl
-0F80   0F84    TIBETAN # Tibetan In/20.pl
-0F86   0F87    TIBETAN # Tibetan In/20.pl
-0F88   0F8B    TIBETAN # Tibetan In/20.pl
-0F90   0F97    TIBETAN # Tibetan In/20.pl
-0F99   0FBC    TIBETAN # Tibetan In/20.pl
-0FC6           TIBETAN # Tibetan In/20.pl
-1000   1021    MYANMAR # Myanmar In/21.pl
-1023   1027    MYANMAR # Myanmar In/21.pl
-1029   102A    MYANMAR # Myanmar In/21.pl
-102C           MYANMAR # Myanmar In/21.pl
-102D   1030    MYANMAR # Myanmar In/21.pl
-1031           MYANMAR # Myanmar In/21.pl
-1032           MYANMAR # Myanmar In/21.pl
-1036   1037    MYANMAR # Myanmar In/21.pl
-1038           MYANMAR # Myanmar In/21.pl
-1039           MYANMAR # Myanmar In/21.pl
-1040   1049    MYANMAR # Myanmar In/21.pl
-1050   1055    MYANMAR # Myanmar In/21.pl
-1056   1057    MYANMAR # Myanmar In/21.pl
-1058   1059    MYANMAR # Myanmar In/21.pl
-10A0   10C5    GEORGIAN        # Georgian In/22.pl
-10D0   10F6    GEORGIAN        # Georgian In/22.pl
-1100   1159    HANGUL  # Hangul In/23.pl
-115F   11A2    HANGUL  # Hangul In/23.pl
-11A8   11F9    HANGUL  # Hangul In/23.pl
-3131   318E    HANGUL  # Hangul In/23.pl
-AC00   D7A3    HANGUL  # Hangul In/23.pl
-FFA0   FFBE    HANGUL  # Hangul In/23.pl
-FFC2   FFC7    HANGUL  # Hangul In/23.pl
-FFCA   FFCF    HANGUL  # Hangul In/23.pl
-FFD2   FFD7    HANGUL  # Hangul In/23.pl
-FFDA   FFDC    HANGUL  # Hangul In/23.pl
-1200   1206    ETHIOPIC        # Ethiopic In/24.pl
-1208   1246    ETHIOPIC        # Ethiopic In/24.pl
-1248           ETHIOPIC        # Ethiopic In/24.pl
-124A   124D    ETHIOPIC        # Ethiopic In/24.pl
-1250   1256    ETHIOPIC        # Ethiopic In/24.pl
-1258           ETHIOPIC        # Ethiopic In/24.pl
-125A   125D    ETHIOPIC        # Ethiopic In/24.pl
-1260   1286    ETHIOPIC        # Ethiopic In/24.pl
-1288           ETHIOPIC        # Ethiopic In/24.pl
-128A   128D    ETHIOPIC        # Ethiopic In/24.pl
-1290   12AE    ETHIOPIC        # Ethiopic In/24.pl
-12B0           ETHIOPIC        # Ethiopic In/24.pl
-12B2   12B5    ETHIOPIC        # Ethiopic In/24.pl
-12B8   12BE    ETHIOPIC        # Ethiopic In/24.pl
-12C0           ETHIOPIC        # Ethiopic In/24.pl
-12C2   12C5    ETHIOPIC        # Ethiopic In/24.pl
-12C8   12CE    ETHIOPIC        # Ethiopic In/24.pl
-12D0   12D6    ETHIOPIC        # Ethiopic In/24.pl
-12D8   12EE    ETHIOPIC        # Ethiopic In/24.pl
-12F0   130E    ETHIOPIC        # Ethiopic In/24.pl
-1310           ETHIOPIC        # Ethiopic In/24.pl
-1312   1315    ETHIOPIC        # Ethiopic In/24.pl
-1318   131E    ETHIOPIC        # Ethiopic In/24.pl
-1320   1346    ETHIOPIC        # Ethiopic In/24.pl
-1348   135A    ETHIOPIC        # Ethiopic In/24.pl
-1369   1371    ETHIOPIC        # Ethiopic In/24.pl
-1372   137C    ETHIOPIC        # Ethiopic In/24.pl
-13A0   13F4    CHEROKEE        # Cherokee In/25.pl
-1401   166C    CANADIAN-ABORIGINAL     # CanadianAboriginal In/26.pl
-166F   1676    CANADIAN-ABORIGINAL     # CanadianAboriginal In/26.pl
-1681   169A    OGHAM   # Ogham In/27.pl
-16A0   16EA    RUNIC   # Runic In/28.pl
-16EE   16F0    RUNIC   # Runic In/28.pl
-1780   17B3    KHMER   # Khmer In/29.pl
-17B4   17B6    KHMER   # Khmer In/29.pl
-17B7   17BD    KHMER   # Khmer In/29.pl
-17BE   17C5    KHMER   # Khmer In/29.pl
-17C6           KHMER   # Khmer In/29.pl
-17C7   17C8    KHMER   # Khmer In/29.pl
-17C9   17D3    KHMER   # Khmer In/29.pl
-17E0   17E9    KHMER   # Khmer In/29.pl
-1810   1819    MONGOLIAN       # Mongolian In/30.pl
-1820   1842    MONGOLIAN       # Mongolian In/30.pl
-1843           MONGOLIAN       # Mongolian In/30.pl
-1844   1877    MONGOLIAN       # Mongolian In/30.pl
-1880   18A8    MONGOLIAN       # Mongolian In/30.pl
-18A9           MONGOLIAN       # Mongolian In/30.pl
-3041   3094    HIRAGANA        # Hiragana In/31.pl
-309D   309E    HIRAGANA        # Hiragana In/31.pl
-30A1   30FA    KATAKANA        # Katakana In/32.pl
-30FD   30FE    KATAKANA        # Katakana In/32.pl
-FF66   FF6F    KATAKANA        # Katakana In/32.pl
-FF71   FF9D    KATAKANA        # Katakana In/32.pl
-3105   312C    BOPOMOFO        # Bopomofo In/33.pl
-31A0   31B7    BOPOMOFO        # Bopomofo In/33.pl
-2E80   2E99    HAN     # Han In/34.pl
-2E9B   2EF3    HAN     # Han In/34.pl
-2F00   2FD5    HAN     # Han In/34.pl
-3005           HAN     # Han In/34.pl
-3007           HAN     # Han In/34.pl
-3021   3029    HAN     # Han In/34.pl
-3038   303A    HAN     # Han In/34.pl
-3400   4DB5    HAN     # Han In/34.pl
-4E00   9FA5    HAN     # Han In/34.pl
-F900   FA2D    HAN     # Han In/34.pl
-20000  2A6D6   HAN     # Han In/34.pl
-2F800  2FA1D   HAN     # Han In/34.pl
-A000   A48C    YI      # Yi In/35.pl
-A490   A4A1    YI      # Yi In/35.pl
-A4A4   A4B3    YI      # Yi In/35.pl
-A4B5   A4C0    YI      # Yi In/35.pl
-A4C2   A4C4    YI      # Yi In/35.pl
-A4C6           YI      # Yi In/35.pl
-10300  1031E   OLD-ITALIC      # OldItalic In/36.pl
-10330  10349   GOTHIC  # Gothic In/37.pl
-1034A          GOTHIC  # Gothic In/37.pl
-10400  10425   DESERET # Deseret In/38.pl
-10428  1044D   DESERET # Deseret In/38.pl
-0300   034E    INHERITED       # Inherited In/39.pl
-0360   0362    INHERITED       # Inherited In/39.pl
-0488   0489    INHERITED       # Inherited In/39.pl
-0591   05A1    INHERITED       # Inherited In/39.pl
-05A3   05B9    INHERITED       # Inherited In/39.pl
-05BB   05BD    INHERITED       # Inherited In/39.pl
-05BF           INHERITED       # Inherited In/39.pl
-05C1   05C2    INHERITED       # Inherited In/39.pl
-05C4           INHERITED       # Inherited In/39.pl
-064B   0655    INHERITED       # Inherited In/39.pl
-0670           INHERITED       # Inherited In/39.pl
-06D6   06DC    INHERITED       # Inherited In/39.pl
-06DD   06DE    INHERITED       # Inherited In/39.pl
-06DF   06E4    INHERITED       # Inherited In/39.pl
-06E7   06E8    INHERITED       # Inherited In/39.pl
-06EA   06ED    INHERITED       # Inherited In/39.pl
-20D0   20DC    INHERITED       # Inherited In/39.pl
-20DD   20E0    INHERITED       # Inherited In/39.pl
-20E1           INHERITED       # Inherited In/39.pl
-20E2   20E3    INHERITED       # Inherited In/39.pl
-302A   302F    INHERITED       # Inherited In/39.pl
-3099   309A    INHERITED       # Inherited In/39.pl
-FB1E           INHERITED       # Inherited In/39.pl
-FE20   FE23    INHERITED       # Inherited In/39.pl
-1D167  1D169   INHERITED       # Inherited In/39.pl
-1D17B  1D182   INHERITED       # Inherited In/39.pl
-1D185  1D18B   INHERITED       # Inherited In/39.pl
-1D1AA  1D1AD   INHERITED       # Inherited In/39.pl
+0041   005A    LATIN   # In/0.pl
+0061   007A    LATIN   # In/0.pl
+00AA           LATIN   # In/0.pl
+00BA           LATIN   # In/0.pl
+00C0   00D6    LATIN   # In/0.pl
+00D8   00F6    LATIN   # In/0.pl
+00F8   01BA    LATIN   # In/0.pl
+01BB           LATIN   # In/0.pl
+01BC   01BF    LATIN   # In/0.pl
+01C0   01C3    LATIN   # In/0.pl
+01C4   021F    LATIN   # In/0.pl
+0222   0233    LATIN   # In/0.pl
+0250   02AD    LATIN   # In/0.pl
+02B0   02B8    LATIN   # In/0.pl
+02E0   02E4    LATIN   # In/0.pl
+1E00   1E9B    LATIN   # In/0.pl
+1EA0   1EF9    LATIN   # In/0.pl
+207F           LATIN   # In/0.pl
+212A   212B    LATIN   # In/0.pl
+FB00   FB06    LATIN   # In/0.pl
+FF21   FF3A    LATIN   # In/0.pl
+FF41   FF5A    LATIN   # In/0.pl
+00B5           GREEK   # In/1.pl
+037A           GREEK   # In/1.pl
+0386           GREEK   # In/1.pl
+0388   038A    GREEK   # In/1.pl
+038C           GREEK   # In/1.pl
+038E   03A1    GREEK   # In/1.pl
+03A3   03CE    GREEK   # In/1.pl
+03D0   03D7    GREEK   # In/1.pl
+03DA   03F5    GREEK   # In/1.pl
+1F00   1F15    GREEK   # In/1.pl
+1F18   1F1D    GREEK   # In/1.pl
+1F20   1F45    GREEK   # In/1.pl
+1F48   1F4D    GREEK   # In/1.pl
+1F50   1F57    GREEK   # In/1.pl
+1F59           GREEK   # In/1.pl
+1F5B           GREEK   # In/1.pl
+1F5D           GREEK   # In/1.pl
+1F5F   1F7D    GREEK   # In/1.pl
+1F80   1FB4    GREEK   # In/1.pl
+1FB6   1FBC    GREEK   # In/1.pl
+1FBE           GREEK   # In/1.pl
+1FC2   1FC4    GREEK   # In/1.pl
+1FC6   1FCC    GREEK   # In/1.pl
+1FD0   1FD3    GREEK   # In/1.pl
+1FD6   1FDB    GREEK   # In/1.pl
+1FE0   1FEC    GREEK   # In/1.pl
+1FF2   1FF4    GREEK   # In/1.pl
+1FF6   1FFC    GREEK   # In/1.pl
+2126           GREEK   # In/1.pl
+0400   0481    CYRILLIC        # In/2.pl
+0483   0486    CYRILLIC        # In/2.pl
+048C   04C4    CYRILLIC        # In/2.pl
+04C7   04C8    CYRILLIC        # In/2.pl
+04CB   04CC    CYRILLIC        # In/2.pl
+04D0   04F5    CYRILLIC        # In/2.pl
+04F8   04F9    CYRILLIC        # In/2.pl
+0531   0556    ARMENIAN        # In/3.pl
+0559           ARMENIAN        # In/3.pl
+0561   0587    ARMENIAN        # In/3.pl
+FB13   FB17    ARMENIAN        # In/3.pl
+05D0   05EA    HEBREW  # In/4.pl
+05F0   05F2    HEBREW  # In/4.pl
+FB1D           HEBREW  # In/4.pl
+FB1F   FB28    HEBREW  # In/4.pl
+FB2A   FB36    HEBREW  # In/4.pl
+FB38   FB3C    HEBREW  # In/4.pl
+FB3E           HEBREW  # In/4.pl
+FB40   FB41    HEBREW  # In/4.pl
+FB43   FB44    HEBREW  # In/4.pl
+FB46   FB4F    HEBREW  # In/4.pl
+0621   063A    ARABIC  # In/5.pl
+0641   064A    ARABIC  # In/5.pl
+0671   06D3    ARABIC  # In/5.pl
+06D5           ARABIC  # In/5.pl
+06E5   06E6    ARABIC  # In/5.pl
+06FA   06FC    ARABIC  # In/5.pl
+FB50   FBB1    ARABIC  # In/5.pl
+FBD3   FD3D    ARABIC  # In/5.pl
+FD50   FD8F    ARABIC  # In/5.pl
+FD92   FDC7    ARABIC  # In/5.pl
+FDF0   FDFB    ARABIC  # In/5.pl
+FE70   FE72    ARABIC  # In/5.pl
+FE74           ARABIC  # In/5.pl
+FE76   FEFC    ARABIC  # In/5.pl
+0710           SYRIAC  # In/6.pl
+0711           SYRIAC  # In/6.pl
+0712   072C    SYRIAC  # In/6.pl
+0730   074A    SYRIAC  # In/6.pl
+0780   07A5    THAANA  # In/7.pl
+07A6   07B0    THAANA  # In/7.pl
+0901   0902    DEVANAGARI      # In/8.pl
+0903           DEVANAGARI      # In/8.pl
+0905   0939    DEVANAGARI      # In/8.pl
+093C           DEVANAGARI      # In/8.pl
+093D           DEVANAGARI      # In/8.pl
+093E   0940    DEVANAGARI      # In/8.pl
+0941   0948    DEVANAGARI      # In/8.pl
+0949   094C    DEVANAGARI      # In/8.pl
+094D           DEVANAGARI      # In/8.pl
+0950           DEVANAGARI      # In/8.pl
+0951   0954    DEVANAGARI      # In/8.pl
+0958   0961    DEVANAGARI      # In/8.pl
+0962   0963    DEVANAGARI      # In/8.pl
+0966   096F    DEVANAGARI      # In/8.pl
+0981           BENGALI # In/9.pl
+0985   098C    BENGALI # In/9.pl
+098F   0990    BENGALI # In/9.pl
+0993   09A8    BENGALI # In/9.pl
+09AA   09B0    BENGALI # In/9.pl
+09B2           BENGALI # In/9.pl
+09B6   09B9    BENGALI # In/9.pl
+09BC           BENGALI # In/9.pl
+09BE   09C0    BENGALI # In/9.pl
+09C1   09C4    BENGALI # In/9.pl
+09C7   09C8    BENGALI # In/9.pl
+09CB   09CC    BENGALI # In/9.pl
+09CD           BENGALI # In/9.pl
+09D7           BENGALI # In/9.pl
+09DC   09DD    BENGALI # In/9.pl
+09DF   09E1    BENGALI # In/9.pl
+09E2   09E3    BENGALI # In/9.pl
+09E6   09EF    BENGALI # In/9.pl
+09F0   09F1    BENGALI # In/9.pl
+0A02           GURMUKHI        # In/10.pl
+0A05   0A0A    GURMUKHI        # In/10.pl
+0A0F   0A10    GURMUKHI        # In/10.pl
+0A13   0A28    GURMUKHI        # In/10.pl
+0A2A   0A30    GURMUKHI        # In/10.pl
+0A32   0A33    GURMUKHI        # In/10.pl
+0A35   0A36    GURMUKHI        # In/10.pl
+0A38   0A39    GURMUKHI        # In/10.pl
+0A3C           GURMUKHI        # In/10.pl
+0A3E   0A40    GURMUKHI        # In/10.pl
+0A41   0A42    GURMUKHI        # In/10.pl
+0A47   0A48    GURMUKHI        # In/10.pl
+0A4B   0A4D    GURMUKHI        # In/10.pl
+0A59   0A5C    GURMUKHI        # In/10.pl
+0A5E           GURMUKHI        # In/10.pl
+0A66   0A6F    GURMUKHI        # In/10.pl
+0A70   0A71    GURMUKHI        # In/10.pl
+0A72   0A74    GURMUKHI        # In/10.pl
+0A81   0A82    GUJARATI        # In/11.pl
+0A83           GUJARATI        # In/11.pl
+0A85   0A8B    GUJARATI        # In/11.pl
+0A8D           GUJARATI        # In/11.pl
+0A8F   0A91    GUJARATI        # In/11.pl
+0A93   0AA8    GUJARATI        # In/11.pl
+0AAA   0AB0    GUJARATI        # In/11.pl
+0AB2   0AB3    GUJARATI        # In/11.pl
+0AB5   0AB9    GUJARATI        # In/11.pl
+0ABC           GUJARATI        # In/11.pl
+0ABD           GUJARATI        # In/11.pl
+0ABE   0AC0    GUJARATI        # In/11.pl
+0AC1   0AC5    GUJARATI        # In/11.pl
+0AC7   0AC8    GUJARATI        # In/11.pl
+0AC9           GUJARATI        # In/11.pl
+0ACB   0ACC    GUJARATI        # In/11.pl
+0ACD           GUJARATI        # In/11.pl
+0AD0           GUJARATI        # In/11.pl
+0AE0           GUJARATI        # In/11.pl
+0AE6   0AEF    GUJARATI        # In/11.pl
+0B01           ORIYA   # In/12.pl
+0B02   0B03    ORIYA   # In/12.pl
+0B05   0B0C    ORIYA   # In/12.pl
+0B0F   0B10    ORIYA   # In/12.pl
+0B13   0B28    ORIYA   # In/12.pl
+0B2A   0B30    ORIYA   # In/12.pl
+0B32   0B33    ORIYA   # In/12.pl
+0B36   0B39    ORIYA   # In/12.pl
+0B3C           ORIYA   # In/12.pl
+0B3D           ORIYA   # In/12.pl
+0B3E           ORIYA   # In/12.pl
+0B3F           ORIYA   # In/12.pl
+0B40           ORIYA   # In/12.pl
+0B41   0B43    ORIYA   # In/12.pl
+0B47   0B48    ORIYA   # In/12.pl
+0B4B   0B4C    ORIYA   # In/12.pl
+0B4D           ORIYA   # In/12.pl
+0B56           ORIYA   # In/12.pl
+0B57           ORIYA   # In/12.pl
+0B5C   0B5D    ORIYA   # In/12.pl
+0B5F   0B61    ORIYA   # In/12.pl
+0B66   0B6F    ORIYA   # In/12.pl
+0B82           TAMIL   # In/13.pl
+0B83           TAMIL   # In/13.pl
+0B85   0B8A    TAMIL   # In/13.pl
+0B8E   0B90    TAMIL   # In/13.pl
+0B92   0B95    TAMIL   # In/13.pl
+0B99   0B9A    TAMIL   # In/13.pl
+0B9C           TAMIL   # In/13.pl
+0B9E   0B9F    TAMIL   # In/13.pl
+0BA3   0BA4    TAMIL   # In/13.pl
+0BA8   0BAA    TAMIL   # In/13.pl
+0BAE   0BB5    TAMIL   # In/13.pl
+0BB7   0BB9    TAMIL   # In/13.pl
+0BBE   0BBF    TAMIL   # In/13.pl
+0BC0           TAMIL   # In/13.pl
+0BC1   0BC2    TAMIL   # In/13.pl
+0BC6   0BC8    TAMIL   # In/13.pl
+0BCA   0BCC    TAMIL   # In/13.pl
+0BCD           TAMIL   # In/13.pl
+0BD7           TAMIL   # In/13.pl
+0BE7   0BEF    TAMIL   # In/13.pl
+0BF0   0BF2    TAMIL   # In/13.pl
+0C01   0C03    TELUGU  # In/14.pl
+0C05   0C0C    TELUGU  # In/14.pl
+0C0E   0C10    TELUGU  # In/14.pl
+0C12   0C28    TELUGU  # In/14.pl
+0C2A   0C33    TELUGU  # In/14.pl
+0C35   0C39    TELUGU  # In/14.pl
+0C3E   0C40    TELUGU  # In/14.pl
+0C41   0C44    TELUGU  # In/14.pl
+0C46   0C48    TELUGU  # In/14.pl
+0C4A   0C4D    TELUGU  # In/14.pl
+0C55   0C56    TELUGU  # In/14.pl
+0C60   0C61    TELUGU  # In/14.pl
+0C66   0C6F    TELUGU  # In/14.pl
+0C82   0C83    KANNADA # In/15.pl
+0C85   0C8C    KANNADA # In/15.pl
+0C8E   0C90    KANNADA # In/15.pl
+0C92   0CA8    KANNADA # In/15.pl
+0CAA   0CB3    KANNADA # In/15.pl
+0CB5   0CB9    KANNADA # In/15.pl
+0CBE           KANNADA # In/15.pl
+0CBF           KANNADA # In/15.pl
+0CC0   0CC4    KANNADA # In/15.pl
+0CC6           KANNADA # In/15.pl
+0CC7   0CC8    KANNADA # In/15.pl
+0CCA   0CCB    KANNADA # In/15.pl
+0CCC   0CCD    KANNADA # In/15.pl
+0CD5   0CD6    KANNADA # In/15.pl
+0CDE           KANNADA # In/15.pl
+0CE0   0CE1    KANNADA # In/15.pl
+0CE6   0CEF    KANNADA # In/15.pl
+0D02   0D03    MALAYALAM       # In/16.pl
+0D05   0D0C    MALAYALAM       # In/16.pl
+0D0E   0D10    MALAYALAM       # In/16.pl
+0D12   0D28    MALAYALAM       # In/16.pl
+0D2A   0D39    MALAYALAM       # In/16.pl
+0D3E   0D40    MALAYALAM       # In/16.pl
+0D41   0D43    MALAYALAM       # In/16.pl
+0D46   0D48    MALAYALAM       # In/16.pl
+0D4A   0D4C    MALAYALAM       # In/16.pl
+0D4D           MALAYALAM       # In/16.pl
+0D57           MALAYALAM       # In/16.pl
+0D60   0D61    MALAYALAM       # In/16.pl
+0D66   0D6F    MALAYALAM       # In/16.pl
+0D82   0D83    SINHALA # In/17.pl
+0D85   0D96    SINHALA # In/17.pl
+0D9A   0DB1    SINHALA # In/17.pl
+0DB3   0DBB    SINHALA # In/17.pl
+0DBD           SINHALA # In/17.pl
+0DC0   0DC6    SINHALA # In/17.pl
+0DCA           SINHALA # In/17.pl
+0DCF   0DD1    SINHALA # In/17.pl
+0DD2   0DD4    SINHALA # In/17.pl
+0DD6           SINHALA # In/17.pl
+0DD8   0DDF    SINHALA # In/17.pl
+0DF2   0DF3    SINHALA # In/17.pl
+0E01   0E30    THAI    # In/18.pl
+0E31           THAI    # In/18.pl
+0E32   0E33    THAI    # In/18.pl
+0E34   0E3A    THAI    # In/18.pl
+0E40   0E45    THAI    # In/18.pl
+0E46           THAI    # In/18.pl
+0E47   0E4E    THAI    # In/18.pl
+0E50   0E59    THAI    # In/18.pl
+0E81   0E82    LAO     # In/19.pl
+0E84           LAO     # In/19.pl
+0E87   0E88    LAO     # In/19.pl
+0E8A           LAO     # In/19.pl
+0E8D           LAO     # In/19.pl
+0E94   0E97    LAO     # In/19.pl
+0E99   0E9F    LAO     # In/19.pl
+0EA1   0EA3    LAO     # In/19.pl
+0EA5           LAO     # In/19.pl
+0EA7           LAO     # In/19.pl
+0EAA   0EAB    LAO     # In/19.pl
+0EAD   0EB0    LAO     # In/19.pl
+0EB1           LAO     # In/19.pl
+0EB2   0EB3    LAO     # In/19.pl
+0EB4   0EB9    LAO     # In/19.pl
+0EBB   0EBC    LAO     # In/19.pl
+0EBD           LAO     # In/19.pl
+0EC0   0EC4    LAO     # In/19.pl
+0EC6           LAO     # In/19.pl
+0EC8   0ECD    LAO     # In/19.pl
+0ED0   0ED9    LAO     # In/19.pl
+0EDC   0EDD    LAO     # In/19.pl
+0F00           TIBETAN # In/20.pl
+0F18   0F19    TIBETAN # In/20.pl
+0F20   0F29    TIBETAN # In/20.pl
+0F2A   0F33    TIBETAN # In/20.pl
+0F35           TIBETAN # In/20.pl
+0F37           TIBETAN # In/20.pl
+0F39           TIBETAN # In/20.pl
+0F40   0F47    TIBETAN # In/20.pl
+0F49   0F6A    TIBETAN # In/20.pl
+0F71   0F7E    TIBETAN # In/20.pl
+0F7F           TIBETAN # In/20.pl
+0F80   0F84    TIBETAN # In/20.pl
+0F86   0F87    TIBETAN # In/20.pl
+0F88   0F8B    TIBETAN # In/20.pl
+0F90   0F97    TIBETAN # In/20.pl
+0F99   0FBC    TIBETAN # In/20.pl
+0FC6           TIBETAN # In/20.pl
+1000   1021    MYANMAR # In/21.pl
+1023   1027    MYANMAR # In/21.pl
+1029   102A    MYANMAR # In/21.pl
+102C           MYANMAR # In/21.pl
+102D   1030    MYANMAR # In/21.pl
+1031           MYANMAR # In/21.pl
+1032           MYANMAR # In/21.pl
+1036   1037    MYANMAR # In/21.pl
+1038           MYANMAR # In/21.pl
+1039           MYANMAR # In/21.pl
+1040   1049    MYANMAR # In/21.pl
+1050   1055    MYANMAR # In/21.pl
+1056   1057    MYANMAR # In/21.pl
+1058   1059    MYANMAR # In/21.pl
+10A0   10C5    GEORGIAN        # In/22.pl
+10D0   10F6    GEORGIAN        # In/22.pl
+1100   1159    HANGUL  # In/23.pl
+115F   11A2    HANGUL  # In/23.pl
+11A8   11F9    HANGUL  # In/23.pl
+3131   318E    HANGUL  # In/23.pl
+AC00   D7A3    HANGUL  # In/23.pl
+FFA0   FFBE    HANGUL  # In/23.pl
+FFC2   FFC7    HANGUL  # In/23.pl
+FFCA   FFCF    HANGUL  # In/23.pl
+FFD2   FFD7    HANGUL  # In/23.pl
+FFDA   FFDC    HANGUL  # In/23.pl
+1200   1206    ETHIOPIC        # In/24.pl
+1208   1246    ETHIOPIC        # In/24.pl
+1248           ETHIOPIC        # In/24.pl
+124A   124D    ETHIOPIC        # In/24.pl
+1250   1256    ETHIOPIC        # In/24.pl
+1258           ETHIOPIC        # In/24.pl
+125A   125D    ETHIOPIC        # In/24.pl
+1260   1286    ETHIOPIC        # In/24.pl
+1288           ETHIOPIC        # In/24.pl
+128A   128D    ETHIOPIC        # In/24.pl
+1290   12AE    ETHIOPIC        # In/24.pl
+12B0           ETHIOPIC        # In/24.pl
+12B2   12B5    ETHIOPIC        # In/24.pl
+12B8   12BE    ETHIOPIC        # In/24.pl
+12C0           ETHIOPIC        # In/24.pl
+12C2   12C5    ETHIOPIC        # In/24.pl
+12C8   12CE    ETHIOPIC        # In/24.pl
+12D0   12D6    ETHIOPIC        # In/24.pl
+12D8   12EE    ETHIOPIC        # In/24.pl
+12F0   130E    ETHIOPIC        # In/24.pl
+1310           ETHIOPIC        # In/24.pl
+1312   1315    ETHIOPIC        # In/24.pl
+1318   131E    ETHIOPIC        # In/24.pl
+1320   1346    ETHIOPIC        # In/24.pl
+1348   135A    ETHIOPIC        # In/24.pl
+1369   1371    ETHIOPIC        # In/24.pl
+1372   137C    ETHIOPIC        # In/24.pl
+13A0   13F4    CHEROKEE        # In/25.pl
+1401   166C    CANADIAN-ABORIGINAL     # In/26.pl
+166F   1676    CANADIAN-ABORIGINAL     # In/26.pl
+1681   169A    OGHAM   # In/27.pl
+16A0   16EA    RUNIC   # In/28.pl
+16EE   16F0    RUNIC   # In/28.pl
+1780   17B3    KHMER   # In/29.pl
+17B4   17B6    KHMER   # In/29.pl
+17B7   17BD    KHMER   # In/29.pl
+17BE   17C5    KHMER   # In/29.pl
+17C6           KHMER   # In/29.pl
+17C7   17C8    KHMER   # In/29.pl
+17C9   17D3    KHMER   # In/29.pl
+17E0   17E9    KHMER   # In/29.pl
+1810   1819    MONGOLIAN       # In/30.pl
+1820   1842    MONGOLIAN       # In/30.pl
+1843           MONGOLIAN       # In/30.pl
+1844   1877    MONGOLIAN       # In/30.pl
+1880   18A8    MONGOLIAN       # In/30.pl
+18A9           MONGOLIAN       # In/30.pl
+3041   3094    HIRAGANA        # In/31.pl
+309D   309E    HIRAGANA        # In/31.pl
+30A1   30FA    KATAKANA        # In/32.pl
+30FD   30FE    KATAKANA        # In/32.pl
+FF66   FF6F    KATAKANA        # In/32.pl
+FF71   FF9D    KATAKANA        # In/32.pl
+3105   312C    BOPOMOFO        # In/33.pl
+31A0   31B7    BOPOMOFO        # In/33.pl
+2E80   2E99    HAN     # In/34.pl
+2E9B   2EF3    HAN     # In/34.pl
+2F00   2FD5    HAN     # In/34.pl
+3005           HAN     # In/34.pl
+3007           HAN     # In/34.pl
+3021   3029    HAN     # In/34.pl
+3038   303A    HAN     # In/34.pl
+3400   4DB5    HAN     # In/34.pl
+4E00   9FA5    HAN     # In/34.pl
+F900   FA2D    HAN     # In/34.pl
+20000  2A6D6   HAN     # In/34.pl
+2F800  2FA1D   HAN     # In/34.pl
+A000   A48C    YI      # In/35.pl
+A490   A4A1    YI      # In/35.pl
+A4A4   A4B3    YI      # In/35.pl
+A4B5   A4C0    YI      # In/35.pl
+A4C2   A4C4    YI      # In/35.pl
+A4C6           YI      # In/35.pl
+10300  1031E   OLD-ITALIC      # In/36.pl
+10330  10349   GOTHIC  # In/37.pl
+1034A          GOTHIC  # In/37.pl
+10400  10425   DESERET # In/38.pl
+10428  1044D   DESERET # In/38.pl
+0300   034E    INHERITED       # In/39.pl
+0360   0362    INHERITED       # In/39.pl
+0488   0489    INHERITED       # In/39.pl
+0591   05A1    INHERITED       # In/39.pl
+05A3   05B9    INHERITED       # In/39.pl
+05BB   05BD    INHERITED       # In/39.pl
+05BF           INHERITED       # In/39.pl
+05C1   05C2    INHERITED       # In/39.pl
+05C4           INHERITED       # In/39.pl
+064B   0655    INHERITED       # In/39.pl
+0670           INHERITED       # In/39.pl
+06D6   06DC    INHERITED       # In/39.pl
+06DD   06DE    INHERITED       # In/39.pl
+06DF   06E4    INHERITED       # In/39.pl
+06E7   06E8    INHERITED       # In/39.pl
+06EA   06ED    INHERITED       # In/39.pl
+20D0   20DC    INHERITED       # In/39.pl
+20DD   20E0    INHERITED       # In/39.pl
+20E1           INHERITED       # In/39.pl
+20E2   20E3    INHERITED       # In/39.pl
+302A   302F    INHERITED       # In/39.pl
+3099   309A    INHERITED       # In/39.pl
+FB1E           INHERITED       # In/39.pl
+FE20   FE23    INHERITED       # In/39.pl
+1D167  1D169   INHERITED       # In/39.pl
+1D17B  1D182   INHERITED       # In/39.pl
+1D185  1D18B   INHERITED       # In/39.pl
+1D1AA  1D1AD   INHERITED       # In/39.pl
 END
index f86ff69..642c66f 100755 (executable)
@@ -231,7 +231,8 @@ mkdir "To", 0755;
 
 # This is not written for speed...
 
-my %InId;
+my %InIdScript;
+my %InIdBlock;
 my $InId = 0;
 
 foreach $file (@todo) {
@@ -258,9 +259,6 @@ END
     close OUT;
 }
 
-# Do Scripts before Blocks so that in case of naming conflicts
-# the more natural one (Script) wins over the artificial one (Block).
-
 print "Scripts\n";
 open(UD, 'Scripts.txt') or die "Can't open Scripts.txt: $!\n";
 open(OUT, ">Scripts.pl") or die "Can't create Scripts.pl: $!\n";
@@ -281,13 +279,11 @@ while (<UD>) {
     chomp;
     ($code, $last, $name) = /^([0-9a-f]+)(?:\.\.([0-9a-f]+))?\s+;\s+(.+)\s+\#/i;
     if ($name) {
-       my $InName = lc($name);
-       $InName =~ s/\b(\w)/uc($1)/ge;
-       $InName =~ s/\W+//g;
+       my $InName = $name;
        my $id;
-        unless (exists $InId{$InName}) {
+        unless (exists $InIdScript{$InName}) {
            print "\t$InName\n";
-           $id = $Scripts{$InName} = $InId{$InName} = $InId++;
+           $id = $Scripts{$InName} = $InIdScript{$InName} = $InId++;
            open(SCRIPT, ">In/$id.pl") or die "create In/$id.pl: $!\n";
            print SCRIPT <<EOH;
 # !!!!!!!   DO NOT EDIT THIS FILE   !!!!!!! 
@@ -297,10 +293,10 @@ return <<'END';
 EOH
             close(SCRIPT);
        } else {
-           $id = $InId{$InName};
+           $id = $InIdScript{$InName};
        }
        $last = "" unless defined $last;
-       print OUT "$code\t$last\t$name\t# $InName In/$id.pl\n";
+       print OUT "$code\t$last\t$name\t# In/$id.pl\n";
         open(SCRIPT, ">>In/$id.pl");
        print SCRIPT <<END;
 $code  $last
@@ -309,7 +305,7 @@ END
     }
 }
 
-for my $id (values %InId) {
+for my $id (values %InIdScript) {
     open(SCRIPT, ">>In/$id.pl");
     print SCRIPT <<END2;
 END
@@ -339,22 +335,18 @@ while (<UD>) {
     next if /^#/;
     next if /^$/;
     chomp;
-    ($code, $last, $name) = /^([0-9a-f]+)\.\.([0-9a-f]+); (.+)/i;
+    ($code, $last, $name) = /^([0-9a-f]+)\.\.([0-9a-f]+); (.+?)\s*$/i;
     if ($name) {
        my $InName = $name;
-       $InName =~ s/\W+//g;
        print "\t$InName\n";
        my $id;
        # TODO: only the first one of Private Use blocks qualifies
-        unless (exists $InId{$InName}) {
-           $InId{$InName} = $InId++;
-       } elsif (exists $Scripts{$InName}) {
-           $InName .= 'Block';
-           $InId{$InName} = $InId++;
+        unless (exists $InIdBlock{$InName}) {
+           $InIdBlock{$InName} = $InId++;
        }
-       $id = $InId{$InName};
+       $id = $InIdBlock{$InName};
        open(BLOCK, ">In/$id.pl") or die "create In/$id.pl: $!\n";
-       print OUT "$code\t$last\t$name\t# $InName In/$id.pl\n";
+       print OUT "$code\t$last\t$name\t# In/$id.pl\n";
        print BLOCK <<EOH;
 # !!!!!!!   DO NOT EDIT THIS FILE   !!!!!!! 
 # This file is built by $0 from e.g. $UnicodeData.
@@ -381,9 +373,57 @@ print INID <<EOH;
 %utf8::In = (
 EOH
 
-# Order doesn't matter but let's prettyprint anyway.
-foreach my $in (sort { $InId{$a} <=> $InId{$b} } keys %InId) {
-    printf INID "%-40s => %3d,\n", "'$in'", $InId{$in};
+# Invert name => id into id => name, then walk each table in id order.
+my %InIdScriptById = reverse %InIdScript;
+my %InIdBlockById  = reverse %InIdBlock;
+
+my @InIdScriptById = sort { $a <=> $b } keys %InIdScriptById;
+my @InIdBlockById  = sort { $a <=> $b } keys %InIdBlockById;
+
+my %InId;        # final name => id table, printed below as %utf8::In
+my %InIdLcName;  # lowercased names seen so far => id
+
+# Scripts are entered first and always keep their own name.
+for my $id (@InIdScriptById) {
+    my $name = $InIdScriptById{$id};
+    $InId{$name} = $id;
+    $InIdLcName{lc($name)} = $id;
+}
+
+# A block whose name collides, ignoring case, with a name already
+# seen is registered as "<name> Block" instead.
+for my $id (@InIdBlockById) {
+    my $name   = $InIdBlockById{$id};
+    my $lcname = lc($name);
+    my $key    = exists $InIdLcName{$lcname} ? "$name Block" : $name;
+    $InId{$key} = $id;
+    $InIdLcName{$lcname} = $id;
+}
+
+my @InId = sort { $InId{$a} <=> $InId{$b} } keys %InId;
+my %InIdPrefix;
+# File each name under every 3-char prefix a caller may type: a separator
+# among the first three characters may be a space, '-', '_' or left out.
+foreach my $in (@InId) {
+    (my $inpat = $in) =~ s/[- ]/[- _]?/g;
+    my %prefix = map { (my $v = $in) =~ s/[- ]/$_/g; (lc(substr($v, 0, 3)) => 1) } ('', ' ', '-', '_');
+    push @{$InIdPrefix{$_}}, [ $in, $inpat ] for sort keys %prefix;
+    printf INID "%-45s => %3d,\n", "'$in'", $InId{$in};
+}
+
+print INID ");\n";
+
+print INID <<EOH;
+%utf8::InPat = (
+EOH
+# Emit one sub-hash per prefix, mapping each lenient name pattern to its %utf8::In key.
+foreach my $prefix (sort keys %InIdPrefix) {
+    print INID "'$prefix' => {\n";   # plain print: the string is data, not a format
+    foreach my $ininpat (@{$InIdPrefix{$prefix}}) {
+       my ($in, $inpat) = @$ininpat;
+       print INID "\t'$inpat' => '$in',\n";
+    }
+    print INID "},\n";
 }
 
 print INID ");\n";
index a90e24c..e8cf0cc 100644 (file)
@@ -26,11 +26,20 @@ sub SWASHNEW {
     while (($caller = caller($i)) eq __PACKAGE__) { $i++ }
     my $encoding = $enc{$caller} || "unicore";
     (my $file = $type) =~ s!::!/!g;
-    if ($file =~ /^In(.+)/) {
+    if ($file =~ /^In[- ]?(.+)/i) {
        my $In = $1;
        defined %utf8::In || do "$encoding/In.pl";
-       if (exists $utf8::In{$In}) {
-           $file = "$encoding/In/$utf8::In{$In}";
+       my $prefix = substr(lc($In), 0, 3);
+       if (exists $utf8::InPat{$prefix}) {
+           for my $k (keys %{$utf8::InPat{$prefix}}) {
+               if ($In =~ /^$k$/i) {
+                   $In = $utf8::InPat{$prefix}->{$k};
+                   if (exists $utf8::In{$In}) {
+                       $file = "$encoding/In/$utf8::In{$In}";
+                       last;
+                   }
+               }
+           }
        }
     } else {
        $file =~ s#^(Is|To)([A-Z].*)#$1/$2#;
@@ -43,7 +52,7 @@ sub SWASHNEW {
            || do "$file.pl"
            || do "$encoding/$file.pl"
            || do "$encoding/Is/${type}.pl"
-           || croak("Can't find $encoding character property \"$type\"");
+           || croak("Can't find Unicode character property \"$type\"");
     }
 
     $| = 1;
index 63ad011..f27173c 100644 (file)
@@ -169,9 +169,10 @@ character with the Unicode uppercase property, while C<\p{M}> matches
 any mark character.  Single letter properties may omit the brackets,
 so that can be written C<\pM> also.  Many predefined character classes
 are available, such as C<\p{IsMirrored}> and C<\p{InTibetan}>.  The
-names of the C<In> classes are the official Unicode script and block
-names but with all non-alphanumeric characters removed, for example
-the block name C<"Latin-1 Supplement"> becomes C<\p{InLatin1Supplement}>.
+recommended names of the C<In> classes are the official Unicode script
+and block names but with all non-alphanumeric characters removed, for
+example the block name C<"Latin-1 Supplement"> becomes
+C<\p{InLatin1Supplement}>.
 
 Here is the list as of Unicode 3.1.0 (the two-letter classes) and
 as defined by Perl (the one-letter classes) (in Unicode materials
index 2042f39..f5a2edd 100755 (executable)
@@ -6,7 +6,7 @@
 
 $| = 1;
 
-print "1..715\n";
+print "1..716\n";
 
 BEGIN {
     chdir 't' if -d 't';
@@ -2121,9 +2121,13 @@ sub ok ($$) {
 }
 
 {
-  # high bit bug -- japhy
-  my $x = "ab\200d";
-  $x =~ /.*?\200/ or print "not ";
-  print "ok 715\n";
+    # high bit bug -- japhy
+    my $x = "ab\200d";
+    $x =~ /.*?\200/ or print "not ";
+    print "ok 715\n";
 }
 
+{
+    "\x80" =~ /\p{in-latin1_SUPPLEMENT}/ or print "not ";  # lenient spelling of InLatin1Supplement
+    print "ok 716\n";
+}