From: Jarkko Hietaniemi Date: Sat, 28 Apr 2001 17:18:26 +0000 (+0000) Subject: Add a level of indirection to the implementation of \p{InFoo} X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=9fdf68be0731526d7d18a86c19f0b68d4946728d;p=p5sagit%2Fp5-mst-13.2.git Add a level of indirection to the implementation of \p{InFoo} so that we don't have to have long filenames. (Nothing changes in the user interface.) The indirection is defined in the file lib/unicode/In.pl and it is handled in lib/utf8_heavy.pl. Also rename some of the character classes by removing '-' from the classnames, and finally rename Block.pl as Blocks.pl. p4raw-id: //depot/perl@9897 --- diff --git a/MANIFEST b/MANIFEST index 33e69eb..988302e 100644 --- a/MANIFEST +++ b/MANIFEST @@ -888,7 +888,7 @@ lib/unicode/ArabLnkGrp.pl Unicode character database lib/unicode/ArabShap.txt Unicode character database lib/unicode/BidiMirr.txt Unicode character database lib/unicode/Bidirectional.pl Unicode character database -lib/unicode/Block.pl Unicode character database +lib/unicode/Blocks.pl Unicode character database lib/unicode/Blocks.txt Unicode character database lib/unicode/CaseFold.txt Unicode character database lib/unicode/Category.pl Unicode character database @@ -896,93 +896,103 @@ lib/unicode/CombiningClass.pl Unicode character database lib/unicode/CompExcl.txt Unicode character database lib/unicode/Decomposition.pl Unicode character database lib/unicode/EAWidth.txt Unicode character database -lib/unicode/In/AlphabeticPresentationForms.pl Unicode character database -lib/unicode/In/Arabic.pl Unicode character database -lib/unicode/In/ArabicPresentationForms-A.pl Unicode character database -lib/unicode/In/ArabicPresentationForms-B.pl Unicode character database -lib/unicode/In/Armenian.pl Unicode character database -lib/unicode/In/Arrows.pl Unicode character database -lib/unicode/In/BasicLatin.pl Unicode character database -lib/unicode/In/Bengali.pl Unicode character database 
-lib/unicode/In/BlockElements.pl Unicode character database -lib/unicode/In/Bopomofo.pl Unicode character database -lib/unicode/In/BopomofoExtended.pl Unicode character database -lib/unicode/In/BoxDrawing.pl Unicode character database -lib/unicode/In/BraillePatterns.pl Unicode character database -lib/unicode/In/CJKCompatibility.pl Unicode character database -lib/unicode/In/CJKCompatibilityForms.pl Unicode character database -lib/unicode/In/CJKCompatibilityIdeographs.pl Unicode character database -lib/unicode/In/CJKRadicalsSupplement.pl Unicode character database -lib/unicode/In/CJKSymbolsandPunctuation.pl Unicode character database -lib/unicode/In/CJKUnifiedIdeographs.pl Unicode character database -lib/unicode/In/CJKUnifiedIdeographsExtensionA.pl Unicode character database -lib/unicode/In/Cherokee.pl Unicode character database -lib/unicode/In/CombiningDiacriticalMarks.pl Unicode character database -lib/unicode/In/CombiningHalfMarks.pl Unicode character database -lib/unicode/In/CombiningMarksforSymbols.pl Unicode character database -lib/unicode/In/ControlPictures.pl Unicode character database -lib/unicode/In/CurrencySymbols.pl Unicode character database -lib/unicode/In/Cyrillic.pl Unicode character database -lib/unicode/In/Devanagari.pl Unicode character database -lib/unicode/In/Dingbats.pl Unicode character database -lib/unicode/In/EnclosedAlphanumerics.pl Unicode character database -lib/unicode/In/EnclosedCJKLettersandMonths.pl Unicode character database -lib/unicode/In/Ethiopic.pl Unicode character database -lib/unicode/In/GeneralPunctuation.pl Unicode character database -lib/unicode/In/GeometricShapes.pl Unicode character database -lib/unicode/In/Georgian.pl Unicode character database -lib/unicode/In/Greek.pl Unicode character database -lib/unicode/In/GreekExtended.pl Unicode character database -lib/unicode/In/Gujarati.pl Unicode character database -lib/unicode/In/Gurmukhi.pl Unicode character database -lib/unicode/In/HalfwidthandFullwidthForms.pl Unicode 
character database -lib/unicode/In/HangulCompatibilityJamo.pl Unicode character database -lib/unicode/In/HangulJamo.pl Unicode character database -lib/unicode/In/HangulSyllables.pl Unicode character database -lib/unicode/In/Hebrew.pl Unicode character database -lib/unicode/In/HighPrivateUseSurrogates.pl Unicode character database -lib/unicode/In/HighSurrogates.pl Unicode character database -lib/unicode/In/Hiragana.pl Unicode character database -lib/unicode/In/IPAExtensions.pl Unicode character database -lib/unicode/In/IdeographicDescriptionCharacters.pl Unicode character database -lib/unicode/In/Kanbun.pl Unicode character database -lib/unicode/In/KangxiRadicals.pl Unicode character database -lib/unicode/In/Kannada.pl Unicode character database -lib/unicode/In/Katakana.pl Unicode character database -lib/unicode/In/Khmer.pl Unicode character database -lib/unicode/In/Lao.pl Unicode character database -lib/unicode/In/Latin-1Supplement.pl Unicode character database -lib/unicode/In/LatinExtended-A.pl Unicode character database -lib/unicode/In/LatinExtended-B.pl Unicode character database -lib/unicode/In/LatinExtendedAdditional.pl Unicode character database -lib/unicode/In/LetterlikeSymbols.pl Unicode character database -lib/unicode/In/LowSurrogates.pl Unicode character database -lib/unicode/In/Malayalam.pl Unicode character database -lib/unicode/In/MathematicalOperators.pl Unicode character database -lib/unicode/In/MiscellaneousSymbols.pl Unicode character database -lib/unicode/In/MiscellaneousTechnical.pl Unicode character database -lib/unicode/In/Mongolian.pl Unicode character database -lib/unicode/In/Myanmar.pl Unicode character database -lib/unicode/In/NumberForms.pl Unicode character database -lib/unicode/In/Ogham.pl Unicode character database -lib/unicode/In/OpticalCharacterRecognition.pl Unicode character database -lib/unicode/In/Oriya.pl Unicode character database -lib/unicode/In/PrivateUse.pl Unicode character database -lib/unicode/In/Runic.pl Unicode character 
database -lib/unicode/In/Sinhala.pl Unicode character database -lib/unicode/In/SmallFormVariants.pl Unicode character database -lib/unicode/In/SpacingModifierLetters.pl Unicode character database -lib/unicode/In/Specials.pl Unicode character database -lib/unicode/In/SuperscriptsandSubscripts.pl Unicode character database -lib/unicode/In/Syriac.pl Unicode character database -lib/unicode/In/Tamil.pl Unicode character database -lib/unicode/In/Telugu.pl Unicode character database -lib/unicode/In/Thaana.pl Unicode character database -lib/unicode/In/Thai.pl Unicode character database -lib/unicode/In/Tibetan.pl Unicode character database -lib/unicode/In/UnifiedCanadianAboriginalSyllabics.pl Unicode character database -lib/unicode/In/YiRadicals.pl Unicode character database -lib/unicode/In/YiSyllables.pl Unicode character database +lib/unicode/In.pl Unicode character database +lib/unicode/In/0.pl Unicode character database +lib/unicode/In/1.pl Unicode character database +lib/unicode/In/2.pl Unicode character database +lib/unicode/In/3.pl Unicode character database +lib/unicode/In/4.pl Unicode character database +lib/unicode/In/5.pl Unicode character database +lib/unicode/In/6.pl Unicode character database +lib/unicode/In/7.pl Unicode character database +lib/unicode/In/8.pl Unicode character database +lib/unicode/In/9.pl Unicode character database +lib/unicode/In/10.pl Unicode character database +lib/unicode/In/11.pl Unicode character database +lib/unicode/In/12.pl Unicode character database +lib/unicode/In/13.pl Unicode character database +lib/unicode/In/14.pl Unicode character database +lib/unicode/In/15.pl Unicode character database +lib/unicode/In/16.pl Unicode character database +lib/unicode/In/17.pl Unicode character database +lib/unicode/In/18.pl Unicode character database +lib/unicode/In/19.pl Unicode character database +lib/unicode/In/20.pl Unicode character database +lib/unicode/In/21.pl Unicode character database +lib/unicode/In/22.pl Unicode character database 
+lib/unicode/In/23.pl Unicode character database +lib/unicode/In/24.pl Unicode character database +lib/unicode/In/25.pl Unicode character database +lib/unicode/In/26.pl Unicode character database +lib/unicode/In/27.pl Unicode character database +lib/unicode/In/28.pl Unicode character database +lib/unicode/In/29.pl Unicode character database +lib/unicode/In/30.pl Unicode character database +lib/unicode/In/31.pl Unicode character database +lib/unicode/In/32.pl Unicode character database +lib/unicode/In/33.pl Unicode character database +lib/unicode/In/34.pl Unicode character database +lib/unicode/In/35.pl Unicode character database +lib/unicode/In/36.pl Unicode character database +lib/unicode/In/37.pl Unicode character database +lib/unicode/In/38.pl Unicode character database +lib/unicode/In/39.pl Unicode character database +lib/unicode/In/40.pl Unicode character database +lib/unicode/In/41.pl Unicode character database +lib/unicode/In/42.pl Unicode character database +lib/unicode/In/43.pl Unicode character database +lib/unicode/In/44.pl Unicode character database +lib/unicode/In/45.pl Unicode character database +lib/unicode/In/46.pl Unicode character database +lib/unicode/In/47.pl Unicode character database +lib/unicode/In/48.pl Unicode character database +lib/unicode/In/49.pl Unicode character database +lib/unicode/In/50.pl Unicode character database +lib/unicode/In/51.pl Unicode character database +lib/unicode/In/52.pl Unicode character database +lib/unicode/In/53.pl Unicode character database +lib/unicode/In/54.pl Unicode character database +lib/unicode/In/55.pl Unicode character database +lib/unicode/In/56.pl Unicode character database +lib/unicode/In/57.pl Unicode character database +lib/unicode/In/58.pl Unicode character database +lib/unicode/In/59.pl Unicode character database +lib/unicode/In/60.pl Unicode character database +lib/unicode/In/61.pl Unicode character database +lib/unicode/In/62.pl Unicode character database +lib/unicode/In/63.pl Unicode character 
database +lib/unicode/In/64.pl Unicode character database +lib/unicode/In/65.pl Unicode character database +lib/unicode/In/66.pl Unicode character database +lib/unicode/In/67.pl Unicode character database +lib/unicode/In/68.pl Unicode character database +lib/unicode/In/69.pl Unicode character database +lib/unicode/In/70.pl Unicode character database +lib/unicode/In/71.pl Unicode character database +lib/unicode/In/72.pl Unicode character database +lib/unicode/In/73.pl Unicode character database +lib/unicode/In/74.pl Unicode character database +lib/unicode/In/75.pl Unicode character database +lib/unicode/In/76.pl Unicode character database +lib/unicode/In/77.pl Unicode character database +lib/unicode/In/78.pl Unicode character database +lib/unicode/In/79.pl Unicode character database +lib/unicode/In/80.pl Unicode character database +lib/unicode/In/81.pl Unicode character database +lib/unicode/In/82.pl Unicode character database +lib/unicode/In/83.pl Unicode character database +lib/unicode/In/84.pl Unicode character database +lib/unicode/In/85.pl Unicode character database +lib/unicode/In/86.pl Unicode character database +lib/unicode/In/87.pl Unicode character database +lib/unicode/In/88.pl Unicode character database +lib/unicode/In/89.pl Unicode character database +lib/unicode/In/90.pl Unicode character database +lib/unicode/In/91.pl Unicode character database +lib/unicode/In/92.pl Unicode character database +lib/unicode/In/93.pl Unicode character database +lib/unicode/In/94.pl Unicode character database +lib/unicode/In/95.pl Unicode character database lib/unicode/Index.txt Unicode character database lib/unicode/Is/ASCII.pl Unicode character database lib/unicode/Is/Alnum.pl Unicode character database diff --git a/lib/unicode/Blocks.pl b/lib/unicode/Blocks.pl new file mode 100644 index 0000000..ef60058 --- /dev/null +++ b/lib/unicode/Blocks.pl @@ -0,0 +1,203 @@ +# !!!!!!! DO NOT EDIT THIS FILE !!!!!!! +# This file is built by mktables.PL from e.g. Unicode.txt. 
+# Any changes made here will be lost! +return <<'END'; +0000 007F Basic Latin +# In/0.pl BasicLatin +0080 00FF Latin-1 Supplement +# In/1.pl Latin1Supplement +0100 017F Latin Extended-A +# In/2.pl LatinExtendedA +0180 024F Latin Extended-B +# In/3.pl LatinExtendedB +0250 02AF IPA Extensions +# In/4.pl IPAExtensions +02B0 02FF Spacing Modifier Letters +# In/5.pl SpacingModifierLetters +0300 036F Combining Diacritical Marks +# In/6.pl CombiningDiacriticalMarks +0370 03FF Greek +# In/7.pl Greek +0400 04FF Cyrillic +# In/8.pl Cyrillic +0530 058F Armenian +# In/9.pl Armenian +0590 05FF Hebrew +# In/10.pl Hebrew +0600 06FF Arabic +# In/11.pl Arabic +0700 074F Syriac +# In/12.pl Syriac +0780 07BF Thaana +# In/13.pl Thaana +0900 097F Devanagari +# In/14.pl Devanagari +0980 09FF Bengali +# In/15.pl Bengali +0A00 0A7F Gurmukhi +# In/16.pl Gurmukhi +0A80 0AFF Gujarati +# In/17.pl Gujarati +0B00 0B7F Oriya +# In/18.pl Oriya +0B80 0BFF Tamil +# In/19.pl Tamil +0C00 0C7F Telugu +# In/20.pl Telugu +0C80 0CFF Kannada +# In/21.pl Kannada +0D00 0D7F Malayalam +# In/22.pl Malayalam +0D80 0DFF Sinhala +# In/23.pl Sinhala +0E00 0E7F Thai +# In/24.pl Thai +0E80 0EFF Lao +# In/25.pl Lao +0F00 0FFF Tibetan +# In/26.pl Tibetan +1000 109F Myanmar +# In/27.pl Myanmar +10A0 10FF Georgian +# In/28.pl Georgian +1100 11FF Hangul Jamo +# In/29.pl HangulJamo +1200 137F Ethiopic +# In/30.pl Ethiopic +13A0 13FF Cherokee +# In/31.pl Cherokee +1400 167F Unified Canadian Aboriginal Syllabics +# In/32.pl UnifiedCanadianAboriginalSyllabics +1680 169F Ogham +# In/33.pl Ogham +16A0 16FF Runic +# In/34.pl Runic +1780 17FF Khmer +# In/35.pl Khmer +1800 18AF Mongolian +# In/36.pl Mongolian +1E00 1EFF Latin Extended Additional +# In/37.pl LatinExtendedAdditional +1F00 1FFF Greek Extended +# In/38.pl GreekExtended +2000 206F General Punctuation +# In/39.pl GeneralPunctuation +2070 209F Superscripts and Subscripts +# In/40.pl SuperscriptsandSubscripts +20A0 20CF Currency Symbols +# In/41.pl CurrencySymbols 
+20D0 20FF Combining Marks for Symbols +# In/42.pl CombiningMarksforSymbols +2100 214F Letterlike Symbols +# In/43.pl LetterlikeSymbols +2150 218F Number Forms +# In/44.pl NumberForms +2190 21FF Arrows +# In/45.pl Arrows +2200 22FF Mathematical Operators +# In/46.pl MathematicalOperators +2300 23FF Miscellaneous Technical +# In/47.pl MiscellaneousTechnical +2400 243F Control Pictures +# In/48.pl ControlPictures +2440 245F Optical Character Recognition +# In/49.pl OpticalCharacterRecognition +2460 24FF Enclosed Alphanumerics +# In/50.pl EnclosedAlphanumerics +2500 257F Box Drawing +# In/51.pl BoxDrawing +2580 259F Block Elements +# In/52.pl BlockElements +25A0 25FF Geometric Shapes +# In/53.pl GeometricShapes +2600 26FF Miscellaneous Symbols +# In/54.pl MiscellaneousSymbols +2700 27BF Dingbats +# In/55.pl Dingbats +2800 28FF Braille Patterns +# In/56.pl BraillePatterns +2E80 2EFF CJK Radicals Supplement +# In/57.pl CJKRadicalsSupplement +2F00 2FDF Kangxi Radicals +# In/58.pl KangxiRadicals +2FF0 2FFF Ideographic Description Characters +# In/59.pl IdeographicDescriptionCharacters +3000 303F CJK Symbols and Punctuation +# In/60.pl CJKSymbolsandPunctuation +3040 309F Hiragana +# In/61.pl Hiragana +30A0 30FF Katakana +# In/62.pl Katakana +3100 312F Bopomofo +# In/63.pl Bopomofo +3130 318F Hangul Compatibility Jamo +# In/64.pl HangulCompatibilityJamo +3190 319F Kanbun +# In/65.pl Kanbun +31A0 31BF Bopomofo Extended +# In/66.pl BopomofoExtended +3200 32FF Enclosed CJK Letters and Months +# In/67.pl EnclosedCJKLettersandMonths +3300 33FF CJK Compatibility +# In/68.pl CJKCompatibility +3400 4DB5 CJK Unified Ideographs Extension A +# In/69.pl CJKUnifiedIdeographsExtensionA +4E00 9FFF CJK Unified Ideographs +# In/70.pl CJKUnifiedIdeographs +A000 A48F Yi Syllables +# In/71.pl YiSyllables +A490 A4CF Yi Radicals +# In/72.pl YiRadicals +AC00 D7A3 Hangul Syllables +# In/73.pl HangulSyllables +D800 DB7F High Surrogates +# In/74.pl HighSurrogates +DB80 DBFF High Private Use 
Surrogates +# In/75.pl HighPrivateUseSurrogates +DC00 DFFF Low Surrogates +# In/76.pl LowSurrogates +E000 F8FF Private Use +# In/77.pl PrivateUse +F900 FAFF CJK Compatibility Ideographs +# In/78.pl CJKCompatibilityIdeographs +FB00 FB4F Alphabetic Presentation Forms +# In/79.pl AlphabeticPresentationForms +FB50 FDFF Arabic Presentation Forms-A +# In/80.pl ArabicPresentationFormsA +FE20 FE2F Combining Half Marks +# In/81.pl CombiningHalfMarks +FE30 FE4F CJK Compatibility Forms +# In/82.pl CJKCompatibilityForms +FE50 FE6F Small Form Variants +# In/83.pl SmallFormVariants +FE70 FEFE Arabic Presentation Forms-B +# In/84.pl ArabicPresentationFormsB +FEFF FEFF Specials +# In/85.pl Specials +FF00 FFEF Halfwidth and Fullwidth Forms +# In/86.pl HalfwidthandFullwidthForms +FFF0 FFFD Specials +# In/85.pl Specials +10300 1032F Old Italic +# In/87.pl OldItalic +10330 1034F Gothic +# In/88.pl Gothic +10400 1044F Deseret +# In/89.pl Deseret +1D000 1D0FF Byzantine Musical Symbols +# In/90.pl ByzantineMusicalSymbols +1D100 1D1FF Musical Symbols +# In/91.pl MusicalSymbols +1D400 1D7FF Mathematical Alphanumeric Symbols +# In/92.pl MathematicalAlphanumericSymbols +20000 2A6D6 CJK Unified Ideographs Extension B +# In/93.pl CJKUnifiedIdeographsExtensionB +2F800 2FA1F CJK Compatibility Ideographs Supplement +# In/94.pl CJKCompatibilityIdeographsSupplement +E0000 E007F Tags +# In/95.pl Tags +F0000 FFFFD Private Use +# In/77.pl PrivateUse +100000 10FFFD Private Use +# In/77.pl PrivateUse +END diff --git a/lib/unicode/In.pl b/lib/unicode/In.pl new file mode 100644 index 0000000..eefec27 --- /dev/null +++ b/lib/unicode/In.pl @@ -0,0 +1,101 @@ +# !!!!!!! DO NOT EDIT THIS FILE !!!!!!! +# This file is built by mktables.PL from e.g. Unicode.txt. +# Any changes made here will be lost! 
+%utf8::In = ( +'BasicLatin' => 0, +'Latin1Supplement' => 1, +'Hebrew' => 10, +'Arabic' => 11, +'Syriac' => 12, +'Thaana' => 13, +'Devanagari' => 14, +'Bengali' => 15, +'Gurmukhi' => 16, +'Gujarati' => 17, +'Oriya' => 18, +'Tamil' => 19, +'LatinExtendedA' => 2, +'Telugu' => 20, +'Kannada' => 21, +'Malayalam' => 22, +'Sinhala' => 23, +'Thai' => 24, +'Lao' => 25, +'Tibetan' => 26, +'Myanmar' => 27, +'Georgian' => 28, +'HangulJamo' => 29, +'LatinExtendedB' => 3, +'Ethiopic' => 30, +'Cherokee' => 31, +'UnifiedCanadianAboriginalSyllabics' => 32, +'Ogham' => 33, +'Runic' => 34, +'Khmer' => 35, +'Mongolian' => 36, +'LatinExtendedAdditional' => 37, +'GreekExtended' => 38, +'GeneralPunctuation' => 39, +'IPAExtensions' => 4, +'SuperscriptsandSubscripts' => 40, +'CurrencySymbols' => 41, +'CombiningMarksforSymbols' => 42, +'LetterlikeSymbols' => 43, +'NumberForms' => 44, +'Arrows' => 45, +'MathematicalOperators' => 46, +'MiscellaneousTechnical' => 47, +'ControlPictures' => 48, +'OpticalCharacterRecognition' => 49, +'SpacingModifierLetters' => 5, +'EnclosedAlphanumerics' => 50, +'BoxDrawing' => 51, +'BlockElements' => 52, +'GeometricShapes' => 53, +'MiscellaneousSymbols' => 54, +'Dingbats' => 55, +'BraillePatterns' => 56, +'CJKRadicalsSupplement' => 57, +'KangxiRadicals' => 58, +'IdeographicDescriptionCharacters' => 59, +'CombiningDiacriticalMarks' => 6, +'CJKSymbolsandPunctuation' => 60, +'Hiragana' => 61, +'Katakana' => 62, +'Bopomofo' => 63, +'HangulCompatibilityJamo' => 64, +'Kanbun' => 65, +'BopomofoExtended' => 66, +'EnclosedCJKLettersandMonths' => 67, +'CJKCompatibility' => 68, +'CJKUnifiedIdeographsExtensionA' => 69, +'Greek' => 7, +'CJKUnifiedIdeographs' => 70, +'YiSyllables' => 71, +'YiRadicals' => 72, +'HangulSyllables' => 73, +'HighSurrogates' => 74, +'HighPrivateUseSurrogates' => 75, +'LowSurrogates' => 76, +'PrivateUse' => 77, +'CJKCompatibilityIdeographs' => 78, +'AlphabeticPresentationForms' => 79, +'Cyrillic' => 8, +'ArabicPresentationFormsA' => 80, 
+'CombiningHalfMarks' => 81, +'CJKCompatibilityForms' => 82, +'SmallFormVariants' => 83, +'ArabicPresentationFormsB' => 84, +'Specials' => 85, +'HalfwidthandFullwidthForms' => 86, +'OldItalic' => 87, +'Gothic' => 88, +'Deseret' => 89, +'Armenian' => 9, +'ByzantineMusicalSymbols' => 90, +'MusicalSymbols' => 91, +'MathematicalAlphanumericSymbols' => 92, +'CJKUnifiedIdeographsExtensionB' => 93, +'CJKCompatibilityIdeographsSupplement' => 94, +'Tags' => 95, +); diff --git a/lib/unicode/In/BasicLatin.pl b/lib/unicode/In/0.pl similarity index 100% rename from lib/unicode/In/BasicLatin.pl rename to lib/unicode/In/0.pl diff --git a/lib/unicode/In/Latin-1Supplement.pl b/lib/unicode/In/1.pl similarity index 100% rename from lib/unicode/In/Latin-1Supplement.pl rename to lib/unicode/In/1.pl diff --git a/lib/unicode/In/Hebrew.pl b/lib/unicode/In/10.pl similarity index 100% rename from lib/unicode/In/Hebrew.pl rename to lib/unicode/In/10.pl diff --git a/lib/unicode/In/Arabic.pl b/lib/unicode/In/11.pl similarity index 100% rename from lib/unicode/In/Arabic.pl rename to lib/unicode/In/11.pl diff --git a/lib/unicode/In/Syriac.pl b/lib/unicode/In/12.pl similarity index 100% rename from lib/unicode/In/Syriac.pl rename to lib/unicode/In/12.pl diff --git a/lib/unicode/In/Thaana.pl b/lib/unicode/In/13.pl similarity index 100% rename from lib/unicode/In/Thaana.pl rename to lib/unicode/In/13.pl diff --git a/lib/unicode/In/Devanagari.pl b/lib/unicode/In/14.pl similarity index 100% rename from lib/unicode/In/Devanagari.pl rename to lib/unicode/In/14.pl diff --git a/lib/unicode/In/Bengali.pl b/lib/unicode/In/15.pl similarity index 100% rename from lib/unicode/In/Bengali.pl rename to lib/unicode/In/15.pl diff --git a/lib/unicode/In/Gurmukhi.pl b/lib/unicode/In/16.pl similarity index 100% rename from lib/unicode/In/Gurmukhi.pl rename to lib/unicode/In/16.pl diff --git a/lib/unicode/In/Gujarati.pl b/lib/unicode/In/17.pl similarity index 100% rename from lib/unicode/In/Gujarati.pl rename to 
lib/unicode/In/17.pl diff --git a/lib/unicode/In/Oriya.pl b/lib/unicode/In/18.pl similarity index 100% rename from lib/unicode/In/Oriya.pl rename to lib/unicode/In/18.pl diff --git a/lib/unicode/In/Tamil.pl b/lib/unicode/In/19.pl similarity index 100% rename from lib/unicode/In/Tamil.pl rename to lib/unicode/In/19.pl diff --git a/lib/unicode/In/LatinExtended-A.pl b/lib/unicode/In/2.pl similarity index 100% rename from lib/unicode/In/LatinExtended-A.pl rename to lib/unicode/In/2.pl diff --git a/lib/unicode/In/Telugu.pl b/lib/unicode/In/20.pl similarity index 100% rename from lib/unicode/In/Telugu.pl rename to lib/unicode/In/20.pl diff --git a/lib/unicode/In/Kannada.pl b/lib/unicode/In/21.pl similarity index 100% rename from lib/unicode/In/Kannada.pl rename to lib/unicode/In/21.pl diff --git a/lib/unicode/In/Malayalam.pl b/lib/unicode/In/22.pl similarity index 100% rename from lib/unicode/In/Malayalam.pl rename to lib/unicode/In/22.pl diff --git a/lib/unicode/In/Sinhala.pl b/lib/unicode/In/23.pl similarity index 100% rename from lib/unicode/In/Sinhala.pl rename to lib/unicode/In/23.pl diff --git a/lib/unicode/In/Thai.pl b/lib/unicode/In/24.pl similarity index 100% rename from lib/unicode/In/Thai.pl rename to lib/unicode/In/24.pl diff --git a/lib/unicode/In/Lao.pl b/lib/unicode/In/25.pl similarity index 100% rename from lib/unicode/In/Lao.pl rename to lib/unicode/In/25.pl diff --git a/lib/unicode/In/Tibetan.pl b/lib/unicode/In/26.pl similarity index 100% rename from lib/unicode/In/Tibetan.pl rename to lib/unicode/In/26.pl diff --git a/lib/unicode/In/Myanmar.pl b/lib/unicode/In/27.pl similarity index 100% rename from lib/unicode/In/Myanmar.pl rename to lib/unicode/In/27.pl diff --git a/lib/unicode/In/Georgian.pl b/lib/unicode/In/28.pl similarity index 100% rename from lib/unicode/In/Georgian.pl rename to lib/unicode/In/28.pl diff --git a/lib/unicode/In/HangulJamo.pl b/lib/unicode/In/29.pl similarity index 100% rename from lib/unicode/In/HangulJamo.pl rename to 
lib/unicode/In/29.pl diff --git a/lib/unicode/In/LatinExtended-B.pl b/lib/unicode/In/3.pl similarity index 100% rename from lib/unicode/In/LatinExtended-B.pl rename to lib/unicode/In/3.pl diff --git a/lib/unicode/In/Ethiopic.pl b/lib/unicode/In/30.pl similarity index 100% rename from lib/unicode/In/Ethiopic.pl rename to lib/unicode/In/30.pl diff --git a/lib/unicode/In/Cherokee.pl b/lib/unicode/In/31.pl similarity index 100% rename from lib/unicode/In/Cherokee.pl rename to lib/unicode/In/31.pl diff --git a/lib/unicode/In/UnifiedCanadianAboriginalSyllabics.pl b/lib/unicode/In/32.pl similarity index 100% rename from lib/unicode/In/UnifiedCanadianAboriginalSyllabics.pl rename to lib/unicode/In/32.pl diff --git a/lib/unicode/In/Ogham.pl b/lib/unicode/In/33.pl similarity index 100% rename from lib/unicode/In/Ogham.pl rename to lib/unicode/In/33.pl diff --git a/lib/unicode/In/Runic.pl b/lib/unicode/In/34.pl similarity index 100% rename from lib/unicode/In/Runic.pl rename to lib/unicode/In/34.pl diff --git a/lib/unicode/In/Khmer.pl b/lib/unicode/In/35.pl similarity index 100% rename from lib/unicode/In/Khmer.pl rename to lib/unicode/In/35.pl diff --git a/lib/unicode/In/Mongolian.pl b/lib/unicode/In/36.pl similarity index 100% rename from lib/unicode/In/Mongolian.pl rename to lib/unicode/In/36.pl diff --git a/lib/unicode/In/LatinExtendedAdditional.pl b/lib/unicode/In/37.pl similarity index 100% rename from lib/unicode/In/LatinExtendedAdditional.pl rename to lib/unicode/In/37.pl diff --git a/lib/unicode/In/GreekExtended.pl b/lib/unicode/In/38.pl similarity index 100% rename from lib/unicode/In/GreekExtended.pl rename to lib/unicode/In/38.pl diff --git a/lib/unicode/In/GeneralPunctuation.pl b/lib/unicode/In/39.pl similarity index 100% rename from lib/unicode/In/GeneralPunctuation.pl rename to lib/unicode/In/39.pl diff --git a/lib/unicode/In/IPAExtensions.pl b/lib/unicode/In/4.pl similarity index 100% rename from lib/unicode/In/IPAExtensions.pl rename to lib/unicode/In/4.pl 
diff --git a/lib/unicode/In/SuperscriptsandSubscripts.pl b/lib/unicode/In/40.pl similarity index 100% rename from lib/unicode/In/SuperscriptsandSubscripts.pl rename to lib/unicode/In/40.pl diff --git a/lib/unicode/In/CurrencySymbols.pl b/lib/unicode/In/41.pl similarity index 100% rename from lib/unicode/In/CurrencySymbols.pl rename to lib/unicode/In/41.pl diff --git a/lib/unicode/In/CombiningMarksforSymbols.pl b/lib/unicode/In/42.pl similarity index 100% rename from lib/unicode/In/CombiningMarksforSymbols.pl rename to lib/unicode/In/42.pl diff --git a/lib/unicode/In/LetterlikeSymbols.pl b/lib/unicode/In/43.pl similarity index 100% rename from lib/unicode/In/LetterlikeSymbols.pl rename to lib/unicode/In/43.pl diff --git a/lib/unicode/In/NumberForms.pl b/lib/unicode/In/44.pl similarity index 100% rename from lib/unicode/In/NumberForms.pl rename to lib/unicode/In/44.pl diff --git a/lib/unicode/In/Arrows.pl b/lib/unicode/In/45.pl similarity index 100% rename from lib/unicode/In/Arrows.pl rename to lib/unicode/In/45.pl diff --git a/lib/unicode/In/MathematicalOperators.pl b/lib/unicode/In/46.pl similarity index 100% rename from lib/unicode/In/MathematicalOperators.pl rename to lib/unicode/In/46.pl diff --git a/lib/unicode/In/MiscellaneousTechnical.pl b/lib/unicode/In/47.pl similarity index 100% rename from lib/unicode/In/MiscellaneousTechnical.pl rename to lib/unicode/In/47.pl diff --git a/lib/unicode/In/ControlPictures.pl b/lib/unicode/In/48.pl similarity index 100% rename from lib/unicode/In/ControlPictures.pl rename to lib/unicode/In/48.pl diff --git a/lib/unicode/In/OpticalCharacterRecognition.pl b/lib/unicode/In/49.pl similarity index 100% rename from lib/unicode/In/OpticalCharacterRecognition.pl rename to lib/unicode/In/49.pl diff --git a/lib/unicode/In/SpacingModifierLetters.pl b/lib/unicode/In/5.pl similarity index 100% rename from lib/unicode/In/SpacingModifierLetters.pl rename to lib/unicode/In/5.pl diff --git a/lib/unicode/In/EnclosedAlphanumerics.pl 
b/lib/unicode/In/50.pl similarity index 100% rename from lib/unicode/In/EnclosedAlphanumerics.pl rename to lib/unicode/In/50.pl diff --git a/lib/unicode/In/BoxDrawing.pl b/lib/unicode/In/51.pl similarity index 100% rename from lib/unicode/In/BoxDrawing.pl rename to lib/unicode/In/51.pl diff --git a/lib/unicode/In/BlockElements.pl b/lib/unicode/In/52.pl similarity index 100% rename from lib/unicode/In/BlockElements.pl rename to lib/unicode/In/52.pl diff --git a/lib/unicode/In/GeometricShapes.pl b/lib/unicode/In/53.pl similarity index 100% rename from lib/unicode/In/GeometricShapes.pl rename to lib/unicode/In/53.pl diff --git a/lib/unicode/In/MiscellaneousSymbols.pl b/lib/unicode/In/54.pl similarity index 100% rename from lib/unicode/In/MiscellaneousSymbols.pl rename to lib/unicode/In/54.pl diff --git a/lib/unicode/In/Dingbats.pl b/lib/unicode/In/55.pl similarity index 100% rename from lib/unicode/In/Dingbats.pl rename to lib/unicode/In/55.pl diff --git a/lib/unicode/In/BraillePatterns.pl b/lib/unicode/In/56.pl similarity index 100% rename from lib/unicode/In/BraillePatterns.pl rename to lib/unicode/In/56.pl diff --git a/lib/unicode/In/CJKRadicalsSupplement.pl b/lib/unicode/In/57.pl similarity index 100% rename from lib/unicode/In/CJKRadicalsSupplement.pl rename to lib/unicode/In/57.pl diff --git a/lib/unicode/In/KangxiRadicals.pl b/lib/unicode/In/58.pl similarity index 100% rename from lib/unicode/In/KangxiRadicals.pl rename to lib/unicode/In/58.pl diff --git a/lib/unicode/In/IdeographicDescriptionCharacters.pl b/lib/unicode/In/59.pl similarity index 100% rename from lib/unicode/In/IdeographicDescriptionCharacters.pl rename to lib/unicode/In/59.pl diff --git a/lib/unicode/In/CombiningDiacriticalMarks.pl b/lib/unicode/In/6.pl similarity index 100% rename from lib/unicode/In/CombiningDiacriticalMarks.pl rename to lib/unicode/In/6.pl diff --git a/lib/unicode/In/CJKSymbolsandPunctuation.pl b/lib/unicode/In/60.pl similarity index 100% rename from 
lib/unicode/In/CJKSymbolsandPunctuation.pl rename to lib/unicode/In/60.pl diff --git a/lib/unicode/In/Hiragana.pl b/lib/unicode/In/61.pl similarity index 100% rename from lib/unicode/In/Hiragana.pl rename to lib/unicode/In/61.pl diff --git a/lib/unicode/In/Katakana.pl b/lib/unicode/In/62.pl similarity index 100% rename from lib/unicode/In/Katakana.pl rename to lib/unicode/In/62.pl diff --git a/lib/unicode/In/Bopomofo.pl b/lib/unicode/In/63.pl similarity index 100% rename from lib/unicode/In/Bopomofo.pl rename to lib/unicode/In/63.pl diff --git a/lib/unicode/In/HangulCompatibilityJamo.pl b/lib/unicode/In/64.pl similarity index 100% rename from lib/unicode/In/HangulCompatibilityJamo.pl rename to lib/unicode/In/64.pl diff --git a/lib/unicode/In/Kanbun.pl b/lib/unicode/In/65.pl similarity index 100% rename from lib/unicode/In/Kanbun.pl rename to lib/unicode/In/65.pl diff --git a/lib/unicode/In/BopomofoExtended.pl b/lib/unicode/In/66.pl similarity index 100% rename from lib/unicode/In/BopomofoExtended.pl rename to lib/unicode/In/66.pl diff --git a/lib/unicode/In/EnclosedCJKLettersandMonths.pl b/lib/unicode/In/67.pl similarity index 100% rename from lib/unicode/In/EnclosedCJKLettersandMonths.pl rename to lib/unicode/In/67.pl diff --git a/lib/unicode/In/CJKCompatibility.pl b/lib/unicode/In/68.pl similarity index 100% rename from lib/unicode/In/CJKCompatibility.pl rename to lib/unicode/In/68.pl diff --git a/lib/unicode/In/CJKUnifiedIdeographsExtensionA.pl b/lib/unicode/In/69.pl similarity index 100% rename from lib/unicode/In/CJKUnifiedIdeographsExtensionA.pl rename to lib/unicode/In/69.pl diff --git a/lib/unicode/In/Greek.pl b/lib/unicode/In/7.pl similarity index 100% rename from lib/unicode/In/Greek.pl rename to lib/unicode/In/7.pl diff --git a/lib/unicode/In/CJKUnifiedIdeographs.pl b/lib/unicode/In/70.pl similarity index 100% rename from lib/unicode/In/CJKUnifiedIdeographs.pl rename to lib/unicode/In/70.pl diff --git a/lib/unicode/In/YiSyllables.pl 
b/lib/unicode/In/71.pl similarity index 100% rename from lib/unicode/In/YiSyllables.pl rename to lib/unicode/In/71.pl diff --git a/lib/unicode/In/YiRadicals.pl b/lib/unicode/In/72.pl similarity index 100% rename from lib/unicode/In/YiRadicals.pl rename to lib/unicode/In/72.pl diff --git a/lib/unicode/In/HangulSyllables.pl b/lib/unicode/In/73.pl similarity index 100% rename from lib/unicode/In/HangulSyllables.pl rename to lib/unicode/In/73.pl diff --git a/lib/unicode/In/HighSurrogates.pl b/lib/unicode/In/74.pl similarity index 100% rename from lib/unicode/In/HighSurrogates.pl rename to lib/unicode/In/74.pl diff --git a/lib/unicode/In/HighPrivateUseSurrogates.pl b/lib/unicode/In/75.pl similarity index 100% rename from lib/unicode/In/HighPrivateUseSurrogates.pl rename to lib/unicode/In/75.pl diff --git a/lib/unicode/In/LowSurrogates.pl b/lib/unicode/In/76.pl similarity index 100% rename from lib/unicode/In/LowSurrogates.pl rename to lib/unicode/In/76.pl diff --git a/lib/unicode/Block.pl b/lib/unicode/In/77.pl similarity index 92% copy from lib/unicode/Block.pl copy to lib/unicode/In/77.pl index 272f63f..530166d 100644 --- a/lib/unicode/Block.pl +++ b/lib/unicode/In/77.pl @@ -2,4 +2,5 @@ # This file is built by mktables.PL from e.g. Unicode.txt. # Any changes made here will be lost! 
return <<'END'; +100000 10FFFD END diff --git a/lib/unicode/In/CJKCompatibilityIdeographs.pl b/lib/unicode/In/78.pl similarity index 100% rename from lib/unicode/In/CJKCompatibilityIdeographs.pl rename to lib/unicode/In/78.pl diff --git a/lib/unicode/In/AlphabeticPresentationForms.pl b/lib/unicode/In/79.pl similarity index 100% rename from lib/unicode/In/AlphabeticPresentationForms.pl rename to lib/unicode/In/79.pl diff --git a/lib/unicode/In/Cyrillic.pl b/lib/unicode/In/8.pl similarity index 100% rename from lib/unicode/In/Cyrillic.pl rename to lib/unicode/In/8.pl diff --git a/lib/unicode/In/ArabicPresentationForms-A.pl b/lib/unicode/In/80.pl similarity index 100% rename from lib/unicode/In/ArabicPresentationForms-A.pl rename to lib/unicode/In/80.pl diff --git a/lib/unicode/In/CombiningHalfMarks.pl b/lib/unicode/In/81.pl similarity index 100% rename from lib/unicode/In/CombiningHalfMarks.pl rename to lib/unicode/In/81.pl diff --git a/lib/unicode/In/CJKCompatibilityForms.pl b/lib/unicode/In/82.pl similarity index 100% rename from lib/unicode/In/CJKCompatibilityForms.pl rename to lib/unicode/In/82.pl diff --git a/lib/unicode/In/SmallFormVariants.pl b/lib/unicode/In/83.pl similarity index 100% rename from lib/unicode/In/SmallFormVariants.pl rename to lib/unicode/In/83.pl diff --git a/lib/unicode/In/ArabicPresentationForms-B.pl b/lib/unicode/In/84.pl similarity index 100% rename from lib/unicode/In/ArabicPresentationForms-B.pl rename to lib/unicode/In/84.pl diff --git a/lib/unicode/In/Specials.pl b/lib/unicode/In/85.pl similarity index 100% rename from lib/unicode/In/Specials.pl rename to lib/unicode/In/85.pl diff --git a/lib/unicode/In/HalfwidthandFullwidthForms.pl b/lib/unicode/In/86.pl similarity index 100% rename from lib/unicode/In/HalfwidthandFullwidthForms.pl rename to lib/unicode/In/86.pl diff --git a/lib/unicode/Block.pl b/lib/unicode/In/87.pl similarity index 93% copy from lib/unicode/Block.pl copy to lib/unicode/In/87.pl index 272f63f..44a5e47 100644 --- 
a/lib/unicode/Block.pl +++ b/lib/unicode/In/87.pl @@ -2,4 +2,5 @@ # This file is built by mktables.PL from e.g. Unicode.txt. # Any changes made here will be lost! return <<'END'; +10300 1032F END diff --git a/lib/unicode/Block.pl b/lib/unicode/In/88.pl similarity index 93% copy from lib/unicode/Block.pl copy to lib/unicode/In/88.pl index 272f63f..8030411 100644 --- a/lib/unicode/Block.pl +++ b/lib/unicode/In/88.pl @@ -2,4 +2,5 @@ # This file is built by mktables.PL from e.g. Unicode.txt. # Any changes made here will be lost! return <<'END'; +10330 1034F END diff --git a/lib/unicode/Block.pl b/lib/unicode/In/89.pl similarity index 93% copy from lib/unicode/Block.pl copy to lib/unicode/In/89.pl index 272f63f..d2c50bb 100644 --- a/lib/unicode/Block.pl +++ b/lib/unicode/In/89.pl @@ -2,4 +2,5 @@ # This file is built by mktables.PL from e.g. Unicode.txt. # Any changes made here will be lost! return <<'END'; +10400 1044F END diff --git a/lib/unicode/In/Armenian.pl b/lib/unicode/In/9.pl similarity index 100% rename from lib/unicode/In/Armenian.pl rename to lib/unicode/In/9.pl diff --git a/lib/unicode/Block.pl b/lib/unicode/In/90.pl similarity index 93% copy from lib/unicode/Block.pl copy to lib/unicode/In/90.pl index 272f63f..f1073c7 100644 --- a/lib/unicode/Block.pl +++ b/lib/unicode/In/90.pl @@ -2,4 +2,5 @@ # This file is built by mktables.PL from e.g. Unicode.txt. # Any changes made here will be lost! return <<'END'; +1D000 1D0FF END diff --git a/lib/unicode/Block.pl b/lib/unicode/In/91.pl similarity index 93% copy from lib/unicode/Block.pl copy to lib/unicode/In/91.pl index 272f63f..7435889 100644 --- a/lib/unicode/Block.pl +++ b/lib/unicode/In/91.pl @@ -2,4 +2,5 @@ # This file is built by mktables.PL from e.g. Unicode.txt. # Any changes made here will be lost! 
return <<'END'; +1D100 1D1FF END diff --git a/lib/unicode/Block.pl b/lib/unicode/In/92.pl similarity index 93% copy from lib/unicode/Block.pl copy to lib/unicode/In/92.pl index 272f63f..7e40edc 100644 --- a/lib/unicode/Block.pl +++ b/lib/unicode/In/92.pl @@ -2,4 +2,5 @@ # This file is built by mktables.PL from e.g. Unicode.txt. # Any changes made here will be lost! return <<'END'; +1D400 1D7FF END diff --git a/lib/unicode/Block.pl b/lib/unicode/In/93.pl similarity index 93% copy from lib/unicode/Block.pl copy to lib/unicode/In/93.pl index 272f63f..931aec3 100644 --- a/lib/unicode/Block.pl +++ b/lib/unicode/In/93.pl @@ -2,4 +2,5 @@ # This file is built by mktables.PL from e.g. Unicode.txt. # Any changes made here will be lost! return <<'END'; +20000 2A6D6 END diff --git a/lib/unicode/Block.pl b/lib/unicode/In/94.pl similarity index 93% copy from lib/unicode/Block.pl copy to lib/unicode/In/94.pl index 272f63f..c025148 100644 --- a/lib/unicode/Block.pl +++ b/lib/unicode/In/94.pl @@ -2,4 +2,5 @@ # This file is built by mktables.PL from e.g. Unicode.txt. # Any changes made here will be lost! return <<'END'; +2F800 2FA1F END diff --git a/lib/unicode/Block.pl b/lib/unicode/In/95.pl similarity index 93% rename from lib/unicode/Block.pl rename to lib/unicode/In/95.pl index 272f63f..495d2d5 100644 --- a/lib/unicode/Block.pl +++ b/lib/unicode/In/95.pl @@ -2,4 +2,5 @@ # This file is built by mktables.PL from e.g. Unicode.txt. # Any changes made here will be lost! return <<'END'; +E0000 E007F END diff --git a/lib/unicode/In/PrivateUse.pl b/lib/unicode/In/PrivateUse.pl deleted file mode 100644 index c81b567..0000000 --- a/lib/unicode/In/PrivateUse.pl +++ /dev/null @@ -1,6 +0,0 @@ -# !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables.PL from e.g. Unicode.txt. -# Any changes made here will be lost! 
-return <<'END'; -E000 F8FF -END diff --git a/lib/unicode/mktables.PL b/lib/unicode/mktables.PL index 8187854..68578b9 100755 --- a/lib/unicode/mktables.PL +++ b/lib/unicode/mktables.PL @@ -231,11 +231,24 @@ mkdir "To", 0755; # This is not written for speed... +my %InId; +my $InId = 0; + foreach $file (@todo) { my ($table, $wanted, $val) = @$file; next if @ARGV and not grep { $_ eq $table } @ARGV; - print $table,"\n"; - if ($table =~ /^(Is|In|To)(.*)/) { + print $table, "\n"; + $table =~ s/\W+//g; + if ($table =~ /^In(.+)/) { + my $id; + unless (exists $InId{$1}) { + $InId{$1} = $InId++; + } + $id = $InId{$1}; + open(OUT, ">In/$id.pl") or die "Can't create In/$id.pl: $!\n"; + print OUT "# In/$id.pl $1\n"; + } + elsif ($table =~ /^(Is|To)(.+)/) { open(OUT, ">$1/$2.pl") or die "Can't create $1/$2.pl: $!\n"; } else { @@ -257,9 +270,9 @@ END # Must treat blocks specially. exit if @ARGV and not grep { $_ eq Block } @ARGV; -print "Block\n"; +print "Blocks\n"; open(UD, 'Blocks.txt') or die "Can't open Blocks.txt: $!\n"; -open(OUT, ">Block.pl") or die "Can't create Block.pl: $!\n"; +open(OUT, ">Blocks.pl") or die "Can't create Blocks.pl: $!\n"; print OUT <) { next if /^#/; next if /^$/; chomp; - ($code, $last, $name) = /^([0-9a-f]+)\.\.([0-9a-f]); (.+)/i; + ($code, $last, $name) = /^([0-9a-f]+)\.\.([0-9a-f]+); (.+)/i; if ($name) { print OUT "$code $last $name\n"; - $name =~ s/\s+//g; - open(BLOCK, ">In/$name.pl"); + $name =~ s/\W+//g; + my $id; + unless (exists $InId{$name}) { + $InId{$name} = $InId++; + } + $id = $InId{$name}; + open(BLOCK, ">In/$id.pl"); + print OUT "# In/$id.pl $name\n"; print BLOCK <In.pl"); + +print INID < $InId{$in},\n"; +} + +print INID ");\n"; + +close(INID); + ################################################## sub proplist { diff --git a/lib/utf8_heavy.pl b/lib/utf8_heavy.pl index 8649e9e..5637d12 100644 --- a/lib/utf8_heavy.pl +++ b/lib/utf8_heavy.pl @@ -26,7 +26,14 @@ sub SWASHNEW { while (($caller = caller($i)) eq __PACKAGE__) { $i++ } my 
$encoding = $enc{$caller} || "unicode"; (my $file = $type) =~ s!::!/!g; - $file =~ s#^(I[sn]|To)([A-Z].*)#$1/$2#; + if ($file =~ /^In(.+)/) { + defined %utf8::In || do "$encoding/In.pl"; + if (exists $utf8::In{$1}) { + $file = "$enconding/In/$utf8::In{$1}"; + } + } else { + $file =~ s#^(Is|To)([A-Z].*)#$1/$2#; + } $list ||= eval { $caller->$type(); } || do "$file.pl" || do "$encoding/$file.pl" diff --git a/pod/perlunicode.pod b/pod/perlunicode.pod index 8ddcdd2..12bee5c 100644 --- a/pod/perlunicode.pod +++ b/pod/perlunicode.pod @@ -158,9 +158,12 @@ Named Unicode properties and block ranges make be used as character classes via the new C<\p{}> (matches property) and C<\P{}> (doesn't match property) constructs. For instance, C<\p{Lu}> matches any character with the Unicode uppercase property, while C<\p{M}> matches -any mark character. Single letter properties may omit the brackets, so -that can be written C<\pM> also. Many predefined character classes are -available, such as C<\p{IsMirrored}> and C<\p{InTibetan}>. +any mark character. Single letter properties may omit the brackets, +so that can be written C<\pM> also. Many predefined character classes +are available, such as C<\p{IsMirrored}> and C<\p{InTibetan}>. The +names of the C<In> classes are the official Unicode block names but +with all non-alphanumeric characters removed, for example the block +name C<"Latin-1 Supplement"> becomes C<\p{InLatin1Supplement}>. =item *