Add a level of indirection to the implementation of \p{InFoo}
Jarkko Hietaniemi [Sat, 28 Apr 2001 17:18:26 +0000 (17:18 +0000)]
so that we don't have to have long filenames.  (Nothing changes
in the user interface.)   The indirection is defined in
the file lib/unicode/In.pl and it is handled in lib/utf8_heavy.pl.
Also rename some of the character classes by removing '-' from
the classnames, and finally rename Block.pl to Blocks.pl.

p4raw-id: //depot/perl@9897

103 files changed:
MANIFEST
lib/unicode/Blocks.pl [new file with mode: 0644]
lib/unicode/In.pl [new file with mode: 0644]
lib/unicode/In/0.pl [moved from lib/unicode/In/BasicLatin.pl with 100% similarity]
lib/unicode/In/1.pl [moved from lib/unicode/In/Latin-1Supplement.pl with 100% similarity]
lib/unicode/In/10.pl [moved from lib/unicode/In/Hebrew.pl with 100% similarity]
lib/unicode/In/11.pl [moved from lib/unicode/In/Arabic.pl with 100% similarity]
lib/unicode/In/12.pl [moved from lib/unicode/In/Syriac.pl with 100% similarity]
lib/unicode/In/13.pl [moved from lib/unicode/In/Thaana.pl with 100% similarity]
lib/unicode/In/14.pl [moved from lib/unicode/In/Devanagari.pl with 100% similarity]
lib/unicode/In/15.pl [moved from lib/unicode/In/Bengali.pl with 100% similarity]
lib/unicode/In/16.pl [moved from lib/unicode/In/Gurmukhi.pl with 100% similarity]
lib/unicode/In/17.pl [moved from lib/unicode/In/Gujarati.pl with 100% similarity]
lib/unicode/In/18.pl [moved from lib/unicode/In/Oriya.pl with 100% similarity]
lib/unicode/In/19.pl [moved from lib/unicode/In/Tamil.pl with 100% similarity]
lib/unicode/In/2.pl [moved from lib/unicode/In/LatinExtended-A.pl with 100% similarity]
lib/unicode/In/20.pl [moved from lib/unicode/In/Telugu.pl with 100% similarity]
lib/unicode/In/21.pl [moved from lib/unicode/In/Kannada.pl with 100% similarity]
lib/unicode/In/22.pl [moved from lib/unicode/In/Malayalam.pl with 100% similarity]
lib/unicode/In/23.pl [moved from lib/unicode/In/Sinhala.pl with 100% similarity]
lib/unicode/In/24.pl [moved from lib/unicode/In/Thai.pl with 100% similarity]
lib/unicode/In/25.pl [moved from lib/unicode/In/Lao.pl with 100% similarity]
lib/unicode/In/26.pl [moved from lib/unicode/In/Tibetan.pl with 100% similarity]
lib/unicode/In/27.pl [moved from lib/unicode/In/Myanmar.pl with 100% similarity]
lib/unicode/In/28.pl [moved from lib/unicode/In/Georgian.pl with 100% similarity]
lib/unicode/In/29.pl [moved from lib/unicode/In/HangulJamo.pl with 100% similarity]
lib/unicode/In/3.pl [moved from lib/unicode/In/LatinExtended-B.pl with 100% similarity]
lib/unicode/In/30.pl [moved from lib/unicode/In/Ethiopic.pl with 100% similarity]
lib/unicode/In/31.pl [moved from lib/unicode/In/Cherokee.pl with 100% similarity]
lib/unicode/In/32.pl [moved from lib/unicode/In/UnifiedCanadianAboriginalSyllabics.pl with 100% similarity]
lib/unicode/In/33.pl [moved from lib/unicode/In/Ogham.pl with 100% similarity]
lib/unicode/In/34.pl [moved from lib/unicode/In/Runic.pl with 100% similarity]
lib/unicode/In/35.pl [moved from lib/unicode/In/Khmer.pl with 100% similarity]
lib/unicode/In/36.pl [moved from lib/unicode/In/Mongolian.pl with 100% similarity]
lib/unicode/In/37.pl [moved from lib/unicode/In/LatinExtendedAdditional.pl with 100% similarity]
lib/unicode/In/38.pl [moved from lib/unicode/In/GreekExtended.pl with 100% similarity]
lib/unicode/In/39.pl [moved from lib/unicode/In/GeneralPunctuation.pl with 100% similarity]
lib/unicode/In/4.pl [moved from lib/unicode/In/IPAExtensions.pl with 100% similarity]
lib/unicode/In/40.pl [moved from lib/unicode/In/SuperscriptsandSubscripts.pl with 100% similarity]
lib/unicode/In/41.pl [moved from lib/unicode/In/CurrencySymbols.pl with 100% similarity]
lib/unicode/In/42.pl [moved from lib/unicode/In/CombiningMarksforSymbols.pl with 100% similarity]
lib/unicode/In/43.pl [moved from lib/unicode/In/LetterlikeSymbols.pl with 100% similarity]
lib/unicode/In/44.pl [moved from lib/unicode/In/NumberForms.pl with 100% similarity]
lib/unicode/In/45.pl [moved from lib/unicode/In/Arrows.pl with 100% similarity]
lib/unicode/In/46.pl [moved from lib/unicode/In/MathematicalOperators.pl with 100% similarity]
lib/unicode/In/47.pl [moved from lib/unicode/In/MiscellaneousTechnical.pl with 100% similarity]
lib/unicode/In/48.pl [moved from lib/unicode/In/ControlPictures.pl with 100% similarity]
lib/unicode/In/49.pl [moved from lib/unicode/In/OpticalCharacterRecognition.pl with 100% similarity]
lib/unicode/In/5.pl [moved from lib/unicode/In/SpacingModifierLetters.pl with 100% similarity]
lib/unicode/In/50.pl [moved from lib/unicode/In/EnclosedAlphanumerics.pl with 100% similarity]
lib/unicode/In/51.pl [moved from lib/unicode/In/BoxDrawing.pl with 100% similarity]
lib/unicode/In/52.pl [moved from lib/unicode/In/BlockElements.pl with 100% similarity]
lib/unicode/In/53.pl [moved from lib/unicode/In/GeometricShapes.pl with 100% similarity]
lib/unicode/In/54.pl [moved from lib/unicode/In/MiscellaneousSymbols.pl with 100% similarity]
lib/unicode/In/55.pl [moved from lib/unicode/In/Dingbats.pl with 100% similarity]
lib/unicode/In/56.pl [moved from lib/unicode/In/BraillePatterns.pl with 100% similarity]
lib/unicode/In/57.pl [moved from lib/unicode/In/CJKRadicalsSupplement.pl with 100% similarity]
lib/unicode/In/58.pl [moved from lib/unicode/In/KangxiRadicals.pl with 100% similarity]
lib/unicode/In/59.pl [moved from lib/unicode/In/IdeographicDescriptionCharacters.pl with 100% similarity]
lib/unicode/In/6.pl [moved from lib/unicode/In/CombiningDiacriticalMarks.pl with 100% similarity]
lib/unicode/In/60.pl [moved from lib/unicode/In/CJKSymbolsandPunctuation.pl with 100% similarity]
lib/unicode/In/61.pl [moved from lib/unicode/In/Hiragana.pl with 100% similarity]
lib/unicode/In/62.pl [moved from lib/unicode/In/Katakana.pl with 100% similarity]
lib/unicode/In/63.pl [moved from lib/unicode/In/Bopomofo.pl with 100% similarity]
lib/unicode/In/64.pl [moved from lib/unicode/In/HangulCompatibilityJamo.pl with 100% similarity]
lib/unicode/In/65.pl [moved from lib/unicode/In/Kanbun.pl with 100% similarity]
lib/unicode/In/66.pl [moved from lib/unicode/In/BopomofoExtended.pl with 100% similarity]
lib/unicode/In/67.pl [moved from lib/unicode/In/EnclosedCJKLettersandMonths.pl with 100% similarity]
lib/unicode/In/68.pl [moved from lib/unicode/In/CJKCompatibility.pl with 100% similarity]
lib/unicode/In/69.pl [moved from lib/unicode/In/CJKUnifiedIdeographsExtensionA.pl with 100% similarity]
lib/unicode/In/7.pl [moved from lib/unicode/In/Greek.pl with 100% similarity]
lib/unicode/In/70.pl [moved from lib/unicode/In/CJKUnifiedIdeographs.pl with 100% similarity]
lib/unicode/In/71.pl [moved from lib/unicode/In/YiSyllables.pl with 100% similarity]
lib/unicode/In/72.pl [moved from lib/unicode/In/YiRadicals.pl with 100% similarity]
lib/unicode/In/73.pl [moved from lib/unicode/In/HangulSyllables.pl with 100% similarity]
lib/unicode/In/74.pl [moved from lib/unicode/In/HighSurrogates.pl with 100% similarity]
lib/unicode/In/75.pl [moved from lib/unicode/In/HighPrivateUseSurrogates.pl with 100% similarity]
lib/unicode/In/76.pl [moved from lib/unicode/In/LowSurrogates.pl with 100% similarity]
lib/unicode/In/77.pl [copied from lib/unicode/Block.pl with 92% similarity]
lib/unicode/In/78.pl [moved from lib/unicode/In/CJKCompatibilityIdeographs.pl with 100% similarity]
lib/unicode/In/79.pl [moved from lib/unicode/In/AlphabeticPresentationForms.pl with 100% similarity]
lib/unicode/In/8.pl [moved from lib/unicode/In/Cyrillic.pl with 100% similarity]
lib/unicode/In/80.pl [moved from lib/unicode/In/ArabicPresentationForms-A.pl with 100% similarity]
lib/unicode/In/81.pl [moved from lib/unicode/In/CombiningHalfMarks.pl with 100% similarity]
lib/unicode/In/82.pl [moved from lib/unicode/In/CJKCompatibilityForms.pl with 100% similarity]
lib/unicode/In/83.pl [moved from lib/unicode/In/SmallFormVariants.pl with 100% similarity]
lib/unicode/In/84.pl [moved from lib/unicode/In/ArabicPresentationForms-B.pl with 100% similarity]
lib/unicode/In/85.pl [moved from lib/unicode/In/Specials.pl with 100% similarity]
lib/unicode/In/86.pl [moved from lib/unicode/In/HalfwidthandFullwidthForms.pl with 100% similarity]
lib/unicode/In/87.pl [copied from lib/unicode/Block.pl with 93% similarity]
lib/unicode/In/88.pl [copied from lib/unicode/Block.pl with 93% similarity]
lib/unicode/In/89.pl [copied from lib/unicode/Block.pl with 93% similarity]
lib/unicode/In/9.pl [moved from lib/unicode/In/Armenian.pl with 100% similarity]
lib/unicode/In/90.pl [copied from lib/unicode/Block.pl with 93% similarity]
lib/unicode/In/91.pl [copied from lib/unicode/Block.pl with 93% similarity]
lib/unicode/In/92.pl [copied from lib/unicode/Block.pl with 93% similarity]
lib/unicode/In/93.pl [copied from lib/unicode/Block.pl with 93% similarity]
lib/unicode/In/94.pl [copied from lib/unicode/Block.pl with 93% similarity]
lib/unicode/In/95.pl [moved from lib/unicode/Block.pl with 93% similarity]
lib/unicode/In/PrivateUse.pl [deleted file]
lib/unicode/mktables.PL
lib/utf8_heavy.pl
pod/perlunicode.pod

index 33e69eb..988302e 100644 (file)
--- a/MANIFEST
+++ b/MANIFEST
@@ -888,7 +888,7 @@ lib/unicode/ArabLnkGrp.pl                   Unicode character database
 lib/unicode/ArabShap.txt                       Unicode character database
 lib/unicode/BidiMirr.txt                       Unicode character database
 lib/unicode/Bidirectional.pl                   Unicode character database
-lib/unicode/Block.pl                           Unicode character database
+lib/unicode/Blocks.pl                          Unicode character database
 lib/unicode/Blocks.txt                         Unicode character database
 lib/unicode/CaseFold.txt                       Unicode character database
 lib/unicode/Category.pl                                Unicode character database
@@ -896,93 +896,103 @@ lib/unicode/CombiningClass.pl                    Unicode character database
 lib/unicode/CompExcl.txt                       Unicode character database
 lib/unicode/Decomposition.pl                   Unicode character database
 lib/unicode/EAWidth.txt                                Unicode character database
-lib/unicode/In/AlphabeticPresentationForms.pl  Unicode character database
-lib/unicode/In/Arabic.pl                       Unicode character database
-lib/unicode/In/ArabicPresentationForms-A.pl    Unicode character database
-lib/unicode/In/ArabicPresentationForms-B.pl    Unicode character database
-lib/unicode/In/Armenian.pl                     Unicode character database
-lib/unicode/In/Arrows.pl                       Unicode character database
-lib/unicode/In/BasicLatin.pl                   Unicode character database
-lib/unicode/In/Bengali.pl                      Unicode character database
-lib/unicode/In/BlockElements.pl                        Unicode character database
-lib/unicode/In/Bopomofo.pl                     Unicode character database
-lib/unicode/In/BopomofoExtended.pl             Unicode character database
-lib/unicode/In/BoxDrawing.pl                   Unicode character database
-lib/unicode/In/BraillePatterns.pl              Unicode character database
-lib/unicode/In/CJKCompatibility.pl             Unicode character database
-lib/unicode/In/CJKCompatibilityForms.pl                Unicode character database
-lib/unicode/In/CJKCompatibilityIdeographs.pl   Unicode character database
-lib/unicode/In/CJKRadicalsSupplement.pl                Unicode character database
-lib/unicode/In/CJKSymbolsandPunctuation.pl     Unicode character database
-lib/unicode/In/CJKUnifiedIdeographs.pl         Unicode character database
-lib/unicode/In/CJKUnifiedIdeographsExtensionA.pl       Unicode character database
-lib/unicode/In/Cherokee.pl                     Unicode character database
-lib/unicode/In/CombiningDiacriticalMarks.pl    Unicode character database
-lib/unicode/In/CombiningHalfMarks.pl           Unicode character database
-lib/unicode/In/CombiningMarksforSymbols.pl     Unicode character database
-lib/unicode/In/ControlPictures.pl              Unicode character database
-lib/unicode/In/CurrencySymbols.pl              Unicode character database
-lib/unicode/In/Cyrillic.pl                     Unicode character database
-lib/unicode/In/Devanagari.pl                   Unicode character database
-lib/unicode/In/Dingbats.pl                     Unicode character database
-lib/unicode/In/EnclosedAlphanumerics.pl                Unicode character database
-lib/unicode/In/EnclosedCJKLettersandMonths.pl  Unicode character database
-lib/unicode/In/Ethiopic.pl                     Unicode character database
-lib/unicode/In/GeneralPunctuation.pl           Unicode character database
-lib/unicode/In/GeometricShapes.pl              Unicode character database
-lib/unicode/In/Georgian.pl                     Unicode character database
-lib/unicode/In/Greek.pl                                Unicode character database
-lib/unicode/In/GreekExtended.pl                        Unicode character database
-lib/unicode/In/Gujarati.pl                     Unicode character database
-lib/unicode/In/Gurmukhi.pl                     Unicode character database
-lib/unicode/In/HalfwidthandFullwidthForms.pl   Unicode character database
-lib/unicode/In/HangulCompatibilityJamo.pl      Unicode character database
-lib/unicode/In/HangulJamo.pl                   Unicode character database
-lib/unicode/In/HangulSyllables.pl              Unicode character database
-lib/unicode/In/Hebrew.pl                       Unicode character database
-lib/unicode/In/HighPrivateUseSurrogates.pl     Unicode character database
-lib/unicode/In/HighSurrogates.pl               Unicode character database
-lib/unicode/In/Hiragana.pl                     Unicode character database
-lib/unicode/In/IPAExtensions.pl                        Unicode character database
-lib/unicode/In/IdeographicDescriptionCharacters.pl     Unicode character database
-lib/unicode/In/Kanbun.pl                       Unicode character database
-lib/unicode/In/KangxiRadicals.pl               Unicode character database
-lib/unicode/In/Kannada.pl                      Unicode character database
-lib/unicode/In/Katakana.pl                     Unicode character database
-lib/unicode/In/Khmer.pl                                Unicode character database
-lib/unicode/In/Lao.pl                          Unicode character database
-lib/unicode/In/Latin-1Supplement.pl            Unicode character database
-lib/unicode/In/LatinExtended-A.pl              Unicode character database
-lib/unicode/In/LatinExtended-B.pl              Unicode character database
-lib/unicode/In/LatinExtendedAdditional.pl      Unicode character database
-lib/unicode/In/LetterlikeSymbols.pl            Unicode character database
-lib/unicode/In/LowSurrogates.pl                        Unicode character database
-lib/unicode/In/Malayalam.pl                    Unicode character database
-lib/unicode/In/MathematicalOperators.pl                Unicode character database
-lib/unicode/In/MiscellaneousSymbols.pl         Unicode character database
-lib/unicode/In/MiscellaneousTechnical.pl       Unicode character database
-lib/unicode/In/Mongolian.pl                    Unicode character database
-lib/unicode/In/Myanmar.pl                      Unicode character database
-lib/unicode/In/NumberForms.pl                  Unicode character database
-lib/unicode/In/Ogham.pl                                Unicode character database
-lib/unicode/In/OpticalCharacterRecognition.pl  Unicode character database
-lib/unicode/In/Oriya.pl                                Unicode character database
-lib/unicode/In/PrivateUse.pl                   Unicode character database
-lib/unicode/In/Runic.pl                                Unicode character database
-lib/unicode/In/Sinhala.pl                      Unicode character database
-lib/unicode/In/SmallFormVariants.pl            Unicode character database
-lib/unicode/In/SpacingModifierLetters.pl       Unicode character database
-lib/unicode/In/Specials.pl                     Unicode character database
-lib/unicode/In/SuperscriptsandSubscripts.pl    Unicode character database
-lib/unicode/In/Syriac.pl                       Unicode character database
-lib/unicode/In/Tamil.pl                                Unicode character database
-lib/unicode/In/Telugu.pl                       Unicode character database
-lib/unicode/In/Thaana.pl                       Unicode character database
-lib/unicode/In/Thai.pl                         Unicode character database
-lib/unicode/In/Tibetan.pl                      Unicode character database
-lib/unicode/In/UnifiedCanadianAboriginalSyllabics.pl   Unicode character database
-lib/unicode/In/YiRadicals.pl                   Unicode character database
-lib/unicode/In/YiSyllables.pl                  Unicode character database
+lib/unicode/In.pl                              Unicode character database
+lib/unicode/In/0.pl                            Unicode character database
+lib/unicode/In/1.pl                            Unicode character database
+lib/unicode/In/2.pl                            Unicode character database
+lib/unicode/In/3.pl                            Unicode character database
+lib/unicode/In/4.pl                            Unicode character database
+lib/unicode/In/5.pl                            Unicode character database
+lib/unicode/In/6.pl                            Unicode character database
+lib/unicode/In/7.pl                            Unicode character database
+lib/unicode/In/8.pl                            Unicode character database
+lib/unicode/In/9.pl                            Unicode character database
+lib/unicode/In/10.pl                           Unicode character database
+lib/unicode/In/11.pl                           Unicode character database
+lib/unicode/In/12.pl                           Unicode character database
+lib/unicode/In/13.pl                           Unicode character database
+lib/unicode/In/14.pl                           Unicode character database
+lib/unicode/In/15.pl                           Unicode character database
+lib/unicode/In/16.pl                           Unicode character database
+lib/unicode/In/17.pl                           Unicode character database
+lib/unicode/In/18.pl                           Unicode character database
+lib/unicode/In/19.pl                           Unicode character database
+lib/unicode/In/20.pl                           Unicode character database
+lib/unicode/In/21.pl                           Unicode character database
+lib/unicode/In/22.pl                           Unicode character database
+lib/unicode/In/23.pl                           Unicode character database
+lib/unicode/In/24.pl                           Unicode character database
+lib/unicode/In/25.pl                           Unicode character database
+lib/unicode/In/26.pl                           Unicode character database
+lib/unicode/In/27.pl                           Unicode character database
+lib/unicode/In/28.pl                           Unicode character database
+lib/unicode/In/29.pl                           Unicode character database
+lib/unicode/In/30.pl                           Unicode character database
+lib/unicode/In/31.pl                           Unicode character database
+lib/unicode/In/32.pl                           Unicode character database
+lib/unicode/In/33.pl                           Unicode character database
+lib/unicode/In/34.pl                           Unicode character database
+lib/unicode/In/35.pl                           Unicode character database
+lib/unicode/In/36.pl                           Unicode character database
+lib/unicode/In/37.pl                           Unicode character database
+lib/unicode/In/38.pl                           Unicode character database
+lib/unicode/In/39.pl                           Unicode character database
+lib/unicode/In/40.pl                           Unicode character database
+lib/unicode/In/41.pl                           Unicode character database
+lib/unicode/In/42.pl                           Unicode character database
+lib/unicode/In/43.pl                           Unicode character database
+lib/unicode/In/44.pl                           Unicode character database
+lib/unicode/In/45.pl                           Unicode character database
+lib/unicode/In/46.pl                           Unicode character database
+lib/unicode/In/47.pl                           Unicode character database
+lib/unicode/In/48.pl                           Unicode character database
+lib/unicode/In/49.pl                           Unicode character database
+lib/unicode/In/50.pl                           Unicode character database
+lib/unicode/In/51.pl                           Unicode character database
+lib/unicode/In/52.pl                           Unicode character database
+lib/unicode/In/53.pl                           Unicode character database
+lib/unicode/In/54.pl                           Unicode character database
+lib/unicode/In/55.pl                           Unicode character database
+lib/unicode/In/56.pl                           Unicode character database
+lib/unicode/In/57.pl                           Unicode character database
+lib/unicode/In/58.pl                           Unicode character database
+lib/unicode/In/59.pl                           Unicode character database
+lib/unicode/In/60.pl                           Unicode character database
+lib/unicode/In/61.pl                           Unicode character database
+lib/unicode/In/62.pl                           Unicode character database
+lib/unicode/In/63.pl                           Unicode character database
+lib/unicode/In/64.pl                           Unicode character database
+lib/unicode/In/65.pl                           Unicode character database
+lib/unicode/In/66.pl                           Unicode character database
+lib/unicode/In/67.pl                           Unicode character database
+lib/unicode/In/68.pl                           Unicode character database
+lib/unicode/In/69.pl                           Unicode character database
+lib/unicode/In/70.pl                           Unicode character database
+lib/unicode/In/71.pl                           Unicode character database
+lib/unicode/In/72.pl                           Unicode character database
+lib/unicode/In/73.pl                           Unicode character database
+lib/unicode/In/74.pl                           Unicode character database
+lib/unicode/In/75.pl                           Unicode character database
+lib/unicode/In/76.pl                           Unicode character database
+lib/unicode/In/77.pl                           Unicode character database
+lib/unicode/In/78.pl                           Unicode character database
+lib/unicode/In/79.pl                           Unicode character database
+lib/unicode/In/80.pl                           Unicode character database
+lib/unicode/In/81.pl                           Unicode character database
+lib/unicode/In/82.pl                           Unicode character database
+lib/unicode/In/83.pl                           Unicode character database
+lib/unicode/In/84.pl                           Unicode character database
+lib/unicode/In/85.pl                           Unicode character database
+lib/unicode/In/86.pl                           Unicode character database
+lib/unicode/In/87.pl                           Unicode character database
+lib/unicode/In/88.pl                           Unicode character database
+lib/unicode/In/89.pl                           Unicode character database
+lib/unicode/In/90.pl                           Unicode character database
+lib/unicode/In/91.pl                           Unicode character database
+lib/unicode/In/92.pl                           Unicode character database
+lib/unicode/In/93.pl                           Unicode character database
+lib/unicode/In/94.pl                           Unicode character database
+lib/unicode/In/95.pl                           Unicode character database
 lib/unicode/Index.txt                          Unicode character database
 lib/unicode/Is/ASCII.pl                                Unicode character database
 lib/unicode/Is/Alnum.pl                                Unicode character database
diff --git a/lib/unicode/Blocks.pl b/lib/unicode/Blocks.pl
new file mode 100644 (file)
index 0000000..ef60058
--- /dev/null
@@ -0,0 +1,203 @@
+# !!!!!!!   DO NOT EDIT THIS FILE   !!!!!!! 
+# This file is built by mktables.PL from e.g. Unicode.txt.
+# Any changes made here will be lost!
+return <<'END';
+0000   007F    Basic Latin
+# In/0.pl BasicLatin
+0080   00FF    Latin-1 Supplement
+# In/1.pl Latin1Supplement
+0100   017F    Latin Extended-A
+# In/2.pl LatinExtendedA
+0180   024F    Latin Extended-B
+# In/3.pl LatinExtendedB
+0250   02AF    IPA Extensions
+# In/4.pl IPAExtensions
+02B0   02FF    Spacing Modifier Letters
+# In/5.pl SpacingModifierLetters
+0300   036F    Combining Diacritical Marks
+# In/6.pl CombiningDiacriticalMarks
+0370   03FF    Greek
+# In/7.pl Greek
+0400   04FF    Cyrillic
+# In/8.pl Cyrillic
+0530   058F    Armenian
+# In/9.pl Armenian
+0590   05FF    Hebrew
+# In/10.pl Hebrew
+0600   06FF    Arabic
+# In/11.pl Arabic
+0700   074F    Syriac  
+# In/12.pl Syriac
+0780   07BF    Thaana
+# In/13.pl Thaana
+0900   097F    Devanagari
+# In/14.pl Devanagari
+0980   09FF    Bengali
+# In/15.pl Bengali
+0A00   0A7F    Gurmukhi
+# In/16.pl Gurmukhi
+0A80   0AFF    Gujarati
+# In/17.pl Gujarati
+0B00   0B7F    Oriya
+# In/18.pl Oriya
+0B80   0BFF    Tamil
+# In/19.pl Tamil
+0C00   0C7F    Telugu
+# In/20.pl Telugu
+0C80   0CFF    Kannada
+# In/21.pl Kannada
+0D00   0D7F    Malayalam
+# In/22.pl Malayalam
+0D80   0DFF    Sinhala
+# In/23.pl Sinhala
+0E00   0E7F    Thai
+# In/24.pl Thai
+0E80   0EFF    Lao
+# In/25.pl Lao
+0F00   0FFF    Tibetan
+# In/26.pl Tibetan
+1000   109F    Myanmar 
+# In/27.pl Myanmar
+10A0   10FF    Georgian
+# In/28.pl Georgian
+1100   11FF    Hangul Jamo
+# In/29.pl HangulJamo
+1200   137F    Ethiopic
+# In/30.pl Ethiopic
+13A0   13FF    Cherokee
+# In/31.pl Cherokee
+1400   167F    Unified Canadian Aboriginal Syllabics
+# In/32.pl UnifiedCanadianAboriginalSyllabics
+1680   169F    Ogham
+# In/33.pl Ogham
+16A0   16FF    Runic
+# In/34.pl Runic
+1780   17FF    Khmer
+# In/35.pl Khmer
+1800   18AF    Mongolian
+# In/36.pl Mongolian
+1E00   1EFF    Latin Extended Additional
+# In/37.pl LatinExtendedAdditional
+1F00   1FFF    Greek Extended
+# In/38.pl GreekExtended
+2000   206F    General Punctuation
+# In/39.pl GeneralPunctuation
+2070   209F    Superscripts and Subscripts
+# In/40.pl SuperscriptsandSubscripts
+20A0   20CF    Currency Symbols
+# In/41.pl CurrencySymbols
+20D0   20FF    Combining Marks for Symbols
+# In/42.pl CombiningMarksforSymbols
+2100   214F    Letterlike Symbols
+# In/43.pl LetterlikeSymbols
+2150   218F    Number Forms
+# In/44.pl NumberForms
+2190   21FF    Arrows
+# In/45.pl Arrows
+2200   22FF    Mathematical Operators
+# In/46.pl MathematicalOperators
+2300   23FF    Miscellaneous Technical
+# In/47.pl MiscellaneousTechnical
+2400   243F    Control Pictures
+# In/48.pl ControlPictures
+2440   245F    Optical Character Recognition
+# In/49.pl OpticalCharacterRecognition
+2460   24FF    Enclosed Alphanumerics
+# In/50.pl EnclosedAlphanumerics
+2500   257F    Box Drawing
+# In/51.pl BoxDrawing
+2580   259F    Block Elements
+# In/52.pl BlockElements
+25A0   25FF    Geometric Shapes
+# In/53.pl GeometricShapes
+2600   26FF    Miscellaneous Symbols
+# In/54.pl MiscellaneousSymbols
+2700   27BF    Dingbats
+# In/55.pl Dingbats
+2800   28FF    Braille Patterns
+# In/56.pl BraillePatterns
+2E80   2EFF    CJK Radicals Supplement
+# In/57.pl CJKRadicalsSupplement
+2F00   2FDF    Kangxi Radicals
+# In/58.pl KangxiRadicals
+2FF0   2FFF    Ideographic Description Characters
+# In/59.pl IdeographicDescriptionCharacters
+3000   303F    CJK Symbols and Punctuation
+# In/60.pl CJKSymbolsandPunctuation
+3040   309F    Hiragana
+# In/61.pl Hiragana
+30A0   30FF    Katakana
+# In/62.pl Katakana
+3100   312F    Bopomofo
+# In/63.pl Bopomofo
+3130   318F    Hangul Compatibility Jamo
+# In/64.pl HangulCompatibilityJamo
+3190   319F    Kanbun
+# In/65.pl Kanbun
+31A0   31BF    Bopomofo Extended
+# In/66.pl BopomofoExtended
+3200   32FF    Enclosed CJK Letters and Months
+# In/67.pl EnclosedCJKLettersandMonths
+3300   33FF    CJK Compatibility
+# In/68.pl CJKCompatibility
+3400   4DB5    CJK Unified Ideographs Extension A
+# In/69.pl CJKUnifiedIdeographsExtensionA
+4E00   9FFF    CJK Unified Ideographs
+# In/70.pl CJKUnifiedIdeographs
+A000   A48F    Yi Syllables
+# In/71.pl YiSyllables
+A490   A4CF    Yi Radicals
+# In/72.pl YiRadicals
+AC00   D7A3    Hangul Syllables
+# In/73.pl HangulSyllables
+D800   DB7F    High Surrogates
+# In/74.pl HighSurrogates
+DB80   DBFF    High Private Use Surrogates
+# In/75.pl HighPrivateUseSurrogates
+DC00   DFFF    Low Surrogates
+# In/76.pl LowSurrogates
+E000   F8FF    Private Use
+# In/77.pl PrivateUse
+F900   FAFF    CJK Compatibility Ideographs
+# In/78.pl CJKCompatibilityIdeographs
+FB00   FB4F    Alphabetic Presentation Forms
+# In/79.pl AlphabeticPresentationForms
+FB50   FDFF    Arabic Presentation Forms-A
+# In/80.pl ArabicPresentationFormsA
+FE20   FE2F    Combining Half Marks
+# In/81.pl CombiningHalfMarks
+FE30   FE4F    CJK Compatibility Forms
+# In/82.pl CJKCompatibilityForms
+FE50   FE6F    Small Form Variants
+# In/83.pl SmallFormVariants
+FE70   FEFE    Arabic Presentation Forms-B
+# In/84.pl ArabicPresentationFormsB
+FEFF   FEFF    Specials
+# In/85.pl Specials
+FF00   FFEF    Halfwidth and Fullwidth Forms
+# In/86.pl HalfwidthandFullwidthForms
+FFF0   FFFD    Specials
+# In/85.pl Specials
+10300  1032F   Old Italic
+# In/87.pl OldItalic
+10330  1034F   Gothic
+# In/88.pl Gothic
+10400  1044F   Deseret
+# In/89.pl Deseret
+1D000  1D0FF   Byzantine Musical Symbols
+# In/90.pl ByzantineMusicalSymbols
+1D100  1D1FF   Musical Symbols
+# In/91.pl MusicalSymbols
+1D400  1D7FF   Mathematical Alphanumeric Symbols
+# In/92.pl MathematicalAlphanumericSymbols
+20000  2A6D6   CJK Unified Ideographs Extension B
+# In/93.pl CJKUnifiedIdeographsExtensionB
+2F800  2FA1F   CJK Compatibility Ideographs Supplement
+# In/94.pl CJKCompatibilityIdeographsSupplement
+E0000  E007F   Tags
+# In/95.pl Tags
+F0000  FFFFD   Private Use
+# In/77.pl PrivateUse
+100000 10FFFD  Private Use
+# In/77.pl PrivateUse
+END
diff --git a/lib/unicode/In.pl b/lib/unicode/In.pl
new file mode 100644 (file)
index 0000000..eefec27
--- /dev/null
@@ -0,0 +1,101 @@
+# !!!!!!!   DO NOT EDIT THIS FILE   !!!!!!! 
+# This file is built by mktables.PL from e.g. Unicode.txt.
+# Any changes made here will be lost!
+%utf8::In = (
+'BasicLatin' => 0,
+'Latin1Supplement' => 1,
+'Hebrew' => 10,
+'Arabic' => 11,
+'Syriac' => 12,
+'Thaana' => 13,
+'Devanagari' => 14,
+'Bengali' => 15,
+'Gurmukhi' => 16,
+'Gujarati' => 17,
+'Oriya' => 18,
+'Tamil' => 19,
+'LatinExtendedA' => 2,
+'Telugu' => 20,
+'Kannada' => 21,
+'Malayalam' => 22,
+'Sinhala' => 23,
+'Thai' => 24,
+'Lao' => 25,
+'Tibetan' => 26,
+'Myanmar' => 27,
+'Georgian' => 28,
+'HangulJamo' => 29,
+'LatinExtendedB' => 3,
+'Ethiopic' => 30,
+'Cherokee' => 31,
+'UnifiedCanadianAboriginalSyllabics' => 32,
+'Ogham' => 33,
+'Runic' => 34,
+'Khmer' => 35,
+'Mongolian' => 36,
+'LatinExtendedAdditional' => 37,
+'GreekExtended' => 38,
+'GeneralPunctuation' => 39,
+'IPAExtensions' => 4,
+'SuperscriptsandSubscripts' => 40,
+'CurrencySymbols' => 41,
+'CombiningMarksforSymbols' => 42,
+'LetterlikeSymbols' => 43,
+'NumberForms' => 44,
+'Arrows' => 45,
+'MathematicalOperators' => 46,
+'MiscellaneousTechnical' => 47,
+'ControlPictures' => 48,
+'OpticalCharacterRecognition' => 49,
+'SpacingModifierLetters' => 5,
+'EnclosedAlphanumerics' => 50,
+'BoxDrawing' => 51,
+'BlockElements' => 52,
+'GeometricShapes' => 53,
+'MiscellaneousSymbols' => 54,
+'Dingbats' => 55,
+'BraillePatterns' => 56,
+'CJKRadicalsSupplement' => 57,
+'KangxiRadicals' => 58,
+'IdeographicDescriptionCharacters' => 59,
+'CombiningDiacriticalMarks' => 6,
+'CJKSymbolsandPunctuation' => 60,
+'Hiragana' => 61,
+'Katakana' => 62,
+'Bopomofo' => 63,
+'HangulCompatibilityJamo' => 64,
+'Kanbun' => 65,
+'BopomofoExtended' => 66,
+'EnclosedCJKLettersandMonths' => 67,
+'CJKCompatibility' => 68,
+'CJKUnifiedIdeographsExtensionA' => 69,
+'Greek' => 7,
+'CJKUnifiedIdeographs' => 70,
+'YiSyllables' => 71,
+'YiRadicals' => 72,
+'HangulSyllables' => 73,
+'HighSurrogates' => 74,
+'HighPrivateUseSurrogates' => 75,
+'LowSurrogates' => 76,
+'PrivateUse' => 77,
+'CJKCompatibilityIdeographs' => 78,
+'AlphabeticPresentationForms' => 79,
+'Cyrillic' => 8,
+'ArabicPresentationFormsA' => 80,
+'CombiningHalfMarks' => 81,
+'CJKCompatibilityForms' => 82,
+'SmallFormVariants' => 83,
+'ArabicPresentationFormsB' => 84,
+'Specials' => 85,
+'HalfwidthandFullwidthForms' => 86,
+'OldItalic' => 87,
+'Gothic' => 88,
+'Deseret' => 89,
+'Armenian' => 9,
+'ByzantineMusicalSymbols' => 90,
+'MusicalSymbols' => 91,
+'MathematicalAlphanumericSymbols' => 92,
+'CJKUnifiedIdeographsExtensionB' => 93,
+'CJKCompatibilityIdeographsSupplement' => 94,
+'Tags' => 95,
+);
similarity index 100%
rename from lib/unicode/In/Oriya.pl
rename to lib/unicode/In/18.pl
similarity index 100%
rename from lib/unicode/In/Tamil.pl
rename to lib/unicode/In/19.pl
similarity index 100%
rename from lib/unicode/In/Thai.pl
rename to lib/unicode/In/24.pl
similarity index 100%
rename from lib/unicode/In/Lao.pl
rename to lib/unicode/In/25.pl
similarity index 100%
rename from lib/unicode/In/Ogham.pl
rename to lib/unicode/In/33.pl
similarity index 100%
rename from lib/unicode/In/Runic.pl
rename to lib/unicode/In/34.pl
similarity index 100%
rename from lib/unicode/In/Khmer.pl
rename to lib/unicode/In/35.pl
similarity index 100%
rename from lib/unicode/In/Greek.pl
rename to lib/unicode/In/7.pl
similarity index 92%
copy from lib/unicode/Block.pl
copy to lib/unicode/In/77.pl
index 272f63f..530166d 100644 (file)
@@ -2,4 +2,5 @@
 # This file is built by mktables.PL from e.g. Unicode.txt.
 # Any changes made here will be lost!
 return <<'END';
+100000 10FFFD
 END
similarity index 93%
copy from lib/unicode/Block.pl
copy to lib/unicode/In/87.pl
index 272f63f..44a5e47 100644 (file)
@@ -2,4 +2,5 @@
 # This file is built by mktables.PL from e.g. Unicode.txt.
 # Any changes made here will be lost!
 return <<'END';
+10300  1032F
 END
similarity index 93%
copy from lib/unicode/Block.pl
copy to lib/unicode/In/88.pl
index 272f63f..8030411 100644 (file)
@@ -2,4 +2,5 @@
 # This file is built by mktables.PL from e.g. Unicode.txt.
 # Any changes made here will be lost!
 return <<'END';
+10330  1034F
 END
similarity index 93%
copy from lib/unicode/Block.pl
copy to lib/unicode/In/89.pl
index 272f63f..d2c50bb 100644 (file)
@@ -2,4 +2,5 @@
 # This file is built by mktables.PL from e.g. Unicode.txt.
 # Any changes made here will be lost!
 return <<'END';
+10400  1044F
 END
similarity index 93%
copy from lib/unicode/Block.pl
copy to lib/unicode/In/90.pl
index 272f63f..f1073c7 100644 (file)
@@ -2,4 +2,5 @@
 # This file is built by mktables.PL from e.g. Unicode.txt.
 # Any changes made here will be lost!
 return <<'END';
+1D000  1D0FF
 END
similarity index 93%
copy from lib/unicode/Block.pl
copy to lib/unicode/In/91.pl
index 272f63f..7435889 100644 (file)
@@ -2,4 +2,5 @@
 # This file is built by mktables.PL from e.g. Unicode.txt.
 # Any changes made here will be lost!
 return <<'END';
+1D100  1D1FF
 END
similarity index 93%
copy from lib/unicode/Block.pl
copy to lib/unicode/In/92.pl
index 272f63f..7e40edc 100644 (file)
@@ -2,4 +2,5 @@
 # This file is built by mktables.PL from e.g. Unicode.txt.
 # Any changes made here will be lost!
 return <<'END';
+1D400  1D7FF
 END
similarity index 93%
copy from lib/unicode/Block.pl
copy to lib/unicode/In/93.pl
index 272f63f..931aec3 100644 (file)
@@ -2,4 +2,5 @@
 # This file is built by mktables.PL from e.g. Unicode.txt.
 # Any changes made here will be lost!
 return <<'END';
+20000  2A6D6
 END
similarity index 93%
copy from lib/unicode/Block.pl
copy to lib/unicode/In/94.pl
index 272f63f..c025148 100644 (file)
@@ -2,4 +2,5 @@
 # This file is built by mktables.PL from e.g. Unicode.txt.
 # Any changes made here will be lost!
 return <<'END';
+2F800  2FA1F
 END
similarity index 93%
rename from lib/unicode/Block.pl
rename to lib/unicode/In/95.pl
index 272f63f..495d2d5 100644 (file)
@@ -2,4 +2,5 @@
 # This file is built by mktables.PL from e.g. Unicode.txt.
 # Any changes made here will be lost!
 return <<'END';
+E0000  E007F
 END
diff --git a/lib/unicode/In/PrivateUse.pl b/lib/unicode/In/PrivateUse.pl
deleted file mode 100644 (file)
index c81b567..0000000
+++ /dev/null
@@ -1,6 +0,0 @@
-# !!!!!!!   DO NOT EDIT THIS FILE   !!!!!!! 
-# This file is built by mktables.PL from e.g. Unicode.txt.
-# Any changes made here will be lost!
-return <<'END';
-E000   F8FF
-END
index 8187854..68578b9 100755 (executable)
@@ -231,11 +231,24 @@ mkdir "To", 0755;
 
 # This is not written for speed...
 
+my %InId;
+my $InId = 0;
+
 foreach $file (@todo) {
     my ($table, $wanted, $val) = @$file;
     next if @ARGV and not grep { $_ eq $table } @ARGV;
-    print $table,"\n";
-    if ($table =~ /^(Is|In|To)(.*)/) {
+    print $table, "\n";
+    $table =~ s/\W+//g;
+    if ($table =~ /^In(.+)/) {         # In* table: $1 is the name after "In"
+       my $id;
+        unless (exists $InId{$1}) {    # first time this name is seen
+           $InId{$1} = $InId++;        # assign the next sequential id
+       }
+       $id = $InId{$1};
+       open(OUT, ">In/$id.pl") or die "Can't create In/$id.pl: $!\n";
+       print OUT "# In/$id.pl $1\n";   # record the class name in the numbered file
+    }
+    elsif ($table =~ /^(Is|To)(.+)/) {
        open(OUT, ">$1/$2.pl") or die "Can't create $1/$2.pl: $!\n";
     }
     else {
@@ -257,9 +270,9 @@ END
 # Must treat blocks specially.
 
 exit if @ARGV and not grep { $_ eq Block } @ARGV;
-print "Block\n";
+print "Blocks\n";
 open(UD, 'Blocks.txt') or die "Can't open Blocks.txt: $!\n";
-open(OUT, ">Block.pl") or die "Can't create Block.pl: $!\n";
+open(OUT, ">Blocks.pl") or die "Can't create Blocks.pl: $!\n";
 print OUT <<EOH;
 # !!!!!!!   DO NOT EDIT THIS FILE   !!!!!!! 
 # This file is built by $0 from e.g. $UnicodeData.
@@ -273,11 +286,17 @@ while (<UD>) {
     next if /^#/;
     next if /^$/;
     chomp;
-    ($code, $last, $name) = /^([0-9a-f]+)\.\.([0-9a-f]); (.+)/i;
+    ($code, $last, $name) = /^([0-9a-f]+)\.\.([0-9a-f]+); (.+)/i;
     if ($name) {
        print OUT "$code        $last   $name\n";
-       $name =~ s/\s+//g;
-       open(BLOCK, ">In/$name.pl");
+       $name =~ s/\W+//g;              # strip non-word chars: "Latin-1 Supplement" -> "Latin1Supplement"
+       my $id;
+        unless (exists $InId{$name}) { # first time this block name is seen
+           $InId{$name} = $InId++;     # assign the next sequential id
+       }
+       $id = $InId{$name};             # NOTE(review): '>' below truncates; repeated names (PrivateUse, Specials) appear to keep only their last range -- confirm
+       open(BLOCK, ">In/$id.pl") or die "Can't create In/$id.pl: $!\n";
+       print OUT "# In/$id.pl $name\n";
        print BLOCK <<EOH;
 # !!!!!!!   DO NOT EDIT THIS FILE   !!!!!!! 
 # This file is built by $0 from e.g. $UnicodeData.
@@ -295,6 +314,24 @@ END2
 print OUT "END\n";
 close OUT;
 
+open(INID, ">In.pl") or die "Can't create In.pl: $!\n";
+
+print INID <<EOH;
+# !!!!!!!   DO NOT EDIT THIS FILE   !!!!!!! 
+# This file is built by $0 from e.g. $UnicodeData.
+# Any changes made here will be lost!
+%utf8::In = (
+EOH
+
+# Order doesn't matter to Perl; sort numerically by id to prettyprint.
+foreach my $in (sort { $InId{$a} <=> $InId{$b} } keys %InId) {
+    print INID "'$in' => $InId{$in},\n";
+}
+
+print INID ");\n";
+
+close(INID);
+
 ##################################################
 
 sub proplist {
index 8649e9e..5637d12 100644 (file)
@@ -26,7 +26,14 @@ sub SWASHNEW {
     while (($caller = caller($i)) eq __PACKAGE__) { $i++ }
     my $encoding = $enc{$caller} || "unicode";
     (my $file = $type) =~ s!::!/!g;
-    $file =~ s#^(I[sn]|To)([A-Z].*)#$1/$2#;
+    if ($file =~ /^In(.+)/) {                          # \p{InFoo}: $1 is the block class name
+       defined %utf8::In || do "$encoding/In.pl";      # load the name => id map if not yet defined
+       if (exists $utf8::In{$1}) {
+           $file = "$encoding/In/$utf8::In{$1}";       # numbered table file, e.g. In/26
+       }
+    } else {
+       $file =~ s#^(Is|To)([A-Z].*)#$1/$2#;
+    }
     $list ||= eval { $caller->$type(); }
        || do "$file.pl"
        || do "$encoding/$file.pl"
index 8ddcdd2..12bee5c 100644 (file)
@@ -158,9 +158,12 @@ Named Unicode properties and block ranges may be used as character
 classes via the new C<\p{}> (matches property) and C<\P{}> (doesn't
 match property) constructs.  For instance, C<\p{Lu}> matches any
 character with the Unicode uppercase property, while C<\p{M}> matches
-any mark character.  Single letter properties may omit the brackets, so
-that can be written C<\pM> also.  Many predefined character classes are
-available, such as C<\p{IsMirrored}> and  C<\p{InTibetan}>.
+any mark character.  Single-letter properties may omit the braces,
+so that can also be written C<\pM>.  Many predefined character classes
+are available, such as C<\p{IsMirrored}> and C<\p{InTibetan}>.  The
+names of the C<In> classes are the official Unicode block names but
+with all non-alphanumeric characters removed; for example, the block
+name C<"Latin-1 Supplement"> becomes C<\p{InLatin1Supplement}>.
 
 =item *