lib/unicode/ArabShap.txt Unicode character database
lib/unicode/BidiMirr.txt Unicode character database
lib/unicode/Bidirectional.pl Unicode character database
-lib/unicode/Block.pl Unicode character database
+lib/unicode/Blocks.pl Unicode character database
lib/unicode/Blocks.txt Unicode character database
lib/unicode/CaseFold.txt Unicode character database
lib/unicode/Category.pl Unicode character database
lib/unicode/CompExcl.txt Unicode character database
lib/unicode/Decomposition.pl Unicode character database
lib/unicode/EAWidth.txt Unicode character database
-lib/unicode/In/AlphabeticPresentationForms.pl Unicode character database
-lib/unicode/In/Arabic.pl Unicode character database
-lib/unicode/In/ArabicPresentationForms-A.pl Unicode character database
-lib/unicode/In/ArabicPresentationForms-B.pl Unicode character database
-lib/unicode/In/Armenian.pl Unicode character database
-lib/unicode/In/Arrows.pl Unicode character database
-lib/unicode/In/BasicLatin.pl Unicode character database
-lib/unicode/In/Bengali.pl Unicode character database
-lib/unicode/In/BlockElements.pl Unicode character database
-lib/unicode/In/Bopomofo.pl Unicode character database
-lib/unicode/In/BopomofoExtended.pl Unicode character database
-lib/unicode/In/BoxDrawing.pl Unicode character database
-lib/unicode/In/BraillePatterns.pl Unicode character database
-lib/unicode/In/CJKCompatibility.pl Unicode character database
-lib/unicode/In/CJKCompatibilityForms.pl Unicode character database
-lib/unicode/In/CJKCompatibilityIdeographs.pl Unicode character database
-lib/unicode/In/CJKRadicalsSupplement.pl Unicode character database
-lib/unicode/In/CJKSymbolsandPunctuation.pl Unicode character database
-lib/unicode/In/CJKUnifiedIdeographs.pl Unicode character database
-lib/unicode/In/CJKUnifiedIdeographsExtensionA.pl Unicode character database
-lib/unicode/In/Cherokee.pl Unicode character database
-lib/unicode/In/CombiningDiacriticalMarks.pl Unicode character database
-lib/unicode/In/CombiningHalfMarks.pl Unicode character database
-lib/unicode/In/CombiningMarksforSymbols.pl Unicode character database
-lib/unicode/In/ControlPictures.pl Unicode character database
-lib/unicode/In/CurrencySymbols.pl Unicode character database
-lib/unicode/In/Cyrillic.pl Unicode character database
-lib/unicode/In/Devanagari.pl Unicode character database
-lib/unicode/In/Dingbats.pl Unicode character database
-lib/unicode/In/EnclosedAlphanumerics.pl Unicode character database
-lib/unicode/In/EnclosedCJKLettersandMonths.pl Unicode character database
-lib/unicode/In/Ethiopic.pl Unicode character database
-lib/unicode/In/GeneralPunctuation.pl Unicode character database
-lib/unicode/In/GeometricShapes.pl Unicode character database
-lib/unicode/In/Georgian.pl Unicode character database
-lib/unicode/In/Greek.pl Unicode character database
-lib/unicode/In/GreekExtended.pl Unicode character database
-lib/unicode/In/Gujarati.pl Unicode character database
-lib/unicode/In/Gurmukhi.pl Unicode character database
-lib/unicode/In/HalfwidthandFullwidthForms.pl Unicode character database
-lib/unicode/In/HangulCompatibilityJamo.pl Unicode character database
-lib/unicode/In/HangulJamo.pl Unicode character database
-lib/unicode/In/HangulSyllables.pl Unicode character database
-lib/unicode/In/Hebrew.pl Unicode character database
-lib/unicode/In/HighPrivateUseSurrogates.pl Unicode character database
-lib/unicode/In/HighSurrogates.pl Unicode character database
-lib/unicode/In/Hiragana.pl Unicode character database
-lib/unicode/In/IPAExtensions.pl Unicode character database
-lib/unicode/In/IdeographicDescriptionCharacters.pl Unicode character database
-lib/unicode/In/Kanbun.pl Unicode character database
-lib/unicode/In/KangxiRadicals.pl Unicode character database
-lib/unicode/In/Kannada.pl Unicode character database
-lib/unicode/In/Katakana.pl Unicode character database
-lib/unicode/In/Khmer.pl Unicode character database
-lib/unicode/In/Lao.pl Unicode character database
-lib/unicode/In/Latin-1Supplement.pl Unicode character database
-lib/unicode/In/LatinExtended-A.pl Unicode character database
-lib/unicode/In/LatinExtended-B.pl Unicode character database
-lib/unicode/In/LatinExtendedAdditional.pl Unicode character database
-lib/unicode/In/LetterlikeSymbols.pl Unicode character database
-lib/unicode/In/LowSurrogates.pl Unicode character database
-lib/unicode/In/Malayalam.pl Unicode character database
-lib/unicode/In/MathematicalOperators.pl Unicode character database
-lib/unicode/In/MiscellaneousSymbols.pl Unicode character database
-lib/unicode/In/MiscellaneousTechnical.pl Unicode character database
-lib/unicode/In/Mongolian.pl Unicode character database
-lib/unicode/In/Myanmar.pl Unicode character database
-lib/unicode/In/NumberForms.pl Unicode character database
-lib/unicode/In/Ogham.pl Unicode character database
-lib/unicode/In/OpticalCharacterRecognition.pl Unicode character database
-lib/unicode/In/Oriya.pl Unicode character database
-lib/unicode/In/PrivateUse.pl Unicode character database
-lib/unicode/In/Runic.pl Unicode character database
-lib/unicode/In/Sinhala.pl Unicode character database
-lib/unicode/In/SmallFormVariants.pl Unicode character database
-lib/unicode/In/SpacingModifierLetters.pl Unicode character database
-lib/unicode/In/Specials.pl Unicode character database
-lib/unicode/In/SuperscriptsandSubscripts.pl Unicode character database
-lib/unicode/In/Syriac.pl Unicode character database
-lib/unicode/In/Tamil.pl Unicode character database
-lib/unicode/In/Telugu.pl Unicode character database
-lib/unicode/In/Thaana.pl Unicode character database
-lib/unicode/In/Thai.pl Unicode character database
-lib/unicode/In/Tibetan.pl Unicode character database
-lib/unicode/In/UnifiedCanadianAboriginalSyllabics.pl Unicode character database
-lib/unicode/In/YiRadicals.pl Unicode character database
-lib/unicode/In/YiSyllables.pl Unicode character database
+lib/unicode/In.pl Unicode character database
+lib/unicode/In/0.pl Unicode character database
+lib/unicode/In/1.pl Unicode character database
+lib/unicode/In/2.pl Unicode character database
+lib/unicode/In/3.pl Unicode character database
+lib/unicode/In/4.pl Unicode character database
+lib/unicode/In/5.pl Unicode character database
+lib/unicode/In/6.pl Unicode character database
+lib/unicode/In/7.pl Unicode character database
+lib/unicode/In/8.pl Unicode character database
+lib/unicode/In/9.pl Unicode character database
+lib/unicode/In/10.pl Unicode character database
+lib/unicode/In/11.pl Unicode character database
+lib/unicode/In/12.pl Unicode character database
+lib/unicode/In/13.pl Unicode character database
+lib/unicode/In/14.pl Unicode character database
+lib/unicode/In/15.pl Unicode character database
+lib/unicode/In/16.pl Unicode character database
+lib/unicode/In/17.pl Unicode character database
+lib/unicode/In/18.pl Unicode character database
+lib/unicode/In/19.pl Unicode character database
+lib/unicode/In/20.pl Unicode character database
+lib/unicode/In/21.pl Unicode character database
+lib/unicode/In/22.pl Unicode character database
+lib/unicode/In/23.pl Unicode character database
+lib/unicode/In/24.pl Unicode character database
+lib/unicode/In/25.pl Unicode character database
+lib/unicode/In/26.pl Unicode character database
+lib/unicode/In/27.pl Unicode character database
+lib/unicode/In/28.pl Unicode character database
+lib/unicode/In/29.pl Unicode character database
+lib/unicode/In/30.pl Unicode character database
+lib/unicode/In/31.pl Unicode character database
+lib/unicode/In/32.pl Unicode character database
+lib/unicode/In/33.pl Unicode character database
+lib/unicode/In/34.pl Unicode character database
+lib/unicode/In/35.pl Unicode character database
+lib/unicode/In/36.pl Unicode character database
+lib/unicode/In/37.pl Unicode character database
+lib/unicode/In/38.pl Unicode character database
+lib/unicode/In/39.pl Unicode character database
+lib/unicode/In/40.pl Unicode character database
+lib/unicode/In/41.pl Unicode character database
+lib/unicode/In/42.pl Unicode character database
+lib/unicode/In/43.pl Unicode character database
+lib/unicode/In/44.pl Unicode character database
+lib/unicode/In/45.pl Unicode character database
+lib/unicode/In/46.pl Unicode character database
+lib/unicode/In/47.pl Unicode character database
+lib/unicode/In/48.pl Unicode character database
+lib/unicode/In/49.pl Unicode character database
+lib/unicode/In/50.pl Unicode character database
+lib/unicode/In/51.pl Unicode character database
+lib/unicode/In/52.pl Unicode character database
+lib/unicode/In/53.pl Unicode character database
+lib/unicode/In/54.pl Unicode character database
+lib/unicode/In/55.pl Unicode character database
+lib/unicode/In/56.pl Unicode character database
+lib/unicode/In/57.pl Unicode character database
+lib/unicode/In/58.pl Unicode character database
+lib/unicode/In/59.pl Unicode character database
+lib/unicode/In/60.pl Unicode character database
+lib/unicode/In/61.pl Unicode character database
+lib/unicode/In/62.pl Unicode character database
+lib/unicode/In/63.pl Unicode character database
+lib/unicode/In/64.pl Unicode character database
+lib/unicode/In/65.pl Unicode character database
+lib/unicode/In/66.pl Unicode character database
+lib/unicode/In/67.pl Unicode character database
+lib/unicode/In/68.pl Unicode character database
+lib/unicode/In/69.pl Unicode character database
+lib/unicode/In/70.pl Unicode character database
+lib/unicode/In/71.pl Unicode character database
+lib/unicode/In/72.pl Unicode character database
+lib/unicode/In/73.pl Unicode character database
+lib/unicode/In/74.pl Unicode character database
+lib/unicode/In/75.pl Unicode character database
+lib/unicode/In/76.pl Unicode character database
+lib/unicode/In/77.pl Unicode character database
+lib/unicode/In/78.pl Unicode character database
+lib/unicode/In/79.pl Unicode character database
+lib/unicode/In/80.pl Unicode character database
+lib/unicode/In/81.pl Unicode character database
+lib/unicode/In/82.pl Unicode character database
+lib/unicode/In/83.pl Unicode character database
+lib/unicode/In/84.pl Unicode character database
+lib/unicode/In/85.pl Unicode character database
+lib/unicode/In/86.pl Unicode character database
+lib/unicode/In/87.pl Unicode character database
+lib/unicode/In/88.pl Unicode character database
+lib/unicode/In/89.pl Unicode character database
+lib/unicode/In/90.pl Unicode character database
+lib/unicode/In/91.pl Unicode character database
+lib/unicode/In/92.pl Unicode character database
+lib/unicode/In/93.pl Unicode character database
+lib/unicode/In/94.pl Unicode character database
+lib/unicode/In/95.pl Unicode character database
lib/unicode/Index.txt Unicode character database
lib/unicode/Is/ASCII.pl Unicode character database
lib/unicode/Is/Alnum.pl Unicode character database
--- /dev/null
+# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
+# This file is built by mktables.PL from e.g. Unicode.txt.
+# Any changes made here will be lost!
+return <<'END';
+0000 007F Basic Latin
+# In/0.pl BasicLatin
+0080 00FF Latin-1 Supplement
+# In/1.pl Latin1Supplement
+0100 017F Latin Extended-A
+# In/2.pl LatinExtendedA
+0180 024F Latin Extended-B
+# In/3.pl LatinExtendedB
+0250 02AF IPA Extensions
+# In/4.pl IPAExtensions
+02B0 02FF Spacing Modifier Letters
+# In/5.pl SpacingModifierLetters
+0300 036F Combining Diacritical Marks
+# In/6.pl CombiningDiacriticalMarks
+0370 03FF Greek
+# In/7.pl Greek
+0400 04FF Cyrillic
+# In/8.pl Cyrillic
+0530 058F Armenian
+# In/9.pl Armenian
+0590 05FF Hebrew
+# In/10.pl Hebrew
+0600 06FF Arabic
+# In/11.pl Arabic
+0700 074F Syriac
+# In/12.pl Syriac
+0780 07BF Thaana
+# In/13.pl Thaana
+0900 097F Devanagari
+# In/14.pl Devanagari
+0980 09FF Bengali
+# In/15.pl Bengali
+0A00 0A7F Gurmukhi
+# In/16.pl Gurmukhi
+0A80 0AFF Gujarati
+# In/17.pl Gujarati
+0B00 0B7F Oriya
+# In/18.pl Oriya
+0B80 0BFF Tamil
+# In/19.pl Tamil
+0C00 0C7F Telugu
+# In/20.pl Telugu
+0C80 0CFF Kannada
+# In/21.pl Kannada
+0D00 0D7F Malayalam
+# In/22.pl Malayalam
+0D80 0DFF Sinhala
+# In/23.pl Sinhala
+0E00 0E7F Thai
+# In/24.pl Thai
+0E80 0EFF Lao
+# In/25.pl Lao
+0F00 0FFF Tibetan
+# In/26.pl Tibetan
+1000 109F Myanmar
+# In/27.pl Myanmar
+10A0 10FF Georgian
+# In/28.pl Georgian
+1100 11FF Hangul Jamo
+# In/29.pl HangulJamo
+1200 137F Ethiopic
+# In/30.pl Ethiopic
+13A0 13FF Cherokee
+# In/31.pl Cherokee
+1400 167F Unified Canadian Aboriginal Syllabics
+# In/32.pl UnifiedCanadianAboriginalSyllabics
+1680 169F Ogham
+# In/33.pl Ogham
+16A0 16FF Runic
+# In/34.pl Runic
+1780 17FF Khmer
+# In/35.pl Khmer
+1800 18AF Mongolian
+# In/36.pl Mongolian
+1E00 1EFF Latin Extended Additional
+# In/37.pl LatinExtendedAdditional
+1F00 1FFF Greek Extended
+# In/38.pl GreekExtended
+2000 206F General Punctuation
+# In/39.pl GeneralPunctuation
+2070 209F Superscripts and Subscripts
+# In/40.pl SuperscriptsandSubscripts
+20A0 20CF Currency Symbols
+# In/41.pl CurrencySymbols
+20D0 20FF Combining Marks for Symbols
+# In/42.pl CombiningMarksforSymbols
+2100 214F Letterlike Symbols
+# In/43.pl LetterlikeSymbols
+2150 218F Number Forms
+# In/44.pl NumberForms
+2190 21FF Arrows
+# In/45.pl Arrows
+2200 22FF Mathematical Operators
+# In/46.pl MathematicalOperators
+2300 23FF Miscellaneous Technical
+# In/47.pl MiscellaneousTechnical
+2400 243F Control Pictures
+# In/48.pl ControlPictures
+2440 245F Optical Character Recognition
+# In/49.pl OpticalCharacterRecognition
+2460 24FF Enclosed Alphanumerics
+# In/50.pl EnclosedAlphanumerics
+2500 257F Box Drawing
+# In/51.pl BoxDrawing
+2580 259F Block Elements
+# In/52.pl BlockElements
+25A0 25FF Geometric Shapes
+# In/53.pl GeometricShapes
+2600 26FF Miscellaneous Symbols
+# In/54.pl MiscellaneousSymbols
+2700 27BF Dingbats
+# In/55.pl Dingbats
+2800 28FF Braille Patterns
+# In/56.pl BraillePatterns
+2E80 2EFF CJK Radicals Supplement
+# In/57.pl CJKRadicalsSupplement
+2F00 2FDF Kangxi Radicals
+# In/58.pl KangxiRadicals
+2FF0 2FFF Ideographic Description Characters
+# In/59.pl IdeographicDescriptionCharacters
+3000 303F CJK Symbols and Punctuation
+# In/60.pl CJKSymbolsandPunctuation
+3040 309F Hiragana
+# In/61.pl Hiragana
+30A0 30FF Katakana
+# In/62.pl Katakana
+3100 312F Bopomofo
+# In/63.pl Bopomofo
+3130 318F Hangul Compatibility Jamo
+# In/64.pl HangulCompatibilityJamo
+3190 319F Kanbun
+# In/65.pl Kanbun
+31A0 31BF Bopomofo Extended
+# In/66.pl BopomofoExtended
+3200 32FF Enclosed CJK Letters and Months
+# In/67.pl EnclosedCJKLettersandMonths
+3300 33FF CJK Compatibility
+# In/68.pl CJKCompatibility
+3400 4DB5 CJK Unified Ideographs Extension A
+# In/69.pl CJKUnifiedIdeographsExtensionA
+4E00 9FFF CJK Unified Ideographs
+# In/70.pl CJKUnifiedIdeographs
+A000 A48F Yi Syllables
+# In/71.pl YiSyllables
+A490 A4CF Yi Radicals
+# In/72.pl YiRadicals
+AC00 D7A3 Hangul Syllables
+# In/73.pl HangulSyllables
+D800 DB7F High Surrogates
+# In/74.pl HighSurrogates
+DB80 DBFF High Private Use Surrogates
+# In/75.pl HighPrivateUseSurrogates
+DC00 DFFF Low Surrogates
+# In/76.pl LowSurrogates
+E000 F8FF Private Use
+# In/77.pl PrivateUse
+F900 FAFF CJK Compatibility Ideographs
+# In/78.pl CJKCompatibilityIdeographs
+FB00 FB4F Alphabetic Presentation Forms
+# In/79.pl AlphabeticPresentationForms
+FB50 FDFF Arabic Presentation Forms-A
+# In/80.pl ArabicPresentationFormsA
+FE20 FE2F Combining Half Marks
+# In/81.pl CombiningHalfMarks
+FE30 FE4F CJK Compatibility Forms
+# In/82.pl CJKCompatibilityForms
+FE50 FE6F Small Form Variants
+# In/83.pl SmallFormVariants
+FE70 FEFE Arabic Presentation Forms-B
+# In/84.pl ArabicPresentationFormsB
+FEFF FEFF Specials
+# In/85.pl Specials
+FF00 FFEF Halfwidth and Fullwidth Forms
+# In/86.pl HalfwidthandFullwidthForms
+FFF0 FFFD Specials
+# In/85.pl Specials
+10300 1032F Old Italic
+# In/87.pl OldItalic
+10330 1034F Gothic
+# In/88.pl Gothic
+10400 1044F Deseret
+# In/89.pl Deseret
+1D000 1D0FF Byzantine Musical Symbols
+# In/90.pl ByzantineMusicalSymbols
+1D100 1D1FF Musical Symbols
+# In/91.pl MusicalSymbols
+1D400 1D7FF Mathematical Alphanumeric Symbols
+# In/92.pl MathematicalAlphanumericSymbols
+20000 2A6D6 CJK Unified Ideographs Extension B
+# In/93.pl CJKUnifiedIdeographsExtensionB
+2F800 2FA1F CJK Compatibility Ideographs Supplement
+# In/94.pl CJKCompatibilityIdeographsSupplement
+E0000 E007F Tags
+# In/95.pl Tags
+F0000 FFFFD Private Use
+# In/77.pl PrivateUse
+100000 10FFFD Private Use
+# In/77.pl PrivateUse
+END
--- /dev/null
+# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
+# This file is built by mktables.PL from e.g. Unicode.txt.
+# Any changes made here will be lost!
+%utf8::In = (
+'BasicLatin' => 0,
+'Latin1Supplement' => 1,
+'Hebrew' => 10,
+'Arabic' => 11,
+'Syriac' => 12,
+'Thaana' => 13,
+'Devanagari' => 14,
+'Bengali' => 15,
+'Gurmukhi' => 16,
+'Gujarati' => 17,
+'Oriya' => 18,
+'Tamil' => 19,
+'LatinExtendedA' => 2,
+'Telugu' => 20,
+'Kannada' => 21,
+'Malayalam' => 22,
+'Sinhala' => 23,
+'Thai' => 24,
+'Lao' => 25,
+'Tibetan' => 26,
+'Myanmar' => 27,
+'Georgian' => 28,
+'HangulJamo' => 29,
+'LatinExtendedB' => 3,
+'Ethiopic' => 30,
+'Cherokee' => 31,
+'UnifiedCanadianAboriginalSyllabics' => 32,
+'Ogham' => 33,
+'Runic' => 34,
+'Khmer' => 35,
+'Mongolian' => 36,
+'LatinExtendedAdditional' => 37,
+'GreekExtended' => 38,
+'GeneralPunctuation' => 39,
+'IPAExtensions' => 4,
+'SuperscriptsandSubscripts' => 40,
+'CurrencySymbols' => 41,
+'CombiningMarksforSymbols' => 42,
+'LetterlikeSymbols' => 43,
+'NumberForms' => 44,
+'Arrows' => 45,
+'MathematicalOperators' => 46,
+'MiscellaneousTechnical' => 47,
+'ControlPictures' => 48,
+'OpticalCharacterRecognition' => 49,
+'SpacingModifierLetters' => 5,
+'EnclosedAlphanumerics' => 50,
+'BoxDrawing' => 51,
+'BlockElements' => 52,
+'GeometricShapes' => 53,
+'MiscellaneousSymbols' => 54,
+'Dingbats' => 55,
+'BraillePatterns' => 56,
+'CJKRadicalsSupplement' => 57,
+'KangxiRadicals' => 58,
+'IdeographicDescriptionCharacters' => 59,
+'CombiningDiacriticalMarks' => 6,
+'CJKSymbolsandPunctuation' => 60,
+'Hiragana' => 61,
+'Katakana' => 62,
+'Bopomofo' => 63,
+'HangulCompatibilityJamo' => 64,
+'Kanbun' => 65,
+'BopomofoExtended' => 66,
+'EnclosedCJKLettersandMonths' => 67,
+'CJKCompatibility' => 68,
+'CJKUnifiedIdeographsExtensionA' => 69,
+'Greek' => 7,
+'CJKUnifiedIdeographs' => 70,
+'YiSyllables' => 71,
+'YiRadicals' => 72,
+'HangulSyllables' => 73,
+'HighSurrogates' => 74,
+'HighPrivateUseSurrogates' => 75,
+'LowSurrogates' => 76,
+'PrivateUse' => 77,
+'CJKCompatibilityIdeographs' => 78,
+'AlphabeticPresentationForms' => 79,
+'Cyrillic' => 8,
+'ArabicPresentationFormsA' => 80,
+'CombiningHalfMarks' => 81,
+'CJKCompatibilityForms' => 82,
+'SmallFormVariants' => 83,
+'ArabicPresentationFormsB' => 84,
+'Specials' => 85,
+'HalfwidthandFullwidthForms' => 86,
+'OldItalic' => 87,
+'Gothic' => 88,
+'Deseret' => 89,
+'Armenian' => 9,
+'ByzantineMusicalSymbols' => 90,
+'MusicalSymbols' => 91,
+'MathematicalAlphanumericSymbols' => 92,
+'CJKUnifiedIdeographsExtensionB' => 93,
+'CJKCompatibilityIdeographsSupplement' => 94,
+'Tags' => 95,
+);
# This file is built by mktables.PL from e.g. Unicode.txt.
# Any changes made here will be lost!
return <<'END';
+100000 10FFFD
END
# This file is built by mktables.PL from e.g. Unicode.txt.
# Any changes made here will be lost!
return <<'END';
+10300 1032F
END
# This file is built by mktables.PL from e.g. Unicode.txt.
# Any changes made here will be lost!
return <<'END';
+10330 1034F
END
# This file is built by mktables.PL from e.g. Unicode.txt.
# Any changes made here will be lost!
return <<'END';
+10400 1044F
END
# This file is built by mktables.PL from e.g. Unicode.txt.
# Any changes made here will be lost!
return <<'END';
+1D000 1D0FF
END
# This file is built by mktables.PL from e.g. Unicode.txt.
# Any changes made here will be lost!
return <<'END';
+1D100 1D1FF
END
# This file is built by mktables.PL from e.g. Unicode.txt.
# Any changes made here will be lost!
return <<'END';
+1D400 1D7FF
END
# This file is built by mktables.PL from e.g. Unicode.txt.
# Any changes made here will be lost!
return <<'END';
+20000 2A6D6
END
# This file is built by mktables.PL from e.g. Unicode.txt.
# Any changes made here will be lost!
return <<'END';
+2F800 2FA1F
END
# This file is built by mktables.PL from e.g. Unicode.txt.
# Any changes made here will be lost!
return <<'END';
+E0000 E007F
END
+++ /dev/null
-# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. Unicode.txt.
-# Any changes made here will be lost!
-return <<'END';
-E000 F8FF
-END
# This is not written for speed...
+my %InId;
+my $InId = 0;
+
foreach $file (@todo) {
my ($table, $wanted, $val) = @$file;
next if @ARGV and not grep { $_ eq $table } @ARGV;
- print $table,"\n";
- if ($table =~ /^(Is|In|To)(.*)/) {
+ print $table, "\n";
+ $table =~ s/\W+//g;
+ if ($table =~ /^In(.+)/) {
+ # "In" tables are written to numbered files rather than files
+ # named after the table; %InId remembers the number given to
+ # each name and $InId is the next unused number.
+ my $id;
+ unless (exists $InId{$1}) {
+ # First time this name is seen: hand out the next id.
+ $InId{$1} = $InId++;
+ }
+ $id = $InId{$1};
+ open(OUT, ">In/$id.pl") or die "Can't create In/$id.pl: $!\n";
+ # Record the original name as a comment line at the top of the file.
+ print OUT "# In/$id.pl $1\n";
+ }
+ elsif ($table =~ /^(Is|To)(.+)/) {
open(OUT, ">$1/$2.pl") or die "Can't create $1/$2.pl: $!\n";
}
else {
# Must treat blocks specially.
exit if @ARGV and not grep { $_ eq Block } @ARGV;
-print "Block\n";
+print "Blocks\n";
open(UD, 'Blocks.txt') or die "Can't open Blocks.txt: $!\n";
-open(OUT, ">Block.pl") or die "Can't create Block.pl: $!\n";
+open(OUT, ">Blocks.pl") or die "Can't create Blocks.pl: $!\n";
print OUT <<EOH;
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by $0 from e.g. $UnicodeData.
next if /^#/;
next if /^$/;
chomp;
- ($code, $last, $name) = /^([0-9a-f]+)\.\.([0-9a-f]); (.+)/i;
+ ($code, $last, $name) = /^([0-9a-f]+)\.\.([0-9a-f]+); (.+)/i;
if ($name) {
print OUT "$code $last $name\n";
- $name =~ s/\s+//g;
- open(BLOCK, ">In/$name.pl");
+ # Turn the block name into an identifier by dropping every non-word
+ # character, e.g. "Latin-1 Supplement" becomes "Latin1Supplement".
+ $name =~ s/\W+//g;
+ # Reuse the id already assigned to this name, or assign the next one.
+ $InId{$name} = $InId++ unless exists $InId{$name};
+ my $id = $InId{$name};
+ # NOTE(review): a name that covers several ranges (e.g. Specials,
+ # PrivateUse) reopens the same In/$id.pl with ">" -- confirm that the
+ # earlier ranges are not lost when the file is truncated.
+ open(BLOCK, ">In/$id.pl") or die "Can't create In/$id.pl: $!\n";
+ # Annotate the Blocks.pl entry with the file that holds this block.
+ print OUT "# In/$id.pl $name\n";
print BLOCK <<EOH;
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by $0 from e.g. $UnicodeData.
print OUT "END\n";
close OUT;
+# Write In.pl, which maps each block name to the numeric id used for
+# its In/<id>.pl file.
+open(INID, ">In.pl") or die "Can't create In.pl: $!\n";
+
+print INID <<EOH;
+# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
+# This file is built by $0 from e.g. $UnicodeData.
+# Any changes made here will be lost!
+%utf8::In = (
+EOH
+
+# Order doesn't matter but let's prettyprint anyway. The ids are
+# integers, so compare numerically (cmp would list 0, 1, 10, 11, ...).
+foreach my $in (sort { $InId{$a} <=> $InId{$b} } keys %InId) {
+ print INID "'$in' => $InId{$in},\n";
+}
+
+print INID ");\n";
+
+# Buffered write errors only show up at close time.
+close(INID) or die "Can't close In.pl: $!\n";
+
##################################################
sub proplist {
while (($caller = caller($i)) eq __PACKAGE__) { $i++ }
my $encoding = $enc{$caller} || "unicode";
(my $file = $type) =~ s!::!/!g;
- $file =~ s#^(I[sn]|To)([A-Z].*)#$1/$2#;
+ if ($file =~ /^In(.+)/) {
+ # Block ("In") classes live in numbered files; In.pl supplies the
+ # name => number map in %utf8::In. Copy the capture first so it
+ # does not depend on $1 surviving the load of In.pl.
+ my $block = $1;
+ # Load the map only while it is still empty.
+ %utf8::In || do "$encoding/In.pl";
+ if (exists $utf8::In{$block}) {
+ $file = "$encoding/In/$utf8::In{$block}";
+ }
+ } else {
+ # Is*/To* tables keep their name as the file name: IsFoo -> Is/Foo.
+ $file =~ s#^(Is|To)([A-Z].*)#$1/$2#;
+ }
$list ||= eval { $caller->$type(); }
|| do "$file.pl"
|| do "$encoding/$file.pl"
classes via the new C<\p{}> (matches property) and C<\P{}> (doesn't
match property) constructs. For instance, C<\p{Lu}> matches any
character with the Unicode uppercase property, while C<\p{M}> matches
-any mark character. Single letter properties may omit the brackets, so
-that can be written C<\pM> also. Many predefined character classes are
-available, such as C<\p{IsMirrored}> and C<\p{InTibetan}>.
+any mark character. Single-letter properties may omit the braces,
+so C<\p{M}> can also be written C<\pM>. Many predefined character
+classes are available, such as C<\p{IsMirrored}> and C<\p{InTibetan}>.
+The names of the C<In> classes are the official Unicode block names
+with all non-alphanumeric characters removed; for example, the block
+name C<"Latin-1 Supplement"> becomes C<\p{InLatin1Supplement}>.
=item *