Changes5.004 Differences between 5.003 and 5.004
Changes5.005 Differences between 5.004 and 5.005
Changes5.6 Differences between 5.005 and 5.6
-config_h.SH Produces config.h
configpm Produces lib/Config.pm
Configure Portability tool
configure.com Configure-equivalent for VMS
configure.gnu Crude emulation of GNU configure
+config_h.SH Produces config.h
cop.h Control operator header
Copying The GNU General Public License
Cross/README Cross-compilation
epoc/config.sh EPOC port config.sh template
epoc/createpkg.pl EPOC port generate PKG file
epoc/epoc.c EPOC port
-epoc/epoc_stubs.c EPOC port
epoc/epocish.c EPOC port
epoc/epocish.h EPOC port
+epoc/epoc_stubs.c EPOC port
epoc/link.pl EPOC port link a exe
ext/attrs.t See if attrs works with C<sub : attrs>
ext/attrs/attrs.pm attrs extension Perl module
ext/Data/Dumper/t/overload.t See if Data::Dumper works for overloaded data
ext/Data/Dumper/Todo Data pretty printer, futures
ext/DB_File/Changes Berkeley DB extension change log
+ext/DB_File/dbinfo Berkeley DB database version checker
ext/DB_File/DB_File.pm Berkeley DB extension Perl module
ext/DB_File/DB_File.xs Berkeley DB extension external subroutines
ext/DB_File/DB_File_BS Berkeley DB extension mkbootstrap fodder
-ext/DB_File/dbinfo Berkeley DB database version checker
ext/DB_File/hints/dynixptx.pl Hint for DB_File for named architecture
ext/DB_File/hints/sco.pl Hint for DB_File for named architecture
ext/DB_File/Makefile.PL Berkeley DB extension makefile writer
ext/Digest/MD5/t/badfile.t See if Digest::MD5 extension works
ext/Digest/MD5/t/files.t See if Digest::MD5 extension works
ext/Digest/MD5/typemap Digest::MD5 extension
+ext/DynaLoader/dlutils.c Dynamic loader utilities for dl_*.xs files
ext/DynaLoader/dl_aix.xs AIX implementation
ext/DynaLoader/dl_beos.xs BeOS implementation
ext/DynaLoader/dl_dld.xs GNU dld style implementation
ext/DynaLoader/dl_none.xs Stub implementation
ext/DynaLoader/dl_vmesa.xs VM/ESA implementation
ext/DynaLoader/dl_vms.xs VMS implementation
-ext/DynaLoader/dlutils.c Dynamic loader utilities for dl_*.xs files
ext/DynaLoader/DynaLoader_pm.PL Dynamic Loader perl module
ext/DynaLoader/hints/aix.pl Hint for DynaLoader for named architecture
ext/DynaLoader/hints/linux.pl Hint for DynaLoader for named architecture
lib/Memoize/t/prototype.t Memoize
lib/Memoize/t/speed.t Memoize
lib/Memoize/t/tie.t Memoize
+lib/Memoize/t/tiefeatures.t Memoize
lib/Memoize/t/tie_gdbm.t Memoize
lib/Memoize/t/tie_ndbm.t Memoize
lib/Memoize/t/tie_sdbm.t Memoize
lib/Memoize/t/tie_storable.t Memoize
-lib/Memoize/t/tiefeatures.t Memoize
lib/Memoize/t/unmemoize.t Memoize
lib/Memoize/TODO Memoize
lib/Net/ChangeLog.libnet libnet
lib/unicode/In/0.pl Unicode character database
lib/unicode/In/1.pl Unicode character database
lib/unicode/In/10.pl Unicode character database
+lib/unicode/In/100.pl Unicode character database
+lib/unicode/In/101.pl Unicode character database
+lib/unicode/In/102.pl Unicode character database
+lib/unicode/In/103.pl Unicode character database
+lib/unicode/In/104.pl Unicode character database
+lib/unicode/In/105.pl Unicode character database
+lib/unicode/In/106.pl Unicode character database
+lib/unicode/In/107.pl Unicode character database
+lib/unicode/In/108.pl Unicode character database
+lib/unicode/In/109.pl Unicode character database
lib/unicode/In/11.pl Unicode character database
+lib/unicode/In/110.pl Unicode character database
+lib/unicode/In/111.pl Unicode character database
+lib/unicode/In/112.pl Unicode character database
+lib/unicode/In/113.pl Unicode character database
+lib/unicode/In/114.pl Unicode character database
+lib/unicode/In/115.pl Unicode character database
+lib/unicode/In/116.pl Unicode character database
+lib/unicode/In/117.pl Unicode character database
+lib/unicode/In/118.pl Unicode character database
+lib/unicode/In/119.pl Unicode character database
lib/unicode/In/12.pl Unicode character database
+lib/unicode/In/120.pl Unicode character database
+lib/unicode/In/121.pl Unicode character database
+lib/unicode/In/122.pl Unicode character database
+lib/unicode/In/123.pl Unicode character database
+lib/unicode/In/124.pl Unicode character database
+lib/unicode/In/125.pl Unicode character database
+lib/unicode/In/126.pl Unicode character database
+lib/unicode/In/127.pl Unicode character database
+lib/unicode/In/128.pl Unicode character database
+lib/unicode/In/129.pl Unicode character database
lib/unicode/In/13.pl Unicode character database
+lib/unicode/In/130.pl Unicode character database
+lib/unicode/In/131.pl Unicode character database
+lib/unicode/In/132.pl Unicode character database
+lib/unicode/In/133.pl Unicode character database
+lib/unicode/In/134.pl Unicode character database
+lib/unicode/In/135.pl Unicode character database
lib/unicode/In/14.pl Unicode character database
lib/unicode/In/15.pl Unicode character database
lib/unicode/In/16.pl Unicode character database
lib/unicode/In/93.pl Unicode character database
lib/unicode/In/94.pl Unicode character database
lib/unicode/In/95.pl Unicode character database
+lib/unicode/In/96.pl Unicode character database
+lib/unicode/In/97.pl Unicode character database
+lib/unicode/In/98.pl Unicode character database
+lib/unicode/In/99.pl Unicode character database
lib/unicode/Index.txt Unicode character database
lib/unicode/Is/Alnum.pl Unicode character database
lib/unicode/Is/Alpha.pl Unicode character database
lib/unicode/README.perl Unicode character database
lib/unicode/ReadMe.txt Unicode character database info
lib/unicode/rename Filename mappings used
+lib/unicode/Scripts.pl Unicode character database
lib/unicode/Scripts.txt Unicode character database
lib/unicode/SpecCase.txt Unicode character database
lib/unicode/syllables.txt Unicode character database
opnames.h Automatically generated opcode header
os2/Changes Changelog for OS/2 port
os2/diff.configure Patches to Configure
-os2/dl_os2.c Addon for dl_open
os2/dlfcn.h Addon for dl_open
+os2/dl_os2.c Addon for dl_open
os2/Makefile.SHs Shared library generation for OS/2
os2/os2.c Additional code for OS/2
os2/os2.sym Additional symbols to export
os2/OS2/REXX/t/rx_tieydb.t DLL access module
os2/OS2/REXX/t/rx_varset.t DLL access module
os2/OS2/REXX/t/rx_vrexx.t DLL access module
-os2/os2_base.t Additional tests for builtin methods
os2/os2add.sym Overriding symbols to export
os2/os2ish.h Header for OS/2
os2/os2thread.h pthread-like typedefs
+os2/os2_base.t Additional tests for builtin methods
os2/perl2cmd.pl Corrects installed binaries under OS/2
patchlevel.h The current patch level of perl
perl.c main()
perly.fixer A program to remove yacc stack limitations
perly.h The header file for perly.c
perly.y Yacc grammar for perl
-perly_c.diff Fixup perly.c to allow recursion
perlyline.pl Perl code to fix #line directives and gcc warnings in perly.c
+perly_c.diff Fixup perly.c to allow recursion
plan9/aperl Shell to make Perl error messages Acme-friendly
plan9/arpa/inet.h Plan9 port: replacement C header file
plan9/buildinfo Plan9 port: configuration information
t/op/quotemeta.t See if quotemeta works
t/op/rand.t See if rand works
t/op/range.t See if .. works
-t/op/re_tests Regular expressions for regexp.t
t/op/read.t See if read() works
t/op/readdir.t See if readdir() works
t/op/recurse.t See if deep recursion works
t/op/regmesg.t See if one can get regular expression errors
t/op/repeat.t See if x operator works
t/op/reverse.t See if reverse operator works
+t/op/re_tests Regular expressions for regexp.t
t/op/runlevel.t See if die() works from perl_call_*()
t/op/sleep.t See if sleep works
t/op/sort.t See if sort works
t/op/sprintf.t See if sprintf works
t/op/stat.t See if stat works
t/op/study.t See if study works
-t/op/sub_lval.t See if lvalue subroutines work
t/op/subst.t See if substitution works
+t/op/substr.t See if substr works
t/op/subst_amp.t See if $&-related substitution works
t/op/subst_wamp.t See if substitution works with $& present
-t/op/substr.t See if substr works
+t/op/sub_lval.t See if lvalue subroutines work
t/op/sysio.t See if sysread and syswrite work
t/op/taint.t See if tainting works
t/op/tie.t See if tie/untie functions work
vms/ext/vmsish.pm Control VMS-specific behavior of Perl core
vms/ext/vmsish.t Tests for vmsish.pm
vms/ext/XSSymSet.pm manage linker symbols when building extensions
-vms/gen_shrfls.pl generate options files and glue for shareable image
vms/genconfig.pl retcon config.sh from config.h
vms/genopt.com hack to write options files in case of broken makes
+vms/gen_shrfls.pl generate options files and glue for shareable image
vms/make_command.com record MM[SK] command used to build Perl
vms/mms2make.pl convert descrip.mms to make syntax
vms/munchconfig.c performs shell $var substitution for VMS
vms/sockadapt.h glue for SockshShr socket support
vms/test.com DCL driver for regression tests
vms/vms.c VMS-specific C code for Perl core
-vms/vms_yfix.pl convert Unix perly.[ch] to VMS perly_[ch].vms
vms/vmsish.h VMS-specific C header for Perl core
vms/vmspipe.com VMS-specific piped command helper script
+vms/vms_yfix.pl convert Unix perly.[ch] to VMS perly_[ch].vms
vms/writemain.pl Generate perlmain.c from miniperlmain.c+extensions
vos/build.cm VOS command macro to build Perl
vos/Changes Changes made to port Perl to the VOS operating system
vos/Makefile A helper for maintaining the config.*.* in UNIX
vos/perl.bind VOS bind control file
vos/test_vos_dummies.c Test program for "vos_dummies.c"
-vos/vos_dummies.c Wrappers to soak up undefined functions
vos/vosish.h VOS-specific header file
+vos/vos_dummies.c Wrappers to soak up undefined functions
warnings.h The warning numbers
warnings.pl Program to write warnings.h and lib/warnings.pm
win32/bin/exetype.pl Set executable type to CONSOLE or WINDOWS
my $code = shift;
unless (@BLOCKS) {
- if (openunicode(\$BLOCKS, "Blocks.pl")) {
+ if (openunicode(\$BLOCKS, "Blocks.txt")) {
while (<$BLOCKS>) {
- if (/^([0-9A-F]+)\s+([0-9A-F]+)\s+(.+)/) {
+ if (/^([0-9A-F]+)\.\.([0-9A-F]+);\s+(.+)/) {
push @BLOCKS, [ hex($1), hex($2), $3 ];
}
}
while the block names are Capitalized and with intermixed spaces,
e.g. C<Yi Syllables>.
+Greek
+Cyrillic
+Armenian
+Hebrew
+Arabic
+Syriac
+Thaana
+Devanagari
+Bengali
+Gurmukhi
+Gujarati
+Oriya
+Tamil
+Telugu
+Kannada
+Malayalam
+Sinhala
+Thai
+Lao
+Tibetan
+Myanmar
+Georgian
+Ethiopic
+Cherokee
+Ogham
+Runic
+Khmer
+Hiragana
+Katakana
+Bopomofo
+OldItalic
+Gothic
+Deseret
+
=head1 IMPLEMENTATION NOTE
The first use of charinfo() opens a read-only filehandle to the Unicode
# This file is built by mktables.PL from e.g. Unicode.txt.
# Any changes made here will be lost!
return <<'END';
-0000 007F Basic Latin
-# In/0.pl BasicLatin
-0080 00FF Latin-1 Supplement
-# In/1.pl Latin1Supplement
-0100 017F Latin Extended-A
-# In/2.pl LatinExtendedA
-0180 024F Latin Extended-B
-# In/3.pl LatinExtendedB
-0250 02AF IPA Extensions
-# In/4.pl IPAExtensions
-02B0 02FF Spacing Modifier Letters
-# In/5.pl SpacingModifierLetters
-0300 036F Combining Diacritical Marks
-# In/6.pl CombiningDiacriticalMarks
-0370 03FF Greek
-# In/7.pl Greek
-0400 04FF Cyrillic
-# In/8.pl Cyrillic
-0530 058F Armenian
-# In/9.pl Armenian
-0590 05FF Hebrew
-# In/10.pl Hebrew
-0600 06FF Arabic
-# In/11.pl Arabic
-0700 074F Syriac
-# In/12.pl Syriac
-0780 07BF Thaana
-# In/13.pl Thaana
-0900 097F Devanagari
-# In/14.pl Devanagari
-0980 09FF Bengali
-# In/15.pl Bengali
-0A00 0A7F Gurmukhi
-# In/16.pl Gurmukhi
-0A80 0AFF Gujarati
-# In/17.pl Gujarati
-0B00 0B7F Oriya
-# In/18.pl Oriya
-0B80 0BFF Tamil
-# In/19.pl Tamil
-0C00 0C7F Telugu
-# In/20.pl Telugu
-0C80 0CFF Kannada
-# In/21.pl Kannada
-0D00 0D7F Malayalam
-# In/22.pl Malayalam
-0D80 0DFF Sinhala
-# In/23.pl Sinhala
-0E00 0E7F Thai
-# In/24.pl Thai
-0E80 0EFF Lao
-# In/25.pl Lao
-0F00 0FFF Tibetan
-# In/26.pl Tibetan
-1000 109F Myanmar
-# In/27.pl Myanmar
-10A0 10FF Georgian
-# In/28.pl Georgian
-1100 11FF Hangul Jamo
-# In/29.pl HangulJamo
-1200 137F Ethiopic
-# In/30.pl Ethiopic
-13A0 13FF Cherokee
-# In/31.pl Cherokee
-1400 167F Unified Canadian Aboriginal Syllabics
-# In/32.pl UnifiedCanadianAboriginalSyllabics
-1680 169F Ogham
-# In/33.pl Ogham
-16A0 16FF Runic
-# In/34.pl Runic
-1780 17FF Khmer
-# In/35.pl Khmer
-1800 18AF Mongolian
-# In/36.pl Mongolian
-1E00 1EFF Latin Extended Additional
-# In/37.pl LatinExtendedAdditional
-1F00 1FFF Greek Extended
-# In/38.pl GreekExtended
-2000 206F General Punctuation
-# In/39.pl GeneralPunctuation
-2070 209F Superscripts and Subscripts
-# In/40.pl SuperscriptsandSubscripts
-20A0 20CF Currency Symbols
-# In/41.pl CurrencySymbols
-20D0 20FF Combining Marks for Symbols
-# In/42.pl CombiningMarksforSymbols
-2100 214F Letterlike Symbols
-# In/43.pl LetterlikeSymbols
-2150 218F Number Forms
-# In/44.pl NumberForms
-2190 21FF Arrows
-# In/45.pl Arrows
-2200 22FF Mathematical Operators
-# In/46.pl MathematicalOperators
-2300 23FF Miscellaneous Technical
-# In/47.pl MiscellaneousTechnical
-2400 243F Control Pictures
-# In/48.pl ControlPictures
-2440 245F Optical Character Recognition
-# In/49.pl OpticalCharacterRecognition
-2460 24FF Enclosed Alphanumerics
-# In/50.pl EnclosedAlphanumerics
-2500 257F Box Drawing
-# In/51.pl BoxDrawing
-2580 259F Block Elements
-# In/52.pl BlockElements
-25A0 25FF Geometric Shapes
-# In/53.pl GeometricShapes
-2600 26FF Miscellaneous Symbols
-# In/54.pl MiscellaneousSymbols
-2700 27BF Dingbats
-# In/55.pl Dingbats
-2800 28FF Braille Patterns
-# In/56.pl BraillePatterns
-2E80 2EFF CJK Radicals Supplement
-# In/57.pl CJKRadicalsSupplement
-2F00 2FDF Kangxi Radicals
-# In/58.pl KangxiRadicals
-2FF0 2FFF Ideographic Description Characters
-# In/59.pl IdeographicDescriptionCharacters
-3000 303F CJK Symbols and Punctuation
-# In/60.pl CJKSymbolsandPunctuation
-3040 309F Hiragana
-# In/61.pl Hiragana
-30A0 30FF Katakana
-# In/62.pl Katakana
-3100 312F Bopomofo
-# In/63.pl Bopomofo
-3130 318F Hangul Compatibility Jamo
-# In/64.pl HangulCompatibilityJamo
-3190 319F Kanbun
-# In/65.pl Kanbun
-31A0 31BF Bopomofo Extended
-# In/66.pl BopomofoExtended
-3200 32FF Enclosed CJK Letters and Months
-# In/67.pl EnclosedCJKLettersandMonths
-3300 33FF CJK Compatibility
-# In/68.pl CJKCompatibility
-3400 4DB5 CJK Unified Ideographs Extension A
-# In/69.pl CJKUnifiedIdeographsExtensionA
-4E00 9FFF CJK Unified Ideographs
-# In/70.pl CJKUnifiedIdeographs
-A000 A48F Yi Syllables
-# In/71.pl YiSyllables
-A490 A4CF Yi Radicals
-# In/72.pl YiRadicals
-AC00 D7A3 Hangul Syllables
-# In/73.pl HangulSyllables
-D800 DB7F High Surrogates
-# In/74.pl HighSurrogates
-DB80 DBFF High Private Use Surrogates
-# In/75.pl HighPrivateUseSurrogates
-DC00 DFFF Low Surrogates
-# In/76.pl LowSurrogates
-E000 F8FF Private Use
-# In/77.pl PrivateUse
-F900 FAFF CJK Compatibility Ideographs
-# In/78.pl CJKCompatibilityIdeographs
-FB00 FB4F Alphabetic Presentation Forms
-# In/79.pl AlphabeticPresentationForms
-FB50 FDFF Arabic Presentation Forms-A
-# In/80.pl ArabicPresentationFormsA
-FE20 FE2F Combining Half Marks
-# In/81.pl CombiningHalfMarks
-FE30 FE4F CJK Compatibility Forms
-# In/82.pl CJKCompatibilityForms
-FE50 FE6F Small Form Variants
-# In/83.pl SmallFormVariants
-FE70 FEFE Arabic Presentation Forms-B
-# In/84.pl ArabicPresentationFormsB
-FEFF FEFF Specials
-# In/85.pl Specials
-FF00 FFEF Halfwidth and Fullwidth Forms
-# In/86.pl HalfwidthandFullwidthForms
-FFF0 FFFD Specials
-# In/85.pl Specials
-10300 1032F Old Italic
-# In/87.pl OldItalic
-10330 1034F Gothic
-# In/88.pl Gothic
-10400 1044F Deseret
-# In/89.pl Deseret
-1D000 1D0FF Byzantine Musical Symbols
-# In/90.pl ByzantineMusicalSymbols
-1D100 1D1FF Musical Symbols
-# In/91.pl MusicalSymbols
-1D400 1D7FF Mathematical Alphanumeric Symbols
-# In/92.pl MathematicalAlphanumericSymbols
-20000 2A6D6 CJK Unified Ideographs Extension B
-# In/93.pl CJKUnifiedIdeographsExtensionB
-2F800 2FA1F CJK Compatibility Ideographs Supplement
-# In/94.pl CJKCompatibilityIdeographsSupplement
-E0000 E007F Tags
-# In/95.pl Tags
-F0000 FFFFD Private Use
-# In/77.pl PrivateUse
-100000 10FFFD Private Use
-# In/77.pl PrivateUse
+0000 007F Basic Latin # BasicLatin In/40.pl
+0080 00FF Latin-1 Supplement # Latin1Supplement In/41.pl
+0100 017F Latin Extended-A # LatinExtendedA In/42.pl
+0180 024F Latin Extended-B # LatinExtendedB In/43.pl
+0250 02AF IPA Extensions # IPAExtensions In/44.pl
+02B0 02FF Spacing Modifier Letters # SpacingModifierLetters In/45.pl
+0300 036F Combining Diacritical Marks # CombiningDiacriticalMarks In/46.pl
+0370 03FF Greek # GreekBlock In/47.pl
+0400 04FF Cyrillic # CyrillicBlock In/48.pl
+0530 058F Armenian # ArmenianBlock In/49.pl
+0590 05FF Hebrew # HebrewBlock In/50.pl
+0600 06FF Arabic # ArabicBlock In/51.pl
+0700 074F Syriac # SyriacBlock In/52.pl
+0780 07BF Thaana # ThaanaBlock In/53.pl
+0900 097F Devanagari # DevanagariBlock In/54.pl
+0980 09FF Bengali # BengaliBlock In/55.pl
+0A00 0A7F Gurmukhi # GurmukhiBlock In/56.pl
+0A80 0AFF Gujarati # GujaratiBlock In/57.pl
+0B00 0B7F Oriya # OriyaBlock In/58.pl
+0B80 0BFF Tamil # TamilBlock In/59.pl
+0C00 0C7F Telugu # TeluguBlock In/60.pl
+0C80 0CFF Kannada # KannadaBlock In/61.pl
+0D00 0D7F Malayalam # MalayalamBlock In/62.pl
+0D80 0DFF Sinhala # SinhalaBlock In/63.pl
+0E00 0E7F Thai # ThaiBlock In/64.pl
+0E80 0EFF Lao # LaoBlock In/65.pl
+0F00 0FFF Tibetan # TibetanBlock In/66.pl
+1000 109F Myanmar # MyanmarBlock In/67.pl
+10A0 10FF Georgian # GeorgianBlock In/68.pl
+1100 11FF Hangul Jamo # HangulJamo In/69.pl
+1200 137F Ethiopic # EthiopicBlock In/70.pl
+13A0 13FF Cherokee # CherokeeBlock In/71.pl
+1400 167F Unified Canadian Aboriginal Syllabics # UnifiedCanadianAboriginalSyllabics In/72.pl
+1680 169F Ogham # OghamBlock In/73.pl
+16A0 16FF Runic # RunicBlock In/74.pl
+1780 17FF Khmer # KhmerBlock In/75.pl
+1800 18AF Mongolian # MongolianBlock In/76.pl
+1E00 1EFF Latin Extended Additional # LatinExtendedAdditional In/77.pl
+1F00 1FFF Greek Extended # GreekExtended In/78.pl
+2000 206F General Punctuation # GeneralPunctuation In/79.pl
+2070 209F Superscripts and Subscripts # SuperscriptsandSubscripts In/80.pl
+20A0 20CF Currency Symbols # CurrencySymbols In/81.pl
+20D0 20FF Combining Marks for Symbols # CombiningMarksforSymbols In/82.pl
+2100 214F Letterlike Symbols # LetterlikeSymbols In/83.pl
+2150 218F Number Forms # NumberForms In/84.pl
+2190 21FF Arrows # Arrows In/85.pl
+2200 22FF Mathematical Operators # MathematicalOperators In/86.pl
+2300 23FF Miscellaneous Technical # MiscellaneousTechnical In/87.pl
+2400 243F Control Pictures # ControlPictures In/88.pl
+2440 245F Optical Character Recognition # OpticalCharacterRecognition In/89.pl
+2460 24FF Enclosed Alphanumerics # EnclosedAlphanumerics In/90.pl
+2500 257F Box Drawing # BoxDrawing In/91.pl
+2580 259F Block Elements # BlockElements In/92.pl
+25A0 25FF Geometric Shapes # GeometricShapes In/93.pl
+2600 26FF Miscellaneous Symbols # MiscellaneousSymbols In/94.pl
+2700 27BF Dingbats # Dingbats In/95.pl
+2800 28FF Braille Patterns # BraillePatterns In/96.pl
+2E80 2EFF CJK Radicals Supplement # CJKRadicalsSupplement In/97.pl
+2F00 2FDF Kangxi Radicals # KangxiRadicals In/98.pl
+2FF0 2FFF Ideographic Description Characters # IdeographicDescriptionCharacters In/99.pl
+3000 303F CJK Symbols and Punctuation # CJKSymbolsandPunctuation In/100.pl
+3040 309F Hiragana # HiraganaBlock In/101.pl
+30A0 30FF Katakana # KatakanaBlock In/102.pl
+3100 312F Bopomofo # BopomofoBlock In/103.pl
+3130 318F Hangul Compatibility Jamo # HangulCompatibilityJamo In/104.pl
+3190 319F Kanbun # Kanbun In/105.pl
+31A0 31BF Bopomofo Extended # BopomofoExtended In/106.pl
+3200 32FF Enclosed CJK Letters and Months # EnclosedCJKLettersandMonths In/107.pl
+3300 33FF CJK Compatibility # CJKCompatibility In/108.pl
+3400 4DB5 CJK Unified Ideographs Extension A # CJKUnifiedIdeographsExtensionA In/109.pl
+4E00 9FFF CJK Unified Ideographs # CJKUnifiedIdeographs In/110.pl
+A000 A48F Yi Syllables # YiSyllables In/111.pl
+A490 A4CF Yi Radicals # YiRadicals In/112.pl
+AC00 D7A3 Hangul Syllables # HangulSyllables In/113.pl
+D800 DB7F High Surrogates # HighSurrogates In/114.pl
+DB80 DBFF High Private Use Surrogates # HighPrivateUseSurrogates In/115.pl
+DC00 DFFF Low Surrogates # LowSurrogates In/116.pl
+E000 F8FF Private Use # PrivateUse In/117.pl
+F900 FAFF CJK Compatibility Ideographs # CJKCompatibilityIdeographs In/118.pl
+FB00 FB4F Alphabetic Presentation Forms # AlphabeticPresentationForms In/119.pl
+FB50 FDFF Arabic Presentation Forms-A # ArabicPresentationFormsA In/120.pl
+FE20 FE2F Combining Half Marks # CombiningHalfMarks In/121.pl
+FE30 FE4F CJK Compatibility Forms # CJKCompatibilityForms In/122.pl
+FE50 FE6F Small Form Variants # SmallFormVariants In/123.pl
+FE70 FEFE Arabic Presentation Forms-B # ArabicPresentationFormsB In/124.pl
+FEFF FEFF Specials # Specials In/125.pl
+FF00 FFEF Halfwidth and Fullwidth Forms # HalfwidthandFullwidthForms In/126.pl
+FFF0 FFFD Specials # Specials In/125.pl
+10300 1032F Old Italic # OldItalicBlock In/127.pl
+10330 1034F Gothic # GothicBlock In/128.pl
+10400 1044F Deseret # DeseretBlock In/129.pl
+1D000 1D0FF Byzantine Musical Symbols # ByzantineMusicalSymbols In/130.pl
+1D100 1D1FF Musical Symbols # MusicalSymbols In/131.pl
+1D400 1D7FF Mathematical Alphanumeric Symbols # MathematicalAlphanumericSymbols In/132.pl
+20000 2A6D6 CJK Unified Ideographs Extension B # CJKUnifiedIdeographsExtensionB In/133.pl
+2F800 2FA1F CJK Compatibility Ideographs Supplement # CJKCompatibilityIdeographsSupplement In/134.pl
+E0000 E007F Tags # Tags In/135.pl
+F0000 FFFFD Private Use # PrivateUse In/117.pl
+100000 10FFFD Private Use # PrivateUse In/117.pl
END
# This file is built by mktables.PL from e.g. Unicode.txt.
# Any changes made here will be lost!
%utf8::In = (
-'BasicLatin' => 0,
-'Latin1Supplement' => 1,
-'LatinExtendedA' => 2,
-'LatinExtendedB' => 3,
-'IPAExtensions' => 4,
-'SpacingModifierLetters' => 5,
-'CombiningDiacriticalMarks' => 6,
-'Greek' => 7,
-'Cyrillic' => 8,
-'Armenian' => 9,
-'Hebrew' => 10,
-'Arabic' => 11,
-'Syriac' => 12,
-'Thaana' => 13,
-'Devanagari' => 14,
-'Bengali' => 15,
-'Gurmukhi' => 16,
-'Gujarati' => 17,
-'Oriya' => 18,
-'Tamil' => 19,
-'Telugu' => 20,
-'Kannada' => 21,
-'Malayalam' => 22,
-'Sinhala' => 23,
-'Thai' => 24,
-'Lao' => 25,
-'Tibetan' => 26,
-'Myanmar' => 27,
-'Georgian' => 28,
-'HangulJamo' => 29,
-'Ethiopic' => 30,
-'Cherokee' => 31,
-'UnifiedCanadianAboriginalSyllabics' => 32,
-'Ogham' => 33,
-'Runic' => 34,
-'Khmer' => 35,
-'Mongolian' => 36,
-'LatinExtendedAdditional' => 37,
-'GreekExtended' => 38,
-'GeneralPunctuation' => 39,
-'SuperscriptsandSubscripts' => 40,
-'CurrencySymbols' => 41,
-'CombiningMarksforSymbols' => 42,
-'LetterlikeSymbols' => 43,
-'NumberForms' => 44,
-'Arrows' => 45,
-'MathematicalOperators' => 46,
-'MiscellaneousTechnical' => 47,
-'ControlPictures' => 48,
-'OpticalCharacterRecognition' => 49,
-'EnclosedAlphanumerics' => 50,
-'BoxDrawing' => 51,
-'BlockElements' => 52,
-'GeometricShapes' => 53,
-'MiscellaneousSymbols' => 54,
-'Dingbats' => 55,
-'BraillePatterns' => 56,
-'CJKRadicalsSupplement' => 57,
-'KangxiRadicals' => 58,
-'IdeographicDescriptionCharacters' => 59,
-'CJKSymbolsandPunctuation' => 60,
-'Hiragana' => 61,
-'Katakana' => 62,
-'Bopomofo' => 63,
-'HangulCompatibilityJamo' => 64,
-'Kanbun' => 65,
-'BopomofoExtended' => 66,
-'EnclosedCJKLettersandMonths' => 67,
-'CJKCompatibility' => 68,
-'CJKUnifiedIdeographsExtensionA' => 69,
-'CJKUnifiedIdeographs' => 70,
-'YiSyllables' => 71,
-'YiRadicals' => 72,
-'HangulSyllables' => 73,
-'HighSurrogates' => 74,
-'HighPrivateUseSurrogates' => 75,
-'LowSurrogates' => 76,
-'PrivateUse' => 77,
-'CJKCompatibilityIdeographs' => 78,
-'AlphabeticPresentationForms' => 79,
-'ArabicPresentationFormsA' => 80,
-'CombiningHalfMarks' => 81,
-'CJKCompatibilityForms' => 82,
-'SmallFormVariants' => 83,
-'ArabicPresentationFormsB' => 84,
-'Specials' => 85,
-'HalfwidthandFullwidthForms' => 86,
-'OldItalic' => 87,
-'Gothic' => 88,
-'Deseret' => 89,
-'ByzantineMusicalSymbols' => 90,
-'MusicalSymbols' => 91,
-'MathematicalAlphanumericSymbols' => 92,
-'CJKUnifiedIdeographsExtensionB' => 93,
-'CJKCompatibilityIdeographsSupplement' => 94,
-'Tags' => 95,
+'Latin' => 0,
+'Greek' => 1,
+'Cyrillic' => 2,
+'Armenian' => 3,
+'Hebrew' => 4,
+'Arabic' => 5,
+'Syriac' => 6,
+'Thaana' => 7,
+'Devanagari' => 8,
+'Bengali' => 9,
+'Gurmukhi' => 10,
+'Gujarati' => 11,
+'Oriya' => 12,
+'Tamil' => 13,
+'Telugu' => 14,
+'Kannada' => 15,
+'Malayalam' => 16,
+'Sinhala' => 17,
+'Thai' => 18,
+'Lao' => 19,
+'Tibetan' => 20,
+'Myanmar' => 21,
+'Georgian' => 22,
+'Hangul' => 23,
+'Ethiopic' => 24,
+'Cherokee' => 25,
+'CanadianAboriginal' => 26,
+'Ogham' => 27,
+'Runic' => 28,
+'Khmer' => 29,
+'Mongolian' => 30,
+'Hiragana' => 31,
+'Katakana' => 32,
+'Bopomofo' => 33,
+'Han' => 34,
+'Yi' => 35,
+'OldItalic' => 36,
+'Gothic' => 37,
+'Deseret' => 38,
+'Inherited' => 39,
+'BasicLatin' => 40,
+'Latin1Supplement' => 41,
+'LatinExtendedA' => 42,
+'LatinExtendedB' => 43,
+'IPAExtensions' => 44,
+'SpacingModifierLetters' => 45,
+'CombiningDiacriticalMarks' => 46,
+'GreekBlock' => 47,
+'CyrillicBlock' => 48,
+'ArmenianBlock' => 49,
+'HebrewBlock' => 50,
+'ArabicBlock' => 51,
+'SyriacBlock' => 52,
+'ThaanaBlock' => 53,
+'DevanagariBlock' => 54,
+'BengaliBlock' => 55,
+'GurmukhiBlock' => 56,
+'GujaratiBlock' => 57,
+'OriyaBlock' => 58,
+'TamilBlock' => 59,
+'TeluguBlock' => 60,
+'KannadaBlock' => 61,
+'MalayalamBlock' => 62,
+'SinhalaBlock' => 63,
+'ThaiBlock' => 64,
+'LaoBlock' => 65,
+'TibetanBlock' => 66,
+'MyanmarBlock' => 67,
+'GeorgianBlock' => 68,
+'HangulJamo' => 69,
+'EthiopicBlock' => 70,
+'CherokeeBlock' => 71,
+'UnifiedCanadianAboriginalSyllabics' => 72,
+'OghamBlock' => 73,
+'RunicBlock' => 74,
+'KhmerBlock' => 75,
+'MongolianBlock' => 76,
+'LatinExtendedAdditional' => 77,
+'GreekExtended' => 78,
+'GeneralPunctuation' => 79,
+'SuperscriptsandSubscripts' => 80,
+'CurrencySymbols' => 81,
+'CombiningMarksforSymbols' => 82,
+'LetterlikeSymbols' => 83,
+'NumberForms' => 84,
+'Arrows' => 85,
+'MathematicalOperators' => 86,
+'MiscellaneousTechnical' => 87,
+'ControlPictures' => 88,
+'OpticalCharacterRecognition' => 89,
+'EnclosedAlphanumerics' => 90,
+'BoxDrawing' => 91,
+'BlockElements' => 92,
+'GeometricShapes' => 93,
+'MiscellaneousSymbols' => 94,
+'Dingbats' => 95,
+'BraillePatterns' => 96,
+'CJKRadicalsSupplement' => 97,
+'KangxiRadicals' => 98,
+'IdeographicDescriptionCharacters' => 99,
+'CJKSymbolsandPunctuation' => 100,
+'HiraganaBlock' => 101,
+'KatakanaBlock' => 102,
+'BopomofoBlock' => 103,
+'HangulCompatibilityJamo' => 104,
+'Kanbun' => 105,
+'BopomofoExtended' => 106,
+'EnclosedCJKLettersandMonths' => 107,
+'CJKCompatibility' => 108,
+'CJKUnifiedIdeographsExtensionA' => 109,
+'CJKUnifiedIdeographs' => 110,
+'YiSyllables' => 111,
+'YiRadicals' => 112,
+'HangulSyllables' => 113,
+'HighSurrogates' => 114,
+'HighPrivateUseSurrogates' => 115,
+'LowSurrogates' => 116,
+'PrivateUse' => 117,
+'CJKCompatibilityIdeographs' => 118,
+'AlphabeticPresentationForms' => 119,
+'ArabicPresentationFormsA' => 120,
+'CombiningHalfMarks' => 121,
+'CJKCompatibilityForms' => 122,
+'SmallFormVariants' => 123,
+'ArabicPresentationFormsB' => 124,
+'Specials' => 125,
+'HalfwidthandFullwidthForms' => 126,
+'OldItalicBlock' => 127,
+'GothicBlock' => 128,
+'DeseretBlock' => 129,
+'ByzantineMusicalSymbols' => 130,
+'MusicalSymbols' => 131,
+'MathematicalAlphanumericSymbols' => 132,
+'CJKUnifiedIdeographsExtensionB' => 133,
+'CJKCompatibilityIdeographsSupplement' => 134,
+'Tags' => 135,
);
# This file is built by mktables.PL from e.g. Unicode.txt.
# Any changes made here will be lost!
return <<'END';
-0000 007F
+0041 005A
+0061 007A
+00C0 00D6
+00D8 00F6
+00F8 01BA
+01BC 01BF
+01C0 01C3
+01C4 021F
+0222 0233
+0250 02AD
+02B0 02B8
+02E0 02E4
+1E00 1E9B
+1EA0 1EF9
+212A 212B
+FB00 FB06
+FF21 FF3A
+FF41 FF5A
END
# This file is built by mktables.PL from e.g. Unicode.txt.
# Any changes made here will be lost!
return <<'END';
-0080 00FF
+0388 038A
+038E 03A1
+03A3 03CE
+03D0 03D7
+03DA 03F5
+1F00 1F15
+1F18 1F1D
+1F20 1F45
+1F48 1F4D
+1F50 1F57
+1F5F 1F7D
+1F80 1FB4
+1FB6 1FBC
+1FC2 1FC4
+1FC6 1FCC
+1FD0 1FD3
+1FD6 1FDB
+1FE0 1FEC
+1FF2 1FF4
+1FF6 1FFC
END
# This file is built by mktables.PL from e.g. Unicode.txt.
# Any changes made here will be lost!
return <<'END';
-0590 05FF
+0A05 0A0A
+0A0F 0A10
+0A13 0A28
+0A2A 0A30
+0A32 0A33
+0A35 0A36
+0A38 0A39
+0A3E 0A40
+0A41 0A42
+0A47 0A48
+0A4B 0A4D
+0A59 0A5C
+0A66 0A6F
+0A70 0A71
+0A72 0A74
END
--- /dev/null
+# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
+# This file is built by mktables.PL from e.g. Unicode.txt.
+# Any changes made here will be lost!
+return <<'END';
+3000 303F
+END
--- /dev/null
+# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
+# This file is built by mktables.PL from e.g. Unicode.txt.
+# Any changes made here will be lost!
+return <<'END';
+3040 309F
+END
--- /dev/null
+# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
+# This file is built by mktables.PL from e.g. Unicode.txt.
+# Any changes made here will be lost!
+return <<'END';
+30A0 30FF
+END
--- /dev/null
+# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
+# This file is built by mktables.PL from e.g. Unicode.txt.
+# Any changes made here will be lost!
+return <<'END';
+3100 312F
+END
--- /dev/null
+# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
+# This file is built by mktables.PL from e.g. Unicode.txt.
+# Any changes made here will be lost!
+return <<'END';
+3130 318F
+END
--- /dev/null
+# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
+# This file is built by mktables.PL from e.g. Unicode.txt.
+# Any changes made here will be lost!
+return <<'END';
+3190 319F
+END
--- /dev/null
+# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
+# This file is built by mktables.PL from e.g. Unicode.txt.
+# Any changes made here will be lost!
+return <<'END';
+31A0 31BF
+END
--- /dev/null
+# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
+# This file is built by mktables.PL from e.g. Unicode.txt.
+# Any changes made here will be lost!
+return <<'END';
+3200 32FF
+END
--- /dev/null
+# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
+# This file is built by mktables.PL from e.g. Unicode.txt.
+# Any changes made here will be lost!
+return <<'END';
+3300 33FF
+END
--- /dev/null
+# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
+# This file is built by mktables.PL from e.g. Unicode.txt.
+# Any changes made here will be lost!
+return <<'END';
+3400 4DB5
+END
# This file is built by mktables.PL from e.g. Unicode.txt.
# Any changes made here will be lost!
return <<'END';
-0600 06FF
+0A81 0A82
+0A85 0A8B
+0A8F 0A91
+0A93 0AA8
+0AAA 0AB0
+0AB2 0AB3
+0AB5 0AB9
+0ABE 0AC0
+0AC1 0AC5
+0AC7 0AC8
+0ACB 0ACC
+0AE6 0AEF
END
--- /dev/null
+# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
+# This file is built by mktables.PL from e.g. Unicode.txt.
+# Any changes made here will be lost!
+return <<'END';
+4E00 9FFF
+END
--- /dev/null
+# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
+# This file is built by mktables.PL from e.g. Unicode.txt.
+# Any changes made here will be lost!
+return <<'END';
+A000 A48F
+END
--- /dev/null
+# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
+# This file is built by mktables.PL from e.g. Unicode.txt.
+# Any changes made here will be lost!
+return <<'END';
+A490 A4CF
+END
--- /dev/null
+# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
+# This file is built by mktables.PL from e.g. Unicode.txt.
+# Any changes made here will be lost!
+return <<'END';
+AC00 D7A3
+END
--- /dev/null
+# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
+# This file is built by mktables.PL from e.g. Unicode.txt.
+# Any changes made here will be lost!
+return <<'END';
+D800 DB7F
+END
--- /dev/null
+# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
+# This file is built by mktables.PL from e.g. Unicode.txt.
+# Any changes made here will be lost!
+return <<'END';
+DB80 DBFF
+END
--- /dev/null
+# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
+# This file is built by mktables.PL from e.g. Unicode.txt.
+# Any changes made here will be lost!
+return <<'END';
+DC00 DFFF
+END
--- /dev/null
+# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
+# This file is built by mktables.PL from e.g. Unicode.txt.
+# Any changes made here will be lost!
+return <<'END';
+100000 10FFFD
+END
--- /dev/null
+# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
+# This file is built by mktables.PL from e.g. Unicode.txt.
+# Any changes made here will be lost!
+return <<'END';
+F900 FAFF
+END
--- /dev/null
+# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
+# This file is built by mktables.PL from e.g. Unicode.txt.
+# Any changes made here will be lost!
+return <<'END';
+FB00 FB4F
+END
# This file is built by mktables.PL from e.g. Unicode.txt.
# Any changes made here will be lost!
return <<'END';
-0700 074F
+0B02 0B03
+0B05 0B0C
+0B0F 0B10
+0B13 0B28
+0B2A 0B30
+0B32 0B33
+0B36 0B39
+0B41 0B43
+0B47 0B48
+0B4B 0B4C
+0B5C 0B5D
+0B5F 0B61
+0B66 0B6F
END
--- /dev/null
+# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
+# This file is built by mktables.PL from e.g. Unicode.txt.
+# Any changes made here will be lost!
+return <<'END';
+FB50 FDFF
+END
--- /dev/null
+# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
+# This file is built by mktables.PL from e.g. Unicode.txt.
+# Any changes made here will be lost!
+return <<'END';
+FE20 FE2F
+END
--- /dev/null
+# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
+# This file is built by mktables.PL from e.g. Unicode.txt.
+# Any changes made here will be lost!
+return <<'END';
+FE30 FE4F
+END
--- /dev/null
+# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
+# This file is built by mktables.PL from e.g. Unicode.txt.
+# Any changes made here will be lost!
+return <<'END';
+FE50 FE6F
+END
--- /dev/null
+# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
+# This file is built by mktables.PL from e.g. Unicode.txt.
+# Any changes made here will be lost!
+return <<'END';
+FE70 FEFE
+END
--- /dev/null
+# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
+# This file is built by mktables.PL from e.g. Unicode.txt.
+# Any changes made here will be lost!
+return <<'END';
+FFF0 FFFD
+END
--- /dev/null
+# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
+# This file is built by mktables.PL from e.g. Unicode.txt.
+# Any changes made here will be lost!
+return <<'END';
+FF00 FFEF
+END
--- /dev/null
+# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
+# This file is built by mktables.PL from e.g. Unicode.txt.
+# Any changes made here will be lost!
+return <<'END';
+10300 1032F
+END
--- /dev/null
+# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
+# This file is built by mktables.PL from e.g. Unicode.txt.
+# Any changes made here will be lost!
+return <<'END';
+10330 1034F
+END
--- /dev/null
+# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
+# This file is built by mktables.PL from e.g. Unicode.txt.
+# Any changes made here will be lost!
+return <<'END';
+10400 1044F
+END
# This file is built by mktables.PL from e.g. Unicode.txt.
# Any changes made here will be lost!
return <<'END';
-0780 07BF
+0B85 0B8A
+0B8E 0B90
+0B92 0B95
+0B99 0B9A
+0B9E 0B9F
+0BA3 0BA4
+0BA8 0BAA
+0BAE 0BB5
+0BB7 0BB9
+0BBE 0BBF
+0BC1 0BC2
+0BC6 0BC8
+0BCA 0BCC
+0BE7 0BEF
+0BF0 0BF2
END
--- /dev/null
+# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
+# This file is built by mktables.PL from e.g. Unicode.txt.
+# Any changes made here will be lost!
+return <<'END';
+1D000 1D0FF
+END
--- /dev/null
+# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
+# This file is built by mktables.PL from e.g. Unicode.txt.
+# Any changes made here will be lost!
+return <<'END';
+1D100 1D1FF
+END
--- /dev/null
+# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
+# This file is built by mktables.PL from e.g. Unicode.txt.
+# Any changes made here will be lost!
+return <<'END';
+1D400 1D7FF
+END
--- /dev/null
+# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
+# This file is built by mktables.PL from e.g. Unicode.txt.
+# Any changes made here will be lost!
+return <<'END';
+20000 2A6D6
+END
--- /dev/null
+# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
+# This file is built by mktables.PL from e.g. Unicode.txt.
+# Any changes made here will be lost!
+return <<'END';
+2F800 2FA1F
+END
--- /dev/null
+# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
+# This file is built by mktables.PL from e.g. Unicode.txt.
+# Any changes made here will be lost!
+return <<'END';
+E0000 E007F
+END
# This file is built by mktables.PL from e.g. Unicode.txt.
# Any changes made here will be lost!
return <<'END';
-0900 097F
+0C01 0C03
+0C05 0C0C
+0C0E 0C10
+0C12 0C28
+0C2A 0C33
+0C35 0C39
+0C3E 0C40
+0C41 0C44
+0C46 0C48
+0C4A 0C4D
+0C55 0C56
+0C60 0C61
+0C66 0C6F
END
# This file is built by mktables.PL from e.g. Unicode.txt.
# Any changes made here will be lost!
return <<'END';
-0980 09FF
+0C82 0C83
+0C85 0C8C
+0C8E 0C90
+0C92 0CA8
+0CAA 0CB3
+0CB5 0CB9
+0CC0 0CC4
+0CC7 0CC8
+0CCA 0CCB
+0CCC 0CCD
+0CD5 0CD6
+0CE0 0CE1
+0CE6 0CEF
END
# This file is built by mktables.PL from e.g. Unicode.txt.
# Any changes made here will be lost!
return <<'END';
-0A00 0A7F
+0D02 0D03
+0D05 0D0C
+0D0E 0D10
+0D12 0D28
+0D2A 0D39
+0D3E 0D40
+0D41 0D43
+0D46 0D48
+0D4A 0D4C
+0D60 0D61
+0D66 0D6F
END
# This file is built by mktables.PL from e.g. Unicode.txt.
# Any changes made here will be lost!
return <<'END';
-0A80 0AFF
+0D82 0D83
+0D85 0D96
+0D9A 0DB1
+0DB3 0DBB
+0DC0 0DC6
+0DCF 0DD1
+0DD2 0DD4
+0DD8 0DDF
+0DF2 0DF3
END
# This file is built by mktables.PL from e.g. Unicode.txt.
# Any changes made here will be lost!
return <<'END';
-0B00 0B7F
+0E01 0E30
+0E32 0E33
+0E34 0E3A
+0E40 0E45
+0E47 0E4E
+0E50 0E59
END
# This file is built by mktables.PL from e.g. Unicode.txt.
# Any changes made here will be lost!
return <<'END';
-0B80 0BFF
+0E81 0E82
+0E87 0E88
+0E94 0E97
+0E99 0E9F
+0EA1 0EA3
+0EAA 0EAB
+0EAD 0EB0
+0EB2 0EB3
+0EB4 0EB9
+0EBB 0EBC
+0EC0 0EC4
+0EC8 0ECD
+0ED0 0ED9
+0EDC 0EDD
END
# This file is built by mktables.PL from e.g. Unicode.txt.
# Any changes made here will be lost!
return <<'END';
-0100 017F
+0400 0481
+0483 0486
+048C 04C4
+04C7 04C8
+04CB 04CC
+04D0 04F5
+04F8 04F9
END
# This file is built by mktables.PL from e.g. Unicode.txt.
# Any changes made here will be lost!
return <<'END';
-0C00 0C7F
+0F18 0F19
+0F20 0F29
+0F2A 0F33
+0F40 0F47
+0F49 0F6A
+0F71 0F7E
+0F80 0F84
+0F86 0F87
+0F88 0F8B
+0F90 0F97
+0F99 0FBC
END
# This file is built by mktables.PL from e.g. Unicode.txt.
# Any changes made here will be lost!
return <<'END';
-0C80 0CFF
+1000 1021
+1023 1027
+1029 102A
+102D 1030
+1036 1037
+1040 1049
+1050 1055
+1056 1057
+1058 1059
END
# This file is built by mktables.PL from e.g. Unicode.txt.
# Any changes made here will be lost!
return <<'END';
-0D00 0D7F
+10A0 10C5
+10D0 10F6
END
# This file is built by mktables.PL from e.g. Unicode.txt.
# Any changes made here will be lost!
return <<'END';
-0D80 0DFF
+1100 1159
+115F 11A2
+11A8 11F9
+3131 318E
+AC00 D7A3
+FFA0 FFBE
+FFC2 FFC7
+FFCA FFCF
+FFD2 FFD7
+FFDA FFDC
END
# This file is built by mktables.PL from e.g. Unicode.txt.
# Any changes made here will be lost!
return <<'END';
-0E00 0E7F
+1200 1206
+1208 1246
+124A 124D
+1250 1256
+125A 125D
+1260 1286
+128A 128D
+1290 12AE
+12B2 12B5
+12B8 12BE
+12C2 12C5
+12C8 12CE
+12D0 12D6
+12D8 12EE
+12F0 130E
+1312 1315
+1318 131E
+1320 1346
+1348 135A
+1369 1371
+1372 137C
END
# This file is built by mktables.PL from e.g. Unicode.txt.
# Any changes made here will be lost!
return <<'END';
-0E80 0EFF
+13A0 13F4
END
# This file is built by mktables.PL from e.g. Unicode.txt.
# Any changes made here will be lost!
return <<'END';
-0F00 0FFF
+1401 166C
+166F 1676
END
# This file is built by mktables.PL from e.g. Unicode.txt.
# Any changes made here will be lost!
return <<'END';
-1000 109F
+1681 169A
END
# This file is built by mktables.PL from e.g. Unicode.txt.
# Any changes made here will be lost!
return <<'END';
-10A0 10FF
+16A0 16EA
+16EE 16F0
END
# This file is built by mktables.PL from e.g. Unicode.txt.
# Any changes made here will be lost!
return <<'END';
-1100 11FF
+1780 17B3
+17B4 17B6
+17B7 17BD
+17BE 17C5
+17C7 17C8
+17C9 17D3
+17E0 17E9
END
# This file is built by mktables.PL from e.g. Unicode.txt.
# Any changes made here will be lost!
return <<'END';
-0180 024F
+0531 0556
+0561 0587
+FB13 FB17
END
# This file is built by mktables.PL from e.g. Unicode.txt.
# Any changes made here will be lost!
return <<'END';
-1200 137F
+1810 1819
+1820 1842
+1844 1877
+1880 18A8
END
# This file is built by mktables.PL from e.g. Unicode.txt.
# Any changes made here will be lost!
return <<'END';
-13A0 13FF
+3041 3094
+309D 309E
END
# This file is built by mktables.PL from e.g. Unicode.txt.
# Any changes made here will be lost!
return <<'END';
-1400 167F
+30A1 30FA
+30FD 30FE
+FF66 FF6F
+FF71 FF9D
END
# This file is built by mktables.PL from e.g. Unicode.txt.
# Any changes made here will be lost!
return <<'END';
-1680 169F
+3105 312C
+31A0 31B7
END
# This file is built by mktables.PL from e.g. Unicode.txt.
# Any changes made here will be lost!
return <<'END';
-16A0 16FF
+2E80 2E99
+2E9B 2EF3
+2F00 2FD5
+3021 3029
+3038 303A
+3400 4DB5
+4E00 9FA5
+F900 FA2D
+20000 2A6D6
+2F800 2FA1D
END
# This file is built by mktables.PL from e.g. Unicode.txt.
# Any changes made here will be lost!
return <<'END';
-1780 17FF
+A000 A48C
+A490 A4A1
+A4A4 A4B3
+A4B5 A4C0
+A4C2 A4C4
END
# This file is built by mktables.PL from e.g. Unicode.txt.
# Any changes made here will be lost!
return <<'END';
-1800 18AF
+10300 1031E
END
# This file is built by mktables.PL from e.g. Unicode.txt.
# Any changes made here will be lost!
return <<'END';
-1E00 1EFF
+10330 10349
END
# This file is built by mktables.PL from e.g. Unicode.txt.
# Any changes made here will be lost!
return <<'END';
-1F00 1FFF
+10400 10425
+10428 1044D
END
# This file is built by mktables.PL from e.g. Unicode.txt.
# Any changes made here will be lost!
return <<'END';
-2000 206F
+0300 034E
+0360 0362
+0488 0489
+0591 05A1
+05A3 05B9
+05BB 05BD
+05C1 05C2
+064B 0655
+06D6 06DC
+06DD 06DE
+06DF 06E4
+06E7 06E8
+06EA 06ED
+20D0 20DC
+20DD 20E0
+20E2 20E3
+302A 302F
+3099 309A
+FE20 FE23
+1D167 1D169
+1D17B 1D182
+1D185 1D18B
+1D1AA 1D1AD
END
# This file is built by mktables.PL from e.g. Unicode.txt.
# Any changes made here will be lost!
return <<'END';
-0250 02AF
+05D0 05EA
+05F0 05F2
+FB1F FB28
+FB2A FB36
+FB38 FB3C
+FB40 FB41
+FB43 FB44
+FB46 FB4F
END
# This file is built by mktables.PL from e.g. Unicode.txt.
# Any changes made here will be lost!
return <<'END';
-2070 209F
+0000 007F
END
# This file is built by mktables.PL from e.g. Unicode.txt.
# Any changes made here will be lost!
return <<'END';
-20A0 20CF
+0080 00FF
END
# This file is built by mktables.PL from e.g. Unicode.txt.
# Any changes made here will be lost!
return <<'END';
-20D0 20FF
+0100 017F
END
# This file is built by mktables.PL from e.g. Unicode.txt.
# Any changes made here will be lost!
return <<'END';
-2100 214F
+0180 024F
END
# This file is built by mktables.PL from e.g. Unicode.txt.
# Any changes made here will be lost!
return <<'END';
-2150 218F
+0250 02AF
END
# This file is built by mktables.PL from e.g. Unicode.txt.
# Any changes made here will be lost!
return <<'END';
-2190 21FF
+02B0 02FF
END
# This file is built by mktables.PL from e.g. Unicode.txt.
# Any changes made here will be lost!
return <<'END';
-2200 22FF
+0300 036F
END
# This file is built by mktables.PL from e.g. Unicode.txt.
# Any changes made here will be lost!
return <<'END';
-2300 23FF
+0370 03FF
END
# This file is built by mktables.PL from e.g. Unicode.txt.
# Any changes made here will be lost!
return <<'END';
-2400 243F
+0400 04FF
END
# This file is built by mktables.PL from e.g. Unicode.txt.
# Any changes made here will be lost!
return <<'END';
-2440 245F
+0530 058F
END
# This file is built by mktables.PL from e.g. Unicode.txt.
# Any changes made here will be lost!
return <<'END';
-02B0 02FF
+0621 063A
+0641 064A
+0671 06D3
+06E5 06E6
+06FA 06FC
+FB50 FBB1
+FBD3 FD3D
+FD50 FD8F
+FD92 FDC7
+FDF0 FDFB
+FE70 FE72
+FE76 FEFC
END
# This file is built by mktables.PL from e.g. Unicode.txt.
# Any changes made here will be lost!
return <<'END';
-2460 24FF
+0590 05FF
END
# This file is built by mktables.PL from e.g. Unicode.txt.
# Any changes made here will be lost!
return <<'END';
-2500 257F
+0600 06FF
END
# This file is built by mktables.PL from e.g. Unicode.txt.
# Any changes made here will be lost!
return <<'END';
-2580 259F
+0700 074F
END
# This file is built by mktables.PL from e.g. Unicode.txt.
# Any changes made here will be lost!
return <<'END';
-25A0 25FF
+0780 07BF
END
# This file is built by mktables.PL from e.g. Unicode.txt.
# Any changes made here will be lost!
return <<'END';
-2600 26FF
+0900 097F
END
# This file is built by mktables.PL from e.g. Unicode.txt.
# Any changes made here will be lost!
return <<'END';
-2700 27BF
+0980 09FF
END
# This file is built by mktables.PL from e.g. Unicode.txt.
# Any changes made here will be lost!
return <<'END';
-2800 28FF
+0A00 0A7F
END
# This file is built by mktables.PL from e.g. Unicode.txt.
# Any changes made here will be lost!
return <<'END';
-2E80 2EFF
+0A80 0AFF
END
# This file is built by mktables.PL from e.g. Unicode.txt.
# Any changes made here will be lost!
return <<'END';
-2F00 2FDF
+0B00 0B7F
END
# This file is built by mktables.PL from e.g. Unicode.txt.
# Any changes made here will be lost!
return <<'END';
-2FF0 2FFF
+0B80 0BFF
END
# This file is built by mktables.PL from e.g. Unicode.txt.
# Any changes made here will be lost!
return <<'END';
-0300 036F
+0712 072C
+0730 074A
END
# This file is built by mktables.PL from e.g. Unicode.txt.
# Any changes made here will be lost!
return <<'END';
-3000 303F
+0C00 0C7F
END
# This file is built by mktables.PL from e.g. Unicode.txt.
# Any changes made here will be lost!
return <<'END';
-3040 309F
+0C80 0CFF
END
# This file is built by mktables.PL from e.g. Unicode.txt.
# Any changes made here will be lost!
return <<'END';
-30A0 30FF
+0D00 0D7F
END
# This file is built by mktables.PL from e.g. Unicode.txt.
# Any changes made here will be lost!
return <<'END';
-3100 312F
+0D80 0DFF
END
# This file is built by mktables.PL from e.g. Unicode.txt.
# Any changes made here will be lost!
return <<'END';
-3130 318F
+0E00 0E7F
END
# This file is built by mktables.PL from e.g. Unicode.txt.
# Any changes made here will be lost!
return <<'END';
-3190 319F
+0E80 0EFF
END
# This file is built by mktables.PL from e.g. Unicode.txt.
# Any changes made here will be lost!
return <<'END';
-31A0 31BF
+0F00 0FFF
END
# This file is built by mktables.PL from e.g. Unicode.txt.
# Any changes made here will be lost!
return <<'END';
-3200 32FF
+1000 109F
END
# This file is built by mktables.PL from e.g. Unicode.txt.
# Any changes made here will be lost!
return <<'END';
-3300 33FF
+10A0 10FF
END
# This file is built by mktables.PL from e.g. Unicode.txt.
# Any changes made here will be lost!
return <<'END';
-3400 4DB5
+1100 11FF
END
# This file is built by mktables.PL from e.g. Unicode.txt.
# Any changes made here will be lost!
return <<'END';
-0370 03FF
+0780 07A5
+07A6 07B0
END
# This file is built by mktables.PL from e.g. Unicode.txt.
# Any changes made here will be lost!
return <<'END';
-4E00 9FFF
+1200 137F
END
# This file is built by mktables.PL from e.g. Unicode.txt.
# Any changes made here will be lost!
return <<'END';
-A000 A48F
+13A0 13FF
END
# This file is built by mktables.PL from e.g. Unicode.txt.
# Any changes made here will be lost!
return <<'END';
-A490 A4CF
+1400 167F
END
# This file is built by mktables.PL from e.g. Unicode.txt.
# Any changes made here will be lost!
return <<'END';
-AC00 D7A3
+1680 169F
END
# This file is built by mktables.PL from e.g. Unicode.txt.
# Any changes made here will be lost!
return <<'END';
-D800 DB7F
+16A0 16FF
END
# This file is built by mktables.PL from e.g. Unicode.txt.
# Any changes made here will be lost!
return <<'END';
-DB80 DBFF
+1780 17FF
END
# This file is built by mktables.PL from e.g. Unicode.txt.
# Any changes made here will be lost!
return <<'END';
-DC00 DFFF
+1800 18AF
END
# This file is built by mktables.PL from e.g. Unicode.txt.
# Any changes made here will be lost!
return <<'END';
-100000 10FFFD
+1E00 1EFF
END
# This file is built by mktables.PL from e.g. Unicode.txt.
# Any changes made here will be lost!
return <<'END';
-F900 FAFF
+1F00 1FFF
END
# This file is built by mktables.PL from e.g. Unicode.txt.
# Any changes made here will be lost!
return <<'END';
-FB00 FB4F
+2000 206F
END
# This file is built by mktables.PL from e.g. Unicode.txt.
# Any changes made here will be lost!
return <<'END';
-0400 04FF
+0901 0902
+0905 0939
+093E 0940
+0941 0948
+0949 094C
+0951 0954
+0958 0961
+0962 0963
+0966 096F
END
# This file is built by mktables.PL from e.g. Unicode.txt.
# Any changes made here will be lost!
return <<'END';
-FB50 FDFF
+2070 209F
END
# This file is built by mktables.PL from e.g. Unicode.txt.
# Any changes made here will be lost!
return <<'END';
-FE20 FE2F
+20A0 20CF
END
# This file is built by mktables.PL from e.g. Unicode.txt.
# Any changes made here will be lost!
return <<'END';
-FE30 FE4F
+20D0 20FF
END
# This file is built by mktables.PL from e.g. Unicode.txt.
# Any changes made here will be lost!
return <<'END';
-FE50 FE6F
+2100 214F
END
# This file is built by mktables.PL from e.g. Unicode.txt.
# Any changes made here will be lost!
return <<'END';
-FE70 FEFE
+2150 218F
END
# This file is built by mktables.PL from e.g. Unicode.txt.
# Any changes made here will be lost!
return <<'END';
-FFF0 FFFD
+2190 21FF
END
# This file is built by mktables.PL from e.g. Unicode.txt.
# Any changes made here will be lost!
return <<'END';
-FF00 FFEF
+2200 22FF
END
# This file is built by mktables.PL from e.g. Unicode.txt.
# Any changes made here will be lost!
return <<'END';
-10300 1032F
+2300 23FF
END
# This file is built by mktables.PL from e.g. Unicode.txt.
# Any changes made here will be lost!
return <<'END';
-10330 1034F
+2400 243F
END
# This file is built by mktables.PL from e.g. Unicode.txt.
# Any changes made here will be lost!
return <<'END';
-10400 1044F
+2440 245F
END
# This file is built by mktables.PL from e.g. Unicode.txt.
# Any changes made here will be lost!
return <<'END';
-0530 058F
+0985 098C
+098F 0990
+0993 09A8
+09AA 09B0
+09B6 09B9
+09BE 09C0
+09C1 09C4
+09C7 09C8
+09CB 09CC
+09DC 09DD
+09DF 09E1
+09E2 09E3
+09E6 09EF
+09F0 09F1
END
# This file is built by mktables.PL from e.g. Unicode.txt.
# Any changes made here will be lost!
return <<'END';
-1D000 1D0FF
+2460 24FF
END
# This file is built by mktables.PL from e.g. Unicode.txt.
# Any changes made here will be lost!
return <<'END';
-1D100 1D1FF
+2500 257F
END
# This file is built by mktables.PL from e.g. Unicode.txt.
# Any changes made here will be lost!
return <<'END';
-1D400 1D7FF
+2580 259F
END
# This file is built by mktables.PL from e.g. Unicode.txt.
# Any changes made here will be lost!
return <<'END';
-20000 2A6D6
+25A0 25FF
END
# This file is built by mktables.PL from e.g. Unicode.txt.
# Any changes made here will be lost!
return <<'END';
-2F800 2FA1F
+2600 26FF
END
# This file is built by mktables.PL from e.g. Unicode.txt.
# Any changes made here will be lost!
return <<'END';
-E0000 E007F
+2700 27BF
END
--- /dev/null
+# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
+# This file is built by mktables.PL from e.g. Unicode.txt.
+# Any changes made here will be lost!
+return <<'END';
+2800 28FF
+END
--- /dev/null
+# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
+# This file is built by mktables.PL from e.g. Unicode.txt.
+# Any changes made here will be lost!
+return <<'END';
+2E80 2EFF
+END
--- /dev/null
+# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
+# This file is built by mktables.PL from e.g. Unicode.txt.
+# Any changes made here will be lost!
+return <<'END';
+2F00 2FDF
+END
--- /dev/null
+# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
+# This file is built by mktables.PL from e.g. Unicode.txt.
+# Any changes made here will be lost!
+return <<'END';
+2FF0 2FFF
+END
--- /dev/null
+# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
+# This file is built by mktables.PL from e.g. Unicode.txt.
+# Any changes made here will be lost!
+return <<'END';
+0041 005A LATIN # Latin In/0.pl
+0061 007A LATIN # Latin In/0.pl
+00C0 00D6 LATIN # Latin In/0.pl
+00D8 00F6 LATIN # Latin In/0.pl
+00F8 01BA LATIN # Latin In/0.pl
+01BC 01BF LATIN # Latin In/0.pl
+01C0 01C3 LATIN # Latin In/0.pl
+01C4 021F LATIN # Latin In/0.pl
+0222 0233 LATIN # Latin In/0.pl
+0250 02AD LATIN # Latin In/0.pl
+02B0 02B8 LATIN # Latin In/0.pl
+02E0 02E4 LATIN # Latin In/0.pl
+1E00 1E9B LATIN # Latin In/0.pl
+1EA0 1EF9 LATIN # Latin In/0.pl
+212A 212B LATIN # Latin In/0.pl
+FB00 FB06 LATIN # Latin In/0.pl
+FF21 FF3A LATIN # Latin In/0.pl
+FF41 FF5A LATIN # Latin In/0.pl
+0388 038A GREEK # Greek In/1.pl
+038E 03A1 GREEK # Greek In/1.pl
+03A3 03CE GREEK # Greek In/1.pl
+03D0 03D7 GREEK # Greek In/1.pl
+03DA 03F5 GREEK # Greek In/1.pl
+1F00 1F15 GREEK # Greek In/1.pl
+1F18 1F1D GREEK # Greek In/1.pl
+1F20 1F45 GREEK # Greek In/1.pl
+1F48 1F4D GREEK # Greek In/1.pl
+1F50 1F57 GREEK # Greek In/1.pl
+1F5F 1F7D GREEK # Greek In/1.pl
+1F80 1FB4 GREEK # Greek In/1.pl
+1FB6 1FBC GREEK # Greek In/1.pl
+1FC2 1FC4 GREEK # Greek In/1.pl
+1FC6 1FCC GREEK # Greek In/1.pl
+1FD0 1FD3 GREEK # Greek In/1.pl
+1FD6 1FDB GREEK # Greek In/1.pl
+1FE0 1FEC GREEK # Greek In/1.pl
+1FF2 1FF4 GREEK # Greek In/1.pl
+1FF6 1FFC GREEK # Greek In/1.pl
+0400 0481 CYRILLIC # Cyrillic In/2.pl
+0483 0486 CYRILLIC # Cyrillic In/2.pl
+048C 04C4 CYRILLIC # Cyrillic In/2.pl
+04C7 04C8 CYRILLIC # Cyrillic In/2.pl
+04CB 04CC CYRILLIC # Cyrillic In/2.pl
+04D0 04F5 CYRILLIC # Cyrillic In/2.pl
+04F8 04F9 CYRILLIC # Cyrillic In/2.pl
+0531 0556 ARMENIAN # Armenian In/3.pl
+0561 0587 ARMENIAN # Armenian In/3.pl
+FB13 FB17 ARMENIAN # Armenian In/3.pl
+05D0 05EA HEBREW # Hebrew In/4.pl
+05F0 05F2 HEBREW # Hebrew In/4.pl
+FB1F FB28 HEBREW # Hebrew In/4.pl
+FB2A FB36 HEBREW # Hebrew In/4.pl
+FB38 FB3C HEBREW # Hebrew In/4.pl
+FB40 FB41 HEBREW # Hebrew In/4.pl
+FB43 FB44 HEBREW # Hebrew In/4.pl
+FB46 FB4F HEBREW # Hebrew In/4.pl
+0621 063A ARABIC # Arabic In/5.pl
+0641 064A ARABIC # Arabic In/5.pl
+0671 06D3 ARABIC # Arabic In/5.pl
+06E5 06E6 ARABIC # Arabic In/5.pl
+06FA 06FC ARABIC # Arabic In/5.pl
+FB50 FBB1 ARABIC # Arabic In/5.pl
+FBD3 FD3D ARABIC # Arabic In/5.pl
+FD50 FD8F ARABIC # Arabic In/5.pl
+FD92 FDC7 ARABIC # Arabic In/5.pl
+FDF0 FDFB ARABIC # Arabic In/5.pl
+FE70 FE72 ARABIC # Arabic In/5.pl
+FE76 FEFC ARABIC # Arabic In/5.pl
+0712 072C SYRIAC # Syriac In/6.pl
+0730 074A SYRIAC # Syriac In/6.pl
+0780 07A5 THAANA # Thaana In/7.pl
+07A6 07B0 THAANA # Thaana In/7.pl
+0901 0902 DEVANAGARI # Devanagari In/8.pl
+0905 0939 DEVANAGARI # Devanagari In/8.pl
+093E 0940 DEVANAGARI # Devanagari In/8.pl
+0941 0948 DEVANAGARI # Devanagari In/8.pl
+0949 094C DEVANAGARI # Devanagari In/8.pl
+0951 0954 DEVANAGARI # Devanagari In/8.pl
+0958 0961 DEVANAGARI # Devanagari In/8.pl
+0962 0963 DEVANAGARI # Devanagari In/8.pl
+0966 096F DEVANAGARI # Devanagari In/8.pl
+0985 098C BENGALI # Bengali In/9.pl
+098F 0990 BENGALI # Bengali In/9.pl
+0993 09A8 BENGALI # Bengali In/9.pl
+09AA 09B0 BENGALI # Bengali In/9.pl
+09B6 09B9 BENGALI # Bengali In/9.pl
+09BE 09C0 BENGALI # Bengali In/9.pl
+09C1 09C4 BENGALI # Bengali In/9.pl
+09C7 09C8 BENGALI # Bengali In/9.pl
+09CB 09CC BENGALI # Bengali In/9.pl
+09DC 09DD BENGALI # Bengali In/9.pl
+09DF 09E1 BENGALI # Bengali In/9.pl
+09E2 09E3 BENGALI # Bengali In/9.pl
+09E6 09EF BENGALI # Bengali In/9.pl
+09F0 09F1 BENGALI # Bengali In/9.pl
+0A05 0A0A GURMUKHI # Gurmukhi In/10.pl
+0A0F 0A10 GURMUKHI # Gurmukhi In/10.pl
+0A13 0A28 GURMUKHI # Gurmukhi In/10.pl
+0A2A 0A30 GURMUKHI # Gurmukhi In/10.pl
+0A32 0A33 GURMUKHI # Gurmukhi In/10.pl
+0A35 0A36 GURMUKHI # Gurmukhi In/10.pl
+0A38 0A39 GURMUKHI # Gurmukhi In/10.pl
+0A3E 0A40 GURMUKHI # Gurmukhi In/10.pl
+0A41 0A42 GURMUKHI # Gurmukhi In/10.pl
+0A47 0A48 GURMUKHI # Gurmukhi In/10.pl
+0A4B 0A4D GURMUKHI # Gurmukhi In/10.pl
+0A59 0A5C GURMUKHI # Gurmukhi In/10.pl
+0A66 0A6F GURMUKHI # Gurmukhi In/10.pl
+0A70 0A71 GURMUKHI # Gurmukhi In/10.pl
+0A72 0A74 GURMUKHI # Gurmukhi In/10.pl
+0A81 0A82 GUJARATI # Gujarati In/11.pl
+0A85 0A8B GUJARATI # Gujarati In/11.pl
+0A8F 0A91 GUJARATI # Gujarati In/11.pl
+0A93 0AA8 GUJARATI # Gujarati In/11.pl
+0AAA 0AB0 GUJARATI # Gujarati In/11.pl
+0AB2 0AB3 GUJARATI # Gujarati In/11.pl
+0AB5 0AB9 GUJARATI # Gujarati In/11.pl
+0ABE 0AC0 GUJARATI # Gujarati In/11.pl
+0AC1 0AC5 GUJARATI # Gujarati In/11.pl
+0AC7 0AC8 GUJARATI # Gujarati In/11.pl
+0ACB 0ACC GUJARATI # Gujarati In/11.pl
+0AE6 0AEF GUJARATI # Gujarati In/11.pl
+0B02 0B03 ORIYA # Oriya In/12.pl
+0B05 0B0C ORIYA # Oriya In/12.pl
+0B0F 0B10 ORIYA # Oriya In/12.pl
+0B13 0B28 ORIYA # Oriya In/12.pl
+0B2A 0B30 ORIYA # Oriya In/12.pl
+0B32 0B33 ORIYA # Oriya In/12.pl
+0B36 0B39 ORIYA # Oriya In/12.pl
+0B41 0B43 ORIYA # Oriya In/12.pl
+0B47 0B48 ORIYA # Oriya In/12.pl
+0B4B 0B4C ORIYA # Oriya In/12.pl
+0B5C 0B5D ORIYA # Oriya In/12.pl
+0B5F 0B61 ORIYA # Oriya In/12.pl
+0B66 0B6F ORIYA # Oriya In/12.pl
+0B85 0B8A TAMIL # Tamil In/13.pl
+0B8E 0B90 TAMIL # Tamil In/13.pl
+0B92 0B95 TAMIL # Tamil In/13.pl
+0B99 0B9A TAMIL # Tamil In/13.pl
+0B9E 0B9F TAMIL # Tamil In/13.pl
+0BA3 0BA4 TAMIL # Tamil In/13.pl
+0BA8 0BAA TAMIL # Tamil In/13.pl
+0BAE 0BB5 TAMIL # Tamil In/13.pl
+0BB7 0BB9 TAMIL # Tamil In/13.pl
+0BBE 0BBF TAMIL # Tamil In/13.pl
+0BC1 0BC2 TAMIL # Tamil In/13.pl
+0BC6 0BC8 TAMIL # Tamil In/13.pl
+0BCA 0BCC TAMIL # Tamil In/13.pl
+0BE7 0BEF TAMIL # Tamil In/13.pl
+0BF0 0BF2 TAMIL # Tamil In/13.pl
+0C01 0C03 TELUGU # Telugu In/14.pl
+0C05 0C0C TELUGU # Telugu In/14.pl
+0C0E 0C10 TELUGU # Telugu In/14.pl
+0C12 0C28 TELUGU # Telugu In/14.pl
+0C2A 0C33 TELUGU # Telugu In/14.pl
+0C35 0C39 TELUGU # Telugu In/14.pl
+0C3E 0C40 TELUGU # Telugu In/14.pl
+0C41 0C44 TELUGU # Telugu In/14.pl
+0C46 0C48 TELUGU # Telugu In/14.pl
+0C4A 0C4D TELUGU # Telugu In/14.pl
+0C55 0C56 TELUGU # Telugu In/14.pl
+0C60 0C61 TELUGU # Telugu In/14.pl
+0C66 0C6F TELUGU # Telugu In/14.pl
+0C82 0C83 KANNADA # Kannada In/15.pl
+0C85 0C8C KANNADA # Kannada In/15.pl
+0C8E 0C90 KANNADA # Kannada In/15.pl
+0C92 0CA8 KANNADA # Kannada In/15.pl
+0CAA 0CB3 KANNADA # Kannada In/15.pl
+0CB5 0CB9 KANNADA # Kannada In/15.pl
+0CC0 0CC4 KANNADA # Kannada In/15.pl
+0CC7 0CC8 KANNADA # Kannada In/15.pl
+0CCA 0CCB KANNADA # Kannada In/15.pl
+0CCC 0CCD KANNADA # Kannada In/15.pl
+0CD5 0CD6 KANNADA # Kannada In/15.pl
+0CE0 0CE1 KANNADA # Kannada In/15.pl
+0CE6 0CEF KANNADA # Kannada In/15.pl
+0D02 0D03 MALAYALAM # Malayalam In/16.pl
+0D05 0D0C MALAYALAM # Malayalam In/16.pl
+0D0E 0D10 MALAYALAM # Malayalam In/16.pl
+0D12 0D28 MALAYALAM # Malayalam In/16.pl
+0D2A 0D39 MALAYALAM # Malayalam In/16.pl
+0D3E 0D40 MALAYALAM # Malayalam In/16.pl
+0D41 0D43 MALAYALAM # Malayalam In/16.pl
+0D46 0D48 MALAYALAM # Malayalam In/16.pl
+0D4A 0D4C MALAYALAM # Malayalam In/16.pl
+0D60 0D61 MALAYALAM # Malayalam In/16.pl
+0D66 0D6F MALAYALAM # Malayalam In/16.pl
+0D82 0D83 SINHALA # Sinhala In/17.pl
+0D85 0D96 SINHALA # Sinhala In/17.pl
+0D9A 0DB1 SINHALA # Sinhala In/17.pl
+0DB3 0DBB SINHALA # Sinhala In/17.pl
+0DC0 0DC6 SINHALA # Sinhala In/17.pl
+0DCF 0DD1 SINHALA # Sinhala In/17.pl
+0DD2 0DD4 SINHALA # Sinhala In/17.pl
+0DD8 0DDF SINHALA # Sinhala In/17.pl
+0DF2 0DF3 SINHALA # Sinhala In/17.pl
+0E01 0E30 THAI # Thai In/18.pl
+0E32 0E33 THAI # Thai In/18.pl
+0E34 0E3A THAI # Thai In/18.pl
+0E40 0E45 THAI # Thai In/18.pl
+0E47 0E4E THAI # Thai In/18.pl
+0E50 0E59 THAI # Thai In/18.pl
+0E81 0E82 LAO # Lao In/19.pl
+0E87 0E88 LAO # Lao In/19.pl
+0E94 0E97 LAO # Lao In/19.pl
+0E99 0E9F LAO # Lao In/19.pl
+0EA1 0EA3 LAO # Lao In/19.pl
+0EAA 0EAB LAO # Lao In/19.pl
+0EAD 0EB0 LAO # Lao In/19.pl
+0EB2 0EB3 LAO # Lao In/19.pl
+0EB4 0EB9 LAO # Lao In/19.pl
+0EBB 0EBC LAO # Lao In/19.pl
+0EC0 0EC4 LAO # Lao In/19.pl
+0EC8 0ECD LAO # Lao In/19.pl
+0ED0 0ED9 LAO # Lao In/19.pl
+0EDC 0EDD LAO # Lao In/19.pl
+0F18 0F19 TIBETAN # Tibetan In/20.pl
+0F20 0F29 TIBETAN # Tibetan In/20.pl
+0F2A 0F33 TIBETAN # Tibetan In/20.pl
+0F40 0F47 TIBETAN # Tibetan In/20.pl
+0F49 0F6A TIBETAN # Tibetan In/20.pl
+0F71 0F7E TIBETAN # Tibetan In/20.pl
+0F80 0F84 TIBETAN # Tibetan In/20.pl
+0F86 0F87 TIBETAN # Tibetan In/20.pl
+0F88 0F8B TIBETAN # Tibetan In/20.pl
+0F90 0F97 TIBETAN # Tibetan In/20.pl
+0F99 0FBC TIBETAN # Tibetan In/20.pl
+1000 1021 MYANMAR # Myanmar In/21.pl
+1023 1027 MYANMAR # Myanmar In/21.pl
+1029 102A MYANMAR # Myanmar In/21.pl
+102D 1030 MYANMAR # Myanmar In/21.pl
+1036 1037 MYANMAR # Myanmar In/21.pl
+1040 1049 MYANMAR # Myanmar In/21.pl
+1050 1055 MYANMAR # Myanmar In/21.pl
+1056 1057 MYANMAR # Myanmar In/21.pl
+1058 1059 MYANMAR # Myanmar In/21.pl
+10A0 10C5 GEORGIAN # Georgian In/22.pl
+10D0 10F6 GEORGIAN # Georgian In/22.pl
+1100 1159 HANGUL # Hangul In/23.pl
+115F 11A2 HANGUL # Hangul In/23.pl
+11A8 11F9 HANGUL # Hangul In/23.pl
+3131 318E HANGUL # Hangul In/23.pl
+AC00 D7A3 HANGUL # Hangul In/23.pl
+FFA0 FFBE HANGUL # Hangul In/23.pl
+FFC2 FFC7 HANGUL # Hangul In/23.pl
+FFCA FFCF HANGUL # Hangul In/23.pl
+FFD2 FFD7 HANGUL # Hangul In/23.pl
+FFDA FFDC HANGUL # Hangul In/23.pl
+1200 1206 ETHIOPIC # Ethiopic In/24.pl
+1208 1246 ETHIOPIC # Ethiopic In/24.pl
+124A 124D ETHIOPIC # Ethiopic In/24.pl
+1250 1256 ETHIOPIC # Ethiopic In/24.pl
+125A 125D ETHIOPIC # Ethiopic In/24.pl
+1260 1286 ETHIOPIC # Ethiopic In/24.pl
+128A 128D ETHIOPIC # Ethiopic In/24.pl
+1290 12AE ETHIOPIC # Ethiopic In/24.pl
+12B2 12B5 ETHIOPIC # Ethiopic In/24.pl
+12B8 12BE ETHIOPIC # Ethiopic In/24.pl
+12C2 12C5 ETHIOPIC # Ethiopic In/24.pl
+12C8 12CE ETHIOPIC # Ethiopic In/24.pl
+12D0 12D6 ETHIOPIC # Ethiopic In/24.pl
+12D8 12EE ETHIOPIC # Ethiopic In/24.pl
+12F0 130E ETHIOPIC # Ethiopic In/24.pl
+1312 1315 ETHIOPIC # Ethiopic In/24.pl
+1318 131E ETHIOPIC # Ethiopic In/24.pl
+1320 1346 ETHIOPIC # Ethiopic In/24.pl
+1348 135A ETHIOPIC # Ethiopic In/24.pl
+1369 1371 ETHIOPIC # Ethiopic In/24.pl
+1372 137C ETHIOPIC # Ethiopic In/24.pl
+13A0 13F4 CHEROKEE # Cherokee In/25.pl
+1401 166C CANADIAN-ABORIGINAL # CanadianAboriginal In/26.pl
+166F 1676 CANADIAN-ABORIGINAL # CanadianAboriginal In/26.pl
+1681 169A OGHAM # Ogham In/27.pl
+16A0 16EA RUNIC # Runic In/28.pl
+16EE 16F0 RUNIC # Runic In/28.pl
+1780 17B3 KHMER # Khmer In/29.pl
+17B4 17B6 KHMER # Khmer In/29.pl
+17B7 17BD KHMER # Khmer In/29.pl
+17BE 17C5 KHMER # Khmer In/29.pl
+17C7 17C8 KHMER # Khmer In/29.pl
+17C9 17D3 KHMER # Khmer In/29.pl
+17E0 17E9 KHMER # Khmer In/29.pl
+1810 1819 MONGOLIAN # Mongolian In/30.pl
+1820 1842 MONGOLIAN # Mongolian In/30.pl
+1844 1877 MONGOLIAN # Mongolian In/30.pl
+1880 18A8 MONGOLIAN # Mongolian In/30.pl
+3041 3094 HIRAGANA # Hiragana In/31.pl
+309D 309E HIRAGANA # Hiragana In/31.pl
+30A1 30FA KATAKANA # Katakana In/32.pl
+30FD 30FE KATAKANA # Katakana In/32.pl
+FF66 FF6F KATAKANA # Katakana In/32.pl
+FF71 FF9D KATAKANA # Katakana In/32.pl
+3105 312C BOPOMOFO # Bopomofo In/33.pl
+31A0 31B7 BOPOMOFO # Bopomofo In/33.pl
+2E80 2E99 HAN # Han In/34.pl
+2E9B 2EF3 HAN # Han In/34.pl
+2F00 2FD5 HAN # Han In/34.pl
+3021 3029 HAN # Han In/34.pl
+3038 303A HAN # Han In/34.pl
+3400 4DB5 HAN # Han In/34.pl
+4E00 9FA5 HAN # Han In/34.pl
+F900 FA2D HAN # Han In/34.pl
+20000 2A6D6 HAN # Han In/34.pl
+2F800 2FA1D HAN # Han In/34.pl
+A000 A48C YI # Yi In/35.pl
+A490 A4A1 YI # Yi In/35.pl
+A4A4 A4B3 YI # Yi In/35.pl
+A4B5 A4C0 YI # Yi In/35.pl
+A4C2 A4C4 YI # Yi In/35.pl
+10300 1031E OLD-ITALIC # OldItalic In/36.pl
+10330 10349 GOTHIC # Gothic In/37.pl
+10400 10425 DESERET # Deseret In/38.pl
+10428 1044D DESERET # Deseret In/38.pl
+0300 034E INHERITED # Inherited In/39.pl
+0360 0362 INHERITED # Inherited In/39.pl
+0488 0489 INHERITED # Inherited In/39.pl
+0591 05A1 INHERITED # Inherited In/39.pl
+05A3 05B9 INHERITED # Inherited In/39.pl
+05BB 05BD INHERITED # Inherited In/39.pl
+05C1 05C2 INHERITED # Inherited In/39.pl
+064B 0655 INHERITED # Inherited In/39.pl
+06D6 06DC INHERITED # Inherited In/39.pl
+06DD 06DE INHERITED # Inherited In/39.pl
+06DF 06E4 INHERITED # Inherited In/39.pl
+06E7 06E8 INHERITED # Inherited In/39.pl
+06EA 06ED INHERITED # Inherited In/39.pl
+20D0 20DC INHERITED # Inherited In/39.pl
+20DD 20E0 INHERITED # Inherited In/39.pl
+20E2 20E3 INHERITED # Inherited In/39.pl
+302A 302F INHERITED # Inherited In/39.pl
+3099 309A INHERITED # Inherited In/39.pl
+FE20 FE23 INHERITED # Inherited In/39.pl
+1D167 1D169 INHERITED # Inherited In/39.pl
+1D17B 1D182 INHERITED # Inherited In/39.pl
+1D185 1D18B INHERITED # Inherited In/39.pl
+1D1AA 1D1AD INHERITED # Inherited In/39.pl
+END
next if @ARGV and not grep { $_ eq $table } @ARGV;
print $table, "\n";
$table =~ s/\W+//g;
- if ($table =~ /^In(.+)/) {
- my $id;
- unless (exists $InId{$1}) {
- $InId{$1} = $InId++;
- }
- $id = $InId{$1};
- open(OUT, ">In/$id.pl") or die "Can't create In/$id.pl: $!\n";
- print OUT "# In/$id.pl $1\n";
- }
- elsif ($table =~ /^(Is|To)(.+)/) {
+ if ($table =~ /^(Is|To)(.+)/) {
open(OUT, ">$1/$2.pl") or die "Can't create $1/$2.pl: $!\n";
}
else {
close OUT;
}
+# Do Scripts before Blocks so that in case of naming conflicts
+# the more natural one (Script) wins over the artificial one (Block).
+
+print "Scripts\n";
+open(UD, 'Scripts.txt') or die "Can't open Scripts.txt: $!\n";
+open(OUT, ">Scripts.pl") or die "Can't create Scripts.pl: $!\n";
+print OUT <<EOH;
+# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
+# This file is built by $0 from e.g. $UnicodeData.
+# Any changes made here will be lost!
+EOH
+print OUT <<"END";
+return <<'END';
+END
+
+my %Scripts;
+
+while (<UD>) {
+ next if /^#/;
+ next if /^$/;
+ chomp;
+ ($code, $last, $name) = /^([0-9a-f]+)\.\.([0-9a-f]+)\s+;\s+(.+)\s+\#/i;
+ if ($name) {
+ my $InName = lc($name);
+ $InName =~ s/\b(\w)/uc($1)/ge;
+ $InName =~ s/\W+//g;
+ my $id;
+ unless (exists $InId{$InName}) {
+ print "\t$InName\n";
+ $id = $Scripts{$InName} = $InId{$InName} = $InId++;
+ open(SCRIPT, ">In/$id.pl") or die "create In/$id.pl: $!\n";
+ print SCRIPT <<EOH;
+# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
+# This file is built by $0 from e.g. $UnicodeData.
+# Any changes made here will be lost!
+return <<'END';
+EOH
+ close(SCRIPT);
+ } else {
+ $id = $InId{$InName};
+ }
+ print OUT "$code\t$last\t$name\t# $InName In/$id.pl\n";
+ open(SCRIPT, ">>In/$id.pl");
+ print SCRIPT <<END;
+$code $last
+END
+ close SCRIPT;
+ }
+}
+
+for my $id (values %InId) {
+ open(SCRIPT, ">>In/$id.pl");
+ print SCRIPT <<END2;
+END
+END2
+ close(SCRIPT);
+}
+
+print OUT "END\n";
+close OUT;
+
# Must treat blocks specially.
exit if @ARGV and not grep { $_ eq Block } @ARGV;
chomp;
($code, $last, $name) = /^([0-9a-f]+)\.\.([0-9a-f]+); (.+)/i;
if ($name) {
- print OUT "$code $last $name\n";
- $name =~ s/\W+//g;
+ my $InName = $name;
+ $InName =~ s/\W+//g;
+ print "\t$InName\n";
my $id;
- unless (exists $InId{$name}) {
- $InId{$name} = $InId++;
+ # TODO: only the first one of Private Use blocks qualifies
+ unless (exists $InId{$InName}) {
+ $InId{$InName} = $InId++;
+ } elsif (exists $Scripts{$InName}) {
+ $InName .= 'Block';
+ $InId{$InName} = $InId++;
}
- $id = $InId{$name};
- open(BLOCK, ">In/$id.pl");
- print OUT "# In/$id.pl $name\n";
+ $id = $InId{$InName};
+ open(BLOCK, ">In/$id.pl") or die "create In/$id.pl: $!\n";
+ print OUT "$code\t$last\t$name\t# $InName In/$id.pl\n";
print BLOCK <<EOH;
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by $0 from e.g. $UnicodeData.
TCP/IP stacks of VMS: we do not know since we weren't able to test
Perl in such configurations.
+=head2 Different Definition of the Unicode Character Classes \p{In...}
+
+As suggested by the Unicode consortium, the Unicode character classes
+now prefer I<scripts> as opposed to I<blocks> (as defined by Unicode);
+in Perl, when the C<\p{In...}> and the C<\P{In...}> regular expression
+constructs are used. This has changed the definition of some of those
+character classes.
+
+The difference between scripts and blocks is that scripts are the
+glyphs used by a language or a group of languages, while the blocks
+are more artificial groupings of 256 characters based on the Unicode
+numbering.
+
+In general this change results in more inclusive Unicode character
+classes, but changes in the other direction also do take place:
+for example while the script C<Latin> includes all the Latin
+characters and their various diacritic-adorned versions, it
+does not include the various punctuation or digits (since they
+are not solely C<Latin>).
+
+Changes in the character class semantics may have happened if a script
+and a block happen to have the same name, for example C<Hebrew>.
+In such cases the script wins and C<\p{InHebrew}> now means the script
+definition of Hebrew.  The block definition is still available,
+though, by appending C<Block> to the name: C<\p{InHebrewBlock}> means
+what C<\p{InHebrew}> meant in perl 5.6.0. For the full list
+of affected character classes, see L<perlunicode/Blocks>.
+
=head2 Deprecations
The current user-visible implementation of pseudo-hashes (the weird
For the full list see L<perlunicode>.
The Unicode has also been separated into blocks of charaters which you
-can test with C<\p{InBlock}> and C<\P{InBlock}>, for example C<\p{InGreek}>
-and C<\P{InKatakana}>. For the full list see L<perlunicode>.
+can test with C<\p{In...}> (in) and C<\P{In...}> (not in), for example
+C<\p{InLatin}>, C<\p{InGreek}>, or C<\P{InKatakana}>.  For the full list see
+L<perlunicode>.
For the the full and latest information see the latest Unicode standard.
http://www.unicode.org/unicode/reports/tr18/
-=head2 Unicode Scripts support
-
-Currently the C<\p{In...}> supports only the Blocks database, like
-C<\p{BasicLatin}>, C<\p{InGreek}>, C<\p{InThai}>, but there's also the
-Scripts database, which has members like C<Latin>, C<Greek>,
-C<Armenian>, C<Han>. It is desireable that also the script names
-could be used for the C<\p{In...}> construct. Note: needs to be
-researched whether this is possible, that is, are there conflicts
-between the Blocks and the Scripts, is the Blocks Greek the same as
-the Scripts Greek?
-
=head2 use Thread for iThreads
Artur Bergman's C<iThreads> module is a start on this, but needs to
no difference, because UTF-8 stores ASCII in single bytes, but for
any character greater than C<chr(127)>, the character may be stored in
a sequence of two or more bytes, all of which have the high bit set.
-For C1 controls or Latin 1 characters on an EBCDIC platform the character
-may be stored in a UTF-EBCDIC multi byte sequence.
-But by and large, the user need not worry about this, because Perl
-hides it from the user. A character in Perl is logically just a number
-ranging from 0 to 2**32 or so. Larger characters encode to longer
-sequences of bytes internally, but again, this is just an internal
-detail which is hidden at the Perl level.
+
+For C1 controls or Latin 1 characters on an EBCDIC platform the
+character may be stored in a UTF-EBCDIC multi byte sequence. But by
+and large, the user need not worry about this, because Perl hides it
+from the user. A character in Perl is logically just a number ranging
+from 0 to 2**32 or so. Larger characters encode to longer sequences
+of bytes internally, but again, this is just an internal detail which
+is hidden at the Perl level.
=head2 Effects of character semantics
name C<"Latin-1 Supplement"> becomes C<\p{InLatin1Supplement}>.
Here is the list as of Unicode 3.1.0 (the two-letter classes) and
-Perl 5.8.0 (the one-letter classes):
+as defined by Perl (the one-letter classes) (in Unicode materials
+what Perl calls C<L> is often called C<L&>):
L Letter
Lu Letter, Uppercase
BidiWS Whitespace
BidiON Other Neutrals
-The blocks available for C<\p{InBlock}> and C<\P{InBlock}>, for
-example \p{InCyrillic>, are as follows:
-
- BasicLatin
- Latin1Supplement
- LatinExtendedA
- LatinExtendedB
- IPAExtensions
- SpacingModifierLetters
- CombiningDiacriticalMarks
- Greek
- Cyrillic
- Armenian
- Hebrew
- Arabic
- Syriac
- Thaana
- Devanagari
- Bengali
- Gurmukhi
- Gujarati
- Oriya
- Tamil
- Telugu
- Kannada
- Malayalam
- Sinhala
- Thai
- Lao
- Tibetan
- Myanmar
- Georgian
- HangulJamo
- Ethiopic
- Cherokee
- UnifiedCanadianAboriginalSyllabics
- Ogham
- Runic
- Khmer
- Mongolian
- LatinExtendedAdditional
- GreekExtended
- GeneralPunctuation
- SuperscriptsandSubscripts
- CurrencySymbols
- CombiningMarksforSymbols
- LetterlikeSymbols
- NumberForms
- Arrows
- MathematicalOperators
- MiscellaneousTechnical
- ControlPictures
- OpticalCharacterRecognition
- EnclosedAlphanumerics
- BoxDrawing
- BlockElements
- GeometricShapes
- MiscellaneousSymbols
- Dingbats
- BraillePatterns
- CJKRadicalsSupplement
- KangxiRadicals
- IdeographicDescriptionCharacters
- CJKSymbolsandPunctuation
- Hiragana
- Katakana
- Bopomofo
- HangulCompatibilityJamo
- Kanbun
- BopomofoExtended
- EnclosedCJKLettersandMonths
- CJKCompatibility
- CJKUnifiedIdeographsExtensionA
- CJKUnifiedIdeographs
- YiSyllables
- YiRadicals
- HangulSyllables
- HighSurrogates
- HighPrivateUseSurrogates
- LowSurrogates
- PrivateUse
- CJKCompatibilityIdeographs
- AlphabeticPresentationForms
- ArabicPresentationFormsA
- CombiningHalfMarks
- CJKCompatibilityForms
- SmallFormVariants
- ArabicPresentationFormsB
- Specials
- HalfwidthandFullwidthForms
- OldItalic
- Gothic
- Deseret
- ByzantineMusicalSymbols
- MusicalSymbols
- MathematicalAlphanumericSymbols
- CJKUnifiedIdeographsExtensionB
- CJKCompatibilityIdeographsSupplement
- Tags
+=head2 Scripts
+
+The scripts available for C<\p{In...}> and C<\P{In...}>, for
+example C<\p{InCyrillic}>, C<\p{InLatin}>, or C<\P{InHan}>, are
+as follows:
+
+ Latin
+ Greek
+ Cyrillic
+ Armenian
+ Hebrew
+ Arabic
+ Syriac
+ Thaana
+ Devanagari
+ Bengali
+ Gurmukhi
+ Gujarati
+ Oriya
+ Tamil
+ Telugu
+ Kannada
+ Malayalam
+ Sinhala
+ Thai
+ Lao
+ Tibetan
+ Myanmar
+ Georgian
+ Hangul
+ Ethiopic
+ Cherokee
+ CanadianAboriginal
+ Ogham
+ Runic
+ Khmer
+ Mongolian
+ Hiragana
+ Katakana
+ Bopomofo
+ Han
+ Yi
+ OldItalic
+ Gothic
+ Deseret
+ Inherited
+
+=head2 Blocks
+
+In addition to B<scripts>, Unicode also defines B<blocks> of
+characters. The difference between scripts and blocks is that the
+former concept is closer to natural languages, while the latter
+concept is more an artificial grouping based on groups of 256 Unicode
+characters. For example, the C<Latin> script contains letters from
+many blocks, but it does not contain all the characters from those
+blocks; for example, it does not contain digits.
+
+For more about scripts see the UTR #24:
+http://www.unicode.org/unicode/reports/tr24/
+For more about blocks see
+http://www.unicode.org/Public/UNIDATA/Blocks.txt
+
+Because there are overlaps in naming (there are, for example, both
+a script called C<Katakana> and a block called C<Katakana>), the block
+version has C<Block> appended to its name, C<\p{InKatakanaBlock}>.
+
+Notice that this definition was introduced in Perl 5.8.0: in Perl
+5.6.0 only the blocks were used; in Perl 5.8.0 scripts became the
+preferential character class definition; this meant that the
+definitions of some character classes changed (the ones in the
+below list that have the C<Block> appended).
+
+ BasicLatin
+ Latin1Supplement
+ LatinExtendedA
+ LatinExtendedB
+ IPAExtensions
+ SpacingModifierLetters
+ CombiningDiacriticalMarks
+ GreekBlock
+ CyrillicBlock
+ ArmenianBlock
+ HebrewBlock
+ ArabicBlock
+ SyriacBlock
+ ThaanaBlock
+ DevanagariBlock
+ BengaliBlock
+ GurmukhiBlock
+ GujaratiBlock
+ OriyaBlock
+ TamilBlock
+ TeluguBlock
+ KannadaBlock
+ MalayalamBlock
+ SinhalaBlock
+ ThaiBlock
+ LaoBlock
+ TibetanBlock
+ MyanmarBlock
+ GeorgianBlock
+ HangulJamo
+ EthiopicBlock
+ CherokeeBlock
+ UnifiedCanadianAboriginalSyllabics
+ OghamBlock
+ RunicBlock
+ KhmerBlock
+ MongolianBlock
+ LatinExtendedAdditional
+ GreekExtended
+ GeneralPunctuation
+ SuperscriptsandSubscripts
+ CurrencySymbols
+ CombiningMarksforSymbols
+ LetterlikeSymbols
+ NumberForms
+ Arrows
+ MathematicalOperators
+ MiscellaneousTechnical
+ ControlPictures
+ OpticalCharacterRecognition
+ EnclosedAlphanumerics
+ BoxDrawing
+ BlockElements
+ GeometricShapes
+ MiscellaneousSymbols
+ Dingbats
+ BraillePatterns
+ CJKRadicalsSupplement
+ KangxiRadicals
+ IdeographicDescriptionCharacters
+ CJKSymbolsandPunctuation
+ HiraganaBlock
+ KatakanaBlock
+ BopomofoBlock
+ HangulCompatibilityJamo
+ Kanbun
+ BopomofoExtended
+ EnclosedCJKLettersandMonths
+ CJKCompatibility
+ CJKUnifiedIdeographsExtensionA
+ CJKUnifiedIdeographs
+ YiSyllables
+ YiRadicals
+ HangulSyllables
+ HighSurrogates
+ HighPrivateUseSurrogates
+ LowSurrogates
+ PrivateUse
+ CJKCompatibilityIdeographs
+ AlphabeticPresentationForms
+ ArabicPresentationFormsA
+ CombiningHalfMarks
+ CJKCompatibilityForms
+ SmallFormVariants
+ ArabicPresentationFormsB
+ Specials
+ HalfwidthandFullwidthForms
+ OldItalicBlock
+ GothicBlock
+ DeseretBlock
+ ByzantineMusicalSymbols
+ MusicalSymbols
+ MathematicalAlphanumericSymbols
+ CJKUnifiedIdeographsExtensionB
+ CJKCompatibilityIdeographsSupplement
+ Tags
=item *
$| = 1;
-print "1..660\n";
+print "1..664\n";
BEGIN {
chdir 't' if -d 't';
{print $T} else {print "not $T"};
$T="ok 660\n";if ($x =~ /(a([abcdefg]+)(?{$y=$^N})de)(?{$z=$^N})/ and $y eq "bc" and $z eq "abcde")
{print $T} else {print "not $T"};
+
+# Test the Unicode script classes
+
+print "not " unless chr(0x100) =~ /\p{InLatin}/; # outside Latin-1
+print "ok 661\n";
+
+print "not " unless chr(0x212b) =~ /\p{InLatin}/; # Angstrom sign, very outside
+print "ok 662\n";
+
+print "not " unless chr(0x5d0) =~ /\p{InHebrew}/; # inside HebrewBlock
+print "ok 663\n";
+
+print "not " unless chr(0xfb4f) =~ /\p{InHebrew}/; # outside HebrewBlock
+print "ok 664\n";
+