From: Jarkko Hietaniemi Date: Wed, 4 Jul 2001 01:32:11 +0000 (+0000) Subject: Support preferentially the Unicode 'scripts' definition X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=2796c109dc2c56e2241410992d78bd8e0cccd71f;p=p5sagit%2Fp5-mst-13.2.git Support preferentially the Unicode 'scripts' definition in the \p{In...} notation since according to Unicode the scripts concept is more natural for matching than using the somewhat artificial block names. The block names are still available, though, and if there's a name conflict, the scripts one wins and the blocks one has to do with 'Block' appended to its name. For more information see http://www.unicode.org/unicode/reports/tr24/ p4raw-id: //depot/perl@11132 --- diff --git a/MANIFEST b/MANIFEST index 90ebe8e..915dba8 100644 --- a/MANIFEST +++ b/MANIFEST @@ -15,11 +15,11 @@ Changes5.003 Differences between 5.002 and 5.003 Changes5.004 Differences between 5.003 and 5.004 Changes5.005 Differences between 5.004 and 5.005 Changes5.6 Differences between 5.005 and 5.6 -config_h.SH Produces config.h configpm Produces lib/Config.pm Configure Portability tool configure.com Configure-equivalent for VMS configure.gnu Crude emulation of GNU configure +config_h.SH Produces config.h cop.h Control operator header Copying The GNU General Public License Cross/README Cross-compilation @@ -47,9 +47,9 @@ embedvar.h C namespace management epoc/config.sh EPOC port config.sh template epoc/createpkg.pl EPOC port generate PKG file epoc/epoc.c EPOC port -epoc/epoc_stubs.c EPOC port epoc/epocish.c EPOC port epoc/epocish.h EPOC port +epoc/epoc_stubs.c EPOC port epoc/link.pl EPOC port link a exe ext/attrs.t See if attrs works with C ext/attrs/attrs.pm attrs extension Perl module @@ -114,10 +114,10 @@ ext/Data/Dumper/t/dumper.t See if Data::Dumper works ext/Data/Dumper/t/overload.t See if Data::Dumper works for overloaded data ext/Data/Dumper/Todo Data pretty printer, futures ext/DB_File/Changes Berkeley DB extension 
change log +ext/DB_File/dbinfo Berkeley DB database version checker ext/DB_File/DB_File.pm Berkeley DB extension Perl module ext/DB_File/DB_File.xs Berkeley DB extension external subroutines ext/DB_File/DB_File_BS Berkeley DB extension mkbootstrap fodder -ext/DB_File/dbinfo Berkeley DB database version checker ext/DB_File/hints/dynixptx.pl Hint for DB_File for named architecture ext/DB_File/hints/sco.pl Hint for DB_File for named architecture ext/DB_File/Makefile.PL Berkeley DB extension makefile writer @@ -146,6 +146,7 @@ ext/Digest/MD5/t/align.t See if Digest::MD5 extension works ext/Digest/MD5/t/badfile.t See if Digest::MD5 extension works ext/Digest/MD5/t/files.t See if Digest::MD5 extension works ext/Digest/MD5/typemap Digest::MD5 extension +ext/DynaLoader/dlutils.c Dynamic loader utilities for dl_*.xs files ext/DynaLoader/dl_aix.xs AIX implementation ext/DynaLoader/dl_beos.xs BeOS implementation ext/DynaLoader/dl_dld.xs GNU dld style implementation @@ -159,7 +160,6 @@ ext/DynaLoader/dl_next.xs NeXT implementation ext/DynaLoader/dl_none.xs Stub implementation ext/DynaLoader/dl_vmesa.xs VM/ESA implementation ext/DynaLoader/dl_vms.xs VMS implementation -ext/DynaLoader/dlutils.c Dynamic loader utilities for dl_*.xs files ext/DynaLoader/DynaLoader_pm.PL Dynamic Loader perl module ext/DynaLoader/hints/aix.pl Hint for DynaLoader for named architecture ext/DynaLoader/hints/linux.pl Hint for DynaLoader for named architecture @@ -984,11 +984,11 @@ lib/Memoize/t/normalize.t Memoize lib/Memoize/t/prototype.t Memoize lib/Memoize/t/speed.t Memoize lib/Memoize/t/tie.t Memoize +lib/Memoize/t/tiefeatures.t Memoize lib/Memoize/t/tie_gdbm.t Memoize lib/Memoize/t/tie_ndbm.t Memoize lib/Memoize/t/tie_sdbm.t Memoize lib/Memoize/t/tie_storable.t Memoize -lib/Memoize/t/tiefeatures.t Memoize lib/Memoize/t/unmemoize.t Memoize lib/Memoize/TODO Memoize lib/Net/ChangeLog.libnet libnet @@ -1178,9 +1178,45 @@ lib/unicode/In.pl Unicode character database lib/unicode/In/0.pl Unicode 
character database lib/unicode/In/1.pl Unicode character database lib/unicode/In/10.pl Unicode character database +lib/unicode/In/100.pl Unicode character database +lib/unicode/In/101.pl Unicode character database +lib/unicode/In/102.pl Unicode character database +lib/unicode/In/103.pl Unicode character database +lib/unicode/In/104.pl Unicode character database +lib/unicode/In/105.pl Unicode character database +lib/unicode/In/106.pl Unicode character database +lib/unicode/In/107.pl Unicode character database +lib/unicode/In/108.pl Unicode character database +lib/unicode/In/109.pl Unicode character database lib/unicode/In/11.pl Unicode character database +lib/unicode/In/110.pl Unicode character database +lib/unicode/In/111.pl Unicode character database +lib/unicode/In/112.pl Unicode character database +lib/unicode/In/113.pl Unicode character database +lib/unicode/In/114.pl Unicode character database +lib/unicode/In/115.pl Unicode character database +lib/unicode/In/116.pl Unicode character database +lib/unicode/In/117.pl Unicode character database +lib/unicode/In/118.pl Unicode character database +lib/unicode/In/119.pl Unicode character database lib/unicode/In/12.pl Unicode character database +lib/unicode/In/120.pl Unicode character database +lib/unicode/In/121.pl Unicode character database +lib/unicode/In/122.pl Unicode character database +lib/unicode/In/123.pl Unicode character database +lib/unicode/In/124.pl Unicode character database +lib/unicode/In/125.pl Unicode character database +lib/unicode/In/126.pl Unicode character database +lib/unicode/In/127.pl Unicode character database +lib/unicode/In/128.pl Unicode character database +lib/unicode/In/129.pl Unicode character database lib/unicode/In/13.pl Unicode character database +lib/unicode/In/130.pl Unicode character database +lib/unicode/In/131.pl Unicode character database +lib/unicode/In/132.pl Unicode character database +lib/unicode/In/133.pl Unicode character database +lib/unicode/In/134.pl Unicode character 
database +lib/unicode/In/135.pl Unicode character database lib/unicode/In/14.pl Unicode character database lib/unicode/In/15.pl Unicode character database lib/unicode/In/16.pl Unicode character database @@ -1271,6 +1307,10 @@ lib/unicode/In/92.pl Unicode character database lib/unicode/In/93.pl Unicode character database lib/unicode/In/94.pl Unicode character database lib/unicode/In/95.pl Unicode character database +lib/unicode/In/96.pl Unicode character database +lib/unicode/In/97.pl Unicode character database +lib/unicode/In/98.pl Unicode character database +lib/unicode/In/99.pl Unicode character database lib/unicode/Index.txt Unicode character database lib/unicode/Is/Alnum.pl Unicode character database lib/unicode/Is/Alpha.pl Unicode character database @@ -1431,6 +1471,7 @@ lib/unicode/PropList.txt Unicode character database lib/unicode/README.perl Unicode character database lib/unicode/ReadMe.txt Unicode character database info lib/unicode/rename Filename mappings used +lib/unicode/Scripts.pl Unicode character database lib/unicode/Scripts.txt Unicode character database lib/unicode/SpecCase.txt Unicode character database lib/unicode/syllables.txt Unicode character database @@ -1544,8 +1585,8 @@ opcode.pl Opcode header generatore opnames.h Automatically generated opcode header os2/Changes Changelog for OS/2 port os2/diff.configure Patches to Configure -os2/dl_os2.c Addon for dl_open os2/dlfcn.h Addon for dl_open +os2/dl_os2.c Addon for dl_open os2/Makefile.SHs Shared library generation for OS/2 os2/os2.c Additional code for OS/2 os2/os2.sym Additional symbols to export @@ -1588,10 +1629,10 @@ os2/OS2/REXX/t/rx_tievar.t DLL access module os2/OS2/REXX/t/rx_tieydb.t DLL access module os2/OS2/REXX/t/rx_varset.t DLL access module os2/OS2/REXX/t/rx_vrexx.t DLL access module -os2/os2_base.t Additional tests for builtin methods os2/os2add.sym Overriding symbols to export os2/os2ish.h Header for OS/2 os2/os2thread.h pthread-like typedefs +os2/os2_base.t Additional tests 
for builtin methods os2/perl2cmd.pl Corrects installed binaries under OS/2 patchlevel.h The current patch level of perl perl.c main() @@ -1610,8 +1651,8 @@ perly.c A byacc'ed perly.y perly.fixer A program to remove yacc stack limitations perly.h The header file for perly.c perly.y Yacc grammar for perl -perly_c.diff Fixup perly.c to allow recursion perlyline.pl Perl code to fix #line directives and gcc warnings in perly.c +perly_c.diff Fixup perly.c to allow recursion plan9/aperl Shell to make Perl error messages Acme-friendly plan9/arpa/inet.h Plan9 port: replacement C header file plan9/buildinfo Plan9 port: configuration information @@ -1987,7 +2028,6 @@ t/op/pwent.t See if getpw*() functions work t/op/quotemeta.t See if quotemeta works t/op/rand.t See if rand works t/op/range.t See if .. works -t/op/re_tests Regular expressions for regexp.t t/op/read.t See if read() works t/op/readdir.t See if readdir() works t/op/recurse.t See if deep recursion works @@ -1997,6 +2037,7 @@ t/op/regexp_noamp.t See if regular expressions work with optimizations t/op/regmesg.t See if one can get regular expression errors t/op/repeat.t See if x operator works t/op/reverse.t See if reverse operator works +t/op/re_tests Regular expressions for regexp.t t/op/runlevel.t See if die() works from perl_call_*() t/op/sleep.t See if sleep works t/op/sort.t See if sort works @@ -2005,11 +2046,11 @@ t/op/split.t See if split works t/op/sprintf.t See if sprintf works t/op/stat.t See if stat works t/op/study.t See if study works -t/op/sub_lval.t See if lvalue subroutines work t/op/subst.t See if substitution works +t/op/substr.t See if substr works t/op/subst_amp.t See if $&-related substitution works t/op/subst_wamp.t See if substitution works with $& present -t/op/substr.t See if substr works +t/op/sub_lval.t See if lvalue subroutines work t/op/sysio.t See if sysread and syswrite work t/op/taint.t See if tainting works t/op/tie.t See if tie/untie functions work @@ -2110,9 +2151,9 @@ 
vms/ext/Stdio/test.pl regression tests for VMS::Stdio vms/ext/vmsish.pm Control VMS-specific behavior of Perl core vms/ext/vmsish.t Tests for vmsish.pm vms/ext/XSSymSet.pm manage linker symbols when building extensions -vms/gen_shrfls.pl generate options files and glue for shareable image vms/genconfig.pl retcon config.sh from config.h vms/genopt.com hack to write options files in case of broken makes +vms/gen_shrfls.pl generate options files and glue for shareable image vms/make_command.com record MM[SK] command used to build Perl vms/mms2make.pl convert descrip.mms to make syntax vms/munchconfig.c performs shell $var substitution for VMS @@ -2124,9 +2165,9 @@ vms/sockadapt.c glue for SockshShr socket support vms/sockadapt.h glue for SockshShr socket support vms/test.com DCL driver for regression tests vms/vms.c VMS-specific C code for Perl core -vms/vms_yfix.pl convert Unix perly.[ch] to VMS perly_[ch].vms vms/vmsish.h VMS-specific C header for Perl core vms/vmspipe.com VMS-specific piped command helper script +vms/vms_yfix.pl convert Unix perly.[ch] to VMS perly_[ch].vms vms/writemain.pl Generate perlmain.c from miniperlmain.c+extensions vos/build.cm VOS command macro to build Perl vos/Changes Changes made to port Perl to the VOS operating system @@ -2141,8 +2182,8 @@ vos/install_perl.cm VOS command macro to install perl after building vos/Makefile A helper for maintaining the config.*.* in UNIX vos/perl.bind VOS bind control file vos/test_vos_dummies.c Test program for "vos_dummies.c" -vos/vos_dummies.c Wrappers to soak up undefined functions vos/vosish.h VOS-specific header file +vos/vos_dummies.c Wrappers to soak up undefined functions warnings.h The warning numbers warnings.pl Program to write warnings.h and lib/warnings.pm win32/bin/exetype.pl Set executable type to CONSOLE or WINDOWS diff --git a/lib/Unicode/UCD.pm b/lib/Unicode/UCD.pm index 4e310e7..6c20d40 100644 --- a/lib/Unicode/UCD.pm +++ b/lib/Unicode/UCD.pm @@ -174,9 +174,9 @@ sub charblock { my 
$code = shift; unless (@BLOCKS) { - if (openunicode(\$BLOCKS, "Blocks.pl")) { + if (openunicode(\$BLOCKS, "Blocks.txt")) { while (<$BLOCKS>) { - if (/^([0-9A-F]+)\s+([0-9A-F]+)\s+(.+)/) { + if (/^([0-9A-F]+)\.\.([0-9A-F]+);\s+(.+)/) { push @BLOCKS, [ hex($1), hex($2), $3 ]; } } @@ -241,6 +241,40 @@ Note also that the script names are all in uppercase, e.g. C, while the block names are Capitalized and with intermixed spaces, e.g. C. +Greek +Cyrillic +Armenian +Hebrew +Arabic +Syriac +Thaana +Devanagari +Bengali +Gurmukhi +Gujarati +Oriya +Tamil +Telugu +Kannada +Malayalam +Sinhala +Thai +Lao +Tibetan +Myanmar +Georgian +Ethiopic +Cherokee +Ogham +Runic +Khmer +Hiragana +Katakana +Bopomofo +OldItalic +Gothic +Deseret + =head1 IMPLEMENTATION NOTE The first use of charinfo() opens a read-only filehandle to the Unicode diff --git a/lib/unicode/Blocks.pl b/lib/unicode/Blocks.pl index ef60058..e45026a 100644 --- a/lib/unicode/Blocks.pl +++ b/lib/unicode/Blocks.pl @@ -2,202 +2,103 @@ # This file is built by mktables.PL from e.g. Unicode.txt. # Any changes made here will be lost! 
return <<'END'; -0000 007F Basic Latin -# In/0.pl BasicLatin -0080 00FF Latin-1 Supplement -# In/1.pl Latin1Supplement -0100 017F Latin Extended-A -# In/2.pl LatinExtendedA -0180 024F Latin Extended-B -# In/3.pl LatinExtendedB -0250 02AF IPA Extensions -# In/4.pl IPAExtensions -02B0 02FF Spacing Modifier Letters -# In/5.pl SpacingModifierLetters -0300 036F Combining Diacritical Marks -# In/6.pl CombiningDiacriticalMarks -0370 03FF Greek -# In/7.pl Greek -0400 04FF Cyrillic -# In/8.pl Cyrillic -0530 058F Armenian -# In/9.pl Armenian -0590 05FF Hebrew -# In/10.pl Hebrew -0600 06FF Arabic -# In/11.pl Arabic -0700 074F Syriac -# In/12.pl Syriac -0780 07BF Thaana -# In/13.pl Thaana -0900 097F Devanagari -# In/14.pl Devanagari -0980 09FF Bengali -# In/15.pl Bengali -0A00 0A7F Gurmukhi -# In/16.pl Gurmukhi -0A80 0AFF Gujarati -# In/17.pl Gujarati -0B00 0B7F Oriya -# In/18.pl Oriya -0B80 0BFF Tamil -# In/19.pl Tamil -0C00 0C7F Telugu -# In/20.pl Telugu -0C80 0CFF Kannada -# In/21.pl Kannada -0D00 0D7F Malayalam -# In/22.pl Malayalam -0D80 0DFF Sinhala -# In/23.pl Sinhala -0E00 0E7F Thai -# In/24.pl Thai -0E80 0EFF Lao -# In/25.pl Lao -0F00 0FFF Tibetan -# In/26.pl Tibetan -1000 109F Myanmar -# In/27.pl Myanmar -10A0 10FF Georgian -# In/28.pl Georgian -1100 11FF Hangul Jamo -# In/29.pl HangulJamo -1200 137F Ethiopic -# In/30.pl Ethiopic -13A0 13FF Cherokee -# In/31.pl Cherokee -1400 167F Unified Canadian Aboriginal Syllabics -# In/32.pl UnifiedCanadianAboriginalSyllabics -1680 169F Ogham -# In/33.pl Ogham -16A0 16FF Runic -# In/34.pl Runic -1780 17FF Khmer -# In/35.pl Khmer -1800 18AF Mongolian -# In/36.pl Mongolian -1E00 1EFF Latin Extended Additional -# In/37.pl LatinExtendedAdditional -1F00 1FFF Greek Extended -# In/38.pl GreekExtended -2000 206F General Punctuation -# In/39.pl GeneralPunctuation -2070 209F Superscripts and Subscripts -# In/40.pl SuperscriptsandSubscripts -20A0 20CF Currency Symbols -# In/41.pl CurrencySymbols -20D0 20FF Combining Marks for Symbols -# 
In/42.pl CombiningMarksforSymbols -2100 214F Letterlike Symbols -# In/43.pl LetterlikeSymbols -2150 218F Number Forms -# In/44.pl NumberForms -2190 21FF Arrows -# In/45.pl Arrows -2200 22FF Mathematical Operators -# In/46.pl MathematicalOperators -2300 23FF Miscellaneous Technical -# In/47.pl MiscellaneousTechnical -2400 243F Control Pictures -# In/48.pl ControlPictures -2440 245F Optical Character Recognition -# In/49.pl OpticalCharacterRecognition -2460 24FF Enclosed Alphanumerics -# In/50.pl EnclosedAlphanumerics -2500 257F Box Drawing -# In/51.pl BoxDrawing -2580 259F Block Elements -# In/52.pl BlockElements -25A0 25FF Geometric Shapes -# In/53.pl GeometricShapes -2600 26FF Miscellaneous Symbols -# In/54.pl MiscellaneousSymbols -2700 27BF Dingbats -# In/55.pl Dingbats -2800 28FF Braille Patterns -# In/56.pl BraillePatterns -2E80 2EFF CJK Radicals Supplement -# In/57.pl CJKRadicalsSupplement -2F00 2FDF Kangxi Radicals -# In/58.pl KangxiRadicals -2FF0 2FFF Ideographic Description Characters -# In/59.pl IdeographicDescriptionCharacters -3000 303F CJK Symbols and Punctuation -# In/60.pl CJKSymbolsandPunctuation -3040 309F Hiragana -# In/61.pl Hiragana -30A0 30FF Katakana -# In/62.pl Katakana -3100 312F Bopomofo -# In/63.pl Bopomofo -3130 318F Hangul Compatibility Jamo -# In/64.pl HangulCompatibilityJamo -3190 319F Kanbun -# In/65.pl Kanbun -31A0 31BF Bopomofo Extended -# In/66.pl BopomofoExtended -3200 32FF Enclosed CJK Letters and Months -# In/67.pl EnclosedCJKLettersandMonths -3300 33FF CJK Compatibility -# In/68.pl CJKCompatibility -3400 4DB5 CJK Unified Ideographs Extension A -# In/69.pl CJKUnifiedIdeographsExtensionA -4E00 9FFF CJK Unified Ideographs -# In/70.pl CJKUnifiedIdeographs -A000 A48F Yi Syllables -# In/71.pl YiSyllables -A490 A4CF Yi Radicals -# In/72.pl YiRadicals -AC00 D7A3 Hangul Syllables -# In/73.pl HangulSyllables -D800 DB7F High Surrogates -# In/74.pl HighSurrogates -DB80 DBFF High Private Use Surrogates -# In/75.pl HighPrivateUseSurrogates 
-DC00 DFFF Low Surrogates -# In/76.pl LowSurrogates -E000 F8FF Private Use -# In/77.pl PrivateUse -F900 FAFF CJK Compatibility Ideographs -# In/78.pl CJKCompatibilityIdeographs -FB00 FB4F Alphabetic Presentation Forms -# In/79.pl AlphabeticPresentationForms -FB50 FDFF Arabic Presentation Forms-A -# In/80.pl ArabicPresentationFormsA -FE20 FE2F Combining Half Marks -# In/81.pl CombiningHalfMarks -FE30 FE4F CJK Compatibility Forms -# In/82.pl CJKCompatibilityForms -FE50 FE6F Small Form Variants -# In/83.pl SmallFormVariants -FE70 FEFE Arabic Presentation Forms-B -# In/84.pl ArabicPresentationFormsB -FEFF FEFF Specials -# In/85.pl Specials -FF00 FFEF Halfwidth and Fullwidth Forms -# In/86.pl HalfwidthandFullwidthForms -FFF0 FFFD Specials -# In/85.pl Specials -10300 1032F Old Italic -# In/87.pl OldItalic -10330 1034F Gothic -# In/88.pl Gothic -10400 1044F Deseret -# In/89.pl Deseret -1D000 1D0FF Byzantine Musical Symbols -# In/90.pl ByzantineMusicalSymbols -1D100 1D1FF Musical Symbols -# In/91.pl MusicalSymbols -1D400 1D7FF Mathematical Alphanumeric Symbols -# In/92.pl MathematicalAlphanumericSymbols -20000 2A6D6 CJK Unified Ideographs Extension B -# In/93.pl CJKUnifiedIdeographsExtensionB -2F800 2FA1F CJK Compatibility Ideographs Supplement -# In/94.pl CJKCompatibilityIdeographsSupplement -E0000 E007F Tags -# In/95.pl Tags -F0000 FFFFD Private Use -# In/77.pl PrivateUse -100000 10FFFD Private Use -# In/77.pl PrivateUse +0000 007F Basic Latin # BasicLatin In/40.pl +0080 00FF Latin-1 Supplement # Latin1Supplement In/41.pl +0100 017F Latin Extended-A # LatinExtendedA In/42.pl +0180 024F Latin Extended-B # LatinExtendedB In/43.pl +0250 02AF IPA Extensions # IPAExtensions In/44.pl +02B0 02FF Spacing Modifier Letters # SpacingModifierLetters In/45.pl +0300 036F Combining Diacritical Marks # CombiningDiacriticalMarks In/46.pl +0370 03FF Greek # GreekBlock In/47.pl +0400 04FF Cyrillic # CyrillicBlock In/48.pl +0530 058F Armenian # ArmenianBlock In/49.pl +0590 05FF Hebrew # 
HebrewBlock In/50.pl +0600 06FF Arabic # ArabicBlock In/51.pl +0700 074F Syriac # SyriacBlock In/52.pl +0780 07BF Thaana # ThaanaBlock In/53.pl +0900 097F Devanagari # DevanagariBlock In/54.pl +0980 09FF Bengali # BengaliBlock In/55.pl +0A00 0A7F Gurmukhi # GurmukhiBlock In/56.pl +0A80 0AFF Gujarati # GujaratiBlock In/57.pl +0B00 0B7F Oriya # OriyaBlock In/58.pl +0B80 0BFF Tamil # TamilBlock In/59.pl +0C00 0C7F Telugu # TeluguBlock In/60.pl +0C80 0CFF Kannada # KannadaBlock In/61.pl +0D00 0D7F Malayalam # MalayalamBlock In/62.pl +0D80 0DFF Sinhala # SinhalaBlock In/63.pl +0E00 0E7F Thai # ThaiBlock In/64.pl +0E80 0EFF Lao # LaoBlock In/65.pl +0F00 0FFF Tibetan # TibetanBlock In/66.pl +1000 109F Myanmar # MyanmarBlock In/67.pl +10A0 10FF Georgian # GeorgianBlock In/68.pl +1100 11FF Hangul Jamo # HangulJamo In/69.pl +1200 137F Ethiopic # EthiopicBlock In/70.pl +13A0 13FF Cherokee # CherokeeBlock In/71.pl +1400 167F Unified Canadian Aboriginal Syllabics # UnifiedCanadianAboriginalSyllabics In/72.pl +1680 169F Ogham # OghamBlock In/73.pl +16A0 16FF Runic # RunicBlock In/74.pl +1780 17FF Khmer # KhmerBlock In/75.pl +1800 18AF Mongolian # MongolianBlock In/76.pl +1E00 1EFF Latin Extended Additional # LatinExtendedAdditional In/77.pl +1F00 1FFF Greek Extended # GreekExtended In/78.pl +2000 206F General Punctuation # GeneralPunctuation In/79.pl +2070 209F Superscripts and Subscripts # SuperscriptsandSubscripts In/80.pl +20A0 20CF Currency Symbols # CurrencySymbols In/81.pl +20D0 20FF Combining Marks for Symbols # CombiningMarksforSymbols In/82.pl +2100 214F Letterlike Symbols # LetterlikeSymbols In/83.pl +2150 218F Number Forms # NumberForms In/84.pl +2190 21FF Arrows # Arrows In/85.pl +2200 22FF Mathematical Operators # MathematicalOperators In/86.pl +2300 23FF Miscellaneous Technical # MiscellaneousTechnical In/87.pl +2400 243F Control Pictures # ControlPictures In/88.pl +2440 245F Optical Character Recognition # OpticalCharacterRecognition In/89.pl +2460 24FF Enclosed 
Alphanumerics # EnclosedAlphanumerics In/90.pl +2500 257F Box Drawing # BoxDrawing In/91.pl +2580 259F Block Elements # BlockElements In/92.pl +25A0 25FF Geometric Shapes # GeometricShapes In/93.pl +2600 26FF Miscellaneous Symbols # MiscellaneousSymbols In/94.pl +2700 27BF Dingbats # Dingbats In/95.pl +2800 28FF Braille Patterns # BraillePatterns In/96.pl +2E80 2EFF CJK Radicals Supplement # CJKRadicalsSupplement In/97.pl +2F00 2FDF Kangxi Radicals # KangxiRadicals In/98.pl +2FF0 2FFF Ideographic Description Characters # IdeographicDescriptionCharacters In/99.pl +3000 303F CJK Symbols and Punctuation # CJKSymbolsandPunctuation In/100.pl +3040 309F Hiragana # HiraganaBlock In/101.pl +30A0 30FF Katakana # KatakanaBlock In/102.pl +3100 312F Bopomofo # BopomofoBlock In/103.pl +3130 318F Hangul Compatibility Jamo # HangulCompatibilityJamo In/104.pl +3190 319F Kanbun # Kanbun In/105.pl +31A0 31BF Bopomofo Extended # BopomofoExtended In/106.pl +3200 32FF Enclosed CJK Letters and Months # EnclosedCJKLettersandMonths In/107.pl +3300 33FF CJK Compatibility # CJKCompatibility In/108.pl +3400 4DB5 CJK Unified Ideographs Extension A # CJKUnifiedIdeographsExtensionA In/109.pl +4E00 9FFF CJK Unified Ideographs # CJKUnifiedIdeographs In/110.pl +A000 A48F Yi Syllables # YiSyllables In/111.pl +A490 A4CF Yi Radicals # YiRadicals In/112.pl +AC00 D7A3 Hangul Syllables # HangulSyllables In/113.pl +D800 DB7F High Surrogates # HighSurrogates In/114.pl +DB80 DBFF High Private Use Surrogates # HighPrivateUseSurrogates In/115.pl +DC00 DFFF Low Surrogates # LowSurrogates In/116.pl +E000 F8FF Private Use # PrivateUse In/117.pl +F900 FAFF CJK Compatibility Ideographs # CJKCompatibilityIdeographs In/118.pl +FB00 FB4F Alphabetic Presentation Forms # AlphabeticPresentationForms In/119.pl +FB50 FDFF Arabic Presentation Forms-A # ArabicPresentationFormsA In/120.pl +FE20 FE2F Combining Half Marks # CombiningHalfMarks In/121.pl +FE30 FE4F CJK Compatibility Forms # CJKCompatibilityForms In/122.pl +FE50 
FE6F Small Form Variants # SmallFormVariants In/123.pl +FE70 FEFE Arabic Presentation Forms-B # ArabicPresentationFormsB In/124.pl +FEFF FEFF Specials # Specials In/125.pl +FF00 FFEF Halfwidth and Fullwidth Forms # HalfwidthandFullwidthForms In/126.pl +FFF0 FFFD Specials # Specials In/125.pl +10300 1032F Old Italic # OldItalicBlock In/127.pl +10330 1034F Gothic # GothicBlock In/128.pl +10400 1044F Deseret # DeseretBlock In/129.pl +1D000 1D0FF Byzantine Musical Symbols # ByzantineMusicalSymbols In/130.pl +1D100 1D1FF Musical Symbols # MusicalSymbols In/131.pl +1D400 1D7FF Mathematical Alphanumeric Symbols # MathematicalAlphanumericSymbols In/132.pl +20000 2A6D6 CJK Unified Ideographs Extension B # CJKUnifiedIdeographsExtensionB In/133.pl +2F800 2FA1F CJK Compatibility Ideographs Supplement # CJKCompatibilityIdeographsSupplement In/134.pl +E0000 E007F Tags # Tags In/135.pl +F0000 FFFFD Private Use # PrivateUse In/117.pl +100000 10FFFD Private Use # PrivateUse In/117.pl END diff --git a/lib/unicode/In.pl b/lib/unicode/In.pl index e0b7a5a..a6c2419 100644 --- a/lib/unicode/In.pl +++ b/lib/unicode/In.pl @@ -2,100 +2,140 @@ # This file is built by mktables.PL from e.g. Unicode.txt. # Any changes made here will be lost! 
%utf8::In = ( -'BasicLatin' => 0, -'Latin1Supplement' => 1, -'LatinExtendedA' => 2, -'LatinExtendedB' => 3, -'IPAExtensions' => 4, -'SpacingModifierLetters' => 5, -'CombiningDiacriticalMarks' => 6, -'Greek' => 7, -'Cyrillic' => 8, -'Armenian' => 9, -'Hebrew' => 10, -'Arabic' => 11, -'Syriac' => 12, -'Thaana' => 13, -'Devanagari' => 14, -'Bengali' => 15, -'Gurmukhi' => 16, -'Gujarati' => 17, -'Oriya' => 18, -'Tamil' => 19, -'Telugu' => 20, -'Kannada' => 21, -'Malayalam' => 22, -'Sinhala' => 23, -'Thai' => 24, -'Lao' => 25, -'Tibetan' => 26, -'Myanmar' => 27, -'Georgian' => 28, -'HangulJamo' => 29, -'Ethiopic' => 30, -'Cherokee' => 31, -'UnifiedCanadianAboriginalSyllabics' => 32, -'Ogham' => 33, -'Runic' => 34, -'Khmer' => 35, -'Mongolian' => 36, -'LatinExtendedAdditional' => 37, -'GreekExtended' => 38, -'GeneralPunctuation' => 39, -'SuperscriptsandSubscripts' => 40, -'CurrencySymbols' => 41, -'CombiningMarksforSymbols' => 42, -'LetterlikeSymbols' => 43, -'NumberForms' => 44, -'Arrows' => 45, -'MathematicalOperators' => 46, -'MiscellaneousTechnical' => 47, -'ControlPictures' => 48, -'OpticalCharacterRecognition' => 49, -'EnclosedAlphanumerics' => 50, -'BoxDrawing' => 51, -'BlockElements' => 52, -'GeometricShapes' => 53, -'MiscellaneousSymbols' => 54, -'Dingbats' => 55, -'BraillePatterns' => 56, -'CJKRadicalsSupplement' => 57, -'KangxiRadicals' => 58, -'IdeographicDescriptionCharacters' => 59, -'CJKSymbolsandPunctuation' => 60, -'Hiragana' => 61, -'Katakana' => 62, -'Bopomofo' => 63, -'HangulCompatibilityJamo' => 64, -'Kanbun' => 65, -'BopomofoExtended' => 66, -'EnclosedCJKLettersandMonths' => 67, -'CJKCompatibility' => 68, -'CJKUnifiedIdeographsExtensionA' => 69, -'CJKUnifiedIdeographs' => 70, -'YiSyllables' => 71, -'YiRadicals' => 72, -'HangulSyllables' => 73, -'HighSurrogates' => 74, -'HighPrivateUseSurrogates' => 75, -'LowSurrogates' => 76, -'PrivateUse' => 77, -'CJKCompatibilityIdeographs' => 78, -'AlphabeticPresentationForms' => 79, -'ArabicPresentationFormsA' 
=> 80, -'CombiningHalfMarks' => 81, -'CJKCompatibilityForms' => 82, -'SmallFormVariants' => 83, -'ArabicPresentationFormsB' => 84, -'Specials' => 85, -'HalfwidthandFullwidthForms' => 86, -'OldItalic' => 87, -'Gothic' => 88, -'Deseret' => 89, -'ByzantineMusicalSymbols' => 90, -'MusicalSymbols' => 91, -'MathematicalAlphanumericSymbols' => 92, -'CJKUnifiedIdeographsExtensionB' => 93, -'CJKCompatibilityIdeographsSupplement' => 94, -'Tags' => 95, +'Latin' => 0, +'Greek' => 1, +'Cyrillic' => 2, +'Armenian' => 3, +'Hebrew' => 4, +'Arabic' => 5, +'Syriac' => 6, +'Thaana' => 7, +'Devanagari' => 8, +'Bengali' => 9, +'Gurmukhi' => 10, +'Gujarati' => 11, +'Oriya' => 12, +'Tamil' => 13, +'Telugu' => 14, +'Kannada' => 15, +'Malayalam' => 16, +'Sinhala' => 17, +'Thai' => 18, +'Lao' => 19, +'Tibetan' => 20, +'Myanmar' => 21, +'Georgian' => 22, +'Hangul' => 23, +'Ethiopic' => 24, +'Cherokee' => 25, +'CanadianAboriginal' => 26, +'Ogham' => 27, +'Runic' => 28, +'Khmer' => 29, +'Mongolian' => 30, +'Hiragana' => 31, +'Katakana' => 32, +'Bopomofo' => 33, +'Han' => 34, +'Yi' => 35, +'OldItalic' => 36, +'Gothic' => 37, +'Deseret' => 38, +'Inherited' => 39, +'BasicLatin' => 40, +'Latin1Supplement' => 41, +'LatinExtendedA' => 42, +'LatinExtendedB' => 43, +'IPAExtensions' => 44, +'SpacingModifierLetters' => 45, +'CombiningDiacriticalMarks' => 46, +'GreekBlock' => 47, +'CyrillicBlock' => 48, +'ArmenianBlock' => 49, +'HebrewBlock' => 50, +'ArabicBlock' => 51, +'SyriacBlock' => 52, +'ThaanaBlock' => 53, +'DevanagariBlock' => 54, +'BengaliBlock' => 55, +'GurmukhiBlock' => 56, +'GujaratiBlock' => 57, +'OriyaBlock' => 58, +'TamilBlock' => 59, +'TeluguBlock' => 60, +'KannadaBlock' => 61, +'MalayalamBlock' => 62, +'SinhalaBlock' => 63, +'ThaiBlock' => 64, +'LaoBlock' => 65, +'TibetanBlock' => 66, +'MyanmarBlock' => 67, +'GeorgianBlock' => 68, +'HangulJamo' => 69, +'EthiopicBlock' => 70, +'CherokeeBlock' => 71, +'UnifiedCanadianAboriginalSyllabics' => 72, +'OghamBlock' => 73, +'RunicBlock' => 74, 
+'KhmerBlock' => 75, +'MongolianBlock' => 76, +'LatinExtendedAdditional' => 77, +'GreekExtended' => 78, +'GeneralPunctuation' => 79, +'SuperscriptsandSubscripts' => 80, +'CurrencySymbols' => 81, +'CombiningMarksforSymbols' => 82, +'LetterlikeSymbols' => 83, +'NumberForms' => 84, +'Arrows' => 85, +'MathematicalOperators' => 86, +'MiscellaneousTechnical' => 87, +'ControlPictures' => 88, +'OpticalCharacterRecognition' => 89, +'EnclosedAlphanumerics' => 90, +'BoxDrawing' => 91, +'BlockElements' => 92, +'GeometricShapes' => 93, +'MiscellaneousSymbols' => 94, +'Dingbats' => 95, +'BraillePatterns' => 96, +'CJKRadicalsSupplement' => 97, +'KangxiRadicals' => 98, +'IdeographicDescriptionCharacters' => 99, +'CJKSymbolsandPunctuation' => 100, +'HiraganaBlock' => 101, +'KatakanaBlock' => 102, +'BopomofoBlock' => 103, +'HangulCompatibilityJamo' => 104, +'Kanbun' => 105, +'BopomofoExtended' => 106, +'EnclosedCJKLettersandMonths' => 107, +'CJKCompatibility' => 108, +'CJKUnifiedIdeographsExtensionA' => 109, +'CJKUnifiedIdeographs' => 110, +'YiSyllables' => 111, +'YiRadicals' => 112, +'HangulSyllables' => 113, +'HighSurrogates' => 114, +'HighPrivateUseSurrogates' => 115, +'LowSurrogates' => 116, +'PrivateUse' => 117, +'CJKCompatibilityIdeographs' => 118, +'AlphabeticPresentationForms' => 119, +'ArabicPresentationFormsA' => 120, +'CombiningHalfMarks' => 121, +'CJKCompatibilityForms' => 122, +'SmallFormVariants' => 123, +'ArabicPresentationFormsB' => 124, +'Specials' => 125, +'HalfwidthandFullwidthForms' => 126, +'OldItalicBlock' => 127, +'GothicBlock' => 128, +'DeseretBlock' => 129, +'ByzantineMusicalSymbols' => 130, +'MusicalSymbols' => 131, +'MathematicalAlphanumericSymbols' => 132, +'CJKUnifiedIdeographsExtensionB' => 133, +'CJKCompatibilityIdeographsSupplement' => 134, +'Tags' => 135, ); diff --git a/lib/unicode/In/0.pl b/lib/unicode/In/0.pl index 475c1df..4d6dcfd 100644 --- a/lib/unicode/In/0.pl +++ b/lib/unicode/In/0.pl @@ -2,5 +2,22 @@ # This file is built by mktables.PL from 
e.g. Unicode.txt. # Any changes made here will be lost! return <<'END'; -0000 007F +0041 005A +0061 007A +00C0 00D6 +00D8 00F6 +00F8 01BA +01BC 01BF +01C0 01C3 +01C4 021F +0222 0233 +0250 02AD +02B0 02B8 +02E0 02E4 +1E00 1E9B +1EA0 1EF9 +212A 212B +FB00 FB06 +FF21 FF3A +FF41 FF5A END diff --git a/lib/unicode/In/1.pl b/lib/unicode/In/1.pl index 5a5aa0e..f64aca7 100644 --- a/lib/unicode/In/1.pl +++ b/lib/unicode/In/1.pl @@ -2,5 +2,24 @@ # This file is built by mktables.PL from e.g. Unicode.txt. # Any changes made here will be lost! return <<'END'; -0080 00FF +0388 038A +038E 03A1 +03A3 03CE +03D0 03D7 +03DA 03F5 +1F00 1F15 +1F18 1F1D +1F20 1F45 +1F48 1F4D +1F50 1F57 +1F5F 1F7D +1F80 1FB4 +1FB6 1FBC +1FC2 1FC4 +1FC6 1FCC +1FD0 1FD3 +1FD6 1FDB +1FE0 1FEC +1FF2 1FF4 +1FF6 1FFC END diff --git a/lib/unicode/In/10.pl b/lib/unicode/In/10.pl index f1d866c..f656245 100644 --- a/lib/unicode/In/10.pl +++ b/lib/unicode/In/10.pl @@ -2,5 +2,19 @@ # This file is built by mktables.PL from e.g. Unicode.txt. # Any changes made here will be lost! return <<'END'; -0590 05FF +0A05 0A0A +0A0F 0A10 +0A13 0A28 +0A2A 0A30 +0A32 0A33 +0A35 0A36 +0A38 0A39 +0A3E 0A40 +0A41 0A42 +0A47 0A48 +0A4B 0A4D +0A59 0A5C +0A66 0A6F +0A70 0A71 +0A72 0A74 END diff --git a/lib/unicode/In/100.pl b/lib/unicode/In/100.pl new file mode 100644 index 0000000..0c66f05 --- /dev/null +++ b/lib/unicode/In/100.pl @@ -0,0 +1,6 @@ +# !!!!!!! DO NOT EDIT THIS FILE !!!!!!! +# This file is built by mktables.PL from e.g. Unicode.txt. +# Any changes made here will be lost! +return <<'END'; +3000 303F +END diff --git a/lib/unicode/In/101.pl b/lib/unicode/In/101.pl new file mode 100644 index 0000000..49b4e49 --- /dev/null +++ b/lib/unicode/In/101.pl @@ -0,0 +1,6 @@ +# !!!!!!! DO NOT EDIT THIS FILE !!!!!!! +# This file is built by mktables.PL from e.g. Unicode.txt. +# Any changes made here will be lost! 
+return <<'END'; +3040 309F +END diff --git a/lib/unicode/In/102.pl b/lib/unicode/In/102.pl new file mode 100644 index 0000000..e5568a2 --- /dev/null +++ b/lib/unicode/In/102.pl @@ -0,0 +1,6 @@ +# !!!!!!! DO NOT EDIT THIS FILE !!!!!!! +# This file is built by mktables.PL from e.g. Unicode.txt. +# Any changes made here will be lost! +return <<'END'; +30A0 30FF +END diff --git a/lib/unicode/In/103.pl b/lib/unicode/In/103.pl new file mode 100644 index 0000000..4f9b5f4 --- /dev/null +++ b/lib/unicode/In/103.pl @@ -0,0 +1,6 @@ +# !!!!!!! DO NOT EDIT THIS FILE !!!!!!! +# This file is built by mktables.PL from e.g. Unicode.txt. +# Any changes made here will be lost! +return <<'END'; +3100 312F +END diff --git a/lib/unicode/In/104.pl b/lib/unicode/In/104.pl new file mode 100644 index 0000000..b15c4cc --- /dev/null +++ b/lib/unicode/In/104.pl @@ -0,0 +1,6 @@ +# !!!!!!! DO NOT EDIT THIS FILE !!!!!!! +# This file is built by mktables.PL from e.g. Unicode.txt. +# Any changes made here will be lost! +return <<'END'; +3130 318F +END diff --git a/lib/unicode/In/105.pl b/lib/unicode/In/105.pl new file mode 100644 index 0000000..d78c208 --- /dev/null +++ b/lib/unicode/In/105.pl @@ -0,0 +1,6 @@ +# !!!!!!! DO NOT EDIT THIS FILE !!!!!!! +# This file is built by mktables.PL from e.g. Unicode.txt. +# Any changes made here will be lost! +return <<'END'; +3190 319F +END diff --git a/lib/unicode/In/106.pl b/lib/unicode/In/106.pl new file mode 100644 index 0000000..96150b4 --- /dev/null +++ b/lib/unicode/In/106.pl @@ -0,0 +1,6 @@ +# !!!!!!! DO NOT EDIT THIS FILE !!!!!!! +# This file is built by mktables.PL from e.g. Unicode.txt. +# Any changes made here will be lost! +return <<'END'; +31A0 31BF +END diff --git a/lib/unicode/In/107.pl b/lib/unicode/In/107.pl new file mode 100644 index 0000000..2708fec --- /dev/null +++ b/lib/unicode/In/107.pl @@ -0,0 +1,6 @@ +# !!!!!!! DO NOT EDIT THIS FILE !!!!!!! +# This file is built by mktables.PL from e.g. Unicode.txt. 
+# Any changes made here will be lost! +return <<'END'; +3200 32FF +END diff --git a/lib/unicode/In/108.pl b/lib/unicode/In/108.pl new file mode 100644 index 0000000..d504529 --- /dev/null +++ b/lib/unicode/In/108.pl @@ -0,0 +1,6 @@ +# !!!!!!! DO NOT EDIT THIS FILE !!!!!!! +# This file is built by mktables.PL from e.g. Unicode.txt. +# Any changes made here will be lost! +return <<'END'; +3300 33FF +END diff --git a/lib/unicode/In/109.pl b/lib/unicode/In/109.pl new file mode 100644 index 0000000..83adb81 --- /dev/null +++ b/lib/unicode/In/109.pl @@ -0,0 +1,6 @@ +# !!!!!!! DO NOT EDIT THIS FILE !!!!!!! +# This file is built by mktables.PL from e.g. Unicode.txt. +# Any changes made here will be lost! +return <<'END'; +3400 4DB5 +END diff --git a/lib/unicode/In/11.pl b/lib/unicode/In/11.pl index 7546a74..edbbfbe 100644 --- a/lib/unicode/In/11.pl +++ b/lib/unicode/In/11.pl @@ -2,5 +2,16 @@ # This file is built by mktables.PL from e.g. Unicode.txt. # Any changes made here will be lost! return <<'END'; -0600 06FF +0A81 0A82 +0A85 0A8B +0A8F 0A91 +0A93 0AA8 +0AAA 0AB0 +0AB2 0AB3 +0AB5 0AB9 +0ABE 0AC0 +0AC1 0AC5 +0AC7 0AC8 +0ACB 0ACC +0AE6 0AEF END diff --git a/lib/unicode/In/110.pl b/lib/unicode/In/110.pl new file mode 100644 index 0000000..f74552e --- /dev/null +++ b/lib/unicode/In/110.pl @@ -0,0 +1,6 @@ +# !!!!!!! DO NOT EDIT THIS FILE !!!!!!! +# This file is built by mktables.PL from e.g. Unicode.txt. +# Any changes made here will be lost! +return <<'END'; +4E00 9FFF +END diff --git a/lib/unicode/In/111.pl b/lib/unicode/In/111.pl new file mode 100644 index 0000000..0636a82 --- /dev/null +++ b/lib/unicode/In/111.pl @@ -0,0 +1,6 @@ +# !!!!!!! DO NOT EDIT THIS FILE !!!!!!! +# This file is built by mktables.PL from e.g. Unicode.txt. +# Any changes made here will be lost! +return <<'END'; +A000 A48F +END diff --git a/lib/unicode/In/112.pl b/lib/unicode/In/112.pl new file mode 100644 index 0000000..56404c5 --- /dev/null +++ b/lib/unicode/In/112.pl @@ -0,0 +1,6 @@ +# !!!!!!! 
DO NOT EDIT THIS FILE !!!!!!! +# This file is built by mktables.PL from e.g. Unicode.txt. +# Any changes made here will be lost! +return <<'END'; +A490 A4CF +END diff --git a/lib/unicode/In/113.pl b/lib/unicode/In/113.pl new file mode 100644 index 0000000..e1e2694 --- /dev/null +++ b/lib/unicode/In/113.pl @@ -0,0 +1,6 @@ +# !!!!!!! DO NOT EDIT THIS FILE !!!!!!! +# This file is built by mktables.PL from e.g. Unicode.txt. +# Any changes made here will be lost! +return <<'END'; +AC00 D7A3 +END diff --git a/lib/unicode/In/114.pl b/lib/unicode/In/114.pl new file mode 100644 index 0000000..0f4eb57 --- /dev/null +++ b/lib/unicode/In/114.pl @@ -0,0 +1,6 @@ +# !!!!!!! DO NOT EDIT THIS FILE !!!!!!! +# This file is built by mktables.PL from e.g. Unicode.txt. +# Any changes made here will be lost! +return <<'END'; +D800 DB7F +END diff --git a/lib/unicode/In/115.pl b/lib/unicode/In/115.pl new file mode 100644 index 0000000..ec4ca07 --- /dev/null +++ b/lib/unicode/In/115.pl @@ -0,0 +1,6 @@ +# !!!!!!! DO NOT EDIT THIS FILE !!!!!!! +# This file is built by mktables.PL from e.g. Unicode.txt. +# Any changes made here will be lost! +return <<'END'; +DB80 DBFF +END diff --git a/lib/unicode/In/116.pl b/lib/unicode/In/116.pl new file mode 100644 index 0000000..d056168 --- /dev/null +++ b/lib/unicode/In/116.pl @@ -0,0 +1,6 @@ +# !!!!!!! DO NOT EDIT THIS FILE !!!!!!! +# This file is built by mktables.PL from e.g. Unicode.txt. +# Any changes made here will be lost! +return <<'END'; +DC00 DFFF +END diff --git a/lib/unicode/In/117.pl b/lib/unicode/In/117.pl new file mode 100644 index 0000000..530166d --- /dev/null +++ b/lib/unicode/In/117.pl @@ -0,0 +1,6 @@ +# !!!!!!! DO NOT EDIT THIS FILE !!!!!!! +# This file is built by mktables.PL from e.g. Unicode.txt. +# Any changes made here will be lost! 
+return <<'END'; +100000 10FFFD +END diff --git a/lib/unicode/In/118.pl b/lib/unicode/In/118.pl new file mode 100644 index 0000000..0c553d6 --- /dev/null +++ b/lib/unicode/In/118.pl @@ -0,0 +1,6 @@ +# !!!!!!! DO NOT EDIT THIS FILE !!!!!!! +# This file is built by mktables.PL from e.g. Unicode.txt. +# Any changes made here will be lost! +return <<'END'; +F900 FAFF +END diff --git a/lib/unicode/In/119.pl b/lib/unicode/In/119.pl new file mode 100644 index 0000000..42cc1ca --- /dev/null +++ b/lib/unicode/In/119.pl @@ -0,0 +1,6 @@ +# !!!!!!! DO NOT EDIT THIS FILE !!!!!!! +# This file is built by mktables.PL from e.g. Unicode.txt. +# Any changes made here will be lost! +return <<'END'; +FB00 FB4F +END diff --git a/lib/unicode/In/12.pl b/lib/unicode/In/12.pl index e5247ad..ab920ef 100644 --- a/lib/unicode/In/12.pl +++ b/lib/unicode/In/12.pl @@ -2,5 +2,17 @@ # This file is built by mktables.PL from e.g. Unicode.txt. # Any changes made here will be lost! return <<'END'; -0700 074F +0B02 0B03 +0B05 0B0C +0B0F 0B10 +0B13 0B28 +0B2A 0B30 +0B32 0B33 +0B36 0B39 +0B41 0B43 +0B47 0B48 +0B4B 0B4C +0B5C 0B5D +0B5F 0B61 +0B66 0B6F END diff --git a/lib/unicode/In/120.pl b/lib/unicode/In/120.pl new file mode 100644 index 0000000..ffb4f1e --- /dev/null +++ b/lib/unicode/In/120.pl @@ -0,0 +1,6 @@ +# !!!!!!! DO NOT EDIT THIS FILE !!!!!!! +# This file is built by mktables.PL from e.g. Unicode.txt. +# Any changes made here will be lost! +return <<'END'; +FB50 FDFF +END diff --git a/lib/unicode/In/121.pl b/lib/unicode/In/121.pl new file mode 100644 index 0000000..cc8a4a2 --- /dev/null +++ b/lib/unicode/In/121.pl @@ -0,0 +1,6 @@ +# !!!!!!! DO NOT EDIT THIS FILE !!!!!!! +# This file is built by mktables.PL from e.g. Unicode.txt. +# Any changes made here will be lost! +return <<'END'; +FE20 FE2F +END diff --git a/lib/unicode/In/122.pl b/lib/unicode/In/122.pl new file mode 100644 index 0000000..4e462b8 --- /dev/null +++ b/lib/unicode/In/122.pl @@ -0,0 +1,6 @@ +# !!!!!!! 
DO NOT EDIT THIS FILE !!!!!!! +# This file is built by mktables.PL from e.g. Unicode.txt. +# Any changes made here will be lost! +return <<'END'; +FE30 FE4F +END diff --git a/lib/unicode/In/123.pl b/lib/unicode/In/123.pl new file mode 100644 index 0000000..4eff1ea --- /dev/null +++ b/lib/unicode/In/123.pl @@ -0,0 +1,6 @@ +# !!!!!!! DO NOT EDIT THIS FILE !!!!!!! +# This file is built by mktables.PL from e.g. Unicode.txt. +# Any changes made here will be lost! +return <<'END'; +FE50 FE6F +END diff --git a/lib/unicode/In/124.pl b/lib/unicode/In/124.pl new file mode 100644 index 0000000..dc5a32e --- /dev/null +++ b/lib/unicode/In/124.pl @@ -0,0 +1,6 @@ +# !!!!!!! DO NOT EDIT THIS FILE !!!!!!! +# This file is built by mktables.PL from e.g. Unicode.txt. +# Any changes made here will be lost! +return <<'END'; +FE70 FEFE +END diff --git a/lib/unicode/In/125.pl b/lib/unicode/In/125.pl new file mode 100644 index 0000000..931fc5b --- /dev/null +++ b/lib/unicode/In/125.pl @@ -0,0 +1,6 @@ +# !!!!!!! DO NOT EDIT THIS FILE !!!!!!! +# This file is built by mktables.PL from e.g. Unicode.txt. +# Any changes made here will be lost! +return <<'END'; +FFF0 FFFD +END diff --git a/lib/unicode/In/126.pl b/lib/unicode/In/126.pl new file mode 100644 index 0000000..03e8515 --- /dev/null +++ b/lib/unicode/In/126.pl @@ -0,0 +1,6 @@ +# !!!!!!! DO NOT EDIT THIS FILE !!!!!!! +# This file is built by mktables.PL from e.g. Unicode.txt. +# Any changes made here will be lost! +return <<'END'; +FF00 FFEF +END diff --git a/lib/unicode/In/127.pl b/lib/unicode/In/127.pl new file mode 100644 index 0000000..44a5e47 --- /dev/null +++ b/lib/unicode/In/127.pl @@ -0,0 +1,6 @@ +# !!!!!!! DO NOT EDIT THIS FILE !!!!!!! +# This file is built by mktables.PL from e.g. Unicode.txt. +# Any changes made here will be lost! 
+return <<'END'; +10300 1032F +END diff --git a/lib/unicode/In/128.pl b/lib/unicode/In/128.pl new file mode 100644 index 0000000..8030411 --- /dev/null +++ b/lib/unicode/In/128.pl @@ -0,0 +1,6 @@ +# !!!!!!! DO NOT EDIT THIS FILE !!!!!!! +# This file is built by mktables.PL from e.g. Unicode.txt. +# Any changes made here will be lost! +return <<'END'; +10330 1034F +END diff --git a/lib/unicode/In/129.pl b/lib/unicode/In/129.pl new file mode 100644 index 0000000..d2c50bb --- /dev/null +++ b/lib/unicode/In/129.pl @@ -0,0 +1,6 @@ +# !!!!!!! DO NOT EDIT THIS FILE !!!!!!! +# This file is built by mktables.PL from e.g. Unicode.txt. +# Any changes made here will be lost! +return <<'END'; +10400 1044F +END diff --git a/lib/unicode/In/13.pl b/lib/unicode/In/13.pl index 5bda401..a69c6d2 100644 --- a/lib/unicode/In/13.pl +++ b/lib/unicode/In/13.pl @@ -2,5 +2,19 @@ # This file is built by mktables.PL from e.g. Unicode.txt. # Any changes made here will be lost! return <<'END'; -0780 07BF +0B85 0B8A +0B8E 0B90 +0B92 0B95 +0B99 0B9A +0B9E 0B9F +0BA3 0BA4 +0BA8 0BAA +0BAE 0BB5 +0BB7 0BB9 +0BBE 0BBF +0BC1 0BC2 +0BC6 0BC8 +0BCA 0BCC +0BE7 0BEF +0BF0 0BF2 END diff --git a/lib/unicode/In/130.pl b/lib/unicode/In/130.pl new file mode 100644 index 0000000..f1073c7 --- /dev/null +++ b/lib/unicode/In/130.pl @@ -0,0 +1,6 @@ +# !!!!!!! DO NOT EDIT THIS FILE !!!!!!! +# This file is built by mktables.PL from e.g. Unicode.txt. +# Any changes made here will be lost! +return <<'END'; +1D000 1D0FF +END diff --git a/lib/unicode/In/131.pl b/lib/unicode/In/131.pl new file mode 100644 index 0000000..7435889 --- /dev/null +++ b/lib/unicode/In/131.pl @@ -0,0 +1,6 @@ +# !!!!!!! DO NOT EDIT THIS FILE !!!!!!! +# This file is built by mktables.PL from e.g. Unicode.txt. +# Any changes made here will be lost! 
+return <<'END'; +1D100 1D1FF +END diff --git a/lib/unicode/In/132.pl b/lib/unicode/In/132.pl new file mode 100644 index 0000000..7e40edc --- /dev/null +++ b/lib/unicode/In/132.pl @@ -0,0 +1,6 @@ +# !!!!!!! DO NOT EDIT THIS FILE !!!!!!! +# This file is built by mktables.PL from e.g. Unicode.txt. +# Any changes made here will be lost! +return <<'END'; +1D400 1D7FF +END diff --git a/lib/unicode/In/133.pl b/lib/unicode/In/133.pl new file mode 100644 index 0000000..931aec3 --- /dev/null +++ b/lib/unicode/In/133.pl @@ -0,0 +1,6 @@ +# !!!!!!! DO NOT EDIT THIS FILE !!!!!!! +# This file is built by mktables.PL from e.g. Unicode.txt. +# Any changes made here will be lost! +return <<'END'; +20000 2A6D6 +END diff --git a/lib/unicode/In/134.pl b/lib/unicode/In/134.pl new file mode 100644 index 0000000..c025148 --- /dev/null +++ b/lib/unicode/In/134.pl @@ -0,0 +1,6 @@ +# !!!!!!! DO NOT EDIT THIS FILE !!!!!!! +# This file is built by mktables.PL from e.g. Unicode.txt. +# Any changes made here will be lost! +return <<'END'; +2F800 2FA1F +END diff --git a/lib/unicode/In/135.pl b/lib/unicode/In/135.pl new file mode 100644 index 0000000..495d2d5 --- /dev/null +++ b/lib/unicode/In/135.pl @@ -0,0 +1,6 @@ +# !!!!!!! DO NOT EDIT THIS FILE !!!!!!! +# This file is built by mktables.PL from e.g. Unicode.txt. +# Any changes made here will be lost! +return <<'END'; +E0000 E007F +END diff --git a/lib/unicode/In/14.pl b/lib/unicode/In/14.pl index a20b68d..f82057d 100644 --- a/lib/unicode/In/14.pl +++ b/lib/unicode/In/14.pl @@ -2,5 +2,17 @@ # This file is built by mktables.PL from e.g. Unicode.txt. # Any changes made here will be lost! 
return <<'END'; -0900 097F +0C01 0C03 +0C05 0C0C +0C0E 0C10 +0C12 0C28 +0C2A 0C33 +0C35 0C39 +0C3E 0C40 +0C41 0C44 +0C46 0C48 +0C4A 0C4D +0C55 0C56 +0C60 0C61 +0C66 0C6F END diff --git a/lib/unicode/In/15.pl b/lib/unicode/In/15.pl index 306f653..0b3207d 100644 --- a/lib/unicode/In/15.pl +++ b/lib/unicode/In/15.pl @@ -2,5 +2,17 @@ # This file is built by mktables.PL from e.g. Unicode.txt. # Any changes made here will be lost! return <<'END'; -0980 09FF +0C82 0C83 +0C85 0C8C +0C8E 0C90 +0C92 0CA8 +0CAA 0CB3 +0CB5 0CB9 +0CC0 0CC4 +0CC7 0CC8 +0CCA 0CCB +0CCC 0CCD +0CD5 0CD6 +0CE0 0CE1 +0CE6 0CEF END diff --git a/lib/unicode/In/16.pl b/lib/unicode/In/16.pl index d37d484..e35e526 100644 --- a/lib/unicode/In/16.pl +++ b/lib/unicode/In/16.pl @@ -2,5 +2,15 @@ # This file is built by mktables.PL from e.g. Unicode.txt. # Any changes made here will be lost! return <<'END'; -0A00 0A7F +0D02 0D03 +0D05 0D0C +0D0E 0D10 +0D12 0D28 +0D2A 0D39 +0D3E 0D40 +0D41 0D43 +0D46 0D48 +0D4A 0D4C +0D60 0D61 +0D66 0D6F END diff --git a/lib/unicode/In/17.pl b/lib/unicode/In/17.pl index 65d853b..8220990 100644 --- a/lib/unicode/In/17.pl +++ b/lib/unicode/In/17.pl @@ -2,5 +2,13 @@ # This file is built by mktables.PL from e.g. Unicode.txt. # Any changes made here will be lost! return <<'END'; -0A80 0AFF +0D82 0D83 +0D85 0D96 +0D9A 0DB1 +0DB3 0DBB +0DC0 0DC6 +0DCF 0DD1 +0DD2 0DD4 +0DD8 0DDF +0DF2 0DF3 END diff --git a/lib/unicode/In/18.pl b/lib/unicode/In/18.pl index 14e1027..356cb5c 100644 --- a/lib/unicode/In/18.pl +++ b/lib/unicode/In/18.pl @@ -2,5 +2,10 @@ # This file is built by mktables.PL from e.g. Unicode.txt. # Any changes made here will be lost! return <<'END'; -0B00 0B7F +0E01 0E30 +0E32 0E33 +0E34 0E3A +0E40 0E45 +0E47 0E4E +0E50 0E59 END diff --git a/lib/unicode/In/19.pl b/lib/unicode/In/19.pl index a28ba3d..6d35986 100644 --- a/lib/unicode/In/19.pl +++ b/lib/unicode/In/19.pl @@ -2,5 +2,18 @@ # This file is built by mktables.PL from e.g. Unicode.txt. 
# Any changes made here will be lost! return <<'END'; -0B80 0BFF +0E81 0E82 +0E87 0E88 +0E94 0E97 +0E99 0E9F +0EA1 0EA3 +0EAA 0EAB +0EAD 0EB0 +0EB2 0EB3 +0EB4 0EB9 +0EBB 0EBC +0EC0 0EC4 +0EC8 0ECD +0ED0 0ED9 +0EDC 0EDD END diff --git a/lib/unicode/In/2.pl b/lib/unicode/In/2.pl index 0f6acf9..4c8985f 100644 --- a/lib/unicode/In/2.pl +++ b/lib/unicode/In/2.pl @@ -2,5 +2,11 @@ # This file is built by mktables.PL from e.g. Unicode.txt. # Any changes made here will be lost! return <<'END'; -0100 017F +0400 0481 +0483 0486 +048C 04C4 +04C7 04C8 +04CB 04CC +04D0 04F5 +04F8 04F9 END diff --git a/lib/unicode/In/20.pl b/lib/unicode/In/20.pl index aff6cc9..7a1a630 100644 --- a/lib/unicode/In/20.pl +++ b/lib/unicode/In/20.pl @@ -2,5 +2,15 @@ # This file is built by mktables.PL from e.g. Unicode.txt. # Any changes made here will be lost! return <<'END'; -0C00 0C7F +0F18 0F19 +0F20 0F29 +0F2A 0F33 +0F40 0F47 +0F49 0F6A +0F71 0F7E +0F80 0F84 +0F86 0F87 +0F88 0F8B +0F90 0F97 +0F99 0FBC END diff --git a/lib/unicode/In/21.pl b/lib/unicode/In/21.pl index 41e05bd..4743cd1 100644 --- a/lib/unicode/In/21.pl +++ b/lib/unicode/In/21.pl @@ -2,5 +2,13 @@ # This file is built by mktables.PL from e.g. Unicode.txt. # Any changes made here will be lost! return <<'END'; -0C80 0CFF +1000 1021 +1023 1027 +1029 102A +102D 1030 +1036 1037 +1040 1049 +1050 1055 +1056 1057 +1058 1059 END diff --git a/lib/unicode/In/22.pl b/lib/unicode/In/22.pl index b42bbee..df0b241 100644 --- a/lib/unicode/In/22.pl +++ b/lib/unicode/In/22.pl @@ -2,5 +2,6 @@ # This file is built by mktables.PL from e.g. Unicode.txt. # Any changes made here will be lost! return <<'END'; -0D00 0D7F +10A0 10C5 +10D0 10F6 END diff --git a/lib/unicode/In/23.pl b/lib/unicode/In/23.pl index 00da6d1..902b03c 100644 --- a/lib/unicode/In/23.pl +++ b/lib/unicode/In/23.pl @@ -2,5 +2,14 @@ # This file is built by mktables.PL from e.g. Unicode.txt. # Any changes made here will be lost! 
return <<'END'; -0D80 0DFF +1100 1159 +115F 11A2 +11A8 11F9 +3131 318E +AC00 D7A3 +FFA0 FFBE +FFC2 FFC7 +FFCA FFCF +FFD2 FFD7 +FFDA FFDC END diff --git a/lib/unicode/In/24.pl b/lib/unicode/In/24.pl index 2fa00eb..c56ed97 100644 --- a/lib/unicode/In/24.pl +++ b/lib/unicode/In/24.pl @@ -2,5 +2,25 @@ # This file is built by mktables.PL from e.g. Unicode.txt. # Any changes made here will be lost! return <<'END'; -0E00 0E7F +1200 1206 +1208 1246 +124A 124D +1250 1256 +125A 125D +1260 1286 +128A 128D +1290 12AE +12B2 12B5 +12B8 12BE +12C2 12C5 +12C8 12CE +12D0 12D6 +12D8 12EE +12F0 130E +1312 1315 +1318 131E +1320 1346 +1348 135A +1369 1371 +1372 137C END diff --git a/lib/unicode/In/25.pl b/lib/unicode/In/25.pl index 5fd607c..42d0af4 100644 --- a/lib/unicode/In/25.pl +++ b/lib/unicode/In/25.pl @@ -2,5 +2,5 @@ # This file is built by mktables.PL from e.g. Unicode.txt. # Any changes made here will be lost! return <<'END'; -0E80 0EFF +13A0 13F4 END diff --git a/lib/unicode/In/26.pl b/lib/unicode/In/26.pl index 3ae5e62..fda7338 100644 --- a/lib/unicode/In/26.pl +++ b/lib/unicode/In/26.pl @@ -2,5 +2,6 @@ # This file is built by mktables.PL from e.g. Unicode.txt. # Any changes made here will be lost! return <<'END'; -0F00 0FFF +1401 166C +166F 1676 END diff --git a/lib/unicode/In/27.pl b/lib/unicode/In/27.pl index ecc3448..31dfc66 100644 --- a/lib/unicode/In/27.pl +++ b/lib/unicode/In/27.pl @@ -2,5 +2,5 @@ # This file is built by mktables.PL from e.g. Unicode.txt. # Any changes made here will be lost! return <<'END'; -1000 109F +1681 169A END diff --git a/lib/unicode/In/28.pl b/lib/unicode/In/28.pl index 73a8818..77b12e1 100644 --- a/lib/unicode/In/28.pl +++ b/lib/unicode/In/28.pl @@ -2,5 +2,6 @@ # This file is built by mktables.PL from e.g. Unicode.txt. # Any changes made here will be lost! 
return <<'END'; -10A0 10FF +16A0 16EA +16EE 16F0 END diff --git a/lib/unicode/In/29.pl b/lib/unicode/In/29.pl index 692be7d..a3cb8b3 100644 --- a/lib/unicode/In/29.pl +++ b/lib/unicode/In/29.pl @@ -2,5 +2,11 @@ # This file is built by mktables.PL from e.g. Unicode.txt. # Any changes made here will be lost! return <<'END'; -1100 11FF +1780 17B3 +17B4 17B6 +17B7 17BD +17BE 17C5 +17C7 17C8 +17C9 17D3 +17E0 17E9 END diff --git a/lib/unicode/In/3.pl b/lib/unicode/In/3.pl index 68f0932..22f6f4d 100644 --- a/lib/unicode/In/3.pl +++ b/lib/unicode/In/3.pl @@ -2,5 +2,7 @@ # This file is built by mktables.PL from e.g. Unicode.txt. # Any changes made here will be lost! return <<'END'; -0180 024F +0531 0556 +0561 0587 +FB13 FB17 END diff --git a/lib/unicode/In/30.pl b/lib/unicode/In/30.pl index ad4776d..0552657 100644 --- a/lib/unicode/In/30.pl +++ b/lib/unicode/In/30.pl @@ -2,5 +2,8 @@ # This file is built by mktables.PL from e.g. Unicode.txt. # Any changes made here will be lost! return <<'END'; -1200 137F +1810 1819 +1820 1842 +1844 1877 +1880 18A8 END diff --git a/lib/unicode/In/31.pl b/lib/unicode/In/31.pl index f40dfa2..df3dd8c 100644 --- a/lib/unicode/In/31.pl +++ b/lib/unicode/In/31.pl @@ -2,5 +2,6 @@ # This file is built by mktables.PL from e.g. Unicode.txt. # Any changes made here will be lost! return <<'END'; -13A0 13FF +3041 3094 +309D 309E END diff --git a/lib/unicode/In/32.pl b/lib/unicode/In/32.pl index 7318008..87fe407 100644 --- a/lib/unicode/In/32.pl +++ b/lib/unicode/In/32.pl @@ -2,5 +2,8 @@ # This file is built by mktables.PL from e.g. Unicode.txt. # Any changes made here will be lost! return <<'END'; -1400 167F +30A1 30FA +30FD 30FE +FF66 FF6F +FF71 FF9D END diff --git a/lib/unicode/In/33.pl b/lib/unicode/In/33.pl index 5d7bd97..bafc4c7 100644 --- a/lib/unicode/In/33.pl +++ b/lib/unicode/In/33.pl @@ -2,5 +2,6 @@ # This file is built by mktables.PL from e.g. Unicode.txt. # Any changes made here will be lost! 
return <<'END'; -1680 169F +3105 312C +31A0 31B7 END diff --git a/lib/unicode/In/34.pl b/lib/unicode/In/34.pl index d404cb6..7f947b5 100644 --- a/lib/unicode/In/34.pl +++ b/lib/unicode/In/34.pl @@ -2,5 +2,14 @@ # This file is built by mktables.PL from e.g. Unicode.txt. # Any changes made here will be lost! return <<'END'; -16A0 16FF +2E80 2E99 +2E9B 2EF3 +2F00 2FD5 +3021 3029 +3038 303A +3400 4DB5 +4E00 9FA5 +F900 FA2D +20000 2A6D6 +2F800 2FA1D END diff --git a/lib/unicode/In/35.pl b/lib/unicode/In/35.pl index 2b0b198..924b0e0 100644 --- a/lib/unicode/In/35.pl +++ b/lib/unicode/In/35.pl @@ -2,5 +2,9 @@ # This file is built by mktables.PL from e.g. Unicode.txt. # Any changes made here will be lost! return <<'END'; -1780 17FF +A000 A48C +A490 A4A1 +A4A4 A4B3 +A4B5 A4C0 +A4C2 A4C4 END diff --git a/lib/unicode/In/36.pl b/lib/unicode/In/36.pl index 06526c6..a2dcdc2 100644 --- a/lib/unicode/In/36.pl +++ b/lib/unicode/In/36.pl @@ -2,5 +2,5 @@ # This file is built by mktables.PL from e.g. Unicode.txt. # Any changes made here will be lost! return <<'END'; -1800 18AF +10300 1031E END diff --git a/lib/unicode/In/37.pl b/lib/unicode/In/37.pl index c288810..486486d 100644 --- a/lib/unicode/In/37.pl +++ b/lib/unicode/In/37.pl @@ -2,5 +2,5 @@ # This file is built by mktables.PL from e.g. Unicode.txt. # Any changes made here will be lost! return <<'END'; -1E00 1EFF +10330 10349 END diff --git a/lib/unicode/In/38.pl b/lib/unicode/In/38.pl index 74cd2c8..98b1f16 100644 --- a/lib/unicode/In/38.pl +++ b/lib/unicode/In/38.pl @@ -2,5 +2,6 @@ # This file is built by mktables.PL from e.g. Unicode.txt. # Any changes made here will be lost! return <<'END'; -1F00 1FFF +10400 10425 +10428 1044D END diff --git a/lib/unicode/In/39.pl b/lib/unicode/In/39.pl index b9b0e7e..f4b0a17 100644 --- a/lib/unicode/In/39.pl +++ b/lib/unicode/In/39.pl @@ -2,5 +2,27 @@ # This file is built by mktables.PL from e.g. Unicode.txt. # Any changes made here will be lost! 
return <<'END'; -2000 206F +0300 034E +0360 0362 +0488 0489 +0591 05A1 +05A3 05B9 +05BB 05BD +05C1 05C2 +064B 0655 +06D6 06DC +06DD 06DE +06DF 06E4 +06E7 06E8 +06EA 06ED +20D0 20DC +20DD 20E0 +20E2 20E3 +302A 302F +3099 309A +FE20 FE23 +1D167 1D169 +1D17B 1D182 +1D185 1D18B +1D1AA 1D1AD END diff --git a/lib/unicode/In/4.pl b/lib/unicode/In/4.pl index f6e9454..2a9a94e 100644 --- a/lib/unicode/In/4.pl +++ b/lib/unicode/In/4.pl @@ -2,5 +2,12 @@ # This file is built by mktables.PL from e.g. Unicode.txt. # Any changes made here will be lost! return <<'END'; -0250 02AF +05D0 05EA +05F0 05F2 +FB1F FB28 +FB2A FB36 +FB38 FB3C +FB40 FB41 +FB43 FB44 +FB46 FB4F END diff --git a/lib/unicode/In/40.pl b/lib/unicode/In/40.pl index 2e36ac3..475c1df 100644 --- a/lib/unicode/In/40.pl +++ b/lib/unicode/In/40.pl @@ -2,5 +2,5 @@ # This file is built by mktables.PL from e.g. Unicode.txt. # Any changes made here will be lost! return <<'END'; -2070 209F +0000 007F END diff --git a/lib/unicode/In/41.pl b/lib/unicode/In/41.pl index 12c6737..5a5aa0e 100644 --- a/lib/unicode/In/41.pl +++ b/lib/unicode/In/41.pl @@ -2,5 +2,5 @@ # This file is built by mktables.PL from e.g. Unicode.txt. # Any changes made here will be lost! return <<'END'; -20A0 20CF +0080 00FF END diff --git a/lib/unicode/In/42.pl b/lib/unicode/In/42.pl index 2d58a56..0f6acf9 100644 --- a/lib/unicode/In/42.pl +++ b/lib/unicode/In/42.pl @@ -2,5 +2,5 @@ # This file is built by mktables.PL from e.g. Unicode.txt. # Any changes made here will be lost! return <<'END'; -20D0 20FF +0100 017F END diff --git a/lib/unicode/In/43.pl b/lib/unicode/In/43.pl index c735821..68f0932 100644 --- a/lib/unicode/In/43.pl +++ b/lib/unicode/In/43.pl @@ -2,5 +2,5 @@ # This file is built by mktables.PL from e.g. Unicode.txt. # Any changes made here will be lost! 
return <<'END'; -2100 214F +0180 024F END diff --git a/lib/unicode/In/44.pl b/lib/unicode/In/44.pl index a1949a1..f6e9454 100644 --- a/lib/unicode/In/44.pl +++ b/lib/unicode/In/44.pl @@ -2,5 +2,5 @@ # This file is built by mktables.PL from e.g. Unicode.txt. # Any changes made here will be lost! return <<'END'; -2150 218F +0250 02AF END diff --git a/lib/unicode/In/45.pl b/lib/unicode/In/45.pl index 799f739..a242e02 100644 --- a/lib/unicode/In/45.pl +++ b/lib/unicode/In/45.pl @@ -2,5 +2,5 @@ # This file is built by mktables.PL from e.g. Unicode.txt. # Any changes made here will be lost! return <<'END'; -2190 21FF +02B0 02FF END diff --git a/lib/unicode/In/46.pl b/lib/unicode/In/46.pl index 8bc8295..cf9bb94 100644 --- a/lib/unicode/In/46.pl +++ b/lib/unicode/In/46.pl @@ -2,5 +2,5 @@ # This file is built by mktables.PL from e.g. Unicode.txt. # Any changes made here will be lost! return <<'END'; -2200 22FF +0300 036F END diff --git a/lib/unicode/In/47.pl b/lib/unicode/In/47.pl index 6786795..8d89b71 100644 --- a/lib/unicode/In/47.pl +++ b/lib/unicode/In/47.pl @@ -2,5 +2,5 @@ # This file is built by mktables.PL from e.g. Unicode.txt. # Any changes made here will be lost! return <<'END'; -2300 23FF +0370 03FF END diff --git a/lib/unicode/In/48.pl b/lib/unicode/In/48.pl index 7aad2fc..0075ce1 100644 --- a/lib/unicode/In/48.pl +++ b/lib/unicode/In/48.pl @@ -2,5 +2,5 @@ # This file is built by mktables.PL from e.g. Unicode.txt. # Any changes made here will be lost! return <<'END'; -2400 243F +0400 04FF END diff --git a/lib/unicode/In/49.pl b/lib/unicode/In/49.pl index c7cecd0..a6d50e3 100644 --- a/lib/unicode/In/49.pl +++ b/lib/unicode/In/49.pl @@ -2,5 +2,5 @@ # This file is built by mktables.PL from e.g. Unicode.txt. # Any changes made here will be lost! 
return <<'END'; -2440 245F +0530 058F END diff --git a/lib/unicode/In/5.pl b/lib/unicode/In/5.pl index a242e02..feac649 100644 --- a/lib/unicode/In/5.pl +++ b/lib/unicode/In/5.pl @@ -2,5 +2,16 @@ # This file is built by mktables.PL from e.g. Unicode.txt. # Any changes made here will be lost! return <<'END'; -02B0 02FF +0621 063A +0641 064A +0671 06D3 +06E5 06E6 +06FA 06FC +FB50 FBB1 +FBD3 FD3D +FD50 FD8F +FD92 FDC7 +FDF0 FDFB +FE70 FE72 +FE76 FEFC END diff --git a/lib/unicode/In/50.pl b/lib/unicode/In/50.pl index 7b1b778..f1d866c 100644 --- a/lib/unicode/In/50.pl +++ b/lib/unicode/In/50.pl @@ -2,5 +2,5 @@ # This file is built by mktables.PL from e.g. Unicode.txt. # Any changes made here will be lost! return <<'END'; -2460 24FF +0590 05FF END diff --git a/lib/unicode/In/51.pl b/lib/unicode/In/51.pl index 4d44686..7546a74 100644 --- a/lib/unicode/In/51.pl +++ b/lib/unicode/In/51.pl @@ -2,5 +2,5 @@ # This file is built by mktables.PL from e.g. Unicode.txt. # Any changes made here will be lost! return <<'END'; -2500 257F +0600 06FF END diff --git a/lib/unicode/In/52.pl b/lib/unicode/In/52.pl index 6135c93..e5247ad 100644 --- a/lib/unicode/In/52.pl +++ b/lib/unicode/In/52.pl @@ -2,5 +2,5 @@ # This file is built by mktables.PL from e.g. Unicode.txt. # Any changes made here will be lost! return <<'END'; -2580 259F +0700 074F END diff --git a/lib/unicode/In/53.pl b/lib/unicode/In/53.pl index 855d98e..5bda401 100644 --- a/lib/unicode/In/53.pl +++ b/lib/unicode/In/53.pl @@ -2,5 +2,5 @@ # This file is built by mktables.PL from e.g. Unicode.txt. # Any changes made here will be lost! return <<'END'; -25A0 25FF +0780 07BF END diff --git a/lib/unicode/In/54.pl b/lib/unicode/In/54.pl index 0949bc2..a20b68d 100644 --- a/lib/unicode/In/54.pl +++ b/lib/unicode/In/54.pl @@ -2,5 +2,5 @@ # This file is built by mktables.PL from e.g. Unicode.txt. # Any changes made here will be lost! 
return <<'END'; -2600 26FF +0900 097F END diff --git a/lib/unicode/In/55.pl b/lib/unicode/In/55.pl index 3013f73..306f653 100644 --- a/lib/unicode/In/55.pl +++ b/lib/unicode/In/55.pl @@ -2,5 +2,5 @@ # This file is built by mktables.PL from e.g. Unicode.txt. # Any changes made here will be lost! return <<'END'; -2700 27BF +0980 09FF END diff --git a/lib/unicode/In/56.pl b/lib/unicode/In/56.pl index d785c31..d37d484 100644 --- a/lib/unicode/In/56.pl +++ b/lib/unicode/In/56.pl @@ -2,5 +2,5 @@ # This file is built by mktables.PL from e.g. Unicode.txt. # Any changes made here will be lost! return <<'END'; -2800 28FF +0A00 0A7F END diff --git a/lib/unicode/In/57.pl b/lib/unicode/In/57.pl index 2bf5651..65d853b 100644 --- a/lib/unicode/In/57.pl +++ b/lib/unicode/In/57.pl @@ -2,5 +2,5 @@ # This file is built by mktables.PL from e.g. Unicode.txt. # Any changes made here will be lost! return <<'END'; -2E80 2EFF +0A80 0AFF END diff --git a/lib/unicode/In/58.pl b/lib/unicode/In/58.pl index 3903f15..14e1027 100644 --- a/lib/unicode/In/58.pl +++ b/lib/unicode/In/58.pl @@ -2,5 +2,5 @@ # This file is built by mktables.PL from e.g. Unicode.txt. # Any changes made here will be lost! return <<'END'; -2F00 2FDF +0B00 0B7F END diff --git a/lib/unicode/In/59.pl b/lib/unicode/In/59.pl index 07799e6..a28ba3d 100644 --- a/lib/unicode/In/59.pl +++ b/lib/unicode/In/59.pl @@ -2,5 +2,5 @@ # This file is built by mktables.PL from e.g. Unicode.txt. # Any changes made here will be lost! return <<'END'; -2FF0 2FFF +0B80 0BFF END diff --git a/lib/unicode/In/6.pl b/lib/unicode/In/6.pl index cf9bb94..7fc0239 100644 --- a/lib/unicode/In/6.pl +++ b/lib/unicode/In/6.pl @@ -2,5 +2,6 @@ # This file is built by mktables.PL from e.g. Unicode.txt. # Any changes made here will be lost! 
return <<'END'; -0300 036F +0712 072C +0730 074A END diff --git a/lib/unicode/In/60.pl b/lib/unicode/In/60.pl index 0c66f05..aff6cc9 100644 --- a/lib/unicode/In/60.pl +++ b/lib/unicode/In/60.pl @@ -2,5 +2,5 @@ # This file is built by mktables.PL from e.g. Unicode.txt. # Any changes made here will be lost! return <<'END'; -3000 303F +0C00 0C7F END diff --git a/lib/unicode/In/61.pl b/lib/unicode/In/61.pl index 49b4e49..41e05bd 100644 --- a/lib/unicode/In/61.pl +++ b/lib/unicode/In/61.pl @@ -2,5 +2,5 @@ # This file is built by mktables.PL from e.g. Unicode.txt. # Any changes made here will be lost! return <<'END'; -3040 309F +0C80 0CFF END diff --git a/lib/unicode/In/62.pl b/lib/unicode/In/62.pl index e5568a2..b42bbee 100644 --- a/lib/unicode/In/62.pl +++ b/lib/unicode/In/62.pl @@ -2,5 +2,5 @@ # This file is built by mktables.PL from e.g. Unicode.txt. # Any changes made here will be lost! return <<'END'; -30A0 30FF +0D00 0D7F END diff --git a/lib/unicode/In/63.pl b/lib/unicode/In/63.pl index 4f9b5f4..00da6d1 100644 --- a/lib/unicode/In/63.pl +++ b/lib/unicode/In/63.pl @@ -2,5 +2,5 @@ # This file is built by mktables.PL from e.g. Unicode.txt. # Any changes made here will be lost! return <<'END'; -3100 312F +0D80 0DFF END diff --git a/lib/unicode/In/64.pl b/lib/unicode/In/64.pl index b15c4cc..2fa00eb 100644 --- a/lib/unicode/In/64.pl +++ b/lib/unicode/In/64.pl @@ -2,5 +2,5 @@ # This file is built by mktables.PL from e.g. Unicode.txt. # Any changes made here will be lost! return <<'END'; -3130 318F +0E00 0E7F END diff --git a/lib/unicode/In/65.pl b/lib/unicode/In/65.pl index d78c208..5fd607c 100644 --- a/lib/unicode/In/65.pl +++ b/lib/unicode/In/65.pl @@ -2,5 +2,5 @@ # This file is built by mktables.PL from e.g. Unicode.txt. # Any changes made here will be lost! 
return <<'END'; -3190 319F +0E80 0EFF END diff --git a/lib/unicode/In/66.pl b/lib/unicode/In/66.pl index 96150b4..3ae5e62 100644 --- a/lib/unicode/In/66.pl +++ b/lib/unicode/In/66.pl @@ -2,5 +2,5 @@ # This file is built by mktables.PL from e.g. Unicode.txt. # Any changes made here will be lost! return <<'END'; -31A0 31BF +0F00 0FFF END diff --git a/lib/unicode/In/67.pl b/lib/unicode/In/67.pl index 2708fec..ecc3448 100644 --- a/lib/unicode/In/67.pl +++ b/lib/unicode/In/67.pl @@ -2,5 +2,5 @@ # This file is built by mktables.PL from e.g. Unicode.txt. # Any changes made here will be lost! return <<'END'; -3200 32FF +1000 109F END diff --git a/lib/unicode/In/68.pl b/lib/unicode/In/68.pl index d504529..73a8818 100644 --- a/lib/unicode/In/68.pl +++ b/lib/unicode/In/68.pl @@ -2,5 +2,5 @@ # This file is built by mktables.PL from e.g. Unicode.txt. # Any changes made here will be lost! return <<'END'; -3300 33FF +10A0 10FF END diff --git a/lib/unicode/In/69.pl b/lib/unicode/In/69.pl index 83adb81..692be7d 100644 --- a/lib/unicode/In/69.pl +++ b/lib/unicode/In/69.pl @@ -2,5 +2,5 @@ # This file is built by mktables.PL from e.g. Unicode.txt. # Any changes made here will be lost! return <<'END'; -3400 4DB5 +1100 11FF END diff --git a/lib/unicode/In/7.pl b/lib/unicode/In/7.pl index 8d89b71..2421d05 100644 --- a/lib/unicode/In/7.pl +++ b/lib/unicode/In/7.pl @@ -2,5 +2,6 @@ # This file is built by mktables.PL from e.g. Unicode.txt. # Any changes made here will be lost! return <<'END'; -0370 03FF +0780 07A5 +07A6 07B0 END diff --git a/lib/unicode/In/70.pl b/lib/unicode/In/70.pl index f74552e..ad4776d 100644 --- a/lib/unicode/In/70.pl +++ b/lib/unicode/In/70.pl @@ -2,5 +2,5 @@ # This file is built by mktables.PL from e.g. Unicode.txt. # Any changes made here will be lost! 
return <<'END'; -4E00 9FFF +1200 137F END diff --git a/lib/unicode/In/71.pl b/lib/unicode/In/71.pl index 0636a82..f40dfa2 100644 --- a/lib/unicode/In/71.pl +++ b/lib/unicode/In/71.pl @@ -2,5 +2,5 @@ # This file is built by mktables.PL from e.g. Unicode.txt. # Any changes made here will be lost! return <<'END'; -A000 A48F +13A0 13FF END diff --git a/lib/unicode/In/72.pl b/lib/unicode/In/72.pl index 56404c5..7318008 100644 --- a/lib/unicode/In/72.pl +++ b/lib/unicode/In/72.pl @@ -2,5 +2,5 @@ # This file is built by mktables.PL from e.g. Unicode.txt. # Any changes made here will be lost! return <<'END'; -A490 A4CF +1400 167F END diff --git a/lib/unicode/In/73.pl b/lib/unicode/In/73.pl index e1e2694..5d7bd97 100644 --- a/lib/unicode/In/73.pl +++ b/lib/unicode/In/73.pl @@ -2,5 +2,5 @@ # This file is built by mktables.PL from e.g. Unicode.txt. # Any changes made here will be lost! return <<'END'; -AC00 D7A3 +1680 169F END diff --git a/lib/unicode/In/74.pl b/lib/unicode/In/74.pl index 0f4eb57..d404cb6 100644 --- a/lib/unicode/In/74.pl +++ b/lib/unicode/In/74.pl @@ -2,5 +2,5 @@ # This file is built by mktables.PL from e.g. Unicode.txt. # Any changes made here will be lost! return <<'END'; -D800 DB7F +16A0 16FF END diff --git a/lib/unicode/In/75.pl b/lib/unicode/In/75.pl index ec4ca07..2b0b198 100644 --- a/lib/unicode/In/75.pl +++ b/lib/unicode/In/75.pl @@ -2,5 +2,5 @@ # This file is built by mktables.PL from e.g. Unicode.txt. # Any changes made here will be lost! return <<'END'; -DB80 DBFF +1780 17FF END diff --git a/lib/unicode/In/76.pl b/lib/unicode/In/76.pl index d056168..06526c6 100644 --- a/lib/unicode/In/76.pl +++ b/lib/unicode/In/76.pl @@ -2,5 +2,5 @@ # This file is built by mktables.PL from e.g. Unicode.txt. # Any changes made here will be lost! 
return <<'END'; -DC00 DFFF +1800 18AF END diff --git a/lib/unicode/In/77.pl b/lib/unicode/In/77.pl index 530166d..c288810 100644 --- a/lib/unicode/In/77.pl +++ b/lib/unicode/In/77.pl @@ -2,5 +2,5 @@ # This file is built by mktables.PL from e.g. Unicode.txt. # Any changes made here will be lost! return <<'END'; -100000 10FFFD +1E00 1EFF END diff --git a/lib/unicode/In/78.pl b/lib/unicode/In/78.pl index 0c553d6..74cd2c8 100644 --- a/lib/unicode/In/78.pl +++ b/lib/unicode/In/78.pl @@ -2,5 +2,5 @@ # This file is built by mktables.PL from e.g. Unicode.txt. # Any changes made here will be lost! return <<'END'; -F900 FAFF +1F00 1FFF END diff --git a/lib/unicode/In/79.pl b/lib/unicode/In/79.pl index 42cc1ca..b9b0e7e 100644 --- a/lib/unicode/In/79.pl +++ b/lib/unicode/In/79.pl @@ -2,5 +2,5 @@ # This file is built by mktables.PL from e.g. Unicode.txt. # Any changes made here will be lost! return <<'END'; -FB00 FB4F +2000 206F END diff --git a/lib/unicode/In/8.pl b/lib/unicode/In/8.pl index 0075ce1..b8c23d1 100644 --- a/lib/unicode/In/8.pl +++ b/lib/unicode/In/8.pl @@ -2,5 +2,13 @@ # This file is built by mktables.PL from e.g. Unicode.txt. # Any changes made here will be lost! return <<'END'; -0400 04FF +0901 0902 +0905 0939 +093E 0940 +0941 0948 +0949 094C +0951 0954 +0958 0961 +0962 0963 +0966 096F END diff --git a/lib/unicode/In/80.pl b/lib/unicode/In/80.pl index ffb4f1e..2e36ac3 100644 --- a/lib/unicode/In/80.pl +++ b/lib/unicode/In/80.pl @@ -2,5 +2,5 @@ # This file is built by mktables.PL from e.g. Unicode.txt. # Any changes made here will be lost! return <<'END'; -FB50 FDFF +2070 209F END diff --git a/lib/unicode/In/81.pl b/lib/unicode/In/81.pl index cc8a4a2..12c6737 100644 --- a/lib/unicode/In/81.pl +++ b/lib/unicode/In/81.pl @@ -2,5 +2,5 @@ # This file is built by mktables.PL from e.g. Unicode.txt. # Any changes made here will be lost! 
return <<'END'; -FE20 FE2F +20A0 20CF END diff --git a/lib/unicode/In/82.pl b/lib/unicode/In/82.pl index 4e462b8..2d58a56 100644 --- a/lib/unicode/In/82.pl +++ b/lib/unicode/In/82.pl @@ -2,5 +2,5 @@ # This file is built by mktables.PL from e.g. Unicode.txt. # Any changes made here will be lost! return <<'END'; -FE30 FE4F +20D0 20FF END diff --git a/lib/unicode/In/83.pl b/lib/unicode/In/83.pl index 4eff1ea..c735821 100644 --- a/lib/unicode/In/83.pl +++ b/lib/unicode/In/83.pl @@ -2,5 +2,5 @@ # This file is built by mktables.PL from e.g. Unicode.txt. # Any changes made here will be lost! return <<'END'; -FE50 FE6F +2100 214F END diff --git a/lib/unicode/In/84.pl b/lib/unicode/In/84.pl index dc5a32e..a1949a1 100644 --- a/lib/unicode/In/84.pl +++ b/lib/unicode/In/84.pl @@ -2,5 +2,5 @@ # This file is built by mktables.PL from e.g. Unicode.txt. # Any changes made here will be lost! return <<'END'; -FE70 FEFE +2150 218F END diff --git a/lib/unicode/In/85.pl b/lib/unicode/In/85.pl index 931fc5b..799f739 100644 --- a/lib/unicode/In/85.pl +++ b/lib/unicode/In/85.pl @@ -2,5 +2,5 @@ # This file is built by mktables.PL from e.g. Unicode.txt. # Any changes made here will be lost! return <<'END'; -FFF0 FFFD +2190 21FF END diff --git a/lib/unicode/In/86.pl b/lib/unicode/In/86.pl index 03e8515..8bc8295 100644 --- a/lib/unicode/In/86.pl +++ b/lib/unicode/In/86.pl @@ -2,5 +2,5 @@ # This file is built by mktables.PL from e.g. Unicode.txt. # Any changes made here will be lost! return <<'END'; -FF00 FFEF +2200 22FF END diff --git a/lib/unicode/In/87.pl b/lib/unicode/In/87.pl index 44a5e47..6786795 100644 --- a/lib/unicode/In/87.pl +++ b/lib/unicode/In/87.pl @@ -2,5 +2,5 @@ # This file is built by mktables.PL from e.g. Unicode.txt. # Any changes made here will be lost! 
return <<'END'; -10300 1032F +2300 23FF END diff --git a/lib/unicode/In/88.pl b/lib/unicode/In/88.pl index 8030411..7aad2fc 100644 --- a/lib/unicode/In/88.pl +++ b/lib/unicode/In/88.pl @@ -2,5 +2,5 @@ # This file is built by mktables.PL from e.g. Unicode.txt. # Any changes made here will be lost! return <<'END'; -10330 1034F +2400 243F END diff --git a/lib/unicode/In/89.pl b/lib/unicode/In/89.pl index d2c50bb..c7cecd0 100644 --- a/lib/unicode/In/89.pl +++ b/lib/unicode/In/89.pl @@ -2,5 +2,5 @@ # This file is built by mktables.PL from e.g. Unicode.txt. # Any changes made here will be lost! return <<'END'; -10400 1044F +2440 245F END diff --git a/lib/unicode/In/9.pl b/lib/unicode/In/9.pl index a6d50e3..b094796 100644 --- a/lib/unicode/In/9.pl +++ b/lib/unicode/In/9.pl @@ -2,5 +2,18 @@ # This file is built by mktables.PL from e.g. Unicode.txt. # Any changes made here will be lost! return <<'END'; -0530 058F +0985 098C +098F 0990 +0993 09A8 +09AA 09B0 +09B6 09B9 +09BE 09C0 +09C1 09C4 +09C7 09C8 +09CB 09CC +09DC 09DD +09DF 09E1 +09E2 09E3 +09E6 09EF +09F0 09F1 END diff --git a/lib/unicode/In/90.pl b/lib/unicode/In/90.pl index f1073c7..7b1b778 100644 --- a/lib/unicode/In/90.pl +++ b/lib/unicode/In/90.pl @@ -2,5 +2,5 @@ # This file is built by mktables.PL from e.g. Unicode.txt. # Any changes made here will be lost! return <<'END'; -1D000 1D0FF +2460 24FF END diff --git a/lib/unicode/In/91.pl b/lib/unicode/In/91.pl index 7435889..4d44686 100644 --- a/lib/unicode/In/91.pl +++ b/lib/unicode/In/91.pl @@ -2,5 +2,5 @@ # This file is built by mktables.PL from e.g. Unicode.txt. # Any changes made here will be lost! return <<'END'; -1D100 1D1FF +2500 257F END diff --git a/lib/unicode/In/92.pl b/lib/unicode/In/92.pl index 7e40edc..6135c93 100644 --- a/lib/unicode/In/92.pl +++ b/lib/unicode/In/92.pl @@ -2,5 +2,5 @@ # This file is built by mktables.PL from e.g. Unicode.txt. # Any changes made here will be lost! 
return <<'END'; -1D400 1D7FF +2580 259F END diff --git a/lib/unicode/In/93.pl b/lib/unicode/In/93.pl index 931aec3..855d98e 100644 --- a/lib/unicode/In/93.pl +++ b/lib/unicode/In/93.pl @@ -2,5 +2,5 @@ # This file is built by mktables.PL from e.g. Unicode.txt. # Any changes made here will be lost! return <<'END'; -20000 2A6D6 +25A0 25FF END diff --git a/lib/unicode/In/94.pl b/lib/unicode/In/94.pl index c025148..0949bc2 100644 --- a/lib/unicode/In/94.pl +++ b/lib/unicode/In/94.pl @@ -2,5 +2,5 @@ # This file is built by mktables.PL from e.g. Unicode.txt. # Any changes made here will be lost! return <<'END'; -2F800 2FA1F +2600 26FF END diff --git a/lib/unicode/In/95.pl b/lib/unicode/In/95.pl index 495d2d5..3013f73 100644 --- a/lib/unicode/In/95.pl +++ b/lib/unicode/In/95.pl @@ -2,5 +2,5 @@ # This file is built by mktables.PL from e.g. Unicode.txt. # Any changes made here will be lost! return <<'END'; -E0000 E007F +2700 27BF END diff --git a/lib/unicode/In/96.pl b/lib/unicode/In/96.pl new file mode 100644 index 0000000..d785c31 --- /dev/null +++ b/lib/unicode/In/96.pl @@ -0,0 +1,6 @@ +# !!!!!!! DO NOT EDIT THIS FILE !!!!!!! +# This file is built by mktables.PL from e.g. Unicode.txt. +# Any changes made here will be lost! +return <<'END'; +2800 28FF +END diff --git a/lib/unicode/In/97.pl b/lib/unicode/In/97.pl new file mode 100644 index 0000000..2bf5651 --- /dev/null +++ b/lib/unicode/In/97.pl @@ -0,0 +1,6 @@ +# !!!!!!! DO NOT EDIT THIS FILE !!!!!!! +# This file is built by mktables.PL from e.g. Unicode.txt. +# Any changes made here will be lost! +return <<'END'; +2E80 2EFF +END diff --git a/lib/unicode/In/98.pl b/lib/unicode/In/98.pl new file mode 100644 index 0000000..3903f15 --- /dev/null +++ b/lib/unicode/In/98.pl @@ -0,0 +1,6 @@ +# !!!!!!! DO NOT EDIT THIS FILE !!!!!!! +# This file is built by mktables.PL from e.g. Unicode.txt. +# Any changes made here will be lost! 
+return <<'END'; +2F00 2FDF +END diff --git a/lib/unicode/In/99.pl b/lib/unicode/In/99.pl new file mode 100644 index 0000000..07799e6 --- /dev/null +++ b/lib/unicode/In/99.pl @@ -0,0 +1,6 @@ +# !!!!!!! DO NOT EDIT THIS FILE !!!!!!! +# This file is built by mktables.PL from e.g. Unicode.txt. +# Any changes made here will be lost! +return <<'END'; +2FF0 2FFF +END diff --git a/lib/unicode/Scripts.pl b/lib/unicode/Scripts.pl new file mode 100644 index 0000000..361069b --- /dev/null +++ b/lib/unicode/Scripts.pl @@ -0,0 +1,341 @@ +# !!!!!!! DO NOT EDIT THIS FILE !!!!!!! +# This file is built by mktables.PL from e.g. Unicode.txt. +# Any changes made here will be lost! +return <<'END'; +0041 005A LATIN # Latin In/0.pl +0061 007A LATIN # Latin In/0.pl +00C0 00D6 LATIN # Latin In/0.pl +00D8 00F6 LATIN # Latin In/0.pl +00F8 01BA LATIN # Latin In/0.pl +01BC 01BF LATIN # Latin In/0.pl +01C0 01C3 LATIN # Latin In/0.pl +01C4 021F LATIN # Latin In/0.pl +0222 0233 LATIN # Latin In/0.pl +0250 02AD LATIN # Latin In/0.pl +02B0 02B8 LATIN # Latin In/0.pl +02E0 02E4 LATIN # Latin In/0.pl +1E00 1E9B LATIN # Latin In/0.pl +1EA0 1EF9 LATIN # Latin In/0.pl +212A 212B LATIN # Latin In/0.pl +FB00 FB06 LATIN # Latin In/0.pl +FF21 FF3A LATIN # Latin In/0.pl +FF41 FF5A LATIN # Latin In/0.pl +0388 038A GREEK # Greek In/1.pl +038E 03A1 GREEK # Greek In/1.pl +03A3 03CE GREEK # Greek In/1.pl +03D0 03D7 GREEK # Greek In/1.pl +03DA 03F5 GREEK # Greek In/1.pl +1F00 1F15 GREEK # Greek In/1.pl +1F18 1F1D GREEK # Greek In/1.pl +1F20 1F45 GREEK # Greek In/1.pl +1F48 1F4D GREEK # Greek In/1.pl +1F50 1F57 GREEK # Greek In/1.pl +1F5F 1F7D GREEK # Greek In/1.pl +1F80 1FB4 GREEK # Greek In/1.pl +1FB6 1FBC GREEK # Greek In/1.pl +1FC2 1FC4 GREEK # Greek In/1.pl +1FC6 1FCC GREEK # Greek In/1.pl +1FD0 1FD3 GREEK # Greek In/1.pl +1FD6 1FDB GREEK # Greek In/1.pl +1FE0 1FEC GREEK # Greek In/1.pl +1FF2 1FF4 GREEK # Greek In/1.pl +1FF6 1FFC GREEK # Greek In/1.pl +0400 0481 CYRILLIC # Cyrillic In/2.pl +0483 0486 CYRILLIC 
# Cyrillic In/2.pl +048C 04C4 CYRILLIC # Cyrillic In/2.pl +04C7 04C8 CYRILLIC # Cyrillic In/2.pl +04CB 04CC CYRILLIC # Cyrillic In/2.pl +04D0 04F5 CYRILLIC # Cyrillic In/2.pl +04F8 04F9 CYRILLIC # Cyrillic In/2.pl +0531 0556 ARMENIAN # Armenian In/3.pl +0561 0587 ARMENIAN # Armenian In/3.pl +FB13 FB17 ARMENIAN # Armenian In/3.pl +05D0 05EA HEBREW # Hebrew In/4.pl +05F0 05F2 HEBREW # Hebrew In/4.pl +FB1F FB28 HEBREW # Hebrew In/4.pl +FB2A FB36 HEBREW # Hebrew In/4.pl +FB38 FB3C HEBREW # Hebrew In/4.pl +FB40 FB41 HEBREW # Hebrew In/4.pl +FB43 FB44 HEBREW # Hebrew In/4.pl +FB46 FB4F HEBREW # Hebrew In/4.pl +0621 063A ARABIC # Arabic In/5.pl +0641 064A ARABIC # Arabic In/5.pl +0671 06D3 ARABIC # Arabic In/5.pl +06E5 06E6 ARABIC # Arabic In/5.pl +06FA 06FC ARABIC # Arabic In/5.pl +FB50 FBB1 ARABIC # Arabic In/5.pl +FBD3 FD3D ARABIC # Arabic In/5.pl +FD50 FD8F ARABIC # Arabic In/5.pl +FD92 FDC7 ARABIC # Arabic In/5.pl +FDF0 FDFB ARABIC # Arabic In/5.pl +FE70 FE72 ARABIC # Arabic In/5.pl +FE76 FEFC ARABIC # Arabic In/5.pl +0712 072C SYRIAC # Syriac In/6.pl +0730 074A SYRIAC # Syriac In/6.pl +0780 07A5 THAANA # Thaana In/7.pl +07A6 07B0 THAANA # Thaana In/7.pl +0901 0902 DEVANAGARI # Devanagari In/8.pl +0905 0939 DEVANAGARI # Devanagari In/8.pl +093E 0940 DEVANAGARI # Devanagari In/8.pl +0941 0948 DEVANAGARI # Devanagari In/8.pl +0949 094C DEVANAGARI # Devanagari In/8.pl +0951 0954 DEVANAGARI # Devanagari In/8.pl +0958 0961 DEVANAGARI # Devanagari In/8.pl +0962 0963 DEVANAGARI # Devanagari In/8.pl +0966 096F DEVANAGARI # Devanagari In/8.pl +0985 098C BENGALI # Bengali In/9.pl +098F 0990 BENGALI # Bengali In/9.pl +0993 09A8 BENGALI # Bengali In/9.pl +09AA 09B0 BENGALI # Bengali In/9.pl +09B6 09B9 BENGALI # Bengali In/9.pl +09BE 09C0 BENGALI # Bengali In/9.pl +09C1 09C4 BENGALI # Bengali In/9.pl +09C7 09C8 BENGALI # Bengali In/9.pl +09CB 09CC BENGALI # Bengali In/9.pl +09DC 09DD BENGALI # Bengali In/9.pl +09DF 09E1 BENGALI # Bengali In/9.pl +09E2 09E3 BENGALI # Bengali 
In/9.pl +09E6 09EF BENGALI # Bengali In/9.pl +09F0 09F1 BENGALI # Bengali In/9.pl +0A05 0A0A GURMUKHI # Gurmukhi In/10.pl +0A0F 0A10 GURMUKHI # Gurmukhi In/10.pl +0A13 0A28 GURMUKHI # Gurmukhi In/10.pl +0A2A 0A30 GURMUKHI # Gurmukhi In/10.pl +0A32 0A33 GURMUKHI # Gurmukhi In/10.pl +0A35 0A36 GURMUKHI # Gurmukhi In/10.pl +0A38 0A39 GURMUKHI # Gurmukhi In/10.pl +0A3E 0A40 GURMUKHI # Gurmukhi In/10.pl +0A41 0A42 GURMUKHI # Gurmukhi In/10.pl +0A47 0A48 GURMUKHI # Gurmukhi In/10.pl +0A4B 0A4D GURMUKHI # Gurmukhi In/10.pl +0A59 0A5C GURMUKHI # Gurmukhi In/10.pl +0A66 0A6F GURMUKHI # Gurmukhi In/10.pl +0A70 0A71 GURMUKHI # Gurmukhi In/10.pl +0A72 0A74 GURMUKHI # Gurmukhi In/10.pl +0A81 0A82 GUJARATI # Gujarati In/11.pl +0A85 0A8B GUJARATI # Gujarati In/11.pl +0A8F 0A91 GUJARATI # Gujarati In/11.pl +0A93 0AA8 GUJARATI # Gujarati In/11.pl +0AAA 0AB0 GUJARATI # Gujarati In/11.pl +0AB2 0AB3 GUJARATI # Gujarati In/11.pl +0AB5 0AB9 GUJARATI # Gujarati In/11.pl +0ABE 0AC0 GUJARATI # Gujarati In/11.pl +0AC1 0AC5 GUJARATI # Gujarati In/11.pl +0AC7 0AC8 GUJARATI # Gujarati In/11.pl +0ACB 0ACC GUJARATI # Gujarati In/11.pl +0AE6 0AEF GUJARATI # Gujarati In/11.pl +0B02 0B03 ORIYA # Oriya In/12.pl +0B05 0B0C ORIYA # Oriya In/12.pl +0B0F 0B10 ORIYA # Oriya In/12.pl +0B13 0B28 ORIYA # Oriya In/12.pl +0B2A 0B30 ORIYA # Oriya In/12.pl +0B32 0B33 ORIYA # Oriya In/12.pl +0B36 0B39 ORIYA # Oriya In/12.pl +0B41 0B43 ORIYA # Oriya In/12.pl +0B47 0B48 ORIYA # Oriya In/12.pl +0B4B 0B4C ORIYA # Oriya In/12.pl +0B5C 0B5D ORIYA # Oriya In/12.pl +0B5F 0B61 ORIYA # Oriya In/12.pl +0B66 0B6F ORIYA # Oriya In/12.pl +0B85 0B8A TAMIL # Tamil In/13.pl +0B8E 0B90 TAMIL # Tamil In/13.pl +0B92 0B95 TAMIL # Tamil In/13.pl +0B99 0B9A TAMIL # Tamil In/13.pl +0B9E 0B9F TAMIL # Tamil In/13.pl +0BA3 0BA4 TAMIL # Tamil In/13.pl +0BA8 0BAA TAMIL # Tamil In/13.pl +0BAE 0BB5 TAMIL # Tamil In/13.pl +0BB7 0BB9 TAMIL # Tamil In/13.pl +0BBE 0BBF TAMIL # Tamil In/13.pl +0BC1 0BC2 TAMIL # Tamil In/13.pl +0BC6 0BC8 TAMIL # 
Tamil In/13.pl +0BCA 0BCC TAMIL # Tamil In/13.pl +0BE7 0BEF TAMIL # Tamil In/13.pl +0BF0 0BF2 TAMIL # Tamil In/13.pl +0C01 0C03 TELUGU # Telugu In/14.pl +0C05 0C0C TELUGU # Telugu In/14.pl +0C0E 0C10 TELUGU # Telugu In/14.pl +0C12 0C28 TELUGU # Telugu In/14.pl +0C2A 0C33 TELUGU # Telugu In/14.pl +0C35 0C39 TELUGU # Telugu In/14.pl +0C3E 0C40 TELUGU # Telugu In/14.pl +0C41 0C44 TELUGU # Telugu In/14.pl +0C46 0C48 TELUGU # Telugu In/14.pl +0C4A 0C4D TELUGU # Telugu In/14.pl +0C55 0C56 TELUGU # Telugu In/14.pl +0C60 0C61 TELUGU # Telugu In/14.pl +0C66 0C6F TELUGU # Telugu In/14.pl +0C82 0C83 KANNADA # Kannada In/15.pl +0C85 0C8C KANNADA # Kannada In/15.pl +0C8E 0C90 KANNADA # Kannada In/15.pl +0C92 0CA8 KANNADA # Kannada In/15.pl +0CAA 0CB3 KANNADA # Kannada In/15.pl +0CB5 0CB9 KANNADA # Kannada In/15.pl +0CC0 0CC4 KANNADA # Kannada In/15.pl +0CC7 0CC8 KANNADA # Kannada In/15.pl +0CCA 0CCB KANNADA # Kannada In/15.pl +0CCC 0CCD KANNADA # Kannada In/15.pl +0CD5 0CD6 KANNADA # Kannada In/15.pl +0CE0 0CE1 KANNADA # Kannada In/15.pl +0CE6 0CEF KANNADA # Kannada In/15.pl +0D02 0D03 MALAYALAM # Malayalam In/16.pl +0D05 0D0C MALAYALAM # Malayalam In/16.pl +0D0E 0D10 MALAYALAM # Malayalam In/16.pl +0D12 0D28 MALAYALAM # Malayalam In/16.pl +0D2A 0D39 MALAYALAM # Malayalam In/16.pl +0D3E 0D40 MALAYALAM # Malayalam In/16.pl +0D41 0D43 MALAYALAM # Malayalam In/16.pl +0D46 0D48 MALAYALAM # Malayalam In/16.pl +0D4A 0D4C MALAYALAM # Malayalam In/16.pl +0D60 0D61 MALAYALAM # Malayalam In/16.pl +0D66 0D6F MALAYALAM # Malayalam In/16.pl +0D82 0D83 SINHALA # Sinhala In/17.pl +0D85 0D96 SINHALA # Sinhala In/17.pl +0D9A 0DB1 SINHALA # Sinhala In/17.pl +0DB3 0DBB SINHALA # Sinhala In/17.pl +0DC0 0DC6 SINHALA # Sinhala In/17.pl +0DCF 0DD1 SINHALA # Sinhala In/17.pl +0DD2 0DD4 SINHALA # Sinhala In/17.pl +0DD8 0DDF SINHALA # Sinhala In/17.pl +0DF2 0DF3 SINHALA # Sinhala In/17.pl +0E01 0E30 THAI # Thai In/18.pl +0E32 0E33 THAI # Thai In/18.pl +0E34 0E3A THAI # Thai In/18.pl +0E40 0E45 THAI # 
Thai In/18.pl +0E47 0E4E THAI # Thai In/18.pl +0E50 0E59 THAI # Thai In/18.pl +0E81 0E82 LAO # Lao In/19.pl +0E87 0E88 LAO # Lao In/19.pl +0E94 0E97 LAO # Lao In/19.pl +0E99 0E9F LAO # Lao In/19.pl +0EA1 0EA3 LAO # Lao In/19.pl +0EAA 0EAB LAO # Lao In/19.pl +0EAD 0EB0 LAO # Lao In/19.pl +0EB2 0EB3 LAO # Lao In/19.pl +0EB4 0EB9 LAO # Lao In/19.pl +0EBB 0EBC LAO # Lao In/19.pl +0EC0 0EC4 LAO # Lao In/19.pl +0EC8 0ECD LAO # Lao In/19.pl +0ED0 0ED9 LAO # Lao In/19.pl +0EDC 0EDD LAO # Lao In/19.pl +0F18 0F19 TIBETAN # Tibetan In/20.pl +0F20 0F29 TIBETAN # Tibetan In/20.pl +0F2A 0F33 TIBETAN # Tibetan In/20.pl +0F40 0F47 TIBETAN # Tibetan In/20.pl +0F49 0F6A TIBETAN # Tibetan In/20.pl +0F71 0F7E TIBETAN # Tibetan In/20.pl +0F80 0F84 TIBETAN # Tibetan In/20.pl +0F86 0F87 TIBETAN # Tibetan In/20.pl +0F88 0F8B TIBETAN # Tibetan In/20.pl +0F90 0F97 TIBETAN # Tibetan In/20.pl +0F99 0FBC TIBETAN # Tibetan In/20.pl +1000 1021 MYANMAR # Myanmar In/21.pl +1023 1027 MYANMAR # Myanmar In/21.pl +1029 102A MYANMAR # Myanmar In/21.pl +102D 1030 MYANMAR # Myanmar In/21.pl +1036 1037 MYANMAR # Myanmar In/21.pl +1040 1049 MYANMAR # Myanmar In/21.pl +1050 1055 MYANMAR # Myanmar In/21.pl +1056 1057 MYANMAR # Myanmar In/21.pl +1058 1059 MYANMAR # Myanmar In/21.pl +10A0 10C5 GEORGIAN # Georgian In/22.pl +10D0 10F6 GEORGIAN # Georgian In/22.pl +1100 1159 HANGUL # Hangul In/23.pl +115F 11A2 HANGUL # Hangul In/23.pl +11A8 11F9 HANGUL # Hangul In/23.pl +3131 318E HANGUL # Hangul In/23.pl +AC00 D7A3 HANGUL # Hangul In/23.pl +FFA0 FFBE HANGUL # Hangul In/23.pl +FFC2 FFC7 HANGUL # Hangul In/23.pl +FFCA FFCF HANGUL # Hangul In/23.pl +FFD2 FFD7 HANGUL # Hangul In/23.pl +FFDA FFDC HANGUL # Hangul In/23.pl +1200 1206 ETHIOPIC # Ethiopic In/24.pl +1208 1246 ETHIOPIC # Ethiopic In/24.pl +124A 124D ETHIOPIC # Ethiopic In/24.pl +1250 1256 ETHIOPIC # Ethiopic In/24.pl +125A 125D ETHIOPIC # Ethiopic In/24.pl +1260 1286 ETHIOPIC # Ethiopic In/24.pl +128A 128D ETHIOPIC # Ethiopic In/24.pl +1290 12AE ETHIOPIC # 
Ethiopic In/24.pl +12B2 12B5 ETHIOPIC # Ethiopic In/24.pl +12B8 12BE ETHIOPIC # Ethiopic In/24.pl +12C2 12C5 ETHIOPIC # Ethiopic In/24.pl +12C8 12CE ETHIOPIC # Ethiopic In/24.pl +12D0 12D6 ETHIOPIC # Ethiopic In/24.pl +12D8 12EE ETHIOPIC # Ethiopic In/24.pl +12F0 130E ETHIOPIC # Ethiopic In/24.pl +1312 1315 ETHIOPIC # Ethiopic In/24.pl +1318 131E ETHIOPIC # Ethiopic In/24.pl +1320 1346 ETHIOPIC # Ethiopic In/24.pl +1348 135A ETHIOPIC # Ethiopic In/24.pl +1369 1371 ETHIOPIC # Ethiopic In/24.pl +1372 137C ETHIOPIC # Ethiopic In/24.pl +13A0 13F4 CHEROKEE # Cherokee In/25.pl +1401 166C CANADIAN-ABORIGINAL # CanadianAboriginal In/26.pl +166F 1676 CANADIAN-ABORIGINAL # CanadianAboriginal In/26.pl +1681 169A OGHAM # Ogham In/27.pl +16A0 16EA RUNIC # Runic In/28.pl +16EE 16F0 RUNIC # Runic In/28.pl +1780 17B3 KHMER # Khmer In/29.pl +17B4 17B6 KHMER # Khmer In/29.pl +17B7 17BD KHMER # Khmer In/29.pl +17BE 17C5 KHMER # Khmer In/29.pl +17C7 17C8 KHMER # Khmer In/29.pl +17C9 17D3 KHMER # Khmer In/29.pl +17E0 17E9 KHMER # Khmer In/29.pl +1810 1819 MONGOLIAN # Mongolian In/30.pl +1820 1842 MONGOLIAN # Mongolian In/30.pl +1844 1877 MONGOLIAN # Mongolian In/30.pl +1880 18A8 MONGOLIAN # Mongolian In/30.pl +3041 3094 HIRAGANA # Hiragana In/31.pl +309D 309E HIRAGANA # Hiragana In/31.pl +30A1 30FA KATAKANA # Katakana In/32.pl +30FD 30FE KATAKANA # Katakana In/32.pl +FF66 FF6F KATAKANA # Katakana In/32.pl +FF71 FF9D KATAKANA # Katakana In/32.pl +3105 312C BOPOMOFO # Bopomofo In/33.pl +31A0 31B7 BOPOMOFO # Bopomofo In/33.pl +2E80 2E99 HAN # Han In/34.pl +2E9B 2EF3 HAN # Han In/34.pl +2F00 2FD5 HAN # Han In/34.pl +3021 3029 HAN # Han In/34.pl +3038 303A HAN # Han In/34.pl +3400 4DB5 HAN # Han In/34.pl +4E00 9FA5 HAN # Han In/34.pl +F900 FA2D HAN # Han In/34.pl +20000 2A6D6 HAN # Han In/34.pl +2F800 2FA1D HAN # Han In/34.pl +A000 A48C YI # Yi In/35.pl +A490 A4A1 YI # Yi In/35.pl +A4A4 A4B3 YI # Yi In/35.pl +A4B5 A4C0 YI # Yi In/35.pl +A4C2 A4C4 YI # Yi In/35.pl +10300 1031E OLD-ITALIC # 
OldItalic In/36.pl +10330 10349 GOTHIC # Gothic In/37.pl +10400 10425 DESERET # Deseret In/38.pl +10428 1044D DESERET # Deseret In/38.pl +0300 034E INHERITED # Inherited In/39.pl +0360 0362 INHERITED # Inherited In/39.pl +0488 0489 INHERITED # Inherited In/39.pl +0591 05A1 INHERITED # Inherited In/39.pl +05A3 05B9 INHERITED # Inherited In/39.pl +05BB 05BD INHERITED # Inherited In/39.pl +05C1 05C2 INHERITED # Inherited In/39.pl +064B 0655 INHERITED # Inherited In/39.pl +06D6 06DC INHERITED # Inherited In/39.pl +06DD 06DE INHERITED # Inherited In/39.pl +06DF 06E4 INHERITED # Inherited In/39.pl +06E7 06E8 INHERITED # Inherited In/39.pl +06EA 06ED INHERITED # Inherited In/39.pl +20D0 20DC INHERITED # Inherited In/39.pl +20DD 20E0 INHERITED # Inherited In/39.pl +20E2 20E3 INHERITED # Inherited In/39.pl +302A 302F INHERITED # Inherited In/39.pl +3099 309A INHERITED # Inherited In/39.pl +FE20 FE23 INHERITED # Inherited In/39.pl +1D167 1D169 INHERITED # Inherited In/39.pl +1D17B 1D182 INHERITED # Inherited In/39.pl +1D185 1D18B INHERITED # Inherited In/39.pl +1D1AA 1D1AD INHERITED # Inherited In/39.pl +END diff --git a/lib/unicode/mktables.PL b/lib/unicode/mktables.PL index 637050a..ca9f6d9 100755 --- a/lib/unicode/mktables.PL +++ b/lib/unicode/mktables.PL @@ -239,16 +239,7 @@ foreach $file (@todo) { next if @ARGV and not grep { $_ eq $table } @ARGV; print $table, "\n"; $table =~ s/\W+//g; - if ($table =~ /^In(.+)/) { - my $id; - unless (exists $InId{$1}) { - $InId{$1} = $InId++; - } - $id = $InId{$1}; - open(OUT, ">In/$id.pl") or die "Can't create In/$id.pl: $!\n"; - print OUT "# In/$id.pl $1\n"; - } - elsif ($table =~ /^(Is|To)(.+)/) { + if ($table =~ /^(Is|To)(.+)/) { open(OUT, ">$1/$2.pl") or die "Can't create $1/$2.pl: $!\n"; } else { @@ -267,6 +258,67 @@ END close OUT; } +# Do Scripts before Blocks so that in case of naming conflicts +# the more natural one (Script) wins over the artificial one (Block). 
+ +print "Scripts\n"; +open(UD, 'Scripts.txt') or die "Can't open Scripts.txt: $!\n"; +open(OUT, ">Scripts.pl") or die "Can't create Scripts.pl: $!\n"; +print OUT <) { + next if /^#/; + next if /^$/; + chomp; + ($code, $last, $name) = /^([0-9a-f]+)\.\.([0-9a-f]+)\s+;\s+(.+)\s+\#/i; + if ($name) { + my $InName = lc($name); + $InName =~ s/\b(\w)/uc($1)/ge; + $InName =~ s/\W+//g; + my $id; + unless (exists $InId{$InName}) { + print "\t$InName\n"; + $id = $Scripts{$InName} = $InId{$InName} = $InId++; + open(SCRIPT, ">In/$id.pl") or die "create In/$id.pl: $!\n"; + print SCRIPT <>In/$id.pl"); + print SCRIPT <>In/$id.pl"); + print SCRIPT <) { chomp; ($code, $last, $name) = /^([0-9a-f]+)\.\.([0-9a-f]+); (.+)/i; if ($name) { - print OUT "$code $last $name\n"; - $name =~ s/\W+//g; + my $InName = $name; + $InName =~ s/\W+//g; + print "\t$InName\n"; my $id; - unless (exists $InId{$name}) { - $InId{$name} = $InId++; + # TODO: only the first one of Private Use blocks qualifies + unless (exists $InId{$InName}) { + $InId{$InName} = $InId++; + } elsif (exists $Scripts{$InName}) { + $InName .= 'Block'; + $InId{$InName} = $InId++; } - $id = $InId{$name}; - open(BLOCK, ">In/$id.pl"); - print OUT "# In/$id.pl $name\n"; + $id = $InId{$InName}; + open(BLOCK, ">In/$id.pl") or die "create In/$id.pl: $!\n"; + print OUT "$code\t$last\t$name\t# $InName In/$id.pl\n"; print BLOCK < as opposed to I (as defined by Unicode); +in Perl, when the C<\p{In....}> and the C<\p{In....}> regular expression +constructs are used. This has changed the definition of some of those +character classes. + +The difference between scripts and blocks is that scripts are the +glyphs used by a language or a group of languages, while the blocks +are more artificial groupings of 256 characters based on the Unicode +numbering. 
+ +In general this change results in more inclusive Unicode character +classes, but changes in the other direction also do take place: +for example while the script C<Latin> includes all the Latin +characters and their various diacritic-adorned versions, it +does not include the various punctuation or digits (since they +are not solely C<Latin>). + +Changes in the character class semantics may have happened if a script +and a block happen to have the same name, for example C<Hebrew>. +In such cases the script wins and C<\p{InHebrew}> now means the script +definition of Hebrew. The block definition is still available, +though, by appending C<Block> to the name: C<\p{InHebrewBlock}> means +what C<\p{InHebrew}> meant in perl 5.6.0. For the full list +of affected character classes, see L<perlunicode>. + =head2 Deprecations The current user-visible implementation of pseudo-hashes (the weird diff --git a/pod/perlretut.pod b/pod/perlretut.pod index 3e83c13..7f8e8f5 100644 --- a/pod/perlretut.pod +++ b/pod/perlretut.pod @@ -1752,8 +1752,9 @@ character class of Unicode 'marks', for example accent marks. For the full list see L<perlunicode>. The Unicode has also been separated into blocks of charaters which you -can test with C<\p{InBlock}> and C<\P{InBlock}>, for example C<\p{InGreek}> -and C<\P{InKatakana}>. For the full list see L<perlunicode>. +can test with C<\p{In...}> (in) and C<\P{In...}> (not in), for example +C<\p{InLatin}>, C<\p{InGreek}>, or C<\P{InKatakana}>. For the full list see +L<perlunicode>. For the the full and latest information see the latest Unicode standard. diff --git a/pod/perltodo.pod b/pod/perltodo.pod index f96c770..3c72432 100644 --- a/pod/perltodo.pod +++ b/pod/perltodo.pod @@ -87,17 +87,6 @@ class subtraction. http://www.unicode.org/unicode/reports/tr18/ -=head2 Unicode Scripts support - -Currently the C<\p{In...}> supports only the Blocks database, like -C<\p{BasicLatin}>, C<\p{InGreek}>, C<\p{InThai}>, but there's also the -Scripts database, which has members like C, C, -C, C.
It is desireable that also the script names -could be used for the C<\p{In...}> construct. Note: needs to be -researched whether this is possible, that is, are there conflicts -between the Blocks and the Scripts, is the Blocks Greek the same as -the Scripts Greek? - =head2 use Thread for iThreads Artur Bergman's C module is a start on this, but needs to diff --git a/pod/perlunicode.pod b/pod/perlunicode.pod index d629cab..877b497 100644 --- a/pod/perlunicode.pod +++ b/pod/perlunicode.pod @@ -105,13 +105,14 @@ bytes change to operating on characters. For ASCII data this makes no difference, because UTF-8 stores ASCII in single bytes, but for any character greater than C, the character may be stored in a sequence of two or more bytes, all of which have the high bit set. -For C1 controls or Latin 1 characters on an EBCDIC platform the character -may be stored in a UTF-EBCDIC multi byte sequence. -But by and large, the user need not worry about this, because Perl -hides it from the user. A character in Perl is logically just a number -ranging from 0 to 2**32 or so. Larger characters encode to longer -sequences of bytes internally, but again, this is just an internal -detail which is hidden at the Perl level. + +For C1 controls or Latin 1 characters on an EBCDIC platform the +character may be stored in a UTF-EBCDIC multi byte sequence. But by +and large, the user need not worry about this, because Perl hides it +from the user. A character in Perl is logically just a number ranging +from 0 to 2**32 or so. Larger characters encode to longer sequences +of bytes internally, but again, this is just an internal detail which +is hidden at the Perl level. =head2 Effects of character semantics @@ -166,7 +167,8 @@ with all non-alphanumeric characters removed, for example the block name C<"Latin-1 Supplement"> becomes C<\p{InLatin1Supplement}>. 
Here is the list as of Unicode 3.1.0 (the two-letter classes) and -Perl 5.8.0 (the one-letter classes): +as defined by Perl (the one-letter classes) (in Unicode materials +what Perl calls C is often called C): L Letter Lu Letter, Uppercase @@ -232,105 +234,174 @@ have their directionality defined: BidiWS Whitespace BidiON Other Neutrals -The blocks available for C<\p{InBlock}> and C<\P{InBlock}>, for -example \p{InCyrillic>, are as follows: - - BasicLatin - Latin1Supplement - LatinExtendedA - LatinExtendedB - IPAExtensions - SpacingModifierLetters - CombiningDiacriticalMarks - Greek - Cyrillic - Armenian - Hebrew - Arabic - Syriac - Thaana - Devanagari - Bengali - Gurmukhi - Gujarati - Oriya - Tamil - Telugu - Kannada - Malayalam - Sinhala - Thai - Lao - Tibetan - Myanmar - Georgian - HangulJamo - Ethiopic - Cherokee - UnifiedCanadianAboriginalSyllabics - Ogham - Runic - Khmer - Mongolian - LatinExtendedAdditional - GreekExtended - GeneralPunctuation - SuperscriptsandSubscripts - CurrencySymbols - CombiningMarksforSymbols - LetterlikeSymbols - NumberForms - Arrows - MathematicalOperators - MiscellaneousTechnical - ControlPictures - OpticalCharacterRecognition - EnclosedAlphanumerics - BoxDrawing - BlockElements - GeometricShapes - MiscellaneousSymbols - Dingbats - BraillePatterns - CJKRadicalsSupplement - KangxiRadicals - IdeographicDescriptionCharacters - CJKSymbolsandPunctuation - Hiragana - Katakana - Bopomofo - HangulCompatibilityJamo - Kanbun - BopomofoExtended - EnclosedCJKLettersandMonths - CJKCompatibility - CJKUnifiedIdeographsExtensionA - CJKUnifiedIdeographs - YiSyllables - YiRadicals - HangulSyllables - HighSurrogates - HighPrivateUseSurrogates - LowSurrogates - PrivateUse - CJKCompatibilityIdeographs - AlphabeticPresentationForms - ArabicPresentationFormsA - CombiningHalfMarks - CJKCompatibilityForms - SmallFormVariants - ArabicPresentationFormsB - Specials - HalfwidthandFullwidthForms - OldItalic - Gothic - Deseret - ByzantineMusicalSymbols - 
MusicalSymbols - MathematicalAlphanumericSymbols - CJKUnifiedIdeographsExtensionB - CJKCompatibilityIdeographsSupplement - Tags +=head2 Scripts + +The scripts available for C<\p{In...}> and C<\P{In...}>, for +example C<\p{InLatin}> or C<\P{InHan}>, are as follows: + + Latin + Greek + Cyrillic + Armenian + Hebrew + Arabic + Syriac + Thaana + Devanagari + Bengali + Gurmukhi + Gujarati + Oriya + Tamil + Telugu + Kannada + Malayalam + Sinhala + Thai + Lao + Tibetan + Myanmar + Georgian + Hangul + Ethiopic + Cherokee + CanadianAboriginal + Ogham + Runic + Khmer + Mongolian + Hiragana + Katakana + Bopomofo + Han + Yi + OldItalic + Gothic + Deseret + Inherited + +=head2 Blocks + +In addition to B<scripts>, Unicode also defines B<blocks> of +characters. The difference between scripts and blocks is that the +former concept is closer to natural languages, while the latter +concept is more an artificial grouping based on groups of 256 Unicode +characters. For example, the C<Latin> script contains letters from +many blocks, but it does not contain all the characters from those +blocks, it does not for example contain digits. + +For more about scripts see the UTR #24: +http://www.unicode.org/unicode/reports/tr24/ +For more about blocks see +http://www.unicode.org/Public/UNIDATA/Blocks.txt + +Because there are overlaps in naming (there are, for example, both +a script called C<Katakana> and a block called C<Katakana>), the block +version has C<Block> appended to its name, C<\p{InKatakanaBlock}>. + +Notice that this definition was introduced in Perl 5.8.0: in Perl +5.6.0 only the blocks were used; in Perl 5.8.0 scripts became the +preferential character class definition; this meant that the +definitions of some character classes changed (the ones in the +below list that have the C<Block> appended).
+ + BasicLatin + Latin1Supplement + LatinExtendedA + LatinExtendedB + IPAExtensions + SpacingModifierLetters + CombiningDiacriticalMarks + GreekBlock + CyrillicBlock + ArmenianBlock + HebrewBlock + ArabicBlock + SyriacBlock + ThaanaBlock + DevanagariBlock + BengaliBlock + GurmukhiBlock + GujaratiBlock + OriyaBlock + TamilBlock + TeluguBlock + KannadaBlock + MalayalamBlock + SinhalaBlock + ThaiBlock + LaoBlock + TibetanBlock + MyanmarBlock + GeorgianBlock + HangulJamo + EthiopicBlock + CherokeeBlock + UnifiedCanadianAboriginalSyllabics + OghamBlock + RunicBlock + KhmerBlock + MongolianBlock + LatinExtendedAdditional + GreekExtended + GeneralPunctuation + SuperscriptsandSubscripts + CurrencySymbols + CombiningMarksforSymbols + LetterlikeSymbols + NumberForms + Arrows + MathematicalOperators + MiscellaneousTechnical + ControlPictures + OpticalCharacterRecognition + EnclosedAlphanumerics + BoxDrawing + BlockElements + GeometricShapes + MiscellaneousSymbols + Dingbats + BraillePatterns + CJKRadicalsSupplement + KangxiRadicals + IdeographicDescriptionCharacters + CJKSymbolsandPunctuation + HiraganaBlock + KatakanaBlock + BopomofoBlock + HangulCompatibilityJamo + Kanbun + BopomofoExtended + EnclosedCJKLettersandMonths + CJKCompatibility + CJKUnifiedIdeographsExtensionA + CJKUnifiedIdeographs + YiSyllables + YiRadicals + HangulSyllables + HighSurrogates + HighPrivateUseSurrogates + LowSurrogates + PrivateUse + CJKCompatibilityIdeographs + AlphabeticPresentationForms + ArabicPresentationFormsA + CombiningHalfMarks + CJKCompatibilityForms + SmallFormVariants + ArabicPresentationFormsB + Specials + HalfwidthandFullwidthForms + OldItalicBlock + GothicBlock + DeseretBlock + ByzantineMusicalSymbols + MusicalSymbols + MathematicalAlphanumericSymbols + CJKUnifiedIdeographsExtensionB + CJKCompatibilityIdeographsSupplement + Tags =item * diff --git a/t/op/pat.t b/t/op/pat.t index 57f7cb7..ff692d2 100755 --- a/t/op/pat.t +++ b/t/op/pat.t @@ -6,7 +6,7 @@ $| = 1; -print "1..660\n"; 
+print "1..664\n"; BEGIN { chdir 't' if -d 't'; @@ -1889,3 +1889,18 @@ $T="ok 659\n";if ($x =~ /(a([abcdefg]+)(?{$y=$^N})d)(?{$z=$^N})e/ and $y eq "bc" {print $T} else {print "not $T"}; $T="ok 660\n";if ($x =~ /(a([abcdefg]+)(?{$y=$^N})de)(?{$z=$^N})/ and $y eq "bc" and $z eq "abcde") {print $T} else {print "not $T"}; + +# Test the Unicode script classes + +print "not " unless chr(0x100) =~ /\p{InLatin}/; # outside Latin-1 +print "ok 661\n"; + +print "not " unless chr(0x212b) =~ /\p{InLatin}/; # Angstrom sign, very outside +print "ok 662\n"; + +print "not " unless chr(0x5d0) =~ /\p{InHebrew}/; # inside HebrewBlock +print "ok 663\n"; + +print "not " unless chr(0xfb4f) =~ /\p{InHebrew}/; # outside HebrewBlock +print "ok 664\n"; +