lib/unicore/Decomposition.pl Unicode character database
lib/unicore/EAWidth.txt Unicode character database
lib/unicore/In.pl Unicode character database
-lib/unicore/In/0.pl Unicode character database
-lib/unicore/In/1.pl Unicode character database
-lib/unicore/In/10.pl Unicode character database
-lib/unicore/In/100.pl Unicode character database
-lib/unicore/In/101.pl Unicode character database
-lib/unicore/In/102.pl Unicode character database
-lib/unicore/In/103.pl Unicode character database
-lib/unicore/In/104.pl Unicode character database
-lib/unicore/In/105.pl Unicode character database
-lib/unicore/In/106.pl Unicode character database
-lib/unicore/In/107.pl Unicode character database
-lib/unicore/In/108.pl Unicode character database
-lib/unicore/In/109.pl Unicode character database
-lib/unicore/In/11.pl Unicode character database
-lib/unicore/In/110.pl Unicode character database
-lib/unicore/In/111.pl Unicode character database
-lib/unicore/In/112.pl Unicode character database
-lib/unicore/In/113.pl Unicode character database
-lib/unicore/In/114.pl Unicode character database
-lib/unicore/In/115.pl Unicode character database
-lib/unicore/In/116.pl Unicode character database
-lib/unicore/In/117.pl Unicode character database
-lib/unicore/In/118.pl Unicode character database
-lib/unicore/In/119.pl Unicode character database
-lib/unicore/In/12.pl Unicode character database
-lib/unicore/In/120.pl Unicode character database
-lib/unicore/In/121.pl Unicode character database
-lib/unicore/In/122.pl Unicode character database
-lib/unicore/In/123.pl Unicode character database
-lib/unicore/In/124.pl Unicode character database
-lib/unicore/In/125.pl Unicode character database
-lib/unicore/In/126.pl Unicode character database
-lib/unicore/In/127.pl Unicode character database
-lib/unicore/In/128.pl Unicode character database
-lib/unicore/In/129.pl Unicode character database
-lib/unicore/In/13.pl Unicode character database
-lib/unicore/In/130.pl Unicode character database
-lib/unicore/In/131.pl Unicode character database
-lib/unicore/In/132.pl Unicode character database
-lib/unicore/In/133.pl Unicode character database
-lib/unicore/In/134.pl Unicode character database
-lib/unicore/In/135.pl Unicode character database
-lib/unicore/In/136.pl Unicode character database
-lib/unicore/In/137.pl Unicode character database
-lib/unicore/In/138.pl Unicode character database
-lib/unicore/In/139.pl Unicode character database
-lib/unicore/In/14.pl Unicode character database
-lib/unicore/In/140.pl Unicode character database
-lib/unicore/In/141.pl Unicode character database
-lib/unicore/In/142.pl Unicode character database
-lib/unicore/In/143.pl Unicode character database
-lib/unicore/In/144.pl Unicode character database
-lib/unicore/In/145.pl Unicode character database
-lib/unicore/In/146.pl Unicode character database
-lib/unicore/In/147.pl Unicode character database
-lib/unicore/In/148.pl Unicode character database
-lib/unicore/In/149.pl Unicode character database
-lib/unicore/In/15.pl Unicode character database
-lib/unicore/In/150.pl Unicode character database
-lib/unicore/In/151.pl Unicode character database
-lib/unicore/In/152.pl Unicode character database
-lib/unicore/In/153.pl Unicode character database
-lib/unicore/In/154.pl Unicode character database
-lib/unicore/In/155.pl Unicode character database
-lib/unicore/In/156.pl Unicode character database
-lib/unicore/In/157.pl Unicode character database
-lib/unicore/In/158.pl Unicode character database
-lib/unicore/In/159.pl Unicode character database
-lib/unicore/In/16.pl Unicode character database
-lib/unicore/In/160.pl Unicode character database
-lib/unicore/In/161.pl Unicode character database
-lib/unicore/In/162.pl Unicode character database
-lib/unicore/In/163.pl Unicode character database
-lib/unicore/In/164.pl Unicode character database
-lib/unicore/In/165.pl Unicode character database
-lib/unicore/In/166.pl Unicode character database
-lib/unicore/In/167.pl Unicode character database
-lib/unicore/In/168.pl Unicode character database
-lib/unicore/In/169.pl Unicode character database
-lib/unicore/In/17.pl Unicode character database
-lib/unicore/In/170.pl Unicode character database
-lib/unicore/In/171.pl Unicode character database
-lib/unicore/In/172.pl Unicode character database
-lib/unicore/In/173.pl Unicode character database
-lib/unicore/In/18.pl Unicode character database
-lib/unicore/In/19.pl Unicode character database
-lib/unicore/In/2.pl Unicode character database
-lib/unicore/In/20.pl Unicode character database
-lib/unicore/In/21.pl Unicode character database
-lib/unicore/In/22.pl Unicode character database
-lib/unicore/In/23.pl Unicode character database
-lib/unicore/In/24.pl Unicode character database
-lib/unicore/In/25.pl Unicode character database
-lib/unicore/In/26.pl Unicode character database
-lib/unicore/In/27.pl Unicode character database
-lib/unicore/In/28.pl Unicode character database
-lib/unicore/In/29.pl Unicode character database
-lib/unicore/In/3.pl Unicode character database
-lib/unicore/In/30.pl Unicode character database
-lib/unicore/In/31.pl Unicode character database
-lib/unicore/In/32.pl Unicode character database
-lib/unicore/In/33.pl Unicode character database
-lib/unicore/In/34.pl Unicode character database
-lib/unicore/In/35.pl Unicode character database
-lib/unicore/In/36.pl Unicode character database
-lib/unicore/In/37.pl Unicode character database
-lib/unicore/In/38.pl Unicode character database
-lib/unicore/In/39.pl Unicode character database
-lib/unicore/In/4.pl Unicode character database
-lib/unicore/In/40.pl Unicode character database
-lib/unicore/In/41.pl Unicode character database
-lib/unicore/In/42.pl Unicode character database
-lib/unicore/In/43.pl Unicode character database
-lib/unicore/In/44.pl Unicode character database
-lib/unicore/In/45.pl Unicode character database
-lib/unicore/In/46.pl Unicode character database
-lib/unicore/In/47.pl Unicode character database
-lib/unicore/In/48.pl Unicode character database
-lib/unicore/In/49.pl Unicode character database
-lib/unicore/In/5.pl Unicode character database
-lib/unicore/In/50.pl Unicode character database
-lib/unicore/In/51.pl Unicode character database
-lib/unicore/In/52.pl Unicode character database
-lib/unicore/In/53.pl Unicode character database
-lib/unicore/In/54.pl Unicode character database
-lib/unicore/In/55.pl Unicode character database
-lib/unicore/In/56.pl Unicode character database
-lib/unicore/In/57.pl Unicode character database
-lib/unicore/In/58.pl Unicode character database
-lib/unicore/In/59.pl Unicode character database
-lib/unicore/In/6.pl Unicode character database
-lib/unicore/In/60.pl Unicode character database
-lib/unicore/In/61.pl Unicode character database
-lib/unicore/In/62.pl Unicode character database
-lib/unicore/In/63.pl Unicode character database
-lib/unicore/In/64.pl Unicode character database
-lib/unicore/In/65.pl Unicode character database
-lib/unicore/In/66.pl Unicode character database
-lib/unicore/In/67.pl Unicode character database
-lib/unicore/In/68.pl Unicode character database
-lib/unicore/In/69.pl Unicode character database
-lib/unicore/In/7.pl Unicode character database
-lib/unicore/In/70.pl Unicode character database
-lib/unicore/In/71.pl Unicode character database
-lib/unicore/In/72.pl Unicode character database
-lib/unicore/In/73.pl Unicode character database
-lib/unicore/In/74.pl Unicode character database
-lib/unicore/In/75.pl Unicode character database
-lib/unicore/In/76.pl Unicode character database
-lib/unicore/In/77.pl Unicode character database
-lib/unicore/In/78.pl Unicode character database
-lib/unicore/In/79.pl Unicode character database
-lib/unicore/In/8.pl Unicode character database
-lib/unicore/In/80.pl Unicode character database
-lib/unicore/In/81.pl Unicode character database
-lib/unicore/In/82.pl Unicode character database
-lib/unicore/In/83.pl Unicode character database
-lib/unicore/In/84.pl Unicode character database
-lib/unicore/In/85.pl Unicode character database
-lib/unicore/In/86.pl Unicode character database
-lib/unicore/In/87.pl Unicode character database
-lib/unicore/In/88.pl Unicode character database
-lib/unicore/In/89.pl Unicode character database
-lib/unicore/In/9.pl Unicode character database
-lib/unicore/In/90.pl Unicode character database
-lib/unicore/In/91.pl Unicode character database
-lib/unicore/In/92.pl Unicode character database
-lib/unicore/In/93.pl Unicode character database
-lib/unicore/In/94.pl Unicode character database
-lib/unicore/In/95.pl Unicode character database
-lib/unicore/In/96.pl Unicode character database
-lib/unicore/In/97.pl Unicode character database
-lib/unicore/In/98.pl Unicode character database
-lib/unicore/In/99.pl Unicode character database
+lib/unicore/In/Alphabet.pl Unicode character database
+lib/unicore/In/Arabic.pl Unicode character database
+lib/unicore/In/ArabicP2.pl Unicode character database
+lib/unicore/In/ArabicPr.pl Unicode character database
+lib/unicore/In/Armenian.pl Unicode character database
+lib/unicore/In/Arrows.pl Unicode character database
+lib/unicore/In/BasicLat.pl Unicode character database
+lib/unicore/In/Bengali.pl Unicode character database
+lib/unicore/In/BlockEle.pl Unicode character database
+lib/unicore/In/Bopomof2.pl Unicode character database
+lib/unicore/In/Bopomofo.pl Unicode character database
+lib/unicore/In/BoxDrawi.pl Unicode character database
+lib/unicore/In/BrailleP.pl Unicode character database
+lib/unicore/In/Byzantin.pl Unicode character database
+lib/unicore/In/Cherokee.pl Unicode character database
+lib/unicore/In/CjkComp2.pl Unicode character database
+lib/unicore/In/CjkComp3.pl Unicode character database
+lib/unicore/In/CjkComp4.pl Unicode character database
+lib/unicore/In/CjkCompa.pl Unicode character database
+lib/unicore/In/CjkRadic.pl Unicode character database
+lib/unicore/In/CjkSymbo.pl Unicode character database
+lib/unicore/In/CjkUnif2.pl Unicode character database
+lib/unicore/In/CjkUnif3.pl Unicode character database
+lib/unicore/In/CjkUnifi.pl Unicode character database
+lib/unicore/In/Combini2.pl Unicode character database
+lib/unicore/In/Combini3.pl Unicode character database
+lib/unicore/In/Combinin.pl Unicode character database
+lib/unicore/In/ControlP.pl Unicode character database
+lib/unicore/In/Currency.pl Unicode character database
+lib/unicore/In/Cyrillic.pl Unicode character database
+lib/unicore/In/Deseret.pl Unicode character database
+lib/unicore/In/Devanaga.pl Unicode character database
+lib/unicore/In/Dingbats.pl Unicode character database
+lib/unicore/In/Enclose2.pl Unicode character database
+lib/unicore/In/Enclosed.pl Unicode character database
+lib/unicore/In/Ethiopic.pl Unicode character database
+lib/unicore/In/GeneralP.pl Unicode character database
+lib/unicore/In/Geometri.pl Unicode character database
+lib/unicore/In/Georgian.pl Unicode character database
+lib/unicore/In/Gothic.pl Unicode character database
+lib/unicore/In/Greek.pl Unicode character database
+lib/unicore/In/GreekExt.pl Unicode character database
+lib/unicore/In/Gujarati.pl Unicode character database
+lib/unicore/In/Gurmukhi.pl Unicode character database
+lib/unicore/In/Halfwidt.pl Unicode character database
+lib/unicore/In/HangulCo.pl Unicode character database
+lib/unicore/In/HangulJa.pl Unicode character database
+lib/unicore/In/HangulSy.pl Unicode character database
+lib/unicore/In/Hebrew.pl Unicode character database
+lib/unicore/In/HighPriv.pl Unicode character database
+lib/unicore/In/HighSurr.pl Unicode character database
+lib/unicore/In/Hiragana.pl Unicode character database
+lib/unicore/In/Ideograp.pl Unicode character database
+lib/unicore/In/IpaExten.pl Unicode character database
+lib/unicore/In/Kanbun.pl Unicode character database
+lib/unicore/In/KangxiRa.pl Unicode character database
+lib/unicore/In/Kannada.pl Unicode character database
+lib/unicore/In/Katakana.pl Unicode character database
+lib/unicore/In/Khmer.pl Unicode character database
+lib/unicore/In/Lao.pl Unicode character database
+lib/unicore/In/Latin1Su.pl Unicode character database
+lib/unicore/In/LatinEx2.pl Unicode character database
+lib/unicore/In/LatinEx3.pl Unicode character database
+lib/unicore/In/LatinExt.pl Unicode character database
+lib/unicore/In/Letterli.pl Unicode character database
+lib/unicore/In/LowSurro.pl Unicode character database
+lib/unicore/In/Malayala.pl Unicode character database
+lib/unicore/In/Mathema2.pl Unicode character database
+lib/unicore/In/Mathemat.pl Unicode character database
+lib/unicore/In/Miscell2.pl Unicode character database
+lib/unicore/In/Miscella.pl Unicode character database
+lib/unicore/In/Mongolia.pl Unicode character database
+lib/unicore/In/MusicalS.pl Unicode character database
+lib/unicore/In/Myanmar.pl Unicode character database
+lib/unicore/In/NumberFo.pl Unicode character database
+lib/unicore/In/Ogham.pl Unicode character database
+lib/unicore/In/OldItali.pl Unicode character database
+lib/unicore/In/OpticalC.pl Unicode character database
+lib/unicore/In/Oriya.pl Unicode character database
+lib/unicore/In/PrivateU.pl Unicode character database
+lib/unicore/In/Runic.pl Unicode character database
+lib/unicore/In/Sinhala.pl Unicode character database
+lib/unicore/In/SmallFor.pl Unicode character database
+lib/unicore/In/SpacingM.pl Unicode character database
+lib/unicore/In/Specials.pl Unicode character database
+lib/unicore/In/Superscr.pl Unicode character database
+lib/unicore/In/Syriac.pl Unicode character database
+lib/unicore/In/Tags.pl Unicode character database
+lib/unicore/In/Tamil.pl Unicode character database
+lib/unicore/In/Telugu.pl Unicode character database
+lib/unicore/In/Thaana.pl Unicode character database
+lib/unicore/In/Thai.pl Unicode character database
+lib/unicore/In/Tibetan.pl Unicode character database
+lib/unicore/In/UnifiedC.pl Unicode character database
+lib/unicore/In/YiRadica.pl Unicode character database
+lib/unicore/In/YiSyllab.pl Unicode character database
lib/unicore/Index.txt Unicode character database
lib/unicore/Is.pl Unicode character database
+lib/unicore/Is/2.pl Unicode character database
lib/unicore/Is/Alnum.pl Unicode character database
lib/unicore/Is/Alpha.pl Unicode character database
+lib/unicore/Is/Alphabet.pl Unicode character database
+lib/unicore/Is/Any.pl Unicode character database
+lib/unicore/Is/Arabic.pl Unicode character database
+lib/unicore/Is/Armenian.pl Unicode character database
lib/unicore/Is/ASCII.pl Unicode character database
+lib/unicore/Is/ASCIIHex.pl Unicode character database
+lib/unicore/Is/Assigned.pl Unicode character database
+lib/unicore/Is/Bengali.pl Unicode character database
lib/unicore/Is/BidiAL.pl Unicode character database
lib/unicore/Is/BidiAN.pl Unicode character database
lib/unicore/Is/BidiB.pl Unicode character database
lib/unicore/Is/BidiBN.pl Unicode character database
+lib/unicore/Is/BidiCont.pl Unicode character database
lib/unicore/Is/BidiCS.pl Unicode character database
lib/unicore/Is/BidiEN.pl Unicode character database
lib/unicore/Is/BidiES.pl Unicode character database
lib/unicore/Is/BidiS.pl Unicode character database
lib/unicore/Is/BidiWS.pl Unicode character database
lib/unicore/Is/Blank.pl Unicode character database
+lib/unicore/Is/Bopomofo.pl Unicode character database
lib/unicore/Is/C.pl Unicode character database
+lib/unicore/Is/Canadian.pl Unicode character database
+lib/unicore/Is/Canon.pl Unicode character database
lib/unicore/Is/Cc.pl Unicode character database
lib/unicore/Is/Cf.pl Unicode character database
+lib/unicore/Is/Cherokee.pl Unicode character database
lib/unicore/Is/Cn.pl Unicode character database
lib/unicore/Is/Cntrl.pl Unicode character database
lib/unicore/Is/Co.pl Unicode character database
+lib/unicore/Is/Common.pl Unicode character database
+lib/unicore/Is/Compat.pl Unicode character database
lib/unicore/Is/Cs.pl Unicode character database
+lib/unicore/Is/Cyrillic.pl Unicode character database
+lib/unicore/Is/Dash.pl Unicode character database
lib/unicore/Is/DCcircle.pl Unicode character database
lib/unicore/Is/DCcompat.pl Unicode character database
lib/unicore/Is/DCfinal.pl Unicode character database
lib/unicore/Is/DCfont.pl Unicode character database
-lib/unicore/Is/DCfraction.pl Unicode character database
-lib/unicore/Is/DCinitial.pl Unicode character database
-lib/unicore/Is/DCisolated.pl Unicode character database
+lib/unicore/Is/DCfracti.pl Unicode character database
+lib/unicore/Is/DCinitia.pl Unicode character database
+lib/unicore/Is/DCisolat.pl Unicode character database
lib/unicore/Is/DCmedial.pl Unicode character database
lib/unicore/Is/DCnarrow.pl Unicode character database
-lib/unicore/Is/DCnoBreak.pl Unicode character database
+lib/unicore/Is/DCnoBrea.pl Unicode character database
lib/unicore/Is/DCsmall.pl Unicode character database
lib/unicore/Is/DCsquare.pl Unicode character database
lib/unicore/Is/DCsub.pl Unicode character database
lib/unicore/Is/DCsuper.pl Unicode character database
-lib/unicore/Is/DCvertical.pl Unicode character database
+lib/unicore/Is/DCvertic.pl Unicode character database
lib/unicore/Is/DCwide.pl Unicode character database
-lib/unicore/Is/DecoCanon.pl Unicode character database
-lib/unicore/Is/DecoCompat.pl Unicode character database
+lib/unicore/Is/Deseret.pl Unicode character database
+lib/unicore/Is/Devanaga.pl Unicode character database
+lib/unicore/Is/Diacriti.pl Unicode character database
lib/unicore/Is/Digit.pl Unicode character database
+lib/unicore/Is/Ethiopic.pl Unicode character database
+lib/unicore/Is/Extender.pl Unicode character database
+lib/unicore/Is/Georgian.pl Unicode character database
+lib/unicore/Is/Gothic.pl Unicode character database
lib/unicore/Is/Graph.pl Unicode character database
+lib/unicore/Is/Greek.pl Unicode character database
+lib/unicore/Is/Gujarati.pl Unicode character database
+lib/unicore/Is/Gurmukhi.pl Unicode character database
+lib/unicore/Is/Han.pl Unicode character database
+lib/unicore/Is/Hangul.pl Unicode character database
+lib/unicore/Is/Hebrew.pl Unicode character database
+lib/unicore/Is/HexDigit.pl Unicode character database
+lib/unicore/Is/Hiragana.pl Unicode character database
+lib/unicore/Is/Hyphen.pl Unicode character database
+lib/unicore/Is/IDContin.pl Unicode character database
+lib/unicore/Is/Ideograp.pl Unicode character database
+lib/unicore/Is/IDStart.pl Unicode character database
+lib/unicore/Is/Inherite.pl Unicode character database
+lib/unicore/Is/JoinCont.pl Unicode character database
+lib/unicore/Is/Kannada.pl Unicode character database
+lib/unicore/Is/Katakana.pl Unicode character database
+lib/unicore/Is/Khmer.pl Unicode character database
lib/unicore/Is/L.pl Unicode character database
+lib/unicore/Is/Lao.pl Unicode character database
+lib/unicore/Is/Latin.pl Unicode character database
lib/unicore/Is/LbrkAI.pl Unicode character database
lib/unicore/Is/LbrkAL.pl Unicode character database
lib/unicore/Is/LbrkB2.pl Unicode character database
lib/unicore/Is/Lm.pl Unicode character database
lib/unicore/Is/Lo.pl Unicode character database
lib/unicore/Is/Lower.pl Unicode character database
+lib/unicore/Is/Lowercas.pl Unicode character database
lib/unicore/Is/Lt.pl Unicode character database
lib/unicore/Is/Lu.pl Unicode character database
lib/unicore/Is/M.pl Unicode character database
+lib/unicore/Is/Malayala.pl Unicode character database
+lib/unicore/Is/Math.pl Unicode character database
lib/unicore/Is/Mc.pl Unicode character database
lib/unicore/Is/Me.pl Unicode character database
lib/unicore/Is/Mirrored.pl Unicode character database
lib/unicore/Is/Mn.pl Unicode character database
+lib/unicore/Is/Mongolia.pl Unicode character database
+lib/unicore/Is/Myanmar.pl Unicode character database
lib/unicore/Is/N.pl Unicode character database
lib/unicore/Is/Nd.pl Unicode character database
lib/unicore/Is/Nl.pl Unicode character database
lib/unicore/Is/No.pl Unicode character database
+lib/unicore/Is/Nonchara.pl Unicode character database
+lib/unicore/Is/Ogham.pl Unicode character database
+lib/unicore/Is/OldItali.pl Unicode character database
+lib/unicore/Is/Oriya.pl Unicode character database
+lib/unicore/Is/OtherAlp.pl Unicode character database
+lib/unicore/Is/OtherLow.pl Unicode character database
+lib/unicore/Is/OtherMat.pl Unicode character database
+lib/unicore/Is/OtherUpp.pl Unicode character database
lib/unicore/Is/P.pl Unicode character database
lib/unicore/Is/Pc.pl Unicode character database
lib/unicore/Is/Pd.pl Unicode character database
lib/unicore/Is/Print.pl Unicode character database
lib/unicore/Is/Ps.pl Unicode character database
lib/unicore/Is/Punct.pl Unicode character database
+lib/unicore/Is/Quotatio.pl Unicode character database
+lib/unicore/Is/Runic.pl Unicode character database
lib/unicore/Is/S.pl Unicode character database
lib/unicore/Is/Sc.pl Unicode character database
+lib/unicore/Is/Sinhala.pl Unicode character database
lib/unicore/Is/Sk.pl Unicode character database
lib/unicore/Is/Sm.pl Unicode character database
lib/unicore/Is/So.pl Unicode character database
lib/unicore/Is/Space.pl Unicode character database
-lib/unicore/Is/SpacePerl.pl Unicode character database
+lib/unicore/Is/SpacePer.pl Unicode character database
+lib/unicore/Is/Syriac.pl Unicode character database
+lib/unicore/Is/Tamil.pl Unicode character database
+lib/unicore/Is/Telugu.pl Unicode character database
+lib/unicore/Is/Terminal.pl Unicode character database
+lib/unicore/Is/Thaana.pl Unicode character database
+lib/unicore/Is/Thai.pl Unicode character database
+lib/unicore/Is/Tibetan.pl Unicode character database
lib/unicore/Is/Title.pl Unicode character database
lib/unicore/Is/Upper.pl Unicode character database
+lib/unicore/Is/Uppercas.pl Unicode character database
+lib/unicore/Is/WhiteSpa.pl Unicode character database
lib/unicore/Is/Word.pl Unicode character database
lib/unicore/Is/XDigit.pl Unicode character database
+lib/unicore/Is/Yi.pl Unicode character database
lib/unicore/Is/Z.pl Unicode character database
lib/unicore/Is/Zl.pl Unicode character database
lib/unicore/Is/Zp.pl Unicode character database
t/op/time.t See if time functions work
t/op/tr.t See if tr works
t/op/undef.t See if undef works
-t/op/unifold.t See if Unicode folding works
-t/op/unisprintf.t See if Unicode sprintf works
t/op/universal.t See if UNIVERSAL class works
t/op/unshift.t See if unshift works
t/op/utf8decode.t See if UTF-8 decoding works
t/TEST The regression tester
t/test.pl Simple testing library
t/TestInit.pm Preamble library for core tests
+t/uni/fold.t See if Unicode folding works
+t/uni/sprintf.t See if Unicode sprintf works
taint.c Tainting code
thrdvar.h Per-thread variables
thread.h Threading header
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
0621 U
0622 0625 R
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
0621 <no shaping>
0622 0623 ALEF
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
0000 0008 BN
0009 S
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
0000 007F Basic Latin
0080 00FF Latin-1 Supplement
0250 02AF IPA Extensions
02B0 02FF Spacing Modifier Letters
0300 036F Combining Diacritical Marks
-0370 03FF Greek Block
-0400 04FF Cyrillic Block
-0530 058F Armenian Block
-0590 05FF Hebrew Block
-0600 06FF Arabic Block
-0700 074F Syriac Block
-0780 07BF Thaana Block
-0900 097F Devanagari Block
-0980 09FF Bengali Block
-0A00 0A7F Gurmukhi Block
-0A80 0AFF Gujarati Block
-0B00 0B7F Oriya Block
-0B80 0BFF Tamil Block
-0C00 0C7F Telugu Block
-0C80 0CFF Kannada Block
-0D00 0D7F Malayalam Block
-0D80 0DFF Sinhala Block
-0E00 0E7F Thai Block
-0E80 0EFF Lao Block
-0F00 0FFF Tibetan Block
-1000 109F Myanmar Block
-10A0 10FF Georgian Block
+0370 03FF Greek
+0400 04FF Cyrillic
+0530 058F Armenian
+0590 05FF Hebrew
+0600 06FF Arabic
+0700 074F Syriac
+0780 07BF Thaana
+0900 097F Devanagari
+0980 09FF Bengali
+0A00 0A7F Gurmukhi
+0A80 0AFF Gujarati
+0B00 0B7F Oriya
+0B80 0BFF Tamil
+0C00 0C7F Telugu
+0C80 0CFF Kannada
+0D00 0D7F Malayalam
+0D80 0DFF Sinhala
+0E00 0E7F Thai
+0E80 0EFF Lao
+0F00 0FFF Tibetan
+1000 109F Myanmar
+10A0 10FF Georgian
1100 11FF Hangul Jamo
-1200 137F Ethiopic Block
-13A0 13FF Cherokee Block
+1200 137F Ethiopic
+13A0 13FF Cherokee
1400 167F Unified Canadian Aboriginal Syllabics
-1680 169F Ogham Block
-16A0 16FF Runic Block
-1780 17FF Khmer Block
-1800 18AF Mongolian Block
+1680 169F Ogham
+16A0 16FF Runic
+1780 17FF Khmer
+1800 18AF Mongolian
1E00 1EFF Latin Extended Additional
1F00 1FFF Greek Extended
2000 206F General Punctuation
2F00 2FDF Kangxi Radicals
2FF0 2FFF Ideographic Description Characters
3000 303F CJK Symbols and Punctuation
-3040 309F Hiragana Block
-30A0 30FF Katakana Block
-3100 312F Bopomofo Block
+3040 309F Hiragana
+30A0 30FF Katakana
+3100 312F Bopomofo
3130 318F Hangul Compatibility Jamo
3190 319F Kanbun
31A0 31BF Bopomofo Extended
FEFF Specials
FF00 FFEF Halfwidth and Fullwidth Forms
FFF0 FFFD Specials
-10300 1032F Old Italic Block
-10330 1034F Gothic Block
-10400 1044F Deseret Block
+10300 1032F Old Italic
+10330 1034F Gothic
+10400 1044F Deseret
1D000 1D0FF Byzantine Musical Symbols
1D100 1D1FF Musical Symbols
1D400 1D7FF Mathematical Alphanumeric Symbols
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
0000 001F Cc
0020 Zs
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
0300 0314 230
0315 232
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
00A0 <noBreak> 0020
00A8 <compat> 0020 0308
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
-%utf8::In =
-(
-'All' => '173',
-'Alphabetic' => '165',
-'Alphabetic Presentation Forms' => '129',
-'Any' => '172',
-'ARABIC' => '16',
-'Arabic Block' => '62',
-'Arabic Presentation Forms-A' => '130',
-'Arabic Presentation Forms-B' => '134',
-'ARMENIAN' => '14',
-'Armenian Block' => '60',
-'Arrows' => '96',
-'ASCII_Hex_Digit' => '152',
-'Assigned' => '163',
-'Basic Latin' => '51',
-'BENGALI' => '20',
-'Bengali Block' => '66',
-'Bidi_Control' => '159',
-'Block Elements' => '103',
-'BOPOMOFO' => '45',
-'Bopomofo Block' => '114',
-'Bopomofo Extended' => '117',
-'Box Drawing' => '102',
-'Braille Patterns' => '107',
-'Byzantine Musical Symbols' => '140',
-'CANADIAN-ABORIGINAL' => '37',
-'CHEROKEE' => '36',
-'Cherokee Block' => '82',
-'CJK Compatibility' => '119',
-'CJK Compatibility Forms' => '132',
-'CJK Compatibility Ideographs' => '128',
-'CJK Compatibility Ideographs Supplement' => '144',
-'CJK Ideograph' => '1',
-'CJK Ideograph Extension A' => '0',
-'CJK Ideograph Extension B' => '7',
-'CJK Radicals Supplement' => '108',
-'CJK Symbols and Punctuation' => '111',
-'CJK Unified Ideographs' => '121',
-'CJK Unified Ideographs Extension A' => '120',
-'CJK Unified Ideographs Extension B' => '143',
-'Combining Diacritical Marks' => '57',
-'Combining Half Marks' => '131',
-'Combining Marks for Symbols' => '93',
-'Common' => '50',
-'Control Pictures' => '99',
-'Currency Symbols' => '92',
-'CYRILLIC' => '13',
-'Cyrillic Block' => '59',
-'Dash' => '151',
-'DESERET' => '49',
-'Deseret Block' => '139',
-'DEVANAGARI' => '19',
-'Devanagari Block' => '65',
-'Diacritic' => '154',
-'Dingbats' => '106',
-'Enclosed Alphanumerics' => '101',
-'Enclosed CJK Letters and Months' => '118',
-'ETHIOPIC' => '35',
-'Ethiopic Block' => '81',
-'Extender' => '155',
-'General Punctuation' => '90',
-'Geometric Shapes' => '104',
-'GEORGIAN' => '33',
-'Georgian Block' => '79',
-'GOTHIC' => '48',
-'Gothic Block' => '138',
-'GREEK' => '11',
-'Greek Block' => '58',
-'Greek Extended' => '89',
-'GUJARATI' => '22',
-'Gujarati Block' => '68',
-'GURMUKHI' => '21',
-'Gurmukhi Block' => '67',
-'Halfwidth and Fullwidth Forms' => '136',
-'HAN' => '42',
-'HANGUL' => '34',
-'Hangul Compatibility Jamo' => '115',
-'Hangul Jamo' => '80',
-'Hangul Syllable' => '2',
-'Hangul Syllables' => '124',
-'HEBREW' => '15',
-'Hebrew Block' => '61',
-'Hex_Digit' => '153',
-'High Private Use Surrogates' => '126',
-'High Surrogates' => '125',
-'HIRAGANA' => '43',
-'Hiragana Block' => '112',
-'Hyphen' => '150',
-'ID_Continue' => '171',
-'ID_Start' => '170',
-'Ideographic' => '161',
-'Ideographic Description Characters' => '110',
-'INHERITED' => '12',
-'IPA Extensions' => '55',
-'Join_Control' => '158',
-'Kanbun' => '116',
-'Kangxi Radicals' => '109',
-'KANNADA' => '26',
-'Kannada Block' => '72',
-'KATAKANA' => '44',
-'Katakana Block' => '113',
-'KHMER' => '40',
-'Khmer Block' => '86',
-'Lampersand' => '169',
-'LAO' => '30',
-'Lao Block' => '76',
-'LATIN' => '10',
-'Latin Extended Additional' => '88',
-'Latin Extended-A' => '53',
-'Latin Extended-B' => '54',
-'Latin-1 Supplement' => '52',
-'Letterlike Symbols' => '94',
-'Low Surrogate' => '5',
-'Low Surrogates' => '127',
-'Lowercase' => '166',
-'MALAYALAM' => '27',
-'Malayalam Block' => '73',
-'Math' => '168',
-'Mathematical Alphanumeric Symbols' => '142',
-'Mathematical Operators' => '97',
-'Miscellaneous Symbols' => '105',
-'Miscellaneous Technical' => '98',
-'MONGOLIAN' => '41',
-'Mongolian Block' => '87',
-'Musical Symbols' => '141',
-'MYANMAR' => '32',
-'Myanmar Block' => '78',
-'Non Private Use High Surrogate' => '3',
-'Noncharacter_Code_Point' => '162',
-'Number Forms' => '95',
-'OGHAM' => '38',
-'Ogham Block' => '84',
-'Old Italic Block' => '137',
-'OLD-ITALIC' => '47',
-'Optical Character Recognition' => '100',
-'ORIYA' => '23',
-'Oriya Block' => '69',
-'Other_Alphabetic' => '157',
-'Other_Lowercase' => '156',
-'Other_Math' => '149',
-'Other_Uppercase' => '160',
-'Plane 15 Private Use' => '8',
-'Plane 16 Private Use' => '9',
-'Private Use' => '6',
-'Private Use High Surrogate' => '4',
-'Quotation_Mark' => '148',
-'RUNIC' => '39',
-'Runic Block' => '85',
-'SINHALA' => '28',
-'Sinhala Block' => '74',
-'Small Form Variants' => '133',
-'Spacing Modifier Letters' => '56',
-'Specials' => '135',
-'Superscripts and Subscripts' => '91',
-'SYRIAC' => '17',
-'Syriac Block' => '63',
-'Tags' => '145',
-'TAMIL' => '24',
-'Tamil Block' => '70',
-'TELUGU' => '25',
-'Telugu Block' => '71',
-'Terminal_Punctuation' => '147',
-'THAANA' => '18',
-'Thaana Block' => '64',
-'THAI' => '29',
-'Thai Block' => '75',
-'TIBETAN' => '31',
-'Tibetan Block' => '77',
-'Unassigned' => '164',
-'Unified Canadian Aboriginal Syllabics' => '83',
-'Uppercase' => '167',
-'White_space' => '146',
-'YI' => '46',
-'Yi Radicals' => '123',
-'Yi Syllables' => '122',
-);
-%utf8::InPat =
-(
-'al' => {
- 'All' => '173',
- 'Alphabetic' => '165',
- 'Alphabetic(?:[-_]|\s+)?Presentation(?:[-_]|\s+)?Forms' => '129',
-},
-'an' => {
- 'Any' => '172',
-},
-'ar' => {
- 'ARABIC' => '16',
- 'Arabic(?:[-_]|\s+)?Block' => '62',
- 'Arabic(?:[-_]|\s+)?Presentation(?:[-_]|\s+)?Forms(?:[-_]|\s+)?A' => '130',
- 'Arabic(?:[-_]|\s+)?Presentation(?:[-_]|\s+)?Forms(?:[-_]|\s+)?B' => '134',
- 'ARMENIAN' => '14',
- 'Armenian(?:[-_]|\s+)?Block' => '60',
- 'Arrows' => '96',
-},
-'as' => {
- 'ASCII(?:[-_]|\s+)?Hex(?:[-_]|\s+)?Digit' => '152',
- 'Assigned' => '163',
-},
-'ba' => {
- 'Basic(?:[-_]|\s+)?Latin' => '51',
-},
-'be' => {
- 'BENGALI' => '20',
- 'Bengali(?:[-_]|\s+)?Block' => '66',
-},
-'bi' => {
- 'Bidi(?:[-_]|\s+)?Control' => '159',
-},
-'bl' => {
- 'Block(?:[-_]|\s+)?Elements' => '103',
-},
-'bo' => {
- 'BOPOMOFO' => '45',
- 'Bopomofo(?:[-_]|\s+)?Block' => '114',
- 'Bopomofo(?:[-_]|\s+)?Extended' => '117',
- 'Box(?:[-_]|\s+)?Drawing' => '102',
-},
-'br' => {
- 'Braille(?:[-_]|\s+)?Patterns' => '107',
-},
-'by' => {
- 'Byzantine(?:[-_]|\s+)?Musical(?:[-_]|\s+)?Symbols' => '140',
-},
-'ca' => {
- 'CANADIAN(?:[-_]|\s+)?ABORIGINAL' => '37',
-},
-'ch' => {
- 'CHEROKEE' => '36',
- 'Cherokee(?:[-_]|\s+)?Block' => '82',
-},
-'cj' => {
- 'CJK(?:[-_]|\s+)?Compatibility' => '119',
- 'CJK(?:[-_]|\s+)?Compatibility(?:[-_]|\s+)?Forms' => '132',
- 'CJK(?:[-_]|\s+)?Compatibility(?:[-_]|\s+)?Ideographs' => '128',
- 'CJK(?:[-_]|\s+)?Compatibility(?:[-_]|\s+)?Ideographs(?:[-_]|\s+)?Supplement' => '144',
- 'CJK(?:[-_]|\s+)?Ideograph' => '1',
- 'CJK(?:[-_]|\s+)?Ideograph(?:[-_]|\s+)?Extension(?:[-_]|\s+)?A' => '0',
- 'CJK(?:[-_]|\s+)?Ideograph(?:[-_]|\s+)?Extension(?:[-_]|\s+)?B' => '7',
- 'CJK(?:[-_]|\s+)?Radicals(?:[-_]|\s+)?Supplement' => '108',
- 'CJK(?:[-_]|\s+)?Symbols(?:[-_]|\s+)?and(?:[-_]|\s+)?Punctuation' => '111',
- 'CJK(?:[-_]|\s+)?Unified(?:[-_]|\s+)?Ideographs' => '121',
- 'CJK(?:[-_]|\s+)?Unified(?:[-_]|\s+)?Ideographs(?:[-_]|\s+)?Extension(?:[-_]|\s+)?A' => '120',
- 'CJK(?:[-_]|\s+)?Unified(?:[-_]|\s+)?Ideographs(?:[-_]|\s+)?Extension(?:[-_]|\s+)?B' => '143',
-},
-'co' => {
- 'Combining(?:[-_]|\s+)?Diacritical(?:[-_]|\s+)?Marks' => '57',
- 'Combining(?:[-_]|\s+)?Half(?:[-_]|\s+)?Marks' => '131',
- 'Combining(?:[-_]|\s+)?Marks(?:[-_]|\s+)?for(?:[-_]|\s+)?Symbols' => '93',
- 'Common' => '50',
- 'Control(?:[-_]|\s+)?Pictures' => '99',
-},
-'cu' => {
- 'Currency(?:[-_]|\s+)?Symbols' => '92',
-},
-'cy' => {
- 'CYRILLIC' => '13',
- 'Cyrillic(?:[-_]|\s+)?Block' => '59',
-},
-'da' => {
- 'Dash' => '151',
-},
-'de' => {
- 'DESERET' => '49',
- 'Deseret(?:[-_]|\s+)?Block' => '139',
- 'DEVANAGARI' => '19',
- 'Devanagari(?:[-_]|\s+)?Block' => '65',
-},
-'di' => {
- 'Diacritic' => '154',
- 'Dingbats' => '106',
-},
-'en' => {
- 'Enclosed(?:[-_]|\s+)?Alphanumerics' => '101',
- 'Enclosed(?:[-_]|\s+)?CJK(?:[-_]|\s+)?Letters(?:[-_]|\s+)?and(?:[-_]|\s+)?Months' => '118',
-},
-'et' => {
- 'ETHIOPIC' => '35',
- 'Ethiopic(?:[-_]|\s+)?Block' => '81',
-},
-'ex' => {
- 'Extender' => '155',
-},
-'ge' => {
- 'General(?:[-_]|\s+)?Punctuation' => '90',
- 'Geometric(?:[-_]|\s+)?Shapes' => '104',
- 'GEORGIAN' => '33',
- 'Georgian(?:[-_]|\s+)?Block' => '79',
-},
-'go' => {
- 'GOTHIC' => '48',
- 'Gothic(?:[-_]|\s+)?Block' => '138',
-},
-'gr' => {
- 'GREEK' => '11',
- 'Greek(?:[-_]|\s+)?Block' => '58',
- 'Greek(?:[-_]|\s+)?Extended' => '89',
-},
-'gu' => {
- 'GUJARATI' => '22',
- 'Gujarati(?:[-_]|\s+)?Block' => '68',
- 'GURMUKHI' => '21',
- 'Gurmukhi(?:[-_]|\s+)?Block' => '67',
-},
-'ha' => {
- 'Halfwidth(?:[-_]|\s+)?and(?:[-_]|\s+)?Fullwidth(?:[-_]|\s+)?Forms' => '136',
- 'HAN' => '42',
- 'HANGUL' => '34',
- 'Hangul(?:[-_]|\s+)?Compatibility(?:[-_]|\s+)?Jamo' => '115',
- 'Hangul(?:[-_]|\s+)?Jamo' => '80',
- 'Hangul(?:[-_]|\s+)?Syllable' => '2',
- 'Hangul(?:[-_]|\s+)?Syllables' => '124',
-},
-'he' => {
- 'HEBREW' => '15',
- 'Hebrew(?:[-_]|\s+)?Block' => '61',
- 'Hex(?:[-_]|\s+)?Digit' => '153',
-},
-'hi' => {
- 'High(?:[-_]|\s+)?Private(?:[-_]|\s+)?Use(?:[-_]|\s+)?Surrogates' => '126',
- 'High(?:[-_]|\s+)?Surrogates' => '125',
- 'HIRAGANA' => '43',
- 'Hiragana(?:[-_]|\s+)?Block' => '112',
-},
-'hy' => {
- 'Hyphen' => '150',
-},
-'id' => {
- 'ID(?:[-_]|\s+)?Continue' => '171',
- 'ID(?:[-_]|\s+)?Start' => '170',
- 'Ideographic' => '161',
- 'Ideographic(?:[-_]|\s+)?Description(?:[-_]|\s+)?Characters' => '110',
-},
-'in' => {
- 'INHERITED' => '12',
-},
-'ip' => {
- 'IPA(?:[-_]|\s+)?Extensions' => '55',
-},
-'jo' => {
- 'Join(?:[-_]|\s+)?Control' => '158',
-},
-'ka' => {
- 'Kanbun' => '116',
- 'Kangxi(?:[-_]|\s+)?Radicals' => '109',
- 'KANNADA' => '26',
- 'Kannada(?:[-_]|\s+)?Block' => '72',
- 'KATAKANA' => '44',
- 'Katakana(?:[-_]|\s+)?Block' => '113',
-},
-'kh' => {
- 'KHMER' => '40',
- 'Khmer(?:[-_]|\s+)?Block' => '86',
-},
-'la' => {
- 'Lampersand' => '169',
- 'LAO' => '30',
- 'Lao(?:[-_]|\s+)?Block' => '76',
- 'LATIN' => '10',
- 'Latin(?:[-_]|\s+)?Extended(?:[-_]|\s+)?Additional' => '88',
- 'Latin(?:[-_]|\s+)?Extended(?:[-_]|\s+)?A' => '53',
- 'Latin(?:[-_]|\s+)?Extended(?:[-_]|\s+)?B' => '54',
- 'Latin(?:[-_]|\s+)?1(?:[-_]|\s+)?Supplement' => '52',
-},
-'le' => {
- 'Letterlike(?:[-_]|\s+)?Symbols' => '94',
-},
-'lo' => {
- 'Low(?:[-_]|\s+)?Surrogate' => '5',
- 'Low(?:[-_]|\s+)?Surrogates' => '127',
- 'Lowercase' => '166',
-},
-'ma' => {
- 'MALAYALAM' => '27',
- 'Malayalam(?:[-_]|\s+)?Block' => '73',
- 'Math' => '168',
- 'Mathematical(?:[-_]|\s+)?Alphanumeric(?:[-_]|\s+)?Symbols' => '142',
- 'Mathematical(?:[-_]|\s+)?Operators' => '97',
-},
-'mi' => {
- 'Miscellaneous(?:[-_]|\s+)?Symbols' => '105',
- 'Miscellaneous(?:[-_]|\s+)?Technical' => '98',
-},
-'mo' => {
- 'MONGOLIAN' => '41',
- 'Mongolian(?:[-_]|\s+)?Block' => '87',
-},
-'mu' => {
- 'Musical(?:[-_]|\s+)?Symbols' => '141',
-},
-'my' => {
- 'MYANMAR' => '32',
- 'Myanmar(?:[-_]|\s+)?Block' => '78',
-},
-'no' => {
- 'Non(?:[-_]|\s+)?Private(?:[-_]|\s+)?Use(?:[-_]|\s+)?High(?:[-_]|\s+)?Surrogate' => '3',
- 'Noncharacter(?:[-_]|\s+)?Code(?:[-_]|\s+)?Point' => '162',
-},
-'nu' => {
- 'Number(?:[-_]|\s+)?Forms' => '95',
-},
-'og' => {
- 'OGHAM' => '38',
- 'Ogham(?:[-_]|\s+)?Block' => '84',
-},
-'ol' => {
- 'Old(?:[-_]|\s+)?Italic(?:[-_]|\s+)?Block' => '137',
- 'OLD(?:[-_]|\s+)?ITALIC' => '47',
-},
-'op' => {
- 'Optical(?:[-_]|\s+)?Character(?:[-_]|\s+)?Recognition' => '100',
-},
-'or' => {
- 'ORIYA' => '23',
- 'Oriya(?:[-_]|\s+)?Block' => '69',
-},
-'ot' => {
- 'Other(?:[-_]|\s+)?Alphabetic' => '157',
- 'Other(?:[-_]|\s+)?Lowercase' => '156',
- 'Other(?:[-_]|\s+)?Math' => '149',
- 'Other(?:[-_]|\s+)?Uppercase' => '160',
-},
-'pl' => {
- 'Plane(?:[-_]|\s+)?15(?:[-_]|\s+)?Private(?:[-_]|\s+)?Use' => '8',
- 'Plane(?:[-_]|\s+)?16(?:[-_]|\s+)?Private(?:[-_]|\s+)?Use' => '9',
-},
-'pr' => {
- 'Private(?:[-_]|\s+)?Use' => '6',
- 'Private(?:[-_]|\s+)?Use(?:[-_]|\s+)?High(?:[-_]|\s+)?Surrogate' => '4',
-},
-'qu' => {
- 'Quotation(?:[-_]|\s+)?Mark' => '148',
-},
-'ru' => {
- 'RUNIC' => '39',
- 'Runic(?:[-_]|\s+)?Block' => '85',
-},
-'si' => {
- 'SINHALA' => '28',
- 'Sinhala(?:[-_]|\s+)?Block' => '74',
-},
-'sm' => {
- 'Small(?:[-_]|\s+)?Form(?:[-_]|\s+)?Variants' => '133',
-},
-'sp' => {
- 'Spacing(?:[-_]|\s+)?Modifier(?:[-_]|\s+)?Letters' => '56',
- 'Specials' => '135',
-},
-'su' => {
- 'Superscripts(?:[-_]|\s+)?and(?:[-_]|\s+)?Subscripts' => '91',
-},
-'sy' => {
- 'SYRIAC' => '17',
- 'Syriac(?:[-_]|\s+)?Block' => '63',
-},
-'ta' => {
- 'Tags' => '145',
- 'TAMIL' => '24',
- 'Tamil(?:[-_]|\s+)?Block' => '70',
-},
-'te' => {
- 'TELUGU' => '25',
- 'Telugu(?:[-_]|\s+)?Block' => '71',
- 'Terminal(?:[-_]|\s+)?Punctuation' => '147',
-},
-'th' => {
- 'THAANA' => '18',
- 'Thaana(?:[-_]|\s+)?Block' => '64',
- 'THAI' => '29',
- 'Thai(?:[-_]|\s+)?Block' => '75',
-},
-'ti' => {
- 'TIBETAN' => '31',
- 'Tibetan(?:[-_]|\s+)?Block' => '77',
-},
-'un' => {
- 'Unassigned' => '164',
- 'Unified(?:[-_]|\s+)?Canadian(?:[-_]|\s+)?Aboriginal(?:[-_]|\s+)?Syllabics' => '83',
-},
-'up' => {
- 'Uppercase' => '167',
-},
-'wh' => {
- 'White(?:[-_]|\s+)?space' => '146',
-},
-'yi' => {
- 'YI' => '46',
- 'Yi(?:[-_]|\s+)?Radicals' => '123',
- 'Yi(?:[-_]|\s+)?Syllables' => '122',
-},
-);
+# Built Mon Jan 14 15:52:42 2002.
+
+##
+## Data in this file used by ../utf8_heavy.pl
+##
-%utf8::InScript =
-(
- 10 => 'LATIN',
- 11 => 'GREEK',
- 12 => 'INHERITED',
- 13 => 'CYRILLIC',
- 14 => 'ARMENIAN',
- 15 => 'HEBREW',
- 16 => 'ARABIC',
- 17 => 'SYRIAC',
- 18 => 'THAANA',
- 19 => 'DEVANAGARI',
- 20 => 'BENGALI',
- 21 => 'GURMUKHI',
- 22 => 'GUJARATI',
- 23 => 'ORIYA',
- 24 => 'TAMIL',
- 25 => 'TELUGU',
- 26 => 'KANNADA',
- 27 => 'MALAYALAM',
- 28 => 'SINHALA',
- 29 => 'THAI',
- 30 => 'LAO',
- 31 => 'TIBETAN',
- 32 => 'MYANMAR',
- 33 => 'GEORGIAN',
- 34 => 'HANGUL',
- 35 => 'ETHIOPIC',
- 36 => 'CHEROKEE',
- 37 => 'CANADIAN-ABORIGINAL',
- 38 => 'OGHAM',
- 39 => 'RUNIC',
- 40 => 'KHMER',
- 41 => 'MONGOLIAN',
- 42 => 'HAN',
- 43 => 'HIRAGANA',
- 44 => 'KATAKANA',
- 45 => 'BOPOMOFO',
- 46 => 'YI',
- 47 => 'OLD-ITALIC',
- 48 => 'GOTHIC',
- 49 => 'DESERET',
+## Mapping from name to filename in ./In
+%utf8::In = (
);
-%utf8::InBlock =
-(
- 51 => 'Basic Latin',
- 52 => 'Latin-1 Supplement',
- 53 => 'Latin Extended-A',
- 54 => 'Latin Extended-B',
- 55 => 'IPA Extensions',
- 56 => 'Spacing Modifier Letters',
- 57 => 'Combining Diacritical Marks',
- 58 => 'Greek',
- 59 => 'Cyrillic',
- 60 => 'Armenian',
- 61 => 'Hebrew',
- 62 => 'Arabic',
- 63 => 'Syriac',
- 64 => 'Thaana',
- 65 => 'Devanagari',
- 66 => 'Bengali',
- 67 => 'Gurmukhi',
- 68 => 'Gujarati',
- 69 => 'Oriya',
- 70 => 'Tamil',
- 71 => 'Telugu',
- 72 => 'Kannada',
- 73 => 'Malayalam',
- 74 => 'Sinhala',
- 75 => 'Thai',
- 76 => 'Lao',
- 77 => 'Tibetan',
- 78 => 'Myanmar',
- 79 => 'Georgian',
- 80 => 'Hangul Jamo',
- 81 => 'Ethiopic',
- 82 => 'Cherokee',
- 83 => 'Unified Canadian Aboriginal Syllabics',
- 84 => 'Ogham',
- 85 => 'Runic',
- 86 => 'Khmer',
- 87 => 'Mongolian',
- 88 => 'Latin Extended Additional',
- 89 => 'Greek Extended',
- 90 => 'General Punctuation',
- 91 => 'Superscripts and Subscripts',
- 92 => 'Currency Symbols',
- 93 => 'Combining Marks for Symbols',
- 94 => 'Letterlike Symbols',
- 95 => 'Number Forms',
- 96 => 'Arrows',
- 97 => 'Mathematical Operators',
- 98 => 'Miscellaneous Technical',
- 99 => 'Control Pictures',
- 100 => 'Optical Character Recognition',
- 101 => 'Enclosed Alphanumerics',
- 102 => 'Box Drawing',
- 103 => 'Block Elements',
- 104 => 'Geometric Shapes',
- 105 => 'Miscellaneous Symbols',
- 106 => 'Dingbats',
- 107 => 'Braille Patterns',
- 108 => 'CJK Radicals Supplement',
- 109 => 'Kangxi Radicals',
- 110 => 'Ideographic Description Characters',
- 111 => 'CJK Symbols and Punctuation',
- 112 => 'Hiragana',
- 113 => 'Katakana',
- 114 => 'Bopomofo',
- 115 => 'Hangul Compatibility Jamo',
- 116 => 'Kanbun',
- 117 => 'Bopomofo Extended',
- 118 => 'Enclosed CJK Letters and Months',
- 119 => 'CJK Compatibility',
- 120 => 'CJK Unified Ideographs Extension A',
- 121 => 'CJK Unified Ideographs',
- 122 => 'Yi Syllables',
- 123 => 'Yi Radicals',
- 124 => 'Hangul Syllables',
- 125 => 'High Surrogates',
- 126 => 'High Private Use Surrogates',
- 127 => 'Low Surrogates',
- 128 => 'CJK Compatibility Ideographs',
- 129 => 'Alphabetic Presentation Forms',
- 130 => 'Arabic Presentation Forms-A',
- 131 => 'Combining Half Marks',
- 132 => 'CJK Compatibility Forms',
- 133 => 'Small Form Variants',
- 134 => 'Arabic Presentation Forms-B',
- 135 => 'Specials',
- 136 => 'Halfwidth and Fullwidth Forms',
- 137 => 'Old Italic',
- 138 => 'Gothic',
- 139 => 'Deseret',
- 140 => 'Byzantine Musical Symbols',
- 141 => 'Musical Symbols',
- 142 => 'Mathematical Alphanumeric Symbols',
- 143 => 'CJK Unified Ideographs Extension B',
- 144 => 'CJK Compatibility Ideographs Supplement',
- 145 => 'Tags',
+## Mappings from regex to filename in ./In/
+%utf8::InPat = (
+ 'al' => {
+ 'Alphabetic(?:[-_]|\s+)?Presentation(?:[-_]|\s+)?Forms' => 'Alphabet',
+ },
+ 'ar' => {
+ 'Armenian' => 'Armenian',
+ 'Arabic' => 'Arabic',
+ 'Arrows' => 'Arrows',
+ 'Arabic(?:[-_]|\s+)?Presentation(?:[-_]|\s+)?Forms(?:[-_]|\s+)?A' => 'ArabicPr',
+ 'Arabic(?:[-_]|\s+)?Presentation(?:[-_]|\s+)?Forms(?:[-_]|\s+)?B' => 'ArabicP2',
+ },
+ 'ba' => {
+ 'Basic(?:[-_]|\s+)?Latin' => 'BasicLat',
+ },
+ 'be' => {
+ 'Bengali' => 'Bengali',
+ },
+ 'bl' => {
+ 'Block(?:[-_]|\s+)?Elements' => 'BlockEle',
+ },
+ 'bo' => {
+ 'Bopomofo' => 'Bopomof2',
+ 'Bopomofo(?:[-_]|\s+)?Extended' => 'Bopomofo',
+ 'Box(?:[-_]|\s+)?Drawing' => 'BoxDrawi',
+ },
+ 'br' => {
+ 'Braille(?:[-_]|\s+)?Patterns' => 'BrailleP',
+ },
+ 'by' => {
+ 'Byzantine(?:[-_]|\s+)?Musical(?:[-_]|\s+)?Symbols' => 'Byzantin',
+ },
+ 'ch' => {
+ 'Cherokee' => 'Cherokee',
+ },
+ 'cj' => {
+ 'Cjk(?:[-_]|\s+)?Radicals(?:[-_]|\s+)?Supplement' => 'CjkRadic',
+ 'Cjk(?:[-_]|\s+)?Symbols(?:[-_]|\s+)?And(?:[-_]|\s+)?Punctuation' => 'CjkSymbo',
+ 'Cjk(?:[-_]|\s+)?Unified(?:[-_]|\s+)?Ideographs(?:[-_]|\s+)?Extension(?:[-_]|\s+)?A' => 'CjkUnif2',
+ 'Cjk(?:[-_]|\s+)?Unified(?:[-_]|\s+)?Ideographs(?:[-_]|\s+)?Extension(?:[-_]|\s+)?B' => 'CjkUnif3',
+ 'Cjk(?:[-_]|\s+)?Unified(?:[-_]|\s+)?Ideographs' => 'CjkUnifi',
+ 'Cjk(?:[-_]|\s+)?Compatibility(?:[-_]|\s+)?Ideographs' => 'CjkComp2',
+ 'Cjk(?:[-_]|\s+)?Compatibility(?:[-_]|\s+)?Ideographs(?:[-_]|\s+)?Supplement' => 'CjkComp4',
+ 'Cjk(?:[-_]|\s+)?Compatibility' => 'CjkComp3',
+ 'Cjk(?:[-_]|\s+)?Compatibility(?:[-_]|\s+)?Forms' => 'CjkCompa',
+ },
+ 'co' => {
+ 'Combining(?:[-_]|\s+)?Marks(?:[-_]|\s+)?For(?:[-_]|\s+)?Symbols' => 'Combini2',
+ 'Combining(?:[-_]|\s+)?Half(?:[-_]|\s+)?Marks' => 'Combini3',
+ 'Combining(?:[-_]|\s+)?Diacritical(?:[-_]|\s+)?Marks' => 'Combinin',
+ 'Control(?:[-_]|\s+)?Pictures' => 'ControlP',
+ },
+ 'cu' => {
+ 'Currency(?:[-_]|\s+)?Symbols' => 'Currency',
+ },
+ 'cy' => {
+ 'Cyrillic' => 'Cyrillic',
+ },
+ 'de' => {
+ 'Devanagari' => 'Devanaga',
+ 'Deseret' => 'Deseret',
+ },
+ 'di' => {
+ 'Dingbats' => 'Dingbats',
+ },
+ 'en' => {
+ 'Enclosed(?:[-_]|\s+)?Alphanumerics' => 'Enclose2',
+ 'Enclosed(?:[-_]|\s+)?Cjk(?:[-_]|\s+)?Letters(?:[-_]|\s+)?And(?:[-_]|\s+)?Months' => 'Enclosed',
+ },
+ 'et' => {
+ 'Ethiopic' => 'Ethiopic',
+ },
+ 'ge' => {
+ 'Geometric(?:[-_]|\s+)?Shapes' => 'Geometri',
+ 'General(?:[-_]|\s+)?Punctuation' => 'GeneralP',
+ 'Georgian' => 'Georgian',
+ },
+ 'go' => {
+ 'Gothic' => 'Gothic',
+ },
+ 'gr' => {
+ 'Greek(?:[-_]|\s+)?Extended' => 'GreekExt',
+ 'Greek' => 'Greek',
+ },
+ 'gu' => {
+ 'Gurmukhi' => 'Gurmukhi',
+ 'Gujarati' => 'Gujarati',
+ },
+ 'ha' => {
+ 'Hangul(?:[-_]|\s+)?Compatibility(?:[-_]|\s+)?Jamo' => 'HangulCo',
+ 'Hangul(?:[-_]|\s+)?Syllables' => 'HangulSy',
+ 'Halfwidth(?:[-_]|\s+)?And(?:[-_]|\s+)?Fullwidth(?:[-_]|\s+)?Forms' => 'Halfwidt',
+ 'Hangul(?:[-_]|\s+)?Jamo' => 'HangulJa',
+ },
+ 'he' => {
+ 'Hebrew' => 'Hebrew',
+ },
+ 'hi' => {
+ 'High(?:[-_]|\s+)?Private(?:[-_]|\s+)?Use(?:[-_]|\s+)?Surrogates' => 'HighPriv',
+ 'Hiragana' => 'Hiragana',
+ 'High(?:[-_]|\s+)?Surrogates' => 'HighSurr',
+ },
+ 'id' => {
+ 'Ideographic(?:[-_]|\s+)?Description(?:[-_]|\s+)?Characters' => 'Ideograp',
+ },
+ 'ip' => {
+ 'Ipa(?:[-_]|\s+)?Extensions' => 'IpaExten',
+ },
+ 'ka' => {
+ 'Katakana' => 'Katakana',
+ 'Kangxi(?:[-_]|\s+)?Radicals' => 'KangxiRa',
+ 'Kannada' => 'Kannada',
+ 'Kanbun' => 'Kanbun',
+ },
+ 'kh' => {
+ 'Khmer' => 'Khmer',
+ },
+ 'la' => {
+ 'Latin(?:[-_]|\s+)?Extended(?:[-_]|\s+)?Additional' => 'LatinExt',
+ 'Latin(?:[-_]|\s+)?1(?:[-_]|\s+)?Supplement' => 'Latin1Su',
+ 'Lao' => 'Lao',
+ 'Latin(?:[-_]|\s+)?Extended(?:[-_]|\s+)?A' => 'LatinEx2',
+ 'Latin(?:[-_]|\s+)?Extended(?:[-_]|\s+)?B' => 'LatinEx3',
+ },
+ 'le' => {
+ 'Letterlike(?:[-_]|\s+)?Symbols' => 'Letterli',
+ },
+ 'lo' => {
+ 'Low(?:[-_]|\s+)?Surrogates' => 'LowSurro',
+ },
+ 'ma' => {
+ 'Mathematical(?:[-_]|\s+)?Operators' => 'Mathemat',
+ 'Mathematical(?:[-_]|\s+)?Alphanumeric(?:[-_]|\s+)?Symbols' => 'Mathema2',
+ 'Malayalam' => 'Malayala',
+ },
+ 'mi' => {
+ 'Miscellaneous(?:[-_]|\s+)?Symbols' => 'Miscell2',
+ 'Miscellaneous(?:[-_]|\s+)?Technical' => 'Miscella',
+ },
+ 'mo' => {
+ 'Mongolian' => 'Mongolia',
+ },
+ 'mu' => {
+ 'Musical(?:[-_]|\s+)?Symbols' => 'MusicalS',
+ },
+ 'my' => {
+ 'Myanmar' => 'Myanmar',
+ },
+ 'nu' => {
+ 'Number(?:[-_]|\s+)?Forms' => 'NumberFo',
+ },
+ 'og' => {
+ 'Ogham' => 'Ogham',
+ },
+ 'ol' => {
+ 'Old(?:[-_]|\s+)?Italic' => 'OldItali',
+ },
+ 'op' => {
+ 'Optical(?:[-_]|\s+)?Character(?:[-_]|\s+)?Recognition' => 'OpticalC',
+ },
+ 'or' => {
+ 'Oriya' => 'Oriya',
+ },
+ 'pr' => {
+ 'Private(?:[-_]|\s+)?Use' => 'PrivateU',
+ },
+ 'ru' => {
+ 'Runic' => 'Runic',
+ },
+ 'si' => {
+ 'Sinhala' => 'Sinhala',
+ },
+ 'sm' => {
+ 'Small(?:[-_]|\s+)?Form(?:[-_]|\s+)?Variants' => 'SmallFor',
+ },
+ 'sp' => {
+ 'Specials' => 'Specials',
+ 'Spacing(?:[-_]|\s+)?Modifier(?:[-_]|\s+)?Letters' => 'SpacingM',
+ },
+ 'su' => {
+ 'Superscripts(?:[-_]|\s+)?And(?:[-_]|\s+)?Subscripts' => 'Superscr',
+ },
+ 'sy' => {
+ 'Syriac' => 'Syriac',
+ },
+ 'ta' => {
+ 'Tags' => 'Tags',
+ 'Tamil' => 'Tamil',
+ },
+ 'te' => {
+ 'Telugu' => 'Telugu',
+ },
+ 'th' => {
+ 'Thaana' => 'Thaana',
+ 'Thai' => 'Thai',
+ },
+ 'ti' => {
+ 'Tibetan' => 'Tibetan',
+ },
+ 'un' => {
+ 'Unified(?:[-_]|\s+)?Canadian(?:[-_]|\s+)?Aboriginal(?:[-_]|\s+)?Syllabics' => 'UnifiedC',
+ },
+ 'yi' => {
+ 'Yi(?:[-_]|\s+)?Radicals' => 'YiRadica',
+ 'Yi(?:[-_]|\s+)?Syllables' => 'YiSyllab',
+ },
);
+++ /dev/null
-# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables from e.g. Unicode.txt.
-# Any changes made here will be lost!
-return <<'END';
-3040 309F Hiragana Block
-END
+++ /dev/null
-# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables from e.g. Unicode.txt.
-# Any changes made here will be lost!
-return <<'END';
-30A0 30FF Katakana Block
-END
+++ /dev/null
-# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables from e.g. Unicode.txt.
-# Any changes made here will be lost!
-return <<'END';
-3100 312F Bopomofo Block
-END
+++ /dev/null
-# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables from e.g. Unicode.txt.
-# Any changes made here will be lost!
-return <<'END';
-10300 1032F Old Italic Block
-END
+++ /dev/null
-# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables from e.g. Unicode.txt.
-# Any changes made here will be lost!
-return <<'END';
-10330 1034F Gothic Block
-END
+++ /dev/null
-# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables from e.g. Unicode.txt.
-# Any changes made here will be lost!
-return <<'END';
-10400 1044F Deseret Block
-END
+++ /dev/null
-# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables from e.g. Unicode.txt.
-# Any changes made here will be lost!
-return <<'END';
-0220 0221
-0234 024F
-02AE 02AF
-02EF 02FF
-034F 035F
-0363 0373
-0376 0379
-037B 037D
-037F 0383
-038B
-038D
-03A2
-03CF
-03D8 03D9
-03F6 03FF
-0487
-048A 048B
-04C5 04C6
-04C9 04CA
-04CD 04CF
-04F6 04F7
-04FA 0530
-0557 0558
-0560
-0588
-058B 0590
-05A2
-05BA
-05C5 05CF
-05EB 05EF
-05F5 060B
-060D 061A
-061C 061E
-0620
-063B 063F
-0656 065F
-066E 066F
-06EE 06EF
-06FF
-070E
-072D 072F
-074B 077F
-07B1 0900
-0904
-093A 093B
-094E 094F
-0955 0957
-0971 0980
-0984
-098D 098E
-0991 0992
-09A9
-09B1
-09B3 09B5
-09BA 09BB
-09BD
-09C5 09C6
-09C9 09CA
-09CE 09D6
-09D8 09DB
-09DE
-09E4 09E5
-09FB 0A01
-0A03 0A04
-0A0B 0A0E
-0A11 0A12
-0A29
-0A31
-0A34
-0A37
-0A3A 0A3B
-0A3D
-0A43 0A46
-0A49 0A4A
-0A4E 0A58
-0A5D
-0A5F 0A65
-0A75 0A80
-0A84
-0A8C
-0A8E
-0A92
-0AA9
-0AB1
-0AB4
-0ABA 0ABB
-0AC6
-0ACA
-0ACE 0ACF
-0AD1 0ADF
-0AE1 0AE5
-0AF0 0B00
-0B04
-0B0D 0B0E
-0B11 0B12
-0B29
-0B31
-0B34 0B35
-0B3A 0B3B
-0B44 0B46
-0B49 0B4A
-0B4E 0B55
-0B58 0B5B
-0B5E
-0B62 0B65
-0B71 0B81
-0B84
-0B8B 0B8D
-0B91
-0B96 0B98
-0B9B
-0B9D
-0BA0 0BA2
-0BA5 0BA7
-0BAB 0BAD
-0BB6
-0BBA 0BBD
-0BC3 0BC5
-0BC9
-0BCE 0BD6
-0BD8 0BE6
-0BF3 0C00
-0C04
-0C0D
-0C11
-0C29
-0C34
-0C3A 0C3D
-0C45
-0C49
-0C4E 0C54
-0C57 0C5F
-0C62 0C65
-0C70 0C81
-0C84
-0C8D
-0C91
-0CA9
-0CB4
-0CBA 0CBD
-0CC5
-0CC9
-0CCE 0CD4
-0CD7 0CDD
-0CDF
-0CE2 0CE5
-0CF0 0D01
-0D04
-0D0D
-0D11
-0D29
-0D3A 0D3D
-0D44 0D45
-0D49
-0D4E 0D56
-0D58 0D5F
-0D62 0D65
-0D70 0D81
-0D84
-0D97 0D99
-0DB2
-0DBC
-0DBE 0DBF
-0DC7 0DC9
-0DCB 0DCE
-0DD5
-0DD7
-0DE0 0DF1
-0DF5 0E00
-0E3B 0E3E
-0E5C 0E80
-0E83
-0E85 0E86
-0E89
-0E8B 0E8C
-0E8E 0E93
-0E98
-0EA0
-0EA4
-0EA6
-0EA8 0EA9
-0EAC
-0EBA
-0EBE 0EBF
-0EC5
-0EC7
-0ECE 0ECF
-0EDA 0EDB
-0EDE 0EFF
-0F48
-0F6B 0F70
-0F8C 0F8F
-0F98
-0FBD
-0FCD 0FCE
-0FD0 0FFF
-1022
-1028
-102B
-1033 1035
-103A 103F
-105A 109F
-10C6 10CF
-10F7 10FA
-10FC 10FF
-115A 115E
-11A3 11A7
-11FA 11FF
-1207
-1247
-1249
-124E 124F
-1257
-1259
-125E 125F
-1287
-1289
-128E 128F
-12AF
-12B1
-12B6 12B7
-12BF
-12C1
-12C6 12C7
-12CF
-12D7
-12EF
-130F
-1311
-1316 1317
-131F
-1347
-135B 1360
-137D 139F
-13F5 1400
-1677 167F
-169D 169F
-16F1 177F
-17DD 17DF
-17EA 17FF
-180F
-181A 181F
-1878 187F
-18AA 1DFF
-1E9C 1E9F
-1EFA 1EFF
-1F16 1F17
-1F1E 1F1F
-1F46 1F47
-1F4E 1F4F
-1F58
-1F5A
-1F5C
-1F5E
-1F7E 1F7F
-1FB5
-1FC5
-1FD4 1FD5
-1FDC
-1FF0 1FF1
-1FF5
-1FFF
-2047
-204E 2069
-2071 2073
-208F 209F
-20B0 20CF
-20E4 20FF
-213B 2152
-2184 218F
-21F4 21FF
-22F2 22FF
-237C
-239B 23FF
-2427 243F
-244B 245F
-24EB 24FF
-2596 259F
-25F8 25FF
-2614 2618
-2672 2700
-2705
-270A 270B
-2728
-274C
-274E
-2753 2755
-2757
-275F 2760
-2768 2775
-2795 2797
-27B0
-27BF 27FF
-2900 2E7F
-2E9A
-2EF4 2EFF
-2FD6 2FEF
-2FFC 2FFF
-303B 303D
-3040
-3095 3098
-309F 30A0
-30FF 3104
-312D 3130
-318F
-31B8 31FF
-321D 321F
-3244 325F
-327C 327E
-32B1 32BF
-32CC 32CF
-32FF
-3377 337A
-33DE 33DF
-33FF
-4DB6 4DFF
-9FA6 9FFF
-A48D A48F
-A4A2 A4A3
-A4B4
-A4C1
-A4C5
-A4C7 ABFF
-D7A4 D7FF
-FA2E FAFF
-FB07 FB12
-FB18 FB1C
-FB37
-FB3D
-FB3F
-FB42
-FB45
-FBB2 FBD2
-FD40 FD4F
-FD90 FD91
-FDC8 FDEF
-FDFC FE1F
-FE24 FE2F
-FE45 FE48
-FE53
-FE67
-FE6C FE6F
-FE73
-FE75
-FEFD FEFE
-FF00
-FF5F FF60
-FFBF FFC1
-FFC8 FFC9
-FFD0 FFD1
-FFD8 FFD9
-FFDD FFDF
-FFE7
-FFEF FFF8
-FFFE 102FF
-1031F
-10324 1032F
-1034B 103FF
-10426 10427
-1044E 1CFFF
-1D0F6 1D0FF
-1D127 1D129
-1D1DE 1D3FF
-1D455
-1D49D
-1D4A0 1D4A1
-1D4A3 1D4A4
-1D4A7 1D4A8
-1D4AD
-1D4BA
-1D4BC
-1D4C1
-1D4C4
-1D506
-1D50B 1D50C
-1D515
-1D51D
-1D53A
-1D53F
-1D545
-1D547 1D549
-1D551
-1D6A4 1D6A7
-1D7CA 1D7CD
-1D800 1FFFF
-2A6D7 2F7FF
-2FA1E E0000
-E0002 E001F
-E0080 EFFFF
-FFFFE FFFFF
-10FFFE 10FFFF
-END
+++ /dev/null
-# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables from e.g. Unicode.txt.
-# Any changes made here will be lost!
-return <<'END';
-0041 005A
-0061 007A
-00AA
-00B5
-00BA
-00C0 00D6
-00D8 00DE
-00DF 00F6
-00F8 00FF
-0100
-0101
-0102
-0103
-0104
-0105
-0106
-0107
-0108
-0109
-010A
-010B
-010C
-010D
-010E
-010F
-0110
-0111
-0112
-0113
-0114
-0115
-0116
-0117
-0118
-0119
-011A
-011B
-011C
-011D
-011E
-011F
-0120
-0121
-0122
-0123
-0124
-0125
-0126
-0127
-0128
-0129
-012A
-012B
-012C
-012D
-012E
-012F
-0130
-0131
-0132
-0133
-0134
-0135
-0136
-0137 0138
-0139
-013A
-013B
-013C
-013D
-013E
-013F
-0140
-0141
-0142
-0143
-0144
-0145
-0146
-0147
-0148 0149
-014A
-014B
-014C
-014D
-014E
-014F
-0150
-0151
-0152
-0153
-0154
-0155
-0156
-0157
-0158
-0159
-015A
-015B
-015C
-015D
-015E
-015F
-0160
-0161
-0162
-0163
-0164
-0165
-0166
-0167
-0168
-0169
-016A
-016B
-016C
-016D
-016E
-016F
-0170
-0171
-0172
-0173
-0174
-0175
-0176
-0177
-0178 0179
-017A
-017B
-017C
-017D
-017E 0180
-0181 0182
-0183
-0184
-0185
-0186 0187
-0188
-0189 018B
-018C 018D
-018E 0191
-0192
-0193 0194
-0195
-0196 0198
-0199 019B
-019C 019D
-019E
-019F 01A0
-01A1
-01A2
-01A3
-01A4
-01A5
-01A6 01A7
-01A8
-01A9
-01AA 01AB
-01AC
-01AD
-01AE 01AF
-01B0
-01B1 01B3
-01B4
-01B5
-01B6
-01B7 01B8
-01B9 01BA
-01BC
-01BD 01BF
-01C4
-01C5
-01C6
-01C7
-01C8
-01C9
-01CA
-01CB
-01CC
-01CD
-01CE
-01CF
-01D0
-01D1
-01D2
-01D3
-01D4
-01D5
-01D6
-01D7
-01D8
-01D9
-01DA
-01DB
-01DC 01DD
-01DE
-01DF
-01E0
-01E1
-01E2
-01E3
-01E4
-01E5
-01E6
-01E7
-01E8
-01E9
-01EA
-01EB
-01EC
-01ED
-01EE
-01EF 01F0
-01F1
-01F2
-01F3
-01F4
-01F5
-01F6 01F8
-01F9
-01FA
-01FB
-01FC
-01FD
-01FE
-01FF
-0200
-0201
-0202
-0203
-0204
-0205
-0206
-0207
-0208
-0209
-020A
-020B
-020C
-020D
-020E
-020F
-0210
-0211
-0212
-0213
-0214
-0215
-0216
-0217
-0218
-0219
-021A
-021B
-021C
-021D
-021E
-021F
-0222
-0223
-0224
-0225
-0226
-0227
-0228
-0229
-022A
-022B
-022C
-022D
-022E
-022F
-0230
-0231
-0232
-0233
-0250 02AD
-0386
-0388 038A
-038C
-038E 038F
-0390
-0391 03A1
-03A3 03AB
-03AC 03CE
-03D0 03D1
-03D2 03D4
-03D5 03D7
-03DA
-03DB
-03DC
-03DD
-03DE
-03DF
-03E0
-03E1
-03E2
-03E3
-03E4
-03E5
-03E6
-03E7
-03E8
-03E9
-03EA
-03EB
-03EC
-03ED
-03EE
-03EF 03F3
-03F4
-03F5
-0400 042F
-0430 045F
-0460
-0461
-0462
-0463
-0464
-0465
-0466
-0467
-0468
-0469
-046A
-046B
-046C
-046D
-046E
-046F
-0470
-0471
-0472
-0473
-0474
-0475
-0476
-0477
-0478
-0479
-047A
-047B
-047C
-047D
-047E
-047F
-0480
-0481
-048C
-048D
-048E
-048F
-0490
-0491
-0492
-0493
-0494
-0495
-0496
-0497
-0498
-0499
-049A
-049B
-049C
-049D
-049E
-049F
-04A0
-04A1
-04A2
-04A3
-04A4
-04A5
-04A6
-04A7
-04A8
-04A9
-04AA
-04AB
-04AC
-04AD
-04AE
-04AF
-04B0
-04B1
-04B2
-04B3
-04B4
-04B5
-04B6
-04B7
-04B8
-04B9
-04BA
-04BB
-04BC
-04BD
-04BE
-04BF
-04C0 04C1
-04C2
-04C3
-04C4
-04C7
-04C8
-04CB
-04CC
-04D0
-04D1
-04D2
-04D3
-04D4
-04D5
-04D6
-04D7
-04D8
-04D9
-04DA
-04DB
-04DC
-04DD
-04DE
-04DF
-04E0
-04E1
-04E2
-04E3
-04E4
-04E5
-04E6
-04E7
-04E8
-04E9
-04EA
-04EB
-04EC
-04ED
-04EE
-04EF
-04F0
-04F1
-04F2
-04F3
-04F4
-04F5
-04F8
-04F9
-0531 0556
-0561 0587
-10A0 10C5
-1E00
-1E01
-1E02
-1E03
-1E04
-1E05
-1E06
-1E07
-1E08
-1E09
-1E0A
-1E0B
-1E0C
-1E0D
-1E0E
-1E0F
-1E10
-1E11
-1E12
-1E13
-1E14
-1E15
-1E16
-1E17
-1E18
-1E19
-1E1A
-1E1B
-1E1C
-1E1D
-1E1E
-1E1F
-1E20
-1E21
-1E22
-1E23
-1E24
-1E25
-1E26
-1E27
-1E28
-1E29
-1E2A
-1E2B
-1E2C
-1E2D
-1E2E
-1E2F
-1E30
-1E31
-1E32
-1E33
-1E34
-1E35
-1E36
-1E37
-1E38
-1E39
-1E3A
-1E3B
-1E3C
-1E3D
-1E3E
-1E3F
-1E40
-1E41
-1E42
-1E43
-1E44
-1E45
-1E46
-1E47
-1E48
-1E49
-1E4A
-1E4B
-1E4C
-1E4D
-1E4E
-1E4F
-1E50
-1E51
-1E52
-1E53
-1E54
-1E55
-1E56
-1E57
-1E58
-1E59
-1E5A
-1E5B
-1E5C
-1E5D
-1E5E
-1E5F
-1E60
-1E61
-1E62
-1E63
-1E64
-1E65
-1E66
-1E67
-1E68
-1E69
-1E6A
-1E6B
-1E6C
-1E6D
-1E6E
-1E6F
-1E70
-1E71
-1E72
-1E73
-1E74
-1E75
-1E76
-1E77
-1E78
-1E79
-1E7A
-1E7B
-1E7C
-1E7D
-1E7E
-1E7F
-1E80
-1E81
-1E82
-1E83
-1E84
-1E85
-1E86
-1E87
-1E88
-1E89
-1E8A
-1E8B
-1E8C
-1E8D
-1E8E
-1E8F
-1E90
-1E91
-1E92
-1E93
-1E94
-1E95 1E9B
-1EA0
-1EA1
-1EA2
-1EA3
-1EA4
-1EA5
-1EA6
-1EA7
-1EA8
-1EA9
-1EAA
-1EAB
-1EAC
-1EAD
-1EAE
-1EAF
-1EB0
-1EB1
-1EB2
-1EB3
-1EB4
-1EB5
-1EB6
-1EB7
-1EB8
-1EB9
-1EBA
-1EBB
-1EBC
-1EBD
-1EBE
-1EBF
-1EC0
-1EC1
-1EC2
-1EC3
-1EC4
-1EC5
-1EC6
-1EC7
-1EC8
-1EC9
-1ECA
-1ECB
-1ECC
-1ECD
-1ECE
-1ECF
-1ED0
-1ED1
-1ED2
-1ED3
-1ED4
-1ED5
-1ED6
-1ED7
-1ED8
-1ED9
-1EDA
-1EDB
-1EDC
-1EDD
-1EDE
-1EDF
-1EE0
-1EE1
-1EE2
-1EE3
-1EE4
-1EE5
-1EE6
-1EE7
-1EE8
-1EE9
-1EEA
-1EEB
-1EEC
-1EED
-1EEE
-1EEF
-1EF0
-1EF1
-1EF2
-1EF3
-1EF4
-1EF5
-1EF6
-1EF7
-1EF8
-1EF9
-1F00 1F07
-1F08 1F0F
-1F10 1F15
-1F18 1F1D
-1F20 1F27
-1F28 1F2F
-1F30 1F37
-1F38 1F3F
-1F40 1F45
-1F48 1F4D
-1F50 1F57
-1F59
-1F5B
-1F5D
-1F5F
-1F60 1F67
-1F68 1F6F
-1F70 1F7D
-1F80 1F87
-1F88 1F8F
-1F90 1F97
-1F98 1F9F
-1FA0 1FA7
-1FA8 1FAF
-1FB0 1FB4
-1FB6 1FB7
-1FB8 1FBB
-1FBC
-1FBE
-1FC2 1FC4
-1FC6 1FC7
-1FC8 1FCB
-1FCC
-1FD0 1FD3
-1FD6 1FD7
-1FD8 1FDB
-1FE0 1FE7
-1FE8 1FEC
-1FF2 1FF4
-1FF6 1FF7
-1FF8 1FFB
-1FFC
-207F
-2102
-2107
-210A
-210B 210D
-210E 210F
-2110 2112
-2113
-2115
-2119 211D
-2124
-2126
-2128
-212A 212D
-212F
-2130 2131
-2133
-2134
-2139
-FB00 FB06
-FB13 FB17
-FF21 FF3A
-FF41 FF5A
-10400 10425
-10428 1044D
-1D400 1D419
-1D41A 1D433
-1D434 1D44D
-1D44E 1D454
-1D456 1D467
-1D468 1D481
-1D482 1D49B
-1D49C
-1D49E 1D49F
-1D4A2
-1D4A5 1D4A6
-1D4A9 1D4AC
-1D4AE 1D4B5
-1D4B6 1D4B9
-1D4BB
-1D4BD 1D4C0
-1D4C2 1D4C3
-1D4C5 1D4CF
-1D4D0 1D4E9
-1D4EA 1D503
-1D504 1D505
-1D507 1D50A
-1D50D 1D514
-1D516 1D51C
-1D51E 1D537
-1D538 1D539
-1D53B 1D53E
-1D540 1D544
-1D546
-1D54A 1D550
-1D552 1D56B
-1D56C 1D585
-1D586 1D59F
-1D5A0 1D5B9
-1D5BA 1D5D3
-1D5D4 1D5ED
-1D5EE 1D607
-1D608 1D621
-1D622 1D63B
-1D63C 1D655
-1D656 1D66F
-1D670 1D689
-1D68A 1D6A3
-1D6A8 1D6C0
-1D6C2 1D6DA
-1D6DC 1D6E1
-1D6E2 1D6FA
-1D6FC 1D714
-1D716 1D71B
-1D71C 1D734
-1D736 1D74E
-1D750 1D755
-1D756 1D76E
-1D770 1D788
-1D78A 1D78F
-1D790 1D7A8
-1D7AA 1D7C2
-1D7C4 1D7C9
-END
+++ /dev/null
-# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables from e.g. Unicode.txt.
-# Any changes made here will be lost!
-return <<'END';
-0041 005A
-0061 007A
-00AA
-00B5
-00BA
-00C0 00D6
-00D8 00DE
-00DF 00F6
-00F8 00FF
-0100
-0101
-0102
-0103
-0104
-0105
-0106
-0107
-0108
-0109
-010A
-010B
-010C
-010D
-010E
-010F
-0110
-0111
-0112
-0113
-0114
-0115
-0116
-0117
-0118
-0119
-011A
-011B
-011C
-011D
-011E
-011F
-0120
-0121
-0122
-0123
-0124
-0125
-0126
-0127
-0128
-0129
-012A
-012B
-012C
-012D
-012E
-012F
-0130
-0131
-0132
-0133
-0134
-0135
-0136
-0137 0138
-0139
-013A
-013B
-013C
-013D
-013E
-013F
-0140
-0141
-0142
-0143
-0144
-0145
-0146
-0147
-0148 0149
-014A
-014B
-014C
-014D
-014E
-014F
-0150
-0151
-0152
-0153
-0154
-0155
-0156
-0157
-0158
-0159
-015A
-015B
-015C
-015D
-015E
-015F
-0160
-0161
-0162
-0163
-0164
-0165
-0166
-0167
-0168
-0169
-016A
-016B
-016C
-016D
-016E
-016F
-0170
-0171
-0172
-0173
-0174
-0175
-0176
-0177
-0178 0179
-017A
-017B
-017C
-017D
-017E 0180
-0181 0182
-0183
-0184
-0185
-0186 0187
-0188
-0189 018B
-018C 018D
-018E 0191
-0192
-0193 0194
-0195
-0196 0198
-0199 019B
-019C 019D
-019E
-019F 01A0
-01A1
-01A2
-01A3
-01A4
-01A5
-01A6 01A7
-01A8
-01A9
-01AA 01AB
-01AC
-01AD
-01AE 01AF
-01B0
-01B1 01B3
-01B4
-01B5
-01B6
-01B7 01B8
-01B9 01BA
-01BB
-01BC
-01BD 01BF
-01C0 01C3
-01C4
-01C5
-01C6
-01C7
-01C8
-01C9
-01CA
-01CB
-01CC
-01CD
-01CE
-01CF
-01D0
-01D1
-01D2
-01D3
-01D4
-01D5
-01D6
-01D7
-01D8
-01D9
-01DA
-01DB
-01DC 01DD
-01DE
-01DF
-01E0
-01E1
-01E2
-01E3
-01E4
-01E5
-01E6
-01E7
-01E8
-01E9
-01EA
-01EB
-01EC
-01ED
-01EE
-01EF 01F0
-01F1
-01F2
-01F3
-01F4
-01F5
-01F6 01F8
-01F9
-01FA
-01FB
-01FC
-01FD
-01FE
-01FF
-0200
-0201
-0202
-0203
-0204
-0205
-0206
-0207
-0208
-0209
-020A
-020B
-020C
-020D
-020E
-020F
-0210
-0211
-0212
-0213
-0214
-0215
-0216
-0217
-0218
-0219
-021A
-021B
-021C
-021D
-021E
-021F
-0222
-0223
-0224
-0225
-0226
-0227
-0228
-0229
-022A
-022B
-022C
-022D
-022E
-022F
-0230
-0231
-0232
-0233
-0250 02AD
-02B0 02B8
-02BB 02C1
-02D0 02D1
-02E0 02E4
-02EE
-037A
-0386
-0388 038A
-038C
-038E 038F
-0390
-0391 03A1
-03A3 03AB
-03AC 03CE
-03D0 03D1
-03D2 03D4
-03D5 03D7
-03DA
-03DB
-03DC
-03DD
-03DE
-03DF
-03E0
-03E1
-03E2
-03E3
-03E4
-03E5
-03E6
-03E7
-03E8
-03E9
-03EA
-03EB
-03EC
-03ED
-03EE
-03EF 03F3
-03F4
-03F5
-0400 042F
-0430 045F
-0460
-0461
-0462
-0463
-0464
-0465
-0466
-0467
-0468
-0469
-046A
-046B
-046C
-046D
-046E
-046F
-0470
-0471
-0472
-0473
-0474
-0475
-0476
-0477
-0478
-0479
-047A
-047B
-047C
-047D
-047E
-047F
-0480
-0481
-048C
-048D
-048E
-048F
-0490
-0491
-0492
-0493
-0494
-0495
-0496
-0497
-0498
-0499
-049A
-049B
-049C
-049D
-049E
-049F
-04A0
-04A1
-04A2
-04A3
-04A4
-04A5
-04A6
-04A7
-04A8
-04A9
-04AA
-04AB
-04AC
-04AD
-04AE
-04AF
-04B0
-04B1
-04B2
-04B3
-04B4
-04B5
-04B6
-04B7
-04B8
-04B9
-04BA
-04BB
-04BC
-04BD
-04BE
-04BF
-04C0 04C1
-04C2
-04C3
-04C4
-04C7
-04C8
-04CB
-04CC
-04D0
-04D1
-04D2
-04D3
-04D4
-04D5
-04D6
-04D7
-04D8
-04D9
-04DA
-04DB
-04DC
-04DD
-04DE
-04DF
-04E0
-04E1
-04E2
-04E3
-04E4
-04E5
-04E6
-04E7
-04E8
-04E9
-04EA
-04EB
-04EC
-04ED
-04EE
-04EF
-04F0
-04F1
-04F2
-04F3
-04F4
-04F5
-04F8
-04F9
-0531 0556
-0559
-0561 0587
-05D0 05EA
-05F0 05F2
-0621 063A
-0640
-0641 064A
-0671 06D3
-06D5
-06E5 06E6
-06FA 06FC
-0710
-0712 072C
-0780 07A5
-0905 0939
-093D
-0950
-0958 0961
-0985 098C
-098F 0990
-0993 09A8
-09AA 09B0
-09B2
-09B6 09B9
-09DC 09DD
-09DF 09E1
-09F0 09F1
-0A05 0A0A
-0A0F 0A10
-0A13 0A28
-0A2A 0A30
-0A32 0A33
-0A35 0A36
-0A38 0A39
-0A59 0A5C
-0A5E
-0A72 0A74
-0A85 0A8B
-0A8D
-0A8F 0A91
-0A93 0AA8
-0AAA 0AB0
-0AB2 0AB3
-0AB5 0AB9
-0ABD
-0AD0
-0AE0
-0B05 0B0C
-0B0F 0B10
-0B13 0B28
-0B2A 0B30
-0B32 0B33
-0B36 0B39
-0B3D
-0B5C 0B5D
-0B5F 0B61
-0B85 0B8A
-0B8E 0B90
-0B92 0B95
-0B99 0B9A
-0B9C
-0B9E 0B9F
-0BA3 0BA4
-0BA8 0BAA
-0BAE 0BB5
-0BB7 0BB9
-0C05 0C0C
-0C0E 0C10
-0C12 0C28
-0C2A 0C33
-0C35 0C39
-0C60 0C61
-0C85 0C8C
-0C8E 0C90
-0C92 0CA8
-0CAA 0CB3
-0CB5 0CB9
-0CDE
-0CE0 0CE1
-0D05 0D0C
-0D0E 0D10
-0D12 0D28
-0D2A 0D39
-0D60 0D61
-0D85 0D96
-0D9A 0DB1
-0DB3 0DBB
-0DBD
-0DC0 0DC6
-0E01 0E30
-0E32 0E33
-0E40 0E45
-0E46
-0E81 0E82
-0E84
-0E87 0E88
-0E8A
-0E8D
-0E94 0E97
-0E99 0E9F
-0EA1 0EA3
-0EA5
-0EA7
-0EAA 0EAB
-0EAD 0EB0
-0EB2 0EB3
-0EBD
-0EC0 0EC4
-0EC6
-0EDC 0EDD
-0F00
-0F40 0F47
-0F49 0F6A
-0F88 0F8B
-1000 1021
-1023 1027
-1029 102A
-1050 1055
-10A0 10C5
-10D0 10F6
-1100 1159
-115F 11A2
-11A8 11F9
-1200 1206
-1208 1246
-1248
-124A 124D
-1250 1256
-1258
-125A 125D
-1260 1286
-1288
-128A 128D
-1290 12AE
-12B0
-12B2 12B5
-12B8 12BE
-12C0
-12C2 12C5
-12C8 12CE
-12D0 12D6
-12D8 12EE
-12F0 130E
-1310
-1312 1315
-1318 131E
-1320 1346
-1348 135A
-13A0 13F4
-1401 166C
-166F 1676
-1681 169A
-16A0 16EA
-16EE 16F0
-1780 17B3
-1820 1842
-1843
-1844 1877
-1880 18A8
-1E00
-1E01
-1E02
-1E03
-1E04
-1E05
-1E06
-1E07
-1E08
-1E09
-1E0A
-1E0B
-1E0C
-1E0D
-1E0E
-1E0F
-1E10
-1E11
-1E12
-1E13
-1E14
-1E15
-1E16
-1E17
-1E18
-1E19
-1E1A
-1E1B
-1E1C
-1E1D
-1E1E
-1E1F
-1E20
-1E21
-1E22
-1E23
-1E24
-1E25
-1E26
-1E27
-1E28
-1E29
-1E2A
-1E2B
-1E2C
-1E2D
-1E2E
-1E2F
-1E30
-1E31
-1E32
-1E33
-1E34
-1E35
-1E36
-1E37
-1E38
-1E39
-1E3A
-1E3B
-1E3C
-1E3D
-1E3E
-1E3F
-1E40
-1E41
-1E42
-1E43
-1E44
-1E45
-1E46
-1E47
-1E48
-1E49
-1E4A
-1E4B
-1E4C
-1E4D
-1E4E
-1E4F
-1E50
-1E51
-1E52
-1E53
-1E54
-1E55
-1E56
-1E57
-1E58
-1E59
-1E5A
-1E5B
-1E5C
-1E5D
-1E5E
-1E5F
-1E60
-1E61
-1E62
-1E63
-1E64
-1E65
-1E66
-1E67
-1E68
-1E69
-1E6A
-1E6B
-1E6C
-1E6D
-1E6E
-1E6F
-1E70
-1E71
-1E72
-1E73
-1E74
-1E75
-1E76
-1E77
-1E78
-1E79
-1E7A
-1E7B
-1E7C
-1E7D
-1E7E
-1E7F
-1E80
-1E81
-1E82
-1E83
-1E84
-1E85
-1E86
-1E87
-1E88
-1E89
-1E8A
-1E8B
-1E8C
-1E8D
-1E8E
-1E8F
-1E90
-1E91
-1E92
-1E93
-1E94
-1E95 1E9B
-1EA0
-1EA1
-1EA2
-1EA3
-1EA4
-1EA5
-1EA6
-1EA7
-1EA8
-1EA9
-1EAA
-1EAB
-1EAC
-1EAD
-1EAE
-1EAF
-1EB0
-1EB1
-1EB2
-1EB3
-1EB4
-1EB5
-1EB6
-1EB7
-1EB8
-1EB9
-1EBA
-1EBB
-1EBC
-1EBD
-1EBE
-1EBF
-1EC0
-1EC1
-1EC2
-1EC3
-1EC4
-1EC5
-1EC6
-1EC7
-1EC8
-1EC9
-1ECA
-1ECB
-1ECC
-1ECD
-1ECE
-1ECF
-1ED0
-1ED1
-1ED2
-1ED3
-1ED4
-1ED5
-1ED6
-1ED7
-1ED8
-1ED9
-1EDA
-1EDB
-1EDC
-1EDD
-1EDE
-1EDF
-1EE0
-1EE1
-1EE2
-1EE3
-1EE4
-1EE5
-1EE6
-1EE7
-1EE8
-1EE9
-1EEA
-1EEB
-1EEC
-1EED
-1EEE
-1EEF
-1EF0
-1EF1
-1EF2
-1EF3
-1EF4
-1EF5
-1EF6
-1EF7
-1EF8
-1EF9
-1F00 1F07
-1F08 1F0F
-1F10 1F15
-1F18 1F1D
-1F20 1F27
-1F28 1F2F
-1F30 1F37
-1F38 1F3F
-1F40 1F45
-1F48 1F4D
-1F50 1F57
-1F59
-1F5B
-1F5D
-1F5F
-1F60 1F67
-1F68 1F6F
-1F70 1F7D
-1F80 1F87
-1F88 1F8F
-1F90 1F97
-1F98 1F9F
-1FA0 1FA7
-1FA8 1FAF
-1FB0 1FB4
-1FB6 1FB7
-1FB8 1FBB
-1FBC
-1FBE
-1FC2 1FC4
-1FC6 1FC7
-1FC8 1FCB
-1FCC
-1FD0 1FD3
-1FD6 1FD7
-1FD8 1FDB
-1FE0 1FE7
-1FE8 1FEC
-1FF2 1FF4
-1FF6 1FF7
-1FF8 1FFB
-1FFC
-207F
-2102
-2107
-210A
-210B 210D
-210E 210F
-2110 2112
-2113
-2115
-2119 211D
-2124
-2126
-2128
-212A 212D
-212F
-2130 2131
-2133
-2134
-2135 2138
-2139
-2160 2183
-3005
-3006
-3007
-3021 3029
-3031 3035
-3038 303A
-3041 3094
-309D 309E
-30A1 30FA
-30FC 30FE
-3105 312C
-3131 318E
-31A0 31B7
-3400 4DB5
-4E00 9FA5
-A000 A48C
-AC00 D7A3
-F900 FA2D
-FB00 FB06
-FB13 FB17
-FB1D
-FB1F FB28
-FB2A FB36
-FB38 FB3C
-FB3E
-FB40 FB41
-FB43 FB44
-FB46 FBB1
-FBD3 FD3D
-FD50 FD8F
-FD92 FDC7
-FDF0 FDFB
-FE70 FE72
-FE74
-FE76 FEFC
-FF21 FF3A
-FF41 FF5A
-FF66 FF6F
-FF70
-FF71 FF9D
-FF9E FF9F
-FFA0 FFBE
-FFC2 FFC7
-FFCA FFCF
-FFD2 FFD7
-FFDA FFDC
-10300 1031E
-10330 10349
-1034A
-10400 10425
-10428 1044D
-1D400 1D419
-1D41A 1D433
-1D434 1D44D
-1D44E 1D454
-1D456 1D467
-1D468 1D481
-1D482 1D49B
-1D49C
-1D49E 1D49F
-1D4A2
-1D4A5 1D4A6
-1D4A9 1D4AC
-1D4AE 1D4B5
-1D4B6 1D4B9
-1D4BB
-1D4BD 1D4C0
-1D4C2 1D4C3
-1D4C5 1D4CF
-1D4D0 1D4E9
-1D4EA 1D503
-1D504 1D505
-1D507 1D50A
-1D50D 1D514
-1D516 1D51C
-1D51E 1D537
-1D538 1D539
-1D53B 1D53E
-1D540 1D544
-1D546
-1D54A 1D550
-1D552 1D56B
-1D56C 1D585
-1D586 1D59F
-1D5A0 1D5B9
-1D5BA 1D5D3
-1D5D4 1D5ED
-1D5EE 1D607
-1D608 1D621
-1D622 1D63B
-1D63C 1D655
-1D656 1D66F
-1D670 1D689
-1D68A 1D6A3
-1D6A8 1D6C0
-1D6C2 1D6DA
-1D6DC 1D6E1
-1D6E2 1D6FA
-1D6FC 1D714
-1D716 1D71B
-1D71C 1D734
-1D736 1D74E
-1D750 1D755
-1D756 1D76E
-1D770 1D788
-1D78A 1D78F
-1D790 1D7A8
-1D7AA 1D7C2
-1D7C4 1D7C9
-20000 2A6D6
-2F800 2FA1D
-END
+++ /dev/null
-# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables from e.g. Unicode.txt.
-# Any changes made here will be lost!
-return <<'END';
-0030 0039
-0041 005A
-005F
-0061 007A
-00AA
-00B5
-00BA
-00C0 00D6
-00D8 00DE
-00DF 00F6
-00F8 00FF
-0100
-0101
-0102
-0103
-0104
-0105
-0106
-0107
-0108
-0109
-010A
-010B
-010C
-010D
-010E
-010F
-0110
-0111
-0112
-0113
-0114
-0115
-0116
-0117
-0118
-0119
-011A
-011B
-011C
-011D
-011E
-011F
-0120
-0121
-0122
-0123
-0124
-0125
-0126
-0127
-0128
-0129
-012A
-012B
-012C
-012D
-012E
-012F
-0130
-0131
-0132
-0133
-0134
-0135
-0136
-0137 0138
-0139
-013A
-013B
-013C
-013D
-013E
-013F
-0140
-0141
-0142
-0143
-0144
-0145
-0146
-0147
-0148 0149
-014A
-014B
-014C
-014D
-014E
-014F
-0150
-0151
-0152
-0153
-0154
-0155
-0156
-0157
-0158
-0159
-015A
-015B
-015C
-015D
-015E
-015F
-0160
-0161
-0162
-0163
-0164
-0165
-0166
-0167
-0168
-0169
-016A
-016B
-016C
-016D
-016E
-016F
-0170
-0171
-0172
-0173
-0174
-0175
-0176
-0177
-0178 0179
-017A
-017B
-017C
-017D
-017E 0180
-0181 0182
-0183
-0184
-0185
-0186 0187
-0188
-0189 018B
-018C 018D
-018E 0191
-0192
-0193 0194
-0195
-0196 0198
-0199 019B
-019C 019D
-019E
-019F 01A0
-01A1
-01A2
-01A3
-01A4
-01A5
-01A6 01A7
-01A8
-01A9
-01AA 01AB
-01AC
-01AD
-01AE 01AF
-01B0
-01B1 01B3
-01B4
-01B5
-01B6
-01B7 01B8
-01B9 01BA
-01BB
-01BC
-01BD 01BF
-01C0 01C3
-01C4
-01C5
-01C6
-01C7
-01C8
-01C9
-01CA
-01CB
-01CC
-01CD
-01CE
-01CF
-01D0
-01D1
-01D2
-01D3
-01D4
-01D5
-01D6
-01D7
-01D8
-01D9
-01DA
-01DB
-01DC 01DD
-01DE
-01DF
-01E0
-01E1
-01E2
-01E3
-01E4
-01E5
-01E6
-01E7
-01E8
-01E9
-01EA
-01EB
-01EC
-01ED
-01EE
-01EF 01F0
-01F1
-01F2
-01F3
-01F4
-01F5
-01F6 01F8
-01F9
-01FA
-01FB
-01FC
-01FD
-01FE
-01FF
-0200
-0201
-0202
-0203
-0204
-0205
-0206
-0207
-0208
-0209
-020A
-020B
-020C
-020D
-020E
-020F
-0210
-0211
-0212
-0213
-0214
-0215
-0216
-0217
-0218
-0219
-021A
-021B
-021C
-021D
-021E
-021F
-0222
-0223
-0224
-0225
-0226
-0227
-0228
-0229
-022A
-022B
-022C
-022D
-022E
-022F
-0230
-0231
-0232
-0233
-0250 02AD
-02B0 02B8
-02BB 02C1
-02D0 02D1
-02E0 02E4
-02EE
-0300 034E
-0360 0362
-037A
-0386
-0388 038A
-038C
-038E 038F
-0390
-0391 03A1
-03A3 03AB
-03AC 03CE
-03D0 03D1
-03D2 03D4
-03D5 03D7
-03DA
-03DB
-03DC
-03DD
-03DE
-03DF
-03E0
-03E1
-03E2
-03E3
-03E4
-03E5
-03E6
-03E7
-03E8
-03E9
-03EA
-03EB
-03EC
-03ED
-03EE
-03EF 03F3
-03F4
-03F5
-0400 042F
-0430 045F
-0460
-0461
-0462
-0463
-0464
-0465
-0466
-0467
-0468
-0469
-046A
-046B
-046C
-046D
-046E
-046F
-0470
-0471
-0472
-0473
-0474
-0475
-0476
-0477
-0478
-0479
-047A
-047B
-047C
-047D
-047E
-047F
-0480
-0481
-0483 0486
-048C
-048D
-048E
-048F
-0490
-0491
-0492
-0493
-0494
-0495
-0496
-0497
-0498
-0499
-049A
-049B
-049C
-049D
-049E
-049F
-04A0
-04A1
-04A2
-04A3
-04A4
-04A5
-04A6
-04A7
-04A8
-04A9
-04AA
-04AB
-04AC
-04AD
-04AE
-04AF
-04B0
-04B1
-04B2
-04B3
-04B4
-04B5
-04B6
-04B7
-04B8
-04B9
-04BA
-04BB
-04BC
-04BD
-04BE
-04BF
-04C0 04C1
-04C2
-04C3
-04C4
-04C7
-04C8
-04CB
-04CC
-04D0
-04D1
-04D2
-04D3
-04D4
-04D5
-04D6
-04D7
-04D8
-04D9
-04DA
-04DB
-04DC
-04DD
-04DE
-04DF
-04E0
-04E1
-04E2
-04E3
-04E4
-04E5
-04E6
-04E7
-04E8
-04E9
-04EA
-04EB
-04EC
-04ED
-04EE
-04EF
-04F0
-04F1
-04F2
-04F3
-04F4
-04F5
-04F8
-04F9
-0531 0556
-0559
-0561 0587
-0591 05A1
-05A3 05B9
-05BB 05BD
-05BF
-05C1 05C2
-05C4
-05D0 05EA
-05F0 05F2
-0621 063A
-0640
-0641 064A
-064B 0655
-0660 0669
-0670
-0671 06D3
-06D5
-06D6 06DC
-06DF 06E4
-06E5 06E6
-06E7 06E8
-06EA 06ED
-06F0 06F9
-06FA 06FC
-0710
-0711
-0712 072C
-0730 074A
-0780 07A5
-07A6 07B0
-0901 0902
-0903
-0905 0939
-093C
-093D
-093E 0940
-0941 0948
-0949 094C
-094D
-0950
-0951 0954
-0958 0961
-0962 0963
-0966 096F
-0981
-0982 0983
-0985 098C
-098F 0990
-0993 09A8
-09AA 09B0
-09B2
-09B6 09B9
-09BC
-09BE 09C0
-09C1 09C4
-09C7 09C8
-09CB 09CC
-09CD
-09D7
-09DC 09DD
-09DF 09E1
-09E2 09E3
-09E6 09EF
-09F0 09F1
-0A02
-0A05 0A0A
-0A0F 0A10
-0A13 0A28
-0A2A 0A30
-0A32 0A33
-0A35 0A36
-0A38 0A39
-0A3C
-0A3E 0A40
-0A41 0A42
-0A47 0A48
-0A4B 0A4D
-0A59 0A5C
-0A5E
-0A66 0A6F
-0A70 0A71
-0A72 0A74
-0A81 0A82
-0A83
-0A85 0A8B
-0A8D
-0A8F 0A91
-0A93 0AA8
-0AAA 0AB0
-0AB2 0AB3
-0AB5 0AB9
-0ABC
-0ABD
-0ABE 0AC0
-0AC1 0AC5
-0AC7 0AC8
-0AC9
-0ACB 0ACC
-0ACD
-0AD0
-0AE0
-0AE6 0AEF
-0B01
-0B02 0B03
-0B05 0B0C
-0B0F 0B10
-0B13 0B28
-0B2A 0B30
-0B32 0B33
-0B36 0B39
-0B3C
-0B3D
-0B3E
-0B3F
-0B40
-0B41 0B43
-0B47 0B48
-0B4B 0B4C
-0B4D
-0B56
-0B57
-0B5C 0B5D
-0B5F 0B61
-0B66 0B6F
-0B82
-0B83
-0B85 0B8A
-0B8E 0B90
-0B92 0B95
-0B99 0B9A
-0B9C
-0B9E 0B9F
-0BA3 0BA4
-0BA8 0BAA
-0BAE 0BB5
-0BB7 0BB9
-0BBE 0BBF
-0BC0
-0BC1 0BC2
-0BC6 0BC8
-0BCA 0BCC
-0BCD
-0BD7
-0BE7 0BEF
-0C01 0C03
-0C05 0C0C
-0C0E 0C10
-0C12 0C28
-0C2A 0C33
-0C35 0C39
-0C3E 0C40
-0C41 0C44
-0C46 0C48
-0C4A 0C4D
-0C55 0C56
-0C60 0C61
-0C66 0C6F
-0C82 0C83
-0C85 0C8C
-0C8E 0C90
-0C92 0CA8
-0CAA 0CB3
-0CB5 0CB9
-0CBE
-0CBF
-0CC0 0CC4
-0CC6
-0CC7 0CC8
-0CCA 0CCB
-0CCC 0CCD
-0CD5 0CD6
-0CDE
-0CE0 0CE1
-0CE6 0CEF
-0D02 0D03
-0D05 0D0C
-0D0E 0D10
-0D12 0D28
-0D2A 0D39
-0D3E 0D40
-0D41 0D43
-0D46 0D48
-0D4A 0D4C
-0D4D
-0D57
-0D60 0D61
-0D66 0D6F
-0D82 0D83
-0D85 0D96
-0D9A 0DB1
-0DB3 0DBB
-0DBD
-0DC0 0DC6
-0DCA
-0DCF 0DD1
-0DD2 0DD4
-0DD6
-0DD8 0DDF
-0DF2 0DF3
-0E01 0E30
-0E31
-0E32 0E33
-0E34 0E3A
-0E40 0E45
-0E46
-0E47 0E4E
-0E50 0E59
-0E81 0E82
-0E84
-0E87 0E88
-0E8A
-0E8D
-0E94 0E97
-0E99 0E9F
-0EA1 0EA3
-0EA5
-0EA7
-0EAA 0EAB
-0EAD 0EB0
-0EB1
-0EB2 0EB3
-0EB4 0EB9
-0EBB 0EBC
-0EBD
-0EC0 0EC4
-0EC6
-0EC8 0ECD
-0ED0 0ED9
-0EDC 0EDD
-0F00
-0F18 0F19
-0F20 0F29
-0F35
-0F37
-0F39
-0F3E 0F3F
-0F40 0F47
-0F49 0F6A
-0F71 0F7E
-0F7F
-0F80 0F84
-0F86 0F87
-0F88 0F8B
-0F90 0F97
-0F99 0FBC
-0FC6
-1000 1021
-1023 1027
-1029 102A
-102C
-102D 1030
-1031
-1032
-1036 1037
-1038
-1039
-1040 1049
-1050 1055
-1056 1057
-1058 1059
-10A0 10C5
-10D0 10F6
-1100 1159
-115F 11A2
-11A8 11F9
-1200 1206
-1208 1246
-1248
-124A 124D
-1250 1256
-1258
-125A 125D
-1260 1286
-1288
-128A 128D
-1290 12AE
-12B0
-12B2 12B5
-12B8 12BE
-12C0
-12C2 12C5
-12C8 12CE
-12D0 12D6
-12D8 12EE
-12F0 130E
-1310
-1312 1315
-1318 131E
-1320 1346
-1348 135A
-1369 1371
-13A0 13F4
-1401 166C
-166F 1676
-1681 169A
-16A0 16EA
-16EE 16F0
-1780 17B3
-17B4 17B6
-17B7 17BD
-17BE 17C5
-17C6
-17C7 17C8
-17C9 17D3
-17E0 17E9
-1810 1819
-1820 1842
-1843
-1844 1877
-1880 18A8
-18A9
-1E00
-1E01
-1E02
-1E03
-1E04
-1E05
-1E06
-1E07
-1E08
-1E09
-1E0A
-1E0B
-1E0C
-1E0D
-1E0E
-1E0F
-1E10
-1E11
-1E12
-1E13
-1E14
-1E15
-1E16
-1E17
-1E18
-1E19
-1E1A
-1E1B
-1E1C
-1E1D
-1E1E
-1E1F
-1E20
-1E21
-1E22
-1E23
-1E24
-1E25
-1E26
-1E27
-1E28
-1E29
-1E2A
-1E2B
-1E2C
-1E2D
-1E2E
-1E2F
-1E30
-1E31
-1E32
-1E33
-1E34
-1E35
-1E36
-1E37
-1E38
-1E39
-1E3A
-1E3B
-1E3C
-1E3D
-1E3E
-1E3F
-1E40
-1E41
-1E42
-1E43
-1E44
-1E45
-1E46
-1E47
-1E48
-1E49
-1E4A
-1E4B
-1E4C
-1E4D
-1E4E
-1E4F
-1E50
-1E51
-1E52
-1E53
-1E54
-1E55
-1E56
-1E57
-1E58
-1E59
-1E5A
-1E5B
-1E5C
-1E5D
-1E5E
-1E5F
-1E60
-1E61
-1E62
-1E63
-1E64
-1E65
-1E66
-1E67
-1E68
-1E69
-1E6A
-1E6B
-1E6C
-1E6D
-1E6E
-1E6F
-1E70
-1E71
-1E72
-1E73
-1E74
-1E75
-1E76
-1E77
-1E78
-1E79
-1E7A
-1E7B
-1E7C
-1E7D
-1E7E
-1E7F
-1E80
-1E81
-1E82
-1E83
-1E84
-1E85
-1E86
-1E87
-1E88
-1E89
-1E8A
-1E8B
-1E8C
-1E8D
-1E8E
-1E8F
-1E90
-1E91
-1E92
-1E93
-1E94
-1E95 1E9B
-1EA0
-1EA1
-1EA2
-1EA3
-1EA4
-1EA5
-1EA6
-1EA7
-1EA8
-1EA9
-1EAA
-1EAB
-1EAC
-1EAD
-1EAE
-1EAF
-1EB0
-1EB1
-1EB2
-1EB3
-1EB4
-1EB5
-1EB6
-1EB7
-1EB8
-1EB9
-1EBA
-1EBB
-1EBC
-1EBD
-1EBE
-1EBF
-1EC0
-1EC1
-1EC2
-1EC3
-1EC4
-1EC5
-1EC6
-1EC7
-1EC8
-1EC9
-1ECA
-1ECB
-1ECC
-1ECD
-1ECE
-1ECF
-1ED0
-1ED1
-1ED2
-1ED3
-1ED4
-1ED5
-1ED6
-1ED7
-1ED8
-1ED9
-1EDA
-1EDB
-1EDC
-1EDD
-1EDE
-1EDF
-1EE0
-1EE1
-1EE2
-1EE3
-1EE4
-1EE5
-1EE6
-1EE7
-1EE8
-1EE9
-1EEA
-1EEB
-1EEC
-1EED
-1EEE
-1EEF
-1EF0
-1EF1
-1EF2
-1EF3
-1EF4
-1EF5
-1EF6
-1EF7
-1EF8
-1EF9
-1F00 1F07
-1F08 1F0F
-1F10 1F15
-1F18 1F1D
-1F20 1F27
-1F28 1F2F
-1F30 1F37
-1F38 1F3F
-1F40 1F45
-1F48 1F4D
-1F50 1F57
-1F59
-1F5B
-1F5D
-1F5F
-1F60 1F67
-1F68 1F6F
-1F70 1F7D
-1F80 1F87
-1F88 1F8F
-1F90 1F97
-1F98 1F9F
-1FA0 1FA7
-1FA8 1FAF
-1FB0 1FB4
-1FB6 1FB7
-1FB8 1FBB
-1FBC
-1FBE
-1FC2 1FC4
-1FC6 1FC7
-1FC8 1FCB
-1FCC
-1FD0 1FD3
-1FD6 1FD7
-1FD8 1FDB
-1FE0 1FE7
-1FE8 1FEC
-1FF2 1FF4
-1FF6 1FF7
-1FF8 1FFB
-1FFC
-203F 2040
-207F
-20D0 20DC
-20E1
-2102
-2107
-210A
-210B 210D
-210E 210F
-2110 2112
-2113
-2115
-2119 211D
-2124
-2126
-2128
-212A 212D
-212F
-2130 2131
-2133
-2134
-2135 2138
-2139
-2160 2183
-3005
-3006
-3007
-3021 3029
-302A 302F
-3031 3035
-3038 303A
-3041 3094
-3099 309A
-309D 309E
-30A1 30FA
-30FB
-30FC 30FE
-3105 312C
-3131 318E
-31A0 31B7
-3400 4DB5
-4E00 9FA5
-A000 A48C
-AC00 D7A3
-F900 FA2D
-FB00 FB06
-FB13 FB17
-FB1D
-FB1E
-FB1F FB28
-FB2A FB36
-FB38 FB3C
-FB3E
-FB40 FB41
-FB43 FB44
-FB46 FBB1
-FBD3 FD3D
-FD50 FD8F
-FD92 FDC7
-FDF0 FDFB
-FE20 FE23
-FE33 FE34
-FE4D FE4F
-FE70 FE72
-FE74
-FE76 FEFC
-FF10 FF19
-FF21 FF3A
-FF3F
-FF41 FF5A
-FF65
-FF66 FF6F
-FF70
-FF71 FF9D
-FF9E FF9F
-FFA0 FFBE
-FFC2 FFC7
-FFCA FFCF
-FFD2 FFD7
-FFDA FFDC
-10300 1031E
-10330 10349
-1034A
-10400 10425
-10428 1044D
-1D165 1D166
-1D167 1D169
-1D16D 1D172
-1D17B 1D182
-1D185 1D18B
-1D1AA 1D1AD
-1D400 1D419
-1D41A 1D433
-1D434 1D44D
-1D44E 1D454
-1D456 1D467
-1D468 1D481
-1D482 1D49B
-1D49C
-1D49E 1D49F
-1D4A2
-1D4A5 1D4A6
-1D4A9 1D4AC
-1D4AE 1D4B5
-1D4B6 1D4B9
-1D4BB
-1D4BD 1D4C0
-1D4C2 1D4C3
-1D4C5 1D4CF
-1D4D0 1D4E9
-1D4EA 1D503
-1D504 1D505
-1D507 1D50A
-1D50D 1D514
-1D516 1D51C
-1D51E 1D537
-1D538 1D539
-1D53B 1D53E
-1D540 1D544
-1D546
-1D54A 1D550
-1D552 1D56B
-1D56C 1D585
-1D586 1D59F
-1D5A0 1D5B9
-1D5BA 1D5D3
-1D5D4 1D5ED
-1D5EE 1D607
-1D608 1D621
-1D622 1D63B
-1D63C 1D655
-1D656 1D66F
-1D670 1D689
-1D68A 1D6A3
-1D6A8 1D6C0
-1D6C2 1D6DA
-1D6DC 1D6E1
-1D6E2 1D6FA
-1D6FC 1D714
-1D716 1D71B
-1D71C 1D734
-1D736 1D74E
-1D750 1D755
-1D756 1D76E
-1D770 1D788
-1D78A 1D78F
-1D790 1D7A8
-1D7AA 1D7C2
-1D7C4 1D7C9
-1D7CE 1D7FF
-20000 2A6D6
-2F800 2FA1D
-END
+++ /dev/null
-# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables from e.g. Unicode.txt.
-# Any changes made here will be lost!
-return <<'END';
-0 10FFFF
-END
+++ /dev/null
-# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables from e.g. Unicode.txt.
-# Any changes made here will be lost!
-return <<'END';
-0 10FFFF
-END
+++ /dev/null
-# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables from e.g. Unicode.txt.
-# Any changes made here will be lost!
-return <<'END';
-AC00 D7A3 Hangul Syllable
-END
+++ /dev/null
-# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables from e.g. Unicode.txt.
-# Any changes made here will be lost!
-return <<'END';
-D800 DB7F Non Private Use High Surrogate
-END
+++ /dev/null
-# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables from e.g. Unicode.txt.
-# Any changes made here will be lost!
-return <<'END';
-DB80 DBFF Private Use High Surrogate
-END
+++ /dev/null
-# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables from e.g. Unicode.txt.
-# Any changes made here will be lost!
-return <<'END';
-DC00 DFFF Low Surrogate
-END
+++ /dev/null
-# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables from e.g. Unicode.txt.
-# Any changes made here will be lost!
-return <<'END';
-0370 03FF Greek Block
-END
+++ /dev/null
-# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables from e.g. Unicode.txt.
-# Any changes made here will be lost!
-return <<'END';
-0400 04FF Cyrillic Block
-END
+++ /dev/null
-# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables from e.g. Unicode.txt.
-# Any changes made here will be lost!
-return <<'END';
-0530 058F Armenian Block
-END
+++ /dev/null
-# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables from e.g. Unicode.txt.
-# Any changes made here will be lost!
-return <<'END';
-0590 05FF Hebrew Block
-END
+++ /dev/null
-# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables from e.g. Unicode.txt.
-# Any changes made here will be lost!
-return <<'END';
-0600 06FF Arabic Block
-END
+++ /dev/null
-# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables from e.g. Unicode.txt.
-# Any changes made here will be lost!
-return <<'END';
-0700 074F Syriac Block
-END
+++ /dev/null
-# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables from e.g. Unicode.txt.
-# Any changes made here will be lost!
-return <<'END';
-0780 07BF Thaana Block
-END
+++ /dev/null
-# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables from e.g. Unicode.txt.
-# Any changes made here will be lost!
-return <<'END';
-0900 097F Devanagari Block
-END
+++ /dev/null
-# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables from e.g. Unicode.txt.
-# Any changes made here will be lost!
-return <<'END';
-0980 09FF Bengali Block
-END
+++ /dev/null
-# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables from e.g. Unicode.txt.
-# Any changes made here will be lost!
-return <<'END';
-0A00 0A7F Gurmukhi Block
-END
+++ /dev/null
-# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables from e.g. Unicode.txt.
-# Any changes made here will be lost!
-return <<'END';
-0A80 0AFF Gujarati Block
-END
+++ /dev/null
-# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables from e.g. Unicode.txt.
-# Any changes made here will be lost!
-return <<'END';
-0B00 0B7F Oriya Block
-END
+++ /dev/null
-# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables from e.g. Unicode.txt.
-# Any changes made here will be lost!
-return <<'END';
-20000 2A6D6 CJK Ideograph Extension B
-END
+++ /dev/null
-# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables from e.g. Unicode.txt.
-# Any changes made here will be lost!
-return <<'END';
-0B80 0BFF Tamil Block
-END
+++ /dev/null
-# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables from e.g. Unicode.txt.
-# Any changes made here will be lost!
-return <<'END';
-0C00 0C7F Telugu Block
-END
+++ /dev/null
-# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables from e.g. Unicode.txt.
-# Any changes made here will be lost!
-return <<'END';
-0C80 0CFF Kannada Block
-END
+++ /dev/null
-# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables from e.g. Unicode.txt.
-# Any changes made here will be lost!
-return <<'END';
-0D00 0D7F Malayalam Block
-END
+++ /dev/null
-# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables from e.g. Unicode.txt.
-# Any changes made here will be lost!
-return <<'END';
-0D80 0DFF Sinhala Block
-END
+++ /dev/null
-# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables from e.g. Unicode.txt.
-# Any changes made here will be lost!
-return <<'END';
-0E00 0E7F Thai Block
-END
+++ /dev/null
-# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables from e.g. Unicode.txt.
-# Any changes made here will be lost!
-return <<'END';
-0E80 0EFF Lao Block
-END
+++ /dev/null
-# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables from e.g. Unicode.txt.
-# Any changes made here will be lost!
-return <<'END';
-0F00 0FFF Tibetan Block
-END
+++ /dev/null
-# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables from e.g. Unicode.txt.
-# Any changes made here will be lost!
-return <<'END';
-1000 109F Myanmar Block
-END
+++ /dev/null
-# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables from e.g. Unicode.txt.
-# Any changes made here will be lost!
-return <<'END';
-10A0 10FF Georgian Block
-END
+++ /dev/null
-# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables from e.g. Unicode.txt.
-# Any changes made here will be lost!
-return <<'END';
-F0000 FFFFD Plane 15 Private Use
-END
+++ /dev/null
-# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables from e.g. Unicode.txt.
-# Any changes made here will be lost!
-return <<'END';
-1200 137F Ethiopic Block
-END
+++ /dev/null
-# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables from e.g. Unicode.txt.
-# Any changes made here will be lost!
-return <<'END';
-13A0 13FF Cherokee Block
-END
+++ /dev/null
-# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables from e.g. Unicode.txt.
-# Any changes made here will be lost!
-return <<'END';
-1680 169F Ogham Block
-END
+++ /dev/null
-# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables from e.g. Unicode.txt.
-# Any changes made here will be lost!
-return <<'END';
-16A0 16FF Runic Block
-END
+++ /dev/null
-# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables from e.g. Unicode.txt.
-# Any changes made here will be lost!
-return <<'END';
-1780 17FF Khmer Block
-END
+++ /dev/null
-# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables from e.g. Unicode.txt.
-# Any changes made here will be lost!
-return <<'END';
-1800 18AF Mongolian Block
-END
+++ /dev/null
-# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables from e.g. Unicode.txt.
-# Any changes made here will be lost!
-return <<'END';
-100000 10FFFD Plane 16 Private Use
-END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
FB00 FB4F Alphabetic Presentation Forms
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
-3400 4DB5 CJK Ideograph Extension A
+0600 06FF Arabic
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
FE70 FEFE Arabic Presentation Forms-B
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
FB50 FDFF Arabic Presentation Forms-A
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
-3400 4DB5 CJK Ideograph Extension A
+0530 058F Armenian
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
2190 21FF Arrows
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
0000 007F Basic Latin
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
-3400 4DB5 CJK Ideograph Extension A
+0980 09FF Bengali
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
2580 259F Block Elements
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
-3400 4DB5 CJK Ideograph Extension A
+3100 312F Bopomofo
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
31A0 31BF Bopomofo Extended
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
2500 257F Box Drawing
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
2800 28FF Braille Patterns
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
1D000 1D0FF Byzantine Musical Symbols
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
-3400 4DB5 CJK Ideograph Extension A
+13A0 13FF Cherokee
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
F900 FAFF CJK Compatibility Ideographs
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
3300 33FF CJK Compatibility
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
2F800 2FA1F CJK Compatibility Ideographs Supplement
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
FE30 FE4F CJK Compatibility Forms
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
2E80 2EFF CJK Radicals Supplement
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
3000 303F CJK Symbols and Punctuation
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
3400 4DB5 CJK Unified Ideographs Extension A
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
20000 2A6D6 CJK Unified Ideographs Extension B
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
4E00 9FFF CJK Unified Ideographs
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
20D0 20FF Combining Marks for Symbols
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
FE20 FE2F Combining Half Marks
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
0300 036F Combining Diacritical Marks
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
2400 243F Control Pictures
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
20A0 20CF Currency Symbols
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
-3400 4DB5 CJK Ideograph Extension A
+0400 04FF Cyrillic
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
-3400 4DB5 CJK Ideograph Extension A
+10400 1044F Deseret
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
-3400 4DB5 CJK Ideograph Extension A
+0900 097F Devanagari
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
2700 27BF Dingbats
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
2460 24FF Enclosed Alphanumerics
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
3200 32FF Enclosed CJK Letters and Months
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
-3400 4DB5 CJK Ideograph Extension A
+1200 137F Ethiopic
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
2000 206F General Punctuation
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
25A0 25FF Geometric Shapes
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
-3400 4DB5 CJK Ideograph Extension A
+10A0 10FF Georgian
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
-3400 4DB5 CJK Ideograph Extension A
+10330 1034F Gothic
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
-3400 4DB5 CJK Ideograph Extension A
+0370 03FF Greek
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
1F00 1FFF Greek Extended
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
-3400 4DB5 CJK Ideograph Extension A
+0A80 0AFF Gujarati
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
-3400 4DB5 CJK Ideograph Extension A
+0A00 0A7F Gurmukhi
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
FF00 FFEF Halfwidth and Fullwidth Forms
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
3130 318F Hangul Compatibility Jamo
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
1100 11FF Hangul Jamo
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
AC00 D7A3 Hangul Syllables
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
-3400 4DB5 CJK Ideograph Extension A
+0590 05FF Hebrew
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
DB80 DBFF High Private Use Surrogates
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
D800 DB7F High Surrogates
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
-3400 4DB5 CJK Ideograph Extension A
+3040 309F Hiragana
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
2FF0 2FFF Ideographic Description Characters
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
0250 02AF IPA Extensions
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
3190 319F Kanbun
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
2F00 2FDF Kangxi Radicals
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
-3400 4DB5 CJK Ideograph Extension A
+0C80 0CFF Kannada
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
-3400 4DB5 CJK Ideograph Extension A
+30A0 30FF Katakana
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
-3400 4DB5 CJK Ideograph Extension A
+1780 17FF Khmer
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
-4E00 9FA5 CJK Ideograph
+0E80 0EFF Lao
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
0080 00FF Latin-1 Supplement
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
0100 017F Latin Extended-A
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
0180 024F Latin Extended-B
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
1E00 1EFF Latin Extended Additional
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
2100 214F Letterlike Symbols
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
DC00 DFFF Low Surrogates
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
-3400 4DB5 CJK Ideograph Extension A
+0D00 0D7F Malayalam
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
1D400 1D7FF Mathematical Alphanumeric Symbols
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
2200 22FF Mathematical Operators
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
2600 26FF Miscellaneous Symbols
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
2300 23FF Miscellaneous Technical
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
-3400 4DB5 CJK Ideograph Extension A
+1800 18AF Mongolian
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
1D100 1D1FF Musical Symbols
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
-3400 4DB5 CJK Ideograph Extension A
+1000 109F Myanmar
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
2150 218F Number Forms
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
-3400 4DB5 CJK Ideograph Extension A
+1680 169F Ogham
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
-3400 4DB5 CJK Ideograph Extension A
+10300 1032F Old Italic
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
2440 245F Optical Character Recognition
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
-3400 4DB5 CJK Ideograph Extension A
+0B00 0B7F Oriya
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
E000 F8FF Private Use
+F0000 FFFFD Private Use
+100000 10FFFD Private Use
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
-3400 4DB5 CJK Ideograph Extension A
+16A0 16FF Runic
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
-3400 4DB5 CJK Ideograph Extension A
+0D80 0DFF Sinhala
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
FE50 FE6F Small Form Variants
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
02B0 02FF Spacing Modifier Letters
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
FEFF Specials
FFF0 FFFD Specials
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
2070 209F Superscripts and Subscripts
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
-3400 4DB5 CJK Ideograph Extension A
+0700 074F Syriac
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
E0000 E007F Tags
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
-3400 4DB5 CJK Ideograph Extension A
+0B80 0BFF Tamil
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
-3400 4DB5 CJK Ideograph Extension A
+0C00 0C7F Telugu
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
-3400 4DB5 CJK Ideograph Extension A
+0780 07BF Thaana
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
-3400 4DB5 CJK Ideograph Extension A
+0E00 0E7F Thai
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
-3400 4DB5 CJK Ideograph Extension A
+0F00 0FFF Tibetan
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
1400 167F Unified Canadian Aboriginal Syllabics
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
A490 A4CF Yi Radicals
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
A000 A48F Yi Syllables
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
-%utf8::Is =
-(
-'Close_Punctuation' => 'Pe',
-'Connector_Punctuation' => 'Pc',
-'Control' => 'Cc',
-'Currency_Symbol' => 'Sc',
-'Dash_Punctuation' => 'Pd',
-'Decimal_Number' => 'Nd',
-'Enclosing_Mark' => 'Me',
-'Final_Punctuation' => 'Pf',
-'Format' => 'Cf',
-'Initial_Punctuation' => 'Pi',
-'Letter' => 'L',
-'Letter_Number' => 'Nl',
-'Line_Separator' => 'Zl',
-'Lowercase_Letter' => 'Ll',
-'Mark' => 'M',
-'Math_Symbol' => 'Sm',
-'Modifier_Letter' => 'Lm',
-'Modifier_Symbol' => 'Sk',
-'Non_Spacing_Mark' => 'Mn',
-'Number' => 'N',
-'Open_Punctuation' => 'Ps',
-'Other' => 'C',
-'Other_Letter' => 'Lo',
-'Other_Number' => 'No',
-'Other_Punctuation' => 'Po',
-'Other_Symbol' => 'So',
-'Paragraph_Separator' => 'Zp',
-'Private Use' => 'Co',
-'Punctuation' => 'P',
-'Separator' => 'Z',
-'Space_Separator' => 'Zs',
-'Spacing_Mark' => 'Mc',
-'Surrogate' => 'Cs',
-'Symbol' => 'S',
-'Titlecase_Letter' => 'Lt',
-'Unassigned' => 'Cn',
-'Uppercase_Letter' => 'Lu',
+# Built Mon Jan 14 15:52:42 2002.
+
+##
+## Data in this file used by ../utf8_heavy.pl
+##
+
+## Mapping from name to filename in ./Is
+%utf8::Is = (
+ 'ASCII' => 'ASCII',
+ 'Alnum' => 'Alnum',
+ 'Alpha' => 'Alpha',
+ 'BidiAL' => 'BidiAL',
+ 'BidiAN' => 'BidiAN',
+ 'BidiB' => 'BidiB',
+ 'BidiBN' => 'BidiBN',
+ 'BidiCS' => 'BidiCS',
+ 'BidiEN' => 'BidiEN',
+ 'BidiES' => 'BidiES',
+ 'BidiET' => 'BidiET',
+ 'BidiL' => 'BidiL',
+ 'BidiLRE' => 'BidiLRE',
+ 'BidiLRO' => 'BidiLRO',
+ 'BidiNSM' => 'BidiNSM',
+ 'BidiON' => 'BidiON',
+ 'BidiPDF' => 'BidiPDF',
+ 'BidiR' => 'BidiR',
+ 'BidiRLE' => 'BidiRLE',
+ 'BidiRLO' => 'BidiRLO',
+ 'BidiS' => 'BidiS',
+ 'BidiWS' => 'BidiWS',
+ 'Blank' => 'Blank',
+ 'C' => 'C',
+ 'Canon' => 'Canon',
+ 'Cc' => 'Cc',
+ 'Cf' => 'Cf',
+ 'Cn' => 'Cn',
+ 'Cntrl' => 'Cntrl',
+ 'Co' => 'Co',
+ 'Compat' => 'Compat',
+ 'Cs' => 'Cs',
+ 'DCcircle' => 'DCcircle',
+ 'DCcompat' => 'DCcompat',
+ 'DCfinal' => 'DCfinal',
+ 'DCfont' => 'DCfont',
+ 'DCfraction' => 'DCfracti',
+ 'DCinitial' => 'DCinitia',
+ 'DCisolated' => 'DCisolat',
+ 'DCmedial' => 'DCmedial',
+ 'DCnarrow' => 'DCnarrow',
+ 'DCnoBreak' => 'DCnoBrea',
+ 'DCsmall' => 'DCsmall',
+ 'DCsquare' => 'DCsquare',
+ 'DCsub' => 'DCsub',
+ 'DCsuper' => 'DCsuper',
+ 'DCvertical' => 'DCvertic',
+ 'DCwide' => 'DCwide',
+ 'Digit' => 'Digit',
+ 'Graph' => 'Graph',
+ 'L' => '2',
+ 'L&' => 'L',
+ 'LbrkAI' => 'LbrkAI',
+ 'LbrkAL' => 'LbrkAL',
+ 'LbrkB2' => 'LbrkB2',
+ 'LbrkBA' => 'LbrkBA',
+ 'LbrkBB' => 'LbrkBB',
+ 'LbrkBK' => 'LbrkBK',
+ 'LbrkCB' => 'LbrkCB',
+ 'LbrkCL' => 'LbrkCL',
+ 'LbrkCM' => 'LbrkCM',
+ 'LbrkCR' => 'LbrkCR',
+ 'LbrkEX' => 'LbrkEX',
+ 'LbrkGL' => 'LbrkGL',
+ 'LbrkHY' => 'LbrkHY',
+ 'LbrkID' => 'LbrkID',
+ 'LbrkIN' => 'LbrkIN',
+ 'LbrkIS' => 'LbrkIS',
+ 'LbrkLF' => 'LbrkLF',
+ 'LbrkNS' => 'LbrkNS',
+ 'LbrkNU' => 'LbrkNU',
+ 'LbrkOP' => 'LbrkOP',
+ 'LbrkPO' => 'LbrkPO',
+ 'LbrkPR' => 'LbrkPR',
+ 'LbrkQU' => 'LbrkQU',
+ 'LbrkSA' => 'LbrkSA',
+ 'LbrkSG' => 'LbrkSG',
+ 'LbrkSP' => 'LbrkSP',
+ 'LbrkSY' => 'LbrkSY',
+ 'LbrkXX' => 'LbrkXX',
+ 'LbrkZW' => 'LbrkZW',
+ 'Ll' => 'Ll',
+ 'Lm' => 'Lm',
+ 'Lo' => 'Lo',
+ 'Lower' => 'Lower',
+ 'Lt' => 'Lt',
+ 'Lu' => 'Lu',
+ 'M' => 'M',
+ 'Mc' => 'Mc',
+ 'Me' => 'Me',
+ 'Mirrored' => 'Mirrored',
+ 'Mn' => 'Mn',
+ 'N' => 'N',
+ 'Nd' => 'Nd',
+ 'Nl' => 'Nl',
+ 'No' => 'No',
+ 'P' => 'P',
+ 'Pc' => 'Pc',
+ 'Pd' => 'Pd',
+ 'Pe' => 'Pe',
+ 'Pf' => 'Pf',
+ 'Pi' => 'Pi',
+ 'Po' => 'Po',
+ 'Print' => 'Print',
+ 'Ps' => 'Ps',
+ 'Punct' => 'Punct',
+ 'S' => 'S',
+ 'Sc' => 'Sc',
+ 'Sk' => 'Sk',
+ 'Sm' => 'Sm',
+ 'So' => 'So',
+ 'Space' => 'Space',
+ 'SpacePerl' => 'SpacePer',
+ 'Title' => 'Title',
+ 'Upper' => 'Upper',
+ 'Word' => 'Word',
+ 'XDigit' => 'XDigit',
+ 'Z' => 'Z',
+ 'Zl' => 'Zl',
+ 'Zp' => 'Zp',
+ 'Zs' => 'Zs',
);
-%utf8::IsPat =
-(
-'cl' => {
+
+## Mappings from regex to filename in ./Is/
+%utf8::IsPat = (
+ 'al' => {
+ 'Alphabetic' => 'Alphabet',
+ 'All' => 'Any',
+ },
+ 'an' => {
+ 'Any' => 'Any',
+ },
+ 'ar' => {
+ 'Armenian' => 'Armenian',
+ 'Arabic' => 'Arabic',
+ },
+ 'as' => {
+ 'ASCII(?:[-_]|\s+)?Hex(?:[-_]|\s+)?Digit' => 'ASCIIHex',
+ 'Assigned' => 'Assigned',
+ },
+ 'be' => {
+ 'Bengali' => 'Bengali',
+ },
+ 'bi' => {
+ 'Bidi(?:[-_]|\s+)?Control' => 'BidiCont',
+ },
+ 'bo' => {
+ 'Bopomofo' => 'Bopomofo',
+ },
+ 'ca' => {
+ 'Canadian(?:[-_]|\s+)?Aboriginal' => 'Canadian',
+ },
+ 'ch' => {
+ 'Cherokee' => 'Cherokee',
+ },
+ 'cl' => {
'Close(?:[-_]|\s+)?Punctuation' => 'Pe',
-},
-'co' => {
+ },
+ 'co' => {
'Connector(?:[-_]|\s+)?Punctuation' => 'Pc',
'Control' => 'Cc',
-},
-'cu' => {
+ 'Common' => 'Common',
+ },
+ 'cu' => {
'Currency(?:[-_]|\s+)?Symbol' => 'Sc',
-},
-'da' => {
+ },
+ 'cy' => {
+ 'Cyrillic' => 'Cyrillic',
+ },
+ 'da' => {
'Dash(?:[-_]|\s+)?Punctuation' => 'Pd',
-},
-'de' => {
+ 'Dash' => 'Dash',
+ },
+ 'de' => {
+ 'Devanagari' => 'Devanaga',
'Decimal(?:[-_]|\s+)?Number' => 'Nd',
-},
-'en' => {
+ 'Deseret' => 'Deseret',
+ },
+ 'di' => {
+ 'Diacritic' => 'Diacriti',
+ },
+ 'en' => {
'Enclosing(?:[-_]|\s+)?Mark' => 'Me',
-},
-'fi' => {
+ },
+ 'et' => {
+ 'Ethiopic' => 'Ethiopic',
+ },
+ 'ex' => {
+ 'Extender' => 'Extender',
+ },
+ 'fi' => {
'Final(?:[-_]|\s+)?Punctuation' => 'Pf',
-},
-'fo' => {
+ },
+ 'fo' => {
'Format' => 'Cf',
-},
-'in' => {
+ },
+ 'ge' => {
+ 'Georgian' => 'Georgian',
+ },
+ 'go' => {
+ 'Gothic' => 'Gothic',
+ },
+ 'gr' => {
+ 'Greek' => 'Greek',
+ },
+ 'gu' => {
+ 'Gurmukhi' => 'Gurmukhi',
+ 'Gujarati' => 'Gujarati',
+ },
+ 'ha' => {
+ 'Han' => 'Han',
+ 'Hangul' => 'Hangul',
+ },
+ 'he' => {
+ 'Hebrew' => 'Hebrew',
+ 'Hex(?:[-_]|\s+)?Digit' => 'HexDigit',
+ },
+ 'hi' => {
+ 'Hiragana' => 'Hiragana',
+ },
+ 'hy' => {
+ 'Hyphen' => 'Hyphen',
+ },
+ 'id' => {
+ 'Ideographic' => 'Ideograp',
+ 'ID(?:[-_]|\s+)?Continue' => 'IDContin',
+ 'ID(?:[-_]|\s+)?Start' => 'IDStart',
+ },
+ 'in' => {
+ 'Inherited' => 'Inherite',
'Initial(?:[-_]|\s+)?Punctuation' => 'Pi',
-},
-'le' => {
- 'Letter' => 'L',
+ },
+ 'jo' => {
+ 'Join(?:[-_]|\s+)?Control' => 'JoinCont',
+ },
+ 'ka' => {
+ 'Katakana' => 'Katakana',
+ 'Kannada' => 'Kannada',
+ },
+ 'kh' => {
+ 'Khmer' => 'Khmer',
+ },
+ 'la' => {
+ 'Latin' => 'Latin',
+ 'Lao' => 'Lao',
+ },
+ 'le' => {
'Letter(?:[-_]|\s+)?Number' => 'Nl',
-},
-'li' => {
+ 'Letter' => '2',
+ },
+ 'li' => {
'Line(?:[-_]|\s+)?Separator' => 'Zl',
-},
-'lo' => {
+ },
+ 'lo' => {
'Lowercase(?:[-_]|\s+)?Letter' => 'Ll',
-},
-'ma' => {
- 'Mark' => 'M',
+ 'Lowercase' => 'Lowercas',
+ },
+ 'ma' => {
'Math(?:[-_]|\s+)?Symbol' => 'Sm',
-},
-'mo' => {
+ 'Mark' => 'M',
+ 'Math' => 'Math',
+ 'Malayalam' => 'Malayala',
+ },
+ 'mo' => {
'Modifier(?:[-_]|\s+)?Letter' => 'Lm',
'Modifier(?:[-_]|\s+)?Symbol' => 'Sk',
-},
-'no' => {
+ 'Mongolian' => 'Mongolia',
+ },
+ 'my' => {
+ 'Myanmar' => 'Myanmar',
+ },
+ 'no' => {
'Non(?:[-_]|\s+)?Spacing(?:[-_]|\s+)?Mark' => 'Mn',
-},
-'nu' => {
+ 'Noncharacter(?:[-_]|\s+)?Code(?:[-_]|\s+)?Point' => 'Nonchara',
+ },
+ 'nu' => {
'Number' => 'N',
-},
-'op' => {
+ },
+ 'og' => {
+ 'Ogham' => 'Ogham',
+ },
+ 'ol' => {
+ 'Old(?:[-_]|\s+)?Italic' => 'OldItali',
+ },
+ 'op' => {
'Open(?:[-_]|\s+)?Punctuation' => 'Ps',
-},
-'ot' => {
- 'Other' => 'C',
- 'Other(?:[-_]|\s+)?Letter' => 'Lo',
- 'Other(?:[-_]|\s+)?Number' => 'No',
+ },
+ 'or' => {
+ 'Oriya' => 'Oriya',
+ },
+ 'ot' => {
+ 'Other(?:[-_]|\s+)?Math' => 'OtherMat',
'Other(?:[-_]|\s+)?Punctuation' => 'Po',
+ 'Other(?:[-_]|\s+)?Lowercase' => 'OtherLow',
+ 'Other(?:[-_]|\s+)?Uppercase' => 'OtherUpp',
+ 'Other(?:[-_]|\s+)?Letter' => 'Lo',
+ 'Other(?:[-_]|\s+)?Alphabetic' => 'OtherAlp',
'Other(?:[-_]|\s+)?Symbol' => 'So',
-},
-'pa' => {
+ 'Other(?:[-_]|\s+)?Number' => 'No',
+ 'Other' => 'C',
+ },
+ 'pa' => {
'Paragraph(?:[-_]|\s+)?Separator' => 'Zp',
-},
-'pr' => {
+ },
+ 'pr' => {
'Private(?:[-_]|\s+)?Use' => 'Co',
-},
-'pu' => {
+ },
+ 'pu' => {
'Punctuation' => 'P',
-},
-'se' => {
+ },
+ 'qu' => {
+ 'Quotation(?:[-_]|\s+)?Mark' => 'Quotatio',
+ },
+ 'ru' => {
+ 'Runic' => 'Runic',
+ },
+ 'se' => {
'Separator' => 'Z',
-},
-'sp' => {
+ },
+ 'si' => {
+ 'Sinhala' => 'Sinhala',
+ },
+ 'sp' => {
'Space(?:[-_]|\s+)?Separator' => 'Zs',
'Spacing(?:[-_]|\s+)?Mark' => 'Mc',
-},
-'su' => {
+ },
+ 'su' => {
'Surrogate' => 'Cs',
-},
-'sy' => {
+ },
+ 'sy' => {
+ 'Syriac' => 'Syriac',
'Symbol' => 'S',
-},
-'ti' => {
+ },
+ 'ta' => {
+ 'Tamil' => 'Tamil',
+ },
+ 'te' => {
+ 'Terminal(?:[-_]|\s+)?Punctuation' => 'Terminal',
+ 'Telugu' => 'Telugu',
+ },
+ 'th' => {
+ 'Thaana' => 'Thaana',
+ 'Thai' => 'Thai',
+ },
+ 'ti' => {
'Titlecase(?:[-_]|\s+)?Letter' => 'Lt',
-},
-'un' => {
+ 'Tibetan' => 'Tibetan',
+ },
+ 'un' => {
'Unassigned' => 'Cn',
-},
-'up' => {
+ },
+ 'up' => {
'Uppercase(?:[-_]|\s+)?Letter' => 'Lu',
-},
+ 'Uppercase' => 'Uppercas',
+ },
+ 'wh' => {
+ 'White(?:[-_]|\s+)?space' => 'WhiteSpa',
+ },
+ 'yi' => {
+ 'Yi' => 'Yi',
+ },
);
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
0041 005A
0061 007A
02D0 02D1
02E0 02E4
02EE
-0345
037A
0386
0388 038A
0531 0556
0559
0561 0587
-05B0 05B9
-05BB 05BD
-05BF
-05C1 05C2
-05C4
05D0 05EA
05F0 05F2
0621 063A
0640 064A
-064B 0655
-0670
0671 06D3
06D5
-06D6 06DC
-06E1 06E4
06E5 06E6
-06E7 06E8
-06ED
06FA 06FC
0710
-0711
0712 072C
-0730 073F
0780 07A5
-07A6 07B0
-0901 0903
0905 0939
093D
-093E 094C
0950
0958 0961
-0962 0963
-0981 0983
0985 098C
098F 0990
0993 09A8
09AA 09B0
09B2
09B6 09B9
-09BE 09C4
-09C7 09C8
-09CB 09CC
-09D7
09DC 09DD
09DF 09E1
-09E2 09E3
09F0 09F1
-0A02
0A05 0A0A
0A0F 0A10
0A13 0A28
0A32 0A33
0A35 0A36
0A38 0A39
-0A3E 0A42
-0A47 0A48
-0A4B 0A4C
0A59 0A5C
0A5E
-0A70 0A71
0A72 0A74
-0A81 0A83
0A85 0A8B
0A8D
0A8F 0A91
0AB2 0AB3
0AB5 0AB9
0ABD
-0ABE 0AC5
-0AC7 0AC9
-0ACB 0ACC
0AD0
0AE0
-0B01 0B03
0B05 0B0C
0B0F 0B10
0B13 0B28
0B32 0B33
0B36 0B39
0B3D
-0B3E 0B43
-0B47 0B48
-0B4B 0B4C
-0B56 0B57
0B5C 0B5D
0B5F 0B61
-0B82 0B83
0B85 0B8A
0B8E 0B90
0B92 0B95
0BA8 0BAA
0BAE 0BB5
0BB7 0BB9
-0BBE 0BC2
-0BC6 0BC8
-0BCA 0BCC
-0BD7
-0C01 0C03
0C05 0C0C
0C0E 0C10
0C12 0C28
0C2A 0C33
0C35 0C39
-0C3E 0C44
-0C46 0C48
-0C4A 0C4C
-0C55 0C56
0C60 0C61
-0C82 0C83
0C85 0C8C
0C8E 0C90
0C92 0CA8
0CAA 0CB3
0CB5 0CB9
-0CBE 0CC4
-0CC6 0CC8
-0CCA 0CCC
-0CD5 0CD6
0CDE
0CE0 0CE1
-0D02 0D03
0D05 0D0C
0D0E 0D10
0D12 0D28
0D2A 0D39
-0D3E 0D43
-0D46 0D48
-0D4A 0D4C
-0D57
0D60 0D61
-0D82 0D83
0D85 0D96
0D9A 0DB1
0DB3 0DBB
0DBD
0DC0 0DC6
-0DCF 0DD4
-0DD6
-0DD8 0DDF
-0DF2 0DF3
0E01 0E30
-0E31
0E32 0E33
-0E34 0E3A
0E40 0E46
-0E4D
0E81 0E82
0E84
0E87 0E88
0EA7
0EAA 0EAB
0EAD 0EB0
-0EB1
0EB2 0EB3
-0EB4 0EB9
-0EBB 0EBC
0EBD
0EC0 0EC4
0EC6
-0ECD
0EDC 0EDD
0F00
0F40 0F47
0F49 0F6A
-0F71 0F81
0F88 0F8B
-0F90 0F97
-0F99 0FBC
1000 1021
1023 1027
1029 102A
-102C 1032
-1036
-1038
1050 1055
-1056 1059
10A0 10C5
10D0 10F6
1100 1159
166F 1676
1681 169A
16A0 16EA
-16EE 16F0
1780 17B3
-17B4 17C8
1820 1877
1880 18A8
-18A9
1E00 1E9B
1EA0 1EF9
1F00 1F15
212A 212D
212F 2131
2133 2139
-2160 2183
3005 3006
3031 3035
3041 3094
FB00 FB06
FB13 FB17
FB1D
-FB1E
FB1F FB28
FB2A FB36
FB38 FB3C
FFDA FFDC
10300 1031E
10330 10349
-1034A
10400 10425
10428 1044D
1D400 1D454
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
0000 007F
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
0030 0039 ASCII_Hex_Digit
0041 0046 ASCII_Hex_Digit
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
0030 0039
0041 005A
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
0041 005A
0061 007A
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
0041 005A
0061 007A
05D0 05EA
05F0 05F2
0621 063A
-0640 064A
-064B 0655
-0670
-0671 06D3
-06D5
-06D6 06DC
-06E1 06E4
-06E5 06E6
-06E7 06E8
+0640 0655
+0670 06D3
+06D5 06DC
+06E1 06E8
06ED
06FA 06FC
-0710
-0711
-0712 072C
+0710 072C
0730 073F
-0780 07A5
-07A6 07B0
+0780 07B0
0901 0903
0905 0939
-093D
-093E 094C
+093D 094C
0950
-0958 0961
-0962 0963
+0958 0963
0981 0983
0985 098C
098F 0990
09CB 09CC
09D7
09DC 09DD
-09DF 09E1
-09E2 09E3
+09DF 09E3
09F0 09F1
0A02
0A05 0A0A
0A4B 0A4C
0A59 0A5C
0A5E
-0A70 0A71
-0A72 0A74
+0A70 0A74
0A81 0A83
0A85 0A8B
0A8D
0AAA 0AB0
0AB2 0AB3
0AB5 0AB9
-0ABD
-0ABE 0AC5
+0ABD 0AC5
0AC7 0AC9
0ACB 0ACC
0AD0
0B2A 0B30
0B32 0B33
0B36 0B39
-0B3D
-0B3E 0B43
+0B3D 0B43
0B47 0B48
0B4B 0B4C
0B56 0B57
0DD6
0DD8 0DDF
0DF2 0DF3
-0E01 0E30
-0E31
-0E32 0E33
-0E34 0E3A
+0E01 0E3A
0E40 0E46
0E4D
0E81 0E82
0EA5
0EA7
0EAA 0EAB
-0EAD 0EB0
-0EB1
-0EB2 0EB3
-0EB4 0EB9
-0EBB 0EBC
-0EBD
+0EAD 0EB9
+0EBB 0EBD
0EC0 0EC4
0EC6
0ECD
102C 1032
1036
1038
-1050 1055
-1056 1059
+1050 1059
10A0 10C5
10D0 10F6
1100 1159
1681 169A
16A0 16EA
16EE 16F0
-1780 17B3
-17B4 17C8
+1780 17C8
1820 1877
-1880 18A8
-18A9
+1880 18A9
1E00 1E9B
1EA0 1EF9
1F00 1F15
F900 FA2D
FB00 FB06
FB13 FB17
-FB1D
-FB1E
-FB1F FB28
+FB1D FB28
FB2A FB36
FB38 FB3C
FB3E
FFD2 FFD7
FFDA FFDC
10300 1031E
-10330 10349
-1034A
+10330 1034A
10400 10425
10428 1044D
1D400 1D454
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
-3400 4DB5 CJK Ideograph Extension A
+0000 10FFFF
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
0621 063A ARABIC
0641 064A ARABIC
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
0531 0556 ARMENIAN
0559 ARMENIAN
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
0000 021F
0222 0233
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
0981 BENGALI
0985 098C BENGALI
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
061B
061F
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
0660 0669
066B 066C
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
000A
000D
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
0000 0008
000E 001B
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
002C
002E
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
200E 200F Bidi_Control
202A 202E Bidi_Control
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
0030 0039
00B2 00B3
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
002F
FF0F
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
0023 0025
002B
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
0041 005A
0061 007A
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
202A
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
202D
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
0300 034E
0360 0362
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
0021 0022
0026 002A
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
202C
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
05BE
05C0
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
202B
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
202E
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
0009
000B
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
000C
0020
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
0009
0020
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
3105 312C BOPOMOFO
31A0 31B7 BOPOMOFO
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
0000 001F
007F 009F
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
1401 166C CANADIAN-ABORIGINAL
166F 1676 CANADIAN-ABORIGINAL
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
00C0 00C5
00C7 00CF
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
0000 001F
007F 009F
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
070F
180B 180E
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
13A0 13F4 CHEROKEE
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
0220 0221
0234 024F
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
0000 001F
007F 009F
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
E000 F8FF
F0000 FFFFD
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
0000 0040
005B 0060
1D18C 1D1A9
1D1AE 1FFFF
2A6D7 2F7FF
+2FA1E 10FFFF
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
00A0
00A8
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
D800 DFFF
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
0400 0481 CYRILLIC
0483 0486 CYRILLIC
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
2460 2473
24B6 24EA
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
00A8
00AF
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
FB51
FB53
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
2102
210A 2113
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
00BC 00BE
2153 215F
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
FB54
FB58
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
FB50
FB52
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
FB55
FB59
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
FF61 FFBE
FFC2 FFC7
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
00A0
0F0C
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
FE50 FE52
FE54 FE66
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
3300 3357
3371 3376
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
2080 208E
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
00AA
00B2 00B3
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
FE30 FE44
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
3000
FF01 FF5E
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
002D Dash
00AD Dash
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
10400 10425 DESERET
10428 1044D DESERET
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
0901 0903 DEVANAGARI
0905 0939 DEVANAGARI
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
005E Diacritic
0060 Diacritic
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
0030 0039
0660 0669
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
1200 1206 ETHIOPIC
1208 1246 ETHIOPIC
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
00B7 Extender
02D0 02D1 Extender
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
10A0 10C5 GEORGIAN
10D0 10F6 GEORGIAN
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
10330 1034A GOTHIC
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
0021 007E
00A1 021F
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
00B5 GREEK
037A GREEK
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
0A81 0A83 GUJARATI
0A85 0A8B GUJARATI
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
0A02 GURMUKHI
0A05 0A0A GURMUKHI
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
2E80 2E99 HAN
2E9B 2EF3 HAN
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
1100 1159 HANGUL
115F 11A2 HANGUL
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
05D0 05EA HEBREW
05F0 05F2 HEBREW
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
0030 0039 Hex_Digit
0041 0046 Hex_Digit
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
3041 3094 HIRAGANA
309D 309E HIRAGANA
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
002D Hyphen
00AD Hyphen
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
+0030 0039
0041 005A
+005F
0061 007A
00AA
00B5
02D0 02D1
02E0 02E4
02EE
-0345
+0300 034E
+0360 0362
037A
0386
0388 038A
03D0 03D7
03DA 03F5
0400 0481
+0483 0486
048C 04C4
04C7 04C8
04CB 04CC
0531 0556
0559
0561 0587
-05B0 05B9
+0591 05A1
+05A3 05B9
05BB 05BD
05BF
05C1 05C2
05D0 05EA
05F0 05F2
0621 063A
-0640 064A
-064B 0655
-0670
-0671 06D3
-06D5
-06D6 06DC
-06E1 06E4
-06E5 06E6
-06E7 06E8
-06ED
-06FA 06FC
-0710
-0711
-0712 072C
-0730 073F
-0780 07A5
-07A6 07B0
+0640 0655
+0660 0669
+0670 06D3
+06D5 06DC
+06DF 06E8
+06EA 06ED
+06F0 06FC
+0710 072C
+0730 074A
+0780 07B0
0901 0903
0905 0939
-093D
-093E 094C
-0950
-0958 0961
-0962 0963
+093C 094D
+0950 0954
+0958 0963
+0966 096F
0981 0983
0985 098C
098F 0990
09AA 09B0
09B2
09B6 09B9
+09BC
09BE 09C4
09C7 09C8
-09CB 09CC
+09CB 09CD
09D7
09DC 09DD
-09DF 09E1
-09E2 09E3
-09F0 09F1
+09DF 09E3
+09E6 09F1
0A02
0A05 0A0A
0A0F 0A10
0A32 0A33
0A35 0A36
0A38 0A39
+0A3C
0A3E 0A42
0A47 0A48
-0A4B 0A4C
+0A4B 0A4D
0A59 0A5C
0A5E
-0A70 0A71
-0A72 0A74
+0A66 0A74
0A81 0A83
0A85 0A8B
0A8D
0AAA 0AB0
0AB2 0AB3
0AB5 0AB9
-0ABD
-0ABE 0AC5
+0ABC 0AC5
0AC7 0AC9
-0ACB 0ACC
+0ACB 0ACD
0AD0
0AE0
+0AE6 0AEF
0B01 0B03
0B05 0B0C
0B0F 0B10
0B2A 0B30
0B32 0B33
0B36 0B39
-0B3D
-0B3E 0B43
+0B3C 0B43
0B47 0B48
-0B4B 0B4C
+0B4B 0B4D
0B56 0B57
0B5C 0B5D
0B5F 0B61
+0B66 0B6F
0B82 0B83
0B85 0B8A
0B8E 0B90
0BB7 0BB9
0BBE 0BC2
0BC6 0BC8
-0BCA 0BCC
+0BCA 0BCD
0BD7
+0BE7 0BEF
0C01 0C03
0C05 0C0C
0C0E 0C10
0C35 0C39
0C3E 0C44
0C46 0C48
-0C4A 0C4C
+0C4A 0C4D
0C55 0C56
0C60 0C61
+0C66 0C6F
0C82 0C83
0C85 0C8C
0C8E 0C90
0CB5 0CB9
0CBE 0CC4
0CC6 0CC8
-0CCA 0CCC
+0CCA 0CCD
0CD5 0CD6
0CDE
0CE0 0CE1
+0CE6 0CEF
0D02 0D03
0D05 0D0C
0D0E 0D10
0D2A 0D39
0D3E 0D43
0D46 0D48
-0D4A 0D4C
+0D4A 0D4D
0D57
0D60 0D61
+0D66 0D6F
0D82 0D83
0D85 0D96
0D9A 0DB1
0DB3 0DBB
0DBD
0DC0 0DC6
+0DCA
0DCF 0DD4
0DD6
0DD8 0DDF
0DF2 0DF3
-0E01 0E30
-0E31
-0E32 0E33
-0E34 0E3A
-0E40 0E46
-0E4D
+0E01 0E3A
+0E40 0E4E
+0E50 0E59
0E81 0E82
0E84
0E87 0E88
0EA5
0EA7
0EAA 0EAB
-0EAD 0EB0
-0EB1
-0EB2 0EB3
-0EB4 0EB9
-0EBB 0EBC
-0EBD
+0EAD 0EB9
+0EBB 0EBD
0EC0 0EC4
0EC6
-0ECD
+0EC8 0ECD
+0ED0 0ED9
0EDC 0EDD
0F00
-0F40 0F47
+0F18 0F19
+0F20 0F29
+0F35
+0F37
+0F39
+0F3E 0F47
0F49 0F6A
-0F71 0F81
-0F88 0F8B
+0F71 0F84
+0F86 0F8B
0F90 0F97
0F99 0FBC
+0FC6
1000 1021
1023 1027
1029 102A
102C 1032
-1036
-1038
-1050 1055
-1056 1059
+1036 1039
+1040 1049
+1050 1059
10A0 10C5
10D0 10F6
1100 1159
1318 131E
1320 1346
1348 135A
+1369 1371
13A0 13F4
1401 166C
166F 1676
1681 169A
16A0 16EA
16EE 16F0
-1780 17B3
-17B4 17C8
+1780 17D3
+17E0 17E9
+1810 1819
1820 1877
-1880 18A8
-18A9
+1880 18A9
1E00 1E9B
1EA0 1EF9
1F00 1F15
1FE0 1FEC
1FF2 1FF4
1FF6 1FFC
+203F 2040
207F
+20D0 20DC
+20E1
2102
2107
210A 2113
212F 2131
2133 2139
2160 2183
-3005 3006
+3005 3007
+3021 302F
3031 3035
+3038 303A
3041 3094
+3099 309A
309D 309E
-30A1 30FA
-30FC 30FE
+30A1 30FE
3105 312C
3131 318E
31A0 31B7
F900 FA2D
FB00 FB06
FB13 FB17
-FB1D
-FB1E
-FB1F FB28
+FB1D FB28
FB2A FB36
FB38 FB3C
FB3E
FD50 FD8F
FD92 FDC7
FDF0 FDFB
+FE20 FE23
+FE33 FE34
+FE4D FE4F
FE70 FE72
FE74
FE76 FEFC
+FF10 FF19
FF21 FF3A
+FF3F
FF41 FF5A
-FF66 FFBE
+FF65 FFBE
FFC2 FFC7
FFCA FFCF
FFD2 FFD7
FFDA FFDC
10300 1031E
-10330 10349
-1034A
+10330 1034A
10400 10425
10428 1044D
+1D165 1D169
+1D16D 1D172
+1D17B 1D182
+1D185 1D18B
+1D1AA 1D1AD
1D400 1D454
1D456 1D49C
1D49E 1D49F
1D78A 1D7A8
1D7AA 1D7C2
1D7C4 1D7C9
+1D7CE 1D7FF
20000 2A6D6
2F800 2FA1D
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
0041 005A
0061 007A
02D0 02D1
02E0 02E4
02EE
-0345
037A
0386
0388 038A
0531 0556
0559
0561 0587
-05B0 05B9
-05BB 05BD
-05BF
-05C1 05C2
-05C4
05D0 05EA
05F0 05F2
0621 063A
0640 064A
-064B 0655
-0670
0671 06D3
06D5
-06D6 06DC
-06E1 06E4
06E5 06E6
-06E7 06E8
-06ED
06FA 06FC
0710
-0711
0712 072C
-0730 073F
0780 07A5
-07A6 07B0
-0901 0903
0905 0939
093D
-093E 094C
0950
0958 0961
-0962 0963
-0981 0983
0985 098C
098F 0990
0993 09A8
09AA 09B0
09B2
09B6 09B9
-09BE 09C4
-09C7 09C8
-09CB 09CC
-09D7
09DC 09DD
09DF 09E1
-09E2 09E3
09F0 09F1
-0A02
0A05 0A0A
0A0F 0A10
0A13 0A28
0A32 0A33
0A35 0A36
0A38 0A39
-0A3E 0A42
-0A47 0A48
-0A4B 0A4C
0A59 0A5C
0A5E
-0A70 0A71
0A72 0A74
-0A81 0A83
0A85 0A8B
0A8D
0A8F 0A91
0AB2 0AB3
0AB5 0AB9
0ABD
-0ABE 0AC5
-0AC7 0AC9
-0ACB 0ACC
0AD0
0AE0
-0B01 0B03
0B05 0B0C
0B0F 0B10
0B13 0B28
0B32 0B33
0B36 0B39
0B3D
-0B3E 0B43
-0B47 0B48
-0B4B 0B4C
-0B56 0B57
0B5C 0B5D
0B5F 0B61
-0B82 0B83
0B85 0B8A
0B8E 0B90
0B92 0B95
0BA8 0BAA
0BAE 0BB5
0BB7 0BB9
-0BBE 0BC2
-0BC6 0BC8
-0BCA 0BCC
-0BD7
-0C01 0C03
0C05 0C0C
0C0E 0C10
0C12 0C28
0C2A 0C33
0C35 0C39
-0C3E 0C44
-0C46 0C48
-0C4A 0C4C
-0C55 0C56
0C60 0C61
-0C82 0C83
0C85 0C8C
0C8E 0C90
0C92 0CA8
0CAA 0CB3
0CB5 0CB9
-0CBE 0CC4
-0CC6 0CC8
-0CCA 0CCC
-0CD5 0CD6
0CDE
0CE0 0CE1
-0D02 0D03
0D05 0D0C
0D0E 0D10
0D12 0D28
0D2A 0D39
-0D3E 0D43
-0D46 0D48
-0D4A 0D4C
-0D57
0D60 0D61
-0D82 0D83
0D85 0D96
0D9A 0DB1
0DB3 0DBB
0DBD
0DC0 0DC6
-0DCF 0DD4
-0DD6
-0DD8 0DDF
-0DF2 0DF3
0E01 0E30
-0E31
0E32 0E33
-0E34 0E3A
0E40 0E46
-0E4D
0E81 0E82
0E84
0E87 0E88
0EA7
0EAA 0EAB
0EAD 0EB0
-0EB1
0EB2 0EB3
-0EB4 0EB9
-0EBB 0EBC
0EBD
0EC0 0EC4
0EC6
-0ECD
0EDC 0EDD
0F00
0F40 0F47
0F49 0F6A
-0F71 0F81
0F88 0F8B
-0F90 0F97
-0F99 0FBC
1000 1021
1023 1027
1029 102A
-102C 1032
-1036
-1038
1050 1055
-1056 1059
10A0 10C5
10D0 10F6
1100 1159
16A0 16EA
16EE 16F0
1780 17B3
-17B4 17C8
1820 1877
1880 18A8
-18A9
1E00 1E9B
1EA0 1EF9
1F00 1F15
212F 2131
2133 2139
2160 2183
-3005 3006
+3005 3007
+3021 3029
3031 3035
+3038 303A
3041 3094
309D 309E
30A1 30FA
FB00 FB06
FB13 FB17
FB1D
-FB1E
FB1F FB28
FB2A FB36
FB38 FB3C
FFD2 FFD7
FFDA FFDC
10300 1031E
-10330 10349
-1034A
+10330 1034A
10400 10425
10428 1044D
1D400 1D454
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
3006 3007 Ideographic
3021 3029 Ideographic
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
0300 034E INHERITED
0360 0362 INHERITED
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
200C 200D Join_Control
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
0C82 0C83 KANNADA
0C85 0C8C KANNADA
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
30A1 30FA KATAKANA
30FD 30FE KATAKANA
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
1780 17D3 KHMER
17E0 17E9 KHMER
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
0041 005A
0061 007A
00BA
00C0 00D6
00D8 00F6
-00F8 021F
+00F8 01BA
+01BC 01BF
+01C4 021F
0222 0233
0250 02AD
-02B0 02B8
-02BB 02C1
-02D0 02D1
-02E0 02E4
-02EE
-037A
0386
0388 038A
038C
04D0 04F5
04F8 04F9
0531 0556
-0559
0561 0587
-05D0 05EA
-05F0 05F2
-0621 063A
-0640 064A
-0671 06D3
-06D5
-06E5 06E6
-06FA 06FC
-0710
-0712 072C
-0780 07A5
-0905 0939
-093D
-0950
-0958 0961
-0985 098C
-098F 0990
-0993 09A8
-09AA 09B0
-09B2
-09B6 09B9
-09DC 09DD
-09DF 09E1
-09F0 09F1
-0A05 0A0A
-0A0F 0A10
-0A13 0A28
-0A2A 0A30
-0A32 0A33
-0A35 0A36
-0A38 0A39
-0A59 0A5C
-0A5E
-0A72 0A74
-0A85 0A8B
-0A8D
-0A8F 0A91
-0A93 0AA8
-0AAA 0AB0
-0AB2 0AB3
-0AB5 0AB9
-0ABD
-0AD0
-0AE0
-0B05 0B0C
-0B0F 0B10
-0B13 0B28
-0B2A 0B30
-0B32 0B33
-0B36 0B39
-0B3D
-0B5C 0B5D
-0B5F 0B61
-0B85 0B8A
-0B8E 0B90
-0B92 0B95
-0B99 0B9A
-0B9C
-0B9E 0B9F
-0BA3 0BA4
-0BA8 0BAA
-0BAE 0BB5
-0BB7 0BB9
-0C05 0C0C
-0C0E 0C10
-0C12 0C28
-0C2A 0C33
-0C35 0C39
-0C60 0C61
-0C85 0C8C
-0C8E 0C90
-0C92 0CA8
-0CAA 0CB3
-0CB5 0CB9
-0CDE
-0CE0 0CE1
-0D05 0D0C
-0D0E 0D10
-0D12 0D28
-0D2A 0D39
-0D60 0D61
-0D85 0D96
-0D9A 0DB1
-0DB3 0DBB
-0DBD
-0DC0 0DC6
-0E01 0E30
-0E32 0E33
-0E40 0E46
-0E81 0E82
-0E84
-0E87 0E88
-0E8A
-0E8D
-0E94 0E97
-0E99 0E9F
-0EA1 0EA3
-0EA5
-0EA7
-0EAA 0EAB
-0EAD 0EB0
-0EB2 0EB3
-0EBD
-0EC0 0EC4
-0EC6
-0EDC 0EDD
-0F00
-0F40 0F47
-0F49 0F6A
-0F88 0F8B
-1000 1021
-1023 1027
-1029 102A
-1050 1055
10A0 10C5
-10D0 10F6
-1100 1159
-115F 11A2
-11A8 11F9
-1200 1206
-1208 1246
-1248
-124A 124D
-1250 1256
-1258
-125A 125D
-1260 1286
-1288
-128A 128D
-1290 12AE
-12B0
-12B2 12B5
-12B8 12BE
-12C0
-12C2 12C5
-12C8 12CE
-12D0 12D6
-12D8 12EE
-12F0 130E
-1310
-1312 1315
-1318 131E
-1320 1346
-1348 135A
-13A0 13F4
-1401 166C
-166F 1676
-1681 169A
-16A0 16EA
-1780 17B3
-1820 1877
-1880 18A8
1E00 1E9B
1EA0 1EF9
1F00 1F15
2128
212A 212D
212F 2131
-2133 2139
-3005 3006
-3031 3035
-3041 3094
-309D 309E
-30A1 30FA
-30FC 30FE
-3105 312C
-3131 318E
-31A0 31B7
-3400 4DB5
-4E00 9FA5
-A000 A48C
-AC00 D7A3
-F900 FA2D
+2133 2134
+2139
FB00 FB06
FB13 FB17
-FB1D
-FB1F FB28
-FB2A FB36
-FB38 FB3C
-FB3E
-FB40 FB41
-FB43 FB44
-FB46 FBB1
-FBD3 FD3D
-FD50 FD8F
-FD92 FDC7
-FDF0 FDFB
-FE70 FE72
-FE74
-FE76 FEFC
FF21 FF3A
FF41 FF5A
-FF66 FFBE
-FFC2 FFC7
-FFCA FFCF
-FFD2 FFD7
-FFDA FFDC
-10300 1031E
-10330 10349
10400 10425
10428 1044D
1D400 1D454
1D78A 1D7A8
1D7AA 1D7C2
1D7C4 1D7C9
-20000 2A6D6
-2F800 2FA1D
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
0E81 0E82 LAO
0E84 LAO
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
0041 005A LATIN
0061 007A LATIN
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
00A1
00A7 00A8
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
0023
0026
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
2014
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
0009
007C
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
00B4
02C8
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
000C
2028 2029
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
FFFC
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
0029
005D
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
0000 0008
000B
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
000D
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
0021
003F
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
00A0
0F0C
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
002D
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
1100 1159
115F
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
2024 2026
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
002C
002E
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
000A
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
0E5A 0E5B
17D4
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
0030 0039
0660 0669
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
0028
005B
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
0025
00A2
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
0024
002B
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
0022
0027
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
0E01 0E30
0E32 0E33
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
D800 DFFF
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
0020
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
002F
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
E000 F8FF
F0000 FFFFD
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
200B
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
0061 007A
00AA
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
02B0 02B8
02BB 02C1
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
01BB
01C0 01C3
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
0061 007A
00AA
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
0061 007A
00AA
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
01C5
01C8
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
0041 005A
00C0 00D6
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
0300 034E
0360 0362
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
0D02 0D03 MALAYALAM
0D05 0D0C MALAYALAM
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
-0028 002A
-002B
+0028 002B
002D
002F
003C 003E
005B 005E
-007B
-007C
-007D
-007E
+007B 007E
00AC
00B1
00D7
2016
2032 2034
2044
-207A 207C
-207D 207E
-208A 208C
-208D 208E
+207A 207E
+208A 208E
20D0 20DC
20E1
2102
FB29
FE35 FE38
FE59 FE5C
-FE61
-FE62
-FE63
-FE64 FE66
+FE61 FE66
FE68
-FF08 FF0A
-FF0B
+FF08 FF0B
FF0D
FF0F
FF1C FF1E
FF3B FF3E
-FF5B
-FF5C
-FF5D
-FF5E
+FF5B FF5E
FFE2
FFE9 FFEC
1D400 1D454
1D546
1D54A 1D550
1D552 1D6A3
-1D6A8 1D6C0
-1D6C1
-1D6C2 1D6DA
-1D6DB
-1D6DC 1D6FA
-1D6FB
-1D6FC 1D714
-1D715
-1D716 1D734
-1D735
-1D736 1D74E
-1D74F
-1D750 1D76E
-1D76F
-1D770 1D788
-1D789
-1D78A 1D7A8
-1D7A9
-1D7AA 1D7C2
-1D7C3
-1D7C4 1D7C9
+1D6A8 1D7C9
1D7CE 1D7FF
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
0903
093E 0940
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
0488 0489
06DD 06DE
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
0028 0029
003C
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
0300 034E
0360 0362
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
1810 1819 MONGOLIAN
1820 1877 MONGOLIAN
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
1000 1021 MYANMAR
1023 1027 MYANMAR
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
0030 0039
00B2 00B3
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
0030 0039
0660 0669
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
16EE 16F0
2160 2183
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
00B2 00B3
00B9
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
FDD0 FDEF Noncharacter_Code_Point
FFFE FFFF Noncharacter_Code_Point
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
1681 169A OGHAM
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
10300 1031E OLD-ITALIC
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
0B01 0B03 ORIYA
0B05 0B0C ORIYA
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
0345 Other_Alphabetic
05B0 05B9 Other_Alphabetic
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
02B0 02B8 Other_Lowercase
02C0 02C1 Other_Lowercase
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
0028 002A Other_Math
002D Other_Math
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
2160 216F Other_Uppercase
24B6 24CF Other_Uppercase
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
0021 0023
0025 002A
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
005F
203F 2040
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
002D
00AD
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
0029
005D
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
00BB
2019
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
00AB
2018
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
0021 0023
0025 0027
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
0020 007E
00A0 021F
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
0028
005B
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
0021 0023
0025 002A
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
0022 Quotation_Mark
0027 Quotation_Mark
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
16A0 16EA RUNIC
16EE 16F0 RUNIC
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
0024
002B
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
0024
00A2 00A5
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
0D82 0D83 SINHALA
0D85 0D96 SINHALA
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
005E
0060
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
002B
003C 003E
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
00A6 00A7
00A9
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
0009 000D
0020
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
0009 000A
000C 000D
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
0710 072C SYRIAC
0730 074A SYRIAC
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
0B82 0B83 TAMIL
0B85 0B8A TAMIL
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
0C01 0C03 TELUGU
0C05 0C0C TELUGU
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
0021 Terminal_Punctuation
002C Terminal_Punctuation
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
0780 07B0 THAANA
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
0E01 0E3A THAI
0E40 0E4E THAI
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
0F00 TIBETAN
0F18 0F19 TIBETAN
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
01C5
01C8
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
0041 005A
00C0 00D6
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
0041 005A
00C0 00D6
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
0009 000D White_space
0020 White_space
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
0030 0039
0041 005A
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
0030 0039
0041 0046
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
A000 A48C YI
A490 A4A1 YI
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
0020
00A0
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
2028
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
2029
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
0020
00A0
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
1100 G
1101 GG
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
0000 0008 CM
0009 BA
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
0000 001F <control>
0020 SPACE
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
+0030 0
0031 1
0032 2
0033 3
00BC 1/4
00BD 1/2
00BE 3/4
+0660 0
0661 1
0662 2
0663 3
0667 7
0668 8
0669 9
+06F0 0
06F1 1
06F2 2
06F3 3
06F7 7
06F8 8
06F9 9
+0966 0
0967 1
0968 2
0969 3
096D 7
096E 8
096F 9
+09E6 0
09E7 1
09E8 2
09E9 3
09F6 3
09F7 4
09F9 16
+0A66 0
0A67 1
0A68 2
0A69 3
0A6D 7
0A6E 8
0A6F 9
+0AE6 0
0AE7 1
0AE8 2
0AE9 3
0AED 7
0AEE 8
0AEF 9
+0B66 0
0B67 1
0B68 2
0B69 3
0BF0 10
0BF1 100
0BF2 1000
+0C66 0
0C67 1
0C68 2
0C69 3
0C6D 7
0C6E 8
0C6F 9
+0CE6 0
0CE7 1
0CE8 2
0CE9 3
0CED 7
0CEE 8
0CEF 9
+0D66 0
0D67 1
0D68 2
0D69 3
0D6D 7
0D6E 8
0D6F 9
+0E50 0
0E51 1
0E52 2
0E53 3
0E57 7
0E58 8
0E59 9
+0ED0 0
0ED1 1
0ED2 2
0ED3 3
0ED7 7
0ED8 8
0ED9 9
+0F20 0
0F21 1
0F22 2
0F23 3
0F31 15/2
0F32 17/2
0F33 -1/2
+1040 0
1041 1
1042 2
1043 3
16EE 17
16EF 18
16F0 19
+17E0 0
17E1 1
17E2 2
17E3 3
17E7 7
17E8 8
17E9 9
+1810 0
1811 1
1812 2
1813 3
1817 7
1818 8
1819 9
+2070 0
2074 4
2075 5
2076 6
2077 7
2078 8
2079 9
+2080 0
2081 1
2082 2
2083 3
2499 18
249A 19
249B 20
+24EA 0
2776 1
2777 2
2778 3
2791 8
2792 9
2793 10
+3007 0
3021 1
3022 2
3023 3
3287 8
3288 9
3289 10
+FF10 0
FF11 1
FF12 2
FF13 3
10321 5
10322 10
10323 50
+1D7CE 0
1D7CF 1
1D7D0 2
1D7D1 3
1D7D5 7
1D7D6 8
1D7D7 9
+1D7D8 0
1D7D9 1
1D7DA 2
1D7DB 3
1D7DF 7
1D7E0 8
1D7E1 9
+1D7E2 0
1D7E3 1
1D7E4 2
1D7E5 3
1D7E9 7
1D7EA 8
1D7EB 9
+1D7EC 0
1D7ED 1
1D7EE 2
1D7EF 3
1D7F3 7
1D7F4 8
1D7F5 9
+1D7F6 0
1D7F7 1
1D7F8 2
1D7F9 3
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
-65 0041 005A
-97 0061 007A
-170 00AA
-186 00BA
-192 00C0 00D6
-216 00D8 00F6
-248 00F8 01BA
-443 01BB
-444 01BC 01BF
-448 01C0 01C3
-452 01C4 021F
-546 0222 0233
-592 0250 02AD
-688 02B0 02B8
-736 02E0 02E4
-7680 1E00 1E9B
-7840 1EA0 1EF9
-8319 207F
-8490 212A 212B
-64256 FB00 FB06
-65313 FF21 FF3A
-65345 FF41 FF5A
-181 00B5
-890 037A
-902 0386
-904 0388 038A
-908 038C
-910 038E 03A1
-931 03A3 03CE
-976 03D0 03D7
-986 03DA 03F5
-7936 1F00 1F15
-7960 1F18 1F1D
-7968 1F20 1F45
-8008 1F48 1F4D
-8016 1F50 1F57
-8025 1F59
-8027 1F5B
-8029 1F5D
-8031 1F5F 1F7D
-8064 1F80 1FB4
-8118 1FB6 1FBC
-8126 1FBE
-8130 1FC2 1FC4
-8134 1FC6 1FCC
-8144 1FD0 1FD3
-8150 1FD6 1FDB
-8160 1FE0 1FEC
-8178 1FF2 1FF4
-8182 1FF6 1FFC
-8486 2126
-1024 0400 0481
-1155 0483 0486
-1164 048C 04C4
-1223 04C7 04C8
-1227 04CB 04CC
-1232 04D0 04F5
-1272 04F8 04F9
-1329 0531 0556
-1369 0559
-1377 0561 0587
-64275 FB13 FB17
-1488 05D0 05EA
-1520 05F0 05F2
-64285 FB1D
-64287 FB1F FB28
-64298 FB2A FB36
-64312 FB38 FB3C
-64318 FB3E
-64320 FB40 FB41
-64323 FB43 FB44
-64326 FB46 FB4F
-1569 0621 063A
-1601 0641 064A
-1649 0671 06D3
-1749 06D5
-1765 06E5 06E6
-1786 06FA 06FC
-64336 FB50 FBB1
-64467 FBD3 FD3D
-64848 FD50 FD8F
-64914 FD92 FDC7
-65008 FDF0 FDFB
-65136 FE70 FE72
-65140 FE74
-65142 FE76 FEFC
-1808 0710
-1809 0711
-1810 0712 072C
-1840 0730 074A
-1920 0780 07A5
-1958 07A6 07B0
-2305 0901 0902
-2307 0903
-2309 0905 0939
-2364 093C
-2365 093D
-2366 093E 0940
-2369 0941 0948
-2377 0949 094C
-2381 094D
-2384 0950
-2385 0951 0954
-2392 0958 0961
-2402 0962 0963
-2406 0966 096F
-2433 0981
-2437 0985 098C
-2447 098F 0990
-2451 0993 09A8
-2474 09AA 09B0
-2482 09B2
-2486 09B6 09B9
-2492 09BC
-2494 09BE 09C0
-2497 09C1 09C4
-2503 09C7 09C8
-2507 09CB 09CC
-2509 09CD
-2519 09D7
-2524 09DC 09DD
-2527 09DF 09E1
-2530 09E2 09E3
-2534 09E6 09EF
-2544 09F0 09F1
-2562 0A02
-2565 0A05 0A0A
-2575 0A0F 0A10
-2579 0A13 0A28
-2602 0A2A 0A30
-2610 0A32 0A33
-2613 0A35 0A36
-2616 0A38 0A39
-2620 0A3C
-2622 0A3E 0A40
-2625 0A41 0A42
-2631 0A47 0A48
-2635 0A4B 0A4D
-2649 0A59 0A5C
-2654 0A5E
-2662 0A66 0A6F
-2672 0A70 0A71
-2674 0A72 0A74
-2689 0A81 0A82
-2691 0A83
-2693 0A85 0A8B
-2701 0A8D
-2703 0A8F 0A91
-2707 0A93 0AA8
-2730 0AAA 0AB0
-2738 0AB2 0AB3
-2741 0AB5 0AB9
-2748 0ABC
-2749 0ABD
-2750 0ABE 0AC0
-2753 0AC1 0AC5
-2759 0AC7 0AC8
-2761 0AC9
-2763 0ACB 0ACC
-2765 0ACD
-2768 0AD0
-2784 0AE0
-2790 0AE6 0AEF
-2817 0B01
-2818 0B02 0B03
-2821 0B05 0B0C
-2831 0B0F 0B10
-2835 0B13 0B28
-2858 0B2A 0B30
-2866 0B32 0B33
-2870 0B36 0B39
-2876 0B3C
-2877 0B3D
-2878 0B3E
-2879 0B3F
-2880 0B40
-2881 0B41 0B43
-2887 0B47 0B48
-2891 0B4B 0B4C
-2893 0B4D
-2902 0B56
-2903 0B57
-2908 0B5C 0B5D
-2911 0B5F 0B61
-2918 0B66 0B6F
-2946 0B82
-2947 0B83
-2949 0B85 0B8A
-2958 0B8E 0B90
-2962 0B92 0B95
-2969 0B99 0B9A
-2972 0B9C
-2974 0B9E 0B9F
-2979 0BA3 0BA4
-2984 0BA8 0BAA
-2990 0BAE 0BB5
-2999 0BB7 0BB9
-3006 0BBE 0BBF
-3008 0BC0
-3009 0BC1 0BC2
-3014 0BC6 0BC8
-3018 0BCA 0BCC
-3021 0BCD
-3031 0BD7
-3047 0BE7 0BEF
-3056 0BF0 0BF2
-3073 0C01 0C03
-3077 0C05 0C0C
-3086 0C0E 0C10
-3090 0C12 0C28
-3114 0C2A 0C33
-3125 0C35 0C39
-3134 0C3E 0C40
-3137 0C41 0C44
-3142 0C46 0C48
-3146 0C4A 0C4D
-3157 0C55 0C56
-3168 0C60 0C61
-3174 0C66 0C6F
-3202 0C82 0C83
-3205 0C85 0C8C
-3214 0C8E 0C90
-3218 0C92 0CA8
-3242 0CAA 0CB3
-3253 0CB5 0CB9
-3262 0CBE
-3263 0CBF
-3264 0CC0 0CC4
-3270 0CC6
-3271 0CC7 0CC8
-3274 0CCA 0CCB
-3276 0CCC 0CCD
-3285 0CD5 0CD6
-3294 0CDE
-3296 0CE0 0CE1
-3302 0CE6 0CEF
-3330 0D02 0D03
-3333 0D05 0D0C
-3342 0D0E 0D10
-3346 0D12 0D28
-3370 0D2A 0D39
-3390 0D3E 0D40
-3393 0D41 0D43
-3398 0D46 0D48
-3402 0D4A 0D4C
-3405 0D4D
-3415 0D57
-3424 0D60 0D61
-3430 0D66 0D6F
-3458 0D82 0D83
-3461 0D85 0D96
-3482 0D9A 0DB1
-3507 0DB3 0DBB
-3517 0DBD
-3520 0DC0 0DC6
-3530 0DCA
-3535 0DCF 0DD1
-3538 0DD2 0DD4
-3542 0DD6
-3544 0DD8 0DDF
-3570 0DF2 0DF3
-3585 0E01 0E30
-3633 0E31
-3634 0E32 0E33
-3636 0E34 0E3A
-3648 0E40 0E45
-3654 0E46
-3655 0E47 0E4E
-3664 0E50 0E59
-3713 0E81 0E82
-3716 0E84
-3719 0E87 0E88
-3722 0E8A
-3725 0E8D
-3732 0E94 0E97
-3737 0E99 0E9F
-3745 0EA1 0EA3
-3749 0EA5
-3751 0EA7
-3754 0EAA 0EAB
-3757 0EAD 0EB0
-3761 0EB1
-3762 0EB2 0EB3
-3764 0EB4 0EB9
-3771 0EBB 0EBC
-3773 0EBD
-3776 0EC0 0EC4
-3782 0EC6
-3784 0EC8 0ECD
-3792 0ED0 0ED9
-3804 0EDC 0EDD
-3840 0F00
-3864 0F18 0F19
-3872 0F20 0F29
-3882 0F2A 0F33
-3893 0F35
-3895 0F37
-3897 0F39
-3904 0F40 0F47
-3913 0F49 0F6A
-3953 0F71 0F7E
-3967 0F7F
-3968 0F80 0F84
-3974 0F86 0F87
-3976 0F88 0F8B
-3984 0F90 0F97
-3993 0F99 0FBC
-4038 0FC6
-4096 1000 1021
-4131 1023 1027
-4137 1029 102A
-4140 102C
-4141 102D 1030
-4145 1031
-4146 1032
-4150 1036 1037
-4152 1038
-4153 1039
-4160 1040 1049
-4176 1050 1055
-4182 1056 1057
-4184 1058 1059
-4256 10A0 10C5
-4304 10D0 10F6
-4352 1100 1159
-4447 115F 11A2
-4520 11A8 11F9
-12593 3131 318E
-44032 AC00 D7A3
-65440 FFA0 FFBE
-65474 FFC2 FFC7
-65482 FFCA FFCF
-65490 FFD2 FFD7
-65498 FFDA FFDC
-4608 1200 1206
-4616 1208 1246
-4680 1248
-4682 124A 124D
-4688 1250 1256
-4696 1258
-4698 125A 125D
-4704 1260 1286
-4744 1288
-4746 128A 128D
-4752 1290 12AE
-4784 12B0
-4786 12B2 12B5
-4792 12B8 12BE
-4800 12C0
-4802 12C2 12C5
-4808 12C8 12CE
-4816 12D0 12D6
-4824 12D8 12EE
-4848 12F0 130E
-4880 1310
-4882 1312 1315
-4888 1318 131E
-4896 1320 1346
-4936 1348 135A
-4969 1369 1371
-4978 1372 137C
-5024 13A0 13F4
-5121 1401 166C
-5743 166F 1676
-5761 1681 169A
-5792 16A0 16EA
-5870 16EE 16F0
-6016 1780 17B3
-6068 17B4 17B6
-6071 17B7 17BD
-6078 17BE 17C5
-6086 17C6
-6087 17C7 17C8
-6089 17C9 17D3
-6112 17E0 17E9
-6160 1810 1819
-6176 1820 1842
-6211 1843
-6212 1844 1877
-6272 1880 18A8
-6313 18A9
-12353 3041 3094
-12445 309D 309E
-12449 30A1 30FA
-12541 30FD 30FE
-65382 FF66 FF6F
-65393 FF71 FF9D
-12549 3105 312C
-12704 31A0 31B7
-11904 2E80 2E99
-11931 2E9B 2EF3
-12032 2F00 2FD5
-12293 3005
-12295 3007
-12321 3021 3029
-12344 3038 303A
-13312 3400 4DB5
-19968 4E00 9FA5
-63744 F900 FA2D
-131072 20000 2A6D6
-194560 2F800 2FA1D
-40960 A000 A48C
-42128 A490 A4A1
-42148 A4A4 A4B3
-42165 A4B5 A4C0
-42178 A4C2 A4C4
-42182 A4C6
-66304 10300 1031E
-66352 10330 10349
-66378 1034A
-66560 10400 10425
-66600 10428 1044D
-768 0300 034E
-864 0360 0362
-1160 0488 0489
-1425 0591 05A1
-1443 05A3 05B9
-1467 05BB 05BD
-1471 05BF
-1473 05C1 05C2
-1476 05C4
-1611 064B 0655
-1648 0670
-1750 06D6 06DC
-1757 06DD 06DE
-1759 06DF 06E4
-1767 06E7 06E8
-1770 06EA 06ED
-8400 20D0 20DC
-8413 20DD 20E0
-8417 20E1
-8418 20E2 20E3
-12330 302A 302F
-12441 3099 309A
-64286 FB1E
-65056 FE20 FE23
-119143 1D167 1D169
-119163 1D17B 1D182
-119173 1D185 1D18B
-119210 1D1AA 1D1AD
+0041 005A LATIN
+0061 007A LATIN
+00AA LATIN
+00B5 GREEK
+00BA LATIN
+00C0 00D6 LATIN
+00D8 00F6 LATIN
+00F8 021F LATIN
+0222 0233 LATIN
+0250 02AD LATIN
+02B0 02B8 LATIN
+02E0 02E4 LATIN
+0300 034E INHERITED
+0360 0362 INHERITED
+037A GREEK
+0386 GREEK
+0388 038A GREEK
+038C GREEK
+038E 03A1 GREEK
+03A3 03CE GREEK
+03D0 03D7 GREEK
+03DA 03F5 GREEK
+0400 0481 CYRILLIC
+0483 0486 CYRILLIC
+0488 0489 INHERITED
+048C 04C4 CYRILLIC
+04C7 04C8 CYRILLIC
+04CB 04CC CYRILLIC
+04D0 04F5 CYRILLIC
+04F8 04F9 CYRILLIC
+0531 0556 ARMENIAN
+0559 ARMENIAN
+0561 0587 ARMENIAN
+0591 05A1 INHERITED
+05A3 05B9 INHERITED
+05BB 05BD INHERITED
+05BF INHERITED
+05C1 05C2 INHERITED
+05C4 INHERITED
+05D0 05EA HEBREW
+05F0 05F2 HEBREW
+0621 063A ARABIC
+0641 064A ARABIC
+064B 0655 INHERITED
+0670 INHERITED
+0671 06D3 ARABIC
+06D5 ARABIC
+06D6 06E4 INHERITED
+06E5 06E6 ARABIC
+06E7 06E8 INHERITED
+06EA 06ED INHERITED
+06FA 06FC ARABIC
+0710 072C SYRIAC
+0730 074A SYRIAC
+0780 07B0 THAANA
+0901 0903 DEVANAGARI
+0905 0939 DEVANAGARI
+093C 094D DEVANAGARI
+0950 0954 DEVANAGARI
+0958 0963 DEVANAGARI
+0966 096F DEVANAGARI
+0981 BENGALI
+0985 098C BENGALI
+098F 0990 BENGALI
+0993 09A8 BENGALI
+09AA 09B0 BENGALI
+09B2 BENGALI
+09B6 09B9 BENGALI
+09BC BENGALI
+09BE 09C4 BENGALI
+09C7 09C8 BENGALI
+09CB 09CD BENGALI
+09D7 BENGALI
+09DC 09DD BENGALI
+09DF 09E3 BENGALI
+09E6 09F1 BENGALI
+0A02 GURMUKHI
+0A05 0A0A GURMUKHI
+0A0F 0A10 GURMUKHI
+0A13 0A28 GURMUKHI
+0A2A 0A30 GURMUKHI
+0A32 0A33 GURMUKHI
+0A35 0A36 GURMUKHI
+0A38 0A39 GURMUKHI
+0A3C GURMUKHI
+0A3E 0A42 GURMUKHI
+0A47 0A48 GURMUKHI
+0A4B 0A4D GURMUKHI
+0A59 0A5C GURMUKHI
+0A5E GURMUKHI
+0A66 0A74 GURMUKHI
+0A81 0A83 GUJARATI
+0A85 0A8B GUJARATI
+0A8D GUJARATI
+0A8F 0A91 GUJARATI
+0A93 0AA8 GUJARATI
+0AAA 0AB0 GUJARATI
+0AB2 0AB3 GUJARATI
+0AB5 0AB9 GUJARATI
+0ABC 0AC5 GUJARATI
+0AC7 0AC9 GUJARATI
+0ACB 0ACD GUJARATI
+0AD0 GUJARATI
+0AE0 GUJARATI
+0AE6 0AEF GUJARATI
+0B01 0B03 ORIYA
+0B05 0B0C ORIYA
+0B0F 0B10 ORIYA
+0B13 0B28 ORIYA
+0B2A 0B30 ORIYA
+0B32 0B33 ORIYA
+0B36 0B39 ORIYA
+0B3C 0B43 ORIYA
+0B47 0B48 ORIYA
+0B4B 0B4D ORIYA
+0B56 0B57 ORIYA
+0B5C 0B5D ORIYA
+0B5F 0B61 ORIYA
+0B66 0B6F ORIYA
+0B82 0B83 TAMIL
+0B85 0B8A TAMIL
+0B8E 0B90 TAMIL
+0B92 0B95 TAMIL
+0B99 0B9A TAMIL
+0B9C TAMIL
+0B9E 0B9F TAMIL
+0BA3 0BA4 TAMIL
+0BA8 0BAA TAMIL
+0BAE 0BB5 TAMIL
+0BB7 0BB9 TAMIL
+0BBE 0BC2 TAMIL
+0BC6 0BC8 TAMIL
+0BCA 0BCD TAMIL
+0BD7 TAMIL
+0BE7 0BF2 TAMIL
+0C01 0C03 TELUGU
+0C05 0C0C TELUGU
+0C0E 0C10 TELUGU
+0C12 0C28 TELUGU
+0C2A 0C33 TELUGU
+0C35 0C39 TELUGU
+0C3E 0C44 TELUGU
+0C46 0C48 TELUGU
+0C4A 0C4D TELUGU
+0C55 0C56 TELUGU
+0C60 0C61 TELUGU
+0C66 0C6F TELUGU
+0C82 0C83 KANNADA
+0C85 0C8C KANNADA
+0C8E 0C90 KANNADA
+0C92 0CA8 KANNADA
+0CAA 0CB3 KANNADA
+0CB5 0CB9 KANNADA
+0CBE 0CC4 KANNADA
+0CC6 0CC8 KANNADA
+0CCA 0CCD KANNADA
+0CD5 0CD6 KANNADA
+0CDE KANNADA
+0CE0 0CE1 KANNADA
+0CE6 0CEF KANNADA
+0D02 0D03 MALAYALAM
+0D05 0D0C MALAYALAM
+0D0E 0D10 MALAYALAM
+0D12 0D28 MALAYALAM
+0D2A 0D39 MALAYALAM
+0D3E 0D43 MALAYALAM
+0D46 0D48 MALAYALAM
+0D4A 0D4D MALAYALAM
+0D57 MALAYALAM
+0D60 0D61 MALAYALAM
+0D66 0D6F MALAYALAM
+0D82 0D83 SINHALA
+0D85 0D96 SINHALA
+0D9A 0DB1 SINHALA
+0DB3 0DBB SINHALA
+0DBD SINHALA
+0DC0 0DC6 SINHALA
+0DCA SINHALA
+0DCF 0DD4 SINHALA
+0DD6 SINHALA
+0DD8 0DDF SINHALA
+0DF2 0DF3 SINHALA
+0E01 0E3A THAI
+0E40 0E4E THAI
+0E50 0E59 THAI
+0E81 0E82 LAO
+0E84 LAO
+0E87 0E88 LAO
+0E8A LAO
+0E8D LAO
+0E94 0E97 LAO
+0E99 0E9F LAO
+0EA1 0EA3 LAO
+0EA5 LAO
+0EA7 LAO
+0EAA 0EAB LAO
+0EAD 0EB9 LAO
+0EBB 0EBD LAO
+0EC0 0EC4 LAO
+0EC6 LAO
+0EC8 0ECD LAO
+0ED0 0ED9 LAO
+0EDC 0EDD LAO
+0F00 TIBETAN
+0F18 0F19 TIBETAN
+0F20 0F33 TIBETAN
+0F35 TIBETAN
+0F37 TIBETAN
+0F39 TIBETAN
+0F40 0F47 TIBETAN
+0F49 0F6A TIBETAN
+0F71 0F84 TIBETAN
+0F86 0F8B TIBETAN
+0F90 0F97 TIBETAN
+0F99 0FBC TIBETAN
+0FC6 TIBETAN
+1000 1021 MYANMAR
+1023 1027 MYANMAR
+1029 102A MYANMAR
+102C 1032 MYANMAR
+1036 1039 MYANMAR
+1040 1049 MYANMAR
+1050 1059 MYANMAR
+10A0 10C5 GEORGIAN
+10D0 10F6 GEORGIAN
+1100 1159 HANGUL
+115F 11A2 HANGUL
+11A8 11F9 HANGUL
+1200 1206 ETHIOPIC
+1208 1246 ETHIOPIC
+1248 ETHIOPIC
+124A 124D ETHIOPIC
+1250 1256 ETHIOPIC
+1258 ETHIOPIC
+125A 125D ETHIOPIC
+1260 1286 ETHIOPIC
+1288 ETHIOPIC
+128A 128D ETHIOPIC
+1290 12AE ETHIOPIC
+12B0 ETHIOPIC
+12B2 12B5 ETHIOPIC
+12B8 12BE ETHIOPIC
+12C0 ETHIOPIC
+12C2 12C5 ETHIOPIC
+12C8 12CE ETHIOPIC
+12D0 12D6 ETHIOPIC
+12D8 12EE ETHIOPIC
+12F0 130E ETHIOPIC
+1310 ETHIOPIC
+1312 1315 ETHIOPIC
+1318 131E ETHIOPIC
+1320 1346 ETHIOPIC
+1348 135A ETHIOPIC
+1369 137C ETHIOPIC
+13A0 13F4 CHEROKEE
+1401 166C CANADIAN-ABORIGINAL
+166F 1676 CANADIAN-ABORIGINAL
+1681 169A OGHAM
+16A0 16EA RUNIC
+16EE 16F0 RUNIC
+1780 17D3 KHMER
+17E0 17E9 KHMER
+1810 1819 MONGOLIAN
+1820 1877 MONGOLIAN
+1880 18A9 MONGOLIAN
+1E00 1E9B LATIN
+1EA0 1EF9 LATIN
+1F00 1F15 GREEK
+1F18 1F1D GREEK
+1F20 1F45 GREEK
+1F48 1F4D GREEK
+1F50 1F57 GREEK
+1F59 GREEK
+1F5B GREEK
+1F5D GREEK
+1F5F 1F7D GREEK
+1F80 1FB4 GREEK
+1FB6 1FBC GREEK
+1FBE GREEK
+1FC2 1FC4 GREEK
+1FC6 1FCC GREEK
+1FD0 1FD3 GREEK
+1FD6 1FDB GREEK
+1FE0 1FEC GREEK
+1FF2 1FF4 GREEK
+1FF6 1FFC GREEK
+207F LATIN
+20D0 20E3 INHERITED
+2126 GREEK
+212A 212B LATIN
+2E80 2E99 HAN
+2E9B 2EF3 HAN
+2F00 2FD5 HAN
+3005 HAN
+3007 HAN
+3021 3029 HAN
+302A 302F INHERITED
+3038 303A HAN
+3041 3094 HIRAGANA
+3099 309A INHERITED
+309D 309E HIRAGANA
+30A1 30FA KATAKANA
+30FD 30FE KATAKANA
+3105 312C BOPOMOFO
+3131 318E HANGUL
+31A0 31B7 BOPOMOFO
+3400 4DB5 HAN
+4E00 9FA5 HAN
+A000 A48C YI
+A490 A4A1 YI
+A4A4 A4B3 YI
+A4B5 A4C0 YI
+A4C2 A4C4 YI
+A4C6 YI
+AC00 D7A3 HANGUL
+F900 FA2D HAN
+FB00 FB06 LATIN
+FB13 FB17 ARMENIAN
+FB1D HEBREW
+FB1E INHERITED
+FB1F FB28 HEBREW
+FB2A FB36 HEBREW
+FB38 FB3C HEBREW
+FB3E HEBREW
+FB40 FB41 HEBREW
+FB43 FB44 HEBREW
+FB46 FB4F HEBREW
+FB50 FBB1 ARABIC
+FBD3 FD3D ARABIC
+FD50 FD8F ARABIC
+FD92 FDC7 ARABIC
+FDF0 FDFB ARABIC
+FE20 FE23 INHERITED
+FE70 FE72 ARABIC
+FE74 ARABIC
+FE76 FEFC ARABIC
+FF21 FF3A LATIN
+FF41 FF5A LATIN
+FF66 FF6F KATAKANA
+FF71 FF9D KATAKANA
+FFA0 FFBE HANGUL
+FFC2 FFC7 HANGUL
+FFCA FFCF HANGUL
+FFD2 FFD7 HANGUL
+FFDA FFDC HANGUL
+10300 1031E OLD-ITALIC
+10330 1034A GOTHIC
+10400 10425 DESERET
+10428 1044D DESERET
+1D167 1D169 INHERITED
+1D17B 1D182 INHERITED
+1D185 1D18B INHERITED
+1D1AA 1D1AD INHERITED
+20000 2A6D6 HAN
+2F800 2FA1D HAN
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
+
return <<'END';
+0030 0
0031 1
0032 2
0033 3
00B2 2
00B3 3
00B9 1
+0660 0
0661 1
0662 2
0663 3
0667 7
0668 8
0669 9
+06F0 0
06F1 1
06F2 2
06F3 3
06F7 7
06F8 8
06F9 9
+0966 0
0967 1
0968 2
0969 3
096D 7
096E 8
096F 9
+09E6 0
09E7 1
09E8 2
09E9 3
09ED 7
09EE 8
09EF 9
+0A66 0
0A67 1
0A68 2
0A69 3
0A6D 7
0A6E 8
0A6F 9
+0AE6 0
0AE7 1
0AE8 2
0AE9 3
0AED 7
0AEE 8
0AEF 9
+0B66 0
0B67 1
0B68 2
0B69 3
0BED 7
0BEE 8
0BEF 9
+0C66 0
0C67 1
0C68 2
0C69 3
0C6D 7
0C6E 8
0C6F 9
+0CE6 0
0CE7 1
0CE8 2
0CE9 3
0CED 7
0CEE 8
0CEF 9
+0D66 0
0D67 1
0D68 2
0D69 3
0D6D 7
0D6E 8
0D6F 9
+0E50 0
0E51 1
0E52 2
0E53 3
0E57 7
0E58 8
0E59 9
+0ED0 0
0ED1 1
0ED2 2
0ED3 3
0ED7 7
0ED8 8
0ED9 9
+0F20 0
0F21 1
0F22 2
0F23 3
0F27 7
0F28 8
0F29 9
+1040 0
1041 1
1042 2
1043 3
136F 7
1370 8
1371 9
+17E0 0
17E1 1
17E2 2
17E3 3
17E7 7
17E8 8
17E9 9
+1810 0
1811 1
1812 2
1813 3
1817 7
1818 8
1819 9
+2070 0
2074 4
2075 5
2076 6
2077 7
2078 8
2079 9
+2080 0
2081 1
2082 2
2083 3
2087 7
2088 8
2089 9
+FF10 0
FF11 1
FF12 2
FF13 3
FF17 7
FF18 8
FF19 9
+1D7CE 0
1D7CF 1
1D7D0 2
1D7D1 3
1D7D5 7
1D7D6 8
1D7D7 9
+1D7D8 0
1D7D9 1
1D7DA 2
1D7DB 3
1D7DF 7
1D7E0 8
1D7E1 9
+1D7E2 0
1D7E3 1
1D7E4 2
1D7E5 3
1D7E9 7
1D7EA 8
1D7EB 9
+1D7EC 0
1D7ED 1
1D7EE 2
1D7EF 3
1D7F3 7
1D7F4 8
1D7F5 9
+1D7F6 0
1D7F7 1
1D7F8 2
1D7F9 3
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
-%utf8::ToSpecFold = (
+
+%utf8::ToSpecFold =
+(
'00DF' => "\x{0073}\x{0073}",
'0130' => "\x{0069}",
'0131' => "\x{0069}",
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
-%utf8::ToSpecLower = (
+
+%utf8::ToSpecLower =
+(
'00DF' => "\x{00DF}",
'0149' => "\x{0149}",
'01F0' => "\x{01F0}",
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
-%utf8::ToSpecTitle = (
+
+%utf8::ToSpecTitle =
+(
'00DF' => "\x{0053}\x{0073}",
'0149' => "\x{02BC}\x{004E}",
'01F0' => "\x{004A}\x{030C}",
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
+# Built Mon Jan 14 15:52:42 2002.
-%utf8::ToSpecUpper = (
+
+%utf8::ToSpecUpper =
+(
'00DF' => "\x{0053}\x{0053}",
'0149' => "\x{02BC}\x{004E}",
'01F0' => "\x{004A}\x{030C}",
#!/usr/bin/perl -w
-
-#
-# mktables -- create the runtime Perl Unicode files (lib/unicore/**/*.pl)
-# from the Unicode database files (lib/unicore/*.txt).
-#
-
use strict;
-
-my $LastUnicodeCodepoint = 0x10FFFF; # As of Unicode 3.1.1.
+use Carp;
+##
+## mktables -- create the runtime Perl Unicode files (lib/unicore/**/*.pl)
+## from the Unicode database files (lib/unicore/*.txt).
+##
mkdir("In", 0755);
mkdir("Is", 0755);
mkdir("To", 0755);
-sub extend {
- my ($table, $last) = @_;
-
- $table->[-1]->[1] = $last;
-}
-
-sub append {
- my ($table, $code, $name) = @_;
- if (@$table &&
- hex($table->[-1]->[1]) == hex($code) - 1 &&
- (!defined $name || $table->[-1]->[2] eq $name)) {
- extend($table, $code);
+##
+## Process any args.
+##
+my $Verbose = 0;
+
+while (@ARGV)
+{
+ my $arg = shift @ARGV;
+ if ($arg eq '-v') {
+ $Verbose = 1;
+ } elsif ($arg eq '-q') {
+ $Verbose = 0;
} else {
- push @$table, [$code, $code, $name];
+ die "usage: $0 [-v|-q]";
}
}
-sub append_range {
- my ($table, $code_ini, $code_fin, $name) = @_;
- append($table, $code_ini, $name);
- extend($table, $code_fin);
-}
-
-sub inverse {
- my ($table) = @_;
- my $inverse = [];
- my ($first, $last);
- if ($table->[0]->[0]) {
- $last = hex($table->[0]->[0]);
- push @$inverse, [ "0000",
- sprintf("%04X", $last - 1) ];
- }
- for my $i (0..$#$table-1) {
- $first = defined $table->[$i ]->[1] ?
- hex($table->[$i ]->[1]) : 0;
- $last = defined $table->[$i + 1]->[0] ?
- hex($table->[$i + 1]->[0]) : $first;
- push @$inverse, [ sprintf("%04X", $first + 1),
- sprintf("%04X", $last - 1) ]
- unless $first + 1 == $last;
- }
- return $inverse;
-}
-
-sub header {
- my $fh = shift;
+my $LastUnicodeCodepoint = 0x10FFFF; # As of Unicode 3.1.1.
- print $fh <<EOT;
+my $now = localtime;
+my $HEADER=<<"EOF";
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
# This file is built by $0 from e.g. Unicode.txt.
# Any changes made here will be lost!
-EOT
+# Built $now.
+
+EOF
+
+##
+## The main datastructure (a "Table") represents a set of code points that
+## are part of a particular quality (that are part of \pL, \p{InGreek},
+## etc.). They are kept as ranges of code points (starting and ending of
+## each range).
+##
+## For example, the ASCII letters would be represented as:
+## [ [ 0x41 => 0x5A, 'UPPER' ],
+## [ 0x61 => 0x7A, 'LOWER' ] ]
+##
+sub RANGE_START() { 0 } ## index into range element
+sub RANGE_END() { 1 } ## index into range element
+sub RANGE_NAME() { 2 } ## index into range element
+
+my %TableInfo;
+my %FuzzyNames;
+my %AliasInfo;
+
+##
+## Associates a property ("Greek", "Lu", "Assigned",...) with a Table.
+##
+## Called like:
+## New_Prop(In => 'Greek', $Table, AllowFuzzy => 1);
+##
+## Normally, these parameters are set when the Table is created (when the
+## Table->New constructor is called), but there are times when it needs to
+## be done after the fact.
+##
+sub New_Prop($$$@)
+{
+    ## $Type  -- "Is" or "In"
+    ## $Name  -- property name to register
+    ## $Table -- reference to the Table holding the property's code points
+    ## %Args  -- optional key/value pairs; only AllowFuzzy is understood
+    my ($Type, $Name, $Table, %Args) = @_;
+
+    my $AllowFuzzy = delete $Args{AllowFuzzy};
+
+    ## Reject unknown keys, an unknown type, or a table that is not a ref.
+    confess "$0: bad args to New_Prop"
+        if %Args or not ($Type eq 'Is' or $Type eq 'In') or not ref $Table;
+
+    ## The first registration of a name wins; later ones are ignored.
+    return if $TableInfo{$Type}->{$Name};
+
+    $TableInfo{$Type}->{$Name} = $Table;
+
+    ## A fuzzily-accessible property maps its name to itself.
+    $FuzzyNames{$Type}->{$Name} = $Name if $AllowFuzzy;
}
-sub begin {
- my $fh = shift;
- print $fh <<EOT;
-return <<'END';
-EOT
+##
+## Creates a new Table object.
+##
+## Args are key/value pairs:
+## In => Name -- Name of "In" property to be associated with
+## Is => Name -- Name of "Is" property to be associated with
+## AllowFuzzy => Boolean -- True if name can be accessed "fuzzily"
+##
+## No args are required.
+##
+sub Table::New
+{
+    ## Constructor: returns an empty Table (a blessed array of ranges).
+    ## Optional key/value args: In => Name, Is => Name, AllowFuzzy => Boolean.
+    my ($class, %Args) = @_;
+
+    my $Table      = bless [], $class;
+    my $AllowFuzzy = delete $Args{AllowFuzzy};
+
+    ## Register the table under each of Is/In that was supplied,
+    ## consuming those keys as we go.
+    for my $Type (qw(Is In))
+    {
+        my $Name = delete $Args{$Type};
+        next if not $Name;
+        New_Prop($Type => $Name, $Table, AllowFuzzy => $AllowFuzzy);
+    }
+
+    ## Anything still left is a key we do not understand.
+    confess "$0: bad args to Table->New" if %Args;
+
+    return $Table;
}
-sub end {
- my $fh = shift;
-
- print $fh <<EOT;
-END
-EOT
+##
+## Returns true if the Table has no code points
+##
+sub Table::IsEmpty
+{
+    ## True when the Table holds no ranges at all.
+    my ($self) = @_;
+    return !@$self;
}
-sub flush {
- my ($table, $file) = @_;
- print "$file\n";
- if (open(my $fh, ">$file")) {
- header($fh);
- begin($fh);
- for my $i (@$table) {
- print $fh $i->[0], "\t",
- $i->[1] ne $i->[0] ? $i->[1] : "", "\t",
- defined $i->[2] ? $i->[2] : "", "\n";
- }
- end($fh);
- close($fh);
- } else {
- die "$0: $file: $!\n";
- }
+##
+## Returns true if the Table has code points
+##
+sub Table::NotEmpty
+{
+    ## In boolean/scalar context this is the number of ranges, hence
+    ## true exactly when the Table holds at least one range.
+    my ($self) = @_;
+    return @$self;
}
-#
-# The %In contains the mapping of the script/block name into a number.
-#
+##
+## Returns the maximum code point currently in the table.
+##
+sub Table::Max
+{
+    my ($self) = @_;
+
+    ## An empty table has no maximum; treat asking for one as a bug.
+    confess "oops" if $self->IsEmpty;
+
+    ## Ranges are appended in ascending order, so the end of the last
+    ## range is the largest code point in the table.
+    my $LastRange = $self->[-1];
+    return $LastRange->[RANGE_END];
+}
-my %In;
-my $InId = 0;
-my %InIn;
+##
+## Replaces the codepoints in the Table with those in the Table given
+## as an arg. (NOTE: this is not a "deep copy").
+##
+sub Table::Replace($$)
+{
+    ## $self   -- Table whose contents are overwritten
+    ## $Source -- Table supplying the new ranges
+    my ($self, $Source) = @_;
-my %InScript;
-my %InBlock;
+
+    ## Shallow copy: the range records themselves end up shared
+    ## between the two tables.
+    @$self = @$Source;
+}
-#
-# Read in the Unicode.txt, the main Unicode database.
-#
+##
+## Given a new code point, make the last range of the Table extend to
+## include the new (and all intervening) code points.
+##
+sub Table::Extend
+{
+ my $Table = shift; #self
+ my $codepoint = shift;
-my %Cat;
-my %General;
-my @General;
-
-sub gencat {
- my ($Name, $GeneralH, $GeneralA, $Cat,
- $name, $cat, $code, $op) = @_;
-
- $op->($Name, $code, $name);
- $op->($GeneralA, $code, $cat);
-
- $op->($GeneralH->{$name} ||= [], $code, $name);
-
- $op->($Cat->{$cat} ||= [], $code);
- $op->($Cat->{substr($cat, 0, 1)}
- ||= [], $code);
- # 005F: SPACING UNDERSCORE
- $op->($Cat->{Word} ||= [], $code)
- if $cat =~ /^[LMN]/ || $code eq "005F";
- $op->($Cat->{Alnum} ||= [], $code)
- if $cat =~ /^[LMN]/;
- $op->($Cat->{Alpha} ||= [], $code)
- if $cat =~ /^[LM]/;
- # 0009: HORIZONTAL TABULATION
- # 000A: LINE FEED
- # 000B: VERTICAL TABULATION
- # 000C: FORM FEED
- # 000D: CARRIAGE RETURN
- # 0020: SPACE
- $op->($Cat->{Space} ||= [], $code)
- if $cat =~ /^Z/ ||
- $code =~ /^(0009|000A|000B|000C|000D)$/;
- $op->($Cat->{SpacePerl} ||= [], $code)
- if $cat =~ /^Z/ || $code =~ /^(?:0085|2028|2029)$/ ||
- $code =~ /^(0009|000A|000C|000D)$/;
- $op->($Cat->{Blank} ||= [], $code)
- if $code =~ /^(0020|0009)$/ ||
- $cat =~ /^Z[^lp]$/;
- $op->($Cat->{Digit} ||= [], $code) if $cat eq "Nd";
- $op->($Cat->{Upper} ||= [], $code) if $cat eq "Lu";
- $op->($Cat->{Lower} ||= [], $code) if $cat eq "Ll";
- $op->($Cat->{Title} ||= [], $code) if $cat eq "Lt";
- $op->($Cat->{ASCII} ||= [], $code) if $code le "007F";
- $op->($Cat->{Cntrl} ||= [], $code) if $cat =~ /^C/;
- $op->($Cat->{Graph} ||= [], $code) if $cat =~ /^([LMNPS]|Co)/;
- $op->($Cat->{Print} ||= [], $code) if $cat =~ /^([LMNPS]|Co|Zs)/;
- $op->($Cat->{Punct} ||= [], $code) if $cat =~ /^P/;
- # 003[0-9]: DIGIT ZERO..NINE, 00[46][1-6]: A..F, a..f
- $op->($Cat->{XDigit} ||= [], $code)
- if $code =~ /^00(3[0-9]|[46][1-6])$/;
+ my $PrevMax = $Table->Max;
-}
+ confess "oops ($codepoint <= $PrevMax)" if $codepoint <= $PrevMax;
-if (open(my $Unicode, "Unicode.txt")) {
- my @Name;
- my @Bidi;
- my %Bidi;
- my @Comb;
- my @Deco;
- my %Deco;
- my %DC;
- my @Number;
- my @Mirrored;
- my %To;
+ $Table->[-1]->[RANGE_END] = $codepoint;
+}
+##
+## Given a code point range start and end (and optional name), blindly
+## append them to the list of ranges for the Table.
+##
+## NOTE: Code points must be added in strictly ascending numeric order.
+##
+sub Table::RawAppendRange
+{
+    ## $start, $end -- first and last code point of the new range
+    ## $name        -- optional label stored with the range
+    my ($self, $start, $end, $name) = @_;
+
+    ## Test definedness, not truth: "0" is a legitimate name.
+    $name = "" unless defined $name;
+
+    ## No merging and no ordering check: the record is pushed as-is, laid
+    ## out as [ RANGE_START, RANGE_END, RANGE_NAME ].
+    push @$self, [ $start, $end, $name ];
+}
- my $LastCodeInt = -1; # a numeric, not a hexadecimal string.
-
- # UnicodeData-3.1.0.html says
- # no characters in the file have the property, Cn, Not Assigned.
-
- sub check_no_characters { # in the scope of my $LastCodeInt;
- my $code = shift;
- my $diff_from_last = hex($code) - $LastCodeInt;
- my $code_ini = sprintf("%04X", $LastCodeInt + 1);
- $LastCodeInt = hex($code);
- if ($diff_from_last == 1) {
- return;
- } elsif ($diff_from_last == 2) {
- append($Cat{Cn} ||= [], $code_ini);
- append($Cat{C} ||= [], $code_ini);
- } else {
- my $code_fin = sprintf("%04X", hex($code) - 1);
- append_range($Cat{Cn} ||= [], $code_ini, $code_fin);
- append_range($Cat{C} ||= [], $code_ini, $code_fin);
- }
+##
+## Given a code point (and optional name), add it to the Table.
+##
+## NOTE: Code points must be added in strictly ascending numeric order.
+##
+sub Table::Append
+{
+ my $Table = shift; #self
+ my $codepoint = shift;
+ my $name = shift;
+ $name = "" if not defined $name; ## warning: $name can be "0"
+
+ ##
+ ## If we've already got a range working, and this code point is the next
+ ## one in line, and if the name is the same, just extend the current range.
+ ##
+ if ($Table->NotEmpty
+ and
+ $Table->Max == $codepoint - 1
+ and
+ $Table->[-1]->[RANGE_NAME] eq $name)
+ {
+ $Table->Extend($codepoint);
}
-
- while (<$Unicode>) {
- next unless /^[0-9A-Fa-f]+;/;
- s/\s+$//;
-
- my ($code, $name, $cat, $comb, $bidi, $deco,
- $decimal, $digit, $number,
- $mirrored, $unicode10, $comment,
- $upper, $lower, $title) = split(/\s*;\s*/);
-
- if ($name =~ /^<(.+), (First|Last)>$/) {
- if($2 eq 'First') {
- check_no_characters($code);
- } else {
- $LastCodeInt = hex($code);
- }
-
- $name = $1;
- gencat(\@Name, \%General, \@General, \%Cat,
- $name, $cat, $code,
- $2 eq 'First' ? \&append : \&extend);
- unless (defined $In{$name}) {
- $In{$name} = $InId++;
- $InIn{$name} = $General{$name};
- }
- } else {
- check_no_characters($code);
-
- gencat(\@Name, \%General, \@General, \%Cat,
- $name, $cat, $code, \&append);
-
- # No append() here since since several codes may map into one.
- push @{$To{Upper}}, [ $code, $code, $upper ] if $upper;
- push @{$To{Lower}}, [ $code, $code, $lower ] if $lower;
- push @{$To{Title}}, [ $code, $code, $title ] if $title;
-
- append($To{Digit} ||= [], $code, $decimal) if $decimal;
-
- append(\@Bidi, $code, $bidi);
- append($Bidi{$bidi} ||= [], $code);
-
- append(\@Comb, $code, $comb) if $comb;
-
- if ($deco) {
- append(\@Deco, $code, $deco);
- if ($deco =~/^<(\w+)>/) {
- append($Deco{Compat} ||= [], $code);
- append($DC{$1} ||= [], $code);
- } else {
- append($Deco{Canon} ||= [], $code);
- }
- }
-
- append(\@Number, $code, $number) if $number;
-
- append(\@Mirrored, $code) if $mirrored eq "Y";
- }
+ else
+ {
+ $Table->RawAppendRange($codepoint, $codepoint, $name);
}
+}
- check_no_characters(sprintf("%X", $LastUnicodeCodepoint + 1));
-
- flush(\@Name, "Name.pl");
+##
+## Given a code point range starting value and ending value (and name),
+## add the range to the Table.
+##
+## NOTE: Code points must be added in strictly ascending numeric order.
+##
+sub Table::AppendRange
+{
+    ## $start, $end -- first and last code point of the range to add
+    ## $name        -- optional label for the range
+    my ($self, $start, $end, $name) = @_;
+
+    ## Test definedness, not truth: "0" is a legitimate name.
+    $name = "" unless defined $name;
+
+    ## Add the first code point the normal way, then stretch the range
+    ## it landed in out to $end.  A single-point range needs no stretch.
+    $self->Append($start, $name);
+    if ($end > $start) {
+        $self->Extend($end);
+    }
+}
- foreach my $cat (sort keys %Cat) {
- flush($Cat{$cat}, "Is/$cat.pl");
+##
+## Return a new Table that represents all code points not in the Table.
+##
+sub Table::Invert
+{
+    ## Returns a new, unnamed Table holding every code point from 0 to
+    ## $LastUnicodeCodepoint that is NOT in this Table.  The receiver is
+    ## not modified.
+    my $Table = shift; #self
+
+    my $New = Table->New();
+    my $max = -1;   ## highest code point covered so far (-1: none yet)
+    for my $range (@$Table)
+    {
+        my $start = $range->[RANGE_START];
+        my $end   = $range->[RANGE_END];
+        ## Emit the gap, if any, between the previous range and this one.
+        if ($start-1 >= $max+1) {
+            $New->AppendRange($max+1, $start-1, "");
+        }
+        $max = $end;
}
-
- foreach my $to (sort keys %To) {
- flush($To{$to}, "To/$to.pl");
+    ## Emit the gap after the last range.  The comparison must be <= so
+    ## that a gap consisting of only the very last code point is kept.
+    if ($max+1 <= $LastUnicodeCodepoint) {
+        $New->AppendRange($max+1, $LastUnicodeCodepoint);
}
+    return $New;
+}
- flush(\@Bidi, "Bidirectional.pl");
- foreach my $bidi (sort keys %Bidi) {
- flush($Bidi{$bidi}, "Is/Bidi$bidi.pl");
+##
+## Merges any number of other tables with $self, returning the new table.
+## (existing tables are not modified)
+##
+## Can be called as either a constructor or a method.
+##
+sub Table::Merge
+{
+    ## Returns a new, unnamed Table that is the union of all the given
+    ## tables.  Range names are not carried over.  Works both as
+    ## Table->Merge(@tables) and as $table->Merge(@others).
+    shift(@_) if not ref $_[0]; ## if called as a constructor, lose the class
+    my @Tables = @_;
+
+    ## Accumulate all records from all tables
+    my @Records;
+    for my $Table (@Tables) {
+        push @Records, @$Table;
}
- flush(\@Comb, "CombiningClass.pl");
-
- flush(\@Deco, "Decomposition.pl");
- foreach my $deco (sort keys %Deco) {
- flush($Deco{$deco}, "Is/Deco$deco.pl");
+    ## sort by range start, with longer ranges coming first.
+    ## (The tie-break must compare $b against $a to get longest-first.)
+    my ($first, @Rest) = sort {
+        ($a->[RANGE_START] <=> $b->[RANGE_START])
+          or
+        ($b->[RANGE_END] <=> $a->[RANGE_END])
+    } @Records;
+
+    my $New = Table->New();
+
+    ## Nothing to merge (no tables, or only empty ones): the union is empty.
+    return $New if not defined $first;
+
+    ## Ensuring the first range is there makes the subsequent loop easier
+    $New->AppendRange($first->[RANGE_START],
+                      $first->[RANGE_END]);
+
+    ## Fold in records so long as they add new information.
+    for my $set (@Rest)
+    {
+        my $start = $set->[RANGE_START];
+        my $end   = $set->[RANGE_END];
+        if ($start > $New->Max) {
+            ## Entirely beyond what we have: a new (or adjoining) range.
+            $New->AppendRange($start, $end);
+        } elsif ($end > $New->Max) {
+            ## Overlaps the last range but reaches further: stretch it.
+            $New->Extend($end);
+        }
}
- foreach my $dc (sort keys %DC) {
- flush($DC{$dc}, "Is/DC$dc.pl");
- }
-
- flush(\@Number, "Number.pl");
- flush(\@Mirrored, "Is/Mirrored.pl");
-} else {
- die "$0: Unicode.txt: $!\n";
+    return $New;
}
-# The general cateory can be written out already now.
+##
+## Given a filename, write a representation of the Table to a file.
+##
+sub Table::Write
+{
+    ## Writes the Table to $filename as a Perl file that returns a
+    ## here-document of "START<tab>END<tab>NAME" lines (hex code points;
+    ## END is left blank for single-point ranges).  Dies on any I/O error.
+    my $Table    = shift; #self
+    my $filename = shift;
-flush(\@General, "Category.pl");
+
+    print "$filename\n" if $Verbose;
-#
-# Read in the LineBrk.txt.
-#
+
+    ## Lexical handle and three-arg open: no global OUT to clash with,
+    ## and the file name can never be mistaken for an open mode.
+    open(my $out, '>', $filename)
+        or die "$0: can't write $filename: $!\n";
-if (open(my $LineBrk, "LineBrk.txt")) {
- my @Lbrk;
- my %Lbrk;
+
+    print $out $HEADER;
+    print $out "return <<'END';\n";
- while (<$LineBrk>) {
- next unless /^([0-9A-Fa-f]+)(?:\.\.([0-9A-Fa-f]+))?\s*;\s*(\w+)/;
+
+    for my $set (@$Table)
+    {
+        my $start = $set->[RANGE_START];
+        my $end   = $set->[RANGE_END];
+        my $name  = $set->[RANGE_NAME];
- my ($first, $last, $lbrk) = ($1, $2, $3);
+
+        if ($start == $end) {
+            printf $out "%04X\t\t%s\n", $start, $name;
+        } else {
+            printf $out "%04X\t%04X\t%s\n", $start, $end, $name;
+        }
+    }
- append(\@Lbrk, $first, $lbrk);
- append($Lbrk{$lbrk} ||= [], $first);
- if (defined $last) {
- extend(\@Lbrk, $last);
- extend($Lbrk{$lbrk}, $last);
- }
+
+    print $out "END\n";
+
+    ## Buffered write errors (e.g. disk full) only surface at close.
+    close($out)
+        or die "$0: can't close $filename: $!\n";
+}
+
+###########################################################################
+###########################################################################
+###########################################################################
+
+
+##
+## Called like:
+## New_Alias(Is => 'All', SameAs => 'Any', AllowFuzzy => 1);
+##
+## The args must be in that order, although the AllowFuzzy pair may be omitted.
+##
+## This creates 'IsAll' as an alias for 'IsAny'
+##
+sub New_Alias($$$@)
+{
+    ## Records $Alias as another name for the existing property $Name.
+    ## Confesses if the args are malformed, if $Name is not a known
+    ## property of $Type, or if $Alias is already a property of $Type.
+    my $Type   = shift; ## "Is" or "In"
+    my $Alias  = shift;
+    my $SameAs = shift; ## must be the literal string 'SameAs'
+    my $Name   = shift;
+
+    ## remaining args are optional key/val
+    my %Args = @_;
+
+    my $AllowFuzzy = delete $Args{AllowFuzzy};
+
+    ## sanity check a few args
+    if (%Args or ($Type ne 'Is' and $Type ne 'In') or $SameAs ne 'SameAs') {
+        confess "$0: bad args to New_Alias"
}
- flush(\@Lbrk, "Lbrk.pl");
- foreach my $lbrk (sort keys %Lbrk) {
- flush($Lbrk{$lbrk}, "Is/Lbrk$lbrk.pl");
+    if (not $TableInfo{$Type}->{$Name}) {
+        confess "$0: don't have original $Type => $Name to make alias"
+    }
+    ## %TableInfo is keyed by type first, then by property name, so the
+    ## clash check has to look inside the $Type sub-hash.
+    if ($TableInfo{$Type}->{$Alias}) {
+        confess "$0: already have original $Type => $Alias; can't make alias";
}
-} else {
- die "$0: LineBrk.txt: $!\n";
+    $AliasInfo{$Type}->{$Name} = $Alias;
+    if ($AllowFuzzy) {
+        ## Fuzzy lookups of the alias resolve to the original name.
+        $FuzzyNames{$Type}->{$Alias} = $Name;
+    }
+
}
-#
-# Read in the ArabShap.txt.
-#
+##
+## Turn something like
+##    OLD-ITALIC
+## to
+##    Old_Italic
+## and something like
+##    Latin-1 Supplement
+## to
+##    Latin_1_Supplement
+##
+## Every run of non-word characters becomes a single underscore, and
+## every character not preceded by a lowercase letter is upper-cased.
+##
+sub CanonicalName($)
+{
+    my $name = lc shift;
+    $name =~ s/\W+/_/g;                  ## /g: replace ALL separator runs
+    $name =~ s/(?<![a-z])(\w)/\u$1/g;    ## capitalize the start of each word
+    return $name;
+}
-if (open(my $ArabShap, "ArabShap.txt")) {
- my @ArabLink;
- my @ArabLinkGroup;
- while (<$ArabShap>) {
- next unless /^[0-9A-Fa-f]+;/;
- s/\s+$//;
+## All assigned code points
+my $Assigned = Table->New(Is => 'Assigned', AllowFuzzy => 1);
- my ($code, $name, $link, $linkgroup) = split(/\s*;\s*/);
+my $Name = Table->New(); ## all characters, individually by name
+my $General = Table->New(); ## all characters, grouped by category
+my %General;
+my %Cat;
- append(\@ArabLink, $code, $link);
- append(\@ArabLinkGroup, $code, $linkgroup);
- }
+##
+## Process Unicode.txt (Categories, etc.)
+##
+sub Unicode_Txt()
+{
+ my $Bidi = Table->New();
+ my $Deco = Table->New();
+ my $Comb = Table->New();
+ my $Number = Table->New();
+ my $Mirrored = Table->New(Is => 'Mirrored', AllowFuzzy => 0);
- flush(\@ArabLink, "ArabLink.pl");
- flush(\@ArabLinkGroup, "ArabLnkGrp.pl");
-} else {
- die "$0: ArabShap.txt: $!\n";
-}
+ my %DC;
+ my %Bidi;
+ my %Deco;
+ $Deco{Canon} = Table->New(Is => 'Canon', AllowFuzzy => 0);
+ $Deco{Compat} = Table->New(Is => 'Compat', AllowFuzzy => 0);
+
+ ## Initialize Perl-generated categories
+ $Cat{Alnum} = Table->New(Is => 'Alnum', AllowFuzzy => 0);
+ $Cat{Alpha} = Table->New(Is => 'Alpha', AllowFuzzy => 0);
+ $Cat{ASCII} = Table->New(Is => 'ASCII', AllowFuzzy => 0);
+ $Cat{Blank} = Table->New(Is => 'Blank', AllowFuzzy => 0);
+ $Cat{Cntrl} = Table->New(Is => 'Cntrl', AllowFuzzy => 0);
+ $Cat{Digit} = Table->New(Is => 'Digit', AllowFuzzy => 0);
+ $Cat{Graph} = Table->New(Is => 'Graph', AllowFuzzy => 0);
+ $Cat{Lower} = Table->New(Is => 'Lower', AllowFuzzy => 0);
+ $Cat{Print} = Table->New(Is => 'Print', AllowFuzzy => 0);
+ $Cat{Punct} = Table->New(Is => 'Punct', AllowFuzzy => 0);
+ $Cat{SpacePerl} = Table->New(Is => 'SpacePerl', AllowFuzzy => 0);
+ $Cat{Space} = Table->New(Is => 'Space', AllowFuzzy => 0);
+ $Cat{Title} = Table->New(Is => 'Title', AllowFuzzy => 0);
+ $Cat{Upper} = Table->New(Is => 'Upper', AllowFuzzy => 0);
+ $Cat{Word} = Table->New(Is => 'Word' , AllowFuzzy => 0);
+ $Cat{XDigit} = Table->New(Is => 'XDigit', AllowFuzzy => 0);
+ ## Categories from Unicode.txt are auto-initialized in gencat()
-#
-# Read in the Jamo.txt.
-#
+ my %To;
+ $To{Upper} = Table->New();
+ $To{Lower} = Table->New();
+ $To{Title} = Table->New();
+ $To{Digit} = Table->New();
+
+ sub gencat($$$$)
+ {
+ my ($name, ## Name ("LATIN CAPITAL LETTER A")
+ $cat, ## Category ("Lu", "Zp", "Nd", etc.)
+ $code, ## Code point (as an integer)
+ $op) = @_;
+
+ my $MajorCat = substr($cat, 0, 1); ## L, M, Z, S, etc
+
+ $Assigned->$op($code);
+ $Name->$op($code, $name);
+ $General->$op($code, $cat);
+
+ ## add to the sub category (e.g. "Lu", "Nd", "Cf", ..)
+ $Cat{$cat} ||= Table->New(Is => $cat, AllowFuzzy => 0);
+ $Cat{$cat}->$op($code);
+
+ ## add to the major category (e.g. "L", "N", "C", ...)
+ $Cat{$MajorCat} ||= Table->New(Is => $MajorCat, AllowFuzzy => 0);
+ $Cat{$MajorCat}->$op($code);
+
+ ($General{$name} ||= Table->New)->$op($code, $name);
+
+ # 005F: SPACING UNDERSCORE
+ $Cat{Word}->$op($code) if $cat =~ /^[LMN]/ || $code == 0x005F;
+ $Cat{Alnum}->$op($code) if $cat =~ /^[LMN]/;
+ $Cat{Alpha}->$op($code) if $cat =~ /^[LM]/;
+
+
+
+ $Cat{Space}->$op($code) if $cat =~ /^Z/
+ || $code == 0x0009 # 0009: HORIZONTAL TAB
+ || $code == 0x000A # 000A: LINE FEED
+ || $code == 0x000B # 000B: VERTICAL TAB
+ || $code == 0x000C # 000C: FORM FEED
+ || $code == 0x000D; # 000D: CARRIAGE RETURN
+
+
+ $Cat{SpacePerl}->$op($code) if $cat =~ /^Z/
+ || $code == 0x0009 # 0009: HORIZONTAL TAB
+ || $code == 0x000A # 000A: LINE FEED
+ || $code == 0x000C # 000C: FORM FEED
+ || $code == 0x000D # 000D: CARRIAGE RETURN
+ || $code == 0x0085 # 0085: <NEXT LINE>
+ || $code == 0x2028 # 2028: LINE SEPARATOR
+ || $code == 0x2029;# 2029: PARAGRAPH SEP.
+
+ $Cat{Blank}->$op($code) if $cat =~ /^Z[^lp]$/
+ || $code == 0x0009 # 0009: HORIZONTAL TAB
+ || $code == 0x0020; # 0020: SPACE
+
+ $Cat{Digit}->$op($code) if $cat eq "Nd";
+ $Cat{Upper}->$op($code) if $cat eq "Lu";
+ $Cat{Lower}->$op($code) if $cat eq "Ll";
+ $Cat{Title}->$op($code) if $cat eq "Lt";
+ $Cat{ASCII}->$op($code) if $code <= 0x007F;
+ $Cat{Cntrl}->$op($code) if $cat =~ /^C/;
+ $Cat{Graph}->$op($code) if $cat =~ /^([LMNPS]|Co)/;
+ $Cat{Print}->$op($code) if $cat =~ /^([LMNPS]|Co|Zs)/;
+ $Cat{Punct}->$op($code) if $cat =~ /^P/;
+
+ $Cat{XDigit}->$op($code) if ($code >= 0x30 && $code <= 0x39) ## 0..9
+ || ($code >= 0x41 && $code <= 0x46) ## A..F
+ || ($code >= 0x61 && $code <= 0x66); ## a..f
+ }
-if (open(my $Jamo, "Jamo.txt")) {
- my @Short;
+ ## open and read file.....
+ if (not open IN, "Unicode.txt") {
+ die "$0: Unicode.txt: $!\n";
+ }
- while (<$Jamo>) {
- next unless /^([0-9A-Fa-f]+)\s*;\s*(\w*)/;
+ while (<IN>)
+ {
+ next unless /^[0-9A-Fa-f]+;/;
+ s/\s+$//;
+
+ my ($hexcode, ## code point in hex (e.g. "0041")
+ $name, ## character name (e.g. "LATIN CAPITAL LETTER A")
+ $cat, ## category (e.g. "Lu")
+ $comb, ## Canonical combining class (e.g. "230")
+ $bidi, ## directional category (e.g. "L")
+ $deco, ## decomposition mapping
+ $decimal, ## decimal digit value
+ $digit, ## digit value
+ $number, ## numeric value
+ $mirrored, ## mirrored
+ $unicode10, ## name in Unicode 1.0
+ $comment, ## comment field
+ $upper, ## uppercase mapping
+ $lower, ## lowercase mapping
+ $title, ## titlecase mapping
+ ) = split(/\s*;\s*/);
+
+ my $code = hex($hexcode);
+
+ ##
+ ## There are a few pairs of lines like:
+ ## AC00;<Hangul Syllable, First>;Lo;0;L;;;;;N;;;;;
+ ## D7A3;<Hangul Syllable, Last>;Lo;0;L;;;;;N;;;;;
+ ## that define ranges.
+ ##
+ if ($name =~ /^<(.+), (First|Last)>$/)
+ {
+ $name = $1;
+ gencat($name, $cat, $code, $2 eq 'First' ? 'Append' : 'Extend');
+ #New_Prop(In => $name, $General{$name}, AllowFuzzy => 1);
+ }
+ else
+ {
+ ## normal (single-character) lines
+ gencat($name, $cat, $code, 'Append');
+
+ # No Append() here since several codes may map into one.
+ $To{Upper}->RawAppendRange($code, $code, $upper) if $upper;
+ $To{Lower}->RawAppendRange($code, $code, $lower) if $lower;
+ $To{Title}->RawAppendRange($code, $code, $title) if $title;
+ $To{Digit}->Append($code, $decimal) if length $decimal;
+
+ $Bidi->Append($code, $bidi);
+ $Comb->Append($code, $comb) if $comb;
+ $Number->Append($code, $number) if length $number;
+
+ $Mirrored->Append($code) if $mirrored eq "Y";
+
+ $Bidi{$bidi} ||= Table->New(Is => "Bidi$bidi", AllowFuzzy => 0);
+ $Bidi{$bidi}->Append($code);
+
+ if ($deco)
+ {
+ $Deco->Append($code, $deco);
+ if ($deco =~/^<(\w+)>/)
+ {
+ $Deco{Compat}->Append($code);
+
+ $DC{$1} ||= Table->New(Is => "DC$1", AllowFuzzy => 0);
+ $DC{$1}->Append($code);
+ }
+ else
+ {
+ $Deco{Canon}->Append($code);
+ }
+ }
+ }
+ }
+ close IN;
- my ($code, $short) = ($1, $2);
+ ##
+ ## Tidy up a few special cases....
+ ##
- append(\@Short, $code, $short);
- }
+ $Cat{Cn} = $Assigned->Invert; ## Cn is everything that doesn't exist
+ New_Prop(Is => 'Cn', $Cat{Cn}, AllowFuzzy => 0);
- flush(\@Short, "JamoShort.pl");
-} else {
- die "$0: Jamo.txt: $!\n";
-}
+ ## Unassigned is the same as 'Cn'
+ New_Alias(Is => 'Unassigned', SameAs => 'Cn', AllowFuzzy => 1);
-#
-# Read in the Scripts.txt.
-#
+ $Cat{C}->Replace($Cat{C}->Merge($Cat{Cn})); ## Now merge in Cn into C
-my @Scripts;
-if (open(my $Scripts, "Scripts.txt")) {
- while (<$Scripts>) {
- next unless /^([0-9A-Fa-f]+)(?:\.\.([0-9A-Fa-f]+))?\s*;\s*(.+?)\s*\#/;
+ # L& is Ll, Lu, and Lt.
+ New_Prop(Is => 'L&',
+ Table->Merge(@Cat{qw[Ll Lu Lt]}),
+ AllowFuzzy => 0);
- # Wait until all the scripts have been read since
- # they are not listed in numeric order.
- push @Scripts, [ hex($1), $1, $2, $3 ];
- }
-} else {
- die "$0: Scripts.txt: $!\n";
-}
+ ## Any and All are all code points.
+ my $Any = Table->New(Is => 'Any', AllowFuzzy => 1);
+ $Any->RawAppendRange(0, $LastUnicodeCodepoint);
-# Now append the scripts properties in their code point order.
+ New_Alias(Is => 'All', SameAs => 'Any', AllowFuzzy => 1);
-my %Script;
-my $Scripts = [];
-for my $script (sort { $a->[0] <=> $b->[0] } @Scripts) {
- my ($code, $first, $last, $name) = @$script;
- append($Scripts, $first, $name);
- append($Script{$name} ||= [], $first, $name);
- if (defined $last) {
- extend($Scripts, $last);
- extend($Script{$name}, $last);
- }
- unless (defined $In{$name}) {
- $InScript{$InId} = $name;
- $In{$name} = $InId++;
- $InIn{$name} = $Script{$name};
+ ##
+ ## Now dump the files.
+ ##
+ $Name->Write("Name.pl");
+ $Bidi->Write("Bidirectional.pl");
+ $Comb->Write("CombiningClass.pl");
+ $Deco->Write("Decomposition.pl");
+ $Number->Write("Number.pl");
+ $General->Write("Category.pl");
+
+ for my $to (sort keys %To) {
+ $To{$to}->Write("To/$to.pl");
}
}
-# Scripts.pl can be written out already now.
+##
+## Process LineBrk.txt
+##
+sub LineBrk_Txt()
+{
+ if (not open IN, "LineBrk.txt") {
+ die "$0: LineBrk.txt: $!\n";
+ }
-flush(\@Scripts, "Scripts.pl");
+ my $Lbrk = Table->New();
+ my %Lbrk;
-# Common is everything not explicitly assigned to a Script
+ while (<IN>)
+ {
+ next unless /^([0-9A-Fa-f]+)(?:\.\.([0-9A-Fa-f]+))?\s*;\s*(\w+)/;
-$In{Common} = $InId++;
-my $Common = inverse($Scripts);
-$InIn{Common} = $Common;
+ my ($first, $last, $lbrk) = (hex($1), hex($2||""), $3);
-#
-# Read in the Blocks.txt.
-#
+ $Lbrk->Append($first, $lbrk);
-my @Blocks;
-my %Blocks;
-
-if (open(my $Blocks, "Blocks.txt")) {
- while (<$Blocks>) {
- next unless /^([0-9A-Fa-f]+)\.\.([0-9A-Fa-f]+)\s*;\s*(.+?)\s*$/;
-
- my ($first, $last, $name) = ($1, $2, $3);
- my $origname = $name;
-
- # If there's a naming conflict (the script names are
- # in uppercase), the name of the block has " Block"
- # appended to it.
- my $pat = $name;
- $pat =~ s/([- _])/(?:[-_]|\\s+)?/g;
- for my $i (values %InScript) {
- if ($i =~ /^$pat$/i) {
- $name .= " Block";
- last;
- }
- }
+ $Lbrk{$lbrk} ||= Table->New(Is => "Lbrk$lbrk", AllowFuzzy => 0);
+ $Lbrk{$lbrk}->Append($first);
- append(\@Blocks, $first, $name);
- append($Blocks{$name} ||= [], $first, $name);
- if (defined $last) {
- extend(\@Blocks, $last);
- extend($Blocks{$name}, $last);
- }
- unless (defined $In{$name}) {
- $InBlock{$InId} = $origname;
- $In{$name} = $InId++;
- $InIn{$name} = $Blocks{$name};
+ if ($last) {
+ $Lbrk->Extend($last);
+ $Lbrk{$lbrk}->Extend($last);
}
}
-} else {
- die "$0: Blocks.txt: $!\n";
-}
-
-# Blocks.pl can be written out already now.
+ close IN;
-flush(\@Blocks, "Blocks.pl");
+ $Lbrk->Write("Lbrk.pl");
+}
-#
-# Read in the PropList.txt. It contains extended properties not
-# listed in the Unicode.txt, such as 'Other_Alphabetic':
-# alphabetic but not of the general category L; many modifiers
-# belong to this extended property category: while they are not
-# alphabets, they are alphabetic in nature.
-#
+##
+## Process ArabShap.txt.
+##
+sub ArabShap_txt()
+{
+ if (not open IN, "ArabShap.txt") {
+ die "$0: ArabShap.txt: $!\n";
+ }
-my @Props;
+ my $ArabLink = Table->New();
+ my $ArabLinkGroup = Table->New();
-if (open(my $Props, "PropList.txt")) {
- while (<$Props>) {
- next unless /^([0-9A-Fa-f]+)(?:\.\.([0-9A-Fa-f]+))?\s*;\s*(.+?)\s*\#/;
+ while (<IN>)
+ {
+ next unless /^[0-9A-Fa-f]+;/;
+ s/\s+$//;
- # Wait until all the extended properties have been read since
- # they are not listed in numeric order.
- push @Props, [ hex($1), $1, $2, $3 ];
+ my ($hexcode, $name, $link, $linkgroup) = split(/\s*;\s*/);
+ my $code = hex($hexcode);
+ $ArabLink->Append($code, $link);
+ $ArabLinkGroup->Append($code, $linkgroup);
}
-} else {
- die "$0: PropList.txt: $!\n";
+ close IN;
+
+ $ArabLink->Write("ArabLink.pl");
+ $ArabLinkGroup->Write("ArabLnkGrp.pl");
}
-# Now append the extended properties in their code point order.
+##
+## Process Jamo.txt.
+##
+sub Jamo_txt()
+{
+ if (not open IN, "Jamo.txt") {
+ die "$0: Jamo.txt: $!\n";
+ }
+ my $Short = Table->New();
-my %Prop;
-my $Props = [];
+ while (<IN>)
+ {
+ next unless /^([0-9A-Fa-f]+)\s*;\s*(\w*)/;
+ my ($code, $short) = (hex($1), $2);
-for my $prop (sort { $a->[0] <=> $b->[0] } @Props) {
- my ($code, $first, $last, $name) = @$prop;
- append($Props, $first, $name);
- append($Prop{$name} ||= [], $first, $name);
- if (defined $last) {
- extend($Props, $last);
- extend($Prop{$name}, $last);
- }
- unless (defined $In{$name}) {
- $In{$name} = $InId++;
- $InIn{$name} = $Prop{$name};
+ $Short->Append($code, $short);
}
+ close IN;
+ $Short->Write("JamoShort.pl");
}
-# Assigned is everything not Cn
-
-$In{Assigned} = $InId++;
-my $Assigned = inverse($Cat{Cn});
-$InIn{Assigned} = $Assigned;
-
-# Unassigned is everything not Assigned
-
-$In{Unassigned} = $InId++;
-my $Unassigned = $Cat{Cn};
-$InIn{Unassigned} = $Unassigned;
-
-# Unassigned is everything not Assigned
-sub merge_general_and_extended {
- my ($name, $general, $extended) = @_;
- my $merged;
-
- push @$merged,
- map { pop @{$_}; $_ }
- sort { $a->[2] <=> $b->[2] }
- map { [ $_->[0], $_->[1], hex($_->[0]) ] }
- ($general ?
- map { ref $_ ? @$_ : $_ }
- @Cat {ref $general ? @$general : $general } :
- (),
- $extended ?
- map { ref $_ ? @$_ : $_ }
- @Prop{ref $extended ? @$extended : $extended} :
- ());
-
- $In{$name} = $InId++;
- $InIn{$name} = $merged;
-
- return $merged;
-}
-
-# Alphabetic is L and Other_Alphabetic.
-
-my $Alphabetic =
- merge_general_and_extended('Alphabetic', 'L', 'Other_Alphabetic');
+##
+## Process Scripts.txt.
+##
+sub Scripts_txt()
+{
+ my @ScriptInfo;
-# Lowercase is Ll and Other_Lowercase.
+ if (not open(IN, "Scripts.txt")) {
+ die "$0: Scripts.txt: $!\n";
+ }
+ while (<IN>) {
+ next unless /^([0-9A-Fa-f]+)(?:\.\.([0-9A-Fa-f]+))?\s*;\s*(.+?)\s*\#/;
-my $Lowercase =
- merge_general_and_extended('Lowercase', 'Ll', 'Other_Lowercase');
+ # Wait until all the scripts have been read since
+ # they are not listed in numeric order.
+ push @ScriptInfo, [ hex($1), hex($2||""), $3 ];
+ }
+ close IN;
-# Uppercase is Lu and Other_Uppercase.
+ # Now append the scripts properties in their code point order.
-my $Uppercase =
- merge_general_and_extended('Uppercase', 'Lu', 'Other_Uppercase');
+ my %Script;
+ my $Scripts = Table->New();
-# Math is Sm and Other_Math.
+ for my $script (sort { $a->[0] <=> $b->[0] } @ScriptInfo)
+ {
+ my ($first, $last, $name) = @$script;
+ $Scripts->Append($first, $name);
-my $Math =
- merge_general_and_extended('Math', 'Sm', 'Other_Math');
+ $Script{$name} ||= Table->New(Is => CanonicalName($name),
+ AllowFuzzy => 1);
+ $Script{$name}->Append($first, $name);
-# Lampersand is Ll, Lu, and Lt.
+ if ($last) {
+ $Scripts->Extend($last);
+ $Script{$name}->Extend($last);
+ }
+ }
-my $Lampersand =
- merge_general_and_extended('Lampersand', [ qw(Ll Lu Lt) ]);
+ $Scripts->Write("Scripts.pl");
-# ID_Start is Ll, Lu, Lt, Lm, Lo, and Nl.
+ ## Common is everything not explicitly assigned to a Script
+ ##
+ ## ***shouldn't this be intersected with \p{Assigned}? ******
+ ##
+ New_Prop(Is => 'Common', $Scripts->Invert, AllowFuzzy => 1);
+}
-my $ID_Start =
- merge_general_and_extended('ID_Start', [ qw(Ll Lu Lt Lm Lo Nl) ]);
+##
+## Given a name like "Close Punctuation", return a regex (that when applied
+## with /i) matches any valid form of that name (e.g. "ClosePunctuation",
+## "Close-Punctuation", etc.)
+##
+## Accept any space, dash, or underbar where in the official name there is
+## space or a dash (or underbar, but there never is).
+##
+##
+sub NameToRegex($)
+{
+    my ($Pattern) = @_;
+
+    ## Each space, dash, or underbar in the official name becomes an
+    ## optional group accepting a dash, an underbar, or whitespace.
+    $Pattern =~ s/[- _]/(?:[-_]|\\s+)?/g;
+
+    return $Pattern;
+}
-# ID_Continue is ID_Start, Mn, Mc, Nd, and Pc.
+##
+## Process Blocks.txt.
+##
+sub Blocks_txt()
+{
+ ## Parse Blocks.txt into one combined table ($Blocks, written out as
+ ## Blocks.pl at the end) plus one table per block name (%Blocks).
+ my $Blocks = Table->New();
+ my %Blocks;
-my $ID_Continue =
- merge_general_and_extended('ID_Continue', [ qw(Ll Lu Lt Lm Lo Nl
- Mn Mc Nd Pc) ]);
+ if (not open IN, "Blocks.txt") {
+ die "$0: Blocks.txt: $!\n";
+ }
-#
-# Any is any.
-#
+ while (<IN>)
+ {
+ #next if not /Private Use$/;
+ ## Only lines shaped like "FIRST..LAST; Block Name" (hex code
+ ## points) are processed; every other line is skipped.
+ next if not /^([0-9A-Fa-f]+)\.\.([0-9A-Fa-f]+)\s*;\s*(.+?)\s*$/;
-$In{Any} = $InId++;
-my $Any = [ [ 0, sprintf("%04X", $LastUnicodeCodepoint) ] ];
-$InIn{Any} = $Any;
+ my ($first, $last, $name) = (hex($1), hex($2), $3);
-#
-# All is any, too.
-#
+ $Blocks->Append($first, $name);
-$In{All} = $InId++;
-$InIn{All} = $Any;
+ ## Create the per-block table the first time a name is seen.
+ ## NOTE(review): the In => ... argument presumably registers the
+ ## table as an "In" property inside Table->New -- confirm there.
+ $Blocks{$name} ||= Table->New(In=>CanonicalName($name), AllowFuzzy=>1);
+ $Blocks{$name}->Append($first, $name);
-#
-# mapping() will be used to write out the In and Is virtual mappings.
-#
-
-sub mapping {
- my ($map, $name) = @_;
-
- if (open(my $fh, ">$name.pl")) {
- print "$name.pl\n";
- header($fh);
-
- # The %pat will hold a hash that maps the first two
- # lowercased letters of a class to a 'fuzzified' regular
- # expression that points to the real mapping.
-
- my %pat;
-
- # But first write out the offical name to real name
- # (the filename) mapping.
-
- print $fh <<EOT;
-%utf8::${name} =
-(
-EOT
- for my $i (sort { lc $a cmp lc $b } keys %$map) {
- my $pat = $i;
- # Here is the 'fuzzification': accept any space,
- # dash, or underbar where in the official name
- # there is space or a dash (or underbar, but
- # there never is).
- $pat =~ s/([- _])/(?:[-_]|\\s+)?/g;
- # The prefix length of 2 is enough spread,
- # and besides, we have 'Yi' as an In category.
- push @{$pat{lc(substr($i, 0, 2))}}, [ $i, $pat ];
- printf $fh "%-45s => '$map->{$i}',\n", "'$i'";
- }
- print $fh <<EOT;
-);
-EOT
-
- # Now write out the %pat mapping.
-
- print $fh <<EOT;
-%utf8::${name}Pat =
-(
-EOT
- foreach my $prefix (sort keys %pat) {
- print $fh "'$prefix' => {\n";
- foreach my $ipat (@{$pat{$prefix}}) {
- my ($i, $pat) = @$ipat;
- print $fh "\t'$pat' => '$map->{$i}',\n";
- }
- print $fh "},\n";
+ ## Extend() is only called when the range holds more than one code
+ ## point; presumably it stretches the entry just appended up to
+ ## $last -- confirm against Table->Extend.
+ if ($last and $last != $first) {
+ $Blocks->Extend($last);
+ $Blocks{$name}->Extend($last);
}
- print $fh <<EOT;
-);
-EOT
-
- close($fh);
- } else {
- die "$0: $name.pl: $!\n";
}
+ close IN;
+
+ $Blocks->Write("Blocks.pl");
}
-#
-# Write out the virtual In mappings.
-#
+##
+## Read in the PropList.txt. It contains extended properties not
+## listed in the Unicode.txt, such as 'Other_Alphabetic':
+## alphabetic but not of the general category L; many modifiers
+## belong to this extended property category: while they are not
+## alphabets, they are alphabetic in nature.
+##
+sub PropList_txt()
+{
+ ## Parse PropList.txt, build one table per extended property (%Prop),
+ ## then define the derived properties Alphabetic, Lowercase, Uppercase,
+ ## Math, ID_Start and ID_Continue from %Cat and %Prop.
+ my @PropInfo;
+
+ if (not open IN, "PropList.txt") {
+ die "$0: PropList.txt: $!\n";
+ }
-mapping(\%In, "In");
+ while (<IN>)
+ {
+ ## Accepts "XXXX" or "XXXX..YYYY", then ';', the property name, and
+ ## requires a '#' after the name; other lines are skipped.
+ next unless /^([0-9A-Fa-f]+)(?:\.\.([0-9A-Fa-f]+))?\s*;\s*(.+?)\s*\#/;
-#
-# Append the InScript and InBlock mappings.
-# These are needed only if Script= and Block= syntaxes are used.
-#
+ # Wait until all the extended properties have been read since
+ # they are not listed in numeric order.
+ # A missing range end is stored as 0 (hex of the empty string),
+ # which the loop below treats as "single code point".
+ push @PropInfo, [ hex($1), hex($2||""), $3 ];
+ }
+ close IN;
-if (open(my $In, ">>In.pl")) {
- print $In <<EOT;
+ # Now append the extended properties in their code point order.
+ # NOTE(review): $Props is filled below but is never written or
+ # returned within this sub.
+ my $Props = Table->New();
+ my %Prop;
-%utf8::InScript =
-(
-EOT
- for my $i (sort { $a <=> $b } keys %InScript) {
- printf $In "%4d => '$InScript{$i}',\n", $i;
- }
- print $In <<EOT;
-);
-EOT
+ for my $prop (sort { $a->[0] <=> $b->[0] } @PropInfo)
+ {
+ my ($first, $last, $name) = @$prop;
+ $Props->Append($first, $name);
- print $In <<EOT;
+ $Prop{$name} ||= Table->New(Is => $name, AllowFuzzy => 1);
+ $Prop{$name}->Append($first, $name);
-%utf8::InBlock =
-(
-EOT
- for my $i (sort { $a <=> $b } keys %InBlock) {
- printf $In "%4d => '$InBlock{$i}',\n", $i;
+ if ($last) {
+ $Props->Extend($last);
+ $Prop{$name}->Extend($last);
+ }
}
- print $In <<EOT;
-);
-EOT
-} else {
- die "$0: In.pl: $!\n";
-}
-
-#
-# Write out the real In mappings
-# (the In.pl written out just above has the virtual In mappings)
-#
-foreach my $in (sort { $In{$a} <=> $In{$b} } keys %In) {
- flush($InIn{$in}, "In/$In{$in}.pl");
+ # NOTE(review): %Cat is not declared in this sub; presumably it holds
+ # the general-category tables built elsewhere in this file -- confirm.
+
+ # Alphabetic is L and Other_Alphabetic.
+ New_Prop(Is => 'Alphabetic',
+ Table->Merge($Cat{L}, $Prop{Other_Alphabetic}),
+ AllowFuzzy => 1);
+
+ # Lowercase is Ll and Other_Lowercase.
+ New_Prop(Is => 'Lowercase',
+ Table->Merge($Cat{Ll}, $Prop{Other_Lowercase}),
+ AllowFuzzy => 1);
+
+ # Uppercase is Lu and Other_Uppercase.
+ New_Prop(Is => 'Uppercase',
+ Table->Merge($Cat{Lu}, $Prop{Other_Uppercase}),
+ AllowFuzzy => 1);
+
+ # Math is Sm and Other_Math.
+ New_Prop(Is => 'Math',
+ Table->Merge($Cat{Sm}, $Prop{Other_Math}),
+ AllowFuzzy => 1);
+
+ # ID_Start is Ll, Lu, Lt, Lm, Lo, and Nl.
+ New_Prop(Is => 'ID_Start',
+ Table->Merge(@Cat{qw[Ll Lu Lt Lm Lo Nl]}),
+ AllowFuzzy => 1);
+
+ # ID_Continue is ID_Start, Mn, Mc, Nd, and Pc.
+ New_Prop(Is => 'ID_Continue',
+ Table->Merge(@Cat{qw[Ll Lu Lt Lm Lo Nl Mn Mc Nd Pc ]}),
+ AllowFuzzy => 1);
}
-#
-# The mapping from General Category long forms to short forms is
-# currently hardwired here since no simple data file in the UCD
-# seems to do that. Unicode 3.2 will assumedly correct this.
-#
-
-my %Is = (
+sub Make_GC_Aliases()
+{
+ ##
+ ## The mapping from General Category long forms to short forms is
+ ## currently hardwired here since no simple data file in the UCD
+ ## seems to do that. Unicode 3.2 will presumably correct this.
+ ##
+ ## Keys of %Is are the long names, values are the short category
+ ## codes they stand for.
+ ##
+ my %Is = (
'Letter' => 'L',
'Uppercase_Letter' => 'Lu',
'Lowercase_Letter' => 'Ll',
'Surrogate' => 'Cs',
'Private Use' => 'Co',
'Unassigned' => 'Cn',
-);
-
-#
-# Write out the virtual Is mappings.
-#
-
-mapping(\%Is, "Is");
-
-#
-# Read in the special cases.
-#
+ );
-my %Case;
-
-if (open(my $SpecCase, "SpecCase.txt")) {
- while (<$SpecCase>) {
- next unless /^[0-9A-Fa-f]+;/;
- s/\#.*//;
- s/\s+$//;
-
- my ($code, $lower, $title, $upper, $condition) = split(/\s*;\s*/);
-
- if ($condition) { # not implemented yet
- print "# SKIPPING $_\n";
- next;
- }
+ ## make the aliases....
+ ## One New_Alias() call per long name, pointing it (SameAs) at the
+ ## short code; each alias is requested with AllowFuzzy => 1.
+ while (my ($Alias, $Name) = each %Is) {
+ New_Alias(Is => $Alias, SameAs => $Name, AllowFuzzy => 1);
+ }
+}
- # Wait until all the special cases have been read since
- # they are not listed in numeric order.
- my $ix = hex($code);
- push @{$Case{Lower}}, [ $ix, $code, $lower ];
- push @{$Case{Title}}, [ $ix, $code, $title ];
- push @{$Case{Upper}}, [ $ix, $code, $upper ];
+##
+## Writes the info accumulated in
+##
+##    %TableInfo;
+##    %FuzzyNames;
+##
+## For each of the two types ('In' and 'Is') this writes one table file per
+## entry of %TableInfo into the $Type/ directory and then the index file
+## $Type.pl, which holds %utf8::$Type (name => filename) and
+## %utf8::${Type}Pat (two-letter prefix => { regex => filename }).
+##
+## All hashes are walked in sorted key order so that the generated
+## filenames and the generated files are identical from run to run.
+##
+## NOTE(review): earlier versions of this comment also listed %AliasInfo,
+## but nothing in this sub reads it -- confirm where aliases are written.
+##
+sub WriteAllMappings()
+{
+    for my $Type ('In', 'Is')
+    {
+        my %Filenames; ## lowercased filenames already handed out
+
+        my %Exact; ## will become %utf8::Is or %utf8::In
+        my %Pat;   ## will become %utf8::IsPat or %utf8::InPat
+
+        ##
+        ## First write all the files to the $Type/ directory.  The names
+        ## are visited in sorted order so that the numeric suffix used to
+        ## resolve filename collisions always lands on the same table.
+        ##
+        for my $Name (sort keys %{$TableInfo{$Type}})
+        {
+            my $Table = $TableInfo{$Type}{$Name};
+
+            ## Need an 8.3 safe filename: drop underbars and other
+            ## non-word characters (capitalizing what follows them),
+            ## then cut to eight characters.
+            my $filename = $Name;
+            $filename =~ s/[_\W]+(\w*)/\u$1/g;
+            substr($filename, 8) = '' if length($filename) > 8;
+
+            ##
+            ## Make sure the filename doesn't conflict with something we
+            ## might have already written. If we have, say,
+            ##     Greek_Extended1
+            ##     Greek_Extended2
+            ## they become
+            ##     GreekExt
+            ##     GreekEx2
+            ##
+            while (my $num = $Filenames{lc $filename}++)
+            {
+                $num++; ## so filenames with numbers start with '2', which
+                        ## just looks more natural.
+                substr($filename, -length($num)) = $num;
+            }
+
+            ##
+            ## Okay, write the file...
+            ##
+            $Exact{$Name} = $filename;
+            $Table->Write("$Type/$filename.pl");
+        }
+
+        ##
+        ## Build %Pat
+        ##
+        for my $Fuzzy (sort keys %{$FuzzyNames{$Type}})
+        {
+            my $Real = $FuzzyNames{$Type}{$Fuzzy};
+            my $File = $Exact{$Real};
+
+            if (not $File) {
+                die "$0: oops [$Real]";
+            }
+
+            ## The prefix length of 2 is enough spread,
+            ## and besides, we have 'Yi' as an In category.
+            my $Prefix = lc(substr($Fuzzy, 0, 2));
+            my $Regex = NameToRegex($Fuzzy);
+
+            if ($Pat{$Prefix}->{$Regex}) {
+                warn "WHOA, conflict with /$Regex/: $Pat{$Prefix}->{$Regex} vs $File\n";
+            }
+
+            $Pat{$Prefix}->{$Regex} = $File;
+        }
+
+        ##
+        ## Since the fuzzy method will provide for a way to match $Fuzzy,
+        ## there's no need for $Fuzzy to be in %Exact as well.
+        ## This can't be done in the loop above because there could be
+        ## multiple $Fuzzys pointing at the same $Real, and we don't want
+        ## the first to delete the exact mapping out from under the second.
+        ##
+        for my $Fuzzy (keys %{$FuzzyNames{$Type}})
+        {
+            delete $Exact{$Fuzzy};
+        }
+
+        ##
+        ## Now write In.pl / Is.pl
+        ##
+        if (not open OUT, ">$Type.pl") {
+            die "$0: $Type.pl: $!\n";
+        }
+        print OUT $HEADER;
+        print OUT "##\n";
+        print OUT "## Data in this file used by ../utf8_heavy.pl\n";
+        print OUT "##\n";
+        print OUT "\n";
+        print OUT "## Mapping from name to filename in ./$Type\n";
+        print OUT "%utf8::$Type = (\n";
+        for my $Name (sort keys %Exact)
+        {
+            my $File = $Exact{$Name};
+            printf OUT " %-41s => %s,\n", "'$Name'", "'$File'";
+        }
+        print OUT ");\n\n";
+
+        print OUT "## Mappings from regex to filename in ./$Type/\n";
+        print OUT "%utf8::${Type}Pat = (\n";
+        for my $Prefix (sort keys %Pat)
+        {
+            print OUT " '$Prefix' => {\n";
+            ## Sorted so the generated file does not depend on hash order.
+            for my $Regex (sort keys %{ $Pat{$Prefix} }) {
+                my $File = $Pat{$Prefix}{$Regex};
+                print OUT "\t'$Regex' => '$File',\n";
+            }
+            print OUT " },\n";
+        }
+        print OUT ");\n";
+
+        ## Buffered write errors only surface at close time.
+        close(OUT) or die "$0: $Type.pl: $!\n";
}
-} else {
- die "$0: SpecCase.txt: $!\n";
}
-# Now write out the special cases properties in their code point order.
-# Prepend them to the To/{Upper,Lower,Title}.pl.
-
-for my $case (qw(Lower Title Upper)) {
- my $NormalCase = do "To/$case.pl" || die "$0: To/$case.pl: $!\n";
- if (open(my $Case, ">To/$case.pl")) {
- header($Case);
- print $Case <<EOT;
-
-%utf8::ToSpec$case = (
-EOT
- for my $prop (sort { $a->[0] <=> $b->[0] } @{$Case{$case}}) {
- my ($ix, $code, $to) = @$prop;
- my $tostr =
- join "", map { sprintf "\\x{%s}", $_ } split ' ', $to;
- printf $Case qq['%04X' => "$tostr",\n], $ix;
- }
- print $Case <<EOT;
-);
+sub SpecCase_txt()
+{
+ #
+ # Read in the special cases.
+ #
+ # Reads SpecCase.txt, then rewrites To/Lower.pl, To/Title.pl and
+ # To/Upper.pl so that each starts with a %utf8::ToSpec<Case> hash of
+ # the unconditional special mappings, followed by the value the
+ # existing file returned, wrapped in a here-document.
+ #
-EOT
- begin($Case);
- print $Case $NormalCase;
- end($Case);
- } else {
- die "$0: To/$case.txt: $!\n";
+ my %CaseInfo;
+
+ if (not open IN, "SpecCase.txt") {
+ die "$0: SpecCase.txt: $!\n";
+ }
+ while (<IN>) {
+ next unless /^[0-9A-Fa-f]+;/;
+ s/\#.*//;
+ s/\s+$//;
+
+ my ($code, $lower, $title, $upper, $condition) = split(/\s*;\s*/);
+
+ if ($condition) { # not implemented yet
+ print "# SKIPPING $_\n" if $Verbose;
+ next;
+ }
+
+ # Wait until all the special cases have been read since
+ # they are not listed in numeric order.
+ my $ix = hex($code);
+ push @{$CaseInfo{Lower}}, [ $ix, $code, $lower ];
+ push @{$CaseInfo{Title}}, [ $ix, $code, $title ];
+ push @{$CaseInfo{Upper}}, [ $ix, $code, $upper ];
+ }
+ close IN;
+
+ # Now write out the special cases properties in their code point order.
+ # Prepend them to the To/{Upper,Lower,Title}.pl.
+
+ for my $case (qw(Lower Title Upper))
+ {
+ # do FILE sets $@ when the file fails to compile but $! when it
+ # cannot be read at all, so report whichever one is set.
+ my $NormalCase = do "To/$case.pl" || die "$0: To/$case.pl: " . ($@ || $!) . "\n";
+ if (not open OUT, ">To/$case.pl") {
+ die "$0: To/$case.pl: $!\n";
+ }
+
+ print OUT $HEADER, "\n";
+ print OUT "%utf8::ToSpec$case =\n(\n";
+
+ for my $prop (sort { $a->[0] <=> $b->[0] } @{$CaseInfo{$case}}) {
+ my ($ix, $code, $to) = @$prop;
+ my $tostr =
+ join "", map { sprintf "\\x{%s}", $_ } split ' ', $to;
+ printf OUT qq['%04X' => "$tostr",\n], $ix;
+ }
+ print OUT ");\n\n";
+ print OUT "return <<'END';\n";
+ print OUT $NormalCase;
+ print OUT "END\n";
+ # Buffered write errors only surface at close time.
+ close OUT or die "$0: To/$case.pl: $!\n";
}
}
#
# We will do full case folding, C + F + I (see CaseFold.txt).
#
+sub CaseFold_txt()
+{
+ ## Reads CaseFold.txt. Status 'C' entries go into the $Fold table,
+ ## which is written to To/Fold.pl; status 'F' and 'I' entries are
+ ## collected in %Fold and then prepended to To/Fold.pl as the
+ ## %utf8::ToSpecFold hash.
+ if (not open IN, "CaseFold.txt") {
+ ## Name the file that actually failed to open.
+ die "$0: CaseFold.txt: $!\n";
+ }
-if (open(my $CaseFold, "CaseFold.txt")) {
- my @Fold;
+ my $Fold = Table->New();
my %Fold;
- while (<$CaseFold>) {
+ while (<IN>) {
# Skip status 'S', simple case folding
next unless /^([0-9A-Fa-f]+)\s*;\s*([CFI])\s*;\s*([0-9A-Fa-f]+(?: [0-9A-Fa-f]+)*)\s*;/;
- my ($code, $status, $fold) = ($1, $2, $3);
+ my ($code, $status, $fold) = (hex($1), $2, $3);
if ($status eq 'C') { # Common: one-to-one folding
# No append() since several codes may fold into one.
- push @Fold, [ $code, $code, $fold ];
+ $Fold->RawAppendRange($code, $code, $fold);
} else { # F: full, or I: dotted uppercase I -> dotless lowercase I
- $Fold{hex($code)} = $fold;
+ $Fold{$code} = $fold;
}
}
+ close IN;
- flush(\@Fold, "To/Fold.pl");
+ $Fold->Write("To/Fold.pl");
#
# Prepend the special foldings to the common foldings.
#
my $CommonFold = do "To/Fold.pl" || die "$0: To/Fold.pl: $!\n";
- if (open(my $Fold, ">To/Fold.pl")) {
- header($Fold);
- print $Fold <<EOT;
-
-%utf8::ToSpecFold = (
-EOT
- for my $code (sort { $a <=> $b } keys %Fold) {
- my $foldstr =
- join "", map { sprintf "\\x{%s}", $_ } split ' ', $Fold{$code};
- printf $Fold qq['%04X' => "$foldstr",\n], $code;
- }
- print $Fold <<EOT;
-);
-
-EOT
- begin($Fold);
- print $Fold $CommonFold;
- end($Fold);
- } else {
- die "$0: To/Fold.pl: $!\n";
+ if (not open OUT, ">To/Fold.pl") {
+ die "$0: To/Fold.pl: $!\n";
+ }
+ print OUT $HEADER, "\n";
+ print OUT "%utf8::ToSpecFold =\n(\n";
+ for my $code (sort { $a <=> $b } keys %Fold) {
+ my $foldstr =
+ join "", map { sprintf "\\x{%s}", $_ } split ' ', $Fold{$code};
+ printf OUT qq['%04X' => "$foldstr",\n], $code;
}
-} else {
- die "$0: CaseFold.txt: $!\n";
+ print OUT ");\n\n";
+ print OUT "return <<'END';\n";
+ print OUT $CommonFold;
+ print OUT "END\n";
+ ## Buffered write errors only surface at close time.
+ close OUT or die "$0: To/Fold.pl: $!\n";
}
+## Do it....
+##
+## Each call below runs one processing step defined earlier in this file.
+## WriteAllMappings() writes out what has been accumulated in %TableInfo
+## and %FuzzyNames, so it presumably must follow the steps that fill them.
+## NOTE(review): PropList_txt() reads %Cat, which is presumably filled by
+## Unicode_Txt() -- confirm before reordering these calls.
+
+Unicode_Txt();
+Make_GC_Aliases();
+PropList_txt();
+
+Scripts_txt();
+Blocks_txt();
+
+LineBrk_Txt();
+ArabShap_txt();
+Jamo_txt();
+SpecCase_txt();
+
+WriteAllMappings();
+
+CaseFold_txt();
+
# That's all, folks!
+__END__
with external libraries or existing data. G_FLOAT is still available as
a configuration option. The default on VAX (D_FLOAT) has not changed.
-=head2 Different Definition of the Unicode Character Classes \p{In...}
-
-As suggested by the Unicode consortium, the Unicode character classes
-now prefer I<scripts> as opposed to I<blocks> (as defined by Unicode);
-in Perl, when the C<\p{In....}> and the C<\p{In....}> regular expression
-constructs are used. This has changed the definition of some of those
-character classes.
-
-The difference between scripts and blocks is that scripts are the
-glyphs used by a language or a group of languages, while the blocks
-are more artificial groupings of 256 characters based on the Unicode
-numbering.
-
-In general this change results in more inclusive Unicode character
-classes, but changes to the other direction also do take place:
-for example while the script C<Latin> includes all the Latin
-characters and their various diacritic-adorned versions, it
-does not include the various punctuation or digits (since they
-are not solely C<Latin>).
-
-Changes in the character class semantics may have happened if a script
-and a block happen to have the same name, for example C<Hebrew>.
-In such cases the script wins and C<\p{InHebrew}> now means the script
-definition of Hebrew. The block definition in still available,
-though, by appending C<Block> to the name: C<\p{InHebrewBlock}> means
-what C<\p{InHebrew}> meant in perl 5.6.0. For the full list
-of affected character classes, see L<perlunicode/Blocks>.
+=head2 New Unicode Properties
+
+Unicode I<scripts> are now supported. Scripts are similar to (and superior
+to) Unicode I<blocks>. The difference between scripts and blocks is that
+scripts are the glyphs used by a language or a group of languages, while
+the blocks are more artificial groupings of (mostly) 256 characters based
+on the Unicode numbering.
+
+In general, scripts are more inclusive, but not universally so. For
+example, while the script C<Latin> includes all the Latin characters and
+their various diacritic-adorned versions, it does not include the various
+punctuation or digits (since they are not solely C<Latin>).
+
+A number of other properties are now supported, including C<\p{L&}>,
+C<\p{Any}>, C<\p{Assigned}>, C<\p{Unassigned}>, C<\p{Blank}> and
+C<\p{SpacePerl}> (along with their C<\P{...}> versions, of course).
+See L<perlunicode> for details, and more additions.
+
+The C<In> and C<Is> prefixes to names used with the C<\p{...}> and C<\P{...}>
+constructs are now almost always optional. The only exception is that an C<In>
+prefix is required to signify a Unicode block when a block name conflicts
+with a script name. For example, C<\p{Tibetan}> refers to the script, while
+C<\p{InTibetan}> refers to the block. When there is no name conflict, you
+can omit the C<In> from the block name (e.g. C<\p{BraillePatterns}>), but
+to be safe, it's probably best to always use the C<In>.
=head2 Perl Parser Stress Tested
=item *
-The Unicode character classes \p{Blank} and \p{SpacePerl} have been
-added. "Blank" is like C isblank(), that is, it contains only
-"horizontal whitespace" (the space character is, the newline isn't),
-and the "SpacePerl" is the Unicode equivalent of C<\s> (\p{Space}
-isn't, since that includes the vertical tabulator character, whereas
-C<\s> doesn't.)
+The properties \p{Blank} and \p{SpacePerl} have been added. "Blank" is like
+C isblank(), that is, it contains only "horizontal whitespace" (the space
+character is, the newline isn't), and the "SpacePerl" is the Unicode
+equivalent of C<\s> (\p{Space} isn't, since that includes the vertical
+tabulator character, whereas C<\s> doesn't.)
+
+See "New Unicode Properties" earlier in this document for additional
+information on changes with Unicode properties.
=back
on your favorite CPAN mirror for a slew of potentially useful
modules.
-If using crypt() on a Unicode string (which potentially has
-characters with codepoints above 255), Perl tries to make sense of
-the situation by using only the low eight bits of the characters when
-calling crypt().
+If using crypt() on a Unicode string (which I<potentially> has
+characters with codepoints above 255), Perl tries to make sense
+of the situation by trying to downgrade (a copy of)
+the string back to an eight-bit byte string before calling crypt()
+(on that copy). If that works, good. If not, crypt() dies with
+C<Wide character in crypt>.
=item dbmclose HASH
And, as you'll have noticed from the previous example, if you override
C<glob>, the C<E<lt>*E<gt>> glob operator is overridden as well.
+In a similar fashion, overriding the C<readline> function also overrides
+the equivalent I/O operator C<< <FILEHANDLE> >>.
+
Finally, some built-ins (e.g. C<exists> or C<grep>) can't be overridden.
=head2 Autoloading
All the code we ship with Perl needs to be sensible about temporary file
handling, locking, input validation, and so on.
+=head2 Sort out the uid-setting mess
+
+Currently there are several problems with the setting of uids ($<, $>
+for the real and effective uids). Firstly, exactly which setuid() call
+gets invoked on which platform is simply a big mess that needs to be
+untangled. Secondly, the effects are apparently not standard across
+platforms, (if you first set $< and then $>, or vice versa, being
+uid==euid== zero, or just euid==zero, or as a normal user, what are
+the results?). The test suite not (usually) being run as root means
+that these things do not get much testing. Thirdly, there's quite
+often a third uid called saved uid, and Perl has no knowledge of that
+feature in any way. (If one has the saved uid of zero, one can get
+back any real and effective uids.) As an example, to change also the
+saved uid, one needs to set the real and effective uids B<twice>-- in
+most systems, that is: in HP-UX that doesn't seem to work.
+
=head2 Custom opcodes
Have a way to introduce user-defined opcodes without the subroutine call
=item *
-Named Unicode properties and block ranges may be used as character
-classes via the new C<\p{}> (matches property) and C<\P{}> (doesn't
-match property) constructs. For instance, C<\p{Lu}> matches any
+Named Unicode properties, scripts, and block ranges may be used like
+character classes via the new C<\p{}> (matches property) and C<\P{}>
+(doesn't match property) constructs. For instance, C<\p{Lu}> matches any
character with the Unicode "Lu" (Letter, uppercase) property, while
C<\p{M}> matches any character with a "M" (mark -- accents and such)
-property. Single letter properties may omit the brackets, so that can
-be written C<\pM> also. Many predefined character classes are
-available, such as C<\p{IsMirrored}> and C<\p{InTibetan}>.
-
-The C<\p{Is...}> test for "general properties" such as "letter",
-"digit", while the C<\p{In...}> test for Unicode scripts and blocks.
+property. Single letter properties may omit the brackets, so that can be
+written C<\pM> also. Many predefined properties are available, such
+as C<\p{Mirrored}> and C<\p{Tibetan}>.
The official Unicode script and block names have spaces and dashes as
-separators, but for convenience you can have dashes, spaces, and
-underbars at every word division, and you need not care about correct
-casing. It is recommended, however, that for consistency you use the
-following naming: the official Unicode script, block, or property name
-(see below for the additional rules that apply to block names), with
-whitespace and dashes replaced with underbar, and the words
-"uppercase-first-lowercase-rest". That is, "Latin-1 Supplement"
-becomes "Latin_1_Supplement".
+separators, but for convenience you can have dashes, spaces, and underbars
+at every word division, and you need not care about correct casing. It is
+recommended, however, that for consistency you use the following naming:
+the official Unicode script, block, or property name (see below for the
+additional rules that apply to block names), with whitespace and dashes
+removed, and the words "uppercase-first-lowercase-rest". That is, "Latin-1
+Supplement" becomes "Latin1Supplement".
You can also negate both C<\p{}> and C<\P{}> by introducing a caret
-(^) between the first curly and the property name: C<\p{^In_Tamil}> is
-equal to C<\P{In_Tamil}>.
+(^) between the first curly and the property name: C<\p{^Tamil}> is
+equal to C<\P{Tamil}>.
-The C<In> and C<Is> can be left out: C<\p{Greek}> is equal to
-C<\p{In_Greek}>, C<\P{Pd}> is equal to C<\P{Pd}>.
+Here are the basic Unicode General Category properties, followed by their
+long form (you can use either, e.g. C<\p{Lu}> and C<\p{UppercaseLetter}>
+are identical).
Short Long
L Letter
- Lu Uppercase_Letter
- Ll Lowercase_Letter
- Lt Titlecase_Letter
- Lm Modifier_Letter
- Lo Other_Letter
+ Lu UppercaseLetter
+ Ll LowercaseLetter
+ Lt TitlecaseLetter
+ Lm ModifierLetter
+ Lo OtherLetter
M Mark
- Mn Nonspacing_Mark
- Mc Spacing_Mark
- Me Enclosing_Mark
+ Mn NonspacingMark
+ Mc SpacingMark
+ Me EnclosingMark
N Number
- Nd Decimal_Number
- Nl Letter_Number
- No Other_Number
+ Nd DecimalNumber
+ Nl LetterNumber
+ No OtherNumber
P Punctuation
- Pc Connector_Punctuation
- Pd Dash_Punctuation
- Ps Open_Punctuation
- Pe Close_Punctuation
- Pi Initial_Punctuation
+ Pc ConnectorPunctuation
+ Pd DashPunctuation
+ Ps OpenPunctuation
+ Pe ClosePunctuation
+ Pi InitialPunctuation
(may behave like Ps or Pe depending on usage)
- Pf Final_Punctuation
+ Pf FinalPunctuation
(may behave like Ps or Pe depending on usage)
- Po Other_Punctuation
+ Po OtherPunctuation
S Symbol
- Sm Math_Symbol
- Sc Currency_Symbol
- Sk Modifier_Symbol
- So Other_Symbol
+ Sm MathSymbol
+ Sc CurrencySymbol
+ Sk ModifierSymbol
+ So OtherSymbol
Z Separator
- Zs Space_Separator
- Zl Line_Separator
- Zp Paragraph_Separator
+ Zs SpaceSeparator
+ Zl LineSeparator
+ Zp ParagraphSeparator
C Other
Cc Control
Cf Format
- Cs Surrogate
- Co Private_Use
+ Cs Surrogate (not usable)
+ Co PrivateUse
Cn Unassigned
The single-letter properties match all characters in any of the
two-letter sub-properties starting with the same letter.
There's also C<L&> which is an alias for C<Ll>, C<Lu>, and C<Lt>.
-The following reserved ranges have C<In> tests:
-
- CJK_Ideograph_Extension_A
- CJK_Ideograph
- Hangul_Syllable
- Non_Private_Use_High_Surrogate
- Private_Use_High_Surrogate
- Low_Surrogate
- Private_Surrogate
- CJK_Ideograph_Extension_B
- Plane_15_Private_Use
- Plane_16_Private_Use
+Because Perl hides the need for the user to understand the internal
+representation of Unicode characters, it has no need to support the
+somewhat messy concept of surrogates. Therefore, the C<Cs> property is not
+supported.
-For example C<"\x{AC00}" =~ \p{HangulSyllable}> will test true.
-(Handling of surrogates is not implemented yet, because Perl
-uses UTF-8 and not UTF-16 internally to represent Unicode.
-So you really can't use the "Cs" category.)
+Because scripts differ in their directionality (for example Hebrew is
+written right to left), Unicode supplies these properties:
-Additionally, because scripts differ in their directionality
-(for example Hebrew is written right to left), all characters
-have their directionality defined:
+ Property Meaning
BidiL Left-to-Right
BidiLRE Left-to-Right Embedding
BidiWS Whitespace
BidiON Other Neutrals
+For example, C<\p{BidiR}> matches all characters that are normally
+written right to left.
+
=back
=head2 Scripts
-The scripts available for C<\p{In...}> and C<\P{In...}>, for example
-C<\p{InLatin}> or \p{InCyrillic>, are as follows:
+The scripts available via C<\p{...}> and C<\P{...}>, for example
+C<\p{Latin}> or C<\p{Cyrillic}>, are as follows:
Arabic
Armenian
Bengali
Bopomofo
- Canadian-Aboriginal
+ CanadianAboriginal
Cherokee
Cyrillic
Deseret
Mongolian
Myanmar
Ogham
- Old-Italic
+ OldItalic
Oriya
Runic
Sinhala
properties, defined by the F<PropList> Unicode database:
ASCII_Hex_Digit
- Bidi_Control
+ BidiControl
Dash
Diacritic
Extender
- Hex_Digit
+ HexDigit
Hyphen
Ideographic
- Join_Control
- Noncharacter_Code_Point
- Other_Alphabetic
- Other_Lowercase
- Other_Math
- Other_Uppercase
- Quotation_Mark
- White_Space
+ JoinControl
+ NoncharacterCodePoint
+ OtherAlphabetic
+ OtherLowercase
+ OtherMath
+ OtherUppercase
+ QuotationMark
+ WhiteSpace
and further derived properties:
- Alphabetic Lu + Ll + Lt + Lm + Lo + Other_Alphabetic
- Lowercase Ll + Other_Lowercase
- Uppercase Lu + Other_Uppercase
- Math Sm + Other_Math
+ Alphabetic Lu + Ll + Lt + Lm + Lo + OtherAlphabetic
+ Lowercase Ll + OtherLowercase
+ Uppercase Lu + OtherUppercase
+ Math Sm + OtherMath
ID_Start Lu + Ll + Lt + Lm + Lo + Nl
ID_Continue ID_Start + Mn + Mc + Nd + Pc
Any Any character
- Assigned Any non-Cn character
+ Assigned Any non-Cn character (i.e. synonym for C<\P{Cn}>)
+ Unassigned Synonym for C<\p{Cn}>
Common Any character (or unassigned code point)
not explicitly assigned to a script
+For backward compatibility, all properties mentioned so far may have C<Is>
+prepended to their name (e.g. C<\P{IsLu}> is equal to C<\P{Lu}>).
+
=head2 Blocks
-In addition to B<scripts>, Unicode also defines B<blocks> of
-characters. The difference between scripts and blocks is that the
-scripts concept is closer to natural languages, while the blocks
-concept is more an artificial grouping based on groups of 256 Unicode
-characters. For example, the C<Latin> script contains letters from
-many blocks. On the other hand, the C<Latin> script does not contain
-all the characters from those blocks. It does not, for example,
-contain digits because digits are shared across many scripts. Digits
-and other similar groups, like punctuation, are in a category called
-C<Common>.
+In addition to B<scripts>, Unicode also defines B<blocks> of characters.
+The difference between scripts and blocks is that the scripts concept is
+closer to natural languages, while the blocks concept is more an artificial
+grouping based on groups of mostly 256 Unicode characters. For example, the
+C<Latin> script contains letters from many blocks. On the other hand, the
+C<Latin> script does not contain all the characters from those blocks. It
+does not, for example, contain digits because digits are shared across many
+scripts. Digits and other similar groups, like punctuation, are in a
+category called C<Common>.
For more about scripts, see the UTR #24:
http://www.unicode.org/Public/UNIDATA/Blocks.txt
-Because there are overlaps in naming (there are, for example, both
-a script called C<Katakana> and a block called C<Katakana>, the block
-version has C<Block> appended to its name, C<\p{InKatakanaBlock}>.
-
-Notice that this definition was introduced in Perl 5.8.0: in Perl
-5.6 only the blocks were used; in Perl 5.8.0 scripts became the
-preferential Unicode character class definition (prompted by
-recommendations from the Unicode consortium); this meant that
-the definitions of some character classes changed (the ones in
-the below list that have the C<Block> appended).
-
- Alphabetic Presentation Forms
- Arabic Block
- Arabic Presentation Forms-A
- Arabic Presentation Forms-B
- Armenian Block
- Arrows
- Basic Latin
- Bengali Block
- Block Elements
- Bopomofo Block
- Bopomofo Extended
- Box Drawing
- Braille Patterns
- Byzantine Musical Symbols
- CJK Compatibility
- CJK Compatibility Forms
- CJK Compatibility Ideographs
- CJK Compatibility Ideographs Supplement
- CJK Radicals Supplement
- CJK Symbols and Punctuation
- CJK Unified Ideographs
- CJK Unified Ideographs Extension A
- CJK Unified Ideographs Extension B
- Cherokee Block
- Combining Diacritical Marks
- Combining Half Marks
- Combining Marks for Symbols
- Control Pictures
- Currency Symbols
- Cyrillic Block
- Deseret Block
- Devanagari Block
- Dingbats
- Enclosed Alphanumerics
- Enclosed CJK Letters and Months
- Ethiopic Block
- General Punctuation
- Geometric Shapes
- Georgian Block
- Gothic Block
- Greek Block
- Greek Extended
- Gujarati Block
- Gurmukhi Block
- Halfwidth and Fullwidth Forms
- Hangul Compatibility Jamo
- Hangul Jamo
- Hangul Syllables
- Hebrew Block
- High Private Use Surrogates
- High Surrogates
- Hiragana Block
- IPA Extensions
- Ideographic Description Characters
- Kanbun
- Kangxi Radicals
- Kannada Block
- Katakana Block
- Khmer Block
- Lao Block
- Latin 1 Supplement
- Latin Extended Additional
- Latin Extended-A
- Latin Extended-B
- Letterlike Symbols
- Low Surrogates
- Malayalam Block
- Mathematical Alphanumeric Symbols
- Mathematical Operators
- Miscellaneous Symbols
- Miscellaneous Technical
- Mongolian Block
- Musical Symbols
- Myanmar Block
- Number Forms
- Ogham Block
- Old Italic Block
- Optical Character Recognition
- Oriya Block
- Private Use
- Runic Block
- Sinhala Block
- Small Form Variants
- Spacing Modifier Letters
- Specials
- Superscripts and Subscripts
- Syriac Block
- Tags
- Tamil Block
- Telugu Block
- Thaana Block
- Thai Block
- Tibetan Block
- Unified Canadian Aboriginal Syllabics
- Yi Radicals
- Yi Syllables
+Block names are given with the C<In> prefix. For example, the
+Katakana block is referenced via C<\p{InKatakana}>. The C<In>
+prefix may be omitted if there is no naming conflict with a script
+or any other property, but it is recommended that C<In> always be used
+to avoid confusion.
+
+These block names are supported:
+
+ InAlphabeticPresentationForms
+ InArabicBlock
+ InArabicPresentationFormsA
+ InArabicPresentationFormsB
+ InArmenianBlock
+ InArrows
+ InBasicLatin
+ InBengaliBlock
+ InBlockElements
+ InBopomofoBlock
+ InBopomofoExtended
+ InBoxDrawing
+ InBraillePatterns
+ InByzantineMusicalSymbols
+ InCJKCompatibility
+ InCJKCompatibilityForms
+ InCJKCompatibilityIdeographs
+ InCJKCompatibilityIdeographsSupplement
+ InCJKRadicalsSupplement
+ InCJKSymbolsAndPunctuation
+ InCJKUnifiedIdeographs
+ InCJKUnifiedIdeographsExtensionA
+ InCJKUnifiedIdeographsExtensionB
+ InCherokeeBlock
+ InCombiningDiacriticalMarks
+ InCombiningHalfMarks
+ InCombiningMarksForSymbols
+ InControlPictures
+ InCurrencySymbols
+ InCyrillicBlock
+ InDeseretBlock
+ InDevanagariBlock
+ InDingbats
+ InEnclosedAlphanumerics
+ InEnclosedCJKLettersAndMonths
+ InEthiopicBlock
+ InGeneralPunctuation
+ InGeometricShapes
+ InGeorgianBlock
+ InGothicBlock
+ InGreekBlock
+ InGreekExtended
+ InGujaratiBlock
+ InGurmukhiBlock
+ InHalfwidthAndFullwidthForms
+ InHangulCompatibilityJamo
+ InHangulJamo
+ InHangulSyllables
+ InHebrewBlock
+ InHighPrivateUseSurrogates
+ InHighSurrogates
+ InHiraganaBlock
+ InIPAExtensions
+ InIdeographicDescriptionCharacters
+ InKanbun
+ InKangxiRadicals
+ InKannadaBlock
+ InKatakanaBlock
+ InKhmerBlock
+ InLaoBlock
+ InLatin1Supplement
+ InLatinExtendedAdditional
+ InLatinExtendedA
+ InLatinExtendedB
+ InLetterlikeSymbols
+ InLowSurrogates
+ InMalayalamBlock
+ InMathematicalAlphanumericSymbols
+ InMathematicalOperators
+ InMiscellaneousSymbols
+ InMiscellaneousTechnical
+ InMongolianBlock
+ InMusicalSymbols
+ InMyanmarBlock
+ InNumberForms
+ InOghamBlock
+ InOldItalicBlock
+ InOpticalCharacterRecognition
+ InOriyaBlock
+ InPrivateUse
+ InRunicBlock
+ InSinhalaBlock
+ InSmallFormVariants
+ InSpacingModifierLetters
+ InSpecials
+ InSuperscriptsAndSubscripts
+ InSyriacBlock
+ InTags
+ InTamilBlock
+ InTeluguBlock
+ InThaanaBlock
+ InThaiBlock
+ InTibetanBlock
+ InUnifiedCanadianAboriginalSyllabics
+ InYiRadicals
+ InYiSyllables
=over 4
[ 1] \x{...}
[ 2] \N{...}
- [ 3] . \p{Is...} \P{Is...}
+ [ 3] . \p{...} \P{...}
[ 4] now scripts (see UTR#24 Script Names) in addition to blocks
[ 5] have negation
[ 6] can use look-ahead to emulate subtraction (*)
in Perl can be written as:
- (?!\p{UNASSIGNED})\p{GreekBlock}
- (?=\p{ASSIGNED})\p{GreekBlock}
+ (?!\p{Unassigned})\p{InGreek}
+ (?=\p{Assigned})\p{InGreek}
But in this particular example, you probably really want
}
unless (@ARGV) {
- foreach my $dir (qw(base comp cmd run io op)) {
+ foreach my $dir (qw(base comp cmd run io op uni)) {
_find_tests($dir);
}
_find_tests("lib") unless $core;
push @tests, <run/*.t>;
push @tests, <io/*.t>;
push @tests, <op/*.t>;
+ push @tests, <uni/*.t>;
push @tests, <lib/*.t>;
use File::Spec;
my $updir = File::Spec->updir;
skip_all("crypt unimplemented");
}
else {
- plan(tests => 2);
+ plan(tests => 4);
}
}
ok(substr(crypt("ab", "cd"), 2) ne substr(crypt("ab", "ce"), 2), "salt makes a difference");
-ok(crypt("HI", "HO") eq crypt(join("",map{chr($_+256)}unpack"C*","HI"), "HO"), "low eight bits of Unicode");
+$a = "a\xFF\x{100}";
+
+eval {$b = crypt($a, "cd")};
+like($@, qr/Wide character in crypt/, "wide characters ungood");
+
+chop $a; # throw away the wide character
+
+eval {$b = crypt($a, "cd")};
+is($@, '', "downgrade to eight bit characters");
+is($b, crypt("a\xFF", "cd"), "downgrade results agree");
+
push @INC, '../lib';
}
-print "1..11\n";
+print "1..17\n";
#
# This file tries to test builtin override using CORE::GLOBAL
print "not " if $r or $@ !~ /^Can't locate NoNeXiSt/i;
print "ok 11\n";
}
+
+#
+# readline() has special behaviour too
+#
+
+$r = 11;
+BEGIN { *CORE::GLOBAL::readline = sub (;*) { ++$r }; }
+print <FH> == 12 ? "ok 12\n" : "not ok 12\n";
+print <$fh> == 13 ? "ok 13\n" : "not ok 13\n";
+my $pad_fh;
+print <$pad_fh> == 14 ? "ok 14\n" : "not ok 14\n";
+
+# Non-global readline() override
+BEGIN { *Rgs::readline = sub (;*) { --$r }; }
+package Rgs;
+print <FH> == 13 ? "ok 15\n" : "not ok 15\n";
+print <$fh> == 12 ? "ok 16\n" : "not ok 16\n";
+print <$pad_fh> == 11 ? "ok 17\n" : "not ok 17\n";
# Test the Unicode script classes
-print "not " unless chr(0x100) =~ /\p{InLatin}/; # outside Latin-1
+print "not " unless chr(0x100) =~ /\p{IsLatin}/; # outside Latin-1
print "ok 661\n";
-print "not " unless chr(0x212b) =~ /\p{InLatin}/; # Angstrom sign, very outside
+print "not " unless chr(0x212b) =~ /\p{IsLatin}/; # Angstrom sign, very outside
print "ok 662\n";
-print "not " unless chr(0x5d0) =~ /\p{InHebrew}/; # inside HebrewBlock
+print "not " unless chr(0x5d0) =~ /\p{IsHebrew}/; # inside InHebrew
print "ok 663\n";
-print "not " unless chr(0xfb4f) =~ /\p{InHebrew}/; # outside HebrewBlock
+print "not " unless chr(0xfb4f) =~ /\p{IsHebrew}/; # outside InHebrew
print "ok 664\n";
-print "not " unless chr(0xb5) =~ /\p{InGreek}/; # singleton (not in a range)
+print "not " unless chr(0xb5) =~ /\p{IsGreek}/; # singleton (not in a range)
print "ok 665\n";
-print "not " unless chr(0x37a) =~ /\p{InGreek}/; # singleton
+print "not " unless chr(0x37a) =~ /\p{IsGreek}/; # singleton
print "ok 666\n";
-print "not " unless chr(0x386) =~ /\p{InGreek}/; # singleton
+print "not " unless chr(0x386) =~ /\p{IsGreek}/; # singleton
print "ok 667\n";
-print "not " unless chr(0x387) =~ /\P{InGreek}/; # not there
+print "not " unless chr(0x387) =~ /\P{IsGreek}/; # not there
print "ok 668\n";
-print "not " unless chr(0x388) =~ /\p{InGreek}/; # range
+print "not " unless chr(0x388) =~ /\p{IsGreek}/; # range
print "ok 669\n";
-print "not " unless chr(0x38a) =~ /\p{InGreek}/; # range
+print "not " unless chr(0x38a) =~ /\p{IsGreek}/; # range
print "ok 670\n";
-print "not " unless chr(0x38b) =~ /\P{InGreek}/; # not there
+print "not " unless chr(0x38b) =~ /\P{IsGreek}/; # not there
print "ok 671\n";
-print "not " unless chr(0x38c) =~ /\p{InGreek}/; # singleton
+print "not " unless chr(0x38c) =~ /\p{IsGreek}/; # singleton
print "ok 672\n";
##
}
{
- print "not " unless "a" =~ /\p{LowercaseLetter}/;
+ print "not " unless "a" =~ /\p{Lowercase}/;
print "ok 745\n";
- print "not " if "A" =~ /\p{
- Lowercase
- Letter
- }/x;
+ print "not " if "A" =~ /\p{Lowercase}/;
print "ok 746\n";
}
{
- print "not " unless "\x{AC00}" =~ /\p{HangulSyllable}/;
+ print "not " unless "\x{AC00}" =~ /\p{HangulSyllables}/;
print "ok 747\n";
}
'^(o)(?!.*\1)'i Oo n - -
(.*)\d+\1 abc12bc y $1 bc
(?m:(foo\s*$)) foo\n bar y $1 foo
+(.*)c abcd y $1 ab
+(.*)(?=c) abcd y $1 ab
+(.*)(?=c)c abcd yB $1 ab
+(.*)(?=b|c) abcd y $1 ab
+(.*)(?=b|c)c abcd y $1 ab
+(.*)(?=c|b) abcd y $1 ab
+(.*)(?=c|b)c abcd y $1 ab
+(.*)(?=[bc]) abcd y $1 ab
+(.*)(?=[bc])c abcd yB $1 ab
+(.*)(?<=b) abcd y $1 ab
+(.*)(?<=b)c abcd y $1 ab
+(.*)(?<=b|c) abcd y $1 abc
+(.*)(?<=b|c)c abcd y $1 ab
+(.*)(?<=c|b) abcd y $1 abc
+(.*)(?<=c|b)c abcd y $1 ab
+(.*)(?<=[bc]) abcd y $1 abc
+(.*)(?<=[bc])c abcd y $1 ab
+(.*?)c abcd y $1 ab
+(.*?)(?=c) abcd y $1 ab
+(.*?)(?=c)c abcd yB $1 ab
+(.*?)(?=b|c) abcd y $1 a
+(.*?)(?=b|c)c abcd y $1 ab
+(.*?)(?=c|b) abcd y $1 a
+(.*?)(?=c|b)c abcd y $1 ab
+(.*?)(?=[bc]) abcd y $1 a
+(.*?)(?=[bc])c abcd yB $1 ab
+(.*?)(?<=b) abcd y $1 ab
+(.*?)(?<=b)c abcd y $1 ab
+(.*?)(?<=b|c) abcd y $1 ab
+(.*?)(?<=b|c)c abcd y $1 ab
+(.*?)(?<=c|b) abcd y $1 ab
+(.*?)(?<=c|b)c abcd y $1 ab
+(.*?)(?<=[bc]) abcd y $1 ab
+(.*?)(?<=[bc])c abcd y $1 ab
my $v = sprintf("%d.%.3d%.3d",$revision,$version,$subversion);
-ok( $v eq "$]", "\$^V eq \$] (string)");
+ok( $v eq "$]", qq{\$^V eq "\$]"});
$v = $revision + $version/1000 + $subversion/1000000;