Jeffrey's Unicode adventure continues: unify the In/*.pl
Jarkko Hietaniemi [Wed, 16 Jan 2002 05:37:29 +0000 (05:37 +0000)]
and Is/*.pl to lib/*.pl, remove In.pl and Is.pl, introduce
Canonical.pl and Exact.pl.

p4raw-id: //depot/perl@14294

300 files changed:
MANIFEST
lib/unicore/Canonical.pl [new file with mode: 0644]
lib/unicore/Exact.pl [new file with mode: 0644]
lib/unicore/In.pl [deleted file]
lib/unicore/Is.pl [deleted file]
lib/unicore/Is/LbrkAI.pl [deleted file]
lib/unicore/Is/LbrkAL.pl [deleted file]
lib/unicore/Is/LbrkB2.pl [deleted file]
lib/unicore/Is/LbrkBA.pl [deleted file]
lib/unicore/Is/LbrkBB.pl [deleted file]
lib/unicore/Is/LbrkBK.pl [deleted file]
lib/unicore/Is/LbrkCB.pl [deleted file]
lib/unicore/Is/LbrkCL.pl [deleted file]
lib/unicore/Is/LbrkCM.pl [deleted file]
lib/unicore/Is/LbrkCR.pl [deleted file]
lib/unicore/Is/LbrkEX.pl [deleted file]
lib/unicore/Is/LbrkGL.pl [deleted file]
lib/unicore/Is/LbrkHY.pl [deleted file]
lib/unicore/Is/LbrkID.pl [deleted file]
lib/unicore/Is/LbrkIN.pl [deleted file]
lib/unicore/Is/LbrkIS.pl [deleted file]
lib/unicore/Is/LbrkLF.pl [deleted file]
lib/unicore/Is/LbrkNS.pl [deleted file]
lib/unicore/Is/LbrkNU.pl [deleted file]
lib/unicore/Is/LbrkOP.pl [deleted file]
lib/unicore/Is/LbrkPO.pl [deleted file]
lib/unicore/Is/LbrkPR.pl [deleted file]
lib/unicore/Is/LbrkQU.pl [deleted file]
lib/unicore/Is/LbrkSA.pl [deleted file]
lib/unicore/Is/LbrkSG.pl [deleted file]
lib/unicore/Is/LbrkSP.pl [deleted file]
lib/unicore/Is/LbrkSY.pl [deleted file]
lib/unicore/Is/LbrkXX.pl [deleted file]
lib/unicore/Is/LbrkZW.pl [deleted file]
lib/unicore/Makefile
lib/unicore/Properties
lib/unicore/To/Digit.pl
lib/unicore/To/Fold.pl
lib/unicore/To/Lower.pl
lib/unicore/To/Title.pl
lib/unicore/To/Upper.pl
lib/unicore/lib/ASCII.pl [moved from lib/unicore/Is/ASCII.pl with 100% similarity]
lib/unicore/lib/Alnum.pl [moved from lib/unicore/Is/Alnum.pl with 100% similarity]
lib/unicore/lib/Alpha.pl [moved from lib/unicore/Is/Alpha.pl with 100% similarity]
lib/unicore/lib/Alphabet.pl [moved from lib/unicore/Is/Alphabet.pl with 100% similarity]
lib/unicore/lib/Any.pl [moved from lib/unicore/Is/Any.pl with 76% similarity]
lib/unicore/lib/Arabic.pl [moved from lib/unicore/Is/Arabic.pl with 100% similarity]
lib/unicore/lib/Armenian.pl [moved from lib/unicore/Is/Armenian.pl with 100% similarity]
lib/unicore/lib/AsciiHex.pl [moved from lib/unicore/Is/AsciiHex.pl with 100% similarity]
lib/unicore/lib/Assigned.pl [moved from lib/unicore/Is/Assigned.pl with 99% similarity]
lib/unicore/lib/Bengali.pl [moved from lib/unicore/Is/Bengali.pl with 100% similarity]
lib/unicore/lib/BidiAL.pl [moved from lib/unicore/Is/BidiAL.pl with 100% similarity]
lib/unicore/lib/BidiAN.pl [moved from lib/unicore/Is/BidiAN.pl with 100% similarity]
lib/unicore/lib/BidiB.pl [moved from lib/unicore/Is/BidiB.pl with 100% similarity]
lib/unicore/lib/BidiBN.pl [moved from lib/unicore/Is/BidiBN.pl with 100% similarity]
lib/unicore/lib/BidiCS.pl [moved from lib/unicore/Is/BidiCS.pl with 100% similarity]
lib/unicore/lib/BidiCont.pl [moved from lib/unicore/Is/BidiCont.pl with 100% similarity]
lib/unicore/lib/BidiEN.pl [moved from lib/unicore/Is/BidiEN.pl with 100% similarity]
lib/unicore/lib/BidiES.pl [moved from lib/unicore/Is/BidiES.pl with 100% similarity]
lib/unicore/lib/BidiET.pl [moved from lib/unicore/Is/BidiET.pl with 100% similarity]
lib/unicore/lib/BidiL.pl [moved from lib/unicore/Is/BidiL.pl with 100% similarity]
lib/unicore/lib/BidiLRE.pl [moved from lib/unicore/Is/BidiLRE.pl with 100% similarity]
lib/unicore/lib/BidiLRO.pl [moved from lib/unicore/Is/BidiLRO.pl with 100% similarity]
lib/unicore/lib/BidiNSM.pl [moved from lib/unicore/Is/BidiNSM.pl with 100% similarity]
lib/unicore/lib/BidiON.pl [moved from lib/unicore/Is/BidiON.pl with 100% similarity]
lib/unicore/lib/BidiPDF.pl [moved from lib/unicore/Is/BidiPDF.pl with 100% similarity]
lib/unicore/lib/BidiR.pl [moved from lib/unicore/Is/BidiR.pl with 100% similarity]
lib/unicore/lib/BidiRLE.pl [moved from lib/unicore/Is/BidiRLE.pl with 100% similarity]
lib/unicore/lib/BidiRLO.pl [moved from lib/unicore/Is/BidiRLO.pl with 100% similarity]
lib/unicore/lib/BidiS.pl [moved from lib/unicore/Is/BidiS.pl with 100% similarity]
lib/unicore/lib/BidiWS.pl [moved from lib/unicore/Is/BidiWS.pl with 100% similarity]
lib/unicore/lib/Blank.pl [moved from lib/unicore/Is/Blank.pl with 100% similarity]
lib/unicore/lib/Bopomofo.pl [moved from lib/unicore/Is/Bopomofo.pl with 100% similarity]
lib/unicore/lib/C.pl [moved from lib/unicore/Is/C.pl with 99% similarity]
lib/unicore/lib/Canadian.pl [moved from lib/unicore/Is/Canadian.pl with 100% similarity]
lib/unicore/lib/Canon.pl [moved from lib/unicore/Is/Canon.pl with 100% similarity]
lib/unicore/lib/Cc.pl [moved from lib/unicore/Is/Cc.pl with 86% similarity]
lib/unicore/lib/Cf.pl [moved from lib/unicore/Is/Cf.pl with 89% similarity]
lib/unicore/lib/Cherokee.pl [moved from lib/unicore/Is/Cherokee.pl with 100% similarity]
lib/unicore/lib/Cn.pl [moved from lib/unicore/Is/Cn.pl with 98% similarity]
lib/unicore/lib/Cntrl.pl [moved from lib/unicore/Is/Cntrl.pl with 100% similarity]
lib/unicore/lib/Co.pl [moved from lib/unicore/Is/Co.pl with 86% similarity]
lib/unicore/lib/Common.pl [moved from lib/unicore/Is/Common.pl with 100% similarity]
lib/unicore/lib/Compat.pl [moved from lib/unicore/Is/Compat.pl with 100% similarity]
lib/unicore/lib/Cs.pl [moved from lib/unicore/Is/Cs.pl with 85% similarity]
lib/unicore/lib/Cyrillic.pl [moved from lib/unicore/Is/Cyrillic.pl with 100% similarity]
lib/unicore/lib/DCcircle.pl [moved from lib/unicore/Is/DCcircle.pl with 100% similarity]
lib/unicore/lib/DCcompat.pl [moved from lib/unicore/Is/DCcompat.pl with 100% similarity]
lib/unicore/lib/DCfinal.pl [moved from lib/unicore/Is/DCfinal.pl with 100% similarity]
lib/unicore/lib/DCfont.pl [moved from lib/unicore/Is/DCfont.pl with 100% similarity]
lib/unicore/lib/DCfracti.pl [moved from lib/unicore/Is/DCfracti.pl with 100% similarity]
lib/unicore/lib/DCinitia.pl [moved from lib/unicore/Is/DCinitia.pl with 100% similarity]
lib/unicore/lib/DCisolat.pl [moved from lib/unicore/Is/DCisolat.pl with 100% similarity]
lib/unicore/lib/DCmedial.pl [moved from lib/unicore/Is/DCmedial.pl with 100% similarity]
lib/unicore/lib/DCnarrow.pl [moved from lib/unicore/Is/DCnarrow.pl with 100% similarity]
lib/unicore/lib/DCnoBrea.pl [moved from lib/unicore/Is/DCnoBrea.pl with 100% similarity]
lib/unicore/lib/DCsmall.pl [moved from lib/unicore/Is/DCsmall.pl with 100% similarity]
lib/unicore/lib/DCsquare.pl [moved from lib/unicore/Is/DCsquare.pl with 100% similarity]
lib/unicore/lib/DCsub.pl [moved from lib/unicore/Is/DCsub.pl with 100% similarity]
lib/unicore/lib/DCsuper.pl [moved from lib/unicore/Is/DCsuper.pl with 100% similarity]
lib/unicore/lib/DCvertic.pl [moved from lib/unicore/Is/DCvertic.pl with 100% similarity]
lib/unicore/lib/DCwide.pl [moved from lib/unicore/Is/DCwide.pl with 100% similarity]
lib/unicore/lib/Dash.pl [moved from lib/unicore/Is/Dash.pl with 100% similarity]
lib/unicore/lib/Deseret.pl [moved from lib/unicore/Is/Deseret.pl with 100% similarity]
lib/unicore/lib/Devanaga.pl [moved from lib/unicore/Is/Devanaga.pl with 100% similarity]
lib/unicore/lib/Diacriti.pl [moved from lib/unicore/Is/Diacriti.pl with 100% similarity]
lib/unicore/lib/Digit.pl [moved from lib/unicore/Is/Digit.pl with 100% similarity]
lib/unicore/lib/Ethiopic.pl [moved from lib/unicore/Is/Ethiopic.pl with 100% similarity]
lib/unicore/lib/Extender.pl [moved from lib/unicore/Is/Extender.pl with 100% similarity]
lib/unicore/lib/Georgian.pl [moved from lib/unicore/Is/Georgian.pl with 100% similarity]
lib/unicore/lib/Gothic.pl [moved from lib/unicore/Is/Gothic.pl with 100% similarity]
lib/unicore/lib/Graph.pl [moved from lib/unicore/Is/Graph.pl with 100% similarity]
lib/unicore/lib/Greek.pl [moved from lib/unicore/Is/Greek.pl with 100% similarity]
lib/unicore/lib/Gujarati.pl [moved from lib/unicore/Is/Gujarati.pl with 100% similarity]
lib/unicore/lib/Gurmukhi.pl [moved from lib/unicore/Is/Gurmukhi.pl with 100% similarity]
lib/unicore/lib/Han.pl [moved from lib/unicore/Is/Han.pl with 100% similarity]
lib/unicore/lib/Hangul.pl [moved from lib/unicore/Is/Hangul.pl with 100% similarity]
lib/unicore/lib/Hebrew.pl [moved from lib/unicore/Is/Hebrew.pl with 100% similarity]
lib/unicore/lib/HexDigit.pl [moved from lib/unicore/Is/HexDigit.pl with 100% similarity]
lib/unicore/lib/Hiragana.pl [moved from lib/unicore/Is/Hiragana.pl with 100% similarity]
lib/unicore/lib/Hyphen.pl [moved from lib/unicore/Is/Hyphen.pl with 100% similarity]
lib/unicore/lib/IdContin.pl [moved from lib/unicore/Is/IdContin.pl with 100% similarity]
lib/unicore/lib/IdStart.pl [moved from lib/unicore/Is/IdStart.pl with 100% similarity]
lib/unicore/lib/Ideograp.pl [moved from lib/unicore/Is/Ideograp.pl with 100% similarity]
lib/unicore/lib/InAlphab.pl [moved from lib/unicore/In/Alphabet.pl with 100% similarity]
lib/unicore/lib/InArabi2.pl [moved from lib/unicore/In/ArabicPr.pl with 100% similarity]
lib/unicore/lib/InArabi3.pl [moved from lib/unicore/In/ArabicP2.pl with 100% similarity]
lib/unicore/lib/InArabic.pl [moved from lib/unicore/In/Arabic.pl with 100% similarity]
lib/unicore/lib/InArmeni.pl [moved from lib/unicore/In/Armenian.pl with 100% similarity]
lib/unicore/lib/InArrows.pl [moved from lib/unicore/In/Arrows.pl with 100% similarity]
lib/unicore/lib/InBasicL.pl [moved from lib/unicore/In/BasicLat.pl with 100% similarity]
lib/unicore/lib/InBengal.pl [moved from lib/unicore/In/Bengali.pl with 100% similarity]
lib/unicore/lib/InBlockE.pl [moved from lib/unicore/In/BlockEle.pl with 100% similarity]
lib/unicore/lib/InBopom2.pl [moved from lib/unicore/In/Bopomof2.pl with 100% similarity]
lib/unicore/lib/InBopomo.pl [moved from lib/unicore/In/Bopomofo.pl with 100% similarity]
lib/unicore/lib/InBoxDra.pl [moved from lib/unicore/In/BoxDrawi.pl with 100% similarity]
lib/unicore/lib/InBraill.pl [moved from lib/unicore/In/BrailleP.pl with 100% similarity]
lib/unicore/lib/InByzant.pl [moved from lib/unicore/In/Byzantin.pl with 100% similarity]
lib/unicore/lib/InCherok.pl [moved from lib/unicore/In/Cherokee.pl with 100% similarity]
lib/unicore/lib/InCjkCo2.pl [moved from lib/unicore/In/CjkComp2.pl with 100% similarity]
lib/unicore/lib/InCjkCo3.pl [moved from lib/unicore/In/CjkComp3.pl with 100% similarity]
lib/unicore/lib/InCjkCo4.pl [moved from lib/unicore/In/CjkComp4.pl with 100% similarity]
lib/unicore/lib/InCjkCom.pl [moved from lib/unicore/In/CjkCompa.pl with 100% similarity]
lib/unicore/lib/InCjkRad.pl [moved from lib/unicore/In/CjkRadic.pl with 100% similarity]
lib/unicore/lib/InCjkSym.pl [moved from lib/unicore/In/CjkSymbo.pl with 100% similarity]
lib/unicore/lib/InCjkUn2.pl [moved from lib/unicore/In/CjkUnif3.pl with 100% similarity]
lib/unicore/lib/InCjkUn3.pl [moved from lib/unicore/In/CjkUnif2.pl with 100% similarity]
lib/unicore/lib/InCjkUni.pl [moved from lib/unicore/In/CjkUnifi.pl with 100% similarity]
lib/unicore/lib/InCombi2.pl [moved from lib/unicore/In/Combini3.pl with 100% similarity]
lib/unicore/lib/InCombi3.pl [moved from lib/unicore/In/Combini2.pl with 100% similarity]
lib/unicore/lib/InCombin.pl [moved from lib/unicore/In/Combinin.pl with 100% similarity]
lib/unicore/lib/InContro.pl [moved from lib/unicore/In/ControlP.pl with 100% similarity]
lib/unicore/lib/InCurren.pl [moved from lib/unicore/In/Currency.pl with 100% similarity]
lib/unicore/lib/InCyrill.pl [moved from lib/unicore/In/Cyrillic.pl with 100% similarity]
lib/unicore/lib/InDesere.pl [moved from lib/unicore/In/Deseret.pl with 100% similarity]
lib/unicore/lib/InDevana.pl [moved from lib/unicore/In/Devanaga.pl with 100% similarity]
lib/unicore/lib/InDingba.pl [moved from lib/unicore/In/Dingbats.pl with 100% similarity]
lib/unicore/lib/InEnclo2.pl [moved from lib/unicore/In/Enclose2.pl with 100% similarity]
lib/unicore/lib/InEnclos.pl [moved from lib/unicore/In/Enclosed.pl with 100% similarity]
lib/unicore/lib/InEthiop.pl [moved from lib/unicore/In/Ethiopic.pl with 100% similarity]
lib/unicore/lib/InGenera.pl [moved from lib/unicore/In/GeneralP.pl with 100% similarity]
lib/unicore/lib/InGeomet.pl [moved from lib/unicore/In/Geometri.pl with 100% similarity]
lib/unicore/lib/InGeorgi.pl [moved from lib/unicore/In/Georgian.pl with 100% similarity]
lib/unicore/lib/InGothic.pl [moved from lib/unicore/In/Gothic.pl with 100% similarity]
lib/unicore/lib/InGreek.pl [moved from lib/unicore/In/Greek.pl with 100% similarity]
lib/unicore/lib/InGreekE.pl [moved from lib/unicore/In/GreekExt.pl with 100% similarity]
lib/unicore/lib/InGujara.pl [moved from lib/unicore/In/Gujarati.pl with 100% similarity]
lib/unicore/lib/InGurmuk.pl [moved from lib/unicore/In/Gurmukhi.pl with 100% similarity]
lib/unicore/lib/InHalfwi.pl [moved from lib/unicore/In/Halfwidt.pl with 100% similarity]
lib/unicore/lib/InHangu2.pl [moved from lib/unicore/In/HangulSy.pl with 100% similarity]
lib/unicore/lib/InHangu3.pl [moved from lib/unicore/In/HangulCo.pl with 100% similarity]
lib/unicore/lib/InHangul.pl [moved from lib/unicore/In/HangulJa.pl with 100% similarity]
lib/unicore/lib/InHebrew.pl [moved from lib/unicore/In/Hebrew.pl with 100% similarity]
lib/unicore/lib/InHighPr.pl [moved from lib/unicore/In/HighPriv.pl with 100% similarity]
lib/unicore/lib/InHighSu.pl [moved from lib/unicore/In/HighSurr.pl with 100% similarity]
lib/unicore/lib/InHiraga.pl [moved from lib/unicore/In/Hiragana.pl with 100% similarity]
lib/unicore/lib/InIdeogr.pl [moved from lib/unicore/In/Ideograp.pl with 100% similarity]
lib/unicore/lib/InIpaExt.pl [moved from lib/unicore/In/IpaExten.pl with 100% similarity]
lib/unicore/lib/InKanbun.pl [moved from lib/unicore/In/Kanbun.pl with 100% similarity]
lib/unicore/lib/InKangxi.pl [moved from lib/unicore/In/KangxiRa.pl with 100% similarity]
lib/unicore/lib/InKannad.pl [moved from lib/unicore/In/Kannada.pl with 100% similarity]
lib/unicore/lib/InKataka.pl [moved from lib/unicore/In/Katakana.pl with 100% similarity]
lib/unicore/lib/InKhmer.pl [moved from lib/unicore/In/Khmer.pl with 100% similarity]
lib/unicore/lib/InLao.pl [moved from lib/unicore/In/Lao.pl with 100% similarity]
lib/unicore/lib/InLatin1.pl [moved from lib/unicore/In/Latin1Su.pl with 100% similarity]
lib/unicore/lib/InLatin2.pl [moved from lib/unicore/In/LatinEx2.pl with 100% similarity]
lib/unicore/lib/InLatin3.pl [moved from lib/unicore/In/LatinEx3.pl with 100% similarity]
lib/unicore/lib/InLatinE.pl [moved from lib/unicore/In/LatinExt.pl with 100% similarity]
lib/unicore/lib/InLetter.pl [moved from lib/unicore/In/Letterli.pl with 100% similarity]
lib/unicore/lib/InLowSur.pl [moved from lib/unicore/In/LowSurro.pl with 100% similarity]
lib/unicore/lib/InMalaya.pl [moved from lib/unicore/In/Malayala.pl with 100% similarity]
lib/unicore/lib/InMathe2.pl [moved from lib/unicore/In/Mathema2.pl with 100% similarity]
lib/unicore/lib/InMathem.pl [moved from lib/unicore/In/Mathemat.pl with 100% similarity]
lib/unicore/lib/InMisce2.pl [moved from lib/unicore/In/Miscell2.pl with 100% similarity]
lib/unicore/lib/InMiscel.pl [moved from lib/unicore/In/Miscella.pl with 100% similarity]
lib/unicore/lib/InMongol.pl [moved from lib/unicore/In/Mongolia.pl with 100% similarity]
lib/unicore/lib/InMusica.pl [moved from lib/unicore/In/MusicalS.pl with 100% similarity]
lib/unicore/lib/InMyanma.pl [moved from lib/unicore/In/Myanmar.pl with 100% similarity]
lib/unicore/lib/InNumber.pl [moved from lib/unicore/In/NumberFo.pl with 100% similarity]
lib/unicore/lib/InOgham.pl [moved from lib/unicore/In/Ogham.pl with 100% similarity]
lib/unicore/lib/InOldIta.pl [moved from lib/unicore/In/OldItali.pl with 100% similarity]
lib/unicore/lib/InOptica.pl [moved from lib/unicore/In/OpticalC.pl with 100% similarity]
lib/unicore/lib/InOriya.pl [moved from lib/unicore/In/Oriya.pl with 100% similarity]
lib/unicore/lib/InPrivat.pl [moved from lib/unicore/In/PrivateU.pl with 100% similarity]
lib/unicore/lib/InRunic.pl [moved from lib/unicore/In/Runic.pl with 100% similarity]
lib/unicore/lib/InSinhal.pl [moved from lib/unicore/In/Sinhala.pl with 100% similarity]
lib/unicore/lib/InSmallF.pl [moved from lib/unicore/In/SmallFor.pl with 100% similarity]
lib/unicore/lib/InSpacin.pl [moved from lib/unicore/In/SpacingM.pl with 100% similarity]
lib/unicore/lib/InSpecia.pl [moved from lib/unicore/In/Specials.pl with 100% similarity]
lib/unicore/lib/InSupers.pl [moved from lib/unicore/In/Superscr.pl with 100% similarity]
lib/unicore/lib/InSyriac.pl [moved from lib/unicore/In/Syriac.pl with 100% similarity]
lib/unicore/lib/InTags.pl [moved from lib/unicore/In/Tags.pl with 100% similarity]
lib/unicore/lib/InTamil.pl [moved from lib/unicore/In/Tamil.pl with 100% similarity]
lib/unicore/lib/InTelugu.pl [moved from lib/unicore/In/Telugu.pl with 100% similarity]
lib/unicore/lib/InThaana.pl [moved from lib/unicore/In/Thaana.pl with 100% similarity]
lib/unicore/lib/InThai.pl [moved from lib/unicore/In/Thai.pl with 100% similarity]
lib/unicore/lib/InTibeta.pl [moved from lib/unicore/In/Tibetan.pl with 100% similarity]
lib/unicore/lib/InUnifie.pl [moved from lib/unicore/In/UnifiedC.pl with 100% similarity]
lib/unicore/lib/InYiRadi.pl [moved from lib/unicore/In/YiRadica.pl with 100% similarity]
lib/unicore/lib/InYiSyll.pl [moved from lib/unicore/In/YiSyllab.pl with 100% similarity]
lib/unicore/lib/Inherite.pl [moved from lib/unicore/Is/Inherite.pl with 100% similarity]
lib/unicore/lib/JoinCont.pl [moved from lib/unicore/Is/JoinCont.pl with 100% similarity]
lib/unicore/lib/Kannada.pl [moved from lib/unicore/Is/Kannada.pl with 100% similarity]
lib/unicore/lib/Katakana.pl [moved from lib/unicore/Is/Katakana.pl with 100% similarity]
lib/unicore/lib/Khmer.pl [moved from lib/unicore/Is/Khmer.pl with 100% similarity]
lib/unicore/lib/L.pl [moved from lib/unicore/Is/L.pl with 98% similarity]
lib/unicore/lib/L_.pl [moved from lib/unicore/Is/L_.pl with 100% similarity]
lib/unicore/lib/Lao.pl [moved from lib/unicore/Is/Lao.pl with 100% similarity]
lib/unicore/lib/Latin.pl [moved from lib/unicore/Is/Latin.pl with 100% similarity]
lib/unicore/lib/Ll.pl [moved from lib/unicore/Is/Ll.pl with 98% similarity]
lib/unicore/lib/Lm.pl [moved from lib/unicore/Is/Lm.pl with 89% similarity]
lib/unicore/lib/Lo.pl [moved from lib/unicore/Is/Lo.pl with 98% similarity]
lib/unicore/lib/Lower.pl [moved from lib/unicore/Is/Lower.pl with 100% similarity]
lib/unicore/lib/Lowercas.pl [moved from lib/unicore/Is/Lowercas.pl with 100% similarity]
lib/unicore/lib/Lt.pl [moved from lib/unicore/Is/Lt.pl with 86% similarity]
lib/unicore/lib/Lu.pl [moved from lib/unicore/Is/Lu.pl with 98% similarity]
lib/unicore/lib/M.pl [moved from lib/unicore/Is/M.pl with 97% similarity]
lib/unicore/lib/Malayala.pl [moved from lib/unicore/Is/Malayala.pl with 100% similarity]
lib/unicore/lib/Math.pl [moved from lib/unicore/Is/Math.pl with 100% similarity]
lib/unicore/lib/Mc.pl [moved from lib/unicore/Is/Mc.pl with 94% similarity]
lib/unicore/lib/Me.pl [moved from lib/unicore/Is/Me.pl with 85% similarity]
lib/unicore/lib/Mirrored.pl [moved from lib/unicore/Is/Mirrored.pl with 100% similarity]
lib/unicore/lib/Mn.pl [moved from lib/unicore/Is/Mn.pl with 96% similarity]
lib/unicore/lib/Mongolia.pl [moved from lib/unicore/Is/Mongolia.pl with 100% similarity]
lib/unicore/lib/Myanmar.pl [moved from lib/unicore/Is/Myanmar.pl with 100% similarity]
lib/unicore/lib/N.pl [moved from lib/unicore/Is/N.pl with 94% similarity]
lib/unicore/lib/Nd.pl [moved from lib/unicore/Is/Nd.pl with 91% similarity]
lib/unicore/lib/Nl.pl [moved from lib/unicore/Is/Nl.pl with 86% similarity]
lib/unicore/lib/No.pl [moved from lib/unicore/Is/No.pl with 90% similarity]
lib/unicore/lib/Nonchara.pl [moved from lib/unicore/Is/Nonchara.pl with 100% similarity]
lib/unicore/lib/Ogham.pl [moved from lib/unicore/Is/Ogham.pl with 100% similarity]
lib/unicore/lib/OldItali.pl [moved from lib/unicore/Is/OldItali.pl with 100% similarity]
lib/unicore/lib/Oriya.pl [moved from lib/unicore/Is/Oriya.pl with 100% similarity]
lib/unicore/lib/OtherAlp.pl [moved from lib/unicore/Is/OtherAlp.pl with 100% similarity]
lib/unicore/lib/OtherLow.pl [moved from lib/unicore/Is/OtherLow.pl with 100% similarity]
lib/unicore/lib/OtherMat.pl [moved from lib/unicore/Is/OtherMat.pl with 100% similarity]
lib/unicore/lib/OtherUpp.pl [moved from lib/unicore/Is/OtherUpp.pl with 100% similarity]
lib/unicore/lib/P.pl [moved from lib/unicore/Is/P.pl with 95% similarity]
lib/unicore/lib/Pc.pl [moved from lib/unicore/Is/Pc.pl with 84% similarity]
lib/unicore/lib/Pd.pl [moved from lib/unicore/Is/Pd.pl with 86% similarity]
lib/unicore/lib/Pe.pl [moved from lib/unicore/Is/Pe.pl with 90% similarity]
lib/unicore/lib/Pf.pl [moved from lib/unicore/Is/Pf.pl with 84% similarity]
lib/unicore/lib/Pi.pl [moved from lib/unicore/Is/Pi.pl with 84% similarity]
lib/unicore/lib/Po.pl [moved from lib/unicore/Is/Po.pl with 94% similarity]
lib/unicore/lib/Print.pl [moved from lib/unicore/Is/Print.pl with 100% similarity]
lib/unicore/lib/Ps.pl [moved from lib/unicore/Is/Ps.pl with 91% similarity]
lib/unicore/lib/Punct.pl [moved from lib/unicore/Is/Punct.pl with 100% similarity]
lib/unicore/lib/Quotatio.pl [moved from lib/unicore/Is/Quotatio.pl with 100% similarity]
lib/unicore/lib/Runic.pl [moved from lib/unicore/Is/Runic.pl with 100% similarity]
lib/unicore/lib/S.pl [moved from lib/unicore/Is/S.pl with 97% similarity]
lib/unicore/lib/Sc.pl [moved from lib/unicore/Is/Sc.pl with 87% similarity]
lib/unicore/lib/Sinhala.pl [moved from lib/unicore/Is/Sinhala.pl with 100% similarity]
lib/unicore/lib/Sk.pl [moved from lib/unicore/Is/Sk.pl with 90% similarity]
lib/unicore/lib/Sm.pl [moved from lib/unicore/Is/Sm.pl with 93% similarity]
lib/unicore/lib/So.pl [moved from lib/unicore/Is/So.pl with 96% similarity]
lib/unicore/lib/Space.pl [moved from lib/unicore/Is/Space.pl with 100% similarity]
lib/unicore/lib/SpacePer.pl [moved from lib/unicore/Is/SpacePer.pl with 100% similarity]
lib/unicore/lib/Syriac.pl [moved from lib/unicore/Is/Syriac.pl with 100% similarity]
lib/unicore/lib/Tamil.pl [moved from lib/unicore/Is/Tamil.pl with 100% similarity]
lib/unicore/lib/Telugu.pl [moved from lib/unicore/Is/Telugu.pl with 100% similarity]
lib/unicore/lib/Terminal.pl [moved from lib/unicore/Is/Terminal.pl with 100% similarity]
lib/unicore/lib/Thaana.pl [moved from lib/unicore/Is/Thaana.pl with 100% similarity]
lib/unicore/lib/Thai.pl [moved from lib/unicore/Is/Thai.pl with 100% similarity]
lib/unicore/lib/Tibetan.pl [moved from lib/unicore/Is/Tibetan.pl with 100% similarity]
lib/unicore/lib/Title.pl [moved from lib/unicore/Is/Title.pl with 100% similarity]
lib/unicore/lib/Upper.pl [moved from lib/unicore/Is/Upper.pl with 100% similarity]
lib/unicore/lib/Uppercas.pl [moved from lib/unicore/Is/Uppercas.pl with 100% similarity]
lib/unicore/lib/WhiteSpa.pl [moved from lib/unicore/Is/WhiteSpa.pl with 100% similarity]
lib/unicore/lib/Word.pl [moved from lib/unicore/Is/Word.pl with 100% similarity]
lib/unicore/lib/XDigit.pl [moved from lib/unicore/Is/XDigit.pl with 100% similarity]
lib/unicore/lib/Yi.pl [moved from lib/unicore/Is/Yi.pl with 100% similarity]
lib/unicore/lib/Z.pl [moved from lib/unicore/Is/Z.pl with 87% similarity]
lib/unicore/lib/Zl.pl [moved from lib/unicore/Is/Zl.pl with 83% similarity]
lib/unicore/lib/Zp.pl [moved from lib/unicore/Is/Zp.pl with 82% similarity]
lib/unicore/lib/Zs.pl [moved from lib/unicore/Is/Zs.pl with 85% similarity]
lib/unicore/lib/_CanonDC.pl [moved from lib/unicore/Is/_CanonDC.pl with 100% similarity]
lib/unicore/lib/_CaseIgn.pl [moved from lib/unicore/Is/_CaseIgn.pl with 100% similarity]
lib/unicore/lib/_CombAbo.pl [moved from lib/unicore/Is/_CombAbo.pl with 100% similarity]
lib/unicore/mktables
lib/utf8_heavy.pl

index c88fa9c..f8a6289 100644 (file)
--- a/MANIFEST
+++ b/MANIFEST
@@ -1388,304 +1388,275 @@ lib/unicore/BidiMirr.txt      Unicode character database
 lib/unicore/Bidirectional.pl   Unicode character database
 lib/unicore/Blocks.pl          Unicode character database
 lib/unicore/Blocks.txt         Unicode character database
+lib/unicore/Canonical.pl       Unicode character database
 lib/unicore/CaseFold.txt       Unicode character database
 lib/unicore/Category.pl                Unicode character database
 lib/unicore/CombiningClass.pl  Unicode character database
 lib/unicore/CompExcl.txt       Unicode character database
 lib/unicore/Decomposition.pl   Unicode character database
 lib/unicore/EAWidth.txt                Unicode character database
-lib/unicore/In.pl              Unicode character database
-lib/unicore/In/Alphabet.pl     Unicode character database
-lib/unicore/In/Arabic.pl       Unicode character database
-lib/unicore/In/ArabicP2.pl     Unicode character database
-lib/unicore/In/ArabicPr.pl     Unicode character database
-lib/unicore/In/Armenian.pl     Unicode character database
-lib/unicore/In/Arrows.pl       Unicode character database
-lib/unicore/In/BasicLat.pl     Unicode character database
-lib/unicore/In/Bengali.pl      Unicode character database
-lib/unicore/In/BlockEle.pl     Unicode character database
-lib/unicore/In/Bopomof2.pl     Unicode character database
-lib/unicore/In/Bopomofo.pl     Unicode character database
-lib/unicore/In/BoxDrawi.pl     Unicode character database
-lib/unicore/In/BrailleP.pl     Unicode character database
-lib/unicore/In/Byzantin.pl     Unicode character database
-lib/unicore/In/Cherokee.pl     Unicode character database
-lib/unicore/In/CjkComp2.pl     Unicode character database
-lib/unicore/In/CjkComp3.pl     Unicode character database
-lib/unicore/In/CjkComp4.pl     Unicode character database
-lib/unicore/In/CjkCompa.pl     Unicode character database
-lib/unicore/In/CjkRadic.pl     Unicode character database
-lib/unicore/In/CjkSymbo.pl     Unicode character database
-lib/unicore/In/CjkUnif2.pl     Unicode character database
-lib/unicore/In/CjkUnif3.pl     Unicode character database
-lib/unicore/In/CjkUnifi.pl     Unicode character database
-lib/unicore/In/Combini2.pl     Unicode character database
-lib/unicore/In/Combini3.pl     Unicode character database
-lib/unicore/In/Combinin.pl     Unicode character database
-lib/unicore/In/ControlP.pl     Unicode character database
-lib/unicore/In/Currency.pl     Unicode character database
-lib/unicore/In/Cyrillic.pl     Unicode character database
-lib/unicore/In/Deseret.pl      Unicode character database
-lib/unicore/In/Devanaga.pl     Unicode character database
-lib/unicore/In/Dingbats.pl     Unicode character database
-lib/unicore/In/Enclose2.pl     Unicode character database
-lib/unicore/In/Enclosed.pl     Unicode character database
-lib/unicore/In/Ethiopic.pl     Unicode character database
-lib/unicore/In/GeneralP.pl     Unicode character database
-lib/unicore/In/Geometri.pl     Unicode character database
-lib/unicore/In/Georgian.pl     Unicode character database
-lib/unicore/In/Gothic.pl       Unicode character database
-lib/unicore/In/Greek.pl                Unicode character database
-lib/unicore/In/GreekExt.pl     Unicode character database
-lib/unicore/In/Gujarati.pl     Unicode character database
-lib/unicore/In/Gurmukhi.pl     Unicode character database
-lib/unicore/In/Halfwidt.pl     Unicode character database
-lib/unicore/In/HangulCo.pl     Unicode character database
-lib/unicore/In/HangulJa.pl     Unicode character database
-lib/unicore/In/HangulSy.pl     Unicode character database
-lib/unicore/In/Hebrew.pl       Unicode character database
-lib/unicore/In/HighPriv.pl     Unicode character database
-lib/unicore/In/HighSurr.pl     Unicode character database
-lib/unicore/In/Hiragana.pl     Unicode character database
-lib/unicore/In/Ideograp.pl     Unicode character database
-lib/unicore/In/IpaExten.pl     Unicode character database
-lib/unicore/In/Kanbun.pl       Unicode character database
-lib/unicore/In/KangxiRa.pl     Unicode character database
-lib/unicore/In/Kannada.pl      Unicode character database
-lib/unicore/In/Katakana.pl     Unicode character database
-lib/unicore/In/Khmer.pl                Unicode character database
-lib/unicore/In/Lao.pl          Unicode character database
-lib/unicore/In/Latin1Su.pl     Unicode character database
-lib/unicore/In/LatinEx2.pl     Unicode character database
-lib/unicore/In/LatinEx3.pl     Unicode character database
-lib/unicore/In/LatinExt.pl     Unicode character database
-lib/unicore/In/Letterli.pl     Unicode character database
-lib/unicore/In/LowSurro.pl     Unicode character database
-lib/unicore/In/Malayala.pl     Unicode character database
-lib/unicore/In/Mathema2.pl     Unicode character database
-lib/unicore/In/Mathemat.pl     Unicode character database
-lib/unicore/In/Miscell2.pl     Unicode character database
-lib/unicore/In/Miscella.pl     Unicode character database
-lib/unicore/In/Mongolia.pl     Unicode character database
-lib/unicore/In/MusicalS.pl     Unicode character database
-lib/unicore/In/Myanmar.pl      Unicode character database
-lib/unicore/In/NumberFo.pl     Unicode character database
-lib/unicore/In/Ogham.pl                Unicode character database
-lib/unicore/In/OldItali.pl     Unicode character database
-lib/unicore/In/OpticalC.pl     Unicode character database
-lib/unicore/In/Oriya.pl                Unicode character database
-lib/unicore/In/PrivateU.pl     Unicode character database
-lib/unicore/In/Runic.pl                Unicode character database
-lib/unicore/In/Sinhala.pl      Unicode character database
-lib/unicore/In/SmallFor.pl     Unicode character database
-lib/unicore/In/SpacingM.pl     Unicode character database
-lib/unicore/In/Specials.pl     Unicode character database
-lib/unicore/In/Superscr.pl     Unicode character database
-lib/unicore/In/Syriac.pl       Unicode character database
-lib/unicore/In/Tags.pl         Unicode character database
-lib/unicore/In/Tamil.pl                Unicode character database
-lib/unicore/In/Telugu.pl       Unicode character database
-lib/unicore/In/Thaana.pl       Unicode character database
-lib/unicore/In/Thai.pl         Unicode character database
-lib/unicore/In/Tibetan.pl      Unicode character database
-lib/unicore/In/UnifiedC.pl     Unicode character database
-lib/unicore/In/YiRadica.pl     Unicode character database
-lib/unicore/In/YiSyllab.pl     Unicode character database
+lib/unicore/Exact.pl           Unicode character database
 lib/unicore/Index.txt          Unicode character database
-lib/unicore/Is.pl              Unicode character database
-lib/unicore/Is/_CanonDC.pl     Unicode character database
-lib/unicore/Is/_CaseIgn.pl     Unicode character database
-lib/unicore/Is/_CombAbo.pl     Unicode character database
-lib/unicore/Is/Alnum.pl                Unicode character database
-lib/unicore/Is/Alpha.pl                Unicode character database
-lib/unicore/Is/Alphabet.pl     Unicode character database
-lib/unicore/Is/Any.pl          Unicode character database
-lib/unicore/Is/Arabic.pl       Unicode character database
-lib/unicore/Is/Armenian.pl     Unicode character database
-lib/unicore/Is/ASCII.pl                Unicode character database
-lib/unicore/Is/AsciiHex.pl     Unicode character database
-lib/unicore/Is/Assigned.pl     Unicode character database
-lib/unicore/Is/Bengali.pl      Unicode character database
-lib/unicore/Is/BidiAL.pl       Unicode character database
-lib/unicore/Is/BidiAN.pl       Unicode character database
-lib/unicore/Is/BidiB.pl                Unicode character database
-lib/unicore/Is/BidiBN.pl       Unicode character database
-lib/unicore/Is/BidiCont.pl     Unicode character database
-lib/unicore/Is/BidiCS.pl       Unicode character database
-lib/unicore/Is/BidiEN.pl       Unicode character database
-lib/unicore/Is/BidiES.pl       Unicode character database
-lib/unicore/Is/BidiET.pl       Unicode character database
-lib/unicore/Is/BidiL.pl                Unicode character database
-lib/unicore/Is/BidiLRE.pl      Unicode character database
-lib/unicore/Is/BidiLRO.pl      Unicode character database
-lib/unicore/Is/BidiNSM.pl      Unicode character database
-lib/unicore/Is/BidiON.pl       Unicode character database
-lib/unicore/Is/BidiPDF.pl      Unicode character database
-lib/unicore/Is/BidiR.pl                Unicode character database
-lib/unicore/Is/BidiRLE.pl      Unicode character database
-lib/unicore/Is/BidiRLO.pl      Unicode character database
-lib/unicore/Is/BidiS.pl                Unicode character database
-lib/unicore/Is/BidiWS.pl       Unicode character database
-lib/unicore/Is/Blank.pl                Unicode character database
-lib/unicore/Is/Bopomofo.pl     Unicode character database
-lib/unicore/Is/C.pl            Unicode character database
-lib/unicore/Is/Canadian.pl     Unicode character database
-lib/unicore/Is/Canon.pl                Unicode character database
-lib/unicore/Is/Cc.pl           Unicode character database
-lib/unicore/Is/Cf.pl           Unicode character database
-lib/unicore/Is/Cherokee.pl     Unicode character database
-lib/unicore/Is/Cn.pl           Unicode character database
-lib/unicore/Is/Cntrl.pl                Unicode character database
-lib/unicore/Is/Co.pl           Unicode character database
-lib/unicore/Is/Common.pl       Unicode character database
-lib/unicore/Is/Compat.pl       Unicode character database
-lib/unicore/Is/Cs.pl           Unicode character database
-lib/unicore/Is/Cyrillic.pl     Unicode character database
-lib/unicore/Is/Dash.pl         Unicode character database
-lib/unicore/Is/DCcircle.pl     Unicode character database
-lib/unicore/Is/DCcompat.pl     Unicode character database
-lib/unicore/Is/DCfinal.pl      Unicode character database
-lib/unicore/Is/DCfont.pl       Unicode character database
-lib/unicore/Is/DCfracti.pl     Unicode character database
-lib/unicore/Is/DCinitia.pl     Unicode character database
-lib/unicore/Is/DCisolat.pl     Unicode character database
-lib/unicore/Is/DCmedial.pl     Unicode character database
-lib/unicore/Is/DCnarrow.pl     Unicode character database
-lib/unicore/Is/DCnoBrea.pl     Unicode character database
-lib/unicore/Is/DCsmall.pl      Unicode character database
-lib/unicore/Is/DCsquare.pl     Unicode character database
-lib/unicore/Is/DCsub.pl                Unicode character database
-lib/unicore/Is/DCsuper.pl      Unicode character database
-lib/unicore/Is/DCvertic.pl     Unicode character database
-lib/unicore/Is/DCwide.pl       Unicode character database
-lib/unicore/Is/Deseret.pl      Unicode character database
-lib/unicore/Is/Devanaga.pl     Unicode character database
-lib/unicore/Is/Diacriti.pl     Unicode character database
-lib/unicore/Is/Digit.pl                Unicode character database
-lib/unicore/Is/Ethiopic.pl     Unicode character database
-lib/unicore/Is/Extender.pl     Unicode character database
-lib/unicore/Is/Georgian.pl     Unicode character database
-lib/unicore/Is/Gothic.pl       Unicode character database
-lib/unicore/Is/Graph.pl                Unicode character database
-lib/unicore/Is/Greek.pl                Unicode character database
-lib/unicore/Is/Gujarati.pl     Unicode character database
-lib/unicore/Is/Gurmukhi.pl     Unicode character database
-lib/unicore/Is/Han.pl          Unicode character database
-lib/unicore/Is/Hangul.pl       Unicode character database
-lib/unicore/Is/Hebrew.pl       Unicode character database
-lib/unicore/Is/HexDigit.pl     Unicode character database
-lib/unicore/Is/Hiragana.pl     Unicode character database
-lib/unicore/Is/Hyphen.pl       Unicode character database
-lib/unicore/Is/IdContin.pl     Unicode character database
-lib/unicore/Is/Ideograp.pl     Unicode character database
-lib/unicore/Is/IdStart.pl      Unicode character database
-lib/unicore/Is/Inherite.pl     Unicode character database
-lib/unicore/Is/JoinCont.pl     Unicode character database
-lib/unicore/Is/Kannada.pl      Unicode character database
-lib/unicore/Is/Katakana.pl     Unicode character database
-lib/unicore/Is/Khmer.pl                Unicode character database
-lib/unicore/Is/L.pl            Unicode character database
-lib/unicore/Is/L_.pl           Unicode character database
-lib/unicore/Is/Lao.pl          Unicode character database
-lib/unicore/Is/Latin.pl                Unicode character database
-lib/unicore/Is/LbrkAI.pl       Unicode character database
-lib/unicore/Is/LbrkAL.pl       Unicode character database
-lib/unicore/Is/LbrkB2.pl       Unicode character database
-lib/unicore/Is/LbrkBA.pl       Unicode character database
-lib/unicore/Is/LbrkBB.pl       Unicode character database
-lib/unicore/Is/LbrkBK.pl       Unicode character database
-lib/unicore/Is/LbrkCB.pl       Unicode character database
-lib/unicore/Is/LbrkCL.pl       Unicode character database
-lib/unicore/Is/LbrkCM.pl       Unicode character database
-lib/unicore/Is/LbrkCR.pl       Unicode character database
-lib/unicore/Is/LbrkEX.pl       Unicode character database
-lib/unicore/Is/LbrkGL.pl       Unicode character database
-lib/unicore/Is/LbrkHY.pl       Unicode character database
-lib/unicore/Is/LbrkID.pl       Unicode character database
-lib/unicore/Is/LbrkIN.pl       Unicode character database
-lib/unicore/Is/LbrkIS.pl       Unicode character database
-lib/unicore/Is/LbrkLF.pl       Unicode character database
-lib/unicore/Is/LbrkNS.pl       Unicode character database
-lib/unicore/Is/LbrkNU.pl       Unicode character database
-lib/unicore/Is/LbrkOP.pl       Unicode character database
-lib/unicore/Is/LbrkPO.pl       Unicode character database
-lib/unicore/Is/LbrkPR.pl       Unicode character database
-lib/unicore/Is/LbrkQU.pl       Unicode character database
-lib/unicore/Is/LbrkSA.pl       Unicode character database
-lib/unicore/Is/LbrkSG.pl       Unicode character database
-lib/unicore/Is/LbrkSP.pl       Unicode character database
-lib/unicore/Is/LbrkSY.pl       Unicode character database
-lib/unicore/Is/LbrkXX.pl       Unicode character database
-lib/unicore/Is/LbrkZW.pl       Unicode character database
-lib/unicore/Is/Ll.pl           Unicode character database
-lib/unicore/Is/Lm.pl           Unicode character database
-lib/unicore/Is/Lo.pl           Unicode character database
-lib/unicore/Is/Lower.pl                Unicode character database
-lib/unicore/Is/Lowercas.pl     Unicode character database
-lib/unicore/Is/Lt.pl           Unicode character database
-lib/unicore/Is/Lu.pl           Unicode character database
-lib/unicore/Is/M.pl            Unicode character database
-lib/unicore/Is/Malayala.pl     Unicode character database
-lib/unicore/Is/Math.pl         Unicode character database
-lib/unicore/Is/Mc.pl           Unicode character database
-lib/unicore/Is/Me.pl           Unicode character database
-lib/unicore/Is/Mirrored.pl     Unicode character database
-lib/unicore/Is/Mn.pl           Unicode character database
-lib/unicore/Is/Mongolia.pl     Unicode character database
-lib/unicore/Is/Myanmar.pl      Unicode character database
-lib/unicore/Is/N.pl            Unicode character database
-lib/unicore/Is/Nd.pl           Unicode character database
-lib/unicore/Is/Nl.pl           Unicode character database
-lib/unicore/Is/No.pl           Unicode character database
-lib/unicore/Is/Nonchara.pl     Unicode character database
-lib/unicore/Is/Ogham.pl                Unicode character database
-lib/unicore/Is/OldItali.pl     Unicode character database
-lib/unicore/Is/Oriya.pl                Unicode character database
-lib/unicore/Is/OtherAlp.pl     Unicode character database
-lib/unicore/Is/OtherLow.pl     Unicode character database
-lib/unicore/Is/OtherMat.pl     Unicode character database
-lib/unicore/Is/OtherUpp.pl     Unicode character database
-lib/unicore/Is/P.pl            Unicode character database
-lib/unicore/Is/Pc.pl           Unicode character database
-lib/unicore/Is/Pd.pl           Unicode character database
-lib/unicore/Is/Pe.pl           Unicode character database
-lib/unicore/Is/Pf.pl           Unicode character database
-lib/unicore/Is/Pi.pl           Unicode character database
-lib/unicore/Is/Po.pl           Unicode character database
-lib/unicore/Is/Print.pl                Unicode character database
-lib/unicore/Is/Ps.pl           Unicode character database
-lib/unicore/Is/Punct.pl                Unicode character database
-lib/unicore/Is/Quotatio.pl     Unicode character database
-lib/unicore/Is/Runic.pl                Unicode character database
-lib/unicore/Is/S.pl            Unicode character database
-lib/unicore/Is/Sc.pl           Unicode character database
-lib/unicore/Is/Sinhala.pl      Unicode character database
-lib/unicore/Is/Sk.pl           Unicode character database
-lib/unicore/Is/Sm.pl           Unicode character database
-lib/unicore/Is/So.pl           Unicode character database
-lib/unicore/Is/Space.pl                Unicode character database
-lib/unicore/Is/SpacePer.pl     Unicode character database
-lib/unicore/Is/Syriac.pl       Unicode character database
-lib/unicore/Is/Tamil.pl                Unicode character database
-lib/unicore/Is/Telugu.pl       Unicode character database
-lib/unicore/Is/Terminal.pl     Unicode character database
-lib/unicore/Is/Thaana.pl       Unicode character database
-lib/unicore/Is/Thai.pl         Unicode character database
-lib/unicore/Is/Tibetan.pl      Unicode character database
-lib/unicore/Is/Title.pl                Unicode character database
-lib/unicore/Is/Upper.pl                Unicode character database
-lib/unicore/Is/Uppercas.pl     Unicode character database
-lib/unicore/Is/WhiteSpa.pl     Unicode character database
-lib/unicore/Is/Word.pl         Unicode character database
-lib/unicore/Is/XDigit.pl       Unicode character database
-lib/unicore/Is/Yi.pl           Unicode character database
-lib/unicore/Is/Z.pl            Unicode character database
-lib/unicore/Is/Zl.pl           Unicode character database
-lib/unicore/Is/Zp.pl           Unicode character database
-lib/unicore/Is/Zs.pl           Unicode character database
 lib/unicore/Jamo.txt           Unicode character database
 lib/unicore/JamoShort.pl       Unicode character database
 lib/unicore/Lbrk.pl            Unicode character database
+lib/unicore/lib/Alnum.pl       Unicode character database
+lib/unicore/lib/Alpha.pl       Unicode character database
+lib/unicore/lib/Alphabet.pl    Unicode character database
+lib/unicore/lib/Any.pl         Unicode character database
+lib/unicore/lib/Arabic.pl      Unicode character database
+lib/unicore/lib/Armenian.pl    Unicode character database
+lib/unicore/lib/ASCII.pl       Unicode character database
+lib/unicore/lib/AsciiHex.pl    Unicode character database
+lib/unicore/lib/Assigned.pl    Unicode character database
+lib/unicore/lib/Bengali.pl     Unicode character database
+lib/unicore/lib/BidiAL.pl      Unicode character database
+lib/unicore/lib/BidiAN.pl      Unicode character database
+lib/unicore/lib/BidiB.pl       Unicode character database
+lib/unicore/lib/BidiBN.pl      Unicode character database
+lib/unicore/lib/BidiCont.pl    Unicode character database
+lib/unicore/lib/BidiCS.pl      Unicode character database
+lib/unicore/lib/BidiEN.pl      Unicode character database
+lib/unicore/lib/BidiES.pl      Unicode character database
+lib/unicore/lib/BidiET.pl      Unicode character database
+lib/unicore/lib/BidiL.pl       Unicode character database
+lib/unicore/lib/BidiLRE.pl     Unicode character database
+lib/unicore/lib/BidiLRO.pl     Unicode character database
+lib/unicore/lib/BidiNSM.pl     Unicode character database
+lib/unicore/lib/BidiON.pl      Unicode character database
+lib/unicore/lib/BidiPDF.pl     Unicode character database
+lib/unicore/lib/BidiR.pl       Unicode character database
+lib/unicore/lib/BidiRLE.pl     Unicode character database
+lib/unicore/lib/BidiRLO.pl     Unicode character database
+lib/unicore/lib/BidiS.pl       Unicode character database
+lib/unicore/lib/BidiWS.pl      Unicode character database
+lib/unicore/lib/Blank.pl       Unicode character database
+lib/unicore/lib/Bopomofo.pl    Unicode character database
+lib/unicore/lib/C.pl           Unicode character database
+lib/unicore/lib/Canadian.pl    Unicode character database
+lib/unicore/lib/Canon.pl       Unicode character database
+lib/unicore/lib/Cc.pl          Unicode character database
+lib/unicore/lib/Cf.pl          Unicode character database
+lib/unicore/lib/Cherokee.pl    Unicode character database
+lib/unicore/lib/Cn.pl          Unicode character database
+lib/unicore/lib/Cntrl.pl       Unicode character database
+lib/unicore/lib/Co.pl          Unicode character database
+lib/unicore/lib/Common.pl      Unicode character database
+lib/unicore/lib/Compat.pl      Unicode character database
+lib/unicore/lib/Cs.pl          Unicode character database
+lib/unicore/lib/Cyrillic.pl    Unicode character database
+lib/unicore/lib/Dash.pl                Unicode character database
+lib/unicore/lib/DCcircle.pl    Unicode character database
+lib/unicore/lib/DCcompat.pl    Unicode character database
+lib/unicore/lib/DCfinal.pl     Unicode character database
+lib/unicore/lib/DCfont.pl      Unicode character database
+lib/unicore/lib/DCfracti.pl    Unicode character database
+lib/unicore/lib/DCinitia.pl    Unicode character database
+lib/unicore/lib/DCisolat.pl    Unicode character database
+lib/unicore/lib/DCmedial.pl    Unicode character database
+lib/unicore/lib/DCnarrow.pl    Unicode character database
+lib/unicore/lib/DCnoBrea.pl    Unicode character database
+lib/unicore/lib/DCsmall.pl     Unicode character database
+lib/unicore/lib/DCsquare.pl    Unicode character database
+lib/unicore/lib/DCsub.pl       Unicode character database
+lib/unicore/lib/DCsuper.pl     Unicode character database
+lib/unicore/lib/DCvertic.pl    Unicode character database
+lib/unicore/lib/DCwide.pl      Unicode character database
+lib/unicore/lib/Deseret.pl     Unicode character database
+lib/unicore/lib/Devanaga.pl    Unicode character database
+lib/unicore/lib/Diacriti.pl    Unicode character database
+lib/unicore/lib/Digit.pl       Unicode character database
+lib/unicore/lib/Ethiopic.pl    Unicode character database
+lib/unicore/lib/Extender.pl    Unicode character database
+lib/unicore/lib/Georgian.pl    Unicode character database
+lib/unicore/lib/Gothic.pl      Unicode character database
+lib/unicore/lib/Graph.pl       Unicode character database
+lib/unicore/lib/Greek.pl       Unicode character database
+lib/unicore/lib/Gujarati.pl    Unicode character database
+lib/unicore/lib/Gurmukhi.pl    Unicode character database
+lib/unicore/lib/Han.pl         Unicode character database
+lib/unicore/lib/Hangul.pl      Unicode character database
+lib/unicore/lib/Hebrew.pl      Unicode character database
+lib/unicore/lib/HexDigit.pl    Unicode character database
+lib/unicore/lib/Hiragana.pl    Unicode character database
+lib/unicore/lib/Hyphen.pl      Unicode character database
+lib/unicore/lib/IdContin.pl    Unicode character database
+lib/unicore/lib/Ideograp.pl    Unicode character database
+lib/unicore/lib/IdStart.pl     Unicode character database
+lib/unicore/lib/InAlphab.pl    Unicode character database
+lib/unicore/lib/InArabi2.pl    Unicode character database
+lib/unicore/lib/InArabi3.pl    Unicode character database
+lib/unicore/lib/InArabic.pl    Unicode character database
+lib/unicore/lib/InArmeni.pl    Unicode character database
+lib/unicore/lib/InArrows.pl    Unicode character database
+lib/unicore/lib/InBasicL.pl    Unicode character database
+lib/unicore/lib/InBengal.pl    Unicode character database
+lib/unicore/lib/InBlockE.pl    Unicode character database
+lib/unicore/lib/InBopom2.pl    Unicode character database
+lib/unicore/lib/InBopomo.pl    Unicode character database
+lib/unicore/lib/InBoxDra.pl    Unicode character database
+lib/unicore/lib/InBraill.pl    Unicode character database
+lib/unicore/lib/InByzant.pl    Unicode character database
+lib/unicore/lib/InCherok.pl    Unicode character database
+lib/unicore/lib/InCjkCo2.pl    Unicode character database
+lib/unicore/lib/InCjkCo3.pl    Unicode character database
+lib/unicore/lib/InCjkCo4.pl    Unicode character database
+lib/unicore/lib/InCjkCom.pl    Unicode character database
+lib/unicore/lib/InCjkRad.pl    Unicode character database
+lib/unicore/lib/InCjkSym.pl    Unicode character database
+lib/unicore/lib/InCjkUn2.pl    Unicode character database
+lib/unicore/lib/InCjkUn3.pl    Unicode character database
+lib/unicore/lib/InCjkUni.pl    Unicode character database
+lib/unicore/lib/InCombi2.pl    Unicode character database
+lib/unicore/lib/InCombi3.pl    Unicode character database
+lib/unicore/lib/InCombin.pl    Unicode character database
+lib/unicore/lib/InContro.pl    Unicode character database
+lib/unicore/lib/InCurren.pl    Unicode character database
+lib/unicore/lib/InCyrill.pl    Unicode character database
+lib/unicore/lib/InDesere.pl    Unicode character database
+lib/unicore/lib/InDevana.pl    Unicode character database
+lib/unicore/lib/InDingba.pl    Unicode character database
+lib/unicore/lib/InEnclo2.pl    Unicode character database
+lib/unicore/lib/InEnclos.pl    Unicode character database
+lib/unicore/lib/InEthiop.pl    Unicode character database
+lib/unicore/lib/InGenera.pl    Unicode character database
+lib/unicore/lib/InGeomet.pl    Unicode character database
+lib/unicore/lib/InGeorgi.pl    Unicode character database
+lib/unicore/lib/InGothic.pl    Unicode character database
+lib/unicore/lib/InGreek.pl     Unicode character database
+lib/unicore/lib/InGreekE.pl    Unicode character database
+lib/unicore/lib/InGujara.pl    Unicode character database
+lib/unicore/lib/InGurmuk.pl    Unicode character database
+lib/unicore/lib/InHalfwi.pl    Unicode character database
+lib/unicore/lib/InHangu2.pl    Unicode character database
+lib/unicore/lib/InHangu3.pl    Unicode character database
+lib/unicore/lib/InHangul.pl    Unicode character database
+lib/unicore/lib/InHebrew.pl    Unicode character database
+lib/unicore/lib/Inherite.pl    Unicode character database
+lib/unicore/lib/InHighPr.pl    Unicode character database
+lib/unicore/lib/InHighSu.pl    Unicode character database
+lib/unicore/lib/InHiraga.pl    Unicode character database
+lib/unicore/lib/InIdeogr.pl    Unicode character database
+lib/unicore/lib/InIpaExt.pl    Unicode character database
+lib/unicore/lib/InKanbun.pl    Unicode character database
+lib/unicore/lib/InKangxi.pl    Unicode character database
+lib/unicore/lib/InKannad.pl    Unicode character database
+lib/unicore/lib/InKataka.pl    Unicode character database
+lib/unicore/lib/InKhmer.pl     Unicode character database
+lib/unicore/lib/InLao.pl       Unicode character database
+lib/unicore/lib/InLatin1.pl    Unicode character database
+lib/unicore/lib/InLatin2.pl    Unicode character database
+lib/unicore/lib/InLatin3.pl    Unicode character database
+lib/unicore/lib/InLatinE.pl    Unicode character database
+lib/unicore/lib/InLetter.pl    Unicode character database
+lib/unicore/lib/InLowSur.pl    Unicode character database
+lib/unicore/lib/InMalaya.pl    Unicode character database
+lib/unicore/lib/InMathe2.pl    Unicode character database
+lib/unicore/lib/InMathem.pl    Unicode character database
+lib/unicore/lib/InMisce2.pl    Unicode character database
+lib/unicore/lib/InMiscel.pl    Unicode character database
+lib/unicore/lib/InMongol.pl    Unicode character database
+lib/unicore/lib/InMusica.pl    Unicode character database
+lib/unicore/lib/InMyanma.pl    Unicode character database
+lib/unicore/lib/InNumber.pl    Unicode character database
+lib/unicore/lib/InOgham.pl     Unicode character database
+lib/unicore/lib/InOldIta.pl    Unicode character database
+lib/unicore/lib/InOptica.pl    Unicode character database
+lib/unicore/lib/InOriya.pl     Unicode character database
+lib/unicore/lib/InPrivat.pl    Unicode character database
+lib/unicore/lib/InRunic.pl     Unicode character database
+lib/unicore/lib/InSinhal.pl    Unicode character database
+lib/unicore/lib/InSmallF.pl    Unicode character database
+lib/unicore/lib/InSpacin.pl    Unicode character database
+lib/unicore/lib/InSpecia.pl    Unicode character database
+lib/unicore/lib/InSupers.pl    Unicode character database
+lib/unicore/lib/InSyriac.pl    Unicode character database
+lib/unicore/lib/InTags.pl      Unicode character database
+lib/unicore/lib/InTamil.pl     Unicode character database
+lib/unicore/lib/InTelugu.pl    Unicode character database
+lib/unicore/lib/InThaana.pl    Unicode character database
+lib/unicore/lib/InThai.pl      Unicode character database
+lib/unicore/lib/InTibeta.pl    Unicode character database
+lib/unicore/lib/InUnifie.pl    Unicode character database
+lib/unicore/lib/InYiRadi.pl    Unicode character database
+lib/unicore/lib/InYiSyll.pl    Unicode character database
+lib/unicore/lib/JoinCont.pl    Unicode character database
+lib/unicore/lib/Kannada.pl     Unicode character database
+lib/unicore/lib/Katakana.pl    Unicode character database
+lib/unicore/lib/Khmer.pl       Unicode character database
+lib/unicore/lib/L.pl           Unicode character database
+lib/unicore/lib/Lao.pl         Unicode character database
+lib/unicore/lib/Latin.pl       Unicode character database
+lib/unicore/lib/Ll.pl          Unicode character database
+lib/unicore/lib/Lm.pl          Unicode character database
+lib/unicore/lib/Lo.pl          Unicode character database
+lib/unicore/lib/Lower.pl       Unicode character database
+lib/unicore/lib/Lowercas.pl    Unicode character database
+lib/unicore/lib/Lt.pl          Unicode character database
+lib/unicore/lib/Lu.pl          Unicode character database
+lib/unicore/lib/L_.pl          Unicode character database
+lib/unicore/lib/M.pl           Unicode character database
+lib/unicore/lib/Malayala.pl    Unicode character database
+lib/unicore/lib/Math.pl                Unicode character database
+lib/unicore/lib/Mc.pl          Unicode character database
+lib/unicore/lib/Me.pl          Unicode character database
+lib/unicore/lib/Mirrored.pl    Unicode character database
+lib/unicore/lib/Mn.pl          Unicode character database
+lib/unicore/lib/Mongolia.pl    Unicode character database
+lib/unicore/lib/Myanmar.pl     Unicode character database
+lib/unicore/lib/N.pl           Unicode character database
+lib/unicore/lib/Nd.pl          Unicode character database
+lib/unicore/lib/Nl.pl          Unicode character database
+lib/unicore/lib/No.pl          Unicode character database
+lib/unicore/lib/Nonchara.pl    Unicode character database
+lib/unicore/lib/Ogham.pl       Unicode character database
+lib/unicore/lib/OldItali.pl    Unicode character database
+lib/unicore/lib/Oriya.pl       Unicode character database
+lib/unicore/lib/OtherAlp.pl    Unicode character database
+lib/unicore/lib/OtherLow.pl    Unicode character database
+lib/unicore/lib/OtherMat.pl    Unicode character database
+lib/unicore/lib/OtherUpp.pl    Unicode character database
+lib/unicore/lib/P.pl           Unicode character database
+lib/unicore/lib/Pc.pl          Unicode character database
+lib/unicore/lib/Pd.pl          Unicode character database
+lib/unicore/lib/Pe.pl          Unicode character database
+lib/unicore/lib/Pf.pl          Unicode character database
+lib/unicore/lib/Pi.pl          Unicode character database
+lib/unicore/lib/Po.pl          Unicode character database
+lib/unicore/lib/Print.pl       Unicode character database
+lib/unicore/lib/Ps.pl          Unicode character database
+lib/unicore/lib/Punct.pl       Unicode character database
+lib/unicore/lib/Quotatio.pl    Unicode character database
+lib/unicore/lib/Runic.pl       Unicode character database
+lib/unicore/lib/S.pl           Unicode character database
+lib/unicore/lib/Sc.pl          Unicode character database
+lib/unicore/lib/Sinhala.pl     Unicode character database
+lib/unicore/lib/Sk.pl          Unicode character database
+lib/unicore/lib/Sm.pl          Unicode character database
+lib/unicore/lib/So.pl          Unicode character database
+lib/unicore/lib/Space.pl       Unicode character database
+lib/unicore/lib/SpacePer.pl    Unicode character database
+lib/unicore/lib/Syriac.pl      Unicode character database
+lib/unicore/lib/Tamil.pl       Unicode character database
+lib/unicore/lib/Telugu.pl      Unicode character database
+lib/unicore/lib/Terminal.pl    Unicode character database
+lib/unicore/lib/Thaana.pl      Unicode character database
+lib/unicore/lib/Thai.pl                Unicode character database
+lib/unicore/lib/Tibetan.pl     Unicode character database
+lib/unicore/lib/Title.pl       Unicode character database
+lib/unicore/lib/Upper.pl       Unicode character database
+lib/unicore/lib/Uppercas.pl    Unicode character database
+lib/unicore/lib/WhiteSpa.pl    Unicode character database
+lib/unicore/lib/Word.pl                Unicode character database
+lib/unicore/lib/XDigit.pl      Unicode character database
+lib/unicore/lib/Yi.pl          Unicode character database
+lib/unicore/lib/Z.pl           Unicode character database
+lib/unicore/lib/Zl.pl          Unicode character database
+lib/unicore/lib/Zp.pl          Unicode character database
+lib/unicore/lib/Zs.pl          Unicode character database
+lib/unicore/lib/_CanonDC.pl    Unicode character database
+lib/unicore/lib/_CaseIgn.pl    Unicode character database
+lib/unicore/lib/_CombAbo.pl    Unicode character database
 lib/unicore/LineBrk.txt                Unicode character database
 lib/unicore/Makefile           Unicode character database
 lib/unicore/mktables           Unicode character database generator
diff --git a/lib/unicore/Canonical.pl b/lib/unicore/Canonical.pl
new file mode 100644 (file)
index 0000000..ac1a660
--- /dev/null
@@ -0,0 +1,704 @@
+# !!!!!!!   DO NOT EDIT THIS FILE   !!!!!!! 
+# This file is built by ./mktables from e.g. Unicode.txt.
+# Any changes made here will be lost!
+
+##
+## Data in this file used by ../utf8_heavy.pl
+##
+
+## Mapping from lc(canonical name) to filename in ./lib
+%utf8::Canonical = (
+   alphabetic                              => 'Alphabet',
+
+ # InAlphabeticPresentationForms
+   alphabeticpresentationforms             => 'InAlphab',
+
+   arabic                                  => 'Arabic',
+
+ # InArabicPresentationFormsA
+   arabicpresentationformsa                => 'InArabi3',
+
+ # InArabicPresentationFormsB
+   arabicpresentationformsb                => 'InArabi2',
+
+   armenian                                => 'Armenian',
+
+ # InArrows
+   arrows                                  => 'InArrows',
+
+ # AsciiHexDigit
+   asciihexdigit                           => 'AsciiHex',
+
+ # InBasicLatin
+   basiclatin                              => 'InBasicL',
+
+   bengali                                 => 'Bengali',
+
+ # BidiControl
+   bidicontrol                             => 'BidiCont',
+
+ # InBlockElements
+   blockelements                           => 'InBlockE',
+
+   bopomofo                                => 'Bopomofo',
+
+ # InBopomofoExtended
+   bopomofoextended                        => 'InBopom2',
+
+ # InBoxDrawing
+   boxdrawing                              => 'InBoxDra',
+
+ # InBraillePatterns
+   braillepatterns                         => 'InBraill',
+
+ # InByzantineMusicalSymbols
+   byzantinemusicalsymbols                 => 'InByzant',
+
+ # CanadianAboriginal
+   canadianaboriginal                      => 'Canadian',
+
+   cherokee                                => 'Cherokee',
+
+ # InCjkCompatibility
+   cjkcompatibility                        => 'InCjkCom',
+
+ # InCjkCompatibilityForms
+   cjkcompatibilityforms                   => 'InCjkCo2',
+
+ # InCjkCompatibilityIdeographs
+   cjkcompatibilityideographs              => 'InCjkCo3',
+
+ # InCjkCompatibilityIdeographsSupplement
+   cjkcompatibilityideographssupplement     => 'InCjkCo4',
+
+ # InCjkRadicalsSupplement
+   cjkradicalssupplement                   => 'InCjkRad',
+
+ # InCjkSymbolsAndPunctuation
+   cjksymbolsandpunctuation                => 'InCjkSym',
+
+ # InCjkUnifiedIdeographs
+   cjkunifiedideographs                    => 'InCjkUni',
+
+ # InCjkUnifiedIdeographsExtensionA
+   cjkunifiedideographsextensiona          => 'InCjkUn3',
+
+ # InCjkUnifiedIdeographsExtensionB
+   cjkunifiedideographsextensionb          => 'InCjkUn2',
+
+ # ClosePunctuation
+   closepunctuation                        => 'Pe',
+
+ # InCombiningDiacriticalMarks
+   combiningdiacriticalmarks               => 'InCombi3',
+
+ # InCombiningHalfMarks
+   combininghalfmarks                      => 'InCombin',
+
+ # InCombiningMarksForSymbols
+   combiningmarksforsymbols                => 'InCombi2',
+
+   common                                  => 'Common',
+
+ # ConnectorPunctuation
+   connectorpunctuation                    => 'Pc',
+
+   control                                 => 'Cc',
+
+ # InControlPictures
+   controlpictures                         => 'InContro',
+
+ # CurrencySymbol
+   currencysymbol                          => 'Sc',
+
+ # InCurrencySymbols
+   currencysymbols                         => 'InCurren',
+
+   cyrillic                                => 'Cyrillic',
+   dash                                    => 'Dash',
+
+ # DashPunctuation
+   dashpunctuation                         => 'Pd',
+
+ # DecimalNumber
+   decimalnumber                           => 'Nd',
+
+   deseret                                 => 'Deseret',
+   devanagari                              => 'Devanaga',
+   diacritic                               => 'Diacriti',
+
+ # InDingbats
+   dingbats                                => 'InDingba',
+
+ # InEnclosedAlphanumerics
+   enclosedalphanumerics                   => 'InEnclos',
+
+ # InEnclosedCjkLettersAndMonths
+   enclosedcjklettersandmonths             => 'InEnclo2',
+
+ # EnclosingMark
+   enclosingmark                           => 'Me',
+
+   ethiopic                                => 'Ethiopic',
+   extender                                => 'Extender',
+
+ # FinalPunctuation
+   finalpunctuation                        => 'Pf',
+
+   format                                  => 'Cf',
+
+ # InGeneralPunctuation
+   generalpunctuation                      => 'InGenera',
+
+ # InGeometricShapes
+   geometricshapes                         => 'InGeomet',
+
+   georgian                                => 'Georgian',
+   gothic                                  => 'Gothic',
+   greek                                   => 'Greek',
+
+ # InGreekExtended
+   greekextended                           => 'InGreekE',
+
+   gujarati                                => 'Gujarati',
+   gurmukhi                                => 'Gurmukhi',
+
+ # InHalfwidthAndFullwidthForms
+   halfwidthandfullwidthforms              => 'InHalfwi',
+
+   han                                     => 'Han',
+   hangul                                  => 'Hangul',
+
+ # InHangulCompatibilityJamo
+   hangulcompatibilityjamo                 => 'InHangu3',
+
+ # InHangulJamo
+   hanguljamo                              => 'InHangul',
+
+ # InHangulSyllables
+   hangulsyllables                         => 'InHangu2',
+
+   hebrew                                  => 'Hebrew',
+
+ # HexDigit
+   hexdigit                                => 'HexDigit',
+
+ # InHighPrivateUseSurrogates
+   highprivateusesurrogates                => 'InHighPr',
+
+ # InHighSurrogates
+   highsurrogates                          => 'InHighSu',
+
+   hiragana                                => 'Hiragana',
+   hyphen                                  => 'Hyphen',
+
+ # IdContinue
+   idcontinue                              => 'IdContin',
+
+   ideographic                             => 'Ideograp',
+
+ # InIdeographicDescriptionCharacters
+   ideographicdescriptioncharacters        => 'InIdeogr',
+
+ # IdStart
+   idstart                                 => 'IdStart',
+
+ # InAlphabeticPresentationForms
+   inalphabeticpresentationforms           => 'InAlphab',
+
+ # InArabic
+   inarabic                                => 'InArabic',
+
+ # InArabicPresentationFormsA
+   inarabicpresentationformsa              => 'InArabi3',
+
+ # InArabicPresentationFormsB
+   inarabicpresentationformsb              => 'InArabi2',
+
+ # InArmenian
+   inarmenian                              => 'InArmeni',
+
+ # InArrows
+   inarrows                                => 'InArrows',
+
+ # InBasicLatin
+   inbasiclatin                            => 'InBasicL',
+
+ # InBengali
+   inbengali                               => 'InBengal',
+
+ # InBlockElements
+   inblockelements                         => 'InBlockE',
+
+ # InBopomofo
+   inbopomofo                              => 'InBopomo',
+
+ # InBopomofoExtended
+   inbopomofoextended                      => 'InBopom2',
+
+ # InBoxDrawing
+   inboxdrawing                            => 'InBoxDra',
+
+ # InBraillePatterns
+   inbraillepatterns                       => 'InBraill',
+
+ # InByzantineMusicalSymbols
+   inbyzantinemusicalsymbols               => 'InByzant',
+
+ # InCherokee
+   incherokee                              => 'InCherok',
+
+ # InCjkCompatibility
+   incjkcompatibility                      => 'InCjkCom',
+
+ # InCjkCompatibilityForms
+   incjkcompatibilityforms                 => 'InCjkCo2',
+
+ # InCjkCompatibilityIdeographs
+   incjkcompatibilityideographs            => 'InCjkCo3',
+
+ # InCjkCompatibilityIdeographsSupplement
+   incjkcompatibilityideographssupplement   => 'InCjkCo4',
+
+ # InCjkRadicalsSupplement
+   incjkradicalssupplement                 => 'InCjkRad',
+
+ # InCjkSymbolsAndPunctuation
+   incjksymbolsandpunctuation              => 'InCjkSym',
+
+ # InCjkUnifiedIdeographs
+   incjkunifiedideographs                  => 'InCjkUni',
+
+ # InCjkUnifiedIdeographsExtensionA
+   incjkunifiedideographsextensiona        => 'InCjkUn3',
+
+ # InCjkUnifiedIdeographsExtensionB
+   incjkunifiedideographsextensionb        => 'InCjkUn2',
+
+ # InCombiningDiacriticalMarks
+   incombiningdiacriticalmarks             => 'InCombi3',
+
+ # InCombiningHalfMarks
+   incombininghalfmarks                    => 'InCombin',
+
+ # InCombiningMarksForSymbols
+   incombiningmarksforsymbols              => 'InCombi2',
+
+ # InControlPictures
+   incontrolpictures                       => 'InContro',
+
+ # InCurrencySymbols
+   incurrencysymbols                       => 'InCurren',
+
+ # InCyrillic
+   incyrillic                              => 'InCyrill',
+
+ # InDeseret
+   indeseret                               => 'InDesere',
+
+ # InDevanagari
+   indevanagari                            => 'InDevana',
+
+ # InDingbats
+   indingbats                              => 'InDingba',
+
+ # InEnclosedAlphanumerics
+   inenclosedalphanumerics                 => 'InEnclos',
+
+ # InEnclosedCjkLettersAndMonths
+   inenclosedcjklettersandmonths           => 'InEnclo2',
+
+ # InEthiopic
+   inethiopic                              => 'InEthiop',
+
+ # InGeneralPunctuation
+   ingeneralpunctuation                    => 'InGenera',
+
+ # InGeometricShapes
+   ingeometricshapes                       => 'InGeomet',
+
+ # InGeorgian
+   ingeorgian                              => 'InGeorgi',
+
+ # InGothic
+   ingothic                                => 'InGothic',
+
+ # InGreek
+   ingreek                                 => 'InGreek',
+
+ # InGreekExtended
+   ingreekextended                         => 'InGreekE',
+
+ # InGujarati
+   ingujarati                              => 'InGujara',
+
+ # InGurmukhi
+   ingurmukhi                              => 'InGurmuk',
+
+ # InHalfwidthAndFullwidthForms
+   inhalfwidthandfullwidthforms            => 'InHalfwi',
+
+ # InHangulCompatibilityJamo
+   inhangulcompatibilityjamo               => 'InHangu3',
+
+ # InHangulJamo
+   inhanguljamo                            => 'InHangul',
+
+ # InHangulSyllables
+   inhangulsyllables                       => 'InHangu2',
+
+ # InHebrew
+   inhebrew                                => 'InHebrew',
+
+   inherited                               => 'Inherite',
+
+ # InHighPrivateUseSurrogates
+   inhighprivateusesurrogates              => 'InHighPr',
+
+ # InHighSurrogates
+   inhighsurrogates                        => 'InHighSu',
+
+ # InHiragana
+   inhiragana                              => 'InHiraga',
+
+ # InIdeographicDescriptionCharacters
+   inideographicdescriptioncharacters      => 'InIdeogr',
+
+ # InIpaExtensions
+   inipaextensions                         => 'InIpaExt',
+
+ # InitialPunctuation
+   initialpunctuation                      => 'Pi',
+
+ # InKanbun
+   inkanbun                                => 'InKanbun',
+
+ # InKangxiRadicals
+   inkangxiradicals                        => 'InKangxi',
+
+ # InKannada
+   inkannada                               => 'InKannad',
+
+ # InKatakana
+   inkatakana                              => 'InKataka',
+
+ # InKhmer
+   inkhmer                                 => 'InKhmer',
+
+ # InLao
+   inlao                                   => 'InLao',
+
+ # InLatin1Supplement
+   inlatin1supplement                      => 'InLatin1',
+
+ # InLatinExtendedA
+   inlatinextendeda                        => 'InLatin2',
+
+ # InLatinExtendedAdditional
+   inlatinextendedadditional               => 'InLatin3',
+
+ # InLatinExtendedB
+   inlatinextendedb                        => 'InLatinE',
+
+ # InLetterlikeSymbols
+   inletterlikesymbols                     => 'InLetter',
+
+ # InLowSurrogates
+   inlowsurrogates                         => 'InLowSur',
+
+ # InMalayalam
+   inmalayalam                             => 'InMalaya',
+
+ # InMathematicalAlphanumericSymbols
+   inmathematicalalphanumericsymbols       => 'InMathe2',
+
+ # InMathematicalOperators
+   inmathematicaloperators                 => 'InMathem',
+
+ # InMiscellaneousSymbols
+   inmiscellaneoussymbols                  => 'InMiscel',
+
+ # InMiscellaneousTechnical
+   inmiscellaneoustechnical                => 'InMisce2',
+
+ # InMongolian
+   inmongolian                             => 'InMongol',
+
+ # InMusicalSymbols
+   inmusicalsymbols                        => 'InMusica',
+
+ # InMyanmar
+   inmyanmar                               => 'InMyanma',
+
+ # InNumberForms
+   innumberforms                           => 'InNumber',
+
+ # InOgham
+   inogham                                 => 'InOgham',
+
+ # InOldItalic
+   inolditalic                             => 'InOldIta',
+
+ # InOpticalCharacterRecognition
+   inopticalcharacterrecognition           => 'InOptica',
+
+ # InOriya
+   inoriya                                 => 'InOriya',
+
+ # InPrivateUse
+   inprivateuse                            => 'InPrivat',
+
+ # InRunic
+   inrunic                                 => 'InRunic',
+
+ # InSinhala
+   insinhala                               => 'InSinhal',
+
+ # InSmallFormVariants
+   insmallformvariants                     => 'InSmallF',
+
+ # InSpacingModifierLetters
+   inspacingmodifierletters                => 'InSpacin',
+
+ # InSpecials
+   inspecials                              => 'InSpecia',
+
+ # InSuperscriptsAndSubscripts
+   insuperscriptsandsubscripts             => 'InSupers',
+
+ # InSyriac
+   insyriac                                => 'InSyriac',
+
+ # InTags
+   intags                                  => 'InTags',
+
+ # InTamil
+   intamil                                 => 'InTamil',
+
+ # InTelugu
+   intelugu                                => 'InTelugu',
+
+ # InThaana
+   inthaana                                => 'InThaana',
+
+ # InThai
+   inthai                                  => 'InThai',
+
+ # InTibetan
+   intibetan                               => 'InTibeta',
+
+ # InUnifiedCanadianAboriginalSyllabics
+   inunifiedcanadianaboriginalsyllabics     => 'InUnifie',
+
+ # InYiRadicals
+   inyiradicals                            => 'InYiRadi',
+
+ # InYiSyllables
+   inyisyllables                           => 'InYiSyll',
+
+ # InIpaExtensions
+   ipaextensions                           => 'InIpaExt',
+
+ # JoinControl
+   joincontrol                             => 'JoinCont',
+
+ # InKanbun
+   kanbun                                  => 'InKanbun',
+
+ # InKangxiRadicals
+   kangxiradicals                          => 'InKangxi',
+
+   kannada                                 => 'Kannada',
+   katakana                                => 'Katakana',
+   khmer                                   => 'Khmer',
+   lao                                     => 'Lao',
+   latin                                   => 'Latin',
+
+ # InLatin1Supplement
+   latin1supplement                        => 'InLatin1',
+
+ # InLatinExtendedA
+   latinextendeda                          => 'InLatin2',
+
+ # InLatinExtendedAdditional
+   latinextendedadditional                 => 'InLatin3',
+
+ # InLatinExtendedB
+   latinextendedb                          => 'InLatinE',
+
+   letter                                  => 'L',
+
+ # InLetterlikeSymbols
+   letterlikesymbols                       => 'InLetter',
+
+ # LetterNumber
+   letternumber                            => 'Nl',
+
+ # LineSeparator
+   lineseparator                           => 'Zl',
+
+   lowercase                               => 'Lowercas',
+
+ # LowercaseLetter
+   lowercaseletter                         => 'Ll',
+
+ # InLowSurrogates
+   lowsurrogates                           => 'InLowSur',
+
+   malayalam                               => 'Malayala',
+   mark                                    => 'M',
+   math                                    => 'Math',
+
+ # InMathematicalAlphanumericSymbols
+   mathematicalalphanumericsymbols         => 'InMathe2',
+
+ # InMathematicalOperators
+   mathematicaloperators                   => 'InMathem',
+
+ # MathSymbol
+   mathsymbol                              => 'Sm',
+
+ # InMiscellaneousSymbols
+   miscellaneoussymbols                    => 'InMiscel',
+
+ # InMiscellaneousTechnical
+   miscellaneoustechnical                  => 'InMisce2',
+
+ # ModifierLetter
+   modifierletter                          => 'Lm',
+
+ # ModifierSymbol
+   modifiersymbol                          => 'Sk',
+
+   mongolian                               => 'Mongolia',
+
+ # InMusicalSymbols
+   musicalsymbols                          => 'InMusica',
+
+   myanmar                                 => 'Myanmar',
+
+ # NoncharacterCodePoint
+   noncharactercodepoint                   => 'Nonchara',
+
+ # NonSpacingMark
+   nonspacingmark                          => 'Mn',
+
+   number                                  => 'N',
+
+ # InNumberForms
+   numberforms                             => 'InNumber',
+
+   ogham                                   => 'Ogham',
+
+ # OldItalic
+   olditalic                               => 'OldItali',
+
+ # OpenPunctuation
+   openpunctuation                         => 'Ps',
+
+ # InOpticalCharacterRecognition
+   opticalcharacterrecognition             => 'InOptica',
+
+   oriya                                   => 'Oriya',
+   other                                   => 'C',
+
+ # OtherAlphabetic
+   otheralphabetic                         => 'OtherAlp',
+
+ # OtherLetter
+   otherletter                             => 'Lo',
+
+ # OtherLowercase
+   otherlowercase                          => 'OtherLow',
+
+ # OtherMath
+   othermath                               => 'OtherMat',
+
+ # OtherNumber
+   othernumber                             => 'No',
+
+ # OtherPunctuation
+   otherpunctuation                        => 'Po',
+
+ # OtherSymbol
+   othersymbol                             => 'So',
+
+ # OtherUppercase
+   otheruppercase                          => 'OtherUpp',
+
+ # ParagraphSeparator
+   paragraphseparator                      => 'Zp',
+
+ # PrivateUse
+   privateuse                              => 'Co',
+
+   punctuation                             => 'P',
+
+ # QuotationMark
+   quotationmark                           => 'Quotatio',
+
+   runic                                   => 'Runic',
+   separator                               => 'Z',
+   sinhala                                 => 'Sinhala',
+
+ # InSmallFormVariants
+   smallformvariants                       => 'InSmallF',
+
+ # SpaceSeparator
+   spaceseparator                          => 'Zs',
+
+ # SpacingMark
+   spacingmark                             => 'Mc',
+
+ # InSpacingModifierLetters
+   spacingmodifierletters                  => 'InSpacin',
+
+ # InSpecials
+   specials                                => 'InSpecia',
+
+ # InSuperscriptsAndSubscripts
+   superscriptsandsubscripts               => 'InSupers',
+
+   surrogate                               => 'Cs',
+   symbol                                  => 'S',
+   syriac                                  => 'Syriac',
+
+ # InTags
+   tags                                    => 'InTags',
+
+   tamil                                   => 'Tamil',
+   telugu                                  => 'Telugu',
+
+ # TerminalPunctuation
+   terminalpunctuation                     => 'Terminal',
+
+   thaana                                  => 'Thaana',
+   thai                                    => 'Thai',
+   tibetan                                 => 'Tibetan',
+
+ # TitlecaseLetter
+   titlecaseletter                         => 'Lt',
+
+   unassigned                              => 'Cn',
+
+ # InUnifiedCanadianAboriginalSyllabics
+   unifiedcanadianaboriginalsyllabics      => 'InUnifie',
+
+   uppercase                               => 'Uppercas',
+
+ # UppercaseLetter
+   uppercaseletter                         => 'Lu',
+
+ # WhiteSpace
+   whitespace                              => 'WhiteSpa',
+
+   yi                                      => 'Yi',
+
+ # InYiRadicals
+   yiradicals                              => 'InYiRadi',
+
+ # InYiSyllables
+   yisyllables                             => 'InYiSyll',
+
+);
+1
diff --git a/lib/unicore/Exact.pl b/lib/unicore/Exact.pl
new file mode 100644 (file)
index 0000000..c72557a
--- /dev/null
@@ -0,0 +1,110 @@
+# !!!!!!!   DO NOT EDIT THIS FILE   !!!!!!! 
+# This file is built by ./mktables from e.g. Unicode.txt.
+# Any changes made here will be lost!
+
+##
+## Data in this file used by ../utf8_heavy.pl
+##
+
+## Mapping from name to filename in ./lib
+%utf8::Exact = (
+ ASCII         => 'ASCII',
+ All           => 'Any',
+ Alnum         => 'Alnum',
+ Alpha         => 'Alpha',
+ Any           => 'Any',
+ Assigned      => 'Assigned',
+ BidiAL        => 'BidiAL',
+ BidiAN        => 'BidiAN',
+ BidiB         => 'BidiB',
+ BidiBN        => 'BidiBN',
+ BidiCS        => 'BidiCS',
+ BidiEN        => 'BidiEN',
+ BidiES        => 'BidiES',
+ BidiET        => 'BidiET',
+ BidiL         => 'BidiL',
+ BidiLRE       => 'BidiLRE',
+ BidiLRO       => 'BidiLRO',
+ BidiNSM       => 'BidiNSM',
+ BidiON        => 'BidiON',
+ BidiPDF       => 'BidiPDF',
+ BidiR         => 'BidiR',
+ BidiRLE       => 'BidiRLE',
+ BidiRLO       => 'BidiRLO',
+ BidiS         => 'BidiS',
+ BidiWS        => 'BidiWS',
+ Blank         => 'Blank',
+ C             => 'C',
+ Canon         => 'Canon',
+ Cc            => 'Cc',
+ Cf            => 'Cf',
+ Cn            => 'Cn',
+ Cntrl         => 'Cntrl',
+ Co            => 'Co',
+ Compat        => 'Compat',
+ Cs            => 'Cs',
+ DCcircle      => 'DCcircle',
+ DCcompat      => 'DCcompat',
+ DCfinal       => 'DCfinal',
+ DCfont        => 'DCfont',
+ DCfraction    => 'DCfracti',
+ DCinitial     => 'DCinitia',
+ DCisolated    => 'DCisolat',
+ DCmedial      => 'DCmedial',
+ DCnarrow      => 'DCnarrow',
+ DCnoBreak     => 'DCnoBrea',
+ DCsmall       => 'DCsmall',
+ DCsquare      => 'DCsquare',
+ DCsub         => 'DCsub',
+ DCsuper       => 'DCsuper',
+ DCvertical    => 'DCvertic',
+ DCwide        => 'DCwide',
+ Digit         => 'Digit',
+ Graph         => 'Graph',
+ L             => 'L',
+'L&'           => 'L_',
+ Ll            => 'Ll',
+ Lm            => 'Lm',
+ Lo            => 'Lo',
+ Lower         => 'Lower',
+ Lt            => 'Lt',
+ Lu            => 'Lu',
+ M             => 'M',
+ Mc            => 'Mc',
+ Me            => 'Me',
+ Mirrored      => 'Mirrored',
+ Mn            => 'Mn',
+ N             => 'N',
+ Nd            => 'Nd',
+ Nl            => 'Nl',
+ No            => 'No',
+ P             => 'P',
+ Pc            => 'Pc',
+ Pd            => 'Pd',
+ Pe            => 'Pe',
+ Pf            => 'Pf',
+ Pi            => 'Pi',
+ Po            => 'Po',
+ Print         => 'Print',
+ Ps            => 'Ps',
+ Punct         => 'Punct',
+ S             => 'S',
+ Sc            => 'Sc',
+ Sk            => 'Sk',
+ Sm            => 'Sm',
+ So            => 'So',
+ Space         => 'Space',
+ SpacePerl     => 'SpacePer',
+ Title         => 'Title',
+ Upper         => 'Upper',
+ Word          => 'Word',
+ XDigit        => 'XDigit',
+ Z             => 'Z',
+ Zl            => 'Zl',
+ Zp            => 'Zp',
+ Zs            => 'Zs',
+ _CanonDCIJ    => '_CanonDC',
+ _CaseIgnorable  => '_CaseIgn',
+ _CombAbove    => '_CombAbo',
+);
+1;
diff --git a/lib/unicore/In.pl b/lib/unicore/In.pl
deleted file mode 100644 (file)
index 00f16e1..0000000
+++ /dev/null
@@ -1,219 +0,0 @@
-# !!!!!!!   DO NOT EDIT THIS FILE   !!!!!!! 
-# This file is built by ./mktables from e.g. Unicode.txt.
-# Any changes made here will be lost!
-
-##
-## Data in this file used by ../utf8_heavy.pl
-##
-
-## Mapping from name to filename in ./In
-%utf8::In = (
-);
-
-## Mappings from regex to filename in ./In/
-%utf8::InPat = (
- 'al' => {
-       'Alphabetic(?:[-_]|\s+)?Presentation(?:[-_]|\s+)?Forms' => 'Alphabet',
- },
- 'ar' => {
-       'Arabic(?:[-_]|\s+)?Presentation(?:[-_]|\s+)?Forms(?:[-_]|\s+)?A' => 'ArabicP2',
-       'Armenian' => 'Armenian',
-       'Arabic' => 'Arabic',
-       'Arrows' => 'Arrows',
-       'Arabic(?:[-_]|\s+)?Presentation(?:[-_]|\s+)?Forms(?:[-_]|\s+)?B' => 'ArabicPr',
- },
- 'ba' => {
-       'Basic(?:[-_]|\s+)?Latin' => 'BasicLat',
- },
- 'be' => {
-       'Bengali' => 'Bengali',
- },
- 'bl' => {
-       'Block(?:[-_]|\s+)?Elements' => 'BlockEle',
- },
- 'bo' => {
-       'Bopomofo(?:[-_]|\s+)?Extended' => 'Bopomof2',
-       'Box(?:[-_]|\s+)?Drawing' => 'BoxDrawi',
-       'Bopomofo' => 'Bopomofo',
- },
- 'br' => {
-       'Braille(?:[-_]|\s+)?Patterns' => 'BrailleP',
- },
- 'by' => {
-       'Byzantine(?:[-_]|\s+)?Musical(?:[-_]|\s+)?Symbols' => 'Byzantin',
- },
- 'ch' => {
-       'Cherokee' => 'Cherokee',
- },
- 'cj' => {
-       'Cjk(?:[-_]|\s+)?Unified(?:[-_]|\s+)?Ideographs(?:[-_]|\s+)?Extension(?:[-_]|\s+)?A' => 'CjkUnif2',
-       'Cjk(?:[-_]|\s+)?Radicals(?:[-_]|\s+)?Supplement' => 'CjkRadic',
-       'Cjk(?:[-_]|\s+)?Compatibility(?:[-_]|\s+)?Ideographs' => 'CjkComp3',
-       'Cjk(?:[-_]|\s+)?Compatibility(?:[-_]|\s+)?Ideographs(?:[-_]|\s+)?Supplement' => 'CjkComp4',
-       'Cjk(?:[-_]|\s+)?Compatibility' => 'CjkCompa',
-       'Cjk(?:[-_]|\s+)?Unified(?:[-_]|\s+)?Ideographs' => 'CjkUnifi',
-       'Cjk(?:[-_]|\s+)?Symbols(?:[-_]|\s+)?And(?:[-_]|\s+)?Punctuation' => 'CjkSymbo',
-       'Cjk(?:[-_]|\s+)?Compatibility(?:[-_]|\s+)?Forms' => 'CjkComp2',
-       'Cjk(?:[-_]|\s+)?Unified(?:[-_]|\s+)?Ideographs(?:[-_]|\s+)?Extension(?:[-_]|\s+)?B' => 'CjkUnif3',
- },
- 'co' => {
-       'Combining(?:[-_]|\s+)?Diacritical(?:[-_]|\s+)?Marks' => 'Combini2',
-       'Combining(?:[-_]|\s+)?Half(?:[-_]|\s+)?Marks' => 'Combinin',
-       'Control(?:[-_]|\s+)?Pictures' => 'ControlP',
-       'Combining(?:[-_]|\s+)?Marks(?:[-_]|\s+)?For(?:[-_]|\s+)?Symbols' => 'Combini3',
- },
- 'cu' => {
-       'Currency(?:[-_]|\s+)?Symbols' => 'Currency',
- },
- 'cy' => {
-       'Cyrillic' => 'Cyrillic',
- },
- 'de' => {
-       'Deseret' => 'Deseret',
-       'Devanagari' => 'Devanaga',
- },
- 'di' => {
-       'Dingbats' => 'Dingbats',
- },
- 'en' => {
-       'Enclosed(?:[-_]|\s+)?Alphanumerics' => 'Enclosed',
-       'Enclosed(?:[-_]|\s+)?Cjk(?:[-_]|\s+)?Letters(?:[-_]|\s+)?And(?:[-_]|\s+)?Months' => 'Enclose2',
- },
- 'et' => {
-       'Ethiopic' => 'Ethiopic',
- },
- 'ge' => {
-       'Geometric(?:[-_]|\s+)?Shapes' => 'Geometri',
-       'General(?:[-_]|\s+)?Punctuation' => 'GeneralP',
-       'Georgian' => 'Georgian',
- },
- 'go' => {
-       'Gothic' => 'Gothic',
- },
- 'gr' => {
-       'Greek(?:[-_]|\s+)?Extended' => 'GreekExt',
-       'Greek' => 'Greek',
- },
- 'gu' => {
-       'Gujarati' => 'Gujarati',
-       'Gurmukhi' => 'Gurmukhi',
- },
- 'ha' => {
-       'Hangul(?:[-_]|\s+)?Syllables' => 'HangulSy',
-       'Hangul(?:[-_]|\s+)?Compatibility(?:[-_]|\s+)?Jamo' => 'HangulCo',
-       'Halfwidth(?:[-_]|\s+)?And(?:[-_]|\s+)?Fullwidth(?:[-_]|\s+)?Forms' => 'Halfwidt',
-       'Hangul(?:[-_]|\s+)?Jamo' => 'HangulJa',
- },
- 'he' => {
-       'Hebrew' => 'Hebrew',
- },
- 'hi' => {
-       'High(?:[-_]|\s+)?Surrogates' => 'HighSurr',
-       'Hiragana' => 'Hiragana',
-       'High(?:[-_]|\s+)?Private(?:[-_]|\s+)?Use(?:[-_]|\s+)?Surrogates' => 'HighPriv',
- },
- 'id' => {
-       'Ideographic(?:[-_]|\s+)?Description(?:[-_]|\s+)?Characters' => 'Ideograp',
- },
- 'ip' => {
-       'Ipa(?:[-_]|\s+)?Extensions' => 'IpaExten',
- },
- 'ka' => {
-       'Kannada' => 'Kannada',
-       'Kanbun' => 'Kanbun',
-       'Kangxi(?:[-_]|\s+)?Radicals' => 'KangxiRa',
-       'Katakana' => 'Katakana',
- },
- 'kh' => {
-       'Khmer' => 'Khmer',
- },
- 'la' => {
-       'Lao' => 'Lao',
-       'Latin(?:[-_]|\s+)?Extended(?:[-_]|\s+)?B' => 'LatinExt',
-       'Latin(?:[-_]|\s+)?Extended(?:[-_]|\s+)?A' => 'LatinEx2',
-       'Latin(?:[-_]|\s+)?Extended(?:[-_]|\s+)?Additional' => 'LatinEx3',
-       'Latin(?:[-_]|\s+)?1(?:[-_]|\s+)?Supplement' => 'Latin1Su',
- },
- 'le' => {
-       'Letterlike(?:[-_]|\s+)?Symbols' => 'Letterli',
- },
- 'lo' => {
-       'Low(?:[-_]|\s+)?Surrogates' => 'LowSurro',
- },
- 'ma' => {
-       'Mathematical(?:[-_]|\s+)?Operators' => 'Mathemat',
-       'Mathematical(?:[-_]|\s+)?Alphanumeric(?:[-_]|\s+)?Symbols' => 'Mathema2',
-       'Malayalam' => 'Malayala',
- },
- 'mi' => {
-       'Miscellaneous(?:[-_]|\s+)?Technical' => 'Miscell2',
-       'Miscellaneous(?:[-_]|\s+)?Symbols' => 'Miscella',
- },
- 'mo' => {
-       'Mongolian' => 'Mongolia',
- },
- 'mu' => {
-       'Musical(?:[-_]|\s+)?Symbols' => 'MusicalS',
- },
- 'my' => {
-       'Myanmar' => 'Myanmar',
- },
- 'nu' => {
-       'Number(?:[-_]|\s+)?Forms' => 'NumberFo',
- },
- 'og' => {
-       'Ogham' => 'Ogham',
- },
- 'ol' => {
-       'Old(?:[-_]|\s+)?Italic' => 'OldItali',
- },
- 'op' => {
-       'Optical(?:[-_]|\s+)?Character(?:[-_]|\s+)?Recognition' => 'OpticalC',
- },
- 'or' => {
-       'Oriya' => 'Oriya',
- },
- 'pr' => {
-       'Private(?:[-_]|\s+)?Use' => 'PrivateU',
- },
- 'ru' => {
-       'Runic' => 'Runic',
- },
- 'si' => {
-       'Sinhala' => 'Sinhala',
- },
- 'sm' => {
-       'Small(?:[-_]|\s+)?Form(?:[-_]|\s+)?Variants' => 'SmallFor',
- },
- 'sp' => {
-       'Spacing(?:[-_]|\s+)?Modifier(?:[-_]|\s+)?Letters' => 'SpacingM',
-       'Specials' => 'Specials',
- },
- 'su' => {
-       'Superscripts(?:[-_]|\s+)?And(?:[-_]|\s+)?Subscripts' => 'Superscr',
- },
- 'sy' => {
-       'Syriac' => 'Syriac',
- },
- 'ta' => {
-       'Tamil' => 'Tamil',
-       'Tags' => 'Tags',
- },
- 'te' => {
-       'Telugu' => 'Telugu',
- },
- 'th' => {
-       'Thaana' => 'Thaana',
-       'Thai' => 'Thai',
- },
- 'ti' => {
-       'Tibetan' => 'Tibetan',
- },
- 'un' => {
-       'Unified(?:[-_]|\s+)?Canadian(?:[-_]|\s+)?Aboriginal(?:[-_]|\s+)?Syllabics' => 'UnifiedC',
- },
- 'yi' => {
-       'Yi(?:[-_]|\s+)?Syllables' => 'YiSyllab',
-       'Yi(?:[-_]|\s+)?Radicals' => 'YiRadica',
- },
-);
diff --git a/lib/unicore/Is.pl b/lib/unicore/Is.pl
deleted file mode 100644 (file)
index 91debee..0000000
+++ /dev/null
@@ -1,375 +0,0 @@
-# !!!!!!!   DO NOT EDIT THIS FILE   !!!!!!! 
-# This file is built by ./mktables from e.g. Unicode.txt.
-# Any changes made here will be lost!
-
-##
-## Data in this file used by ../utf8_heavy.pl
-##
-
-## Mapping from name to filename in ./Is
-%utf8::Is = (
-  'ASCII'                                   => 'ASCII',
-  'Alnum'                                   => 'Alnum',
-  'Alpha'                                   => 'Alpha',
-  'BidiAL'                                  => 'BidiAL',
-  'BidiAN'                                  => 'BidiAN',
-  'BidiB'                                   => 'BidiB',
-  'BidiBN'                                  => 'BidiBN',
-  'BidiCS'                                  => 'BidiCS',
-  'BidiEN'                                  => 'BidiEN',
-  'BidiES'                                  => 'BidiES',
-  'BidiET'                                  => 'BidiET',
-  'BidiL'                                   => 'BidiL',
-  'BidiLRE'                                 => 'BidiLRE',
-  'BidiLRO'                                 => 'BidiLRO',
-  'BidiNSM'                                 => 'BidiNSM',
-  'BidiON'                                  => 'BidiON',
-  'BidiPDF'                                 => 'BidiPDF',
-  'BidiR'                                   => 'BidiR',
-  'BidiRLE'                                 => 'BidiRLE',
-  'BidiRLO'                                 => 'BidiRLO',
-  'BidiS'                                   => 'BidiS',
-  'BidiWS'                                  => 'BidiWS',
-  'Blank'                                   => 'Blank',
-  'C'                                       => 'C',
-  'Canon'                                   => 'Canon',
-  'Cc'                                      => 'Cc',
-  'Cf'                                      => 'Cf',
-  'Cn'                                      => 'Cn',
-  'Cntrl'                                   => 'Cntrl',
-  'Co'                                      => 'Co',
-  'Compat'                                  => 'Compat',
-  'Cs'                                      => 'Cs',
-  'DCcircle'                                => 'DCcircle',
-  'DCcompat'                                => 'DCcompat',
-  'DCfinal'                                 => 'DCfinal',
-  'DCfont'                                  => 'DCfont',
-  'DCfraction'                              => 'DCfracti',
-  'DCinitial'                               => 'DCinitia',
-  'DCisolated'                              => 'DCisolat',
-  'DCmedial'                                => 'DCmedial',
-  'DCnarrow'                                => 'DCnarrow',
-  'DCnoBreak'                               => 'DCnoBrea',
-  'DCsmall'                                 => 'DCsmall',
-  'DCsquare'                                => 'DCsquare',
-  'DCsub'                                   => 'DCsub',
-  'DCsuper'                                 => 'DCsuper',
-  'DCvertical'                              => 'DCvertic',
-  'DCwide'                                  => 'DCwide',
-  'Digit'                                   => 'Digit',
-  'Graph'                                   => 'Graph',
-  'L'                                       => 'L',
-  'L&'                                      => 'L_',
-  'LbrkAI'                                  => 'LbrkAI',
-  'LbrkAL'                                  => 'LbrkAL',
-  'LbrkB2'                                  => 'LbrkB2',
-  'LbrkBA'                                  => 'LbrkBA',
-  'LbrkBB'                                  => 'LbrkBB',
-  'LbrkBK'                                  => 'LbrkBK',
-  'LbrkCB'                                  => 'LbrkCB',
-  'LbrkCL'                                  => 'LbrkCL',
-  'LbrkCM'                                  => 'LbrkCM',
-  'LbrkCR'                                  => 'LbrkCR',
-  'LbrkEX'                                  => 'LbrkEX',
-  'LbrkGL'                                  => 'LbrkGL',
-  'LbrkHY'                                  => 'LbrkHY',
-  'LbrkID'                                  => 'LbrkID',
-  'LbrkIN'                                  => 'LbrkIN',
-  'LbrkIS'                                  => 'LbrkIS',
-  'LbrkLF'                                  => 'LbrkLF',
-  'LbrkNS'                                  => 'LbrkNS',
-  'LbrkNU'                                  => 'LbrkNU',
-  'LbrkOP'                                  => 'LbrkOP',
-  'LbrkPO'                                  => 'LbrkPO',
-  'LbrkPR'                                  => 'LbrkPR',
-  'LbrkQU'                                  => 'LbrkQU',
-  'LbrkSA'                                  => 'LbrkSA',
-  'LbrkSG'                                  => 'LbrkSG',
-  'LbrkSP'                                  => 'LbrkSP',
-  'LbrkSY'                                  => 'LbrkSY',
-  'LbrkXX'                                  => 'LbrkXX',
-  'LbrkZW'                                  => 'LbrkZW',
-  'Ll'                                      => 'Ll',
-  'Lm'                                      => 'Lm',
-  'Lo'                                      => 'Lo',
-  'Lower'                                   => 'Lower',
-  'Lt'                                      => 'Lt',
-  'Lu'                                      => 'Lu',
-  'M'                                       => 'M',
-  'Mc'                                      => 'Mc',
-  'Me'                                      => 'Me',
-  'Mirrored'                                => 'Mirrored',
-  'Mn'                                      => 'Mn',
-  'N'                                       => 'N',
-  'Nd'                                      => 'Nd',
-  'Nl'                                      => 'Nl',
-  'No'                                      => 'No',
-  'P'                                       => 'P',
-  'Pc'                                      => 'Pc',
-  'Pd'                                      => 'Pd',
-  'Pe'                                      => 'Pe',
-  'Pf'                                      => 'Pf',
-  'Pi'                                      => 'Pi',
-  'Po'                                      => 'Po',
-  'Print'                                   => 'Print',
-  'Ps'                                      => 'Ps',
-  'Punct'                                   => 'Punct',
-  'S'                                       => 'S',
-  'Sc'                                      => 'Sc',
-  'Sk'                                      => 'Sk',
-  'Sm'                                      => 'Sm',
-  'So'                                      => 'So',
-  'Space'                                   => 'Space',
-  'SpacePerl'                               => 'SpacePer',
-  'Title'                                   => 'Title',
-  'Upper'                                   => 'Upper',
-  'Word'                                    => 'Word',
-  'XDigit'                                  => 'XDigit',
-  'Z'                                       => 'Z',
-  'Zl'                                      => 'Zl',
-  'Zp'                                      => 'Zp',
-  'Zs'                                      => 'Zs',
-  '_CanonDCIJ'                              => '_CanonDC',
-  '_CaseIgnorable'                          => '_CaseIgn',
-  '_CombAbove'                              => '_CombAbo',
-);
-
-## Mappings from regex to filename in ./Is/
-%utf8::IsPat = (
- 'al' => {
-       'All' => 'Any',
-       'Alphabetic' => 'Alphabet',
- },
- 'an' => {
-       'Any' => 'Any',
- },
- 'ar' => {
-       'Armenian' => 'Armenian',
-       'Arabic' => 'Arabic',
- },
- 'as' => {
-       'Ascii(?:[-_]|\s+)?Hex(?:[-_]|\s+)?Digit' => 'AsciiHex',
-       'Assigned' => 'Assigned',
- },
- 'be' => {
-       'Bengali' => 'Bengali',
- },
- 'bi' => {
-       'Bidi(?:[-_]|\s+)?Control' => 'BidiCont',
- },
- 'bo' => {
-       'Bopomofo' => 'Bopomofo',
- },
- 'ca' => {
-       'Canadian(?:[-_]|\s+)?Aboriginal' => 'Canadian',
- },
- 'ch' => {
-       'Cherokee' => 'Cherokee',
- },
- 'cl' => {
-       'Close(?:[-_]|\s+)?Punctuation' => 'Pe',
- },
- 'co' => {
-       'Control' => 'Cc',
-       'Common' => 'Common',
-       'Connector(?:[-_]|\s+)?Punctuation' => 'Pc',
- },
- 'cu' => {
-       'Currency(?:[-_]|\s+)?Symbol' => 'Sc',
- },
- 'cy' => {
-       'Cyrillic' => 'Cyrillic',
- },
- 'da' => {
-       'Dash(?:[-_]|\s+)?Punctuation' => 'Pd',
-       'Dash' => 'Dash',
- },
- 'de' => {
-       'Deseret' => 'Deseret',
-       'Devanagari' => 'Devanaga',
-       'Decimal(?:[-_]|\s+)?Number' => 'Nd',
- },
- 'di' => {
-       'Diacritic' => 'Diacriti',
- },
- 'en' => {
-       'Enclosing(?:[-_]|\s+)?Mark' => 'Me',
- },
- 'et' => {
-       'Ethiopic' => 'Ethiopic',
- },
- 'ex' => {
-       'Extender' => 'Extender',
- },
- 'fi' => {
-       'Final(?:[-_]|\s+)?Punctuation' => 'Pf',
- },
- 'fo' => {
-       'Format' => 'Cf',
- },
- 'ge' => {
-       'Georgian' => 'Georgian',
- },
- 'go' => {
-       'Gothic' => 'Gothic',
- },
- 'gr' => {
-       'Greek' => 'Greek',
- },
- 'gu' => {
-       'Gujarati' => 'Gujarati',
-       'Gurmukhi' => 'Gurmukhi',
- },
- 'ha' => {
-       'Hangul' => 'Hangul',
-       'Han' => 'Han',
- },
- 'he' => {
-       'Hebrew' => 'Hebrew',
-       'Hex(?:[-_]|\s+)?Digit' => 'HexDigit',
- },
- 'hi' => {
-       'Hiragana' => 'Hiragana',
- },
- 'hy' => {
-       'Hyphen' => 'Hyphen',
- },
- 'id' => {
-       'Ideographic' => 'Ideograp',
-       'Id(?:[-_]|\s+)?Continue' => 'IdContin',
-       'Id(?:[-_]|\s+)?Start' => 'IdStart',
- },
- 'in' => {
-       'Inherited' => 'Inherite',
-       'Initial(?:[-_]|\s+)?Punctuation' => 'Pi',
- },
- 'jo' => {
-       'Join(?:[-_]|\s+)?Control' => 'JoinCont',
- },
- 'ka' => {
-       'Kannada' => 'Kannada',
-       'Katakana' => 'Katakana',
- },
- 'kh' => {
-       'Khmer' => 'Khmer',
- },
- 'la' => {
-       'Lao' => 'Lao',
-       'Latin' => 'Latin',
- },
- 'le' => {
-       'Letter(?:[-_]|\s+)?Number' => 'Nl',
-       'Letter' => 'L',
- },
- 'li' => {
-       'Line(?:[-_]|\s+)?Separator' => 'Zl',
- },
- 'lo' => {
-       'Lowercase' => 'Lowercas',
-       'Lowercase(?:[-_]|\s+)?Letter' => 'Ll',
- },
- 'ma' => {
-       'Math' => 'Math',
-       'Malayalam' => 'Malayala',
-       'Mark' => 'M',
-       'Math(?:[-_]|\s+)?Symbol' => 'Sm',
- },
- 'mo' => {
-       'Modifier(?:[-_]|\s+)?Symbol' => 'Sk',
-       'Mongolian' => 'Mongolia',
-       'Modifier(?:[-_]|\s+)?Letter' => 'Lm',
- },
- 'my' => {
-       'Myanmar' => 'Myanmar',
- },
- 'no' => {
-       'Noncharacter(?:[-_]|\s+)?Code(?:[-_]|\s+)?Point' => 'Nonchara',
-       'Non(?:[-_]|\s+)?Spacing(?:[-_]|\s+)?Mark' => 'Mn',
- },
- 'nu' => {
-       'Number' => 'N',
- },
- 'og' => {
-       'Ogham' => 'Ogham',
- },
- 'ol' => {
-       'Old(?:[-_]|\s+)?Italic' => 'OldItali',
- },
- 'op' => {
-       'Open(?:[-_]|\s+)?Punctuation' => 'Ps',
- },
- 'or' => {
-       'Oriya' => 'Oriya',
- },
- 'ot' => {
-       'Other(?:[-_]|\s+)?Punctuation' => 'Po',
-       'Other(?:[-_]|\s+)?Uppercase' => 'OtherUpp',
-       'Other(?:[-_]|\s+)?Alphabetic' => 'OtherAlp',
-       'Other(?:[-_]|\s+)?Symbol' => 'So',
-       'Other(?:[-_]|\s+)?Number' => 'No',
-       'Other' => 'C',
-       'Other(?:[-_]|\s+)?Math' => 'OtherMat',
-       'Other(?:[-_]|\s+)?Letter' => 'Lo',
-       'Other(?:[-_]|\s+)?Lowercase' => 'OtherLow',
- },
- 'pa' => {
-       'Paragraph(?:[-_]|\s+)?Separator' => 'Zp',
- },
- 'pr' => {
-       'Private(?:[-_]|\s+)?Use' => 'Co',
- },
- 'pu' => {
-       'Punctuation' => 'P',
- },
- 'qu' => {
-       'Quotation(?:[-_]|\s+)?Mark' => 'Quotatio',
- },
- 'ru' => {
-       'Runic' => 'Runic',
- },
- 'se' => {
-       'Separator' => 'Z',
- },
- 'si' => {
-       'Sinhala' => 'Sinhala',
- },
- 'sp' => {
-       'Space(?:[-_]|\s+)?Separator' => 'Zs',
-       'Spacing(?:[-_]|\s+)?Mark' => 'Mc',
- },
- 'su' => {
-       'Surrogate' => 'Cs',
- },
- 'sy' => {
-       'Syriac' => 'Syriac',
-       'Symbol' => 'S',
- },
- 'ta' => {
-       'Tamil' => 'Tamil',
- },
- 'te' => {
-       'Telugu' => 'Telugu',
-       'Terminal(?:[-_]|\s+)?Punctuation' => 'Terminal',
- },
- 'th' => {
-       'Thaana' => 'Thaana',
-       'Thai' => 'Thai',
- },
- 'ti' => {
-       'Tibetan' => 'Tibetan',
-       'Titlecase(?:[-_]|\s+)?Letter' => 'Lt',
- },
- 'un' => {
-       'Unassigned' => 'Cn',
- },
- 'up' => {
-       'Uppercase' => 'Uppercas',
-       'Uppercase(?:[-_]|\s+)?Letter' => 'Lu',
- },
- 'wh' => {
-       'White(?:[-_]|\s+)?Space' => 'WhiteSpa',
- },
- 'yi' => {
-       'Yi' => 'Yi',
- },
-);
diff --git a/lib/unicore/Is/LbrkAI.pl b/lib/unicore/Is/LbrkAI.pl
deleted file mode 100644 (file)
index 36e3e17..0000000
+++ /dev/null
@@ -1,145 +0,0 @@
-# !!!!!!!   DO NOT EDIT THIS FILE   !!!!!!! 
-# This file is built by ./mktables from e.g. Unicode.txt.
-# Any changes made here will be lost!
-
-#
-# This file supports:
-#      \p{LbrkAI}
-# 
-# Meaning: Linebreak category 'AI'
-#
-return <<'END';
-00A1           
-00A7   00A8    
-00AA           
-00B2   00B3    
-00B6   00BA    
-00BC   00BF    
-00C6           
-00D0           
-00D7   00D8    
-00DE   00E1    
-00E6           
-00E8   00EA    
-00EC   00ED    
-00F0           
-00F2   00F3    
-00F7   00FA    
-00FC           
-00FE           
-0101           
-0111           
-0113           
-011B           
-0126   0127    
-012B           
-0131   0133    
-0138           
-013F   0142    
-0144           
-0148   014A    
-014D           
-0152   0153    
-0166   0167    
-016B           
-01CE           
-01D0           
-01D2           
-01D4           
-01D6           
-01D8           
-01DA           
-01DC           
-0251           
-0261           
-02C7           
-02C9   02CB    
-02CD           
-02D0           
-02D8   02DB    
-02DD           
-0391   03A1    
-03A3   03A9    
-03B1   03C1    
-03C3   03C9    
-0401           
-0410   044F    
-0451           
-2015   2016    
-2020   2021    
-203B           
-2074           
-207F           
-2081   2084    
-2105           
-2113           
-2121   2122    
-212B           
-2154   2155    
-215B           
-215E           
-2160   216B    
-2170   2179    
-2190   2199    
-21D2           
-21D4           
-2200           
-2202   2203    
-2207   2208    
-220B           
-220F           
-2211           
-2215           
-221A           
-221D   2220    
-2223           
-2225           
-2227   222C    
-222E           
-2234   2237    
-223C   223D    
-2248           
-224C           
-2252           
-2260   2261    
-2264   2267    
-226A   226B    
-226E   226F    
-2282   2283    
-2286   2287    
-2295           
-2299           
-22A5           
-22BF           
-2312           
-2460   24BF    
-24D0   24E9    
-2500   254B    
-2550   2574    
-2580   258F    
-2592   2595    
-25A0   25A1    
-25A3   25A9    
-25B2   25B3    
-25B6   25B7    
-25BC   25BD    
-25C0   25C1    
-25C6   25C8    
-25CB           
-25CE   25D1    
-25E2   25E5    
-25EF           
-2605   2606    
-2609           
-260E   260F    
-261C           
-261E           
-2640           
-2642           
-2660   2661    
-2663   2665    
-2667   266A    
-266C   266D    
-266F           
-FFFD           
-END
diff --git a/lib/unicore/Is/LbrkAL.pl b/lib/unicore/Is/LbrkAL.pl
deleted file mode 100644 (file)
index 59b3c4a..0000000
+++ /dev/null
@@ -1,425 +0,0 @@
-# !!!!!!!   DO NOT EDIT THIS FILE   !!!!!!! 
-# This file is built by ./mktables from e.g. Unicode.txt.
-# Any changes made here will be lost!
-
-#
-# This file supports:
-#      \p{LbrkAL}
-# 
-# Meaning: Linebreak category 'AL'
-#
-return <<'END';
-0023           
-0026           
-002A           
-003C   003E    
-0040   005A    
-005E   007A    
-007E           
-00A6           
-00A9           
-00AC           
-00AE   00AF    
-00B5           
-00C0   00C5    
-00C7   00CF    
-00D1   00D6    
-00D9   00DD    
-00E2   00E5    
-00E7           
-00EB           
-00EE   00EF    
-00F1           
-00F4   00F6    
-00FB           
-00FD           
-00FF   0100    
-0102   0110    
-0112           
-0114   011A    
-011C   0125    
-0128   012A    
-012C   0130    
-0134   0137    
-0139   013E    
-0143           
-0145   0147    
-014B   014C    
-014E   0151    
-0154   0165    
-0168   016A    
-016C   01CD    
-01CF           
-01D1           
-01D3           
-01D5           
-01D7           
-01D9           
-01DB           
-01DD   021F    
-0222   0233    
-0250           
-0252   0260    
-0262   02AD    
-02B0   02C6    
-02CE   02CF    
-02D1   02D7    
-02DC           
-02DE   02EE    
-0374   0375    
-037A           
-037E           
-0384   038A    
-038C           
-038E   0390    
-03AA   03B0    
-03C2           
-03CA   03CE    
-03D0   03D7    
-03DA   03F5    
-0400           
-0402   040F    
-0450           
-0452   0482    
-048C   04C4    
-04C7   04C8    
-04CB   04CC    
-04D0   04F5    
-04F8   04F9    
-0531   0556    
-0559   055F    
-0561   0587    
-05BE           
-05C0           
-05C3           
-05D0   05EA    
-05F0   05F4    
-060C           
-061B           
-061F           
-0621   063A    
-0640   064A    
-066A   066D    
-0671   06D5    
-06E5   06E6    
-06E9           
-06FA   06FE    
-0700   070D    
-0710           
-0712   072C    
-0780   07A5    
-0905   0939    
-093D           
-0950           
-0958   0961    
-0964   0965    
-0970           
-0985   098C    
-098F   0990    
-0993   09A8    
-09AA   09B0    
-09B2           
-09B6   09B9    
-09DC   09DD    
-09DF   09E1    
-09F0   09F1    
-09F4   09FA    
-0A05   0A0A    
-0A0F   0A10    
-0A13   0A28    
-0A2A   0A30    
-0A32   0A33    
-0A35   0A36    
-0A38   0A39    
-0A59   0A5C    
-0A5E           
-0A72   0A74    
-0A85   0A8B    
-0A8D           
-0A8F   0A91    
-0A93   0AA8    
-0AAA   0AB0    
-0AB2   0AB3    
-0AB5   0AB9    
-0ABD           
-0AD0           
-0AE0           
-0B05   0B0C    
-0B0F   0B10    
-0B13   0B28    
-0B2A   0B30    
-0B32   0B33    
-0B36   0B39    
-0B3D           
-0B5C   0B5D    
-0B5F   0B61    
-0B70           
-0B85   0B8A    
-0B8E   0B90    
-0B92   0B95    
-0B99   0B9A    
-0B9C           
-0B9E   0B9F    
-0BA3   0BA4    
-0BA8   0BAA    
-0BAE   0BB5    
-0BB7   0BB9    
-0BF0   0BF2    
-0C05   0C0C    
-0C0E   0C10    
-0C12   0C28    
-0C2A   0C33    
-0C35   0C39    
-0C60   0C61    
-0C85   0C8C    
-0C8E   0C90    
-0C92   0CA8    
-0CAA   0CB3    
-0CB5   0CB9    
-0CDE           
-0CE0   0CE1    
-0D05   0D0C    
-0D0E   0D10    
-0D12   0D28    
-0D2A   0D39    
-0D60   0D61    
-0D85   0D96    
-0D9A   0DB1    
-0DB3   0DBB    
-0DBD           
-0DC0   0DC6    
-0DF4           
-0E4F           
-0F00   0F0A    
-0F0D   0F17    
-0F1A   0F1F    
-0F2A   0F34    
-0F36           
-0F38           
-0F40   0F47    
-0F49   0F6A    
-0F85           
-0F88   0F8B    
-0FBE   0FC5    
-0FC7   0FCC    
-0FCF           
-104A   104F    
-10A0   10C5    
-10D0   10F6    
-10FB           
-1200   1206    
-1208   1246    
-1248           
-124A   124D    
-1250   1256    
-1258           
-125A   125D    
-1260   1286    
-1288           
-128A   128D    
-1290   12AE    
-12B0           
-12B2   12B5    
-12B8   12BE    
-12C0           
-12C2   12C5    
-12C8   12CE    
-12D0   12D6    
-12D8   12EE    
-12F0   130E    
-1310           
-1312   1315    
-1318   131E    
-1320   1346    
-1348   135A    
-1362   1368    
-1372   137C    
-13A0   13F4    
-1401   1676    
-1681   169A    
-16A0   16F0    
-17DC           
-1800   1805    
-1807   180A    
-1820   1877    
-1880   18A8    
-1E00   1E9B    
-1EA0   1EF9    
-1F00   1F15    
-1F18   1F1D    
-1F20   1F45    
-1F48   1F4D    
-1F50   1F57    
-1F59           
-1F5B           
-1F5D           
-1F5F   1F7D    
-1F80   1FB4    
-1FB6   1FC4    
-1FC6   1FD3    
-1FD6   1FDB    
-1FDD   1FEF    
-1FF2   1FF4    
-1FF6   1FFE    
-2017           
-2022   2023    
-2038           
-203D   2043    
-2048   204D    
-2070           
-2075   207C    
-2080           
-2085   208C    
-2100   2102    
-2104           
-2106   2108    
-210A   2112    
-2114   2115    
-2117   2120    
-2123   2125    
-2127   212A    
-212C   213A    
-2153           
-2156   215A    
-215C   215D    
-215F           
-216C   216F    
-217A   2183    
-219A   21D1    
-21D3           
-21D5   21F3    
-2201           
-2204   2206    
-2209   220A    
-220C   220E    
-2210           
-2214           
-2216   2219    
-221B   221C    
-2221   2222    
-2224           
-2226           
-222D           
-222F   2233    
-2238   223B    
-223E   2247    
-2249   224B    
-224D   2251    
-2253   225F    
-2262   2263    
-2268   2269    
-226C   226D    
-2270   2281    
-2284   2285    
-2288   2294    
-2296   2298    
-229A   22A4    
-22A6   22BE    
-22C0   22F1    
-2300   2311    
-2313   2328    
-232B   237B    
-237D   239A    
-2400   2426    
-2440   244A    
-24C0   24CF    
-24EA           
-254C   254F    
-2575   257F    
-2590   2591    
-25A2           
-25AA   25B1    
-25B4   25B5    
-25B8   25BB    
-25BE   25BF    
-25C2   25C5    
-25C9   25CA    
-25CC   25CD    
-25D2   25E1    
-25E6   25EE    
-25F0   25F7    
-2600   2604    
-2607   2608    
-260A   260D    
-2610   2613    
-2619   261B    
-261D           
-261F   263F    
-2641           
-2643   265F    
-2662           
-2666           
-266B           
-266E           
-2670   2671    
-2701   2704    
-2706   2709    
-270C   2727    
-2729   274B    
-274D           
-274F   2752    
-2756           
-2758   275E    
-2761   2767    
-2776   2794    
-2798   27AF    
-27B1   27BE    
-2800   28FF    
-FB00   FB06    
-FB13   FB17    
-FB1D           
-FB1F   FB36    
-FB38   FB3C    
-FB3E           
-FB40   FB41    
-FB43   FB44    
-FB46   FBB1    
-FBD3   FD3D    
-FD50   FD8F    
-FD92   FDC7    
-FDF0   FDFB    
-FE70   FE72    
-FE74           
-FE76   FEFC    
-FF66           
-FF71   FF9D    
-FFA0   FFBE    
-FFC2   FFC7    
-FFCA   FFCF    
-FFD2   FFD7    
-FFDA   FFDC    
-FFE8   FFEE    
-10300  1031E   
-10320  10323   
-10330  1034A   
-10400  10425   
-10428  1044D   
-1D000  1D0F5   
-1D100  1D126   
-1D12A  1D164   
-1D16A  1D16C   
-1D183  1D184   
-1D18C  1D1A9   
-1D1AE  1D1DD   
-1D400  1D454   
-1D456  1D49C   
-1D49E  1D49F   
-1D4A2          
-1D4A5  1D4A6   
-1D4A9  1D4AC   
-1D4AE  1D4B9   
-1D4BB          
-1D4BD  1D4C0   
-1D4C2  1D4C3   
-1D4C5  1D505   
-1D507  1D50A   
-1D50D  1D514   
-1D516  1D51C   
-1D51E  1D539   
-1D53B  1D53E   
-1D540  1D544   
-1D546          
-1D54A  1D550   
-1D552  1D6A3   
-1D6A8  1D7C9   
-END
diff --git a/lib/unicore/Is/LbrkB2.pl b/lib/unicore/Is/LbrkB2.pl
deleted file mode 100644 (file)
index dee0b69..0000000
+++ /dev/null
@@ -1,13 +0,0 @@
-# !!!!!!!   DO NOT EDIT THIS FILE   !!!!!!! 
-# This file is built by ./mktables from e.g. Unicode.txt.
-# Any changes made here will be lost!
-
-#
-# This file supports:
-#      \p{LbrkB2}
-# 
-# Meaning: Linebreak category 'B2'
-#
-return <<'END';
-2014           
-END
diff --git a/lib/unicore/Is/LbrkBA.pl b/lib/unicore/Is/LbrkBA.pl
deleted file mode 100644 (file)
index fcc8c61..0000000
+++ /dev/null
@@ -1,25 +0,0 @@
-# !!!!!!!   DO NOT EDIT THIS FILE   !!!!!!! 
-# This file is built by ./mktables from e.g. Unicode.txt.
-# Any changes made here will be lost!
-
-#
-# This file supports:
-#      \p{LbrkBA}
-# 
-# Meaning: Linebreak category 'BA'
-#
-return <<'END';
-0009           
-007C           
-00AD           
-058A           
-0F0B           
-1361           
-1680           
-17D5           
-2000   2006    
-2008   200A    
-2010           
-2012   2013    
-2027           
-END
diff --git a/lib/unicore/Is/LbrkBB.pl b/lib/unicore/Is/LbrkBB.pl
deleted file mode 100644 (file)
index 5f71338..0000000
+++ /dev/null
@@ -1,16 +0,0 @@
-# !!!!!!!   DO NOT EDIT THIS FILE   !!!!!!! 
-# This file is built by ./mktables from e.g. Unicode.txt.
-# Any changes made here will be lost!
-
-#
-# This file supports:
-#      \p{LbrkBB}
-# 
-# Meaning: Linebreak category 'BB'
-#
-return <<'END';
-00B4           
-02C8           
-02CC           
-1806           
-END
diff --git a/lib/unicore/Is/LbrkBK.pl b/lib/unicore/Is/LbrkBK.pl
deleted file mode 100644 (file)
index 27b4389..0000000
+++ /dev/null
@@ -1,14 +0,0 @@
-# !!!!!!!   DO NOT EDIT THIS FILE   !!!!!!! 
-# This file is built by ./mktables from e.g. Unicode.txt.
-# Any changes made here will be lost!
-
-#
-# This file supports:
-#      \p{LbrkBK}
-# 
-# Meaning: Linebreak category 'BK'
-#
-return <<'END';
-000C           
-2028   2029    
-END
diff --git a/lib/unicore/Is/LbrkCB.pl b/lib/unicore/Is/LbrkCB.pl
deleted file mode 100644 (file)
index 2a71fd5..0000000
+++ /dev/null
@@ -1,13 +0,0 @@
-# !!!!!!!   DO NOT EDIT THIS FILE   !!!!!!! 
-# This file is built by ./mktables from e.g. Unicode.txt.
-# Any changes made here will be lost!
-
-#
-# This file supports:
-#      \p{LbrkCB}
-# 
-# Meaning: Linebreak category 'CB'
-#
-return <<'END';
-FFFC           
-END
diff --git a/lib/unicore/Is/LbrkCL.pl b/lib/unicore/Is/LbrkCL.pl
deleted file mode 100644 (file)
index beeeadc..0000000
+++ /dev/null
@@ -1,54 +0,0 @@
-# !!!!!!!   DO NOT EDIT THIS FILE   !!!!!!! 
-# This file is built by ./mktables from e.g. Unicode.txt.
-# Any changes made here will be lost!
-
-#
-# This file supports:
-#      \p{LbrkCL}
-# 
-# Meaning: Linebreak category 'CL'
-#
-return <<'END';
-0029           
-005D           
-007D           
-0F3B           
-0F3D           
-169C           
-2046           
-207E           
-208E           
-232A           
-3001   3002    
-3009           
-300B           
-300D           
-300F           
-3011           
-3015           
-3017           
-3019           
-301B           
-301E   301F    
-FD3F           
-FE36           
-FE38           
-FE3A           
-FE3C           
-FE3E           
-FE40           
-FE42           
-FE44           
-FE50           
-FE52           
-FE5A           
-FE5C           
-FE5E           
-FF09           
-FF0C           
-FF0E           
-FF3D           
-FF5D           
-FF61           
-FF63   FF64    
-END
diff --git a/lib/unicore/Is/LbrkCM.pl b/lib/unicore/Is/LbrkCM.pl
deleted file mode 100644 (file)
index 1db78a9..0000000
+++ /dev/null
@@ -1,130 +0,0 @@
-# !!!!!!!   DO NOT EDIT THIS FILE   !!!!!!! 
-# This file is built by ./mktables from e.g. Unicode.txt.
-# Any changes made here will be lost!
-
-#
-# This file supports:
-#      \p{LbrkCM}
-# 
-# Meaning: Linebreak category 'CM'
-#
-return <<'END';
-0000   0008    
-000B           
-000E   001F    
-007F   009F    
-0300   034E    
-0360   0362    
-0483   0486    
-0488   0489    
-0591   05A1    
-05A3   05B9    
-05BB   05BD    
-05BF           
-05C1   05C2    
-05C4           
-064B   0655    
-0670           
-06D6   06E4    
-06E7   06E8    
-06EA   06ED    
-070F           
-0711           
-0730   074A    
-07A6   07B0    
-0901   0903    
-093C           
-093E   094D    
-0951   0954    
-0962   0963    
-0981   0983    
-09BC           
-09BE   09C4    
-09C7   09C8    
-09CB   09CD    
-09D7           
-09E2   09E3    
-0A02           
-0A3C           
-0A3E   0A42    
-0A47   0A48    
-0A4B   0A4D    
-0A70   0A71    
-0A81   0A83    
-0ABC           
-0ABE   0AC5    
-0AC7   0AC9    
-0ACB   0ACD    
-0B01   0B03    
-0B3C           
-0B3E   0B43    
-0B47   0B48    
-0B4B   0B4D    
-0B56   0B57    
-0B82   0B83    
-0BBE   0BC2    
-0BC6   0BC8    
-0BCA   0BCD    
-0BD7           
-0C01   0C03    
-0C3E   0C44    
-0C46   0C48    
-0C4A   0C4D    
-0C55   0C56    
-0C82   0C83    
-0CBE   0CC4    
-0CC6   0CC8    
-0CCA   0CCD    
-0CD5   0CD6    
-0D02   0D03    
-0D3E   0D43    
-0D46   0D48    
-0D4A   0D4D    
-0D57           
-0D82   0D83    
-0DCA           
-0DCF   0DD4    
-0DD6           
-0DD8   0DDF    
-0DF2   0DF3    
-0E31           
-0E34   0E3A    
-0E47   0E4E    
-0EB1           
-0EB4   0EB9    
-0EBB   0EBC    
-0EC8   0ECD    
-0F18   0F19    
-0F35           
-0F37           
-0F39           
-0F3E   0F3F    
-0F71   0F84    
-0F86   0F87    
-0F90   0F97    
-0F99   0FBC    
-0FC6           
-102C   1032    
-1036   1039    
-1056   1059    
-1160   11A2    
-11A8   11F9    
-17B4   17D3    
-180B   180E    
-18A9           
-200C   200F    
-202A   202E    
-206A   206F    
-20D0   20E3    
-302A   302F    
-3099   309A    
-FB1E           
-FE20   FE23    
-FFF9   FFFB    
-1D165  1D169   
-1D16D  1D182   
-1D185  1D18B   
-1D1AA  1D1AD   
-E0001          
-E0020  E007F   
-END
diff --git a/lib/unicore/Is/LbrkCR.pl b/lib/unicore/Is/LbrkCR.pl
deleted file mode 100644 (file)
index 22a4702..0000000
+++ /dev/null
@@ -1,13 +0,0 @@
-# !!!!!!!   DO NOT EDIT THIS FILE   !!!!!!! 
-# This file is built by ./mktables from e.g. Unicode.txt.
-# Any changes made here will be lost!
-
-#
-# This file supports:
-#      \p{LbrkCR}
-# 
-# Meaning: Linebreak category 'CR'
-#
-return <<'END';
-000D           
-END
diff --git a/lib/unicore/Is/LbrkEX.pl b/lib/unicore/Is/LbrkEX.pl
deleted file mode 100644 (file)
index 48626e0..0000000
+++ /dev/null
@@ -1,17 +0,0 @@
-# !!!!!!!   DO NOT EDIT THIS FILE   !!!!!!! 
-# This file is built by ./mktables from e.g. Unicode.txt.
-# Any changes made here will be lost!
-
-#
-# This file supports:
-#      \p{LbrkEX}
-# 
-# Meaning: Linebreak category 'EX'
-#
-return <<'END';
-0021           
-003F           
-FE56   FE57    
-FF01           
-FF1F           
-END
diff --git a/lib/unicore/Is/LbrkGL.pl b/lib/unicore/Is/LbrkGL.pl
deleted file mode 100644 (file)
index ae2f909..0000000
+++ /dev/null
@@ -1,18 +0,0 @@
-# !!!!!!!   DO NOT EDIT THIS FILE   !!!!!!! 
-# This file is built by ./mktables from e.g. Unicode.txt.
-# Any changes made here will be lost!
-
-#
-# This file supports:
-#      \p{LbrkGL}
-# 
-# Meaning: Linebreak category 'GL'
-#
-return <<'END';
-00A0           
-0F0C           
-2007           
-2011           
-202F           
-FEFF           
-END
diff --git a/lib/unicore/Is/LbrkHY.pl b/lib/unicore/Is/LbrkHY.pl
deleted file mode 100644 (file)
index 09cfd36..0000000
+++ /dev/null
@@ -1,13 +0,0 @@
-# !!!!!!!   DO NOT EDIT THIS FILE   !!!!!!! 
-# This file is built by ./mktables from e.g. Unicode.txt.
-# Any changes made here will be lost!
-
-#
-# This file supports:
-#      \p{LbrkHY}
-# 
-# Meaning: Linebreak category 'HY'
-#
-return <<'END';
-002D           
-END
diff --git a/lib/unicore/Is/LbrkID.pl b/lib/unicore/Is/LbrkID.pl
deleted file mode 100644 (file)
index f91dd0e..0000000
+++ /dev/null
@@ -1,91 +0,0 @@
-# !!!!!!!   DO NOT EDIT THIS FILE   !!!!!!! 
-# This file is built by ./mktables from e.g. Unicode.txt.
-# Any changes made here will be lost!
-
-#
-# This file supports:
-#      \p{LbrkID}
-# 
-# Meaning: Linebreak category 'ID'
-#
-return <<'END';
-1100   1159    
-115F           
-2E80   2E99    
-2E9B   2EF3    
-2F00   2FD5    
-2FF0   2FFB    
-3000           
-3003   3004    
-3006   3007    
-3012   3013    
-3020   3029    
-3030   303A    
-303E   303F    
-3042           
-3044           
-3046           
-3048           
-304A   3062    
-3064   3082    
-3084           
-3086           
-3088   308D    
-308F   3094    
-30A2           
-30A4           
-30A6           
-30A8           
-30AA   30C2    
-30C4   30E2    
-30E4           
-30E6           
-30E8   30ED    
-30EF   30F4    
-30F7   30FA    
-30FC           
-30FE           
-3105   312C    
-3131   318E    
-3190   31B7    
-3200   321C    
-3220   3243    
-3260   327B    
-327F   32B0    
-32C0   32CB    
-32D0   32FE    
-3300   3376    
-337B   33DD    
-33E0   33FE    
-3400   4DB5    
-4E00   9FA5    
-A000   A48C    
-A490   A4A1    
-A4A4   A4B3    
-A4B5   A4C0    
-A4C2   A4C4    
-A4C6           
-AC00   D7A3    
-F900   FA2D    
-FE30   FE34    
-FE49   FE4F    
-FE51           
-FE58           
-FE5F   FE66    
-FE68           
-FE6B           
-FF02   FF03    
-FF06   FF07    
-FF0A   FF0B    
-FF0D           
-FF0F   FF19    
-FF1C   FF1E    
-FF20   FF3A    
-FF3C           
-FF3E   FF5A    
-FF5C           
-FF5E           
-FFE2   FFE4    
-20000  2A6D6   
-2F800  2FA1D   
-END
diff --git a/lib/unicore/Is/LbrkIN.pl b/lib/unicore/Is/LbrkIN.pl
deleted file mode 100644 (file)
index e2920eb..0000000
+++ /dev/null
@@ -1,13 +0,0 @@
-# !!!!!!!   DO NOT EDIT THIS FILE   !!!!!!! 
-# This file is built by ./mktables from e.g. Unicode.txt.
-# Any changes made here will be lost!
-
-#
-# This file supports:
-#      \p{LbrkIN}
-# 
-# Meaning: Linebreak category 'IN'
-#
-return <<'END';
-2024   2026    
-END
diff --git a/lib/unicore/Is/LbrkIS.pl b/lib/unicore/Is/LbrkIS.pl
deleted file mode 100644 (file)
index 32159ba..0000000
+++ /dev/null
@@ -1,16 +0,0 @@
-# !!!!!!!   DO NOT EDIT THIS FILE   !!!!!!! 
-# This file is built by ./mktables from e.g. Unicode.txt.
-# Any changes made here will be lost!
-
-#
-# This file supports:
-#      \p{LbrkIS}
-# 
-# Meaning: Linebreak category 'IS'
-#
-return <<'END';
-002C           
-002E           
-003A   003B    
-0589           
-END
diff --git a/lib/unicore/Is/LbrkLF.pl b/lib/unicore/Is/LbrkLF.pl
deleted file mode 100644 (file)
index 84d9ef4..0000000
+++ /dev/null
@@ -1,13 +0,0 @@
-# !!!!!!!   DO NOT EDIT THIS FILE   !!!!!!! 
-# This file is built by ./mktables from e.g. Unicode.txt.
-# Any changes made here will be lost!
-
-#
-# This file supports:
-#      \p{LbrkLF}
-# 
-# Meaning: Linebreak category 'LF'
-#
-return <<'END';
-000A           
-END
diff --git a/lib/unicore/Is/LbrkNS.pl b/lib/unicore/Is/LbrkNS.pl
deleted file mode 100644 (file)
index 829d01b..0000000
+++ /dev/null
@@ -1,48 +0,0 @@
-# !!!!!!!   DO NOT EDIT THIS FILE   !!!!!!! 
-# This file is built by ./mktables from e.g. Unicode.txt.
-# Any changes made here will be lost!
-
-#
-# This file supports:
-#      \p{LbrkNS}
-# 
-# Meaning: Linebreak category 'NS'
-#
-return <<'END';
-0E5A   0E5B    
-17D4           
-17D6   17DA    
-203C           
-2044           
-3005           
-301C           
-3041           
-3043           
-3045           
-3047           
-3049           
-3063           
-3083           
-3085           
-3087           
-308E           
-309B   309E    
-30A1           
-30A3           
-30A5           
-30A7           
-30A9           
-30C3           
-30E3           
-30E5           
-30E7           
-30EE           
-30F5   30F6    
-30FB           
-30FD           
-FE54   FE55    
-FF1A   FF1B    
-FF65           
-FF67   FF70    
-FF9E   FF9F    
-END
diff --git a/lib/unicore/Is/LbrkNU.pl b/lib/unicore/Is/LbrkNU.pl
deleted file mode 100644 (file)
index bfecec3..0000000
+++ /dev/null
@@ -1,32 +0,0 @@
-# !!!!!!!   DO NOT EDIT THIS FILE   !!!!!!! 
-# This file is built by ./mktables from e.g. Unicode.txt.
-# Any changes made here will be lost!
-
-#
-# This file supports:
-#      \p{LbrkNU}
-# 
-# Meaning: Linebreak category 'NU'
-#
-return <<'END';
-0030   0039    
-0660   0669    
-06F0   06F9    
-0966   096F    
-09E6   09EF    
-0A66   0A6F    
-0AE6   0AEF    
-0B66   0B6F    
-0BE7   0BEF    
-0C66   0C6F    
-0CE6   0CEF    
-0D66   0D6F    
-0E50   0E59    
-0ED0   0ED9    
-0F20   0F29    
-1040   1049    
-1369   1371    
-17E0   17E9    
-1810   1819    
-1D7CE  1D7FF   
-END
diff --git a/lib/unicore/Is/LbrkOP.pl b/lib/unicore/Is/LbrkOP.pl
deleted file mode 100644 (file)
index 6560490..0000000
+++ /dev/null
@@ -1,50 +0,0 @@
-# !!!!!!!   DO NOT EDIT THIS FILE   !!!!!!! 
-# This file is built by ./mktables from e.g. Unicode.txt.
-# Any changes made here will be lost!
-
-#
-# This file supports:
-#      \p{LbrkOP}
-# 
-# Meaning: Linebreak category 'OP'
-#
-return <<'END';
-0028           
-005B           
-007B           
-0F3A           
-0F3C           
-169B           
-201A           
-201E           
-2045           
-207D           
-208D           
-2329           
-3008           
-300A           
-300C           
-300E           
-3010           
-3014           
-3016           
-3018           
-301A           
-301D           
-FD3E           
-FE35           
-FE37           
-FE39           
-FE3B           
-FE3D           
-FE3F           
-FE41           
-FE43           
-FE59           
-FE5B           
-FE5D           
-FF08           
-FF3B           
-FF5B           
-FF62           
-END
diff --git a/lib/unicore/Is/LbrkPO.pl b/lib/unicore/Is/LbrkPO.pl
deleted file mode 100644 (file)
index 0ea5548..0000000
+++ /dev/null
@@ -1,23 +0,0 @@
-# !!!!!!!   DO NOT EDIT THIS FILE   !!!!!!! 
-# This file is built by ./mktables from e.g. Unicode.txt.
-# Any changes made here will be lost!
-
-#
-# This file supports:
-#      \p{LbrkPO}
-# 
-# Meaning: Linebreak category 'PO'
-#
-return <<'END';
-0025           
-00A2           
-00B0           
-2030   2037    
-20A7           
-2103           
-2109           
-2126           
-FE6A           
-FF05           
-FFE0           
-END
diff --git a/lib/unicore/Is/LbrkPR.pl b/lib/unicore/Is/LbrkPR.pl
deleted file mode 100644 (file)
index be6c388..0000000
+++ /dev/null
@@ -1,28 +0,0 @@
-# !!!!!!!   DO NOT EDIT THIS FILE   !!!!!!! 
-# This file is built by ./mktables from e.g. Unicode.txt.
-# Any changes made here will be lost!
-
-#
-# This file supports:
-#      \p{LbrkPR}
-# 
-# Meaning: Linebreak category 'PR'
-#
-return <<'END';
-0024           
-002B           
-005C           
-00A3   00A5    
-00B1           
-09F2   09F3    
-0E3F           
-17DB           
-20A0   20A6    
-20A8   20AF    
-2116           
-2212   2213    
-FE69           
-FF04           
-FFE1           
-FFE5   FFE6    
-END
diff --git a/lib/unicore/Is/LbrkQU.pl b/lib/unicore/Is/LbrkQU.pl
deleted file mode 100644 (file)
index f23ef75..0000000
+++ /dev/null
@@ -1,20 +0,0 @@
-# !!!!!!!   DO NOT EDIT THIS FILE   !!!!!!! 
-# This file is built by ./mktables from e.g. Unicode.txt.
-# Any changes made here will be lost!
-
-#
-# This file supports:
-#      \p{LbrkQU}
-# 
-# Meaning: Linebreak category 'QU'
-#
-return <<'END';
-0022           
-0027           
-00AB           
-00BB           
-2018   2019    
-201B   201D    
-201F           
-2039   203A    
-END
diff --git a/lib/unicore/Is/LbrkSA.pl b/lib/unicore/Is/LbrkSA.pl
deleted file mode 100644 (file)
index fc3d98c..0000000
+++ /dev/null
@@ -1,37 +0,0 @@
-# !!!!!!!   DO NOT EDIT THIS FILE   !!!!!!! 
-# This file is built by ./mktables from e.g. Unicode.txt.
-# Any changes made here will be lost!
-
-#
-# This file supports:
-#      \p{LbrkSA}
-# 
-# Meaning: Linebreak category 'SA'
-#
-return <<'END';
-0E01   0E30    
-0E32   0E33    
-0E40   0E46    
-0E81   0E82    
-0E84           
-0E87   0E88    
-0E8A           
-0E8D           
-0E94   0E97    
-0E99   0E9F    
-0EA1   0EA3    
-0EA5           
-0EA7           
-0EAA   0EAB    
-0EAD   0EB0    
-0EB2   0EB3    
-0EBD           
-0EC0   0EC4    
-0EC6           
-0EDC   0EDD    
-1000   1021    
-1023   1027    
-1029   102A    
-1050   1055    
-1780   17B3    
-END
diff --git a/lib/unicore/Is/LbrkSG.pl b/lib/unicore/Is/LbrkSG.pl
deleted file mode 100644 (file)
index a5acf16..0000000
+++ /dev/null
@@ -1,13 +0,0 @@
-# !!!!!!!   DO NOT EDIT THIS FILE   !!!!!!! 
-# This file is built by ./mktables from e.g. Unicode.txt.
-# Any changes made here will be lost!
-
-#
-# This file supports:
-#      \p{LbrkSG}
-# 
-# Meaning: Linebreak category 'SG'
-#
-return <<'END';
-D800   DFFF    
-END
diff --git a/lib/unicore/Is/LbrkSP.pl b/lib/unicore/Is/LbrkSP.pl
deleted file mode 100644 (file)
index c21e46d..0000000
+++ /dev/null
@@ -1,13 +0,0 @@
-# !!!!!!!   DO NOT EDIT THIS FILE   !!!!!!! 
-# This file is built by ./mktables from e.g. Unicode.txt.
-# Any changes made here will be lost!
-
-#
-# This file supports:
-#      \p{LbrkSP}
-# 
-# Meaning: Linebreak category 'SP'
-#
-return <<'END';
-0020           
-END
diff --git a/lib/unicore/Is/LbrkSY.pl b/lib/unicore/Is/LbrkSY.pl
deleted file mode 100644 (file)
index 554b302..0000000
+++ /dev/null
@@ -1,13 +0,0 @@
-# !!!!!!!   DO NOT EDIT THIS FILE   !!!!!!! 
-# This file is built by ./mktables from e.g. Unicode.txt.
-# Any changes made here will be lost!
-
-#
-# This file supports:
-#      \p{LbrkSY}
-# 
-# Meaning: Linebreak category 'SY'
-#
-return <<'END';
-002F           
-END
diff --git a/lib/unicore/Is/LbrkXX.pl b/lib/unicore/Is/LbrkXX.pl
deleted file mode 100644 (file)
index 6ab9fcf..0000000
+++ /dev/null
@@ -1,15 +0,0 @@
-# !!!!!!!   DO NOT EDIT THIS FILE   !!!!!!! 
-# This file is built by ./mktables from e.g. Unicode.txt.
-# Any changes made here will be lost!
-
-#
-# This file supports:
-#      \p{LbrkXX}
-# 
-# Meaning: Linebreak category 'XX'
-#
-return <<'END';
-E000   F8FF    
-F0000  FFFFD   
-100000 10FFFD  
-END
diff --git a/lib/unicore/Is/LbrkZW.pl b/lib/unicore/Is/LbrkZW.pl
deleted file mode 100644 (file)
index a338cba..0000000
+++ /dev/null
@@ -1,13 +0,0 @@
-# !!!!!!!   DO NOT EDIT THIS FILE   !!!!!!! 
-# This file is built by ./mktables from e.g. Unicode.txt.
-# Any changes made here will be lost!
-
-#
-# This file supports:
-#      \p{LbrkZW}
-# 
-# Meaning: Linebreak category 'ZW'
-#
-return <<'END';
-200B           
-END
index 948e982..99b7026 100644 (file)
@@ -1,6 +1,12 @@
 all:
        ../../miniperl -I../../lib ./mktables
 
+TestProp.pl: mktables Unicode.txt Scripts.txt Blocks.txt PropList.txt
+       ../../miniperl -I../../lib ./mktables -maketest
+
+test:   TestProp.pl
+       ../../miniperl -I../../lib TestProp.pl
+
 clean:
        rm -f *.pl */*.pl
        rm -f Properties
index 377fa5a..143ca59 100644 (file)
@@ -6,15 +6,15 @@
 ##
 
   \p{ASCII}                                  [[:ASCII:]]
-* \p{All}                                    Alias for \p{Any} ([\x{0000}-\x{10FFFF}])
   \p{Alnum}                                  [[:Alnum:]]
 * \p{Alphabetic}                             [\p{L}\p{OtherAlphabetic}]
   \p{Alpha}                                  [[:Alpha:]]
-* \p{Any}                                    [\x{0000}-\x{10FFFF}]
+  \p{Any}                                    Alias for \p{Any} ([\x{0000}-\x{10FFFF}])
+  \p{Any}                                    [\x{0000}-\x{10FFFF}]
 * \p{Arabic}                                 Script 'ARABIC'
 * \p{Armenian}                               Script 'ARMENIAN'
 * \p{AsciiHexDigit}                          Extended property 'ASCII_Hex_Digit'
-* \p{Assigned}                               All assigned code points
+  \p{Assigned}                               All assigned code points
 * \p{Bengali}                                Script 'BENGALI'
   \p{BidiAL}                                 Bi-directional category 'AL'
   \p{BidiAN}                                 Bi-directional category 'AN'
 * \p{Bopomofo}                               Script 'BOPOMOFO'
 * \p{CanadianAboriginal}                     Script 'CANADIAN-ABORIGINAL'
   \p{Canon}                                  Decomposes to multiple characters
+* \p{Cc}                                     Alias for \p{Cc} (General Category 'Cc')
   \p{Cc}                                     General Category 'Cc'
+* \p{Cf}                                     Alias for \p{Cf} (General Category 'Cf')
   \p{Cf}                                     General Category 'Cf'
 * \p{Cherokee}                               Script 'CHEROKEE'
-* \p{ClosePunctuation}                       Alias for \p{Pe} (General Category 'Pe')
   \p{Cntrl}                                  [[:Cntrl:]]
+* \p{Cn}                                     Alias for \p{Cn} (General Category 'Cn' [not functional in Perl])
   \p{Cn}                                     General Category 'Cn' [not functional in Perl]
 * \p{Common}                                 Pseudo-Script of codepoints not in other Unicode scripts
   \p{Compat}                                 Compatible with a more-basic character
-* \p{ConnectorPunctuation}                   Alias for \p{Pc} (General Category 'Pc')
-* \p{Control}                                Alias for \p{Cc} (General Category 'Cc')
+* \p{Co}                                     Alias for \p{Co} (General Category 'Co')
   \p{Co}                                     General Category 'Co'
+* \p{Cs}                                     Alias for \p{Cs} (General Category 'Cs')
   \p{Cs}                                     General Category 'Cs'
-* \p{CurrencySymbol}                         Alias for \p{Sc} (General Category 'Sc')
 * \p{Cyrillic}                               Script 'CYRILLIC'
+* \p{C}                                      Alias for \p{C} (Major Category 'C')
   \p{C}                                      Major Category 'C'
   \p{DCcircle}                               Compatible with 'circle'
   \p{DCcompat}                               Compatible with 'compat'
   \p{DCsuper}                                Compatible with 'super'
   \p{DCvertical}                             Compatible with 'vertical'
   \p{DCwide}                                 Compatible with 'wide'
-* \p{DashPunctuation}                        Alias for \p{Pd} (General Category 'Pd')
 * \p{Dash}                                   Extended property 'Dash'
-* \p{DecimalNumber}                          Alias for \p{Nd} (General Category 'Nd')
 * \p{Deseret}                                Script 'DESERET'
 * \p{Devanagari}                             Script 'DEVANAGARI'
 * \p{Diacritic}                              Extended property 'Diacritic'
   \p{Digit}                                  [[:Digit:]]
-* \p{EnclosingMark}                          Alias for \p{Me} (General Category 'Me')
 * \p{Ethiopic}                               Script 'ETHIOPIC'
 * \p{Extender}                               Extended property 'Extender'
-* \p{FinalPunctuation}                       Alias for \p{Pf} (General Category 'Pf')
-* \p{Format}                                 Alias for \p{Cf} (General Category 'Cf')
 * \p{Georgian}                               Script 'GEORGIAN'
 * \p{Gothic}                                 Script 'GOTHIC'
   \p{Graph}                                  [[:Graph:]]
 * \p{InYiRadicals}                           Block 'Yi Radicals'
 * \p{InYiSyllables}                          Block 'Yi Syllables'
 * \p{Inherited}                              Script 'INHERITED'
-* \p{InitialPunctuation}                     Alias for \p{Pi} (General Category 'Pi')
 * \p{JoinControl}                            Extended property 'Join_Control'
 * \p{Kannada}                                Script 'KANNADA'
 * \p{Katakana}                               Script 'KATAKANA'
   \p{L&}                                     [\p{Ll}\p{Lu}\p{Lt}]
 * \p{Lao}                                    Script 'LAO'
 * \p{Latin}                                  Script 'LATIN'
-  \p{LbrkAI}                                 Linebreak category 'AI'
-  \p{LbrkAL}                                 Linebreak category 'AL'
-  \p{LbrkB2}                                 Linebreak category 'B2'
-  \p{LbrkBA}                                 Linebreak category 'BA'
-  \p{LbrkBB}                                 Linebreak category 'BB'
-  \p{LbrkBK}                                 Linebreak category 'BK'
-  \p{LbrkCB}                                 Linebreak category 'CB'
-  \p{LbrkCL}                                 Linebreak category 'CL'
-  \p{LbrkCM}                                 Linebreak category 'CM'
-  \p{LbrkCR}                                 Linebreak category 'CR'
-  \p{LbrkEX}                                 Linebreak category 'EX'
-  \p{LbrkGL}                                 Linebreak category 'GL'
-  \p{LbrkHY}                                 Linebreak category 'HY'
-  \p{LbrkID}                                 Linebreak category 'ID'
-  \p{LbrkIN}                                 Linebreak category 'IN'
-  \p{LbrkIS}                                 Linebreak category 'IS'
-  \p{LbrkLF}                                 Linebreak category 'LF'
-  \p{LbrkNS}                                 Linebreak category 'NS'
-  \p{LbrkNU}                                 Linebreak category 'NU'
-  \p{LbrkOP}                                 Linebreak category 'OP'
-  \p{LbrkPO}                                 Linebreak category 'PO'
-  \p{LbrkPR}                                 Linebreak category 'PR'
-  \p{LbrkQU}                                 Linebreak category 'QU'
-  \p{LbrkSA}                                 Linebreak category 'SA'
-  \p{LbrkSG}                                 Linebreak category 'SG'
-  \p{LbrkSP}                                 Linebreak category 'SP'
-  \p{LbrkSY}                                 Linebreak category 'SY'
-  \p{LbrkXX}                                 Linebreak category 'XX'
-  \p{LbrkZW}                                 Linebreak category 'ZW'
-* \p{LetterNumber}                           Alias for \p{Nl} (General Category 'Nl')
-* \p{Letter}                                 Alias for \p{L} (Major Category 'L')
-* \p{LineSeparator}                          Alias for \p{Zl} (General Category 'Zl')
+* \p{Ll}                                     Alias for \p{Ll} (General Category 'Ll')
   \p{Ll}                                     General Category 'Ll'
+* \p{Lm}                                     Alias for \p{Lm} (General Category 'Lm')
   \p{Lm}                                     General Category 'Lm'
-* \p{LowercaseLetter}                        Alias for \p{Ll} (General Category 'Ll')
 * \p{Lowercase}                              [\p{Ll}\p{OtherLowercase}]
   \p{Lower}                                  [[:Lower:]]
+* \p{Lo}                                     Alias for \p{Lo} (General Category 'Lo')
   \p{Lo}                                     General Category 'Lo'
+* \p{Lt}                                     Alias for \p{Lt} (General Category 'Lt')
   \p{Lt}                                     General Category 'Lt'
+* \p{Lu}                                     Alias for \p{Lu} (General Category 'Lu')
   \p{Lu}                                     General Category 'Lu'
+* \p{L}                                      Alias for \p{L} (Major Category 'L')
   \p{L}                                      Major Category 'L'
 * \p{Malayalam}                              Script 'MALAYALAM'
-* \p{Mark}                                   Alias for \p{M} (Major Category 'M')
-* \p{MathSymbol}                             Alias for \p{Sm} (General Category 'Sm')
 * \p{Math}                                   [\p{Sm}\p{OtherMath}]
+* \p{Mc}                                     Alias for \p{Mc} (General Category 'Mc')
   \p{Mc}                                     General Category 'Mc'
+* \p{Me}                                     Alias for \p{Me} (General Category 'Me')
   \p{Me}                                     General Category 'Me'
   \p{Mirrored}                               Mirrored in bidirectional text
+* \p{Mn}                                     Alias for \p{Mn} (General Category 'Mn')
   \p{Mn}                                     General Category 'Mn'
-* \p{ModifierLetter}                         Alias for \p{Lm} (General Category 'Lm')
-* \p{ModifierSymbol}                         Alias for \p{Sk} (General Category 'Sk')
 * \p{Mongolian}                              Script 'MONGOLIAN'
 * \p{Myanmar}                                Script 'MYANMAR'
+* \p{M}                                      Alias for \p{M} (Major Category 'M')
   \p{M}                                      Major Category 'M'
+* \p{Nd}                                     Alias for \p{Nd} (General Category 'Nd')
   \p{Nd}                                     General Category 'Nd'
+* \p{Nl}                                     Alias for \p{Nl} (General Category 'Nl')
   \p{Nl}                                     General Category 'Nl'
-* \p{NonSpacingMark}                         Alias for \p{Mn} (General Category 'Mn')
 * \p{NoncharacterCodePoint}                  Extended property 'Noncharacter_Code_Point'
+* \p{No}                                     Alias for \p{No} (General Category 'No')
   \p{No}                                     General Category 'No'
-* \p{Number}                                 Alias for \p{N} (Major Category 'N')
+* \p{N}                                      Alias for \p{N} (Major Category 'N')
   \p{N}                                      Major Category 'N'
 * \p{Ogham}                                  Script 'OGHAM'
 * \p{OldItalic}                              Script 'OLD-ITALIC'
-* \p{OpenPunctuation}                        Alias for \p{Ps} (General Category 'Ps')
 * \p{Oriya}                                  Script 'ORIYA'
 * \p{OtherAlphabetic}                        Extended property 'Other_Alphabetic'
-* \p{OtherLetter}                            Alias for \p{Lo} (General Category 'Lo')
 * \p{OtherLowercase}                         Extended property 'Other_Lowercase'
 * \p{OtherMath}                              Extended property 'Other_Math'
-* \p{OtherNumber}                            Alias for \p{No} (General Category 'No')
-* \p{OtherPunctuation}                       Alias for \p{Po} (General Category 'Po')
-* \p{OtherSymbol}                            Alias for \p{So} (General Category 'So')
 * \p{OtherUppercase}                         Extended property 'Other_Uppercase'
-* \p{Other}                                  Alias for \p{C} (Major Category 'C')
-* \p{ParagraphSeparator}                     Alias for \p{Zp} (General Category 'Zp')
+* \p{Pc}                                     Alias for \p{Pc} (General Category 'Pc')
   \p{Pc}                                     General Category 'Pc'
+* \p{Pd}                                     Alias for \p{Pd} (General Category 'Pd')
   \p{Pd}                                     General Category 'Pd'
+* \p{Pe}                                     Alias for \p{Pe} (General Category 'Pe')
   \p{Pe}                                     General Category 'Pe'
+* \p{Pf}                                     Alias for \p{Pf} (General Category 'Pf')
   \p{Pf}                                     General Category 'Pf'
+* \p{Pi}                                     Alias for \p{Pi} (General Category 'Pi')
   \p{Pi}                                     General Category 'Pi'
+* \p{Po}                                     Alias for \p{Po} (General Category 'Po')
   \p{Po}                                     General Category 'Po'
   \p{Print}                                  [[:Print:]]
-* \p{PrivateUse}                             Alias for \p{Co} (General Category 'Co')
+* \p{Ps}                                     Alias for \p{Ps} (General Category 'Ps')
   \p{Ps}                                     General Category 'Ps'
-* \p{Punctuation}                            Alias for \p{P} (Major Category 'P')
   \p{Punct}                                  [[:Punct:]]
+* \p{P}                                      Alias for \p{P} (Major Category 'P')
   \p{P}                                      Major Category 'P'
 * \p{QuotationMark}                          Extended property 'Quotation_Mark'
 * \p{Runic}                                  Script 'RUNIC'
+* \p{Sc}                                     Alias for \p{Sc} (General Category 'Sc')
   \p{Sc}                                     General Category 'Sc'
-* \p{Separator}                              Alias for \p{Z} (Major Category 'Z')
 * \p{Sinhala}                                Script 'SINHALA'
+* \p{Sk}                                     Alias for \p{Sk} (General Category 'Sk')
   \p{Sk}                                     General Category 'Sk'
+* \p{Sm}                                     Alias for \p{Sm} (General Category 'Sm')
   \p{Sm}                                     General Category 'Sm'
+* \p{So}                                     Alias for \p{So} (General Category 'So')
   \p{So}                                     General Category 'So'
   \p{SpacePerl}                              \s
-* \p{SpaceSeparator}                         Alias for \p{Zs} (General Category 'Zs')
   \p{Space}                                  [[:Space:]]
-* \p{SpacingMark}                            Alias for \p{Mc} (General Category 'Mc')
-* \p{Surrogate}                              Alias for \p{Cs} (General Category 'Cs')
-* \p{Symbol}                                 Alias for \p{S} (Major Category 'S')
 * \p{Syriac}                                 Script 'SYRIAC'
+* \p{S}                                      Alias for \p{S} (Major Category 'S')
   \p{S}                                      Major Category 'S'
 * \p{Tamil}                                  Script 'TAMIL'
 * \p{Telugu}                                 Script 'TELUGU'
 * \p{Thaana}                                 Script 'THAANA'
 * \p{Thai}                                   Script 'THAI'
 * \p{Tibetan}                                Script 'TIBETAN'
-* \p{TitlecaseLetter}                        Alias for \p{Lt} (General Category 'Lt')
   \p{Title}                                  [[:Title:]]
-* \p{Unassigned}                             Alias for \p{Cn} (General Category 'Cn' [not functional in Perl])
-* \p{UppercaseLetter}                        Alias for \p{Lu} (General Category 'Lu')
 * \p{Uppercase}                              [\p{Lu}\p{Other_Uppercase}]
   \p{Upper}                                  [[:Upper:]]
 * \p{WhiteSpace}                             Extended property 'White_space'
   \p{Word}                                   [[:Word:]]
   \p{XDigit}                                 [[:XDigit:]]
 * \p{Yi}                                     Script 'YI'
+* \p{Zl}                                     Alias for \p{Zl} (General Category 'Zl')
   \p{Zl}                                     General Category 'Zl'
+* \p{Zp}                                     Alias for \p{Zp} (General Category 'Zp')
   \p{Zp}                                     General Category 'Zp'
+* \p{Zs}                                     Alias for \p{Zs} (General Category 'Zs')
   \p{Zs}                                     General Category 'Zs'
+* \p{Z}                                      Alias for \p{Z} (Major Category 'Z')
   \p{Z}                                      Major Category 'Z'
   \p{_CanonDCIJ}                             (for internal casefolding use)
   \p{_CaseIgnorable}                         (for internal casefolding use)
index 74b34a1..9531d15 100644 (file)
@@ -1,5 +1,5 @@
 # !!!!!!!   DO NOT EDIT THIS FILE   !!!!!!! 
-# This file is built by mktables from e.g. Unicode.txt.
+# This file is built by ./mktables from e.g. Unicode.txt.
 # Any changes made here will be lost!
 
 return <<'END';
index 5a9117f..ae5642d 100644 (file)
@@ -1,5 +1,5 @@
 # !!!!!!!   DO NOT EDIT THIS FILE   !!!!!!! 
-# This file is built by mktables from e.g. Unicode.txt.
+# This file is built by ./mktables from e.g. Unicode.txt.
 # Any changes made here will be lost!
 
 
index 0574910..0cf3d23 100644 (file)
@@ -1,5 +1,5 @@
 # !!!!!!!   DO NOT EDIT THIS FILE   !!!!!!! 
-# This file is built by mktables from e.g. Unicode.txt.
+# This file is built by ./mktables from e.g. Unicode.txt.
 # Any changes made here will be lost!
 
 
index 576cff3..445732f 100644 (file)
@@ -1,5 +1,5 @@
 # !!!!!!!   DO NOT EDIT THIS FILE   !!!!!!! 
-# This file is built by mktables from e.g. Unicode.txt.
+# This file is built by ./mktables from e.g. Unicode.txt.
 # Any changes made here will be lost!
 
 
index 3b3f07f..3b57ed3 100644 (file)
@@ -1,5 +1,5 @@
 # !!!!!!!   DO NOT EDIT THIS FILE   !!!!!!! 
-# This file is built by mktables from e.g. Unicode.txt.
+# This file is built by ./mktables from e.g. Unicode.txt.
 # Any changes made here will be lost!
 
 
similarity index 76%
rename from lib/unicore/Is/Any.pl
rename to lib/unicore/lib/Any.pl
index 45a0636..8179ec7 100644 (file)
@@ -4,8 +4,8 @@
 
 #
 # This file supports:
-#      \p{Any} (and fuzzy permutations)
-#      \p{All} (and fuzzy permutations)
+#      \p{Any}
+#      \p{Any}
 # 
 # Meaning: [\x{0000}-\x{10FFFF}]
 #
similarity index 99%
rename from lib/unicore/Is/Assigned.pl
rename to lib/unicore/lib/Assigned.pl
index 3646421..e1ace2e 100644 (file)
@@ -4,7 +4,7 @@
 
 #
 # This file supports:
-#      \p{Assigned} (and fuzzy permutations)
+#      \p{Assigned}
 # 
 # Meaning: All assigned code points
 #
similarity index 99%
rename from lib/unicore/Is/C.pl
rename to lib/unicore/lib/C.pl
index f9f7420..c3a04e8 100644 (file)
@@ -5,7 +5,7 @@
 #
 # This file supports:
 #      \p{C}
-#      \p{Other} (and fuzzy permutations)
+#      \p{C} (and fuzzy permutations)
 # 
 # Meaning: Major Category 'C'
 #
similarity index 86%
rename from lib/unicore/Is/Cc.pl
rename to lib/unicore/lib/Cc.pl
index 0d654ab..4238ba8 100644 (file)
@@ -5,7 +5,7 @@
 #
 # This file supports:
 #      \p{Cc}
-#      \p{Control} (and fuzzy permutations)
+#      \p{Cc} (and fuzzy permutations)
 # 
 # Meaning: General Category 'Cc'
 #
similarity index 89%
rename from lib/unicore/Is/Cf.pl
rename to lib/unicore/lib/Cf.pl
index efc1336..9c05455 100644 (file)
@@ -5,7 +5,7 @@
 #
 # This file supports:
 #      \p{Cf}
-#      \p{Format} (and fuzzy permutations)
+#      \p{Cf} (and fuzzy permutations)
 # 
 # Meaning: General Category 'Cf'
 #
similarity index 98%
rename from lib/unicore/Is/Cn.pl
rename to lib/unicore/lib/Cn.pl
index b2598e7..c666285 100644 (file)
@@ -5,7 +5,7 @@
 #
 # This file supports:
 #      \p{Cn}
-#      \p{Unassigned} (and fuzzy permutations)
+#      \p{Cn} (and fuzzy permutations)
 # 
 # Meaning: General Category 'Cn' [not functional in Perl]
 #
similarity index 86%
rename from lib/unicore/Is/Co.pl
rename to lib/unicore/lib/Co.pl
index 46da434..bd792f3 100644 (file)
@@ -5,7 +5,7 @@
 #
 # This file supports:
 #      \p{Co}
-#      \p{PrivateUse} (and fuzzy permutations)
+#      \p{Co} (and fuzzy permutations)
 # 
 # Meaning: General Category 'Co'
 #
similarity index 85%
rename from lib/unicore/Is/Cs.pl
rename to lib/unicore/lib/Cs.pl
index a6a181f..5a1ea95 100644 (file)
@@ -5,7 +5,7 @@
 #
 # This file supports:
 #      \p{Cs}
-#      \p{Surrogate} (and fuzzy permutations)
+#      \p{Cs} (and fuzzy permutations)
 # 
 # Meaning: General Category 'Cs'
 #
similarity index 100%
rename from lib/unicore/Is/Han.pl
rename to lib/unicore/lib/Han.pl
similarity index 98%
rename from lib/unicore/Is/L.pl
rename to lib/unicore/lib/L.pl
index 663fd1e..b3f6df3 100644 (file)
@@ -5,7 +5,7 @@
 #
 # This file supports:
 #      \p{L}
-#      \p{Letter} (and fuzzy permutations)
+#      \p{L} (and fuzzy permutations)
 # 
 # Meaning: Major Category 'L'
 #
similarity index 100%
rename from lib/unicore/Is/L_.pl
rename to lib/unicore/lib/L_.pl
similarity index 100%
rename from lib/unicore/Is/Lao.pl
rename to lib/unicore/lib/Lao.pl
similarity index 98%
rename from lib/unicore/Is/Ll.pl
rename to lib/unicore/lib/Ll.pl
index 1cecfe7..573c56a 100644 (file)
@@ -5,7 +5,7 @@
 #
 # This file supports:
 #      \p{Ll}
-#      \p{LowercaseLetter} (and fuzzy permutations)
+#      \p{Ll} (and fuzzy permutations)
 # 
 # Meaning: General Category 'Ll'
 #
similarity index 89%
rename from lib/unicore/Is/Lm.pl
rename to lib/unicore/lib/Lm.pl
index 7cbb55e..2dfd4f3 100644 (file)
@@ -5,7 +5,7 @@
 #
 # This file supports:
 #      \p{Lm}
-#      \p{ModifierLetter} (and fuzzy permutations)
+#      \p{Lm} (and fuzzy permutations)
 # 
 # Meaning: General Category 'Lm'
 #
similarity index 98%
rename from lib/unicore/Is/Lo.pl
rename to lib/unicore/lib/Lo.pl
index b0b46e6..7113103 100644 (file)
@@ -5,7 +5,7 @@
 #
 # This file supports:
 #      \p{Lo}
-#      \p{OtherLetter} (and fuzzy permutations)
+#      \p{Lo} (and fuzzy permutations)
 # 
 # Meaning: General Category 'Lo'
 #
similarity index 86%
rename from lib/unicore/Is/Lt.pl
rename to lib/unicore/lib/Lt.pl
index ed8af13..b758932 100644 (file)
@@ -5,7 +5,7 @@
 #
 # This file supports:
 #      \p{Lt}
-#      \p{TitlecaseLetter} (and fuzzy permutations)
+#      \p{Lt} (and fuzzy permutations)
 # 
 # Meaning: General Category 'Lt'
 #
similarity index 98%
rename from lib/unicore/Is/Lu.pl
rename to lib/unicore/lib/Lu.pl
index 7ded16b..2923b3b 100644 (file)
@@ -5,7 +5,7 @@
 #
 # This file supports:
 #      \p{Lu}
-#      \p{UppercaseLetter} (and fuzzy permutations)
+#      \p{Lu} (and fuzzy permutations)
 # 
 # Meaning: General Category 'Lu'
 #
similarity index 97%
rename from lib/unicore/Is/M.pl
rename to lib/unicore/lib/M.pl
index a4cd30a..0274aed 100644 (file)
@@ -5,7 +5,7 @@
 #
 # This file supports:
 #      \p{M}
-#      \p{Mark} (and fuzzy permutations)
+#      \p{M} (and fuzzy permutations)
 # 
 # Meaning: Major Category 'M'
 #
similarity index 94%
rename from lib/unicore/Is/Mc.pl
rename to lib/unicore/lib/Mc.pl
index 5cb32b8..70181af 100644 (file)
@@ -5,7 +5,7 @@
 #
 # This file supports:
 #      \p{Mc}
-#      \p{SpacingMark} (and fuzzy permutations)
+#      \p{Mc} (and fuzzy permutations)
 # 
 # Meaning: General Category 'Mc'
 #
similarity index 85%
rename from lib/unicore/Is/Me.pl
rename to lib/unicore/lib/Me.pl
index 3afdefd..03ba369 100644 (file)
@@ -5,7 +5,7 @@
 #
 # This file supports:
 #      \p{Me}
-#      \p{EnclosingMark} (and fuzzy permutations)
+#      \p{Me} (and fuzzy permutations)
 # 
 # Meaning: General Category 'Me'
 #
similarity index 96%
rename from lib/unicore/Is/Mn.pl
rename to lib/unicore/lib/Mn.pl
index c86c640..c707ec2 100644 (file)
@@ -5,7 +5,7 @@
 #
 # This file supports:
 #      \p{Mn}
-#      \p{NonSpacingMark} (and fuzzy permutations)
+#      \p{Mn} (and fuzzy permutations)
 # 
 # Meaning: General Category 'Mn'
 #
similarity index 94%
rename from lib/unicore/Is/N.pl
rename to lib/unicore/lib/N.pl
index 57b4170..07d2da4 100644 (file)
@@ -5,7 +5,7 @@
 #
 # This file supports:
 #      \p{N}
-#      \p{Number} (and fuzzy permutations)
+#      \p{N} (and fuzzy permutations)
 # 
 # Meaning: Major Category 'N'
 #
similarity index 91%
rename from lib/unicore/Is/Nd.pl
rename to lib/unicore/lib/Nd.pl
index c6fd133..d51cb07 100644 (file)
@@ -5,7 +5,7 @@
 #
 # This file supports:
 #      \p{Nd}
-#      \p{DecimalNumber} (and fuzzy permutations)
+#      \p{Nd} (and fuzzy permutations)
 # 
 # Meaning: General Category 'Nd'
 #
similarity index 86%
rename from lib/unicore/Is/Nl.pl
rename to lib/unicore/lib/Nl.pl
index 690bed4..9b42189 100644 (file)
@@ -5,7 +5,7 @@
 #
 # This file supports:
 #      \p{Nl}
-#      \p{LetterNumber} (and fuzzy permutations)
+#      \p{Nl} (and fuzzy permutations)
 # 
 # Meaning: General Category 'Nl'
 #
similarity index 90%
rename from lib/unicore/Is/No.pl
rename to lib/unicore/lib/No.pl
index f67310a..b0cc18c 100644 (file)
@@ -5,7 +5,7 @@
 #
 # This file supports:
 #      \p{No}
-#      \p{OtherNumber} (and fuzzy permutations)
+#      \p{No} (and fuzzy permutations)
 # 
 # Meaning: General Category 'No'
 #
similarity index 95%
rename from lib/unicore/Is/P.pl
rename to lib/unicore/lib/P.pl
index 599bc30..df116cf 100644 (file)
@@ -5,7 +5,7 @@
 #
 # This file supports:
 #      \p{P}
-#      \p{Punctuation} (and fuzzy permutations)
+#      \p{P} (and fuzzy permutations)
 # 
 # Meaning: Major Category 'P'
 #
similarity index 84%
rename from lib/unicore/Is/Pc.pl
rename to lib/unicore/lib/Pc.pl
index 04a8c1f..f4a03a9 100644 (file)
@@ -5,7 +5,7 @@
 #
 # This file supports:
 #      \p{Pc}
-#      \p{ConnectorPunctuation} (and fuzzy permutations)
+#      \p{Pc} (and fuzzy permutations)
 # 
 # Meaning: General Category 'Pc'
 #
similarity index 86%
rename from lib/unicore/Is/Pd.pl
rename to lib/unicore/lib/Pd.pl
index 453ec5a..be8412a 100644 (file)
@@ -5,7 +5,7 @@
 #
 # This file supports:
 #      \p{Pd}
-#      \p{DashPunctuation} (and fuzzy permutations)
+#      \p{Pd} (and fuzzy permutations)
 # 
 # Meaning: General Category 'Pd'
 #
similarity index 90%
rename from lib/unicore/Is/Pe.pl
rename to lib/unicore/lib/Pe.pl
index 2be04ae..72c1bae 100644 (file)
@@ -5,7 +5,7 @@
 #
 # This file supports:
 #      \p{Pe}
-#      \p{ClosePunctuation} (and fuzzy permutations)
+#      \p{Pe} (and fuzzy permutations)
 # 
 # Meaning: General Category 'Pe'
 #
similarity index 84%
rename from lib/unicore/Is/Pf.pl
rename to lib/unicore/lib/Pf.pl
index b8c60da..9af533d 100644 (file)
@@ -5,7 +5,7 @@
 #
 # This file supports:
 #      \p{Pf}
-#      \p{FinalPunctuation} (and fuzzy permutations)
+#      \p{Pf} (and fuzzy permutations)
 # 
 # Meaning: General Category 'Pf'
 #
similarity index 84%
rename from lib/unicore/Is/Pi.pl
rename to lib/unicore/lib/Pi.pl
index 868d4fb..54fa4e6 100644 (file)
@@ -5,7 +5,7 @@
 #
 # This file supports:
 #      \p{Pi}
-#      \p{InitialPunctuation} (and fuzzy permutations)
+#      \p{Pi} (and fuzzy permutations)
 # 
 # Meaning: General Category 'Pi'
 #
similarity index 94%
rename from lib/unicore/Is/Po.pl
rename to lib/unicore/lib/Po.pl
index c24a8f4..d1ec8cc 100644 (file)
@@ -5,7 +5,7 @@
 #
 # This file supports:
 #      \p{Po}
-#      \p{OtherPunctuation} (and fuzzy permutations)
+#      \p{Po} (and fuzzy permutations)
 # 
 # Meaning: General Category 'Po'
 #
similarity index 91%
rename from lib/unicore/Is/Ps.pl
rename to lib/unicore/lib/Ps.pl
index 8c29336..f03aa02 100644 (file)
@@ -5,7 +5,7 @@
 #
 # This file supports:
 #      \p{Ps}
-#      \p{OpenPunctuation} (and fuzzy permutations)
+#      \p{Ps} (and fuzzy permutations)
 # 
 # Meaning: General Category 'Ps'
 #
similarity index 97%
rename from lib/unicore/Is/S.pl
rename to lib/unicore/lib/S.pl
index 5e51785..f9ded7d 100644 (file)
@@ -5,7 +5,7 @@
 #
 # This file supports:
 #      \p{S}
-#      \p{Symbol} (and fuzzy permutations)
+#      \p{S} (and fuzzy permutations)
 # 
 # Meaning: Major Category 'S'
 #
similarity index 87%
rename from lib/unicore/Is/Sc.pl
rename to lib/unicore/lib/Sc.pl
index b9818c2..31cabc3 100644 (file)
@@ -5,7 +5,7 @@
 #
 # This file supports:
 #      \p{Sc}
-#      \p{CurrencySymbol} (and fuzzy permutations)
+#      \p{Sc} (and fuzzy permutations)
 # 
 # Meaning: General Category 'Sc'
 #
similarity index 90%
rename from lib/unicore/Is/Sk.pl
rename to lib/unicore/lib/Sk.pl
index 47febf5..34d4ae3 100644 (file)
@@ -5,7 +5,7 @@
 #
 # This file supports:
 #      \p{Sk}
-#      \p{ModifierSymbol} (and fuzzy permutations)
+#      \p{Sk} (and fuzzy permutations)
 # 
 # Meaning: General Category 'Sk'
 #
similarity index 93%
rename from lib/unicore/Is/Sm.pl
rename to lib/unicore/lib/Sm.pl
index 5b423bf..bde87a9 100644 (file)
@@ -5,7 +5,7 @@
 #
 # This file supports:
 #      \p{Sm}
-#      \p{MathSymbol} (and fuzzy permutations)
+#      \p{Sm} (and fuzzy permutations)
 # 
 # Meaning: General Category 'Sm'
 #
similarity index 96%
rename from lib/unicore/Is/So.pl
rename to lib/unicore/lib/So.pl
index 7cb9987..0cc548e 100644 (file)
@@ -5,7 +5,7 @@
 #
 # This file supports:
 #      \p{So}
-#      \p{OtherSymbol} (and fuzzy permutations)
+#      \p{So} (and fuzzy permutations)
 # 
 # Meaning: General Category 'So'
 #
similarity index 100%
rename from lib/unicore/Is/Yi.pl
rename to lib/unicore/lib/Yi.pl
similarity index 87%
rename from lib/unicore/Is/Z.pl
rename to lib/unicore/lib/Z.pl
index 4c13a1e..3a053e1 100644 (file)
@@ -5,7 +5,7 @@
 #
 # This file supports:
 #      \p{Z}
-#      \p{Separator} (and fuzzy permutations)
+#      \p{Z} (and fuzzy permutations)
 # 
 # Meaning: Major Category 'Z'
 #
similarity index 83%
rename from lib/unicore/Is/Zl.pl
rename to lib/unicore/lib/Zl.pl
index 80f5902..2fa53b3 100644 (file)
@@ -5,7 +5,7 @@
 #
 # This file supports:
 #      \p{Zl}
-#      \p{LineSeparator} (and fuzzy permutations)
+#      \p{Zl} (and fuzzy permutations)
 # 
 # Meaning: General Category 'Zl'
 #
similarity index 82%
rename from lib/unicore/Is/Zp.pl
rename to lib/unicore/lib/Zp.pl
index 908dbb9..7860fc3 100644 (file)
@@ -5,7 +5,7 @@
 #
 # This file supports:
 #      \p{Zp}
-#      \p{ParagraphSeparator} (and fuzzy permutations)
+#      \p{Zp} (and fuzzy permutations)
 # 
 # Meaning: General Category 'Zp'
 #
similarity index 85%
rename from lib/unicore/Is/Zs.pl
rename to lib/unicore/lib/Zs.pl
index 593fa23..56ff72e 100644 (file)
@@ -5,7 +5,7 @@
 #
 # This file supports:
 #      \p{Zs}
-#      \p{SpaceSeparator} (and fuzzy permutations)
+#      \p{Zs} (and fuzzy permutations)
 # 
 # Meaning: General Category 'Zs'
 #
index 1e1f7ed..a6c234c 100644 (file)
@@ -1,19 +1,20 @@
 #!/usr/bin/perl -w
 use strict;
 use Carp;
+
 ##
 ## mktables -- create the runtime Perl Unicode files (lib/unicore/**/*.pl)
 ## from the Unicode database files (lib/unicore/*.txt).
 ##
 
-mkdir("In", 0755);
-mkdir("Is", 0755);
-mkdir("To", 0755);
+mkdir("lib", 0755);
+mkdir("To",  0755);
 
 ##
 ## Process any args.
 ##
-my $Verbose = 0;
+my $Verbose        = 0;
+my $MakeTestScript = 0;
 
 while (@ARGV)
 {
@@ -22,8 +23,10 @@ while (@ARGV)
         $Verbose = 1;
     } elsif ($arg eq '-q') {
         $Verbose = 0;
+    } elsif ($arg eq '-maketest') {
+        $MakeTestScript = 1;
     } else {
-        die "usage: $0 [-v|-q]";
+        die "usage: $0 [-v|-q] [-maketest]";
     }
 }
 
@@ -36,6 +39,35 @@ my $HEADER=<<"EOF";
 
 EOF
 
+
+##
+## WriteIfChanged(FILE, @LINES)
+##
+## Given a filename and an array of lines (the \@ prototype passes the
+## array by reference), write the concatenated lines to the file, but
+## only if that text differs from what the file currently contains.
+##
+## Dies if the file can't be opened for output, or if writing to it or
+## closing it fails (buffered write errors may only show up at close).
+## Returns true if the file was written, nothing if it was left alone.
+##
+sub WriteIfChanged($\@)
+{
+    my $file  = shift;
+    my $lines = shift;
+
+    my $TextToWrite = join '', @$lines;
+    if (open IN, $file) {
+        local($/) = undef;  ## slurp the whole file in one read
+        my $PreviousText = <IN>;
+        close IN;
+        if ($PreviousText eq $TextToWrite) {
+            print "$file unchanged.\n" if $Verbose;
+            return;
+        }
+    }
+    if (not open OUT, ">$file") {
+        die "$0: can't open $file for output: $!\n";
+    }
+    print OUT $TextToWrite
+        or die "$0: can't write to $file: $!\n";
+    close OUT
+        or die "$0: can't close $file after writing: $!\n";
+
+    ## Only claim success once the data has really been flushed.
+    print "$file written.\n" if $Verbose;
+    return 1;
+}
+
 ##
 ## The main datastructure (a "Table") represents a set of code points that
 ## are part of a particular quality (that are part of \pL, \p{InGreek},
@@ -55,36 +87,25 @@ my %TableInfo;
 my %TableDesc;
 my %FuzzyNames;
 my %AliasInfo;
+my %CanonicalToOrig;
 
 ##
 ## Turn something like
 ##    OLD-ITALIC
-## to
+## into
 ##    OldItalic
 ##
 sub CanonicalName($)
 {
-    my $name = lc shift;
+    my $orig = shift;
+    my $name = lc $orig;
     $name =~ s/(?<![a-z])(\w)/\u$1/g;
-    $name =~ s/[_\W]+//g;
-    return $name;
-}
+    $name =~ s/[-_\s]+//g;
 
-##
-## Turn something like
-##    OLD-ITALIC
-## to
-##    Old_Italic
-##
-sub CanonicalNameForPattern($)
-{
-    my $name = lc shift;
-    $name =~ s/(?<![a-z])(\w)/\u$1/g;
-    $name =~ s/[_\W]+/_/;
+    $CanonicalToOrig{$name} = $orig if not $CanonicalToOrig{$name};
     return $name;
 }
 
-
 ##
 ## Associates a property ("Greek", "Lu", "Assigned",...) with a Table.
 ##
@@ -107,7 +128,7 @@ sub New_Prop($$$@)
     my $Fuzzy = delete $Args{Fuzzy};
     my $Desc  = delete $Args{Desc}; # description
 
-    $Name = CanonicalNameForPattern($Name) if $Fuzzy;
+    $Name = CanonicalName($Name) if $Fuzzy;
 
     ## sanity check a few args
     if (%Args or ($Type ne 'Is' and $Type ne 'In') or not ref $Table) {
@@ -373,19 +394,13 @@ sub Table::Write
     my $filename = shift;
     my $comment  = shift;
 
-    print "$filename\n" if $Verbose;
-
-    if (not open(OUT, ">$filename")) {
-       die "$0: can't write $filename: $!\n";
-    }
-
-    print OUT $HEADER;
+    my @OUT = $HEADER;
     if (defined $comment) {
         $comment =~ s/\s+\Z//;
         $comment =~ s/^/# /gm;
-        print OUT "#\n$comment\n#\n";
+        push @OUT, "#\n$comment\n#\n";
     }
-    print OUT "return <<'END';\n";
+    push @OUT, "return <<'END';\n";
 
     for my $set (@$Table)
     {
@@ -394,14 +409,65 @@ sub Table::Write
         my $name  = $set->[RANGE_NAME];
 
         if ($start == $end) {
-            printf OUT "%04X\t\t%s\n", $start, $name;
+            push @OUT, sprintf "%04X\t\t%s\n", $start, $name;
         } else {
-            printf OUT "%04X\t%04X\t%s\n", $start, $end, $name;
+            push @OUT, sprintf "%04X\t%04X\t%s\n", $start, $end, $name;
         }
     }
 
-    print OUT "END\n";
-    close OUT;
+    push @OUT, "END\n";
+
+    WriteIfChanged($filename, @OUT);
+}
+
+## Helper used only for making the test script.
+## Says whether a code point is one we are willing to put into a test:
+## returns 1 if $code passes every check below, 0 otherwise.
+sub IsUsable($)
+{
+    my ($Code) = @_;
+
+    my $Rejected =
+           $Code <= 0x0000                        ## don't use null (or below)
+        || $Code >= $LastUnicodeCodepoint         ## keep in range
+        || ($Code >= 0xD800 && $Code <= 0xDFFF)   ## no surrogates
+        || ($Code >= 0xFDD0 && $Code <= 0xFDEF)   ## utf8.c says no good
+        || ($Code & 0xFFFF) == 0xFFFE             ## utf8.c says no good
+        || ($Code & 0xFFFF) == 0xFFFF;            ## utf8.c says no good
+
+    return $Rejected ? 0 : 1;
+}
+
+## Pick a code point that is part of the table.
+## The last code point of each range is tried in turn, and the first one
+## that IsUsable() accepts is returned.
+## Returns nothing if no range end qualifies (for instance, the table is
+## empty, or covers only surrogates).
+## This is used only for making the test script.
+sub Table::ValidCode
+{
+    my ($Self) = @_;
+
+    foreach my $Range (@$Self) {
+        my $Last = $Range->[RANGE_END];
+        next unless IsUsable($Last);
+        return $Last;
+    }
+    return;
+}
+
+## Pick a code point that's not part of the table.
+## For each range, the code point just past its end and then the one
+## just before its start are tried; the first that IsUsable() accepts is
+## returned.  (This presumes ranges within a table never sit directly
+## next to each other -- TODO confirm.)
+## An empty table gets the fixed answer 0x1234.
+## Returns nothing if no candidate is usable.
+## This is used only for making the test script.
+sub Table::InvalidCode
+{
+    my ($Self) = @_;
+
+    return 0x1234 if $Self->IsEmpty();
+
+    foreach my $Range (@$Self)
+    {
+        my @Neighbors = ($Range->[RANGE_END] + 1, $Range->[RANGE_START] - 1);
+        for my $Candidate (@Neighbors) {
+            return $Candidate if IsUsable($Candidate);
+        }
+    }
+    return;
 }
 
 ###########################################################################
@@ -434,8 +500,16 @@ sub New_Alias($$$@)
         confess "$0: bad args to New_Alias"
     }
 
-    if (not $TableInfo{$Type}->{$Name}) {
-        confess "$0: don't have orignial $Type => $Name to make alias"
+    $Alias = CanonicalName($Alias) if $Fuzzy;
+
+    if (not $TableInfo{$Type}->{$Name})
+    {
+        my $CName = CanonicalName($Name);
+        if ($TableInfo{$Type}->{$CName}) {
+            confess "$0: Use canonical form '$CName' instead of '$Name' for alias.";
+        } else {
+            confess "$0: don't have orignial $Type => $Name to make alias";
+        }
     }
     if ($TableInfo{$Alias}) {
         confess "$0: already have original $Type => $Alias; can't make alias";
@@ -451,7 +525,7 @@ sub New_Alias($$$@)
 ## All assigned code points
 my $Assigned = Table->New(Is    => 'Assigned',
                           Desc  => "All assigned code points",
-                          Fuzzy => 1);
+                          Fuzzy => 0);
 
 my $Name     = Table->New(); ## all characters, individually by name
 my $General  = Table->New(); ## all characters, grouped by category
@@ -694,7 +768,7 @@ sub Unicode_Txt()
              Fuzzy => 0);
 
     ## Unassigned is the same as 'Cn'
-    New_Alias(Is => 'Unassigned', SameAs => 'Cn', Fuzzy => 1);
+    New_Alias(Is => 'Unassigned', SameAs => 'Cn', Fuzzy => 0);
 
     $Cat{C}->Replace($Cat{C}->Merge($Cat{Cn}));  ## Now merge in Cn into C
 
@@ -709,10 +783,10 @@ sub Unicode_Txt()
     my $Any = Table->New(Is    => 'Any',
                          Desc  => sprintf("[\\x{0000}-\\x{%X}]",
                                           $LastUnicodeCodepoint),
-                         Fuzzy => 1);
+                         Fuzzy => 0);
     $Any->RawAppendRange(0, $LastUnicodeCodepoint);
 
-    New_Alias(Is => 'All', SameAs => 'Any', Fuzzy => 1);
+    New_Alias(Is => 'All', SameAs => 'Any', Fuzzy => 0);
 
     ##
     ## Build special properties for Perl's internal case-folding needs:
@@ -1106,6 +1180,199 @@ sub Make_GC_Aliases()
     }
 }
 
+
+##
+## These are used in:
+##   MakePropTestScript()
+##   WriteAllMappings()
+## for making the test script.
+##
+my %FuzzyNameToTest;
+my %ExactNameToTest;
+
+
+## GenTests(FH, PROP, MATCHCODE, FAILCODE)
+##
+## This is used only for making the test script.
+## Prints to filehandle FH the Expect() lines that exercise property
+## name PROP in all four forms: \p{PROP}, \p{^PROP}, \P{PROP}, \P{^PROP}.
+##   MATCHCODE - code point expected to match \p{PROP}; skipped if undef
+##   FAILCODE  - code point expected to fail \p{PROP};  skipped if undef
+##
+## PROP is handed to printf as a %s argument rather than interpolated
+## into the format itself, so a '%' in a name is printed literally
+## instead of being taken for a conversion.
+sub GenTests($$$$)
+{
+    my $FH        = shift;
+    my $Prop      = shift;
+    my $MatchCode = shift;
+    my $FailCode  = shift;
+
+    if (defined $MatchCode) {
+        printf $FH qq/Expect(1, "\\x{%04X}", '\\p{%s}' );\n/, $MatchCode, $Prop;
+        printf $FH qq/Expect(0, "\\x{%04X}", '\\p{^%s}');\n/, $MatchCode, $Prop;
+        printf $FH qq/Expect(0, "\\x{%04X}", '\\P{%s}' );\n/, $MatchCode, $Prop;
+        printf $FH qq/Expect(1, "\\x{%04X}", '\\P{^%s}');\n/, $MatchCode, $Prop;
+    }
+    if (defined $FailCode) {
+        printf $FH qq/Expect(0, "\\x{%04X}", '\\p{%s}' );\n/, $FailCode, $Prop;
+        printf $FH qq/Expect(1, "\\x{%04X}", '\\p{^%s}');\n/, $FailCode, $Prop;
+        printf $FH qq/Expect(1, "\\x{%04X}", '\\P{%s}' );\n/, $FailCode, $Prop;
+        printf $FH qq/Expect(0, "\\x{%04X}", '\\P{^%s}');\n/, $FailCode, $Prop;
+    }
+}
+
+## This is used only for making the test script.
+## Prints to filehandle FH one Error() line for \p{PROP} and one for
+## \P{PROP}, in that order.
+sub ExpectError($$)
+{
+    my ($FH, $prop) = @_;
+
+    for my $Letter ('p', 'P') {
+        print $FH "Error('\\" . $Letter . "{$prop}');\n";
+    }
+}
+
+## These lists are used only for making the test script.
+## Separators put between the parts of a name that is still expected
+## to match.  (The empty string appears twice, so it is picked twice as
+## often as the others.)
+my @GoodSeps = (
+                " ",
+                "-",
+                " \t ",
+                "",
+                "",
+                "_",
+               );
+## Separators used to build a name that is expected to be an error.
+my @BadSeps = (
+               "--",
+               "__",
+               " _",
+               "/"
+              );
+
+## This is used only for making the test script.
+## Splits NAME on runs of '-', '_' and whitespace, gives each part a
+## randomly chosen case treatment (upper, lower, ucfirst, or untouched),
+## and glues the parts back together with randomly chosen separators.
+## If the optional second argument is true, exactly one separator from
+## @BadSeps ends up in the result: either between two parts or, failing
+## that, at the very start or end.
+sub RandomlyFuzzifyName($;$)
+{
+    my ($Name, $WantError) = @_;
+
+    ## Case treatments, indexed by a random integer 0..3.
+    my @Recase = (
+                  sub { uc $_[0] },
+                  sub { lc $_[0] },
+                  sub { ucfirst $_[0] },
+                  sub { $_[0] },
+                 );
+
+    my $Fuzzy   = '';
+    my $IsFirst = 1;
+    foreach my $Piece (split /[-\s_]+/, $Name)
+    {
+        if ($IsFirst) {
+            $IsFirst = 0;   ## no separator in front of the first part
+        }
+        elsif ($WantError and rand() < 0.3) {
+            $Fuzzy .= $BadSeps[rand(@BadSeps)];
+            $WantError = 0; ## one bad separator is enough
+        }
+        else {
+            $Fuzzy .= $GoodSeps[rand(@GoodSeps)];
+        }
+        $Fuzzy .= $Recase[int rand(4)]->($Piece);
+    }
+
+    ## Still owe the caller an error: stick a bad separator on one end.
+    if ($WantError) {
+        my $AtEnd = (rand() >= 0.5);
+        my $Bad   = $BadSeps[rand(@BadSeps)];
+        $Fuzzy = $AtEnd ? $Fuzzy . $Bad : $Bad . $Fuzzy;
+    }
+    return $Fuzzy;
+}
+
+## This is used only for making the test script.
+## Writes TestProp.pl: first everything read from the DATA filehandle,
+## then generated tests for each name in %ExactNameToTest and
+## %FuzzyNameToTest, then a closing "Finished();" line.
+##   - Exact names are tested as given; their all-uppercase and
+##     all-lowercase forms, when different, are expected to be errors.
+##   - Fuzzy names are tested in canonical form, in original form, and in
+##     one randomly fuzzified form, plus one deliberately broken form
+##     that is expected to be an error.
+## Dies if TestProp.pl can't be opened, or if closing it fails (which is
+## where a failed buffered write would show up).
+sub MakePropTestScript()
+{
+    ## this written directly -- it's huge.
+    if (not open OUT, ">TestProp.pl") {
+        die "$0: TestProp.pl: $!\n";
+    }
+    print OUT <DATA>;
+
+    while (my ($Name, $Table) = each %ExactNameToTest)
+    {
+        GenTests(*OUT, $Name, $Table->ValidCode, $Table->InvalidCode);
+        ExpectError(*OUT, uc $Name) if uc $Name ne $Name;
+        ExpectError(*OUT, lc $Name) if lc $Name ne $Name;
+    }
+
+
+    while (my ($Name, $Table) = each %FuzzyNameToTest)
+    {
+        my $Orig  = $CanonicalToOrig{$Name};
+        my %Names = (
+                     $Name => 1,
+                     $Orig => 1,
+                     RandomlyFuzzifyName($Orig) => 1
+                    );
+
+        ## The sample code points depend only on the table, so look them
+        ## up once rather than once per spelling of the name.
+        my $Valid   = $Table->ValidCode;
+        my $Invalid = $Table->InvalidCode;
+
+        for my $N (keys %Names) {
+            GenTests(*OUT, $N, $Valid, $Invalid);
+        }
+
+        ExpectError(*OUT, RandomlyFuzzifyName($Orig, 'ERROR'));
+    }
+
+    print OUT "Finished();\n";
+    close OUT
+        or die "$0: TestProp.pl: close failed: $!\n";
+}
+
+
+##
+## These are used only in:
+##   RegisterFileForName()
+##   WriteAllMappings()
+##
+my %Exact;      ## will become %utf8::Exact;
+my %Canonical;  ## will become %utf8::Canonical;
+my %CaComment;  ## Comment for %Canonical entry of same key
+
+##
+## RegisterFileForName(TYPE, NAME, ISFUZZY, FILENAME)
+##
+## Given info about a name and the datafile it should be associated
+## with, register that association: in %Exact for a non-fuzzy name, in
+## %Canonical (keyed by the lowercased name) for a fuzzy one.
+##
+## For TYPE 'Is' the key is the name without the 'Is', as it will be
+## removed in utf8_heavy when this data is being checked.  Any other
+## TYPE (i.e. 'In') keeps its prefix in the key, and a second record
+## without the prefix is added too if that key isn't already taken.
+##
+## Dies with "oops[KEY]" if the primary key is already registered.
+##
+sub RegisterFileForName($$$$)
+{
+    my ($Type, $Name, $IsFuzzy, $filename) = @_;
+
+    my $IsType = ($Type eq 'Is');
+    my $CName  = lc $Name;  ## only the fuzzy branches use this
+
+    if (not $IsFuzzy and $IsType)
+    {
+        die "oops[$Name]" if $Exact{$Name};
+        $Exact{$Name} = $filename;
+    }
+    elsif (not $IsFuzzy)
+    {
+        my $Key = "$Type$Name";
+        die "oops[$Type$Name]" if $Exact{$Key};
+        $Exact{$Key}  = $filename;
+        $Exact{$Name} = $filename if not $Exact{$Name};
+    }
+    elsif ($IsType)
+    {
+        die "oops[$CName]" if $Canonical{$CName};
+        $Canonical{$CName} = $filename;
+        ## Comment only names with at least two capitals, i.e. those
+        ## whose lowercased key has lost its word boundaries.
+        $CaComment{$CName} = $Name if ($Name =~ tr/A-Z//) >= 2;
+    }
+    else
+    {
+        my $Key = lc "$Type$CName";
+        die "oops[$Type$CName]" if $Canonical{$Key};
+        $Canonical{$Key} = $filename;
+        $CaComment{$Key} = "$Type$Name";
+        if (not $Canonical{$CName}) {
+            $Canonical{$CName} = $filename;
+            $CaComment{$CName} = "$Type$Name";
+        }
+    }
+}
+
 ##
 ## Writes the info accumulated in
 ##
@@ -1118,51 +1385,52 @@ sub WriteAllMappings()
 {
     my @MAP;
 
-    for my $Type ('In', 'Is')
-    {
-        my %Filenames;
-        my %NameToFile;
+    my %BaseNames;  ## Base names already used (for avoiding 8.3 conflicts)
 
-        my %Exact; ## will become %utf8::Is    or %utf8::In
-        my %Pat;   ## will become %utf8::IsPat or %utf8::InPat
+    ## 'Is' *MUST* come first, so its names have precedence over 'In's
+    for my $Type ('Is', 'In')
+    {
+        my %RawNameToFile; ## a per-$Type cache
 
-        ##
-        ## First write all the files to the $Type/ directory
-        ##
-        for my $Name (sort { length $a <=> length $b } keys %{$TableInfo{$Type}})
+        for my $Name (sort {length $a <=> length $b} keys %{$TableInfo{$Type}})
         {
+            ## Note: $Name is already canonical
             my $Table   = $TableInfo{$Type}->{$Name};
+            my $IsFuzzy = $FuzzyNames{$Type}->{$Name};
 
             ## Need an 8.3 safe filename (which means "an 8 safe" $filename)
-            my $filename = $FuzzyNames{$Type}->{$Name} ? CanonicalName($Name): $Name;
-            $filename =~ s/[^\w_]+/_/g; # "L&" -> "L_"
-            substr($filename, 8) = '' if length($filename) > 8;
-
-            ##
-            ## Make sure the filename doesn't conflict with something we
-            ## might have already written. If we have, say,
-            ##     GreekExtended1
-            ##     GreekExtended2
-            ## they become
-            ##     GreekExt
-            ##     GreekEx2
-            ##
-            while (my $num = $Filenames{lc $filename}++)
+            my $filename;
             {
-                $num++; ## so filenames with numbers start with '2', which
-                        ## just looks more natural.
-                ## Want to append $num, but if it'll make the filename longer
-                ## than 8 characters, pre-truncate $filename so that the result
-                ## is acceptable.
-                my $delta = length($filename) + length($num) - 8;
-                if ($delta > 0) {
-                    substr($filename, -$delta) = $num;
-                } else {
-                    $filename .= $num;
+                ## 'Is' items lose 'Is' from the basename.
+                $filename = $Type eq 'Is' ? $Name : "$Type$Name";
+
+                $filename =~ s/[^\w_]+/_/g; # "L&" -> "L_"
+                substr($filename, 8) = '' if length($filename) > 8;
+
+                ##
+                ## Make sure the basename doesn't conflict with something we
+                ## might have already written. If we have, say,
+                ##     InGreekExtended1
+                ##     InGreekExtended2
+                ## they become
+                ##     InGreekE
+                ##     InGreek2
+                ##
+                while (my $num = $BaseNames{lc $filename}++)
+                {
+                    $num++; ## so basenames with numbers start with '2', which
+                            ## just looks more natural.
+                    ## Want to append $num, but if it'll make the basename longer
+                    ## than 8 characters, pre-truncate $filename so that the result
+                    ## is acceptable.
+                    my $delta = length($filename) + length($num) - 8;
+                    if ($delta > 0) {
+                        substr($filename, -$delta) = $num;
+                    } else {
+                        $filename .= $num;
+                    }
                 }
-            }
-
-            $Exact{$Name} = $filename;
+            };
 
             ##
             ## Construct a nice comment to add to the file, and build data
@@ -1187,8 +1455,7 @@ sub WriteAllMappings()
                 for my $N (@Supported)
                 {
                     my $IsFuzzy = $FuzzyNames{$Type}->{$N};
-                    my $CName   = $IsFuzzy ? CanonicalName($N): $N;
-                    my $Prop    = "\\p{$TypeToShow$CName}";
+                    my $Prop    = "\\p{$TypeToShow$N}"; ## $N: the name being listed, not the table's own $Name
                     $OrigProp = $Prop if not $OrigProp; #cache for aliases
                     if ($IsFuzzy) {
                         $Comment .= "\t$Prop (and fuzzy permutations)\n";
@@ -1208,98 +1475,119 @@ sub WriteAllMappings()
             ##
             ## Okay, write the file...
             ##
-            $Table->Write("$Type/$filename.pl", $Comment);
-        }
+            $Table->Write("lib/$filename.pl", $Comment);
 
-        ##
-        ## Write out the map
-        ##
-        if (not open MAP, ">Properties") {
-            die "$0: can't write Properties: $!\n";
-        }
-        print MAP "##\n";
-        print MAP "## This file created by $0\n";
-        print MAP "## List of built-in \\p{...}/\\P{...} properties.\n";
-        print MAP "##\n";
-        print MAP "## '*' means name may be 'fuzzy'\n";
-        print MAP "##\n";
-        print MAP "\n";
-        print MAP sort { substr($a,2) cmp substr($b, 2) } @MAP;
-        close MAP;
+            ## and register it
+            $RawNameToFile{$Name} = $filename;
+            RegisterFileForName($Type => $Name, $IsFuzzy, $filename);
 
-        ##
-        ## Build %Pat
-        ##
-        while (my ($Fuzzy, $Real) = each %{$FuzzyNames{$Type}})
-        {
-            my $File = $Exact{$Real};
-
-            if (not $File) {
-                die "$0: oops [$Real]";
-            }
-
-            ## The prefix length of 2 is enough spread,
-            ## and besides, we have 'Yi' as an In category.
-            my $Prefix = lc(substr($Fuzzy, 0, 2));
-            my $Regex = NameToRegex($Fuzzy);
-
-            if ($Pat{$Prefix}->{$Regex}) {
-                warn "WHOA, conflict with /$Regex/: $Pat{$Prefix}->{$Regex} vs $File\n";
+            if ($IsFuzzy)
+            {
+                my $CName = CanonicalName($Type . '_'. $Name);
+                $FuzzyNameToTest{$Name}  = $Table if !$FuzzyNameToTest{$Name};
+                $FuzzyNameToTest{$CName} = $Table if !$FuzzyNameToTest{$CName};
+            } else {
+                $ExactNameToTest{$Name} = $Table;
             }
 
-            $Pat{$Prefix}->{$Regex} = $File;
         }
 
-        ##
-        ## Since the fuzzy method will provide for a way to match $Fuzzy,
-        ## there's no need for $Fuzzy to be in %Exact as well.
-        ## This can't be done in the loop above because there could be
-        ## multiple $Fuzzys pointing at the same $Real, and we don't want
-        ## the first to delete the exact mapping out from under the second.
-        ##
-        for my $Fuzzy (keys %{$FuzzyNames{$Type}})
+        ## Register alias info
+        for my $Name (sort {length $a <=> length $b} keys %{$AliasInfo{$Type}})
         {
-            delete $Exact{$Fuzzy};
+            my $Alias    = $AliasInfo{$Type}->{$Name};
+            my $IsFuzzy  = $FuzzyNames{$Type}->{$Alias};
+            my $filename = $RawNameToFile{$Name};
+            die "oops [$Alias]->[$Name]" if not $filename;
+            RegisterFileForName($Type => $Alias, $IsFuzzy, $filename);
+
+            my $Table = $TableInfo{$Type}->{$Name};
+            die "oops" if not $Table;
+            if ($IsFuzzy)
+            {
+                my $CName = CanonicalName($Type .'_'. $Alias);
+                $FuzzyNameToTest{$Alias} = $Table if !$FuzzyNameToTest{$Alias};
+                $FuzzyNameToTest{$CName} = $Table if !$FuzzyNameToTest{$CName};
+            } else {
+                $ExactNameToTest{$Alias} = $Table;
+            }
         }
+    }
 
+    ##
+    ## Write out the property list
+    ##
+    {
+        my @OUT = (
+                   "##\n",
+                   "## This file created by $0\n",
+                   "## List of built-in \\p{...}/\\P{...} properties.\n",
+                   "##\n",
+                   "## '*' means name may be 'fuzzy'\n",
+                   "##\n\n",
+                   sort { substr($a,2) cmp substr($b, 2) } @MAP,
+                  );
+        WriteIfChanged('Properties', @OUT);
+    }
 
+    use Text::Tabs ();  ## using this makes the files about half the size
+
+    ## Write Exact.pl
+    {
+        my @OUT = (
+                   $HEADER,
+                   "##\n",
+                   "## Data in this file used by ../utf8_heavy.pl\n",
+                   "##\n\n",
+                   "## Mapping from name to filename in ./lib\n",
+                   "%utf8::Exact = (\n",
+                  );
 
-        ##
-        ## Now write In.pl / Is.pl
-        ##
-        if (not open OUT, ">$Type.pl") {
-            die "$0: $Type.pl: $!\n";
-        }
-        print OUT $HEADER;
-        print OUT "##\n";
-        print OUT "## Data in this file used by ../utf8_heavy.pl\n";
-        print OUT "##\n";
-        print OUT "\n";
-        print OUT "## Mapping from name to filename in ./$Type\n";
-        print OUT "%utf8::$Type = (\n";
         for my $Name (sort keys %Exact)
         {
             my $File = $Exact{$Name};
-            printf OUT "  %-41s => %s,\n", "'$Name'", "'$File'";
+            $Name = $Name =~ m/\W/ ? qq/'$Name'/ : " $Name ";
+            my $Text = sprintf("%-15s => %s,\n", $Name, qq/'$File'/);
+            push @OUT, Text::Tabs::unexpand($Text);
         }
-        print OUT ");\n\n";
+        push @OUT, ");\n1;\n";
+
+        WriteIfChanged('Exact.pl', @OUT);
+    }
 
-        print OUT "## Mappings from regex to filename in ./$Type/\n";
-        print OUT "%utf8::${Type}Pat = (\n";
-        for my $Prefix (sort keys %Pat)
+    ## Write Canonical.pl
+    {
+        my @OUT = (
+                   $HEADER,
+                   "##\n",
+                   "## Data in this file used by ../utf8_heavy.pl\n",
+                   "##\n\n",
+                   "## Mapping from lc(canonical name) to filename in ./lib\n",
+                   "%utf8::Canonical = (\n",
+                  );
+        my $Trail = ""; ## used just to keep the spacing pretty
+        for my $Name (sort keys %Canonical)
         {
-            print OUT " '$Prefix' => {\n";
-            while (my ($Regex, $File) = each %{ $Pat{$Prefix} }) {
-                print OUT "\t'$Regex' => '$File',\n";
+            my $File = $Canonical{$Name};
+            if ($CaComment{$Name}) {
+                push @OUT, "\n" if not $Trail;
+                push @OUT, " # $CaComment{$Name}\n";
+                $Trail = "\n";
+            } else {
+                $Trail = "";
             }
-            print OUT " },\n";
+            $Name = $Name =~ m/\W/ ? qq/'$Name'/ : " $Name ";
+            my $Text = sprintf("  %-41s => %s,\n$Trail", $Name, qq/'$File'/);
+            push @OUT, Text::Tabs::unexpand($Text);
         }
-        print OUT ");\n";
-
-        close(OUT);
+        push @OUT, ");\n1\n";
+        WriteIfChanged('Canonical.pl', @OUT);
     }
+
+    MakePropTestScript() if $MakeTestScript;
 }
 
+
 sub SpecCase_txt()
 {
     #
@@ -1338,24 +1626,25 @@ sub SpecCase_txt()
     for my $case (qw(Lower Title Upper))
     {
         my $NormalCase = do "To/$case.pl" || die "$0: $@\n";
-        if (not open OUT, ">To/$case.pl") {
-            die "$0: To/$case.txt: $!";
-        }
 
-        print OUT $HEADER, "\n";
-        print OUT "%utf8::ToSpec$case =\n(\n";
+        my @OUT = (
+                   $HEADER, "\n",
+                   "%utf8::ToSpec$case =\n(\n",
+                   );
 
         for my $prop (sort { $a->[0] <=> $b->[0] } @{$CaseInfo{$case}}) {
             my ($ix, $code, $to) = @$prop;
             my $tostr =
               join "", map { sprintf "\\x{%s}", $_ } split ' ', $to;
-            printf OUT qq['%04X' => "$tostr",\n], $ix;
+            push @OUT, sprintf qq['%04X' => "$tostr",\n], $ix;
         }
-        print OUT ");\n\n";
-        print OUT "return <<'END';\n";
-        print OUT $NormalCase;
-        print OUT "END\n";
-        close OUT;
+        push @OUT, (
+                    ");\n\n",
+                    "return <<'END';\n",
+                    $NormalCase,
+                    "END\n"
+                    );
+        WriteIfChanged("To/$case.pl", @OUT);
     }
 }
 
@@ -1367,7 +1656,7 @@ sub SpecCase_txt()
 sub CaseFold_txt()
 {
     if (not open IN, "CaseFold.txt") {
-       die "$0: To/Fold.pl: $!\n";
+       die "$0: CaseFold.txt: $!\n";
     }
 
     my $Fold = Table->New();
@@ -1393,23 +1682,25 @@ sub CaseFold_txt()
     #
     # Prepend the special foldings to the common foldings.
     #
-
     my $CommonFold = do "To/Fold.pl" || die "$0: To/Fold.pl: $!\n";
-    if (not open OUT, ">To/Fold.pl") {
-        die "$0: To/Fold.pl: $!\n";
-    }
-    print OUT $HEADER, "\n";
-    print OUT "%utf8::ToSpecFold =\n(\n";
+
+    my @OUT = (
+               $HEADER, "\n",
+               "%utf8::ToSpecFold =\n(\n",
+              );
     for my $code (sort { $a <=> $b } keys %Fold) {
         my $foldstr =
           join "", map { sprintf "\\x{%s}", $_ } split ' ', $Fold{$code};
-        printf OUT qq['%04X' => "$foldstr",\n], $code;
+        push @OUT, sprintf qq['%04X' => "$foldstr",\n], $code;
     }
-    print OUT ");\n\n";
-    print OUT "return <<'END';\n";
-    print OUT $CommonFold;
-    print OUT "END\n";
-    close OUT;
+    push @OUT, (
+                ");\n\n",
+                "return <<'END';\n",
+                $CommonFold,
+                "END\n",
+               );
+
+    WriteIfChanged("To/Fold.pl", @OUT);
 }
 
 ## Do it....
@@ -1421,15 +1712,65 @@ PropList_txt();
 Scripts_txt();
 Blocks_txt();
 
+WriteAllMappings();
+
 LineBrk_Txt();
 ArabShap_txt();
 Jamo_txt();
 SpecCase_txt();
+CaseFold_txt();
 
-WriteAllMappings();
+exit(0);
 
-CaseFold_txt();
+## TRAILING CODE (everything after __DATA__) IS USED BY MakePropTestScript(); it is not run as part of this script
+__DATA__
+use strict;
+use warnings;
+
+my $Tests = 0;
+my $Fails = 0;
 
-# That's all, folks!
+sub Expect($$$)  ## Expect(EXPECT, STRING, REGEX): count one test; report unless STRING =~ /REGEX/ gives EXPECT
+{
+    my $Expect = shift;        ## expected match outcome; xor'ed with the 1/0 result below
+    my $String = shift;        ## text the pattern is applied to
+    my $Regex  = shift;        ## pattern source, compiled with qr// inside the eval
+    my $Line   = (caller)[2];  ## line number of the caller, used in failure reports
+
+    $Tests++;
+    my $RegObj;
+    my $result = eval {        ## undef if anything in the block dies, otherwise 1 or 0
+        $RegObj = qr/$Regex/;
+        $String =~ $RegObj ? 1 : 0
+    };
+    
+    if (not defined $result) { ## the eval died: report it with the error text in $@
+        print "couldn't compile /$Regex/ on $0 line $Line: $@\n";
+        $Fails++;
+    } elsif ($result ^ $Expect) { ## match outcome differs from what was expected
+        print "bad result (expected $Expect) on $0 line $Line: $@\n";
+        $Fails++;
+    }
+}
 
-__END__
+sub Error($)  ## Error(REGEX): count one test; record a failure if REGEX compiles and matches without dying
+{
+    my $Regex  = shift;  ## pattern source that is expected to be rejected
+    $Tests++;
+    if (eval { 'x' =~ qr/$Regex/; 1 }) {  ## eval yields 1 only when nothing inside died
+        $Fails++;
+        my $Line = (caller)[2];  ## line number of the caller, for the report
+        print "expected error for /$Regex/ on $0 line $Line: $@\n";
+    }
+}
+
+sub Finished()  ## print the summary of $Tests/$Fails and exit; never returns
+{
+   if ($Fails == 0) {
+      print "All $Tests tests passed.\n";
+      exit(0);   ## success status when no failure was recorded
+   } else {
+      print "$Tests tests, $Fails failed!\n";
+      exit(-1);  ## failure status when at least one test failed
+   }
+}
index 3f14afe..28e0d70 100644 (file)
@@ -8,35 +8,33 @@ sub DESTROY {}
 
 sub croak { require Carp; Carp::croak(@_) }
 
+my %Cache;
+
+##
+## "SWASH" == "SWATCH HASH". A "swatch" is a swatch of the Unicode landscape
+##
+
 sub SWASHNEW {
     my ($class, $type, $list, $minbits, $none) = @_;
     local $^D = 0 if $^D;
 
     print STDERR "SWASHNEW @_\n" if DEBUG;
 
-    ## check to see if we've already got it.
-    {
-        no strict 'refs';
-        if ($type and ref ${"${class}::{$type}"} eq $class) {
-            warn qq/Found \${"${class}::{$type}"}\n/ if DEBUG;
-            return ${"${class}::{$type}"};
-        }
-    }
-
     ##
     ## Get the list of codepoints for the type.
     ## Called from utf8.c
     ##
     ## Given a $type, our goal is to fill $list with the set of codepoint
-    ## ranges. As we try various interpretations of $type, sometimes we'll
-    ## end up with the $list directly, and sometimes we'll end up with a
-    ## $file name that holds the list data.
+    ## ranges.
     ##
     ## To make the parsing of $type clear, this code takes the a rather
     ## unorthadox approach of last'ing out of the block once we have the
     ## info we need. Were this to be a subroutine, the 'last' would just
     ## be a 'return'.
     ##
+    my $file; ## file to load data from, and also part of the %Cache key.
+    my $ListSorted = 0;
+
     if ($type)
     {
         $type =~ s/^\s+//;
@@ -44,131 +42,43 @@ sub SWASHNEW {
 
         print "type = $type\n" if DEBUG;
 
-        my $file;
-        ## Figure out what file to load to get the data....
       GETFILE:
         {
             ##
-            ## First, see if it's an "Is" name (the 'Is' is optional)
+            ## 'Is' is always optional, so if it's there, remove it.
+            ## Same with 'Category=' and 'Script='.
             ##
-            ## Because we check "Is" names first, they have precidence over
-            ## "In" names. For example, "Greek" is both a script and a
-            ## block. "IsGreek" always gets the script, while "InGreek"
-            ## always gets the block. "Greek" gets the script because we
-            ## check "Is" names first.
+            ## 'Block=' is replaced by 'In'.
             ##
-            if ($type =~ m{^
-                           ## "Is" prefix, or "Script=" or "Category="
-                           (?: Is [- _]? | (?:Script|Category)\s*=\s* )?
-                           ## name to check in the "Is" symbol table.
-                           ([A-Z].*)
-                           $
-                          }ix)
-            {
-                my $istype = $1;
-                ##
-                ## Input ($type)     Name To Check ($istype)
-                ## -------------     -----------------------
-                ## IsLu                 Lu
-                ## Lu                   Lu
-                ## Category = Lu        Lu
-                ## Foo                  Foo
-                ## Script = Greek       Greek
-                ##
-
-                print "istype = $istype\n" if DEBUG;
-
-                ## Load "Is" mapping data, if not yet loaded.
-                do "unicore/Is.pl" if not defined %utf8::Is;
-
-                ##
-                ## If the "Is" mapping data has an exact match, it points
-                ## to the file we need.
-                ##
-                if (exists $utf8::Is{$istype})
-                {
-                    $file = "unicore/Is/$utf8::Is{$istype}.pl";
-                    last GETFILE;
-                }
-
-                ##
-                ## Need to look at %utf8::IsPat (loaded from "unicore/Is.pl")
-                ## to see if there's a regex that matches this $istype.
-                ## If so, the associated name is the file we need.
-                ##
-                my $prefix = substr(lc($istype), 0, 2);
-                if (my $hashref = $utf8::IsPat{$prefix})
-                {
-                    while (my ($pat, $name) = each %{$hashref})
-                    {
-                        print "isprefix = $prefix, Is = $istype, pat = $pat\n" if DEBUG;
-                        ##
-                        ## The following regex probably need not be cached,
-                        ## since every time there's a match, the results of
-                        ## the entire call to SWASHNEW() is cached, so there's
-                        ## a very limited number of times any one $pat will
-                        ## be evaluated as a regex, at least with "reasonable"
-                        ## code that doesn't try a baziilion \p{Random} names.
-                        ##
-                        if ($istype =~ /^$pat$/i)
-                        {
-                            $file = "unicore/Is/$name.pl";
-                            keys %{$hashref}; ## reset the 'each' above
-                            last GETFILE;
-                        }
-                    }
-                }
+            $type =~ s/^Is(?:\s+|[-_])?//i
+              or
+            $type =~ s/^Category\s*=\s*//i
+              or
+            $type =~ s/^Script\s*=\s*//i
+              or
+            $type =~ s/^Block\s*=\s*/In/i;
+
+            ##
+            ## See if it's in the direct mapping table.
+            ##
+            require "unicore/Exact.pl";
+            if (my $base = $utf8::Exact{$type}) {
+                $file = "unicore/lib/$base.pl";
+                last GETFILE;
             }
 
             ##
-            ## Couldn't find via "Is" -- let's try via "In".....
+            ## If not there exactly, try the canonical form. The canonical
+            ## form is lowercased, with any separators (\s+|[-_]) removed.
             ##
-            if ($type =~ m{^
-                           ( In(?!herited$)[- _]? | Block\s*=\s*)?
-                           ([A-Z].*)
-                           $
-                          }xi)
-            {
-                my $intype = $2;
-                print "intype = $intype\n" if DEBUG;
-
-                ##
-                ## Input ($type)      Name To Check ($intype)
-                ## -------------      -----------------------
-                ## Inherited             Inherited
-                ## InGreek               Greek
-                ## Block = Greek         Greek
-                ##
-
-                ## Load "In" mapping data, if not yet loaded.
-                do "unicore/In.pl" if not defined %utf8::In;
-
-                ## If there's a direct match, it points to the file we need
-                if (exists $utf8::In{$intype}) {
-                    $file = "unicore/In/$utf8::In{$intype}.pl";
-                    last GETFILE;
-                }
-
-                ##
-                ## Need to look at %utf8::InPat (loaded from "unicore/In.pl")
-                ## to see if there's a regex that matches this $intype.
-                ## If so, the associated name is the file we need.
-                ##
-                my $prefix = substr(lc($intype), 0, 2);
-                if (my $hashref = $utf8::InPat{$prefix})
-                {
-                    print "inprefix = $prefix, In = $intype\n" if DEBUG;
-                    while (my ($pat, $name) = each %{$hashref})
-                    {
-                        print "inprefix = $prefix, In = $intype, k = $pat\n" if DEBUG;
-                        if ($intype =~ /^$pat$/i) {
-                            $file = "unicore/In/$name.pl";
-                            print "inprefix = $prefix, In = $intype, k = $pat, file = $file\n" if DEBUG;
-                            keys %{$hashref}; ## reset the 'each' above
-                            last GETFILE;
-                        }
-                    }
-                }
+            my $canonical = lc $type;
+            $canonical =~ s/(?<=[a-z\d])(?:\s+|[-_])(?=[a-z\d])//g;
+            print "canonical = $canonical\n" if DEBUG;
+
+            require "unicore/Canonical.pl";
+            if (my $base = $utf8::Canonical{$canonical}) {
+                $file = "unicore/lib/$base.pl";
+                last GETFILE;
             }
 
             ##
@@ -188,16 +98,28 @@ sub SWASHNEW {
             croak("Can't find Unicode character property \"$type\"");
         }
 
+        print "found it (file='$file')\n" if DEBUG;
+
         ##
         ## If we reach here, it was due to a 'last GETFILE' above, so we
-        ## have a filename, so now we load it.
+        ## have a filename, so now we load it if we haven't already.
+        ## If we have, return the cached results. The cache key is the
+        ## file to load.
         ##
+        if ($Cache{$file} and ref($Cache{$file}) eq $class)
+        {
+            print "Returning cached '$file' for \\p{$type}\n" if DEBUG;
+            return $Cache{$class, $file};
+        }
+
         $list = do $file;
+        $ListSorted = 1; ## we know that these lists are sorted
     }
 
     my $extras;
     my $bits;
 
+    my $ORIG = $list;
     if ($list) {
        my @tmp = split(/^/m, $list);
        my %seen;
@@ -247,8 +169,7 @@ sub SWASHNEW {
 
     print STDERR "CLASS = $class, TYPE => $type, BITS => $bits, NONE => $none\nEXTRAS =>\n$extras\nLIST =>\n$list\n" if DEBUG;
 
-    no strict 'refs';
-    ${"${class}::{$type}"} = bless {
+    my $SWASH = bless {
        TYPE => $type,
        BITS => $bits,
        EXTRAS => $extras,
@@ -256,6 +177,12 @@ sub SWASHNEW {
        NONE => $none,
        @extras,
     } => $class;
+
+    if ($file) {
+        $Cache{$class, $file} = $SWASH;
+    }
+
+    return $SWASH;
 }
 
 # NOTE: utf8.c:swash_init() assumes entries are never modified once generated.