From: Jarkko Hietaniemi Date: Wed, 16 Jan 2002 05:37:29 +0000 (+0000) Subject: Jeffrey's Unicode adventure continues: unify the In/*.pl X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=5beb625efdc42b7e3d999a21d111e88bdda8c811;p=p5sagit%2Fp5-mst-13.2.git Jeffrey's Unicode adventure continues: unify the In/*.pl and Is/*.pl to lib/*.pl, remove In.pl and Is.pl, introduce Canonical.pl and Exact.pl. p4raw-id: //depot/perl@14294 --- diff --git a/MANIFEST b/MANIFEST index c88fa9c..f8a6289 100644 --- a/MANIFEST +++ b/MANIFEST @@ -1388,304 +1388,275 @@ lib/unicore/BidiMirr.txt Unicode character database lib/unicore/Bidirectional.pl Unicode character database lib/unicore/Blocks.pl Unicode character database lib/unicore/Blocks.txt Unicode character database +lib/unicore/Canonical.pl Unicode character database lib/unicore/CaseFold.txt Unicode character database lib/unicore/Category.pl Unicode character database lib/unicore/CombiningClass.pl Unicode character database lib/unicore/CompExcl.txt Unicode character database lib/unicore/Decomposition.pl Unicode character database lib/unicore/EAWidth.txt Unicode character database -lib/unicore/In.pl Unicode character database -lib/unicore/In/Alphabet.pl Unicode character database -lib/unicore/In/Arabic.pl Unicode character database -lib/unicore/In/ArabicP2.pl Unicode character database -lib/unicore/In/ArabicPr.pl Unicode character database -lib/unicore/In/Armenian.pl Unicode character database -lib/unicore/In/Arrows.pl Unicode character database -lib/unicore/In/BasicLat.pl Unicode character database -lib/unicore/In/Bengali.pl Unicode character database -lib/unicore/In/BlockEle.pl Unicode character database -lib/unicore/In/Bopomof2.pl Unicode character database -lib/unicore/In/Bopomofo.pl Unicode character database -lib/unicore/In/BoxDrawi.pl Unicode character database -lib/unicore/In/BrailleP.pl Unicode character database -lib/unicore/In/Byzantin.pl Unicode character database -lib/unicore/In/Cherokee.pl 
Unicode character database -lib/unicore/In/CjkComp2.pl Unicode character database -lib/unicore/In/CjkComp3.pl Unicode character database -lib/unicore/In/CjkComp4.pl Unicode character database -lib/unicore/In/CjkCompa.pl Unicode character database -lib/unicore/In/CjkRadic.pl Unicode character database -lib/unicore/In/CjkSymbo.pl Unicode character database -lib/unicore/In/CjkUnif2.pl Unicode character database -lib/unicore/In/CjkUnif3.pl Unicode character database -lib/unicore/In/CjkUnifi.pl Unicode character database -lib/unicore/In/Combini2.pl Unicode character database -lib/unicore/In/Combini3.pl Unicode character database -lib/unicore/In/Combinin.pl Unicode character database -lib/unicore/In/ControlP.pl Unicode character database -lib/unicore/In/Currency.pl Unicode character database -lib/unicore/In/Cyrillic.pl Unicode character database -lib/unicore/In/Deseret.pl Unicode character database -lib/unicore/In/Devanaga.pl Unicode character database -lib/unicore/In/Dingbats.pl Unicode character database -lib/unicore/In/Enclose2.pl Unicode character database -lib/unicore/In/Enclosed.pl Unicode character database -lib/unicore/In/Ethiopic.pl Unicode character database -lib/unicore/In/GeneralP.pl Unicode character database -lib/unicore/In/Geometri.pl Unicode character database -lib/unicore/In/Georgian.pl Unicode character database -lib/unicore/In/Gothic.pl Unicode character database -lib/unicore/In/Greek.pl Unicode character database -lib/unicore/In/GreekExt.pl Unicode character database -lib/unicore/In/Gujarati.pl Unicode character database -lib/unicore/In/Gurmukhi.pl Unicode character database -lib/unicore/In/Halfwidt.pl Unicode character database -lib/unicore/In/HangulCo.pl Unicode character database -lib/unicore/In/HangulJa.pl Unicode character database -lib/unicore/In/HangulSy.pl Unicode character database -lib/unicore/In/Hebrew.pl Unicode character database -lib/unicore/In/HighPriv.pl Unicode character database -lib/unicore/In/HighSurr.pl Unicode character database 
-lib/unicore/In/Hiragana.pl Unicode character database -lib/unicore/In/Ideograp.pl Unicode character database -lib/unicore/In/IpaExten.pl Unicode character database -lib/unicore/In/Kanbun.pl Unicode character database -lib/unicore/In/KangxiRa.pl Unicode character database -lib/unicore/In/Kannada.pl Unicode character database -lib/unicore/In/Katakana.pl Unicode character database -lib/unicore/In/Khmer.pl Unicode character database -lib/unicore/In/Lao.pl Unicode character database -lib/unicore/In/Latin1Su.pl Unicode character database -lib/unicore/In/LatinEx2.pl Unicode character database -lib/unicore/In/LatinEx3.pl Unicode character database -lib/unicore/In/LatinExt.pl Unicode character database -lib/unicore/In/Letterli.pl Unicode character database -lib/unicore/In/LowSurro.pl Unicode character database -lib/unicore/In/Malayala.pl Unicode character database -lib/unicore/In/Mathema2.pl Unicode character database -lib/unicore/In/Mathemat.pl Unicode character database -lib/unicore/In/Miscell2.pl Unicode character database -lib/unicore/In/Miscella.pl Unicode character database -lib/unicore/In/Mongolia.pl Unicode character database -lib/unicore/In/MusicalS.pl Unicode character database -lib/unicore/In/Myanmar.pl Unicode character database -lib/unicore/In/NumberFo.pl Unicode character database -lib/unicore/In/Ogham.pl Unicode character database -lib/unicore/In/OldItali.pl Unicode character database -lib/unicore/In/OpticalC.pl Unicode character database -lib/unicore/In/Oriya.pl Unicode character database -lib/unicore/In/PrivateU.pl Unicode character database -lib/unicore/In/Runic.pl Unicode character database -lib/unicore/In/Sinhala.pl Unicode character database -lib/unicore/In/SmallFor.pl Unicode character database -lib/unicore/In/SpacingM.pl Unicode character database -lib/unicore/In/Specials.pl Unicode character database -lib/unicore/In/Superscr.pl Unicode character database -lib/unicore/In/Syriac.pl Unicode character database -lib/unicore/In/Tags.pl Unicode character 
database -lib/unicore/In/Tamil.pl Unicode character database -lib/unicore/In/Telugu.pl Unicode character database -lib/unicore/In/Thaana.pl Unicode character database -lib/unicore/In/Thai.pl Unicode character database -lib/unicore/In/Tibetan.pl Unicode character database -lib/unicore/In/UnifiedC.pl Unicode character database -lib/unicore/In/YiRadica.pl Unicode character database -lib/unicore/In/YiSyllab.pl Unicode character database +lib/unicore/Exact.pl Unicode character database lib/unicore/Index.txt Unicode character database -lib/unicore/Is.pl Unicode character database -lib/unicore/Is/_CanonDC.pl Unicode character database -lib/unicore/Is/_CaseIgn.pl Unicode character database -lib/unicore/Is/_CombAbo.pl Unicode character database -lib/unicore/Is/Alnum.pl Unicode character database -lib/unicore/Is/Alpha.pl Unicode character database -lib/unicore/Is/Alphabet.pl Unicode character database -lib/unicore/Is/Any.pl Unicode character database -lib/unicore/Is/Arabic.pl Unicode character database -lib/unicore/Is/Armenian.pl Unicode character database -lib/unicore/Is/ASCII.pl Unicode character database -lib/unicore/Is/AsciiHex.pl Unicode character database -lib/unicore/Is/Assigned.pl Unicode character database -lib/unicore/Is/Bengali.pl Unicode character database -lib/unicore/Is/BidiAL.pl Unicode character database -lib/unicore/Is/BidiAN.pl Unicode character database -lib/unicore/Is/BidiB.pl Unicode character database -lib/unicore/Is/BidiBN.pl Unicode character database -lib/unicore/Is/BidiCont.pl Unicode character database -lib/unicore/Is/BidiCS.pl Unicode character database -lib/unicore/Is/BidiEN.pl Unicode character database -lib/unicore/Is/BidiES.pl Unicode character database -lib/unicore/Is/BidiET.pl Unicode character database -lib/unicore/Is/BidiL.pl Unicode character database -lib/unicore/Is/BidiLRE.pl Unicode character database -lib/unicore/Is/BidiLRO.pl Unicode character database -lib/unicore/Is/BidiNSM.pl Unicode character database -lib/unicore/Is/BidiON.pl 
Unicode character database -lib/unicore/Is/BidiPDF.pl Unicode character database -lib/unicore/Is/BidiR.pl Unicode character database -lib/unicore/Is/BidiRLE.pl Unicode character database -lib/unicore/Is/BidiRLO.pl Unicode character database -lib/unicore/Is/BidiS.pl Unicode character database -lib/unicore/Is/BidiWS.pl Unicode character database -lib/unicore/Is/Blank.pl Unicode character database -lib/unicore/Is/Bopomofo.pl Unicode character database -lib/unicore/Is/C.pl Unicode character database -lib/unicore/Is/Canadian.pl Unicode character database -lib/unicore/Is/Canon.pl Unicode character database -lib/unicore/Is/Cc.pl Unicode character database -lib/unicore/Is/Cf.pl Unicode character database -lib/unicore/Is/Cherokee.pl Unicode character database -lib/unicore/Is/Cn.pl Unicode character database -lib/unicore/Is/Cntrl.pl Unicode character database -lib/unicore/Is/Co.pl Unicode character database -lib/unicore/Is/Common.pl Unicode character database -lib/unicore/Is/Compat.pl Unicode character database -lib/unicore/Is/Cs.pl Unicode character database -lib/unicore/Is/Cyrillic.pl Unicode character database -lib/unicore/Is/Dash.pl Unicode character database -lib/unicore/Is/DCcircle.pl Unicode character database -lib/unicore/Is/DCcompat.pl Unicode character database -lib/unicore/Is/DCfinal.pl Unicode character database -lib/unicore/Is/DCfont.pl Unicode character database -lib/unicore/Is/DCfracti.pl Unicode character database -lib/unicore/Is/DCinitia.pl Unicode character database -lib/unicore/Is/DCisolat.pl Unicode character database -lib/unicore/Is/DCmedial.pl Unicode character database -lib/unicore/Is/DCnarrow.pl Unicode character database -lib/unicore/Is/DCnoBrea.pl Unicode character database -lib/unicore/Is/DCsmall.pl Unicode character database -lib/unicore/Is/DCsquare.pl Unicode character database -lib/unicore/Is/DCsub.pl Unicode character database -lib/unicore/Is/DCsuper.pl Unicode character database -lib/unicore/Is/DCvertic.pl Unicode character database 
-lib/unicore/Is/DCwide.pl Unicode character database -lib/unicore/Is/Deseret.pl Unicode character database -lib/unicore/Is/Devanaga.pl Unicode character database -lib/unicore/Is/Diacriti.pl Unicode character database -lib/unicore/Is/Digit.pl Unicode character database -lib/unicore/Is/Ethiopic.pl Unicode character database -lib/unicore/Is/Extender.pl Unicode character database -lib/unicore/Is/Georgian.pl Unicode character database -lib/unicore/Is/Gothic.pl Unicode character database -lib/unicore/Is/Graph.pl Unicode character database -lib/unicore/Is/Greek.pl Unicode character database -lib/unicore/Is/Gujarati.pl Unicode character database -lib/unicore/Is/Gurmukhi.pl Unicode character database -lib/unicore/Is/Han.pl Unicode character database -lib/unicore/Is/Hangul.pl Unicode character database -lib/unicore/Is/Hebrew.pl Unicode character database -lib/unicore/Is/HexDigit.pl Unicode character database -lib/unicore/Is/Hiragana.pl Unicode character database -lib/unicore/Is/Hyphen.pl Unicode character database -lib/unicore/Is/IdContin.pl Unicode character database -lib/unicore/Is/Ideograp.pl Unicode character database -lib/unicore/Is/IdStart.pl Unicode character database -lib/unicore/Is/Inherite.pl Unicode character database -lib/unicore/Is/JoinCont.pl Unicode character database -lib/unicore/Is/Kannada.pl Unicode character database -lib/unicore/Is/Katakana.pl Unicode character database -lib/unicore/Is/Khmer.pl Unicode character database -lib/unicore/Is/L.pl Unicode character database -lib/unicore/Is/L_.pl Unicode character database -lib/unicore/Is/Lao.pl Unicode character database -lib/unicore/Is/Latin.pl Unicode character database -lib/unicore/Is/LbrkAI.pl Unicode character database -lib/unicore/Is/LbrkAL.pl Unicode character database -lib/unicore/Is/LbrkB2.pl Unicode character database -lib/unicore/Is/LbrkBA.pl Unicode character database -lib/unicore/Is/LbrkBB.pl Unicode character database -lib/unicore/Is/LbrkBK.pl Unicode character database -lib/unicore/Is/LbrkCB.pl 
Unicode character database -lib/unicore/Is/LbrkCL.pl Unicode character database -lib/unicore/Is/LbrkCM.pl Unicode character database -lib/unicore/Is/LbrkCR.pl Unicode character database -lib/unicore/Is/LbrkEX.pl Unicode character database -lib/unicore/Is/LbrkGL.pl Unicode character database -lib/unicore/Is/LbrkHY.pl Unicode character database -lib/unicore/Is/LbrkID.pl Unicode character database -lib/unicore/Is/LbrkIN.pl Unicode character database -lib/unicore/Is/LbrkIS.pl Unicode character database -lib/unicore/Is/LbrkLF.pl Unicode character database -lib/unicore/Is/LbrkNS.pl Unicode character database -lib/unicore/Is/LbrkNU.pl Unicode character database -lib/unicore/Is/LbrkOP.pl Unicode character database -lib/unicore/Is/LbrkPO.pl Unicode character database -lib/unicore/Is/LbrkPR.pl Unicode character database -lib/unicore/Is/LbrkQU.pl Unicode character database -lib/unicore/Is/LbrkSA.pl Unicode character database -lib/unicore/Is/LbrkSG.pl Unicode character database -lib/unicore/Is/LbrkSP.pl Unicode character database -lib/unicore/Is/LbrkSY.pl Unicode character database -lib/unicore/Is/LbrkXX.pl Unicode character database -lib/unicore/Is/LbrkZW.pl Unicode character database -lib/unicore/Is/Ll.pl Unicode character database -lib/unicore/Is/Lm.pl Unicode character database -lib/unicore/Is/Lo.pl Unicode character database -lib/unicore/Is/Lower.pl Unicode character database -lib/unicore/Is/Lowercas.pl Unicode character database -lib/unicore/Is/Lt.pl Unicode character database -lib/unicore/Is/Lu.pl Unicode character database -lib/unicore/Is/M.pl Unicode character database -lib/unicore/Is/Malayala.pl Unicode character database -lib/unicore/Is/Math.pl Unicode character database -lib/unicore/Is/Mc.pl Unicode character database -lib/unicore/Is/Me.pl Unicode character database -lib/unicore/Is/Mirrored.pl Unicode character database -lib/unicore/Is/Mn.pl Unicode character database -lib/unicore/Is/Mongolia.pl Unicode character database -lib/unicore/Is/Myanmar.pl Unicode 
character database -lib/unicore/Is/N.pl Unicode character database -lib/unicore/Is/Nd.pl Unicode character database -lib/unicore/Is/Nl.pl Unicode character database -lib/unicore/Is/No.pl Unicode character database -lib/unicore/Is/Nonchara.pl Unicode character database -lib/unicore/Is/Ogham.pl Unicode character database -lib/unicore/Is/OldItali.pl Unicode character database -lib/unicore/Is/Oriya.pl Unicode character database -lib/unicore/Is/OtherAlp.pl Unicode character database -lib/unicore/Is/OtherLow.pl Unicode character database -lib/unicore/Is/OtherMat.pl Unicode character database -lib/unicore/Is/OtherUpp.pl Unicode character database -lib/unicore/Is/P.pl Unicode character database -lib/unicore/Is/Pc.pl Unicode character database -lib/unicore/Is/Pd.pl Unicode character database -lib/unicore/Is/Pe.pl Unicode character database -lib/unicore/Is/Pf.pl Unicode character database -lib/unicore/Is/Pi.pl Unicode character database -lib/unicore/Is/Po.pl Unicode character database -lib/unicore/Is/Print.pl Unicode character database -lib/unicore/Is/Ps.pl Unicode character database -lib/unicore/Is/Punct.pl Unicode character database -lib/unicore/Is/Quotatio.pl Unicode character database -lib/unicore/Is/Runic.pl Unicode character database -lib/unicore/Is/S.pl Unicode character database -lib/unicore/Is/Sc.pl Unicode character database -lib/unicore/Is/Sinhala.pl Unicode character database -lib/unicore/Is/Sk.pl Unicode character database -lib/unicore/Is/Sm.pl Unicode character database -lib/unicore/Is/So.pl Unicode character database -lib/unicore/Is/Space.pl Unicode character database -lib/unicore/Is/SpacePer.pl Unicode character database -lib/unicore/Is/Syriac.pl Unicode character database -lib/unicore/Is/Tamil.pl Unicode character database -lib/unicore/Is/Telugu.pl Unicode character database -lib/unicore/Is/Terminal.pl Unicode character database -lib/unicore/Is/Thaana.pl Unicode character database -lib/unicore/Is/Thai.pl Unicode character database -lib/unicore/Is/Tibetan.pl 
Unicode character database -lib/unicore/Is/Title.pl Unicode character database -lib/unicore/Is/Upper.pl Unicode character database -lib/unicore/Is/Uppercas.pl Unicode character database -lib/unicore/Is/WhiteSpa.pl Unicode character database -lib/unicore/Is/Word.pl Unicode character database -lib/unicore/Is/XDigit.pl Unicode character database -lib/unicore/Is/Yi.pl Unicode character database -lib/unicore/Is/Z.pl Unicode character database -lib/unicore/Is/Zl.pl Unicode character database -lib/unicore/Is/Zp.pl Unicode character database -lib/unicore/Is/Zs.pl Unicode character database lib/unicore/Jamo.txt Unicode character database lib/unicore/JamoShort.pl Unicode character database lib/unicore/Lbrk.pl Unicode character database +lib/unicore/lib/Alnum.pl Unicode character database +lib/unicore/lib/Alpha.pl Unicode character database +lib/unicore/lib/Alphabet.pl Unicode character database +lib/unicore/lib/Any.pl Unicode character database +lib/unicore/lib/Arabic.pl Unicode character database +lib/unicore/lib/Armenian.pl Unicode character database +lib/unicore/lib/ASCII.pl Unicode character database +lib/unicore/lib/AsciiHex.pl Unicode character database +lib/unicore/lib/Assigned.pl Unicode character database +lib/unicore/lib/Bengali.pl Unicode character database +lib/unicore/lib/BidiAL.pl Unicode character database +lib/unicore/lib/BidiAN.pl Unicode character database +lib/unicore/lib/BidiB.pl Unicode character database +lib/unicore/lib/BidiBN.pl Unicode character database +lib/unicore/lib/BidiCont.pl Unicode character database +lib/unicore/lib/BidiCS.pl Unicode character database +lib/unicore/lib/BidiEN.pl Unicode character database +lib/unicore/lib/BidiES.pl Unicode character database +lib/unicore/lib/BidiET.pl Unicode character database +lib/unicore/lib/BidiL.pl Unicode character database +lib/unicore/lib/BidiLRE.pl Unicode character database +lib/unicore/lib/BidiLRO.pl Unicode character database +lib/unicore/lib/BidiNSM.pl Unicode character database 
+lib/unicore/lib/BidiON.pl Unicode character database +lib/unicore/lib/BidiPDF.pl Unicode character database +lib/unicore/lib/BidiR.pl Unicode character database +lib/unicore/lib/BidiRLE.pl Unicode character database +lib/unicore/lib/BidiRLO.pl Unicode character database +lib/unicore/lib/BidiS.pl Unicode character database +lib/unicore/lib/BidiWS.pl Unicode character database +lib/unicore/lib/Blank.pl Unicode character database +lib/unicore/lib/Bopomofo.pl Unicode character database +lib/unicore/lib/C.pl Unicode character database +lib/unicore/lib/Canadian.pl Unicode character database +lib/unicore/lib/Canon.pl Unicode character database +lib/unicore/lib/Cc.pl Unicode character database +lib/unicore/lib/Cf.pl Unicode character database +lib/unicore/lib/Cherokee.pl Unicode character database +lib/unicore/lib/Cn.pl Unicode character database +lib/unicore/lib/Cntrl.pl Unicode character database +lib/unicore/lib/Co.pl Unicode character database +lib/unicore/lib/Common.pl Unicode character database +lib/unicore/lib/Compat.pl Unicode character database +lib/unicore/lib/Cs.pl Unicode character database +lib/unicore/lib/Cyrillic.pl Unicode character database +lib/unicore/lib/Dash.pl Unicode character database +lib/unicore/lib/DCcircle.pl Unicode character database +lib/unicore/lib/DCcompat.pl Unicode character database +lib/unicore/lib/DCfinal.pl Unicode character database +lib/unicore/lib/DCfont.pl Unicode character database +lib/unicore/lib/DCfracti.pl Unicode character database +lib/unicore/lib/DCinitia.pl Unicode character database +lib/unicore/lib/DCisolat.pl Unicode character database +lib/unicore/lib/DCmedial.pl Unicode character database +lib/unicore/lib/DCnarrow.pl Unicode character database +lib/unicore/lib/DCnoBrea.pl Unicode character database +lib/unicore/lib/DCsmall.pl Unicode character database +lib/unicore/lib/DCsquare.pl Unicode character database +lib/unicore/lib/DCsub.pl Unicode character database +lib/unicore/lib/DCsuper.pl Unicode character database 
+lib/unicore/lib/DCvertic.pl Unicode character database +lib/unicore/lib/DCwide.pl Unicode character database +lib/unicore/lib/Deseret.pl Unicode character database +lib/unicore/lib/Devanaga.pl Unicode character database +lib/unicore/lib/Diacriti.pl Unicode character database +lib/unicore/lib/Digit.pl Unicode character database +lib/unicore/lib/Ethiopic.pl Unicode character database +lib/unicore/lib/Extender.pl Unicode character database +lib/unicore/lib/Georgian.pl Unicode character database +lib/unicore/lib/Gothic.pl Unicode character database +lib/unicore/lib/Graph.pl Unicode character database +lib/unicore/lib/Greek.pl Unicode character database +lib/unicore/lib/Gujarati.pl Unicode character database +lib/unicore/lib/Gurmukhi.pl Unicode character database +lib/unicore/lib/Han.pl Unicode character database +lib/unicore/lib/Hangul.pl Unicode character database +lib/unicore/lib/Hebrew.pl Unicode character database +lib/unicore/lib/HexDigit.pl Unicode character database +lib/unicore/lib/Hiragana.pl Unicode character database +lib/unicore/lib/Hyphen.pl Unicode character database +lib/unicore/lib/IdContin.pl Unicode character database +lib/unicore/lib/Ideograp.pl Unicode character database +lib/unicore/lib/IdStart.pl Unicode character database +lib/unicore/lib/InAlphab.pl Unicode character database +lib/unicore/lib/InArabi2.pl Unicode character database +lib/unicore/lib/InArabi3.pl Unicode character database +lib/unicore/lib/InArabic.pl Unicode character database +lib/unicore/lib/InArmeni.pl Unicode character database +lib/unicore/lib/InArrows.pl Unicode character database +lib/unicore/lib/InBasicL.pl Unicode character database +lib/unicore/lib/InBengal.pl Unicode character database +lib/unicore/lib/InBlockE.pl Unicode character database +lib/unicore/lib/InBopom2.pl Unicode character database +lib/unicore/lib/InBopomo.pl Unicode character database +lib/unicore/lib/InBoxDra.pl Unicode character database +lib/unicore/lib/InBraill.pl Unicode character database 
+lib/unicore/lib/InByzant.pl Unicode character database +lib/unicore/lib/InCherok.pl Unicode character database +lib/unicore/lib/InCjkCo2.pl Unicode character database +lib/unicore/lib/InCjkCo3.pl Unicode character database +lib/unicore/lib/InCjkCo4.pl Unicode character database +lib/unicore/lib/InCjkCom.pl Unicode character database +lib/unicore/lib/InCjkRad.pl Unicode character database +lib/unicore/lib/InCjkSym.pl Unicode character database +lib/unicore/lib/InCjkUn2.pl Unicode character database +lib/unicore/lib/InCjkUn3.pl Unicode character database +lib/unicore/lib/InCjkUni.pl Unicode character database +lib/unicore/lib/InCombi2.pl Unicode character database +lib/unicore/lib/InCombi3.pl Unicode character database +lib/unicore/lib/InCombin.pl Unicode character database +lib/unicore/lib/InContro.pl Unicode character database +lib/unicore/lib/InCurren.pl Unicode character database +lib/unicore/lib/InCyrill.pl Unicode character database +lib/unicore/lib/InDesere.pl Unicode character database +lib/unicore/lib/InDevana.pl Unicode character database +lib/unicore/lib/InDingba.pl Unicode character database +lib/unicore/lib/InEnclo2.pl Unicode character database +lib/unicore/lib/InEnclos.pl Unicode character database +lib/unicore/lib/InEthiop.pl Unicode character database +lib/unicore/lib/InGenera.pl Unicode character database +lib/unicore/lib/InGeomet.pl Unicode character database +lib/unicore/lib/InGeorgi.pl Unicode character database +lib/unicore/lib/InGothic.pl Unicode character database +lib/unicore/lib/InGreek.pl Unicode character database +lib/unicore/lib/InGreekE.pl Unicode character database +lib/unicore/lib/InGujara.pl Unicode character database +lib/unicore/lib/InGurmuk.pl Unicode character database +lib/unicore/lib/InHalfwi.pl Unicode character database +lib/unicore/lib/InHangu2.pl Unicode character database +lib/unicore/lib/InHangu3.pl Unicode character database +lib/unicore/lib/InHangul.pl Unicode character database +lib/unicore/lib/InHebrew.pl Unicode 
character database +lib/unicore/lib/Inherite.pl Unicode character database +lib/unicore/lib/InHighPr.pl Unicode character database +lib/unicore/lib/InHighSu.pl Unicode character database +lib/unicore/lib/InHiraga.pl Unicode character database +lib/unicore/lib/InIdeogr.pl Unicode character database +lib/unicore/lib/InIpaExt.pl Unicode character database +lib/unicore/lib/InKanbun.pl Unicode character database +lib/unicore/lib/InKangxi.pl Unicode character database +lib/unicore/lib/InKannad.pl Unicode character database +lib/unicore/lib/InKataka.pl Unicode character database +lib/unicore/lib/InKhmer.pl Unicode character database +lib/unicore/lib/InLao.pl Unicode character database +lib/unicore/lib/InLatin1.pl Unicode character database +lib/unicore/lib/InLatin2.pl Unicode character database +lib/unicore/lib/InLatin3.pl Unicode character database +lib/unicore/lib/InLatinE.pl Unicode character database +lib/unicore/lib/InLetter.pl Unicode character database +lib/unicore/lib/InLowSur.pl Unicode character database +lib/unicore/lib/InMalaya.pl Unicode character database +lib/unicore/lib/InMathe2.pl Unicode character database +lib/unicore/lib/InMathem.pl Unicode character database +lib/unicore/lib/InMisce2.pl Unicode character database +lib/unicore/lib/InMiscel.pl Unicode character database +lib/unicore/lib/InMongol.pl Unicode character database +lib/unicore/lib/InMusica.pl Unicode character database +lib/unicore/lib/InMyanma.pl Unicode character database +lib/unicore/lib/InNumber.pl Unicode character database +lib/unicore/lib/InOgham.pl Unicode character database +lib/unicore/lib/InOldIta.pl Unicode character database +lib/unicore/lib/InOptica.pl Unicode character database +lib/unicore/lib/InOriya.pl Unicode character database +lib/unicore/lib/InPrivat.pl Unicode character database +lib/unicore/lib/InRunic.pl Unicode character database +lib/unicore/lib/InSinhal.pl Unicode character database +lib/unicore/lib/InSmallF.pl Unicode character database 
+lib/unicore/lib/InSpacin.pl Unicode character database +lib/unicore/lib/InSpecia.pl Unicode character database +lib/unicore/lib/InSupers.pl Unicode character database +lib/unicore/lib/InSyriac.pl Unicode character database +lib/unicore/lib/InTags.pl Unicode character database +lib/unicore/lib/InTamil.pl Unicode character database +lib/unicore/lib/InTelugu.pl Unicode character database +lib/unicore/lib/InThaana.pl Unicode character database +lib/unicore/lib/InThai.pl Unicode character database +lib/unicore/lib/InTibeta.pl Unicode character database +lib/unicore/lib/InUnifie.pl Unicode character database +lib/unicore/lib/InYiRadi.pl Unicode character database +lib/unicore/lib/InYiSyll.pl Unicode character database +lib/unicore/lib/JoinCont.pl Unicode character database +lib/unicore/lib/Kannada.pl Unicode character database +lib/unicore/lib/Katakana.pl Unicode character database +lib/unicore/lib/Khmer.pl Unicode character database +lib/unicore/lib/L.pl Unicode character database +lib/unicore/lib/Lao.pl Unicode character database +lib/unicore/lib/Latin.pl Unicode character database +lib/unicore/lib/Ll.pl Unicode character database +lib/unicore/lib/Lm.pl Unicode character database +lib/unicore/lib/Lo.pl Unicode character database +lib/unicore/lib/Lower.pl Unicode character database +lib/unicore/lib/Lowercas.pl Unicode character database +lib/unicore/lib/Lt.pl Unicode character database +lib/unicore/lib/Lu.pl Unicode character database +lib/unicore/lib/L_.pl Unicode character database +lib/unicore/lib/M.pl Unicode character database +lib/unicore/lib/Malayala.pl Unicode character database +lib/unicore/lib/Math.pl Unicode character database +lib/unicore/lib/Mc.pl Unicode character database +lib/unicore/lib/Me.pl Unicode character database +lib/unicore/lib/Mirrored.pl Unicode character database +lib/unicore/lib/Mn.pl Unicode character database +lib/unicore/lib/Mongolia.pl Unicode character database +lib/unicore/lib/Myanmar.pl Unicode character database 
+lib/unicore/lib/N.pl Unicode character database +lib/unicore/lib/Nd.pl Unicode character database +lib/unicore/lib/Nl.pl Unicode character database +lib/unicore/lib/No.pl Unicode character database +lib/unicore/lib/Nonchara.pl Unicode character database +lib/unicore/lib/Ogham.pl Unicode character database +lib/unicore/lib/OldItali.pl Unicode character database +lib/unicore/lib/Oriya.pl Unicode character database +lib/unicore/lib/OtherAlp.pl Unicode character database +lib/unicore/lib/OtherLow.pl Unicode character database +lib/unicore/lib/OtherMat.pl Unicode character database +lib/unicore/lib/OtherUpp.pl Unicode character database +lib/unicore/lib/P.pl Unicode character database +lib/unicore/lib/Pc.pl Unicode character database +lib/unicore/lib/Pd.pl Unicode character database +lib/unicore/lib/Pe.pl Unicode character database +lib/unicore/lib/Pf.pl Unicode character database +lib/unicore/lib/Pi.pl Unicode character database +lib/unicore/lib/Po.pl Unicode character database +lib/unicore/lib/Print.pl Unicode character database +lib/unicore/lib/Ps.pl Unicode character database +lib/unicore/lib/Punct.pl Unicode character database +lib/unicore/lib/Quotatio.pl Unicode character database +lib/unicore/lib/Runic.pl Unicode character database +lib/unicore/lib/S.pl Unicode character database +lib/unicore/lib/Sc.pl Unicode character database +lib/unicore/lib/Sinhala.pl Unicode character database +lib/unicore/lib/Sk.pl Unicode character database +lib/unicore/lib/Sm.pl Unicode character database +lib/unicore/lib/So.pl Unicode character database +lib/unicore/lib/Space.pl Unicode character database +lib/unicore/lib/SpacePer.pl Unicode character database +lib/unicore/lib/Syriac.pl Unicode character database +lib/unicore/lib/Tamil.pl Unicode character database +lib/unicore/lib/Telugu.pl Unicode character database +lib/unicore/lib/Terminal.pl Unicode character database +lib/unicore/lib/Thaana.pl Unicode character database +lib/unicore/lib/Thai.pl Unicode character database 
+lib/unicore/lib/Tibetan.pl Unicode character database +lib/unicore/lib/Title.pl Unicode character database +lib/unicore/lib/Upper.pl Unicode character database +lib/unicore/lib/Uppercas.pl Unicode character database +lib/unicore/lib/WhiteSpa.pl Unicode character database +lib/unicore/lib/Word.pl Unicode character database +lib/unicore/lib/XDigit.pl Unicode character database +lib/unicore/lib/Yi.pl Unicode character database +lib/unicore/lib/Z.pl Unicode character database +lib/unicore/lib/Zl.pl Unicode character database +lib/unicore/lib/Zp.pl Unicode character database +lib/unicore/lib/Zs.pl Unicode character database +lib/unicore/lib/_CanonDC.pl Unicode character database +lib/unicore/lib/_CaseIgn.pl Unicode character database +lib/unicore/lib/_CombAbo.pl Unicode character database lib/unicore/LineBrk.txt Unicode character database lib/unicore/Makefile Unicode character database lib/unicore/mktables Unicode character database generator diff --git a/lib/unicore/Canonical.pl b/lib/unicore/Canonical.pl new file mode 100644 index 0000000..ac1a660 --- /dev/null +++ b/lib/unicore/Canonical.pl @@ -0,0 +1,704 @@ +# !!!!!!! DO NOT EDIT THIS FILE !!!!!!! +# This file is built by ./mktables from e.g. Unicode.txt. +# Any changes made here will be lost! 
+ +## +## Data in this file used by ../utf8_heavy.pl +## + +## Mapping from lc(canonical name) to filename in ./lib +%utf8::Canonical = ( + alphabetic => 'Alphabet', + + # InAlphabeticPresentationForms + alphabeticpresentationforms => 'InAlphab', + + arabic => 'Arabic', + + # InArabicPresentationFormsA + arabicpresentationformsa => 'InArabi3', + + # InArabicPresentationFormsB + arabicpresentationformsb => 'InArabi2', + + armenian => 'Armenian', + + # InArrows + arrows => 'InArrows', + + # AsciiHexDigit + asciihexdigit => 'AsciiHex', + + # InBasicLatin + basiclatin => 'InBasicL', + + bengali => 'Bengali', + + # BidiControl + bidicontrol => 'BidiCont', + + # InBlockElements + blockelements => 'InBlockE', + + bopomofo => 'Bopomofo', + + # InBopomofoExtended + bopomofoextended => 'InBopom2', + + # InBoxDrawing + boxdrawing => 'InBoxDra', + + # InBraillePatterns + braillepatterns => 'InBraill', + + # InByzantineMusicalSymbols + byzantinemusicalsymbols => 'InByzant', + + # CanadianAboriginal + canadianaboriginal => 'Canadian', + + cherokee => 'Cherokee', + + # InCjkCompatibility + cjkcompatibility => 'InCjkCom', + + # InCjkCompatibilityForms + cjkcompatibilityforms => 'InCjkCo2', + + # InCjkCompatibilityIdeographs + cjkcompatibilityideographs => 'InCjkCo3', + + # InCjkCompatibilityIdeographsSupplement + cjkcompatibilityideographssupplement => 'InCjkCo4', + + # InCjkRadicalsSupplement + cjkradicalssupplement => 'InCjkRad', + + # InCjkSymbolsAndPunctuation + cjksymbolsandpunctuation => 'InCjkSym', + + # InCjkUnifiedIdeographs + cjkunifiedideographs => 'InCjkUni', + + # InCjkUnifiedIdeographsExtensionA + cjkunifiedideographsextensiona => 'InCjkUn3', + + # InCjkUnifiedIdeographsExtensionB + cjkunifiedideographsextensionb => 'InCjkUn2', + + # ClosePunctuation + closepunctuation => 'Pe', + + # InCombiningDiacriticalMarks + combiningdiacriticalmarks => 'InCombi3', + + # InCombiningHalfMarks + combininghalfmarks => 'InCombin', + + # InCombiningMarksForSymbols + 
combiningmarksforsymbols => 'InCombi2', + + common => 'Common', + + # ConnectorPunctuation + connectorpunctuation => 'Pc', + + control => 'Cc', + + # InControlPictures + controlpictures => 'InContro', + + # CurrencySymbol + currencysymbol => 'Sc', + + # InCurrencySymbols + currencysymbols => 'InCurren', + + cyrillic => 'Cyrillic', + dash => 'Dash', + + # DashPunctuation + dashpunctuation => 'Pd', + + # DecimalNumber + decimalnumber => 'Nd', + + deseret => 'Deseret', + devanagari => 'Devanaga', + diacritic => 'Diacriti', + + # InDingbats + dingbats => 'InDingba', + + # InEnclosedAlphanumerics + enclosedalphanumerics => 'InEnclos', + + # InEnclosedCjkLettersAndMonths + enclosedcjklettersandmonths => 'InEnclo2', + + # EnclosingMark + enclosingmark => 'Me', + + ethiopic => 'Ethiopic', + extender => 'Extender', + + # FinalPunctuation + finalpunctuation => 'Pf', + + format => 'Cf', + + # InGeneralPunctuation + generalpunctuation => 'InGenera', + + # InGeometricShapes + geometricshapes => 'InGeomet', + + georgian => 'Georgian', + gothic => 'Gothic', + greek => 'Greek', + + # InGreekExtended + greekextended => 'InGreekE', + + gujarati => 'Gujarati', + gurmukhi => 'Gurmukhi', + + # InHalfwidthAndFullwidthForms + halfwidthandfullwidthforms => 'InHalfwi', + + han => 'Han', + hangul => 'Hangul', + + # InHangulCompatibilityJamo + hangulcompatibilityjamo => 'InHangu3', + + # InHangulJamo + hanguljamo => 'InHangul', + + # InHangulSyllables + hangulsyllables => 'InHangu2', + + hebrew => 'Hebrew', + + # HexDigit + hexdigit => 'HexDigit', + + # InHighPrivateUseSurrogates + highprivateusesurrogates => 'InHighPr', + + # InHighSurrogates + highsurrogates => 'InHighSu', + + hiragana => 'Hiragana', + hyphen => 'Hyphen', + + # IdContinue + idcontinue => 'IdContin', + + ideographic => 'Ideograp', + + # InIdeographicDescriptionCharacters + ideographicdescriptioncharacters => 'InIdeogr', + + # IdStart + idstart => 'IdStart', + + # InAlphabeticPresentationForms + inalphabeticpresentationforms 
=> 'InAlphab', + + # InArabic + inarabic => 'InArabic', + + # InArabicPresentationFormsA + inarabicpresentationformsa => 'InArabi3', + + # InArabicPresentationFormsB + inarabicpresentationformsb => 'InArabi2', + + # InArmenian + inarmenian => 'InArmeni', + + # InArrows + inarrows => 'InArrows', + + # InBasicLatin + inbasiclatin => 'InBasicL', + + # InBengali + inbengali => 'InBengal', + + # InBlockElements + inblockelements => 'InBlockE', + + # InBopomofo + inbopomofo => 'InBopomo', + + # InBopomofoExtended + inbopomofoextended => 'InBopom2', + + # InBoxDrawing + inboxdrawing => 'InBoxDra', + + # InBraillePatterns + inbraillepatterns => 'InBraill', + + # InByzantineMusicalSymbols + inbyzantinemusicalsymbols => 'InByzant', + + # InCherokee + incherokee => 'InCherok', + + # InCjkCompatibility + incjkcompatibility => 'InCjkCom', + + # InCjkCompatibilityForms + incjkcompatibilityforms => 'InCjkCo2', + + # InCjkCompatibilityIdeographs + incjkcompatibilityideographs => 'InCjkCo3', + + # InCjkCompatibilityIdeographsSupplement + incjkcompatibilityideographssupplement => 'InCjkCo4', + + # InCjkRadicalsSupplement + incjkradicalssupplement => 'InCjkRad', + + # InCjkSymbolsAndPunctuation + incjksymbolsandpunctuation => 'InCjkSym', + + # InCjkUnifiedIdeographs + incjkunifiedideographs => 'InCjkUni', + + # InCjkUnifiedIdeographsExtensionA + incjkunifiedideographsextensiona => 'InCjkUn3', + + # InCjkUnifiedIdeographsExtensionB + incjkunifiedideographsextensionb => 'InCjkUn2', + + # InCombiningDiacriticalMarks + incombiningdiacriticalmarks => 'InCombi3', + + # InCombiningHalfMarks + incombininghalfmarks => 'InCombin', + + # InCombiningMarksForSymbols + incombiningmarksforsymbols => 'InCombi2', + + # InControlPictures + incontrolpictures => 'InContro', + + # InCurrencySymbols + incurrencysymbols => 'InCurren', + + # InCyrillic + incyrillic => 'InCyrill', + + # InDeseret + indeseret => 'InDesere', + + # InDevanagari + indevanagari => 'InDevana', + + # InDingbats + indingbats => 
'InDingba', + + # InEnclosedAlphanumerics + inenclosedalphanumerics => 'InEnclos', + + # InEnclosedCjkLettersAndMonths + inenclosedcjklettersandmonths => 'InEnclo2', + + # InEthiopic + inethiopic => 'InEthiop', + + # InGeneralPunctuation + ingeneralpunctuation => 'InGenera', + + # InGeometricShapes + ingeometricshapes => 'InGeomet', + + # InGeorgian + ingeorgian => 'InGeorgi', + + # InGothic + ingothic => 'InGothic', + + # InGreek + ingreek => 'InGreek', + + # InGreekExtended + ingreekextended => 'InGreekE', + + # InGujarati + ingujarati => 'InGujara', + + # InGurmukhi + ingurmukhi => 'InGurmuk', + + # InHalfwidthAndFullwidthForms + inhalfwidthandfullwidthforms => 'InHalfwi', + + # InHangulCompatibilityJamo + inhangulcompatibilityjamo => 'InHangu3', + + # InHangulJamo + inhanguljamo => 'InHangul', + + # InHangulSyllables + inhangulsyllables => 'InHangu2', + + # InHebrew + inhebrew => 'InHebrew', + + inherited => 'Inherite', + + # InHighPrivateUseSurrogates + inhighprivateusesurrogates => 'InHighPr', + + # InHighSurrogates + inhighsurrogates => 'InHighSu', + + # InHiragana + inhiragana => 'InHiraga', + + # InIdeographicDescriptionCharacters + inideographicdescriptioncharacters => 'InIdeogr', + + # InIpaExtensions + inipaextensions => 'InIpaExt', + + # InitialPunctuation + initialpunctuation => 'Pi', + + # InKanbun + inkanbun => 'InKanbun', + + # InKangxiRadicals + inkangxiradicals => 'InKangxi', + + # InKannada + inkannada => 'InKannad', + + # InKatakana + inkatakana => 'InKataka', + + # InKhmer + inkhmer => 'InKhmer', + + # InLao + inlao => 'InLao', + + # InLatin1Supplement + inlatin1supplement => 'InLatin1', + + # InLatinExtendedA + inlatinextendeda => 'InLatin2', + + # InLatinExtendedAdditional + inlatinextendedadditional => 'InLatin3', + + # InLatinExtendedB + inlatinextendedb => 'InLatinE', + + # InLetterlikeSymbols + inletterlikesymbols => 'InLetter', + + # InLowSurrogates + inlowsurrogates => 'InLowSur', + + # InMalayalam + inmalayalam => 'InMalaya', + + # 
InMathematicalAlphanumericSymbols + inmathematicalalphanumericsymbols => 'InMathe2', + + # InMathematicalOperators + inmathematicaloperators => 'InMathem', + + # InMiscellaneousSymbols + inmiscellaneoussymbols => 'InMiscel', + + # InMiscellaneousTechnical + inmiscellaneoustechnical => 'InMisce2', + + # InMongolian + inmongolian => 'InMongol', + + # InMusicalSymbols + inmusicalsymbols => 'InMusica', + + # InMyanmar + inmyanmar => 'InMyanma', + + # InNumberForms + innumberforms => 'InNumber', + + # InOgham + inogham => 'InOgham', + + # InOldItalic + inolditalic => 'InOldIta', + + # InOpticalCharacterRecognition + inopticalcharacterrecognition => 'InOptica', + + # InOriya + inoriya => 'InOriya', + + # InPrivateUse + inprivateuse => 'InPrivat', + + # InRunic + inrunic => 'InRunic', + + # InSinhala + insinhala => 'InSinhal', + + # InSmallFormVariants + insmallformvariants => 'InSmallF', + + # InSpacingModifierLetters + inspacingmodifierletters => 'InSpacin', + + # InSpecials + inspecials => 'InSpecia', + + # InSuperscriptsAndSubscripts + insuperscriptsandsubscripts => 'InSupers', + + # InSyriac + insyriac => 'InSyriac', + + # InTags + intags => 'InTags', + + # InTamil + intamil => 'InTamil', + + # InTelugu + intelugu => 'InTelugu', + + # InThaana + inthaana => 'InThaana', + + # InThai + inthai => 'InThai', + + # InTibetan + intibetan => 'InTibeta', + + # InUnifiedCanadianAboriginalSyllabics + inunifiedcanadianaboriginalsyllabics => 'InUnifie', + + # InYiRadicals + inyiradicals => 'InYiRadi', + + # InYiSyllables + inyisyllables => 'InYiSyll', + + # InIpaExtensions + ipaextensions => 'InIpaExt', + + # JoinControl + joincontrol => 'JoinCont', + + # InKanbun + kanbun => 'InKanbun', + + # InKangxiRadicals + kangxiradicals => 'InKangxi', + + kannada => 'Kannada', + katakana => 'Katakana', + khmer => 'Khmer', + lao => 'Lao', + latin => 'Latin', + + # InLatin1Supplement + latin1supplement => 'InLatin1', + + # InLatinExtendedA + latinextendeda => 'InLatin2', + + # 
InLatinExtendedAdditional + latinextendedadditional => 'InLatin3', + + # InLatinExtendedB + latinextendedb => 'InLatinE', + + letter => 'L', + + # InLetterlikeSymbols + letterlikesymbols => 'InLetter', + + # LetterNumber + letternumber => 'Nl', + + # LineSeparator + lineseparator => 'Zl', + + lowercase => 'Lowercas', + + # LowercaseLetter + lowercaseletter => 'Ll', + + # InLowSurrogates + lowsurrogates => 'InLowSur', + + malayalam => 'Malayala', + mark => 'M', + math => 'Math', + + # InMathematicalAlphanumericSymbols + mathematicalalphanumericsymbols => 'InMathe2', + + # InMathematicalOperators + mathematicaloperators => 'InMathem', + + # MathSymbol + mathsymbol => 'Sm', + + # InMiscellaneousSymbols + miscellaneoussymbols => 'InMiscel', + + # InMiscellaneousTechnical + miscellaneoustechnical => 'InMisce2', + + # ModifierLetter + modifierletter => 'Lm', + + # ModifierSymbol + modifiersymbol => 'Sk', + + mongolian => 'Mongolia', + + # InMusicalSymbols + musicalsymbols => 'InMusica', + + myanmar => 'Myanmar', + + # NoncharacterCodePoint + noncharactercodepoint => 'Nonchara', + + # NonSpacingMark + nonspacingmark => 'Mn', + + number => 'N', + + # InNumberForms + numberforms => 'InNumber', + + ogham => 'Ogham', + + # OldItalic + olditalic => 'OldItali', + + # OpenPunctuation + openpunctuation => 'Ps', + + # InOpticalCharacterRecognition + opticalcharacterrecognition => 'InOptica', + + oriya => 'Oriya', + other => 'C', + + # OtherAlphabetic + otheralphabetic => 'OtherAlp', + + # OtherLetter + otherletter => 'Lo', + + # OtherLowercase + otherlowercase => 'OtherLow', + + # OtherMath + othermath => 'OtherMat', + + # OtherNumber + othernumber => 'No', + + # OtherPunctuation + otherpunctuation => 'Po', + + # OtherSymbol + othersymbol => 'So', + + # OtherUppercase + otheruppercase => 'OtherUpp', + + # ParagraphSeparator + paragraphseparator => 'Zp', + + # PrivateUse + privateuse => 'Co', + + punctuation => 'P', + + # QuotationMark + quotationmark => 'Quotatio', + + runic => 
'Runic', + separator => 'Z', + sinhala => 'Sinhala', + + # InSmallFormVariants + smallformvariants => 'InSmallF', + + # SpaceSeparator + spaceseparator => 'Zs', + + # SpacingMark + spacingmark => 'Mc', + + # InSpacingModifierLetters + spacingmodifierletters => 'InSpacin', + + # InSpecials + specials => 'InSpecia', + + # InSuperscriptsAndSubscripts + superscriptsandsubscripts => 'InSupers', + + surrogate => 'Cs', + symbol => 'S', + syriac => 'Syriac', + + # InTags + tags => 'InTags', + + tamil => 'Tamil', + telugu => 'Telugu', + + # TerminalPunctuation + terminalpunctuation => 'Terminal', + + thaana => 'Thaana', + thai => 'Thai', + tibetan => 'Tibetan', + + # TitlecaseLetter + titlecaseletter => 'Lt', + + unassigned => 'Cn', + + # InUnifiedCanadianAboriginalSyllabics + unifiedcanadianaboriginalsyllabics => 'InUnifie', + + uppercase => 'Uppercas', + + # UppercaseLetter + uppercaseletter => 'Lu', + + # WhiteSpace + whitespace => 'WhiteSpa', + + yi => 'Yi', + + # InYiRadicals + yiradicals => 'InYiRadi', + + # InYiSyllables + yisyllables => 'InYiSyll', + +); +1 diff --git a/lib/unicore/Exact.pl b/lib/unicore/Exact.pl new file mode 100644 index 0000000..c72557a --- /dev/null +++ b/lib/unicore/Exact.pl @@ -0,0 +1,110 @@ +# !!!!!!! DO NOT EDIT THIS FILE !!!!!!! +# This file is built by ./mktables from e.g. Unicode.txt. +# Any changes made here will be lost! 
+ +## +## Data in this file used by ../utf8_heavy.pl +## + +## Mapping from name to filename in ./lib +%utf8::Exact = ( + ASCII => 'ASCII', + All => 'Any', + Alnum => 'Alnum', + Alpha => 'Alpha', + Any => 'Any', + Assigned => 'Assigned', + BidiAL => 'BidiAL', + BidiAN => 'BidiAN', + BidiB => 'BidiB', + BidiBN => 'BidiBN', + BidiCS => 'BidiCS', + BidiEN => 'BidiEN', + BidiES => 'BidiES', + BidiET => 'BidiET', + BidiL => 'BidiL', + BidiLRE => 'BidiLRE', + BidiLRO => 'BidiLRO', + BidiNSM => 'BidiNSM', + BidiON => 'BidiON', + BidiPDF => 'BidiPDF', + BidiR => 'BidiR', + BidiRLE => 'BidiRLE', + BidiRLO => 'BidiRLO', + BidiS => 'BidiS', + BidiWS => 'BidiWS', + Blank => 'Blank', + C => 'C', + Canon => 'Canon', + Cc => 'Cc', + Cf => 'Cf', + Cn => 'Cn', + Cntrl => 'Cntrl', + Co => 'Co', + Compat => 'Compat', + Cs => 'Cs', + DCcircle => 'DCcircle', + DCcompat => 'DCcompat', + DCfinal => 'DCfinal', + DCfont => 'DCfont', + DCfraction => 'DCfracti', + DCinitial => 'DCinitia', + DCisolated => 'DCisolat', + DCmedial => 'DCmedial', + DCnarrow => 'DCnarrow', + DCnoBreak => 'DCnoBrea', + DCsmall => 'DCsmall', + DCsquare => 'DCsquare', + DCsub => 'DCsub', + DCsuper => 'DCsuper', + DCvertical => 'DCvertic', + DCwide => 'DCwide', + Digit => 'Digit', + Graph => 'Graph', + L => 'L', +'L&' => 'L_', + Ll => 'Ll', + Lm => 'Lm', + Lo => 'Lo', + Lower => 'Lower', + Lt => 'Lt', + Lu => 'Lu', + M => 'M', + Mc => 'Mc', + Me => 'Me', + Mirrored => 'Mirrored', + Mn => 'Mn', + N => 'N', + Nd => 'Nd', + Nl => 'Nl', + No => 'No', + P => 'P', + Pc => 'Pc', + Pd => 'Pd', + Pe => 'Pe', + Pf => 'Pf', + Pi => 'Pi', + Po => 'Po', + Print => 'Print', + Ps => 'Ps', + Punct => 'Punct', + S => 'S', + Sc => 'Sc', + Sk => 'Sk', + Sm => 'Sm', + So => 'So', + Space => 'Space', + SpacePerl => 'SpacePer', + Title => 'Title', + Upper => 'Upper', + Word => 'Word', + XDigit => 'XDigit', + Z => 'Z', + Zl => 'Zl', + Zp => 'Zp', + Zs => 'Zs', + _CanonDCIJ => '_CanonDC', + _CaseIgnorable => '_CaseIgn', + _CombAbove => 
'_CombAbo', +); +1; diff --git a/lib/unicore/In.pl b/lib/unicore/In.pl deleted file mode 100644 index 00f16e1..0000000 --- a/lib/unicore/In.pl +++ /dev/null @@ -1,219 +0,0 @@ -# !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by ./mktables from e.g. Unicode.txt. -# Any changes made here will be lost! - -## -## Data in this file used by ../utf8_heavy.pl -## - -## Mapping from name to filename in ./In -%utf8::In = ( -); - -## Mappings from regex to filename in ./In/ -%utf8::InPat = ( - 'al' => { - 'Alphabetic(?:[-_]|\s+)?Presentation(?:[-_]|\s+)?Forms' => 'Alphabet', - }, - 'ar' => { - 'Arabic(?:[-_]|\s+)?Presentation(?:[-_]|\s+)?Forms(?:[-_]|\s+)?A' => 'ArabicP2', - 'Armenian' => 'Armenian', - 'Arabic' => 'Arabic', - 'Arrows' => 'Arrows', - 'Arabic(?:[-_]|\s+)?Presentation(?:[-_]|\s+)?Forms(?:[-_]|\s+)?B' => 'ArabicPr', - }, - 'ba' => { - 'Basic(?:[-_]|\s+)?Latin' => 'BasicLat', - }, - 'be' => { - 'Bengali' => 'Bengali', - }, - 'bl' => { - 'Block(?:[-_]|\s+)?Elements' => 'BlockEle', - }, - 'bo' => { - 'Bopomofo(?:[-_]|\s+)?Extended' => 'Bopomof2', - 'Box(?:[-_]|\s+)?Drawing' => 'BoxDrawi', - 'Bopomofo' => 'Bopomofo', - }, - 'br' => { - 'Braille(?:[-_]|\s+)?Patterns' => 'BrailleP', - }, - 'by' => { - 'Byzantine(?:[-_]|\s+)?Musical(?:[-_]|\s+)?Symbols' => 'Byzantin', - }, - 'ch' => { - 'Cherokee' => 'Cherokee', - }, - 'cj' => { - 'Cjk(?:[-_]|\s+)?Unified(?:[-_]|\s+)?Ideographs(?:[-_]|\s+)?Extension(?:[-_]|\s+)?A' => 'CjkUnif2', - 'Cjk(?:[-_]|\s+)?Radicals(?:[-_]|\s+)?Supplement' => 'CjkRadic', - 'Cjk(?:[-_]|\s+)?Compatibility(?:[-_]|\s+)?Ideographs' => 'CjkComp3', - 'Cjk(?:[-_]|\s+)?Compatibility(?:[-_]|\s+)?Ideographs(?:[-_]|\s+)?Supplement' => 'CjkComp4', - 'Cjk(?:[-_]|\s+)?Compatibility' => 'CjkCompa', - 'Cjk(?:[-_]|\s+)?Unified(?:[-_]|\s+)?Ideographs' => 'CjkUnifi', - 'Cjk(?:[-_]|\s+)?Symbols(?:[-_]|\s+)?And(?:[-_]|\s+)?Punctuation' => 'CjkSymbo', - 'Cjk(?:[-_]|\s+)?Compatibility(?:[-_]|\s+)?Forms' => 'CjkComp2', - 
'Cjk(?:[-_]|\s+)?Unified(?:[-_]|\s+)?Ideographs(?:[-_]|\s+)?Extension(?:[-_]|\s+)?B' => 'CjkUnif3', - }, - 'co' => { - 'Combining(?:[-_]|\s+)?Diacritical(?:[-_]|\s+)?Marks' => 'Combini2', - 'Combining(?:[-_]|\s+)?Half(?:[-_]|\s+)?Marks' => 'Combinin', - 'Control(?:[-_]|\s+)?Pictures' => 'ControlP', - 'Combining(?:[-_]|\s+)?Marks(?:[-_]|\s+)?For(?:[-_]|\s+)?Symbols' => 'Combini3', - }, - 'cu' => { - 'Currency(?:[-_]|\s+)?Symbols' => 'Currency', - }, - 'cy' => { - 'Cyrillic' => 'Cyrillic', - }, - 'de' => { - 'Deseret' => 'Deseret', - 'Devanagari' => 'Devanaga', - }, - 'di' => { - 'Dingbats' => 'Dingbats', - }, - 'en' => { - 'Enclosed(?:[-_]|\s+)?Alphanumerics' => 'Enclosed', - 'Enclosed(?:[-_]|\s+)?Cjk(?:[-_]|\s+)?Letters(?:[-_]|\s+)?And(?:[-_]|\s+)?Months' => 'Enclose2', - }, - 'et' => { - 'Ethiopic' => 'Ethiopic', - }, - 'ge' => { - 'Geometric(?:[-_]|\s+)?Shapes' => 'Geometri', - 'General(?:[-_]|\s+)?Punctuation' => 'GeneralP', - 'Georgian' => 'Georgian', - }, - 'go' => { - 'Gothic' => 'Gothic', - }, - 'gr' => { - 'Greek(?:[-_]|\s+)?Extended' => 'GreekExt', - 'Greek' => 'Greek', - }, - 'gu' => { - 'Gujarati' => 'Gujarati', - 'Gurmukhi' => 'Gurmukhi', - }, - 'ha' => { - 'Hangul(?:[-_]|\s+)?Syllables' => 'HangulSy', - 'Hangul(?:[-_]|\s+)?Compatibility(?:[-_]|\s+)?Jamo' => 'HangulCo', - 'Halfwidth(?:[-_]|\s+)?And(?:[-_]|\s+)?Fullwidth(?:[-_]|\s+)?Forms' => 'Halfwidt', - 'Hangul(?:[-_]|\s+)?Jamo' => 'HangulJa', - }, - 'he' => { - 'Hebrew' => 'Hebrew', - }, - 'hi' => { - 'High(?:[-_]|\s+)?Surrogates' => 'HighSurr', - 'Hiragana' => 'Hiragana', - 'High(?:[-_]|\s+)?Private(?:[-_]|\s+)?Use(?:[-_]|\s+)?Surrogates' => 'HighPriv', - }, - 'id' => { - 'Ideographic(?:[-_]|\s+)?Description(?:[-_]|\s+)?Characters' => 'Ideograp', - }, - 'ip' => { - 'Ipa(?:[-_]|\s+)?Extensions' => 'IpaExten', - }, - 'ka' => { - 'Kannada' => 'Kannada', - 'Kanbun' => 'Kanbun', - 'Kangxi(?:[-_]|\s+)?Radicals' => 'KangxiRa', - 'Katakana' => 'Katakana', - }, - 'kh' => { - 'Khmer' => 'Khmer', - }, - 'la' 
=> { - 'Lao' => 'Lao', - 'Latin(?:[-_]|\s+)?Extended(?:[-_]|\s+)?B' => 'LatinExt', - 'Latin(?:[-_]|\s+)?Extended(?:[-_]|\s+)?A' => 'LatinEx2', - 'Latin(?:[-_]|\s+)?Extended(?:[-_]|\s+)?Additional' => 'LatinEx3', - 'Latin(?:[-_]|\s+)?1(?:[-_]|\s+)?Supplement' => 'Latin1Su', - }, - 'le' => { - 'Letterlike(?:[-_]|\s+)?Symbols' => 'Letterli', - }, - 'lo' => { - 'Low(?:[-_]|\s+)?Surrogates' => 'LowSurro', - }, - 'ma' => { - 'Mathematical(?:[-_]|\s+)?Operators' => 'Mathemat', - 'Mathematical(?:[-_]|\s+)?Alphanumeric(?:[-_]|\s+)?Symbols' => 'Mathema2', - 'Malayalam' => 'Malayala', - }, - 'mi' => { - 'Miscellaneous(?:[-_]|\s+)?Technical' => 'Miscell2', - 'Miscellaneous(?:[-_]|\s+)?Symbols' => 'Miscella', - }, - 'mo' => { - 'Mongolian' => 'Mongolia', - }, - 'mu' => { - 'Musical(?:[-_]|\s+)?Symbols' => 'MusicalS', - }, - 'my' => { - 'Myanmar' => 'Myanmar', - }, - 'nu' => { - 'Number(?:[-_]|\s+)?Forms' => 'NumberFo', - }, - 'og' => { - 'Ogham' => 'Ogham', - }, - 'ol' => { - 'Old(?:[-_]|\s+)?Italic' => 'OldItali', - }, - 'op' => { - 'Optical(?:[-_]|\s+)?Character(?:[-_]|\s+)?Recognition' => 'OpticalC', - }, - 'or' => { - 'Oriya' => 'Oriya', - }, - 'pr' => { - 'Private(?:[-_]|\s+)?Use' => 'PrivateU', - }, - 'ru' => { - 'Runic' => 'Runic', - }, - 'si' => { - 'Sinhala' => 'Sinhala', - }, - 'sm' => { - 'Small(?:[-_]|\s+)?Form(?:[-_]|\s+)?Variants' => 'SmallFor', - }, - 'sp' => { - 'Spacing(?:[-_]|\s+)?Modifier(?:[-_]|\s+)?Letters' => 'SpacingM', - 'Specials' => 'Specials', - }, - 'su' => { - 'Superscripts(?:[-_]|\s+)?And(?:[-_]|\s+)?Subscripts' => 'Superscr', - }, - 'sy' => { - 'Syriac' => 'Syriac', - }, - 'ta' => { - 'Tamil' => 'Tamil', - 'Tags' => 'Tags', - }, - 'te' => { - 'Telugu' => 'Telugu', - }, - 'th' => { - 'Thaana' => 'Thaana', - 'Thai' => 'Thai', - }, - 'ti' => { - 'Tibetan' => 'Tibetan', - }, - 'un' => { - 'Unified(?:[-_]|\s+)?Canadian(?:[-_]|\s+)?Aboriginal(?:[-_]|\s+)?Syllabics' => 'UnifiedC', - }, - 'yi' => { - 'Yi(?:[-_]|\s+)?Syllables' => 'YiSyllab', - 
'Yi(?:[-_]|\s+)?Radicals' => 'YiRadica', - }, -); diff --git a/lib/unicore/Is.pl b/lib/unicore/Is.pl deleted file mode 100644 index 91debee..0000000 --- a/lib/unicore/Is.pl +++ /dev/null @@ -1,375 +0,0 @@ -# !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by ./mktables from e.g. Unicode.txt. -# Any changes made here will be lost! - -## -## Data in this file used by ../utf8_heavy.pl -## - -## Mapping from name to filename in ./Is -%utf8::Is = ( - 'ASCII' => 'ASCII', - 'Alnum' => 'Alnum', - 'Alpha' => 'Alpha', - 'BidiAL' => 'BidiAL', - 'BidiAN' => 'BidiAN', - 'BidiB' => 'BidiB', - 'BidiBN' => 'BidiBN', - 'BidiCS' => 'BidiCS', - 'BidiEN' => 'BidiEN', - 'BidiES' => 'BidiES', - 'BidiET' => 'BidiET', - 'BidiL' => 'BidiL', - 'BidiLRE' => 'BidiLRE', - 'BidiLRO' => 'BidiLRO', - 'BidiNSM' => 'BidiNSM', - 'BidiON' => 'BidiON', - 'BidiPDF' => 'BidiPDF', - 'BidiR' => 'BidiR', - 'BidiRLE' => 'BidiRLE', - 'BidiRLO' => 'BidiRLO', - 'BidiS' => 'BidiS', - 'BidiWS' => 'BidiWS', - 'Blank' => 'Blank', - 'C' => 'C', - 'Canon' => 'Canon', - 'Cc' => 'Cc', - 'Cf' => 'Cf', - 'Cn' => 'Cn', - 'Cntrl' => 'Cntrl', - 'Co' => 'Co', - 'Compat' => 'Compat', - 'Cs' => 'Cs', - 'DCcircle' => 'DCcircle', - 'DCcompat' => 'DCcompat', - 'DCfinal' => 'DCfinal', - 'DCfont' => 'DCfont', - 'DCfraction' => 'DCfracti', - 'DCinitial' => 'DCinitia', - 'DCisolated' => 'DCisolat', - 'DCmedial' => 'DCmedial', - 'DCnarrow' => 'DCnarrow', - 'DCnoBreak' => 'DCnoBrea', - 'DCsmall' => 'DCsmall', - 'DCsquare' => 'DCsquare', - 'DCsub' => 'DCsub', - 'DCsuper' => 'DCsuper', - 'DCvertical' => 'DCvertic', - 'DCwide' => 'DCwide', - 'Digit' => 'Digit', - 'Graph' => 'Graph', - 'L' => 'L', - 'L&' => 'L_', - 'LbrkAI' => 'LbrkAI', - 'LbrkAL' => 'LbrkAL', - 'LbrkB2' => 'LbrkB2', - 'LbrkBA' => 'LbrkBA', - 'LbrkBB' => 'LbrkBB', - 'LbrkBK' => 'LbrkBK', - 'LbrkCB' => 'LbrkCB', - 'LbrkCL' => 'LbrkCL', - 'LbrkCM' => 'LbrkCM', - 'LbrkCR' => 'LbrkCR', - 'LbrkEX' => 'LbrkEX', - 'LbrkGL' => 'LbrkGL', - 'LbrkHY' => 'LbrkHY', - 
'LbrkID' => 'LbrkID', - 'LbrkIN' => 'LbrkIN', - 'LbrkIS' => 'LbrkIS', - 'LbrkLF' => 'LbrkLF', - 'LbrkNS' => 'LbrkNS', - 'LbrkNU' => 'LbrkNU', - 'LbrkOP' => 'LbrkOP', - 'LbrkPO' => 'LbrkPO', - 'LbrkPR' => 'LbrkPR', - 'LbrkQU' => 'LbrkQU', - 'LbrkSA' => 'LbrkSA', - 'LbrkSG' => 'LbrkSG', - 'LbrkSP' => 'LbrkSP', - 'LbrkSY' => 'LbrkSY', - 'LbrkXX' => 'LbrkXX', - 'LbrkZW' => 'LbrkZW', - 'Ll' => 'Ll', - 'Lm' => 'Lm', - 'Lo' => 'Lo', - 'Lower' => 'Lower', - 'Lt' => 'Lt', - 'Lu' => 'Lu', - 'M' => 'M', - 'Mc' => 'Mc', - 'Me' => 'Me', - 'Mirrored' => 'Mirrored', - 'Mn' => 'Mn', - 'N' => 'N', - 'Nd' => 'Nd', - 'Nl' => 'Nl', - 'No' => 'No', - 'P' => 'P', - 'Pc' => 'Pc', - 'Pd' => 'Pd', - 'Pe' => 'Pe', - 'Pf' => 'Pf', - 'Pi' => 'Pi', - 'Po' => 'Po', - 'Print' => 'Print', - 'Ps' => 'Ps', - 'Punct' => 'Punct', - 'S' => 'S', - 'Sc' => 'Sc', - 'Sk' => 'Sk', - 'Sm' => 'Sm', - 'So' => 'So', - 'Space' => 'Space', - 'SpacePerl' => 'SpacePer', - 'Title' => 'Title', - 'Upper' => 'Upper', - 'Word' => 'Word', - 'XDigit' => 'XDigit', - 'Z' => 'Z', - 'Zl' => 'Zl', - 'Zp' => 'Zp', - 'Zs' => 'Zs', - '_CanonDCIJ' => '_CanonDC', - '_CaseIgnorable' => '_CaseIgn', - '_CombAbove' => '_CombAbo', -); - -## Mappings from regex to filename in ./Is/ -%utf8::IsPat = ( - 'al' => { - 'All' => 'Any', - 'Alphabetic' => 'Alphabet', - }, - 'an' => { - 'Any' => 'Any', - }, - 'ar' => { - 'Armenian' => 'Armenian', - 'Arabic' => 'Arabic', - }, - 'as' => { - 'Ascii(?:[-_]|\s+)?Hex(?:[-_]|\s+)?Digit' => 'AsciiHex', - 'Assigned' => 'Assigned', - }, - 'be' => { - 'Bengali' => 'Bengali', - }, - 'bi' => { - 'Bidi(?:[-_]|\s+)?Control' => 'BidiCont', - }, - 'bo' => { - 'Bopomofo' => 'Bopomofo', - }, - 'ca' => { - 'Canadian(?:[-_]|\s+)?Aboriginal' => 'Canadian', - }, - 'ch' => { - 'Cherokee' => 'Cherokee', - }, - 'cl' => { - 'Close(?:[-_]|\s+)?Punctuation' => 'Pe', - }, - 'co' => { - 'Control' => 'Cc', - 'Common' => 'Common', - 'Connector(?:[-_]|\s+)?Punctuation' => 'Pc', - }, - 'cu' => { - 'Currency(?:[-_]|\s+)?Symbol' => 
'Sc', - }, - 'cy' => { - 'Cyrillic' => 'Cyrillic', - }, - 'da' => { - 'Dash(?:[-_]|\s+)?Punctuation' => 'Pd', - 'Dash' => 'Dash', - }, - 'de' => { - 'Deseret' => 'Deseret', - 'Devanagari' => 'Devanaga', - 'Decimal(?:[-_]|\s+)?Number' => 'Nd', - }, - 'di' => { - 'Diacritic' => 'Diacriti', - }, - 'en' => { - 'Enclosing(?:[-_]|\s+)?Mark' => 'Me', - }, - 'et' => { - 'Ethiopic' => 'Ethiopic', - }, - 'ex' => { - 'Extender' => 'Extender', - }, - 'fi' => { - 'Final(?:[-_]|\s+)?Punctuation' => 'Pf', - }, - 'fo' => { - 'Format' => 'Cf', - }, - 'ge' => { - 'Georgian' => 'Georgian', - }, - 'go' => { - 'Gothic' => 'Gothic', - }, - 'gr' => { - 'Greek' => 'Greek', - }, - 'gu' => { - 'Gujarati' => 'Gujarati', - 'Gurmukhi' => 'Gurmukhi', - }, - 'ha' => { - 'Hangul' => 'Hangul', - 'Han' => 'Han', - }, - 'he' => { - 'Hebrew' => 'Hebrew', - 'Hex(?:[-_]|\s+)?Digit' => 'HexDigit', - }, - 'hi' => { - 'Hiragana' => 'Hiragana', - }, - 'hy' => { - 'Hyphen' => 'Hyphen', - }, - 'id' => { - 'Ideographic' => 'Ideograp', - 'Id(?:[-_]|\s+)?Continue' => 'IdContin', - 'Id(?:[-_]|\s+)?Start' => 'IdStart', - }, - 'in' => { - 'Inherited' => 'Inherite', - 'Initial(?:[-_]|\s+)?Punctuation' => 'Pi', - }, - 'jo' => { - 'Join(?:[-_]|\s+)?Control' => 'JoinCont', - }, - 'ka' => { - 'Kannada' => 'Kannada', - 'Katakana' => 'Katakana', - }, - 'kh' => { - 'Khmer' => 'Khmer', - }, - 'la' => { - 'Lao' => 'Lao', - 'Latin' => 'Latin', - }, - 'le' => { - 'Letter(?:[-_]|\s+)?Number' => 'Nl', - 'Letter' => 'L', - }, - 'li' => { - 'Line(?:[-_]|\s+)?Separator' => 'Zl', - }, - 'lo' => { - 'Lowercase' => 'Lowercas', - 'Lowercase(?:[-_]|\s+)?Letter' => 'Ll', - }, - 'ma' => { - 'Math' => 'Math', - 'Malayalam' => 'Malayala', - 'Mark' => 'M', - 'Math(?:[-_]|\s+)?Symbol' => 'Sm', - }, - 'mo' => { - 'Modifier(?:[-_]|\s+)?Symbol' => 'Sk', - 'Mongolian' => 'Mongolia', - 'Modifier(?:[-_]|\s+)?Letter' => 'Lm', - }, - 'my' => { - 'Myanmar' => 'Myanmar', - }, - 'no' => { - 'Noncharacter(?:[-_]|\s+)?Code(?:[-_]|\s+)?Point' => 
'Nonchara', - 'Non(?:[-_]|\s+)?Spacing(?:[-_]|\s+)?Mark' => 'Mn', - }, - 'nu' => { - 'Number' => 'N', - }, - 'og' => { - 'Ogham' => 'Ogham', - }, - 'ol' => { - 'Old(?:[-_]|\s+)?Italic' => 'OldItali', - }, - 'op' => { - 'Open(?:[-_]|\s+)?Punctuation' => 'Ps', - }, - 'or' => { - 'Oriya' => 'Oriya', - }, - 'ot' => { - 'Other(?:[-_]|\s+)?Punctuation' => 'Po', - 'Other(?:[-_]|\s+)?Uppercase' => 'OtherUpp', - 'Other(?:[-_]|\s+)?Alphabetic' => 'OtherAlp', - 'Other(?:[-_]|\s+)?Symbol' => 'So', - 'Other(?:[-_]|\s+)?Number' => 'No', - 'Other' => 'C', - 'Other(?:[-_]|\s+)?Math' => 'OtherMat', - 'Other(?:[-_]|\s+)?Letter' => 'Lo', - 'Other(?:[-_]|\s+)?Lowercase' => 'OtherLow', - }, - 'pa' => { - 'Paragraph(?:[-_]|\s+)?Separator' => 'Zp', - }, - 'pr' => { - 'Private(?:[-_]|\s+)?Use' => 'Co', - }, - 'pu' => { - 'Punctuation' => 'P', - }, - 'qu' => { - 'Quotation(?:[-_]|\s+)?Mark' => 'Quotatio', - }, - 'ru' => { - 'Runic' => 'Runic', - }, - 'se' => { - 'Separator' => 'Z', - }, - 'si' => { - 'Sinhala' => 'Sinhala', - }, - 'sp' => { - 'Space(?:[-_]|\s+)?Separator' => 'Zs', - 'Spacing(?:[-_]|\s+)?Mark' => 'Mc', - }, - 'su' => { - 'Surrogate' => 'Cs', - }, - 'sy' => { - 'Syriac' => 'Syriac', - 'Symbol' => 'S', - }, - 'ta' => { - 'Tamil' => 'Tamil', - }, - 'te' => { - 'Telugu' => 'Telugu', - 'Terminal(?:[-_]|\s+)?Punctuation' => 'Terminal', - }, - 'th' => { - 'Thaana' => 'Thaana', - 'Thai' => 'Thai', - }, - 'ti' => { - 'Tibetan' => 'Tibetan', - 'Titlecase(?:[-_]|\s+)?Letter' => 'Lt', - }, - 'un' => { - 'Unassigned' => 'Cn', - }, - 'up' => { - 'Uppercase' => 'Uppercas', - 'Uppercase(?:[-_]|\s+)?Letter' => 'Lu', - }, - 'wh' => { - 'White(?:[-_]|\s+)?Space' => 'WhiteSpa', - }, - 'yi' => { - 'Yi' => 'Yi', - }, -); diff --git a/lib/unicore/Is/LbrkAI.pl b/lib/unicore/Is/LbrkAI.pl deleted file mode 100644 index 36e3e17..0000000 --- a/lib/unicore/Is/LbrkAI.pl +++ /dev/null @@ -1,145 +0,0 @@ -# !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by ./mktables from e.g. Unicode.txt. 
-# Any changes made here will be lost! - -# -# This file supports: -# \p{LbrkAI} -# -# Meaning: Linebreak category 'AI' -# -return <<'END'; -00A1 -00A7 00A8 -00AA -00B2 00B3 -00B6 00BA -00BC 00BF -00C6 -00D0 -00D7 00D8 -00DE 00E1 -00E6 -00E8 00EA -00EC 00ED -00F0 -00F2 00F3 -00F7 00FA -00FC -00FE -0101 -0111 -0113 -011B -0126 0127 -012B -0131 0133 -0138 -013F 0142 -0144 -0148 014A -014D -0152 0153 -0166 0167 -016B -01CE -01D0 -01D2 -01D4 -01D6 -01D8 -01DA -01DC -0251 -0261 -02C7 -02C9 02CB -02CD -02D0 -02D8 02DB -02DD -0391 03A1 -03A3 03A9 -03B1 03C1 -03C3 03C9 -0401 -0410 044F -0451 -2015 2016 -2020 2021 -203B -2074 -207F -2081 2084 -2105 -2113 -2121 2122 -212B -2154 2155 -215B -215E -2160 216B -2170 2179 -2190 2199 -21D2 -21D4 -2200 -2202 2203 -2207 2208 -220B -220F -2211 -2215 -221A -221D 2220 -2223 -2225 -2227 222C -222E -2234 2237 -223C 223D -2248 -224C -2252 -2260 2261 -2264 2267 -226A 226B -226E 226F -2282 2283 -2286 2287 -2295 -2299 -22A5 -22BF -2312 -2460 24BF -24D0 24E9 -2500 254B -2550 2574 -2580 258F -2592 2595 -25A0 25A1 -25A3 25A9 -25B2 25B3 -25B6 25B7 -25BC 25BD -25C0 25C1 -25C6 25C8 -25CB -25CE 25D1 -25E2 25E5 -25EF -2605 2606 -2609 -260E 260F -261C -261E -2640 -2642 -2660 2661 -2663 2665 -2667 266A -266C 266D -266F -FFFD -END diff --git a/lib/unicore/Is/LbrkAL.pl b/lib/unicore/Is/LbrkAL.pl deleted file mode 100644 index 59b3c4a..0000000 --- a/lib/unicore/Is/LbrkAL.pl +++ /dev/null @@ -1,425 +0,0 @@ -# !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by ./mktables from e.g. Unicode.txt. -# Any changes made here will be lost! 
- -# -# This file supports: -# \p{LbrkAL} -# -# Meaning: Linebreak category 'AL' -# -return <<'END'; -0023 -0026 -002A -003C 003E -0040 005A -005E 007A -007E -00A6 -00A9 -00AC -00AE 00AF -00B5 -00C0 00C5 -00C7 00CF -00D1 00D6 -00D9 00DD -00E2 00E5 -00E7 -00EB -00EE 00EF -00F1 -00F4 00F6 -00FB -00FD -00FF 0100 -0102 0110 -0112 -0114 011A -011C 0125 -0128 012A -012C 0130 -0134 0137 -0139 013E -0143 -0145 0147 -014B 014C -014E 0151 -0154 0165 -0168 016A -016C 01CD -01CF -01D1 -01D3 -01D5 -01D7 -01D9 -01DB -01DD 021F -0222 0233 -0250 -0252 0260 -0262 02AD -02B0 02C6 -02CE 02CF -02D1 02D7 -02DC -02DE 02EE -0374 0375 -037A -037E -0384 038A -038C -038E 0390 -03AA 03B0 -03C2 -03CA 03CE -03D0 03D7 -03DA 03F5 -0400 -0402 040F -0450 -0452 0482 -048C 04C4 -04C7 04C8 -04CB 04CC -04D0 04F5 -04F8 04F9 -0531 0556 -0559 055F -0561 0587 -05BE -05C0 -05C3 -05D0 05EA -05F0 05F4 -060C -061B -061F -0621 063A -0640 064A -066A 066D -0671 06D5 -06E5 06E6 -06E9 -06FA 06FE -0700 070D -0710 -0712 072C -0780 07A5 -0905 0939 -093D -0950 -0958 0961 -0964 0965 -0970 -0985 098C -098F 0990 -0993 09A8 -09AA 09B0 -09B2 -09B6 09B9 -09DC 09DD -09DF 09E1 -09F0 09F1 -09F4 09FA -0A05 0A0A -0A0F 0A10 -0A13 0A28 -0A2A 0A30 -0A32 0A33 -0A35 0A36 -0A38 0A39 -0A59 0A5C -0A5E -0A72 0A74 -0A85 0A8B -0A8D -0A8F 0A91 -0A93 0AA8 -0AAA 0AB0 -0AB2 0AB3 -0AB5 0AB9 -0ABD -0AD0 -0AE0 -0B05 0B0C -0B0F 0B10 -0B13 0B28 -0B2A 0B30 -0B32 0B33 -0B36 0B39 -0B3D -0B5C 0B5D -0B5F 0B61 -0B70 -0B85 0B8A -0B8E 0B90 -0B92 0B95 -0B99 0B9A -0B9C -0B9E 0B9F -0BA3 0BA4 -0BA8 0BAA -0BAE 0BB5 -0BB7 0BB9 -0BF0 0BF2 -0C05 0C0C -0C0E 0C10 -0C12 0C28 -0C2A 0C33 -0C35 0C39 -0C60 0C61 -0C85 0C8C -0C8E 0C90 -0C92 0CA8 -0CAA 0CB3 -0CB5 0CB9 -0CDE -0CE0 0CE1 -0D05 0D0C -0D0E 0D10 -0D12 0D28 -0D2A 0D39 -0D60 0D61 -0D85 0D96 -0D9A 0DB1 -0DB3 0DBB -0DBD -0DC0 0DC6 -0DF4 -0E4F -0F00 0F0A -0F0D 0F17 -0F1A 0F1F -0F2A 0F34 -0F36 -0F38 -0F40 0F47 -0F49 0F6A -0F85 -0F88 0F8B -0FBE 0FC5 -0FC7 0FCC -0FCF -104A 104F -10A0 10C5 -10D0 10F6 -10FB -1200 1206 
-1208 1246 -1248 -124A 124D -1250 1256 -1258 -125A 125D -1260 1286 -1288 -128A 128D -1290 12AE -12B0 -12B2 12B5 -12B8 12BE -12C0 -12C2 12C5 -12C8 12CE -12D0 12D6 -12D8 12EE -12F0 130E -1310 -1312 1315 -1318 131E -1320 1346 -1348 135A -1362 1368 -1372 137C -13A0 13F4 -1401 1676 -1681 169A -16A0 16F0 -17DC -1800 1805 -1807 180A -1820 1877 -1880 18A8 -1E00 1E9B -1EA0 1EF9 -1F00 1F15 -1F18 1F1D -1F20 1F45 -1F48 1F4D -1F50 1F57 -1F59 -1F5B -1F5D -1F5F 1F7D -1F80 1FB4 -1FB6 1FC4 -1FC6 1FD3 -1FD6 1FDB -1FDD 1FEF -1FF2 1FF4 -1FF6 1FFE -2017 -2022 2023 -2038 -203D 2043 -2048 204D -2070 -2075 207C -2080 -2085 208C -2100 2102 -2104 -2106 2108 -210A 2112 -2114 2115 -2117 2120 -2123 2125 -2127 212A -212C 213A -2153 -2156 215A -215C 215D -215F -216C 216F -217A 2183 -219A 21D1 -21D3 -21D5 21F3 -2201 -2204 2206 -2209 220A -220C 220E -2210 -2214 -2216 2219 -221B 221C -2221 2222 -2224 -2226 -222D -222F 2233 -2238 223B -223E 2247 -2249 224B -224D 2251 -2253 225F -2262 2263 -2268 2269 -226C 226D -2270 2281 -2284 2285 -2288 2294 -2296 2298 -229A 22A4 -22A6 22BE -22C0 22F1 -2300 2311 -2313 2328 -232B 237B -237D 239A -2400 2426 -2440 244A -24C0 24CF -24EA -254C 254F -2575 257F -2590 2591 -25A2 -25AA 25B1 -25B4 25B5 -25B8 25BB -25BE 25BF -25C2 25C5 -25C9 25CA -25CC 25CD -25D2 25E1 -25E6 25EE -25F0 25F7 -2600 2604 -2607 2608 -260A 260D -2610 2613 -2619 261B -261D -261F 263F -2641 -2643 265F -2662 -2666 -266B -266E -2670 2671 -2701 2704 -2706 2709 -270C 2727 -2729 274B -274D -274F 2752 -2756 -2758 275E -2761 2767 -2776 2794 -2798 27AF -27B1 27BE -2800 28FF -FB00 FB06 -FB13 FB17 -FB1D -FB1F FB36 -FB38 FB3C -FB3E -FB40 FB41 -FB43 FB44 -FB46 FBB1 -FBD3 FD3D -FD50 FD8F -FD92 FDC7 -FDF0 FDFB -FE70 FE72 -FE74 -FE76 FEFC -FF66 -FF71 FF9D -FFA0 FFBE -FFC2 FFC7 -FFCA FFCF -FFD2 FFD7 -FFDA FFDC -FFE8 FFEE -10300 1031E -10320 10323 -10330 1034A -10400 10425 -10428 1044D -1D000 1D0F5 -1D100 1D126 -1D12A 1D164 -1D16A 1D16C -1D183 1D184 -1D18C 1D1A9 -1D1AE 1D1DD -1D400 1D454 -1D456 1D49C -1D49E 1D49F 
-1D4A2 -1D4A5 1D4A6 -1D4A9 1D4AC -1D4AE 1D4B9 -1D4BB -1D4BD 1D4C0 -1D4C2 1D4C3 -1D4C5 1D505 -1D507 1D50A -1D50D 1D514 -1D516 1D51C -1D51E 1D539 -1D53B 1D53E -1D540 1D544 -1D546 -1D54A 1D550 -1D552 1D6A3 -1D6A8 1D7C9 -END diff --git a/lib/unicore/Is/LbrkB2.pl b/lib/unicore/Is/LbrkB2.pl deleted file mode 100644 index dee0b69..0000000 --- a/lib/unicore/Is/LbrkB2.pl +++ /dev/null @@ -1,13 +0,0 @@ -# !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by ./mktables from e.g. Unicode.txt. -# Any changes made here will be lost! - -# -# This file supports: -# \p{LbrkB2} -# -# Meaning: Linebreak category 'B2' -# -return <<'END'; -2014 -END diff --git a/lib/unicore/Is/LbrkBA.pl b/lib/unicore/Is/LbrkBA.pl deleted file mode 100644 index fcc8c61..0000000 --- a/lib/unicore/Is/LbrkBA.pl +++ /dev/null @@ -1,25 +0,0 @@ -# !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by ./mktables from e.g. Unicode.txt. -# Any changes made here will be lost! - -# -# This file supports: -# \p{LbrkBA} -# -# Meaning: Linebreak category 'BA' -# -return <<'END'; -0009 -007C -00AD -058A -0F0B -1361 -1680 -17D5 -2000 2006 -2008 200A -2010 -2012 2013 -2027 -END diff --git a/lib/unicore/Is/LbrkBB.pl b/lib/unicore/Is/LbrkBB.pl deleted file mode 100644 index 5f71338..0000000 --- a/lib/unicore/Is/LbrkBB.pl +++ /dev/null @@ -1,16 +0,0 @@ -# !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by ./mktables from e.g. Unicode.txt. -# Any changes made here will be lost! - -# -# This file supports: -# \p{LbrkBB} -# -# Meaning: Linebreak category 'BB' -# -return <<'END'; -00B4 -02C8 -02CC -1806 -END diff --git a/lib/unicore/Is/LbrkBK.pl b/lib/unicore/Is/LbrkBK.pl deleted file mode 100644 index 27b4389..0000000 --- a/lib/unicore/Is/LbrkBK.pl +++ /dev/null @@ -1,14 +0,0 @@ -# !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by ./mktables from e.g. Unicode.txt. -# Any changes made here will be lost! 
- -# -# This file supports: -# \p{LbrkBK} -# -# Meaning: Linebreak category 'BK' -# -return <<'END'; -000C -2028 2029 -END diff --git a/lib/unicore/Is/LbrkCB.pl b/lib/unicore/Is/LbrkCB.pl deleted file mode 100644 index 2a71fd5..0000000 --- a/lib/unicore/Is/LbrkCB.pl +++ /dev/null @@ -1,13 +0,0 @@ -# !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by ./mktables from e.g. Unicode.txt. -# Any changes made here will be lost! - -# -# This file supports: -# \p{LbrkCB} -# -# Meaning: Linebreak category 'CB' -# -return <<'END'; -FFFC -END diff --git a/lib/unicore/Is/LbrkCL.pl b/lib/unicore/Is/LbrkCL.pl deleted file mode 100644 index beeeadc..0000000 --- a/lib/unicore/Is/LbrkCL.pl +++ /dev/null @@ -1,54 +0,0 @@ -# !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by ./mktables from e.g. Unicode.txt. -# Any changes made here will be lost! - -# -# This file supports: -# \p{LbrkCL} -# -# Meaning: Linebreak category 'CL' -# -return <<'END'; -0029 -005D -007D -0F3B -0F3D -169C -2046 -207E -208E -232A -3001 3002 -3009 -300B -300D -300F -3011 -3015 -3017 -3019 -301B -301E 301F -FD3F -FE36 -FE38 -FE3A -FE3C -FE3E -FE40 -FE42 -FE44 -FE50 -FE52 -FE5A -FE5C -FE5E -FF09 -FF0C -FF0E -FF3D -FF5D -FF61 -FF63 FF64 -END diff --git a/lib/unicore/Is/LbrkCM.pl b/lib/unicore/Is/LbrkCM.pl deleted file mode 100644 index 1db78a9..0000000 --- a/lib/unicore/Is/LbrkCM.pl +++ /dev/null @@ -1,130 +0,0 @@ -# !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by ./mktables from e.g. Unicode.txt. -# Any changes made here will be lost! 
- -# -# This file supports: -# \p{LbrkCM} -# -# Meaning: Linebreak category 'CM' -# -return <<'END'; -0000 0008 -000B -000E 001F -007F 009F -0300 034E -0360 0362 -0483 0486 -0488 0489 -0591 05A1 -05A3 05B9 -05BB 05BD -05BF -05C1 05C2 -05C4 -064B 0655 -0670 -06D6 06E4 -06E7 06E8 -06EA 06ED -070F -0711 -0730 074A -07A6 07B0 -0901 0903 -093C -093E 094D -0951 0954 -0962 0963 -0981 0983 -09BC -09BE 09C4 -09C7 09C8 -09CB 09CD -09D7 -09E2 09E3 -0A02 -0A3C -0A3E 0A42 -0A47 0A48 -0A4B 0A4D -0A70 0A71 -0A81 0A83 -0ABC -0ABE 0AC5 -0AC7 0AC9 -0ACB 0ACD -0B01 0B03 -0B3C -0B3E 0B43 -0B47 0B48 -0B4B 0B4D -0B56 0B57 -0B82 0B83 -0BBE 0BC2 -0BC6 0BC8 -0BCA 0BCD -0BD7 -0C01 0C03 -0C3E 0C44 -0C46 0C48 -0C4A 0C4D -0C55 0C56 -0C82 0C83 -0CBE 0CC4 -0CC6 0CC8 -0CCA 0CCD -0CD5 0CD6 -0D02 0D03 -0D3E 0D43 -0D46 0D48 -0D4A 0D4D -0D57 -0D82 0D83 -0DCA -0DCF 0DD4 -0DD6 -0DD8 0DDF -0DF2 0DF3 -0E31 -0E34 0E3A -0E47 0E4E -0EB1 -0EB4 0EB9 -0EBB 0EBC -0EC8 0ECD -0F18 0F19 -0F35 -0F37 -0F39 -0F3E 0F3F -0F71 0F84 -0F86 0F87 -0F90 0F97 -0F99 0FBC -0FC6 -102C 1032 -1036 1039 -1056 1059 -1160 11A2 -11A8 11F9 -17B4 17D3 -180B 180E -18A9 -200C 200F -202A 202E -206A 206F -20D0 20E3 -302A 302F -3099 309A -FB1E -FE20 FE23 -FFF9 FFFB -1D165 1D169 -1D16D 1D182 -1D185 1D18B -1D1AA 1D1AD -E0001 -E0020 E007F -END diff --git a/lib/unicore/Is/LbrkCR.pl b/lib/unicore/Is/LbrkCR.pl deleted file mode 100644 index 22a4702..0000000 --- a/lib/unicore/Is/LbrkCR.pl +++ /dev/null @@ -1,13 +0,0 @@ -# !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by ./mktables from e.g. Unicode.txt. -# Any changes made here will be lost! - -# -# This file supports: -# \p{LbrkCR} -# -# Meaning: Linebreak category 'CR' -# -return <<'END'; -000D -END diff --git a/lib/unicore/Is/LbrkEX.pl b/lib/unicore/Is/LbrkEX.pl deleted file mode 100644 index 48626e0..0000000 --- a/lib/unicore/Is/LbrkEX.pl +++ /dev/null @@ -1,17 +0,0 @@ -# !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by ./mktables from e.g. Unicode.txt. 
-# Any changes made here will be lost! - -# -# This file supports: -# \p{LbrkEX} -# -# Meaning: Linebreak category 'EX' -# -return <<'END'; -0021 -003F -FE56 FE57 -FF01 -FF1F -END diff --git a/lib/unicore/Is/LbrkGL.pl b/lib/unicore/Is/LbrkGL.pl deleted file mode 100644 index ae2f909..0000000 --- a/lib/unicore/Is/LbrkGL.pl +++ /dev/null @@ -1,18 +0,0 @@ -# !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by ./mktables from e.g. Unicode.txt. -# Any changes made here will be lost! - -# -# This file supports: -# \p{LbrkGL} -# -# Meaning: Linebreak category 'GL' -# -return <<'END'; -00A0 -0F0C -2007 -2011 -202F -FEFF -END diff --git a/lib/unicore/Is/LbrkHY.pl b/lib/unicore/Is/LbrkHY.pl deleted file mode 100644 index 09cfd36..0000000 --- a/lib/unicore/Is/LbrkHY.pl +++ /dev/null @@ -1,13 +0,0 @@ -# !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by ./mktables from e.g. Unicode.txt. -# Any changes made here will be lost! - -# -# This file supports: -# \p{LbrkHY} -# -# Meaning: Linebreak category 'HY' -# -return <<'END'; -002D -END diff --git a/lib/unicore/Is/LbrkID.pl b/lib/unicore/Is/LbrkID.pl deleted file mode 100644 index f91dd0e..0000000 --- a/lib/unicore/Is/LbrkID.pl +++ /dev/null @@ -1,91 +0,0 @@ -# !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by ./mktables from e.g. Unicode.txt. -# Any changes made here will be lost! 
- -# -# This file supports: -# \p{LbrkID} -# -# Meaning: Linebreak category 'ID' -# -return <<'END'; -1100 1159 -115F -2E80 2E99 -2E9B 2EF3 -2F00 2FD5 -2FF0 2FFB -3000 -3003 3004 -3006 3007 -3012 3013 -3020 3029 -3030 303A -303E 303F -3042 -3044 -3046 -3048 -304A 3062 -3064 3082 -3084 -3086 -3088 308D -308F 3094 -30A2 -30A4 -30A6 -30A8 -30AA 30C2 -30C4 30E2 -30E4 -30E6 -30E8 30ED -30EF 30F4 -30F7 30FA -30FC -30FE -3105 312C -3131 318E -3190 31B7 -3200 321C -3220 3243 -3260 327B -327F 32B0 -32C0 32CB -32D0 32FE -3300 3376 -337B 33DD -33E0 33FE -3400 4DB5 -4E00 9FA5 -A000 A48C -A490 A4A1 -A4A4 A4B3 -A4B5 A4C0 -A4C2 A4C4 -A4C6 -AC00 D7A3 -F900 FA2D -FE30 FE34 -FE49 FE4F -FE51 -FE58 -FE5F FE66 -FE68 -FE6B -FF02 FF03 -FF06 FF07 -FF0A FF0B -FF0D -FF0F FF19 -FF1C FF1E -FF20 FF3A -FF3C -FF3E FF5A -FF5C -FF5E -FFE2 FFE4 -20000 2A6D6 -2F800 2FA1D -END diff --git a/lib/unicore/Is/LbrkIN.pl b/lib/unicore/Is/LbrkIN.pl deleted file mode 100644 index e2920eb..0000000 --- a/lib/unicore/Is/LbrkIN.pl +++ /dev/null @@ -1,13 +0,0 @@ -# !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by ./mktables from e.g. Unicode.txt. -# Any changes made here will be lost! - -# -# This file supports: -# \p{LbrkIN} -# -# Meaning: Linebreak category 'IN' -# -return <<'END'; -2024 2026 -END diff --git a/lib/unicore/Is/LbrkIS.pl b/lib/unicore/Is/LbrkIS.pl deleted file mode 100644 index 32159ba..0000000 --- a/lib/unicore/Is/LbrkIS.pl +++ /dev/null @@ -1,16 +0,0 @@ -# !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by ./mktables from e.g. Unicode.txt. -# Any changes made here will be lost! - -# -# This file supports: -# \p{LbrkIS} -# -# Meaning: Linebreak category 'IS' -# -return <<'END'; -002C -002E -003A 003B -0589 -END diff --git a/lib/unicore/Is/LbrkLF.pl b/lib/unicore/Is/LbrkLF.pl deleted file mode 100644 index 84d9ef4..0000000 --- a/lib/unicore/Is/LbrkLF.pl +++ /dev/null @@ -1,13 +0,0 @@ -# !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by ./mktables from e.g. 
Unicode.txt. -# Any changes made here will be lost! - -# -# This file supports: -# \p{LbrkLF} -# -# Meaning: Linebreak category 'LF' -# -return <<'END'; -000A -END diff --git a/lib/unicore/Is/LbrkNS.pl b/lib/unicore/Is/LbrkNS.pl deleted file mode 100644 index 829d01b..0000000 --- a/lib/unicore/Is/LbrkNS.pl +++ /dev/null @@ -1,48 +0,0 @@ -# !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by ./mktables from e.g. Unicode.txt. -# Any changes made here will be lost! - -# -# This file supports: -# \p{LbrkNS} -# -# Meaning: Linebreak category 'NS' -# -return <<'END'; -0E5A 0E5B -17D4 -17D6 17DA -203C -2044 -3005 -301C -3041 -3043 -3045 -3047 -3049 -3063 -3083 -3085 -3087 -308E -309B 309E -30A1 -30A3 -30A5 -30A7 -30A9 -30C3 -30E3 -30E5 -30E7 -30EE -30F5 30F6 -30FB -30FD -FE54 FE55 -FF1A FF1B -FF65 -FF67 FF70 -FF9E FF9F -END diff --git a/lib/unicore/Is/LbrkNU.pl b/lib/unicore/Is/LbrkNU.pl deleted file mode 100644 index bfecec3..0000000 --- a/lib/unicore/Is/LbrkNU.pl +++ /dev/null @@ -1,32 +0,0 @@ -# !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by ./mktables from e.g. Unicode.txt. -# Any changes made here will be lost! - -# -# This file supports: -# \p{LbrkNU} -# -# Meaning: Linebreak category 'NU' -# -return <<'END'; -0030 0039 -0660 0669 -06F0 06F9 -0966 096F -09E6 09EF -0A66 0A6F -0AE6 0AEF -0B66 0B6F -0BE7 0BEF -0C66 0C6F -0CE6 0CEF -0D66 0D6F -0E50 0E59 -0ED0 0ED9 -0F20 0F29 -1040 1049 -1369 1371 -17E0 17E9 -1810 1819 -1D7CE 1D7FF -END diff --git a/lib/unicore/Is/LbrkOP.pl b/lib/unicore/Is/LbrkOP.pl deleted file mode 100644 index 6560490..0000000 --- a/lib/unicore/Is/LbrkOP.pl +++ /dev/null @@ -1,50 +0,0 @@ -# !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by ./mktables from e.g. Unicode.txt. -# Any changes made here will be lost! 
- -# -# This file supports: -# \p{LbrkOP} -# -# Meaning: Linebreak category 'OP' -# -return <<'END'; -0028 -005B -007B -0F3A -0F3C -169B -201A -201E -2045 -207D -208D -2329 -3008 -300A -300C -300E -3010 -3014 -3016 -3018 -301A -301D -FD3E -FE35 -FE37 -FE39 -FE3B -FE3D -FE3F -FE41 -FE43 -FE59 -FE5B -FE5D -FF08 -FF3B -FF5B -FF62 -END diff --git a/lib/unicore/Is/LbrkPO.pl b/lib/unicore/Is/LbrkPO.pl deleted file mode 100644 index 0ea5548..0000000 --- a/lib/unicore/Is/LbrkPO.pl +++ /dev/null @@ -1,23 +0,0 @@ -# !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by ./mktables from e.g. Unicode.txt. -# Any changes made here will be lost! - -# -# This file supports: -# \p{LbrkPO} -# -# Meaning: Linebreak category 'PO' -# -return <<'END'; -0025 -00A2 -00B0 -2030 2037 -20A7 -2103 -2109 -2126 -FE6A -FF05 -FFE0 -END diff --git a/lib/unicore/Is/LbrkPR.pl b/lib/unicore/Is/LbrkPR.pl deleted file mode 100644 index be6c388..0000000 --- a/lib/unicore/Is/LbrkPR.pl +++ /dev/null @@ -1,28 +0,0 @@ -# !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by ./mktables from e.g. Unicode.txt. -# Any changes made here will be lost! - -# -# This file supports: -# \p{LbrkPR} -# -# Meaning: Linebreak category 'PR' -# -return <<'END'; -0024 -002B -005C -00A3 00A5 -00B1 -09F2 09F3 -0E3F -17DB -20A0 20A6 -20A8 20AF -2116 -2212 2213 -FE69 -FF04 -FFE1 -FFE5 FFE6 -END diff --git a/lib/unicore/Is/LbrkQU.pl b/lib/unicore/Is/LbrkQU.pl deleted file mode 100644 index f23ef75..0000000 --- a/lib/unicore/Is/LbrkQU.pl +++ /dev/null @@ -1,20 +0,0 @@ -# !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by ./mktables from e.g. Unicode.txt. -# Any changes made here will be lost! 
- -# -# This file supports: -# \p{LbrkQU} -# -# Meaning: Linebreak category 'QU' -# -return <<'END'; -0022 -0027 -00AB -00BB -2018 2019 -201B 201D -201F -2039 203A -END diff --git a/lib/unicore/Is/LbrkSA.pl b/lib/unicore/Is/LbrkSA.pl deleted file mode 100644 index fc3d98c..0000000 --- a/lib/unicore/Is/LbrkSA.pl +++ /dev/null @@ -1,37 +0,0 @@ -# !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by ./mktables from e.g. Unicode.txt. -# Any changes made here will be lost! - -# -# This file supports: -# \p{LbrkSA} -# -# Meaning: Linebreak category 'SA' -# -return <<'END'; -0E01 0E30 -0E32 0E33 -0E40 0E46 -0E81 0E82 -0E84 -0E87 0E88 -0E8A -0E8D -0E94 0E97 -0E99 0E9F -0EA1 0EA3 -0EA5 -0EA7 -0EAA 0EAB -0EAD 0EB0 -0EB2 0EB3 -0EBD -0EC0 0EC4 -0EC6 -0EDC 0EDD -1000 1021 -1023 1027 -1029 102A -1050 1055 -1780 17B3 -END diff --git a/lib/unicore/Is/LbrkSG.pl b/lib/unicore/Is/LbrkSG.pl deleted file mode 100644 index a5acf16..0000000 --- a/lib/unicore/Is/LbrkSG.pl +++ /dev/null @@ -1,13 +0,0 @@ -# !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by ./mktables from e.g. Unicode.txt. -# Any changes made here will be lost! - -# -# This file supports: -# \p{LbrkSG} -# -# Meaning: Linebreak category 'SG' -# -return <<'END'; -D800 DFFF -END diff --git a/lib/unicore/Is/LbrkSP.pl b/lib/unicore/Is/LbrkSP.pl deleted file mode 100644 index c21e46d..0000000 --- a/lib/unicore/Is/LbrkSP.pl +++ /dev/null @@ -1,13 +0,0 @@ -# !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by ./mktables from e.g. Unicode.txt. -# Any changes made here will be lost! - -# -# This file supports: -# \p{LbrkSP} -# -# Meaning: Linebreak category 'SP' -# -return <<'END'; -0020 -END diff --git a/lib/unicore/Is/LbrkSY.pl b/lib/unicore/Is/LbrkSY.pl deleted file mode 100644 index 554b302..0000000 --- a/lib/unicore/Is/LbrkSY.pl +++ /dev/null @@ -1,13 +0,0 @@ -# !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by ./mktables from e.g. Unicode.txt. -# Any changes made here will be lost! 
- -# -# This file supports: -# \p{LbrkSY} -# -# Meaning: Linebreak category 'SY' -# -return <<'END'; -002F -END diff --git a/lib/unicore/Is/LbrkXX.pl b/lib/unicore/Is/LbrkXX.pl deleted file mode 100644 index 6ab9fcf..0000000 --- a/lib/unicore/Is/LbrkXX.pl +++ /dev/null @@ -1,15 +0,0 @@ -# !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by ./mktables from e.g. Unicode.txt. -# Any changes made here will be lost! - -# -# This file supports: -# \p{LbrkXX} -# -# Meaning: Linebreak category 'XX' -# -return <<'END'; -E000 F8FF -F0000 FFFFD -100000 10FFFD -END diff --git a/lib/unicore/Is/LbrkZW.pl b/lib/unicore/Is/LbrkZW.pl deleted file mode 100644 index a338cba..0000000 --- a/lib/unicore/Is/LbrkZW.pl +++ /dev/null @@ -1,13 +0,0 @@ -# !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by ./mktables from e.g. Unicode.txt. -# Any changes made here will be lost! - -# -# This file supports: -# \p{LbrkZW} -# -# Meaning: Linebreak category 'ZW' -# -return <<'END'; -200B -END diff --git a/lib/unicore/Makefile b/lib/unicore/Makefile index 948e982..99b7026 100644 --- a/lib/unicore/Makefile +++ b/lib/unicore/Makefile @@ -1,6 +1,12 @@ all: ../../miniperl -I../../lib ./mktables +TestProp.pl: mktables Unicode.txt Scripts.txt Blocks.txt PropList.txt + ../../miniperl -I../../lib ./mktables -maketest + +test: TestProp.pl + ../../miniperl -I../../lib TestProp.pl + clean: rm -f *.pl */*.pl rm -f Properties diff --git a/lib/unicore/Properties b/lib/unicore/Properties index 377fa5a..143ca59 100644 --- a/lib/unicore/Properties +++ b/lib/unicore/Properties @@ -6,15 +6,15 @@ ## \p{ASCII} [[:ASCII:]] -* \p{All} Alias for \p{Any} ([\x{0000}-\x{10FFFF}]) \p{Alnum} [[:Alnum:]] * \p{Alphabetic} [\p{L}\p{OtherAlphabetic}] \p{Alpha} [[:Alpha:]] -* \p{Any} [\x{0000}-\x{10FFFF}] + \p{Any} Alias for \p{Any} ([\x{0000}-\x{10FFFF}]) + \p{Any} [\x{0000}-\x{10FFFF}] * \p{Arabic} Script 'ARABIC' * \p{Armenian} Script 'ARMENIAN' * \p{AsciiHexDigit} Extended property 'ASCII_Hex_Digit' -* 
\p{Assigned} All assigned code points + \p{Assigned} All assigned code points * \p{Bengali} Script 'BENGALI' \p{BidiAL} Bi-directional category 'AL' \p{BidiAN} Bi-directional category 'AN' @@ -40,20 +40,22 @@ * \p{Bopomofo} Script 'BOPOMOFO' * \p{CanadianAboriginal} Script 'CANADIAN-ABORIGINAL' \p{Canon} Decomposes to multiple characters +* \p{Cc} Alias for \p{Cc} (General Category 'Cc') \p{Cc} General Category 'Cc' +* \p{Cf} Alias for \p{Cf} (General Category 'Cf') \p{Cf} General Category 'Cf' * \p{Cherokee} Script 'CHEROKEE' -* \p{ClosePunctuation} Alias for \p{Pe} (General Category 'Pe') \p{Cntrl} [[:Cntrl:]] +* \p{Cn} Alias for \p{Cn} (General Category 'Cn' [not functional in Perl]) \p{Cn} General Category 'Cn' [not functional in Perl] * \p{Common} Pseudo-Script of codepoints not in other Unicode scripts \p{Compat} Compatible with a more-basic character -* \p{ConnectorPunctuation} Alias for \p{Pc} (General Category 'Pc') -* \p{Control} Alias for \p{Cc} (General Category 'Cc') +* \p{Co} Alias for \p{Co} (General Category 'Co') \p{Co} General Category 'Co' +* \p{Cs} Alias for \p{Cs} (General Category 'Cs') \p{Cs} General Category 'Cs' -* \p{CurrencySymbol} Alias for \p{Sc} (General Category 'Sc') * \p{Cyrillic} Script 'CYRILLIC' +* \p{C} Alias for \p{C} (Major Category 'C') \p{C} Major Category 'C' \p{DCcircle} Compatible with 'circle' \p{DCcompat} Compatible with 'compat' @@ -71,18 +73,13 @@ \p{DCsuper} Compatible with 'super' \p{DCvertical} Compatible with 'vertical' \p{DCwide} Compatible with 'wide' -* \p{DashPunctuation} Alias for \p{Pd} (General Category 'Pd') * \p{Dash} Extended property 'Dash' -* \p{DecimalNumber} Alias for \p{Nd} (General Category 'Nd') * \p{Deseret} Script 'DESERET' * \p{Devanagari} Script 'DEVANAGARI' * \p{Diacritic} Extended property 'Diacritic' \p{Digit} [[:Digit:]] -* \p{EnclosingMark} Alias for \p{Me} (General Category 'Me') * \p{Ethiopic} Script 'ETHIOPIC' * \p{Extender} Extended property 'Extender' -* \p{FinalPunctuation} Alias 
for \p{Pf} (General Category 'Pf') -* \p{Format} Alias for \p{Cf} (General Category 'Cf') * \p{Georgian} Script 'GEORGIAN' * \p{Gothic} Script 'GOTHIC' \p{Graph} [[:Graph:]] @@ -195,7 +192,6 @@ * \p{InYiRadicals} Block 'Yi Radicals' * \p{InYiSyllables} Block 'Yi Syllables' * \p{Inherited} Script 'INHERITED' -* \p{InitialPunctuation} Alias for \p{Pi} (General Category 'Pi') * \p{JoinControl} Extended property 'Join_Control' * \p{Kannada} Script 'KANNADA' * \p{Katakana} Script 'KATAKANA' @@ -203,108 +199,82 @@ \p{L&} [\p{Ll}\p{Lu}\p{Lt}] * \p{Lao} Script 'LAO' * \p{Latin} Script 'LATIN' - \p{LbrkAI} Linebreak category 'AI' - \p{LbrkAL} Linebreak category 'AL' - \p{LbrkB2} Linebreak category 'B2' - \p{LbrkBA} Linebreak category 'BA' - \p{LbrkBB} Linebreak category 'BB' - \p{LbrkBK} Linebreak category 'BK' - \p{LbrkCB} Linebreak category 'CB' - \p{LbrkCL} Linebreak category 'CL' - \p{LbrkCM} Linebreak category 'CM' - \p{LbrkCR} Linebreak category 'CR' - \p{LbrkEX} Linebreak category 'EX' - \p{LbrkGL} Linebreak category 'GL' - \p{LbrkHY} Linebreak category 'HY' - \p{LbrkID} Linebreak category 'ID' - \p{LbrkIN} Linebreak category 'IN' - \p{LbrkIS} Linebreak category 'IS' - \p{LbrkLF} Linebreak category 'LF' - \p{LbrkNS} Linebreak category 'NS' - \p{LbrkNU} Linebreak category 'NU' - \p{LbrkOP} Linebreak category 'OP' - \p{LbrkPO} Linebreak category 'PO' - \p{LbrkPR} Linebreak category 'PR' - \p{LbrkQU} Linebreak category 'QU' - \p{LbrkSA} Linebreak category 'SA' - \p{LbrkSG} Linebreak category 'SG' - \p{LbrkSP} Linebreak category 'SP' - \p{LbrkSY} Linebreak category 'SY' - \p{LbrkXX} Linebreak category 'XX' - \p{LbrkZW} Linebreak category 'ZW' -* \p{LetterNumber} Alias for \p{Nl} (General Category 'Nl') -* \p{Letter} Alias for \p{L} (Major Category 'L') -* \p{LineSeparator} Alias for \p{Zl} (General Category 'Zl') +* \p{Ll} Alias for \p{Ll} (General Category 'Ll') \p{Ll} General Category 'Ll' +* \p{Lm} Alias for \p{Lm} (General Category 'Lm') \p{Lm} General Category 'Lm' 
-* \p{LowercaseLetter} Alias for \p{Ll} (General Category 'Ll') * \p{Lowercase} [\p{Ll}\p{OtherLowercase}] \p{Lower} [[:Lower:]] +* \p{Lo} Alias for \p{Lo} (General Category 'Lo') \p{Lo} General Category 'Lo' +* \p{Lt} Alias for \p{Lt} (General Category 'Lt') \p{Lt} General Category 'Lt' +* \p{Lu} Alias for \p{Lu} (General Category 'Lu') \p{Lu} General Category 'Lu' +* \p{L} Alias for \p{L} (Major Category 'L') \p{L} Major Category 'L' * \p{Malayalam} Script 'MALAYALAM' -* \p{Mark} Alias for \p{M} (Major Category 'M') -* \p{MathSymbol} Alias for \p{Sm} (General Category 'Sm') * \p{Math} [\p{Sm}\p{OtherMath}] +* \p{Mc} Alias for \p{Mc} (General Category 'Mc') \p{Mc} General Category 'Mc' +* \p{Me} Alias for \p{Me} (General Category 'Me') \p{Me} General Category 'Me' \p{Mirrored} Mirrored in bidirectional text +* \p{Mn} Alias for \p{Mn} (General Category 'Mn') \p{Mn} General Category 'Mn' -* \p{ModifierLetter} Alias for \p{Lm} (General Category 'Lm') -* \p{ModifierSymbol} Alias for \p{Sk} (General Category 'Sk') * \p{Mongolian} Script 'MONGOLIAN' * \p{Myanmar} Script 'MYANMAR' +* \p{M} Alias for \p{M} (Major Category 'M') \p{M} Major Category 'M' +* \p{Nd} Alias for \p{Nd} (General Category 'Nd') \p{Nd} General Category 'Nd' +* \p{Nl} Alias for \p{Nl} (General Category 'Nl') \p{Nl} General Category 'Nl' -* \p{NonSpacingMark} Alias for \p{Mn} (General Category 'Mn') * \p{NoncharacterCodePoint} Extended property 'Noncharacter_Code_Point' +* \p{No} Alias for \p{No} (General Category 'No') \p{No} General Category 'No' -* \p{Number} Alias for \p{N} (Major Category 'N') +* \p{N} Alias for \p{N} (Major Category 'N') \p{N} Major Category 'N' * \p{Ogham} Script 'OGHAM' * \p{OldItalic} Script 'OLD-ITALIC' -* \p{OpenPunctuation} Alias for \p{Ps} (General Category 'Ps') * \p{Oriya} Script 'ORIYA' * \p{OtherAlphabetic} Extended property 'Other_Alphabetic' -* \p{OtherLetter} Alias for \p{Lo} (General Category 'Lo') * \p{OtherLowercase} Extended property 'Other_Lowercase' * 
\p{OtherMath} Extended property 'Other_Math' -* \p{OtherNumber} Alias for \p{No} (General Category 'No') -* \p{OtherPunctuation} Alias for \p{Po} (General Category 'Po') -* \p{OtherSymbol} Alias for \p{So} (General Category 'So') * \p{OtherUppercase} Extended property 'Other_Uppercase' -* \p{Other} Alias for \p{C} (Major Category 'C') -* \p{ParagraphSeparator} Alias for \p{Zp} (General Category 'Zp') +* \p{Pc} Alias for \p{Pc} (General Category 'Pc') \p{Pc} General Category 'Pc' +* \p{Pd} Alias for \p{Pd} (General Category 'Pd') \p{Pd} General Category 'Pd' +* \p{Pe} Alias for \p{Pe} (General Category 'Pe') \p{Pe} General Category 'Pe' +* \p{Pf} Alias for \p{Pf} (General Category 'Pf') \p{Pf} General Category 'Pf' +* \p{Pi} Alias for \p{Pi} (General Category 'Pi') \p{Pi} General Category 'Pi' +* \p{Po} Alias for \p{Po} (General Category 'Po') \p{Po} General Category 'Po' \p{Print} [[:Print:]] -* \p{PrivateUse} Alias for \p{Co} (General Category 'Co') +* \p{Ps} Alias for \p{Ps} (General Category 'Ps') \p{Ps} General Category 'Ps' -* \p{Punctuation} Alias for \p{P} (Major Category 'P') \p{Punct} [[:Punct:]] +* \p{P} Alias for \p{P} (Major Category 'P') \p{P} Major Category 'P' * \p{QuotationMark} Extended property 'Quotation_Mark' * \p{Runic} Script 'RUNIC' +* \p{Sc} Alias for \p{Sc} (General Category 'Sc') \p{Sc} General Category 'Sc' -* \p{Separator} Alias for \p{Z} (Major Category 'Z') * \p{Sinhala} Script 'SINHALA' +* \p{Sk} Alias for \p{Sk} (General Category 'Sk') \p{Sk} General Category 'Sk' +* \p{Sm} Alias for \p{Sm} (General Category 'Sm') \p{Sm} General Category 'Sm' +* \p{So} Alias for \p{So} (General Category 'So') \p{So} General Category 'So' \p{SpacePerl} \s -* \p{SpaceSeparator} Alias for \p{Zs} (General Category 'Zs') \p{Space} [[:Space:]] -* \p{SpacingMark} Alias for \p{Mc} (General Category 'Mc') -* \p{Surrogate} Alias for \p{Cs} (General Category 'Cs') -* \p{Symbol} Alias for \p{S} (Major Category 'S') * \p{Syriac} Script 'SYRIAC' +* \p{S} Alias for 
\p{S} (Major Category 'S') \p{S} Major Category 'S' * \p{Tamil} Script 'TAMIL' * \p{Telugu} Script 'TELUGU' @@ -312,19 +282,20 @@ * \p{Thaana} Script 'THAANA' * \p{Thai} Script 'THAI' * \p{Tibetan} Script 'TIBETAN' -* \p{TitlecaseLetter} Alias for \p{Lt} (General Category 'Lt') \p{Title} [[:Title:]] -* \p{Unassigned} Alias for \p{Cn} (General Category 'Cn' [not functional in Perl]) -* \p{UppercaseLetter} Alias for \p{Lu} (General Category 'Lu') * \p{Uppercase} [\p{Lu}\p{Other_Uppercase}] \p{Upper} [[:Upper:]] * \p{WhiteSpace} Extended property 'White_space' \p{Word} [[:Word:]] \p{XDigit} [[:XDigit:]] * \p{Yi} Script 'YI' +* \p{Zl} Alias for \p{Zl} (General Category 'Zl') \p{Zl} General Category 'Zl' +* \p{Zp} Alias for \p{Zp} (General Category 'Zp') \p{Zp} General Category 'Zp' +* \p{Zs} Alias for \p{Zs} (General Category 'Zs') \p{Zs} General Category 'Zs' +* \p{Z} Alias for \p{Z} (Major Category 'Z') \p{Z} Major Category 'Z' \p{_CanonDCIJ} (for internal casefolding use) \p{_CaseIgnorable} (for internal casefolding use) diff --git a/lib/unicore/To/Digit.pl b/lib/unicore/To/Digit.pl index 74b34a1..9531d15 100644 --- a/lib/unicore/To/Digit.pl +++ b/lib/unicore/To/Digit.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! return <<'END'; diff --git a/lib/unicore/To/Fold.pl b/lib/unicore/To/Fold.pl index 5a9117f..ae5642d 100644 --- a/lib/unicore/To/Fold.pl +++ b/lib/unicore/To/Fold.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! diff --git a/lib/unicore/To/Lower.pl b/lib/unicore/To/Lower.pl index 0574910..0cf3d23 100644 --- a/lib/unicore/To/Lower.pl +++ b/lib/unicore/To/Lower.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! 
-# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! diff --git a/lib/unicore/To/Title.pl b/lib/unicore/To/Title.pl index 576cff3..445732f 100644 --- a/lib/unicore/To/Title.pl +++ b/lib/unicore/To/Title.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! diff --git a/lib/unicore/To/Upper.pl b/lib/unicore/To/Upper.pl index 3b3f07f..3b57ed3 100644 --- a/lib/unicore/To/Upper.pl +++ b/lib/unicore/To/Upper.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! diff --git a/lib/unicore/Is/ASCII.pl b/lib/unicore/lib/ASCII.pl similarity index 100% rename from lib/unicore/Is/ASCII.pl rename to lib/unicore/lib/ASCII.pl diff --git a/lib/unicore/Is/Alnum.pl b/lib/unicore/lib/Alnum.pl similarity index 100% rename from lib/unicore/Is/Alnum.pl rename to lib/unicore/lib/Alnum.pl diff --git a/lib/unicore/Is/Alpha.pl b/lib/unicore/lib/Alpha.pl similarity index 100% rename from lib/unicore/Is/Alpha.pl rename to lib/unicore/lib/Alpha.pl diff --git a/lib/unicore/Is/Alphabet.pl b/lib/unicore/lib/Alphabet.pl similarity index 100% rename from lib/unicore/Is/Alphabet.pl rename to lib/unicore/lib/Alphabet.pl diff --git a/lib/unicore/Is/Any.pl b/lib/unicore/lib/Any.pl similarity index 76% rename from lib/unicore/Is/Any.pl rename to lib/unicore/lib/Any.pl index 45a0636..8179ec7 100644 --- a/lib/unicore/Is/Any.pl +++ b/lib/unicore/lib/Any.pl @@ -4,8 +4,8 @@ # # This file supports: -# \p{Any} (and fuzzy permutations) -# \p{All} (and fuzzy permutations) +# \p{Any} +# \p{Any} # # Meaning: [\x{0000}-\x{10FFFF}] # diff --git a/lib/unicore/Is/Arabic.pl b/lib/unicore/lib/Arabic.pl 
similarity index 100% rename from lib/unicore/Is/Arabic.pl rename to lib/unicore/lib/Arabic.pl diff --git a/lib/unicore/Is/Armenian.pl b/lib/unicore/lib/Armenian.pl similarity index 100% rename from lib/unicore/Is/Armenian.pl rename to lib/unicore/lib/Armenian.pl diff --git a/lib/unicore/Is/AsciiHex.pl b/lib/unicore/lib/AsciiHex.pl similarity index 100% rename from lib/unicore/Is/AsciiHex.pl rename to lib/unicore/lib/AsciiHex.pl diff --git a/lib/unicore/Is/Assigned.pl b/lib/unicore/lib/Assigned.pl similarity index 99% rename from lib/unicore/Is/Assigned.pl rename to lib/unicore/lib/Assigned.pl index 3646421..e1ace2e 100644 --- a/lib/unicore/Is/Assigned.pl +++ b/lib/unicore/lib/Assigned.pl @@ -4,7 +4,7 @@ # # This file supports: -# \p{Assigned} (and fuzzy permutations) +# \p{Assigned} # # Meaning: All assigned code points # diff --git a/lib/unicore/Is/Bengali.pl b/lib/unicore/lib/Bengali.pl similarity index 100% rename from lib/unicore/Is/Bengali.pl rename to lib/unicore/lib/Bengali.pl diff --git a/lib/unicore/Is/BidiAL.pl b/lib/unicore/lib/BidiAL.pl similarity index 100% rename from lib/unicore/Is/BidiAL.pl rename to lib/unicore/lib/BidiAL.pl diff --git a/lib/unicore/Is/BidiAN.pl b/lib/unicore/lib/BidiAN.pl similarity index 100% rename from lib/unicore/Is/BidiAN.pl rename to lib/unicore/lib/BidiAN.pl diff --git a/lib/unicore/Is/BidiB.pl b/lib/unicore/lib/BidiB.pl similarity index 100% rename from lib/unicore/Is/BidiB.pl rename to lib/unicore/lib/BidiB.pl diff --git a/lib/unicore/Is/BidiBN.pl b/lib/unicore/lib/BidiBN.pl similarity index 100% rename from lib/unicore/Is/BidiBN.pl rename to lib/unicore/lib/BidiBN.pl diff --git a/lib/unicore/Is/BidiCS.pl b/lib/unicore/lib/BidiCS.pl similarity index 100% rename from lib/unicore/Is/BidiCS.pl rename to lib/unicore/lib/BidiCS.pl diff --git a/lib/unicore/Is/BidiCont.pl b/lib/unicore/lib/BidiCont.pl similarity index 100% rename from lib/unicore/Is/BidiCont.pl rename to lib/unicore/lib/BidiCont.pl diff --git 
a/lib/unicore/Is/BidiEN.pl b/lib/unicore/lib/BidiEN.pl similarity index 100% rename from lib/unicore/Is/BidiEN.pl rename to lib/unicore/lib/BidiEN.pl diff --git a/lib/unicore/Is/BidiES.pl b/lib/unicore/lib/BidiES.pl similarity index 100% rename from lib/unicore/Is/BidiES.pl rename to lib/unicore/lib/BidiES.pl diff --git a/lib/unicore/Is/BidiET.pl b/lib/unicore/lib/BidiET.pl similarity index 100% rename from lib/unicore/Is/BidiET.pl rename to lib/unicore/lib/BidiET.pl diff --git a/lib/unicore/Is/BidiL.pl b/lib/unicore/lib/BidiL.pl similarity index 100% rename from lib/unicore/Is/BidiL.pl rename to lib/unicore/lib/BidiL.pl diff --git a/lib/unicore/Is/BidiLRE.pl b/lib/unicore/lib/BidiLRE.pl similarity index 100% rename from lib/unicore/Is/BidiLRE.pl rename to lib/unicore/lib/BidiLRE.pl diff --git a/lib/unicore/Is/BidiLRO.pl b/lib/unicore/lib/BidiLRO.pl similarity index 100% rename from lib/unicore/Is/BidiLRO.pl rename to lib/unicore/lib/BidiLRO.pl diff --git a/lib/unicore/Is/BidiNSM.pl b/lib/unicore/lib/BidiNSM.pl similarity index 100% rename from lib/unicore/Is/BidiNSM.pl rename to lib/unicore/lib/BidiNSM.pl diff --git a/lib/unicore/Is/BidiON.pl b/lib/unicore/lib/BidiON.pl similarity index 100% rename from lib/unicore/Is/BidiON.pl rename to lib/unicore/lib/BidiON.pl diff --git a/lib/unicore/Is/BidiPDF.pl b/lib/unicore/lib/BidiPDF.pl similarity index 100% rename from lib/unicore/Is/BidiPDF.pl rename to lib/unicore/lib/BidiPDF.pl diff --git a/lib/unicore/Is/BidiR.pl b/lib/unicore/lib/BidiR.pl similarity index 100% rename from lib/unicore/Is/BidiR.pl rename to lib/unicore/lib/BidiR.pl diff --git a/lib/unicore/Is/BidiRLE.pl b/lib/unicore/lib/BidiRLE.pl similarity index 100% rename from lib/unicore/Is/BidiRLE.pl rename to lib/unicore/lib/BidiRLE.pl diff --git a/lib/unicore/Is/BidiRLO.pl b/lib/unicore/lib/BidiRLO.pl similarity index 100% rename from lib/unicore/Is/BidiRLO.pl rename to lib/unicore/lib/BidiRLO.pl diff --git a/lib/unicore/Is/BidiS.pl 
b/lib/unicore/lib/BidiS.pl similarity index 100% rename from lib/unicore/Is/BidiS.pl rename to lib/unicore/lib/BidiS.pl diff --git a/lib/unicore/Is/BidiWS.pl b/lib/unicore/lib/BidiWS.pl similarity index 100% rename from lib/unicore/Is/BidiWS.pl rename to lib/unicore/lib/BidiWS.pl diff --git a/lib/unicore/Is/Blank.pl b/lib/unicore/lib/Blank.pl similarity index 100% rename from lib/unicore/Is/Blank.pl rename to lib/unicore/lib/Blank.pl diff --git a/lib/unicore/Is/Bopomofo.pl b/lib/unicore/lib/Bopomofo.pl similarity index 100% rename from lib/unicore/Is/Bopomofo.pl rename to lib/unicore/lib/Bopomofo.pl diff --git a/lib/unicore/Is/C.pl b/lib/unicore/lib/C.pl similarity index 99% rename from lib/unicore/Is/C.pl rename to lib/unicore/lib/C.pl index f9f7420..c3a04e8 100644 --- a/lib/unicore/Is/C.pl +++ b/lib/unicore/lib/C.pl @@ -5,7 +5,7 @@ # # This file supports: # \p{C} -# \p{Other} (and fuzzy permutations) +# \p{C} (and fuzzy permutations) # # Meaning: Major Category 'C' # diff --git a/lib/unicore/Is/Canadian.pl b/lib/unicore/lib/Canadian.pl similarity index 100% rename from lib/unicore/Is/Canadian.pl rename to lib/unicore/lib/Canadian.pl diff --git a/lib/unicore/Is/Canon.pl b/lib/unicore/lib/Canon.pl similarity index 100% rename from lib/unicore/Is/Canon.pl rename to lib/unicore/lib/Canon.pl diff --git a/lib/unicore/Is/Cc.pl b/lib/unicore/lib/Cc.pl similarity index 86% rename from lib/unicore/Is/Cc.pl rename to lib/unicore/lib/Cc.pl index 0d654ab..4238ba8 100644 --- a/lib/unicore/Is/Cc.pl +++ b/lib/unicore/lib/Cc.pl @@ -5,7 +5,7 @@ # # This file supports: # \p{Cc} -# \p{Control} (and fuzzy permutations) +# \p{Cc} (and fuzzy permutations) # # Meaning: General Category 'Cc' # diff --git a/lib/unicore/Is/Cf.pl b/lib/unicore/lib/Cf.pl similarity index 89% rename from lib/unicore/Is/Cf.pl rename to lib/unicore/lib/Cf.pl index efc1336..9c05455 100644 --- a/lib/unicore/Is/Cf.pl +++ b/lib/unicore/lib/Cf.pl @@ -5,7 +5,7 @@ # # This file supports: # \p{Cf} -# \p{Format} (and 
fuzzy permutations) +# \p{Cf} (and fuzzy permutations) # # Meaning: General Category 'Cf' # diff --git a/lib/unicore/Is/Cherokee.pl b/lib/unicore/lib/Cherokee.pl similarity index 100% rename from lib/unicore/Is/Cherokee.pl rename to lib/unicore/lib/Cherokee.pl diff --git a/lib/unicore/Is/Cn.pl b/lib/unicore/lib/Cn.pl similarity index 98% rename from lib/unicore/Is/Cn.pl rename to lib/unicore/lib/Cn.pl index b2598e7..c666285 100644 --- a/lib/unicore/Is/Cn.pl +++ b/lib/unicore/lib/Cn.pl @@ -5,7 +5,7 @@ # # This file supports: # \p{Cn} -# \p{Unassigned} (and fuzzy permutations) +# \p{Cn} (and fuzzy permutations) # # Meaning: General Category 'Cn' [not functional in Perl] # diff --git a/lib/unicore/Is/Cntrl.pl b/lib/unicore/lib/Cntrl.pl similarity index 100% rename from lib/unicore/Is/Cntrl.pl rename to lib/unicore/lib/Cntrl.pl diff --git a/lib/unicore/Is/Co.pl b/lib/unicore/lib/Co.pl similarity index 86% rename from lib/unicore/Is/Co.pl rename to lib/unicore/lib/Co.pl index 46da434..bd792f3 100644 --- a/lib/unicore/Is/Co.pl +++ b/lib/unicore/lib/Co.pl @@ -5,7 +5,7 @@ # # This file supports: # \p{Co} -# \p{PrivateUse} (and fuzzy permutations) +# \p{Co} (and fuzzy permutations) # # Meaning: General Category 'Co' # diff --git a/lib/unicore/Is/Common.pl b/lib/unicore/lib/Common.pl similarity index 100% rename from lib/unicore/Is/Common.pl rename to lib/unicore/lib/Common.pl diff --git a/lib/unicore/Is/Compat.pl b/lib/unicore/lib/Compat.pl similarity index 100% rename from lib/unicore/Is/Compat.pl rename to lib/unicore/lib/Compat.pl diff --git a/lib/unicore/Is/Cs.pl b/lib/unicore/lib/Cs.pl similarity index 85% rename from lib/unicore/Is/Cs.pl rename to lib/unicore/lib/Cs.pl index a6a181f..5a1ea95 100644 --- a/lib/unicore/Is/Cs.pl +++ b/lib/unicore/lib/Cs.pl @@ -5,7 +5,7 @@ # # This file supports: # \p{Cs} -# \p{Surrogate} (and fuzzy permutations) +# \p{Cs} (and fuzzy permutations) # # Meaning: General Category 'Cs' # diff --git a/lib/unicore/Is/Cyrillic.pl 
b/lib/unicore/lib/Cyrillic.pl similarity index 100% rename from lib/unicore/Is/Cyrillic.pl rename to lib/unicore/lib/Cyrillic.pl diff --git a/lib/unicore/Is/DCcircle.pl b/lib/unicore/lib/DCcircle.pl similarity index 100% rename from lib/unicore/Is/DCcircle.pl rename to lib/unicore/lib/DCcircle.pl diff --git a/lib/unicore/Is/DCcompat.pl b/lib/unicore/lib/DCcompat.pl similarity index 100% rename from lib/unicore/Is/DCcompat.pl rename to lib/unicore/lib/DCcompat.pl diff --git a/lib/unicore/Is/DCfinal.pl b/lib/unicore/lib/DCfinal.pl similarity index 100% rename from lib/unicore/Is/DCfinal.pl rename to lib/unicore/lib/DCfinal.pl diff --git a/lib/unicore/Is/DCfont.pl b/lib/unicore/lib/DCfont.pl similarity index 100% rename from lib/unicore/Is/DCfont.pl rename to lib/unicore/lib/DCfont.pl diff --git a/lib/unicore/Is/DCfracti.pl b/lib/unicore/lib/DCfracti.pl similarity index 100% rename from lib/unicore/Is/DCfracti.pl rename to lib/unicore/lib/DCfracti.pl diff --git a/lib/unicore/Is/DCinitia.pl b/lib/unicore/lib/DCinitia.pl similarity index 100% rename from lib/unicore/Is/DCinitia.pl rename to lib/unicore/lib/DCinitia.pl diff --git a/lib/unicore/Is/DCisolat.pl b/lib/unicore/lib/DCisolat.pl similarity index 100% rename from lib/unicore/Is/DCisolat.pl rename to lib/unicore/lib/DCisolat.pl diff --git a/lib/unicore/Is/DCmedial.pl b/lib/unicore/lib/DCmedial.pl similarity index 100% rename from lib/unicore/Is/DCmedial.pl rename to lib/unicore/lib/DCmedial.pl diff --git a/lib/unicore/Is/DCnarrow.pl b/lib/unicore/lib/DCnarrow.pl similarity index 100% rename from lib/unicore/Is/DCnarrow.pl rename to lib/unicore/lib/DCnarrow.pl diff --git a/lib/unicore/Is/DCnoBrea.pl b/lib/unicore/lib/DCnoBrea.pl similarity index 100% rename from lib/unicore/Is/DCnoBrea.pl rename to lib/unicore/lib/DCnoBrea.pl diff --git a/lib/unicore/Is/DCsmall.pl b/lib/unicore/lib/DCsmall.pl similarity index 100% rename from lib/unicore/Is/DCsmall.pl rename to lib/unicore/lib/DCsmall.pl diff --git 
a/lib/unicore/Is/DCsquare.pl b/lib/unicore/lib/DCsquare.pl similarity index 100% rename from lib/unicore/Is/DCsquare.pl rename to lib/unicore/lib/DCsquare.pl diff --git a/lib/unicore/Is/DCsub.pl b/lib/unicore/lib/DCsub.pl similarity index 100% rename from lib/unicore/Is/DCsub.pl rename to lib/unicore/lib/DCsub.pl diff --git a/lib/unicore/Is/DCsuper.pl b/lib/unicore/lib/DCsuper.pl similarity index 100% rename from lib/unicore/Is/DCsuper.pl rename to lib/unicore/lib/DCsuper.pl diff --git a/lib/unicore/Is/DCvertic.pl b/lib/unicore/lib/DCvertic.pl similarity index 100% rename from lib/unicore/Is/DCvertic.pl rename to lib/unicore/lib/DCvertic.pl diff --git a/lib/unicore/Is/DCwide.pl b/lib/unicore/lib/DCwide.pl similarity index 100% rename from lib/unicore/Is/DCwide.pl rename to lib/unicore/lib/DCwide.pl diff --git a/lib/unicore/Is/Dash.pl b/lib/unicore/lib/Dash.pl similarity index 100% rename from lib/unicore/Is/Dash.pl rename to lib/unicore/lib/Dash.pl diff --git a/lib/unicore/Is/Deseret.pl b/lib/unicore/lib/Deseret.pl similarity index 100% rename from lib/unicore/Is/Deseret.pl rename to lib/unicore/lib/Deseret.pl diff --git a/lib/unicore/Is/Devanaga.pl b/lib/unicore/lib/Devanaga.pl similarity index 100% rename from lib/unicore/Is/Devanaga.pl rename to lib/unicore/lib/Devanaga.pl diff --git a/lib/unicore/Is/Diacriti.pl b/lib/unicore/lib/Diacriti.pl similarity index 100% rename from lib/unicore/Is/Diacriti.pl rename to lib/unicore/lib/Diacriti.pl diff --git a/lib/unicore/Is/Digit.pl b/lib/unicore/lib/Digit.pl similarity index 100% rename from lib/unicore/Is/Digit.pl rename to lib/unicore/lib/Digit.pl diff --git a/lib/unicore/Is/Ethiopic.pl b/lib/unicore/lib/Ethiopic.pl similarity index 100% rename from lib/unicore/Is/Ethiopic.pl rename to lib/unicore/lib/Ethiopic.pl diff --git a/lib/unicore/Is/Extender.pl b/lib/unicore/lib/Extender.pl similarity index 100% rename from lib/unicore/Is/Extender.pl rename to lib/unicore/lib/Extender.pl diff --git 
a/lib/unicore/Is/Georgian.pl b/lib/unicore/lib/Georgian.pl similarity index 100% rename from lib/unicore/Is/Georgian.pl rename to lib/unicore/lib/Georgian.pl diff --git a/lib/unicore/Is/Gothic.pl b/lib/unicore/lib/Gothic.pl similarity index 100% rename from lib/unicore/Is/Gothic.pl rename to lib/unicore/lib/Gothic.pl diff --git a/lib/unicore/Is/Graph.pl b/lib/unicore/lib/Graph.pl similarity index 100% rename from lib/unicore/Is/Graph.pl rename to lib/unicore/lib/Graph.pl diff --git a/lib/unicore/Is/Greek.pl b/lib/unicore/lib/Greek.pl similarity index 100% rename from lib/unicore/Is/Greek.pl rename to lib/unicore/lib/Greek.pl diff --git a/lib/unicore/Is/Gujarati.pl b/lib/unicore/lib/Gujarati.pl similarity index 100% rename from lib/unicore/Is/Gujarati.pl rename to lib/unicore/lib/Gujarati.pl diff --git a/lib/unicore/Is/Gurmukhi.pl b/lib/unicore/lib/Gurmukhi.pl similarity index 100% rename from lib/unicore/Is/Gurmukhi.pl rename to lib/unicore/lib/Gurmukhi.pl diff --git a/lib/unicore/Is/Han.pl b/lib/unicore/lib/Han.pl similarity index 100% rename from lib/unicore/Is/Han.pl rename to lib/unicore/lib/Han.pl diff --git a/lib/unicore/Is/Hangul.pl b/lib/unicore/lib/Hangul.pl similarity index 100% rename from lib/unicore/Is/Hangul.pl rename to lib/unicore/lib/Hangul.pl diff --git a/lib/unicore/Is/Hebrew.pl b/lib/unicore/lib/Hebrew.pl similarity index 100% rename from lib/unicore/Is/Hebrew.pl rename to lib/unicore/lib/Hebrew.pl diff --git a/lib/unicore/Is/HexDigit.pl b/lib/unicore/lib/HexDigit.pl similarity index 100% rename from lib/unicore/Is/HexDigit.pl rename to lib/unicore/lib/HexDigit.pl diff --git a/lib/unicore/Is/Hiragana.pl b/lib/unicore/lib/Hiragana.pl similarity index 100% rename from lib/unicore/Is/Hiragana.pl rename to lib/unicore/lib/Hiragana.pl diff --git a/lib/unicore/Is/Hyphen.pl b/lib/unicore/lib/Hyphen.pl similarity index 100% rename from lib/unicore/Is/Hyphen.pl rename to lib/unicore/lib/Hyphen.pl diff --git a/lib/unicore/Is/IdContin.pl 
b/lib/unicore/lib/IdContin.pl similarity index 100% rename from lib/unicore/Is/IdContin.pl rename to lib/unicore/lib/IdContin.pl diff --git a/lib/unicore/Is/IdStart.pl b/lib/unicore/lib/IdStart.pl similarity index 100% rename from lib/unicore/Is/IdStart.pl rename to lib/unicore/lib/IdStart.pl diff --git a/lib/unicore/Is/Ideograp.pl b/lib/unicore/lib/Ideograp.pl similarity index 100% rename from lib/unicore/Is/Ideograp.pl rename to lib/unicore/lib/Ideograp.pl diff --git a/lib/unicore/In/Alphabet.pl b/lib/unicore/lib/InAlphab.pl similarity index 100% rename from lib/unicore/In/Alphabet.pl rename to lib/unicore/lib/InAlphab.pl diff --git a/lib/unicore/In/ArabicPr.pl b/lib/unicore/lib/InArabi2.pl similarity index 100% rename from lib/unicore/In/ArabicPr.pl rename to lib/unicore/lib/InArabi2.pl diff --git a/lib/unicore/In/ArabicP2.pl b/lib/unicore/lib/InArabi3.pl similarity index 100% rename from lib/unicore/In/ArabicP2.pl rename to lib/unicore/lib/InArabi3.pl diff --git a/lib/unicore/In/Arabic.pl b/lib/unicore/lib/InArabic.pl similarity index 100% rename from lib/unicore/In/Arabic.pl rename to lib/unicore/lib/InArabic.pl diff --git a/lib/unicore/In/Armenian.pl b/lib/unicore/lib/InArmeni.pl similarity index 100% rename from lib/unicore/In/Armenian.pl rename to lib/unicore/lib/InArmeni.pl diff --git a/lib/unicore/In/Arrows.pl b/lib/unicore/lib/InArrows.pl similarity index 100% rename from lib/unicore/In/Arrows.pl rename to lib/unicore/lib/InArrows.pl diff --git a/lib/unicore/In/BasicLat.pl b/lib/unicore/lib/InBasicL.pl similarity index 100% rename from lib/unicore/In/BasicLat.pl rename to lib/unicore/lib/InBasicL.pl diff --git a/lib/unicore/In/Bengali.pl b/lib/unicore/lib/InBengal.pl similarity index 100% rename from lib/unicore/In/Bengali.pl rename to lib/unicore/lib/InBengal.pl diff --git a/lib/unicore/In/BlockEle.pl b/lib/unicore/lib/InBlockE.pl similarity index 100% rename from lib/unicore/In/BlockEle.pl rename to lib/unicore/lib/InBlockE.pl diff --git 
a/lib/unicore/In/Bopomof2.pl b/lib/unicore/lib/InBopom2.pl similarity index 100% rename from lib/unicore/In/Bopomof2.pl rename to lib/unicore/lib/InBopom2.pl diff --git a/lib/unicore/In/Bopomofo.pl b/lib/unicore/lib/InBopomo.pl similarity index 100% rename from lib/unicore/In/Bopomofo.pl rename to lib/unicore/lib/InBopomo.pl diff --git a/lib/unicore/In/BoxDrawi.pl b/lib/unicore/lib/InBoxDra.pl similarity index 100% rename from lib/unicore/In/BoxDrawi.pl rename to lib/unicore/lib/InBoxDra.pl diff --git a/lib/unicore/In/BrailleP.pl b/lib/unicore/lib/InBraill.pl similarity index 100% rename from lib/unicore/In/BrailleP.pl rename to lib/unicore/lib/InBraill.pl diff --git a/lib/unicore/In/Byzantin.pl b/lib/unicore/lib/InByzant.pl similarity index 100% rename from lib/unicore/In/Byzantin.pl rename to lib/unicore/lib/InByzant.pl diff --git a/lib/unicore/In/Cherokee.pl b/lib/unicore/lib/InCherok.pl similarity index 100% rename from lib/unicore/In/Cherokee.pl rename to lib/unicore/lib/InCherok.pl diff --git a/lib/unicore/In/CjkComp2.pl b/lib/unicore/lib/InCjkCo2.pl similarity index 100% rename from lib/unicore/In/CjkComp2.pl rename to lib/unicore/lib/InCjkCo2.pl diff --git a/lib/unicore/In/CjkComp3.pl b/lib/unicore/lib/InCjkCo3.pl similarity index 100% rename from lib/unicore/In/CjkComp3.pl rename to lib/unicore/lib/InCjkCo3.pl diff --git a/lib/unicore/In/CjkComp4.pl b/lib/unicore/lib/InCjkCo4.pl similarity index 100% rename from lib/unicore/In/CjkComp4.pl rename to lib/unicore/lib/InCjkCo4.pl diff --git a/lib/unicore/In/CjkCompa.pl b/lib/unicore/lib/InCjkCom.pl similarity index 100% rename from lib/unicore/In/CjkCompa.pl rename to lib/unicore/lib/InCjkCom.pl diff --git a/lib/unicore/In/CjkRadic.pl b/lib/unicore/lib/InCjkRad.pl similarity index 100% rename from lib/unicore/In/CjkRadic.pl rename to lib/unicore/lib/InCjkRad.pl diff --git a/lib/unicore/In/CjkSymbo.pl b/lib/unicore/lib/InCjkSym.pl similarity index 100% rename from lib/unicore/In/CjkSymbo.pl rename to 
lib/unicore/lib/InCjkSym.pl diff --git a/lib/unicore/In/CjkUnif3.pl b/lib/unicore/lib/InCjkUn2.pl similarity index 100% rename from lib/unicore/In/CjkUnif3.pl rename to lib/unicore/lib/InCjkUn2.pl diff --git a/lib/unicore/In/CjkUnif2.pl b/lib/unicore/lib/InCjkUn3.pl similarity index 100% rename from lib/unicore/In/CjkUnif2.pl rename to lib/unicore/lib/InCjkUn3.pl diff --git a/lib/unicore/In/CjkUnifi.pl b/lib/unicore/lib/InCjkUni.pl similarity index 100% rename from lib/unicore/In/CjkUnifi.pl rename to lib/unicore/lib/InCjkUni.pl diff --git a/lib/unicore/In/Combini3.pl b/lib/unicore/lib/InCombi2.pl similarity index 100% rename from lib/unicore/In/Combini3.pl rename to lib/unicore/lib/InCombi2.pl diff --git a/lib/unicore/In/Combini2.pl b/lib/unicore/lib/InCombi3.pl similarity index 100% rename from lib/unicore/In/Combini2.pl rename to lib/unicore/lib/InCombi3.pl diff --git a/lib/unicore/In/Combinin.pl b/lib/unicore/lib/InCombin.pl similarity index 100% rename from lib/unicore/In/Combinin.pl rename to lib/unicore/lib/InCombin.pl diff --git a/lib/unicore/In/ControlP.pl b/lib/unicore/lib/InContro.pl similarity index 100% rename from lib/unicore/In/ControlP.pl rename to lib/unicore/lib/InContro.pl diff --git a/lib/unicore/In/Currency.pl b/lib/unicore/lib/InCurren.pl similarity index 100% rename from lib/unicore/In/Currency.pl rename to lib/unicore/lib/InCurren.pl diff --git a/lib/unicore/In/Cyrillic.pl b/lib/unicore/lib/InCyrill.pl similarity index 100% rename from lib/unicore/In/Cyrillic.pl rename to lib/unicore/lib/InCyrill.pl diff --git a/lib/unicore/In/Deseret.pl b/lib/unicore/lib/InDesere.pl similarity index 100% rename from lib/unicore/In/Deseret.pl rename to lib/unicore/lib/InDesere.pl diff --git a/lib/unicore/In/Devanaga.pl b/lib/unicore/lib/InDevana.pl similarity index 100% rename from lib/unicore/In/Devanaga.pl rename to lib/unicore/lib/InDevana.pl diff --git a/lib/unicore/In/Dingbats.pl b/lib/unicore/lib/InDingba.pl similarity index 100% rename from 
lib/unicore/In/Dingbats.pl rename to lib/unicore/lib/InDingba.pl diff --git a/lib/unicore/In/Enclose2.pl b/lib/unicore/lib/InEnclo2.pl similarity index 100% rename from lib/unicore/In/Enclose2.pl rename to lib/unicore/lib/InEnclo2.pl diff --git a/lib/unicore/In/Enclosed.pl b/lib/unicore/lib/InEnclos.pl similarity index 100% rename from lib/unicore/In/Enclosed.pl rename to lib/unicore/lib/InEnclos.pl diff --git a/lib/unicore/In/Ethiopic.pl b/lib/unicore/lib/InEthiop.pl similarity index 100% rename from lib/unicore/In/Ethiopic.pl rename to lib/unicore/lib/InEthiop.pl diff --git a/lib/unicore/In/GeneralP.pl b/lib/unicore/lib/InGenera.pl similarity index 100% rename from lib/unicore/In/GeneralP.pl rename to lib/unicore/lib/InGenera.pl diff --git a/lib/unicore/In/Geometri.pl b/lib/unicore/lib/InGeomet.pl similarity index 100% rename from lib/unicore/In/Geometri.pl rename to lib/unicore/lib/InGeomet.pl diff --git a/lib/unicore/In/Georgian.pl b/lib/unicore/lib/InGeorgi.pl similarity index 100% rename from lib/unicore/In/Georgian.pl rename to lib/unicore/lib/InGeorgi.pl diff --git a/lib/unicore/In/Gothic.pl b/lib/unicore/lib/InGothic.pl similarity index 100% rename from lib/unicore/In/Gothic.pl rename to lib/unicore/lib/InGothic.pl diff --git a/lib/unicore/In/Greek.pl b/lib/unicore/lib/InGreek.pl similarity index 100% rename from lib/unicore/In/Greek.pl rename to lib/unicore/lib/InGreek.pl diff --git a/lib/unicore/In/GreekExt.pl b/lib/unicore/lib/InGreekE.pl similarity index 100% rename from lib/unicore/In/GreekExt.pl rename to lib/unicore/lib/InGreekE.pl diff --git a/lib/unicore/In/Gujarati.pl b/lib/unicore/lib/InGujara.pl similarity index 100% rename from lib/unicore/In/Gujarati.pl rename to lib/unicore/lib/InGujara.pl diff --git a/lib/unicore/In/Gurmukhi.pl b/lib/unicore/lib/InGurmuk.pl similarity index 100% rename from lib/unicore/In/Gurmukhi.pl rename to lib/unicore/lib/InGurmuk.pl diff --git a/lib/unicore/In/Halfwidt.pl b/lib/unicore/lib/InHalfwi.pl similarity index 
100% rename from lib/unicore/In/Halfwidt.pl rename to lib/unicore/lib/InHalfwi.pl diff --git a/lib/unicore/In/HangulSy.pl b/lib/unicore/lib/InHangu2.pl similarity index 100% rename from lib/unicore/In/HangulSy.pl rename to lib/unicore/lib/InHangu2.pl diff --git a/lib/unicore/In/HangulCo.pl b/lib/unicore/lib/InHangu3.pl similarity index 100% rename from lib/unicore/In/HangulCo.pl rename to lib/unicore/lib/InHangu3.pl diff --git a/lib/unicore/In/HangulJa.pl b/lib/unicore/lib/InHangul.pl similarity index 100% rename from lib/unicore/In/HangulJa.pl rename to lib/unicore/lib/InHangul.pl diff --git a/lib/unicore/In/Hebrew.pl b/lib/unicore/lib/InHebrew.pl similarity index 100% rename from lib/unicore/In/Hebrew.pl rename to lib/unicore/lib/InHebrew.pl diff --git a/lib/unicore/In/HighPriv.pl b/lib/unicore/lib/InHighPr.pl similarity index 100% rename from lib/unicore/In/HighPriv.pl rename to lib/unicore/lib/InHighPr.pl diff --git a/lib/unicore/In/HighSurr.pl b/lib/unicore/lib/InHighSu.pl similarity index 100% rename from lib/unicore/In/HighSurr.pl rename to lib/unicore/lib/InHighSu.pl diff --git a/lib/unicore/In/Hiragana.pl b/lib/unicore/lib/InHiraga.pl similarity index 100% rename from lib/unicore/In/Hiragana.pl rename to lib/unicore/lib/InHiraga.pl diff --git a/lib/unicore/In/Ideograp.pl b/lib/unicore/lib/InIdeogr.pl similarity index 100% rename from lib/unicore/In/Ideograp.pl rename to lib/unicore/lib/InIdeogr.pl diff --git a/lib/unicore/In/IpaExten.pl b/lib/unicore/lib/InIpaExt.pl similarity index 100% rename from lib/unicore/In/IpaExten.pl rename to lib/unicore/lib/InIpaExt.pl diff --git a/lib/unicore/In/Kanbun.pl b/lib/unicore/lib/InKanbun.pl similarity index 100% rename from lib/unicore/In/Kanbun.pl rename to lib/unicore/lib/InKanbun.pl diff --git a/lib/unicore/In/KangxiRa.pl b/lib/unicore/lib/InKangxi.pl similarity index 100% rename from lib/unicore/In/KangxiRa.pl rename to lib/unicore/lib/InKangxi.pl diff --git a/lib/unicore/In/Kannada.pl 
b/lib/unicore/lib/InKannad.pl similarity index 100% rename from lib/unicore/In/Kannada.pl rename to lib/unicore/lib/InKannad.pl diff --git a/lib/unicore/In/Katakana.pl b/lib/unicore/lib/InKataka.pl similarity index 100% rename from lib/unicore/In/Katakana.pl rename to lib/unicore/lib/InKataka.pl diff --git a/lib/unicore/In/Khmer.pl b/lib/unicore/lib/InKhmer.pl similarity index 100% rename from lib/unicore/In/Khmer.pl rename to lib/unicore/lib/InKhmer.pl diff --git a/lib/unicore/In/Lao.pl b/lib/unicore/lib/InLao.pl similarity index 100% rename from lib/unicore/In/Lao.pl rename to lib/unicore/lib/InLao.pl diff --git a/lib/unicore/In/Latin1Su.pl b/lib/unicore/lib/InLatin1.pl similarity index 100% rename from lib/unicore/In/Latin1Su.pl rename to lib/unicore/lib/InLatin1.pl diff --git a/lib/unicore/In/LatinEx2.pl b/lib/unicore/lib/InLatin2.pl similarity index 100% rename from lib/unicore/In/LatinEx2.pl rename to lib/unicore/lib/InLatin2.pl diff --git a/lib/unicore/In/LatinEx3.pl b/lib/unicore/lib/InLatin3.pl similarity index 100% rename from lib/unicore/In/LatinEx3.pl rename to lib/unicore/lib/InLatin3.pl diff --git a/lib/unicore/In/LatinExt.pl b/lib/unicore/lib/InLatinE.pl similarity index 100% rename from lib/unicore/In/LatinExt.pl rename to lib/unicore/lib/InLatinE.pl diff --git a/lib/unicore/In/Letterli.pl b/lib/unicore/lib/InLetter.pl similarity index 100% rename from lib/unicore/In/Letterli.pl rename to lib/unicore/lib/InLetter.pl diff --git a/lib/unicore/In/LowSurro.pl b/lib/unicore/lib/InLowSur.pl similarity index 100% rename from lib/unicore/In/LowSurro.pl rename to lib/unicore/lib/InLowSur.pl diff --git a/lib/unicore/In/Malayala.pl b/lib/unicore/lib/InMalaya.pl similarity index 100% rename from lib/unicore/In/Malayala.pl rename to lib/unicore/lib/InMalaya.pl diff --git a/lib/unicore/In/Mathema2.pl b/lib/unicore/lib/InMathe2.pl similarity index 100% rename from lib/unicore/In/Mathema2.pl rename to lib/unicore/lib/InMathe2.pl diff --git 
a/lib/unicore/In/Mathemat.pl b/lib/unicore/lib/InMathem.pl similarity index 100% rename from lib/unicore/In/Mathemat.pl rename to lib/unicore/lib/InMathem.pl diff --git a/lib/unicore/In/Miscell2.pl b/lib/unicore/lib/InMisce2.pl similarity index 100% rename from lib/unicore/In/Miscell2.pl rename to lib/unicore/lib/InMisce2.pl diff --git a/lib/unicore/In/Miscella.pl b/lib/unicore/lib/InMiscel.pl similarity index 100% rename from lib/unicore/In/Miscella.pl rename to lib/unicore/lib/InMiscel.pl diff --git a/lib/unicore/In/Mongolia.pl b/lib/unicore/lib/InMongol.pl similarity index 100% rename from lib/unicore/In/Mongolia.pl rename to lib/unicore/lib/InMongol.pl diff --git a/lib/unicore/In/MusicalS.pl b/lib/unicore/lib/InMusica.pl similarity index 100% rename from lib/unicore/In/MusicalS.pl rename to lib/unicore/lib/InMusica.pl diff --git a/lib/unicore/In/Myanmar.pl b/lib/unicore/lib/InMyanma.pl similarity index 100% rename from lib/unicore/In/Myanmar.pl rename to lib/unicore/lib/InMyanma.pl diff --git a/lib/unicore/In/NumberFo.pl b/lib/unicore/lib/InNumber.pl similarity index 100% rename from lib/unicore/In/NumberFo.pl rename to lib/unicore/lib/InNumber.pl diff --git a/lib/unicore/In/Ogham.pl b/lib/unicore/lib/InOgham.pl similarity index 100% rename from lib/unicore/In/Ogham.pl rename to lib/unicore/lib/InOgham.pl diff --git a/lib/unicore/In/OldItali.pl b/lib/unicore/lib/InOldIta.pl similarity index 100% rename from lib/unicore/In/OldItali.pl rename to lib/unicore/lib/InOldIta.pl diff --git a/lib/unicore/In/OpticalC.pl b/lib/unicore/lib/InOptica.pl similarity index 100% rename from lib/unicore/In/OpticalC.pl rename to lib/unicore/lib/InOptica.pl diff --git a/lib/unicore/In/Oriya.pl b/lib/unicore/lib/InOriya.pl similarity index 100% rename from lib/unicore/In/Oriya.pl rename to lib/unicore/lib/InOriya.pl diff --git a/lib/unicore/In/PrivateU.pl b/lib/unicore/lib/InPrivat.pl similarity index 100% rename from lib/unicore/In/PrivateU.pl rename to lib/unicore/lib/InPrivat.pl 
diff --git a/lib/unicore/In/Runic.pl b/lib/unicore/lib/InRunic.pl similarity index 100% rename from lib/unicore/In/Runic.pl rename to lib/unicore/lib/InRunic.pl diff --git a/lib/unicore/In/Sinhala.pl b/lib/unicore/lib/InSinhal.pl similarity index 100% rename from lib/unicore/In/Sinhala.pl rename to lib/unicore/lib/InSinhal.pl diff --git a/lib/unicore/In/SmallFor.pl b/lib/unicore/lib/InSmallF.pl similarity index 100% rename from lib/unicore/In/SmallFor.pl rename to lib/unicore/lib/InSmallF.pl diff --git a/lib/unicore/In/SpacingM.pl b/lib/unicore/lib/InSpacin.pl similarity index 100% rename from lib/unicore/In/SpacingM.pl rename to lib/unicore/lib/InSpacin.pl diff --git a/lib/unicore/In/Specials.pl b/lib/unicore/lib/InSpecia.pl similarity index 100% rename from lib/unicore/In/Specials.pl rename to lib/unicore/lib/InSpecia.pl diff --git a/lib/unicore/In/Superscr.pl b/lib/unicore/lib/InSupers.pl similarity index 100% rename from lib/unicore/In/Superscr.pl rename to lib/unicore/lib/InSupers.pl diff --git a/lib/unicore/In/Syriac.pl b/lib/unicore/lib/InSyriac.pl similarity index 100% rename from lib/unicore/In/Syriac.pl rename to lib/unicore/lib/InSyriac.pl diff --git a/lib/unicore/In/Tags.pl b/lib/unicore/lib/InTags.pl similarity index 100% rename from lib/unicore/In/Tags.pl rename to lib/unicore/lib/InTags.pl diff --git a/lib/unicore/In/Tamil.pl b/lib/unicore/lib/InTamil.pl similarity index 100% rename from lib/unicore/In/Tamil.pl rename to lib/unicore/lib/InTamil.pl diff --git a/lib/unicore/In/Telugu.pl b/lib/unicore/lib/InTelugu.pl similarity index 100% rename from lib/unicore/In/Telugu.pl rename to lib/unicore/lib/InTelugu.pl diff --git a/lib/unicore/In/Thaana.pl b/lib/unicore/lib/InThaana.pl similarity index 100% rename from lib/unicore/In/Thaana.pl rename to lib/unicore/lib/InThaana.pl diff --git a/lib/unicore/In/Thai.pl b/lib/unicore/lib/InThai.pl similarity index 100% rename from lib/unicore/In/Thai.pl rename to lib/unicore/lib/InThai.pl diff --git 
a/lib/unicore/In/Tibetan.pl b/lib/unicore/lib/InTibeta.pl similarity index 100% rename from lib/unicore/In/Tibetan.pl rename to lib/unicore/lib/InTibeta.pl diff --git a/lib/unicore/In/UnifiedC.pl b/lib/unicore/lib/InUnifie.pl similarity index 100% rename from lib/unicore/In/UnifiedC.pl rename to lib/unicore/lib/InUnifie.pl diff --git a/lib/unicore/In/YiRadica.pl b/lib/unicore/lib/InYiRadi.pl similarity index 100% rename from lib/unicore/In/YiRadica.pl rename to lib/unicore/lib/InYiRadi.pl diff --git a/lib/unicore/In/YiSyllab.pl b/lib/unicore/lib/InYiSyll.pl similarity index 100% rename from lib/unicore/In/YiSyllab.pl rename to lib/unicore/lib/InYiSyll.pl diff --git a/lib/unicore/Is/Inherite.pl b/lib/unicore/lib/Inherite.pl similarity index 100% rename from lib/unicore/Is/Inherite.pl rename to lib/unicore/lib/Inherite.pl diff --git a/lib/unicore/Is/JoinCont.pl b/lib/unicore/lib/JoinCont.pl similarity index 100% rename from lib/unicore/Is/JoinCont.pl rename to lib/unicore/lib/JoinCont.pl diff --git a/lib/unicore/Is/Kannada.pl b/lib/unicore/lib/Kannada.pl similarity index 100% rename from lib/unicore/Is/Kannada.pl rename to lib/unicore/lib/Kannada.pl diff --git a/lib/unicore/Is/Katakana.pl b/lib/unicore/lib/Katakana.pl similarity index 100% rename from lib/unicore/Is/Katakana.pl rename to lib/unicore/lib/Katakana.pl diff --git a/lib/unicore/Is/Khmer.pl b/lib/unicore/lib/Khmer.pl similarity index 100% rename from lib/unicore/Is/Khmer.pl rename to lib/unicore/lib/Khmer.pl diff --git a/lib/unicore/Is/L.pl b/lib/unicore/lib/L.pl similarity index 98% rename from lib/unicore/Is/L.pl rename to lib/unicore/lib/L.pl index 663fd1e..b3f6df3 100644 --- a/lib/unicore/Is/L.pl +++ b/lib/unicore/lib/L.pl @@ -5,7 +5,7 @@ # # This file supports: # \p{L} -# \p{Letter} (and fuzzy permutations) +# \p{L} (and fuzzy permutations) # # Meaning: Major Category 'L' # diff --git a/lib/unicore/Is/L_.pl b/lib/unicore/lib/L_.pl similarity index 100% rename from lib/unicore/Is/L_.pl rename to 
lib/unicore/lib/L_.pl diff --git a/lib/unicore/Is/Lao.pl b/lib/unicore/lib/Lao.pl similarity index 100% rename from lib/unicore/Is/Lao.pl rename to lib/unicore/lib/Lao.pl diff --git a/lib/unicore/Is/Latin.pl b/lib/unicore/lib/Latin.pl similarity index 100% rename from lib/unicore/Is/Latin.pl rename to lib/unicore/lib/Latin.pl diff --git a/lib/unicore/Is/Ll.pl b/lib/unicore/lib/Ll.pl similarity index 98% rename from lib/unicore/Is/Ll.pl rename to lib/unicore/lib/Ll.pl index 1cecfe7..573c56a 100644 --- a/lib/unicore/Is/Ll.pl +++ b/lib/unicore/lib/Ll.pl @@ -5,7 +5,7 @@ # # This file supports: # \p{Ll} -# \p{LowercaseLetter} (and fuzzy permutations) +# \p{Ll} (and fuzzy permutations) # # Meaning: General Category 'Ll' # diff --git a/lib/unicore/Is/Lm.pl b/lib/unicore/lib/Lm.pl similarity index 89% rename from lib/unicore/Is/Lm.pl rename to lib/unicore/lib/Lm.pl index 7cbb55e..2dfd4f3 100644 --- a/lib/unicore/Is/Lm.pl +++ b/lib/unicore/lib/Lm.pl @@ -5,7 +5,7 @@ # # This file supports: # \p{Lm} -# \p{ModifierLetter} (and fuzzy permutations) +# \p{Lm} (and fuzzy permutations) # # Meaning: General Category 'Lm' # diff --git a/lib/unicore/Is/Lo.pl b/lib/unicore/lib/Lo.pl similarity index 98% rename from lib/unicore/Is/Lo.pl rename to lib/unicore/lib/Lo.pl index b0b46e6..7113103 100644 --- a/lib/unicore/Is/Lo.pl +++ b/lib/unicore/lib/Lo.pl @@ -5,7 +5,7 @@ # # This file supports: # \p{Lo} -# \p{OtherLetter} (and fuzzy permutations) +# \p{Lo} (and fuzzy permutations) # # Meaning: General Category 'Lo' # diff --git a/lib/unicore/Is/Lower.pl b/lib/unicore/lib/Lower.pl similarity index 100% rename from lib/unicore/Is/Lower.pl rename to lib/unicore/lib/Lower.pl diff --git a/lib/unicore/Is/Lowercas.pl b/lib/unicore/lib/Lowercas.pl similarity index 100% rename from lib/unicore/Is/Lowercas.pl rename to lib/unicore/lib/Lowercas.pl diff --git a/lib/unicore/Is/Lt.pl b/lib/unicore/lib/Lt.pl similarity index 86% rename from lib/unicore/Is/Lt.pl rename to lib/unicore/lib/Lt.pl index 
ed8af13..b758932 100644 --- a/lib/unicore/Is/Lt.pl +++ b/lib/unicore/lib/Lt.pl @@ -5,7 +5,7 @@ # # This file supports: # \p{Lt} -# \p{TitlecaseLetter} (and fuzzy permutations) +# \p{Lt} (and fuzzy permutations) # # Meaning: General Category 'Lt' # diff --git a/lib/unicore/Is/Lu.pl b/lib/unicore/lib/Lu.pl similarity index 98% rename from lib/unicore/Is/Lu.pl rename to lib/unicore/lib/Lu.pl index 7ded16b..2923b3b 100644 --- a/lib/unicore/Is/Lu.pl +++ b/lib/unicore/lib/Lu.pl @@ -5,7 +5,7 @@ # # This file supports: # \p{Lu} -# \p{UppercaseLetter} (and fuzzy permutations) +# \p{Lu} (and fuzzy permutations) # # Meaning: General Category 'Lu' # diff --git a/lib/unicore/Is/M.pl b/lib/unicore/lib/M.pl similarity index 97% rename from lib/unicore/Is/M.pl rename to lib/unicore/lib/M.pl index a4cd30a..0274aed 100644 --- a/lib/unicore/Is/M.pl +++ b/lib/unicore/lib/M.pl @@ -5,7 +5,7 @@ # # This file supports: # \p{M} -# \p{Mark} (and fuzzy permutations) +# \p{M} (and fuzzy permutations) # # Meaning: Major Category 'M' # diff --git a/lib/unicore/Is/Malayala.pl b/lib/unicore/lib/Malayala.pl similarity index 100% rename from lib/unicore/Is/Malayala.pl rename to lib/unicore/lib/Malayala.pl diff --git a/lib/unicore/Is/Math.pl b/lib/unicore/lib/Math.pl similarity index 100% rename from lib/unicore/Is/Math.pl rename to lib/unicore/lib/Math.pl diff --git a/lib/unicore/Is/Mc.pl b/lib/unicore/lib/Mc.pl similarity index 94% rename from lib/unicore/Is/Mc.pl rename to lib/unicore/lib/Mc.pl index 5cb32b8..70181af 100644 --- a/lib/unicore/Is/Mc.pl +++ b/lib/unicore/lib/Mc.pl @@ -5,7 +5,7 @@ # # This file supports: # \p{Mc} -# \p{SpacingMark} (and fuzzy permutations) +# \p{Mc} (and fuzzy permutations) # # Meaning: General Category 'Mc' # diff --git a/lib/unicore/Is/Me.pl b/lib/unicore/lib/Me.pl similarity index 85% rename from lib/unicore/Is/Me.pl rename to lib/unicore/lib/Me.pl index 3afdefd..03ba369 100644 --- a/lib/unicore/Is/Me.pl +++ b/lib/unicore/lib/Me.pl @@ -5,7 +5,7 @@ # # This file 
supports: # \p{Me} -# \p{EnclosingMark} (and fuzzy permutations) +# \p{Me} (and fuzzy permutations) # # Meaning: General Category 'Me' # diff --git a/lib/unicore/Is/Mirrored.pl b/lib/unicore/lib/Mirrored.pl similarity index 100% rename from lib/unicore/Is/Mirrored.pl rename to lib/unicore/lib/Mirrored.pl diff --git a/lib/unicore/Is/Mn.pl b/lib/unicore/lib/Mn.pl similarity index 96% rename from lib/unicore/Is/Mn.pl rename to lib/unicore/lib/Mn.pl index c86c640..c707ec2 100644 --- a/lib/unicore/Is/Mn.pl +++ b/lib/unicore/lib/Mn.pl @@ -5,7 +5,7 @@ # # This file supports: # \p{Mn} -# \p{NonSpacingMark} (and fuzzy permutations) +# \p{Mn} (and fuzzy permutations) # # Meaning: General Category 'Mn' # diff --git a/lib/unicore/Is/Mongolia.pl b/lib/unicore/lib/Mongolia.pl similarity index 100% rename from lib/unicore/Is/Mongolia.pl rename to lib/unicore/lib/Mongolia.pl diff --git a/lib/unicore/Is/Myanmar.pl b/lib/unicore/lib/Myanmar.pl similarity index 100% rename from lib/unicore/Is/Myanmar.pl rename to lib/unicore/lib/Myanmar.pl diff --git a/lib/unicore/Is/N.pl b/lib/unicore/lib/N.pl similarity index 94% rename from lib/unicore/Is/N.pl rename to lib/unicore/lib/N.pl index 57b4170..07d2da4 100644 --- a/lib/unicore/Is/N.pl +++ b/lib/unicore/lib/N.pl @@ -5,7 +5,7 @@ # # This file supports: # \p{N} -# \p{Number} (and fuzzy permutations) +# \p{N} (and fuzzy permutations) # # Meaning: Major Category 'N' # diff --git a/lib/unicore/Is/Nd.pl b/lib/unicore/lib/Nd.pl similarity index 91% rename from lib/unicore/Is/Nd.pl rename to lib/unicore/lib/Nd.pl index c6fd133..d51cb07 100644 --- a/lib/unicore/Is/Nd.pl +++ b/lib/unicore/lib/Nd.pl @@ -5,7 +5,7 @@ # # This file supports: # \p{Nd} -# \p{DecimalNumber} (and fuzzy permutations) +# \p{Nd} (and fuzzy permutations) # # Meaning: General Category 'Nd' # diff --git a/lib/unicore/Is/Nl.pl b/lib/unicore/lib/Nl.pl similarity index 86% rename from lib/unicore/Is/Nl.pl rename to lib/unicore/lib/Nl.pl index 690bed4..9b42189 100644 --- 
a/lib/unicore/Is/Nl.pl +++ b/lib/unicore/lib/Nl.pl @@ -5,7 +5,7 @@ # # This file supports: # \p{Nl} -# \p{LetterNumber} (and fuzzy permutations) +# \p{Nl} (and fuzzy permutations) # # Meaning: General Category 'Nl' # diff --git a/lib/unicore/Is/No.pl b/lib/unicore/lib/No.pl similarity index 90% rename from lib/unicore/Is/No.pl rename to lib/unicore/lib/No.pl index f67310a..b0cc18c 100644 --- a/lib/unicore/Is/No.pl +++ b/lib/unicore/lib/No.pl @@ -5,7 +5,7 @@ # # This file supports: # \p{No} -# \p{OtherNumber} (and fuzzy permutations) +# \p{No} (and fuzzy permutations) # # Meaning: General Category 'No' # diff --git a/lib/unicore/Is/Nonchara.pl b/lib/unicore/lib/Nonchara.pl similarity index 100% rename from lib/unicore/Is/Nonchara.pl rename to lib/unicore/lib/Nonchara.pl diff --git a/lib/unicore/Is/Ogham.pl b/lib/unicore/lib/Ogham.pl similarity index 100% rename from lib/unicore/Is/Ogham.pl rename to lib/unicore/lib/Ogham.pl diff --git a/lib/unicore/Is/OldItali.pl b/lib/unicore/lib/OldItali.pl similarity index 100% rename from lib/unicore/Is/OldItali.pl rename to lib/unicore/lib/OldItali.pl diff --git a/lib/unicore/Is/Oriya.pl b/lib/unicore/lib/Oriya.pl similarity index 100% rename from lib/unicore/Is/Oriya.pl rename to lib/unicore/lib/Oriya.pl diff --git a/lib/unicore/Is/OtherAlp.pl b/lib/unicore/lib/OtherAlp.pl similarity index 100% rename from lib/unicore/Is/OtherAlp.pl rename to lib/unicore/lib/OtherAlp.pl diff --git a/lib/unicore/Is/OtherLow.pl b/lib/unicore/lib/OtherLow.pl similarity index 100% rename from lib/unicore/Is/OtherLow.pl rename to lib/unicore/lib/OtherLow.pl diff --git a/lib/unicore/Is/OtherMat.pl b/lib/unicore/lib/OtherMat.pl similarity index 100% rename from lib/unicore/Is/OtherMat.pl rename to lib/unicore/lib/OtherMat.pl diff --git a/lib/unicore/Is/OtherUpp.pl b/lib/unicore/lib/OtherUpp.pl similarity index 100% rename from lib/unicore/Is/OtherUpp.pl rename to lib/unicore/lib/OtherUpp.pl diff --git a/lib/unicore/Is/P.pl b/lib/unicore/lib/P.pl 
similarity index 95% rename from lib/unicore/Is/P.pl rename to lib/unicore/lib/P.pl index 599bc30..df116cf 100644 --- a/lib/unicore/Is/P.pl +++ b/lib/unicore/lib/P.pl @@ -5,7 +5,7 @@ # # This file supports: # \p{P} -# \p{Punctuation} (and fuzzy permutations) +# \p{P} (and fuzzy permutations) # # Meaning: Major Category 'P' # diff --git a/lib/unicore/Is/Pc.pl b/lib/unicore/lib/Pc.pl similarity index 84% rename from lib/unicore/Is/Pc.pl rename to lib/unicore/lib/Pc.pl index 04a8c1f..f4a03a9 100644 --- a/lib/unicore/Is/Pc.pl +++ b/lib/unicore/lib/Pc.pl @@ -5,7 +5,7 @@ # # This file supports: # \p{Pc} -# \p{ConnectorPunctuation} (and fuzzy permutations) +# \p{Pc} (and fuzzy permutations) # # Meaning: General Category 'Pc' # diff --git a/lib/unicore/Is/Pd.pl b/lib/unicore/lib/Pd.pl similarity index 86% rename from lib/unicore/Is/Pd.pl rename to lib/unicore/lib/Pd.pl index 453ec5a..be8412a 100644 --- a/lib/unicore/Is/Pd.pl +++ b/lib/unicore/lib/Pd.pl @@ -5,7 +5,7 @@ # # This file supports: # \p{Pd} -# \p{DashPunctuation} (and fuzzy permutations) +# \p{Pd} (and fuzzy permutations) # # Meaning: General Category 'Pd' # diff --git a/lib/unicore/Is/Pe.pl b/lib/unicore/lib/Pe.pl similarity index 90% rename from lib/unicore/Is/Pe.pl rename to lib/unicore/lib/Pe.pl index 2be04ae..72c1bae 100644 --- a/lib/unicore/Is/Pe.pl +++ b/lib/unicore/lib/Pe.pl @@ -5,7 +5,7 @@ # # This file supports: # \p{Pe} -# \p{ClosePunctuation} (and fuzzy permutations) +# \p{Pe} (and fuzzy permutations) # # Meaning: General Category 'Pe' # diff --git a/lib/unicore/Is/Pf.pl b/lib/unicore/lib/Pf.pl similarity index 84% rename from lib/unicore/Is/Pf.pl rename to lib/unicore/lib/Pf.pl index b8c60da..9af533d 100644 --- a/lib/unicore/Is/Pf.pl +++ b/lib/unicore/lib/Pf.pl @@ -5,7 +5,7 @@ # # This file supports: # \p{Pf} -# \p{FinalPunctuation} (and fuzzy permutations) +# \p{Pf} (and fuzzy permutations) # # Meaning: General Category 'Pf' # diff --git a/lib/unicore/Is/Pi.pl b/lib/unicore/lib/Pi.pl similarity 
index 84% rename from lib/unicore/Is/Pi.pl rename to lib/unicore/lib/Pi.pl index 868d4fb..54fa4e6 100644 --- a/lib/unicore/Is/Pi.pl +++ b/lib/unicore/lib/Pi.pl @@ -5,7 +5,7 @@ # # This file supports: # \p{Pi} -# \p{InitialPunctuation} (and fuzzy permutations) +# \p{Pi} (and fuzzy permutations) # # Meaning: General Category 'Pi' # diff --git a/lib/unicore/Is/Po.pl b/lib/unicore/lib/Po.pl similarity index 94% rename from lib/unicore/Is/Po.pl rename to lib/unicore/lib/Po.pl index c24a8f4..d1ec8cc 100644 --- a/lib/unicore/Is/Po.pl +++ b/lib/unicore/lib/Po.pl @@ -5,7 +5,7 @@ # # This file supports: # \p{Po} -# \p{OtherPunctuation} (and fuzzy permutations) +# \p{Po} (and fuzzy permutations) # # Meaning: General Category 'Po' # diff --git a/lib/unicore/Is/Print.pl b/lib/unicore/lib/Print.pl similarity index 100% rename from lib/unicore/Is/Print.pl rename to lib/unicore/lib/Print.pl diff --git a/lib/unicore/Is/Ps.pl b/lib/unicore/lib/Ps.pl similarity index 91% rename from lib/unicore/Is/Ps.pl rename to lib/unicore/lib/Ps.pl index 8c29336..f03aa02 100644 --- a/lib/unicore/Is/Ps.pl +++ b/lib/unicore/lib/Ps.pl @@ -5,7 +5,7 @@ # # This file supports: # \p{Ps} -# \p{OpenPunctuation} (and fuzzy permutations) +# \p{Ps} (and fuzzy permutations) # # Meaning: General Category 'Ps' # diff --git a/lib/unicore/Is/Punct.pl b/lib/unicore/lib/Punct.pl similarity index 100% rename from lib/unicore/Is/Punct.pl rename to lib/unicore/lib/Punct.pl diff --git a/lib/unicore/Is/Quotatio.pl b/lib/unicore/lib/Quotatio.pl similarity index 100% rename from lib/unicore/Is/Quotatio.pl rename to lib/unicore/lib/Quotatio.pl diff --git a/lib/unicore/Is/Runic.pl b/lib/unicore/lib/Runic.pl similarity index 100% rename from lib/unicore/Is/Runic.pl rename to lib/unicore/lib/Runic.pl diff --git a/lib/unicore/Is/S.pl b/lib/unicore/lib/S.pl similarity index 97% rename from lib/unicore/Is/S.pl rename to lib/unicore/lib/S.pl index 5e51785..f9ded7d 100644 --- a/lib/unicore/Is/S.pl +++ b/lib/unicore/lib/S.pl @@ -5,7 
+5,7 @@ # # This file supports: # \p{S} -# \p{Symbol} (and fuzzy permutations) +# \p{S} (and fuzzy permutations) # # Meaning: Major Category 'S' # diff --git a/lib/unicore/Is/Sc.pl b/lib/unicore/lib/Sc.pl similarity index 87% rename from lib/unicore/Is/Sc.pl rename to lib/unicore/lib/Sc.pl index b9818c2..31cabc3 100644 --- a/lib/unicore/Is/Sc.pl +++ b/lib/unicore/lib/Sc.pl @@ -5,7 +5,7 @@ # # This file supports: # \p{Sc} -# \p{CurrencySymbol} (and fuzzy permutations) +# \p{Sc} (and fuzzy permutations) # # Meaning: General Category 'Sc' # diff --git a/lib/unicore/Is/Sinhala.pl b/lib/unicore/lib/Sinhala.pl similarity index 100% rename from lib/unicore/Is/Sinhala.pl rename to lib/unicore/lib/Sinhala.pl diff --git a/lib/unicore/Is/Sk.pl b/lib/unicore/lib/Sk.pl similarity index 90% rename from lib/unicore/Is/Sk.pl rename to lib/unicore/lib/Sk.pl index 47febf5..34d4ae3 100644 --- a/lib/unicore/Is/Sk.pl +++ b/lib/unicore/lib/Sk.pl @@ -5,7 +5,7 @@ # # This file supports: # \p{Sk} -# \p{ModifierSymbol} (and fuzzy permutations) +# \p{Sk} (and fuzzy permutations) # # Meaning: General Category 'Sk' # diff --git a/lib/unicore/Is/Sm.pl b/lib/unicore/lib/Sm.pl similarity index 93% rename from lib/unicore/Is/Sm.pl rename to lib/unicore/lib/Sm.pl index 5b423bf..bde87a9 100644 --- a/lib/unicore/Is/Sm.pl +++ b/lib/unicore/lib/Sm.pl @@ -5,7 +5,7 @@ # # This file supports: # \p{Sm} -# \p{MathSymbol} (and fuzzy permutations) +# \p{Sm} (and fuzzy permutations) # # Meaning: General Category 'Sm' # diff --git a/lib/unicore/Is/So.pl b/lib/unicore/lib/So.pl similarity index 96% rename from lib/unicore/Is/So.pl rename to lib/unicore/lib/So.pl index 7cb9987..0cc548e 100644 --- a/lib/unicore/Is/So.pl +++ b/lib/unicore/lib/So.pl @@ -5,7 +5,7 @@ # # This file supports: # \p{So} -# \p{OtherSymbol} (and fuzzy permutations) +# \p{So} (and fuzzy permutations) # # Meaning: General Category 'So' # diff --git a/lib/unicore/Is/Space.pl b/lib/unicore/lib/Space.pl similarity index 100% rename from 
lib/unicore/Is/Space.pl rename to lib/unicore/lib/Space.pl diff --git a/lib/unicore/Is/SpacePer.pl b/lib/unicore/lib/SpacePer.pl similarity index 100% rename from lib/unicore/Is/SpacePer.pl rename to lib/unicore/lib/SpacePer.pl diff --git a/lib/unicore/Is/Syriac.pl b/lib/unicore/lib/Syriac.pl similarity index 100% rename from lib/unicore/Is/Syriac.pl rename to lib/unicore/lib/Syriac.pl diff --git a/lib/unicore/Is/Tamil.pl b/lib/unicore/lib/Tamil.pl similarity index 100% rename from lib/unicore/Is/Tamil.pl rename to lib/unicore/lib/Tamil.pl diff --git a/lib/unicore/Is/Telugu.pl b/lib/unicore/lib/Telugu.pl similarity index 100% rename from lib/unicore/Is/Telugu.pl rename to lib/unicore/lib/Telugu.pl diff --git a/lib/unicore/Is/Terminal.pl b/lib/unicore/lib/Terminal.pl similarity index 100% rename from lib/unicore/Is/Terminal.pl rename to lib/unicore/lib/Terminal.pl diff --git a/lib/unicore/Is/Thaana.pl b/lib/unicore/lib/Thaana.pl similarity index 100% rename from lib/unicore/Is/Thaana.pl rename to lib/unicore/lib/Thaana.pl diff --git a/lib/unicore/Is/Thai.pl b/lib/unicore/lib/Thai.pl similarity index 100% rename from lib/unicore/Is/Thai.pl rename to lib/unicore/lib/Thai.pl diff --git a/lib/unicore/Is/Tibetan.pl b/lib/unicore/lib/Tibetan.pl similarity index 100% rename from lib/unicore/Is/Tibetan.pl rename to lib/unicore/lib/Tibetan.pl diff --git a/lib/unicore/Is/Title.pl b/lib/unicore/lib/Title.pl similarity index 100% rename from lib/unicore/Is/Title.pl rename to lib/unicore/lib/Title.pl diff --git a/lib/unicore/Is/Upper.pl b/lib/unicore/lib/Upper.pl similarity index 100% rename from lib/unicore/Is/Upper.pl rename to lib/unicore/lib/Upper.pl diff --git a/lib/unicore/Is/Uppercas.pl b/lib/unicore/lib/Uppercas.pl similarity index 100% rename from lib/unicore/Is/Uppercas.pl rename to lib/unicore/lib/Uppercas.pl diff --git a/lib/unicore/Is/WhiteSpa.pl b/lib/unicore/lib/WhiteSpa.pl similarity index 100% rename from lib/unicore/Is/WhiteSpa.pl rename to 
lib/unicore/lib/WhiteSpa.pl diff --git a/lib/unicore/Is/Word.pl b/lib/unicore/lib/Word.pl similarity index 100% rename from lib/unicore/Is/Word.pl rename to lib/unicore/lib/Word.pl diff --git a/lib/unicore/Is/XDigit.pl b/lib/unicore/lib/XDigit.pl similarity index 100% rename from lib/unicore/Is/XDigit.pl rename to lib/unicore/lib/XDigit.pl diff --git a/lib/unicore/Is/Yi.pl b/lib/unicore/lib/Yi.pl similarity index 100% rename from lib/unicore/Is/Yi.pl rename to lib/unicore/lib/Yi.pl diff --git a/lib/unicore/Is/Z.pl b/lib/unicore/lib/Z.pl similarity index 87% rename from lib/unicore/Is/Z.pl rename to lib/unicore/lib/Z.pl index 4c13a1e..3a053e1 100644 --- a/lib/unicore/Is/Z.pl +++ b/lib/unicore/lib/Z.pl @@ -5,7 +5,7 @@ # # This file supports: # \p{Z} -# \p{Separator} (and fuzzy permutations) +# \p{Z} (and fuzzy permutations) # # Meaning: Major Category 'Z' # diff --git a/lib/unicore/Is/Zl.pl b/lib/unicore/lib/Zl.pl similarity index 83% rename from lib/unicore/Is/Zl.pl rename to lib/unicore/lib/Zl.pl index 80f5902..2fa53b3 100644 --- a/lib/unicore/Is/Zl.pl +++ b/lib/unicore/lib/Zl.pl @@ -5,7 +5,7 @@ # # This file supports: # \p{Zl} -# \p{LineSeparator} (and fuzzy permutations) +# \p{Zl} (and fuzzy permutations) # # Meaning: General Category 'Zl' # diff --git a/lib/unicore/Is/Zp.pl b/lib/unicore/lib/Zp.pl similarity index 82% rename from lib/unicore/Is/Zp.pl rename to lib/unicore/lib/Zp.pl index 908dbb9..7860fc3 100644 --- a/lib/unicore/Is/Zp.pl +++ b/lib/unicore/lib/Zp.pl @@ -5,7 +5,7 @@ # # This file supports: # \p{Zp} -# \p{ParagraphSeparator} (and fuzzy permutations) +# \p{Zp} (and fuzzy permutations) # # Meaning: General Category 'Zp' # diff --git a/lib/unicore/Is/Zs.pl b/lib/unicore/lib/Zs.pl similarity index 85% rename from lib/unicore/Is/Zs.pl rename to lib/unicore/lib/Zs.pl index 593fa23..56ff72e 100644 --- a/lib/unicore/Is/Zs.pl +++ b/lib/unicore/lib/Zs.pl @@ -5,7 +5,7 @@ # # This file supports: # \p{Zs} -# \p{SpaceSeparator} (and fuzzy permutations) +# \p{Zs} 
(and fuzzy permutations) # # Meaning: General Category 'Zs' # diff --git a/lib/unicore/Is/_CanonDC.pl b/lib/unicore/lib/_CanonDC.pl similarity index 100% rename from lib/unicore/Is/_CanonDC.pl rename to lib/unicore/lib/_CanonDC.pl diff --git a/lib/unicore/Is/_CaseIgn.pl b/lib/unicore/lib/_CaseIgn.pl similarity index 100% rename from lib/unicore/Is/_CaseIgn.pl rename to lib/unicore/lib/_CaseIgn.pl diff --git a/lib/unicore/Is/_CombAbo.pl b/lib/unicore/lib/_CombAbo.pl similarity index 100% rename from lib/unicore/Is/_CombAbo.pl rename to lib/unicore/lib/_CombAbo.pl diff --git a/lib/unicore/mktables b/lib/unicore/mktables index 1e1f7ed..a6c234c 100644 --- a/lib/unicore/mktables +++ b/lib/unicore/mktables @@ -1,19 +1,20 @@ #!/usr/bin/perl -w use strict; use Carp; + ## ## mktables -- create the runtime Perl Unicode files (lib/unicore/**/*.pl) ## from the Unicode database files (lib/unicore/*.txt). ## -mkdir("In", 0755); -mkdir("Is", 0755); -mkdir("To", 0755); +mkdir("lib", 0755); +mkdir("To", 0755); ## ## Process any args. ## -my $Verbose = 0; +my $Verbose = 0; +my $MakeTestScript = 0; while (@ARGV) { @@ -22,8 +23,10 @@ while (@ARGV) $Verbose = 1; } elsif ($arg eq '-q') { $Verbose = 0; + } elsif ($arg eq '-maketest') { + $MakeTestScript = 1; } else { - die "usage: $0 [-v|-q]"; + die "usage: $0 [-v|-q] [-maketest]"; } } @@ -36,6 +39,35 @@ my $HEADER=<<"EOF"; EOF + +## +## Given a filename and a reference to an array of lines, +## write the lines to the file only if the contents have not changed. 
+## +sub WriteIfChanged($\@) +{ + my $file = shift; + my $lines = shift; + + my $TextToWrite = join '', @$lines; + if (open IN, $file) { + local($/) = undef; + my $PreviousText = <IN>; + close IN; + if ($PreviousText eq $TextToWrite) { + print "$file unchanged.\n" if $Verbose; + return; + } + } + if (not open OUT, ">$file") { + die "$0: can't open $file for output: $!\n"; + } + print "$file written.\n" if $Verbose; + + print OUT $TextToWrite; + close OUT; +} + ## ## The main datastructure (a "Table") represents a set of code points that ## are part of a particular quality (that are part of \pL, \p{InGreek}, @@ -55,36 +87,25 @@ my %TableInfo; my %TableDesc; my %FuzzyNames; my %AliasInfo; +my %CanonicalToOrig; ## ## Turn something like ## OLD-ITALIC -## to +## into ## OldItalic ## sub CanonicalName($) { - my $name = lc shift; + my $orig = shift; + my $name = lc $orig; $name =~ s/(?$filename")) { - die "$0: can't write $filename: $!\n"; - } - - print OUT $HEADER; + my @OUT = $HEADER; if (defined $comment) { $comment =~ s/\s+\Z//; $comment =~ s/^/# /gm; - print OUT "#\n$comment\n#\n"; + push @OUT, "#\n$comment\n#\n"; } - print OUT "return <<'END';\n"; + push @OUT, "return <<'END';\n"; for my $set (@$Table) { @@ -394,14 +409,65 @@ sub Table::Write my $name = $set->[RANGE_NAME]; if ($start == $end) { - printf OUT "%04X\t\t%s\n", $start, $name; + push @OUT, sprintf "%04X\t\t%s\n", $start, $name; } else { - printf OUT "%04X\t%04X\t%s\n", $start, $end, $name; + push @OUT, sprintf "%04X\t%04X\t%s\n", $start, $end, $name; } } - print OUT "END\n"; - close OUT; + push @OUT, "END\n"; + + WriteIfChanged($filename, @OUT); +} + +## This used only for making the test script.
+## helper function +sub IsUsable($) +{ + my $code = shift; + return 0 if $code <= 0x0000; ## don't use null + return 0 if $code >= $LastUnicodeCodepoint; ## keep in range + return 0 if ($code >= 0xD800 and $code <= 0xDFFF); ## no surrogates + return 0 if ($code >= 0xFDD0 and $code <= 0xFDEF); ## utf8.c says no good + return 0 if (($code & 0xFFFF) == 0xFFFE); ## utf8.c says no good + return 0 if (($code & 0xFFFF) == 0xFFFF); ## utf8.c says no good + return 1; +} + +## Return a code point that's part of the table. +## Returns nothing if the table is empty (or covers only surrogates). +## This used only for making the test script. +sub Table::ValidCode +{ + my $Table = shift; #self + for my $set (@$Table) { + return $set->[RANGE_END] if IsUsable($set->[RANGE_END]); + } + return (); +} + +## Return a code point that's not part of the table +## Returns nothing if the table covers all code points. +## This used only for making the test script. +sub Table::InvalidCode +{ + my $Table = shift; #self + + return 0x1234 if $Table->IsEmpty(); + + for my $set (@$Table) + { + if (IsUsable($set->[RANGE_END] + 1)) + { + return $set->[RANGE_END] + 1; + } + + if (IsUsable($set->[RANGE_START] - 1)) + { + return $set->[RANGE_START] - 1; + } + } + return (); } ########################################################################### @@ -434,8 +500,16 @@ sub New_Alias($$$@) confess "$0: bad args to New_Alias" } - if (not $TableInfo{$Type}->{$Name}) { - confess "$0: don't have orignial $Type => $Name to make alias" + $Alias = CanonicalName($Alias) if $Fuzzy; + + if (not $TableInfo{$Type}->{$Name}) + { + my $CName = CanonicalName($Name); + if ($TableInfo{$Type}->{$CName}) { + confess "$0: Use canonical form '$CName' instead of '$Name' for alias."; + } else { + confess "$0: don't have orignial $Type => $Name to make alias"; + } } if ($TableInfo{$Alias}) { confess "$0: already have original $Type => $Alias; can't make alias"; @@ -451,7 +525,7 @@ sub New_Alias($$$@) ## All assigned code 
points my $Assigned = Table->New(Is => 'Assigned', Desc => "All assigned code points", - Fuzzy => 1); + Fuzzy => 0); my $Name = Table->New(); ## all characters, individually by name my $General = Table->New(); ## all characters, grouped by category @@ -694,7 +768,7 @@ sub Unicode_Txt() Fuzzy => 0); ## Unassigned is the same as 'Cn' - New_Alias(Is => 'Unassigned', SameAs => 'Cn', Fuzzy => 1); + New_Alias(Is => 'Unassigned', SameAs => 'Cn', Fuzzy => 0); $Cat{C}->Replace($Cat{C}->Merge($Cat{Cn})); ## Now merge in Cn into C @@ -709,10 +783,10 @@ sub Unicode_Txt() my $Any = Table->New(Is => 'Any', Desc => sprintf("[\\x{0000}-\\x{%X}]", $LastUnicodeCodepoint), - Fuzzy => 1); + Fuzzy => 0); $Any->RawAppendRange(0, $LastUnicodeCodepoint); - New_Alias(Is => 'All', SameAs => 'Any', Fuzzy => 1); + New_Alias(Is => 'All', SameAs => 'Any', Fuzzy => 0); ## ## Build special properties for Perl's internal case-folding needs: @@ -1106,6 +1180,199 @@ sub Make_GC_Aliases() } } + +## +## These are used in: +## MakePropTestScript() +## WriteAllMappings() +## for making the test script. 
+## +my %FuzzyNameToTest; +my %ExactNameToTest; + + +## This used only for making the test script +sub GenTests($$$$) +{ + my $FH = shift; + my $Prop = shift; + my $MatchCode = shift; + my $FailCode = shift; + + if (defined $MatchCode) { + printf $FH qq/Expect(1, "\\x{%04X}", '\\p{$Prop}' );\n/, $MatchCode; + printf $FH qq/Expect(0, "\\x{%04X}", '\\p{^$Prop}');\n/, $MatchCode; + printf $FH qq/Expect(0, "\\x{%04X}", '\\P{$Prop}' );\n/, $MatchCode; + printf $FH qq/Expect(1, "\\x{%04X}", '\\P{^$Prop}');\n/, $MatchCode; + } + if (defined $FailCode) { + printf $FH qq/Expect(0, "\\x{%04X}", '\\p{$Prop}' );\n/, $FailCode; + printf $FH qq/Expect(1, "\\x{%04X}", '\\p{^$Prop}');\n/, $FailCode; + printf $FH qq/Expect(1, "\\x{%04X}", '\\P{$Prop}' );\n/, $FailCode; + printf $FH qq/Expect(0, "\\x{%04X}", '\\P{^$Prop}');\n/, $FailCode; + } +} + +## This used only for making the test script +sub ExpectError($$) +{ + my $FH = shift; + my $prop = shift; + + print $FH qq/Error('\\p{$prop}');\n/; + print $FH qq/Error('\\P{$prop}');\n/; +} + +## This used only for making the test script +my @GoodSeps = ( + " ", + "-", + " \t ", + "", + "", + "_", + ); +my @BadSeps = ( + "--", + "__", + " _", + "/" + ); + +## This used only for making the test script +sub RandomlyFuzzifyName($;$) +{ + my $Name = shift; + my $WantError = shift; ## if true, make an error + + my @parts; + for my $part (split /[-\s_]+/, $Name) + { + if (@parts) { + if ($WantError and rand() < 0.3) { + push @parts, $BadSeps[rand(@BadSeps)]; + $WantError = 0; + } else { + push @parts, $GoodSeps[rand(@GoodSeps)]; + } + } + my $switch = int rand(4); + if ($switch == 0) { + push @parts, uc $part; + } elsif ($switch == 1) { + push @parts, lc $part; + } elsif ($switch == 2) { + push @parts, ucfirst $part; + } else { + push @parts, $part; + } + } + my $new = join('', @parts); + + if ($WantError) { + if (rand() >= 0.5) { + $new .= $BadSeps[rand(@BadSeps)]; + } else { + $new = $BadSeps[rand(@BadSeps)] . 
$new; + } + } + return $new; +} + +## This used only for making the test script +sub MakePropTestScript() +{ + ## this written directly -- it's huge. + if (not open OUT, ">TestProp.pl") { + die "$0: TestProp.pl: $!\n"; + } + print OUT <DATA>; + + while (my ($Name, $Table) = each %ExactNameToTest) + { + GenTests(*OUT, $Name, $Table->ValidCode, $Table->InvalidCode); + ExpectError(*OUT, uc $Name) if uc $Name ne $Name; + ExpectError(*OUT, lc $Name) if lc $Name ne $Name; + } + + + while (my ($Name, $Table) = each %FuzzyNameToTest) + { + my $Orig = $CanonicalToOrig{$Name}; + my %Names = ( + $Name => 1, + $Orig => 1, + RandomlyFuzzifyName($Orig) => 1 + ); + + for my $N (keys %Names) { + GenTests(*OUT, $N, $Table->ValidCode, $Table->InvalidCode); + } + + ExpectError(*OUT, RandomlyFuzzifyName($Orig, 'ERROR')); + } + + print OUT "Finished();\n"; + close OUT; +} + + +## +## These are used only in: +## RegisterFileForName() +## WriteAllMappings() +## +my %Exact; ## will become %utf8::Exact; +my %Canonical; ## will become %utf8::Canonical; +my %CaComment; ## Comment for %Canonical entry of same key + +## +## Given info about a name and a datafile that it should be associated with, +## register that assocation in %Exact and %Canonical. +sub RegisterFileForName($$$$) +{ + my $Type = shift; + my $Name = shift; + my $IsFuzzy = shift; + my $filename = shift; + + ## + ## Now in details for the mapping. $Type eq 'Is' has the + ## Is removed, as it will be removed in utf8_heavy when this + ## data is being checked. In keeps its "In", but a second + ## sans-In record is written if it doesn't conflict with + ## anything already there.
+ ## + if (not $IsFuzzy) + { + if ($Type eq 'Is') { + die "oops[$Name]" if $Exact{$Name}; + $Exact{$Name} = $filename; + } else { + die "oops[$Type$Name]" if $Exact{"$Type$Name"}; + $Exact{"$Type$Name"} = $filename; + $Exact{$Name} = $filename if not $Exact{$Name}; + } + } + else + { + my $CName = lc $Name; + if ($Type eq 'Is') { + die "oops[$CName]" if $Canonical{$CName}; + $Canonical{$CName} = $filename; + $CaComment{$CName} = $Name if $Name =~ tr/A-Z// >= 2; + } else { + die "oops[$Type$CName]" if $Canonical{lc "$Type$CName"}; + $Canonical{lc "$Type$CName"} = $filename; + $CaComment{lc "$Type$CName"} = "$Type$Name"; + if (not $Canonical{$CName}) { + $Canonical{$CName} = $filename; + $CaComment{$CName} = "$Type$Name"; + } + } + } +} + ## ## Writes the info accumulated in ## @@ -1118,51 +1385,52 @@ sub WriteAllMappings() { my @MAP; - for my $Type ('In', 'Is') - { - my %Filenames; - my %NameToFile; + my %BaseNames; ## Base names already used (for avoiding 8.3 conflicts) - my %Exact; ## will become %utf8::Is or %utf8::In - my %Pat; ## will become %utf8::IsPat or %utf8::InPat + ## 'Is' *MUST* come first, so its names have precidence over 'In's + for my $Type ('Is', 'In') + { + my %RawNameToFile; ## a per-$Type cache - ## - ## First write all the files to the $Type/ directory - ## - for my $Name (sort { length $a <=> length $b } keys %{$TableInfo{$Type}}) + for my $Name (sort {length $a <=> length $b} keys %{$TableInfo{$Type}}) { + ## Note: $Name is already canonical my $Table = $TableInfo{$Type}->{$Name}; + my $IsFuzzy = $FuzzyNames{$Type}->{$Name}; ## Need an 8.3 safe filename (which means "an 8 safe" $filename) - my $filename = $FuzzyNames{$Type}->{$Name} ? CanonicalName($Name): $Name; - $filename =~ s/[^\w_]+/_/g; # "L&" -> "L_" - substr($filename, 8) = '' if length($filename) > 8; - - ## - ## Make sure the filename doesn't conflict with something we - ## might have already written. 
If we have, say, - ## GreekExtended1 - ## GreekExtended2 - ## they become - ## GreekExt - ## GreekEx2 - ## - while (my $num = $Filenames{lc $filename}++) + my $filename; { - $num++; ## so filenames with numbers start with '2', which - ## just looks more natural. - ## Want to append $num, but if it'll make the filename longer - ## than 8 characters, pre-truncate $filename so that the result - ## is acceptable. - my $delta = length($filename) + length($num) - 8; - if ($delta > 0) { - substr($filename, -$delta) = $num; - } else { - $filename .= $num; + ## 'Is' items lose 'Is' from the basename. + $filename = $Type eq 'Is' ? $Name : "$Type$Name"; + + $filename =~ s/[^\w_]+/_/g; # "L&" -> "L_" + substr($filename, 8) = '' if length($filename) > 8; + + ## + ## Make sure the basename doesn't conflict with something we + ## might have already written. If we have, say, + ## InGreekExtended1 + ## InGreekExtended2 + ## they become + ## InGreekE + ## InGreek2 + ## + while (my $num = $BaseNames{lc $filename}++) + { + $num++; ## so basenames with numbers start with '2', which + ## just looks more natural. + ## Want to append $num, but if it'll make the basename longer + ## than 8 characters, pre-truncate $filename so that the result + ## is acceptable. + my $delta = length($filename) + length($num) - 8; + if ($delta > 0) { + substr($filename, -$delta) = $num; + } else { + $filename .= $num; + } } - } - - $Exact{$Name} = $filename; + }; ## ## Construct a nice comment to add to the file, and build data @@ -1187,8 +1455,7 @@ sub WriteAllMappings() for my $N (@Supported) { my $IsFuzzy = $FuzzyNames{$Type}->{$N}; - my $CName = $IsFuzzy ? CanonicalName($N): $N; - my $Prop = "\\p{$TypeToShow$CName}"; + my $Prop = "\\p{$TypeToShow$Name}"; $OrigProp = $Prop if not $OrigProp; #cache for aliases if ($IsFuzzy) { $Comment .= "\t$Prop (and fuzzy permutations)\n"; @@ -1208,98 +1475,119 @@ sub WriteAllMappings() ## ## Okay, write the file... 
## - $Table->Write("$Type/$filename.pl", $Comment); - } + $Table->Write("lib/$filename.pl", $Comment); - ## - ## Write out the map - ## - if (not open MAP, ">Properties") { - die "$0: can't write Properties: $!\n"; - } - print MAP "##\n"; - print MAP "## This file created by $0\n"; - print MAP "## List of built-in \\p{...}/\\P{...} properties.\n"; - print MAP "##\n"; - print MAP "## '*' means name may be 'fuzzy'\n"; - print MAP "##\n"; - print MAP "\n"; - print MAP sort { substr($a,2) cmp substr($b, 2) } @MAP; - close MAP; + ## and register it + $RawNameToFile{$Name} = $filename; + RegisterFileForName($Type => $Name, $IsFuzzy, $filename); - ## - ## Build %Pat - ## - while (my ($Fuzzy, $Real) = each %{$FuzzyNames{$Type}}) - { - my $File = $Exact{$Real}; - - if (not $File) { - die "$0: oops [$Real]"; - } - - ## The prefix length of 2 is enough spread, - ## and besides, we have 'Yi' as an In category. - my $Prefix = lc(substr($Fuzzy, 0, 2)); - my $Regex = NameToRegex($Fuzzy); - - if ($Pat{$Prefix}->{$Regex}) { - warn "WHOA, conflict with /$Regex/: $Pat{$Prefix}->{$Regex} vs $File\n"; + if ($IsFuzzy) + { + my $CName = CanonicalName($Type . '_'. $Name); + $FuzzyNameToTest{$Name} = $Table if !$FuzzyNameToTest{$Name}; + $FuzzyNameToTest{$CName} = $Table if !$FuzzyNameToTest{$CName}; + } else { + $ExactNameToTest{$Name} = $Table; } - $Pat{$Prefix}->{$Regex} = $File; } - ## - ## Since the fuzzy method will provide for a way to match $Fuzzy, - ## there's no need for $Fuzzy to be in %Exact as well. - ## This can't be done in the loop above because there could be - ## multiple $Fuzzys pointing at the same $Real, and we don't want - ## the first to delete the exact mapping out from under the second. 
- ## - for my $Fuzzy (keys %{$FuzzyNames{$Type}}) + ## Register aliase info + for my $Name (sort {length $a <=> length $b} keys %{$AliasInfo{$Type}}) { - delete $Exact{$Fuzzy}; + my $Alias = $AliasInfo{$Type}->{$Name}; + my $IsFuzzy = $FuzzyNames{$Type}->{$Alias}; + my $filename = $RawNameToFile{$Name}; + die "oops [$Alias]->[$Name]" if not $filename; + RegisterFileForName($Type => $Alias, $IsFuzzy, $filename); + + my $Table = $TableInfo{$Type}->{$Name}; + die "oops" if not $Table; + if ($IsFuzzy) + { + my $CName = CanonicalName($Type .'_'. $Alias); + $FuzzyNameToTest{$Alias} = $Table if !$FuzzyNameToTest{$Alias}; + $FuzzyNameToTest{$CName} = $Table if !$FuzzyNameToTest{$CName}; + } else { + $ExactNameToTest{$Alias} = $Table; + } } + } + ## + ## Write out the property list + ## + { + my @OUT = ( + "##\n", + "## This file created by $0\n", + "## List of built-in \\p{...}/\\P{...} properties.\n", + "##\n", + "## '*' means name may be 'fuzzy'\n", + "##\n\n", + sort { substr($a,2) cmp substr($b, 2) } @MAP, + ); + WriteIfChanged('Properties', @OUT); + } + use Text::Tabs (); ## using this makes the files about half the size + + ## Write Exact.pl + { + my @OUT = ( + $HEADER, + "##\n", + "## Data in this file used by ../utf8_heavy.pl\n", + "##\n\n", + "## Mapping from name to filename in ./lib\n", + "%utf8::Exact = (\n", + ); - ## - ## Now write In.pl / Is.pl - ## - if (not open OUT, ">$Type.pl") { - die "$0: $Type.pl: $!\n"; - } - print OUT $HEADER; - print OUT "##\n"; - print OUT "## Data in this file used by ../utf8_heavy.pl\n"; - print OUT "##\n"; - print OUT "\n"; - print OUT "## Mapping from name to filename in ./$Type\n"; - print OUT "%utf8::$Type = (\n"; for my $Name (sort keys %Exact) { my $File = $Exact{$Name}; - printf OUT " %-41s => %s,\n", "'$Name'", "'$File'"; + $Name = $Name =~ m/\W/ ? 
qq/'$Name'/ : " $Name "; + my $Text = sprintf("%-15s => %s,\n", $Name, qq/'$File'/); + push @OUT, Text::Tabs::unexpand($Text); } - print OUT ");\n\n"; + push @OUT, ");\n1;\n"; + + WriteIfChanged('Exact.pl', @OUT); + } - print OUT "## Mappings from regex to filename in ./$Type/\n"; - print OUT "%utf8::${Type}Pat = (\n"; - for my $Prefix (sort keys %Pat) + ## Write Canonical.pl + { + my @OUT = ( + $HEADER, + "##\n", + "## Data in this file used by ../utf8_heavy.pl\n", + "##\n\n", + "## Mapping from lc(canonical name) to filename in ./lib\n", + "%utf8::Canonical = (\n", + ); + my $Trail = ""; ## used just to keep the spacing pretty + for my $Name (sort keys %Canonical) { - print OUT " '$Prefix' => {\n"; - while (my ($Regex, $File) = each %{ $Pat{$Prefix} }) { - print OUT "\t'$Regex' => '$File',\n"; + my $File = $Canonical{$Name}; + if ($CaComment{$Name}) { + push @OUT, "\n" if not $Trail; + push @OUT, " # $CaComment{$Name}\n"; + $Trail = "\n"; + } else { + $Trail = ""; } - print OUT " },\n"; + $Name = $Name =~ m/\W/ ? 
qq/'$Name'/ : " $Name "; + my $Text = sprintf(" %-41s => %s,\n$Trail", $Name, qq/'$File'/); + push @OUT, Text::Tabs::unexpand($Text); } - print OUT ");\n"; - - close(OUT); + push @OUT, ");\n1\n"; + WriteIfChanged('Canonical.pl', @OUT); } + + MakePropTestScript() if $MakeTestScript; } + sub SpecCase_txt() { # @@ -1338,24 +1626,25 @@ sub SpecCase_txt() for my $case (qw(Lower Title Upper)) { my $NormalCase = do "To/$case.pl" || die "$0: $@\n"; - if (not open OUT, ">To/$case.pl") { - die "$0: To/$case.txt: $!"; - } - print OUT $HEADER, "\n"; - print OUT "%utf8::ToSpec$case =\n(\n"; + my @OUT = ( + $HEADER, "\n", + "%utf8::ToSpec$case =\n(\n", + ); for my $prop (sort { $a->[0] <=> $b->[0] } @{$CaseInfo{$case}}) { my ($ix, $code, $to) = @$prop; my $tostr = join "", map { sprintf "\\x{%s}", $_ } split ' ', $to; - printf OUT qq['%04X' => "$tostr",\n], $ix; + push @OUT, sprintf qq['%04X' => "$tostr",\n], $ix; } - print OUT ");\n\n"; - print OUT "return <<'END';\n"; - print OUT $NormalCase; - print OUT "END\n"; - close OUT; + push @OUT, ( + ");\n\n", + "return <<'END';\n", + $NormalCase, + "END\n" + ); + WriteIfChanged("To/$case.pl", @OUT); } } @@ -1367,7 +1656,7 @@ sub SpecCase_txt() sub CaseFold_txt() { if (not open IN, "CaseFold.txt") { - die "$0: To/Fold.pl: $!\n"; + die "$0: CaseFold.txt: $!\n"; } my $Fold = Table->New(); @@ -1393,23 +1682,25 @@ sub CaseFold_txt() # # Prepend the special foldings to the common foldings. 
# - my $CommonFold = do "To/Fold.pl" || die "$0: To/Fold.pl: $!\n"; - if (not open OUT, ">To/Fold.pl") { - die "$0: To/Fold.pl: $!\n"; - } - print OUT $HEADER, "\n"; - print OUT "%utf8::ToSpecFold =\n(\n"; + + my @OUT = ( + $HEADER, "\n", + "%utf8::ToSpecFold =\n(\n", + ); for my $code (sort { $a <=> $b } keys %Fold) { my $foldstr = join "", map { sprintf "\\x{%s}", $_ } split ' ', $Fold{$code}; - printf OUT qq['%04X' => "$foldstr",\n], $code; + push @OUT, sprintf qq['%04X' => "$foldstr",\n], $code; } - print OUT ");\n\n"; - print OUT "return <<'END';\n"; - print OUT $CommonFold; - print OUT "END\n"; - close OUT; + push @OUT, ( + ");\n\n", + "return <<'END';\n", + $CommonFold, + "END\n", + ); + + WriteIfChanged("To/Fold.pl", @OUT); } ## Do it.... @@ -1421,15 +1712,65 @@ PropList_txt(); Scripts_txt(); Blocks_txt(); +WriteAllMappings(); + LineBrk_Txt(); ArabShap_txt(); Jamo_txt(); SpecCase_txt(); +CaseFold_txt(); -WriteAllMappings(); +exit(0); -CaseFold_txt(); +## TRAILING CODE IS USED BY MakePropTestScript() +__DATA__ +use strict; +use warnings; + +my $Tests = 0; +my $Fails = 0; -# That's all, folks! +sub Expect($$$) +{ + my $Expect = shift; + my $String = shift; + my $Regex = shift; + my $Line = (caller)[2]; + + $Tests++; + my $RegObj; + my $result = eval { + $RegObj = qr/$Regex/; + $String =~ $RegObj ? 
1 : 0 + }; + + if (not defined $result) { + print "couldn't compile /$Regex/ on $0 line $Line: $@\n"; + $Fails++; + } elsif ($result ^ $Expect) { + print "bad result (expected $Expect) on $0 line $Line: $@\n"; + $Fails++; + } +} -__END__ +sub Error($) +{ + my $Regex = shift; + $Tests++; + if (eval { 'x' =~ qr/$Regex/; 1 }) { + $Fails++; + my $Line = (caller)[2]; + print "expected error for /$Regex/ on $0 line $Line: $@\n"; + } +} + +sub Finished() +{ + if ($Fails == 0) { + print "All $Tests tests passed.\n"; + exit(0); + } else { + print "$Tests tests, $Fails failed!\n"; + exit(-1); + } +} diff --git a/lib/utf8_heavy.pl b/lib/utf8_heavy.pl index 3f14afe..28e0d70 100644 --- a/lib/utf8_heavy.pl +++ b/lib/utf8_heavy.pl @@ -8,35 +8,33 @@ sub DESTROY {} sub croak { require Carp; Carp::croak(@_) } +my %Cache; + +## +## "SWASH" == "SWATCH HASH". A "swatch" is a swatch of the Unicode landscape +## + sub SWASHNEW { my ($class, $type, $list, $minbits, $none) = @_; local $^D = 0 if $^D; print STDERR "SWASHNEW @_\n" if DEBUG; - ## check to see if we've already got it. - { - no strict 'refs'; - if ($type and ref ${"${class}::{$type}"} eq $class) { - warn qq/Found \${"${class}::{$type}"}\n/ if DEBUG; - return ${"${class}::{$type}"}; - } - } - ## ## Get the list of codepoints for the type. ## Called from utf8.c ## ## Given a $type, our goal is to fill $list with the set of codepoint - ## ranges. As we try various interpretations of $type, sometimes we'll - ## end up with the $list directly, and sometimes we'll end up with a - ## $file name that holds the list data. + ## ranges. ## ## To make the parsing of $type clear, this code takes the a rather ## unorthadox approach of last'ing out of the block once we have the ## info we need. Were this to be a subroutine, the 'last' would just ## be a 'return'. ## + my $file; ## file to load data from, and also part of the %Cache key. 
+ my $ListSorted = 0; + if ($type) { $type =~ s/^\s+//; @@ -44,131 +42,43 @@ sub SWASHNEW { print "type = $type\n" if DEBUG; - my $file; - ## Figure out what file to load to get the data.... GETFILE: { ## - ## First, see if it's an "Is" name (the 'Is' is optional) + ## 'Is' is always optional, so if it's there, remove it. + ## Same with 'Category=' and 'Script='. ## - ## Because we check "Is" names first, they have precidence over - ## "In" names. For example, "Greek" is both a script and a - ## block. "IsGreek" always gets the script, while "InGreek" - ## always gets the block. "Greek" gets the script because we - ## check "Is" names first. + ## 'Block=' is replaced by 'In'. ## - if ($type =~ m{^ - ## "Is" prefix, or "Script=" or "Category=" - (?: Is [- _]? | (?:Script|Category)\s*=\s* )? - ## name to check in the "Is" symbol table. - ([A-Z].*) - $ - }ix) - { - my $istype = $1; - ## - ## Input ($type) Name To Check ($istype) - ## ------------- ----------------------- - ## IsLu Lu - ## Lu Lu - ## Category = Lu Lu - ## Foo Foo - ## Script = Greek Greek - ## - - print "istype = $istype\n" if DEBUG; - - ## Load "Is" mapping data, if not yet loaded. - do "unicore/Is.pl" if not defined %utf8::Is; - - ## - ## If the "Is" mapping data has an exact match, it points - ## to the file we need. - ## - if (exists $utf8::Is{$istype}) - { - $file = "unicore/Is/$utf8::Is{$istype}.pl"; - last GETFILE; - } - - ## - ## Need to look at %utf8::IsPat (loaded from "unicore/Is.pl") - ## to see if there's a regex that matches this $istype. - ## If so, the associated name is the file we need. 
- ## - my $prefix = substr(lc($istype), 0, 2); - if (my $hashref = $utf8::IsPat{$prefix}) - { - while (my ($pat, $name) = each %{$hashref}) - { - print "isprefix = $prefix, Is = $istype, pat = $pat\n" if DEBUG; - ## - ## The following regex probably need not be cached, - ## since every time there's a match, the results of - ## the entire call to SWASHNEW() is cached, so there's - ## a very limited number of times any one $pat will - ## be evaluated as a regex, at least with "reasonable" - ## code that doesn't try a baziilion \p{Random} names. - ## - if ($istype =~ /^$pat$/i) - { - $file = "unicore/Is/$name.pl"; - keys %{$hashref}; ## reset the 'each' above - last GETFILE; - } - } - } + $type =~ s/^Is(?:\s+|[-_])?//i + or + $type =~ s/^Category\s*=\s*//i + or + $type =~ s/^Script\s*=\s*//i + or + $type =~ s/^Block\s*=\s*/In/i; + + ## + ## See if it's in the direct mapping table. + ## + require "unicore/Exact.pl"; + if (my $base = $utf8::Exact{$type}) { + $file = "unicore/lib/$base.pl"; + last GETFILE; } ## - ## Couldn't find via "Is" -- let's try via "In"..... + ## If not there exactly, try the canonical form. The canonical + ## form is lowercased, with any separators (\s+|[-_]) removed. ## - if ($type =~ m{^ - ( In(?!herited$)[- _]? | Block\s*=\s*)? - ([A-Z].*) - $ - }xi) - { - my $intype = $2; - print "intype = $intype\n" if DEBUG; - - ## - ## Input ($type) Name To Check ($intype) - ## ------------- ----------------------- - ## Inherited Inherited - ## InGreek Greek - ## Block = Greek Greek - ## - - ## Load "In" mapping data, if not yet loaded. - do "unicore/In.pl" if not defined %utf8::In; - - ## If there's a direct match, it points to the file we need - if (exists $utf8::In{$intype}) { - $file = "unicore/In/$utf8::In{$intype}.pl"; - last GETFILE; - } - - ## - ## Need to look at %utf8::InPat (loaded from "unicore/In.pl") - ## to see if there's a regex that matches this $intype. - ## If so, the associated name is the file we need. 
- ## - my $prefix = substr(lc($intype), 0, 2); - if (my $hashref = $utf8::InPat{$prefix}) - { - print "inprefix = $prefix, In = $intype\n" if DEBUG; - while (my ($pat, $name) = each %{$hashref}) - { - print "inprefix = $prefix, In = $intype, k = $pat\n" if DEBUG; - if ($intype =~ /^$pat$/i) { - $file = "unicore/In/$name.pl"; - print "inprefix = $prefix, In = $intype, k = $pat, file = $file\n" if DEBUG; - keys %{$hashref}; ## reset the 'each' above - last GETFILE; - } - } - } + my $canonical = lc $type; + $canonical =~ s/(?<=[a-z\d])(?:\s+|[-_])(?=[a-z\d])//g; + print "canonical = $canonical\n" if DEBUG; + + require "unicore/Canonical.pl"; + if (my $base = $utf8::Canonical{$canonical}) { + $file = "unicore/lib/$base.pl"; + last GETFILE; } ## @@ -188,16 +98,28 @@ sub SWASHNEW { croak("Can't find Unicode character property \"$type\""); } + print "found it (file='$file')\n" if DEBUG; + ## ## If we reach here, it was due to a 'last GETFILE' above, so we - ## have a filename, so now we load it. + ## have a filename, so now we load it if we haven't already. + ## If we have, return the cached results. The cache key is the + ## file to load. ## + if ($Cache{$file} and ref($Cache{$file}) eq $class) + { + print "Returning cached '$file' for \\p{$type}\n" if DEBUG; + return $Cache{$class, $file}; + } + $list = do $file; + $ListSorted = 1; ## we know that these lists are sorted } my $extras; my $bits; + my $ORIG = $list; if ($list) { my @tmp = split(/^/m, $list); my %seen; @@ -247,8 +169,7 @@ sub SWASHNEW { print STDERR "CLASS = $class, TYPE => $type, BITS => $bits, NONE => $none\nEXTRAS =>\n$extras\nLIST =>\n$list\n" if DEBUG; - no strict 'refs'; - ${"${class}::{$type}"} = bless { + my $SWASH = bless { TYPE => $type, BITS => $bits, EXTRAS => $extras, @@ -256,6 +177,12 @@ sub SWASHNEW { NONE => $none, @extras, } => $class; + + if ($file) { + $Cache{$class, $file} = $SWASH; + } + + return $SWASH; } # NOTE: utf8.c:swash_init() assumes entries are never modified once generated.