lib/unicore/Bidirectional.pl Unicode character database
lib/unicore/Blocks.pl Unicode character database
lib/unicore/Blocks.txt Unicode character database
+lib/unicore/Canonical.pl Unicode character database
lib/unicore/CaseFold.txt Unicode character database
lib/unicore/Category.pl Unicode character database
lib/unicore/CombiningClass.pl Unicode character database
lib/unicore/CompExcl.txt Unicode character database
lib/unicore/Decomposition.pl Unicode character database
lib/unicore/EAWidth.txt Unicode character database
-lib/unicore/In.pl Unicode character database
-lib/unicore/In/Alphabet.pl Unicode character database
-lib/unicore/In/Arabic.pl Unicode character database
-lib/unicore/In/ArabicP2.pl Unicode character database
-lib/unicore/In/ArabicPr.pl Unicode character database
-lib/unicore/In/Armenian.pl Unicode character database
-lib/unicore/In/Arrows.pl Unicode character database
-lib/unicore/In/BasicLat.pl Unicode character database
-lib/unicore/In/Bengali.pl Unicode character database
-lib/unicore/In/BlockEle.pl Unicode character database
-lib/unicore/In/Bopomof2.pl Unicode character database
-lib/unicore/In/Bopomofo.pl Unicode character database
-lib/unicore/In/BoxDrawi.pl Unicode character database
-lib/unicore/In/BrailleP.pl Unicode character database
-lib/unicore/In/Byzantin.pl Unicode character database
-lib/unicore/In/Cherokee.pl Unicode character database
-lib/unicore/In/CjkComp2.pl Unicode character database
-lib/unicore/In/CjkComp3.pl Unicode character database
-lib/unicore/In/CjkComp4.pl Unicode character database
-lib/unicore/In/CjkCompa.pl Unicode character database
-lib/unicore/In/CjkRadic.pl Unicode character database
-lib/unicore/In/CjkSymbo.pl Unicode character database
-lib/unicore/In/CjkUnif2.pl Unicode character database
-lib/unicore/In/CjkUnif3.pl Unicode character database
-lib/unicore/In/CjkUnifi.pl Unicode character database
-lib/unicore/In/Combini2.pl Unicode character database
-lib/unicore/In/Combini3.pl Unicode character database
-lib/unicore/In/Combinin.pl Unicode character database
-lib/unicore/In/ControlP.pl Unicode character database
-lib/unicore/In/Currency.pl Unicode character database
-lib/unicore/In/Cyrillic.pl Unicode character database
-lib/unicore/In/Deseret.pl Unicode character database
-lib/unicore/In/Devanaga.pl Unicode character database
-lib/unicore/In/Dingbats.pl Unicode character database
-lib/unicore/In/Enclose2.pl Unicode character database
-lib/unicore/In/Enclosed.pl Unicode character database
-lib/unicore/In/Ethiopic.pl Unicode character database
-lib/unicore/In/GeneralP.pl Unicode character database
-lib/unicore/In/Geometri.pl Unicode character database
-lib/unicore/In/Georgian.pl Unicode character database
-lib/unicore/In/Gothic.pl Unicode character database
-lib/unicore/In/Greek.pl Unicode character database
-lib/unicore/In/GreekExt.pl Unicode character database
-lib/unicore/In/Gujarati.pl Unicode character database
-lib/unicore/In/Gurmukhi.pl Unicode character database
-lib/unicore/In/Halfwidt.pl Unicode character database
-lib/unicore/In/HangulCo.pl Unicode character database
-lib/unicore/In/HangulJa.pl Unicode character database
-lib/unicore/In/HangulSy.pl Unicode character database
-lib/unicore/In/Hebrew.pl Unicode character database
-lib/unicore/In/HighPriv.pl Unicode character database
-lib/unicore/In/HighSurr.pl Unicode character database
-lib/unicore/In/Hiragana.pl Unicode character database
-lib/unicore/In/Ideograp.pl Unicode character database
-lib/unicore/In/IpaExten.pl Unicode character database
-lib/unicore/In/Kanbun.pl Unicode character database
-lib/unicore/In/KangxiRa.pl Unicode character database
-lib/unicore/In/Kannada.pl Unicode character database
-lib/unicore/In/Katakana.pl Unicode character database
-lib/unicore/In/Khmer.pl Unicode character database
-lib/unicore/In/Lao.pl Unicode character database
-lib/unicore/In/Latin1Su.pl Unicode character database
-lib/unicore/In/LatinEx2.pl Unicode character database
-lib/unicore/In/LatinEx3.pl Unicode character database
-lib/unicore/In/LatinExt.pl Unicode character database
-lib/unicore/In/Letterli.pl Unicode character database
-lib/unicore/In/LowSurro.pl Unicode character database
-lib/unicore/In/Malayala.pl Unicode character database
-lib/unicore/In/Mathema2.pl Unicode character database
-lib/unicore/In/Mathemat.pl Unicode character database
-lib/unicore/In/Miscell2.pl Unicode character database
-lib/unicore/In/Miscella.pl Unicode character database
-lib/unicore/In/Mongolia.pl Unicode character database
-lib/unicore/In/MusicalS.pl Unicode character database
-lib/unicore/In/Myanmar.pl Unicode character database
-lib/unicore/In/NumberFo.pl Unicode character database
-lib/unicore/In/Ogham.pl Unicode character database
-lib/unicore/In/OldItali.pl Unicode character database
-lib/unicore/In/OpticalC.pl Unicode character database
-lib/unicore/In/Oriya.pl Unicode character database
-lib/unicore/In/PrivateU.pl Unicode character database
-lib/unicore/In/Runic.pl Unicode character database
-lib/unicore/In/Sinhala.pl Unicode character database
-lib/unicore/In/SmallFor.pl Unicode character database
-lib/unicore/In/SpacingM.pl Unicode character database
-lib/unicore/In/Specials.pl Unicode character database
-lib/unicore/In/Superscr.pl Unicode character database
-lib/unicore/In/Syriac.pl Unicode character database
-lib/unicore/In/Tags.pl Unicode character database
-lib/unicore/In/Tamil.pl Unicode character database
-lib/unicore/In/Telugu.pl Unicode character database
-lib/unicore/In/Thaana.pl Unicode character database
-lib/unicore/In/Thai.pl Unicode character database
-lib/unicore/In/Tibetan.pl Unicode character database
-lib/unicore/In/UnifiedC.pl Unicode character database
-lib/unicore/In/YiRadica.pl Unicode character database
-lib/unicore/In/YiSyllab.pl Unicode character database
+lib/unicore/Exact.pl Unicode character database
lib/unicore/Index.txt Unicode character database
-lib/unicore/Is.pl Unicode character database
-lib/unicore/Is/_CanonDC.pl Unicode character database
-lib/unicore/Is/_CaseIgn.pl Unicode character database
-lib/unicore/Is/_CombAbo.pl Unicode character database
-lib/unicore/Is/Alnum.pl Unicode character database
-lib/unicore/Is/Alpha.pl Unicode character database
-lib/unicore/Is/Alphabet.pl Unicode character database
-lib/unicore/Is/Any.pl Unicode character database
-lib/unicore/Is/Arabic.pl Unicode character database
-lib/unicore/Is/Armenian.pl Unicode character database
-lib/unicore/Is/ASCII.pl Unicode character database
-lib/unicore/Is/AsciiHex.pl Unicode character database
-lib/unicore/Is/Assigned.pl Unicode character database
-lib/unicore/Is/Bengali.pl Unicode character database
-lib/unicore/Is/BidiAL.pl Unicode character database
-lib/unicore/Is/BidiAN.pl Unicode character database
-lib/unicore/Is/BidiB.pl Unicode character database
-lib/unicore/Is/BidiBN.pl Unicode character database
-lib/unicore/Is/BidiCont.pl Unicode character database
-lib/unicore/Is/BidiCS.pl Unicode character database
-lib/unicore/Is/BidiEN.pl Unicode character database
-lib/unicore/Is/BidiES.pl Unicode character database
-lib/unicore/Is/BidiET.pl Unicode character database
-lib/unicore/Is/BidiL.pl Unicode character database
-lib/unicore/Is/BidiLRE.pl Unicode character database
-lib/unicore/Is/BidiLRO.pl Unicode character database
-lib/unicore/Is/BidiNSM.pl Unicode character database
-lib/unicore/Is/BidiON.pl Unicode character database
-lib/unicore/Is/BidiPDF.pl Unicode character database
-lib/unicore/Is/BidiR.pl Unicode character database
-lib/unicore/Is/BidiRLE.pl Unicode character database
-lib/unicore/Is/BidiRLO.pl Unicode character database
-lib/unicore/Is/BidiS.pl Unicode character database
-lib/unicore/Is/BidiWS.pl Unicode character database
-lib/unicore/Is/Blank.pl Unicode character database
-lib/unicore/Is/Bopomofo.pl Unicode character database
-lib/unicore/Is/C.pl Unicode character database
-lib/unicore/Is/Canadian.pl Unicode character database
-lib/unicore/Is/Canon.pl Unicode character database
-lib/unicore/Is/Cc.pl Unicode character database
-lib/unicore/Is/Cf.pl Unicode character database
-lib/unicore/Is/Cherokee.pl Unicode character database
-lib/unicore/Is/Cn.pl Unicode character database
-lib/unicore/Is/Cntrl.pl Unicode character database
-lib/unicore/Is/Co.pl Unicode character database
-lib/unicore/Is/Common.pl Unicode character database
-lib/unicore/Is/Compat.pl Unicode character database
-lib/unicore/Is/Cs.pl Unicode character database
-lib/unicore/Is/Cyrillic.pl Unicode character database
-lib/unicore/Is/Dash.pl Unicode character database
-lib/unicore/Is/DCcircle.pl Unicode character database
-lib/unicore/Is/DCcompat.pl Unicode character database
-lib/unicore/Is/DCfinal.pl Unicode character database
-lib/unicore/Is/DCfont.pl Unicode character database
-lib/unicore/Is/DCfracti.pl Unicode character database
-lib/unicore/Is/DCinitia.pl Unicode character database
-lib/unicore/Is/DCisolat.pl Unicode character database
-lib/unicore/Is/DCmedial.pl Unicode character database
-lib/unicore/Is/DCnarrow.pl Unicode character database
-lib/unicore/Is/DCnoBrea.pl Unicode character database
-lib/unicore/Is/DCsmall.pl Unicode character database
-lib/unicore/Is/DCsquare.pl Unicode character database
-lib/unicore/Is/DCsub.pl Unicode character database
-lib/unicore/Is/DCsuper.pl Unicode character database
-lib/unicore/Is/DCvertic.pl Unicode character database
-lib/unicore/Is/DCwide.pl Unicode character database
-lib/unicore/Is/Deseret.pl Unicode character database
-lib/unicore/Is/Devanaga.pl Unicode character database
-lib/unicore/Is/Diacriti.pl Unicode character database
-lib/unicore/Is/Digit.pl Unicode character database
-lib/unicore/Is/Ethiopic.pl Unicode character database
-lib/unicore/Is/Extender.pl Unicode character database
-lib/unicore/Is/Georgian.pl Unicode character database
-lib/unicore/Is/Gothic.pl Unicode character database
-lib/unicore/Is/Graph.pl Unicode character database
-lib/unicore/Is/Greek.pl Unicode character database
-lib/unicore/Is/Gujarati.pl Unicode character database
-lib/unicore/Is/Gurmukhi.pl Unicode character database
-lib/unicore/Is/Han.pl Unicode character database
-lib/unicore/Is/Hangul.pl Unicode character database
-lib/unicore/Is/Hebrew.pl Unicode character database
-lib/unicore/Is/HexDigit.pl Unicode character database
-lib/unicore/Is/Hiragana.pl Unicode character database
-lib/unicore/Is/Hyphen.pl Unicode character database
-lib/unicore/Is/IdContin.pl Unicode character database
-lib/unicore/Is/Ideograp.pl Unicode character database
-lib/unicore/Is/IdStart.pl Unicode character database
-lib/unicore/Is/Inherite.pl Unicode character database
-lib/unicore/Is/JoinCont.pl Unicode character database
-lib/unicore/Is/Kannada.pl Unicode character database
-lib/unicore/Is/Katakana.pl Unicode character database
-lib/unicore/Is/Khmer.pl Unicode character database
-lib/unicore/Is/L.pl Unicode character database
-lib/unicore/Is/L_.pl Unicode character database
-lib/unicore/Is/Lao.pl Unicode character database
-lib/unicore/Is/Latin.pl Unicode character database
-lib/unicore/Is/LbrkAI.pl Unicode character database
-lib/unicore/Is/LbrkAL.pl Unicode character database
-lib/unicore/Is/LbrkB2.pl Unicode character database
-lib/unicore/Is/LbrkBA.pl Unicode character database
-lib/unicore/Is/LbrkBB.pl Unicode character database
-lib/unicore/Is/LbrkBK.pl Unicode character database
-lib/unicore/Is/LbrkCB.pl Unicode character database
-lib/unicore/Is/LbrkCL.pl Unicode character database
-lib/unicore/Is/LbrkCM.pl Unicode character database
-lib/unicore/Is/LbrkCR.pl Unicode character database
-lib/unicore/Is/LbrkEX.pl Unicode character database
-lib/unicore/Is/LbrkGL.pl Unicode character database
-lib/unicore/Is/LbrkHY.pl Unicode character database
-lib/unicore/Is/LbrkID.pl Unicode character database
-lib/unicore/Is/LbrkIN.pl Unicode character database
-lib/unicore/Is/LbrkIS.pl Unicode character database
-lib/unicore/Is/LbrkLF.pl Unicode character database
-lib/unicore/Is/LbrkNS.pl Unicode character database
-lib/unicore/Is/LbrkNU.pl Unicode character database
-lib/unicore/Is/LbrkOP.pl Unicode character database
-lib/unicore/Is/LbrkPO.pl Unicode character database
-lib/unicore/Is/LbrkPR.pl Unicode character database
-lib/unicore/Is/LbrkQU.pl Unicode character database
-lib/unicore/Is/LbrkSA.pl Unicode character database
-lib/unicore/Is/LbrkSG.pl Unicode character database
-lib/unicore/Is/LbrkSP.pl Unicode character database
-lib/unicore/Is/LbrkSY.pl Unicode character database
-lib/unicore/Is/LbrkXX.pl Unicode character database
-lib/unicore/Is/LbrkZW.pl Unicode character database
-lib/unicore/Is/Ll.pl Unicode character database
-lib/unicore/Is/Lm.pl Unicode character database
-lib/unicore/Is/Lo.pl Unicode character database
-lib/unicore/Is/Lower.pl Unicode character database
-lib/unicore/Is/Lowercas.pl Unicode character database
-lib/unicore/Is/Lt.pl Unicode character database
-lib/unicore/Is/Lu.pl Unicode character database
-lib/unicore/Is/M.pl Unicode character database
-lib/unicore/Is/Malayala.pl Unicode character database
-lib/unicore/Is/Math.pl Unicode character database
-lib/unicore/Is/Mc.pl Unicode character database
-lib/unicore/Is/Me.pl Unicode character database
-lib/unicore/Is/Mirrored.pl Unicode character database
-lib/unicore/Is/Mn.pl Unicode character database
-lib/unicore/Is/Mongolia.pl Unicode character database
-lib/unicore/Is/Myanmar.pl Unicode character database
-lib/unicore/Is/N.pl Unicode character database
-lib/unicore/Is/Nd.pl Unicode character database
-lib/unicore/Is/Nl.pl Unicode character database
-lib/unicore/Is/No.pl Unicode character database
-lib/unicore/Is/Nonchara.pl Unicode character database
-lib/unicore/Is/Ogham.pl Unicode character database
-lib/unicore/Is/OldItali.pl Unicode character database
-lib/unicore/Is/Oriya.pl Unicode character database
-lib/unicore/Is/OtherAlp.pl Unicode character database
-lib/unicore/Is/OtherLow.pl Unicode character database
-lib/unicore/Is/OtherMat.pl Unicode character database
-lib/unicore/Is/OtherUpp.pl Unicode character database
-lib/unicore/Is/P.pl Unicode character database
-lib/unicore/Is/Pc.pl Unicode character database
-lib/unicore/Is/Pd.pl Unicode character database
-lib/unicore/Is/Pe.pl Unicode character database
-lib/unicore/Is/Pf.pl Unicode character database
-lib/unicore/Is/Pi.pl Unicode character database
-lib/unicore/Is/Po.pl Unicode character database
-lib/unicore/Is/Print.pl Unicode character database
-lib/unicore/Is/Ps.pl Unicode character database
-lib/unicore/Is/Punct.pl Unicode character database
-lib/unicore/Is/Quotatio.pl Unicode character database
-lib/unicore/Is/Runic.pl Unicode character database
-lib/unicore/Is/S.pl Unicode character database
-lib/unicore/Is/Sc.pl Unicode character database
-lib/unicore/Is/Sinhala.pl Unicode character database
-lib/unicore/Is/Sk.pl Unicode character database
-lib/unicore/Is/Sm.pl Unicode character database
-lib/unicore/Is/So.pl Unicode character database
-lib/unicore/Is/Space.pl Unicode character database
-lib/unicore/Is/SpacePer.pl Unicode character database
-lib/unicore/Is/Syriac.pl Unicode character database
-lib/unicore/Is/Tamil.pl Unicode character database
-lib/unicore/Is/Telugu.pl Unicode character database
-lib/unicore/Is/Terminal.pl Unicode character database
-lib/unicore/Is/Thaana.pl Unicode character database
-lib/unicore/Is/Thai.pl Unicode character database
-lib/unicore/Is/Tibetan.pl Unicode character database
-lib/unicore/Is/Title.pl Unicode character database
-lib/unicore/Is/Upper.pl Unicode character database
-lib/unicore/Is/Uppercas.pl Unicode character database
-lib/unicore/Is/WhiteSpa.pl Unicode character database
-lib/unicore/Is/Word.pl Unicode character database
-lib/unicore/Is/XDigit.pl Unicode character database
-lib/unicore/Is/Yi.pl Unicode character database
-lib/unicore/Is/Z.pl Unicode character database
-lib/unicore/Is/Zl.pl Unicode character database
-lib/unicore/Is/Zp.pl Unicode character database
-lib/unicore/Is/Zs.pl Unicode character database
lib/unicore/Jamo.txt Unicode character database
lib/unicore/JamoShort.pl Unicode character database
lib/unicore/Lbrk.pl Unicode character database
+lib/unicore/lib/Alnum.pl Unicode character database
+lib/unicore/lib/Alpha.pl Unicode character database
+lib/unicore/lib/Alphabet.pl Unicode character database
+lib/unicore/lib/Any.pl Unicode character database
+lib/unicore/lib/Arabic.pl Unicode character database
+lib/unicore/lib/Armenian.pl Unicode character database
+lib/unicore/lib/ASCII.pl Unicode character database
+lib/unicore/lib/AsciiHex.pl Unicode character database
+lib/unicore/lib/Assigned.pl Unicode character database
+lib/unicore/lib/Bengali.pl Unicode character database
+lib/unicore/lib/BidiAL.pl Unicode character database
+lib/unicore/lib/BidiAN.pl Unicode character database
+lib/unicore/lib/BidiB.pl Unicode character database
+lib/unicore/lib/BidiBN.pl Unicode character database
+lib/unicore/lib/BidiCont.pl Unicode character database
+lib/unicore/lib/BidiCS.pl Unicode character database
+lib/unicore/lib/BidiEN.pl Unicode character database
+lib/unicore/lib/BidiES.pl Unicode character database
+lib/unicore/lib/BidiET.pl Unicode character database
+lib/unicore/lib/BidiL.pl Unicode character database
+lib/unicore/lib/BidiLRE.pl Unicode character database
+lib/unicore/lib/BidiLRO.pl Unicode character database
+lib/unicore/lib/BidiNSM.pl Unicode character database
+lib/unicore/lib/BidiON.pl Unicode character database
+lib/unicore/lib/BidiPDF.pl Unicode character database
+lib/unicore/lib/BidiR.pl Unicode character database
+lib/unicore/lib/BidiRLE.pl Unicode character database
+lib/unicore/lib/BidiRLO.pl Unicode character database
+lib/unicore/lib/BidiS.pl Unicode character database
+lib/unicore/lib/BidiWS.pl Unicode character database
+lib/unicore/lib/Blank.pl Unicode character database
+lib/unicore/lib/Bopomofo.pl Unicode character database
+lib/unicore/lib/C.pl Unicode character database
+lib/unicore/lib/Canadian.pl Unicode character database
+lib/unicore/lib/Canon.pl Unicode character database
+lib/unicore/lib/Cc.pl Unicode character database
+lib/unicore/lib/Cf.pl Unicode character database
+lib/unicore/lib/Cherokee.pl Unicode character database
+lib/unicore/lib/Cn.pl Unicode character database
+lib/unicore/lib/Cntrl.pl Unicode character database
+lib/unicore/lib/Co.pl Unicode character database
+lib/unicore/lib/Common.pl Unicode character database
+lib/unicore/lib/Compat.pl Unicode character database
+lib/unicore/lib/Cs.pl Unicode character database
+lib/unicore/lib/Cyrillic.pl Unicode character database
+lib/unicore/lib/Dash.pl Unicode character database
+lib/unicore/lib/DCcircle.pl Unicode character database
+lib/unicore/lib/DCcompat.pl Unicode character database
+lib/unicore/lib/DCfinal.pl Unicode character database
+lib/unicore/lib/DCfont.pl Unicode character database
+lib/unicore/lib/DCfracti.pl Unicode character database
+lib/unicore/lib/DCinitia.pl Unicode character database
+lib/unicore/lib/DCisolat.pl Unicode character database
+lib/unicore/lib/DCmedial.pl Unicode character database
+lib/unicore/lib/DCnarrow.pl Unicode character database
+lib/unicore/lib/DCnoBrea.pl Unicode character database
+lib/unicore/lib/DCsmall.pl Unicode character database
+lib/unicore/lib/DCsquare.pl Unicode character database
+lib/unicore/lib/DCsub.pl Unicode character database
+lib/unicore/lib/DCsuper.pl Unicode character database
+lib/unicore/lib/DCvertic.pl Unicode character database
+lib/unicore/lib/DCwide.pl Unicode character database
+lib/unicore/lib/Deseret.pl Unicode character database
+lib/unicore/lib/Devanaga.pl Unicode character database
+lib/unicore/lib/Diacriti.pl Unicode character database
+lib/unicore/lib/Digit.pl Unicode character database
+lib/unicore/lib/Ethiopic.pl Unicode character database
+lib/unicore/lib/Extender.pl Unicode character database
+lib/unicore/lib/Georgian.pl Unicode character database
+lib/unicore/lib/Gothic.pl Unicode character database
+lib/unicore/lib/Graph.pl Unicode character database
+lib/unicore/lib/Greek.pl Unicode character database
+lib/unicore/lib/Gujarati.pl Unicode character database
+lib/unicore/lib/Gurmukhi.pl Unicode character database
+lib/unicore/lib/Han.pl Unicode character database
+lib/unicore/lib/Hangul.pl Unicode character database
+lib/unicore/lib/Hebrew.pl Unicode character database
+lib/unicore/lib/HexDigit.pl Unicode character database
+lib/unicore/lib/Hiragana.pl Unicode character database
+lib/unicore/lib/Hyphen.pl Unicode character database
+lib/unicore/lib/IdContin.pl Unicode character database
+lib/unicore/lib/Ideograp.pl Unicode character database
+lib/unicore/lib/IdStart.pl Unicode character database
+lib/unicore/lib/InAlphab.pl Unicode character database
+lib/unicore/lib/InArabi2.pl Unicode character database
+lib/unicore/lib/InArabi3.pl Unicode character database
+lib/unicore/lib/InArabic.pl Unicode character database
+lib/unicore/lib/InArmeni.pl Unicode character database
+lib/unicore/lib/InArrows.pl Unicode character database
+lib/unicore/lib/InBasicL.pl Unicode character database
+lib/unicore/lib/InBengal.pl Unicode character database
+lib/unicore/lib/InBlockE.pl Unicode character database
+lib/unicore/lib/InBopom2.pl Unicode character database
+lib/unicore/lib/InBopomo.pl Unicode character database
+lib/unicore/lib/InBoxDra.pl Unicode character database
+lib/unicore/lib/InBraill.pl Unicode character database
+lib/unicore/lib/InByzant.pl Unicode character database
+lib/unicore/lib/InCherok.pl Unicode character database
+lib/unicore/lib/InCjkCo2.pl Unicode character database
+lib/unicore/lib/InCjkCo3.pl Unicode character database
+lib/unicore/lib/InCjkCo4.pl Unicode character database
+lib/unicore/lib/InCjkCom.pl Unicode character database
+lib/unicore/lib/InCjkRad.pl Unicode character database
+lib/unicore/lib/InCjkSym.pl Unicode character database
+lib/unicore/lib/InCjkUn2.pl Unicode character database
+lib/unicore/lib/InCjkUn3.pl Unicode character database
+lib/unicore/lib/InCjkUni.pl Unicode character database
+lib/unicore/lib/InCombi2.pl Unicode character database
+lib/unicore/lib/InCombi3.pl Unicode character database
+lib/unicore/lib/InCombin.pl Unicode character database
+lib/unicore/lib/InContro.pl Unicode character database
+lib/unicore/lib/InCurren.pl Unicode character database
+lib/unicore/lib/InCyrill.pl Unicode character database
+lib/unicore/lib/InDesere.pl Unicode character database
+lib/unicore/lib/InDevana.pl Unicode character database
+lib/unicore/lib/InDingba.pl Unicode character database
+lib/unicore/lib/InEnclo2.pl Unicode character database
+lib/unicore/lib/InEnclos.pl Unicode character database
+lib/unicore/lib/InEthiop.pl Unicode character database
+lib/unicore/lib/InGenera.pl Unicode character database
+lib/unicore/lib/InGeomet.pl Unicode character database
+lib/unicore/lib/InGeorgi.pl Unicode character database
+lib/unicore/lib/InGothic.pl Unicode character database
+lib/unicore/lib/InGreek.pl Unicode character database
+lib/unicore/lib/InGreekE.pl Unicode character database
+lib/unicore/lib/InGujara.pl Unicode character database
+lib/unicore/lib/InGurmuk.pl Unicode character database
+lib/unicore/lib/InHalfwi.pl Unicode character database
+lib/unicore/lib/InHangu2.pl Unicode character database
+lib/unicore/lib/InHangu3.pl Unicode character database
+lib/unicore/lib/InHangul.pl Unicode character database
+lib/unicore/lib/InHebrew.pl Unicode character database
+lib/unicore/lib/Inherite.pl Unicode character database
+lib/unicore/lib/InHighPr.pl Unicode character database
+lib/unicore/lib/InHighSu.pl Unicode character database
+lib/unicore/lib/InHiraga.pl Unicode character database
+lib/unicore/lib/InIdeogr.pl Unicode character database
+lib/unicore/lib/InIpaExt.pl Unicode character database
+lib/unicore/lib/InKanbun.pl Unicode character database
+lib/unicore/lib/InKangxi.pl Unicode character database
+lib/unicore/lib/InKannad.pl Unicode character database
+lib/unicore/lib/InKataka.pl Unicode character database
+lib/unicore/lib/InKhmer.pl Unicode character database
+lib/unicore/lib/InLao.pl Unicode character database
+lib/unicore/lib/InLatin1.pl Unicode character database
+lib/unicore/lib/InLatin2.pl Unicode character database
+lib/unicore/lib/InLatin3.pl Unicode character database
+lib/unicore/lib/InLatinE.pl Unicode character database
+lib/unicore/lib/InLetter.pl Unicode character database
+lib/unicore/lib/InLowSur.pl Unicode character database
+lib/unicore/lib/InMalaya.pl Unicode character database
+lib/unicore/lib/InMathe2.pl Unicode character database
+lib/unicore/lib/InMathem.pl Unicode character database
+lib/unicore/lib/InMisce2.pl Unicode character database
+lib/unicore/lib/InMiscel.pl Unicode character database
+lib/unicore/lib/InMongol.pl Unicode character database
+lib/unicore/lib/InMusica.pl Unicode character database
+lib/unicore/lib/InMyanma.pl Unicode character database
+lib/unicore/lib/InNumber.pl Unicode character database
+lib/unicore/lib/InOgham.pl Unicode character database
+lib/unicore/lib/InOldIta.pl Unicode character database
+lib/unicore/lib/InOptica.pl Unicode character database
+lib/unicore/lib/InOriya.pl Unicode character database
+lib/unicore/lib/InPrivat.pl Unicode character database
+lib/unicore/lib/InRunic.pl Unicode character database
+lib/unicore/lib/InSinhal.pl Unicode character database
+lib/unicore/lib/InSmallF.pl Unicode character database
+lib/unicore/lib/InSpacin.pl Unicode character database
+lib/unicore/lib/InSpecia.pl Unicode character database
+lib/unicore/lib/InSupers.pl Unicode character database
+lib/unicore/lib/InSyriac.pl Unicode character database
+lib/unicore/lib/InTags.pl Unicode character database
+lib/unicore/lib/InTamil.pl Unicode character database
+lib/unicore/lib/InTelugu.pl Unicode character database
+lib/unicore/lib/InThaana.pl Unicode character database
+lib/unicore/lib/InThai.pl Unicode character database
+lib/unicore/lib/InTibeta.pl Unicode character database
+lib/unicore/lib/InUnifie.pl Unicode character database
+lib/unicore/lib/InYiRadi.pl Unicode character database
+lib/unicore/lib/InYiSyll.pl Unicode character database
+lib/unicore/lib/JoinCont.pl Unicode character database
+lib/unicore/lib/Kannada.pl Unicode character database
+lib/unicore/lib/Katakana.pl Unicode character database
+lib/unicore/lib/Khmer.pl Unicode character database
+lib/unicore/lib/L.pl Unicode character database
+lib/unicore/lib/Lao.pl Unicode character database
+lib/unicore/lib/Latin.pl Unicode character database
+lib/unicore/lib/Ll.pl Unicode character database
+lib/unicore/lib/Lm.pl Unicode character database
+lib/unicore/lib/Lo.pl Unicode character database
+lib/unicore/lib/Lower.pl Unicode character database
+lib/unicore/lib/Lowercas.pl Unicode character database
+lib/unicore/lib/Lt.pl Unicode character database
+lib/unicore/lib/Lu.pl Unicode character database
+lib/unicore/lib/L_.pl Unicode character database
+lib/unicore/lib/M.pl Unicode character database
+lib/unicore/lib/Malayala.pl Unicode character database
+lib/unicore/lib/Math.pl Unicode character database
+lib/unicore/lib/Mc.pl Unicode character database
+lib/unicore/lib/Me.pl Unicode character database
+lib/unicore/lib/Mirrored.pl Unicode character database
+lib/unicore/lib/Mn.pl Unicode character database
+lib/unicore/lib/Mongolia.pl Unicode character database
+lib/unicore/lib/Myanmar.pl Unicode character database
+lib/unicore/lib/N.pl Unicode character database
+lib/unicore/lib/Nd.pl Unicode character database
+lib/unicore/lib/Nl.pl Unicode character database
+lib/unicore/lib/No.pl Unicode character database
+lib/unicore/lib/Nonchara.pl Unicode character database
+lib/unicore/lib/Ogham.pl Unicode character database
+lib/unicore/lib/OldItali.pl Unicode character database
+lib/unicore/lib/Oriya.pl Unicode character database
+lib/unicore/lib/OtherAlp.pl Unicode character database
+lib/unicore/lib/OtherLow.pl Unicode character database
+lib/unicore/lib/OtherMat.pl Unicode character database
+lib/unicore/lib/OtherUpp.pl Unicode character database
+lib/unicore/lib/P.pl Unicode character database
+lib/unicore/lib/Pc.pl Unicode character database
+lib/unicore/lib/Pd.pl Unicode character database
+lib/unicore/lib/Pe.pl Unicode character database
+lib/unicore/lib/Pf.pl Unicode character database
+lib/unicore/lib/Pi.pl Unicode character database
+lib/unicore/lib/Po.pl Unicode character database
+lib/unicore/lib/Print.pl Unicode character database
+lib/unicore/lib/Ps.pl Unicode character database
+lib/unicore/lib/Punct.pl Unicode character database
+lib/unicore/lib/Quotatio.pl Unicode character database
+lib/unicore/lib/Runic.pl Unicode character database
+lib/unicore/lib/S.pl Unicode character database
+lib/unicore/lib/Sc.pl Unicode character database
+lib/unicore/lib/Sinhala.pl Unicode character database
+lib/unicore/lib/Sk.pl Unicode character database
+lib/unicore/lib/Sm.pl Unicode character database
+lib/unicore/lib/So.pl Unicode character database
+lib/unicore/lib/Space.pl Unicode character database
+lib/unicore/lib/SpacePer.pl Unicode character database
+lib/unicore/lib/Syriac.pl Unicode character database
+lib/unicore/lib/Tamil.pl Unicode character database
+lib/unicore/lib/Telugu.pl Unicode character database
+lib/unicore/lib/Terminal.pl Unicode character database
+lib/unicore/lib/Thaana.pl Unicode character database
+lib/unicore/lib/Thai.pl Unicode character database
+lib/unicore/lib/Tibetan.pl Unicode character database
+lib/unicore/lib/Title.pl Unicode character database
+lib/unicore/lib/Upper.pl Unicode character database
+lib/unicore/lib/Uppercas.pl Unicode character database
+lib/unicore/lib/WhiteSpa.pl Unicode character database
+lib/unicore/lib/Word.pl Unicode character database
+lib/unicore/lib/XDigit.pl Unicode character database
+lib/unicore/lib/Yi.pl Unicode character database
+lib/unicore/lib/Z.pl Unicode character database
+lib/unicore/lib/Zl.pl Unicode character database
+lib/unicore/lib/Zp.pl Unicode character database
+lib/unicore/lib/Zs.pl Unicode character database
+lib/unicore/lib/_CanonDC.pl Unicode character database
+lib/unicore/lib/_CaseIgn.pl Unicode character database
+lib/unicore/lib/_CombAbo.pl Unicode character database
lib/unicore/LineBrk.txt Unicode character database
lib/unicore/Makefile Unicode character database
lib/unicore/mktables Unicode character database generator
--- /dev/null
+# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
+# This file is built by ./mktables from e.g. Unicode.txt.
+# Any changes made here will be lost!
+
+##
+## Data in this file used by ../utf8_heavy.pl
+##
+
+## Mapping from lc(canonical name) to filename in ./lib
+%utf8::Canonical = (
+ alphabetic => 'Alphabet',
+
+ # InAlphabeticPresentationForms
+ alphabeticpresentationforms => 'InAlphab',
+
+ arabic => 'Arabic',
+
+ # InArabicPresentationFormsA
+ arabicpresentationformsa => 'InArabi3',
+
+ # InArabicPresentationFormsB
+ arabicpresentationformsb => 'InArabi2',
+
+ armenian => 'Armenian',
+
+ # InArrows
+ arrows => 'InArrows',
+
+ # AsciiHexDigit
+ asciihexdigit => 'AsciiHex',
+
+ # InBasicLatin
+ basiclatin => 'InBasicL',
+
+ bengali => 'Bengali',
+
+ # BidiControl
+ bidicontrol => 'BidiCont',
+
+ # InBlockElements
+ blockelements => 'InBlockE',
+
+ bopomofo => 'Bopomofo',
+
+ # InBopomofoExtended
+ bopomofoextended => 'InBopom2',
+
+ # InBoxDrawing
+ boxdrawing => 'InBoxDra',
+
+ # InBraillePatterns
+ braillepatterns => 'InBraill',
+
+ # InByzantineMusicalSymbols
+ byzantinemusicalsymbols => 'InByzant',
+
+ # CanadianAboriginal
+ canadianaboriginal => 'Canadian',
+
+ cherokee => 'Cherokee',
+
+ # InCjkCompatibility
+ cjkcompatibility => 'InCjkCom',
+
+ # InCjkCompatibilityForms
+ cjkcompatibilityforms => 'InCjkCo2',
+
+ # InCjkCompatibilityIdeographs
+ cjkcompatibilityideographs => 'InCjkCo3',
+
+ # InCjkCompatibilityIdeographsSupplement
+ cjkcompatibilityideographssupplement => 'InCjkCo4',
+
+ # InCjkRadicalsSupplement
+ cjkradicalssupplement => 'InCjkRad',
+
+ # InCjkSymbolsAndPunctuation
+ cjksymbolsandpunctuation => 'InCjkSym',
+
+ # InCjkUnifiedIdeographs
+ cjkunifiedideographs => 'InCjkUni',
+
+ # InCjkUnifiedIdeographsExtensionA
+ cjkunifiedideographsextensiona => 'InCjkUn3',
+
+ # InCjkUnifiedIdeographsExtensionB
+ cjkunifiedideographsextensionb => 'InCjkUn2',
+
+ # ClosePunctuation
+ closepunctuation => 'Pe',
+
+ # InCombiningDiacriticalMarks
+ combiningdiacriticalmarks => 'InCombi3',
+
+ # InCombiningHalfMarks
+ combininghalfmarks => 'InCombin',
+
+ # InCombiningMarksForSymbols
+ combiningmarksforsymbols => 'InCombi2',
+
+ common => 'Common',
+
+ # ConnectorPunctuation
+ connectorpunctuation => 'Pc',
+
+ control => 'Cc',
+
+ # InControlPictures
+ controlpictures => 'InContro',
+
+ # CurrencySymbol
+ currencysymbol => 'Sc',
+
+ # InCurrencySymbols
+ currencysymbols => 'InCurren',
+
+ cyrillic => 'Cyrillic',
+ dash => 'Dash',
+
+ # DashPunctuation
+ dashpunctuation => 'Pd',
+
+ # DecimalNumber
+ decimalnumber => 'Nd',
+
+ deseret => 'Deseret',
+ devanagari => 'Devanaga',
+ diacritic => 'Diacriti',
+
+ # InDingbats
+ dingbats => 'InDingba',
+
+ # InEnclosedAlphanumerics
+ enclosedalphanumerics => 'InEnclos',
+
+ # InEnclosedCjkLettersAndMonths
+ enclosedcjklettersandmonths => 'InEnclo2',
+
+ # EnclosingMark
+ enclosingmark => 'Me',
+
+ ethiopic => 'Ethiopic',
+ extender => 'Extender',
+
+ # FinalPunctuation
+ finalpunctuation => 'Pf',
+
+ format => 'Cf',
+
+ # InGeneralPunctuation
+ generalpunctuation => 'InGenera',
+
+ # InGeometricShapes
+ geometricshapes => 'InGeomet',
+
+ georgian => 'Georgian',
+ gothic => 'Gothic',
+ greek => 'Greek',
+
+ # InGreekExtended
+ greekextended => 'InGreekE',
+
+ gujarati => 'Gujarati',
+ gurmukhi => 'Gurmukhi',
+
+ # InHalfwidthAndFullwidthForms
+ halfwidthandfullwidthforms => 'InHalfwi',
+
+ han => 'Han',
+ hangul => 'Hangul',
+
+ # InHangulCompatibilityJamo
+ hangulcompatibilityjamo => 'InHangu3',
+
+ # InHangulJamo
+ hanguljamo => 'InHangul',
+
+ # InHangulSyllables
+ hangulsyllables => 'InHangu2',
+
+ hebrew => 'Hebrew',
+
+ # HexDigit
+ hexdigit => 'HexDigit',
+
+ # InHighPrivateUseSurrogates
+ highprivateusesurrogates => 'InHighPr',
+
+ # InHighSurrogates
+ highsurrogates => 'InHighSu',
+
+ hiragana => 'Hiragana',
+ hyphen => 'Hyphen',
+
+ # IdContinue
+ idcontinue => 'IdContin',
+
+ ideographic => 'Ideograp',
+
+ # InIdeographicDescriptionCharacters
+ ideographicdescriptioncharacters => 'InIdeogr',
+
+ # IdStart
+ idstart => 'IdStart',
+
+ # InAlphabeticPresentationForms
+ inalphabeticpresentationforms => 'InAlphab',
+
+ # InArabic
+ inarabic => 'InArabic',
+
+ # InArabicPresentationFormsA
+ inarabicpresentationformsa => 'InArabi3',
+
+ # InArabicPresentationFormsB
+ inarabicpresentationformsb => 'InArabi2',
+
+ # InArmenian
+ inarmenian => 'InArmeni',
+
+ # InArrows
+ inarrows => 'InArrows',
+
+ # InBasicLatin
+ inbasiclatin => 'InBasicL',
+
+ # InBengali
+ inbengali => 'InBengal',
+
+ # InBlockElements
+ inblockelements => 'InBlockE',
+
+ # InBopomofo
+ inbopomofo => 'InBopomo',
+
+ # InBopomofoExtended
+ inbopomofoextended => 'InBopom2',
+
+ # InBoxDrawing
+ inboxdrawing => 'InBoxDra',
+
+ # InBraillePatterns
+ inbraillepatterns => 'InBraill',
+
+ # InByzantineMusicalSymbols
+ inbyzantinemusicalsymbols => 'InByzant',
+
+ # InCherokee
+ incherokee => 'InCherok',
+
+ # InCjkCompatibility
+ incjkcompatibility => 'InCjkCom',
+
+ # InCjkCompatibilityForms
+ incjkcompatibilityforms => 'InCjkCo2',
+
+ # InCjkCompatibilityIdeographs
+ incjkcompatibilityideographs => 'InCjkCo3',
+
+ # InCjkCompatibilityIdeographsSupplement
+ incjkcompatibilityideographssupplement => 'InCjkCo4',
+
+ # InCjkRadicalsSupplement
+ incjkradicalssupplement => 'InCjkRad',
+
+ # InCjkSymbolsAndPunctuation
+ incjksymbolsandpunctuation => 'InCjkSym',
+
+ # InCjkUnifiedIdeographs
+ incjkunifiedideographs => 'InCjkUni',
+
+ # InCjkUnifiedIdeographsExtensionA
+ incjkunifiedideographsextensiona => 'InCjkUn3',
+
+ # InCjkUnifiedIdeographsExtensionB
+ incjkunifiedideographsextensionb => 'InCjkUn2',
+
+ # InCombiningDiacriticalMarks
+ incombiningdiacriticalmarks => 'InCombi3',
+
+ # InCombiningHalfMarks
+ incombininghalfmarks => 'InCombin',
+
+ # InCombiningMarksForSymbols
+ incombiningmarksforsymbols => 'InCombi2',
+
+ # InControlPictures
+ incontrolpictures => 'InContro',
+
+ # InCurrencySymbols
+ incurrencysymbols => 'InCurren',
+
+ # InCyrillic
+ incyrillic => 'InCyrill',
+
+ # InDeseret
+ indeseret => 'InDesere',
+
+ # InDevanagari
+ indevanagari => 'InDevana',
+
+ # InDingbats
+ indingbats => 'InDingba',
+
+ # InEnclosedAlphanumerics
+ inenclosedalphanumerics => 'InEnclos',
+
+ # InEnclosedCjkLettersAndMonths
+ inenclosedcjklettersandmonths => 'InEnclo2',
+
+ # InEthiopic
+ inethiopic => 'InEthiop',
+
+ # InGeneralPunctuation
+ ingeneralpunctuation => 'InGenera',
+
+ # InGeometricShapes
+ ingeometricshapes => 'InGeomet',
+
+ # InGeorgian
+ ingeorgian => 'InGeorgi',
+
+ # InGothic
+ ingothic => 'InGothic',
+
+ # InGreek
+ ingreek => 'InGreek',
+
+ # InGreekExtended
+ ingreekextended => 'InGreekE',
+
+ # InGujarati
+ ingujarati => 'InGujara',
+
+ # InGurmukhi
+ ingurmukhi => 'InGurmuk',
+
+ # InHalfwidthAndFullwidthForms
+ inhalfwidthandfullwidthforms => 'InHalfwi',
+
+ # InHangulCompatibilityJamo
+ inhangulcompatibilityjamo => 'InHangu3',
+
+ # InHangulJamo
+ inhanguljamo => 'InHangul',
+
+ # InHangulSyllables
+ inhangulsyllables => 'InHangu2',
+
+ # InHebrew
+ inhebrew => 'InHebrew',
+
+ inherited => 'Inherite',
+
+ # InHighPrivateUseSurrogates
+ inhighprivateusesurrogates => 'InHighPr',
+
+ # InHighSurrogates
+ inhighsurrogates => 'InHighSu',
+
+ # InHiragana
+ inhiragana => 'InHiraga',
+
+ # InIdeographicDescriptionCharacters
+ inideographicdescriptioncharacters => 'InIdeogr',
+
+ # InIpaExtensions
+ inipaextensions => 'InIpaExt',
+
+ # InitialPunctuation
+ initialpunctuation => 'Pi',
+
+ # InKanbun
+ inkanbun => 'InKanbun',
+
+ # InKangxiRadicals
+ inkangxiradicals => 'InKangxi',
+
+ # InKannada
+ inkannada => 'InKannad',
+
+ # InKatakana
+ inkatakana => 'InKataka',
+
+ # InKhmer
+ inkhmer => 'InKhmer',
+
+ # InLao
+ inlao => 'InLao',
+
+ # InLatin1Supplement
+ inlatin1supplement => 'InLatin1',
+
+ # InLatinExtendedA
+ inlatinextendeda => 'InLatin2',
+
+ # InLatinExtendedAdditional
+ inlatinextendedadditional => 'InLatin3',
+
+ # InLatinExtendedB
+ inlatinextendedb => 'InLatinE',
+
+ # InLetterlikeSymbols
+ inletterlikesymbols => 'InLetter',
+
+ # InLowSurrogates
+ inlowsurrogates => 'InLowSur',
+
+ # InMalayalam
+ inmalayalam => 'InMalaya',
+
+ # InMathematicalAlphanumericSymbols
+ inmathematicalalphanumericsymbols => 'InMathe2',
+
+ # InMathematicalOperators
+ inmathematicaloperators => 'InMathem',
+
+ # InMiscellaneousSymbols
+ inmiscellaneoussymbols => 'InMiscel',
+
+ # InMiscellaneousTechnical
+ inmiscellaneoustechnical => 'InMisce2',
+
+ # InMongolian
+ inmongolian => 'InMongol',
+
+ # InMusicalSymbols
+ inmusicalsymbols => 'InMusica',
+
+ # InMyanmar
+ inmyanmar => 'InMyanma',
+
+ # InNumberForms
+ innumberforms => 'InNumber',
+
+ # InOgham
+ inogham => 'InOgham',
+
+ # InOldItalic
+ inolditalic => 'InOldIta',
+
+ # InOpticalCharacterRecognition
+ inopticalcharacterrecognition => 'InOptica',
+
+ # InOriya
+ inoriya => 'InOriya',
+
+ # InPrivateUse
+ inprivateuse => 'InPrivat',
+
+ # InRunic
+ inrunic => 'InRunic',
+
+ # InSinhala
+ insinhala => 'InSinhal',
+
+ # InSmallFormVariants
+ insmallformvariants => 'InSmallF',
+
+ # InSpacingModifierLetters
+ inspacingmodifierletters => 'InSpacin',
+
+ # InSpecials
+ inspecials => 'InSpecia',
+
+ # InSuperscriptsAndSubscripts
+ insuperscriptsandsubscripts => 'InSupers',
+
+ # InSyriac
+ insyriac => 'InSyriac',
+
+ # InTags
+ intags => 'InTags',
+
+ # InTamil
+ intamil => 'InTamil',
+
+ # InTelugu
+ intelugu => 'InTelugu',
+
+ # InThaana
+ inthaana => 'InThaana',
+
+ # InThai
+ inthai => 'InThai',
+
+ # InTibetan
+ intibetan => 'InTibeta',
+
+ # InUnifiedCanadianAboriginalSyllabics
+ inunifiedcanadianaboriginalsyllabics => 'InUnifie',
+
+ # InYiRadicals
+ inyiradicals => 'InYiRadi',
+
+ # InYiSyllables
+ inyisyllables => 'InYiSyll',
+
+ # InIpaExtensions
+ ipaextensions => 'InIpaExt',
+
+ # JoinControl
+ joincontrol => 'JoinCont',
+
+ # InKanbun
+ kanbun => 'InKanbun',
+
+ # InKangxiRadicals
+ kangxiradicals => 'InKangxi',
+
+ kannada => 'Kannada',
+ katakana => 'Katakana',
+ khmer => 'Khmer',
+ lao => 'Lao',
+ latin => 'Latin',
+
+ # InLatin1Supplement
+ latin1supplement => 'InLatin1',
+
+ # InLatinExtendedA
+ latinextendeda => 'InLatin2',
+
+ # InLatinExtendedAdditional
+ latinextendedadditional => 'InLatin3',
+
+ # InLatinExtendedB
+ latinextendedb => 'InLatinE',
+
+ letter => 'L',
+
+ # InLetterlikeSymbols
+ letterlikesymbols => 'InLetter',
+
+ # LetterNumber
+ letternumber => 'Nl',
+
+ # LineSeparator
+ lineseparator => 'Zl',
+
+ lowercase => 'Lowercas',
+
+ # LowercaseLetter
+ lowercaseletter => 'Ll',
+
+ # InLowSurrogates
+ lowsurrogates => 'InLowSur',
+
+ malayalam => 'Malayala',
+ mark => 'M',
+ math => 'Math',
+
+ # InMathematicalAlphanumericSymbols
+ mathematicalalphanumericsymbols => 'InMathe2',
+
+ # InMathematicalOperators
+ mathematicaloperators => 'InMathem',
+
+ # MathSymbol
+ mathsymbol => 'Sm',
+
+ # InMiscellaneousSymbols
+ miscellaneoussymbols => 'InMiscel',
+
+ # InMiscellaneousTechnical
+ miscellaneoustechnical => 'InMisce2',
+
+ # ModifierLetter
+ modifierletter => 'Lm',
+
+ # ModifierSymbol
+ modifiersymbol => 'Sk',
+
+ mongolian => 'Mongolia',
+
+ # InMusicalSymbols
+ musicalsymbols => 'InMusica',
+
+ myanmar => 'Myanmar',
+
+ # NoncharacterCodePoint
+ noncharactercodepoint => 'Nonchara',
+
+ # NonSpacingMark
+ nonspacingmark => 'Mn',
+
+ number => 'N',
+
+ # InNumberForms
+ numberforms => 'InNumber',
+
+ ogham => 'Ogham',
+
+ # OldItalic
+ olditalic => 'OldItali',
+
+ # OpenPunctuation
+ openpunctuation => 'Ps',
+
+ # InOpticalCharacterRecognition
+ opticalcharacterrecognition => 'InOptica',
+
+ oriya => 'Oriya',
+ other => 'C',
+
+ # OtherAlphabetic
+ otheralphabetic => 'OtherAlp',
+
+ # OtherLetter
+ otherletter => 'Lo',
+
+ # OtherLowercase
+ otherlowercase => 'OtherLow',
+
+ # OtherMath
+ othermath => 'OtherMat',
+
+ # OtherNumber
+ othernumber => 'No',
+
+ # OtherPunctuation
+ otherpunctuation => 'Po',
+
+ # OtherSymbol
+ othersymbol => 'So',
+
+ # OtherUppercase
+ otheruppercase => 'OtherUpp',
+
+ # ParagraphSeparator
+ paragraphseparator => 'Zp',
+
+ # PrivateUse
+ privateuse => 'Co',
+
+ punctuation => 'P',
+
+ # QuotationMark
+ quotationmark => 'Quotatio',
+
+ runic => 'Runic',
+ separator => 'Z',
+ sinhala => 'Sinhala',
+
+ # InSmallFormVariants
+ smallformvariants => 'InSmallF',
+
+ # SpaceSeparator
+ spaceseparator => 'Zs',
+
+ # SpacingMark
+ spacingmark => 'Mc',
+
+ # InSpacingModifierLetters
+ spacingmodifierletters => 'InSpacin',
+
+ # InSpecials
+ specials => 'InSpecia',
+
+ # InSuperscriptsAndSubscripts
+ superscriptsandsubscripts => 'InSupers',
+
+ surrogate => 'Cs',
+ symbol => 'S',
+ syriac => 'Syriac',
+
+ # InTags
+ tags => 'InTags',
+
+ tamil => 'Tamil',
+ telugu => 'Telugu',
+
+ # TerminalPunctuation
+ terminalpunctuation => 'Terminal',
+
+ thaana => 'Thaana',
+ thai => 'Thai',
+ tibetan => 'Tibetan',
+
+ # TitlecaseLetter
+ titlecaseletter => 'Lt',
+
+ unassigned => 'Cn',
+
+ # InUnifiedCanadianAboriginalSyllabics
+ unifiedcanadianaboriginalsyllabics => 'InUnifie',
+
+ uppercase => 'Uppercas',
+
+ # UppercaseLetter
+ uppercaseletter => 'Lu',
+
+ # WhiteSpace
+ whitespace => 'WhiteSpa',
+
+ yi => 'Yi',
+
+ # InYiRadicals
+ yiradicals => 'InYiRadi',
+
+ # InYiSyllables
+ yisyllables => 'InYiSyll',
+
+);
+1
--- /dev/null
+# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
+# This file is built by ./mktables from e.g. Unicode.txt.
+# Any changes made here will be lost!
+
+##
+## Data in this file used by ../utf8_heavy.pl
+##
+
+## Mapping from name to filename in ./lib
+%utf8::Exact = (
+ ASCII => 'ASCII',
+ All => 'Any',
+ Alnum => 'Alnum',
+ Alpha => 'Alpha',
+ Any => 'Any',
+ Assigned => 'Assigned',
+ BidiAL => 'BidiAL',
+ BidiAN => 'BidiAN',
+ BidiB => 'BidiB',
+ BidiBN => 'BidiBN',
+ BidiCS => 'BidiCS',
+ BidiEN => 'BidiEN',
+ BidiES => 'BidiES',
+ BidiET => 'BidiET',
+ BidiL => 'BidiL',
+ BidiLRE => 'BidiLRE',
+ BidiLRO => 'BidiLRO',
+ BidiNSM => 'BidiNSM',
+ BidiON => 'BidiON',
+ BidiPDF => 'BidiPDF',
+ BidiR => 'BidiR',
+ BidiRLE => 'BidiRLE',
+ BidiRLO => 'BidiRLO',
+ BidiS => 'BidiS',
+ BidiWS => 'BidiWS',
+ Blank => 'Blank',
+ C => 'C',
+ Canon => 'Canon',
+ Cc => 'Cc',
+ Cf => 'Cf',
+ Cn => 'Cn',
+ Cntrl => 'Cntrl',
+ Co => 'Co',
+ Compat => 'Compat',
+ Cs => 'Cs',
+ DCcircle => 'DCcircle',
+ DCcompat => 'DCcompat',
+ DCfinal => 'DCfinal',
+ DCfont => 'DCfont',
+ DCfraction => 'DCfracti',
+ DCinitial => 'DCinitia',
+ DCisolated => 'DCisolat',
+ DCmedial => 'DCmedial',
+ DCnarrow => 'DCnarrow',
+ DCnoBreak => 'DCnoBrea',
+ DCsmall => 'DCsmall',
+ DCsquare => 'DCsquare',
+ DCsub => 'DCsub',
+ DCsuper => 'DCsuper',
+ DCvertical => 'DCvertic',
+ DCwide => 'DCwide',
+ Digit => 'Digit',
+ Graph => 'Graph',
+ L => 'L',
+'L&' => 'L_',
+ Ll => 'Ll',
+ Lm => 'Lm',
+ Lo => 'Lo',
+ Lower => 'Lower',
+ Lt => 'Lt',
+ Lu => 'Lu',
+ M => 'M',
+ Mc => 'Mc',
+ Me => 'Me',
+ Mirrored => 'Mirrored',
+ Mn => 'Mn',
+ N => 'N',
+ Nd => 'Nd',
+ Nl => 'Nl',
+ No => 'No',
+ P => 'P',
+ Pc => 'Pc',
+ Pd => 'Pd',
+ Pe => 'Pe',
+ Pf => 'Pf',
+ Pi => 'Pi',
+ Po => 'Po',
+ Print => 'Print',
+ Ps => 'Ps',
+ Punct => 'Punct',
+ S => 'S',
+ Sc => 'Sc',
+ Sk => 'Sk',
+ Sm => 'Sm',
+ So => 'So',
+ Space => 'Space',
+ SpacePerl => 'SpacePer',
+ Title => 'Title',
+ Upper => 'Upper',
+ Word => 'Word',
+ XDigit => 'XDigit',
+ Z => 'Z',
+ Zl => 'Zl',
+ Zp => 'Zp',
+ Zs => 'Zs',
+ _CanonDCIJ => '_CanonDC',
+ _CaseIgnorable => '_CaseIgn',
+ _CombAbove => '_CombAbo',
+);
+1;
+++ /dev/null
-# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by ./mktables from e.g. Unicode.txt.
-# Any changes made here will be lost!
-
-##
-## Data in this file used by ../utf8_heavy.pl
-##
-
-## Mapping from name to filename in ./In
-%utf8::In = (
-);
-
-## Mappings from regex to filename in ./In/
-%utf8::InPat = (
- 'al' => {
- 'Alphabetic(?:[-_]|\s+)?Presentation(?:[-_]|\s+)?Forms' => 'Alphabet',
- },
- 'ar' => {
- 'Arabic(?:[-_]|\s+)?Presentation(?:[-_]|\s+)?Forms(?:[-_]|\s+)?A' => 'ArabicP2',
- 'Armenian' => 'Armenian',
- 'Arabic' => 'Arabic',
- 'Arrows' => 'Arrows',
- 'Arabic(?:[-_]|\s+)?Presentation(?:[-_]|\s+)?Forms(?:[-_]|\s+)?B' => 'ArabicPr',
- },
- 'ba' => {
- 'Basic(?:[-_]|\s+)?Latin' => 'BasicLat',
- },
- 'be' => {
- 'Bengali' => 'Bengali',
- },
- 'bl' => {
- 'Block(?:[-_]|\s+)?Elements' => 'BlockEle',
- },
- 'bo' => {
- 'Bopomofo(?:[-_]|\s+)?Extended' => 'Bopomof2',
- 'Box(?:[-_]|\s+)?Drawing' => 'BoxDrawi',
- 'Bopomofo' => 'Bopomofo',
- },
- 'br' => {
- 'Braille(?:[-_]|\s+)?Patterns' => 'BrailleP',
- },
- 'by' => {
- 'Byzantine(?:[-_]|\s+)?Musical(?:[-_]|\s+)?Symbols' => 'Byzantin',
- },
- 'ch' => {
- 'Cherokee' => 'Cherokee',
- },
- 'cj' => {
- 'Cjk(?:[-_]|\s+)?Unified(?:[-_]|\s+)?Ideographs(?:[-_]|\s+)?Extension(?:[-_]|\s+)?A' => 'CjkUnif2',
- 'Cjk(?:[-_]|\s+)?Radicals(?:[-_]|\s+)?Supplement' => 'CjkRadic',
- 'Cjk(?:[-_]|\s+)?Compatibility(?:[-_]|\s+)?Ideographs' => 'CjkComp3',
- 'Cjk(?:[-_]|\s+)?Compatibility(?:[-_]|\s+)?Ideographs(?:[-_]|\s+)?Supplement' => 'CjkComp4',
- 'Cjk(?:[-_]|\s+)?Compatibility' => 'CjkCompa',
- 'Cjk(?:[-_]|\s+)?Unified(?:[-_]|\s+)?Ideographs' => 'CjkUnifi',
- 'Cjk(?:[-_]|\s+)?Symbols(?:[-_]|\s+)?And(?:[-_]|\s+)?Punctuation' => 'CjkSymbo',
- 'Cjk(?:[-_]|\s+)?Compatibility(?:[-_]|\s+)?Forms' => 'CjkComp2',
- 'Cjk(?:[-_]|\s+)?Unified(?:[-_]|\s+)?Ideographs(?:[-_]|\s+)?Extension(?:[-_]|\s+)?B' => 'CjkUnif3',
- },
- 'co' => {
- 'Combining(?:[-_]|\s+)?Diacritical(?:[-_]|\s+)?Marks' => 'Combini2',
- 'Combining(?:[-_]|\s+)?Half(?:[-_]|\s+)?Marks' => 'Combinin',
- 'Control(?:[-_]|\s+)?Pictures' => 'ControlP',
- 'Combining(?:[-_]|\s+)?Marks(?:[-_]|\s+)?For(?:[-_]|\s+)?Symbols' => 'Combini3',
- },
- 'cu' => {
- 'Currency(?:[-_]|\s+)?Symbols' => 'Currency',
- },
- 'cy' => {
- 'Cyrillic' => 'Cyrillic',
- },
- 'de' => {
- 'Deseret' => 'Deseret',
- 'Devanagari' => 'Devanaga',
- },
- 'di' => {
- 'Dingbats' => 'Dingbats',
- },
- 'en' => {
- 'Enclosed(?:[-_]|\s+)?Alphanumerics' => 'Enclosed',
- 'Enclosed(?:[-_]|\s+)?Cjk(?:[-_]|\s+)?Letters(?:[-_]|\s+)?And(?:[-_]|\s+)?Months' => 'Enclose2',
- },
- 'et' => {
- 'Ethiopic' => 'Ethiopic',
- },
- 'ge' => {
- 'Geometric(?:[-_]|\s+)?Shapes' => 'Geometri',
- 'General(?:[-_]|\s+)?Punctuation' => 'GeneralP',
- 'Georgian' => 'Georgian',
- },
- 'go' => {
- 'Gothic' => 'Gothic',
- },
- 'gr' => {
- 'Greek(?:[-_]|\s+)?Extended' => 'GreekExt',
- 'Greek' => 'Greek',
- },
- 'gu' => {
- 'Gujarati' => 'Gujarati',
- 'Gurmukhi' => 'Gurmukhi',
- },
- 'ha' => {
- 'Hangul(?:[-_]|\s+)?Syllables' => 'HangulSy',
- 'Hangul(?:[-_]|\s+)?Compatibility(?:[-_]|\s+)?Jamo' => 'HangulCo',
- 'Halfwidth(?:[-_]|\s+)?And(?:[-_]|\s+)?Fullwidth(?:[-_]|\s+)?Forms' => 'Halfwidt',
- 'Hangul(?:[-_]|\s+)?Jamo' => 'HangulJa',
- },
- 'he' => {
- 'Hebrew' => 'Hebrew',
- },
- 'hi' => {
- 'High(?:[-_]|\s+)?Surrogates' => 'HighSurr',
- 'Hiragana' => 'Hiragana',
- 'High(?:[-_]|\s+)?Private(?:[-_]|\s+)?Use(?:[-_]|\s+)?Surrogates' => 'HighPriv',
- },
- 'id' => {
- 'Ideographic(?:[-_]|\s+)?Description(?:[-_]|\s+)?Characters' => 'Ideograp',
- },
- 'ip' => {
- 'Ipa(?:[-_]|\s+)?Extensions' => 'IpaExten',
- },
- 'ka' => {
- 'Kannada' => 'Kannada',
- 'Kanbun' => 'Kanbun',
- 'Kangxi(?:[-_]|\s+)?Radicals' => 'KangxiRa',
- 'Katakana' => 'Katakana',
- },
- 'kh' => {
- 'Khmer' => 'Khmer',
- },
- 'la' => {
- 'Lao' => 'Lao',
- 'Latin(?:[-_]|\s+)?Extended(?:[-_]|\s+)?B' => 'LatinExt',
- 'Latin(?:[-_]|\s+)?Extended(?:[-_]|\s+)?A' => 'LatinEx2',
- 'Latin(?:[-_]|\s+)?Extended(?:[-_]|\s+)?Additional' => 'LatinEx3',
- 'Latin(?:[-_]|\s+)?1(?:[-_]|\s+)?Supplement' => 'Latin1Su',
- },
- 'le' => {
- 'Letterlike(?:[-_]|\s+)?Symbols' => 'Letterli',
- },
- 'lo' => {
- 'Low(?:[-_]|\s+)?Surrogates' => 'LowSurro',
- },
- 'ma' => {
- 'Mathematical(?:[-_]|\s+)?Operators' => 'Mathemat',
- 'Mathematical(?:[-_]|\s+)?Alphanumeric(?:[-_]|\s+)?Symbols' => 'Mathema2',
- 'Malayalam' => 'Malayala',
- },
- 'mi' => {
- 'Miscellaneous(?:[-_]|\s+)?Technical' => 'Miscell2',
- 'Miscellaneous(?:[-_]|\s+)?Symbols' => 'Miscella',
- },
- 'mo' => {
- 'Mongolian' => 'Mongolia',
- },
- 'mu' => {
- 'Musical(?:[-_]|\s+)?Symbols' => 'MusicalS',
- },
- 'my' => {
- 'Myanmar' => 'Myanmar',
- },
- 'nu' => {
- 'Number(?:[-_]|\s+)?Forms' => 'NumberFo',
- },
- 'og' => {
- 'Ogham' => 'Ogham',
- },
- 'ol' => {
- 'Old(?:[-_]|\s+)?Italic' => 'OldItali',
- },
- 'op' => {
- 'Optical(?:[-_]|\s+)?Character(?:[-_]|\s+)?Recognition' => 'OpticalC',
- },
- 'or' => {
- 'Oriya' => 'Oriya',
- },
- 'pr' => {
- 'Private(?:[-_]|\s+)?Use' => 'PrivateU',
- },
- 'ru' => {
- 'Runic' => 'Runic',
- },
- 'si' => {
- 'Sinhala' => 'Sinhala',
- },
- 'sm' => {
- 'Small(?:[-_]|\s+)?Form(?:[-_]|\s+)?Variants' => 'SmallFor',
- },
- 'sp' => {
- 'Spacing(?:[-_]|\s+)?Modifier(?:[-_]|\s+)?Letters' => 'SpacingM',
- 'Specials' => 'Specials',
- },
- 'su' => {
- 'Superscripts(?:[-_]|\s+)?And(?:[-_]|\s+)?Subscripts' => 'Superscr',
- },
- 'sy' => {
- 'Syriac' => 'Syriac',
- },
- 'ta' => {
- 'Tamil' => 'Tamil',
- 'Tags' => 'Tags',
- },
- 'te' => {
- 'Telugu' => 'Telugu',
- },
- 'th' => {
- 'Thaana' => 'Thaana',
- 'Thai' => 'Thai',
- },
- 'ti' => {
- 'Tibetan' => 'Tibetan',
- },
- 'un' => {
- 'Unified(?:[-_]|\s+)?Canadian(?:[-_]|\s+)?Aboriginal(?:[-_]|\s+)?Syllabics' => 'UnifiedC',
- },
- 'yi' => {
- 'Yi(?:[-_]|\s+)?Syllables' => 'YiSyllab',
- 'Yi(?:[-_]|\s+)?Radicals' => 'YiRadica',
- },
-);
+++ /dev/null
-# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by ./mktables from e.g. Unicode.txt.
-# Any changes made here will be lost!
-
-##
-## Data in this file used by ../utf8_heavy.pl
-##
-
-## Mapping from name to filename in ./Is
-%utf8::Is = (
- 'ASCII' => 'ASCII',
- 'Alnum' => 'Alnum',
- 'Alpha' => 'Alpha',
- 'BidiAL' => 'BidiAL',
- 'BidiAN' => 'BidiAN',
- 'BidiB' => 'BidiB',
- 'BidiBN' => 'BidiBN',
- 'BidiCS' => 'BidiCS',
- 'BidiEN' => 'BidiEN',
- 'BidiES' => 'BidiES',
- 'BidiET' => 'BidiET',
- 'BidiL' => 'BidiL',
- 'BidiLRE' => 'BidiLRE',
- 'BidiLRO' => 'BidiLRO',
- 'BidiNSM' => 'BidiNSM',
- 'BidiON' => 'BidiON',
- 'BidiPDF' => 'BidiPDF',
- 'BidiR' => 'BidiR',
- 'BidiRLE' => 'BidiRLE',
- 'BidiRLO' => 'BidiRLO',
- 'BidiS' => 'BidiS',
- 'BidiWS' => 'BidiWS',
- 'Blank' => 'Blank',
- 'C' => 'C',
- 'Canon' => 'Canon',
- 'Cc' => 'Cc',
- 'Cf' => 'Cf',
- 'Cn' => 'Cn',
- 'Cntrl' => 'Cntrl',
- 'Co' => 'Co',
- 'Compat' => 'Compat',
- 'Cs' => 'Cs',
- 'DCcircle' => 'DCcircle',
- 'DCcompat' => 'DCcompat',
- 'DCfinal' => 'DCfinal',
- 'DCfont' => 'DCfont',
- 'DCfraction' => 'DCfracti',
- 'DCinitial' => 'DCinitia',
- 'DCisolated' => 'DCisolat',
- 'DCmedial' => 'DCmedial',
- 'DCnarrow' => 'DCnarrow',
- 'DCnoBreak' => 'DCnoBrea',
- 'DCsmall' => 'DCsmall',
- 'DCsquare' => 'DCsquare',
- 'DCsub' => 'DCsub',
- 'DCsuper' => 'DCsuper',
- 'DCvertical' => 'DCvertic',
- 'DCwide' => 'DCwide',
- 'Digit' => 'Digit',
- 'Graph' => 'Graph',
- 'L' => 'L',
- 'L&' => 'L_',
- 'LbrkAI' => 'LbrkAI',
- 'LbrkAL' => 'LbrkAL',
- 'LbrkB2' => 'LbrkB2',
- 'LbrkBA' => 'LbrkBA',
- 'LbrkBB' => 'LbrkBB',
- 'LbrkBK' => 'LbrkBK',
- 'LbrkCB' => 'LbrkCB',
- 'LbrkCL' => 'LbrkCL',
- 'LbrkCM' => 'LbrkCM',
- 'LbrkCR' => 'LbrkCR',
- 'LbrkEX' => 'LbrkEX',
- 'LbrkGL' => 'LbrkGL',
- 'LbrkHY' => 'LbrkHY',
- 'LbrkID' => 'LbrkID',
- 'LbrkIN' => 'LbrkIN',
- 'LbrkIS' => 'LbrkIS',
- 'LbrkLF' => 'LbrkLF',
- 'LbrkNS' => 'LbrkNS',
- 'LbrkNU' => 'LbrkNU',
- 'LbrkOP' => 'LbrkOP',
- 'LbrkPO' => 'LbrkPO',
- 'LbrkPR' => 'LbrkPR',
- 'LbrkQU' => 'LbrkQU',
- 'LbrkSA' => 'LbrkSA',
- 'LbrkSG' => 'LbrkSG',
- 'LbrkSP' => 'LbrkSP',
- 'LbrkSY' => 'LbrkSY',
- 'LbrkXX' => 'LbrkXX',
- 'LbrkZW' => 'LbrkZW',
- 'Ll' => 'Ll',
- 'Lm' => 'Lm',
- 'Lo' => 'Lo',
- 'Lower' => 'Lower',
- 'Lt' => 'Lt',
- 'Lu' => 'Lu',
- 'M' => 'M',
- 'Mc' => 'Mc',
- 'Me' => 'Me',
- 'Mirrored' => 'Mirrored',
- 'Mn' => 'Mn',
- 'N' => 'N',
- 'Nd' => 'Nd',
- 'Nl' => 'Nl',
- 'No' => 'No',
- 'P' => 'P',
- 'Pc' => 'Pc',
- 'Pd' => 'Pd',
- 'Pe' => 'Pe',
- 'Pf' => 'Pf',
- 'Pi' => 'Pi',
- 'Po' => 'Po',
- 'Print' => 'Print',
- 'Ps' => 'Ps',
- 'Punct' => 'Punct',
- 'S' => 'S',
- 'Sc' => 'Sc',
- 'Sk' => 'Sk',
- 'Sm' => 'Sm',
- 'So' => 'So',
- 'Space' => 'Space',
- 'SpacePerl' => 'SpacePer',
- 'Title' => 'Title',
- 'Upper' => 'Upper',
- 'Word' => 'Word',
- 'XDigit' => 'XDigit',
- 'Z' => 'Z',
- 'Zl' => 'Zl',
- 'Zp' => 'Zp',
- 'Zs' => 'Zs',
- '_CanonDCIJ' => '_CanonDC',
- '_CaseIgnorable' => '_CaseIgn',
- '_CombAbove' => '_CombAbo',
-);
-
-## Mappings from regex to filename in ./Is/
-%utf8::IsPat = (
- 'al' => {
- 'All' => 'Any',
- 'Alphabetic' => 'Alphabet',
- },
- 'an' => {
- 'Any' => 'Any',
- },
- 'ar' => {
- 'Armenian' => 'Armenian',
- 'Arabic' => 'Arabic',
- },
- 'as' => {
- 'Ascii(?:[-_]|\s+)?Hex(?:[-_]|\s+)?Digit' => 'AsciiHex',
- 'Assigned' => 'Assigned',
- },
- 'be' => {
- 'Bengali' => 'Bengali',
- },
- 'bi' => {
- 'Bidi(?:[-_]|\s+)?Control' => 'BidiCont',
- },
- 'bo' => {
- 'Bopomofo' => 'Bopomofo',
- },
- 'ca' => {
- 'Canadian(?:[-_]|\s+)?Aboriginal' => 'Canadian',
- },
- 'ch' => {
- 'Cherokee' => 'Cherokee',
- },
- 'cl' => {
- 'Close(?:[-_]|\s+)?Punctuation' => 'Pe',
- },
- 'co' => {
- 'Control' => 'Cc',
- 'Common' => 'Common',
- 'Connector(?:[-_]|\s+)?Punctuation' => 'Pc',
- },
- 'cu' => {
- 'Currency(?:[-_]|\s+)?Symbol' => 'Sc',
- },
- 'cy' => {
- 'Cyrillic' => 'Cyrillic',
- },
- 'da' => {
- 'Dash(?:[-_]|\s+)?Punctuation' => 'Pd',
- 'Dash' => 'Dash',
- },
- 'de' => {
- 'Deseret' => 'Deseret',
- 'Devanagari' => 'Devanaga',
- 'Decimal(?:[-_]|\s+)?Number' => 'Nd',
- },
- 'di' => {
- 'Diacritic' => 'Diacriti',
- },
- 'en' => {
- 'Enclosing(?:[-_]|\s+)?Mark' => 'Me',
- },
- 'et' => {
- 'Ethiopic' => 'Ethiopic',
- },
- 'ex' => {
- 'Extender' => 'Extender',
- },
- 'fi' => {
- 'Final(?:[-_]|\s+)?Punctuation' => 'Pf',
- },
- 'fo' => {
- 'Format' => 'Cf',
- },
- 'ge' => {
- 'Georgian' => 'Georgian',
- },
- 'go' => {
- 'Gothic' => 'Gothic',
- },
- 'gr' => {
- 'Greek' => 'Greek',
- },
- 'gu' => {
- 'Gujarati' => 'Gujarati',
- 'Gurmukhi' => 'Gurmukhi',
- },
- 'ha' => {
- 'Hangul' => 'Hangul',
- 'Han' => 'Han',
- },
- 'he' => {
- 'Hebrew' => 'Hebrew',
- 'Hex(?:[-_]|\s+)?Digit' => 'HexDigit',
- },
- 'hi' => {
- 'Hiragana' => 'Hiragana',
- },
- 'hy' => {
- 'Hyphen' => 'Hyphen',
- },
- 'id' => {
- 'Ideographic' => 'Ideograp',
- 'Id(?:[-_]|\s+)?Continue' => 'IdContin',
- 'Id(?:[-_]|\s+)?Start' => 'IdStart',
- },
- 'in' => {
- 'Inherited' => 'Inherite',
- 'Initial(?:[-_]|\s+)?Punctuation' => 'Pi',
- },
- 'jo' => {
- 'Join(?:[-_]|\s+)?Control' => 'JoinCont',
- },
- 'ka' => {
- 'Kannada' => 'Kannada',
- 'Katakana' => 'Katakana',
- },
- 'kh' => {
- 'Khmer' => 'Khmer',
- },
- 'la' => {
- 'Lao' => 'Lao',
- 'Latin' => 'Latin',
- },
- 'le' => {
- 'Letter(?:[-_]|\s+)?Number' => 'Nl',
- 'Letter' => 'L',
- },
- 'li' => {
- 'Line(?:[-_]|\s+)?Separator' => 'Zl',
- },
- 'lo' => {
- 'Lowercase' => 'Lowercas',
- 'Lowercase(?:[-_]|\s+)?Letter' => 'Ll',
- },
- 'ma' => {
- 'Math' => 'Math',
- 'Malayalam' => 'Malayala',
- 'Mark' => 'M',
- 'Math(?:[-_]|\s+)?Symbol' => 'Sm',
- },
- 'mo' => {
- 'Modifier(?:[-_]|\s+)?Symbol' => 'Sk',
- 'Mongolian' => 'Mongolia',
- 'Modifier(?:[-_]|\s+)?Letter' => 'Lm',
- },
- 'my' => {
- 'Myanmar' => 'Myanmar',
- },
- 'no' => {
- 'Noncharacter(?:[-_]|\s+)?Code(?:[-_]|\s+)?Point' => 'Nonchara',
- 'Non(?:[-_]|\s+)?Spacing(?:[-_]|\s+)?Mark' => 'Mn',
- },
- 'nu' => {
- 'Number' => 'N',
- },
- 'og' => {
- 'Ogham' => 'Ogham',
- },
- 'ol' => {
- 'Old(?:[-_]|\s+)?Italic' => 'OldItali',
- },
- 'op' => {
- 'Open(?:[-_]|\s+)?Punctuation' => 'Ps',
- },
- 'or' => {
- 'Oriya' => 'Oriya',
- },
- 'ot' => {
- 'Other(?:[-_]|\s+)?Punctuation' => 'Po',
- 'Other(?:[-_]|\s+)?Uppercase' => 'OtherUpp',
- 'Other(?:[-_]|\s+)?Alphabetic' => 'OtherAlp',
- 'Other(?:[-_]|\s+)?Symbol' => 'So',
- 'Other(?:[-_]|\s+)?Number' => 'No',
- 'Other' => 'C',
- 'Other(?:[-_]|\s+)?Math' => 'OtherMat',
- 'Other(?:[-_]|\s+)?Letter' => 'Lo',
- 'Other(?:[-_]|\s+)?Lowercase' => 'OtherLow',
- },
- 'pa' => {
- 'Paragraph(?:[-_]|\s+)?Separator' => 'Zp',
- },
- 'pr' => {
- 'Private(?:[-_]|\s+)?Use' => 'Co',
- },
- 'pu' => {
- 'Punctuation' => 'P',
- },
- 'qu' => {
- 'Quotation(?:[-_]|\s+)?Mark' => 'Quotatio',
- },
- 'ru' => {
- 'Runic' => 'Runic',
- },
- 'se' => {
- 'Separator' => 'Z',
- },
- 'si' => {
- 'Sinhala' => 'Sinhala',
- },
- 'sp' => {
- 'Space(?:[-_]|\s+)?Separator' => 'Zs',
- 'Spacing(?:[-_]|\s+)?Mark' => 'Mc',
- },
- 'su' => {
- 'Surrogate' => 'Cs',
- },
- 'sy' => {
- 'Syriac' => 'Syriac',
- 'Symbol' => 'S',
- },
- 'ta' => {
- 'Tamil' => 'Tamil',
- },
- 'te' => {
- 'Telugu' => 'Telugu',
- 'Terminal(?:[-_]|\s+)?Punctuation' => 'Terminal',
- },
- 'th' => {
- 'Thaana' => 'Thaana',
- 'Thai' => 'Thai',
- },
- 'ti' => {
- 'Tibetan' => 'Tibetan',
- 'Titlecase(?:[-_]|\s+)?Letter' => 'Lt',
- },
- 'un' => {
- 'Unassigned' => 'Cn',
- },
- 'up' => {
- 'Uppercase' => 'Uppercas',
- 'Uppercase(?:[-_]|\s+)?Letter' => 'Lu',
- },
- 'wh' => {
- 'White(?:[-_]|\s+)?Space' => 'WhiteSpa',
- },
- 'yi' => {
- 'Yi' => 'Yi',
- },
-);
+++ /dev/null
-# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by ./mktables from e.g. Unicode.txt.
-# Any changes made here will be lost!
-
-#
-# This file supports:
-# \p{LbrkAI}
-#
-# Meaning: Linebreak category 'AI'
-#
-return <<'END';
-00A1
-00A7 00A8
-00AA
-00B2 00B3
-00B6 00BA
-00BC 00BF
-00C6
-00D0
-00D7 00D8
-00DE 00E1
-00E6
-00E8 00EA
-00EC 00ED
-00F0
-00F2 00F3
-00F7 00FA
-00FC
-00FE
-0101
-0111
-0113
-011B
-0126 0127
-012B
-0131 0133
-0138
-013F 0142
-0144
-0148 014A
-014D
-0152 0153
-0166 0167
-016B
-01CE
-01D0
-01D2
-01D4
-01D6
-01D8
-01DA
-01DC
-0251
-0261
-02C7
-02C9 02CB
-02CD
-02D0
-02D8 02DB
-02DD
-0391 03A1
-03A3 03A9
-03B1 03C1
-03C3 03C9
-0401
-0410 044F
-0451
-2015 2016
-2020 2021
-203B
-2074
-207F
-2081 2084
-2105
-2113
-2121 2122
-212B
-2154 2155
-215B
-215E
-2160 216B
-2170 2179
-2190 2199
-21D2
-21D4
-2200
-2202 2203
-2207 2208
-220B
-220F
-2211
-2215
-221A
-221D 2220
-2223
-2225
-2227 222C
-222E
-2234 2237
-223C 223D
-2248
-224C
-2252
-2260 2261
-2264 2267
-226A 226B
-226E 226F
-2282 2283
-2286 2287
-2295
-2299
-22A5
-22BF
-2312
-2460 24BF
-24D0 24E9
-2500 254B
-2550 2574
-2580 258F
-2592 2595
-25A0 25A1
-25A3 25A9
-25B2 25B3
-25B6 25B7
-25BC 25BD
-25C0 25C1
-25C6 25C8
-25CB
-25CE 25D1
-25E2 25E5
-25EF
-2605 2606
-2609
-260E 260F
-261C
-261E
-2640
-2642
-2660 2661
-2663 2665
-2667 266A
-266C 266D
-266F
-FFFD
-END
+++ /dev/null
-# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by ./mktables from e.g. Unicode.txt.
-# Any changes made here will be lost!
-
-#
-# This file supports:
-# \p{LbrkAL}
-#
-# Meaning: Linebreak category 'AL'
-#
-return <<'END';
-0023
-0026
-002A
-003C 003E
-0040 005A
-005E 007A
-007E
-00A6
-00A9
-00AC
-00AE 00AF
-00B5
-00C0 00C5
-00C7 00CF
-00D1 00D6
-00D9 00DD
-00E2 00E5
-00E7
-00EB
-00EE 00EF
-00F1
-00F4 00F6
-00FB
-00FD
-00FF 0100
-0102 0110
-0112
-0114 011A
-011C 0125
-0128 012A
-012C 0130
-0134 0137
-0139 013E
-0143
-0145 0147
-014B 014C
-014E 0151
-0154 0165
-0168 016A
-016C 01CD
-01CF
-01D1
-01D3
-01D5
-01D7
-01D9
-01DB
-01DD 021F
-0222 0233
-0250
-0252 0260
-0262 02AD
-02B0 02C6
-02CE 02CF
-02D1 02D7
-02DC
-02DE 02EE
-0374 0375
-037A
-037E
-0384 038A
-038C
-038E 0390
-03AA 03B0
-03C2
-03CA 03CE
-03D0 03D7
-03DA 03F5
-0400
-0402 040F
-0450
-0452 0482
-048C 04C4
-04C7 04C8
-04CB 04CC
-04D0 04F5
-04F8 04F9
-0531 0556
-0559 055F
-0561 0587
-05BE
-05C0
-05C3
-05D0 05EA
-05F0 05F4
-060C
-061B
-061F
-0621 063A
-0640 064A
-066A 066D
-0671 06D5
-06E5 06E6
-06E9
-06FA 06FE
-0700 070D
-0710
-0712 072C
-0780 07A5
-0905 0939
-093D
-0950
-0958 0961
-0964 0965
-0970
-0985 098C
-098F 0990
-0993 09A8
-09AA 09B0
-09B2
-09B6 09B9
-09DC 09DD
-09DF 09E1
-09F0 09F1
-09F4 09FA
-0A05 0A0A
-0A0F 0A10
-0A13 0A28
-0A2A 0A30
-0A32 0A33
-0A35 0A36
-0A38 0A39
-0A59 0A5C
-0A5E
-0A72 0A74
-0A85 0A8B
-0A8D
-0A8F 0A91
-0A93 0AA8
-0AAA 0AB0
-0AB2 0AB3
-0AB5 0AB9
-0ABD
-0AD0
-0AE0
-0B05 0B0C
-0B0F 0B10
-0B13 0B28
-0B2A 0B30
-0B32 0B33
-0B36 0B39
-0B3D
-0B5C 0B5D
-0B5F 0B61
-0B70
-0B85 0B8A
-0B8E 0B90
-0B92 0B95
-0B99 0B9A
-0B9C
-0B9E 0B9F
-0BA3 0BA4
-0BA8 0BAA
-0BAE 0BB5
-0BB7 0BB9
-0BF0 0BF2
-0C05 0C0C
-0C0E 0C10
-0C12 0C28
-0C2A 0C33
-0C35 0C39
-0C60 0C61
-0C85 0C8C
-0C8E 0C90
-0C92 0CA8
-0CAA 0CB3
-0CB5 0CB9
-0CDE
-0CE0 0CE1
-0D05 0D0C
-0D0E 0D10
-0D12 0D28
-0D2A 0D39
-0D60 0D61
-0D85 0D96
-0D9A 0DB1
-0DB3 0DBB
-0DBD
-0DC0 0DC6
-0DF4
-0E4F
-0F00 0F0A
-0F0D 0F17
-0F1A 0F1F
-0F2A 0F34
-0F36
-0F38
-0F40 0F47
-0F49 0F6A
-0F85
-0F88 0F8B
-0FBE 0FC5
-0FC7 0FCC
-0FCF
-104A 104F
-10A0 10C5
-10D0 10F6
-10FB
-1200 1206
-1208 1246
-1248
-124A 124D
-1250 1256
-1258
-125A 125D
-1260 1286
-1288
-128A 128D
-1290 12AE
-12B0
-12B2 12B5
-12B8 12BE
-12C0
-12C2 12C5
-12C8 12CE
-12D0 12D6
-12D8 12EE
-12F0 130E
-1310
-1312 1315
-1318 131E
-1320 1346
-1348 135A
-1362 1368
-1372 137C
-13A0 13F4
-1401 1676
-1681 169A
-16A0 16F0
-17DC
-1800 1805
-1807 180A
-1820 1877
-1880 18A8
-1E00 1E9B
-1EA0 1EF9
-1F00 1F15
-1F18 1F1D
-1F20 1F45
-1F48 1F4D
-1F50 1F57
-1F59
-1F5B
-1F5D
-1F5F 1F7D
-1F80 1FB4
-1FB6 1FC4
-1FC6 1FD3
-1FD6 1FDB
-1FDD 1FEF
-1FF2 1FF4
-1FF6 1FFE
-2017
-2022 2023
-2038
-203D 2043
-2048 204D
-2070
-2075 207C
-2080
-2085 208C
-2100 2102
-2104
-2106 2108
-210A 2112
-2114 2115
-2117 2120
-2123 2125
-2127 212A
-212C 213A
-2153
-2156 215A
-215C 215D
-215F
-216C 216F
-217A 2183
-219A 21D1
-21D3
-21D5 21F3
-2201
-2204 2206
-2209 220A
-220C 220E
-2210
-2214
-2216 2219
-221B 221C
-2221 2222
-2224
-2226
-222D
-222F 2233
-2238 223B
-223E 2247
-2249 224B
-224D 2251
-2253 225F
-2262 2263
-2268 2269
-226C 226D
-2270 2281
-2284 2285
-2288 2294
-2296 2298
-229A 22A4
-22A6 22BE
-22C0 22F1
-2300 2311
-2313 2328
-232B 237B
-237D 239A
-2400 2426
-2440 244A
-24C0 24CF
-24EA
-254C 254F
-2575 257F
-2590 2591
-25A2
-25AA 25B1
-25B4 25B5
-25B8 25BB
-25BE 25BF
-25C2 25C5
-25C9 25CA
-25CC 25CD
-25D2 25E1
-25E6 25EE
-25F0 25F7
-2600 2604
-2607 2608
-260A 260D
-2610 2613
-2619 261B
-261D
-261F 263F
-2641
-2643 265F
-2662
-2666
-266B
-266E
-2670 2671
-2701 2704
-2706 2709
-270C 2727
-2729 274B
-274D
-274F 2752
-2756
-2758 275E
-2761 2767
-2776 2794
-2798 27AF
-27B1 27BE
-2800 28FF
-FB00 FB06
-FB13 FB17
-FB1D
-FB1F FB36
-FB38 FB3C
-FB3E
-FB40 FB41
-FB43 FB44
-FB46 FBB1
-FBD3 FD3D
-FD50 FD8F
-FD92 FDC7
-FDF0 FDFB
-FE70 FE72
-FE74
-FE76 FEFC
-FF66
-FF71 FF9D
-FFA0 FFBE
-FFC2 FFC7
-FFCA FFCF
-FFD2 FFD7
-FFDA FFDC
-FFE8 FFEE
-10300 1031E
-10320 10323
-10330 1034A
-10400 10425
-10428 1044D
-1D000 1D0F5
-1D100 1D126
-1D12A 1D164
-1D16A 1D16C
-1D183 1D184
-1D18C 1D1A9
-1D1AE 1D1DD
-1D400 1D454
-1D456 1D49C
-1D49E 1D49F
-1D4A2
-1D4A5 1D4A6
-1D4A9 1D4AC
-1D4AE 1D4B9
-1D4BB
-1D4BD 1D4C0
-1D4C2 1D4C3
-1D4C5 1D505
-1D507 1D50A
-1D50D 1D514
-1D516 1D51C
-1D51E 1D539
-1D53B 1D53E
-1D540 1D544
-1D546
-1D54A 1D550
-1D552 1D6A3
-1D6A8 1D7C9
-END
+++ /dev/null
-# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by ./mktables from e.g. Unicode.txt.
-# Any changes made here will be lost!
-
-#
-# This file supports:
-# \p{LbrkB2}
-#
-# Meaning: Linebreak category 'B2'
-#
-return <<'END';
-2014
-END
+++ /dev/null
-# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by ./mktables from e.g. Unicode.txt.
-# Any changes made here will be lost!
-
-#
-# This file supports:
-# \p{LbrkBA}
-#
-# Meaning: Linebreak category 'BA'
-#
-return <<'END';
-0009
-007C
-00AD
-058A
-0F0B
-1361
-1680
-17D5
-2000 2006
-2008 200A
-2010
-2012 2013
-2027
-END
+++ /dev/null
-# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by ./mktables from e.g. Unicode.txt.
-# Any changes made here will be lost!
-
-#
-# This file supports:
-# \p{LbrkBB}
-#
-# Meaning: Linebreak category 'BB'
-#
-return <<'END';
-00B4
-02C8
-02CC
-1806
-END
+++ /dev/null
-# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by ./mktables from e.g. Unicode.txt.
-# Any changes made here will be lost!
-
-#
-# This file supports:
-# \p{LbrkBK}
-#
-# Meaning: Linebreak category 'BK'
-#
-return <<'END';
-000C
-2028 2029
-END
+++ /dev/null
-# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by ./mktables from e.g. Unicode.txt.
-# Any changes made here will be lost!
-
-#
-# This file supports:
-# \p{LbrkCB}
-#
-# Meaning: Linebreak category 'CB'
-#
-return <<'END';
-FFFC
-END
+++ /dev/null
-# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by ./mktables from e.g. Unicode.txt.
-# Any changes made here will be lost!
-
-#
-# This file supports:
-# \p{LbrkCL}
-#
-# Meaning: Linebreak category 'CL'
-#
-return <<'END';
-0029
-005D
-007D
-0F3B
-0F3D
-169C
-2046
-207E
-208E
-232A
-3001 3002
-3009
-300B
-300D
-300F
-3011
-3015
-3017
-3019
-301B
-301E 301F
-FD3F
-FE36
-FE38
-FE3A
-FE3C
-FE3E
-FE40
-FE42
-FE44
-FE50
-FE52
-FE5A
-FE5C
-FE5E
-FF09
-FF0C
-FF0E
-FF3D
-FF5D
-FF61
-FF63 FF64
-END
+++ /dev/null
-# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by ./mktables from e.g. Unicode.txt.
-# Any changes made here will be lost!
-
-#
-# This file supports:
-# \p{LbrkCM}
-#
-# Meaning: Linebreak category 'CM'
-#
-return <<'END';
-0000 0008
-000B
-000E 001F
-007F 009F
-0300 034E
-0360 0362
-0483 0486
-0488 0489
-0591 05A1
-05A3 05B9
-05BB 05BD
-05BF
-05C1 05C2
-05C4
-064B 0655
-0670
-06D6 06E4
-06E7 06E8
-06EA 06ED
-070F
-0711
-0730 074A
-07A6 07B0
-0901 0903
-093C
-093E 094D
-0951 0954
-0962 0963
-0981 0983
-09BC
-09BE 09C4
-09C7 09C8
-09CB 09CD
-09D7
-09E2 09E3
-0A02
-0A3C
-0A3E 0A42
-0A47 0A48
-0A4B 0A4D
-0A70 0A71
-0A81 0A83
-0ABC
-0ABE 0AC5
-0AC7 0AC9
-0ACB 0ACD
-0B01 0B03
-0B3C
-0B3E 0B43
-0B47 0B48
-0B4B 0B4D
-0B56 0B57
-0B82 0B83
-0BBE 0BC2
-0BC6 0BC8
-0BCA 0BCD
-0BD7
-0C01 0C03
-0C3E 0C44
-0C46 0C48
-0C4A 0C4D
-0C55 0C56
-0C82 0C83
-0CBE 0CC4
-0CC6 0CC8
-0CCA 0CCD
-0CD5 0CD6
-0D02 0D03
-0D3E 0D43
-0D46 0D48
-0D4A 0D4D
-0D57
-0D82 0D83
-0DCA
-0DCF 0DD4
-0DD6
-0DD8 0DDF
-0DF2 0DF3
-0E31
-0E34 0E3A
-0E47 0E4E
-0EB1
-0EB4 0EB9
-0EBB 0EBC
-0EC8 0ECD
-0F18 0F19
-0F35
-0F37
-0F39
-0F3E 0F3F
-0F71 0F84
-0F86 0F87
-0F90 0F97
-0F99 0FBC
-0FC6
-102C 1032
-1036 1039
-1056 1059
-1160 11A2
-11A8 11F9
-17B4 17D3
-180B 180E
-18A9
-200C 200F
-202A 202E
-206A 206F
-20D0 20E3
-302A 302F
-3099 309A
-FB1E
-FE20 FE23
-FFF9 FFFB
-1D165 1D169
-1D16D 1D182
-1D185 1D18B
-1D1AA 1D1AD
-E0001
-E0020 E007F
-END
+++ /dev/null
-# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by ./mktables from e.g. Unicode.txt.
-# Any changes made here will be lost!
-
-#
-# This file supports:
-# \p{LbrkCR}
-#
-# Meaning: Linebreak category 'CR'
-#
-return <<'END';
-000D
-END
+++ /dev/null
-# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by ./mktables from e.g. Unicode.txt.
-# Any changes made here will be lost!
-
-#
-# This file supports:
-# \p{LbrkEX}
-#
-# Meaning: Linebreak category 'EX'
-#
-return <<'END';
-0021
-003F
-FE56 FE57
-FF01
-FF1F
-END
+++ /dev/null
-# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by ./mktables from e.g. Unicode.txt.
-# Any changes made here will be lost!
-
-#
-# This file supports:
-# \p{LbrkGL}
-#
-# Meaning: Linebreak category 'GL'
-#
-return <<'END';
-00A0
-0F0C
-2007
-2011
-202F
-FEFF
-END
+++ /dev/null
-# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by ./mktables from e.g. Unicode.txt.
-# Any changes made here will be lost!
-
-#
-# This file supports:
-# \p{LbrkHY}
-#
-# Meaning: Linebreak category 'HY'
-#
-return <<'END';
-002D
-END
+++ /dev/null
-# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by ./mktables from e.g. Unicode.txt.
-# Any changes made here will be lost!
-
-#
-# This file supports:
-# \p{LbrkID}
-#
-# Meaning: Linebreak category 'ID'
-#
-return <<'END';
-1100 1159
-115F
-2E80 2E99
-2E9B 2EF3
-2F00 2FD5
-2FF0 2FFB
-3000
-3003 3004
-3006 3007
-3012 3013
-3020 3029
-3030 303A
-303E 303F
-3042
-3044
-3046
-3048
-304A 3062
-3064 3082
-3084
-3086
-3088 308D
-308F 3094
-30A2
-30A4
-30A6
-30A8
-30AA 30C2
-30C4 30E2
-30E4
-30E6
-30E8 30ED
-30EF 30F4
-30F7 30FA
-30FC
-30FE
-3105 312C
-3131 318E
-3190 31B7
-3200 321C
-3220 3243
-3260 327B
-327F 32B0
-32C0 32CB
-32D0 32FE
-3300 3376
-337B 33DD
-33E0 33FE
-3400 4DB5
-4E00 9FA5
-A000 A48C
-A490 A4A1
-A4A4 A4B3
-A4B5 A4C0
-A4C2 A4C4
-A4C6
-AC00 D7A3
-F900 FA2D
-FE30 FE34
-FE49 FE4F
-FE51
-FE58
-FE5F FE66
-FE68
-FE6B
-FF02 FF03
-FF06 FF07
-FF0A FF0B
-FF0D
-FF0F FF19
-FF1C FF1E
-FF20 FF3A
-FF3C
-FF3E FF5A
-FF5C
-FF5E
-FFE2 FFE4
-20000 2A6D6
-2F800 2FA1D
-END
+++ /dev/null
-# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by ./mktables from e.g. Unicode.txt.
-# Any changes made here will be lost!
-
-#
-# This file supports:
-# \p{LbrkIN}
-#
-# Meaning: Linebreak category 'IN'
-#
-return <<'END';
-2024 2026
-END
+++ /dev/null
-# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by ./mktables from e.g. Unicode.txt.
-# Any changes made here will be lost!
-
-#
-# This file supports:
-# \p{LbrkIS}
-#
-# Meaning: Linebreak category 'IS'
-#
-return <<'END';
-002C
-002E
-003A 003B
-0589
-END
+++ /dev/null
-# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by ./mktables from e.g. Unicode.txt.
-# Any changes made here will be lost!
-
-#
-# This file supports:
-# \p{LbrkLF}
-#
-# Meaning: Linebreak category 'LF'
-#
-return <<'END';
-000A
-END
+++ /dev/null
-# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by ./mktables from e.g. Unicode.txt.
-# Any changes made here will be lost!
-
-#
-# This file supports:
-# \p{LbrkNS}
-#
-# Meaning: Linebreak category 'NS'
-#
-return <<'END';
-0E5A 0E5B
-17D4
-17D6 17DA
-203C
-2044
-3005
-301C
-3041
-3043
-3045
-3047
-3049
-3063
-3083
-3085
-3087
-308E
-309B 309E
-30A1
-30A3
-30A5
-30A7
-30A9
-30C3
-30E3
-30E5
-30E7
-30EE
-30F5 30F6
-30FB
-30FD
-FE54 FE55
-FF1A FF1B
-FF65
-FF67 FF70
-FF9E FF9F
-END
+++ /dev/null
-# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by ./mktables from e.g. Unicode.txt.
-# Any changes made here will be lost!
-
-#
-# This file supports:
-# \p{LbrkNU}
-#
-# Meaning: Linebreak category 'NU'
-#
-return <<'END';
-0030 0039
-0660 0669
-06F0 06F9
-0966 096F
-09E6 09EF
-0A66 0A6F
-0AE6 0AEF
-0B66 0B6F
-0BE7 0BEF
-0C66 0C6F
-0CE6 0CEF
-0D66 0D6F
-0E50 0E59
-0ED0 0ED9
-0F20 0F29
-1040 1049
-1369 1371
-17E0 17E9
-1810 1819
-1D7CE 1D7FF
-END
+++ /dev/null
-# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by ./mktables from e.g. Unicode.txt.
-# Any changes made here will be lost!
-
-#
-# This file supports:
-# \p{LbrkOP}
-#
-# Meaning: Linebreak category 'OP'
-#
-return <<'END';
-0028
-005B
-007B
-0F3A
-0F3C
-169B
-201A
-201E
-2045
-207D
-208D
-2329
-3008
-300A
-300C
-300E
-3010
-3014
-3016
-3018
-301A
-301D
-FD3E
-FE35
-FE37
-FE39
-FE3B
-FE3D
-FE3F
-FE41
-FE43
-FE59
-FE5B
-FE5D
-FF08
-FF3B
-FF5B
-FF62
-END
+++ /dev/null
-# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by ./mktables from e.g. Unicode.txt.
-# Any changes made here will be lost!
-
-#
-# This file supports:
-# \p{LbrkPO}
-#
-# Meaning: Linebreak category 'PO'
-#
-return <<'END';
-0025
-00A2
-00B0
-2030 2037
-20A7
-2103
-2109
-2126
-FE6A
-FF05
-FFE0
-END
+++ /dev/null
-# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by ./mktables from e.g. Unicode.txt.
-# Any changes made here will be lost!
-
-#
-# This file supports:
-# \p{LbrkPR}
-#
-# Meaning: Linebreak category 'PR'
-#
-return <<'END';
-0024
-002B
-005C
-00A3 00A5
-00B1
-09F2 09F3
-0E3F
-17DB
-20A0 20A6
-20A8 20AF
-2116
-2212 2213
-FE69
-FF04
-FFE1
-FFE5 FFE6
-END
+++ /dev/null
-# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by ./mktables from e.g. Unicode.txt.
-# Any changes made here will be lost!
-
-#
-# This file supports:
-# \p{LbrkQU}
-#
-# Meaning: Linebreak category 'QU'
-#
-return <<'END';
-0022
-0027
-00AB
-00BB
-2018 2019
-201B 201D
-201F
-2039 203A
-END
+++ /dev/null
-# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by ./mktables from e.g. Unicode.txt.
-# Any changes made here will be lost!
-
-#
-# This file supports:
-# \p{LbrkSA}
-#
-# Meaning: Linebreak category 'SA'
-#
-return <<'END';
-0E01 0E30
-0E32 0E33
-0E40 0E46
-0E81 0E82
-0E84
-0E87 0E88
-0E8A
-0E8D
-0E94 0E97
-0E99 0E9F
-0EA1 0EA3
-0EA5
-0EA7
-0EAA 0EAB
-0EAD 0EB0
-0EB2 0EB3
-0EBD
-0EC0 0EC4
-0EC6
-0EDC 0EDD
-1000 1021
-1023 1027
-1029 102A
-1050 1055
-1780 17B3
-END
+++ /dev/null
-# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by ./mktables from e.g. Unicode.txt.
-# Any changes made here will be lost!
-
-#
-# This file supports:
-# \p{LbrkSG}
-#
-# Meaning: Linebreak category 'SG'
-#
-return <<'END';
-D800 DFFF
-END
+++ /dev/null
-# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by ./mktables from e.g. Unicode.txt.
-# Any changes made here will be lost!
-
-#
-# This file supports:
-# \p{LbrkSP}
-#
-# Meaning: Linebreak category 'SP'
-#
-return <<'END';
-0020
-END
+++ /dev/null
-# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by ./mktables from e.g. Unicode.txt.
-# Any changes made here will be lost!
-
-#
-# This file supports:
-# \p{LbrkSY}
-#
-# Meaning: Linebreak category 'SY'
-#
-return <<'END';
-002F
-END
+++ /dev/null
-# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by ./mktables from e.g. Unicode.txt.
-# Any changes made here will be lost!
-
-#
-# This file supports:
-# \p{LbrkXX}
-#
-# Meaning: Linebreak category 'XX'
-#
-return <<'END';
-E000 F8FF
-F0000 FFFFD
-100000 10FFFD
-END
+++ /dev/null
-# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by ./mktables from e.g. Unicode.txt.
-# Any changes made here will be lost!
-
-#
-# This file supports:
-# \p{LbrkZW}
-#
-# Meaning: Linebreak category 'ZW'
-#
-return <<'END';
-200B
-END
all:
../../miniperl -I../../lib ./mktables
+TestProp.pl: mktables Unicode.txt Scripts.txt Blocks.txt PropList.txt
+ ../../miniperl -I../../lib ./mktables -maketest
+
+test: TestProp.pl
+ ../../miniperl -I../../lib TestProp.pl
+
clean:
rm -f *.pl */*.pl
rm -f Properties
##
\p{ASCII} [[:ASCII:]]
-* \p{All} Alias for \p{Any} ([\x{0000}-\x{10FFFF}])
\p{Alnum} [[:Alnum:]]
* \p{Alphabetic} [\p{L}\p{OtherAlphabetic}]
\p{Alpha} [[:Alpha:]]
-* \p{Any} [\x{0000}-\x{10FFFF}]
+ \p{Any} Alias for \p{Any} ([\x{0000}-\x{10FFFF}])
+ \p{Any} [\x{0000}-\x{10FFFF}]
* \p{Arabic} Script 'ARABIC'
* \p{Armenian} Script 'ARMENIAN'
* \p{AsciiHexDigit} Extended property 'ASCII_Hex_Digit'
-* \p{Assigned} All assigned code points
+ \p{Assigned} All assigned code points
* \p{Bengali} Script 'BENGALI'
\p{BidiAL} Bi-directional category 'AL'
\p{BidiAN} Bi-directional category 'AN'
* \p{Bopomofo} Script 'BOPOMOFO'
* \p{CanadianAboriginal} Script 'CANADIAN-ABORIGINAL'
\p{Canon} Decomposes to multiple characters
+* \p{Cc} Alias for \p{Cc} (General Category 'Cc')
\p{Cc} General Category 'Cc'
+* \p{Cf} Alias for \p{Cf} (General Category 'Cf')
\p{Cf} General Category 'Cf'
* \p{Cherokee} Script 'CHEROKEE'
-* \p{ClosePunctuation} Alias for \p{Pe} (General Category 'Pe')
\p{Cntrl} [[:Cntrl:]]
+* \p{Cn} Alias for \p{Cn} (General Category 'Cn' [not functional in Perl])
\p{Cn} General Category 'Cn' [not functional in Perl]
* \p{Common} Pseudo-Script of codepoints not in other Unicode scripts
\p{Compat} Compatible with a more-basic character
-* \p{ConnectorPunctuation} Alias for \p{Pc} (General Category 'Pc')
-* \p{Control} Alias for \p{Cc} (General Category 'Cc')
+* \p{Co} Alias for \p{Co} (General Category 'Co')
\p{Co} General Category 'Co'
+* \p{Cs} Alias for \p{Cs} (General Category 'Cs')
\p{Cs} General Category 'Cs'
-* \p{CurrencySymbol} Alias for \p{Sc} (General Category 'Sc')
* \p{Cyrillic} Script 'CYRILLIC'
+* \p{C} Alias for \p{C} (Major Category 'C')
\p{C} Major Category 'C'
\p{DCcircle} Compatible with 'circle'
\p{DCcompat} Compatible with 'compat'
\p{DCsuper} Compatible with 'super'
\p{DCvertical} Compatible with 'vertical'
\p{DCwide} Compatible with 'wide'
-* \p{DashPunctuation} Alias for \p{Pd} (General Category 'Pd')
* \p{Dash} Extended property 'Dash'
-* \p{DecimalNumber} Alias for \p{Nd} (General Category 'Nd')
* \p{Deseret} Script 'DESERET'
* \p{Devanagari} Script 'DEVANAGARI'
* \p{Diacritic} Extended property 'Diacritic'
\p{Digit} [[:Digit:]]
-* \p{EnclosingMark} Alias for \p{Me} (General Category 'Me')
* \p{Ethiopic} Script 'ETHIOPIC'
* \p{Extender} Extended property 'Extender'
-* \p{FinalPunctuation} Alias for \p{Pf} (General Category 'Pf')
-* \p{Format} Alias for \p{Cf} (General Category 'Cf')
* \p{Georgian} Script 'GEORGIAN'
* \p{Gothic} Script 'GOTHIC'
\p{Graph} [[:Graph:]]
* \p{InYiRadicals} Block 'Yi Radicals'
* \p{InYiSyllables} Block 'Yi Syllables'
* \p{Inherited} Script 'INHERITED'
-* \p{InitialPunctuation} Alias for \p{Pi} (General Category 'Pi')
* \p{JoinControl} Extended property 'Join_Control'
* \p{Kannada} Script 'KANNADA'
* \p{Katakana} Script 'KATAKANA'
\p{L&} [\p{Ll}\p{Lu}\p{Lt}]
* \p{Lao} Script 'LAO'
* \p{Latin} Script 'LATIN'
- \p{LbrkAI} Linebreak category 'AI'
- \p{LbrkAL} Linebreak category 'AL'
- \p{LbrkB2} Linebreak category 'B2'
- \p{LbrkBA} Linebreak category 'BA'
- \p{LbrkBB} Linebreak category 'BB'
- \p{LbrkBK} Linebreak category 'BK'
- \p{LbrkCB} Linebreak category 'CB'
- \p{LbrkCL} Linebreak category 'CL'
- \p{LbrkCM} Linebreak category 'CM'
- \p{LbrkCR} Linebreak category 'CR'
- \p{LbrkEX} Linebreak category 'EX'
- \p{LbrkGL} Linebreak category 'GL'
- \p{LbrkHY} Linebreak category 'HY'
- \p{LbrkID} Linebreak category 'ID'
- \p{LbrkIN} Linebreak category 'IN'
- \p{LbrkIS} Linebreak category 'IS'
- \p{LbrkLF} Linebreak category 'LF'
- \p{LbrkNS} Linebreak category 'NS'
- \p{LbrkNU} Linebreak category 'NU'
- \p{LbrkOP} Linebreak category 'OP'
- \p{LbrkPO} Linebreak category 'PO'
- \p{LbrkPR} Linebreak category 'PR'
- \p{LbrkQU} Linebreak category 'QU'
- \p{LbrkSA} Linebreak category 'SA'
- \p{LbrkSG} Linebreak category 'SG'
- \p{LbrkSP} Linebreak category 'SP'
- \p{LbrkSY} Linebreak category 'SY'
- \p{LbrkXX} Linebreak category 'XX'
- \p{LbrkZW} Linebreak category 'ZW'
-* \p{LetterNumber} Alias for \p{Nl} (General Category 'Nl')
-* \p{Letter} Alias for \p{L} (Major Category 'L')
-* \p{LineSeparator} Alias for \p{Zl} (General Category 'Zl')
+* \p{Ll} Alias for \p{Ll} (General Category 'Ll')
\p{Ll} General Category 'Ll'
+* \p{Lm} Alias for \p{Lm} (General Category 'Lm')
\p{Lm} General Category 'Lm'
-* \p{LowercaseLetter} Alias for \p{Ll} (General Category 'Ll')
* \p{Lowercase} [\p{Ll}\p{OtherLowercase}]
\p{Lower} [[:Lower:]]
+* \p{Lo} Alias for \p{Lo} (General Category 'Lo')
\p{Lo} General Category 'Lo'
+* \p{Lt} Alias for \p{Lt} (General Category 'Lt')
\p{Lt} General Category 'Lt'
+* \p{Lu} Alias for \p{Lu} (General Category 'Lu')
\p{Lu} General Category 'Lu'
+* \p{L} Alias for \p{L} (Major Category 'L')
\p{L} Major Category 'L'
* \p{Malayalam} Script 'MALAYALAM'
-* \p{Mark} Alias for \p{M} (Major Category 'M')
-* \p{MathSymbol} Alias for \p{Sm} (General Category 'Sm')
* \p{Math} [\p{Sm}\p{OtherMath}]
+* \p{Mc} Alias for \p{Mc} (General Category 'Mc')
\p{Mc} General Category 'Mc'
+* \p{Me} Alias for \p{Me} (General Category 'Me')
\p{Me} General Category 'Me'
\p{Mirrored} Mirrored in bidirectional text
+* \p{Mn} Alias for \p{Mn} (General Category 'Mn')
\p{Mn} General Category 'Mn'
-* \p{ModifierLetter} Alias for \p{Lm} (General Category 'Lm')
-* \p{ModifierSymbol} Alias for \p{Sk} (General Category 'Sk')
* \p{Mongolian} Script 'MONGOLIAN'
* \p{Myanmar} Script 'MYANMAR'
+* \p{M} Alias for \p{M} (Major Category 'M')
\p{M} Major Category 'M'
+* \p{Nd} Alias for \p{Nd} (General Category 'Nd')
\p{Nd} General Category 'Nd'
+* \p{Nl} Alias for \p{Nl} (General Category 'Nl')
\p{Nl} General Category 'Nl'
-* \p{NonSpacingMark} Alias for \p{Mn} (General Category 'Mn')
* \p{NoncharacterCodePoint} Extended property 'Noncharacter_Code_Point'
+* \p{No} Alias for \p{No} (General Category 'No')
\p{No} General Category 'No'
-* \p{Number} Alias for \p{N} (Major Category 'N')
+* \p{N} Alias for \p{N} (Major Category 'N')
\p{N} Major Category 'N'
* \p{Ogham} Script 'OGHAM'
* \p{OldItalic} Script 'OLD-ITALIC'
-* \p{OpenPunctuation} Alias for \p{Ps} (General Category 'Ps')
* \p{Oriya} Script 'ORIYA'
* \p{OtherAlphabetic} Extended property 'Other_Alphabetic'
-* \p{OtherLetter} Alias for \p{Lo} (General Category 'Lo')
* \p{OtherLowercase} Extended property 'Other_Lowercase'
* \p{OtherMath} Extended property 'Other_Math'
-* \p{OtherNumber} Alias for \p{No} (General Category 'No')
-* \p{OtherPunctuation} Alias for \p{Po} (General Category 'Po')
-* \p{OtherSymbol} Alias for \p{So} (General Category 'So')
* \p{OtherUppercase} Extended property 'Other_Uppercase'
-* \p{Other} Alias for \p{C} (Major Category 'C')
-* \p{ParagraphSeparator} Alias for \p{Zp} (General Category 'Zp')
+* \p{Pc} Alias for \p{Pc} (General Category 'Pc')
\p{Pc} General Category 'Pc'
+* \p{Pd} Alias for \p{Pd} (General Category 'Pd')
\p{Pd} General Category 'Pd'
+* \p{Pe} Alias for \p{Pe} (General Category 'Pe')
\p{Pe} General Category 'Pe'
+* \p{Pf} Alias for \p{Pf} (General Category 'Pf')
\p{Pf} General Category 'Pf'
+* \p{Pi} Alias for \p{Pi} (General Category 'Pi')
\p{Pi} General Category 'Pi'
+* \p{Po} Alias for \p{Po} (General Category 'Po')
\p{Po} General Category 'Po'
\p{Print} [[:Print:]]
-* \p{PrivateUse} Alias for \p{Co} (General Category 'Co')
+* \p{Ps} Alias for \p{Ps} (General Category 'Ps')
\p{Ps} General Category 'Ps'
-* \p{Punctuation} Alias for \p{P} (Major Category 'P')
\p{Punct} [[:Punct:]]
+* \p{P} Alias for \p{P} (Major Category 'P')
\p{P} Major Category 'P'
* \p{QuotationMark} Extended property 'Quotation_Mark'
* \p{Runic} Script 'RUNIC'
+* \p{Sc} Alias for \p{Sc} (General Category 'Sc')
\p{Sc} General Category 'Sc'
-* \p{Separator} Alias for \p{Z} (Major Category 'Z')
* \p{Sinhala} Script 'SINHALA'
+* \p{Sk} Alias for \p{Sk} (General Category 'Sk')
\p{Sk} General Category 'Sk'
+* \p{Sm} Alias for \p{Sm} (General Category 'Sm')
\p{Sm} General Category 'Sm'
+* \p{So} Alias for \p{So} (General Category 'So')
\p{So} General Category 'So'
\p{SpacePerl} \s
-* \p{SpaceSeparator} Alias for \p{Zs} (General Category 'Zs')
\p{Space} [[:Space:]]
-* \p{SpacingMark} Alias for \p{Mc} (General Category 'Mc')
-* \p{Surrogate} Alias for \p{Cs} (General Category 'Cs')
-* \p{Symbol} Alias for \p{S} (Major Category 'S')
* \p{Syriac} Script 'SYRIAC'
+* \p{S} Alias for \p{S} (Major Category 'S')
\p{S} Major Category 'S'
* \p{Tamil} Script 'TAMIL'
* \p{Telugu} Script 'TELUGU'
* \p{Thaana} Script 'THAANA'
* \p{Thai} Script 'THAI'
* \p{Tibetan} Script 'TIBETAN'
-* \p{TitlecaseLetter} Alias for \p{Lt} (General Category 'Lt')
\p{Title} [[:Title:]]
-* \p{Unassigned} Alias for \p{Cn} (General Category 'Cn' [not functional in Perl])
-* \p{UppercaseLetter} Alias for \p{Lu} (General Category 'Lu')
* \p{Uppercase} [\p{Lu}\p{Other_Uppercase}]
\p{Upper} [[:Upper:]]
* \p{WhiteSpace} Extended property 'White_space'
\p{Word} [[:Word:]]
\p{XDigit} [[:XDigit:]]
* \p{Yi} Script 'YI'
+* \p{Zl} Alias for \p{Zl} (General Category 'Zl')
\p{Zl} General Category 'Zl'
+* \p{Zp} Alias for \p{Zp} (General Category 'Zp')
\p{Zp} General Category 'Zp'
+* \p{Zs} Alias for \p{Zs} (General Category 'Zs')
\p{Zs} General Category 'Zs'
+* \p{Z} Alias for \p{Z} (Major Category 'Z')
\p{Z} Major Category 'Z'
\p{_CanonDCIJ} (for internal casefolding use)
\p{_CaseIgnorable} (for internal casefolding use)
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables from e.g. Unicode.txt.
+# This file is built by ./mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
return <<'END';
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables from e.g. Unicode.txt.
+# This file is built by ./mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables from e.g. Unicode.txt.
+# This file is built by ./mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables from e.g. Unicode.txt.
+# This file is built by ./mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables from e.g. Unicode.txt.
+# This file is built by ./mktables from e.g. Unicode.txt.
# Any changes made here will be lost!
#
# This file supports:
-# \p{Any} (and fuzzy permutations)
-# \p{All} (and fuzzy permutations)
+# \p{Any}
+# \p{Any}
#
# Meaning: [\x{0000}-\x{10FFFF}]
#
#
# This file supports:
-# \p{Assigned} (and fuzzy permutations)
+# \p{Assigned}
#
# Meaning: All assigned code points
#
#
# This file supports:
# \p{C}
-# \p{Other} (and fuzzy permutations)
+# \p{C} (and fuzzy permutations)
#
# Meaning: Major Category 'C'
#
#
# This file supports:
# \p{Cc}
-# \p{Control} (and fuzzy permutations)
+# \p{Cc} (and fuzzy permutations)
#
# Meaning: General Category 'Cc'
#
#
# This file supports:
# \p{Cf}
-# \p{Format} (and fuzzy permutations)
+# \p{Cf} (and fuzzy permutations)
#
# Meaning: General Category 'Cf'
#
#
# This file supports:
# \p{Cn}
-# \p{Unassigned} (and fuzzy permutations)
+# \p{Cn} (and fuzzy permutations)
#
# Meaning: General Category 'Cn' [not functional in Perl]
#
#
# This file supports:
# \p{Co}
-# \p{PrivateUse} (and fuzzy permutations)
+# \p{Co} (and fuzzy permutations)
#
# Meaning: General Category 'Co'
#
#
# This file supports:
# \p{Cs}
-# \p{Surrogate} (and fuzzy permutations)
+# \p{Cs} (and fuzzy permutations)
#
# Meaning: General Category 'Cs'
#
#
# This file supports:
# \p{L}
-# \p{Letter} (and fuzzy permutations)
+# \p{L} (and fuzzy permutations)
#
# Meaning: Major Category 'L'
#
#
# This file supports:
# \p{Ll}
-# \p{LowercaseLetter} (and fuzzy permutations)
+# \p{Ll} (and fuzzy permutations)
#
# Meaning: General Category 'Ll'
#
#
# This file supports:
# \p{Lm}
-# \p{ModifierLetter} (and fuzzy permutations)
+# \p{Lm} (and fuzzy permutations)
#
# Meaning: General Category 'Lm'
#
#
# This file supports:
# \p{Lo}
-# \p{OtherLetter} (and fuzzy permutations)
+# \p{Lo} (and fuzzy permutations)
#
# Meaning: General Category 'Lo'
#
#
# This file supports:
# \p{Lt}
-# \p{TitlecaseLetter} (and fuzzy permutations)
+# \p{Lt} (and fuzzy permutations)
#
# Meaning: General Category 'Lt'
#
#
# This file supports:
# \p{Lu}
-# \p{UppercaseLetter} (and fuzzy permutations)
+# \p{Lu} (and fuzzy permutations)
#
# Meaning: General Category 'Lu'
#
#
# This file supports:
# \p{M}
-# \p{Mark} (and fuzzy permutations)
+# \p{M} (and fuzzy permutations)
#
# Meaning: Major Category 'M'
#
#
# This file supports:
# \p{Mc}
-# \p{SpacingMark} (and fuzzy permutations)
+# \p{Mc} (and fuzzy permutations)
#
# Meaning: General Category 'Mc'
#
#
# This file supports:
# \p{Me}
-# \p{EnclosingMark} (and fuzzy permutations)
+# \p{Me} (and fuzzy permutations)
#
# Meaning: General Category 'Me'
#
#
# This file supports:
# \p{Mn}
-# \p{NonSpacingMark} (and fuzzy permutations)
+# \p{Mn} (and fuzzy permutations)
#
# Meaning: General Category 'Mn'
#
#
# This file supports:
# \p{N}
-# \p{Number} (and fuzzy permutations)
+# \p{N} (and fuzzy permutations)
#
# Meaning: Major Category 'N'
#
#
# This file supports:
# \p{Nd}
-# \p{DecimalNumber} (and fuzzy permutations)
+# \p{Nd} (and fuzzy permutations)
#
# Meaning: General Category 'Nd'
#
#
# This file supports:
# \p{Nl}
-# \p{LetterNumber} (and fuzzy permutations)
+# \p{Nl} (and fuzzy permutations)
#
# Meaning: General Category 'Nl'
#
#
# This file supports:
# \p{No}
-# \p{OtherNumber} (and fuzzy permutations)
+# \p{No} (and fuzzy permutations)
#
# Meaning: General Category 'No'
#
#
# This file supports:
# \p{P}
-# \p{Punctuation} (and fuzzy permutations)
+# \p{P} (and fuzzy permutations)
#
# Meaning: Major Category 'P'
#
#
# This file supports:
# \p{Pc}
-# \p{ConnectorPunctuation} (and fuzzy permutations)
+# \p{Pc} (and fuzzy permutations)
#
# Meaning: General Category 'Pc'
#
#
# This file supports:
# \p{Pd}
-# \p{DashPunctuation} (and fuzzy permutations)
+# \p{Pd} (and fuzzy permutations)
#
# Meaning: General Category 'Pd'
#
#
# This file supports:
# \p{Pe}
-# \p{ClosePunctuation} (and fuzzy permutations)
+# \p{Pe} (and fuzzy permutations)
#
# Meaning: General Category 'Pe'
#
#
# This file supports:
# \p{Pf}
-# \p{FinalPunctuation} (and fuzzy permutations)
+# \p{Pf} (and fuzzy permutations)
#
# Meaning: General Category 'Pf'
#
#
# This file supports:
# \p{Pi}
-# \p{InitialPunctuation} (and fuzzy permutations)
+# \p{Pi} (and fuzzy permutations)
#
# Meaning: General Category 'Pi'
#
#
# This file supports:
# \p{Po}
-# \p{OtherPunctuation} (and fuzzy permutations)
+# \p{Po} (and fuzzy permutations)
#
# Meaning: General Category 'Po'
#
#
# This file supports:
# \p{Ps}
-# \p{OpenPunctuation} (and fuzzy permutations)
+# \p{Ps} (and fuzzy permutations)
#
# Meaning: General Category 'Ps'
#
#
# This file supports:
# \p{S}
-# \p{Symbol} (and fuzzy permutations)
+# \p{S} (and fuzzy permutations)
#
# Meaning: Major Category 'S'
#
#
# This file supports:
# \p{Sc}
-# \p{CurrencySymbol} (and fuzzy permutations)
+# \p{Sc} (and fuzzy permutations)
#
# Meaning: General Category 'Sc'
#
#
# This file supports:
# \p{Sk}
-# \p{ModifierSymbol} (and fuzzy permutations)
+# \p{Sk} (and fuzzy permutations)
#
# Meaning: General Category 'Sk'
#
#
# This file supports:
# \p{Sm}
-# \p{MathSymbol} (and fuzzy permutations)
+# \p{Sm} (and fuzzy permutations)
#
# Meaning: General Category 'Sm'
#
#
# This file supports:
# \p{So}
-# \p{OtherSymbol} (and fuzzy permutations)
+# \p{So} (and fuzzy permutations)
#
# Meaning: General Category 'So'
#
#
# This file supports:
# \p{Z}
-# \p{Separator} (and fuzzy permutations)
+# \p{Z} (and fuzzy permutations)
#
# Meaning: Major Category 'Z'
#
#
# This file supports:
# \p{Zl}
-# \p{LineSeparator} (and fuzzy permutations)
+# \p{Zl} (and fuzzy permutations)
#
# Meaning: General Category 'Zl'
#
#
# This file supports:
# \p{Zp}
-# \p{ParagraphSeparator} (and fuzzy permutations)
+# \p{Zp} (and fuzzy permutations)
#
# Meaning: General Category 'Zp'
#
#
# This file supports:
# \p{Zs}
-# \p{SpaceSeparator} (and fuzzy permutations)
+# \p{Zs} (and fuzzy permutations)
#
# Meaning: General Category 'Zs'
#
#!/usr/bin/perl -w
use strict;
use Carp;
+
##
## mktables -- create the runtime Perl Unicode files (lib/unicore/**/*.pl)
## from the Unicode database files (lib/unicore/*.txt).
##
-mkdir("In", 0755);
-mkdir("Is", 0755);
-mkdir("To", 0755);
+mkdir("lib", 0755);
+mkdir("To", 0755);
##
## Process any args.
##
-my $Verbose = 0;
+my $Verbose = 0;
+my $MakeTestScript = 0;
while (@ARGV)
{
$Verbose = 1;
} elsif ($arg eq '-q') {
$Verbose = 0;
+ } elsif ($arg eq '-maketest') {
+ $MakeTestScript = 1;
} else {
- die "usage: $0 [-v|-q]";
+ die "usage: $0 [-v|-q] [-maketest]";
}
}
EOF
+
+##
+## Given a filename and a reference to an array of lines, write the lines
+## to the file only if they differ from what the file already contains.
+## A file whose contents would be identical is left untouched.
+##
+## The ($\@) prototype lets callers pass a plain array:
+##     WriteIfChanged($file, @lines);
+##
+## Dies if the file cannot be opened for output, or if writing to it or
+## closing it fails.
+##
+sub WriteIfChanged($\@)
+{
+    my $file  = shift;
+    my $lines = shift;
+
+    my $TextToWrite = join '', @$lines;
+
+    ## Slurp whatever is there now.  If the file can't be opened for
+    ## reading there is nothing to compare against, so just write it.
+    if (open my $in, '<', $file) {
+        local $/ = undef;
+        my $PreviousText = <$in>;
+        close $in;
+        if (defined $PreviousText and $PreviousText eq $TextToWrite) {
+            print "$file unchanged.\n" if $Verbose;
+            return;
+        }
+    }
+
+    open my $out, '>', $file
+        or die "$0: can't open $file for output: $!\n";
+    print "$file written.\n" if $Verbose;
+
+    ## Buffered write errors (a full disk, say) may only show up at close,
+    ## so check both the print and the close.
+    print {$out} $TextToWrite
+        or die "$0: can't write to $file: $!\n";
+    close $out
+        or die "$0: can't close $file: $!\n";
+}
+
##
## The main datastructure (a "Table") represents a set of code points that
## are part of a particular quality (that are part of \pL, \p{InGreek},
my %TableDesc;
my %FuzzyNames;
my %AliasInfo;
+my %CanonicalToOrig;
##
## Turn something like
## OLD-ITALIC
-## to
+## into
## OldItalic
##
sub CanonicalName($)
{
- my $name = lc shift;
+ my $orig = shift;
+ my $name = lc $orig;
$name =~ s/(?<![a-z])(\w)/\u$1/g;
- $name =~ s/[_\W]+//g;
- return $name;
-}
+ $name =~ s/[-_\s]+//g;
-##
-## Turn something like
-## OLD-ITALIC
-## to
-## Old_Italic
-##
-sub CanonicalNameForPattern($)
-{
- my $name = lc shift;
- $name =~ s/(?<![a-z])(\w)/\u$1/g;
- $name =~ s/[_\W]+/_/;
+ $CanonicalToOrig{$name} = $orig if not $CanonicalToOrig{$name};
return $name;
}
-
##
## Associates a property ("Greek", "Lu", "Assigned",...) with a Table.
##
my $Fuzzy = delete $Args{Fuzzy};
my $Desc = delete $Args{Desc}; # description
- $Name = CanonicalNameForPattern($Name) if $Fuzzy;
+ $Name = CanonicalName($Name) if $Fuzzy;
## sanity check a few args
if (%Args or ($Type ne 'Is' and $Type ne 'In') or not ref $Table) {
my $filename = shift;
my $comment = shift;
- print "$filename\n" if $Verbose;
-
- if (not open(OUT, ">$filename")) {
- die "$0: can't write $filename: $!\n";
- }
-
- print OUT $HEADER;
+ my @OUT = $HEADER;
if (defined $comment) {
$comment =~ s/\s+\Z//;
$comment =~ s/^/# /gm;
- print OUT "#\n$comment\n#\n";
+ push @OUT, "#\n$comment\n#\n";
}
- print OUT "return <<'END';\n";
+ push @OUT, "return <<'END';\n";
for my $set (@$Table)
{
my $name = $set->[RANGE_NAME];
if ($start == $end) {
- printf OUT "%04X\t\t%s\n", $start, $name;
+ push @OUT, sprintf "%04X\t\t%s\n", $start, $name;
} else {
- printf OUT "%04X\t%04X\t%s\n", $start, $end, $name;
+ push @OUT, sprintf "%04X\t%04X\t%s\n", $start, $end, $name;
}
}
- print OUT "END\n";
- close OUT;
+ push @OUT, "END\n";
+
+ WriteIfChanged($filename, @OUT);
+}
+
+## Helper, used only when making the test script.
+## Returns 1 if $code is a code point the generated tests may use,
+## 0 otherwise.
+sub IsUsable($)
+{
+    my $code = shift;
+
+    return 0 unless $code > 0x0000;                   ## don't use null
+    return 0 unless $code < $LastUnicodeCodepoint;    ## keep in range
+
+    ## Ranges that must be avoided entirely: [first, last]
+    for my $banned ([0xD800, 0xDFFF],     ## no surrogates
+                    [0xFDD0, 0xFDEF])     ## utf8.c says no good
+    {
+        return 0 if $code >= $banned->[0] and $code <= $banned->[1];
+    }
+
+    ## xxFFFE and xxFFFF in every plane: utf8.c says no good
+    my $low16 = $code & 0xFFFF;
+    return 0 if $low16 == 0xFFFE or $low16 == 0xFFFF;
+
+    return 1;
+}
+
+## Return a code point that's part of the table: the end of the first
+## range whose end passes IsUsable().
+## Returns nothing if no range end is usable (which includes the case
+## of an empty table).
+## Used only when making the test script.
+sub Table::ValidCode
+{
+    my ($Table) = @_;    #self
+
+    foreach my $range (@$Table) {
+        my $candidate = $range->[RANGE_END];
+        return $candidate if IsUsable($candidate);
+    }
+    return ();
+}
+
+## Return a code point that's not part of the table.
+## For an empty table that is simply 0x1234; otherwise the code point just
+## past the end of a range is tried first, then the one just before its
+## start, and the first that passes IsUsable() is returned.
+## Returns nothing if no such neighbor is usable.
+## Used only when making the test script.
+sub Table::InvalidCode
+{
+    my ($Table) = @_;    #self
+
+    return 0x1234 if $Table->IsEmpty();
+
+    foreach my $range (@$Table) {
+        my @neighbors = ($range->[RANGE_END] + 1, $range->[RANGE_START] - 1);
+        foreach my $candidate (@neighbors) {
+            return $candidate if IsUsable($candidate);
+        }
+    }
+    return ();
}
###########################################################################
confess "$0: bad args to New_Alias"
}
- if (not $TableInfo{$Type}->{$Name}) {
- confess "$0: don't have orignial $Type => $Name to make alias"
+ $Alias = CanonicalName($Alias) if $Fuzzy;
+
+ if (not $TableInfo{$Type}->{$Name})
+ {
+ my $CName = CanonicalName($Name);
+ if ($TableInfo{$Type}->{$CName}) {
+ confess "$0: Use canonical form '$CName' instead of '$Name' for alias.";
+ } else {
+ confess "$0: don't have orignial $Type => $Name to make alias";
+ }
}
if ($TableInfo{$Alias}) {
confess "$0: already have original $Type => $Alias; can't make alias";
## All assigned code points
my $Assigned = Table->New(Is => 'Assigned',
Desc => "All assigned code points",
- Fuzzy => 1);
+ Fuzzy => 0);
my $Name = Table->New(); ## all characters, individually by name
my $General = Table->New(); ## all characters, grouped by category
Fuzzy => 0);
## Unassigned is the same as 'Cn'
- New_Alias(Is => 'Unassigned', SameAs => 'Cn', Fuzzy => 1);
+ New_Alias(Is => 'Unassigned', SameAs => 'Cn', Fuzzy => 0);
$Cat{C}->Replace($Cat{C}->Merge($Cat{Cn})); ## Now merge in Cn into C
my $Any = Table->New(Is => 'Any',
Desc => sprintf("[\\x{0000}-\\x{%X}]",
$LastUnicodeCodepoint),
- Fuzzy => 1);
+ Fuzzy => 0);
$Any->RawAppendRange(0, $LastUnicodeCodepoint);
- New_Alias(Is => 'All', SameAs => 'Any', Fuzzy => 1);
+ New_Alias(Is => 'All', SameAs => 'Any', Fuzzy => 0);
##
## Build special properties for Perl's internal case-folding needs:
}
}
+
+##
+## These are used in:
+## MakePropTestScript()
+## WriteAllMappings()
+## for making the test script.
+##
+my %FuzzyNameToTest;
+my %ExactNameToTest;
+
+
+## Used only when making the test script.
+##
+## Print to $FH the Expect() lines for property $Prop.  $MatchCode is a
+## code point that should match \p{$Prop}; $FailCode is one that should
+## not.  Either may be undef, in which case its four lines are skipped.
+sub GenTests($$$$)
+{
+    my ($FH, $Prop, $MatchCode, $FailCode) = @_;
+
+    ## The four spellings checked for each code point.  The trailing space
+    ## on the un-negated forms keeps the generated columns lined up.
+    my @Forms = (
+        "'\\p{$Prop}' ",
+        "'\\p{^$Prop}'",
+        "'\\P{$Prop}' ",
+        "'\\P{^$Prop}'",
+    );
+
+    ## [ code point, expected result for each of the four forms above ]
+    my @Cases = (
+        [ $MatchCode, 1, 0, 0, 1 ],
+        [ $FailCode,  0, 1, 1, 0 ],
+    );
+
+    for my $Case (@Cases) {
+        my ($Code, @Want) = @$Case;
+        next if not defined $Code;
+        for my $i (0 .. $#Forms) {
+            printf $FH qq/Expect($Want[$i], "\\x{%04X}", $Forms[$i]);\n/, $Code;
+        }
+    }
+}
+
+## Used only when making the test script.
+##
+## Print to $FH a pair of Error() lines saying that both \p{$prop} and
+## \P{$prop} are expected to be rejected.
+sub ExpectError($$)
+{
+    my ($FH, $prop) = @_;
+
+    for my $letter ('p', 'P') {
+        print $FH "Error('\\" . $letter . "{$prop}');\n";
+    }
+}
+
+## Used only when making the test script.
+## Separators that RandomlyFuzzifyName() may put between the parts of a
+## name when it is building a variant that is meant to be accepted.
+my @GoodSeps = (
+ " ",
+ "-",
+ " \t ",
+ "",
+ "",
+ "_",
+ );
+## Separators that RandomlyFuzzifyName() uses when asked to build a
+## variant that is meant to be rejected.
+my @BadSeps = (
+ "--",
+ "__",
+ " _",
+ "/"
+ );
+
+## Used only when making the test script.
+##
+## Given a name, split it on runs of '-', '_' and whitespace, give each
+## part a randomly chosen case treatment, and join the parts back together
+## with randomly chosen separators.
+##
+## If the optional second argument is true, the result gets exactly one
+## entry from @BadSeps: either between two parts, or (if none was placed
+## there) at the very end or very beginning.
+sub RandomlyFuzzifyName($;$)
+{
+ my $Name = shift;
+ my $WantError = shift; ## if true, make an error
+
+ my @parts;
+ for my $part (split /[-\s_]+/, $Name)
+ {
+ ## Every part after the first is preceded by a separator.
+ if (@parts) {
+ if ($WantError and rand() < 0.3) {
+ push @parts, $BadSeps[rand(@BadSeps)];
+ $WantError = 0; ## one bad separator is enough
+ } else {
+ push @parts, $GoodSeps[rand(@GoodSeps)];
+ }
+ }
+ ## Pick one of four case treatments for this part.
+ my $switch = int rand(4);
+ if ($switch == 0) {
+ push @parts, uc $part;
+ } elsif ($switch == 1) {
+ push @parts, lc $part;
+ } elsif ($switch == 2) {
+ push @parts, ucfirst $part;
+ } else {
+ push @parts, $part;
+ }
+ }
+ my $new = join('', @parts);
+
+ ## Still owe the caller an error (no bad separator was placed between
+ ## parts), so stick one on the end or the front.
+ if ($WantError) {
+ if (rand() >= 0.5) {
+ $new .= $BadSeps[rand(@BadSeps)];
+ } else {
+ $new = $BadSeps[rand(@BadSeps)] . $new;
+ }
+ }
+ return $new;
+}
+
+## Used only when making the test script.
+##
+## Writes TestProp.pl: first the boilerplate that follows __DATA__ in this
+## file, then generated Expect()/Error() calls for every name recorded in
+## %ExactNameToTest and %FuzzyNameToTest, then a closing Finished() call.
+##
+## Dies if TestProp.pl cannot be opened for output or cannot be closed
+## cleanly.
+sub MakePropTestScript()
+{
+    ## this written directly -- it's huge.
+    open my $out, '>', 'TestProp.pl'
+        or die "$0: TestProp.pl: $!\n";
+    print {$out} <DATA>;
+
+    ## Exact names: the name as given must work; if upper- or lower-casing
+    ## it yields a different string, that string must be an error.
+    ## (GenTests' ($$$$) prototype evaluates ValidCode/InvalidCode in scalar
+    ## context, so their empty-list "nothing" arrives as undef.)
+    while (my ($Name, $Table) = each %ExactNameToTest)
+    {
+        GenTests($out, $Name, $Table->ValidCode, $Table->InvalidCode);
+        ExpectError($out, uc $Name) if uc $Name ne $Name;
+        ExpectError($out, lc $Name) if lc $Name ne $Name;
+    }
+
+
+    ## Fuzzy names: test the canonical name, the original spelling, and one
+    ## randomly fuzzified spelling (a hash is used so duplicates collapse),
+    ## then one deliberately broken spelling that must be an error.
+    while (my ($Name, $Table) = each %FuzzyNameToTest)
+    {
+        my $Orig  = $CanonicalToOrig{$Name};
+        my %Names = (
+                     $Name => 1,
+                     $Orig => 1,
+                     RandomlyFuzzifyName($Orig) => 1
+                    );
+
+        for my $N (keys %Names) {
+            GenTests($out, $N, $Table->ValidCode, $Table->InvalidCode);
+        }
+
+        ExpectError($out, RandomlyFuzzifyName($Orig, 'ERROR'));
+    }
+
+    print {$out} "Finished();\n";
+
+    ## Buffered write errors may only surface at close; don't leave a
+    ## silently truncated test script behind.
+    close $out
+        or die "$0: TestProp.pl: $!\n";
+}
+
+
+##
+## These are used only in:
+## RegisterFileForName()
+## WriteAllMappings()
+##
+my %Exact; ## will become %utf8::Exact;
+my %Canonical; ## will become %utf8::Canonical;
+my %CaComment; ## Comment for %Canonical entry of same key
+
+##
+## Given info about a name and a datafile that it should be associated with,
+## register that association in %Exact (non-fuzzy names) or %Canonical
+## (fuzzy names).
+##
+## Arguments:
+##   $Type     -- 'Is' selects the first branch below; any other value
+##                is treated as a prefix to keep on the name
+##   $Name     -- the property name, without the $Type prefix
+##   $IsFuzzy  -- true if the name is to be matched fuzzily
+##   $filename -- the base name of the data file to associate with it
+##
+## Dies ("oops[...]") if the primary key for the name is already registered.
+sub RegisterFileForName($$$$)
+{
+ my $Type = shift;
+ my $Name = shift;
+ my $IsFuzzy = shift;
+ my $filename = shift;
+
+ ##
+ ## Now fill in the details of the mapping. When $Type eq 'Is', the
+ ## name is stored without the "Is", as it will be removed in
+ ## utf8_heavy when this data is being checked. Other types keep
+ ## their prefix (e.g. "In"), but a second, prefix-less record is
+ ## written if it doesn't conflict with anything already there.
+ ##
+ if (not $IsFuzzy)
+ {
+ if ($Type eq 'Is') {
+ die "oops[$Name]" if $Exact{$Name};
+ $Exact{$Name} = $filename;
+ } else {
+ die "oops[$Type$Name]" if $Exact{"$Type$Name"};
+ $Exact{"$Type$Name"} = $filename;
+ ## prefix-less record: first come, first served
+ $Exact{$Name} = $filename if not $Exact{$Name};
+ }
+ }
+ else
+ {
+ ## Keys of %Canonical are lowercased.
+ my $CName = lc $Name;
+ if ($Type eq 'Is') {
+ die "oops[$CName]" if $Canonical{$CName};
+ $Canonical{$CName} = $filename;
+ ## Remember the mixed-case spelling for a comment, but only when
+ ## the name has at least two uppercase letters.
+ $CaComment{$CName} = $Name if $Name =~ tr/A-Z// >= 2;
+ } else {
+ die "oops[$Type$CName]" if $Canonical{lc "$Type$CName"};
+ $Canonical{lc "$Type$CName"} = $filename;
+ $CaComment{lc "$Type$CName"} = "$Type$Name";
+ ## prefix-less record: first come, first served
+ if (not $Canonical{$CName}) {
+ $Canonical{$CName} = $filename;
+ $CaComment{$CName} = "$Type$Name";
+ }
+ }
+ }
+}
+
##
## Writes the info accumulated in
##
{
my @MAP;
- for my $Type ('In', 'Is')
- {
- my %Filenames;
- my %NameToFile;
+ my %BaseNames; ## Base names already used (for avoiding 8.3 conflicts)
- my %Exact; ## will become %utf8::Is or %utf8::In
- my %Pat; ## will become %utf8::IsPat or %utf8::InPat
+ ## 'Is' *MUST* come first, so its names have precedence over 'In's
+ for my $Type ('Is', 'In')
+ {
+ my %RawNameToFile; ## a per-$Type cache
- ##
- ## First write all the files to the $Type/ directory
- ##
- for my $Name (sort { length $a <=> length $b } keys %{$TableInfo{$Type}})
+ for my $Name (sort {length $a <=> length $b} keys %{$TableInfo{$Type}})
{
+ ## Note: $Name is already canonical
my $Table = $TableInfo{$Type}->{$Name};
+ my $IsFuzzy = $FuzzyNames{$Type}->{$Name};
## Need an 8.3 safe filename (which means "an 8 safe" $filename)
- my $filename = $FuzzyNames{$Type}->{$Name} ? CanonicalName($Name): $Name;
- $filename =~ s/[^\w_]+/_/g; # "L&" -> "L_"
- substr($filename, 8) = '' if length($filename) > 8;
-
- ##
- ## Make sure the filename doesn't conflict with something we
- ## might have already written. If we have, say,
- ## GreekExtended1
- ## GreekExtended2
- ## they become
- ## GreekExt
- ## GreekEx2
- ##
- while (my $num = $Filenames{lc $filename}++)
+ my $filename;
{
- $num++; ## so filenames with numbers start with '2', which
- ## just looks more natural.
- ## Want to append $num, but if it'll make the filename longer
- ## than 8 characters, pre-truncate $filename so that the result
- ## is acceptable.
- my $delta = length($filename) + length($num) - 8;
- if ($delta > 0) {
- substr($filename, -$delta) = $num;
- } else {
- $filename .= $num;
+ ## 'Is' items lose 'Is' from the basename.
+ $filename = $Type eq 'Is' ? $Name : "$Type$Name";
+
+ $filename =~ s/[^\w_]+/_/g; # "L&" -> "L_"
+ substr($filename, 8) = '' if length($filename) > 8;
+
+ ##
+ ## Make sure the basename doesn't conflict with something we
+ ## might have already written. If we have, say,
+ ## InGreekExtended1
+ ## InGreekExtended2
+ ## they become
+ ## InGreekE
+ ## InGreek2
+ ##
+ while (my $num = $BaseNames{lc $filename}++)
+ {
+ $num++; ## so basenames with numbers start with '2', which
+ ## just looks more natural.
+ ## Want to append $num, but if it'll make the basename longer
+ ## than 8 characters, pre-truncate $filename so that the result
+ ## is acceptable.
+ my $delta = length($filename) + length($num) - 8;
+ if ($delta > 0) {
+ substr($filename, -$delta) = $num;
+ } else {
+ $filename .= $num;
+ }
}
- }
-
- $Exact{$Name} = $filename;
+ };
##
## Construct a nice comment to add to the file, and build data
for my $N (@Supported)
{
my $IsFuzzy = $FuzzyNames{$Type}->{$N};
- my $CName = $IsFuzzy ? CanonicalName($N): $N;
- my $Prop = "\\p{$TypeToShow$CName}";
+ ## Show the supported name being listed ($N), not the table's primary
+ ## $Name; otherwise every alias is reported under the primary name.
+ my $Prop = "\\p{$TypeToShow$N}";
$OrigProp = $Prop if not $OrigProp; #cache for aliases
if ($IsFuzzy) {
$Comment .= "\t$Prop (and fuzzy permutations)\n";
##
## Okay, write the file...
##
- $Table->Write("$Type/$filename.pl", $Comment);
- }
+ $Table->Write("lib/$filename.pl", $Comment);
- ##
- ## Write out the map
- ##
- if (not open MAP, ">Properties") {
- die "$0: can't write Properties: $!\n";
- }
- print MAP "##\n";
- print MAP "## This file created by $0\n";
- print MAP "## List of built-in \\p{...}/\\P{...} properties.\n";
- print MAP "##\n";
- print MAP "## '*' means name may be 'fuzzy'\n";
- print MAP "##\n";
- print MAP "\n";
- print MAP sort { substr($a,2) cmp substr($b, 2) } @MAP;
- close MAP;
+ ## and register it
+ $RawNameToFile{$Name} = $filename;
+ RegisterFileForName($Type => $Name, $IsFuzzy, $filename);
- ##
- ## Build %Pat
- ##
- while (my ($Fuzzy, $Real) = each %{$FuzzyNames{$Type}})
- {
- my $File = $Exact{$Real};
-
- if (not $File) {
- die "$0: oops [$Real]";
- }
-
- ## The prefix length of 2 is enough spread,
- ## and besides, we have 'Yi' as an In category.
- my $Prefix = lc(substr($Fuzzy, 0, 2));
- my $Regex = NameToRegex($Fuzzy);
-
- if ($Pat{$Prefix}->{$Regex}) {
- warn "WHOA, conflict with /$Regex/: $Pat{$Prefix}->{$Regex} vs $File\n";
+ if ($IsFuzzy)
+ {
+ my $CName = CanonicalName($Type . '_'. $Name);
+ $FuzzyNameToTest{$Name} = $Table if !$FuzzyNameToTest{$Name};
+ $FuzzyNameToTest{$CName} = $Table if !$FuzzyNameToTest{$CName};
+ } else {
+ $ExactNameToTest{$Name} = $Table;
}
- $Pat{$Prefix}->{$Regex} = $File;
}
- ##
- ## Since the fuzzy method will provide for a way to match $Fuzzy,
- ## there's no need for $Fuzzy to be in %Exact as well.
- ## This can't be done in the loop above because there could be
- ## multiple $Fuzzys pointing at the same $Real, and we don't want
- ## the first to delete the exact mapping out from under the second.
- ##
- for my $Fuzzy (keys %{$FuzzyNames{$Type}})
+ ## Register alias info
+ for my $Name (sort {length $a <=> length $b} keys %{$AliasInfo{$Type}})
{
- delete $Exact{$Fuzzy};
+ my $Alias = $AliasInfo{$Type}->{$Name};
+ my $IsFuzzy = $FuzzyNames{$Type}->{$Alias};
+ my $filename = $RawNameToFile{$Name};
+ die "oops [$Alias]->[$Name]" if not $filename;
+ RegisterFileForName($Type => $Alias, $IsFuzzy, $filename);
+
+ my $Table = $TableInfo{$Type}->{$Name};
+ die "oops" if not $Table;
+ if ($IsFuzzy)
+ {
+ my $CName = CanonicalName($Type .'_'. $Alias);
+ $FuzzyNameToTest{$Alias} = $Table if !$FuzzyNameToTest{$Alias};
+ $FuzzyNameToTest{$CName} = $Table if !$FuzzyNameToTest{$CName};
+ } else {
+ $ExactNameToTest{$Alias} = $Table;
+ }
}
+ }
+ ##
+ ## Write out the property list
+ ##
+ {
+ my @OUT = (
+ "##\n",
+ "## This file created by $0\n",
+ "## List of built-in \\p{...}/\\P{...} properties.\n",
+ "##\n",
+ "## '*' means name may be 'fuzzy'\n",
+ "##\n\n",
+ sort { substr($a,2) cmp substr($b, 2) } @MAP,
+ );
+ WriteIfChanged('Properties', @OUT);
+ }
+ use Text::Tabs (); ## using this makes the files about half the size
+
+ ## Write Exact.pl
+ {
+ my @OUT = (
+ $HEADER,
+ "##\n",
+ "## Data in this file used by ../utf8_heavy.pl\n",
+ "##\n\n",
+ "## Mapping from name to filename in ./lib\n",
+ "%utf8::Exact = (\n",
+ );
- ##
- ## Now write In.pl / Is.pl
- ##
- if (not open OUT, ">$Type.pl") {
- die "$0: $Type.pl: $!\n";
- }
- print OUT $HEADER;
- print OUT "##\n";
- print OUT "## Data in this file used by ../utf8_heavy.pl\n";
- print OUT "##\n";
- print OUT "\n";
- print OUT "## Mapping from name to filename in ./$Type\n";
- print OUT "%utf8::$Type = (\n";
for my $Name (sort keys %Exact)
{
my $File = $Exact{$Name};
- printf OUT " %-41s => %s,\n", "'$Name'", "'$File'";
+ $Name = $Name =~ m/\W/ ? qq/'$Name'/ : " $Name ";
+ my $Text = sprintf("%-15s => %s,\n", $Name, qq/'$File'/);
+ push @OUT, Text::Tabs::unexpand($Text);
}
- print OUT ");\n\n";
+ push @OUT, ");\n1;\n";
+
+ WriteIfChanged('Exact.pl', @OUT);
+ }
- print OUT "## Mappings from regex to filename in ./$Type/\n";
- print OUT "%utf8::${Type}Pat = (\n";
- for my $Prefix (sort keys %Pat)
+ ## Write Canonical.pl
+ {
+ my @OUT = (
+ $HEADER,
+ "##\n",
+ "## Data in this file used by ../utf8_heavy.pl\n",
+ "##\n\n",
+ "## Mapping from lc(canonical name) to filename in ./lib\n",
+ "%utf8::Canonical = (\n",
+ );
+ my $Trail = ""; ## used just to keep the spacing pretty
+ for my $Name (sort keys %Canonical)
{
- print OUT " '$Prefix' => {\n";
- while (my ($Regex, $File) = each %{ $Pat{$Prefix} }) {
- print OUT "\t'$Regex' => '$File',\n";
+ my $File = $Canonical{$Name};
+ if ($CaComment{$Name}) {
+ push @OUT, "\n" if not $Trail;
+ push @OUT, " # $CaComment{$Name}\n";
+ $Trail = "\n";
+ } else {
+ $Trail = "";
}
- print OUT " },\n";
+ $Name = $Name =~ m/\W/ ? qq/'$Name'/ : " $Name ";
+ my $Text = sprintf(" %-41s => %s,\n$Trail", $Name, qq/'$File'/);
+ push @OUT, Text::Tabs::unexpand($Text);
}
- print OUT ");\n";
-
- close(OUT);
+ push @OUT, ");\n1\n";
+ WriteIfChanged('Canonical.pl', @OUT);
}
+
+ MakePropTestScript() if $MakeTestScript;
}
+
sub SpecCase_txt()
{
#
for my $case (qw(Lower Title Upper))
{
my $NormalCase = do "To/$case.pl" || die "$0: $@\n";
- if (not open OUT, ">To/$case.pl") {
- die "$0: To/$case.txt: $!";
- }
- print OUT $HEADER, "\n";
- print OUT "%utf8::ToSpec$case =\n(\n";
+ my @OUT = (
+ $HEADER, "\n",
+ "%utf8::ToSpec$case =\n(\n",
+ );
for my $prop (sort { $a->[0] <=> $b->[0] } @{$CaseInfo{$case}}) {
my ($ix, $code, $to) = @$prop;
my $tostr =
join "", map { sprintf "\\x{%s}", $_ } split ' ', $to;
- printf OUT qq['%04X' => "$tostr",\n], $ix;
+ push @OUT, sprintf qq['%04X' => "$tostr",\n], $ix;
}
- print OUT ");\n\n";
- print OUT "return <<'END';\n";
- print OUT $NormalCase;
- print OUT "END\n";
- close OUT;
+ push @OUT, (
+ ");\n\n",
+ "return <<'END';\n",
+ $NormalCase,
+ "END\n"
+ );
+ WriteIfChanged("To/$case.pl", @OUT);
}
}
sub CaseFold_txt()
{
if (not open IN, "CaseFold.txt") {
- die "$0: To/Fold.pl: $!\n";
+ die "$0: CaseFold.txt: $!\n";
}
my $Fold = Table->New();
#
# Prepend the special foldings to the common foldings.
#
-
my $CommonFold = do "To/Fold.pl" || die "$0: To/Fold.pl: $!\n";
- if (not open OUT, ">To/Fold.pl") {
- die "$0: To/Fold.pl: $!\n";
- }
- print OUT $HEADER, "\n";
- print OUT "%utf8::ToSpecFold =\n(\n";
+
+ my @OUT = (
+ $HEADER, "\n",
+ "%utf8::ToSpecFold =\n(\n",
+ );
for my $code (sort { $a <=> $b } keys %Fold) {
my $foldstr =
join "", map { sprintf "\\x{%s}", $_ } split ' ', $Fold{$code};
- printf OUT qq['%04X' => "$foldstr",\n], $code;
+ push @OUT, sprintf qq['%04X' => "$foldstr",\n], $code;
}
- print OUT ");\n\n";
- print OUT "return <<'END';\n";
- print OUT $CommonFold;
- print OUT "END\n";
- close OUT;
+ push @OUT, (
+ ");\n\n",
+ "return <<'END';\n",
+ $CommonFold,
+ "END\n",
+ );
+
+ WriteIfChanged("To/Fold.pl", @OUT);
}
## Do it....
Scripts_txt();
Blocks_txt();
+WriteAllMappings();
+
LineBrk_Txt();
ArabShap_txt();
Jamo_txt();
SpecCase_txt();
+CaseFold_txt();
-WriteAllMappings();
+exit(0);
-CaseFold_txt();
+## TRAILING CODE IS USED BY MakePropTestScript()
+__DATA__
+use strict;
+use warnings;
+
+my $Tests = 0;
+my $Fails = 0;
-# That's all, folks!
+## Expect($Expect, $String, $Regex)
+## Count one test.  Compile $Regex and match $String against it; record a
+## failure (and report the caller's line) if the regex won't compile, or if
+## the match result differs from $Expect (1 = should match, 0 = should not).
+sub Expect($$$)
+{
+    my ($Expect, $String, $Regex) = @_;
+    my $Line = (caller)[2];
+
+    $Tests++;
+
+    ## undef here means the eval died, i.e. the regex didn't compile
+    my $Compiled;
+    my $Outcome = eval {
+        $Compiled = qr/$Regex/;
+        $String =~ $Compiled ? 1 : 0
+    };
+
+    if (not defined $Outcome) {
+        print "couldn't compile /$Regex/ on $0 line $Line: $@\n";
+        $Fails++;
+    } elsif ($Outcome ^ $Expect) {
+        print "bad result (expected $Expect) on $0 line $Line: $@\n";
+        $Fails++;
+    }
+}
-__END__
+## Error($Regex)
+## Count one test.  $Regex is expected to be rejected; record a failure
+## (and report the caller's line) if it compiles and runs without dying.
+sub Error($)
+{
+    my ($Regex) = @_;
+
+    $Tests++;
+
+    my $Survived = eval { 'x' =~ qr/$Regex/; 1 };
+    if ($Survived) {
+        my $Line = (caller)[2];
+        $Fails++;
+        print "expected error for /$Regex/ on $0 line $Line: $@\n";
+    }
+}
+
+## Finished()
+## Print the summary line and exit: status 0 if nothing failed,
+## exit(-1) otherwise.
+sub Finished()
+{
+    if ($Fails) {
+        print "$Tests tests, $Fails failed!\n";
+        exit(-1);
+    }
+
+    print "All $Tests tests passed.\n";
+    exit(0);
+}
sub croak { require Carp; Carp::croak(@_) }
+my %Cache;
+
+##
+## "SWASH" == "SWATCH HASH". A "swatch" is a swatch of the Unicode landscape
+##
+
sub SWASHNEW {
my ($class, $type, $list, $minbits, $none) = @_;
local $^D = 0 if $^D;
print STDERR "SWASHNEW @_\n" if DEBUG;
- ## check to see if we've already got it.
- {
- no strict 'refs';
- if ($type and ref ${"${class}::{$type}"} eq $class) {
- warn qq/Found \${"${class}::{$type}"}\n/ if DEBUG;
- return ${"${class}::{$type}"};
- }
- }
-
##
## Get the list of codepoints for the type.
## Called from utf8.c
##
## Given a $type, our goal is to fill $list with the set of codepoint
- ## ranges. As we try various interpretations of $type, sometimes we'll
- ## end up with the $list directly, and sometimes we'll end up with a
- ## $file name that holds the list data.
+ ## ranges.
##
## To make the parsing of $type clear, this code takes the a rather
## unorthadox approach of last'ing out of the block once we have the
## info we need. Were this to be a subroutine, the 'last' would just
## be a 'return'.
##
+ my $file; ## file to load data from, and also part of the %Cache key.
+ my $ListSorted = 0;
+
if ($type)
{
$type =~ s/^\s+//;
print "type = $type\n" if DEBUG;
- my $file;
- ## Figure out what file to load to get the data....
GETFILE:
{
##
- ## First, see if it's an "Is" name (the 'Is' is optional)
+ ## 'Is' is always optional, so if it's there, remove it.
+ ## Same with 'Category=' and 'Script='.
##
- ## Because we check "Is" names first, they have precidence over
- ## "In" names. For example, "Greek" is both a script and a
- ## block. "IsGreek" always gets the script, while "InGreek"
- ## always gets the block. "Greek" gets the script because we
- ## check "Is" names first.
+ ## 'Block=' is replaced by 'In'.
##
- if ($type =~ m{^
- ## "Is" prefix, or "Script=" or "Category="
- (?: Is [- _]? | (?:Script|Category)\s*=\s* )?
- ## name to check in the "Is" symbol table.
- ([A-Z].*)
- $
- }ix)
- {
- my $istype = $1;
- ##
- ## Input ($type) Name To Check ($istype)
- ## ------------- -----------------------
- ## IsLu Lu
- ## Lu Lu
- ## Category = Lu Lu
- ## Foo Foo
- ## Script = Greek Greek
- ##
-
- print "istype = $istype\n" if DEBUG;
-
- ## Load "Is" mapping data, if not yet loaded.
- do "unicore/Is.pl" if not defined %utf8::Is;
-
- ##
- ## If the "Is" mapping data has an exact match, it points
- ## to the file we need.
- ##
- if (exists $utf8::Is{$istype})
- {
- $file = "unicore/Is/$utf8::Is{$istype}.pl";
- last GETFILE;
- }
-
- ##
- ## Need to look at %utf8::IsPat (loaded from "unicore/Is.pl")
- ## to see if there's a regex that matches this $istype.
- ## If so, the associated name is the file we need.
- ##
- my $prefix = substr(lc($istype), 0, 2);
- if (my $hashref = $utf8::IsPat{$prefix})
- {
- while (my ($pat, $name) = each %{$hashref})
- {
- print "isprefix = $prefix, Is = $istype, pat = $pat\n" if DEBUG;
- ##
- ## The following regex probably need not be cached,
- ## since every time there's a match, the results of
- ## the entire call to SWASHNEW() is cached, so there's
- ## a very limited number of times any one $pat will
- ## be evaluated as a regex, at least with "reasonable"
- ## code that doesn't try a baziilion \p{Random} names.
- ##
- if ($istype =~ /^$pat$/i)
- {
- $file = "unicore/Is/$name.pl";
- keys %{$hashref}; ## reset the 'each' above
- last GETFILE;
- }
- }
- }
+ $type =~ s/^Is(?:\s+|[-_])?//i
+ or
+ $type =~ s/^Category\s*=\s*//i
+ or
+ $type =~ s/^Script\s*=\s*//i
+ or
+ $type =~ s/^Block\s*=\s*/In/i;
+
+ ##
+ ## See if it's in the direct mapping table.
+ ##
+ require "unicore/Exact.pl";
+ if (my $base = $utf8::Exact{$type}) {
+ $file = "unicore/lib/$base.pl";
+ last GETFILE;
}
##
- ## Couldn't find via "Is" -- let's try via "In".....
+ ## If not there exactly, try the canonical form. The canonical
+ ## form is lowercased, with any separators (\s+|[-_]) removed.
##
- if ($type =~ m{^
- ( In(?!herited$)[- _]? | Block\s*=\s*)?
- ([A-Z].*)
- $
- }xi)
- {
- my $intype = $2;
- print "intype = $intype\n" if DEBUG;
-
- ##
- ## Input ($type) Name To Check ($intype)
- ## ------------- -----------------------
- ## Inherited Inherited
- ## InGreek Greek
- ## Block = Greek Greek
- ##
-
- ## Load "In" mapping data, if not yet loaded.
- do "unicore/In.pl" if not defined %utf8::In;
-
- ## If there's a direct match, it points to the file we need
- if (exists $utf8::In{$intype}) {
- $file = "unicore/In/$utf8::In{$intype}.pl";
- last GETFILE;
- }
-
- ##
- ## Need to look at %utf8::InPat (loaded from "unicore/In.pl")
- ## to see if there's a regex that matches this $intype.
- ## If so, the associated name is the file we need.
- ##
- my $prefix = substr(lc($intype), 0, 2);
- if (my $hashref = $utf8::InPat{$prefix})
- {
- print "inprefix = $prefix, In = $intype\n" if DEBUG;
- while (my ($pat, $name) = each %{$hashref})
- {
- print "inprefix = $prefix, In = $intype, k = $pat\n" if DEBUG;
- if ($intype =~ /^$pat$/i) {
- $file = "unicore/In/$name.pl";
- print "inprefix = $prefix, In = $intype, k = $pat, file = $file\n" if DEBUG;
- keys %{$hashref}; ## reset the 'each' above
- last GETFILE;
- }
- }
- }
+ my $canonical = lc $type;
+ $canonical =~ s/(?<=[a-z\d])(?:\s+|[-_])(?=[a-z\d])//g;
+ print "canonical = $canonical\n" if DEBUG;
+
+ require "unicore/Canonical.pl";
+ if (my $base = $utf8::Canonical{$canonical}) {
+ $file = "unicore/lib/$base.pl";
+ last GETFILE;
}
##
croak("Can't find Unicode character property \"$type\"");
}
+ print "found it (file='$file')\n" if DEBUG;
+
##
## If we reach here, it was due to a 'last GETFILE' above, so we
- ## have a filename, so now we load it.
+ ## have a filename, so now we load it if we haven't already.
+ ## If we have, return the cached results. The cache key is the
+ ## file to load.
##
+ ## Entries are stored under the two-part key ($class, $file), so the
+ ## lookup has to use that same key or the cache can never hit.
+ my $found = $Cache{$class, $file};
+ if ($found and ref($found) eq $class)
+ {
+ print "Returning cached '$file' for \\p{$type}\n" if DEBUG;
+ return $found;
+ }
+
$list = do $file;
+ $ListSorted = 1; ## we know that these lists are sorted
}
my $extras;
my $bits;
+ my $ORIG = $list;
if ($list) {
my @tmp = split(/^/m, $list);
my %seen;
print STDERR "CLASS = $class, TYPE => $type, BITS => $bits, NONE => $none\nEXTRAS =>\n$extras\nLIST =>\n$list\n" if DEBUG;
- no strict 'refs';
- ${"${class}::{$type}"} = bless {
+ my $SWASH = bless {
TYPE => $type,
BITS => $bits,
EXTRAS => $extras,
NONE => $none,
@extras,
} => $class;
+
+ if ($file) {
+ $Cache{$class, $file} = $SWASH;
+ }
+
+ return $SWASH;
}
# NOTE: utf8.c:swash_init() assumes entries are never modified once generated.