From: Jarkko Hietaniemi Date: Fri, 26 Oct 2001 14:12:04 +0000 (+0000) Subject: Unicode: add the case folding table. X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=c4051cc5dfb167f01d02a988561fb93023e83cac;p=p5sagit%2Fp5-mst-13.2.git Unicode: add the case folding table. p4raw-id: //depot/perl@12689 --- diff --git a/MANIFEST b/MANIFEST index 2e3e9af..99e6773 100644 --- a/MANIFEST +++ b/MANIFEST @@ -1606,6 +1606,7 @@ lib/unicore/Scripts.pl Unicode character database lib/unicore/Scripts.txt Unicode character database lib/unicore/SpecCase.txt Unicode character database lib/unicore/To/Digit.pl Unicode character database +lib/unicore/To/Fold.pl Unicode character database lib/unicore/To/Lower.pl Unicode character database lib/unicore/To/Title.pl Unicode character database lib/unicore/To/Upper.pl Unicode character database diff --git a/lib/unicore/To/Fold.pl b/lib/unicore/To/Fold.pl new file mode 100644 index 0000000..5a24150 --- /dev/null +++ b/lib/unicore/To/Fold.pl @@ -0,0 +1,830 @@ +# !!!!!!! DO NOT EDIT THIS FILE !!!!!!! +# This file is built by mktables from e.g. Unicode.txt. +# Any changes made here will be lost! + +%utf8::ToSpecFold = ( +'223' => "\x{0073}\x{0073}", +'304' => "\x{0069}", +'305' => "\x{0069}", +'329' => "\x{02BC}\x{006E}", +'496' => "\x{006A}\x{030C}", +'912' => "\x{03B9}\x{0308}\x{0301}", +'944' => "\x{03C5}\x{0308}\x{0301}", +'1415' => "\x{0565}\x{0582}", +'7830' => "\x{0068}\x{0331}", +'7831' => "\x{0074}\x{0308}", +'7832' => "\x{0077}\x{030A}", +'7833' => "\x{0079}\x{030A}", +'7834' => "\x{0061}\x{02BE}", +'8016' => "\x{03C5}\x{0313}", +'8018' => "\x{03C5}\x{0313}\x{0300}", +'8020' => "\x{03C5}\x{0313}\x{0301}", +'8022' => "\x{03C5}\x{0313}\x{0342}", +'8064' => "\x{1F00}\x{03B9}", +'8065' => "\x{1F01}\x{03B9}", +'8066' => "\x{1F02}\x{03B9}", +'8067' => "\x{1F03}\x{03B9}", +'8068' => "\x{1F04}\x{03B9}", +'8069' => "\x{1F05}\x{03B9}", +'8070' => "\x{1F06}\x{03B9}", +'8071' => "\x{1F07}\x{03B9}", +'8072' => "\x{1F00}\x{03B9}", +'8073' => "\x{1F01}\x{03B9}", +'8074' => "\x{1F02}\x{03B9}", +'8075' => "\x{1F03}\x{03B9}", +'8076' => "\x{1F04}\x{03B9}", +'8077' => "\x{1F05}\x{03B9}", +'8078' => "\x{1F06}\x{03B9}", +'8079' => "\x{1F07}\x{03B9}", +'8080' => "\x{1F20}\x{03B9}", +'8081' => "\x{1F21}\x{03B9}", +'8082' => "\x{1F22}\x{03B9}", +'8083' => "\x{1F23}\x{03B9}", +'8084' => "\x{1F24}\x{03B9}", +'8085' => "\x{1F25}\x{03B9}", +'8086' => "\x{1F26}\x{03B9}", +'8087' => "\x{1F27}\x{03B9}", +'8088' => "\x{1F20}\x{03B9}", +'8089' => "\x{1F21}\x{03B9}", +'8090' => "\x{1F22}\x{03B9}", +'8091' => "\x{1F23}\x{03B9}", +'8092' => "\x{1F24}\x{03B9}", +'8093' => "\x{1F25}\x{03B9}", +'8094' => "\x{1F26}\x{03B9}", +'8095' => "\x{1F27}\x{03B9}", +'8096' => "\x{1F60}\x{03B9}", +'8097' => "\x{1F61}\x{03B9}", +'8098' => "\x{1F62}\x{03B9}", +'8099' => "\x{1F63}\x{03B9}", +'8100' => "\x{1F64}\x{03B9}", +'8101' => "\x{1F65}\x{03B9}", +'8102' => "\x{1F66}\x{03B9}", +'8103' => "\x{1F67}\x{03B9}", +'8104' => "\x{1F60}\x{03B9}", +'8105' => "\x{1F61}\x{03B9}", +'8106' => "\x{1F62}\x{03B9}", +'8107' => "\x{1F63}\x{03B9}", +'8108' => "\x{1F64}\x{03B9}", +'8109' => "\x{1F65}\x{03B9}", +'8110' => "\x{1F66}\x{03B9}", +'8111' => "\x{1F67}\x{03B9}", +'8114' => "\x{1F70}\x{03B9}", +'8115' => "\x{03B1}\x{03B9}", +'8116' => "\x{03AC}\x{03B9}", +'8118' => "\x{03B1}\x{0342}", +'8119' => "\x{03B1}\x{0342}\x{03B9}", +'8124' => "\x{03B1}\x{03B9}", +'8130' => "\x{1F74}\x{03B9}", +'8131' => "\x{03B7}\x{03B9}", +'8132' => "\x{03AE}\x{03B9}", +'8134' => "\x{03B7}\x{0342}", +'8135' => "\x{03B7}\x{0342}\x{03B9}", +'8140' => "\x{03B7}\x{03B9}", +'8146' => "\x{03B9}\x{0308}\x{0300}", +'8147' => "\x{03B9}\x{0308}\x{0301}", +'8150' => "\x{03B9}\x{0342}", +'8151' => "\x{03B9}\x{0308}\x{0342}", +'8162' => "\x{03C5}\x{0308}\x{0300}", +'8163' => "\x{03C5}\x{0308}\x{0301}", +'8164' => "\x{03C1}\x{0313}", +'8166' => "\x{03C5}\x{0342}", +'8167' => "\x{03C5}\x{0308}\x{0342}", +'8178' => "\x{1F7C}\x{03B9}", +'8179' => "\x{03C9}\x{03B9}", +'8180' => "\x{03CE}\x{03B9}", +'8182' => "\x{03C9}\x{0342}", +'8183' => "\x{03C9}\x{0342}\x{03B9}", +'8188' => "\x{03C9}\x{03B9}", +'64256' => "\x{0066}\x{0066}", +'64257' => "\x{0066}\x{0069}", +'64258' => "\x{0066}\x{006C}", +'64259' => "\x{0066}\x{0066}\x{0069}", +'64260' => "\x{0066}\x{0066}\x{006C}", +'64261' => "\x{0073}\x{0074}", +'64262' => "\x{0073}\x{0074}", +'64275' => "\x{0574}\x{0576}", +'64276' => "\x{0574}\x{0565}", +'64277' => "\x{0574}\x{056B}", +'64278' => "\x{057E}\x{0576}", +'64279' => "\x{0574}\x{056D}", +); + +return <<'END'; +0041 0061 +0042 0062 +0043 0063 +0044 0064 +0045 0065 +0046 0066 +0047 0067 +0048 0068 +0049 0069 +004A 006A +004B 006B +004C 006C +004D 006D +004E 006E +004F 006F +0050 0070 +0051 0071 +0052 0072 +0053 0073 +0054 0074 +0055 0075 +0056 0076 +0057 0077 +0058 0078 +0059 0079 +005A 007A +00B5 03BC +00C0 00E0 +00C1 00E1 +00C2 00E2 +00C3 00E3 +00C4 00E4 +00C5 00E5 +00C6 00E6 +00C7 00E7 +00C8 00E8 +00C9 00E9 +00CA 00EA +00CB 00EB +00CC 00EC +00CD 00ED +00CE 00EE +00CF 00EF +00D0 00F0 +00D1 00F1 +00D2 00F2 +00D3 00F3 +00D4 00F4 +00D5 00F5 +00D6 00F6 +00D8 00F8 +00D9 00F9 +00DA 00FA +00DB 00FB +00DC 00FC +00DD 00FD +00DE 00FE +0100 0101 +0102 0103 +0104 0105 +0106 0107 +0108 0109 +010A 010B +010C 010D +010E 010F +0110 0111 +0112 0113 +0114 0115 +0116 0117 +0118 0119 +011A 011B +011C 011D +011E 011F +0120 0121 +0122 0123 +0124 0125 +0126 0127 +0128 0129 +012A 012B +012C 012D +012E 012F +0132 0133 +0134 0135 +0136 0137 +0139 013A +013B 013C +013D 013E +013F 0140 +0141 0142 +0143 0144 +0145 0146 +0147 0148 +014A 014B +014C 014D +014E 014F +0150 0151 +0152 0153 +0154 0155 +0156 0157 +0158 0159 +015A 015B +015C 015D +015E 015F +0160 0161 +0162 0163 +0164 0165 +0166 0167 +0168 0169 +016A 016B +016C 016D +016E 016F +0170 0171 +0172 0173 +0174 0175 +0176 0177 +0178 00FF +0179 017A +017B 017C +017D 017E +017F 0073 +0181 0253 +0182 0183 +0184 0185 +0186 0254 +0187 0188 +0189 0256 +018A 0257 +018B 018C +018E 01DD +018F 0259 +0190 025B +0191 0192 +0193 0260 +0194 0263 +0196 0269 +0197 0268 +0198 0199 +019C 026F +019D 0272 +019F 0275 +01A0 01A1 +01A2 01A3 +01A4 01A5 +01A6 0280 +01A7 01A8 +01A9 0283 +01AC 01AD +01AE 0288 +01AF 01B0 +01B1 028A +01B2 028B +01B3 01B4 +01B5 01B6 +01B7 0292 +01B8 01B9 +01BC 01BD +01C4 01C5 01C6 +01C7 01C8 01C9 +01CA 01CB 01CC +01CD 01CE +01CF 01D0 +01D1 01D2 +01D3 01D4 +01D5 01D6 +01D7 01D8 +01D9 01DA +01DB 01DC +01DE 01DF +01E0 01E1 +01E2 01E3 +01E4 01E5 +01E6 01E7 +01E8 01E9 +01EA 01EB +01EC 01ED +01EE 01EF +01F1 01F2 01F3 +01F4 01F5 +01F6 0195 +01F7 01BF +01F8 01F9 +01FA 01FB +01FC 01FD +01FE 01FF +0200 0201 +0202 0203 +0204 0205 +0206 0207 +0208 0209 +020A 020B +020C 020D +020E 020F +0210 0211 +0212 0213 +0214 0215 +0216 0217 +0218 0219 +021A 021B +021C 021D +021E 021F +0222 0223 +0224 0225 +0226 0227 +0228 0229 +022A 022B +022C 022D +022E 022F +0230 0231 +0232 0233 +0345 03B9 +0386 03AC +0388 03AD +0389 03AE +038A 03AF +038C 03CC +038E 03CD +038F 03CE +0391 03B1 +0392 03B2 +0393 03B3 +0394 03B4 +0395 03B5 +0396 03B6 +0397 03B7 +0398 03B8 +0399 03B9 +039A 03BA +039B 03BB +039C 03BC +039D 03BD +039E 03BE +039F 03BF +03A0 03C0 +03A1 03C1 +03A3 03C3 +03A4 03C4 +03A5 03C5 +03A6 03C6 +03A7 03C7 +03A8 03C8 +03A9 03C9 +03AA 03CA +03AB 03CB +03C2 03C3 +03D0 03B2 +03D1 03B8 +03D5 03C6 +03D6 03C0 +03DA 03DB +03DC 03DD +03DE 03DF +03E0 03E1 +03E2 03E3 +03E4 03E5 +03E6 03E7 +03E8 03E9 +03EA 03EB +03EC 03ED +03EE 03EF +03F0 03BA +03F1 03C1 +03F2 03C3 +03F4 03B8 +03F5 03B5 +0400 0450 +0401 0451 +0402 0452 +0403 0453 +0404 0454 +0405 0455 +0406 0456 +0407 0457 +0408 0458 +0409 0459 +040A 045A +040B 045B +040C 045C +040D 045D +040E 045E +040F 045F +0410 0430 +0411 0431 +0412 0432 +0413 0433 +0414 0434 +0415 0435 +0416 0436 +0417 0437 +0418 0438 +0419 0439 +041A 043A +041B 043B +041C 043C +041D 043D +041E 043E +041F 043F +0420 0440 +0421 0441 +0422 0442 +0423 0443 +0424 0444 +0425 0445 +0426 0446 +0427 0447 +0428 0448 +0429 0449 +042A 044A +042B 044B +042C 044C +042D 044D +042E 044E +042F 044F +0460 0461 +0462 0463 +0464 0465 +0466 0467 +0468 0469 +046A 046B +046C 046D +046E 046F +0470 0471 +0472 0473 +0474 0475 +0476 0477 +0478 0479 +047A 047B +047C 047D +047E 047F +0480 0481 +048C 048D +048E 048F +0490 0491 +0492 0493 +0494 0495 +0496 0497 +0498 0499 +049A 049B +049C 049D +049E 049F +04A0 04A1 +04A2 04A3 +04A4 04A5 +04A6 04A7 +04A8 04A9 +04AA 04AB +04AC 04AD +04AE 04AF +04B0 04B1 +04B2 04B3 +04B4 04B5 +04B6 04B7 +04B8 04B9 +04BA 04BB +04BC 04BD +04BE 04BF +04C1 04C2 +04C3 04C4 +04C7 04C8 +04CB 04CC +04D0 04D1 +04D2 04D3 +04D4 04D5 +04D6 04D7 +04D8 04D9 +04DA 04DB +04DC 04DD +04DE 04DF +04E0 04E1 +04E2 04E3 +04E4 04E5 +04E6 04E7 +04E8 04E9 +04EA 04EB +04EC 04ED +04EE 04EF +04F0 04F1 +04F2 04F3 +04F4 04F5 +04F8 04F9 +0531 0561 +0532 0562 +0533 0563 +0534 0564 +0535 0565 +0536 0566 +0537 0567 +0538 0568 +0539 0569 +053A 056A +053B 056B +053C 056C +053D 056D +053E 056E +053F 056F +0540 0570 +0541 0571 +0542 0572 +0543 0573 +0544 0574 +0545 0575 +0546 0576 +0547 0577 +0548 0578 +0549 0579 +054A 057A +054B 057B +054C 057C +054D 057D +054E 057E +054F 057F +0550 0580 +0551 0581 +0552 0582 +0553 0583 +0554 0584 +0555 0585 +0556 0586 +1E00 1E01 +1E02 1E03 +1E04 1E05 +1E06 1E07 +1E08 1E09 +1E0A 1E0B +1E0C 1E0D +1E0E 1E0F +1E10 1E11 +1E12 1E13 +1E14 1E15 +1E16 1E17 +1E18 1E19 +1E1A 1E1B +1E1C 1E1D +1E1E 1E1F +1E20 1E21 +1E22 1E23 +1E24 1E25 +1E26 1E27 +1E28 1E29 +1E2A 1E2B +1E2C 1E2D +1E2E 1E2F +1E30 1E31 +1E32 1E33 +1E34 1E35 +1E36 1E37 +1E38 1E39 +1E3A 1E3B +1E3C 1E3D +1E3E 1E3F +1E40 1E41 +1E42 1E43 +1E44 1E45 +1E46 1E47 +1E48 1E49 +1E4A 1E4B +1E4C 1E4D +1E4E 1E4F +1E50 1E51 +1E52 1E53 +1E54 1E55 +1E56 1E57 +1E58 1E59 +1E5A 1E5B +1E5C 1E5D +1E5E 1E5F +1E60 1E61 +1E62 1E63 +1E64 1E65 +1E66 1E67 +1E68 1E69 +1E6A 1E6B +1E6C 1E6D +1E6E 1E6F +1E70 1E71 +1E72 1E73 +1E74 1E75 +1E76 1E77 +1E78 1E79 +1E7A 1E7B +1E7C 1E7D +1E7E 1E7F +1E80 1E81 +1E82 1E83 +1E84 1E85 +1E86 1E87 +1E88 1E89 +1E8A 1E8B +1E8C 1E8D +1E8E 1E8F +1E90 1E91 +1E92 1E93 +1E94 1E95 +1E9B 1E61 +1EA0 1EA1 +1EA2 1EA3 +1EA4 1EA5 +1EA6 1EA7 +1EA8 1EA9 +1EAA 1EAB +1EAC 1EAD +1EAE 1EAF +1EB0 1EB1 +1EB2 1EB3 +1EB4 1EB5 +1EB6 1EB7 +1EB8 1EB9 +1EBA 1EBB +1EBC 1EBD +1EBE 1EBF +1EC0 1EC1 +1EC2 1EC3 +1EC4 1EC5 +1EC6 1EC7 +1EC8 1EC9 +1ECA 1ECB +1ECC 1ECD +1ECE 1ECF +1ED0 1ED1 +1ED2 1ED3 +1ED4 1ED5 +1ED6 1ED7 +1ED8 1ED9 +1EDA 1EDB +1EDC 1EDD +1EDE 1EDF +1EE0 1EE1 +1EE2 1EE3 +1EE4 1EE5 +1EE6 1EE7 +1EE8 1EE9 +1EEA 1EEB +1EEC 1EED +1EEE 1EEF +1EF0 1EF1 +1EF2 1EF3 +1EF4 1EF5 +1EF6 1EF7 +1EF8 1EF9 +1F08 1F00 +1F09 1F01 +1F0A 1F02 +1F0B 1F03 +1F0C 1F04 +1F0D 1F05 +1F0E 1F06 +1F0F 1F07 +1F18 1F10 +1F19 1F11 +1F1A 1F12 +1F1B 1F13 +1F1C 1F14 +1F1D 1F15 +1F28 1F20 +1F29 1F21 +1F2A 1F22 +1F2B 1F23 +1F2C 1F24 +1F2D 1F25 +1F2E 1F26 +1F2F 1F27 +1F38 1F30 +1F39 1F31 +1F3A 1F32 +1F3B 1F33 +1F3C 1F34 +1F3D 1F35 +1F3E 1F36 +1F3F 1F37 +1F48 1F40 +1F49 1F41 +1F4A 1F42 +1F4B 1F43 +1F4C 1F44 +1F4D 1F45 +1F59 1F51 +1F5B 1F53 +1F5D 1F55 +1F5F 1F57 +1F68 1F60 +1F69 1F61 +1F6A 1F62 +1F6B 1F63 +1F6C 1F64 +1F6D 1F65 +1F6E 1F66 +1F6F 1F67 +1FB8 1FB0 +1FB9 1FB1 +1FBA 1F70 +1FBB 1F71 +1FBE 03B9 +1FC8 1F72 +1FC9 1F73 +1FCA 1F74 +1FCB 1F75 +1FD8 1FD0 +1FD9 1FD1 +1FDA 1F76 +1FDB 1F77 +1FE8 1FE0 +1FE9 1FE1 +1FEA 1F7A +1FEB 1F7B +1FEC 1FE5 +1FF8 1F78 +1FF9 1F79 +1FFA 1F7C +1FFB 1F7D +2126 03C9 +212A 006B +212B 00E5 +2160 2170 +2161 2171 +2162 2172 +2163 2173 +2164 2174 +2165 2175 +2166 2176 +2167 2177 +2168 2178 +2169 2179 +216A 217A +216B 217B +216C 217C +216D 217D +216E 217E +216F 217F +24B6 24D0 +24B7 24D1 +24B8 24D2 +24B9 24D3 +24BA 24D4 +24BB 24D5 +24BC 24D6 +24BD 24D7 +24BE 24D8 +24BF 24D9 +24C0 24DA +24C1 24DB +24C2 24DC +24C3 24DD +24C4 24DE +24C5 24DF +24C6 24E0 +24C7 24E1 +24C8 24E2 +24C9 24E3 +24CA 24E4 +24CB 24E5 +24CC 24E6 +24CD 24E7 +24CE 24E8 +24CF 24E9 +FF21 FF41 +FF22 FF42 +FF23 FF43 +FF24 FF44 +FF25 FF45 +FF26 FF46 +FF27 FF47 +FF28 FF48 +FF29 FF49 +FF2A FF4A +FF2B FF4B +FF2C FF4C +FF2D FF4D +FF2E FF4E +FF2F FF4F +FF30 FF50 +FF31 FF51 +FF32 FF52 +FF33 FF53 +FF34 FF54 +FF35 FF55 +FF36 FF56 +FF37 FF57 +FF38 FF58 +FF39 FF59 +FF3A FF5A +10400 10428 +10401 10429 +10402 1042A +10403 1042B +10404 1042C +10405 1042D +10406 1042E +10407 1042F +10408 10430 +10409 10431 +1040A 10432 +1040B 10433 +1040C 10434 +1040D 10435 +1040E 10436 +1040F 10437 +10410 10438 +10411 10439 +10412 1043A +10413 1043B +10414 1043C +10415 1043D +10416 1043E +10417 1043F +10418 10440 +10419 10441 +1041A 10442 +1041B 10443 +1041C 10444 +1041D 10445 +1041E 10446 +1041F 10447 +10420 10448 +10421 10449 +10422 1044A +10423 1044B +10424 1044C +10425 1044D +END diff --git a/lib/unicore/mktables b/lib/unicore/mktables index 7d8912d..ea04974 100644 --- a/lib/unicore/mktables +++ b/lib/unicore/mktables @@ -748,7 +748,7 @@ if (open(my $SpecCase, "SpecCase.txt")) { # Prepend them to the To/{Upper,Lower,Title}.pl. for my $case (qw(Lower Title Upper)) { - my $NormalCase = do "To/$case.pl"; + my $NormalCase = do "To/$case.pl" || die "$0: To/$case.pl: $!\n"; if (open(my $Case, ">To/$case.pl")) { header($Case); print $Case <) { + next unless /^([0-9A-Fa-f]+)\s*;\s*([CFI])\s*;\s*([0-9A-Fa-f]+(?: [0-9A-Fa-f]+)*)\s*;/; + + my ($code, $status, $fold) = ($1, $2, $3); + + if ($status eq 'C') { # Common: one-to-one folding + append(\@Fold, $code, $fold); + } else { # F: full, or I: dotted uppercase I -> dotless lowercase I + $Fold{hex($code)} = $fold; + } + } + + flush(\@Fold, "To/Fold.pl"); + + # + # Prepend the special foldings to the common foldings. + # + + my $CommonFold = do "To/Fold.pl" || die "$0: To/Fold.pl: $!\n"; + if (open(my $Fold, ">To/Fold.pl")) { + header($Fold); + print $Fold < $b } keys %Fold) { + my $foldstr = + join "", map { sprintf "\\x{%s}", $_ } split ' ', $Fold{$code}; + print $Fold qq['$code' => "$foldstr",\n]; + } + print $Fold <