From: SADAHIRO Tomoyuki Date: Fri, 16 Nov 2001 23:31:04 +0000 (+0900) Subject: Unicode General Category: Cn X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=c3a8a2b80b1e078151fa0dd5467f616ad64a998e;p=p5sagit%2Fp5-mst-13.2.git Unicode General Category: Cn Message-Id: <20011116232709.23CF.BQW10602@nifty.com> (with the tweak that Cntrl is not C, it's Cc) p4raw-id: //depot/perl@13045 --- diff --git a/lib/unicore/In/163.pl b/lib/unicore/In/163.pl index ede4971..dd91382 100644 --- a/lib/unicore/In/163.pl +++ b/lib/unicore/In/163.pl @@ -2,22 +2,395 @@ # This file is built by mktables from e.g. Unicode.txt. # Any changes made here will be lost! return <<'END'; -0000 FDCF -FDF0 FFFD -10000 1FFFD -20000 2FFFD -30000 3FFFD -40000 4FFFD -50000 5FFFD -60000 6FFFD -70000 7FFFD -80000 8FFFD -90000 9FFFD -A0000 AFFFD -B0000 BFFFD -C0000 CFFFD -D0000 DFFFD -E0000 EFFFD +0000 021F +0222 0233 +0250 02AD +02B0 02EE +0300 034E +0360 0362 +0374 0375 +037A +037E +0384 038A +038C +038E 03A1 +03A3 03CE +03D0 03D7 +03DA 03F5 +0400 0486 +0488 0489 +048C 04C4 +04C7 04C8 +04CB 04CC +04D0 04F5 +04F8 04F9 +0531 0556 +0559 055F +0561 0587 +0589 058A +0591 05A1 +05A3 05B9 +05BB 05C4 +05D0 05EA +05F0 05F4 +060C +061B +061F +0621 063A +0640 0655 +0660 066D +0670 06ED +06F0 06FE +0700 070D +070F 072C +0730 074A +0780 07B0 +0901 0903 +0905 0939 +093C 094D +0950 0954 +0958 0970 +0981 0983 +0985 098C +098F 0990 +0993 09A8 +09AA 09B0 +09B2 +09B6 09B9 +09BC +09BE 09C4 +09C7 09C8 +09CB 09CD +09D7 +09DC 09DD +09DF 09E3 +09E6 09FA +0A02 +0A05 0A0A +0A0F 0A10 +0A13 0A28 +0A2A 0A30 +0A32 0A33 +0A35 0A36 +0A38 0A39 +0A3C +0A3E 0A42 +0A47 0A48 +0A4B 0A4D +0A59 0A5C +0A5E +0A66 0A74 +0A81 0A83 +0A85 0A8B +0A8D +0A8F 0A91 +0A93 0AA8 +0AAA 0AB0 +0AB2 0AB3 +0AB5 0AB9 +0ABC 0AC5 +0AC7 0AC9 +0ACB 0ACD +0AD0 +0AE0 +0AE6 0AEF +0B01 0B03 +0B05 0B0C +0B0F 0B10 +0B13 0B28 +0B2A 0B30 +0B32 0B33 +0B36 0B39 +0B3C 0B43 +0B47 0B48 +0B4B 0B4D +0B56 0B57 +0B5C 0B5D +0B5F 0B61 +0B66 0B70 +0B82 0B83 +0B85 0B8A +0B8E 0B90 +0B92 0B95 +0B99 0B9A +0B9C +0B9E 0B9F +0BA3 0BA4 +0BA8 0BAA +0BAE 0BB5 +0BB7 0BB9 +0BBE 0BC2 +0BC6 0BC8 +0BCA 0BCD +0BD7 +0BE7 0BF2 +0C01 0C03 +0C05 0C0C +0C0E 0C10 +0C12 0C28 +0C2A 0C33 +0C35 0C39 +0C3E 0C44 +0C46 0C48 +0C4A 0C4D +0C55 0C56 +0C60 0C61 +0C66 0C6F +0C82 0C83 +0C85 0C8C +0C8E 0C90 +0C92 0CA8 +0CAA 0CB3 +0CB5 0CB9 +0CBE 0CC4 +0CC6 0CC8 +0CCA 0CCD +0CD5 0CD6 +0CDE +0CE0 0CE1 +0CE6 0CEF +0D02 0D03 +0D05 0D0C +0D0E 0D10 +0D12 0D28 +0D2A 0D39 +0D3E 0D43 +0D46 0D48 +0D4A 0D4D +0D57 +0D60 0D61 +0D66 0D6F +0D82 0D83 +0D85 0D96 +0D9A 0DB1 +0DB3 0DBB +0DBD +0DC0 0DC6 +0DCA +0DCF 0DD4 +0DD6 +0DD8 0DDF +0DF2 0DF4 +0E01 0E3A +0E3F 0E5B +0E81 0E82 +0E84 +0E87 0E88 +0E8A +0E8D +0E94 0E97 +0E99 0E9F +0EA1 0EA3 +0EA5 +0EA7 +0EAA 0EAB +0EAD 0EB9 +0EBB 0EBD +0EC0 0EC4 +0EC6 +0EC8 0ECD +0ED0 0ED9 +0EDC 0EDD +0F00 0F47 +0F49 0F6A +0F71 0F8B +0F90 0F97 +0F99 0FBC +0FBE 0FCC +0FCF +1000 1021 +1023 1027 +1029 102A +102C 1032 +1036 1039 +1040 1059 +10A0 10C5 +10D0 10F6 +10FB +1100 1159 +115F 11A2 +11A8 11F9 +1200 1206 +1208 1246 +1248 +124A 124D +1250 1256 +1258 +125A 125D +1260 1286 +1288 +128A 128D +1290 12AE +12B0 +12B2 12B5 +12B8 12BE +12C0 +12C2 12C5 +12C8 12CE +12D0 12D6 +12D8 12EE +12F0 130E +1310 +1312 1315 +1318 131E +1320 1346 +1348 135A +1361 137C +13A0 13F4 +1401 1676 +1680 169C +16A0 16F0 +1780 17DC +17E0 17E9 +1800 180E +1810 1819 +1820 1877 +1880 18A9 +1E00 1E9B +1EA0 1EF9 +1F00 1F15 +1F18 1F1D +1F20 1F45 +1F48 1F4D +1F50 1F57 +1F59 +1F5B +1F5D +1F5F 1F7D +1F80 1FB4 +1FB6 1FC4 +1FC6 1FD3 +1FD6 1FDB +1FDD 1FEF +1FF2 1FF4 +1FF6 1FFE +2000 2046 +2048 204D +206A 2070 +2074 208E +20A0 20AF +20D0 20E3 +2100 213A +2153 2183 +2190 21F3 +2200 22F1 +2300 237B +237D 239A +2400 2426 +2440 244A +2460 24EA +2500 2595 +25A0 25F7 +2600 2613 +2619 2671 +2701 2704 +2706 2709 +270C 2727 +2729 274B +274D +274F 2752 +2756 +2758 275E +2761 2767 +2776 2794 +2798 27AF +27B1 27BE +2800 28FF +2E80 2E99 +2E9B 2EF3 +2F00 2FD5 +2FF0 2FFB +3000 303A +303E 303F +3041 3094 +3099 309E +30A1 30FE +3105 312C +3131 318E +3190 31B7 +3200 321C +3220 3243 +3260 327B +327F 32B0 +32C0 32CB +32D0 32FE +3300 3376 +337B 33DD +33E0 33FE +3400 4DB5 +4E00 9FA5 +A000 A48C +A490 A4A1 +A4A4 A4B3 +A4B5 A4C0 +A4C2 A4C4 +A4C6 +AC00 D7A3 +D800 FA2D +FB00 FB06 +FB13 FB17 +FB1D FB36 +FB38 FB3C +FB3E +FB40 FB41 +FB43 FB44 +FB46 FBB1 +FBD3 FD3F +FD50 FD8F +FD92 FDC7 +FDF0 FDFB +FE20 FE23 +FE30 FE44 +FE49 FE52 +FE54 FE66 +FE68 FE6B +FE70 FE72 +FE74 +FE76 FEFC +FEFF +FF01 FF5E +FF61 FFBE +FFC2 FFC7 +FFCA FFCF +FFD2 FFD7 +FFDA FFDC +FFE0 FFE6 +FFE8 FFEE +FFF9 FFFD +10300 1031E +10320 10323 +10330 1034A +10400 10425 +10428 1044D +1D000 1D0F5 +1D100 1D126 +1D12A 1D1DD +1D400 1D454 +1D456 1D49C +1D49E 1D49F +1D4A2 +1D4A5 1D4A6 +1D4A9 1D4AC +1D4AE 1D4B9 +1D4BB +1D4BD 1D4C0 +1D4C2 1D4C3 +1D4C5 1D505 +1D507 1D50A +1D50D 1D514 +1D516 1D51C +1D51E 1D539 +1D53B 1D53E +1D540 1D544 +1D546 +1D54A 1D550 +1D552 1D6A3 +1D6A8 1D7C9 +1D7CE 1D7FF +20000 2A6D6 +2F800 2FA1D +E0001 +E0020 E007F F0000 FFFFD 100000 10FFFD END diff --git a/lib/unicore/Is/C.pl b/lib/unicore/Is/C.pl index b58d48d..3b29608 100644 --- a/lib/unicore/Is/C.pl +++ b/lib/unicore/Is/C.pl @@ -4,17 +4,393 @@ return <<'END'; 0000 001F 007F 009F -070F -180B 180E +0220 0221 +0234 024F +02AE 02AF +02EF 02FF +034F 035F +0363 0373 +0376 0379 +037B 037D +037F 0383 +038B +038D +03A2 +03CF +03D8 03D9 +03F6 03FF +0487 +048A 048B +04C5 04C6 +04C9 04CA +04CD 04CF +04F6 04F7 +04FA 0530 +0557 0558 +0560 +0588 +058B 0590 +05A2 +05BA +05C5 05CF +05EB 05EF +05F5 060B +060D 061A +061C 061E +0620 +063B 063F +0656 065F +066E 066F +06EE 06EF +06FF +070E 070F +072D 072F +074B 077F +07B1 0900 +0904 +093A 093B +094E 094F +0955 0957 +0971 0980 +0984 +098D 098E +0991 0992 +09A9 +09B1 +09B3 09B5 +09BA 09BB +09BD +09C5 09C6 +09C9 09CA +09CE 09D6 +09D8 09DB +09DE +09E4 09E5 +09FB 0A01 +0A03 0A04 +0A0B 0A0E +0A11 0A12 +0A29 +0A31 +0A34 +0A37 +0A3A 0A3B +0A3D +0A43 0A46 +0A49 0A4A +0A4E 0A58 +0A5D +0A5F 0A65 +0A75 0A80 +0A84 +0A8C +0A8E +0A92 +0AA9 +0AB1 +0AB4 +0ABA 0ABB +0AC6 +0ACA +0ACE 0ACF +0AD1 0ADF +0AE1 0AE5 +0AF0 0B00 +0B04 +0B0D 0B0E +0B11 0B12 +0B29 +0B31 +0B34 0B35 +0B3A 0B3B +0B44 0B46 +0B49 0B4A +0B4E 0B55 +0B58 0B5B +0B5E +0B62 0B65 +0B71 0B81 +0B84 +0B8B 0B8D +0B91 +0B96 0B98 +0B9B +0B9D +0BA0 0BA2 +0BA5 0BA7 +0BAB 0BAD +0BB6 +0BBA 0BBD +0BC3 0BC5 +0BC9 +0BCE 0BD6 +0BD8 0BE6 +0BF3 0C00 +0C04 +0C0D +0C11 +0C29 +0C34 +0C3A 0C3D +0C45 +0C49 +0C4E 0C54 +0C57 0C5F +0C62 0C65 +0C70 0C81 +0C84 +0C8D +0C91 +0CA9 +0CB4 +0CBA 0CBD +0CC5 +0CC9 +0CCE 0CD4 +0CD7 0CDD +0CDF +0CE2 0CE5 +0CF0 0D01 +0D04 +0D0D +0D11 +0D29 +0D3A 0D3D +0D44 0D45 +0D49 +0D4E 0D56 +0D58 0D5F +0D62 0D65 +0D70 0D81 +0D84 +0D97 0D99 +0DB2 +0DBC +0DBE 0DBF +0DC7 0DC9 +0DCB 0DCE +0DD5 +0DD7 +0DE0 0DF1 +0DF5 0E00 +0E3B 0E3E +0E5C 0E80 +0E83 +0E85 0E86 +0E89 +0E8B 0E8C +0E8E 0E93 +0E98 +0EA0 +0EA4 +0EA6 +0EA8 0EA9 +0EAC +0EBA +0EBE 0EBF +0EC5 +0EC7 +0ECE 0ECF +0EDA 0EDB +0EDE 0EFF +0F48 +0F6B 0F70 +0F8C 0F8F +0F98 +0FBD +0FCD 0FCE +0FD0 0FFF +1022 +1028 +102B +1033 1035 +103A 103F +105A 109F +10C6 10CF +10F7 10FA +10FC 10FF +115A 115E +11A3 11A7 +11FA 11FF +1207 +1247 +1249 +124E 124F +1257 +1259 +125E 125F +1287 +1289 +128E 128F +12AF +12B1 +12B6 12B7 +12BF +12C1 +12C6 12C7 +12CF +12D7 +12EF +130F +1311 +1316 1317 +131F +1347 +135B 1360 +137D 139F +13F5 1400 +1677 167F +169D 169F +16F1 177F +17DD 17DF +17EA 17FF +180B 180F +181A 181F +1878 187F +18AA 1DFF +1E9C 1E9F +1EFA 1EFF +1F16 1F17 +1F1E 1F1F +1F46 1F47 +1F4E 1F4F +1F58 +1F5A +1F5C +1F5E +1F7E 1F7F +1FB5 +1FC5 +1FD4 1FD5 +1FDC +1FF0 1FF1 +1FF5 +1FFF 200C 200F 202A 202E -206A 206F -D800 F8FF -FEFF -FFF9 FFFB +2047 +204E 206F +2071 2073 +208F 209F +20B0 20CF +20E4 20FF +213B 2152 +2184 218F +21F4 21FF +22F2 22FF +237C +239B 23FF +2427 243F +244B 245F +24EB 24FF +2596 259F +25F8 25FF +2614 2618 +2672 2700 +2705 +270A 270B +2728 +274C +274E +2753 2755 +2757 +275F 2760 +2768 2775 +2795 2797 +27B0 +27BF 27FF +2900 2E7F +2E9A +2EF4 2EFF +2FD6 2FEF +2FFC 2FFF +303B 303D +3040 +3095 3098 +309F 30A0 +30FF 3104 +312D 3130 +318F +31B8 31FF +321D 321F +3244 325F +327C 327E +32B1 32BF +32CC 32CF +32FF +3377 337A +33DE 33DF +33FF +4DB6 4DFF +9FA6 9FFF +A48D A48F +A4A2 A4A3 +A4B4 +A4C1 +A4C5 +A4C7 ABFF +D7A4 F8FF +FA2E FAFF +FB07 FB12 +FB18 FB1C +FB37 +FB3D +FB3F +FB42 +FB45 +FBB2 FBD2 +FD40 FD4F +FD90 FD91 +FDC8 FDEF +FDFC FE1F +FE24 FE2F +FE45 FE48 +FE53 +FE67 +FE6C FE6F +FE73 +FE75 +FEFD FF00 +FF5F FF60 +FFBF FFC1 +FFC8 FFC9 +FFD0 FFD1 +FFD8 FFD9 +FFDD FFDF +FFE7 +FFEF FFFB +FFFE 102FF +1031F +10324 1032F +1034B 103FF +10426 10427 +1044E 1CFFF +1D0F6 1D0FF +1D127 1D129 1D173 1D17A -E0001 -E0020 E007F -F0000 FFFFD -100000 10FFFD +1D1DE 1D3FF +1D455 +1D49D +1D4A0 1D4A1 +1D4A3 1D4A4 +1D4A7 1D4A8 +1D4AD +1D4BA +1D4BC +1D4C1 +1D4C4 +1D506 +1D50B 1D50C +1D515 +1D51D +1D53A +1D53F +1D545 +1D547 1D549 +1D551 +1D6A4 1D6A7 +1D7CA 1D7CD +1D800 1FFFF +2A6D7 2F7FF +2FA1E 10FFFF END diff --git a/lib/unicore/Is/Cn.pl b/lib/unicore/Is/Cn.pl index fe9a619..d730353 100644 --- a/lib/unicore/Is/Cn.pl +++ b/lib/unicore/Is/Cn.pl @@ -2,22 +2,395 @@ # This file is built by mktables from e.g. Unicode.txt. # Any changes made here will be lost! return <<'END'; -FDD0 FDEF Noncharacter_Code_Point -FFFE FFFF Noncharacter_Code_Point -1FFFE 1FFFF Noncharacter_Code_Point -2FFFE 2FFFF Noncharacter_Code_Point -3FFFE 3FFFF Noncharacter_Code_Point -4FFFE 4FFFF Noncharacter_Code_Point -5FFFE 5FFFF Noncharacter_Code_Point -6FFFE 6FFFF Noncharacter_Code_Point -7FFFE 7FFFF Noncharacter_Code_Point -8FFFE 8FFFF Noncharacter_Code_Point -9FFFE 9FFFF Noncharacter_Code_Point -AFFFE AFFFF Noncharacter_Code_Point -BFFFE BFFFF Noncharacter_Code_Point -CFFFE CFFFF Noncharacter_Code_Point -DFFFE DFFFF Noncharacter_Code_Point -EFFFE EFFFF Noncharacter_Code_Point -FFFFE FFFFF Noncharacter_Code_Point -10FFFE 10FFFF Noncharacter_Code_Point +0220 0221 +0234 024F +02AE 02AF +02EF 02FF +034F 035F +0363 0373 +0376 0379 +037B 037D +037F 0383 +038B +038D +03A2 +03CF +03D8 03D9 +03F6 03FF +0487 +048A 048B +04C5 04C6 +04C9 04CA +04CD 04CF +04F6 04F7 +04FA 0530 +0557 0558 +0560 +0588 +058B 0590 +05A2 +05BA +05C5 05CF +05EB 05EF +05F5 060B +060D 061A +061C 061E +0620 +063B 063F +0656 065F +066E 066F +06EE 06EF +06FF +070E +072D 072F +074B 077F +07B1 0900 +0904 +093A 093B +094E 094F +0955 0957 +0971 0980 +0984 +098D 098E +0991 0992 +09A9 +09B1 +09B3 09B5 +09BA 09BB +09BD +09C5 09C6 +09C9 09CA +09CE 09D6 +09D8 09DB +09DE +09E4 09E5 +09FB 0A01 +0A03 0A04 +0A0B 0A0E +0A11 0A12 +0A29 +0A31 +0A34 +0A37 +0A3A 0A3B +0A3D +0A43 0A46 +0A49 0A4A +0A4E 0A58 +0A5D +0A5F 0A65 +0A75 0A80 +0A84 +0A8C +0A8E +0A92 +0AA9 +0AB1 +0AB4 +0ABA 0ABB +0AC6 +0ACA +0ACE 0ACF +0AD1 0ADF +0AE1 0AE5 +0AF0 0B00 +0B04 +0B0D 0B0E +0B11 0B12 +0B29 +0B31 +0B34 0B35 +0B3A 0B3B +0B44 0B46 +0B49 0B4A +0B4E 0B55 +0B58 0B5B +0B5E +0B62 0B65 +0B71 0B81 +0B84 +0B8B 0B8D +0B91 +0B96 0B98 +0B9B +0B9D +0BA0 0BA2 +0BA5 0BA7 +0BAB 0BAD +0BB6 +0BBA 0BBD +0BC3 0BC5 +0BC9 +0BCE 0BD6 +0BD8 0BE6 +0BF3 0C00 +0C04 +0C0D +0C11 +0C29 +0C34 +0C3A 0C3D +0C45 +0C49 +0C4E 0C54 +0C57 0C5F +0C62 0C65 +0C70 0C81 +0C84 +0C8D +0C91 +0CA9 +0CB4 +0CBA 0CBD +0CC5 +0CC9 +0CCE 0CD4 +0CD7 0CDD +0CDF +0CE2 0CE5 +0CF0 0D01 +0D04 +0D0D +0D11 +0D29 +0D3A 0D3D +0D44 0D45 +0D49 +0D4E 0D56 +0D58 0D5F +0D62 0D65 +0D70 0D81 +0D84 +0D97 0D99 +0DB2 +0DBC +0DBE 0DBF +0DC7 0DC9 +0DCB 0DCE +0DD5 +0DD7 +0DE0 0DF1 +0DF5 0E00 +0E3B 0E3E +0E5C 0E80 +0E83 +0E85 0E86 +0E89 +0E8B 0E8C +0E8E 0E93 +0E98 +0EA0 +0EA4 +0EA6 +0EA8 0EA9 +0EAC +0EBA +0EBE 0EBF +0EC5 +0EC7 +0ECE 0ECF +0EDA 0EDB +0EDE 0EFF +0F48 +0F6B 0F70 +0F8C 0F8F +0F98 +0FBD +0FCD 0FCE +0FD0 0FFF +1022 +1028 +102B +1033 1035 +103A 103F +105A 109F +10C6 10CF +10F7 10FA +10FC 10FF +115A 115E +11A3 11A7 +11FA 11FF +1207 +1247 +1249 +124E 124F +1257 +1259 +125E 125F +1287 +1289 +128E 128F +12AF +12B1 +12B6 12B7 +12BF +12C1 +12C6 12C7 +12CF +12D7 +12EF +130F +1311 +1316 1317 +131F +1347 +135B 1360 +137D 139F +13F5 1400 +1677 167F +169D 169F +16F1 177F +17DD 17DF +17EA 17FF +180F +181A 181F +1878 187F +18AA 1DFF +1E9C 1E9F +1EFA 1EFF +1F16 1F17 +1F1E 1F1F +1F46 1F47 +1F4E 1F4F +1F58 +1F5A +1F5C +1F5E +1F7E 1F7F +1FB5 +1FC5 +1FD4 1FD5 +1FDC +1FF0 1FF1 +1FF5 +1FFF +2047 +204E 2069 +2071 2073 +208F 209F +20B0 20CF +20E4 20FF +213B 2152 +2184 218F +21F4 21FF +22F2 22FF +237C +239B 23FF +2427 243F +244B 245F +24EB 24FF +2596 259F +25F8 25FF +2614 2618 +2672 2700 +2705 +270A 270B +2728 +274C +274E +2753 2755 +2757 +275F 2760 +2768 2775 +2795 2797 +27B0 +27BF 27FF +2900 2E7F +2E9A +2EF4 2EFF +2FD6 2FEF +2FFC 2FFF +303B 303D +3040 +3095 3098 +309F 30A0 +30FF 3104 +312D 3130 +318F +31B8 31FF +321D 321F +3244 325F +327C 327E +32B1 32BF +32CC 32CF +32FF +3377 337A +33DE 33DF +33FF +4DB6 4DFF +9FA6 9FFF +A48D A48F +A4A2 A4A3 +A4B4 +A4C1 +A4C5 +A4C7 ABFF +D7A4 D7FF +FA2E FAFF +FB07 FB12 +FB18 FB1C +FB37 +FB3D +FB3F +FB42 +FB45 +FBB2 FBD2 +FD40 FD4F +FD90 FD91 +FDC8 FDEF +FDFC FE1F +FE24 FE2F +FE45 FE48 +FE53 +FE67 +FE6C FE6F +FE73 +FE75 +FEFD FEFE +FF00 +FF5F FF60 +FFBF FFC1 +FFC8 FFC9 +FFD0 FFD1 +FFD8 FFD9 +FFDD FFDF +FFE7 +FFEF FFF8 +FFFE 102FF +1031F +10324 1032F +1034B 103FF +10426 10427 +1044E 1CFFF +1D0F6 1D0FF +1D127 1D129 +1D1DE 1D3FF +1D455 +1D49D +1D4A0 1D4A1 +1D4A3 1D4A4 +1D4A7 1D4A8 +1D4AD +1D4BA +1D4BC +1D4C1 +1D4C4 +1D506 +1D50B 1D50C +1D515 +1D51D +1D53A +1D53F +1D545 +1D547 1D549 +1D551 +1D6A4 1D6A7 +1D7CA 1D7CD +1D800 1FFFF +2A6D7 2F7FF +2FA1E E0000 +E0002 E001F +E0080 EFFFF +FFFFE FFFFF +10FFFE 10FFFF END diff --git a/lib/unicore/mktables b/lib/unicore/mktables index 5615aee..8a95266 100644 --- a/lib/unicore/mktables +++ b/lib/unicore/mktables @@ -30,6 +30,12 @@ sub append { } } +sub append_range { + my ($table, $code_ini, $code_fin, $name) = @_; + append($table, $code_ini, $name); + extend($table, $code_fin); +} + sub inverse { my ($table) = @_; my $inverse = []; @@ -174,6 +180,30 @@ if (open(my $Unicode, "Unicode.txt")) { my @Number; my @Mirrored; my %To; + + + my $LastCodeInt = -1; # a numeric, not a hexadecimal string. + + # UnicodeData-3.1.0.html says + # no characters in the file have the property, Cn, Not Assigned. + + sub check_no_characters { # in the scope of my $LastCodeInt; + my $code = shift; + my $diff_from_last = hex($code) - $LastCodeInt; + my $code_ini = sprintf("%04X", $LastCodeInt + 1); + $LastCodeInt = hex($code); + if ($diff_from_last == 1) { + return; + } elsif ($diff_from_last == 2) { + append($Cat{Cn} ||= [], $code_ini); + append($Cat{C} ||= [], $code_ini); + } else { + my $code_fin = sprintf("%04X", hex($code) - 1); + append_range($Cat{Cn} ||= [], $code_ini, $code_fin); + append_range($Cat{C} ||= [], $code_ini, $code_fin); + } + } + while (<$Unicode>) { next unless /^[0-9A-Fa-f]+;/; s/\s+$//; @@ -184,6 +214,12 @@ if (open(my $Unicode, "Unicode.txt")) { $upper, $lower, $title) = split(/\s*;\s*/); if ($name =~ /^<(.+), (First|Last)>$/) { + if($2 eq 'First') { + check_no_characters($code); + } else { + $LastCodeInt = hex($code); + } + $name = $1; gencat(\@Name, \%General, \@General, \%Cat, $name, $cat, $code, @@ -193,6 +229,7 @@ if (open(my $Unicode, "Unicode.txt")) { $InIn{$name} = $General{$name}; } } else { + check_no_characters($code); gencat(\@Name, \%General, \@General, \%Cat, $name, $cat, $code, \&append); @@ -223,6 +260,8 @@ if (open(my $Unicode, "Unicode.txt")) { } } + check_no_characters(sprintf("%X", $LastUnicodeCodepoint + 1)); + flush(\@Name, "Name.pl"); foreach my $cat (sort keys %Cat) { @@ -467,10 +506,10 @@ for my $prop (sort { $a->[0] <=> $b->[0] } @Props) { } } -# Assigned is everything not Cn aka Noncharacter_Code_Point +# Assigned is everything not Cn $In{Assigned} = $InId++; -my $Assigned = inverse($Prop{Noncharacter_Code_Point}); +my $Assigned = inverse($Cat{Cn}); $InIn{Assigned} = $Assigned; sub merge_general_and_extended { @@ -643,10 +682,6 @@ EOT die "$0: In.pl: $!\n"; } -# Easy low-calorie cheat. -use File::Copy; -copy("In/$In{Noncharacter_Code_Point}.pl", "Is/Cn.pl"); - # # Write out the real In mappings # (the In.pl written out just above has the virtual In mappings)