X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=lib%2Funicore%2FCaseFolding.txt;h=9d28037776be4273052908fc991e846f48d22558;hb=15baf0c4b0f9a876f29eed6822a55401efbcabec;hp=f25d9bfed4dfc476c27be73fef9794de7e8d79db;hpb=a2bd7410d0183a5974c36ff0549fab8692d0713b;p=p5sagit%2Fp5-mst-13.2.git diff --git a/lib/unicore/CaseFolding.txt b/lib/unicore/CaseFolding.txt index f25d9bf..9d28037 100644 --- a/lib/unicore/CaseFolding.txt +++ b/lib/unicore/CaseFolding.txt @@ -1,8 +1,8 @@ -# CaseFolding-4.1.0.txt -# Date: 2005-03-26, 00:24:43 GMT [MD] +# CaseFolding-5.1.0.txt +# Date: 2008-03-03, 21:57:14 GMT [MD] # # Unicode Character Database -# Copyright (c) 1991-2005 Unicode, Inc. +# Copyright (c) 1991-2008 Unicode, Inc. # For terms of use, see http://www.unicode.org/terms_of_use.html # For documentation, see UCD.html # @@ -23,8 +23,9 @@ # # NOTE: case folding does not preserve normalization formats! # -# For information on case folding, see -# UTR #21 Case Mappings, at http://www.unicode.org/unicode/reports/tr21/ +# For information on case folding, including how to have case folding +# preserve normalization formats, see Section 3.13 Default Case Algorithms in +# The Unicode Standard, Version 5.0. # # ================================================================================ # Format @@ -51,7 +52,7 @@ # behavior. (The default option is to exclude them.) # # ================================================================= - +# @missing 0000..10FFFF; 0041; C; 0061; # LATIN CAPITAL LETTER A 0042; C; 0062; # LATIN CAPITAL LETTER B 0043; C; 0063; # LATIN CAPITAL LETTER C @@ -272,10 +273,23 @@ 022E; C; 022F; # LATIN CAPITAL LETTER O WITH DOT ABOVE 0230; C; 0231; # LATIN CAPITAL LETTER O WITH DOT ABOVE AND MACRON 0232; C; 0233; # LATIN CAPITAL LETTER Y WITH MACRON +023A; C; 2C65; # LATIN CAPITAL LETTER A WITH STROKE 023B; C; 023C; # LATIN CAPITAL LETTER C WITH STROKE 023D; C; 019A; # LATIN CAPITAL LETTER L WITH BAR -0241; C; 0294; # LATIN CAPITAL LETTER GLOTTAL STOP +023E; C; 2C66; # LATIN CAPITAL LETTER T WITH DIAGONAL STROKE +0241; C; 0242; # LATIN CAPITAL LETTER GLOTTAL STOP +0243; C; 0180; # LATIN CAPITAL LETTER B WITH STROKE +0244; C; 0289; # LATIN CAPITAL LETTER U BAR +0245; C; 028C; # LATIN CAPITAL LETTER TURNED V +0246; C; 0247; # LATIN CAPITAL LETTER E WITH STROKE +0248; C; 0249; # LATIN CAPITAL LETTER J WITH STROKE +024A; C; 024B; # LATIN CAPITAL LETTER SMALL Q WITH HOOK TAIL +024C; C; 024D; # LATIN CAPITAL LETTER R WITH STROKE +024E; C; 024F; # LATIN CAPITAL LETTER Y WITH STROKE 0345; C; 03B9; # COMBINING GREEK YPOGEGRAMMENI +0370; C; 0371; # GREEK CAPITAL LETTER HETA +0372; C; 0373; # GREEK CAPITAL LETTER ARCHAIC SAMPI +0376; C; 0377; # GREEK CAPITAL LETTER PAMPHYLIAN DIGAMMA 0386; C; 03AC; # GREEK CAPITAL LETTER ALPHA WITH TONOS 0388; C; 03AD; # GREEK CAPITAL LETTER EPSILON WITH TONOS 0389; C; 03AE; # GREEK CAPITAL LETTER ETA WITH TONOS @@ -312,6 +326,7 @@ 03AB; C; 03CB; # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA 03B0; F; 03C5 0308 0301; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS 03C2; C; 03C3; # GREEK SMALL LETTER FINAL SIGMA +03CF; C; 03D7; # GREEK CAPITAL KAI SYMBOL 03D0; C; 03B2; # GREEK BETA SYMBOL 03D1; C; 03B8; # GREEK THETA SYMBOL 03D5; C; 03C6; # GREEK PHI SYMBOL @@ -335,6 +350,9 @@ 03F7; C; 03F8; # GREEK CAPITAL LETTER SHO 03F9; C; 03F2; # GREEK CAPITAL LUNATE SIGMA SYMBOL 03FA; C; 03FB; # GREEK CAPITAL LETTER SAN +03FD; C; 037B; # GREEK CAPITAL REVERSED LUNATE SIGMA SYMBOL +03FE; C; 037C; # GREEK CAPITAL DOTTED LUNATE SIGMA SYMBOL +03FF; C; 037D; # GREEK CAPITAL REVERSED DOTTED LUNATE SIGMA SYMBOL 0400; C; 0450; # CYRILLIC CAPITAL LETTER IE WITH GRAVE 0401; C; 0451; # CYRILLIC CAPITAL LETTER IO 0402; C; 0452; # CYRILLIC CAPITAL LETTER DJE @@ -427,6 +445,7 @@ 04BA; C; 04BB; # CYRILLIC CAPITAL LETTER SHHA 04BC; C; 04BD; # CYRILLIC CAPITAL LETTER ABKHASIAN CHE 04BE; C; 04BF; # CYRILLIC CAPITAL LETTER ABKHASIAN CHE WITH DESCENDER +04C0; C; 04CF; # CYRILLIC LETTER PALOCHKA 04C1; C; 04C2; # CYRILLIC CAPITAL LETTER ZHE WITH BREVE 04C3; C; 04C4; # CYRILLIC CAPITAL LETTER KA WITH HOOK 04C5; C; 04C6; # CYRILLIC CAPITAL LETTER EL WITH TAIL @@ -455,6 +474,9 @@ 04F4; C; 04F5; # CYRILLIC CAPITAL LETTER CHE WITH DIAERESIS 04F6; C; 04F7; # CYRILLIC CAPITAL LETTER GHE WITH DESCENDER 04F8; C; 04F9; # CYRILLIC CAPITAL LETTER YERU WITH DIAERESIS +04FA; C; 04FB; # CYRILLIC CAPITAL LETTER GHE WITH STROKE AND HOOK +04FC; C; 04FD; # CYRILLIC CAPITAL LETTER HA WITH HOOK +04FE; C; 04FF; # CYRILLIC CAPITAL LETTER HA WITH STROKE 0500; C; 0501; # CYRILLIC CAPITAL LETTER KOMI DE 0502; C; 0503; # CYRILLIC CAPITAL LETTER KOMI DJE 0504; C; 0505; # CYRILLIC CAPITAL LETTER KOMI ZJE @@ -463,6 +485,16 @@ 050A; C; 050B; # CYRILLIC CAPITAL LETTER KOMI NJE 050C; C; 050D; # CYRILLIC CAPITAL LETTER KOMI SJE 050E; C; 050F; # CYRILLIC CAPITAL LETTER KOMI TJE +0510; C; 0511; # CYRILLIC CAPITAL LETTER REVERSED ZE +0512; C; 0513; # CYRILLIC CAPITAL LETTER EL WITH HOOK +0514; C; 0515; # CYRILLIC CAPITAL LETTER LHA +0516; C; 0517; # CYRILLIC CAPITAL LETTER RHA +0518; C; 0519; # CYRILLIC CAPITAL LETTER YAE +051A; C; 051B; # CYRILLIC CAPITAL LETTER QA +051C; C; 051D; # CYRILLIC CAPITAL LETTER WE +051E; C; 051F; # CYRILLIC CAPITAL LETTER ALEUT KA +0520; C; 0521; # CYRILLIC CAPITAL LETTER EL WITH MIDDLE HOOK +0522; C; 0523; # CYRILLIC CAPITAL LETTER EN WITH MIDDLE HOOK 0531; C; 0561; # ARMENIAN CAPITAL LETTER AYB 0532; C; 0562; # ARMENIAN CAPITAL LETTER BEN 0533; C; 0563; # ARMENIAN CAPITAL LETTER GIM @@ -621,6 +653,8 @@ 1E99; F; 0079 030A; # LATIN SMALL LETTER Y WITH RING ABOVE 1E9A; F; 0061 02BE; # LATIN SMALL LETTER A WITH RIGHT HALF RING 1E9B; C; 1E61; # LATIN SMALL LETTER LONG S WITH DOT ABOVE +1E9E; F; 0073 0073; # LATIN CAPITAL LETTER SHARP S +1E9E; S; 00DF; # LATIN CAPITAL LETTER SHARP S 1EA0; C; 1EA1; # LATIN CAPITAL LETTER A WITH DOT BELOW 1EA2; C; 1EA3; # LATIN CAPITAL LETTER A WITH HOOK ABOVE 1EA4; C; 1EA5; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND ACUTE @@ -666,6 +700,9 @@ 1EF4; C; 1EF5; # LATIN CAPITAL LETTER Y WITH DOT BELOW 1EF6; C; 1EF7; # LATIN CAPITAL LETTER Y WITH HOOK ABOVE 1EF8; C; 1EF9; # LATIN CAPITAL LETTER Y WITH TILDE +1EFA; C; 1EFB; # LATIN CAPITAL LETTER MIDDLE-WELSH LL +1EFC; C; 1EFD; # LATIN CAPITAL LETTER MIDDLE-WELSH V +1EFE; C; 1EFF; # LATIN CAPITAL LETTER Y WITH LOOP 1F08; C; 1F00; # GREEK CAPITAL LETTER ALPHA WITH PSILI 1F09; C; 1F01; # GREEK CAPITAL LETTER ALPHA WITH DASIA 1F0A; C; 1F02; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA @@ -845,6 +882,7 @@ 2126; C; 03C9; # OHM SIGN 212A; C; 006B; # KELVIN SIGN 212B; C; 00E5; # ANGSTROM SIGN +2132; C; 214E; # TURNED CAPITAL F 2160; C; 2170; # ROMAN NUMERAL ONE 2161; C; 2171; # ROMAN NUMERAL TWO 2162; C; 2172; # ROMAN NUMERAL THREE @@ -861,6 +899,7 @@ 216D; C; 217D; # ROMAN NUMERAL ONE HUNDRED 216E; C; 217E; # ROMAN NUMERAL FIVE HUNDRED 216F; C; 217F; # ROMAN NUMERAL ONE THOUSAND +2183; C; 2184; # ROMAN NUMERAL REVERSED ONE HUNDRED 24B6; C; 24D0; # CIRCLED LATIN CAPITAL LETTER A 24B7; C; 24D1; # CIRCLED LATIN CAPITAL LETTER B 24B8; C; 24D2; # CIRCLED LATIN CAPITAL LETTER C @@ -934,6 +973,18 @@ 2C2C; C; 2C5C; # GLAGOLITIC CAPITAL LETTER SHTAPIC 2C2D; C; 2C5D; # GLAGOLITIC CAPITAL LETTER TROKUTASTI A 2C2E; C; 2C5E; # GLAGOLITIC CAPITAL LETTER LATINATE MYSLITE +2C60; C; 2C61; # LATIN CAPITAL LETTER L WITH DOUBLE BAR +2C62; C; 026B; # LATIN CAPITAL LETTER L WITH MIDDLE TILDE +2C63; C; 1D7D; # LATIN CAPITAL LETTER P WITH STROKE +2C64; C; 027D; # LATIN CAPITAL LETTER R WITH TAIL +2C67; C; 2C68; # LATIN CAPITAL LETTER H WITH DESCENDER +2C69; C; 2C6A; # LATIN CAPITAL LETTER K WITH DESCENDER +2C6B; C; 2C6C; # LATIN CAPITAL LETTER Z WITH DESCENDER +2C6D; C; 0251; # LATIN CAPITAL LETTER ALPHA +2C6E; C; 0271; # LATIN CAPITAL LETTER M WITH HOOK +2C6F; C; 0250; # LATIN CAPITAL LETTER TURNED A +2C72; C; 2C73; # LATIN CAPITAL LETTER W WITH HOOK +2C75; C; 2C76; # LATIN CAPITAL LETTER HALF H 2C80; C; 2C81; # COPTIC CAPITAL LETTER ALFA 2C82; C; 2C83; # COPTIC CAPITAL LETTER VIDA 2C84; C; 2C85; # COPTIC CAPITAL LETTER GAMMA @@ -984,6 +1035,87 @@ 2CDE; C; 2CDF; # COPTIC CAPITAL LETTER OLD NUBIAN NGI 2CE0; C; 2CE1; # COPTIC CAPITAL LETTER OLD NUBIAN NYI 2CE2; C; 2CE3; # COPTIC CAPITAL LETTER OLD NUBIAN WAU +A640; C; A641; # CYRILLIC CAPITAL LETTER ZEMLYA +A642; C; A643; # CYRILLIC CAPITAL LETTER DZELO +A644; C; A645; # CYRILLIC CAPITAL LETTER REVERSED DZE +A646; C; A647; # CYRILLIC CAPITAL LETTER IOTA +A648; C; A649; # CYRILLIC CAPITAL LETTER DJERV +A64A; C; A64B; # CYRILLIC CAPITAL LETTER MONOGRAPH UK +A64C; C; A64D; # CYRILLIC CAPITAL LETTER BROAD OMEGA +A64E; C; A64F; # CYRILLIC CAPITAL LETTER NEUTRAL YER +A650; C; A651; # CYRILLIC CAPITAL LETTER YERU WITH BACK YER +A652; C; A653; # CYRILLIC CAPITAL LETTER IOTIFIED YAT +A654; C; A655; # CYRILLIC CAPITAL LETTER REVERSED YU +A656; C; A657; # CYRILLIC CAPITAL LETTER IOTIFIED A +A658; C; A659; # CYRILLIC CAPITAL LETTER CLOSED LITTLE YUS +A65A; C; A65B; # CYRILLIC CAPITAL LETTER BLENDED YUS +A65C; C; A65D; # CYRILLIC CAPITAL LETTER IOTIFIED CLOSED LITTLE YUS +A65E; C; A65F; # CYRILLIC CAPITAL LETTER YN +A662; C; A663; # CYRILLIC CAPITAL LETTER SOFT DE +A664; C; A665; # CYRILLIC CAPITAL LETTER SOFT EL +A666; C; A667; # CYRILLIC CAPITAL LETTER SOFT EM +A668; C; A669; # CYRILLIC CAPITAL LETTER MONOCULAR O +A66A; C; A66B; # CYRILLIC CAPITAL LETTER BINOCULAR O +A66C; C; A66D; # CYRILLIC CAPITAL LETTER DOUBLE MONOCULAR O +A680; C; A681; # CYRILLIC CAPITAL LETTER DWE +A682; C; A683; # CYRILLIC CAPITAL LETTER DZWE +A684; C; A685; # CYRILLIC CAPITAL LETTER ZHWE +A686; C; A687; # CYRILLIC CAPITAL LETTER CCHE +A688; C; A689; # CYRILLIC CAPITAL LETTER DZZE +A68A; C; A68B; # CYRILLIC CAPITAL LETTER TE WITH MIDDLE HOOK +A68C; C; A68D; # CYRILLIC CAPITAL LETTER TWE +A68E; C; A68F; # CYRILLIC CAPITAL LETTER TSWE +A690; C; A691; # CYRILLIC CAPITAL LETTER TSSE +A692; C; A693; # CYRILLIC CAPITAL LETTER TCHE +A694; C; A695; # CYRILLIC CAPITAL LETTER HWE +A696; C; A697; # CYRILLIC CAPITAL LETTER SHWE +A722; C; A723; # LATIN CAPITAL LETTER EGYPTOLOGICAL ALEF +A724; C; A725; # LATIN CAPITAL LETTER EGYPTOLOGICAL AIN +A726; C; A727; # LATIN CAPITAL LETTER HENG +A728; C; A729; # LATIN CAPITAL LETTER TZ +A72A; C; A72B; # LATIN CAPITAL LETTER TRESILLO +A72C; C; A72D; # LATIN CAPITAL LETTER CUATRILLO +A72E; C; A72F; # LATIN CAPITAL LETTER CUATRILLO WITH COMMA +A732; C; A733; # LATIN CAPITAL LETTER AA +A734; C; A735; # LATIN CAPITAL LETTER AO +A736; C; A737; # LATIN CAPITAL LETTER AU +A738; C; A739; # LATIN CAPITAL LETTER AV +A73A; C; A73B; # LATIN CAPITAL LETTER AV WITH HORIZONTAL BAR +A73C; C; A73D; # LATIN CAPITAL LETTER AY +A73E; C; A73F; # LATIN CAPITAL LETTER REVERSED C WITH DOT +A740; C; A741; # LATIN CAPITAL LETTER K WITH STROKE +A742; C; A743; # LATIN CAPITAL LETTER K WITH DIAGONAL STROKE +A744; C; A745; # LATIN CAPITAL LETTER K WITH STROKE AND DIAGONAL STROKE +A746; C; A747; # LATIN CAPITAL LETTER BROKEN L +A748; C; A749; # LATIN CAPITAL LETTER L WITH HIGH STROKE +A74A; C; A74B; # LATIN CAPITAL LETTER O WITH LONG STROKE OVERLAY +A74C; C; A74D; # LATIN CAPITAL LETTER O WITH LOOP +A74E; C; A74F; # LATIN CAPITAL LETTER OO +A750; C; A751; # LATIN CAPITAL LETTER P WITH STROKE THROUGH DESCENDER +A752; C; A753; # LATIN CAPITAL LETTER P WITH FLOURISH +A754; C; A755; # LATIN CAPITAL LETTER P WITH SQUIRREL TAIL +A756; C; A757; # LATIN CAPITAL LETTER Q WITH STROKE THROUGH DESCENDER +A758; C; A759; # LATIN CAPITAL LETTER Q WITH DIAGONAL STROKE +A75A; C; A75B; # LATIN CAPITAL LETTER R ROTUNDA +A75C; C; A75D; # LATIN CAPITAL LETTER RUM ROTUNDA +A75E; C; A75F; # LATIN CAPITAL LETTER V WITH DIAGONAL STROKE +A760; C; A761; # LATIN CAPITAL LETTER VY +A762; C; A763; # LATIN CAPITAL LETTER VISIGOTHIC Z +A764; C; A765; # LATIN CAPITAL LETTER THORN WITH STROKE +A766; C; A767; # LATIN CAPITAL LETTER THORN WITH STROKE THROUGH DESCENDER +A768; C; A769; # LATIN CAPITAL LETTER VEND +A76A; C; A76B; # LATIN CAPITAL LETTER ET +A76C; C; A76D; # LATIN CAPITAL LETTER IS +A76E; C; A76F; # LATIN CAPITAL LETTER CON +A779; C; A77A; # LATIN CAPITAL LETTER INSULAR D +A77B; C; A77C; # LATIN CAPITAL LETTER INSULAR F +A77D; C; 1D79; # LATIN CAPITAL LETTER INSULAR G +A77E; C; A77F; # LATIN CAPITAL LETTER TURNED INSULAR G +A780; C; A781; # LATIN CAPITAL LETTER TURNED L +A782; C; A783; # LATIN CAPITAL LETTER INSULAR R +A784; C; A785; # LATIN CAPITAL LETTER INSULAR S +A786; C; A787; # LATIN CAPITAL LETTER INSULAR T +A78B; C; A78C; # LATIN CAPITAL LETTER SALTILLO FB00; F; 0066 0066; # LATIN SMALL LIGATURE FF FB01; F; 0066 0069; # LATIN SMALL LIGATURE FI FB02; F; 0066 006C; # LATIN SMALL LIGATURE FL