X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=t%2Fop%2Flc.t;h=1fbb3e1afbf8288e5074befb73c0b33852472dab;hb=3e6618b7a3a573729d121d2ba204ea2164d47107;hp=9333c6c7cad3f41ddfa9437a92374695bfcfb03c;hpb=983ffd37e39751798fdd14853511af238c5fe291;p=p5sagit%2Fp5-mst-13.2.git diff --git a/t/op/lc.t b/t/op/lc.t index 9333c6c..1fbb3e1 100644 --- a/t/op/lc.t +++ b/t/op/lc.t @@ -1,6 +1,6 @@ #!./perl -print "1..42\n"; +print "1..51\n"; my $test = 1; @@ -49,45 +49,90 @@ ok(uc($b) eq "HELLO\.\* WORLD", 'uc'); ok(lc($b) eq "hello\.\* world", 'lc'); # \x{100} is LATIN CAPITAL LETTER A WITH MACRON; its bijective lowercase is -# \x{100}, LATIN SMALL LETTER A WITH MACRON. - -$a = "\x{100}\x{101}\x{41}\x{61}"; -$b = "\x{101}\x{100}\x{61}\x{41}"; - -ok("\Q$a\E." eq "\x{100}\x{101}\x{41}\x{61}.", '\Q\E \x{100}\x{101}\x{41}\x{61}'); -ok("\u$a" eq "\x{100}\x{101}\x{41}\x{61}", '\u'); -ok("\l$a" eq "\x{101}\x{101}\x{41}\x{61}", '\l'); -ok("\U$a" eq "\x{100}\x{100}\x{41}\x{41}", '\U'); -ok("\L$a" eq "\x{101}\x{101}\x{61}\x{61}", '\L'); - -ok(quotemeta($a) eq "\x{100}\x{101}\x{41}\x{61}", 'quotemeta'); -ok(ucfirst($a) eq "\x{100}\x{101}\x{41}\x{61}", 'ucfirst'); -ok(lcfirst($a) eq "\x{101}\x{101}\x{41}\x{61}", 'lcfirst'); -ok(uc($a) eq "\x{100}\x{100}\x{41}\x{41}", 'uc'); -ok(lc($a) eq "\x{101}\x{101}\x{61}\x{61}", 'lc'); - -ok("\Q$b\E." eq "\x{101}\x{100}\x{61}\x{41}.", '\Q\E \x{101}\x{100}\x{61}\x{41}'); -ok("\u$b" eq "\x{100}\x{100}\x{61}\x{41}", '\u'); -ok("\l$b" eq "\x{101}\x{100}\x{61}\x{41}", '\l'); -ok("\U$b" eq "\x{100}\x{100}\x{41}\x{41}", '\U'); -ok("\L$b" eq "\x{101}\x{101}\x{61}\x{61}", '\L'); - -ok(quotemeta($b) eq "\x{101}\x{100}\x{61}\x{41}", 'quotemeta'); -ok(ucfirst($b) eq "\x{100}\x{100}\x{61}\x{41}", 'ucfirst'); -ok(lcfirst($b) eq "\x{101}\x{100}\x{61}\x{41}", 'lcfirst'); -ok(uc($b) eq "\x{100}\x{100}\x{41}\x{41}", 'uc'); -ok(lc($b) eq "\x{101}\x{101}\x{61}\x{61}", 'lc'); +# \x{101}, LATIN SMALL LETTER A WITH MACRON. + +$a = "\x{100}\x{101}Aa"; +$b = "\x{101}\x{100}aA"; + +ok("\Q$a\E." eq "\x{100}\x{101}Aa.", '\Q\E \x{100}\x{101}Aa'); +ok("\u$a" eq "\x{100}\x{101}Aa", '\u'); +ok("\l$a" eq "\x{101}\x{101}Aa", '\l'); +ok("\U$a" eq "\x{100}\x{100}AA", '\U'); +ok("\L$a" eq "\x{101}\x{101}aa", '\L'); + +ok(quotemeta($a) eq "\x{100}\x{101}Aa", 'quotemeta'); +ok(ucfirst($a) eq "\x{100}\x{101}Aa", 'ucfirst'); +ok(lcfirst($a) eq "\x{101}\x{101}Aa", 'lcfirst'); +ok(uc($a) eq "\x{100}\x{100}AA", 'uc'); +ok(lc($a) eq "\x{101}\x{101}aa", 'lc'); + +ok("\Q$b\E." eq "\x{101}\x{100}aA.", '\Q\E \x{101}\x{100}aA'); +ok("\u$b" eq "\x{100}\x{100}aA", '\u'); +ok("\l$b" eq "\x{101}\x{100}aA", '\l'); +ok("\U$b" eq "\x{100}\x{100}AA", '\U'); +ok("\L$b" eq "\x{101}\x{101}aa", '\L'); + +ok(quotemeta($b) eq "\x{101}\x{100}aA", 'quotemeta'); +ok(ucfirst($b) eq "\x{100}\x{100}aA", 'ucfirst'); +ok(lcfirst($b) eq "\x{101}\x{100}aA", 'lcfirst'); +ok(uc($b) eq "\x{100}\x{100}AA", 'uc'); +ok(lc($b) eq "\x{101}\x{101}aa", 'lc'); # \x{DF} is LATIN SMALL LETTER SHARP S, its uppercase is SS or \x{53}\x{53}; # \x{149} is LATIN SMALL LETTER N PRECEDED BY APOSTROPHE, its uppercase is # \x{2BC}\x{E4} or MODIFIER LETTER APOSTROPHE and N. -ok("\U\x{DF}ab\x{149}cd" eq "\x{53}\x{53}AB\x{2BC}\x{4E}CD", - "multicharacter uppercase"); +# In EBCDIC \x{DF} is LATIN SMALL LETTER Y WITH DIAERESIS, +# and it's uppercase is \x{178}, LATIN CAPITAL LETTER Y WITH DIAERESIS. + +if (ord("A") == 193) { # EBCDIC + ok("\U\x{DF}aB\x{149}cD" eq "\x{178}AB\x{2BC}NCD", + "multicharacter uppercase"); +} elsif (ord("A") == 65) { + ok("\U\x{DF}aB\x{149}cD" eq "SSAB\x{2BC}NCD", + "multicharacter uppercase"); +} else { + ok(0, "what is your encoding?"); +} # The \x{DF} is its own lowercase, ditto for \x{149}. # There are no single character -> multiple characters lowercase mappings. -ok("\L\x{DF}AB\x{149}CD" eq "\x{DF}ab\x{149}cd", - "multicharacter lowercase"); +if (ord("A") == 193) { # EBCDIC + ok("\LaB\x{149}cD" eq "ab\x{149}cd", + "multicharacter lowercase"); +} elsif (ord("A") == 65) { + ok("\L\x{DF}aB\x{149}cD" eq "\x{DF}ab\x{149}cd", + "multicharacter lowercase"); +} else { + ok(0, "what is your encoding?"); +} + +# titlecase is used for \u / ucfirst. + +# \x{587} is ARMENIAN SMALL LIGATURE ECH YIWN and its titlecase is +# \x{535}\x{582} ARMENIAN CAPITAL LETTER ECH + ARMENIAN SMALL LETTER YIWN +# while its lowercase is +# \x{587} itself +# and its uppercase is +# \x{535}\x{552} ARMENIAN CAPITAL LETTER ECH + ARMENIAN CAPITAL LETTER YIWN + +$a = "\x{587}"; + +ok("\L\x{587}" eq "\x{587}", "ligature lowercase"); +ok("\u\x{587}" eq "\x{535}\x{582}", "ligature titlecase"); +ok("\U\x{587}" eq "\x{535}\x{552}", "ligature uppercase"); + +# mktables had problems where many-to-one case mappings didn't work right. +# The lib/unifold.t should give the fourth folding, "casefolding", a good +# workout. + +ok(lc("\x{1C4}") eq "\x{1C6}", "U+01C4 lc is U+01C6"); +ok(lc("\x{1C5}") eq "\x{1C6}", "U+01C5 lc is U+01C6, too"); + +ok(ucfirst("\x{3C2}") eq "\x{3A3}", "U+03C2 ucfirst is U+03A3"); +ok(ucfirst("\x{3C3}") eq "\x{3A3}", "U+03C3 ucfirst is U+03A3, too"); + +ok(uc("\x{1C5}") eq "\x{1C4}", "U+01C5 uc is U+01C4"); +ok(uc("\x{1C6}") eq "\x{1C4}", "U+01C6 uc is U+01C4, too");