Commit | Line | Data |
b0f2b690 |
1 | #!./perl |
2 | |
3a2263fe |
# Test bootstrap, executed at compile time: switch into t/, set @INC to
# ../lib only, and load ./test.pl (presumably the source of the plan/is
# helpers used below -- confirm against t/test.pl).
3 | BEGIN { |
4 | chdir 't'; |
5 | @INC = '../lib'; |
6 | require './test.pl'; |
983ffd37 |
7 | } |
b0f2b690 |
8 | |
aa4af542 |
# Planned number of checks. It matches the is() calls visible in this file
# when each loop body is counted once per iteration; keep it in sync when
# checks are added or removed.
9 | plan tests => 93; |
c6502f5c |
10 | |
# undef is expected to case-map to the empty string for all four builtins.
11 | is(lc(undef), "", "lc(undef) is ''"); |
12 | is(lcfirst(undef), "", "lcfirst(undef) is ''"); |
13 | is(uc(undef), "", "uc(undef) is ''"); |
14 | is(ucfirst(undef), "", "ucfirst(undef) is ''"); |
3a2263fe |
15 | |
b0f2b690 |
# ASCII matrix: $a starts upper-case and ends lower-case, $b is the reverse.
# Each string goes through the interpolation escapes (\Q \u \l \U \L) and
# then through the matching functions (quotemeta ucfirst lcfirst uc lc).
# In the double-quoted expected values, "\." and "\*" are just "." and "*";
# only the "\\" sequences in the quotemeta expectations are real backslashes.
16 | $a = "HELLO.* world"; |
17 | $b = "hello.* WORLD"; |
18 | |
3a2263fe |
19 | is("\Q$a\E." , "HELLO\\.\\*\\ world.", '\Q\E HELLO.* world'); |
20 | is("\u$a" , "HELLO\.\* world", '\u'); |
21 | is("\l$a" , "hELLO\.\* world", '\l'); |
22 | is("\U$a" , "HELLO\.\* WORLD", '\U'); |
23 | is("\L$a" , "hello\.\* world", '\L'); |
24 | |
25 | is(quotemeta($a) , "HELLO\\.\\*\\ world", 'quotemeta'); |
26 | is(ucfirst($a) , "HELLO\.\* world", 'ucfirst'); |
27 | is(lcfirst($a) , "hELLO\.\* world", 'lcfirst'); |
28 | is(uc($a) , "HELLO\.\* WORLD", 'uc'); |
29 | is(lc($a) , "hello\.\* world", 'lc'); |
30 | |
31 | is("\Q$b\E." , "hello\\.\\*\\ WORLD.", '\Q\E hello.* WORLD'); |
32 | is("\u$b" , "Hello\.\* WORLD", '\u'); |
33 | is("\l$b" , "hello\.\* WORLD", '\l'); |
34 | is("\U$b" , "HELLO\.\* WORLD", '\U'); |
35 | is("\L$b" , "hello\.\* world", '\L'); |
36 | |
37 | is(quotemeta($b) , "hello\\.\\*\\ WORLD", 'quotemeta'); |
38 | is(ucfirst($b) , "Hello\.\* WORLD", 'ucfirst'); |
39 | is(lcfirst($b) , "hello\.\* WORLD", 'lcfirst'); |
40 | is(uc($b) , "HELLO\.\* WORLD", 'uc'); |
41 | is(lc($b) , "hello\.\* world", 'lc'); |
983ffd37 |
42 | |
43 | # \x{100} is LATIN CAPITAL LETTER A WITH MACRON; its bijective lowercase is |
7e965bc5 |
44 | # \x{101}, LATIN SMALL LETTER A WITH MACRON. |
b0f2b690 |
45 | |
# Same matrix as the ASCII section, now with code points above 0xFF mixed
# with ASCII letters. The \Q / quotemeta expectations equal the input, i.e.
# none of these four characters is expected to be backslash-escaped.
2533d950 |
46 | $a = "\x{100}\x{101}Aa"; |
47 | $b = "\x{101}\x{100}aA"; |
b0f2b690 |
48 | |
3a2263fe |
49 | is("\Q$a\E." , "\x{100}\x{101}Aa.", '\Q\E \x{100}\x{101}Aa'); |
50 | is("\u$a" , "\x{100}\x{101}Aa", '\u'); |
51 | is("\l$a" , "\x{101}\x{101}Aa", '\l'); |
52 | is("\U$a" , "\x{100}\x{100}AA", '\U'); |
53 | is("\L$a" , "\x{101}\x{101}aa", '\L'); |
54 | |
55 | is(quotemeta($a) , "\x{100}\x{101}Aa", 'quotemeta'); |
56 | is(ucfirst($a) , "\x{100}\x{101}Aa", 'ucfirst'); |
57 | is(lcfirst($a) , "\x{101}\x{101}Aa", 'lcfirst'); |
58 | is(uc($a) , "\x{100}\x{100}AA", 'uc'); |
59 | is(lc($a) , "\x{101}\x{101}aa", 'lc'); |
60 | |
61 | is("\Q$b\E." , "\x{101}\x{100}aA.", '\Q\E \x{101}\x{100}aA'); |
62 | is("\u$b" , "\x{100}\x{100}aA", '\u'); |
63 | is("\l$b" , "\x{101}\x{100}aA", '\l'); |
64 | is("\U$b" , "\x{100}\x{100}AA", '\U'); |
65 | is("\L$b" , "\x{101}\x{101}aa", '\L'); |
66 | |
67 | is(quotemeta($b) , "\x{101}\x{100}aA", 'quotemeta'); |
68 | is(ucfirst($b) , "\x{100}\x{100}aA", 'ucfirst'); |
69 | is(lcfirst($b) , "\x{101}\x{100}aA", 'lcfirst'); |
70 | is(uc($b) , "\x{100}\x{100}AA", 'uc'); |
71 | is(lc($b) , "\x{101}\x{101}aa", 'lc'); |
983ffd37 |
72 | |
73 | # \x{DF} is LATIN SMALL LETTER SHARP S, its uppercase is SS or \x{53}\x{53}; |
74 | # \x{149} is LATIN SMALL LETTER N PRECEDED BY APOSTROPHE, its uppercase is |
75 | # \x{2BC}\x{4E} or MODIFIER LETTER APOSTROPHE and N. |
76 | |
# Both sides are passed through latin1_to_native before being compared.
8a38a836 |
77 | is(latin1_to_native("\U\x{DF}aB\x{149}cD"), latin1_to_native("SSAB\x{2BC}NCD"), |
c811e616 |
78 | "multicharacter uppercase"); |
983ffd37 |
79 | |
80 | # The \x{DF} is its own lowercase, ditto for \x{149}. |
81 | # Neither of them has a single character -> multiple characters lowercase mapping. |
# (Such mappings do exist for other characters: lc of U+0130 is checked
# against "i\x{307}" further down in this file.)
b0f2b690 |
82 | |
8a38a836 |
83 | is(latin1_to_native("\L\x{DF}aB\x{149}cD"), latin1_to_native("\x{DF}ab\x{149}cd"), |
c811e616 |
84 | "multicharacter lowercase"); |
b0f2b690 |
85 | |
44bc797b |
86 | # titlecase is used for \u / ucfirst. |
87 | |
88 | # \x{587} is ARMENIAN SMALL LIGATURE ECH YIWN and its titlecase is |
89 | # \x{535}\x{582} ARMENIAN CAPITAL LETTER ECH + ARMENIAN SMALL LETTER YIWN |
90 | # while its lowercase is |
91 | # \x{587} itself |
92 | # and its uppercase is |
93 | # \x{535}\x{552} ARMENIAN CAPITAL LETTER ECH + ARMENIAN CAPITAL LETTER YIWN |
94 | |
# NOTE(review): $a is assigned here, but the three checks below use the
# literal "\x{587}" directly rather than $a.
95 | $a = "\x{587}"; |
96 | |
3a2263fe |
97 | is("\L\x{587}" , "\x{587}", "ligature lowercase"); |
98 | is("\u\x{587}" , "\x{535}\x{582}", "ligature titlecase"); |
99 | is("\U\x{587}" , "\x{535}\x{552}", "ligature uppercase"); |
44bc797b |
100 | |
2e3dedfe |
2e3dedfe |
101 | # mktables had problems where many-to-one case mappings didn't work right. |
89ebb4a3 |
102 | # The lib/uni/fold.t should give the fourth folding, "casefolding", a good |
103 | # workout (one cannot directly get that from Perl). |
83171573 |
104 | # \x{01C4} is LATIN CAPITAL LETTER DZ WITH CARON |
105 | # \x{01C5} is LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON |
106 | # \x{01C6} is LATIN SMALL LETTER DZ WITH CARON |
107 | # \x{03A3} is GREEK CAPITAL LETTER SIGMA |
108 | # \x{03C2} is GREEK SMALL LETTER FINAL SIGMA |
109 | # \x{03C3} is GREEK SMALL LETTER SIGMA |
2e3dedfe |
110 | |
# In each pair below, two different inputs must map to the same output.
3a2263fe |
111 | is(lc("\x{1C4}") , "\x{1C6}", "U+01C4 lc is U+01C6"); |
112 | is(lc("\x{1C5}") , "\x{1C6}", "U+01C5 lc is U+01C6, too"); |
2e3dedfe |
113 | |
3a2263fe |
114 | is(ucfirst("\x{3C2}") , "\x{3A3}", "U+03C2 ucfirst is U+03A3"); |
115 | is(ucfirst("\x{3C3}") , "\x{3A3}", "U+03C3 ucfirst is U+03A3, too"); |
2e3dedfe |
116 | |
3a2263fe |
117 | is(uc("\x{1C5}") , "\x{1C4}", "U+01C5 uc is U+01C4"); |
118 | is(uc("\x{1C6}") , "\x{1C4}", "U+01C6 uc is U+01C4, too"); |
2e3dedfe |
119 | |
ada6e8a9 |
120 | # #18107: A host of bugs involving [ul]c{,first}. AMS 20021106 |
121 | $a = "\x{3c3}foo.bar"; # \x{3c3} == GREEK SMALL LETTER SIGMA. |
122 | $b = "\x{3a3}FOO.BAR"; # \x{3a3} == GREEK CAPITAL LETTER SIGMA. |
123 | |
# Each check copies one of the strings into $c and rewrites every word run
# in the copy through a case function called from s///ge. The "." splits
# the string into two runs, so the *first variants change the first
# character of both halves. All four checks share the same description.
124 | ($c = $b) =~ s/(\w+)/lc($1)/ge; |
3a2263fe |
125 | is($c , $a, "Using s///e to change case."); |
ada6e8a9 |
126 | |
d1eb3177 |
127 | ($c = $a) =~ s/(\p{IsWord}+)/uc($1)/ge; |
3a2263fe |
128 | is($c , $b, "Using s///e to change case."); |
ada6e8a9 |
129 | |
d1eb3177 |
130 | ($c = $b) =~ s/(\p{IsWord}+)/lcfirst($1)/ge; |
3a2263fe |
131 | is($c , "\x{3c3}FOO.bAR", "Using s///e to change case."); |
ada6e8a9 |
132 | |
d1eb3177 |
133 | ($c = $a) =~ s/(\p{IsWord}+)/ucfirst($1)/ge; |
3a2263fe |
134 | is($c , "\x{3a3}foo.Bar", "Using s///e to change case."); |
135 | |
136 | # #18931: perl5.8.0 bug in \U..\E processing |
04d26ece |
137 | # Test case from Nicholas Clark. |
# The body runs twice; the loop variable $a is not referenced inside it.
# Appending chr 256 and chopping it off again leaves 'abcdefgh', which is
# captured whole by /(.*)/ and then uppercased through $1.
3a2263fe |
138 | for my $a (0,1) { |
139 | $_ = 'abcdefgh'; |
140 | $_ .= chr 256; |
141 | chop; |
142 | /(.*)/; |
143 | is(uc($1), "ABCDEFGH", "[perl #18931]"); |
144 | } |
145 | |
# [perl #18857]: two passes over the same steps. $a starts as a
# two-character string (code points 10 and 257); chop drops the last
# character, and the substitution applies \u to the (\w*) capture, which is
# empty here because the leading newline is consumed by (\s*). The string
# must still equal v10 afterwards.
146 | { |
147 | foreach (0, 1) { |
148 | $a = v10.v257; |
149 | chop $a; |
150 | $a =~ s/^(\s*)(\w*)/$1\u$2/; |
151 | is($a, v10, "[perl #18857]"); |
3a2263fe |
152 | } |
153 | } |
6818a357 |
154 | |
155 | |
156 | # [perl #38619] Bug in lc and uc (interaction between UTF-8, substr, and lc/uc) |
157 | |
# In every loop, substr(..., 0) of the case-mapped string must equal the
# case-mapped string itself. Each input contains \x{100}.
158 | for ("a\x{100}", "xyz\x{100}") { |
159 | is(substr(uc($_), 0), uc($_), "[perl #38619] uc"); |
160 | } |
161 | for ("A\x{100}", "XYZ\x{100}") { |
162 | is(substr(lc($_), 0), lc($_), "[perl #38619] lc"); |
163 | } |
164 | for ("a\x{100}", "ßyz\x{100}") { # ß to Ss (different length) |
165 | is(substr(ucfirst($_), 0), ucfirst($_), "[perl #38619] ucfirst"); |
166 | } |
167 | |
168 | # Related to [perl #38619] |
169 | # the original report concerns PERL_MAGIC_utf8. |
170 | # these cases concern PERL_MAGIC_regex_global. |
171 | |
# Both loops follow the same recipe: chop the trailing \x{100}, run a
# scalar-context /\G(.?)/g match directly on the uc()/lc() result, and
# compare its capture with the first capture of a list-context /(.?)/g
# match on a fresh uc()/lc() result. Two checks per string.
# map { $_ } presumably exists to give chop modifiable copies instead of
# the literal constants -- confirm.
172 | for (map { $_ } "a\x{100}", "abc\x{100}", "\x{100}") { |
173 | chop; # get ("a", "abc", "") in utf8 |
174 | my $return = uc($_) =~ /\G(.?)/g; |
175 | my $result = $return ? $1 : "not"; |
176 | my $expect = (uc($_) =~ /(.?)/g)[0]; |
177 | is($return, 1, "[perl #38619]"); |
178 | is($result, $expect, "[perl #38619]"); |
179 | } |
180 | |
181 | for (map { $_ } "A\x{100}", "ABC\x{100}", "\x{100}") { |
182 | chop; # get ("A", "ABC", "") in utf8 |
183 | my $return = lc($_) =~ /\G(.?)/g; |
184 | my $result = $return ? $1 : "not"; |
185 | my $expect = (lc($_) =~ /(.?)/g)[0]; |
186 | is($return, 1, "[perl #38619]"); |
187 | is($result, $expect, "[perl #38619]"); |
188 | } |
189 | |
c2955298 |
# Case mappings whose result is longer than the input, at several repeat
# counts: U+03B0 uppercases to three characters and U+0130 lowercases to
# two. "x" binds tighter than the named unary operators uc/lc, so the
# repeated string is what gets case-mapped.
190 | for (1, 4, 9, 16, 25) { |
191 | is(uc "\x{03B0}" x $_, "\x{3a5}\x{308}\x{301}" x $_, |
192 | 'uc U+03B0 grows threefold'); |
193 | |
194 | is(lc "\x{0130}" x $_, "i\x{307}" x $_, 'lc U+0130 grows'); |
195 | } |
17fa0776 |
196 | |
197 | # bug #43207 |
# lc is called in void context on $_, which is aliased to the temporary
# string built by "$temp"; the check asserts that $_ is left untouched.
# The is() call now carries a description, like every other check in this
# file, so a failure is identifiable in the test output.
198 | my $temp = "Hello"; |
199 | for ("$temp") { |
200 | lc $_; |
201 | is($_, "Hello", '[perl #43207] lc($_) must not modify $_'); |
202 | } |
aa4af542 |
203 | |
204 | # new in Unicode 5.1.0 |
# U+1E9E (LATIN CAPITAL LETTER SHARP S) is expected to lowercase to the
# single character U+00DF.
205 | is(lc("\x{1E9E}"), "\x{df}", "lc(LATIN CAPITAL LETTER SHARP S)"); |