Commit | Line | Data |
b0f2b690 |
1 | #!./perl |
2 | |
3a2263fe |
3 | BEGIN { |
4 | chdir 't'; |
5 | @INC = '../lib'; |
6 | require './test.pl'; |
983ffd37 |
7 | } |
b0f2b690 |
8 | |
aa4af542 |
9 | plan tests => 93; |
c6502f5c |
10 | |
11 | is(lc(undef), "", "lc(undef) is ''"); |
12 | is(lcfirst(undef), "", "lcfirst(undef) is ''"); |
13 | is(uc(undef), "", "uc(undef) is ''"); |
14 | is(ucfirst(undef), "", "ucfirst(undef) is ''"); |
3a2263fe |
15 | |
b0f2b690 |
16 | $a = "HELLO.* world"; |
17 | $b = "hello.* WORLD"; |
18 | |
3a2263fe |
19 | is("\Q$a\E." , "HELLO\\.\\*\\ world.", '\Q\E HELLO.* world'); |
20 | is("\u$a" , "HELLO\.\* world", '\u'); |
21 | is("\l$a" , "hELLO\.\* world", '\l'); |
22 | is("\U$a" , "HELLO\.\* WORLD", '\U'); |
23 | is("\L$a" , "hello\.\* world", '\L'); |
24 | |
25 | is(quotemeta($a) , "HELLO\\.\\*\\ world", 'quotemeta'); |
26 | is(ucfirst($a) , "HELLO\.\* world", 'ucfirst'); |
27 | is(lcfirst($a) , "hELLO\.\* world", 'lcfirst'); |
28 | is(uc($a) , "HELLO\.\* WORLD", 'uc'); |
29 | is(lc($a) , "hello\.\* world", 'lc'); |
30 | |
31 | is("\Q$b\E." , "hello\\.\\*\\ WORLD.", '\Q\E hello.* WORLD'); |
32 | is("\u$b" , "Hello\.\* WORLD", '\u'); |
33 | is("\l$b" , "hello\.\* WORLD", '\l'); |
34 | is("\U$b" , "HELLO\.\* WORLD", '\U'); |
35 | is("\L$b" , "hello\.\* world", '\L'); |
36 | |
37 | is(quotemeta($b) , "hello\\.\\*\\ WORLD", 'quotemeta'); |
38 | is(ucfirst($b) , "Hello\.\* WORLD", 'ucfirst'); |
39 | is(lcfirst($b) , "hello\.\* WORLD", 'lcfirst'); |
40 | is(uc($b) , "HELLO\.\* WORLD", 'uc'); |
41 | is(lc($b) , "hello\.\* world", 'lc'); |
983ffd37 |
42 | |
43 | # \x{100} is LATIN CAPITAL LETTER A WITH MACRON; its bijective lowercase is |
7e965bc5 |
44 | # \x{101}, LATIN SMALL LETTER A WITH MACRON. |
b0f2b690 |
45 | |
2533d950 |
46 | $a = "\x{100}\x{101}Aa"; |
47 | $b = "\x{101}\x{100}aA"; |
b0f2b690 |
48 | |
3a2263fe |
49 | is("\Q$a\E." , "\x{100}\x{101}Aa.", '\Q\E \x{100}\x{101}Aa'); |
50 | is("\u$a" , "\x{100}\x{101}Aa", '\u'); |
51 | is("\l$a" , "\x{101}\x{101}Aa", '\l'); |
52 | is("\U$a" , "\x{100}\x{100}AA", '\U'); |
53 | is("\L$a" , "\x{101}\x{101}aa", '\L'); |
54 | |
55 | is(quotemeta($a) , "\x{100}\x{101}Aa", 'quotemeta'); |
56 | is(ucfirst($a) , "\x{100}\x{101}Aa", 'ucfirst'); |
57 | is(lcfirst($a) , "\x{101}\x{101}Aa", 'lcfirst'); |
58 | is(uc($a) , "\x{100}\x{100}AA", 'uc'); |
59 | is(lc($a) , "\x{101}\x{101}aa", 'lc'); |
60 | |
61 | is("\Q$b\E." , "\x{101}\x{100}aA.", '\Q\E \x{101}\x{100}aA'); |
62 | is("\u$b" , "\x{100}\x{100}aA", '\u'); |
63 | is("\l$b" , "\x{101}\x{100}aA", '\l'); |
64 | is("\U$b" , "\x{100}\x{100}AA", '\U'); |
65 | is("\L$b" , "\x{101}\x{101}aa", '\L'); |
66 | |
67 | is(quotemeta($b) , "\x{101}\x{100}aA", 'quotemeta'); |
68 | is(ucfirst($b) , "\x{100}\x{100}aA", 'ucfirst'); |
69 | is(lcfirst($b) , "\x{101}\x{100}aA", 'lcfirst'); |
70 | is(uc($b) , "\x{100}\x{100}AA", 'uc'); |
71 | is(lc($b) , "\x{101}\x{101}aa", 'lc'); |
983ffd37 |
72 | |
73 | # \x{DF} is LATIN SMALL LETTER SHARP S, its uppercase is SS or \x{53}\x{53}; |
74 | # \x{149} is LATIN SMALL LETTER N PRECEDED BY APOSTROPHE, its uppercase is |
75 | # \x{2BC}\x{4E} or MODIFIER LETTER APOSTROPHE and N. |
76 | |
c811e616 |
77 | # In EBCDIC \x{DF} is LATIN SMALL LETTER Y WITH DIAERESIS, |
78 | # and its uppercase is \x{178}, LATIN CAPITAL LETTER Y WITH DIAERESIS. |
79 | |
80 | if (ord("A") == 193) { # EBCDIC |
3a2263fe |
81 | is("\U\x{DF}aB\x{149}cD" , "\x{178}AB\x{2BC}NCD", |
c811e616 |
82 | "multicharacter uppercase"); |
83 | } elsif (ord("A") == 65) { |
3a2263fe |
84 | is("\U\x{DF}aB\x{149}cD" , "SSAB\x{2BC}NCD", |
c811e616 |
85 | "multicharacter uppercase"); |
86 | } else { |
3a2263fe |
87 | fail("what is your encoding?"); |
c811e616 |
88 | } |
983ffd37 |
89 | |
90 | # The \x{DF} is its own lowercase, ditto for \x{149}. |
91 | # Neither of them has a single character -> multiple characters lowercase mapping (unlike U+0130, which is tested further down). |
b0f2b690 |
92 | |
c811e616 |
93 | if (ord("A") == 193) { # EBCDIC |
3a2263fe |
94 | is("\LaB\x{149}cD" , "ab\x{149}cd", |
c811e616 |
95 | "multicharacter lowercase"); |
96 | } elsif (ord("A") == 65) { |
3a2263fe |
97 | is("\L\x{DF}aB\x{149}cD" , "\x{DF}ab\x{149}cd", |
c811e616 |
98 | "multicharacter lowercase"); |
99 | } else { |
3a2263fe |
100 | fail("what is your encoding?"); |
c811e616 |
101 | } |
b0f2b690 |
102 | |
44bc797b |
103 | # titlecase is used for \u / ucfirst. |
104 | |
105 | # \x{587} is ARMENIAN SMALL LIGATURE ECH YIWN and its titlecase is |
106 | # \x{535}\x{582} ARMENIAN CAPITAL LETTER ECH + ARMENIAN SMALL LETTER YIWN |
107 | # while its lowercase is |
108 | # \x{587} itself |
109 | # and its uppercase is |
110 | # \x{535}\x{552} ARMENIAN CAPITAL LETTER ECH + ARMENIAN CAPITAL LETTER YIWN |
111 | |
112 | $a = "\x{587}"; |
113 | |
3a2263fe |
114 | is("\L\x{587}" , "\x{587}", "ligature lowercase"); |
115 | is("\u\x{587}" , "\x{535}\x{582}", "ligature titlecase"); |
116 | is("\U\x{587}" , "\x{535}\x{552}", "ligature uppercase"); |
44bc797b |
117 | |
2e3dedfe |
118 | # mktables had problems where many-to-one case mappings didn't work right. |
89ebb4a3 |
119 | # The lib/uni/fold.t should give the fourth folding, "casefolding", a good |
120 | # workout (one cannot directly get that from Perl). |
83171573 |
121 | # \x{01C4} is LATIN CAPITAL LETTER DZ WITH CARON |
122 | # \x{01C5} is LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON |
123 | # \x{01C6} is LATIN SMALL LETTER DZ WITH CARON |
124 | # \x{03A3} is GREEK CAPITAL LETTER SIGMA |
125 | # \x{03C2} is GREEK SMALL LETTER FINAL SIGMA |
126 | # \x{03C3} is GREEK SMALL LETTER SIGMA |
2e3dedfe |
127 | |
3a2263fe |
128 | is(lc("\x{1C4}") , "\x{1C6}", "U+01C4 lc is U+01C6"); |
129 | is(lc("\x{1C5}") , "\x{1C6}", "U+01C5 lc is U+01C6, too"); |
2e3dedfe |
130 | |
3a2263fe |
131 | is(ucfirst("\x{3C2}") , "\x{3A3}", "U+03C2 ucfirst is U+03A3"); |
132 | is(ucfirst("\x{3C3}") , "\x{3A3}", "U+03C3 ucfirst is U+03A3, too"); |
2e3dedfe |
133 | |
3a2263fe |
134 | is(uc("\x{1C5}") , "\x{1C4}", "U+01C5 uc is U+01C4"); |
135 | is(uc("\x{1C6}") , "\x{1C4}", "U+01C6 uc is U+01C4, too"); |
2e3dedfe |
136 | |
ada6e8a9 |
137 | # #18107: A host of bugs involving [ul]c{,first}. AMS 20021106 |
138 | $a = "\x{3c3}foo.bar"; # \x{3c3} == GREEK SMALL LETTER SIGMA. |
139 | $b = "\x{3a3}FOO.BAR"; # \x{3a3} == GREEK CAPITAL LETTER SIGMA. |
140 | |
141 | ($c = $b) =~ s/(\w+)/lc($1)/ge; |
3a2263fe |
142 | is($c , $a, "Using s///e to change case."); |
ada6e8a9 |
143 | |
144 | ($c = $a) =~ s/(\w+)/uc($1)/ge; |
3a2263fe |
145 | is($c , $b, "Using s///e to change case."); |
ada6e8a9 |
146 | |
147 | ($c = $b) =~ s/(\w+)/lcfirst($1)/ge; |
3a2263fe |
148 | is($c , "\x{3c3}FOO.bAR", "Using s///e to change case."); |
ada6e8a9 |
149 | |
150 | ($c = $a) =~ s/(\w+)/ucfirst($1)/ge; |
3a2263fe |
151 | is($c , "\x{3a3}foo.Bar", "Using s///e to change case."); |
152 | |
153 | # #18931: perl5.8.0 bug in \U..\E processing |
04d26ece |
154 | # Test case from Nicholas Clark: append chr 256 to an ASCII string, chop it off again, capture the result with /(.*)/ and check that uc($1) is the upper-cased ASCII string. The loop runs the same check twice. |
3a2263fe |
155 | for my $a (0,1) { |
156 | $_ = 'abcdefgh'; |
157 | $_ .= chr 256; |
158 | chop; |
159 | /(.*)/; |
160 | is(uc($1), "ABCDEFGH", "[perl #18931]"); |
161 | } |
162 | |
163 | { |
164 | foreach (0, 1) { |
165 | $a = v10.v257; |
166 | chop $a; |
167 | $a =~ s/^(\s*)(\w*)/$1\u$2/; |
168 | is($a, v10, "[perl #18857]"); |
3a2263fe |
169 | } |
170 | } |
6818a357 |
171 | |
172 | |
173 | # [perl #38619] Bug in lc and uc (interaction between UTF-8, substr, and lc/uc). Each check takes substr(..., 0) of a case-mapped string containing \x{100} and expects it to equal the case-mapped string itself. |
174 | |
175 | for ("a\x{100}", "xyz\x{100}") { |
176 | is(substr(uc($_), 0), uc($_), "[perl #38619] uc"); |
177 | } |
178 | for ("A\x{100}", "XYZ\x{100}") { |
179 | is(substr(lc($_), 0), lc($_), "[perl #38619] lc"); |
180 | } |
181 | for ("a\x{100}", "ßyz\x{100}") { # ß to Ss (different length) |
182 | is(substr(ucfirst($_), 0), ucfirst($_), "[perl #38619] ucfirst"); |
183 | } |
184 | |
185 | # Related to [perl #38619] |
186 | # the original report concerns PERL_MAGIC_utf8. |
187 | # these cases concern PERL_MAGIC_regex_global: a scalar-context /\G(.?)/g match against the freshly case-mapped string must succeed (return 1) and capture the same, possibly empty, first character as a plain list-context /(.?)/g match. |
188 | |
189 | for (map { $_ } "a\x{100}", "abc\x{100}", "\x{100}") { |
190 | chop; # get ("a", "abc", "") in utf8 |
191 | my $return = uc($_) =~ /\G(.?)/g; |
192 | my $result = $return ? $1 : "not"; |
193 | my $expect = (uc($_) =~ /(.?)/g)[0]; |
194 | is($return, 1, "[perl #38619]"); |
195 | is($result, $expect, "[perl #38619]"); |
196 | } |
197 | |
198 | for (map { $_ } "A\x{100}", "ABC\x{100}", "\x{100}") { |
199 | chop; # get ("A", "ABC", "") in utf8 |
200 | my $return = lc($_) =~ /\G(.?)/g; |
201 | my $result = $return ? $1 : "not"; |
202 | my $expect = (lc($_) =~ /(.?)/g)[0]; |
203 | is($return, 1, "[perl #38619]"); |
204 | is($result, $expect, "[perl #38619]"); |
205 | } |
206 | |
c2955298 |
207 | for (1, 4, 9, 16, 25) { |
208 | is(uc "\x{03B0}" x $_, "\x{3a5}\x{308}\x{301}" x $_, |
209 | 'uc U+03B0 grows threefold'); |
210 | |
211 | is(lc "\x{0130}" x $_, "i\x{307}" x $_, 'lc U+0130 grows'); |
212 | } |
17fa0776 |
213 | |
214 | # bug #43207: lc called in void context must leave its operand ($_) unchanged |
215 | my $temp = "Hello"; |
216 | for ("$temp") { |
217 | lc $_; |
218 | is($_, "Hello", "[perl #43207] lc does not modify its argument"); |
219 | } |
aa4af542 |
220 | |
221 | # new in Unicode 5.1.0 |
222 | is(lc("\x{1E9E}"), "\x{df}", "lc(LATIN CAPITAL LETTER SHARP S)"); |