Commit | Line | Data |
b0f2b690 |
1 | #!./perl |
2 | |
3a2263fe |
3 | BEGIN { |
4 | chdir 't'; |
5 | @INC = '../lib'; |
6 | require './test.pl'; |
983ffd37 |
7 | } |
b0f2b690 |
8 | |
c2955298 |
9 | plan tests => 87; |
3a2263fe |
10 | |
b0f2b690 |
11 | $a = "HELLO.* world"; |
12 | $b = "hello.* WORLD"; |
13 | |
3a2263fe |
14 | is("\Q$a\E." , "HELLO\\.\\*\\ world.", '\Q\E HELLO.* world'); |
15 | is("\u$a" , "HELLO\.\* world", '\u'); |
16 | is("\l$a" , "hELLO\.\* world", '\l'); |
17 | is("\U$a" , "HELLO\.\* WORLD", '\U'); |
18 | is("\L$a" , "hello\.\* world", '\L'); |
19 | |
20 | is(quotemeta($a) , "HELLO\\.\\*\\ world", 'quotemeta'); |
21 | is(ucfirst($a) , "HELLO\.\* world", 'ucfirst'); |
22 | is(lcfirst($a) , "hELLO\.\* world", 'lcfirst'); |
23 | is(uc($a) , "HELLO\.\* WORLD", 'uc'); |
24 | is(lc($a) , "hello\.\* world", 'lc'); |
25 | |
26 | is("\Q$b\E." , "hello\\.\\*\\ WORLD.", '\Q\E hello.* WORLD'); |
27 | is("\u$b" , "Hello\.\* WORLD", '\u'); |
28 | is("\l$b" , "hello\.\* WORLD", '\l'); |
29 | is("\U$b" , "HELLO\.\* WORLD", '\U'); |
30 | is("\L$b" , "hello\.\* world", '\L'); |
31 | |
32 | is(quotemeta($b) , "hello\\.\\*\\ WORLD", 'quotemeta'); |
33 | is(ucfirst($b) , "Hello\.\* WORLD", 'ucfirst'); |
34 | is(lcfirst($b) , "hello\.\* WORLD", 'lcfirst'); |
35 | is(uc($b) , "HELLO\.\* WORLD", 'uc'); |
36 | is(lc($b) , "hello\.\* world", 'lc'); |
983ffd37 |
37 | |
38 | # \x{100} is LATIN CAPITAL LETTER A WITH MACRON; its bijective lowercase is |
7e965bc5 |
39 | # \x{101}, LATIN SMALL LETTER A WITH MACRON. |
b0f2b690 |
40 | |
2533d950 |
41 | $a = "\x{100}\x{101}Aa"; |
42 | $b = "\x{101}\x{100}aA"; |
b0f2b690 |
43 | |
3a2263fe |
44 | is("\Q$a\E." , "\x{100}\x{101}Aa.", '\Q\E \x{100}\x{101}Aa'); |
45 | is("\u$a" , "\x{100}\x{101}Aa", '\u'); |
46 | is("\l$a" , "\x{101}\x{101}Aa", '\l'); |
47 | is("\U$a" , "\x{100}\x{100}AA", '\U'); |
48 | is("\L$a" , "\x{101}\x{101}aa", '\L'); |
49 | |
50 | is(quotemeta($a) , "\x{100}\x{101}Aa", 'quotemeta'); |
51 | is(ucfirst($a) , "\x{100}\x{101}Aa", 'ucfirst'); |
52 | is(lcfirst($a) , "\x{101}\x{101}Aa", 'lcfirst'); |
53 | is(uc($a) , "\x{100}\x{100}AA", 'uc'); |
54 | is(lc($a) , "\x{101}\x{101}aa", 'lc'); |
55 | |
56 | is("\Q$b\E." , "\x{101}\x{100}aA.", '\Q\E \x{101}\x{100}aA'); |
57 | is("\u$b" , "\x{100}\x{100}aA", '\u'); |
58 | is("\l$b" , "\x{101}\x{100}aA", '\l'); |
59 | is("\U$b" , "\x{100}\x{100}AA", '\U'); |
60 | is("\L$b" , "\x{101}\x{101}aa", '\L'); |
61 | |
62 | is(quotemeta($b) , "\x{101}\x{100}aA", 'quotemeta'); |
63 | is(ucfirst($b) , "\x{100}\x{100}aA", 'ucfirst'); |
64 | is(lcfirst($b) , "\x{101}\x{100}aA", 'lcfirst'); |
65 | is(uc($b) , "\x{100}\x{100}AA", 'uc'); |
66 | is(lc($b) , "\x{101}\x{101}aa", 'lc'); |
983ffd37 |
67 | |
68 | # \x{DF} is LATIN SMALL LETTER SHARP S, its uppercase is SS or \x{53}\x{53}; |
69 | # \x{149} is LATIN SMALL LETTER N PRECEDED BY APOSTROPHE, its uppercase is |
70 | # \x{2BC}\x{4E} or MODIFIER LETTER APOSTROPHE and N. |
71 | |
c811e616 |
72 | # In EBCDIC \x{DF} is LATIN SMALL LETTER Y WITH DIAERESIS, |
73 | # and its uppercase is \x{178}, LATIN CAPITAL LETTER Y WITH DIAERESIS. |
74 | |
75 | if (ord("A") == 193) { # EBCDIC |
3a2263fe |
76 | is("\U\x{DF}aB\x{149}cD" , "\x{178}AB\x{2BC}NCD", |
c811e616 |
77 | "multicharacter uppercase"); |
78 | } elsif (ord("A") == 65) { |
3a2263fe |
79 | is("\U\x{DF}aB\x{149}cD" , "SSAB\x{2BC}NCD", |
c811e616 |
80 | "multicharacter uppercase"); |
81 | } else { |
3a2263fe |
82 | fail("what is your encoding?"); |
c811e616 |
83 | } |
983ffd37 |
84 | |
85 | # The \x{DF} is its own lowercase, ditto for \x{149}. |
86 | # There are no single character -> multiple characters lowercase mappings. |
b0f2b690 |
87 | |
c811e616 |
88 | if (ord("A") == 193) { # EBCDIC |
3a2263fe |
89 | is("\LaB\x{149}cD" , "ab\x{149}cd", |
c811e616 |
90 | "multicharacter lowercase"); |
91 | } elsif (ord("A") == 65) { |
3a2263fe |
92 | is("\L\x{DF}aB\x{149}cD" , "\x{DF}ab\x{149}cd", |
c811e616 |
93 | "multicharacter lowercase"); |
94 | } else { |
3a2263fe |
95 | fail("what is your encoding?"); |
c811e616 |
96 | } |
b0f2b690 |
97 | |
44bc797b |
98 | # titlecase is used for \u / ucfirst. |
99 | |
100 | # \x{587} is ARMENIAN SMALL LIGATURE ECH YIWN and its titlecase is |
101 | # \x{535}\x{582} ARMENIAN CAPITAL LETTER ECH + ARMENIAN SMALL LETTER YIWN |
102 | # while its lowercase is |
103 | # \x{587} itself |
104 | # and its uppercase is |
105 | # \x{535}\x{552} ARMENIAN CAPITAL LETTER ECH + ARMENIAN CAPITAL LETTER YIWN |
106 | |
107 | $a = "\x{587}"; |
108 | |
3a2263fe |
109 | is("\L\x{587}" , "\x{587}", "ligature lowercase"); |
110 | is("\u\x{587}" , "\x{535}\x{582}", "ligature titlecase"); |
111 | is("\U\x{587}" , "\x{535}\x{552}", "ligature uppercase"); |
44bc797b |
112 | |
2e3dedfe |
113 | # mktables had problems where many-to-one case mappings didn't work right. |
89ebb4a3 |
114 | # The lib/uni/fold.t should give the fourth folding, "casefolding", a good |
115 | # workout (one cannot directly get that from Perl). |
83171573 |
116 | # \x{01C4} is LATIN CAPITAL LETTER DZ WITH CARON |
117 | # \x{01C5} is LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON |
118 | # \x{01C6} is LATIN SMALL LETTER DZ WITH CARON |
119 | # \x{03A3} is GREEK CAPITAL LETTER SIGMA |
120 | # \x{03C2} is GREEK SMALL LETTER FINAL SIGMA |
121 | # \x{03C3} is GREEK SMALL LETTER SIGMA |
2e3dedfe |
122 | |
3a2263fe |
123 | is(lc("\x{1C4}") , "\x{1C6}", "U+01C4 lc is U+01C6"); |
124 | is(lc("\x{1C5}") , "\x{1C6}", "U+01C5 lc is U+01C6, too"); |
2e3dedfe |
125 | |
3a2263fe |
126 | is(ucfirst("\x{3C2}") , "\x{3A3}", "U+03C2 ucfirst is U+03A3"); |
127 | is(ucfirst("\x{3C3}") , "\x{3A3}", "U+03C3 ucfirst is U+03A3, too"); |
2e3dedfe |
128 | |
3a2263fe |
129 | is(uc("\x{1C5}") , "\x{1C4}", "U+01C5 uc is U+01C4"); |
130 | is(uc("\x{1C6}") , "\x{1C4}", "U+01C6 uc is U+01C4, too"); |
2e3dedfe |
131 | |
ada6e8a9 |
132 | # #18107: A host of bugs involving [ul]c{,first}. AMS 20021106 |
133 | $a = "\x{3c3}foo.bar"; # \x{3c3} == GREEK SMALL LETTER SIGMA. |
134 | $b = "\x{3a3}FOO.BAR"; # \x{3a3} == GREEK CAPITAL LETTER SIGMA. |
135 | |
136 | ($c = $b) =~ s/(\w+)/lc($1)/ge; |
3a2263fe |
137 | is($c , $a, "Using s///e to change case."); |
ada6e8a9 |
138 | |
139 | ($c = $a) =~ s/(\w+)/uc($1)/ge; |
3a2263fe |
140 | is($c , $b, "Using s///e to change case."); |
ada6e8a9 |
141 | |
142 | ($c = $b) =~ s/(\w+)/lcfirst($1)/ge; |
3a2263fe |
143 | is($c , "\x{3c3}FOO.bAR", "Using s///e to change case."); |
ada6e8a9 |
144 | |
145 | ($c = $a) =~ s/(\w+)/ucfirst($1)/ge; |
3a2263fe |
146 | is($c , "\x{3a3}foo.Bar", "Using s///e to change case."); |
147 | |
148 | # #18931: perl5.8.0 bug in \U..\E processing |
04d26ece |
149 | # Test case from Nicholas Clark. |
3a2263fe |
150 | for my $a (0,1) { |
151 | $_ = 'abcdefgh'; |
152 | $_ .= chr 256; |
153 | chop; |
154 | /(.*)/; |
155 | is(uc($1), "ABCDEFGH", "[perl #18931]"); |
156 | } |
157 | |
158 | { |
159 | foreach (0, 1) { |
160 | $a = v10.v257; |
161 | chop $a; |
162 | $a =~ s/^(\s*)(\w*)/$1\u$2/; |
163 | is($a, v10, "[perl #18857]"); |
3a2263fe |
164 | } |
165 | } |
6818a357 |
166 | |
167 | |
168 | # [perl #38619] Bug in lc and uc (interaction between UTF-8, substr, and lc/uc) |
169 | |
170 | for ("a\x{100}", "xyz\x{100}") { |
171 | is(substr(uc($_), 0), uc($_), "[perl #38619] uc"); |
172 | } |
173 | for ("A\x{100}", "XYZ\x{100}") { |
174 | is(substr(lc($_), 0), lc($_), "[perl #38619] lc"); |
175 | } |
176 | for ("a\x{100}", "ßyz\x{100}") { # ß to Ss (different length) |
177 | is(substr(ucfirst($_), 0), ucfirst($_), "[perl #38619] ucfirst"); |
178 | } |
179 | |
180 | # Related to [perl #38619] |
181 | # the original report concerns PERL_MAGIC_utf8. |
182 | # these cases concern PERL_MAGIC_regex_global. |
183 | |
184 | for (map { $_ } "a\x{100}", "abc\x{100}", "\x{100}") { |
185 | chop; # get ("a", "abc", "") in utf8 |
186 | my $return = uc($_) =~ /\G(.?)/g; |
187 | my $result = $return ? $1 : "not"; |
188 | my $expect = (uc($_) =~ /(.?)/g)[0]; |
189 | is($return, 1, "[perl #38619]"); |
190 | is($result, $expect, "[perl #38619]"); |
191 | } |
192 | |
193 | for (map { $_ } "A\x{100}", "ABC\x{100}", "\x{100}") { |
194 | chop; # get ("A", "ABC", "") in utf8 |
195 | my $return = lc($_) =~ /\G(.?)/g; |
196 | my $result = $return ? $1 : "not"; |
197 | my $expect = (lc($_) =~ /(.?)/g)[0]; |
198 | is($return, 1, "[perl #38619]"); |
199 | is($result, $expect, "[perl #38619]"); |
200 | } |
201 | |
c2955298 |
202 | for (1, 4, 9, 16, 25) { |
203 | is(uc "\x{03B0}" x $_, "\x{3a5}\x{308}\x{301}" x $_, |
204 | 'uc U+03B0 grows threefold'); |
205 | |
206 | is(lc "\x{0130}" x $_, "i\x{307}" x $_, 'lc U+0130 grows'); |
207 | } |