Commit | Line | Data |
8d063cd8 |
1 | #!./perl |
8d37f932 |
2 | # |
3 | # This is a home for regular expression tests that don't fit into |
4 | # the format supported by op/regexp.t. If you want to add a test |
5 | # that does fit that format, add it to op/re_tests, not here. |
8d063cd8 |
6 | |
9133bbab |
7 | $| = 1; |
3568d838 |
8 | |
080c2dec |
9 | print "1..1006\n"; |
8d37f932 |
10 | |
e4d48cc9 |
11 | BEGIN { |
12 | chdir 't' if -d 't'; |
20822f61 |
13 | @INC = '../lib'; |
e4d48cc9 |
14 | } |
ffbc6a93 |
15 | |
8d37f932 |
16 | eval 'use Config'; # Defaults assumed if this fails |
8d063cd8 |
17 | |
18 | $x = "abc\ndef\n"; |
19 | |
20 | if ($x =~ /^abc/) {print "ok 1\n";} else {print "not ok 1\n";} |
21 | if ($x !~ /^def/) {print "ok 2\n";} else {print "not ok 2\n";} |
22 | |
f02c194e |
23 | # used to be a test for $* |
24 | if ($x =~ /^def/m) {print "ok 3\n";} else {print "not ok 3\n";} |
8d063cd8 |
25 | |
26 | $_ = '123'; |
27 | if (/^([0-9][0-9]*)/) {print "ok 4\n";} else {print "not ok 4\n";} |
28 | |
29 | if ($x =~ /^xxx/) {print "not ok 5\n";} else {print "ok 5\n";} |
30 | if ($x !~ /^abc/) {print "not ok 6\n";} else {print "ok 6\n";} |
31 | |
32 | if ($x =~ /def/) {print "ok 7\n";} else {print "not ok 7\n";} |
33 | if ($x !~ /def/) {print "not ok 8\n";} else {print "ok 8\n";} |
34 | |
35 | if ($x !~ /.def/) {print "ok 9\n";} else {print "not ok 9\n";} |
36 | if ($x =~ /.def/) {print "not ok 10\n";} else {print "ok 10\n";} |
37 | |
38 | if ($x =~ /\ndef/) {print "ok 11\n";} else {print "not ok 11\n";} |
39 | if ($x !~ /\ndef/) {print "not ok 12\n";} else {print "ok 12\n";} |
40 | |
41 | $_ = 'aaabbbccc'; |
42 | if (/(a*b*)(c*)/ && $1 eq 'aaabbb' && $2 eq 'ccc') { |
43 | print "ok 13\n"; |
44 | } else { |
45 | print "not ok 13\n"; |
46 | } |
47 | if (/(a+b+c+)/ && $1 eq 'aaabbbccc') { |
48 | print "ok 14\n"; |
49 | } else { |
50 | print "not ok 14\n"; |
51 | } |
52 | |
53 | if (/a+b?c+/) {print "not ok 15\n";} else {print "ok 15\n";} |
54 | |
55 | $_ = 'aaabccc'; |
56 | if (/a+b?c+/) {print "ok 16\n";} else {print "not ok 16\n";} |
57 | if (/a*b+c*/) {print "ok 17\n";} else {print "not ok 17\n";} |
58 | |
59 | $_ = 'aaaccc'; |
60 | if (/a*b?c*/) {print "ok 18\n";} else {print "not ok 18\n";} |
61 | if (/a*b+c*/) {print "not ok 19\n";} else {print "ok 19\n";} |
62 | |
63 | $_ = 'abcdef'; |
64 | if (/bcd|xyz/) {print "ok 20\n";} else {print "not ok 20\n";} |
65 | if (/xyz|bcd/) {print "ok 21\n";} else {print "not ok 21\n";} |
66 | |
67 | if (m|bc/*d|) {print "ok 22\n";} else {print "not ok 22\n";} |
378cc40b |
68 | |
69 | if (/^$_$/) {print "ok 23\n";} else {print "not ok 23\n";} |
70 | |
f02c194e |
71 | # used to be a test for $* |
72 | if ("ab\ncd\n" =~ /^cd/m) {print "ok 24\n";} else {print "not ok 24\n";} |
378cc40b |
73 | |
cb55de95 |
74 | $XXX{123} = 123; |
75 | $XXX{234} = 234; |
76 | $XXX{345} = 345; |
77 | |
78 | @XXX = ('ok 25','not ok 25', 'ok 26','not ok 26','not ok 27'); |
79 | while ($_ = shift(@XXX)) { |
80 | ?(.*)? && (print $1,"\n"); |
81 | /not/ && reset; |
82 | /not ok 26/ && reset 'X'; |
83 | } |
84 | |
85 | while (($key,$val) = each(%XXX)) { |
86 | print "not ok 27\n"; |
87 | exit; |
88 | } |
89 | |
90 | print "ok 27\n"; |
378cc40b |
91 | |
378cc40b |
92 | 'cde' =~ /[^ab]*/; |
93 | 'xyz' =~ //; |
94 | if ($& eq 'xyz') {print "ok 28\n";} else {print "not ok 28\n";} |
95 | |
96 | $foo = '[^ab]*'; |
97 | 'cde' =~ /$foo/; |
98 | 'xyz' =~ //; |
99 | if ($& eq 'xyz') {print "ok 29\n";} else {print "not ok 29\n";} |
100 | |
101 | $foo = '[^ab]*'; |
102 | 'cde' =~ /$foo/; |
103 | 'xyz' =~ /$null/; |
104 | if ($& eq 'xyz') {print "ok 30\n";} else {print "not ok 30\n";} |
a687059c |
105 | |
106 | $_ = 'abcdefghi'; |
107 | /def/; # optimized up to cmd |
108 | if ("$`:$&:$'" eq 'abc:def:ghi') {print "ok 31\n";} else {print "not ok 31\n";} |
109 | |
110 | /cde/ + 0; # optimized only to spat |
111 | if ("$`:$&:$'" eq 'ab:cde:fghi') {print "ok 32\n";} else {print "not ok 32\n";} |
112 | |
113 | /[d][e][f]/; # not optimized |
114 | if ("$`:$&:$'" eq 'abc:def:ghi') {print "ok 33\n";} else {print "not ok 33\n";} |
115 | |
116 | $_ = 'now is the {time for all} good men to come to.'; |
117 | / {([^}]*)}/; |
118 | if ($1 eq 'time for all') {print "ok 34\n";} else {print "not ok 34 $1\n";} |
119 | |
120 | $_ = 'xxx {3,4} yyy zzz'; |
121 | print /( {3,4})/ ? "ok 35\n" : "not ok 35\n"; |
122 | print $1 eq ' ' ? "ok 36\n" : "not ok 36\n"; |
123 | print /( {4,})/ ? "not ok 37\n" : "ok 37\n"; |
124 | print /( {2,3}.)/ ? "ok 38\n" : "not ok 38\n"; |
125 | print $1 eq ' y' ? "ok 39\n" : "not ok 39\n"; |
126 | print /(y{2,3}.)/ ? "ok 40\n" : "not ok 40\n"; |
127 | print $1 eq 'yyy ' ? "ok 41\n" : "not ok 41\n"; |
128 | print /x {3,4}/ ? "not ok 42\n" : "ok 42\n"; |
129 | print /^xxx {3,4}/ ? "not ok 43\n" : "ok 43\n"; |
352d5a3a |
130 | |
131 | $_ = "now is the time for all good men to come to."; |
132 | @words = /(\w+)/g; |
133 | print join(':',@words) eq "now:is:the:time:for:all:good:men:to:come:to" |
134 | ? "ok 44\n" |
135 | : "not ok 44\n"; |
136 | |
137 | @words = (); |
138 | while (/\w+/g) { |
139 | push(@words, $&); |
140 | } |
141 | print join(':',@words) eq "now:is:the:time:for:all:good:men:to:come:to" |
142 | ? "ok 45\n" |
143 | : "not ok 45\n"; |
144 | |
145 | @words = (); |
71be2cbc |
146 | pos = 0; |
352d5a3a |
147 | while (/to/g) { |
148 | push(@words, $&); |
149 | } |
150 | print join(':',@words) eq "to:to" |
151 | ? "ok 46\n" |
71be2cbc |
152 | : "not ok 46 `@words'\n"; |
352d5a3a |
153 | |
71be2cbc |
154 | pos $_ = 0; |
352d5a3a |
155 | @words = /to/g; |
156 | print join(':',@words) eq "to:to" |
157 | ? "ok 47\n" |
71be2cbc |
158 | : "not ok 47 `@words'\n"; |
352d5a3a |
159 | |
160 | $_ = "abcdefghi"; |
161 | |
162 | $pat1 = 'def'; |
163 | $pat2 = '^def'; |
164 | $pat3 = '.def.'; |
165 | $pat4 = 'abc'; |
166 | $pat5 = '^abc'; |
167 | $pat6 = 'abc$'; |
168 | $pat7 = 'ghi'; |
169 | $pat8 = '\w*ghi'; |
170 | $pat9 = 'ghi$'; |
171 | |
172 | $t1=$t2=$t3=$t4=$t5=$t6=$t7=$t8=$t9=0; |
173 | |
174 | for $iter (1..5) { |
175 | $t1++ if /$pat1/o; |
176 | $t2++ if /$pat2/o; |
177 | $t3++ if /$pat3/o; |
178 | $t4++ if /$pat4/o; |
179 | $t5++ if /$pat5/o; |
180 | $t6++ if /$pat6/o; |
181 | $t7++ if /$pat7/o; |
182 | $t8++ if /$pat8/o; |
183 | $t9++ if /$pat9/o; |
184 | } |
185 | |
186 | $x = "$t1$t2$t3$t4$t5$t6$t7$t8$t9"; |
187 | print $x eq '505550555' ? "ok 48\n" : "not ok 48 $x\n"; |
1462b684 |
188 | |
189 | $xyz = 'xyz'; |
190 | print "abc" =~ /^abc$|$xyz/ ? "ok 49\n" : "not ok 49\n"; |
191 | |
192 | # perl 4.009 says "unmatched ()" |
193 | eval '"abc" =~ /a(bc$)|$xyz/; $result = "$&:$1"'; |
194 | print $@ eq "" ? "ok 50\n" : "not ok 50\n"; |
195 | print $result eq "abc:bc" ? "ok 51\n" : "not ok 51\n"; |
a0d0e21e |
196 | |
197 | |
198 | $_="abcfooabcbar"; |
199 | $x=/abc/g; |
200 | print $` eq "" ? "ok 52\n" : "not ok 52\n" if $x; |
201 | $x=/abc/g; |
202 | print $` eq "abcfoo" ? "ok 53\n" : "not ok 53\n" if $x; |
203 | $x=/abc/g; |
204 | print $x == 0 ? "ok 54\n" : "not ok 54\n"; |
71be2cbc |
205 | pos = 0; |
a0d0e21e |
206 | $x=/ABC/gi; |
207 | print $` eq "" ? "ok 55\n" : "not ok 55\n" if $x; |
208 | $x=/ABC/gi; |
209 | print $` eq "abcfoo" ? "ok 56\n" : "not ok 56\n" if $x; |
210 | $x=/ABC/gi; |
211 | print $x == 0 ? "ok 57\n" : "not ok 57\n"; |
71be2cbc |
212 | pos = 0; |
a0d0e21e |
213 | $x=/abc/g; |
214 | print $' eq "fooabcbar" ? "ok 58\n" : "not ok 58\n" if $x; |
215 | $x=/abc/g; |
216 | print $' eq "bar" ? "ok 59\n" : "not ok 59\n" if $x; |
217 | $_ .= ''; |
218 | @x=/abc/g; |
219 | print scalar @x == 2 ? "ok 60\n" : "not ok 60\n"; |
71be2cbc |
220 | |
221 | $_ = "abdc"; |
222 | pos $_ = 2; |
c90c0ff4 |
223 | /\Gc/gc; |
71be2cbc |
224 | print "not " if (pos $_) != 2; |
225 | print "ok 61\n"; |
c90c0ff4 |
226 | /\Gc/g; |
227 | print "not " if defined pos $_; |
228 | print "ok 62\n"; |
c277df42 |
229 | |
230 | $out = 1; |
231 | 'abc' =~ m'a(?{ $out = 2 })b'; |
232 | print "not " if $out != 2; |
233 | print "ok 63\n"; |
234 | |
235 | $out = 1; |
236 | 'abc' =~ m'a(?{ $out = 3 })c'; |
237 | print "not " if $out != 1; |
459f542a |
238 | print "ok 64\n"; |
c277df42 |
239 | |
240 | $_ = 'foobar1 bar2 foobar3 barfoobar5 foobar6'; |
241 | @out = /(?<!foo)bar./g; |
242 | print "not " if "@out" ne 'bar2 barf'; |
243 | print "ok 65\n"; |
244 | |
8d37f932 |
245 | # Tests which depend on REG_INFTY |
246 | $reg_infty = defined $Config{reg_infty} ? $Config{reg_infty} : 32767; |
247 | $reg_infty_m = $reg_infty - 1; $reg_infty_p = $reg_infty + 1; |
248 | |
249 | # As well as failing if the pattern matches do unexpected things, the |
250 | # next three tests will fail if you should have picked up a lower-than- |
251 | # default value for $reg_infty from Config.pm, but have not. |
252 | |
253 | undef $@; |
254 | print "not " if eval q(('aaa' =~ /(a{1,$reg_infty_m})/)[0] ne 'aaa') || $@; |
255 | print "ok 66\n"; |
256 | |
257 | undef $@; |
258 | print "not " if eval q(('a' x $reg_infty_m) !~ /a{$reg_infty_m}/) || $@; |
259 | print "ok 67\n"; |
260 | |
261 | undef $@; |
262 | print "not " if eval q(('a' x ($reg_infty_m - 1)) =~ /a{$reg_infty_m}/) || $@; |
263 | print "ok 68\n"; |
264 | |
265 | undef $@; |
266 | eval "'aaa' =~ /a{1,$reg_infty}/"; |
9baa0206 |
267 | print "not " if $@ !~ m%^\QQuantifier in {,} bigger than%; |
8d37f932 |
268 | print "ok 69\n"; |
269 | |
270 | eval "'aaa' =~ /a{1,$reg_infty_p}/"; |
271 | print "not " |
9baa0206 |
272 | if $@ !~ m%^\QQuantifier in {,} bigger than%; |
8d37f932 |
273 | print "ok 70\n"; |
274 | undef $@; |
275 | |
276 | # Poke a couple more parse failures |
277 | |
278 | $context = 'x' x 256; |
279 | eval qq("${context}y" =~ /(?<=$context)y/); |
9baa0206 |
280 | print "not " if $@ !~ m%^\QLookbehind longer than 255 not%; |
8d37f932 |
281 | print "ok 71\n"; |
282 | |
b8c5462f |
283 | # removed test |
8d37f932 |
284 | print "ok 72\n"; |
285 | |
c277df42 |
286 | # Long Monsters |
8d37f932 |
287 | $test = 73; |
c277df42 |
288 | for $l (125, 140, 250, 270, 300000, 30) { # Ordered to free memory |
289 | $a = 'a' x $l; |
290 | print "# length=$l\nnot " unless "ba$a=" =~ /a$a=/; |
291 | print "ok $test\n"; |
292 | $test++; |
73d6d589 |
293 | |
c277df42 |
294 | print "not " if "b$a=" =~ /a$a=/; |
295 | print "ok $test\n"; |
296 | $test++; |
297 | } |
298 | |
299 | # 20000 nodes, each taking 3 words per string, and 1 per branch |
300 | $long_constant_len = join '|', 12120 .. 32645; |
301 | $long_var_len = join '|', 8120 .. 28645; |
302 | %ans = ( 'ax13876y25677lbc' => 1, |
303 | 'ax13876y25677mcb' => 0, # not b. |
304 | 'ax13876y35677nbc' => 0, # Num too big |
305 | 'ax13876y25677y21378obc' => 1, |
306 | 'ax13876y25677y21378zbc' => 0, # Not followed by [k-o] |
307 | 'ax13876y25677y21378y21378kbc' => 1, |
308 | 'ax13876y25677y21378y21378kcb' => 0, # Not b. |
309 | 'ax13876y25677y21378y21378y21378kbc' => 0, # 5 runs |
310 | ); |
311 | |
312 | for ( keys %ans ) { |
73d6d589 |
313 | print "# const-len `$_' not => $ans{$_}\nnot " |
c277df42 |
314 | if $ans{$_} xor /a(?=([yx]($long_constant_len)){2,4}[k-o]).*b./o; |
315 | print "ok $test\n"; |
316 | $test++; |
73d6d589 |
317 | print "# var-len `$_' not => $ans{$_}\nnot " |
c277df42 |
318 | if $ans{$_} xor /a(?=([yx]($long_var_len)){2,4}[k-o]).*b./o; |
319 | print "ok $test\n"; |
320 | $test++; |
321 | } |
322 | |
323 | $_ = " a (bla()) and x(y b((l)u((e))) and b(l(e)e)e"; |
324 | $expect = "(bla()) ((l)u((e))) (l(e)e)"; |
325 | |
73d6d589 |
326 | sub matchit { |
cc6b7395 |
327 | m/ |
c277df42 |
328 | ( |
73d6d589 |
329 | \( |
c277df42 |
330 | (?{ $c = 1 }) # Initialize |
331 | (?: |
332 | (?(?{ $c == 0 }) # PREVIOUS iteration was OK, stop the loop |
333 | (?! |
334 | ) # Fail: will unwind one iteration back |
73d6d589 |
335 | ) |
c277df42 |
336 | (?: |
337 | [^()]+ # Match a big chunk |
338 | (?= |
339 | [()] |
340 | ) # Do not try to match subchunks |
341 | | |
73d6d589 |
342 | \( |
c277df42 |
343 | (?{ ++$c }) |
344 | | |
73d6d589 |
345 | \) |
c277df42 |
346 | (?{ --$c }) |
347 | ) |
348 | )+ # This may not match with different subblocks |
349 | ) |
350 | (?(?{ $c != 0 }) |
351 | (?! |
352 | ) # Fail |
353 | ) # Otherwise the chunk 1 may succeed with $c>0 |
cc6b7395 |
354 | /xg; |
c277df42 |
355 | } |
356 | |
0f5d15d6 |
357 | @ans = (); |
c277df42 |
358 | push @ans, $res while $res = matchit; |
359 | |
360 | print "# ans='@ans'\n# expect='$expect'\nnot " if "@ans" ne "1 1 1"; |
361 | print "ok $test\n"; |
362 | $test++; |
363 | |
364 | @ans = matchit; |
365 | |
366 | print "# ans='@ans'\n# expect='$expect'\nnot " if "@ans" ne $expect; |
367 | print "ok $test\n"; |
368 | $test++; |
369 | |
96776eda |
370 | print "not " unless "abc" =~ /^(??{"a"})b/; |
371 | print "ok $test\n"; |
372 | $test++; |
373 | |
0f5d15d6 |
374 | my $matched; |
14455d6c |
375 | $matched = qr/\((?:(?>[^()]+)|(??{$matched}))*\)/; |
0f5d15d6 |
376 | |
377 | @ans = @ans1 = (); |
378 | push(@ans, $res), push(@ans1, $&) while $res = m/$matched/g; |
379 | |
380 | print "# ans='@ans'\n# expect='$expect'\nnot " if "@ans" ne "1 1 1"; |
381 | print "ok $test\n"; |
382 | $test++; |
383 | |
384 | print "# ans1='@ans1'\n# expect='$expect'\nnot " if "@ans1" ne $expect; |
385 | print "ok $test\n"; |
386 | $test++; |
387 | |
388 | @ans = m/$matched/g; |
389 | |
390 | print "# ans='@ans'\n# expect='$expect'\nnot " if "@ans" ne $expect; |
391 | print "ok $test\n"; |
392 | $test++; |
393 | |
c277df42 |
394 | @ans = ('a/b' =~ m%(.*/)?(.*)%); # Stack may be bad |
395 | print "not " if "@ans" ne 'a/ b'; |
396 | print "ok $test\n"; |
397 | $test++; |
398 | |
cc6b7395 |
399 | $code = '{$blah = 45}'; |
c277df42 |
400 | $blah = 12; |
2cd61cdb |
401 | eval { /(?$code)/ }; |
402 | print "not " unless $@ and $@ =~ /not allowed at runtime/ and $blah == 12; |
e4d48cc9 |
403 | print "ok $test\n"; |
404 | $test++; |
405 | |
2cd61cdb |
406 | for $code ('{$blah = 45}','=xx') { |
407 | $blah = 12; |
408 | $res = eval { "xx" =~ /(?$code)/o }; |
409 | if ($code eq '=xx') { |
410 | print "#'$@','$res','$blah'\nnot " unless not $@ and $res; |
411 | } else { |
73d6d589 |
412 | print "#'$@','$res','$blah'\nnot " unless $@ and $@ =~ /not allowed at runtime/ and $blah == 12; |
2cd61cdb |
413 | } |
414 | print "ok $test\n"; |
415 | $test++; |
416 | } |
417 | |
e4d48cc9 |
418 | $code = '{$blah = 45}'; |
419 | $blah = 12; |
420 | eval "/(?$code)/"; |
cc6b7395 |
421 | print "not " if $blah != 45; |
422 | print "ok $test\n"; |
423 | $test++; |
424 | |
425 | $blah = 12; |
426 | /(?{$blah = 45})/; |
c277df42 |
427 | print "not " if $blah != 45; |
428 | print "ok $test\n"; |
429 | $test++; |
430 | |
74d6a13a |
431 | $x = 'banana'; |
432 | $x =~ /.a/g; |
433 | print "not " unless pos($x) == 2; |
434 | print "ok $test\n"; |
435 | $test++; |
436 | |
437 | $x =~ /.z/gc; |
438 | print "not " unless pos($x) == 2; |
439 | print "ok $test\n"; |
440 | $test++; |
441 | |
442 | sub f { |
443 | my $p = $_[0]; |
444 | return $p; |
445 | } |
446 | |
447 | $x =~ /.a/g; |
448 | print "not " unless f(pos($x)) == 4; |
449 | print "ok $test\n"; |
450 | $test++; |
4599a1de |
451 | |
ce862d02 |
452 | $x = $^R = 67; |
453 | 'foot' =~ /foo(?{$x = 12; 75})[t]/; |
454 | print "not " unless $^R eq '75'; |
455 | print "ok $test\n"; |
456 | $test++; |
457 | |
458 | $x = $^R = 67; |
459 | 'foot' =~ /foo(?{$x = 12; 75})[xy]/; |
460 | print "not " unless $^R eq '67' and $x eq '12'; |
461 | print "ok $test\n"; |
462 | $test++; |
463 | |
464 | $x = $^R = 67; |
465 | 'foot' =~ /foo(?{ $^R + 12 })((?{ $x = 12; $^R + 17 })[xy])?/; |
466 | print "not " unless $^R eq '79' and $x eq '12'; |
467 | print "ok $test\n"; |
468 | $test++; |
469 | |
8782bef2 |
470 | print "not " unless qr/\b\v$/i eq '(?i-xsm:\bv$)'; |
471 | print "ok $test\n"; |
472 | $test++; |
473 | |
474 | print "not " unless qr/\b\v$/s eq '(?s-xim:\bv$)'; |
475 | print "ok $test\n"; |
476 | $test++; |
477 | |
478 | print "not " unless qr/\b\v$/m eq '(?m-xis:\bv$)'; |
479 | print "ok $test\n"; |
480 | $test++; |
481 | |
482 | print "not " unless qr/\b\v$/x eq '(?x-ism:\bv$)'; |
483 | print "ok $test\n"; |
484 | $test++; |
485 | |
486 | print "not " unless qr/\b\v$/xism eq '(?msix:\bv$)'; |
487 | print "ok $test\n"; |
488 | $test++; |
489 | |
490 | print "not " unless qr/\b\v$/ eq '(?-xism:\bv$)'; |
97197631 |
491 | print "ok $test\n"; |
492 | $test++; |
493 | |
7e5428c5 |
494 | $_ = 'xabcx'; |
495 | foreach $ans ('', 'c') { |
496 | /(?<=(?=a)..)((?=c)|.)/g; |
02db2b7b |
497 | print "# \$1 ='$1'\n# \$ans='$ans'\nnot " unless $1 eq $ans; |
7e5428c5 |
498 | print "ok $test\n"; |
499 | $test++; |
500 | } |
501 | |
502 | $_ = 'a'; |
503 | foreach $ans ('', 'a', '') { |
504 | /^|a|$/g; |
02db2b7b |
505 | print "# \$& ='$&'\n# \$ans='$ans'\nnot " unless $& eq $ans; |
7e5428c5 |
506 | print "ok $test\n"; |
507 | $test++; |
508 | } |
509 | |
09f25ae4 |
510 | sub prefixify { |
73d6d589 |
511 | my($v,$a,$b,$res) = @_; |
512 | $v =~ s/\Q$a\E/$b/; |
513 | print "not " unless $res eq $v; |
09f25ae4 |
514 | print "ok $test\n"; |
515 | $test++; |
516 | } |
517 | prefixify('/a/b/lib/arch', "/a/b/lib", 'X/lib', 'X/lib/arch'); |
518 | prefixify('/a/b/man/arch', "/a/b/man", 'X/man', 'X/man/arch'); |
519 | |
520 | $_ = 'var="foo"'; |
521 | /(\")/; |
522 | print "not " unless $1 and /$1/; |
523 | print "ok $test\n"; |
524 | $test++; |
525 | |
73d6d589 |
526 | $a=qr/(?{++$b})/; |
2cd61cdb |
527 | $b = 7; |
73d6d589 |
528 | /$a$a/; |
529 | print "not " unless $b eq '9'; |
2cd61cdb |
530 | print "ok $test\n"; |
531 | $test++; |
532 | |
73d6d589 |
533 | $c="$a"; |
534 | /$a$a/; |
535 | print "not " unless $b eq '11'; |
2cd61cdb |
536 | print "ok $test\n"; |
537 | $test++; |
538 | |
539 | { |
73d6d589 |
540 | use re "eval"; |
541 | /$a$c$a/; |
542 | print "not " unless $b eq '14'; |
2cd61cdb |
543 | print "ok $test\n"; |
544 | $test++; |
545 | |
160cb429 |
546 | local $lex_a = 2; |
547 | my $lex_a = 43; |
548 | my $lex_b = 17; |
549 | my $lex_c = 27; |
550 | my $lex_res = ($lex_b =~ qr/$lex_b(?{ $lex_c = $lex_a++ })/); |
551 | print "not " unless $lex_res eq '1'; |
552 | print "ok $test\n"; |
553 | $test++; |
554 | print "not " unless $lex_a eq '44'; |
555 | print "ok $test\n"; |
556 | $test++; |
557 | print "not " unless $lex_c eq '43'; |
558 | print "ok $test\n"; |
559 | $test++; |
560 | |
561 | |
73d6d589 |
562 | no re "eval"; |
2cd61cdb |
563 | $match = eval { /$a$c$a/ }; |
73d6d589 |
564 | print "not " |
2cd61cdb |
565 | unless $b eq '14' and $@ =~ /Eval-group not allowed/ and not $match; |
566 | print "ok $test\n"; |
567 | $test++; |
568 | } |
cbce877f |
569 | |
570 | { |
160cb429 |
571 | local $lex_a = 2; |
572 | my $lex_a = 43; |
573 | my $lex_b = 17; |
574 | my $lex_c = 27; |
575 | my $lex_res = ($lex_b =~ qr/17(?{ $lex_c = $lex_a++ })/); |
576 | print "not " unless $lex_res eq '1'; |
577 | print "ok $test\n"; |
578 | $test++; |
579 | print "not " unless $lex_a eq '44'; |
580 | print "ok $test\n"; |
581 | $test++; |
582 | print "not " unless $lex_c eq '43'; |
583 | print "ok $test\n"; |
584 | $test++; |
585 | } |
586 | |
587 | { |
cbce877f |
588 | package aa; |
589 | $c = 2; |
590 | $::c = 3; |
591 | '' =~ /(?{ $c = 4 })/; |
592 | print "not " unless $c == 4; |
593 | } |
594 | print "ok $test\n"; |
595 | $test++; |
596 | print "not " unless $c == 3; |
597 | print "ok $test\n"; |
73d6d589 |
598 | $test++; |
599 | |
4599a1de |
600 | sub must_warn_pat { |
601 | my $warn_pat = shift; |
602 | return sub { print "not " unless $_[0] =~ /$warn_pat/ } |
603 | } |
604 | |
605 | sub must_warn { |
606 | my ($warn_pat, $code) = @_; |
9f1b1f2d |
607 | local %SIG; |
608 | eval 'BEGIN { use warnings; $SIG{__WARN__} = $warn_pat };' . $code; |
4599a1de |
609 | print "ok $test\n"; |
610 | $test++; |
611 | } |
612 | |
613 | |
614 | sub make_must_warn { |
615 | my $warn_pat = shift; |
616 | return sub { must_warn(must_warn_pat($warn_pat)) } |
617 | } |
618 | |
619 | my $for_future = make_must_warn('reserved for future extensions'); |
620 | |
621 | &$for_future('q(a:[b]:) =~ /[x[:foo:]]/'); |
9baa0206 |
622 | |
623 | #&$for_future('q(a=[b]=) =~ /[x[=foo=]]/'); |
624 | print "ok $test\n"; $test++; # now a fatal croak |
625 | |
626 | #&$for_future('q(a.[b].) =~ /[x[.foo.]]/'); |
627 | print "ok $test\n"; $test++; # now a fatal croak |
f7e33566 |
628 | |
629 | # test if failure of patterns returns empty list |
630 | $_ = 'aaa'; |
631 | @_ = /bbb/; |
632 | print "not " if @_; |
633 | print "ok $test\n"; |
634 | $test++; |
635 | |
636 | @_ = /bbb/g; |
637 | print "not " if @_; |
638 | print "ok $test\n"; |
639 | $test++; |
640 | |
641 | @_ = /(bbb)/; |
642 | print "not " if @_; |
643 | print "ok $test\n"; |
644 | $test++; |
645 | |
646 | @_ = /(bbb)/g; |
647 | print "not " if @_; |
648 | print "ok $test\n"; |
649 | $test++; |
650 | |
6cef1e77 |
651 | /a(?=.$)/; |
652 | print "not " if $#+ != 0 or $#- != 0; |
653 | print "ok $test\n"; |
654 | $test++; |
655 | |
656 | print "not " if $+[0] != 2 or $-[0] != 1; |
657 | print "ok $test\n"; |
658 | $test++; |
659 | |
73d6d589 |
660 | print "not " |
6cef1e77 |
661 | if defined $+[1] or defined $-[1] or defined $+[2] or defined $-[2]; |
662 | print "ok $test\n"; |
663 | $test++; |
664 | |
665 | /a(a)(a)/; |
666 | print "not " if $#+ != 2 or $#- != 2; |
667 | print "ok $test\n"; |
668 | $test++; |
669 | |
670 | print "not " if $+[0] != 3 or $-[0] != 0; |
671 | print "ok $test\n"; |
672 | $test++; |
673 | |
674 | print "not " if $+[1] != 2 or $-[1] != 1; |
675 | print "ok $test\n"; |
676 | $test++; |
677 | |
678 | print "not " if $+[2] != 3 or $-[2] != 2; |
679 | print "ok $test\n"; |
680 | $test++; |
681 | |
73d6d589 |
682 | print "not " |
6cef1e77 |
683 | if defined $+[3] or defined $-[3] or defined $+[4] or defined $-[4]; |
684 | print "ok $test\n"; |
685 | $test++; |
686 | |
687 | /.(a)(b)?(a)/; |
688 | print "not " if $#+ != 3 or $#- != 3; |
689 | print "ok $test\n"; |
690 | $test++; |
691 | |
692 | print "not " if $+[0] != 3 or $-[0] != 0; |
693 | print "ok $test\n"; |
694 | $test++; |
695 | |
696 | print "not " if $+[1] != 2 or $-[1] != 1; |
697 | print "ok $test\n"; |
698 | $test++; |
699 | |
700 | print "not " if $+[3] != 3 or $-[3] != 2; |
701 | print "ok $test\n"; |
702 | $test++; |
703 | |
73d6d589 |
704 | print "not " |
6cef1e77 |
705 | if defined $+[2] or defined $-[2] or defined $+[4] or defined $-[4]; |
706 | print "ok $test\n"; |
707 | $test++; |
708 | |
709 | /.(a)/; |
710 | print "not " if $#+ != 1 or $#- != 1; |
711 | print "ok $test\n"; |
712 | $test++; |
713 | |
714 | print "not " if $+[0] != 2 or $-[0] != 0; |
715 | print "ok $test\n"; |
716 | $test++; |
717 | |
718 | print "not " if $+[1] != 2 or $-[1] != 1; |
719 | print "ok $test\n"; |
720 | $test++; |
721 | |
73d6d589 |
722 | print "not " |
6cef1e77 |
723 | if defined $+[2] or defined $-[2] or defined $+[3] or defined $-[3]; |
724 | print "ok $test\n"; |
725 | $test++; |
726 | |
03a27ae7 |
727 | eval { $+[0] = 13; }; |
73d6d589 |
728 | print "not " |
03a27ae7 |
729 | if $@ !~ /^Modification of a read-only value attempted/; |
730 | print "ok $test\n"; |
731 | $test++; |
732 | |
733 | eval { $-[0] = 13; }; |
73d6d589 |
734 | print "not " |
03a27ae7 |
735 | if $@ !~ /^Modification of a read-only value attempted/; |
736 | print "ok $test\n"; |
737 | $test++; |
738 | |
739 | eval { @+ = (7, 6, 5); }; |
73d6d589 |
740 | print "not " |
03a27ae7 |
741 | if $@ !~ /^Modification of a read-only value attempted/; |
742 | print "ok $test\n"; |
743 | $test++; |
744 | |
745 | eval { @- = qw(foo bar); }; |
73d6d589 |
746 | print "not " |
03a27ae7 |
747 | if $@ !~ /^Modification of a read-only value attempted/; |
748 | print "ok $test\n"; |
749 | $test++; |
750 | |
8f580fb8 |
751 | /.(a)(ba*)?/; |
752 | print "#$#-..$#+\nnot " if $#+ != 2 or $#- != 1; |
753 | print "ok $test\n"; |
754 | $test++; |
755 | |
ad94a511 |
756 | $_ = 'aaa'; |
757 | pos = 1; |
758 | @a = /\Ga/g; |
759 | print "not " unless "@a" eq "a a"; |
760 | print "ok $test\n"; |
761 | $test++; |
762 | |
22e551b9 |
763 | $str = 'abcde'; |
764 | pos $str = 2; |
765 | |
766 | print "not " if $str =~ /^\G/; |
767 | print "ok $test\n"; |
768 | $test++; |
769 | |
770 | print "not " if $str =~ /^.\G/; |
771 | print "ok $test\n"; |
772 | $test++; |
773 | |
774 | print "not " unless $str =~ /^..\G/; |
775 | print "ok $test\n"; |
776 | $test++; |
777 | |
778 | print "not " if $str =~ /^...\G/; |
779 | print "ok $test\n"; |
780 | $test++; |
781 | |
782 | print "not " unless $str =~ /.\G./ and $& eq 'bc'; |
783 | print "ok $test\n"; |
784 | $test++; |
785 | |
786 | print "not " unless $str =~ /\G../ and $& eq 'cd'; |
787 | print "ok $test\n"; |
788 | $test++; |
789 | |
9661b544 |
790 | undef $foo; undef $bar; |
791 | print "#'$str','$foo','$bar'\nnot " |
73d6d589 |
792 | unless $str =~ /b(?{$foo = $_; $bar = pos})c/ |
9661b544 |
793 | and $foo eq 'abcde' and $bar eq 2; |
794 | print "ok $test\n"; |
795 | $test++; |
796 | |
797 | undef $foo; undef $bar; |
798 | pos $str = undef; |
799 | print "#'$str','$foo','$bar'\nnot " |
73d6d589 |
800 | unless $str =~ /b(?{$foo = $_; $bar = pos})c/g |
9661b544 |
801 | and $foo eq 'abcde' and $bar eq 2 and pos $str eq 3; |
802 | print "ok $test\n"; |
803 | $test++; |
804 | |
805 | $_ = $str; |
806 | |
807 | undef $foo; undef $bar; |
808 | print "#'$str','$foo','$bar'\nnot " |
73d6d589 |
809 | unless /b(?{$foo = $_; $bar = pos})c/ |
9661b544 |
810 | and $foo eq 'abcde' and $bar eq 2; |
811 | print "ok $test\n"; |
812 | $test++; |
813 | |
814 | undef $foo; undef $bar; |
815 | print "#'$str','$foo','$bar'\nnot " |
73d6d589 |
816 | unless /b(?{$foo = $_; $bar = pos})c/g |
9661b544 |
817 | and $foo eq 'abcde' and $bar eq 2 and pos eq 3; |
818 | print "ok $test\n"; |
819 | $test++; |
820 | |
821 | undef $foo; undef $bar; |
822 | pos = undef; |
823 | 1 while /b(?{$foo = $_; $bar = pos})c/g; |
824 | print "#'$str','$foo','$bar'\nnot " |
825 | unless $foo eq 'abcde' and $bar eq 2 and not defined pos; |
826 | print "ok $test\n"; |
827 | $test++; |
828 | |
829 | undef $foo; undef $bar; |
830 | $_ = 'abcde|abcde'; |
831 | print "#'$str','$foo','$bar','$_'\nnot " |
73d6d589 |
832 | unless s/b(?{$foo = $_; $bar = pos})c/x/g and $foo eq 'abcde|abcde' |
9661b544 |
833 | and $bar eq 8 and $_ eq 'axde|axde'; |
834 | print "ok $test\n"; |
835 | $test++; |
836 | |
5c5e4c24 |
837 | @res = (); |
838 | # List context: |
839 | $_ = 'abcde|abcde'; |
840 | @dummy = /([ace]).(?{push @res, $1,$2})([ce])(?{push @res, $1,$2})/g; |
841 | @res = map {defined $_ ? "'$_'" : 'undef'} @res; |
842 | $res = "@res"; |
843 | print "#'@res' '$_'\nnot " |
844 | unless "@res" eq "'a' undef 'a' 'c' 'e' undef 'a' undef 'a' 'c'"; |
845 | print "ok $test\n"; |
846 | $test++; |
847 | |
848 | @res = (); |
849 | @dummy = /([ace]).(?{push @res, $`,$&,$'})([ce])(?{push @res, $`,$&,$'})/g; |
850 | @res = map {defined $_ ? "'$_'" : 'undef'} @res; |
851 | $res = "@res"; |
852 | print "#'@res' '$_'\nnot " |
853 | unless "@res" eq |
854 | "'' 'ab' 'cde|abcde' " . |
855 | "'' 'abc' 'de|abcde' " . |
856 | "'abcd' 'e|' 'abcde' " . |
857 | "'abcde|' 'ab' 'cde' " . |
858 | "'abcde|' 'abc' 'de'" ; |
859 | print "ok $test\n"; |
860 | $test++; |
861 | |
b7a35066 |
862 | #Some more \G anchor checks |
863 | $foo='aabbccddeeffgg'; |
864 | |
865 | pos($foo)=1; |
866 | |
867 | $foo=~/.\G(..)/g; |
868 | print "not " unless($1 eq 'ab'); |
869 | print "ok $test\n"; |
870 | $test++; |
871 | |
872 | pos($foo) += 1; |
873 | $foo=~/.\G(..)/g; |
874 | print "not " unless($1 eq 'cc'); |
875 | print "ok $test\n"; |
876 | $test++; |
877 | |
878 | pos($foo) += 1; |
879 | $foo=~/.\G(..)/g; |
880 | print "not " unless($1 eq 'de'); |
881 | print "ok $test\n"; |
882 | $test++; |
883 | |
0ef3e39e |
884 | print "not " unless $foo =~ /\Gef/g; |
885 | print "ok $test\n"; |
886 | $test++; |
887 | |
b7a35066 |
888 | undef pos $foo; |
889 | |
890 | $foo=~/\G(..)/g; |
891 | print "not " unless($1 eq 'aa'); |
892 | print "ok $test\n"; |
893 | $test++; |
894 | |
895 | $foo=~/\G(..)/g; |
896 | print "not " unless($1 eq 'bb'); |
897 | print "ok $test\n"; |
898 | $test++; |
899 | |
900 | pos($foo)=5; |
901 | $foo=~/\G(..)/g; |
902 | print "not " unless($1 eq 'cd'); |
903 | print "ok $test\n"; |
904 | $test++; |
905 | |
73d6d589 |
906 | $_='123x123'; |
e60df1fa |
907 | @res = /(\d*|x)/g; |
908 | print "not " unless('123||x|123|' eq join '|', @res); |
909 | print "ok $test\n"; |
910 | $test++; |
911 | |
9d080a66 |
912 | # see if matching against temporaries (created via pp_helem()) is safe |
913 | { foo => "ok $test\n".$^X }->{foo} =~ /^(.*)\n/g; |
914 | print "$1\n"; |
915 | $test++; |
916 | |
cf93c79d |
917 | # See if $i work inside (?{}) in the presense of saved substrings and |
918 | # changing $_ |
919 | @a = qw(foo bar); |
920 | @b = (); |
921 | s/(\w)(?{push @b, $1})/,$1,/g for @a; |
922 | |
923 | print "# \@b='@b', expect 'f o o b a r'\nnot " unless("@b" eq "f o o b a r"); |
924 | print "ok $test\n"; |
925 | $test++; |
926 | |
927 | print "not " unless("@a" eq ",f,,o,,o, ,b,,a,,r,"); |
928 | print "ok $test\n"; |
929 | $test++; |
930 | |
2c914db6 |
931 | $brackets = qr{ |
14455d6c |
932 | { (?> [^{}]+ | (??{ $brackets }) )* } |
2c914db6 |
933 | }x; |
934 | |
935 | "{{}" =~ $brackets; |
936 | print "ok $test\n"; # Did we survive? |
937 | $test++; |
938 | |
939 | "something { long { and } hairy" =~ $brackets; |
940 | print "ok $test\n"; # Did we survive? |
941 | $test++; |
942 | |
14455d6c |
943 | "something { long { and } hairy" =~ m/((??{ $brackets }))/; |
2c914db6 |
944 | print "not " unless $1 eq "{ and }"; |
945 | print "ok $test\n"; |
946 | $test++; |
947 | |
30944b6d |
948 | $_ = "a-a\nxbb"; |
949 | pos=1; |
950 | m/^-.*bb/mg and print "not "; |
951 | print "ok $test\n"; |
952 | $test++; |
30382c73 |
953 | |
954 | $text = "aaXbXcc"; |
955 | pos($text)=0; |
956 | $text =~ /\GXb*X/g and print 'not '; |
957 | print "ok $test\n"; |
958 | $test++; |
3cf5c195 |
959 | |
960 | $text = "xA\n" x 500; |
961 | $text =~ /^\s*A/m and print 'not '; |
962 | print "ok $test\n"; |
963 | $test++; |
d506a20d |
964 | |
965 | $text = "abc dbf"; |
966 | @res = ($text =~ /.*?(b).*?\b/g); |
967 | "@res" eq 'b b' or print 'not '; |
968 | print "ok $test\n"; |
969 | $test++; |
970 | |
9442cb0e |
971 | @a = map chr,0..255; |
aeaf5620 |
972 | |
973 | @b = grep(/\S/,@a); |
974 | @c = grep(/[^\s]/,@a); |
975 | print "not " if "@b" ne "@c"; |
9442cb0e |
976 | print "ok $test\n"; |
977 | $test++; |
978 | |
aeaf5620 |
979 | @b = grep(/\S/,@a); |
980 | @c = grep(/[\S]/,@a); |
981 | print "not " if "@b" ne "@c"; |
9442cb0e |
982 | print "ok $test\n"; |
983 | $test++; |
984 | |
aeaf5620 |
985 | @b = grep(/\s/,@a); |
986 | @c = grep(/[^\S]/,@a); |
987 | print "not " if "@b" ne "@c"; |
9442cb0e |
988 | print "ok $test\n"; |
989 | $test++; |
990 | |
aeaf5620 |
991 | @b = grep(/\s/,@a); |
992 | @c = grep(/[\s]/,@a); |
993 | print "not " if "@b" ne "@c"; |
9442cb0e |
994 | print "ok $test\n"; |
995 | $test++; |
996 | |
aeaf5620 |
997 | @b = grep(/\D/,@a); |
998 | @c = grep(/[^\d]/,@a); |
999 | print "not " if "@b" ne "@c"; |
9442cb0e |
1000 | print "ok $test\n"; |
1001 | $test++; |
1002 | |
aeaf5620 |
1003 | @b = grep(/\D/,@a); |
1004 | @c = grep(/[\D]/,@a); |
1005 | print "not " if "@b" ne "@c"; |
9442cb0e |
1006 | print "ok $test\n"; |
1007 | $test++; |
1008 | |
aeaf5620 |
1009 | @b = grep(/\d/,@a); |
1010 | @c = grep(/[^\D]/,@a); |
1011 | print "not " if "@b" ne "@c"; |
9442cb0e |
1012 | print "ok $test\n"; |
1013 | $test++; |
1014 | |
aeaf5620 |
1015 | @b = grep(/\d/,@a); |
1016 | @c = grep(/[\d]/,@a); |
1017 | print "not " if "@b" ne "@c"; |
9442cb0e |
1018 | print "ok $test\n"; |
1019 | $test++; |
1020 | |
aeaf5620 |
1021 | @b = grep(/\W/,@a); |
1022 | @c = grep(/[^\w]/,@a); |
1023 | print "not " if "@b" ne "@c"; |
9442cb0e |
1024 | print "ok $test\n"; |
1025 | $test++; |
1026 | |
aeaf5620 |
1027 | @b = grep(/\W/,@a); |
1028 | @c = grep(/[\W]/,@a); |
1029 | print "not " if "@b" ne "@c"; |
9442cb0e |
1030 | print "ok $test\n"; |
1031 | $test++; |
1032 | |
aeaf5620 |
1033 | @b = grep(/\w/,@a); |
1034 | @c = grep(/[^\W]/,@a); |
1035 | print "not " if "@b" ne "@c"; |
9442cb0e |
1036 | print "ok $test\n"; |
1037 | $test++; |
1038 | |
aeaf5620 |
1039 | @b = grep(/\w/,@a); |
1040 | @c = grep(/[\w]/,@a); |
1041 | print "not " if "@b" ne "@c"; |
9442cb0e |
1042 | print "ok $test\n"; |
1043 | $test++; |
1aeab75a |
1044 | |
1045 | # see if backtracking optimization works correctly |
1046 | "\n\n" =~ /\n $ \n/x or print "not "; |
1047 | print "ok $test\n"; |
1048 | $test++; |
1049 | |
1050 | "\n\n" =~ /\n* $ \n/x or print "not "; |
1051 | print "ok $test\n"; |
1052 | $test++; |
1053 | |
1054 | "\n\n" =~ /\n+ $ \n/x or print "not "; |
1055 | print "ok $test\n"; |
1056 | $test++; |
05b4157f |
1057 | |
1058 | [] =~ /^ARRAY/ or print "# [] \nnot "; |
1059 | print "ok $test\n"; |
1060 | $test++; |
1061 | |
1062 | eval << 'EOE'; |
1063 | { |
1064 | package S; |
1065 | use overload '""' => sub { 'Object S' }; |
1066 | sub new { bless [] } |
1067 | } |
1068 | $a = 'S'->new; |
1069 | EOE |
1070 | |
1071 | $a and $a =~ /^Object\sS/ or print "# '$a' \nnot "; |
1072 | print "ok $test\n"; |
1073 | $test++; |
815d35b9 |
1074 | |
1075 | # test result of match used as match (!) |
1076 | 'a1b' =~ ('xyz' =~ /y/) and $` eq 'a' or print "not "; |
1077 | print "ok $test\n"; |
1078 | $test++; |
1079 | |
1080 | 'a1b' =~ ('xyz' =~ /t/) and $` eq 'a' or print "not "; |
1081 | print "ok $test\n"; |
1082 | $test++; |
5e39e1e5 |
1083 | |
1084 | $w = 0; |
1085 | { |
1086 | local $SIG{__WARN__} = sub { $w = 1 }; |
1087 | local $^W = 1; |
1088 | $w = 1 if ("1\n" x 102) =~ /^\s*\n/m; |
1089 | } |
1090 | print $w ? "not " : "", "ok $test\n"; |
1091 | $test++; |
aaa51d5e |
1092 | |
1093 | my %space = ( spc => " ", |
1094 | tab => "\t", |
1095 | cr => "\r", |
1096 | lf => "\n", |
1097 | ff => "\f", |
75369ccb |
1098 | # There's no \v but the vertical tabulator seems miraculously |
1099 | # be 11 both in ASCII and EBCDIC. |
aaa51d5e |
1100 | vt => chr(11), |
1101 | false => "space" ); |
1102 | |
1103 | my @space0 = sort grep { $space{$_} =~ /\s/ } keys %space; |
1104 | my @space1 = sort grep { $space{$_} =~ /[[:space:]]/ } keys %space; |
1105 | my @space2 = sort grep { $space{$_} =~ /[[:blank:]]/ } keys %space; |
1106 | |
1107 | print "not " unless "@space0" eq "cr ff lf spc tab"; |
3bec3564 |
1108 | print "ok $test # @space0\n"; |
aaa51d5e |
1109 | $test++; |
1110 | |
1111 | print "not " unless "@space1" eq "cr ff lf spc tab vt"; |
3bec3564 |
1112 | print "ok $test # @space1\n"; |
aaa51d5e |
1113 | $test++; |
1114 | |
1115 | print "not " unless "@space2" eq "spc tab"; |
3bec3564 |
1116 | print "ok $test # @space2\n"; |
aaa51d5e |
1117 | $test++; |
73d6d589 |
1118 | |
a1933d95 |
1119 | # bugid 20001021.005 - this caused a SEGV |
1120 | print "not " unless undef =~ /^([^\/]*)(.*)$/; |
1121 | print "ok $test\n"; |
1122 | $test++; |
b91bb191 |
1123 | |
1124 | # bugid 20000731.001 |
1125 | |
1126 | print "not " unless "A \x{263a} B z C" =~ /A . B (??{ "z" }) C/; |
1127 | print "ok $test\n"; |
1128 | $test++; |
1129 | |
5ae032e5 |
1130 | my $ordA = ord('A'); |
1131 | |
3baa4c62 |
1132 | $_ = "a\x{100}b"; |
1133 | if (/(.)(\C)(\C)(.)/) { |
1134 | print "ok 232\n"; |
1135 | if ($1 eq "a") { |
1136 | print "ok 233\n"; |
1137 | } else { |
1138 | print "not ok 233\n"; |
1139 | } |
5ae032e5 |
1140 | if ($ordA == 65) { # ASCII (or equivalent), should be UTF-8 |
1141 | if ($2 eq "\xC4") { |
1142 | print "ok 234\n"; |
1143 | } else { |
1144 | print "not ok 234\n"; |
1145 | } |
1146 | if ($3 eq "\x80") { |
1147 | print "ok 235\n"; |
1148 | } else { |
1149 | print "not ok 235\n"; |
1150 | } |
1151 | } elsif ($ordA == 193) { # EBCDIC (or equivalent), should be UTF-EBCDIC |
1152 | if ($2 eq "\x8C") { |
1153 | print "ok 234\n"; |
1154 | } else { |
1155 | print "not ok 234\n"; |
1156 | } |
1157 | if ($3 eq "\x41") { |
1158 | print "ok 235\n"; |
1159 | } else { |
1160 | print "not ok 235\n"; |
1161 | } |
3baa4c62 |
1162 | } else { |
5ae032e5 |
1163 | for (234..235) { |
1164 | print "not ok $_ # ord('A') == $ordA\n"; |
1165 | } |
3baa4c62 |
1166 | } |
1167 | if ($4 eq "b") { |
1168 | print "ok 236\n"; |
1169 | } else { |
1170 | print "not ok 236\n"; |
1171 | } |
1172 | } else { |
1173 | for (232..236) { |
1174 | print "not ok $_\n"; |
1175 | } |
1176 | } |
1177 | $_ = "\x{100}"; |
1178 | if (/(\C)/g) { |
1179 | print "ok 237\n"; |
73d6d589 |
1180 | # currently \C are still tagged as UTF-8 |
5ae032e5 |
1181 | if ($ordA == 65) { |
1182 | if ($1 eq "\xC4") { |
1183 | print "ok 238\n"; |
1184 | } else { |
1185 | print "not ok 238\n"; |
1186 | } |
1187 | } elsif ($ordA == 193) { |
1188 | if ($1 eq "\x8C") { |
1189 | print "ok 238\n"; |
1190 | } else { |
1191 | print "not ok 238\n"; |
1192 | } |
3baa4c62 |
1193 | } else { |
5ae032e5 |
1194 | print "not ok 238 # ord('A') == $ordA\n"; |
3baa4c62 |
1195 | } |
1196 | } else { |
1197 | for (237..238) { |
1198 | print "not ok $_\n"; |
1199 | } |
1200 | } |
1201 | if (/(\C)/g) { |
1202 | print "ok 239\n"; |
73d6d589 |
1203 | # currently \C are still tagged as UTF-8 |
5ae032e5 |
1204 | if ($ordA == 65) { |
1205 | if ($1 eq "\x80") { |
1206 | print "ok 240\n"; |
1207 | } else { |
1208 | print "not ok 240\n"; |
1209 | } |
1210 | } elsif ($ordA == 193) { |
1211 | if ($1 eq "\x41") { |
1212 | print "ok 240\n"; |
1213 | } else { |
1214 | print "not ok 240\n"; |
1215 | } |
3baa4c62 |
1216 | } else { |
5ae032e5 |
1217 | print "not ok 240 # ord('A') == $ordA\n"; |
3baa4c62 |
1218 | } |
1219 | } else { |
1220 | for (239..240) { |
1221 | print "not ok $_\n"; |
1222 | } |
1223 | } |
b485d051 |
1224 | |
db615365 |
1225 | { |
1226 | # japhy -- added 03/03/2001 |
1227 | () = (my $str = "abc") =~ /(...)/; |
1228 | $str = "def"; |
1229 | print "not " if $1 ne "abc"; |
fd291da9 |
1230 | print "ok 241\n"; |
1231 | } |
1232 | |
1233 | # The 242 and 243 go with the 244 and 245. |
1234 | # The trick is that in EBCDIC the explicit numeric range should match |
1235 | # (as also in non-EBCDIC) but the explicit alphabetic range should not match. |
1236 | |
1237 | if ("\x8e" =~ /[\x89-\x91]/) { |
1238 | print "ok 242\n"; |
1239 | } else { |
1240 | print "not ok 242\n"; |
1241 | } |
1242 | |
1243 | if ("\xce" =~ /[\xc9-\xd1]/) { |
db615365 |
1244 | print "ok 243\n"; |
fd291da9 |
1245 | } else { |
1246 | print "not ok 243\n"; |
1247 | } |
1248 | |
1249 | # In most places these tests would succeed since \x8e does not |
1250 | # in most character sets match 'i' or 'j' nor would \xce match |
1251 | # 'I' or 'J', but strictly speaking these tests are here for |
1252 | # the good of EBCDIC, so let's test these only there. |
1253 | if (ord('i') == 0x89 && ord('J') == 0xd1) { # EBCDIC |
1254 | if ("\x8e" !~ /[i-j]/) { |
1255 | print "ok 244\n"; |
1256 | } else { |
1257 | print "not ok 244\n"; |
1258 | } |
1259 | if ("\xce" !~ /[I-J]/) { |
1260 | print "ok 245\n"; |
1261 | } else { |
1262 | print "not ok 245\n"; |
1263 | } |
1264 | } else { |
1265 | for (244..245) { |
60425c38 |
1266 | print "ok $_ # Skip: only in EBCDIC\n"; |
fd291da9 |
1267 | } |
db615365 |
1268 | } |
4765795a |
1269 | |
1270 | print "not " unless "\x{ab}" =~ /\x{ab}/; |
1271 | print "ok 246\n"; |
1272 | |
1273 | print "not " unless "\x{abcd}" =~ /\x{abcd}/; |
1274 | print "ok 247\n"; |
1275 | |
1276 | { |
f9969324 |
1277 | # bug id 20001008.001 |
1278 | |
1279 | my $test = 248; |
1280 | my @x = ("stra\337e 138","stra\337e 138"); |
1281 | for (@x) { |
1282 | s/(\d+)\s*([\w\-]+)/$1 . uc $2/e; |
1283 | my($latin) = /^(.+)(?:\s+\d)/; |
1284 | print $latin eq "stra\337e" ? "ok $test\n" : # 248,249 |
1285 | "#latin[$latin]\nnot ok $test\n"; |
1286 | $test++; |
1287 | $latin =~ s/stra\337e/straße/; # \303\237 after the 2nd a |
1288 | use utf8; # needed for the raw UTF-8 |
1289 | $latin =~ s!(s)tr(?:aß|s+e)!$1tr.!; # \303\237 after the a |
4765795a |
1290 | } |
1291 | } |
1292 | |
1293 | { |
1294 | print "not " unless "ba\xd4c" =~ /([a\xd4]+)/ && $1 eq "a\xd4"; |
1295 | print "ok 250\n"; |
1296 | |
1297 | print "not " unless "ba\xd4c" =~ /([a\xd4]+)/ && $1 eq "a\x{d4}"; |
1298 | print "ok 251\n"; |
1299 | |
1300 | print "not " unless "ba\x{d4}c" =~ /([a\xd4]+)/ && $1 eq "a\x{d4}"; |
1301 | print "ok 252\n"; |
1302 | |
1303 | print "not " unless "ba\x{d4}c" =~ /([a\xd4]+)/ && $1 eq "a\xd4"; |
1304 | print "ok 253\n"; |
1305 | |
1306 | print "not " unless "ba\xd4c" =~ /([a\x{d4}]+)/ && $1 eq "a\xd4"; |
1307 | print "ok 254\n"; |
1308 | |
1309 | print "not " unless "ba\xd4c" =~ /([a\x{d4}]+)/ && $1 eq "a\x{d4}"; |
1310 | print "ok 255\n"; |
1311 | |
1312 | print "not " unless "ba\x{d4}c" =~ /([a\x{d4}]+)/ && $1 eq "a\x{d4}"; |
1313 | print "ok 256\n"; |
1314 | |
1315 | print "not " unless "ba\x{d4}c" =~ /([a\x{d4}]+)/ && $1 eq "a\xd4"; |
1316 | print "ok 257\n"; |
1317 | } |
1318 | |
1319 | { |
1320 | # the first half of 20001028.003 |
1321 | |
1322 | my $X = chr(1448); |
1323 | my ($Y) = $X =~ /(.*)/; |
1324 | print "not " unless $Y eq v1448 && length($Y) == 1; |
1325 | print "ok 258\n"; |
1326 | } |
1327 | |
1328 | { |
1329 | # 20001108.001 |
1330 | |
1331 | my $X = "Szab\x{f3},Bal\x{e1}zs"; |
1332 | my $Y = $X; |
1333 | $Y =~ s/(B)/$1/ for 0..3; |
1334 | print "not " unless $Y eq $X && $X eq "Szab\x{f3},Bal\x{e1}zs"; |
1335 | print "ok 259\n"; |
1336 | } |
1337 | |
1338 | { |
1339 | # the second half of 20001028.003 |
1340 | |
3568d838 |
1341 | my $X = ''; |
4765795a |
1342 | $X =~ s/^/chr(1488)/e; |
1343 | print "not " unless length $X == 1 && ord($X) == 1488; |
1344 | print "ok 260\n"; |
1345 | } |
1346 | |
1347 | { |
1348 | # 20000517.001 |
1349 | |
1350 | my $x = "\x{100}A"; |
1351 | |
1352 | $x =~ s/A/B/; |
1353 | |
1354 | print "not " unless $x eq "\x{100}B" && length($x) == 2; |
1355 | print "ok 261\n"; |
1356 | } |
1357 | |
1358 | { |
1359 | # bug id 20001230.002 |
1360 | |
1361 | print "not " unless "École" =~ /^\C\C(.)/ && $1 eq 'c'; |
1362 | print "ok 262\n"; |
1363 | |
1364 | print "not " unless "École" =~ /^\C\C(c)/; |
1365 | print "ok 263\n"; |
1366 | } |
1367 | |
1368 | { |
1369 | my $test = 264; # till 575 |
1370 | |
1371 | use charnames ':full'; |
1372 | |
1373 | # This is far from complete testing, there are dozens of character |
1374 | # classes in Unicode. The mixing of literals and \N{...} is |
1375 | # intentional so that in non-Latin-1 places we test the native |
1376 | # characters, not the Unicode code points. |
1377 | |
1378 | my %s = ( |
1379 | "a" => 'Ll', |
1380 | "\N{CYRILLIC SMALL LETTER A}" => 'Ll', |
1381 | "A" => 'Lu', |
1382 | "\N{GREEK CAPITAL LETTER ALPHA}" => 'Lu', |
1383 | "\N{HIRAGANA LETTER SMALL A}" => 'Lo', |
1384 | "\N{COMBINING GRAVE ACCENT}" => 'Mn', |
1385 | "0" => 'Nd', |
1386 | "\N{ARABIC-INDIC DIGIT ZERO}" => 'Nd', |
1387 | "_" => 'N', |
1388 | "!" => 'P', |
1389 | " " => 'Zs', |
1390 | "\0" => 'Cc', |
1391 | ); |
73d6d589 |
1392 | |
3568d838 |
1393 | for my $char (map { s/^\S+ //; $_ } |
1394 | sort map { sprintf("%06x", ord($_))." $_" } keys %s) { |
4765795a |
1395 | my $class = $s{$char}; |
3568d838 |
1396 | my $code = sprintf("%06x", ord($char)); |
1397 | printf "#\n# 0x$code\n#\n"; |
4765795a |
1398 | print "# IsAlpha\n"; |
1399 | if ($class =~ /^[LM]/) { |
1400 | print "not " unless $char =~ /\p{IsAlpha}/; |
1401 | print "ok $test\n"; $test++; |
1402 | print "not " if $char =~ /\P{IsAlpha}/; |
1403 | print "ok $test\n"; $test++; |
1404 | } else { |
1405 | print "not " if $char =~ /\p{IsAlpha}/; |
1406 | print "ok $test\n"; $test++; |
1407 | print "not " unless $char =~ /\P{IsAlpha}/; |
1408 | print "ok $test\n"; $test++; |
1409 | } |
1410 | print "# IsAlnum\n"; |
1411 | if ($class =~ /^[LMN]/ && $char ne "_") { |
1412 | print "not " unless $char =~ /\p{IsAlnum}/; |
1413 | print "ok $test\n"; $test++; |
1414 | print "not " if $char =~ /\P{IsAlnum}/; |
1415 | print "ok $test\n"; $test++; |
1416 | } else { |
1417 | print "not " if $char =~ /\p{IsAlnum}/; |
1418 | print "ok $test\n"; $test++; |
1419 | print "not " unless $char =~ /\P{IsAlnum}/; |
1420 | print "ok $test\n"; $test++; |
1421 | } |
1422 | print "# IsASCII\n"; |
24817587 |
1423 | if (ord("A") == 193) { |
1424 | print "ok $test # Skip: in EBCDIC\n"; $test++; |
1425 | print "ok $test # Skip: in EBCDIC\n"; $test++; |
4765795a |
1426 | } else { |
24817587 |
1427 | if ($code le '00007f') { |
1428 | print "not " unless $char =~ /\p{IsASCII}/; |
1429 | print "ok $test\n"; $test++; |
1430 | print "not " if $char =~ /\P{IsASCII}/; |
1431 | print "ok $test\n"; $test++; |
1432 | } else { |
1433 | print "not " if $char =~ /\p{IsASCII}/; |
1434 | print "ok $test\n"; $test++; |
1435 | print "not " unless $char =~ /\P{IsASCII}/; |
1436 | print "ok $test\n"; $test++; |
1437 | } |
4765795a |
1438 | } |
1439 | print "# IsCntrl\n"; |
1440 | if ($class =~ /^C/) { |
1441 | print "not " unless $char =~ /\p{IsCntrl}/; |
1442 | print "ok $test\n"; $test++; |
1443 | print "not " if $char =~ /\P{IsCntrl}/; |
1444 | print "ok $test\n"; $test++; |
1445 | } else { |
1446 | print "not " if $char =~ /\p{IsCntrl}/; |
1447 | print "ok $test\n"; $test++; |
1448 | print "not " unless $char =~ /\P{IsCntrl}/; |
1449 | print "ok $test\n"; $test++; |
1450 | } |
1451 | print "# IsBlank\n"; |
1452 | if ($class =~ /^Z[lp]/ || $char eq " ") { |
1453 | print "not " unless $char =~ /\p{IsBlank}/; |
1454 | print "ok $test\n"; $test++; |
1455 | print "not " if $char =~ /\P{IsBlank}/; |
1456 | print "ok $test\n"; $test++; |
1457 | } else { |
1458 | print "not " if $char =~ /\p{IsBlank}/; |
1459 | print "ok $test\n"; $test++; |
1460 | print "not " unless $char =~ /\P{IsBlank}/; |
1461 | print "ok $test\n"; $test++; |
1462 | } |
1463 | print "# IsDigit\n"; |
1464 | if ($class =~ /^Nd$/) { |
1465 | print "not " unless $char =~ /\p{IsDigit}/; |
1466 | print "ok $test\n"; $test++; |
1467 | print "not " if $char =~ /\P{IsDigit}/; |
1468 | print "ok $test\n"; $test++; |
1469 | } else { |
1470 | print "not " if $char =~ /\p{IsDigit}/; |
1471 | print "ok $test\n"; $test++; |
1472 | print "not " unless $char =~ /\P{IsDigit}/; |
1473 | print "ok $test\n"; $test++; |
1474 | } |
1475 | print "# IsGraph\n"; |
1476 | if ($class =~ /^([LMNPS])|Co/) { |
1477 | print "not " unless $char =~ /\p{IsGraph}/; |
1478 | print "ok $test\n"; $test++; |
1479 | print "not " if $char =~ /\P{IsGraph}/; |
1480 | print "ok $test\n"; $test++; |
1481 | } else { |
1482 | print "not " if $char =~ /\p{IsGraph}/; |
1483 | print "ok $test\n"; $test++; |
1484 | print "not " unless $char =~ /\P{IsGraph}/; |
1485 | print "ok $test\n"; $test++; |
1486 | } |
1487 | print "# IsLower\n"; |
1488 | if ($class =~ /^Ll$/) { |
1489 | print "not " unless $char =~ /\p{IsLower}/; |
1490 | print "ok $test\n"; $test++; |
1491 | print "not " if $char =~ /\P{IsLower}/; |
1492 | print "ok $test\n"; $test++; |
1493 | } else { |
1494 | print "not " if $char =~ /\p{IsLower}/; |
1495 | print "ok $test\n"; $test++; |
1496 | print "not " unless $char =~ /\P{IsLower}/; |
1497 | print "ok $test\n"; $test++; |
1498 | } |
1499 | print "# IsPrint\n"; |
1500 | if ($class =~ /^([LMNPS])|Co|Zs/) { |
1501 | print "not " unless $char =~ /\p{IsPrint}/; |
1502 | print "ok $test\n"; $test++; |
1503 | print "not " if $char =~ /\P{IsPrint}/; |
1504 | print "ok $test\n"; $test++; |
1505 | } else { |
1506 | print "not " if $char =~ /\p{IsPrint}/; |
1507 | print "ok $test\n"; $test++; |
1508 | print "not " unless $char =~ /\P{IsPrint}/; |
1509 | print "ok $test\n"; $test++; |
1510 | } |
1511 | print "# IsPunct\n"; |
1512 | if ($class =~ /^P/ || $char eq "_") { |
1513 | print "not " unless $char =~ /\p{IsPunct}/; |
1514 | print "ok $test\n"; $test++; |
1515 | print "not " if $char =~ /\P{IsPunct}/; |
1516 | print "ok $test\n"; $test++; |
1517 | } else { |
1518 | print "not " if $char =~ /\p{IsPunct}/; |
1519 | print "ok $test\n"; $test++; |
1520 | print "not " unless $char =~ /\P{IsPunct}/; |
1521 | print "ok $test\n"; $test++; |
1522 | } |
1523 | print "# IsSpace\n"; |
1524 | if ($class =~ /^Z/ || ($code =~ /^(0009|000A|000B|000C|000D)$/)) { |
1525 | print "not " unless $char =~ /\p{IsSpace}/; |
1526 | print "ok $test\n"; $test++; |
1527 | print "not " if $char =~ /\P{IsSpace}/; |
1528 | print "ok $test\n"; $test++; |
1529 | } else { |
1530 | print "not " if $char =~ /\p{IsSpace}/; |
1531 | print "ok $test\n"; $test++; |
1532 | print "not " unless $char =~ /\P{IsSpace}/; |
1533 | print "ok $test\n"; $test++; |
1534 | } |
1535 | print "# IsUpper\n"; |
1536 | if ($class =~ /^L[ut]/) { |
1537 | print "not " unless $char =~ /\p{IsUpper}/; |
1538 | print "ok $test\n"; $test++; |
1539 | print "not " if $char =~ /\P{IsUpper}/; |
1540 | print "ok $test\n"; $test++; |
1541 | } else { |
1542 | print "not " if $char =~ /\p{IsUpper}/; |
1543 | print "ok $test\n"; $test++; |
1544 | print "not " unless $char =~ /\P{IsUpper}/; |
1545 | print "ok $test\n"; $test++; |
1546 | } |
1547 | print "# IsWord\n"; |
1548 | if ($class =~ /^[LMN]/ || $char eq "_") { |
1549 | print "not " unless $char =~ /\p{IsWord}/; |
1550 | print "ok $test\n"; $test++; |
1551 | print "not " if $char =~ /\P{IsWord}/; |
1552 | print "ok $test\n"; $test++; |
1553 | } else { |
1554 | print "not " if $char =~ /\p{IsWord}/; |
1555 | print "ok $test\n"; $test++; |
1556 | print "not " unless $char =~ /\P{IsWord}/; |
1557 | print "ok $test\n"; $test++; |
1558 | } |
1559 | } |
1560 | } |
1561 | |
1562 | { |
1563 | $_ = "abc\x{100}\x{200}\x{300}\x{380}\x{400}defg"; |
1564 | |
1565 | if (/(.\x{300})./) { |
1566 | print "ok 576\n"; |
1567 | |
1568 | print "not " unless $` eq "abc\x{100}" && length($`) == 4; |
73d6d589 |
1569 | print "ok 577\n"; |
4765795a |
1570 | |
1571 | print "not " unless $& eq "\x{200}\x{300}\x{380}" && length($&) == 3; |
73d6d589 |
1572 | print "ok 578\n"; |
4765795a |
1573 | |
1574 | print "not " unless $' eq "\x{400}defg" && length($') == 5; |
73d6d589 |
1575 | print "ok 579\n"; |
4765795a |
1576 | |
1577 | print "not " unless $1 eq "\x{200}\x{300}" && length($1) == 2; |
73d6d589 |
1578 | print "ok 580\n"; |
a8a2fe91 |
1579 | } else { |
1580 | for (576..580) { print "not ok $_\n" } |
4765795a |
1581 | } |
1582 | } |
8269fa76 |
1583 | |
1584 | { |
1585 | # bug id 20010306.008 |
1586 | |
1587 | $a = "a\x{1234}"; |
1588 | # The original bug report had 'no utf8' here but that was irrelevant. |
1589 | $a =~ m/\w/; # used to core dump |
1590 | |
1591 | print "ok 581\n"; |
1592 | } |
b8ef571c |
1593 | |
1594 | { |
339e86bc |
1595 | $test = 582; |
1596 | |
b8ef571c |
1597 | # bugid 20010410.006 |
1598 | for my $rx ( |
1599 | '/(.*?)\{(.*?)\}/csg', |
1600 | '/(.*?)\{(.*?)\}/cg', |
1601 | '/(.*?)\{(.*?)\}/sg', |
1602 | '/(.*?)\{(.*?)\}/g', |
1603 | '/(.+?)\{(.+?)\}/csg', |
1604 | ) |
1605 | { |
1606 | my($input, $i); |
1607 | |
1608 | $i = 0; |
1609 | $input = "a{b}c{d}"; |
1610 | eval <<EOT; |
1611 | while (eval \$input =~ $rx) { |
1612 | print "# \\\$1 = '\$1' \\\$2 = '\$2'\n"; |
1613 | ++\$i; |
1614 | } |
1615 | EOT |
1616 | print "not " unless $i == 2; |
1617 | print "ok " . $test++ . "\n"; |
1618 | } |
1619 | } |
209a9bc1 |
1620 | |
1621 | { |
1622 | # from Robin Houston |
1623 | |
b851fbc1 |
1624 | my $x = "\x{10FFFD}"; |
209a9bc1 |
1625 | $x =~ s/(.)/$1/g; |
b851fbc1 |
1626 | print "not " unless ord($x) == 0x10FFFD && length($x) == 1; |
209a9bc1 |
1627 | print "ok 587\n"; |
1628 | } |
3568d838 |
1629 | |
1630 | { |
1631 | my $x = "\x7f"; |
1632 | |
1633 | print "not " if $x =~ /[\x80-\xff]/; |
1634 | print "ok 588\n"; |
1635 | |
1636 | print "not " if $x =~ /[\x80-\x{100}]/; |
1637 | print "ok 589\n"; |
1638 | |
1639 | print "not " if $x =~ /[\x{100}]/; |
1640 | print "ok 590\n"; |
1641 | |
1642 | print "not " if $x =~ /\p{InLatin1Supplement}/; |
1643 | print "ok 591\n"; |
1644 | |
1645 | print "not " unless $x =~ /\P{InLatin1Supplement}/; |
1646 | print "ok 592\n"; |
1647 | |
1648 | print "not " if $x =~ /\p{InLatinExtendedA}/; |
1649 | print "ok 593\n"; |
1650 | |
1651 | print "not " unless $x =~ /\P{InLatinExtendedA}/; |
1652 | print "ok 594\n"; |
1653 | } |
1654 | |
1655 | { |
1656 | my $x = "\x80"; |
1657 | |
1658 | print "not " unless $x =~ /[\x80-\xff]/; |
1659 | print "ok 595\n"; |
1660 | |
1661 | print "not " unless $x =~ /[\x80-\x{100}]/; |
1662 | print "ok 596\n"; |
1663 | |
1664 | print "not " if $x =~ /[\x{100}]/; |
1665 | print "ok 597\n"; |
1666 | |
1667 | print "not " unless $x =~ /\p{InLatin1Supplement}/; |
1668 | print "ok 598\n"; |
1669 | |
1670 | print "not " if $x =~ /\P{InLatin1Supplement}/; |
1671 | print "ok 599\n"; |
1672 | |
1673 | print "not " if $x =~ /\p{InLatinExtendedA}/; |
1674 | print "ok 600\n"; |
1675 | |
1676 | print "not " unless $x =~ /\P{InLatinExtendedA}/; |
1677 | print "ok 601\n"; |
1678 | } |
1679 | |
1680 | { |
1681 | my $x = "\xff"; |
1682 | |
1683 | print "not " unless $x =~ /[\x80-\xff]/; |
1684 | print "ok 602\n"; |
1685 | |
1686 | print "not " unless $x =~ /[\x80-\x{100}]/; |
1687 | print "ok 603\n"; |
1688 | |
1689 | print "not " if $x =~ /[\x{100}]/; |
1690 | print "ok 604\n"; |
1691 | |
1692 | print "not " unless $x =~ /\p{InLatin1Supplement}/; |
1693 | print "ok 605\n"; |
1694 | |
1695 | print "not " if $x =~ /\P{InLatin1Supplement}/; |
1696 | print "ok 606\n"; |
1697 | |
1698 | print "not " if $x =~ /\p{InLatinExtendedA}/; |
1699 | print "ok 607\n"; |
1700 | |
1701 | print "not " unless $x =~ /\P{InLatinExtendedA}/; |
1702 | print "ok 608\n"; |
1703 | } |
1704 | |
1705 | { |
1706 | my $x = "\x{100}"; |
1707 | |
1708 | print "not " if $x =~ /[\x80-\xff]/; |
1709 | print "ok 609\n"; |
1710 | |
1711 | print "not " unless $x =~ /[\x80-\x{100}]/; |
1712 | print "ok 610\n"; |
1713 | |
1714 | print "not " unless $x =~ /[\x{100}]/; |
1715 | print "ok 611\n"; |
1716 | |
1717 | print "not " if $x =~ /\p{InLatin1Supplement}/; |
1718 | print "ok 612\n"; |
1719 | |
1720 | print "not " unless $x =~ /\P{InLatin1Supplement}/; |
1721 | print "ok 613\n"; |
1722 | |
1723 | print "not " unless $x =~ /\p{InLatinExtendedA}/; |
1724 | print "ok 614\n"; |
1725 | |
1726 | print "not " if $x =~ /\P{InLatinExtendedA}/; |
1727 | print "ok 615\n"; |
1728 | } |
1729 | |
9d1d55b5 |
1730 | { |
1731 | # from japhy |
1732 | my $w; |
1733 | use warnings; |
1734 | local $SIG{__WARN__} = sub { $w .= shift }; |
1735 | |
1736 | $w = ""; |
1737 | eval 'qr/(?c)/'; |
1738 | print "not " if $w !~ /^Useless \(\?c\)/; |
1739 | print "ok 616\n"; |
1740 | |
1741 | $w = ""; |
1742 | eval 'qr/(?-c)/'; |
1743 | print "not " if $w !~ /^Useless \(\?-c\)/; |
1744 | print "ok 617\n"; |
1745 | |
1746 | $w = ""; |
1747 | eval 'qr/(?g)/'; |
1748 | print "not " if $w !~ /^Useless \(\?g\)/; |
1749 | print "ok 618\n"; |
1750 | |
1751 | $w = ""; |
1752 | eval 'qr/(?-g)/'; |
1753 | print "not " if $w !~ /^Useless \(\?-g\)/; |
1754 | print "ok 619\n"; |
1755 | |
1756 | $w = ""; |
1757 | eval 'qr/(?o)/'; |
1758 | print "not " if $w !~ /^Useless \(\?o\)/; |
1759 | print "ok 620\n"; |
1760 | |
1761 | $w = ""; |
1762 | eval 'qr/(?-o)/'; |
1763 | print "not " if $w !~ /^Useless \(\?-o\)/; |
1764 | print "ok 621\n"; |
1765 | |
1766 | # now test multi-error regexes |
1767 | |
1768 | $w = ""; |
1769 | eval 'qr/(?g-o)/'; |
1770 | print "not " if $w !~ /^Useless \(\?g\).*\nUseless \(\?-o\)/; |
1771 | print "ok 622\n"; |
1772 | |
1773 | $w = ""; |
1774 | eval 'qr/(?g-c)/'; |
1775 | print "not " if $w !~ /^Useless \(\?g\).*\nUseless \(\?-c\)/; |
1776 | print "ok 623\n"; |
1777 | |
1778 | $w = ""; |
1779 | eval 'qr/(?o-cg)/'; # (?c) means (?g) error won't be thrown |
1780 | print "not " if $w !~ /^Useless \(\?o\).*\nUseless \(\?-c\)/; |
1781 | print "ok 624\n"; |
1782 | |
1783 | $w = ""; |
1784 | eval 'qr/(?ogc)/'; |
1785 | print "not " if $w !~ /^Useless \(\?o\).*\nUseless \(\?g\).*\nUseless \(\?c\)/; |
1786 | print "ok 625\n"; |
1787 | } |
a72deede |
1788 | |
1789 | # More Unicode "class" tests |
1790 | |
1791 | { |
1792 | use charnames ':full'; |
1793 | |
1794 | print "not " unless "\N{LATIN CAPITAL LETTER A}" =~ /\p{InBasicLatin}/; |
1795 | print "ok 626\n"; |
1796 | |
1797 | print "not " unless "\N{LATIN CAPITAL LETTER A WITH GRAVE}" =~ /\p{InLatin1Supplement}/; |
1798 | print "ok 627\n"; |
1799 | |
1800 | print "not " unless "\N{LATIN CAPITAL LETTER A WITH MACRON}" =~ /\p{InLatinExtendedA}/; |
1801 | print "ok 628\n"; |
1802 | |
1803 | print "not " unless "\N{LATIN SMALL LETTER B WITH STROKE}" =~ /\p{InLatinExtendedB}/; |
1804 | print "ok 629\n"; |
1805 | |
1806 | print "not " unless "\N{KATAKANA LETTER SMALL A}" =~ /\p{InKatakana}/; |
1807 | print "ok 630\n"; |
1808 | } |
1809 | |
6002328a |
1810 | $_ = "foo"; |
1811 | |
1812 | eval <<"EOT"; die if $@; |
1813 | /f |
1814 | o\r |
1815 | o |
1816 | \$ |
1817 | /x && print "ok 631\n"; |
1818 | EOT |
1819 | |
1820 | eval <<"EOT"; die if $@; |
1821 | /f |
1822 | o |
1823 | o |
1824 | \$\r |
1825 | /x && print "ok 632\n"; |
1826 | EOT |
1827 | |
569b5e07 |
1828 | #test /o feature |
1829 | sub test_o { $_[0] =~/$_[1]/o; return $1} |
1830 | if(test_o('abc','(.)..') eq 'a') { |
395ddfe6 |
1831 | print "ok 633\n"; |
569b5e07 |
1832 | } else { |
395ddfe6 |
1833 | print "not ok 633\n"; |
569b5e07 |
1834 | } |
1835 | if(test_o('abc','..(.)') eq 'a') { |
395ddfe6 |
1836 | print "ok 634\n"; |
569b5e07 |
1837 | } else { |
395ddfe6 |
1838 | print "not ok 634\n"; |
569b5e07 |
1839 | } |
1840 | |
f79b3095 |
1841 | # 635..639: ID 20010619.003 (only the space character is |
1842 | # supposed to be [:print:], not the whole isprint()). |
1843 | |
1844 | print "not " if "\n" =~ /[[:print:]]/; |
1845 | print "ok 635\n"; |
1846 | |
1847 | print "not " if "\t" =~ /[[:print:]]/; |
1848 | print "ok 636\n"; |
1849 | |
e857312d |
1850 | # Amazingly vertical tabulator is the same in ASCII and EBCDIC. |
f79b3095 |
1851 | print "not " if "\014" =~ /[[:print:]]/; |
1852 | print "ok 637\n"; |
1853 | |
1854 | print "not " if "\r" =~ /[[:print:]]/; |
1855 | print "ok 638\n"; |
1856 | |
1857 | print "not " unless " " =~ /[[:print:]]/; |
1858 | print "ok 639\n"; |
1859 | |
a01268b5 |
1860 | ## |
1861 | ## Test basic $^N usage outside of a regex |
1862 | ## |
1863 | $x = "abcdef"; |
1864 | $T="ok 640\n";if ($x =~ /cde/ and not defined $^N) {print $T} else {print "not $T"}; |
1865 | $T="ok 641\n";if ($x =~ /(cde)/ and $^N eq "cde") {print $T} else {print "not $T"}; |
1866 | $T="ok 642\n";if ($x =~ /(c)(d)(e)/ and $^N eq "e") {print $T} else {print "not $T"}; |
1867 | $T="ok 643\n";if ($x =~ /(c(d)e)/ and $^N eq "cde") {print $T} else {print "not $T"}; |
1868 | $T="ok 644\n";if ($x =~ /(foo)|(c(d)e)/ and $^N eq "cde") {print $T} else {print "not $T"}; |
1869 | $T="ok 645\n";if ($x =~ /(c(d)e)|(foo)/ and $^N eq "cde") {print $T} else {print "not $T"}; |
1870 | $T="ok 646\n";if ($x =~ /(c(d)e)|(abc)/ and $^N eq "abc") {print $T} else {print "not $T"}; |
1871 | $T="ok 647\n";if ($x =~ /(c(d)e)|(abc)x/ and $^N eq "cde") {print $T} else {print "not $T"}; |
1872 | $T="ok 648\n";if ($x =~ /(c(d)e)(abc)?/ and $^N eq "cde") {print $T} else {print "not $T"}; |
1873 | $T="ok 649\n";if ($x =~ /(?:c(d)e)/ and $^N eq "d" ) {print $T} else {print "not $T"}; |
1874 | $T="ok 650\n";if ($x =~ /(?:c(d)e)(?:f)/ and $^N eq "d" ) {print $T} else {print "not $T"}; |
1875 | $T="ok 651\n";if ($x =~ /(?:([abc])|([def]))*/ and $^N eq "f" ){print $T} else {print "not $T"}; |
1876 | $T="ok 652\n";if ($x =~ /(?:([ace])|([bdf]))*/ and $^N eq "f" ){print $T} else {print "not $T"}; |
1877 | $T="ok 653\n";if ($x =~ /(([ace])|([bd]))*/ and $^N eq "e" ){print $T} else {print "not $T"}; |
1878 | { |
1879 | $T="ok 654\n";if($x =~ /(([ace])|([bdf]))*/ and $^N eq "f" ){print $T} else {print "not $T"}; |
1880 | } |
1881 | ## test to see if $^N is automatically localized -- it should now |
1882 | ## have the value set in test 653 |
1883 | $T="ok 655\n";if ($^N eq "e" ){print $T} else {print "not $T"}; |
1884 | |
1885 | ## |
1886 | ## Now test inside (?{...}) |
1887 | ## |
1888 | $T="ok 656\n";if ($x =~ /a([abc])(?{$y=$^N})c/ and $y eq "b" ){print $T} else {print "not $T"}; |
1889 | $T="ok 657\n";if ($x =~ /a([abc]+)(?{$y=$^N})d/ and $y eq "bc"){print $T} else {print "not $T"}; |
1890 | $T="ok 658\n";if ($x =~ /a([abcdefg]+)(?{$y=$^N})d/ and $y eq "bc"){print $T} else {print "not $T"}; |
1891 | $T="ok 659\n";if ($x =~ /(a([abcdefg]+)(?{$y=$^N})d)(?{$z=$^N})e/ and $y eq "bc" and $z eq "abcd") |
1892 | {print $T} else {print "not $T"}; |
1893 | $T="ok 660\n";if ($x =~ /(a([abcdefg]+)(?{$y=$^N})de)(?{$z=$^N})/ and $y eq "bc" and $z eq "abcde") |
1894 | {print $T} else {print "not $T"}; |
2796c109 |
1895 | |
1896 | # Test the Unicode script classes |
1897 | |
cf25bb62 |
1898 | print "not " unless chr(0x100) =~ /\p{IsLatin}/; # outside Latin-1 |
2796c109 |
1899 | print "ok 661\n"; |
1900 | |
cf25bb62 |
1901 | print "not " unless chr(0x212b) =~ /\p{IsLatin}/; # Angstrom sign, very outside |
2796c109 |
1902 | print "ok 662\n"; |
1903 | |
cf25bb62 |
1904 | print "not " unless chr(0x5d0) =~ /\p{IsHebrew}/; # inside InHebrew |
2796c109 |
1905 | print "ok 663\n"; |
1906 | |
cf25bb62 |
1907 | print "not " unless chr(0xfb4f) =~ /\p{IsHebrew}/; # outside InHebrew |
2796c109 |
1908 | print "ok 664\n"; |
1909 | |
cf25bb62 |
1910 | print "not " unless chr(0xb5) =~ /\p{IsGreek}/; # singleton (not in a range) |
5f9563ea |
1911 | print "ok 665\n"; |
1912 | |
cf25bb62 |
1913 | print "not " unless chr(0x37a) =~ /\p{IsGreek}/; # singleton |
5f9563ea |
1914 | print "ok 666\n"; |
1915 | |
cf25bb62 |
1916 | print "not " unless chr(0x386) =~ /\p{IsGreek}/; # singleton |
5f9563ea |
1917 | print "ok 667\n"; |
1918 | |
cf25bb62 |
1919 | print "not " unless chr(0x387) =~ /\P{IsGreek}/; # not there |
5f9563ea |
1920 | print "ok 668\n"; |
1921 | |
cf25bb62 |
1922 | print "not " unless chr(0x388) =~ /\p{IsGreek}/; # range |
5f9563ea |
1923 | print "ok 669\n"; |
1924 | |
cf25bb62 |
1925 | print "not " unless chr(0x38a) =~ /\p{IsGreek}/; # range |
5f9563ea |
1926 | print "ok 670\n"; |
1927 | |
cf25bb62 |
1928 | print "not " unless chr(0x38b) =~ /\P{IsGreek}/; # not there |
5f9563ea |
1929 | print "ok 671\n"; |
1930 | |
cf25bb62 |
1931 | print "not " unless chr(0x38c) =~ /\p{IsGreek}/; # singleton |
5f9563ea |
1932 | print "ok 672\n"; |
1933 | |
550cec39 |
1934 | if (ord("A") == 65) { |
7be5a6cf |
1935 | ## |
1936 | ## Test [:cntrl:]... |
1937 | ## |
1938 | ## Should probably put in tests for all the POSIX stuff, but not sure how to |
1939 | ## guarantee a specific locale...... |
1940 | ## |
550cec39 |
1941 | $AllBytes = join('', map { chr($_) } 0..255); |
1942 | ($x = $AllBytes) =~ s/[[:cntrl:]]//g; |
1943 | if ($x ne join('', map { chr($_) } 0x20..0x7E, 0x80..0xFF)) { |
1944 | print "not "; |
1945 | } |
1946 | print "ok 673\n"; |
1947 | |
1948 | ($x = $AllBytes) =~ s/[^[:cntrl:]]//g; |
1949 | if ($x ne join('', map { chr($_) } 0..0x1F, 0x7F)) { print "not " } |
1950 | print "ok 674\n"; |
1951 | } else { |
1952 | print "ok $_ # Skip: EBCDIC\n" for 673..674; |
1953 | } |
f33976b4 |
1954 | |
1955 | # With /s modifier UTF8 chars were interpreted as bytes |
1956 | { |
1957 | my $a = "Hello \x{263A} World"; |
1958 | |
1959 | my @a = ($a =~ /./gs); |
1960 | |
1961 | print "not " unless $#a == 12; |
1962 | print "ok 675\n"; |
1963 | } |
cce850e4 |
1964 | |
1965 | @a = ("foo\nbar" =~ /./g); |
1966 | print "ok 676\n" if @a == 6 && "@a" eq "f o o b a r"; |
1967 | |
1968 | @a = ("foo\nbar" =~ /./gs); |
1969 | print "ok 677\n" if @a == 7 && "@a" eq "f o o \n b a r"; |
1970 | |
1971 | @a = ("foo\nbar" =~ /\C/g); |
1972 | print "ok 678\n" if @a == 7 && "@a" eq "f o o \n b a r"; |
1973 | |
1974 | @a = ("foo\nbar" =~ /\C/gs); |
1975 | print "ok 679\n" if @a == 7 && "@a" eq "f o o \n b a r"; |
1976 | |
1977 | @a = ("foo\n\x{100}bar" =~ /./g); |
1978 | print "ok 680\n" if @a == 7 && "@a" eq "f o o \x{100} b a r"; |
1979 | |
1980 | @a = ("foo\n\x{100}bar" =~ /./gs); |
1981 | print "ok 681\n" if @a == 8 && "@a" eq "f o o \n \x{100} b a r"; |
1982 | |
1983 | ($a, $b) = map { chr } ord('A') == 65 ? (0xc4, 0x80) : (0x8c, 0x41); |
1984 | |
1985 | @a = ("foo\n\x{100}bar" =~ /\C/g); |
1986 | print "ok 682\n" if @a == 9 && "@a" eq "f o o \n $a $b b a r"; |
1987 | |
1988 | @a = ("foo\n\x{100}bar" =~ /\C/gs); |
1989 | print "ok 683\n" if @a == 9 && "@a" eq "f o o \n $a $b b a r"; |
1990 | |
0af80b60 |
1991 | { |
1992 | # [ID 20010814.004] pos() doesn't work when using =~m// in list context |
1993 | $_ = "ababacadaea"; |
1994 | $a = join ":", /b./gc; |
1995 | $b = join ":", /a./gc; |
1996 | $c = pos; |
1997 | print "$a $b $c" eq 'ba:ba ad:ae 10' ? "ok 684\n" : "not ok 684\t# $a $b $c\n"; |
1998 | } |
d9f424b2 |
1999 | |
2000 | { |
75685a94 |
2001 | # [ID 20010407.006] matching utf8 return values from functions does not work |
2002 | |
d9f424b2 |
2003 | package ID_20010407_006; |
2004 | |
2005 | sub x { |
2006 | "a\x{1234}"; |
2007 | } |
2008 | |
2009 | my $x = x; |
2010 | my $y; |
2011 | |
2012 | $x =~ /(..)/; $y = $1; |
2013 | print "not " unless length($y) == 2 && $y eq $x; |
75685a94 |
2014 | print "ok 685\n"; |
d9f424b2 |
2015 | |
2016 | x =~ /(..)/; $y = $1; |
2017 | print "not " unless length($y) == 2 && $y eq $x; |
2018 | print "ok 686\n"; |
2019 | } |
a4c04bdc |
2020 | |
e2d8ce26 |
2021 | |
a4c04bdc |
2022 | my $test = 687; |
2023 | |
2024 | # Force scalar context on the patern match |
2025 | sub ok ($$) { |
2026 | my($ok, $name) = @_; |
2027 | |
2028 | printf "%sok %d - %s\n", ($ok ? "" : "not "), $test, $name; |
2029 | |
2030 | printf "# Failed test at line %d\n", (caller)[2] unless $ok; |
2031 | |
2032 | $test++; |
2033 | return $ok; |
2034 | } |
2035 | |
2036 | { |
2037 | # Check that \x## works. 5.6.1 and 5.005_03 fail some of these. |
2038 | $x = "\x4e" . "E"; |
2039 | ok ($x =~ /^\x4EE$/, "Check only 2 bytes of hex are matched."); |
2040 | |
2041 | $x = "\x4e" . "i"; |
2042 | ok ($x =~ /^\x4Ei$/, "Check that invalid hex digit stops it (2)"); |
2043 | |
2044 | $x = "\x4" . "j"; |
2045 | ok ($x =~ /^\x4j$/, "Check that invalid hex digit stops it (1)"); |
2046 | |
2047 | $x = "\x0" . "k"; |
2048 | ok ($x =~ /^\xk$/, "Check that invalid hex digit stops it (0)"); |
2049 | |
2050 | $x = "\x0" . "x"; |
2051 | ok ($x =~ /^\xx$/, "\\xx isn't to be treated as \\0"); |
2052 | |
2053 | $x = "\x0" . "xa"; |
2054 | ok ($x =~ /^\xxa$/, "\\xxa isn't to be treated as \\xa"); |
2055 | |
2056 | $x = "\x9" . "_b"; |
2057 | ok ($x =~ /^\x9_b$/, "\\x9_b isn't to be treated as \\x9b"); |
2058 | |
2059 | print "# and now again in [] ranges\n"; |
2060 | |
2061 | $x = "\x4e" . "E"; |
2062 | ok ($x =~ /^[\x4EE]{2}$/, "Check only 2 bytes of hex are matched."); |
2063 | |
2064 | $x = "\x4e" . "i"; |
2065 | ok ($x =~ /^[\x4Ei]{2}$/, "Check that invalid hex digit stops it (2)"); |
2066 | |
2067 | $x = "\x4" . "j"; |
2068 | ok ($x =~ /^[\x4j]{2}$/, "Check that invalid hex digit stops it (1)"); |
2069 | |
2070 | $x = "\x0" . "k"; |
2071 | ok ($x =~ /^[\xk]{2}$/, "Check that invalid hex digit stops it (0)"); |
2072 | |
2073 | $x = "\x0" . "x"; |
2074 | ok ($x =~ /^[\xx]{2}$/, "\\xx isn't to be treated as \\0"); |
2075 | |
2076 | $x = "\x0" . "xa"; |
2077 | ok ($x =~ /^[\xxa]{3}$/, "\\xxa isn't to be treated as \\xa"); |
2078 | |
2079 | $x = "\x9" . "_b"; |
2080 | ok ($x =~ /^[\x9_b]{3}$/, "\\x9_b isn't to be treated as \\x9b"); |
2081 | |
2082 | } |
2083 | |
2084 | { |
2085 | # Check that \x{##} works. 5.6.1 fails quite a few of these. |
2086 | |
2087 | $x = "\x9b"; |
2088 | ok ($x =~ /^\x{9_b}$/, "\\x{9_b} is to be treated as \\x9b"); |
2089 | |
2090 | $x = "\x9b" . "y"; |
2091 | ok ($x =~ /^\x{9_b}y$/, "\\x{9_b} is to be treated as \\x9b (again)"); |
2092 | |
2093 | $x = "\x9b" . "y"; |
2094 | ok ($x =~ /^\x{9b_}y$/, "\\x{9b_} is to be treated as \\x9b"); |
2095 | |
2096 | $x = "\x9b" . "y"; |
2097 | ok ($x =~ /^\x{9_bq}y$/, "\\x{9_bc} is to be treated as \\x9b"); |
2098 | |
2099 | $x = "\x0" . "y"; |
2100 | ok ($x =~ /^\x{x9b}y$/, "\\x{x9b} is to be treated as \\x0"); |
2101 | |
2102 | $x = "\x0" . "y"; |
2103 | ok ($x =~ /^\x{0x9b}y$/, "\\x{0x9b} is to be treated as \\x0"); |
2104 | |
2105 | $x = "\x9b" . "y"; |
2106 | ok ($x =~ /^\x{09b}y$/, "\\x{09b} is to be treated as \\x9b"); |
2107 | |
2108 | print "# and now again in [] ranges\n"; |
2109 | |
2110 | $x = "\x9b"; |
2111 | ok ($x =~ /^[\x{9_b}]$/, "\\x{9_b} is to be treated as \\x9b"); |
2112 | |
2113 | $x = "\x9b" . "y"; |
2114 | ok ($x =~ /^[\x{9_b}y]{2}$/, "\\x{9_b} is to be treated as \\x9b (again)"); |
2115 | |
2116 | $x = "\x9b" . "y"; |
2117 | ok ($x =~ /^[\x{9b_}y]{2}$/, "\\x{9b_} is to be treated as \\x9b"); |
2118 | |
2119 | $x = "\x9b" . "y"; |
2120 | ok ($x =~ /^[\x{9_bq}y]{2}$/, "\\x{9_bc} is to be treated as \\x9b"); |
2121 | |
2122 | $x = "\x0" . "y"; |
2123 | ok ($x =~ /^[\x{x9b}y]{2}$/, "\\x{x9b} is to be treated as \\x0"); |
2124 | |
2125 | $x = "\x0" . "y"; |
2126 | ok ($x =~ /^[\x{0x9b}y]{2}$/, "\\x{0x9b} is to be treated as \\x0"); |
2127 | |
2128 | $x = "\x9b" . "y"; |
2129 | ok ($x =~ /^[\x{09b}y]{2}$/, "\\x{09b} is to be treated as \\x9b"); |
2130 | } |
e2d8ce26 |
2131 | |
2132 | { |
d9efae67 |
2133 | # high bit bug -- japhy |
2134 | my $x = "ab\200d"; |
2135 | $x =~ /.*?\200/ or print "not "; |
2136 | print "ok 715\n"; |
e2d8ce26 |
2137 | } |
2138 | |
4193bef7 |
2139 | print "# some Unicode properties\n"; |
2140 | |
d9efae67 |
2141 | { |
4193bef7 |
2142 | # Dashes, underbars, case. |
d9efae67 |
2143 | print "not " unless "\x80" =~ /\p{in-latin1_SUPPLEMENT}/; |
2144 | print "ok 716\n"; |
ab13f0c7 |
2145 | |
4193bef7 |
2146 | # Complement, leading and trailing whitespace. |
ab13f0c7 |
2147 | print "not " unless "\x80" =~ /\P{ ^ In Latin 1 Supplement }/; |
2148 | print "ok 717\n"; |
4193bef7 |
2149 | |
f173cd49 |
2150 | # No ^In, dashes, case, dash, any intervening (word-break) whitespace. |
2151 | # (well, newlines don't work...) |
2152 | print "not " unless "\x80" =~ /\p{latin-1 supplement}/; |
4193bef7 |
2153 | print "ok 718\n"; |
2154 | } |
2155 | |
2156 | { |
2157 | print "not " unless "a" =~ /\pL/; |
2158 | print "ok 719\n"; |
4193bef7 |
2159 | |
4193bef7 |
2160 | print "not " unless "a" =~ /\p{IsLl}/; |
2161 | print "ok 720\n"; |
4193bef7 |
2162 | |
c87b7cc2 |
2163 | print "not " if "a" =~ /\p{IsLu}/; |
4193bef7 |
2164 | print "ok 721\n"; |
4193bef7 |
2165 | |
61247495 |
2166 | print "not " unless "a" =~ /\p{Ll}/; |
4193bef7 |
2167 | print "ok 722\n"; |
c87b7cc2 |
2168 | |
61247495 |
2169 | print "not " if "a" =~ /\p{Lu}/; |
c87b7cc2 |
2170 | print "ok 723\n"; |
2171 | |
61247495 |
2172 | print "not " unless "A" =~ /\pL/; |
c87b7cc2 |
2173 | print "ok 724\n"; |
2174 | |
61247495 |
2175 | print "not " unless "A" =~ /\p{IsLu}/; |
c87b7cc2 |
2176 | print "ok 725\n"; |
2177 | |
61247495 |
2178 | print "not " if "A" =~ /\p{IsLl}/; |
c87b7cc2 |
2179 | print "ok 726\n"; |
2180 | |
61247495 |
2181 | print "not " unless "A" =~ /\p{Lu}/; |
c87b7cc2 |
2182 | print "ok 727\n"; |
2183 | |
61247495 |
2184 | print "not " if "A" =~ /\p{Ll}/; |
c87b7cc2 |
2185 | print "ok 728\n"; |
2186 | |
61247495 |
2187 | print "not " if "a" =~ /\PL/; |
c87b7cc2 |
2188 | print "ok 729\n"; |
2189 | |
61247495 |
2190 | print "not " if "a" =~ /\P{IsLl}/; |
c87b7cc2 |
2191 | print "ok 730\n"; |
61247495 |
2192 | |
2193 | print "not " unless "a" =~ /\P{IsLu}/; |
2194 | print "ok 731\n"; |
2195 | |
2196 | print "not " if "a" =~ /\P{Ll}/; |
2197 | print "ok 732\n"; |
2198 | |
2199 | print "not " unless "a" =~ /\P{Lu}/; |
2200 | print "ok 733\n"; |
2201 | |
2202 | print "not " if "A" =~ /\PL/; |
2203 | print "ok 734\n"; |
2204 | |
2205 | print "not " if "A" =~ /\P{IsLu}/; |
2206 | print "ok 735\n"; |
2207 | |
2208 | print "not " unless "A" =~ /\P{IsLl}/; |
2209 | print "ok 736\n"; |
2210 | |
2211 | print "not " if "A" =~ /\P{Lu}/; |
2212 | print "ok 737\n"; |
2213 | |
2214 | print "not " unless "A" =~ /\P{Ll}/; |
2215 | print "ok 738\n"; |
2216 | |
4193bef7 |
2217 | } |
9b4e380a |
2218 | |
2219 | { |
2220 | print "not " if "a" =~ /\p{Common}/; |
2221 | print "ok 739\n"; |
2222 | |
2223 | print "not " unless "1" =~ /\p{Common}/; |
2224 | print "ok 740\n"; |
2225 | } |
2226 | |
2227 | { |
2228 | print "not " if "a" =~ /\p{Inherited}/; |
2229 | print "ok 741\n"; |
2230 | |
2231 | print "not " unless "\x{300}" =~ /\p{Inherited}/; |
2232 | print "ok 742\n"; |
2233 | } |
2234 | |
2235 | { |
2236 | print "not " unless "a" =~ /\p{L&}/; |
2237 | print "ok 743\n"; |
2238 | |
2239 | print "not " if "1" =~ /\p{L&}/; |
2240 | print "ok 744\n"; |
2241 | } |
d73e5302 |
2242 | |
2243 | { |
904bc114 |
2244 | print "not " unless "a" =~ /\p{Lowercase Letter}/; |
d73e5302 |
2245 | print "ok 745\n"; |
2246 | |
904bc114 |
2247 | print "not " if "A" =~ /\p{lowercaseletter}/; |
d73e5302 |
2248 | print "ok 746\n"; |
2249 | } |
2250 | |
2251 | { |
cf25bb62 |
2252 | print "not " unless "\x{AC00}" =~ /\p{HangulSyllables}/; |
d73e5302 |
2253 | print "ok 747\n"; |
2254 | } |
71d929cb |
2255 | |
2256 | { |
701a277b |
2257 | # Script=, Block=, Category= |
2258 | |
71d929cb |
2259 | print "not " unless "\x{0100}" =~ /\p{Script=Latin}/; |
2260 | print "ok 748\n"; |
2261 | |
2262 | print "not " unless "\x{0100}" =~ /\p{Block=LatinExtendedA}/; |
2263 | print "ok 749\n"; |
2264 | |
2265 | print "not " unless "\x{0100}" =~ /\p{Category=UppercaseLetter}/; |
2266 | print "ok 750\n"; |
2267 | } |
2268 | |
ef54fa25 |
2269 | { |
701a277b |
2270 | print "# the basic character classes and Unicode \n"; |
2271 | |
ef54fa25 |
2272 | # 0100;LATIN CAPITAL LETTER A WITH MACRON;Lu;0;L;0041 0304;;;;N;LATIN CAPITAL LETTER A MACRON;;;0101; |
2273 | print "not " unless "\x{0100}" =~ /\w/; |
2274 | print "ok 751\n"; |
2275 | |
2276 | # 0660;ARABIC-INDIC DIGIT ZERO;Nd;0;AN;;0;0;0;N;;;;; |
2277 | print "not " unless "\x{0660}" =~ /\d/; |
2278 | print "ok 752\n"; |
2279 | |
2280 | # 1680;OGHAM SPACE MARK;Zs;0;WS;;;;;N;;;;; |
2281 | print "not " unless "\x{1680}" =~ /\s/; |
2282 | print "ok 753\n"; |
2283 | } |
701a277b |
2284 | |
2285 | { |
2286 | print "# folding matches and Unicode\n"; |
2287 | |
2288 | print "not " unless "a\x{100}" =~ /A/i; |
2289 | print "ok 754\n"; |
2290 | |
27e28630 |
2291 | print "not " unless "A\x{100}" =~ /a/i; |
701a277b |
2292 | print "ok 755\n"; |
2293 | |
2294 | print "not " unless "a\x{100}" =~ /a/i; |
2295 | print "ok 756\n"; |
2296 | |
2297 | print "not " unless "A\x{100}" =~ /A/i; |
2298 | print "ok 757\n"; |
bc517b45 |
2299 | |
2300 | print "not " unless "\x{101}a" =~ /\x{100}/i; |
2301 | print "ok 758\n"; |
2302 | |
2303 | print "not " unless "\x{100}a" =~ /\x{100}/i; |
2304 | print "ok 759\n"; |
2305 | |
2306 | print "not " unless "\x{101}a" =~ /\x{101}/i; |
2307 | print "ok 760\n"; |
2308 | |
2309 | print "not " unless "\x{100}a" =~ /\x{101}/i; |
2310 | print "ok 761\n"; |
2311 | |
2312 | print "not " unless "a\x{100}" =~ /A\x{100}/i; |
2313 | print "ok 762\n"; |
2314 | |
27e28630 |
2315 | print "not " unless "A\x{100}" =~ /a\x{100}/i; |
bc517b45 |
2316 | print "ok 763\n"; |
2317 | |
2318 | print "not " unless "a\x{100}" =~ /a\x{100}/i; |
2319 | print "ok 764\n"; |
2320 | |
2321 | print "not " unless "A\x{100}" =~ /A\x{100}/i; |
2322 | print "ok 765\n"; |
2323 | |
2324 | print "not " unless "a\x{100}" =~ /[A]/i; |
2325 | print "ok 766\n"; |
2326 | |
27e28630 |
2327 | print "not " unless "A\x{100}" =~ /[a]/i; |
bc517b45 |
2328 | print "ok 767\n"; |
2329 | |
2330 | print "not " unless "a\x{100}" =~ /[a]/i; |
2331 | print "ok 768\n"; |
2332 | |
2333 | print "not " unless "A\x{100}" =~ /[A]/i; |
2334 | print "ok 769\n"; |
2335 | |
2336 | print "not " unless "\x{101}a" =~ /[\x{100}]/i; |
2337 | print "ok 770\n"; |
2338 | |
2339 | print "not " unless "\x{100}a" =~ /[\x{100}]/i; |
2340 | print "ok 771\n"; |
2341 | |
2342 | print "not " unless "\x{101}a" =~ /[\x{101}]/i; |
2343 | print "ok 772\n"; |
2344 | |
2345 | print "not " unless "\x{100}a" =~ /[\x{101}]/i; |
2346 | print "ok 773\n"; |
2347 | |
701a277b |
2348 | } |
a5961de5 |
2349 | |
2350 | { |
2351 | use charnames ':full'; |
2352 | |
2353 | print "# LATIN LETTER A WITH GRAVE\n"; |
2354 | my $lower = "\N{LATIN SMALL LETTER A WITH GRAVE}"; |
2355 | my $UPPER = "\N{LATIN CAPITAL LETTER A WITH GRAVE}"; |
2356 | |
bc517b45 |
2357 | print $lower =~ m/$UPPER/i ? "ok 774\n" : "not ok 774\n"; |
2358 | print $UPPER =~ m/$lower/i ? "ok 775\n" : "not ok 775\n"; |
2359 | print $lower =~ m/[$UPPER]/i ? "ok 776\n" : "not ok 776\n"; |
2360 | print $UPPER =~ m/[$lower]/i ? "ok 777\n" : "not ok 777\n"; |
a5961de5 |
2361 | |
2362 | print "# GREEK LETTER ALPHA WITH VRACHY\n"; |
2363 | |
2364 | $lower = "\N{GREEK CAPITAL LETTER ALPHA WITH VRACHY}"; |
2365 | $UPPER = "\N{GREEK SMALL LETTER ALPHA WITH VRACHY}"; |
2366 | |
bc517b45 |
2367 | print $lower =~ m/$UPPER/i ? "ok 778\n" : "not ok 778\n"; |
2368 | print $UPPER =~ m/$lower/i ? "ok 779\n" : "not ok 779\n"; |
2369 | print $lower =~ m/[$UPPER]/i ? "ok 780\n" : "not ok 780\n"; |
2370 | print $UPPER =~ m/[$lower]/i ? "ok 781\n" : "not ok 781\n"; |
a5961de5 |
2371 | |
2372 | print "# LATIN LETTER Y WITH DIAERESIS\n"; |
2373 | |
2374 | $lower = "\N{LATIN CAPITAL LETTER Y WITH DIAERESIS}"; |
2375 | $UPPER = "\N{LATIN SMALL LETTER Y WITH DIAERESIS}"; |
bc517b45 |
2376 | print $lower =~ m/$UPPER/i ? "ok 782\n" : "not ok 782\n"; |
2377 | print $UPPER =~ m/$lower/i ? "ok 783\n" : "not ok 783\n"; |
2378 | print $lower =~ m/[$UPPER]/i ? "ok 784\n" : "not ok 784\n"; |
2379 | print $UPPER =~ m/[$lower]/i ? "ok 785\n" : "not ok 785\n"; |
a5961de5 |
2380 | } |
55da9344 |
2381 | |
2382 | { |
2383 | use warnings; |
2384 | use charnames ':full'; |
2385 | |
2386 | print "# GREEK CAPITAL LETTER SIGMA vs COMBINING GREEK PERISPOMENI\n"; |
2387 | |
2388 | my $SIGMA = "\N{GREEK CAPITAL LETTER SIGMA}"; |
d07ddd77 |
2389 | my $char = "\N{COMBINING GREEK PERISPOMENI}"; |
55da9344 |
2390 | |
bc517b45 |
2391 | # Before #13843 this was failing by matching falsely. |
2392 | print "_:$char:_" =~ m/_:$SIGMA:_/i ? "not ok 786\n" : "ok 786\n"; |
55da9344 |
2393 | } |
b7c83a7e |
2394 | |
2395 | { |
2396 | print "# \\X\n"; |
2397 | |
2398 | use charnames ':full'; |
2399 | |
eb08e2da |
2400 | print "a!" =~ /^(\X)!/ && $1 eq "a" ? |
2401 | "ok 787\n" : "not ok 787 # $1\n"; |
2402 | print "\xDF!" =~ /^(\X)!/ && $1 eq "\xDF" ? |
2403 | "ok 788\n" : "not ok 788 # $1\n"; |
2404 | print "\x{100}!" =~ /^(\X)!/ && $1 eq "\x{100}" ? |
2405 | "ok 789\n" : "not ok 789 # $1\n"; |
2406 | print "\x{100}\x{300}!" =~ /^(\X)!/ && $1 eq "\x{100}\x{300}" ? |
2407 | "ok 790\n" : "not ok 790 # $1\n"; |
2408 | print "\N{LATIN CAPITAL LETTER E}!" =~ /^(\X)!/ && |
2409 | $1 eq "\N{LATIN CAPITAL LETTER E}" ? |
2410 | "ok 791\n" : "not ok 791 # $1\n"; |
2411 | print "\N{LATIN CAPITAL LETTER E}\N{COMBINING GRAVE ACCENT}!" =~ |
2412 | /^(\X)!/ && |
2413 | $1 eq "\N{LATIN CAPITAL LETTER E}\N{COMBINING GRAVE ACCENT}" ? |
2414 | "ok 792\n" : "not ok 792 # $1\n"; |
b7c83a7e |
2415 | } |
112bedeb |
2416 | |
2417 | { |
2418 | print "#\\C and \\X\n"; |
2419 | |
2420 | print "!abc!" =~ /a\Cc/ ? "ok 793\n" : "not ok 793\n"; |
2421 | print "!abc!" =~ /a\Xc/ ? "ok 794\n" : "not ok 794\n"; |
2422 | } |
09091399 |
2423 | |
2424 | { |
2425 | print "# FINAL SIGMA\n"; |
2426 | |
2427 | my $SIGMA = "\x{03A3}"; # CAPITAL |
2428 | my $Sigma = "\x{03C2}"; # SMALL FINAL |
2429 | my $sigma = "\x{03C3}"; # SMALL |
2430 | |
2431 | print $SIGMA =~ /$SIGMA/i ? "ok 795\n" : "not ok 795\n"; |
2432 | print $SIGMA =~ /$Sigma/i ? "ok 796\n" : "not ok 796\n"; |
2433 | print $SIGMA =~ /$sigma/i ? "ok 797\n" : "not ok 797\n"; |
2434 | |
2435 | print $Sigma =~ /$SIGMA/i ? "ok 798\n" : "not ok 798\n"; |
2436 | print $Sigma =~ /$Sigma/i ? "ok 799\n" : "not ok 799\n"; |
2437 | print $Sigma =~ /$sigma/i ? "ok 800\n" : "not ok 800\n"; |
2438 | |
2439 | print $sigma =~ /$SIGMA/i ? "ok 801\n" : "not ok 801\n"; |
2440 | print $sigma =~ /$Sigma/i ? "ok 802\n" : "not ok 802\n"; |
2441 | print $sigma =~ /$sigma/i ? "ok 803\n" : "not ok 803\n"; |
2442 | |
2443 | print $SIGMA =~ /[$SIGMA]/i ? "ok 804\n" : "not ok 804\n"; |
2444 | print $SIGMA =~ /[$Sigma]/i ? "ok 805\n" : "not ok 805\n"; |
2445 | print $SIGMA =~ /[$sigma]/i ? "ok 806\n" : "not ok 806\n"; |
2446 | |
2447 | print $Sigma =~ /[$SIGMA]/i ? "ok 807\n" : "not ok 807\n"; |
2448 | print $Sigma =~ /[$Sigma]/i ? "ok 808\n" : "not ok 808\n"; |
2449 | print $Sigma =~ /[$sigma]/i ? "ok 809\n" : "not ok 809\n"; |
2450 | |
2451 | print $sigma =~ /[$SIGMA]/i ? "ok 810\n" : "not ok 810\n"; |
2452 | print $sigma =~ /[$Sigma]/i ? "ok 811\n" : "not ok 811\n"; |
2453 | print $sigma =~ /[$sigma]/i ? "ok 812\n" : "not ok 812\n"; |
2454 | } |
e036fef9 |
2455 | |
2456 | { |
2457 | print "# parlez-vous?\n"; |
2458 | |
2459 | use charnames ':full'; |
2460 | |
2461 | print "fran\N{LATIN SMALL LETTER C}ais" =~ |
2462 | /fran.ais/ && |
2463 | $& eq "francais" ? |
2464 | "ok 813\n" : "not ok 813\n"; |
2465 | |
2466 | print "fran\N{LATIN SMALL LETTER C WITH CEDILLA}ais" =~ |
2467 | /fran.ais/ && |
2468 | $& eq "fran\N{LATIN SMALL LETTER C WITH CEDILLA}ais" ? |
2469 | "ok 814\n" : "not ok 814\n"; |
2470 | |
2471 | print "fran\N{LATIN SMALL LETTER C}ais" =~ |
2472 | /fran\Cais/ && |
2473 | $& eq "francais" ? |
2474 | "ok 815\n" : "not ok 815\n"; |
2475 | |
2476 | print "franc\N{COMBINING CEDILLA}ais" =~ |
2477 | /franc\C\Cais/ ? # COMBINING CEDILLA is two bytes when encoded |
2478 | "ok 816\n" : "not ok 816\n"; |
2479 | |
2480 | print "fran\N{LATIN SMALL LETTER C}ais" =~ |
2481 | /fran\Xais/ && |
2482 | $& eq "francais" ? |
2483 | "ok 817\n" : "not ok 817\n"; |
2484 | |
2485 | print "fran\N{LATIN SMALL LETTER C WITH CEDILLA}ais" =~ |
2486 | /fran\Xais/ && |
2487 | $& eq "fran\N{LATIN SMALL LETTER C WITH CEDILLA}ais" ? |
2488 | "ok 818\n" : "not ok 818\n"; |
2489 | |
2490 | print "franc\N{COMBINING CEDILLA}ais" =~ |
2491 | /fran\Xais/ && |
2492 | $& eq "franc\N{COMBINING CEDILLA}ais" ? |
2493 | "ok 819\n" : "not ok 819\n"; |
2494 | |
2495 | print "fran\N{LATIN SMALL LETTER C WITH CEDILLA}ais" =~ |
2496 | /fran\N{LATIN SMALL LETTER C WITH CEDILLA}ais/ && |
2497 | $& eq "fran\N{LATIN SMALL LETTER C WITH CEDILLA}ais" ? |
2498 | "ok 820\n" : "not ok 820\n"; |
2499 | |
2500 | print "franc\N{COMBINING CEDILLA}ais" =~ |
2501 | /franc\N{COMBINING CEDILLA}ais/ && |
2502 | $& eq "franc\N{COMBINING CEDILLA}ais" ? |
2503 | "ok 821\n" : "not ok 821\n"; |
2504 | |
2505 | print "fran\N{LATIN SMALL LETTER C}ais" =~ |
2506 | /fran(?:c\N{COMBINING CEDILLA}?|\N{LATIN SMALL LETTER C WITH CEDILLA})ais/ && |
2507 | $& eq "francais" ? |
2508 | "ok 822\n" : "not ok 822\n"; |
2509 | |
2510 | print "fran\N{LATIN SMALL LETTER C}ais" =~ |
2511 | /fran(?:c\N{COMBINING CEDILLA}?|\N{LATIN SMALL LETTER C WITH CEDILLA})ais/ && |
2512 | $& eq "francais" ? |
2513 | "ok 823\n" : "not ok 823\n"; |
2514 | |
2515 | print "fran\N{LATIN SMALL LETTER C WITH CEDILLA}ais" =~ |
2516 | /fran(?:c\N{COMBINING CEDILLA}?|\N{LATIN SMALL LETTER C WITH CEDILLA})ais/ && |
2517 | $& eq "fran\N{LATIN SMALL LETTER C WITH CEDILLA}ais" ? |
2518 | "ok 824\n" : "not ok 824\n"; |
2519 | |
2520 | print "franc\N{COMBINING CEDILLA}ais" =~ |
2521 | /fran(?:c\N{COMBINING CEDILLA}?|\N{LATIN SMALL LETTER C WITH CEDILLA})ais/ && |
2522 | $& eq "franc\N{COMBINING CEDILLA}ais" ? |
2523 | "ok 825\n" : "not ok 825\n"; |
2524 | } |
ffce6cc2 |
2525 | |
2526 | { |
2527 | print "# Does lingering (and useless) UTF8 flag mess up /i matching?\n"; |
2528 | |
2529 | { |
2530 | my $regex = "ABcde"; |
2531 | my $string = "abcDE\x{100}"; |
2532 | chop($string); |
2533 | if ($string =~ m/$regex/i) { |
2534 | print "ok 826\n"; |
2535 | } else { |
2536 | print "not ok 826\n"; |
2537 | } |
2538 | } |
2539 | |
2540 | { |
2541 | my $regex = "ABcde\x{100}"; |
2542 | my $string = "abcDE"; |
2543 | chop($regex); |
2544 | if ($string =~ m/$regex/i) { |
2545 | print "ok 827\n"; |
2546 | } else { |
2547 | print "not ok 827\n"; |
2548 | } |
2549 | } |
2550 | |
2551 | { |
2552 | my $regex = "ABcde\x{100}"; |
2553 | my $string = "abcDE\x{100}"; |
2554 | chop($regex); |
2555 | chop($string); |
2556 | if ($string =~ m/$regex/i) { |
2557 | print "ok 828\n"; |
2558 | } else { |
2559 | print "not ok 828\n"; |
2560 | } |
2561 | } |
2562 | } |
d07ddd77 |
2563 | |
2564 | { |
2565 | print "# more SIGMAs\n"; |
2566 | |
2567 | my $SIGMA = "\x{03A3}"; # CAPITAL |
2568 | my $Sigma = "\x{03C2}"; # SMALL FINAL |
2569 | my $sigma = "\x{03C3}"; # SMALL |
2570 | |
2571 | my $S3 = "$SIGMA$Sigma$sigma"; |
2572 | |
2573 | print ":$S3:" =~ /:(($SIGMA)+):/i && $1 eq $S3 && $2 eq $sigma ? |
2574 | "ok 829\n" : "not ok 829\n"; |
2575 | print ":$S3:" =~ /:(($Sigma)+):/i && $1 eq $S3 && $2 eq $sigma ? |
2576 | "ok 830\n" : "not ok 830\n"; |
2577 | print ":$S3:" =~ /:(($sigma)+):/i && $1 eq $S3 && $2 eq $sigma ? |
2578 | "ok 831\n" : "not ok 831\n"; |
2579 | |
2580 | print ":$S3:" =~ /:(([$SIGMA])+):/i && $1 eq $S3 && $2 eq $sigma ? |
2581 | "ok 832\n" : "not ok 832\n"; |
2582 | print ":$S3:" =~ /:(([$Sigma])+):/i && $1 eq $S3 && $2 eq $sigma ? |
2583 | "ok 833\n" : "not ok 833\n"; |
2584 | print ":$S3:" =~ /:(([$sigma])+):/i && $1 eq $S3 && $2 eq $sigma ? |
2585 | "ok 834\n" : "not ok 834\n"; |
2586 | } |
925f9e00 |
2587 | |
2588 | { |
2589 | print "# LATIN SMALL LETTER SHARP S\n"; |
2590 | |
2591 | use charnames ':full'; |
2592 | |
2593 | print "\N{LATIN SMALL LETTER SHARP S}" =~ |
2594 | /\N{LATIN SMALL LETTER SHARP S}/ ? "ok 835\n" : "not ok 835\n"; |
2595 | |
2596 | print "\N{LATIN SMALL LETTER SHARP S}" =~ |
2597 | /\N{LATIN SMALL LETTER SHARP S}/i ? "ok 836\n" : "not ok 836\n"; |
2598 | |
2599 | print "\N{LATIN SMALL LETTER SHARP S}" =~ |
2600 | /[\N{LATIN SMALL LETTER SHARP S}]/ ? "ok 837\n" : "not ok 837\n"; |
2601 | |
2602 | print "\N{LATIN SMALL LETTER SHARP S}" =~ |
2603 | /[\N{LATIN SMALL LETTER SHARP S}]/i ? "ok 838\n" : "not ok 838\n"; |
2604 | |
2605 | print "ss" =~ |
2606 | /\N{LATIN SMALL LETTER SHARP S}/i ? "ok 839\n" : "not ok 839\n"; |
2607 | |
2608 | print "SS" =~ |
2609 | /\N{LATIN SMALL LETTER SHARP S}/i ? "ok 840\n" : "not ok 840\n"; |
2610 | |
e0f9d4a8 |
2611 | print "ss" =~ |
2612 | /[\N{LATIN SMALL LETTER SHARP S}]/i ? "ok 841\n" : "not ok 841\n"; |
2613 | |
2614 | print "SS" =~ |
2615 | /[\N{LATIN SMALL LETTER SHARP S}]/i ? "ok 842\n" : "not ok 842\n"; |
5486206c |
2616 | |
2617 | print "\N{LATIN SMALL LETTER SHARP S}" =~ /ss/i ? |
2618 | "ok 843\n" : "not ok 843\n"; |
2619 | |
2620 | print "\N{LATIN SMALL LETTER SHARP S}" =~ /SS/i ? |
2621 | "ok 844\n" : "not ok 844\n"; |
925f9e00 |
2622 | } |
d8f6a732 |
2623 | |
2624 | { |
2625 | print "# more whitespace: U+0085, U+2028, U+2029\n"; |
2626 | |
2627 | # U+0085 needs to be forced to be Unicode, the \x{100} does that. |
5486206c |
2628 | print "<\x{100}\x{0085}>" =~ /<\x{100}\s>/ ? "ok 845\n" : "not ok 845\n"; |
2629 | print "<\x{2028}>" =~ /<\s>/ ? "ok 846\n" : "not ok 846\n"; |
2630 | print "<\x{2029}>" =~ /<\s>/ ? "ok 847\n" : "not ok 847\n"; |
d8f6a732 |
2631 | } |
2632 | |
def8e4ea |
2633 | { |
229196e5 |
2634 | print "# . with /s should work on characters, as opposed to bytes\n"; |
def8e4ea |
2635 | |
2636 | my $s = "\x{e4}\x{100}"; |
2637 | |
2638 | # This is not expected to match: the point is that |
2639 | # neither should we get "Malformed UTF-8" warnings. |
2640 | print $s =~ /\G(.+?)\n/gcs ? |
5486206c |
2641 | "not ok 848\n" : "ok 848\n"; |
def8e4ea |
2642 | |
2643 | my @c; |
2644 | |
2645 | while ($s =~ /\G(.)/gs) { |
2646 | push @c, $1; |
2647 | } |
2648 | |
5486206c |
2649 | print join("", @c) eq $s ? "ok 849\n" : "not ok 849\n"; |
a0804c9e |
2650 | |
2651 | my $t1 = "Q003\n\n\x{e4}\x{f6}\n\nQ004\n\n\x{e7}"; # test only chars < 256 |
2652 | my $r1 = ""; |
2653 | while ($t1 =~ / \G ( .+? ) \n\s+ ( .+? ) ( $ | \n\s+ ) /xgcs) { |
2654 | $r1 .= $1 . $2; |
2655 | } |
2656 | |
2657 | my $t2 = $t1 . "\x{100}"; # repeat with a larger char |
2658 | my $r2 = ""; |
2659 | while ($t2 =~ / \G ( .+? ) \n\s+ ( .+? ) ( $ | \n\s+ ) /xgcs) { |
2660 | $r2 .= $1 . $2; |
2661 | } |
2662 | $r2 =~ s/\x{100}//; |
5486206c |
2663 | print $r1 eq $r2 ? "ok 850\n" : "not ok 850\n"; |
def8e4ea |
2664 | } |
e54858b0 |
2665 | |
2666 | { |
2667 | print "# Unicode lookbehind\n"; |
2668 | |
5486206c |
2669 | print "A\x{100}B" =~ /(?<=A.)B/ ? "ok 851\n" : "not ok 851\n"; |
2670 | print "A\x{200}\x{300}B" =~ /(?<=A..)B/ ? "ok 852\n" : "not ok 852\n"; |
2671 | print "\x{400}AB" =~ /(?<=\x{400}.)B/ ? "ok 853\n" : "not ok 853\n"; |
2672 | print "\x{500\x{600}}B" =~ /(?<=\x{500}.)B/ ? "ok 854\n" : "not ok 854\n"; |
e54858b0 |
2673 | } |
c46248c1 |
2674 | |
2675 | { |
85fd1718 |
2676 | print "# UTF-8 hash keys and /\$/\n"; |
2677 | # http://www.xray.mpe.mpg.de/mailing-lists/perl5-porters/2002-01/msg01327.html |
2678 | |
2679 | my $u = "a\x{100}"; |
2680 | my $v = substr($u,0,1); |
2681 | my $w = substr($u,1,1); |
2682 | my %u = ( $u => $u, $v => $v, $w => $w ); |
8d21bda2 |
2683 | my $i = 855; |
85fd1718 |
2684 | for (keys %u) { |
2685 | my $m1 = /^\w*$/ ? 1 : 0; |
2686 | my $m2 = $u{$_}=~/^\w*$/ ? 1 : 0; |
2687 | print $m1 == $m2 ? "ok $i\n" : "not ok $i # $m1 $m2\n"; |
2688 | $i++; |
2689 | } |
2690 | } |
8d21bda2 |
2691 | |
2692 | { |
2693 | print "# [ID 20020124.005]\n"; |
2694 | # Fixed by #14795. |
2695 | my $i = 858; |
2696 | for my $char ("a", "\x{df}", "\x{100}"){ |
2697 | $x = "$char b $char"; |
2698 | $x =~ s{($char)}{ |
2699 | "c" =~ /c/; |
2700 | "x"; |
2701 | }ge; |
2702 | print substr($x,0,1) eq substr($x,-1,1) ? |
2703 | "ok $i\n" : "not ok $i # debug: $x\n"; |
2704 | $i++; |
2705 | } |
2706 | } |
6e602e29 |
2707 | |
2708 | { |
2709 | print "# SEGV in s/// and UTF-8\n"; |
2710 | $s = "s#\x{100}" x 4; |
2711 | $s =~ s/[^\w]/ /g; |
2712 | print $s eq "s \x{100}" x 4 ? "ok 861\n" : "not ok 861\n"; |
2713 | } |
f272994b |
2714 | |
2715 | { |
2716 | print "# UTF-8 bug (maybe alreayd known?)\n"; |
2717 | my $u; |
2718 | |
2719 | $u = "foo"; |
2720 | $u =~ s/./\x{100}/g; |
2721 | print $u eq "\x{100}\x{100}\x{100}" ? "ok 862\n" : "not ok 862\n"; |
2722 | |
2723 | $u = "foobar"; |
2724 | $u =~ s/[ao]/\x{100}/g; |
2725 | print $u eq "f\x{100}\x{100}b\x{100}r" ? "ok 863\n" : "not ok 863\n"; |
2726 | |
2727 | $u =~ s/\x{100}/e/g; |
2728 | print $u eq "feeber" ? "ok 864\n" : "not ok 864\n"; |
2729 | } |
2730 | |
33b8afdf |
2731 | { |
2732 | print "# UTF-8 bug with s///\n"; |
2733 | # check utf8/non-utf8 mixtures |
2734 | # try to force all float/anchored check combinations |
2735 | my $c = "\x{100}"; |
2736 | my $test = 865; |
2737 | my $subst; |
2738 | for my $re ( |
2739 | "xx.*$c", "x.*$c$c", "$c.*xx", "$c$c.*x", "xx.*(?=$c)", "(?=$c).*xx", |
2740 | ) { |
2741 | print "xxx" =~ /$re/ ? "not ok $test\n" : "ok $test\n"; |
2742 | ++$test; |
2743 | print +($subst = "xxx") =~ s/$re// ? "not ok $test\n" : "ok $test\n"; |
2744 | ++$test; |
2745 | } |
2746 | for my $re ("xx.*$c*", "$c*.*xx") { |
2747 | print "xxx" =~ /$re/ ? "ok $test\n" : "not ok $test\n"; |
2748 | ++$test; |
2749 | ($subst = "xxx") =~ s/$re//; |
2750 | print $subst eq '' ? "ok $test\n" : "not ok $test\t# $subst\n"; |
2751 | ++$test; |
2752 | } |
2753 | for my $re ("xxy*", "y*xx") { |
2754 | print "xx$c" =~ /$re/ ? "ok $test\n" : "not ok $test\n"; |
2755 | ++$test; |
2756 | ($subst = "xx$c") =~ s/$re//; |
2757 | print $subst eq $c ? "ok $test\n" : "not ok $test\n"; |
2758 | ++$test; |
2759 | print "xy$c" =~ /$re/ ? "not ok $test\n" : "ok $test\n"; |
2760 | ++$test; |
2761 | print +($subst = "xy$c") =~ /$re/ ? "not ok $test\n" : "ok $test\n"; |
2762 | ++$test; |
2763 | } |
2764 | for my $re ("xy$c*z", "x$c*yz") { |
2765 | print "xyz" =~ /$re/ ? "ok $test\n" : "not ok $test\n"; |
2766 | ++$test; |
2767 | ($subst = "xyz") =~ s/$re//; |
2768 | print $subst eq '' ? "ok $test\n" : "not ok $test\n"; |
2769 | ++$test; |
2770 | } |
2771 | } |
574c8022 |
2772 | |
ff385a1b |
2773 | { |
2774 | print "# qr/.../x\n"; |
4c79aee6 |
2775 | my $test = 893; |
ff385a1b |
2776 | |
2777 | my $R = qr/ A B C # D E/x; |
2778 | |
2779 | print eval {"ABCDE" =~ $R} ? "ok $test\n" : "not ok $test\n"; |
2780 | $test++; |
2781 | |
2782 | print eval {"ABCDE" =~ m/$R/} ? "ok $test\n" : "not ok $test\n"; |
2783 | $test++; |
2784 | |
2785 | print eval {"ABCDE" =~ m/($R)/} ? "ok $test\n" : "not ok $test\n"; |
2786 | $test++; |
2787 | } |
bc45ce41 |
2788 | |
2789 | { |
2790 | print "# illegal Unicode properties\n"; |
4c79aee6 |
2791 | my $test = 896; |
bc45ce41 |
2792 | |
2793 | print eval { "a" =~ /\pq / } ? "not ok $test\n" : "ok $test\n"; |
2794 | $test++; |
2795 | |
2796 | print eval { "a" =~ /\p{qrst} / } ? "not ok $test\n" : "ok $test\n"; |
2797 | $test++; |
2798 | } |
d65afb4b |
2799 | |
2800 | { |
491fd90a |
2801 | print "# [ID 20020412.005] wrong pmop flags checked when empty pattern\n"; |
d65afb4b |
2802 | # requires reuse of last successful pattern |
2803 | my $test = 898; |
2804 | $test =~ /\d/; |
2805 | for (0 .. 1) { |
2806 | my $match = ?? + 0; |
2807 | if ($match != $_) { |
2808 | print "ok $test\n"; |
2809 | } else { |
2810 | printf "not ok %s\t# 'match once' %s on %s iteration\n", $test, |
2811 | $match ? 'succeeded' : 'failed', $_ ? 'second' : 'first'; |
2812 | } |
2813 | ++$test; |
2814 | } |
2815 | $test =~ /(\d)/; |
2816 | my $result = join '', $test =~ //g; |
2817 | if ($result eq $test) { |
2818 | print "ok $test\n"; |
2819 | } else { |
2820 | printf "not ok %s\t# expected '%s', got '%s'\n", $test, $test, $result; |
2821 | } |
2822 | ++$test; |
2823 | } |
491fd90a |
2824 | |
2825 | print "# user-defined character properties\n"; |
2826 | |
2827 | sub InKana1 { |
2828 | return <<'END'; |
2829 | 3040 309F |
2830 | 30A0 30FF |
2831 | END |
2832 | } |
2833 | |
2834 | sub InKana2 { |
2835 | return <<'END'; |
2836 | +utf8::InHiragana |
2837 | +utf8::InKatakana |
2838 | END |
2839 | } |
2840 | |
2841 | sub InKana3 { |
2842 | return <<'END'; |
2843 | +utf8::InHiragana |
2844 | +utf8::InKatakana |
2845 | -utf8::IsCn |
2846 | END |
2847 | } |
2848 | |
2849 | sub InNotKana { |
2850 | return <<'END'; |
2851 | !utf8::InHiragana |
2852 | -utf8::InKatakana |
2853 | +utf8::IsCn |
2854 | END |
2855 | } |
2856 | |
2857 | $test = 901; |
2858 | |
2859 | print "\x{3040}" =~ /\p{InKana1}/ ? "ok $test\n" : "not ok $test\n"; $test++; |
2860 | print "\x{303F}" =~ /\P{InKana1}/ ? "ok $test\n" : "not ok $test\n"; $test++; |
2861 | |
2862 | print "\x{3040}" =~ /\p{InKana2}/ ? "ok $test\n" : "not ok $test\n"; $test++; |
2863 | print "\x{303F}" =~ /\P{InKana2}/ ? "ok $test\n" : "not ok $test\n"; $test++; |
2864 | |
2865 | print "\x{3041}" =~ /\p{InKana3}/ ? "ok $test\n" : "not ok $test\n"; $test++; |
2866 | print "\x{3040}" =~ /\P{InKana3}/ ? "ok $test\n" : "not ok $test\n"; $test++; |
2867 | |
2868 | print "\x{3040}" =~ /\p{InNotKana}/ ? "ok $test\n" : "not ok $test\n"; $test++; |
2869 | print "\x{3041}" =~ /\P{InNotKana}/ ? "ok $test\n" : "not ok $test\n"; $test++; |
2870 | |
11ef8fdd |
2871 | sub InConsonant { # Not EBCDIC-aware. |
2872 | return <<EOF; |
2873 | 0061 007f |
2874 | -0061 |
2875 | -0065 |
2876 | -0069 |
2877 | -006f |
2878 | -0075 |
2879 | EOF |
2880 | } |
2881 | |
2882 | print "d" =~ /\p{InConsonant}/ ? "ok $test\n" : "not ok $test\n"; $test++; |
2883 | print "e" =~ /\P{InConsonant}/ ? "ok $test\n" : "not ok $test\n"; $test++; |
2884 | |
faf11cac |
2885 | { |
2886 | print "# [ID 20020630.002] utf8 regex only matches 32k\n"; |
2887 | $test = 911; |
2888 | for ([ 'byte', "\x{ff}" ], [ 'utf8', "\x{1ff}" ]) { |
2889 | my($type, $char) = @$_; |
2890 | for my $len (32000, 32768, 33000) { |
2891 | my $s = $char . "f" x $len; |
2892 | my $r = $s =~ /$char([f]*)/gc; |
2893 | print $r ? "ok $test\n" : "not ok $test\t# <$type x $len> fail\n"; |
2894 | ++$test; |
2895 | print +(!$r or pos($s) == $len + 1) ? "ok $test\n" |
2896 | : "not ok $test\t# <$type x $len> pos @{[ pos($s) ]}\n"; |
2897 | ++$test; |
2898 | } |
2899 | } |
2900 | } |
2901 | |
2902 | $test = 923; |
f3b1e556 |
2903 | |
2904 | $a = bless qr/foo/, 'Foo'; |
2905 | print(('goodfood' =~ $a ? '' : 'not '), |
2906 | "ok $test\t# reblessed qr// matches\n"); |
2907 | ++$test; |
2908 | |
2909 | print(($a eq '(?-xism:foo)' ? '' : 'not '), |
2910 | "ok $test\t# reblessed qr// stringizes\n"); |
2911 | ++$test; |
446eaa42 |
2912 | |
2913 | $x = "\x{3fe}"; |
cb50f42d |
2914 | $z=$y = "\317\276"; # $y is byte representation of $x |
2915 | |
446eaa42 |
2916 | $a = qr/$x/; |
2917 | print(($x =~ $a ? '' : 'not '), "ok $test - utf8 interpolation in qr//\n"); |
2918 | ++$test; |
2919 | |
2920 | print(("a$a" =~ $x ? '' : 'not '), |
cb50f42d |
2921 | "ok $test - stringifed qr// preserves utf8\n"); |
2922 | ++$test; |
2923 | |
2924 | print(("a$x" =~ /^a$a\z/ ? '' : 'not '), |
2925 | "ok $test - interpolated qr// preserves utf8\n"); |
2926 | ++$test; |
2927 | |
2928 | print(("a$x" =~ /^a(??{$a})\z/ ? '' : 'not '), |
2929 | "ok $test - postponed interpolation of qr// preserves utf8\n"); |
2930 | ++$test; |
2931 | |
ab01544f |
2932 | print((length(qr/##/x) == 12 ? '' : 'not '), |
2933 | "ok $test - ## in qr// doesn't corrupt memory [perl #17776]\n"); |
2934 | ++$test; |
2935 | |
cb50f42d |
2936 | { use re 'eval'; |
2937 | |
2938 | print(("$x$x" =~ /^$x(??{$x})\z/ ? '' : 'not '), |
2939 | "ok $test - postponed utf8 string in utf8 re matches utf8\n"); |
2940 | ++$test; |
2941 | |
2942 | print(("$y$x" =~ /^$y(??{$x})\z/ ? '' : 'not '), |
2943 | "ok $test - postponed utf8 string in non-utf8 re matches utf8\n"); |
446eaa42 |
2944 | ++$test; |
2945 | |
cb50f42d |
2946 | print(("$y$x" !~ /^$y(??{$y})\z/ ? '' : 'not '), |
2947 | "ok $test - postponed non-utf8 string in non-utf8 re doesn't match utf8\n"); |
446eaa42 |
2948 | ++$test; |
2949 | |
cb50f42d |
2950 | print(("$x$x" !~ /^$x(??{$y})\z/ ? '' : 'not '), |
2951 | "ok $test - postponed non-utf8 string in utf8 re doesn't match utf8\n"); |
446eaa42 |
2952 | ++$test; |
2953 | |
cb50f42d |
2954 | print(("$y$y" =~ /^$y(??{$y})\z/ ? '' : 'not '), |
2955 | "ok $test - postponed non-utf8 string in non-utf8 re matches non-utf8\n"); |
2956 | ++$test; |
2957 | |
2958 | print(("$x$y" =~ /^$x(??{$y})\z/ ? '' : 'not '), |
2959 | "ok $test - postponed non-utf8 string in utf8 re matches non-utf8\n"); |
2960 | ++$test; |
2961 | $y = $z; # reset $y after upgrade |
2962 | |
2963 | print(("$x$y" !~ /^$x(??{$x})\z/ ? '' : 'not '), |
2964 | "ok $test - postponed utf8 string in utf8 re doesn't match non-utf8\n"); |
2965 | ++$test; |
2966 | $y = $z; # reset $y after upgrade |
2967 | |
2968 | print(("$y$y" !~ /^$y(??{$x})\z/ ? '' : 'not '), |
2969 | "ok $test - postponed utf8 string in non-utf8 re doesn't match non-utf8\n"); |
2970 | ++$test; |
2971 | |
2972 | } # no re 'eval' |
2973 | |
2d92f8a0 |
2974 | print "# more user-defined character properties\n"; |
2975 | |
2976 | sub IsSyriac1 { |
2977 | return <<'END'; |
2978 | 0712 072C |
2979 | 0730 074A |
2980 | END |
2981 | } |
2982 | |
2983 | print "\x{0712}" =~ /\p{IsSyriac1}/ ? "ok $test\n" : "not ok $test\n"; $test++; |
2984 | print "\x{072F}" =~ /\P{IsSyriac1}/ ? "ok $test\n" : "not ok $test\n"; $test++; |
2985 | |
2986 | sub Syriac1 { |
2987 | return <<'END'; |
2988 | 0712 072C |
2989 | 0730 074A |
2990 | END |
2991 | } |
2992 | |
2993 | print "\x{0712}" =~ /\p{Syriac1}/ ? "ok $test\n" : "not ok $test\n"; $test++; |
2994 | print "\x{072F}" =~ /\P{Syriac1}/ ? "ok $test\n" : "not ok $test\n"; $test++; |
2995 | |
0e933229 |
2996 | { |
3a2263fe |
2997 | print "# Change #18179\n"; |
2998 | # previously failed with "panic: end_shift |
0e933229 |
2999 | my $s = "\x{100}" x 5; |
3000 | my $ok = $s =~ /(\x{100}{4})/; |
3001 | my($ord, $len) = (ord $1, length $1); |
3002 | print +($ok && $ord == 0x100 && $len == 4) |
3003 | ? "ok $test\n" : "not ok $test\t# $ok/$ord/$len\n"; |
3004 | ++$test; |
3005 | } |
3006 | |
f14c76ed |
3007 | { |
3008 | print "# [perl #15763]\n"; |
3009 | |
3010 | $a = "x\x{100}"; |
3011 | chop $a; # but leaves the UTF-8 flag |
3012 | $a .= "y"; # 1 byte before "y" |
3013 | |
3014 | ok($a =~ /^\C/, 'match one \C on 1-byte UTF-8'); |
3015 | ok($a =~ /^\C{1}/, 'match \C{1}'); |
3016 | |
3017 | ok($a =~ /^\Cy/, 'match \Cy'); |
3018 | ok($a =~ /^\C{1}y/, 'match \C{1}y'); |
3019 | |
3020 | $a = "\x{100}y"; # 2 bytes before "y" |
3021 | |
3022 | ok($a =~ /^\C/, 'match one \C on 2-byte UTF-8'); |
3023 | ok($a =~ /^\C{1}/, 'match \C{1}'); |
3024 | ok($a =~ /^\C\C/, 'match two \C'); |
3025 | ok($a =~ /^\C{2}/, 'match \C{2}'); |
3026 | |
3027 | ok($a =~ /^\C\C\C/, 'match three \C on 2-byte UTF-8 and a byte'); |
3028 | ok($a =~ /^\C{3}/, 'match \C{3}'); |
3029 | |
3030 | ok($a =~ /^\C\Cy/, 'match two \C'); |
3031 | ok($a =~ /^\C{2}y/, 'match \C{2}'); |
3032 | |
836995da |
3033 | ok($a !~ /^\C\C\Cy/, q{don't match three \Cy}); |
3034 | ok($a !~ /^\C{2}\Cy/, q{don't match \C{3}y}); |
f14c76ed |
3035 | |
3036 | $a = "\x{1000}y"; # 3 bytes before "y" |
3037 | |
3038 | ok($a =~ /^\C/, 'match one \C on three-byte UTF-8'); |
3039 | ok($a =~ /^\C{1}/, 'match \C{1}'); |
3040 | ok($a =~ /^\C\C/, 'match two \C'); |
3041 | ok($a =~ /^\C{2}/, 'match \C{2}'); |
3042 | ok($a =~ /^\C\C\C/, 'match three \C'); |
3043 | ok($a =~ /^\C{3}/, 'match \C{3}'); |
3044 | |
3045 | ok($a =~ /^\C\C\C\C/, 'match four \C on three-byte UTF-8 and a byte'); |
3046 | ok($a =~ /^\C{4}/, 'match \C{4}'); |
3047 | |
3048 | ok($a =~ /^\C\C\Cy/, 'match three \Cy'); |
3049 | ok($a =~ /^\C{3}y/, 'match \C{3}y'); |
3050 | |
836995da |
3051 | ok($a !~ /^\C\C\C\C\y/, q{don't match four \Cy}); |
3052 | ok($a !~ /^\C{4}y/, q{don't match \C{4}y}); |
f14c76ed |
3053 | } |
3054 | |
5dec093f |
3055 | $_ = 'aaaaaaaaaa'; |
3056 | utf8::upgrade($_); chop $_; $\="\n"; |
3057 | ok(/[^\s]+/, "m/[^\s]/ utf8"); |
3058 | ok(/[^\d]+/, "m/[^\d]/ utf8"); |
3059 | ok(($a = $_, $_ =~ s/[^\s]+/./g), "s/[^\s]/ utf8"); |
3060 | ok(($a = $_, $a =~ s/[^\d]+/./g), "s/[^\s]/ utf8"); |
f14c76ed |
3061 | |
ce97f9d6 |
3062 | ok("\x{100}" =~ /\x{100}/, "[perl #15397]"); |
3063 | ok("\x{100}" =~ /(\x{100})/, "[perl #15397]"); |
3064 | ok("\x{100}" =~ /(\x{100}){1}/, "[perl #15397]"); |
3065 | ok("\x{100}\x{100}" =~ /(\x{100}){2}/, "[perl #15397]"); |
3066 | ok("\x{100}\x{100}" =~ /(\x{100})(\x{100})/, "[perl #15397]"); |
3067 | |
fbeb8e69 |
3068 | $x = "CD"; |
3069 | $x =~ /(AB)*?CD/; |
3070 | ok(!defined $1, "[perl #7471]"); |
ce97f9d6 |
3071 | |
fbeb8e69 |
3072 | $x = "CD"; |
3073 | $x =~ /(AB)*CD/; |
3074 | ok(!defined $1, "[perl #7471]"); |
3075 | |
ec391688 |
3076 | $pattern = "^(b+?|a){1,2}c"; |
3077 | ok("bac" =~ /$pattern/ && $1 eq 'a', "[perl #3547]"); |
3078 | ok("bbac" =~ /$pattern/ && $1 eq 'a', "[perl #3547]"); |
3079 | ok("bbbac" =~ /$pattern/ && $1 eq 'a', "[perl #3547]"); |
3080 | ok("bbbbac" =~ /$pattern/ && $1 eq 'a', "[perl #3547]"); |
3081 | |
a30b2f1f |
3082 | { |
3083 | # [perl #18232] |
3084 | "\x{100}" =~ /(.)/; |
3085 | ok( $1 eq "\x{100}", '$1 is utf-8 [perl #18232]' ); |
3086 | { 'a' =~ /./; } |
3087 | ok( $1 eq "\x{100}", '$1 is still utf-8' ); |
3088 | ok( $1 ne "\xC4\x80", '$1 is not non-utf-8' ); |
3089 | } |
3090 | |
f119b0fb |
3091 | { |
3092 | use utf8; |
3093 | my $attr = 'Name-1' ; |
3094 | |
3095 | my $NormalChar = qr/[\p{IsDigit}\p{IsLower}\p{IsUpper}]/; |
3096 | my $NormalWord = qr/${NormalChar}+?/; |
3097 | my $PredNameHyphen = qr/^${NormalWord}(\-${NormalWord})*?$/; |
3098 | |
3099 | $attr =~ /^$/; |
3100 | ok( $attr =~ $PredNameHyphen, "[perl #19767] original test" ); |
3101 | } |
3102 | |
3103 | { |
3104 | use utf8; |
3105 | "a" =~ m/[b]/; |
3106 | ok ( "0" =~ /\p{N}+\z/, "[perl #19767] variant test" ); |
3107 | } |
3108 | |
faf82a0b |
3109 | { |
3110 | |
3111 | $p = 1; |
3112 | foreach (1,2,3,4) { |
3113 | $p++ if /(??{ $p })/ |
3114 | } |
3115 | ok ($p == 5, "[perl #20683] (??{ }) returns stale values"); |
3116 | { package P; $a=1; sub TIESCALAR { bless[] } sub FETCH { $a++ } } |
3117 | tie $p, P; |
3118 | foreach (1,2,3,4) { |
3119 | /(??{ $p })/ |
3120 | } |
3121 | ok ( $p == 5, "(??{ }) returns stale values"); |
3122 | } |
3123 | |
351208f1 |
3124 | { |
3125 | # Subject: Odd regexp behavior |
3126 | # From: Markus Kuhn <Markus.Kuhn@cl.cam.ac.uk> |
3127 | # Date: Wed, 26 Feb 2003 16:53:12 +0000 |
3128 | # Message-Id: <E18o4nw-0008Ly-00@wisbech.cl.cam.ac.uk> |
3129 | # To: perl-unicode@perl.org |
3130 | |
3131 | $x = "\x{2019}\nk"; $x =~ s/(\S)\n(\S)/$1 $2/sg; |
3132 | ok($x eq "\x{2019} k", "Markus Kuhn 2003-02-26"); |
3133 | |
3134 | $x = "b\nk"; $x =~ s/(\S)\n(\S)/$1 $2/sg; |
3135 | ok($x eq "b k", "Markus Kuhn 2003-02-26"); |
3136 | |
3137 | ok("\x{2019}" =~ /\S/, "Markus Kuhn 2003-02-26"); |
3138 | } |
3139 | |
91e09a61 |
3140 | { |
3141 | my $i; |
3142 | ok('-1-3-5-' eq join('', split /((??{$i++}))/, '-1-3-5-'), |
080c2dec |
3143 | "[perl #21411] (??{ .. }) corrupts split's stack"); |
3144 | split /(?{'WOW'})/, 'abc'; |
3145 | ok('a|b|c' eq join ('|', @_), |
3146 | "[perl #21411] (?{ .. }) version of the above"); |
3147 | } |
3148 | |
3149 | { |
3150 | split /(?{ split "" })/, "abc"; |
3151 | ok(1,'cache_re & "(?{": it dumps core in 5.6.1 & 5.8.0'); |
91e09a61 |
3152 | } |
3153 | |
c517dc2b |
3154 | { |
3155 | ok("\x{100}\n" =~ /\x{100}\n$/, "UTF8 length cache and fbm_compile"); |
3156 | } |
ec391688 |
3157 | |
16e1b944 |
3158 | { |
3159 | package Str; |
3160 | use overload q/""/ => sub { ${$_[0]}; }; |
3161 | sub new { my ($c, $v) = @_; bless \$v, $c; } |
3162 | |
3163 | package main; |
3164 | $_ = Str->new("a\x{100}/\x{100}b"); |
3165 | ok(join(":", /\b(.)\x{100}/g) eq "a:/", "re_intuit_start and PL_bostr"); |
3166 | } |
3167 | |
7ef91622 |
3168 | { |
3169 | $_ = "code: 'x' { '...' }\n"; study; |
3170 | my @x; push @x, $& while m/'[^\']*'/gx; |
3171 | ok(join(":", @x) eq "'x':'...'", |
3172 | "[perl #17757] Parse::RecDescent triggers infinite loop"); |
3173 | } |
3174 | |
14ebb1a2 |
3175 | { |
3176 | my $re = qq/^([^X]*)X/; |
3177 | utf8::upgrade($re); |
3178 | ok("\x{100}X" =~ /$re/, "S_cl_and ANYOF_UNICODE & ANYOF_INVERTED"); |
3179 | } |
3180 | |
f02c194e |
3181 | # bug #22354 |
3182 | sub func ($) { |
3183 | ok( "a\nb" !~ /^b/, $_[0] ); |
3184 | ok( "a\nb" =~ /^b/m, "$_[0] - with /m" ); |
3185 | } |
3186 | func "standalone"; |
3187 | $_ = "x"; s/x/func "in subst"/e; |
3188 | $_ = "x"; s/x/func "in multiline subst"/em; |
3189 | #$_ = "x"; /x(?{func "in regexp"})/; |
3190 | #$_ = "x"; /x(?{func "in multiline regexp"})/m; |
3191 | |
3192 | # last test 1004 |