split t/re/pat.t into new pieces
[p5sagit/p5-mst-13.2.git] / t / re / pat_rt_report.t
CommitLineData
e425a60b 1#!./perl
2#
3# This is a home for regular expression tests that don't fit into
4# the format supported by re/regexp.t. If you want to add a test
5# that does fit that format, add it to re/re_tests, not here.
6
7use strict;
8use warnings;
9use 5.010;
10
11
12sub run_tests;
13
14$| = 1;
15
e425a60b 16
17BEGIN {
18 chdir 't' if -d 't';
9d45b377 19 @INC = ('../lib','.');
20 do "re/ReTest.pl" or die $@;
e425a60b 21}
e425a60b 22
e425a60b 23
9d45b377 24plan tests => 2525; # Update this when adding/deleting tests.
e425a60b 25
9d45b377 26run_tests() unless caller;
e425a60b 27
28#
29# Tests start here.
30#
31sub run_tests {
32
e425a60b 33
34 {
35 local $BugId = '20000731.001';
36 ok "A \x{263a} B z C" =~ /A . B (??{ "z" }) C/,
37 "Match UTF-8 char in presense of (??{ })";
38 }
39
40
41 {
42 local $BugId = '20001021.005';
43 no warnings 'uninitialized';
44 ok undef =~ /^([^\/]*)(.*)$/, "Used to cause a SEGV";
45 }
46
e425a60b 47 {
48 local $Message = 'bug id 20001008.001';
49
50 my @x = ("stra\337e 138", "stra\337e 138");
51 for (@x) {
52 ok s/(\d+)\s*([\w\-]+)/$1 . uc $2/e;
53 ok my ($latin) = /^(.+)(?:\s+\d)/;
54 iseq $latin, "stra\337e";
55 ok $latin =~ s/stra\337e/straße/;
56 #
57 # Previous code follows, but outcommented - there were no tests.
58 #
59 # $latin =~ s/stra\337e/straße/; # \303\237 after the 2nd a
60 # use utf8; # needed for the raw UTF-8
61 # $latin =~ s!(s)tr(?:aß|s+e)!$1tr.!; # \303\237 after the a
62 }
63 }
64
65
66 {
e425a60b 67 local $BugId = '20001028.003';
68
69 # Fist half of the bug.
70 local $Message = 'HEBREW ACCENT QADMA matched by .*';
71 my $X = chr (1448);
72 ok my ($Y) = $X =~ /(.*)/;
73 iseq $Y, v1448;
74 iseq length ($Y), 1;
75
76 # Second half of the bug.
77 $Message = 'HEBREW ACCENT QADMA in replacement';
78 $X = '';
79 $X =~ s/^/chr(1488)/e;
80 iseq length $X, 1;
81 iseq ord ($X), 1488;
82 }
83
84
85 {
86 local $BugId = '20001108.001';
87 local $Message = 'Repeated s///';
88 my $X = "Szab\x{f3},Bal\x{e1}zs";
89 my $Y = $X;
90 $Y =~ s/(B)/$1/ for 0 .. 3;
91 iseq $Y, $X;
92 iseq $X, "Szab\x{f3},Bal\x{e1}zs";
93 }
94
95
96 {
97 local $BugId = '20000517.001';
98 local $Message = 's/// on UTF-8 string';
99 my $x = "\x{100}A";
100 $x =~ s/A/B/;
101 iseq $x, "\x{100}B";
102 iseq length $x, 2;
103 }
104
105
106 {
107 local $BugId = '20001230.002';
108 local $Message = '\C and É';
109 ok "École" =~ /^\C\C(.)/ && $1 eq 'c';
110 ok "École" =~ /^\C\C(c)/;
111 }
112
113
e425a60b 114 {
115 # The original bug report had 'no utf8' here but that was irrelevant.
116 local $BugId = '20010306.008';
117 local $Message = "Don't dump core";
118 my $a = "a\x{1234}";
119 ok $a =~ m/\w/; # used to core dump.
120 }
121
122
123 {
124 local $BugId = '20010410.006';
125 local $Message = '/g in scalar context';
126 for my $rx ('/(.*?)\{(.*?)\}/csg',
127 '/(.*?)\{(.*?)\}/cg',
128 '/(.*?)\{(.*?)\}/sg',
129 '/(.*?)\{(.*?)\}/g',
130 '/(.+?)\{(.+?)\}/csg',) {
131 my $i = 0;
132 my $input = "a{b}c{d}";
133 eval <<" --";
134 while (eval \$input =~ $rx) {
135 \$i ++;
136 }
137 --
138 iseq $i, 2;
139 }
140 }
141
e425a60b 142 {
143 local $BugId = "20010619.003";
144 # Amazingly vertical tabulator is the same in ASCII and EBCDIC.
145 for ("\n", "\t", "\014", "\r") {
146 ok !/[[:print:]]/, "'$_' not in [[:print:]]";
147 }
148 for (" ") {
149 ok /[[:print:]]/, "'$_' in [[:print:]]";
150 }
151 }
152
153
e425a60b 154
155 {
156 # [ID 20010814.004] pos() doesn't work when using =~m// in list context
157 local $BugId = '20010814.004';
158 $_ = "ababacadaea";
159 my $a = join ":", /b./gc;
160 my $b = join ":", /a./gc;
161 my $c = pos;
162 iseq "$a $b $c", 'ba:ba ad:ae 10', "pos() works with () = m//";
163 }
164
165
166 {
167 # [ID 20010407.006] matching utf8 return values from
168 # functions does not work
169 local $BugId = '20010407.006';
170 local $Message = 'UTF-8 return values from functions';
171 package ID_20010407_006;
172 sub x {"a\x{1234}"}
173 my $x = x;
174 my $y;
175 ::ok $x =~ /(..)/;
176 $y = $1;
177 ::ok length ($y) == 2 && $y eq $x;
178 ::ok x =~ /(..)/;
179 $y = $1;
180 ::ok length ($y) == 2 && $y eq $x;
181 }
182
e425a60b 183 {
184 # High bit bug -- japhy
185 my $x = "ab\200d";
186 ok $x =~ /.*?\200/, "High bit fine";
187 }
188
189
190 {
e425a60b 191 local $Message = 'UTF-8 hash keys and /$/';
192 # http://www.xray.mpe.mpg.de/mailing-lists/perl5-porters
193 # /2002-01/msg01327.html
194
195 my $u = "a\x{100}";
196 my $v = substr ($u, 0, 1);
197 my $w = substr ($u, 1, 1);
198 my %u = ($u => $u, $v => $v, $w => $w);
199 for (keys %u) {
200 my $m1 = /^\w*$/ ? 1 : 0;
201 my $m2 = $u {$_} =~ /^\w*$/ ? 1 : 0;
202 iseq $m1, $m2;
203 }
204 }
205
206
207 {
208 local $BugId = "20020124.005";
209 local $PatchId = "14795";
210 local $Message = "s///eg";
211
212 for my $char ("a", "\x{df}", "\x{100}") {
213 my $x = "$char b $char";
214 $x =~ s{($char)}{
215 "c" =~ /c/;
216 "x";
217 }ge;
218 iseq substr ($x, 0, 1), substr ($x, -1, 1);
219 }
220 }
221
222
223 {
e425a60b 224 local $BugId = "20020412.005";
225 local $Message = "Correct pmop flags checked when empty pattern";
226
227 # Requires reuse of last successful pattern.
228 my $num = 123;
229 $num =~ /\d/;
230 for (0 .. 1) {
231 my $match = ?? + 0;
232 ok $match != $_, $Message,
233 sprintf "'match one' %s on %s iteration" =>
234 $match ? 'succeeded' : 'failed',
235 $_ ? 'second' : 'first';
236 }
237 $num =~ /(\d)/;
238 my $result = join "" => $num =~ //g;
239 iseq $result, $num;
240 }
241
242
243 {
244 local $BugId = '20020630.002';
245 local $Message = 'UTF-8 regex matches above 32k';
246 for (['byte', "\x{ff}"], ['utf8', "\x{1ff}"]) {
247 my ($type, $char) = @$_;
248 for my $len (32000, 32768, 33000) {
249 my $s = $char . "f" x $len;
250 my $r = $s =~ /$char([f]*)/gc;
251 ok $r, $Message, "<$type x $len>";
252 ok !$r || pos ($s) == $len + 1, $Message,
253 "<$type x $len>; pos = @{[pos $s]}";
254 }
255 }
256 }
257
258
259 {
260 our $a = bless qr /foo/ => 'Foo';
261 ok 'goodfood' =~ $a, "Reblessed qr // matches";
262 iseq $a, '(?-xism:foo)', "Reblessed qr // stringifies";
263 my $x = "\x{3fe}";
264 my $z = my $y = "\317\276"; # Byte representation of $x
265 $a = qr /$x/;
266 ok $x =~ $a, "UTF-8 interpolation in qr //";
267 ok "a$a" =~ $x, "Stringified qr // preserves UTF-8";
268 ok "a$x" =~ /^a$a\z/, "Interpolated qr // preserves UTF-8";
269 ok "a$x" =~ /^a(??{$a})\z/,
270 "Postponed interpolation of qr // preserves UTF-8";
271 {
272 local $BugId = '17776';
273 iseq length qr /##/x, 12, "## in qr // doesn't corrupt memory";
274 }
275 {
276 use re 'eval';
277 ok "$x$x" =~ /^$x(??{$x})\z/,
278 "Postponed UTF-8 string in UTF-8 re matches UTF-8";
279 ok "$y$x" =~ /^$y(??{$x})\z/,
280 "Postponed UTF-8 string in non-UTF-8 re matches UTF-8";
281 ok "$y$x" !~ /^$y(??{$y})\z/,
282 "Postponed non-UTF-8 string in non-UTF-8 re doesn't match UTF-8";
283 ok "$x$x" !~ /^$x(??{$y})\z/,
284 "Postponed non-UTF-8 string in UTF-8 re doesn't match UTF-8";
285 ok "$y$y" =~ /^$y(??{$y})\z/,
286 "Postponed non-UTF-8 string in non-UTF-8 re matches non-UTF8";
287 ok "$x$y" =~ /^$x(??{$y})\z/,
288 "Postponed non-UTF-8 string in UTF-8 re matches non-UTF8";
289
290 $y = $z; # Reset $y after upgrade.
291 ok "$x$y" !~ /^$x(??{$x})\z/,
292 "Postponed UTF-8 string in UTF-8 re doesn't match non-UTF-8";
293 ok "$y$y" !~ /^$y(??{$x})\z/,
294 "Postponed UTF-8 string in non-UTF-8 re doesn't match non-UTF-8";
295 }
296 }
297
298
299 {
300 local $PatchId = '18179';
301 my $s = "\x{100}" x 5;
302 my $ok = $s =~ /(\x{100}{4})/;
303 my ($ord, $len) = (ord $1, length $1);
304 ok $ok && $ord == 0x100 && $len == 4, "No panic: end_shift";
305 }
306
307
308 {
309 local $BugId = '15763';
310 our $a = "x\x{100}";
311 chop $a; # Leaves the UTF-8 flag
312 $a .= "y"; # 1 byte before 'y'.
313
314 ok $a =~ /^\C/, 'match one \C on 1-byte UTF-8';
315 ok $a =~ /^\C{1}/, 'match \C{1}';
316
317 ok $a =~ /^\Cy/, 'match \Cy';
318 ok $a =~ /^\C{1}y/, 'match \C{1}y';
319
320 ok $a !~ /^\C\Cy/, q {don't match two \Cy};
321 ok $a !~ /^\C{2}y/, q {don't match \C{2}y};
322
323 $a = "\x{100}y"; # 2 bytes before "y"
324
325 ok $a =~ /^\C/, 'match one \C on 2-byte UTF-8';
326 ok $a =~ /^\C{1}/, 'match \C{1}';
327 ok $a =~ /^\C\C/, 'match two \C';
328 ok $a =~ /^\C{2}/, 'match \C{2}';
329
330 ok $a =~ /^\C\C\C/, 'match three \C on 2-byte UTF-8 and a byte';
331 ok $a =~ /^\C{3}/, 'match \C{3}';
332
333 ok $a =~ /^\C\Cy/, 'match two \C';
334 ok $a =~ /^\C{2}y/, 'match \C{2}';
335
336 ok $a !~ /^\C\C\Cy/, q {don't match three \Cy};
337 ok $a !~ /^\C{2}\Cy/, q {don't match \C{2}\Cy};
338 ok $a !~ /^\C{3}y/, q {don't match \C{3}y};
339
340 $a = "\x{1000}y"; # 3 bytes before "y"
341
342 ok $a =~ /^\C/, 'match one \C on three-byte UTF-8';
343 ok $a =~ /^\C{1}/, 'match \C{1}';
344 ok $a =~ /^\C\C/, 'match two \C';
345 ok $a =~ /^\C{2}/, 'match \C{2}';
346 ok $a =~ /^\C\C\C/, 'match three \C';
347 ok $a =~ /^\C{3}/, 'match \C{3}';
348
349 ok $a =~ /^\C\C\C\C/, 'match four \C on three-byte UTF-8 and a byte';
350 ok $a =~ /^\C{4}/, 'match \C{4}';
351
352 ok $a =~ /^\C\C\Cy/, 'match three \Cy';
353 ok $a =~ /^\C{3}y/, 'match \C{3}y';
354
355 ok $a !~ /^\C\C\C\Cy/, q {don't match four \Cy};
356 ok $a !~ /^\C{4}y/, q {don't match \C{4}y};
357 }
358
359
360 {
e425a60b 361 local $BugId = '15397';
362 local $Message = 'UTF-8 matching';
363 ok "\x{100}" =~ /\x{100}/;
364 ok "\x{100}" =~ /(\x{100})/;
365 ok "\x{100}" =~ /(\x{100}){1}/;
366 ok "\x{100}\x{100}" =~ /(\x{100}){2}/;
367 ok "\x{100}\x{100}" =~ /(\x{100})(\x{100})/;
368 }
369
370
371 {
372 local $BugId = '7471';
373 local $Message = 'Neither ()* nor ()*? sets $1 when matched 0 times';
374 local $_ = 'CD';
375 ok /(AB)*?CD/ && !defined $1;
376 ok /(AB)*CD/ && !defined $1;
377 }
378
379
380 {
381 local $BugId = '3547';
382 local $Message = "Caching shouldn't prevent match";
383 my $pattern = "^(b+?|a){1,2}c";
384 ok "bac" =~ /$pattern/ && $1 eq 'a';
385 ok "bbac" =~ /$pattern/ && $1 eq 'a';
386 ok "bbbac" =~ /$pattern/ && $1 eq 'a';
387 ok "bbbbac" =~ /$pattern/ && $1 eq 'a';
388 }
389
390
391
392 {
393 local $BugId = '18232';
394 local $Message = '$1 should keep UTF-8 ness';
395 ok "\x{100}" =~ /(.)/;
396 iseq $1, "\x{100}", '$1 is UTF-8';
397 { 'a' =~ /./; }
398 iseq $1, "\x{100}", '$1 is still UTF-8';
399 isneq $1, "\xC4\x80", '$1 is not non-UTF-8';
400 }
401
402
403 {
404 local $BugId = '19767';
405 local $Message = "Optimizer doesn't prematurely reject match";
406 use utf8;
407
408 my $attr = 'Name-1';
409 my $NormalChar = qr /[\p{IsDigit}\p{IsLower}\p{IsUpper}]/;
410 my $NormalWord = qr /${NormalChar}+?/;
411 my $PredNameHyphen = qr /^${NormalWord}(\-${NormalWord})*?$/;
412
413 $attr =~ /^$/;
414 ok $attr =~ $PredNameHyphen; # Original test.
415
416 "a" =~ m/[b]/;
417 ok "0" =~ /\p{N}+\z/; # Variant.
418 }
419
420
421 {
422 local $BugId = '20683';
423 local $Message = "(??{ }) doesn't return stale values";
424 our $p = 1;
425 foreach (1, 2, 3, 4) {
426 $p ++ if /(??{ $p })/
427 }
428 iseq $p, 5;
429
430 {
431 package P;
432 $a = 1;
433 sub TIESCALAR {bless []}
434 sub FETCH {$a ++}
435 }
436 tie $p, "P";
437 foreach (1, 2, 3, 4) {
438 /(??{ $p })/
439 }
440 iseq $p, 5;
441 }
442
443
444 {
445 # Subject: Odd regexp behavior
446 # From: Markus Kuhn <Markus.Kuhn@cl.cam.ac.uk>
447 # Date: Wed, 26 Feb 2003 16:53:12 +0000
448 # Message-Id: <E18o4nw-0008Ly-00@wisbech.cl.cam.ac.uk>
449 # To: perl-unicode@perl.org
450
451 local $Message = 'Markus Kuhn 2003-02-26';
452
453 my $x = "\x{2019}\nk";
454 ok $x =~ s/(\S)\n(\S)/$1 $2/sg;
455 ok $x eq "\x{2019} k";
456
457 $x = "b\nk";
458 ok $x =~ s/(\S)\n(\S)/$1 $2/sg;
459 ok $x eq "b k";
460
461 ok "\x{2019}" =~ /\S/;
462 }
463
464
465 {
466 local $BugId = '21411';
467 local $Message = "(??{ .. }) in split doesn't corrupt its stack";
468 our $i;
469 ok '-1-3-5-' eq join '', split /((??{$i++}))/, '-1-3-5-';
470 no warnings 'syntax';
471 @_ = split /(?{'WOW'})/, 'abc';
472 local $" = "|";
473 iseq "@_", "a|b|c";
474 }
475
476
477 {
478 # XXX DAPM 13-Apr-06. Recursive split is still broken. It's only luck it
479 # hasn't been crashing. Disable this test until it is fixed properly.
480 # XXX also check what it returns rather than just doing ok(1,...)
481 # split /(?{ split "" })/, "abc";
482 local $TODO = "Recursive split is still broken";
483 ok 0, 'cache_re & "(?{": it dumps core in 5.6.1 & 5.8.0';
484 }
485
486
487 {
e425a60b 488 local $BugId = '17757';
489 $_ = "code: 'x' { '...' }\n"; study;
490 my @x; push @x, $& while m/'[^\']*'/gx;
491 local $" = ":";
492 iseq "@x", "'x':'...'", "Parse::RecDescent triggered infinite loop";
493 }
494
495
496 {
e425a60b 497 local $BugId = '22354';
498 sub func ($) {
499 ok "a\nb" !~ /^b/, "Propagated modifier; $_[0]";
500 ok "a\nb" =~ /^b/m, "Propagated modifier; $_[0] - with /m";
501 }
502 func "standalone";
503 $_ = "x"; s/x/func "in subst"/e;
504 $_ = "x"; s/x/func "in multiline subst"/em;
505
506 #
507 # Next two give 'panic: malloc'.
508 # Outcommented, using two TODOs.
509 #
510 local $TODO = 'panic: malloc';
511 local $Message = 'Postponed regexp and propaged modifier';
512 # ok 0 for 1 .. 2;
513 SKIP: {
514 skip "panic: malloc", 2;
515 $_ = "x"; /x(?{func "in regexp"})/;
516 $_ = "x"; /x(?{func "in multiline regexp"})/m;
517 }
518 }
519
520
521 {
522 local $BugId = '19049';
523 $_ = "abcdef\n";
524 my @x = m/./g;
525 iseq "abcde", $`, 'Global match sets $`';
526 }
527
528
529 {
e425a60b 530 # [perl #23769] Unicode regex broken on simple example
531 # regrepeat() didn't handle UTF-8 EXACT case right.
532 local $BugId = '23769';
533 my $Mess = 'regrepeat() handles UTF-8 EXACT case right';
534 local $Message = $Mess;
535
536 my $s = "\x{a0}\x{a0}\x{a0}\x{100}"; chop $s;
537
538 ok $s =~ /\x{a0}/;
539 ok $s =~ /\x{a0}+/;
540 ok $s =~ /\x{a0}\x{a0}/;
541
542 $Message = "$Mess (easy variant)";
543 ok "aaa\x{100}" =~ /(a+)/;
544 iseq $1, "aaa";
545
546 $Message = "$Mess (easy invariant)";
547 ok "aaa\x{100} " =~ /(a+?)/;
548 iseq $1, "a";
549
550 $Message = "$Mess (regrepeat variant)";
551 ok "\xa0\xa0\xa0\x{100} " =~ /(\xa0+?)/;
552 iseq $1, "\xa0";
553
554 $Message = "$Mess (regrepeat invariant)";
555 ok "\xa0\xa0\xa0\x{100}" =~ /(\xa0+)/;
556 iseq $1, "\xa0\xa0\xa0";
557
558 $Message = "$Mess (hard variant)";
559 ok "\xa0\xa1\xa0\xa1\xa0\xa1\x{100}" =~ /((?:\xa0\xa1)+?)/;
560 iseq $1, "\xa0\xa1";
561
562 $Message = "$Mess (hard invariant)";
563 ok "ababab\x{100} " =~ /((?:ab)+)/;
564 iseq $1, 'ababab';
565
566 ok "\xa0\xa1\xa0\xa1\xa0\xa1\x{100}" =~ /((?:\xa0\xa1)+)/;
567 iseq $1, "\xa0\xa1\xa0\xa1\xa0\xa1";
568
569 ok "ababab\x{100} " =~ /((?:ab)+?)/;
570 iseq $1, "ab";
571
572 $Message = "Don't match first byte of UTF-8 representation";
573 ok "\xc4\xc4\xc4" !~ /(\x{100}+)/;
574 ok "\xc4\xc4\xc4" !~ /(\x{100}+?)/;
575 ok "\xc4\xc4\xc4" !~ /(\x{100}++)/;
576 }
577
578
579 {
e425a60b 580 # perl panic: pp_match start/end pointers
581 local $BugId = '25269';
582 iseq "a-bc", eval {my ($x, $y) = "bca" =~ /^(?=.*(a)).*(bc)/; "$x-$y"},
583 'Captures can move backwards in string';
584 }
585
586
587 {
588 local $BugId = '27940'; # \cA not recognized in character classes
589 ok "a\cAb" =~ /\cA/, '\cA in pattern';
590 ok "a\cAb" =~ /[\cA]/, '\cA in character class';
591 ok "a\cAb" =~ /[\cA-\cB]/, '\cA in character class range';
592 ok "abc" =~ /[^\cA-\cB]/, '\cA in negated character class range';
593 ok "a\cBb" =~ /[\cA-\cC]/, '\cB in character class range';
594 ok "a\cCbc" =~ /[^\cA-\cB]/, '\cC in negated character class range';
595 ok "a\cAb" =~ /(??{"\cA"})/, '\cA in ??{} pattern';
596 ok "ab" !~ /a\cIb/x, '\cI in pattern';
597 }
598
599
600 {
9d45b377 601 # perl #28532: optional zero-width match at end of string is ignored
602 local $BugId = '28532';
603 ok "abc" =~ /^abc(\z)?/ && defined($1),
604 'Optional zero-width match at end of string';
605 ok "abc" =~ /^abc(\z)??/ && !defined($1),
606 'Optional zero-width match at end of string';
e425a60b 607 }
608
609
e425a60b 610
611 {
612 local $BugId = '36207';
613 my $utf8 = "\xe9\x{100}"; chop $utf8;
614 my $latin1 = "\xe9";
615
616 ok $utf8 =~ /\xe9/i, "utf8/latin";
617 ok $utf8 =~ /$latin1/i, "utf8/latin runtime";
618 ok $utf8 =~ /(abc|\xe9)/i, "utf8/latin trie";
619 ok $utf8 =~ /(abc|$latin1)/i, "utf8/latin trie runtime";
620
621 ok "\xe9" =~ /$utf8/i, "latin/utf8";
622 ok "\xe9" =~ /(abc|$utf8)/i, "latin/utf8 trie";
623 ok $latin1 =~ /$utf8/i, "latin/utf8 runtime";
624 ok $latin1 =~ /(abc|$utf8)/i, "latin/utf8 trie runtime";
625 }
626
627
628 {
629 local $BugId = '37038';
630 my $s = "abcd";
631 $s =~ /(..)(..)/g;
632 $s = $1;
633 $s = $2;
634 iseq $2, 'cd',
635 "Assigning to original string does not corrupt match vars";
636 }
637
638
639 {
9d45b377 640 local $PatchId = '26410';
e425a60b 641 {
642 package wooosh;
643 sub gloople {"!"}
644 }
645 my $aeek = bless {} => 'wooosh';
646 eval_ok sub {$aeek -> gloople () =~ /(.)/g},
647 "//g match against return value of sub";
648
649 sub gloople {"!"}
650 eval_ok sub {gloople () =~ /(.)/g},
651 "26410 didn't affect sub calls for some reason";
652 }
653
654
655 {
656 local $TODO = "See changes 26925-26928, which reverted change 26410";
657 {
658 package lv;
659 our $var = "abc";
660 sub variable : lvalue {$var}
661 }
662 my $o = bless [] => 'lv';
663 my $f = "";
664 my $r = eval {
665 for (1 .. 2) {
666 $f .= $1 if $o -> variable =~ /(.)/g;
667 }
668 1;
669 };
670 if ($r) {
671 iseq $f, "ab", "pos() retained between calls";
672 }
673 else {
674 local $TODO;
675 ok 0, "Code failed: $@";
676 }
677
678 our $var = "abc";
679 sub variable : lvalue {$var}
680 my $g = "";
681 my $s = eval {
682 for (1 .. 2) {
683 $g .= $1 if variable =~ /(.)/g;
684 }
685 1;
686 };
687 if ($s) {
688 iseq $g, "ab", "pos() retained between calls";
689 }
690 else {
691 local $TODO;
692 ok 0, "Code failed: $@";
693 }
694 }
695
696
697 SKIP:
698 {
699 local $BugId = '37836';
700 skip "In EBCDIC" if $IS_EBCDIC;
701 no warnings 'utf8';
702 $_ = pack 'U0C2', 0xa2, 0xf8; # Ill-formed UTF-8
703 my $ret = 0;
704 eval_ok sub {!($ret = s/[\0]+//g)},
705 "Ill-formed UTF-8 doesn't match NUL in class";
706 }
707
708
709 {
710 # chr(65535) should be allowed in regexes
711 local $BugId = '38293';
712 no warnings 'utf8'; # To allow non-characters
713 my ($c, $r, $s);
714
715 $c = chr 0xffff;
716 $c =~ s/$c//g;
717 ok $c eq "", "U+FFFF, parsed as atom";
718
719 $c = chr 0xffff;
720 $r = "\\$c";
721 $c =~ s/$r//g;
722 ok $c eq "", "U+FFFF backslashed, parsed as atom";
723
724 $c = chr 0xffff;
725 $c =~ s/[$c]//g;
726 ok $c eq "", "U+FFFF, parsed in class";
727
728 $c = chr 0xffff;
729 $r = "[\\$c]";
730 $c =~ s/$r//g;
731 ok $c eq "", "U+FFFF backslashed, parsed in class";
732
733 $s = "A\x{ffff}B";
734 $s =~ s/\x{ffff}//i;
735 ok $s eq "AB", "U+FFFF, EXACTF";
736
737 $s = "\x{ffff}A";
738 $s =~ s/\bA//;
739 ok $s eq "\x{ffff}", "U+FFFF, BOUND";
740
741 $s = "\x{ffff}!";
742 $s =~ s/\B!//;
743 ok $s eq "\x{ffff}", "U+FFFF, NBOUND";
744 }
745
746
747 {
748 local $BugId = '39583';
749
750 # The printing characters
751 my @chars = ("A" .. "Z");
752 my $delim = ",";
753 my $size = 32771 - 4;
754 my $str = '';
755
756 # Create some random junk. Inefficient, but it works.
757 for (my $i = 0; $i < $size; $ i++) {
758 $str .= $chars [rand @chars];
759 }
760
761 $str .= ($delim x 4);
762 my $res;
763 my $matched;
764 ok $str =~ s/^(.*?)${delim}{4}//s, "Pattern matches";
765 iseq $str, "", "Empty string";
766 ok defined $1 && length ($1) == $size, '$1 is correct size';
767 }
768
769
770 {
771 local $BugId = '27940';
772 ok "\0-A" =~ /\c@-A/, '@- should not be interpolated in a pattern';
773 ok "\0\0A" =~ /\c@+A/, '@+ should not be interpolated in a pattern';
774 ok "X\@-A" =~ /X@-A/, '@- should not be interpolated in a pattern';
775 ok "X\@\@A" =~ /X@+A/, '@+ should not be interpolated in a pattern';
776
777 ok "X\0A" =~ /X\c@?A/, '\c@?';
778 ok "X\0A" =~ /X\c@*A/, '\c@*';
779 ok "X\0A" =~ /X\c@(A)/, '\c@(';
780 ok "X\0A" =~ /X(\c@)A/, '\c@)';
781 ok "X\0A" =~ /X\c@|ZA/, '\c@|';
782
783 ok "X\@A" =~ /X@?A/, '@?';
784 ok "X\@A" =~ /X@*A/, '@*';
785 ok "X\@A" =~ /X@(A)/, '@(';
786 ok "X\@A" =~ /X(@)A/, '@)';
787 ok "X\@A" =~ /X@|ZA/, '@|';
788
789 local $" = ','; # non-whitespace and non-RE-specific
790 ok 'abc' =~ /(.)(.)(.)/, 'The last successful match is bogus';
791 ok "A@+B" =~ /A@{+}B/, 'Interpolation of @+ in /@{+}/';
792 ok "A@-B" =~ /A@{-}B/, 'Interpolation of @- in /@{-}/';
793 ok "A@+B" =~ /A@{+}B/x, 'Interpolation of @+ in /@{+}/x';
794 ok "A@-B" =~ /A@{-}B/x, 'Interpolation of @- in /@{-}/x';
795 }
796
797
798 {
9d45b377 799 local $BugId = '50496';
e425a60b 800 my $s = 'foo bar baz';
801 my (@k, @v, @fetch, $res);
802 my $count = 0;
803 my @names = qw ($+{A} $+{B} $+{C});
804 if ($s =~ /(?<A>foo)\s+(?<B>bar)?\s+(?<C>baz)/) {
805 while (my ($k, $v) = each (%+)) {
806 $count++;
807 }
808 @k = sort keys (%+);
809 @v = sort values (%+);
810 $res = 1;
811 push @fetch,
812 ["$+{A}", "$1"],
813 ["$+{B}", "$2"],
814 ["$+{C}", "$3"],
815 ;
816 }
817 foreach (0 .. 2) {
818 if ($fetch [$_]) {
819 iseq $fetch [$_] [0], $fetch [$_] [1], $names [$_];
820 } else {
821 ok 0, $names[$_];
822 }
823 }
824 iseq $res, 1, "'$s' =~ /(?<A>foo)\\s+(?<B>bar)?\\s+(?<C>baz)/";
825 iseq $count, 3, "Got 3 keys in %+ via each";
826 iseq 0 + @k, 3, 'Got 3 keys in %+ via keys';
827 iseq "@k", "A B C", "Got expected keys";
828 iseq "@v", "bar baz foo", "Got expected values";
829 eval '
830 no warnings "uninitialized";
831 print for $+ {this_key_doesnt_exist};
832 ';
833 ok !$@, 'lvalue $+ {...} should not throw an exception';
834 }
835
836
837 {
838 #
839 # Almost the same as the block above, except that the capture is nested.
840 #
841 local $BugId = '50496';
842 my $s = 'foo bar baz';
843 my (@k, @v, @fetch, $res);
844 my $count = 0;
845 my @names = qw ($+{A} $+{B} $+{C} $+{D});
846 if ($s =~ /(?<D>(?<A>foo)\s+(?<B>bar)?\s+(?<C>baz))/) {
847 while (my ($k,$v) = each(%+)) {
848 $count++;
849 }
850 @k = sort keys (%+);
851 @v = sort values (%+);
852 $res = 1;
853 push @fetch,
854 ["$+{A}", "$2"],
855 ["$+{B}", "$3"],
856 ["$+{C}", "$4"],
857 ["$+{D}", "$1"],
858 ;
859 }
860 foreach (0 .. 3) {
861 if ($fetch [$_]) {
862 iseq $fetch [$_] [0], $fetch [$_] [1], $names [$_];
863 } else {
864 ok 0, $names [$_];
865 }
866 }
867 iseq $res, 1, "'$s' =~ /(?<D>(?<A>foo)\\s+(?<B>bar)?\\s+(?<C>baz))/";
868 iseq $count, 4, "Got 4 keys in %+ via each";
869 iseq @k, 4, 'Got 4 keys in %+ via keys';
870 iseq "@k", "A B C D", "Got expected keys";
871 iseq "@v", "bar baz foo foo bar baz", "Got expected values";
872 eval '
873 no warnings "uninitialized";
874 print for $+ {this_key_doesnt_exist};
875 ';
876 ok !$@,'lvalue $+ {...} should not throw an exception';
877 }
878
879
880 {
e425a60b 881 local $BugId = '36046';
882 my $str = 'abc';
883 my $count = 0;
884 my $mval = 0;
885 my $pval = 0;
886 while ($str =~ /b/g) {$mval = $#-; $pval = $#+; $count ++}
887 iseq $mval, 0, '@- should be empty';
888 iseq $pval, 0, '@+ should be empty';
889 iseq $count, 1, 'Should have matched once only';
890 }
891
892
e425a60b 893
894
895 {
896 local $BugId = '40684';
897 local $Message = '/m in precompiled regexp';
898 my $s = "abc\ndef";
899 my $rex = qr'^abc$'m;
900 ok $s =~ m/$rex/;
901 ok $s =~ m/^abc$/m;
902 }
903
904
905 {
e425a60b 906 local $BugId = '36909';
907 local $Message = '(?: ... )? should not lose $^R';
908 $^R = 'Nothing';
909 {
910 local $^R = "Bad";
911 ok 'x foofoo y' =~ m {
912 (foo) # $^R correctly set
913 (?{ "last regexp code result" })
914 }x;
915 iseq $^R, 'last regexp code result';
916 }
917 iseq $^R, 'Nothing';
918
919 {
920 local $^R = "Bad";
921
922 ok 'x foofoo y' =~ m {
923 (?:foo|bar)+ # $^R correctly set
924 (?{ "last regexp code result" })
925 }x;
926 iseq $^R, 'last regexp code result';
927 }
928 iseq $^R, 'Nothing';
929
930 {
931 local $^R = "Bad";
932 ok 'x foofoo y' =~ m {
933 (foo|bar)\1+ # $^R undefined
934 (?{ "last regexp code result" })
935 }x;
936 iseq $^R, 'last regexp code result';
937 }
938 iseq $^R, 'Nothing';
939
940 {
941 local $^R = "Bad";
942 ok 'x foofoo y' =~ m {
943 (foo|bar)\1 # This time without the +
944 (?{"last regexp code result"})
945 }x;
946 iseq $^R, 'last regexp code result';
947 }
948 iseq $^R, 'Nothing';
949 }
950
951
952 {
953 local $BugId = '22395';
954 local $Message = 'Match is linear, not quadratic';
955 our $count;
956 for my $l (10, 100, 1000) {
957 $count = 0;
958 ('a' x $l) =~ /(.*)(?{$count++})[bc]/;
959 local $TODO = "Should be L+1 not L*(L+3)/2 (L=$l)";
960 iseq $count, $l + 1;
961 }
962 }
963
964
965 {
966 local $BugId = '22614';
967 local $Message = '@-/@+ should not have undefined values';
968 local $_ = 'ab';
969 our @len = ();
970 /(.){1,}(?{push @len,0+@-})(.){1,}(?{})^/;
971 iseq "@len", "2 2 2";
972 }
973
974
975 {
976 local $BugId = '18209';
977 local $Message = '$& set on s///';
978 my $text = ' word1 word2 word3 word4 word5 word6 ';
979
980 my @words = ('word1', 'word3', 'word5');
981 my $count;
982 foreach my $word (@words) {
983 $text =~ s/$word\s//gi; # Leave a space to seperate words
984 # in the resultant str.
985 # The following block is not working.
986 if ($&) {
987 $count ++;
988 }
989 # End bad block
990 }
991 iseq $count, 3;
992 iseq $text, ' word2 word4 word6 ';
993 }
994
995
996 {
997 # RT#6893
998 local $BugId = '6893';
999 local $_ = qq (A\nB\nC\n);
1000 my @res;
1001 while (m#(\G|\n)([^\n]*)\n#gsx) {
1002 push @res, "$2";
1003 last if @res > 3;
1004 }
1005 iseq "@res", "A B C", "/g pattern shouldn't infinite loop";
1006 }
1007
1008
e425a60b 1009
1010 {
1011 local $BugId = '41010';
1012 local $Message = 'No optimizer bug';
1013 my @tails = ('', '(?(1))', '(|)', '()?');
1014 my @quants = ('*','+');
1015 my $doit = sub {
1016 my $pats = shift;
1017 for (@_) {
1018 for my $pat (@$pats) {
1019 for my $quant (@quants) {
1020 for my $tail (@tails) {
1021 my $re = "($pat$quant\$)$tail";
1022 ok /$re/ && $1 eq $_, "'$_' =~ /$re/";
1023 ok /$re/m && $1 eq $_, "'$_' =~ /$re/m";
1024 }
1025 }
1026 }
1027 }
1028 };
1029
1030 my @dpats = ('\d',
1031 '[1234567890]',
1032 '(1|[23]|4|[56]|[78]|[90])',
1033 '(?:1|[23]|4|[56]|[78]|[90])',
1034 '(1|2|3|4|5|6|7|8|9|0)',
1035 '(?:1|2|3|4|5|6|7|8|9|0)');
1036 my @spats = ('[ ]', ' ', '( |\t)', '(?: |\t)', '[ \t]', '\s');
1037 my @sstrs = (' ');
1038 my @dstrs = ('12345');
1039 $doit -> (\@spats, @sstrs);
1040 $doit -> (\@dpats, @dstrs);
1041 }
1042
1043
e425a60b 1044
1045 {
1046 local $BugId = '45605';
1047 # [perl #45605] Regexp failure with utf8-flagged and byte-flagged string
1048
1049 my $utf_8 = "\xd6schel";
1050 utf8::upgrade ($utf_8);
1051 $utf_8 =~ m {(\xd6|&Ouml;)schel};
1052 iseq $1, "\xd6", "Upgrade error";
1053 }
1054
1055 {
e425a60b 1056 # Regardless of utf8ness any character matches itself when
1057 # doing a case insensitive match. See also [perl #36207]
1058 local $BugId = '36207';
1059 for my $o (0 .. 255) {
1060 my @ch = (chr ($o), chr ($o));
1061 utf8::upgrade ($ch [1]);
1062 for my $u_str (0, 1) {
1063 for my $u_pat (0, 1) {
1064 ok $ch [$u_str] =~ /\Q$ch[$u_pat]\E/i,
1065 "\$c =~ /\$c/i : chr ($o) : u_str = $u_str u_pat = $u_pat";
1066 ok $ch [$u_str] =~ /\Q$ch[$u_pat]\E|xyz/i,
1067 "\$c=~/\$c|xyz/i : chr($o) : u_str = $u_str u_pat = $u_pat";
1068 }
1069 }
1070 }
1071 }
1072
1073
1074 {
e425a60b 1075 local $BugId = '49190';
1076 local $Message = '$REGMARK in replacement';
1077 our $REGMARK;
1078 my $_ = "A";
1079 ok s/(*:B)A/$REGMARK/;
1080 iseq $_, "B";
1081 $_ = "CCCCBAA";
1082 ok s/(*:X)A+|(*:Y)B+|(*:Z)C+/$REGMARK/g;
1083 iseq $_, "ZYX";
1084 }
1085
1086
1087 {
e425a60b 1088 local $BugId = '52658';
1089 local $Message = 'Substitution evaluation in list context';
1090 my $reg = '../xxx/';
1091 my @te = ($reg =~ m{^(/?(?:\.\./)*)},
1092 $reg =~ s/(x)/'b'/eg > 1 ? '##' : '++');
1093 iseq $reg, '../bbb/';
1094 iseq $te [0], '../';
1095 }
1096
1097 # This currently has to come before any "use encoding" in this file.
1098 {
1099 local $Message;
1100 local $BugId = '59342';
1101 must_warn 'qr/\400/', '^Use of octal value above 377';
1102 }
1103
1104
e425a60b 1105
1106 {
1107 local $BugId = '60034';
1108 my $a = "xyzt" x 8192;
1109 ok $a =~ /\A(?>[a-z])*\z/,
1110 '(?>) does not cause wrongness on long string';
1111 my $b = $a . chr 256;
1112 chop $b;
1113 {
1114 iseq $a, $b;
1115 }
1116 ok $b =~ /\A(?>[a-z])*\z/,
1117 '(?>) does not cause wrongness on long string with UTF-8';
1118 }
1119
1120
1121 #
1122 # Keep the following tests last -- they may crash perl
1123 #
1124 print "# Tests that follow may crash perl\n";
1125 {
1126 local $BugId = '19049/38869';
1127 local $Message = 'Pattern in a loop, failure should not ' .
1128 'affect previous success';
1129 my @list = (
1130 'ab cdef', # Matches regex
1131 ('e' x 40000 ) .'ab c' # Matches not, but 'ab c' matches part of it
1132 );
1133 my $y;
1134 my $x;
1135 foreach (@list) {
1136 m/ab(.+)cd/i; # The ignore-case seems to be important
1137 $y = $1; # Use $1, which might not be from the last match!
1138 $x = substr ($list [0], $- [0], $+ [0] - $- [0]);
1139 }
1140 iseq $y, ' ';
1141 iseq $x, 'ab cd';
1142 }
1143
1144
1145 {
1146 local $BugId = '24274';
1147
1148 ok (("a" x (2 ** 15 - 10)) =~ /^()(a|bb)*$/, "Recursive stack cracker");
1149 ok ((q(a)x 100) =~ /^(??{'(.)'x 100})/,
1150 "Regexp /^(??{'(.)'x 100})/ crashes older perls");
1151 }
1152
1153
1154 {
e425a60b 1155 # [perl #45337] utf8 + "[a]a{2}" + /$.../ = panic: sv_len_utf8 cache
1156 local $BugId = '45337';
1157 local ${^UTF8CACHE} = -1;
1158 local $Message = "Shouldn't panic";
1159 my $s = "[a]a{2}";
1160 utf8::upgrade $s;
1161 ok "aaa" =~ /$s/;
1162 }
1163 {
1164 local $BugId = '57042';
1165 local $Message = "Check if tree logic breaks \$^R";
1166 my $cond_re = qr/\s*
1167 \s* (?:
1168 \( \s* A (?{1})
1169 | \( \s* B (?{2})
1170 )
1171 /x;
1172 my @res;
1173 for my $line ("(A)","(B)") {
1174 if ($line =~ m/$cond_re/) {
1175 push @res, $^R ? "#$^R" : "UNDEF";
1176 }
1177 }
1178 iseq "@res","#1 #2";
1179 }
1180 {
1181 no warnings 'closure';
1182 my $re = qr/A(??{"1"})/;
1183 ok "A1B" =~ m/^((??{ $re }))((??{"B"}))$/;
1184 ok $1 eq "A1";
1185 ok $2 eq "B";
1186 }
1187
1188
e425a60b 1189
1190 # This only works under -DEBUGGING because it relies on an assert().
1191 {
1192 local $BugId = '60508';
1193 local $Message = "Check capture offset re-entrancy of utf8 code.";
1194
1195 sub fswash { $_[0] =~ s/([>X])//g; }
1196
1197 my $k1 = "." x 4 . ">>";
1198 fswash($k1);
1199
1200 my $k2 = "\x{f1}\x{2022}";
1201 $k2 =~ s/([\360-\362])/>/g;
1202 fswash($k2);
1203
1204 iseq($k2, "\x{2022}", "utf8::SWASHNEW doesn't cause capture leaks");
1205 }
1206
1207
1208 {
1209 local $BugId = 65372; # minimal CURLYM limited to 32767 matches
1210 my @pat = (
1211 qr{a(x|y)*b}, # CURLYM
1212 qr{a(x|y)*?b}, # .. with minmod
1213 qr{a([wx]|[yz])*b}, # .. and without tries
1214 qr{a([wx]|[yz])*?b},
1215 );
1216 my $len = 32768;
1217 my $s = join '', 'a', 'x' x $len, 'b';
1218 for my $pat (@pat) {
1219 ok($s =~ $pat, $pat);
1220 }
1221 }
e425a60b 1222} # End of sub run_tests
1223
12241;