Commit | Line | Data |
e425a60b |
1 | #!./perl |
2 | # |
3 | # This is a home for regular expression tests that don't fit into |
4 | # the format supported by re/regexp.t. If you want to add a test |
5 | # that does fit that format, add it to re/re_tests, not here. |
6 | |
7 | use strict; |
8 | use warnings; |
9 | use 5.010; |
10 | |
11 | |
12 | sub run_tests; |
13 | |
14 | $| = 1; |
15 | |
e425a60b |
16 | |
17 | BEGIN { |
18 | chdir 't' if -d 't'; |
9d45b377 |
19 | @INC = ('../lib','.'); |
20 | do "re/ReTest.pl" or die $@; |
e425a60b |
21 | } |
e425a60b |
22 | |
e425a60b |
23 | |
9d45b377 |
24 | plan tests => 2525; # Update this when adding/deleting tests. |
e425a60b |
25 | |
9d45b377 |
26 | run_tests() unless caller; |
e425a60b |
27 | |
28 | # |
29 | # Tests start here. |
30 | # |
31 | sub run_tests { |
32 | |
e425a60b |
33 | |
34 | { |
35 | local $BugId = '20000731.001'; |
36 | ok "A \x{263a} B z C" =~ /A . B (??{ "z" }) C/, |
37 | "Match UTF-8 char in presense of (??{ })"; |
38 | } |
39 | |
40 | |
41 | { |
42 | local $BugId = '20001021.005'; |
43 | no warnings 'uninitialized'; |
44 | ok undef =~ /^([^\/]*)(.*)$/, "Used to cause a SEGV"; |
45 | } |
46 | |
e425a60b |
47 | { |
48 | local $Message = 'bug id 20001008.001'; |
49 | |
50 | my @x = ("stra\337e 138", "stra\337e 138"); |
51 | for (@x) { |
52 | ok s/(\d+)\s*([\w\-]+)/$1 . uc $2/e; |
53 | ok my ($latin) = /^(.+)(?:\s+\d)/; |
54 | iseq $latin, "stra\337e"; |
55 | ok $latin =~ s/stra\337e/straße/; |
56 | # |
57 | # Previous code follows, but outcommented - there were no tests. |
58 | # |
59 | # $latin =~ s/stra\337e/straße/; # \303\237 after the 2nd a |
60 | # use utf8; # needed for the raw UTF-8 |
61 | # $latin =~ s!(s)tr(?:aß|s+e)!$1tr.!; # \303\237 after the a |
62 | } |
63 | } |
64 | |
65 | |
66 | { |
e425a60b |
67 | local $BugId = '20001028.003'; |
68 | |
69 | # Fist half of the bug. |
70 | local $Message = 'HEBREW ACCENT QADMA matched by .*'; |
71 | my $X = chr (1448); |
72 | ok my ($Y) = $X =~ /(.*)/; |
73 | iseq $Y, v1448; |
74 | iseq length ($Y), 1; |
75 | |
76 | # Second half of the bug. |
77 | $Message = 'HEBREW ACCENT QADMA in replacement'; |
78 | $X = ''; |
79 | $X =~ s/^/chr(1488)/e; |
80 | iseq length $X, 1; |
81 | iseq ord ($X), 1488; |
82 | } |
83 | |
84 | |
85 | { |
86 | local $BugId = '20001108.001'; |
87 | local $Message = 'Repeated s///'; |
88 | my $X = "Szab\x{f3},Bal\x{e1}zs"; |
89 | my $Y = $X; |
90 | $Y =~ s/(B)/$1/ for 0 .. 3; |
91 | iseq $Y, $X; |
92 | iseq $X, "Szab\x{f3},Bal\x{e1}zs"; |
93 | } |
94 | |
95 | |
96 | { |
97 | local $BugId = '20000517.001'; |
98 | local $Message = 's/// on UTF-8 string'; |
99 | my $x = "\x{100}A"; |
100 | $x =~ s/A/B/; |
101 | iseq $x, "\x{100}B"; |
102 | iseq length $x, 2; |
103 | } |
104 | |
105 | |
106 | { |
107 | local $BugId = '20001230.002'; |
108 | local $Message = '\C and É'; |
109 | ok "École" =~ /^\C\C(.)/ && $1 eq 'c'; |
110 | ok "École" =~ /^\C\C(c)/; |
111 | } |
112 | |
113 | |
e425a60b |
114 | { |
115 | # The original bug report had 'no utf8' here but that was irrelevant. |
116 | local $BugId = '20010306.008'; |
117 | local $Message = "Don't dump core"; |
118 | my $a = "a\x{1234}"; |
119 | ok $a =~ m/\w/; # used to core dump. |
120 | } |
121 | |
122 | |
123 | { |
124 | local $BugId = '20010410.006'; |
125 | local $Message = '/g in scalar context'; |
126 | for my $rx ('/(.*?)\{(.*?)\}/csg', |
127 | '/(.*?)\{(.*?)\}/cg', |
128 | '/(.*?)\{(.*?)\}/sg', |
129 | '/(.*?)\{(.*?)\}/g', |
130 | '/(.+?)\{(.+?)\}/csg',) { |
131 | my $i = 0; |
132 | my $input = "a{b}c{d}"; |
133 | eval <<" --"; |
134 | while (eval \$input =~ $rx) { |
135 | \$i ++; |
136 | } |
137 | -- |
138 | iseq $i, 2; |
139 | } |
140 | } |
141 | |
e425a60b |
142 | { |
143 | local $BugId = "20010619.003"; |
144 | # Amazingly vertical tabulator is the same in ASCII and EBCDIC. |
145 | for ("\n", "\t", "\014", "\r") { |
146 | ok !/[[:print:]]/, "'$_' not in [[:print:]]"; |
147 | } |
148 | for (" ") { |
149 | ok /[[:print:]]/, "'$_' in [[:print:]]"; |
150 | } |
151 | } |
152 | |
153 | |
e425a60b |
154 | |
155 | { |
156 | # [ID 20010814.004] pos() doesn't work when using =~m// in list context |
157 | local $BugId = '20010814.004'; |
158 | $_ = "ababacadaea"; |
159 | my $a = join ":", /b./gc; |
160 | my $b = join ":", /a./gc; |
161 | my $c = pos; |
162 | iseq "$a $b $c", 'ba:ba ad:ae 10', "pos() works with () = m//"; |
163 | } |
164 | |
165 | |
166 | { |
167 | # [ID 20010407.006] matching utf8 return values from |
168 | # functions does not work |
169 | local $BugId = '20010407.006'; |
170 | local $Message = 'UTF-8 return values from functions'; |
171 | package ID_20010407_006; |
172 | sub x {"a\x{1234}"} |
173 | my $x = x; |
174 | my $y; |
175 | ::ok $x =~ /(..)/; |
176 | $y = $1; |
177 | ::ok length ($y) == 2 && $y eq $x; |
178 | ::ok x =~ /(..)/; |
179 | $y = $1; |
180 | ::ok length ($y) == 2 && $y eq $x; |
181 | } |
182 | |
e425a60b |
183 | { |
184 | # High bit bug -- japhy |
185 | my $x = "ab\200d"; |
186 | ok $x =~ /.*?\200/, "High bit fine"; |
187 | } |
188 | |
189 | |
190 | { |
e425a60b |
191 | local $Message = 'UTF-8 hash keys and /$/'; |
192 | # http://www.xray.mpe.mpg.de/mailing-lists/perl5-porters |
193 | # /2002-01/msg01327.html |
194 | |
195 | my $u = "a\x{100}"; |
196 | my $v = substr ($u, 0, 1); |
197 | my $w = substr ($u, 1, 1); |
198 | my %u = ($u => $u, $v => $v, $w => $w); |
199 | for (keys %u) { |
200 | my $m1 = /^\w*$/ ? 1 : 0; |
201 | my $m2 = $u {$_} =~ /^\w*$/ ? 1 : 0; |
202 | iseq $m1, $m2; |
203 | } |
204 | } |
205 | |
206 | |
207 | { |
208 | local $BugId = "20020124.005"; |
209 | local $PatchId = "14795"; |
210 | local $Message = "s///eg"; |
211 | |
212 | for my $char ("a", "\x{df}", "\x{100}") { |
213 | my $x = "$char b $char"; |
214 | $x =~ s{($char)}{ |
215 | "c" =~ /c/; |
216 | "x"; |
217 | }ge; |
218 | iseq substr ($x, 0, 1), substr ($x, -1, 1); |
219 | } |
220 | } |
221 | |
222 | |
223 | { |
e425a60b |
224 | local $BugId = "20020412.005"; |
225 | local $Message = "Correct pmop flags checked when empty pattern"; |
226 | |
227 | # Requires reuse of last successful pattern. |
228 | my $num = 123; |
229 | $num =~ /\d/; |
230 | for (0 .. 1) { |
231 | my $match = ?? + 0; |
232 | ok $match != $_, $Message, |
233 | sprintf "'match one' %s on %s iteration" => |
234 | $match ? 'succeeded' : 'failed', |
235 | $_ ? 'second' : 'first'; |
236 | } |
237 | $num =~ /(\d)/; |
238 | my $result = join "" => $num =~ //g; |
239 | iseq $result, $num; |
240 | } |
241 | |
242 | |
243 | { |
244 | local $BugId = '20020630.002'; |
245 | local $Message = 'UTF-8 regex matches above 32k'; |
246 | for (['byte', "\x{ff}"], ['utf8', "\x{1ff}"]) { |
247 | my ($type, $char) = @$_; |
248 | for my $len (32000, 32768, 33000) { |
249 | my $s = $char . "f" x $len; |
250 | my $r = $s =~ /$char([f]*)/gc; |
251 | ok $r, $Message, "<$type x $len>"; |
252 | ok !$r || pos ($s) == $len + 1, $Message, |
253 | "<$type x $len>; pos = @{[pos $s]}"; |
254 | } |
255 | } |
256 | } |
257 | |
258 | |
259 | { |
260 | our $a = bless qr /foo/ => 'Foo'; |
261 | ok 'goodfood' =~ $a, "Reblessed qr // matches"; |
262 | iseq $a, '(?-xism:foo)', "Reblessed qr // stringifies"; |
263 | my $x = "\x{3fe}"; |
264 | my $z = my $y = "\317\276"; # Byte representation of $x |
265 | $a = qr /$x/; |
266 | ok $x =~ $a, "UTF-8 interpolation in qr //"; |
267 | ok "a$a" =~ $x, "Stringified qr // preserves UTF-8"; |
268 | ok "a$x" =~ /^a$a\z/, "Interpolated qr // preserves UTF-8"; |
269 | ok "a$x" =~ /^a(??{$a})\z/, |
270 | "Postponed interpolation of qr // preserves UTF-8"; |
271 | { |
272 | local $BugId = '17776'; |
273 | iseq length qr /##/x, 12, "## in qr // doesn't corrupt memory"; |
274 | } |
275 | { |
276 | use re 'eval'; |
277 | ok "$x$x" =~ /^$x(??{$x})\z/, |
278 | "Postponed UTF-8 string in UTF-8 re matches UTF-8"; |
279 | ok "$y$x" =~ /^$y(??{$x})\z/, |
280 | "Postponed UTF-8 string in non-UTF-8 re matches UTF-8"; |
281 | ok "$y$x" !~ /^$y(??{$y})\z/, |
282 | "Postponed non-UTF-8 string in non-UTF-8 re doesn't match UTF-8"; |
283 | ok "$x$x" !~ /^$x(??{$y})\z/, |
284 | "Postponed non-UTF-8 string in UTF-8 re doesn't match UTF-8"; |
285 | ok "$y$y" =~ /^$y(??{$y})\z/, |
286 | "Postponed non-UTF-8 string in non-UTF-8 re matches non-UTF8"; |
287 | ok "$x$y" =~ /^$x(??{$y})\z/, |
288 | "Postponed non-UTF-8 string in UTF-8 re matches non-UTF8"; |
289 | |
290 | $y = $z; # Reset $y after upgrade. |
291 | ok "$x$y" !~ /^$x(??{$x})\z/, |
292 | "Postponed UTF-8 string in UTF-8 re doesn't match non-UTF-8"; |
293 | ok "$y$y" !~ /^$y(??{$x})\z/, |
294 | "Postponed UTF-8 string in non-UTF-8 re doesn't match non-UTF-8"; |
295 | } |
296 | } |
297 | |
298 | |
299 | { |
300 | local $PatchId = '18179'; |
301 | my $s = "\x{100}" x 5; |
302 | my $ok = $s =~ /(\x{100}{4})/; |
303 | my ($ord, $len) = (ord $1, length $1); |
304 | ok $ok && $ord == 0x100 && $len == 4, "No panic: end_shift"; |
305 | } |
306 | |
307 | |
308 | { |
309 | local $BugId = '15763'; |
310 | our $a = "x\x{100}"; |
311 | chop $a; # Leaves the UTF-8 flag |
312 | $a .= "y"; # 1 byte before 'y'. |
313 | |
314 | ok $a =~ /^\C/, 'match one \C on 1-byte UTF-8'; |
315 | ok $a =~ /^\C{1}/, 'match \C{1}'; |
316 | |
317 | ok $a =~ /^\Cy/, 'match \Cy'; |
318 | ok $a =~ /^\C{1}y/, 'match \C{1}y'; |
319 | |
320 | ok $a !~ /^\C\Cy/, q {don't match two \Cy}; |
321 | ok $a !~ /^\C{2}y/, q {don't match \C{2}y}; |
322 | |
323 | $a = "\x{100}y"; # 2 bytes before "y" |
324 | |
325 | ok $a =~ /^\C/, 'match one \C on 2-byte UTF-8'; |
326 | ok $a =~ /^\C{1}/, 'match \C{1}'; |
327 | ok $a =~ /^\C\C/, 'match two \C'; |
328 | ok $a =~ /^\C{2}/, 'match \C{2}'; |
329 | |
330 | ok $a =~ /^\C\C\C/, 'match three \C on 2-byte UTF-8 and a byte'; |
331 | ok $a =~ /^\C{3}/, 'match \C{3}'; |
332 | |
333 | ok $a =~ /^\C\Cy/, 'match two \C'; |
334 | ok $a =~ /^\C{2}y/, 'match \C{2}'; |
335 | |
336 | ok $a !~ /^\C\C\Cy/, q {don't match three \Cy}; |
337 | ok $a !~ /^\C{2}\Cy/, q {don't match \C{2}\Cy}; |
338 | ok $a !~ /^\C{3}y/, q {don't match \C{3}y}; |
339 | |
340 | $a = "\x{1000}y"; # 3 bytes before "y" |
341 | |
342 | ok $a =~ /^\C/, 'match one \C on three-byte UTF-8'; |
343 | ok $a =~ /^\C{1}/, 'match \C{1}'; |
344 | ok $a =~ /^\C\C/, 'match two \C'; |
345 | ok $a =~ /^\C{2}/, 'match \C{2}'; |
346 | ok $a =~ /^\C\C\C/, 'match three \C'; |
347 | ok $a =~ /^\C{3}/, 'match \C{3}'; |
348 | |
349 | ok $a =~ /^\C\C\C\C/, 'match four \C on three-byte UTF-8 and a byte'; |
350 | ok $a =~ /^\C{4}/, 'match \C{4}'; |
351 | |
352 | ok $a =~ /^\C\C\Cy/, 'match three \Cy'; |
353 | ok $a =~ /^\C{3}y/, 'match \C{3}y'; |
354 | |
355 | ok $a !~ /^\C\C\C\Cy/, q {don't match four \Cy}; |
356 | ok $a !~ /^\C{4}y/, q {don't match \C{4}y}; |
357 | } |
358 | |
359 | |
360 | { |
e425a60b |
361 | local $BugId = '15397'; |
362 | local $Message = 'UTF-8 matching'; |
363 | ok "\x{100}" =~ /\x{100}/; |
364 | ok "\x{100}" =~ /(\x{100})/; |
365 | ok "\x{100}" =~ /(\x{100}){1}/; |
366 | ok "\x{100}\x{100}" =~ /(\x{100}){2}/; |
367 | ok "\x{100}\x{100}" =~ /(\x{100})(\x{100})/; |
368 | } |
369 | |
370 | |
371 | { |
372 | local $BugId = '7471'; |
373 | local $Message = 'Neither ()* nor ()*? sets $1 when matched 0 times'; |
374 | local $_ = 'CD'; |
375 | ok /(AB)*?CD/ && !defined $1; |
376 | ok /(AB)*CD/ && !defined $1; |
377 | } |
378 | |
379 | |
380 | { |
381 | local $BugId = '3547'; |
382 | local $Message = "Caching shouldn't prevent match"; |
383 | my $pattern = "^(b+?|a){1,2}c"; |
384 | ok "bac" =~ /$pattern/ && $1 eq 'a'; |
385 | ok "bbac" =~ /$pattern/ && $1 eq 'a'; |
386 | ok "bbbac" =~ /$pattern/ && $1 eq 'a'; |
387 | ok "bbbbac" =~ /$pattern/ && $1 eq 'a'; |
388 | } |
389 | |
390 | |
391 | |
392 | { |
393 | local $BugId = '18232'; |
394 | local $Message = '$1 should keep UTF-8 ness'; |
395 | ok "\x{100}" =~ /(.)/; |
396 | iseq $1, "\x{100}", '$1 is UTF-8'; |
397 | { 'a' =~ /./; } |
398 | iseq $1, "\x{100}", '$1 is still UTF-8'; |
399 | isneq $1, "\xC4\x80", '$1 is not non-UTF-8'; |
400 | } |
401 | |
402 | |
403 | { |
404 | local $BugId = '19767'; |
405 | local $Message = "Optimizer doesn't prematurely reject match"; |
406 | use utf8; |
407 | |
408 | my $attr = 'Name-1'; |
409 | my $NormalChar = qr /[\p{IsDigit}\p{IsLower}\p{IsUpper}]/; |
410 | my $NormalWord = qr /${NormalChar}+?/; |
411 | my $PredNameHyphen = qr /^${NormalWord}(\-${NormalWord})*?$/; |
412 | |
413 | $attr =~ /^$/; |
414 | ok $attr =~ $PredNameHyphen; # Original test. |
415 | |
416 | "a" =~ m/[b]/; |
417 | ok "0" =~ /\p{N}+\z/; # Variant. |
418 | } |
419 | |
420 | |
421 | { |
422 | local $BugId = '20683'; |
423 | local $Message = "(??{ }) doesn't return stale values"; |
424 | our $p = 1; |
425 | foreach (1, 2, 3, 4) { |
426 | $p ++ if /(??{ $p })/ |
427 | } |
428 | iseq $p, 5; |
429 | |
430 | { |
431 | package P; |
432 | $a = 1; |
433 | sub TIESCALAR {bless []} |
434 | sub FETCH {$a ++} |
435 | } |
436 | tie $p, "P"; |
437 | foreach (1, 2, 3, 4) { |
438 | /(??{ $p })/ |
439 | } |
440 | iseq $p, 5; |
441 | } |
442 | |
443 | |
444 | { |
445 | # Subject: Odd regexp behavior |
446 | # From: Markus Kuhn <Markus.Kuhn@cl.cam.ac.uk> |
447 | # Date: Wed, 26 Feb 2003 16:53:12 +0000 |
448 | # Message-Id: <E18o4nw-0008Ly-00@wisbech.cl.cam.ac.uk> |
449 | # To: perl-unicode@perl.org |
450 | |
451 | local $Message = 'Markus Kuhn 2003-02-26'; |
452 | |
453 | my $x = "\x{2019}\nk"; |
454 | ok $x =~ s/(\S)\n(\S)/$1 $2/sg; |
455 | ok $x eq "\x{2019} k"; |
456 | |
457 | $x = "b\nk"; |
458 | ok $x =~ s/(\S)\n(\S)/$1 $2/sg; |
459 | ok $x eq "b k"; |
460 | |
461 | ok "\x{2019}" =~ /\S/; |
462 | } |
463 | |
464 | |
465 | { |
466 | local $BugId = '21411'; |
467 | local $Message = "(??{ .. }) in split doesn't corrupt its stack"; |
468 | our $i; |
469 | ok '-1-3-5-' eq join '', split /((??{$i++}))/, '-1-3-5-'; |
470 | no warnings 'syntax'; |
471 | @_ = split /(?{'WOW'})/, 'abc'; |
472 | local $" = "|"; |
473 | iseq "@_", "a|b|c"; |
474 | } |
475 | |
476 | |
477 | { |
478 | # XXX DAPM 13-Apr-06. Recursive split is still broken. It's only luck it |
479 | # hasn't been crashing. Disable this test until it is fixed properly. |
480 | # XXX also check what it returns rather than just doing ok(1,...) |
481 | # split /(?{ split "" })/, "abc"; |
482 | local $TODO = "Recursive split is still broken"; |
483 | ok 0, 'cache_re & "(?{": it dumps core in 5.6.1 & 5.8.0'; |
484 | } |
485 | |
486 | |
487 | { |
e425a60b |
488 | local $BugId = '17757'; |
489 | $_ = "code: 'x' { '...' }\n"; study; |
490 | my @x; push @x, $& while m/'[^\']*'/gx; |
491 | local $" = ":"; |
492 | iseq "@x", "'x':'...'", "Parse::RecDescent triggered infinite loop"; |
493 | } |
494 | |
495 | |
496 | { |
e425a60b |
497 | local $BugId = '22354'; |
498 | sub func ($) { |
499 | ok "a\nb" !~ /^b/, "Propagated modifier; $_[0]"; |
500 | ok "a\nb" =~ /^b/m, "Propagated modifier; $_[0] - with /m"; |
501 | } |
502 | func "standalone"; |
503 | $_ = "x"; s/x/func "in subst"/e; |
504 | $_ = "x"; s/x/func "in multiline subst"/em; |
505 | |
506 | # |
507 | # Next two give 'panic: malloc'. |
508 | # Outcommented, using two TODOs. |
509 | # |
510 | local $TODO = 'panic: malloc'; |
511 | local $Message = 'Postponed regexp and propaged modifier'; |
512 | # ok 0 for 1 .. 2; |
513 | SKIP: { |
514 | skip "panic: malloc", 2; |
515 | $_ = "x"; /x(?{func "in regexp"})/; |
516 | $_ = "x"; /x(?{func "in multiline regexp"})/m; |
517 | } |
518 | } |
519 | |
520 | |
521 | { |
522 | local $BugId = '19049'; |
523 | $_ = "abcdef\n"; |
524 | my @x = m/./g; |
525 | iseq "abcde", $`, 'Global match sets $`'; |
526 | } |
527 | |
528 | |
529 | { |
e425a60b |
530 | # [perl #23769] Unicode regex broken on simple example |
531 | # regrepeat() didn't handle UTF-8 EXACT case right. |
532 | local $BugId = '23769'; |
533 | my $Mess = 'regrepeat() handles UTF-8 EXACT case right'; |
534 | local $Message = $Mess; |
535 | |
536 | my $s = "\x{a0}\x{a0}\x{a0}\x{100}"; chop $s; |
537 | |
538 | ok $s =~ /\x{a0}/; |
539 | ok $s =~ /\x{a0}+/; |
540 | ok $s =~ /\x{a0}\x{a0}/; |
541 | |
542 | $Message = "$Mess (easy variant)"; |
543 | ok "aaa\x{100}" =~ /(a+)/; |
544 | iseq $1, "aaa"; |
545 | |
546 | $Message = "$Mess (easy invariant)"; |
547 | ok "aaa\x{100} " =~ /(a+?)/; |
548 | iseq $1, "a"; |
549 | |
550 | $Message = "$Mess (regrepeat variant)"; |
551 | ok "\xa0\xa0\xa0\x{100} " =~ /(\xa0+?)/; |
552 | iseq $1, "\xa0"; |
553 | |
554 | $Message = "$Mess (regrepeat invariant)"; |
555 | ok "\xa0\xa0\xa0\x{100}" =~ /(\xa0+)/; |
556 | iseq $1, "\xa0\xa0\xa0"; |
557 | |
558 | $Message = "$Mess (hard variant)"; |
559 | ok "\xa0\xa1\xa0\xa1\xa0\xa1\x{100}" =~ /((?:\xa0\xa1)+?)/; |
560 | iseq $1, "\xa0\xa1"; |
561 | |
562 | $Message = "$Mess (hard invariant)"; |
563 | ok "ababab\x{100} " =~ /((?:ab)+)/; |
564 | iseq $1, 'ababab'; |
565 | |
566 | ok "\xa0\xa1\xa0\xa1\xa0\xa1\x{100}" =~ /((?:\xa0\xa1)+)/; |
567 | iseq $1, "\xa0\xa1\xa0\xa1\xa0\xa1"; |
568 | |
569 | ok "ababab\x{100} " =~ /((?:ab)+?)/; |
570 | iseq $1, "ab"; |
571 | |
572 | $Message = "Don't match first byte of UTF-8 representation"; |
573 | ok "\xc4\xc4\xc4" !~ /(\x{100}+)/; |
574 | ok "\xc4\xc4\xc4" !~ /(\x{100}+?)/; |
575 | ok "\xc4\xc4\xc4" !~ /(\x{100}++)/; |
576 | } |
577 | |
578 | |
579 | { |
e425a60b |
580 | # perl panic: pp_match start/end pointers |
581 | local $BugId = '25269'; |
582 | iseq "a-bc", eval {my ($x, $y) = "bca" =~ /^(?=.*(a)).*(bc)/; "$x-$y"}, |
583 | 'Captures can move backwards in string'; |
584 | } |
585 | |
586 | |
587 | { |
588 | local $BugId = '27940'; # \cA not recognized in character classes |
589 | ok "a\cAb" =~ /\cA/, '\cA in pattern'; |
590 | ok "a\cAb" =~ /[\cA]/, '\cA in character class'; |
591 | ok "a\cAb" =~ /[\cA-\cB]/, '\cA in character class range'; |
592 | ok "abc" =~ /[^\cA-\cB]/, '\cA in negated character class range'; |
593 | ok "a\cBb" =~ /[\cA-\cC]/, '\cB in character class range'; |
594 | ok "a\cCbc" =~ /[^\cA-\cB]/, '\cC in negated character class range'; |
595 | ok "a\cAb" =~ /(??{"\cA"})/, '\cA in ??{} pattern'; |
596 | ok "ab" !~ /a\cIb/x, '\cI in pattern'; |
597 | } |
598 | |
599 | |
600 | { |
9d45b377 |
601 | # perl #28532: optional zero-width match at end of string is ignored |
602 | local $BugId = '28532'; |
603 | ok "abc" =~ /^abc(\z)?/ && defined($1), |
604 | 'Optional zero-width match at end of string'; |
605 | ok "abc" =~ /^abc(\z)??/ && !defined($1), |
606 | 'Optional zero-width match at end of string'; |
e425a60b |
607 | } |
608 | |
609 | |
e425a60b |
610 | |
611 | { |
612 | local $BugId = '36207'; |
613 | my $utf8 = "\xe9\x{100}"; chop $utf8; |
614 | my $latin1 = "\xe9"; |
615 | |
616 | ok $utf8 =~ /\xe9/i, "utf8/latin"; |
617 | ok $utf8 =~ /$latin1/i, "utf8/latin runtime"; |
618 | ok $utf8 =~ /(abc|\xe9)/i, "utf8/latin trie"; |
619 | ok $utf8 =~ /(abc|$latin1)/i, "utf8/latin trie runtime"; |
620 | |
621 | ok "\xe9" =~ /$utf8/i, "latin/utf8"; |
622 | ok "\xe9" =~ /(abc|$utf8)/i, "latin/utf8 trie"; |
623 | ok $latin1 =~ /$utf8/i, "latin/utf8 runtime"; |
624 | ok $latin1 =~ /(abc|$utf8)/i, "latin/utf8 trie runtime"; |
625 | } |
626 | |
627 | |
628 | { |
629 | local $BugId = '37038'; |
630 | my $s = "abcd"; |
631 | $s =~ /(..)(..)/g; |
632 | $s = $1; |
633 | $s = $2; |
634 | iseq $2, 'cd', |
635 | "Assigning to original string does not corrupt match vars"; |
636 | } |
637 | |
638 | |
639 | { |
9d45b377 |
640 | local $PatchId = '26410'; |
e425a60b |
641 | { |
642 | package wooosh; |
643 | sub gloople {"!"} |
644 | } |
645 | my $aeek = bless {} => 'wooosh'; |
646 | eval_ok sub {$aeek -> gloople () =~ /(.)/g}, |
647 | "//g match against return value of sub"; |
648 | |
649 | sub gloople {"!"} |
650 | eval_ok sub {gloople () =~ /(.)/g}, |
651 | "26410 didn't affect sub calls for some reason"; |
652 | } |
653 | |
654 | |
655 | { |
656 | local $TODO = "See changes 26925-26928, which reverted change 26410"; |
657 | { |
658 | package lv; |
659 | our $var = "abc"; |
660 | sub variable : lvalue {$var} |
661 | } |
662 | my $o = bless [] => 'lv'; |
663 | my $f = ""; |
664 | my $r = eval { |
665 | for (1 .. 2) { |
666 | $f .= $1 if $o -> variable =~ /(.)/g; |
667 | } |
668 | 1; |
669 | }; |
670 | if ($r) { |
671 | iseq $f, "ab", "pos() retained between calls"; |
672 | } |
673 | else { |
674 | local $TODO; |
675 | ok 0, "Code failed: $@"; |
676 | } |
677 | |
678 | our $var = "abc"; |
679 | sub variable : lvalue {$var} |
680 | my $g = ""; |
681 | my $s = eval { |
682 | for (1 .. 2) { |
683 | $g .= $1 if variable =~ /(.)/g; |
684 | } |
685 | 1; |
686 | }; |
687 | if ($s) { |
688 | iseq $g, "ab", "pos() retained between calls"; |
689 | } |
690 | else { |
691 | local $TODO; |
692 | ok 0, "Code failed: $@"; |
693 | } |
694 | } |
695 | |
696 | |
697 | SKIP: |
698 | { |
699 | local $BugId = '37836'; |
700 | skip "In EBCDIC" if $IS_EBCDIC; |
701 | no warnings 'utf8'; |
702 | $_ = pack 'U0C2', 0xa2, 0xf8; # Ill-formed UTF-8 |
703 | my $ret = 0; |
704 | eval_ok sub {!($ret = s/[\0]+//g)}, |
705 | "Ill-formed UTF-8 doesn't match NUL in class"; |
706 | } |
707 | |
708 | |
709 | { |
710 | # chr(65535) should be allowed in regexes |
711 | local $BugId = '38293'; |
712 | no warnings 'utf8'; # To allow non-characters |
713 | my ($c, $r, $s); |
714 | |
715 | $c = chr 0xffff; |
716 | $c =~ s/$c//g; |
717 | ok $c eq "", "U+FFFF, parsed as atom"; |
718 | |
719 | $c = chr 0xffff; |
720 | $r = "\\$c"; |
721 | $c =~ s/$r//g; |
722 | ok $c eq "", "U+FFFF backslashed, parsed as atom"; |
723 | |
724 | $c = chr 0xffff; |
725 | $c =~ s/[$c]//g; |
726 | ok $c eq "", "U+FFFF, parsed in class"; |
727 | |
728 | $c = chr 0xffff; |
729 | $r = "[\\$c]"; |
730 | $c =~ s/$r//g; |
731 | ok $c eq "", "U+FFFF backslashed, parsed in class"; |
732 | |
733 | $s = "A\x{ffff}B"; |
734 | $s =~ s/\x{ffff}//i; |
735 | ok $s eq "AB", "U+FFFF, EXACTF"; |
736 | |
737 | $s = "\x{ffff}A"; |
738 | $s =~ s/\bA//; |
739 | ok $s eq "\x{ffff}", "U+FFFF, BOUND"; |
740 | |
741 | $s = "\x{ffff}!"; |
742 | $s =~ s/\B!//; |
743 | ok $s eq "\x{ffff}", "U+FFFF, NBOUND"; |
744 | } |
745 | |
746 | |
747 | { |
748 | local $BugId = '39583'; |
749 | |
750 | # The printing characters |
751 | my @chars = ("A" .. "Z"); |
752 | my $delim = ","; |
753 | my $size = 32771 - 4; |
754 | my $str = ''; |
755 | |
756 | # Create some random junk. Inefficient, but it works. |
757 | for (my $i = 0; $i < $size; $ i++) { |
758 | $str .= $chars [rand @chars]; |
759 | } |
760 | |
761 | $str .= ($delim x 4); |
762 | my $res; |
763 | my $matched; |
764 | ok $str =~ s/^(.*?)${delim}{4}//s, "Pattern matches"; |
765 | iseq $str, "", "Empty string"; |
766 | ok defined $1 && length ($1) == $size, '$1 is correct size'; |
767 | } |
768 | |
769 | |
770 | { |
771 | local $BugId = '27940'; |
772 | ok "\0-A" =~ /\c@-A/, '@- should not be interpolated in a pattern'; |
773 | ok "\0\0A" =~ /\c@+A/, '@+ should not be interpolated in a pattern'; |
774 | ok "X\@-A" =~ /X@-A/, '@- should not be interpolated in a pattern'; |
775 | ok "X\@\@A" =~ /X@+A/, '@+ should not be interpolated in a pattern'; |
776 | |
777 | ok "X\0A" =~ /X\c@?A/, '\c@?'; |
778 | ok "X\0A" =~ /X\c@*A/, '\c@*'; |
779 | ok "X\0A" =~ /X\c@(A)/, '\c@('; |
780 | ok "X\0A" =~ /X(\c@)A/, '\c@)'; |
781 | ok "X\0A" =~ /X\c@|ZA/, '\c@|'; |
782 | |
783 | ok "X\@A" =~ /X@?A/, '@?'; |
784 | ok "X\@A" =~ /X@*A/, '@*'; |
785 | ok "X\@A" =~ /X@(A)/, '@('; |
786 | ok "X\@A" =~ /X(@)A/, '@)'; |
787 | ok "X\@A" =~ /X@|ZA/, '@|'; |
788 | |
789 | local $" = ','; # non-whitespace and non-RE-specific |
790 | ok 'abc' =~ /(.)(.)(.)/, 'The last successful match is bogus'; |
791 | ok "A@+B" =~ /A@{+}B/, 'Interpolation of @+ in /@{+}/'; |
792 | ok "A@-B" =~ /A@{-}B/, 'Interpolation of @- in /@{-}/'; |
793 | ok "A@+B" =~ /A@{+}B/x, 'Interpolation of @+ in /@{+}/x'; |
794 | ok "A@-B" =~ /A@{-}B/x, 'Interpolation of @- in /@{-}/x'; |
795 | } |
796 | |
797 | |
798 | { |
9d45b377 |
799 | local $BugId = '50496'; |
e425a60b |
800 | my $s = 'foo bar baz'; |
801 | my (@k, @v, @fetch, $res); |
802 | my $count = 0; |
803 | my @names = qw ($+{A} $+{B} $+{C}); |
804 | if ($s =~ /(?<A>foo)\s+(?<B>bar)?\s+(?<C>baz)/) { |
805 | while (my ($k, $v) = each (%+)) { |
806 | $count++; |
807 | } |
808 | @k = sort keys (%+); |
809 | @v = sort values (%+); |
810 | $res = 1; |
811 | push @fetch, |
812 | ["$+{A}", "$1"], |
813 | ["$+{B}", "$2"], |
814 | ["$+{C}", "$3"], |
815 | ; |
816 | } |
817 | foreach (0 .. 2) { |
818 | if ($fetch [$_]) { |
819 | iseq $fetch [$_] [0], $fetch [$_] [1], $names [$_]; |
820 | } else { |
821 | ok 0, $names[$_]; |
822 | } |
823 | } |
824 | iseq $res, 1, "'$s' =~ /(?<A>foo)\\s+(?<B>bar)?\\s+(?<C>baz)/"; |
825 | iseq $count, 3, "Got 3 keys in %+ via each"; |
826 | iseq 0 + @k, 3, 'Got 3 keys in %+ via keys'; |
827 | iseq "@k", "A B C", "Got expected keys"; |
828 | iseq "@v", "bar baz foo", "Got expected values"; |
829 | eval ' |
830 | no warnings "uninitialized"; |
831 | print for $+ {this_key_doesnt_exist}; |
832 | '; |
833 | ok !$@, 'lvalue $+ {...} should not throw an exception'; |
834 | } |
835 | |
836 | |
837 | { |
838 | # |
839 | # Almost the same as the block above, except that the capture is nested. |
840 | # |
841 | local $BugId = '50496'; |
842 | my $s = 'foo bar baz'; |
843 | my (@k, @v, @fetch, $res); |
844 | my $count = 0; |
845 | my @names = qw ($+{A} $+{B} $+{C} $+{D}); |
846 | if ($s =~ /(?<D>(?<A>foo)\s+(?<B>bar)?\s+(?<C>baz))/) { |
847 | while (my ($k,$v) = each(%+)) { |
848 | $count++; |
849 | } |
850 | @k = sort keys (%+); |
851 | @v = sort values (%+); |
852 | $res = 1; |
853 | push @fetch, |
854 | ["$+{A}", "$2"], |
855 | ["$+{B}", "$3"], |
856 | ["$+{C}", "$4"], |
857 | ["$+{D}", "$1"], |
858 | ; |
859 | } |
860 | foreach (0 .. 3) { |
861 | if ($fetch [$_]) { |
862 | iseq $fetch [$_] [0], $fetch [$_] [1], $names [$_]; |
863 | } else { |
864 | ok 0, $names [$_]; |
865 | } |
866 | } |
867 | iseq $res, 1, "'$s' =~ /(?<D>(?<A>foo)\\s+(?<B>bar)?\\s+(?<C>baz))/"; |
868 | iseq $count, 4, "Got 4 keys in %+ via each"; |
869 | iseq @k, 4, 'Got 4 keys in %+ via keys'; |
870 | iseq "@k", "A B C D", "Got expected keys"; |
871 | iseq "@v", "bar baz foo foo bar baz", "Got expected values"; |
872 | eval ' |
873 | no warnings "uninitialized"; |
874 | print for $+ {this_key_doesnt_exist}; |
875 | '; |
876 | ok !$@,'lvalue $+ {...} should not throw an exception'; |
877 | } |
878 | |
879 | |
880 | { |
e425a60b |
881 | local $BugId = '36046'; |
882 | my $str = 'abc'; |
883 | my $count = 0; |
884 | my $mval = 0; |
885 | my $pval = 0; |
886 | while ($str =~ /b/g) {$mval = $#-; $pval = $#+; $count ++} |
887 | iseq $mval, 0, '@- should be empty'; |
888 | iseq $pval, 0, '@+ should be empty'; |
889 | iseq $count, 1, 'Should have matched once only'; |
890 | } |
891 | |
892 | |
e425a60b |
893 | |
894 | |
895 | { |
896 | local $BugId = '40684'; |
897 | local $Message = '/m in precompiled regexp'; |
898 | my $s = "abc\ndef"; |
899 | my $rex = qr'^abc$'m; |
900 | ok $s =~ m/$rex/; |
901 | ok $s =~ m/^abc$/m; |
902 | } |
903 | |
904 | |
905 | { |
e425a60b |
906 | local $BugId = '36909'; |
907 | local $Message = '(?: ... )? should not lose $^R'; |
908 | $^R = 'Nothing'; |
909 | { |
910 | local $^R = "Bad"; |
911 | ok 'x foofoo y' =~ m { |
912 | (foo) # $^R correctly set |
913 | (?{ "last regexp code result" }) |
914 | }x; |
915 | iseq $^R, 'last regexp code result'; |
916 | } |
917 | iseq $^R, 'Nothing'; |
918 | |
919 | { |
920 | local $^R = "Bad"; |
921 | |
922 | ok 'x foofoo y' =~ m { |
923 | (?:foo|bar)+ # $^R correctly set |
924 | (?{ "last regexp code result" }) |
925 | }x; |
926 | iseq $^R, 'last regexp code result'; |
927 | } |
928 | iseq $^R, 'Nothing'; |
929 | |
930 | { |
931 | local $^R = "Bad"; |
932 | ok 'x foofoo y' =~ m { |
933 | (foo|bar)\1+ # $^R undefined |
934 | (?{ "last regexp code result" }) |
935 | }x; |
936 | iseq $^R, 'last regexp code result'; |
937 | } |
938 | iseq $^R, 'Nothing'; |
939 | |
940 | { |
941 | local $^R = "Bad"; |
942 | ok 'x foofoo y' =~ m { |
943 | (foo|bar)\1 # This time without the + |
944 | (?{"last regexp code result"}) |
945 | }x; |
946 | iseq $^R, 'last regexp code result'; |
947 | } |
948 | iseq $^R, 'Nothing'; |
949 | } |
950 | |
951 | |
952 | { |
953 | local $BugId = '22395'; |
954 | local $Message = 'Match is linear, not quadratic'; |
955 | our $count; |
956 | for my $l (10, 100, 1000) { |
957 | $count = 0; |
958 | ('a' x $l) =~ /(.*)(?{$count++})[bc]/; |
959 | local $TODO = "Should be L+1 not L*(L+3)/2 (L=$l)"; |
960 | iseq $count, $l + 1; |
961 | } |
962 | } |
963 | |
964 | |
965 | { |
966 | local $BugId = '22614'; |
967 | local $Message = '@-/@+ should not have undefined values'; |
968 | local $_ = 'ab'; |
969 | our @len = (); |
970 | /(.){1,}(?{push @len,0+@-})(.){1,}(?{})^/; |
971 | iseq "@len", "2 2 2"; |
972 | } |
973 | |
974 | |
975 | { |
976 | local $BugId = '18209'; |
977 | local $Message = '$& set on s///'; |
978 | my $text = ' word1 word2 word3 word4 word5 word6 '; |
979 | |
980 | my @words = ('word1', 'word3', 'word5'); |
981 | my $count; |
982 | foreach my $word (@words) { |
983 | $text =~ s/$word\s//gi; # Leave a space to seperate words |
984 | # in the resultant str. |
985 | # The following block is not working. |
986 | if ($&) { |
987 | $count ++; |
988 | } |
989 | # End bad block |
990 | } |
991 | iseq $count, 3; |
992 | iseq $text, ' word2 word4 word6 '; |
993 | } |
994 | |
995 | |
996 | { |
997 | # RT#6893 |
998 | local $BugId = '6893'; |
999 | local $_ = qq (A\nB\nC\n); |
1000 | my @res; |
1001 | while (m#(\G|\n)([^\n]*)\n#gsx) { |
1002 | push @res, "$2"; |
1003 | last if @res > 3; |
1004 | } |
1005 | iseq "@res", "A B C", "/g pattern shouldn't infinite loop"; |
1006 | } |
1007 | |
1008 | |
e425a60b |
1009 | |
1010 | { |
1011 | local $BugId = '41010'; |
1012 | local $Message = 'No optimizer bug'; |
1013 | my @tails = ('', '(?(1))', '(|)', '()?'); |
1014 | my @quants = ('*','+'); |
1015 | my $doit = sub { |
1016 | my $pats = shift; |
1017 | for (@_) { |
1018 | for my $pat (@$pats) { |
1019 | for my $quant (@quants) { |
1020 | for my $tail (@tails) { |
1021 | my $re = "($pat$quant\$)$tail"; |
1022 | ok /$re/ && $1 eq $_, "'$_' =~ /$re/"; |
1023 | ok /$re/m && $1 eq $_, "'$_' =~ /$re/m"; |
1024 | } |
1025 | } |
1026 | } |
1027 | } |
1028 | }; |
1029 | |
1030 | my @dpats = ('\d', |
1031 | '[1234567890]', |
1032 | '(1|[23]|4|[56]|[78]|[90])', |
1033 | '(?:1|[23]|4|[56]|[78]|[90])', |
1034 | '(1|2|3|4|5|6|7|8|9|0)', |
1035 | '(?:1|2|3|4|5|6|7|8|9|0)'); |
1036 | my @spats = ('[ ]', ' ', '( |\t)', '(?: |\t)', '[ \t]', '\s'); |
1037 | my @sstrs = (' '); |
1038 | my @dstrs = ('12345'); |
1039 | $doit -> (\@spats, @sstrs); |
1040 | $doit -> (\@dpats, @dstrs); |
1041 | } |
1042 | |
1043 | |
e425a60b |
1044 | |
1045 | { |
1046 | local $BugId = '45605'; |
1047 | # [perl #45605] Regexp failure with utf8-flagged and byte-flagged string |
1048 | |
1049 | my $utf_8 = "\xd6schel"; |
1050 | utf8::upgrade ($utf_8); |
1051 | $utf_8 =~ m {(\xd6|Ö)schel}; |
1052 | iseq $1, "\xd6", "Upgrade error"; |
1053 | } |
1054 | |
1055 | { |
e425a60b |
1056 | # Regardless of utf8ness any character matches itself when |
1057 | # doing a case insensitive match. See also [perl #36207] |
1058 | local $BugId = '36207'; |
1059 | for my $o (0 .. 255) { |
1060 | my @ch = (chr ($o), chr ($o)); |
1061 | utf8::upgrade ($ch [1]); |
1062 | for my $u_str (0, 1) { |
1063 | for my $u_pat (0, 1) { |
1064 | ok $ch [$u_str] =~ /\Q$ch[$u_pat]\E/i, |
1065 | "\$c =~ /\$c/i : chr ($o) : u_str = $u_str u_pat = $u_pat"; |
1066 | ok $ch [$u_str] =~ /\Q$ch[$u_pat]\E|xyz/i, |
1067 | "\$c=~/\$c|xyz/i : chr($o) : u_str = $u_str u_pat = $u_pat"; |
1068 | } |
1069 | } |
1070 | } |
1071 | } |
1072 | |
1073 | |
1074 | { |
e425a60b |
1075 | local $BugId = '49190'; |
1076 | local $Message = '$REGMARK in replacement'; |
1077 | our $REGMARK; |
1078 | my $_ = "A"; |
1079 | ok s/(*:B)A/$REGMARK/; |
1080 | iseq $_, "B"; |
1081 | $_ = "CCCCBAA"; |
1082 | ok s/(*:X)A+|(*:Y)B+|(*:Z)C+/$REGMARK/g; |
1083 | iseq $_, "ZYX"; |
1084 | } |
1085 | |
1086 | |
1087 | { |
e425a60b |
1088 | local $BugId = '52658'; |
1089 | local $Message = 'Substitution evaluation in list context'; |
1090 | my $reg = '../xxx/'; |
1091 | my @te = ($reg =~ m{^(/?(?:\.\./)*)}, |
1092 | $reg =~ s/(x)/'b'/eg > 1 ? '##' : '++'); |
1093 | iseq $reg, '../bbb/'; |
1094 | iseq $te [0], '../'; |
1095 | } |
1096 | |
1097 | # This currently has to come before any "use encoding" in this file. |
1098 | { |
1099 | local $Message; |
1100 | local $BugId = '59342'; |
1101 | must_warn 'qr/\400/', '^Use of octal value above 377'; |
1102 | } |
1103 | |
1104 | |
e425a60b |
1105 | |
1106 | { |
1107 | local $BugId = '60034'; |
1108 | my $a = "xyzt" x 8192; |
1109 | ok $a =~ /\A(?>[a-z])*\z/, |
1110 | '(?>) does not cause wrongness on long string'; |
1111 | my $b = $a . chr 256; |
1112 | chop $b; |
1113 | { |
1114 | iseq $a, $b; |
1115 | } |
1116 | ok $b =~ /\A(?>[a-z])*\z/, |
1117 | '(?>) does not cause wrongness on long string with UTF-8'; |
1118 | } |
1119 | |
1120 | |
1121 | # |
1122 | # Keep the following tests last -- they may crash perl |
1123 | # |
1124 | print "# Tests that follow may crash perl\n"; |
1125 | { |
1126 | local $BugId = '19049/38869'; |
1127 | local $Message = 'Pattern in a loop, failure should not ' . |
1128 | 'affect previous success'; |
1129 | my @list = ( |
1130 | 'ab cdef', # Matches regex |
1131 | ('e' x 40000 ) .'ab c' # Matches not, but 'ab c' matches part of it |
1132 | ); |
1133 | my $y; |
1134 | my $x; |
1135 | foreach (@list) { |
1136 | m/ab(.+)cd/i; # The ignore-case seems to be important |
1137 | $y = $1; # Use $1, which might not be from the last match! |
1138 | $x = substr ($list [0], $- [0], $+ [0] - $- [0]); |
1139 | } |
1140 | iseq $y, ' '; |
1141 | iseq $x, 'ab cd'; |
1142 | } |
1143 | |
1144 | |
1145 | { |
1146 | local $BugId = '24274'; |
1147 | |
1148 | ok (("a" x (2 ** 15 - 10)) =~ /^()(a|bb)*$/, "Recursive stack cracker"); |
1149 | ok ((q(a)x 100) =~ /^(??{'(.)'x 100})/, |
1150 | "Regexp /^(??{'(.)'x 100})/ crashes older perls"); |
1151 | } |
1152 | |
1153 | |
1154 | { |
e425a60b |
1155 | # [perl #45337] utf8 + "[a]a{2}" + /$.../ = panic: sv_len_utf8 cache |
1156 | local $BugId = '45337'; |
1157 | local ${^UTF8CACHE} = -1; |
1158 | local $Message = "Shouldn't panic"; |
1159 | my $s = "[a]a{2}"; |
1160 | utf8::upgrade $s; |
1161 | ok "aaa" =~ /$s/; |
1162 | } |
1163 | { |
1164 | local $BugId = '57042'; |
1165 | local $Message = "Check if tree logic breaks \$^R"; |
1166 | my $cond_re = qr/\s* |
1167 | \s* (?: |
1168 | \( \s* A (?{1}) |
1169 | | \( \s* B (?{2}) |
1170 | ) |
1171 | /x; |
1172 | my @res; |
1173 | for my $line ("(A)","(B)") { |
1174 | if ($line =~ m/$cond_re/) { |
1175 | push @res, $^R ? "#$^R" : "UNDEF"; |
1176 | } |
1177 | } |
1178 | iseq "@res","#1 #2"; |
1179 | } |
1180 | { |
1181 | no warnings 'closure'; |
1182 | my $re = qr/A(??{"1"})/; |
1183 | ok "A1B" =~ m/^((??{ $re }))((??{"B"}))$/; |
1184 | ok $1 eq "A1"; |
1185 | ok $2 eq "B"; |
1186 | } |
1187 | |
1188 | |
e425a60b |
1189 | |
1190 | # This only works under -DEBUGGING because it relies on an assert(). |
1191 | { |
1192 | local $BugId = '60508'; |
1193 | local $Message = "Check capture offset re-entrancy of utf8 code."; |
1194 | |
1195 | sub fswash { $_[0] =~ s/([>X])//g; } |
1196 | |
1197 | my $k1 = "." x 4 . ">>"; |
1198 | fswash($k1); |
1199 | |
1200 | my $k2 = "\x{f1}\x{2022}"; |
1201 | $k2 =~ s/([\360-\362])/>/g; |
1202 | fswash($k2); |
1203 | |
1204 | iseq($k2, "\x{2022}", "utf8::SWASHNEW doesn't cause capture leaks"); |
1205 | } |
1206 | |
1207 | |
1208 | { |
1209 | local $BugId = 65372; # minimal CURLYM limited to 32767 matches |
1210 | my @pat = ( |
1211 | qr{a(x|y)*b}, # CURLYM |
1212 | qr{a(x|y)*?b}, # .. with minmod |
1213 | qr{a([wx]|[yz])*b}, # .. and without tries |
1214 | qr{a([wx]|[yz])*?b}, |
1215 | ); |
1216 | my $len = 32768; |
1217 | my $s = join '', 'a', 'x' x $len, 'b'; |
1218 | for my $pat (@pat) { |
1219 | ok($s =~ $pat, $pat); |
1220 | } |
1221 | } |
e425a60b |
1222 | } # End of sub run_tests |
1223 | |
1224 | 1; |