From: Bram Date: Thu, 12 Mar 2009 19:43:57 +0000 (+0100) Subject: Fix #56194 Regex: (((??{1 + $^N}))) behaves differently in 5.10.0 than in blead X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=34a81e2b89e80eb8560606e2b29037d4da625156;p=p5sagit%2Fp5-mst-13.2.git Fix #56194 Regex: (((??{1 + $^N}))) behaves differently in 5.10.0 than in blead PL_reglastparen and PL_reglastcloseparen contains a pointer are set to & rex->lastparen and & rex->lastcloseparen. In case END the rex var is modified but PL_reglastparen and PL_reglastcloseparen is not. Some part of the codes access PL_reglastparen while other parts use rex->lastparen. This patch corrects this and adds 3 assertions. I'm currently unable to proof (with a test case) that the code in case EVAL_ab is really nessesary... Logically speaking it is nessesary but I do not know if it can cause test failures. Also in the patch are missing regressions between 5.8 -> 5.10 and 5.10 -> 5.11. (and a test script that contains these regressions) Message-ID: [Includes message and patch edits by committer.] --- diff --git a/regexec.c b/regexec.c index 45ece8e..58d973a 100644 --- a/regexec.c +++ b/regexec.c @@ -2841,6 +2841,11 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog) state_num = OP(scan); reenter_switch: + + assert(PL_reglastparen == &rex->lastparen); + assert(PL_reglastcloseparen == &rex->lastcloseparen); + assert(PL_regoffs == rex->offs); + switch (state_num) { case BOL: if (locinput == PL_bostr) @@ -3889,9 +3894,12 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog) regcpblow(ST.cp); cur_eval = ST.prev_eval; cur_curlyx = ST.prev_curlyx; - + + /* rex was changed so update the pointer in PL_reglastparen and PL_reglastcloseparen */ PL_reglastparen = &rex->lastparen; PL_reglastcloseparen = &rex->lastcloseparen; + /* also update PL_regoffs */ + PL_regoffs = rex->offs; /* XXXX This is too dramatic a measure... */ PL_reg_maxiter = 0; @@ -3907,6 +3915,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog) SETREX(rex_sv,ST.prev_rex); rex = (struct regexp *)SvANY(rex_sv); rexi = RXi_GET(rex); + /* rex was changed so update the pointer in PL_reglastparen and PL_reglastcloseparen */ PL_reglastparen = &rex->lastparen; PL_reglastcloseparen = &rex->lastcloseparen; @@ -4909,6 +4918,11 @@ NULL cur_curlyx = cur_eval->u.eval.prev_curlyx; ReREFCNT_inc(rex_sv); st->u.eval.cp = regcppush(0); /* Save *all* the positions. */ + + /* rex was changed so update the pointer in PL_reglastparen and PL_reglastcloseparen */ + PL_reglastparen = &rex->lastparen; + PL_reglastcloseparen = &rex->lastcloseparen; + REGCP_SET(st->u.eval.lastcp); PL_reginput = locinput; diff --git a/t/op/pat.t b/t/op/pat.t index 859ec00..c1cb120 100755 --- a/t/op/pat.t +++ b/t/op/pat.t @@ -13,7 +13,7 @@ sub run_tests; $| = 1; -my $EXPECTED_TESTS = 3965; # Update this when adding/deleting tests. +my $EXPECTED_TESTS = 4061; # Update this when adding/deleting tests. BEGIN { chdir 't' if -d 't'; @@ -4123,6 +4123,222 @@ sub run_tests { ok $1 eq "A1"; ok $2 eq "B"; } + + + { + use re 'eval'; + local $Message = 'Test if $^N and $+ work in (?{{})'; + our @ctl_n = (); + our @plus = (); + our $nested_tags; + $nested_tags = qr{ + < + ((\w)+) + (?{ + push @ctl_n, (defined $^N ? $^N : "undef"); + push @plus, (defined $+ ? $+ : "undef"); + }) + > + (??{$nested_tags})* + + }x; + + + my $c = 0; + for my $test ( + [ 1, qr#^$nested_tags$#, "bla blubb bla", "a b a" ], + [ 1, qr#^($nested_tags)$#, "bla blubb ", "a b a" ], + [ 1, qr#^(|)$nested_tags$#, "bla blubb bla", "a b a" ], + [ 1, qr#^(?:|)$nested_tags$#, "bla blubb bla", "a b a" ], + [ 1, qr#^<(bl|bla)>$nested_tags<(/\1)>$#, "blubb /bla", "b /bla" ], + [ 1, qr#(??{"(|)"})$nested_tags$#, "bla blubb bla", "a b a" ], + [ 1, qr#^(??{"(bla|)"})$nested_tags$#, "bla blubb bla", "a b a" ], + [ 1, qr#^(??{"(|)"})(??{$nested_tags})$#, "bla blubb undef", "a b undef" ], + [ 1, qr#^(??{"(?:|)"})$nested_tags$#, "bla blubb bla", "a b a" ], + [ 1, qr#^((??{"(?:bla|)"}))((??{$nested_tags}))$#, "bla blubb ", "a b " ], + [ 1, qr#^((??{"(?!)?"}))((??{$nested_tags}))$#, "bla blubb ", "a b " ], + [ 1, qr#^((??{"(?:|<(/?bla)>)"}))((??{$nested_tags}))\1$#, "bla blubb ", "a b " ], + [ 0, qr#^((??{"(?!)"}))?((??{$nested_tags}))(?!)$#, "bla blubb undef", "a b undef" ], + + ) { + $c++; + @ctl_n = (); + @plus = (); + my $match = (("" =~ $test->[1]) ? 1 : 0); + push @ctl_n, (defined $^N ? $^N : "undef"); + push @plus, (defined $+ ? $+ : "undef"); + ok($test->[0] == $match, "match $c"); + if ($test->[0] != $match) { + # unset @ctl_n and @plus + @ctl_n = @plus = (); + } + iseq("@ctl_n", $test->[2], "ctl_n $c"); + iseq("@plus", $test->[3], "plus $c"); + } + } + + { + use re 'eval'; + local $BugId = '56194'; + + our $f; + local $f; + $f = sub { + defined $_[0] ? $_[0] : "undef"; + }; + + ok("123" =~ m/^(\d)(((??{1 + $^N})))+$/); + + our @ctl_n; + our @plus; + + my $re = qr#(1)((??{ push @ctl_n, $f->($^N); push @plus, $f->($+); $^N + 1}))*(?{$^N})#; + my $re2 = qr#(1)((??{ push @ctl_n, $f->($^N); push @plus, $f->($+); $^N + 1}))*(?{$^N})(|a(b)c|def)(??{"$^R"})#; + my $re3 = qr#(1)((??{ push @ctl_n, $f->($^N); push @plus, $f->($+); $^N + 1})){2}(?{$^N})(|a(b)c|def)(??{"$^R"})#; + our $re5; + local $re5 = qr#(1)((??{ push @ctl_n, $f->($^N); push @plus, $f->($+); $^N + 1})){2}(?{$^N})#; + my $re6 = qr#(??{ push @ctl_n, $f->($^N); push @plus, $f->($+); $^N + 1})#; + my $re7 = qr#(??{ push @ctl_n, $f->($^N); push @plus, $f->($+); $^N + 1})#; + my $re8 = qr/(\d+)/; + my $c = 0; + for my $test ( + [ + "1233", + qr#^(1)((??{ push @ctl_n, $f->($^N); push @plus, $f->($+); $^N + 1}))+(??{$^N})$#, + "1 2 3 3", + "1 2 3 3", + "\$1 = 1, \$2 = 3, \$3 = undef, \$4 = undef, \$5 = undef", + ], + [ + "1233", + qr#^(1)((??{ push @ctl_n, $f->($^N); push @plus, $f->($+); $^N + 1}))+(abc|def|)?(??{$+})$#, + "1 2 3 3", + "1 2 3 3", + "\$1 = 1, \$2 = 3, \$3 = undef, \$4 = undef, \$5 = undef", + ], + [ + "1233", + qr#^(1)((??{ push @ctl_n, $f->($^N); push @plus, $f->($+); $^N + 1}))+(|abc|def)?(??{$+})$#, + "1 2 3 3", + "1 2 3 3", + "\$1 = 1, \$2 = 3, \$3 = undef, \$4 = undef, \$5 = undef", + ], + [ + "1233", + qr#^(1)((??{ push @ctl_n, $f->($^N); push @plus, $f->($+); $^N + 1}))+(abc|def|)?(??{$^N})$#, + "1 2 3 3", + "1 2 3 3", + "\$1 = 1, \$2 = 3, \$3 = undef, \$4 = undef, \$5 = undef", + ], + [ + "1233", + qr#^(1)((??{ push @ctl_n, $f->($^N); push @plus, $f->($+); $^N + 1}))+(|abc|def)?(??{$^N})$#, + "1 2 3 3", + "1 2 3 3", + "\$1 = 1, \$2 = 3, \$3 = undef, \$4 = undef, \$5 = undef", + ], + [ + "123abc3", + qr#^($re)(|a(b)c|def)(??{$^R})$#, + "1 2 3 abc", + "1 2 3 b", + "\$1 = 123, \$2 = 1, \$3 = 3, \$4 = abc, \$5 = b", + ], + [ + "123abc3", + qr#^($re2)$#, + "1 2 3 123abc3", + "1 2 3 b", + "\$1 = 123abc3, \$2 = 1, \$3 = 3, \$4 = abc, \$5 = b", + ], + [ + "123abc3", + qr#^($re3)$#, + "1 2 123abc3", + "1 2 b", + "\$1 = 123abc3, \$2 = 1, \$3 = 3, \$4 = abc, \$5 = b", + ], + [ + "123abc3", + qr#^(??{$re5})(|abc|def)(??{"$^R"})$#, + "1 2 abc", + "1 2 abc", + "\$1 = abc, \$2 = undef, \$3 = undef, \$4 = undef, \$5 = undef", + ], + [ + "123abc3", + qr#^(??{$re5})(|a(b)c|def)(??{"$^R"})$#, + "1 2 abc", + "1 2 b", + "\$1 = abc, \$2 = b, \$3 = undef, \$4 = undef, \$5 = undef", + ], + [ + "1234", + qr#^((\d+)((??{push @ctl_n, $f->($^N); push @plus, $f->($+);$^N + 1}))((??{push @ctl_n, $f->($^N); push @plus, $f->($+);$^N + 1}))((??{push @ctl_n, $f->($^N); push @plus, $f->($+);$^N + 1})))$#, + "1234 123 12 1 2 3 1234", + "1234 123 12 1 2 3 4", + "\$1 = 1234, \$2 = 1, \$3 = 2, \$4 = 3, \$5 = 4", + ], + [ + "1234556", + qr#^(\d+)($re6)($re6)($re6)$re6(($re6)$re6)$#, + "1234556 123455 12345 1234 123 12 1 2 3 4 4 5 56", + "1234556 123455 12345 1234 123 12 1 2 3 4 4 5 5", + "\$1 = 1, \$2 = 2, \$3 = 3, \$4 = 4, \$5 = 56", + ], + [ + "12345562", + qr#^((??{$re8}))($re7)($re7)($re7)$re7($re7)($re7(\2))$#, + "12345562 1234556 123455 12345 1234 123 12 1 2 3 4 4 5 62", + "12345562 1234556 123455 12345 1234 123 12 1 2 3 4 4 5 2", + "\$1 = 1, \$2 = 2, \$3 = 3, \$4 = 4, \$5 = 5", + ], + ) { + $c++; + @ctl_n = (); + @plus = (); + undef $^R; + my $match = $test->[0] =~ $test->[1]; + my $str = join(", ", '$1 = '.$f->($1), '$2 = '.$f->($2), '$3 = '.$f->($3), '$4 = '.$f->($4),'$5 = '.$f->($5)); + push @ctl_n, $f->($^N); + push @plus, $f->($+); + ok($match, "match $c"); + if (not $match) { + # unset $str, @ctl_n and @plus + $str = ""; + @ctl_n = @plus = (); + } + iseq("@ctl_n", $test->[2], "ctl_n $c"); + iseq("@plus", $test->[3], "plus $c"); + iseq($str, $test->[4], "str $c"); + } + SKIP: { + if ($] le '5.010') { + skip "test segfaults on perl < 5.10", 4; + } + + @ctl_n = (); + @plus = (); + + our $re4; + local $re4 = qr#(1)((??{push @ctl_n, $f->($^N); push @plus, $f->($+);$^N + 1})){2}(?{$^N})(|abc|def)(??{"$^R"})#; + undef $^R; + my $match = "123abc3" =~ m/^(??{$re4})$/; + my $str = join(", ", '$1 = '.$f->($1), '$2 = '.$f->($2), '$3 = '.$f->($3), '$4 = '.$f->($4),'$5 = '.$f->($5),'$^R = '.$f->($^R)); + push @ctl_n, $f->($^N); + push @plus, $f->($+); + ok($match); + if (not $match) { + # unset $str + @ctl_n = (); + @plus = (); + $str = ""; + } + iseq("@ctl_n", "1 2 undef"); + iseq("@plus", "1 2 undef"); + iseq($str, "\$1 = undef, \$2 = undef, \$3 = undef, \$4 = undef, \$5 = undef, \$^R = undef"); + } + } # # This should be the last test. #