scalars used in postponed subexpressions aren't first class regexps,

[p5sagit/p5-mst-13.2.git] / t / op / pat.t
diff --git a/t/op/pat.t b/t/op/pat.t

index a5b98f6..138e5b1 100755 (executable)
--- a/t/op/pat.t
+++ b/t/op/pat.t
@@ -2033,9 +2033,10 @@ $test = 687;
 # Force scalar context on the patern match
 sub ok ($;$) {
     my($ok, $name) = @_;
+    my $todo = $TODO ? " # TODO $TODO" : '';
 
     printf "%sok %d - %s\n", ($ok ? "" : "not "), $test,
-        ($name||$Message)."\tLine ".((caller)[2]);
+        ($name||$Message)."$todo\tLine ".((caller)[2]);
 
     printf "# Failed test at line %d\n", (caller)[2] unless $ok;
 
@@ -3074,12 +3075,15 @@ ok("A" =~ /\p{AsciiHexAndDash}/, "'A' is AsciiHexAndDash");
     ok($a !~ /^\C{4}y/,     q{don't match \C{4}y});
 }
 
-$_ = 'aaaaaaaaaa';
-utf8::upgrade($_); chop $_; $\="\n";
-ok(/[^\s]+/, "m/[^\s]/ utf8");
-ok(/[^\d]+/, "m/[^\d]/ utf8");
-ok(($a = $_, $_ =~ s/[^\s]+/./g), "s/[^\s]/ utf8");
-ok(($a = $_, $a =~ s/[^\d]+/./g), "s/[^\s]/ utf8");
+{
+    local $\;
+    $_ = 'aaaaaaaaaa';
+    utf8::upgrade($_); chop $_; $\="\n";
+    ok(/[^\s]+/, "m/[^\s]/ utf8");
+    ok(/[^\d]+/, "m/[^\d]/ utf8");
+    ok(($a = $_, $_ =~ s/[^\s]+/./g), "s/[^\s]/ utf8");
+    ok(($a = $_, $a =~ s/[^\d]+/./g), "s/[^\s]/ utf8");
+}
 
 ok("\x{100}" =~ /\x{100}/, "[perl #15397]");
 ok("\x{100}" =~ /(\x{100})/, "[perl #15397]");
@@ -3406,9 +3410,9 @@ if (!$ENV{PERL_SKIP_PSYCHO_TEST}){
     ok($utf8 =~ /(abc|\xe9)/i, "utf8/latin trie");
     ok($utf8 =~ /(abc|$latin1)/i, "utf8/latin trie runtime");
 
-    ok("\xe9" =~ /$utf8/i, "# TODO latin/utf8");
+    ok("\xe9" =~ /$utf8/i, "# latin/utf8");
     ok("\xe9" =~ /(abc|$utf8)/i, "# latin/utf8 trie");
-    ok($latin1 =~ /$utf8/i, "# TODO latin/utf8 runtime");
+    ok($latin1 =~ /$utf8/i, "# latin/utf8 runtime");
     ok($latin1 =~ /(abc|$utf8)/i, "# latin/utf8 trie runtime");
 }
 
@@ -3445,6 +3449,7 @@ if (!$ENV{PERL_SKIP_PSYCHO_TEST}){
 }
 
 {
+    local $TODO = "See changes 26925-26928, which reverted change 26410";
     package lv;
     $var = "abc";
     sub variable : lvalue { $var }
@@ -3453,16 +3458,17 @@ if (!$ENV{PERL_SKIP_PSYCHO_TEST}){
     my $o = bless [], "lv";
     my $f = "";
     eval { for (1..2) { $f .= $1 if $o->variable =~ /(.)/g } };
-    ok($f eq "ab", "pos retained between calls # TODO") or print "# $@\n";
+    ok($f eq "ab", "pos retained between calls") or print "# $@\n";
 }
 
 {
+    local $TODO = "See changes 26925-26928, which reverted change 26410";
     $var = "abc";
     sub variable : lvalue { $var }
 
     my $f = "";
     eval { for (1..2) { $f .= $1 if variable() =~ /(.)/g } };
-    ok($f eq "ab", "pos retained between calls # TODO") or print "# $@\n";
+    ok($f eq "ab", "pos retained between calls") or print "# $@\n";
 }
 
 # [perl #37836] Simple Regex causes SEGV when run on specific data
@@ -3692,13 +3698,14 @@ SKIP:{
 
 sub iseq($$;$) { 
     my ( $got, $expect, $name)=@_;
+    my $todo = $TODO ? " # TODO $TODO" : '';
     
     $_=defined($_) ? "'$_'" : "undef"
         for $got, $expect;
         
     my $ok=  $got eq $expect;
         
-    printf "%sok %d - %s\n", ($ok ? "" : "not "), $test,
+    printf "%sok %d - %s$todo\n", ($ok ? "" : "not "), $test,
         ($name||$Message)."\tLine ".((caller)[2]);
 
     printf "# Failed test at line %d\n".
@@ -4106,12 +4113,13 @@ for my $c ("z", "\0", "!", chr(254), chr(256)) {
     iseq($^R,'Nothing');
 }
 {
-    local $Message="RT#22395";
+    local $Message="RT 22395";
+    local $TODO = "Should be L+1 not L*(L+3)/2 (L=$l)";
     our $count;
     for my $l (10,100,1000) {
        $count=0;
        ('a' x $l) =~ /(.*)(?{$count++})[bc]/;
-       iseq( $count, $l + 1, "# TODO Should be L+1 not L*(L+3)/2 (L=$l)");
+       iseq( $count, $l + 1);
     }
 }
 {
@@ -4314,6 +4322,7 @@ sub kt
     iseq("$1$2","foobar");
 }
 {
+    local $Message = "HORIZWS";
     local $_="\t \r\n \n \t".chr(11)."\n";
     s/\H/H/g;
     s/\h/h/g;
@@ -4325,6 +4334,7 @@ sub kt
     iseq($_,"hhHHhHhhHH");
 }    
 {
+    local $Message = "Various whitespace special patterns";
     my @h=map { chr( $_ ) } (
         0x09,   0x20,   0xa0,   0x1680, 0x180e, 0x2000, 0x2001, 0x2002,
         0x2003, 0x2004, 0x2005, 0x2006, 0x2007, 0x2008, 0x2009, 0x200a,
@@ -4337,15 +4347,176 @@ sub kt
         my $ary=shift @$t;
         foreach my $pat (@$t) {
             foreach my $str (@$ary) {
-                ok($str=~/($pat)/);
-                iseq($1,$str);
+                ok($str=~/($pat)/,$pat);
+                iseq($1,$str,$pat);
                 utf8::upgrade($str);
-                ok($str=~/($pat)/);
-                iseq($1,$str);
+                ok($str=~/($pat)/,"Upgraded string - $pat");
+                iseq($1,$str,"Upgraded string - $pat");
+            }
+        }
+    }
+}
+{
+    local $Message = "Check that \\xDF match properly in its various forms";
+    # test that \xDF matches properly. this is pretty hacky stuff,
+    # but its actually needed. the malarky with '-' is to prevent
+    # compilation caching from playing any role in the test.
+    my @df= (chr(0xDF),'-',chr(0xDF));
+    utf8::upgrade($df[2]);
+    my @strs= ('ss','sS','Ss','SS',chr(0xDF));
+    my @ss= map { ("$_", "$_") } @strs;
+    utf8::upgrade($ss[$_*2+1]) for 0..$#strs;
+
+    for my $ssi (0..$#ss) {
+        for my $dfi (0..$#df) {
+            my $pat= $df[$dfi];
+            my $str= $ss[$ssi];
+            my $utf_df= ($dfi > 1) ? 'utf8' : '';
+            my $utf_ss= ($ssi % 2) ? 'utf8' : '';
+            (my $sstr=$str)=~s/\xDF/\\xDF/;
+
+            if ($utf_df || $utf_ss || length($ss[$ssi])==1) {
+                my $ret= $str=~/$pat/i;
+                next if $pat eq '-';
+                ok($ret, 
+                    "\"$sstr\"=~/\\xDF/i (str is @{[$utf_ss||'latin']}, pat is @{[$utf_df||'latin']})");
+            } else {
+                my $ret= $str !~ /$pat/i;
+                next if $pat eq '-';
+                ok($ret, 
+                    "\"$sstr\"!~/\\xDF/i (str is @{[$utf_ss||'latin']}, pat is @{[$utf_df||'latin']})");
             }
         }
     }
 }
+{
+    local $Message = "BBC(Bleadperl Breaks CPAN) Today: String::Multibyte";
+    my $re  = qr/(?:[\x00-\xFF]{4})/;
+    my $hyp = "\0\0\0-";
+    my $esc = "\0\0\0\\";
+
+    my $str = "$esc$hyp$hyp$esc$esc";
+    my @a = ($str =~ /\G(?:\Q$esc$esc\E|\Q$esc$hyp\E|$re)/g);
+
+    iseq(0+@a,3);
+    iseq(join('=', @a),"$esc$hyp=$hyp=$esc$esc");
+}
+# test for keys in %+ and %-
+{
+    my $_ = "abcdef";
+    /(?<foo>a)|(?<foo>b)/;
+    iseq( (join ",", sort keys %+), "foo" );
+    iseq( (join ",", sort keys %-), "foo" );
+    iseq( (join ",", sort values %+), "a" );
+    iseq( (join ",", sort map "@$_", values %-), "a " );
+    /(?<bar>a)(?<bar>b)(?<quux>.)/;
+    iseq( (join ",", sort keys %+), "bar,quux" );
+    iseq( (join ",", sort keys %-), "bar,quux" );
+    iseq( (join ",", sort values %+), "a,c" ); # leftmost
+    iseq( (join ",", sort map "@$_", values %-), "a b,c" );
+    /(?<un>a)(?<deux>c)?/; # second buffer won't capture
+    iseq( (join ",", sort keys %+), "un" );
+    iseq( (join ",", sort keys %-), "deux,un" );
+    iseq( (join ",", sort values %+), "a" );
+    iseq( (join ",", sort map "@$_", values %-), ",a" );
+}
+
+# length() on captures, the numbered ones end up in Perl_magic_len
+{
+    my $_ = "aoeu \xe6var ook";
+    /^ \w+ \s (?<eek>\S+)/x;
+
+    iseq( length($`), 0, 'length $`' );
+    iseq( length($'), 4, q[length $'] );
+    iseq( length($&), 9, 'length $&' );
+    iseq( length($1), 4, 'length $1' );
+    iseq( length($+{eek}), 4, 'length $+{eek} == length $1' );
+}
+
+{
+    my $ok=-1;
+
+    $ok=exists($-{x}) ? 1 : 0
+        if 'bar'=~/(?<x>foo)|bar/;
+    iseq($ok,1,'$-{x} exists after "bar"=~/(?<x>foo)|bar/');
+    iseq(scalar(%+), 0, 'scalar %+ == 0 after "bar"=~/(?<x>foo)|bar/');
+    iseq(scalar(%-), 1, 'scalar %- == 1 after "bar"=~/(?<x>foo)|bar/');
+
+    $ok=-1;
+    $ok=exists($+{x}) ? 1 : 0
+        if 'bar'=~/(?<x>foo)|bar/;
+    iseq($ok,0,'$+{x} not exists after "bar"=~/(?<x>foo)|bar/');
+    iseq(scalar(%+), 0, 'scalar %+ == 0 after "bar"=~/(?<x>foo)|bar/');
+    iseq(scalar(%-), 1, 'scalar %- == 1 after "bar"=~/(?<x>foo)|bar/');
+
+    $ok=-1;
+    $ok=exists($-{x}) ? 1 : 0
+        if 'foo'=~/(?<x>foo)|bar/;
+    iseq($ok,1,'$-{x} exists after "foo"=~/(?<x>foo)|bar/');
+    iseq(scalar(%+), 1, 'scalar %+ == 1 after "foo"=~/(?<x>foo)|bar/');
+    iseq(scalar(%-), 1, 'scalar %- == 1 after "foo"=~/(?<x>foo)|bar/');
+
+    $ok=-1;
+    $ok=exists($+{x}) ? 1 : 0
+        if 'foo'=~/(?<x>foo)|bar/;
+    iseq($ok,1,'$+{x} exists after "foo"=~/(?<x>foo)|bar/');
+}
+{
+    local $_;
+    ($_ = 'abc')=~/(abc)/g;
+    $_ = '123'; 
+    iseq("$1",'abc',"/g leads to unsafe match vars: $1");
+}
+{
+    local $Message="Message-ID: <20070818091501.7eff4831@r2d2>";
+    my $str= "";
+    for(0..5){
+        my @x;
+        $str .= "@x"; # this should ALWAYS be the empty string
+        'a'=~/(a|)/;
+        push @x,1;
+    }
+    iseq(length($str),"0","Trie scope error, string should be empty");
+    $str="";
+    my @foo = ('a')x5;
+    for (@foo) {
+        my @bar;
+        $str .= "@bar";
+        s/a|/push @bar, 1/e;
+    }
+    iseq(length($str),"0","Trie scope error, string should be empty");
+}
+{
+# [perl #45605] Regexp failure with utf8-flagged and byte-flagged string
+
+    my $utf_8 = "\xd6schel";
+    utf8::upgrade($utf_8);
+    $utf_8 =~ m{(\xd6|&Ouml;)schel};
+    iseq($1,"\xd6","#45605");
+}
+
+{
+    # Regardless of utf8ness any character matches itself when 
+    # doing a case insensitive match. See also [perl #36207] 
+    for my $o (0..255) {
+        my @ch=(chr($o),chr($o));
+        utf8::upgrade($ch[1]);
+        for my $u_str (0,1) {
+            for my $u_pat (0,1) {
+                ok( $ch[$u_str]=~/\Q$ch[$u_pat]\E/i,
+                    "\$c=~/\$c/i : chr($o) : u_str=$u_str u_pat=$u_pat");
+                ok( $ch[$u_str]=~/\Q$ch[$u_pat]\E|xyz/i,
+                "# \$c=~/\$c|xyz/i : chr($o) : u_str=$u_str u_pat=$u_pat");
+            }
+        }
+    }
+}
+
+{
+    my $a = 3; "" =~ /(??{ $a })/;
+    my $b = $a;
+    iseq($b, $a, "copy of scalar used for postponed subexpression");
+}
 
 # Test counter is at bottom of file. Put new tests above here.
 #-------------------------------------------------------------------
@@ -4392,44 +4563,19 @@ ok($@=~/\QSequence \k... not terminated in regex;\E/);
     iseq($_,"!Bang!1!Bang!2!Bang!3!Bang!");
 }
 
-# test for keys in %+ and %-
-{
-    my $_ = "abcdef";
-    /(?<foo>a)|(?<foo>b)/;
-    iseq( (join ",", sort keys %+), "foo" );
-    iseq( (join ",", sort keys %-), "foo" );
-    iseq( (join ",", sort values %+), "a" );
-    iseq( (join ",", sort map "@$_", values %-), "a " );
-    /(?<bar>a)(?<bar>b)(?<quux>.)/;
-    iseq( (join ",", sort keys %+), "bar,quux" );
-    iseq( (join ",", sort keys %-), "bar,quux" );
-    iseq( (join ",", sort values %+), "a,c" ); # leftmost
-    iseq( (join ",", sort map "@$_", values %-), "a b,c" );
-    /(?<un>a)(?<deux>c)?/; # second buffer won't capture
-    iseq( (join ",", sort keys %+), "un" );
-    iseq( (join ",", sort keys %-), "deux,un" );
-    iseq( (join ",", sort values %+), "a" );
-    iseq( (join ",", sort map "@$_", values %-), ",a" );
-}
+# [perl #45337] utf8 + "[a]a{2}" + /$.../ = panic: sv_len_utf8 cache
 
-# length() on captures, these end up in Perl_magic_len
 {
-    my $_ = "aoeu \xe6var ook";
-    /^ \w+ \s (?<eek>\S+)/x;
-
-    iseq( length($`), 0, 'length $`' );
-    iseq( length($'), 4, q[length $'] );
-    iseq( length($&), 9, 'length $&' );
-    iseq( length($1), 4, 'length $1' );
-    iseq( length($+{eek}), 4, 'length $+{eek} == length $1' );
+    local ${^UTF8CACHE} = -1;
+    my $s="[a]a{2}";
+    utf8::upgrade $s;
+    ok("aaa" =~ /$s/, "#45337");
 }
 
 # Put new tests above the dotted line about a page above this comment
 iseq(0+$::test,$::TestCount,"Got the right number of tests!");
 # Don't forget to update this!
 BEGIN {
-    $::TestCount = 1928;
+    $::TestCount = 4014;
     print "1..$::TestCount\n";
 }
-
-