fix unicode split /\s+/

[p5sagit/p5-mst-13.2.git] / t / op / pat.t
diff --git a/t/op/pat.t b/t/op/pat.t

index f42847f..94703c1 100755 (executable)
--- a/t/op/pat.t
+++ b/t/op/pat.t
@@ -3665,27 +3665,28 @@ SKIP:{
 }
 
 {
-    if (ord("A") == 193) {
-       for (1..10) {
-           print "ok $test # Skip: in EBCDIC";
-           $test++;
-       }
-    } else {
-       use utf8;
-       # ñ = U+00F1 (n-tilde)
-       # ̧ = U+0327 (cedilla)
-       # ² = U+00B2 (superscript two)
-
-       ok("..foo foo.." =~ /(?'ñ'foo) \k<ñ>/, 'Named capture UTF');
-       ok($+{ñ} eq 'foo', 'Named capture UTF');
-       ok("..bar bar.." =~ /(?<_ñ>bar) \k'_ñ'/, 'Named capture UTF');
-       ok($+{_ñ} eq 'bar', 'Named capture UTF');
-       ok("..abc abc.." =~ /(?'ç'abc) \k'ç'/, 'Named capture UTF');
-       ok($+{ç} eq 'abc', 'Named capture UTF');
-       ok("..xyz xyz.." =~ /(?'ņ̃'xyz) \k'ņ̃'/, 'Named capture UTF');
-       ok($+{ņ̃} eq 'xyz', 'Named capture UTF');
-       ok("..456 456.." =~ /(?<a²>456) \k'a²'/, 'Named capture UTF');
-       ok($+{a²} eq '456', 'Named capture UTF');
+    my @ary = (
+       pack('U', 0x00F1),            # n-tilde
+       '_'.pack('U', 0x00F1),        # _ + n-tilde
+       'c'.pack('U', 0x0327),        # c + cedilla
+       pack('U*', 0x00F1, 0x0327),   # n-tilde + cedilla
+       'a'.pack('U', 0x00B2),        # a + superscript two
+       pack('U', 0x0391),            # ALPHA
+       pack('U', 0x0391).'2',        # ALPHA + 2
+       pack('U', 0x0391).'_',        # ALPHA + _
+    );
+    for my $uni (@ary) {
+       my ($r1, $c1, $r2, $c2) = eval qq{
+           use utf8;
+           scalar("..foo foo.." =~ /(?'${uni}'foo) \\k'${uni}'/),
+               \$+{${uni}},
+           scalar("..bar bar.." =~ /(?<${uni}>bar) \\k<${uni}>/),
+               \$+{${uni}};
+       };
+       ok($r1,                         "Named capture UTF (?'')");
+       ok(defined $c1 && $c1 eq 'foo', "Named capture UTF \%+");
+       ok($r2,                         "Named capture UTF (?<>)");
+       ok(defined $c2 && $c2 eq 'bar', "Named capture UTF \%+");
     }
 }
 
@@ -3744,7 +3745,24 @@ sub iseq($$;$) {
     ';
     ok(!$@,'lvalue $+{...} should not throw an exception');
 }
-
+{
+    my $s='foo bar baz';
+    my @res;
+    if ('1234'=~/(?<A>1)(?<B>2)(?<A>3)(?<B>4)/) {
+        foreach my $name (sort keys(%-)) {
+            my $ary = $-{$name};
+            foreach my $idx (0..$#$ary) {
+                push @res,"$name:$idx:$ary->[$idx]";
+            }
+        }
+    }
+    my @expect=qw(A:0:1 A:1:3 B:0:2 B:1:4);
+    iseq("@res","@expect","Check %-");
+    eval'
+        print for $-{this_key_doesnt_exist};
+    ';
+    ok(!$@,'lvalue $-{...} should not throw an exception');
+}
 # stress test CURLYX/WHILEM.
 #
 # This test includes varying levels of nesting, and according to
@@ -3895,6 +3913,25 @@ for my $c ("z", "\0", "!", chr(254), chr(256)) {
     1 while /.(??{'(*PRUNE)'})(?{$count++})(*FAIL)/g;
     iseq($count,4,"/.(*PRUNE)/");
 }
+{   # Test the \v form of the (*PRUNE) pattern
+    our $count = 0;
+    'aaab'=~/a+b?(?{$count++})(*FAIL)/;
+    iseq($count,9,"expect 9 for no \\v");
+    $count = 0;
+    'aaab'=~/a+b?\v(?{$count++})(*FAIL)/;
+    iseq($count,3,"expect 3 with \\v");
+    local $_='aaab';
+    $count=0;
+    1 while /.\v(?{$count++})(*FAIL)/g;
+    iseq($count,4,"/.\\v/");
+    $count = 0;
+    'aaab'=~/a+b?(??{'\v'})(?{$count++})(*FAIL)/;
+    iseq($count,3,"expect 3 with \\v");
+    local $_='aaab';
+    $count=0;
+    1 while /.(??{'\v'})(?{$count++})(*FAIL)/g;
+    iseq($count,4,"/.\\v/");
+}
 {   # Test the (*SKIP) pattern
     our $count = 0;
     'aaab'=~/a+b?(*SKIP)(?{$count++})(*FAIL)/;
@@ -3910,6 +3947,21 @@ for my $c ("z", "\0", "!", chr(254), chr(256)) {
     iseq($count,2,"Expect 2 with (*SKIP)" );
     iseq("@res","aaab aaab","adjacent (*SKIP) works as expected" );
 }
+{   # Test the \V form of the (*SKIP) pattern
+    our $count = 0;
+    'aaab'=~/a+b?\V(?{$count++})(*FAIL)/;
+    iseq($count,1,"expect 1 with \\V");
+    local $_='aaab';
+    $count=0;
+    1 while /.\V(?{$count++})(*FAIL)/g;
+    iseq($count,4,"/.\\V/");
+    $_='aaabaaab';
+    $count=0;
+    our @res=();
+    1 while /(a+b?)\V(?{$count++; push @res,$1})(*FAIL)/g;
+    iseq($count,2,"Expect 2 with \\V" );
+    iseq("@res","aaab aaab","adjacent \\V works as expected" );
+}
 {   # Test the (*SKIP) pattern
     our $count = 0;
     'aaab'=~/a+b?(*MARK:foo)(*SKIP)(?{$count++})(*FAIL)/;
@@ -4190,6 +4242,22 @@ for my $c ("z", "\0", "!", chr(254), chr(256)) {
     ok(!$REGMARK);
     iseq($REGERROR,'foo');
 }
+{
+    my $x;
+    $x = "abc.def.ghi.jkl";
+    $x =~ s/.*\K\..*//;
+    ok($x eq "abc.def.ghi");
+    
+    $x = "one two three four";
+    $x =~ s/o+ \Kthree//g;
+    ok($x eq "one two  four");
+    
+    $x = "abcde";
+    $x =~ s/(.)\K/$1/g;
+    ok($x eq "aabbccddee");
+}
+
+
 # Test counter is at bottom of file. Put new tests above here.
 #-------------------------------------------------------------------
 # Keep the following tests last -- they may crash perl
@@ -4239,7 +4307,7 @@ ok($@=~/\QSequence \k... not terminated in regex;\E/);
 iseq(0+$::test,$::TestCount,"Got the right number of tests!");
 # Don't forget to update this!
 BEGIN {
-    $::TestCount = 1584; 
+    $::TestCount = 1620;
     print "1..$::TestCount\n";
 }