Re: Regex-Unicode bugs

[p5sagit/p5-mst-13.2.git] / t / op / pat.t
diff --git a/t/op/pat.t b/t/op/pat.t

index a00e624..82749a0 100755 (executable)
--- a/t/op/pat.t
+++ b/t/op/pat.t
@@ -6,7 +6,7 @@
 
 $| = 1;
 
-print "1..864\n";
+print "1..908\n";
 
 BEGIN {
     chdir 't' if -d 't';
@@ -2730,3 +2730,102 @@ print "# some Unicode properties\n";
     print $u eq "feeber" ? "ok 864\n" : "not ok 864\n";
 }
 
+{  # Tests 865-892: m// and s/// where only one of pattern/target is UTF-8.
+    print "# UTF-8 bug with s///\n";
+    # Check utf8/non-utf8 mixtures of pattern and target string, and
+    # try to force all float/anchored check combinations.
+    my $c = "\x{100}";  # code point above 0xFF: any string holding it is UTF-8
+    my $test = 865;  # running test number, incremented after every check
+    my $subst;  # scratch copy of the target that s/// operates on
+    for my $re (  # every pattern here requires $c, so none can match "xxx"
+       "xx.*$c", "x.*$c$c", "$c.*xx", "$c$c.*x", "xx.*(?=$c)", "(?=$c).*xx",
+    ) {
+       print "xxx" =~ /$re/ ? "not ok $test\n" : "ok $test\n";  # a match is the failure
+       ++$test;
+       print +($subst = "xxx") =~ s/$re// ? "not ok $test\n" : "ok $test\n";  # so is a substitution
+       ++$test;
+    }
+    for my $re ("xx.*$c*", "$c*.*xx") {  # $c is optional here: "xxx" matches in full
+       print "xxx" =~ /$re/ ? "ok $test\n" : "not ok $test\n";
+       ++$test;
+       ($subst = "xxx") =~ s/$re//;  # must delete the whole string
+       print $subst eq '' ? "ok $test\n" : "not ok $test\t# $subst\n";
+       ++$test;
+    }
+    for my $re ("xxy*", "y*xx") {  # non-UTF-8 patterns against UTF-8 targets
+       print "xx$c" =~ /$re/ ? "ok $test\n" : "not ok $test\n";
+       ++$test;
+       ($subst = "xx$c") =~ s/$re//;  # removes the leading "xx", leaving only $c
+       print $subst eq $c ? "ok $test\n" : "not ok $test\n";
+       ++$test;
+       print "xy$c" =~ /$re/ ? "not ok $test\n" : "ok $test\n";  # no "xx" in target: must not match
+       ++$test;
+       print +($subst = "xy$c") =~ s/$re// ? "not ok $test\n" : "ok $test\n";  # s/// must not substitute either
+       ++$test;
+    }
+    for my $re ("xy$c*z", "x$c*yz") {  # UTF-8 patterns whose $c part is optional
+       print "xyz" =~ /$re/ ? "ok $test\n" : "not ok $test\n";
+       ++$test;
+       ($subst = "xyz") =~ s/$re//;  # must delete the whole string
+       print $subst eq '' ? "ok $test\n" : "not ok $test\n";
+       ++$test;
+    }
+}
+
+
+{  # Tests 893-903: strings used as hash keys/values must still match \w+.
+    print "# Unicode hash keys and \\w\n";
+    # This is not really a regex test, but regexes bring out the issue
+    # nicely: keys and values read back from %u must still match /^\w+$/.
+    use strict;
+    my $test = 893;
+    my $u3 = "f\x{df}\x{100}";  # "f", U+00DF, U+0100 (the last one is above 0xFF)
+    my $u2 = substr($u3,0,2);  # first two characters of $u3
+    my $u1 = substr($u2,0,1);  # first character only
+    my %u = ( $u1 => $u1, $u2 => $u2, $u3 => $u3 );  # every key maps to itself
+
+    for (keys %u) {  # three keys -> tests 893-895
+       print /^\w+$/ && $u{$_} =~ /^\w+$/ ?
+           "ok $test\n" : "not ok $test\n";
+       $test++;
+   }
+
+    for (each %u) {  # list-context each yields one (key, value) pair -> tests 896-897
+       print /^\w+$/ && $u{$_} =~ /^\w+$/ ?
+           "ok $test\n" : "not ok $test\n";
+       $test++;
+   }
+
+    for (%u) {  # hash flattened to three keys plus three values -> tests 898-903
+       print /^\w+$/ && $u{$_} =~ /^\w+$/ ?
+           "ok $test\n" : "not ok $test\n";
+       $test++;
+   }
+}
+
+{  # Tests 904-906: a qr//x object containing a #-comment, used three ways.
+    print "# qr/.../x\n";
+    my $test = 904;
+
+    my $R = qr/ A B C # D E/x;  # under /x spaces are ignored and "# D E" is a comment
+
+    print eval {"ABCDE" =~ $R} ? "ok $test\n" : "not ok $test\n";  # used directly; a die also yields "not ok"
+    $test++;
+
+    print eval {"ABCDE" =~ m/$R/} ? "ok $test\n" : "not ok $test\n";  # interpolated into m//
+    $test++;
+
+    print eval {"ABCDE" =~ m/($R)/} ? "ok $test\n" : "not ok $test\n";  # interpolated inside a capture group
+    $test++;
+}
+
+{  # Tests 907-908: patterns naming illegal properties must not produce a true result.
+    print "# illegal Unicode properties\n";
+    my $test = 907;
+
+    print eval { "a" =~ /\pq / }      ? "not ok $test\n" : "ok $test\n";  # short form \pq; eval gives false on die or on no match
+    $test++;
+
+    print eval { "a" =~ /\p{qrst} / } ? "not ok $test\n" : "ok $test\n";  # braced form \p{qrst}, same expectation
+    $test++;
+}