One more iteration of the ibcmp_utf8() interface,

[p5sagit/p5-mst-13.2.git] / t / op / pat.t
diff --git a/t/op/pat.t b/t/op/pat.t

index 7a88b04..5cdb2e5 100755 (executable)
--- a/t/op/pat.t
+++ b/t/op/pat.t
@@ -6,7 +6,7 @@
 
 $| = 1;
 
-print "1..792\n";
+print "1..834\n";
 
 BEGIN {
     chdir 't' if -d 't';
@@ -2380,11 +2380,7 @@ print "# some Unicode properties\n";
     print "# GREEK CAPITAL LETTER SIGMA vs COMBINING GREEK PERISPOMENI\n";
 
     my $SIGMA = "\N{GREEK CAPITAL LETTER SIGMA}";
-
-    my $hSIGMA = sprintf "%04x", ord $SIGMA;
-    
-    my $char = "\N{COMBINING GREEK PERISPOMENI}";
-    my $code = sprintf "%04x", ord($char);
+    my $char  = "\N{COMBINING GREEK PERISPOMENI}";
 
     # Before #13843 this was failing by matching falsely.
     print "_:$char:_" =~ m/_:$SIGMA:_/i ? "not ok 786\n" : "ok 786\n";
@@ -2395,12 +2391,190 @@ print "# some Unicode properties\n";
 
     use charnames ':full';
 
-    print "a!"              =~ /\X!/ ? "ok 787\n" : "not ok 787\n";
-    print "\xDF!"           =~ /\X!/ ? "ok 788\n" : "not ok 788\n";
-    print "\x{100}!"        =~ /\X!/ ? "ok 789\n" : "not ok 789\n";
-    print "\x{100}\x{300}!" =~ /\X!/ ? "ok 790\n" : "not ok 790\n";
-    print "\N{LATIN CAPITAL LETTER E}!" =~ /\X!/ ?
-       "ok 791\n" : "not ok 791\n";
-    print "\N{LATIN CAPITAL LETTER E}\N{COMBINING GRAVE ACCENT}!" =~ /\X!/ ?
-       "ok 792\n" : "not ok 792\n";
+    print "a!"              =~ /^(\X)!/ && $1 eq "a" ?
+       "ok 787\n" : "not ok 787 # $1\n";
+    print "\xDF!"           =~ /^(\X)!/ && $1 eq "\xDF" ?
+       "ok 788\n" : "not ok 788 # $1\n";
+    print "\x{100}!"        =~ /^(\X)!/ && $1 eq "\x{100}" ?
+       "ok 789\n" : "not ok 789 # $1\n";
+    print "\x{100}\x{300}!" =~ /^(\X)!/ && $1 eq "\x{100}\x{300}" ?
+       "ok 790\n" : "not ok 790 # $1\n";
+    print "\N{LATIN CAPITAL LETTER E}!" =~ /^(\X)!/ &&
+       $1 eq "\N{LATIN CAPITAL LETTER E}" ?
+       "ok 791\n" : "not ok 791 # $1\n";
+    print "\N{LATIN CAPITAL LETTER E}\N{COMBINING GRAVE ACCENT}!" =~
+       /^(\X)!/ &&
+       $1 eq "\N{LATIN CAPITAL LETTER E}\N{COMBINING GRAVE ACCENT}" ?
+       "ok 792\n" : "not ok 792 # $1\n";
+}
+
+{
+    print "#\\C and \\X\n";
+
+    print "!abc!" =~ /a\Cc/ ? "ok 793\n" : "not ok 793\n";
+    print "!abc!" =~ /a\Xc/ ? "ok 794\n" : "not ok 794\n";
+}
+
+{
+    print "# FINAL SIGMA\n";
+
+    my $SIGMA = "\x{03A3}"; # CAPITAL
+    my $Sigma = "\x{03C2}"; # SMALL FINAL
+    my $sigma = "\x{03C3}"; # SMALL
+
+    print $SIGMA =~ /$SIGMA/i ? "ok 795\n" : "not ok 795\n";
+    print $SIGMA =~ /$Sigma/i ? "ok 796\n" : "not ok 796\n";
+    print $SIGMA =~ /$sigma/i ? "ok 797\n" : "not ok 797\n";
+
+    print $Sigma =~ /$SIGMA/i ? "ok 798\n" : "not ok 798\n";
+    print $Sigma =~ /$Sigma/i ? "ok 799\n" : "not ok 799\n";
+    print $Sigma =~ /$sigma/i ? "ok 800\n" : "not ok 800\n";
+
+    print $sigma =~ /$SIGMA/i ? "ok 801\n" : "not ok 801\n";
+    print $sigma =~ /$Sigma/i ? "ok 802\n" : "not ok 802\n";
+    print $sigma =~ /$sigma/i ? "ok 803\n" : "not ok 803\n";
+    
+    print $SIGMA =~ /[$SIGMA]/i ? "ok 804\n" : "not ok 804\n";
+    print $SIGMA =~ /[$Sigma]/i ? "ok 805\n" : "not ok 805\n";
+    print $SIGMA =~ /[$sigma]/i ? "ok 806\n" : "not ok 806\n";
+
+    print $Sigma =~ /[$SIGMA]/i ? "ok 807\n" : "not ok 807\n";
+    print $Sigma =~ /[$Sigma]/i ? "ok 808\n" : "not ok 808\n";
+    print $Sigma =~ /[$sigma]/i ? "ok 809\n" : "not ok 809\n";
+
+    print $sigma =~ /[$SIGMA]/i ? "ok 810\n" : "not ok 810\n";
+    print $sigma =~ /[$Sigma]/i ? "ok 811\n" : "not ok 811\n";
+    print $sigma =~ /[$sigma]/i ? "ok 812\n" : "not ok 812\n";
+}
+
+{
+    print "# parlez-vous?\n";
+
+    use charnames ':full';
+
+    print "fran\N{LATIN SMALL LETTER C}ais" =~
+         /fran.ais/ &&
+       $& eq "francais" ?
+       "ok 813\n" : "not ok 813\n";
+
+    print "fran\N{LATIN SMALL LETTER C WITH CEDILLA}ais" =~
+         /fran.ais/ &&
+       $& eq "fran\N{LATIN SMALL LETTER C WITH CEDILLA}ais" ?
+       "ok 814\n" : "not ok 814\n";
+
+    print "fran\N{LATIN SMALL LETTER C}ais" =~
+          /fran\Cais/ &&
+        $& eq "francais" ?
+       "ok 815\n" : "not ok 815\n";
+
+    print "franc\N{COMBINING CEDILLA}ais" =~
+         /franc\C\Cais/ ? # COMBINING CEDILLA is two bytes when encoded
+       "ok 816\n" : "not ok 816\n";
+
+    print "fran\N{LATIN SMALL LETTER C}ais" =~
+         /fran\Xais/ &&
+       $& eq "francais" ?
+       "ok 817\n" : "not ok 817\n";
+
+    print "fran\N{LATIN SMALL LETTER C WITH CEDILLA}ais" =~
+         /fran\Xais/  &&
+        $& eq "fran\N{LATIN SMALL LETTER C WITH CEDILLA}ais" ?
+       "ok 818\n" : "not ok 818\n";
+
+    print "franc\N{COMBINING CEDILLA}ais" =~
+         /fran\Xais/ &&
+         $& eq "franc\N{COMBINING CEDILLA}ais" ?
+        "ok 819\n" : "not ok 819\n";
+
+    print "fran\N{LATIN SMALL LETTER C WITH CEDILLA}ais" =~
+         /fran\N{LATIN SMALL LETTER C WITH CEDILLA}ais/  &&
+        $& eq "fran\N{LATIN SMALL LETTER C WITH CEDILLA}ais" ?
+       "ok 820\n" : "not ok 820\n";
+
+    print "franc\N{COMBINING CEDILLA}ais" =~
+         /franc\N{COMBINING CEDILLA}ais/  &&
+        $& eq "franc\N{COMBINING CEDILLA}ais" ?
+       "ok 821\n" : "not ok 821\n";
+
+    print "fran\N{LATIN SMALL LETTER C}ais" =~
+         /fran(?:c\N{COMBINING CEDILLA}?|\N{LATIN SMALL LETTER C WITH CEDILLA})ais/ &&
+       $& eq "francais" ?
+       "ok 822\n" : "not ok 822\n";
+
+    print "fran\N{LATIN SMALL LETTER C}ais" =~
+         /fran(?:c\N{COMBINING CEDILLA}?|\N{LATIN SMALL LETTER C WITH CEDILLA})ais/ &&
+       $& eq "francais" ?
+       "ok 823\n" : "not ok 823\n";
+
+    print "fran\N{LATIN SMALL LETTER C WITH CEDILLA}ais" =~
+         /fran(?:c\N{COMBINING CEDILLA}?|\N{LATIN SMALL LETTER C WITH CEDILLA})ais/ &&
+       $& eq "fran\N{LATIN SMALL LETTER C WITH CEDILLA}ais" ?
+       "ok 824\n" : "not ok 824\n";
+
+    print "franc\N{COMBINING CEDILLA}ais" =~
+         /fran(?:c\N{COMBINING CEDILLA}?|\N{LATIN SMALL LETTER C WITH CEDILLA})ais/ &&
+       $& eq "franc\N{COMBINING CEDILLA}ais" ?
+       "ok 825\n" : "not ok 825\n";
+}
+
+{
+    print "# Does lingering (and useless) UTF8 flag mess up /i matching?\n";
+
+    {
+       my $regex  = "ABcde";
+       my $string = "abcDE\x{100}";
+       chop($string);
+       if ($string =~ m/$regex/i) {
+           print "ok 826\n";
+       } else {
+           print "not ok 826\n";
+       }
+    }
+
+    {
+       my $regex  = "ABcde\x{100}";
+       my $string = "abcDE";
+       chop($regex);
+       if ($string =~ m/$regex/i) {
+           print "ok 827\n";
+       } else {
+           print "not ok 827\n";
+       }
+    }
+
+    {
+       my $regex  = "ABcde\x{100}";
+       my $string = "abcDE\x{100}";
+       chop($regex);
+       chop($string);
+       if ($string =~ m/$regex/i) {
+           print "ok 828\n";
+       } else {
+           print "not ok 828\n";
+       }
+    }
+}
+
+{
+    print "# more SIGMAs\n";
+
+    my $SIGMA = "\x{03A3}"; # CAPITAL
+    my $Sigma = "\x{03C2}"; # SMALL FINAL
+    my $sigma = "\x{03C3}"; # SMALL
+
+    my $S3 = "$SIGMA$Sigma$sigma";
+
+    print ":$S3:" =~ /:(($SIGMA)+):/i   && $1 eq $S3 && $2 eq $sigma ?
+       "ok 829\n" : "not ok 829\n";
+    print ":$S3:" =~ /:(($Sigma)+):/i   && $1 eq $S3 && $2 eq $sigma ?
+       "ok 830\n" : "not ok 830\n";
+    print ":$S3:" =~ /:(($sigma)+):/i   && $1 eq $S3 && $2 eq $sigma ?
+       "ok 831\n" : "not ok 831\n";
+
+    print ":$S3:" =~ /:(([$SIGMA])+):/i && $1 eq $S3 && $2 eq $sigma ?
+       "ok 832\n" : "not ok 832\n";
+    print ":$S3:" =~ /:(([$Sigma])+):/i && $1 eq $S3 && $2 eq $sigma ?
+       "ok 833\n" : "not ok 833\n";
+    print ":$S3:" =~ /:(([$sigma])+):/i && $1 eq $S3 && $2 eq $sigma ?
+       "ok 834\n" : "not ok 834\n";
 }