Integrate mainline

[p5sagit/p5-mst-13.2.git] / t / op / pat.t
diff --git a/t/op/pat.t b/t/op/pat.t

index 2531d71..2042f39 100755 (executable)
--- a/t/op/pat.t
+++ b/t/op/pat.t
@@ -6,7 +6,7 @@
 
 $| = 1;
 
-print "1..634\n";
+print "1..715\n";
 
 BEGIN {
     chdir 't' if -d 't';
@@ -1129,6 +1129,8 @@ print "not " unless "A \x{263a} B z C" =~ /A . B (??{ "z" }) C/;
 print "ok $test\n";
 $test++;
 
+my $ordA = ord('A');
+
 $_ = "a\x{100}b";
 if (/(.)(\C)(\C)(.)/) {
   print "ok 232\n";
@@ -1137,15 +1139,32 @@ if (/(.)(\C)(\C)(.)/) {
   } else {
     print "not ok 233\n";
   }
-  if ($2 eq "\xC4") {
-    print "ok 234\n";
-  } else {
-    print "not ok 234\n";
-  }
-  if ($3 eq "\x80") {
-    print "ok 235\n";
+  if ($ordA == 65) { # ASCII (or equivalent), should be UTF-8
+      if ($2 eq "\xC4") {
+         print "ok 234\n";
+      } else {
+         print "not ok 234\n";
+      }
+      if ($3 eq "\x80") {
+         print "ok 235\n";
+      } else {
+         print "not ok 235\n";
+      }
+  } elsif ($ordA == 193) { # EBCDIC (or equivalent), should be UTF-EBCDIC
+      if ($2 eq "\x8C") {
+         print "ok 234\n";
+      } else {
+         print "not ok 234\n";
+      }
+      if ($3 eq "\x41") {
+         print "ok 235\n";
+      } else {
+         print "not ok 235\n";
+      }
   } else {
-    print "not ok 235\n";
+      for (234..235) {
+         print "not ok $_ # ord('A') == $ordA\n";
+      }
   }
   if ($4 eq "b") {
     print "ok 236\n";
@@ -1161,10 +1180,20 @@ $_ = "\x{100}";
 if (/(\C)/g) {
   print "ok 237\n";
   # currently \C are still tagged as UTF-8
-  if ($1 eq "\xC4") {
-    print "ok 238\n";
+  if ($ordA == 65) {
+      if ($1 eq "\xC4") {
+         print "ok 238\n";
+      } else {
+         print "not ok 238\n";
+      }
+  } elsif ($ordA == 193) {
+      if ($1 eq "\x8C") {
+         print "ok 238\n";
+      } else {
+         print "not ok 238\n";
+      }
   } else {
-    print "not ok 238\n";
+      print "not ok 238 # ord('A') == $ordA\n";
   }
 } else {
   for (237..238) {
@@ -1174,10 +1203,20 @@ if (/(\C)/g) {
 if (/(\C)/g) {
   print "ok 239\n";
   # currently \C are still tagged as UTF-8
-  if ($1 eq "\x80") {
-    print "ok 240\n";
+  if ($ordA == 65) {
+      if ($1 eq "\x80") {
+         print "ok 240\n";
+      } else {
+         print "not ok 240\n";
+      }
+  } elsif ($ordA == 193) {
+      if ($1 eq "\x41") {
+         print "ok 240\n";
+      } else {
+         print "not ok 240\n";
+      }
   } else {
-    print "not ok 240\n";
+      print "not ok 240 # ord('A') == $ordA\n";
   }
 } else {
   for (239..240) {
@@ -1248,7 +1287,7 @@ print "ok 247\n";
            "#latin[$latin]\nnot ok $test\n";
        $test++;
        $latin =~ s/stra\337e/straße/; # \303\237 after the 2nd a
-       use utf8;
+       use utf8; # needed for the raw UTF-8
        $latin =~ s!(s)tr(?:aß|s+e)!$1tr.!; # \303\237 after the a
     }
 }
@@ -1796,3 +1835,295 @@ if(test_o('abc','..(.)') eq 'a') {
     print "not ok 634\n";
 }
 
+# 635..639: ID 20010619.003 (of the whitespace characters tested here,
+# only the space itself is supposed to match [:print:]).
+
+print "not " if "\n"     =~ /[[:print:]]/;   # newline
+print "ok 635\n";
+
+print "not " if "\t"     =~ /[[:print:]]/;   # horizontal tab
+print "ok 636\n";
+
+# "\014" is the form feed (not the vertical tab); same code in ASCII and EBCDIC.
+print "not " if "\014"  =~ /[[:print:]]/;
+print "ok 637\n";
+
+print "not " if "\r"    =~ /[[:print:]]/;    # carriage return
+print "ok 638\n";
+
+print "not " unless " " =~ /[[:print:]]/;    # the space itself must match
+print "ok 639\n";
+
+## $^N holds the text of the most recently closed capture group of the
+## last successful match.  Test basic $^N usage outside of a regex
+## (the target string is "abcdef" throughout).
+$x = "abcdef";
+$T="ok 640\n";if ($x =~ /cde/ and not defined $^N)         {print $T} else {print "not $T"};
+$T="ok 641\n";if ($x =~ /(cde)/          and $^N eq "cde") {print $T} else {print "not $T"};
+$T="ok 642\n";if ($x =~ /(c)(d)(e)/      and $^N eq   "e") {print $T} else {print "not $T"};
+$T="ok 643\n";if ($x =~ /(c(d)e)/        and $^N eq "cde") {print $T} else {print "not $T"};
+$T="ok 644\n";if ($x =~ /(foo)|(c(d)e)/  and $^N eq "cde") {print $T} else {print "not $T"};
+$T="ok 645\n";if ($x =~ /(c(d)e)|(foo)/  and $^N eq "cde") {print $T} else {print "not $T"};
+$T="ok 646\n";if ($x =~ /(c(d)e)|(abc)/  and $^N eq "abc") {print $T} else {print "not $T"};
+$T="ok 647\n";if ($x =~ /(c(d)e)|(abc)x/ and $^N eq "cde") {print $T} else {print "not $T"};
+$T="ok 648\n";if ($x =~ /(c(d)e)(abc)?/  and $^N eq "cde") {print $T} else {print "not $T"};
+$T="ok 649\n";if ($x =~ /(?:c(d)e)/      and $^N eq  "d" ) {print $T} else {print "not $T"};
+$T="ok 650\n";if ($x =~ /(?:c(d)e)(?:f)/ and $^N eq  "d" ) {print $T} else {print "not $T"};
+$T="ok 651\n";if ($x =~ /(?:([abc])|([def]))*/ and $^N eq  "f" ){print $T} else {print "not $T"};
+$T="ok 652\n";if ($x =~ /(?:([ace])|([bdf]))*/ and $^N eq  "f" ){print $T} else {print "not $T"};
+$T="ok 653\n";if ($x =~ /(([ace])|([bd]))*/    and $^N eq  "e" ){print $T} else {print "not $T"};
+{
+ $T="ok 654\n";if($x =~ /(([ace])|([bdf]))*/   and $^N eq  "f" ){print $T} else {print "not $T"};
+}
+## Test that $^N is automatically localized: the match for test 654 ran
+## inside a bare block, so $^N should again have the value set in test 653.
+$T="ok 655\n";if ($^N eq  "e" ){print $T} else {print "not $T"};
+
+##
+## Now test $^N inside (?{...}): $y and $z record its value mid-match
+##
+$T="ok 656\n";if ($x =~ /a([abc])(?{$y=$^N})c/      and $y eq "b" ){print $T} else {print "not $T"};
+$T="ok 657\n";if ($x =~ /a([abc]+)(?{$y=$^N})d/     and $y eq "bc"){print $T} else {print "not $T"};
+$T="ok 658\n";if ($x =~ /a([abcdefg]+)(?{$y=$^N})d/ and $y eq "bc"){print $T} else {print "not $T"};
+$T="ok 659\n";if ($x =~ /(a([abcdefg]+)(?{$y=$^N})d)(?{$z=$^N})e/ and $y eq "bc" and $z eq "abcd")
+              {print $T} else {print "not $T"};
+$T="ok 660\n";if ($x =~ /(a([abcdefg]+)(?{$y=$^N})de)(?{$z=$^N})/ and $y eq "bc" and $z eq "abcde")
+              {print $T} else {print "not $T"};
+
+# Test the Unicode script classes: \p{InX} must match, \P{InX} is its negation
+
+print "not " unless chr(0x100) =~ /\p{InLatin}/; # outside Latin-1
+print "ok 661\n";
+
+print "not " unless chr(0x212b) =~ /\p{InLatin}/; # Angstrom sign, very outside
+print "ok 662\n";
+
+print "not " unless chr(0x5d0) =~ /\p{InHebrew}/; # inside HebrewBlock
+print "ok 663\n";
+
+print "not " unless chr(0xfb4f) =~ /\p{InHebrew}/; # outside HebrewBlock
+print "ok 664\n";
+
+print "not " unless chr(0xb5) =~ /\p{InGreek}/; # singleton (not in a range)
+print "ok 665\n";
+
+print "not " unless chr(0x37a) =~ /\p{InGreek}/; # singleton
+print "ok 666\n";
+
+print "not " unless chr(0x386) =~ /\p{InGreek}/; # singleton
+print "ok 667\n";
+
+print "not " unless chr(0x387) =~ /\P{InGreek}/; # not there, so \P must match
+print "ok 668\n";
+
+print "not " unless chr(0x388) =~ /\p{InGreek}/; # range
+print "ok 669\n";
+
+print "not " unless chr(0x38a) =~ /\p{InGreek}/; # range
+print "ok 670\n";
+
+print "not " unless chr(0x38b) =~ /\P{InGreek}/; # not there, so \P must match
+print "ok 671\n";
+
+print "not " unless chr(0x38c) =~ /\p{InGreek}/; # singleton
+print "ok 672\n";
+
+##
+## Test [:cntrl:]: of the 256 byte values, exactly 0x00..0x1F and 0x7F
+## are expected to be control characters.
+## Should probably put in tests for all the POSIX stuff, but not sure how to
+## guarantee a specific locale.
+##
+$AllBytes = join('', map { chr($_) } 0..255);
+($x = $AllBytes) =~ s/[[:cntrl:]]//g;    # strip the control characters
+if ($x ne join('', map { chr($_) } 0x20..0x7E, 0x80..0xFF)) { print "not " };
+print "ok 673\n";
+
+($x = $AllBytes) =~ s/[^[:cntrl:]]//g;   # keep only the control characters
+if ($x ne join('', map { chr($_) } 0..0x1F, 0x7F)) { print "not " };
+print "ok 674\n";
+
+# With the /s modifier UTF-8 characters used to be interpreted as bytes
+{
+    my $a = "Hello \x{263A} World";
+    
+    my @a = ($a =~ /./gs);
+    
+    print "not " unless $#a == 12;   # 13 characters, so the last index is 12
+    print "ok 675\n";
+}
+# Report every result explicitly: a failed match must print "not ok N".
+@a = ("foo\nbar" =~ /./g);
+print "@a" eq "f o o b a r" && @a == 6 ? "ok 676\n" : "not ok 676\n";
+
+@a = ("foo\nbar" =~ /./gs);
+print "@a" eq "f o o \n b a r" && @a == 7 ? "ok 677\n" : "not ok 677\n";
+
+@a = ("foo\nbar" =~ /\C/g);
+print "@a" eq "f o o \n b a r" && @a == 7 ? "ok 678\n" : "not ok 678\n";
+
+@a = ("foo\nbar" =~ /\C/gs);
+print "@a" eq "f o o \n b a r" && @a == 7 ? "ok 679\n" : "not ok 679\n";
+
+@a = ("foo\n\x{100}bar" =~ /./g);
+print "@a" eq "f o o \x{100} b a r" && @a == 7 ? "ok 680\n" : "not ok 680\n";
+
+@a = ("foo\n\x{100}bar" =~ /./gs);
+print "@a" eq "f o o \n \x{100} b a r" && @a == 8 ? "ok 681\n" : "not ok 681\n";
+# Bytes that \C sees for \x{100}: UTF-8 when ord('A') == 65, else UTF-EBCDIC.
+($a, $b) = map { chr } ord('A') == 65 ? (0xc4, 0x80) : (0x8c, 0x41);
+
+@a = ("foo\n\x{100}bar" =~ /\C/g);
+print "@a" eq "f o o \n $a $b b a r" && @a == 9 ? "ok 682\n" : "not ok 682\n";
+
+@a = ("foo\n\x{100}bar" =~ /\C/gs);
+print "@a" eq "f o o \n $a $b b a r" && @a == 9 ? "ok 683\n" : "not ok 683\n";
+
+{
+    # [ID 20010814.004] pos() doesn't work when using =~m// in list context
+    $_ = "ababacadaea";
+    $a = join ":", /b./gc;   # expected "ba:ba"
+    $b = join ":", /a./gc;   # expected "ad:ae", i.e. resuming after the last "ba"
+    $c = pos;                # expected 10, just past the final "ae"
+    print "$a $b $c" eq 'ba:ba ad:ae 10' ? "ok 684\n" : "not ok 684\t# $a $b $c\n";
+}
+
+{
+    # [ID 20010407.006] matching utf8 return values from functions does not work
+
+    package ID_20010407_006;
+
+    sub x {
+       "a\x{1234}";
+    }
+
+    my $x = x;
+    my $y;
+
+    $x =~ /(..)/; $y = $1;   # match against a copy held in a variable
+    print "not " unless length($y) == 2 && $y eq $x;
+    print "ok 685\n";
+
+    x  =~ /(..)/; $y = $1;   # match directly against the sub's return value
+    print "not " unless length($y) == 2 && $y eq $x;
+    print "ok 686\n";
+}
+
+
+my $test = 687;   # number of the next test reported by ok()
+# Print "ok N - name" or "not ok N - name", then advance $test; returns $ok.
+# The ($$) prototype forces scalar context on the pattern match
+sub ok ($$) {
+    my($ok, $name) = @_;
+
+    printf "%sok %d - %s\n", ($ok ? "" : "not "), $test, $name;
+
+    printf "# Failed test at line %d\n", (caller)[2] unless $ok;   # caller's line
+
+    $test++;
+    return $ok;
+}
+
+{
+    # Check that \x## (no braces) works. 5.6.1 and 5.005_03 fail some of these.
+    $x = "\x4e" . "E";   # "E" is a hex digit, but \x takes at most two
+    ok ($x =~ /^\x4EE$/, "Check only 2 bytes of hex are matched.");
+
+    $x = "\x4e" . "i";
+    ok ($x =~ /^\x4Ei$/, "Check that invalid hex digit stops it (2)");
+
+    $x = "\x4" . "j";
+    ok ($x =~ /^\x4j$/,  "Check that invalid hex digit stops it (1)");
+
+    $x = "\x0" . "k";    # \x with no hex digit after it is expected to be chr(0)
+    ok ($x =~ /^\xk$/,   "Check that invalid hex digit stops it (0)");
+
+    $x = "\x0" . "x";
+    ok ($x =~ /^\xx$/, "\\xx isn't to be treated as \\0");
+
+    $x = "\x0" . "xa";
+    ok ($x =~ /^\xxa$/, "\\xxa isn't to be treated as \\xa");
+
+    $x = "\x9" . "_b";   # underscores are not skipped in the brace-less form
+    ok ($x =~ /^\x9_b$/, "\\x9_b isn't to be treated as \\x9b");
+
+    print "# and now again in [] ranges\n";
+
+    $x = "\x4e" . "E";
+    ok ($x =~ /^[\x4EE]{2}$/, "Check only 2 bytes of hex are matched.");
+
+    $x = "\x4e" . "i";
+    ok ($x =~ /^[\x4Ei]{2}$/, "Check that invalid hex digit stops it (2)");
+
+    $x = "\x4" . "j";
+    ok ($x =~ /^[\x4j]{2}$/,  "Check that invalid hex digit stops it (1)");
+
+    $x = "\x0" . "k";
+    ok ($x =~ /^[\xk]{2}$/,   "Check that invalid hex digit stops it (0)");
+
+    $x = "\x0" . "x";
+    ok ($x =~ /^[\xx]{2}$/, "\\xx isn't to be treated as \\0");
+
+    $x = "\x0" . "xa";
+    ok ($x =~ /^[\xxa]{3}$/, "\\xxa isn't to be treated as \\xa");
+
+    $x = "\x9" . "_b";
+    ok ($x =~ /^[\x9_b]{3}$/, "\\x9_b isn't to be treated as \\x9b");
+
+}
+
+{
+    # Check that \x{##} works. 5.6.1 fails quite a few of these.
+    # Underscores in the braces are skipped; the first non-hex character ends the number.
+    $x = "\x9b";
+    ok ($x =~ /^\x{9_b}$/, "\\x{9_b} is to be treated as \\x9b");
+
+    $x = "\x9b" . "y";
+    ok ($x =~ /^\x{9_b}y$/, "\\x{9_b} is to be treated as \\x9b (again)");
+
+    $x = "\x9b" . "y";
+    ok ($x =~ /^\x{9b_}y$/, "\\x{9b_} is to be treated as \\x9b");
+
+    $x = "\x9b" . "y";   # the name must show the invalid digit "q" that is tested
+    ok ($x =~ /^\x{9_bq}y$/, "\\x{9_bq} is to be treated as \\x9b");
+
+    $x = "\x0" . "y";
+    ok ($x =~ /^\x{x9b}y$/, "\\x{x9b} is to be treated as \\x0");
+
+    $x = "\x0" . "y";
+    ok ($x =~ /^\x{0x9b}y$/, "\\x{0x9b} is to be treated as \\x0");
+
+    $x = "\x9b" . "y";
+    ok ($x =~ /^\x{09b}y$/, "\\x{09b} is to be treated as \\x9b");
+
+    print "# and now again in [] ranges\n";
+
+    $x = "\x9b";
+    ok ($x =~ /^[\x{9_b}]$/, "\\x{9_b} is to be treated as \\x9b");
+
+    $x = "\x9b" . "y";
+    ok ($x =~ /^[\x{9_b}y]{2}$/, "\\x{9_b} is to be treated as \\x9b (again)");
+
+    $x = "\x9b" . "y";
+    ok ($x =~ /^[\x{9b_}y]{2}$/, "\\x{9b_} is to be treated as \\x9b");
+
+    $x = "\x9b" . "y";   # the name must show the invalid digit "q" that is tested
+    ok ($x =~ /^[\x{9_bq}y]{2}$/, "\\x{9_bq} is to be treated as \\x9b");
+
+    $x = "\x0" . "y";
+    ok ($x =~ /^[\x{x9b}y]{2}$/, "\\x{x9b} is to be treated as \\x0");
+
+    $x = "\x0" . "y";
+    ok ($x =~ /^[\x{0x9b}y]{2}$/, "\\x{0x9b} is to be treated as \\x0");
+
+    $x = "\x9b" . "y";
+    ok ($x =~ /^[\x{09b}y]{2}$/, "\\x{09b} is to be treated as \\x9b");
+}
+
+{
+  # high bit bug -- japhy
+  my $x = "ab\200d";                 # "\200" is a byte with the high bit set
+  $x =~ /.*?\200/ or print "not ";
+  print "ok 715\n";                  # last test of the 1..715 plan
+}
+