Re: [PATCH] Re: [perl #39530] printf: bad formatting of hexadecimal conversion of...

[p5sagit/p5-mst-13.2.git] / t / op / pat.t
diff --git a/t/op/pat.t b/t/op/pat.t

index b257b47..0b5c1a5 100755 (executable)
--- a/t/op/pat.t
+++ b/t/op/pat.t
@@ -6,7 +6,7 @@
 
 $| = 1;
 
-print "1..1065\n";
+print "1..1208\n";
 
 BEGIN {
     chdir 't' if -d 't';
@@ -79,12 +79,21 @@ $XXX{345} = 345;
 while ($_ = shift(@XXX)) {
     ?(.*)? && (print $1,"\n");
     /not/ && reset;
-    /not ok 26/ && reset 'X';
+    if (/not ok 26/) {
+      if ($^O eq 'VMS') {
+       $_ = shift(@XXX);
+      }
+      else {
+       reset 'X';
+      }
+   }
 }
 
-while (($key,$val) = each(%XXX)) {
+if ($^O ne 'VMS') {
+  while (($key,$val) = each(%XXX)) {
     print "not ok 27\n";
     exit;
+  }
 }
 
 print "ok 27\n";
@@ -2984,8 +2993,8 @@ sub IsSyriac1 {
 END
 }
 
-print "\x{0712}" =~ /\p{IsSyriac1}/ ? "ok $test\n" : "not ok $test\n"; $test++;
-print "\x{072F}" =~ /\P{IsSyriac1}/ ? "ok $test\n" : "not ok $test\n"; $test++;
+ok("\x{0712}" =~ /\p{IsSyriac1}/, '\x{0712}, \p{IsSyriac1}');
+ok("\x{072F}" =~ /\P{IsSyriac1}/, '\x{072F}, \P{IsSyriac1}');
 
 sub Syriac1 {
     return <<'END';
@@ -2994,8 +3003,29 @@ sub Syriac1 {
 END
 }
 
-print "\x{0712}" =~ /\p{Syriac1}/ ? "ok $test\n" : "not ok $test\n"; $test++;
-print "\x{072F}" =~ /\P{Syriac1}/ ? "ok $test\n" : "not ok $test\n"; $test++;
+ok("\x{0712}" =~ /\p{Syriac1}/, '\x{0712}, \p{Syriac1}');
+ok("\x{072F}" =~ /\P{Syriac1}/, '\x{072F}, \P{Syriac1}');
+
+print "# user-defined character properties may lack \\n at the end\n";
+sub InGreekSmall   { return "03B1\t03C9" }
+sub InGreekCapital { return "0391\t03A9\n-03A2" }
+
+ok("\x{03C0}" =~ /\p{InGreekSmall}/,   "Small pi");
+ok("\x{03C2}" =~ /\p{InGreekSmall}/,   "Final sigma");
+ok("\x{03A0}" =~ /\p{InGreekCapital}/, "Capital PI");
+ok("\x{03A2}" =~ /\P{InGreekCapital}/, "Reserved");
+
+sub AsciiHexAndDash {
+    return <<'END';
++utf8::ASCII_Hex_Digit
++utf8::Dash
+END
+}
+
+ok("-" =~ /\p{Dash}/,            "'-' is Dash");
+ok("A" =~ /\p{ASCII_Hex_Digit}/, "'A' is ASCII_Hex_Digit");
+ok("-" =~ /\p{AsciiHexAndDash}/, "'-' is AsciiHexAndDash");
+ok("A" =~ /\p{AsciiHexAndDash}/, "'A' is AsciiHexAndDash");
 
 {
     print "# Change #18179\n";
@@ -3151,7 +3181,10 @@ ok("bbbbac" =~ /$pattern/ && $1 eq 'a', "[perl #3547]");
 }
 
 {
-    split /(?{ split "" })/, "abc";
+    # XXX DAPM 13-Apr-06. Recursive split is still broken. It's only luck it
+    # hasn't been crashing. Disable this test until it is fixed properly.
+    # XXX also check what it returns rather than just doing ok(1,...)
+    # split /(?{ split "" })/, "abc";
     ok(1,'cache_re & "(?{": it dumps core in 5.6.1 & 5.8.0');
 }
 
@@ -3285,4 +3318,206 @@ ok(("abc" =~ /^abc(\z)?/) && defined($1),
 ok(("abc" =~ /^abc(\z)??/) && !defined($1),
     'optional zero-width match at end of string');
 
-# last test 1065
+
+
+{ # TRIE related
+    my @got=();
+    "words"=~/(word|word|word)(?{push @got,$1})s$/;
+    ok(@got==1,"TRIE optimisation is working") or warn "# @got";
+    @got=();
+    "words"=~/(word|word|word)(?{push @got,$1})s$/i;
+    ok(@got==1,"TRIEF optimisation is working") or warn "# @got";
+
+    my @nums=map {int rand 1000} 1..100;
+    my $re="(".(join "|",@nums).")";
+    $re=qr/\b$re\b/;
+
+    foreach (@nums) {
+        ok($_=~/$re/,"Trie nums");
+    }
+    $_=join " ", @nums;
+    @got=();
+    push @got,$1 while /$re/g;
+
+    my %count;
+    $count{$_}++ for @got;
+    my $ok=1;
+    for (@nums) {
+        $ok=0 if --$count{$_}<0;
+    }
+    ok($ok,"Trie min count matches");
+}
+
+
+# TRIE related
+# LATIN SMALL/CAPITAL LETTER A WITH MACRON
+ok(("foba  \x{101}foo" =~ qr/(foo|\x{100}foo|bar)/i) && $1 eq "\x{101}foo",
+   "TRIEF + LATIN SMALL/CAPITAL LETTER A WITH MACRON");
+
+# LATIN SMALL/CAPITAL LETTER A WITH RING BELOW
+ok(("foba  \x{1E01}foo" =~ qr/(foo|\x{1E00}foo|bar)/i) && $1 eq "\x{1E01}foo",
+   "TRIEF + LATIN SMALL/CAPITAL LETTER A WITH RING BELOW");
+
+# DESERET SMALL/CAPITAL LETTER LONG I
+ok(("foba  \x{10428}foo" =~ qr/(foo|\x{10400}foo|bar)/i) &&  $1 eq "\x{10428}foo",
+   "TRIEF + DESERET SMALL/CAPITAL LETTER LONG I");
+
+# LATIN SMALL/CAPITAL LETTER A WITH RING BELOW + 'X'
+ok(("foba  \x{1E01}xfoo" =~ qr/(foo|\x{1E00}Xfoo|bar)/i) &&  $1 eq "\x{1E01}xfoo",
+   "TRIEF + LATIN SMALL/CAPITAL LETTER A WITH RING BELOW + 'X'");
+
+{# TRIE related
+
+use charnames ':full';
+
+$s="\N{LATIN SMALL LETTER SHARP S}";
+ok(("foba  ba$s" =~ qr/(foo|Ba$s|bar)/i)
+    &&  $1 eq "ba$s",
+   "TRIEF + LATIN SMALL LETTER SHARP S =~ ss");
+ok(("foba  ba$s" =~ qr/(Ba$s|foo|bar)/i)
+    &&  $1 eq "ba$s",
+   "TRIEF + LATIN SMALL LETTER SHARP S =~ ss");
+ok(("foba  ba$s" =~ qr/(foo|bar|Ba$s)/i)
+    &&  $1 eq "ba$s",
+   "TRIEF + LATIN SMALL LETTER SHARP S =~ ss");
+
+ok(("foba  ba$s" =~ qr/(foo|Bass|bar)/i)
+    &&  $1 eq "ba$s",
+   "TRIEF + LATIN SMALL LETTER SHARP S =~ ss");
+
+ok(("foba  ba$s" =~ qr/(foo|BaSS|bar)/i)
+    &&  $1 eq "ba$s",
+   "TRIEF + LATIN SMALL LETTER SHARP S =~ SS");
+}
+
+
+
+{
+    my @normal=qw(these are some normal words);
+    my $psycho=join "|",@normal,map chr $_,255..20000;
+    ok(('these'=~/($psycho)/) && $1 eq 'these','Psycho');
+}
+
+# [perl #36207] mixed utf8 / latin-1 and case folding
+
+{
+    my $utf8 = "\xe9\x{100}"; chop $utf8;
+    my $latin1 = "\xe9";
+
+    ok($utf8 =~ /\xe9/i, "utf8/latin");
+    ok($utf8 =~ /$latin1/i, "utf8/latin runtime");
+    ok($utf8 =~ /(abc|\xe9)/i, "utf8/latin trie");
+    ok($utf8 =~ /(abc|$latin1)/i, "utf8/latin trie runtime");
+
+    ok("\xe9" =~ /$utf8/i, "# TODO latin/utf8");
+    ok("\xe9" =~ /(abc|$utf8)/i, "# latin/utf8 trie");
+    ok($latin1 =~ /$utf8/i, "# TODO latin/utf8 runtime");
+    ok($latin1 =~ /(abc|$utf8)/i, "# latin/utf8 trie runtime");
+}
+
+# [perl #37038] Global regular matches generate invalid pointers
+
+{
+    my $s = "abcd";
+    $s =~ /(..)(..)/g;
+    $s = $1;
+    $s = $2;
+    ok($s eq 'cd',
+       "# assigning to original string should not corrupt match vars");
+}
+
+{
+    package wooosh;
+    sub gloople {
+      "!";
+    }
+    package main;
+    
+    my $aeek = bless {}, 'wooosh';
+    eval {$aeek->gloople() =~ /(.)/g;};
+    ok($@ eq "", "//g match against return value of sub") or print "# $@\n";
+}
+
+{
+    sub gloople {
+      "!";
+    }
+    eval {gloople() =~ /(.)/g;};
+    ok($@ eq "", "# 26410 didn't affect sub calls for some reason")
+       or print "# $@\n";
+}
+
+{
+    package lv;
+    $var = "abc";
+    sub variable : lvalue { $var }
+
+    package main;
+    my $o = bless [], "lv";
+    my $f = "";
+    eval { for (1..2) { $f .= $1 if $o->variable =~ /(.)/g } };
+    ok($f eq "ab", "pos retained between calls # TODO") or print "# $@\n";
+}
+
+{
+    $var = "abc";
+    sub variable : lvalue { $var }
+
+    my $f = "";
+    eval { for (1..2) { $f .= $1 if variable() =~ /(.)/g } };
+    ok($f eq "ab", "pos retained between calls # TODO") or print "# $@\n";
+}
+
+# [perl #37836] Simple Regex causes SEGV when run on specific data
+if ($ordA == 193) {
+    print "ok $test # Skip: in EBCDIC\n"; $test++;
+} else {
+    no warnings 'utf8';
+    $_ = pack('U0C2', 0xa2, 0xf8); # ill-formed UTF-8
+    my $ret = 0;
+    eval { $ret = s/[\0]+//g };
+    ok($ret == 0, "ill-formed UTF-8 doesn't match NUL in class");
+}
+
+{ # [perl #38293] chr(65535) should be allowed in regexes
+    no warnings 'utf8'; # to allow non-characters
+    my($c, $r, $s);
+
+    $c = chr 0xffff;
+    $c =~ s/$c//g;
+    ok($c eq "", "U+FFFF, parsed as atom");
+
+    $c = chr 0xffff;
+    $r = "\\$c";
+    $c =~ s/$r//g;
+    ok($c eq "", "U+FFFF backslashed, parsed as atom");
+
+    $c = chr 0xffff;
+    $c =~ s/[$c]//g;
+    ok($c eq "", "U+FFFF, parsed in class");
+
+    $c = chr 0xffff;
+    $r = "[\\$c]";
+    $c =~ s/$r//g;
+    ok($c eq "", "U+FFFF backslashed, parsed in class");
+
+    $s = "A\x{ffff}B";
+    $s =~ s/\x{ffff}//i;
+    ok($s eq "AB", "U+FFFF, EXACTF");
+
+    $s = "\x{ffff}A";
+    $s =~ s/\bA//;
+    ok($s eq "\x{ffff}", "U+FFFF, BOUND");
+
+    $s = "\x{ffff}!";
+    $s =~ s/\B!//;
+    ok($s eq "\x{ffff}", "U+FFFF, NBOUND");
+} # non-characters end
+
+
+# Keep the following test last -- it may crash perl
+
+ok(("a" x (2**15 - 10)) =~ /^()(a|bb)*$/, "Recursive stack cracker: #24274")
+    or print "# Unexpected outcome: should pass or crash perl\n";
+
+# last test 1208