X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=t%2Fop%2Fpat.t;h=62520dda6bf79ec9518e0825734e2ae107d5bd50;hb=f14c76ed18fcf3fc609cea29294703220581a43a;hp=5681d6a02ba9c0bc33a40e03519a086a4afbb071;hpb=11ef8fddd64f78304dc923b07dffddd7a4f28074;p=p5sagit%2Fp5-mst-13.2.git

diff --git a/t/op/pat.t b/t/op/pat.t
index 5681d6a..62520dd 100755
--- a/t/op/pat.t
+++ b/t/op/pat.t
@@ -6,7 +6,7 @@
 
 $| = 1;
 
-print "1..910\n";
+print "1..968\n";
 
 BEGIN {
     chdir 't' if -d 't';
@@ -2884,3 +2884,175 @@ EOF
 print "d" =~ /\p{InConsonant}/ ? "ok $test\n" : "not ok $test\n"; $test++;
 print "e" =~ /\P{InConsonant}/ ? "ok $test\n" : "not ok $test\n"; $test++;
 
+{
+    print "# [ID 20020630.002] utf8 regex only matches 32k\n";
+    $test = 911;
+    for ([ 'byte', "\x{ff}" ], [ 'utf8', "\x{1ff}" ]) {
+	my($type, $char) = @$_;
+	for my $len (32000, 32768, 33000) {
+	    my $s = $char . "f" x $len;
+	    my $r = $s =~ /$char([f]*)/gc;
+	    print $r ? "ok $test\n" : "not ok $test\t# <$type x $len> fail\n";
+	    ++$test;
+	    print +(!$r or pos($s) == $len + 1) ? "ok $test\n"
+		: "not ok $test\t# <$type x $len> pos @{[ pos($s) ]}\n";
+	    ++$test;
+	}
+    }
+}
+
+$test = 923;
+
+$a = bless qr/foo/, 'Foo';
+print(('goodfood' =~ $a ? '' : 'not '),
+	"ok $test\t# reblessed qr// matches\n");
+++$test;
+
+print(($a eq '(?-xism:foo)' ? '' : 'not '),
+	"ok $test\t# reblessed qr// stringizes\n");
+++$test;
+
+$x = "\x{3fe}";
+$z=$y = "\317\276"; # $y is byte representation of $x
+
+$a = qr/$x/;
+print(($x =~ $a ? '' : 'not '), "ok $test - utf8 interpolation in qr//\n");
+++$test;
+
+print(("a$a" =~ $x ? '' : 'not '),
+      "ok $test - stringifed qr// preserves utf8\n");
+++$test;
+
+print(("a$x" =~ /^a$a\z/ ? '' : 'not '),
+      "ok $test - interpolated qr// preserves utf8\n");
+++$test;
+
+print(("a$x" =~ /^a(??{$a})\z/ ? '' : 'not '),
+      "ok $test - postponed interpolation of qr// preserves utf8\n");
+++$test;
+
+print((length(qr/##/x) == 12 ? '' : 'not '),
+      "ok $test - ## in qr// doesn't corrupt memory [perl #17776]\n");
+++$test;
+
+{ use re 'eval';
+
+print(("$x$x" =~ /^$x(??{$x})\z/ ? '' : 'not '),
+      "ok $test - postponed utf8 string in utf8 re matches utf8\n");
+++$test;
+
+print(("$y$x" =~ /^$y(??{$x})\z/ ? '' : 'not '),
+      "ok $test - postponed utf8 string in non-utf8 re matches utf8\n");
+++$test;
+
+print(("$y$x" !~ /^$y(??{$y})\z/ ? '' : 'not '),
+      "ok $test - postponed non-utf8 string in non-utf8 re doesn't match utf8\n");
+++$test;
+
+print(("$x$x" !~ /^$x(??{$y})\z/ ? '' : 'not '),
+      "ok $test - postponed non-utf8 string in utf8 re doesn't match utf8\n");
+++$test;
+
+print(("$y$y" =~ /^$y(??{$y})\z/ ? '' : 'not '),
+      "ok $test - postponed non-utf8 string in non-utf8 re matches non-utf8\n");
+++$test;
+
+print(("$x$y" =~ /^$x(??{$y})\z/ ? '' : 'not '),
+      "ok $test - postponed non-utf8 string in utf8 re matches non-utf8\n");
+++$test;
+$y = $z; # reset $y after upgrade
+
+print(("$x$y" !~ /^$x(??{$x})\z/ ? '' : 'not '),
+      "ok $test - postponed utf8 string in utf8 re doesn't match non-utf8\n");
+++$test;
+$y = $z; # reset $y after upgrade
+
+print(("$y$y" !~ /^$y(??{$x})\z/ ? '' : 'not '),
+      "ok $test - postponed utf8 string in non-utf8 re doesn't match non-utf8\n");
+++$test;
+
+} # no re 'eval'
+
+print "# more user-defined character properties\n";
+
+sub IsSyriac1 {
+    return <<'END';
+0712	072C
+0730	074A
+END
+}
+
+print "\x{0712}" =~ /\p{IsSyriac1}/ ? "ok $test\n" : "not ok $test\n"; $test++;
+print "\x{072F}" =~ /\P{IsSyriac1}/ ? "ok $test\n" : "not ok $test\n"; $test++;
+
+sub Syriac1 {
+    return <<'END';
+0712	072C
+0730	074A
+END
+}
+
+print "\x{0712}" =~ /\p{Syriac1}/ ? "ok $test\n" : "not ok $test\n"; $test++;
+print "\x{072F}" =~ /\P{Syriac1}/ ? "ok $test\n" : "not ok $test\n"; $test++;
+
+{
+    print "# Change #18179\n";
+    # previously failed with "panic: end_shift
+    my $s = "\x{100}" x 5;
+    my $ok = $s =~ /(\x{100}{4})/;
+    my($ord, $len) = (ord $1, length $1);
+    print +($ok && $ord == 0x100 && $len == 4)
+	    ? "ok $test\n" : "not ok $test\t# $ok/$ord/$len\n";
+    ++$test;
+}
+
+{
+    print "# [perl #15763]\n";
+
+    $a = "x\x{100}";
+    chop $a; # but leaves the UTF-8 flag
+    $a .= "y"; # 1 byte before "y"
+
+    ok($a =~ /^\C/,      'match one \C on 1-byte UTF-8');
+    ok($a =~ /^\C{1}/,   'match \C{1}');
+
+    ok($a =~ /^\Cy/,      'match \Cy');
+    ok($a =~ /^\C{1}y/,   'match \C{1}y');
+
+    $a = "\x{100}y"; # 2 bytes before "y"
+
+    ok($a =~ /^\C/,       'match one \C on 2-byte UTF-8');
+    ok($a =~ /^\C{1}/,    'match \C{1}');
+    ok($a =~ /^\C\C/,     'match two \C');
+    ok($a =~ /^\C{2}/,    'match \C{2}');
+
+    ok($a =~ /^\C\C\C/,    'match three \C on 2-byte UTF-8 and a byte');
+    ok($a =~ /^\C{3}/,     'match \C{3}');
+
+    ok($a =~ /^\C\Cy/,     'match two \C');
+    ok($a =~ /^\C{2}y/,    'match \C{2}');
+
+    ok($a !~ /^\C\C\Cy/,    'not match three \Cy');
+    ok($a !~ /^\C{2}\Cy/,   'not match \C{3}y');
+
+    $a = "\x{1000}y"; # 3 bytes before "y"
+
+    ok($a =~ /^\C/,         'match one \C on three-byte UTF-8');
+    ok($a =~ /^\C{1}/,      'match \C{1}');
+    ok($a =~ /^\C\C/,       'match two \C');
+    ok($a =~ /^\C{2}/,      'match \C{2}');
+    ok($a =~ /^\C\C\C/,     'match three \C');
+    ok($a =~ /^\C{3}/,      'match \C{3}');
+
+    ok($a =~ /^\C\C\C\C/,   'match four \C on three-byte UTF-8 and a byte');
+    ok($a =~ /^\C{4}/,      'match \C{4}');
+
+    ok($a =~ /^\C\C\Cy/,    'match three \Cy');
+    ok($a =~ /^\C{3}y/,     'match \C{3}y');
+
+    ok($a !~ /^\C\C\C\C\y/, 'not match four \Cy');
+    ok($a !~ /^\C{4}y/,     'not match \C{4}y');
+}
+
+# last test 968
+