X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=t%2Fop%2Fpat.t;h=62520dda6bf79ec9518e0825734e2ae107d5bd50;hb=f14c76ed18fcf3fc609cea29294703220581a43a;hp=5681d6a02ba9c0bc33a40e03519a086a4afbb071;hpb=11ef8fddd64f78304dc923b07dffddd7a4f28074;p=p5sagit%2Fp5-mst-13.2.git diff --git a/t/op/pat.t b/t/op/pat.t index 5681d6a..62520dd 100755 --- a/t/op/pat.t +++ b/t/op/pat.t @@ -6,7 +6,7 @@ $| = 1; -print "1..910\n"; +print "1..968\n"; BEGIN { chdir 't' if -d 't'; @@ -2884,3 +2884,175 @@ EOF print "d" =~ /\p{InConsonant}/ ? "ok $test\n" : "not ok $test\n"; $test++; print "e" =~ /\P{InConsonant}/ ? "ok $test\n" : "not ok $test\n"; $test++; +{ + print "# [ID 20020630.002] utf8 regex only matches 32k\n"; + $test = 911; + for ([ 'byte', "\x{ff}" ], [ 'utf8', "\x{1ff}" ]) { + my($type, $char) = @$_; + for my $len (32000, 32768, 33000) { + my $s = $char . "f" x $len; + my $r = $s =~ /$char([f]*)/gc; + print $r ? "ok $test\n" : "not ok $test\t# <$type x $len> fail\n"; + ++$test; + print +(!$r or pos($s) == $len + 1) ? "ok $test\n" + : "not ok $test\t# <$type x $len> pos @{[ pos($s) ]}\n"; + ++$test; + } + } +} + +$test = 923; + +$a = bless qr/foo/, 'Foo'; +print(('goodfood' =~ $a ? '' : 'not '), + "ok $test\t# reblessed qr// matches\n"); +++$test; + +print(($a eq '(?-xism:foo)' ? '' : 'not '), + "ok $test\t# reblessed qr// stringizes\n"); +++$test; + +$x = "\x{3fe}"; +$z=$y = "\317\276"; # $y is byte representation of $x + +$a = qr/$x/; +print(($x =~ $a ? '' : 'not '), "ok $test - utf8 interpolation in qr//\n"); +++$test; + +print(("a$a" =~ $x ? '' : 'not '), + "ok $test - stringifed qr// preserves utf8\n"); +++$test; + +print(("a$x" =~ /^a$a\z/ ? '' : 'not '), + "ok $test - interpolated qr// preserves utf8\n"); +++$test; + +print(("a$x" =~ /^a(??{$a})\z/ ? '' : 'not '), + "ok $test - postponed interpolation of qr// preserves utf8\n"); +++$test; + +print((length(qr/##/x) == 12 ? '' : 'not '), + "ok $test - ## in qr// doesn't corrupt memory [perl #17776]\n"); +++$test; + +{ use re 'eval'; + +print(("$x$x" =~ /^$x(??{$x})\z/ ? '' : 'not '), + "ok $test - postponed utf8 string in utf8 re matches utf8\n"); +++$test; + +print(("$y$x" =~ /^$y(??{$x})\z/ ? '' : 'not '), + "ok $test - postponed utf8 string in non-utf8 re matches utf8\n"); +++$test; + +print(("$y$x" !~ /^$y(??{$y})\z/ ? '' : 'not '), + "ok $test - postponed non-utf8 string in non-utf8 re doesn't match utf8\n"); +++$test; + +print(("$x$x" !~ /^$x(??{$y})\z/ ? '' : 'not '), + "ok $test - postponed non-utf8 string in utf8 re doesn't match utf8\n"); +++$test; + +print(("$y$y" =~ /^$y(??{$y})\z/ ? '' : 'not '), + "ok $test - postponed non-utf8 string in non-utf8 re matches non-utf8\n"); +++$test; + +print(("$x$y" =~ /^$x(??{$y})\z/ ? '' : 'not '), + "ok $test - postponed non-utf8 string in utf8 re matches non-utf8\n"); +++$test; +$y = $z; # reset $y after upgrade + +print(("$x$y" !~ /^$x(??{$x})\z/ ? '' : 'not '), + "ok $test - postponed utf8 string in utf8 re doesn't match non-utf8\n"); +++$test; +$y = $z; # reset $y after upgrade + +print(("$y$y" !~ /^$y(??{$x})\z/ ? '' : 'not '), + "ok $test - postponed utf8 string in non-utf8 re doesn't match non-utf8\n"); +++$test; + +} # no re 'eval' + +print "# more user-defined character properties\n"; + +sub IsSyriac1 { + return <<'END'; +0712 072C +0730 074A +END +} + +print "\x{0712}" =~ /\p{IsSyriac1}/ ? "ok $test\n" : "not ok $test\n"; $test++; +print "\x{072F}" =~ /\P{IsSyriac1}/ ? "ok $test\n" : "not ok $test\n"; $test++; + +sub Syriac1 { + return <<'END'; +0712 072C +0730 074A +END +} + +print "\x{0712}" =~ /\p{Syriac1}/ ? "ok $test\n" : "not ok $test\n"; $test++; +print "\x{072F}" =~ /\P{Syriac1}/ ? "ok $test\n" : "not ok $test\n"; $test++; + +{ + print "# Change #18179\n"; + # previously failed with "panic: end_shift + my $s = "\x{100}" x 5; + my $ok = $s =~ /(\x{100}{4})/; + my($ord, $len) = (ord $1, length $1); + print +($ok && $ord == 0x100 && $len == 4) + ? "ok $test\n" : "not ok $test\t# $ok/$ord/$len\n"; + ++$test; +} + +{ + print "# [perl #15763]\n"; + + $a = "x\x{100}"; + chop $a; # but leaves the UTF-8 flag + $a .= "y"; # 1 byte before "y" + + ok($a =~ /^\C/, 'match one \C on 1-byte UTF-8'); + ok($a =~ /^\C{1}/, 'match \C{1}'); + + ok($a =~ /^\Cy/, 'match \Cy'); + ok($a =~ /^\C{1}y/, 'match \C{1}y'); + + $a = "\x{100}y"; # 2 bytes before "y" + + ok($a =~ /^\C/, 'match one \C on 2-byte UTF-8'); + ok($a =~ /^\C{1}/, 'match \C{1}'); + ok($a =~ /^\C\C/, 'match two \C'); + ok($a =~ /^\C{2}/, 'match \C{2}'); + + ok($a =~ /^\C\C\C/, 'match three \C on 2-byte UTF-8 and a byte'); + ok($a =~ /^\C{3}/, 'match \C{3}'); + + ok($a =~ /^\C\Cy/, 'match two \C'); + ok($a =~ /^\C{2}y/, 'match \C{2}'); + + ok($a !~ /^\C\C\Cy/, 'not match three \Cy'); + ok($a !~ /^\C{2}\Cy/, 'not match \C{3}y'); + + $a = "\x{1000}y"; # 3 bytes before "y" + + ok($a =~ /^\C/, 'match one \C on three-byte UTF-8'); + ok($a =~ /^\C{1}/, 'match \C{1}'); + ok($a =~ /^\C\C/, 'match two \C'); + ok($a =~ /^\C{2}/, 'match \C{2}'); + ok($a =~ /^\C\C\C/, 'match three \C'); + ok($a =~ /^\C{3}/, 'match \C{3}'); + + ok($a =~ /^\C\C\C\C/, 'match four \C on three-byte UTF-8 and a byte'); + ok($a =~ /^\C{4}/, 'match \C{4}'); + + ok($a =~ /^\C\C\Cy/, 'match three \Cy'); + ok($a =~ /^\C{3}y/, 'match \C{3}y'); + + ok($a !~ /^\C\C\C\C\y/, 'not match four \Cy'); + ok($a !~ /^\C{4}y/, 'not match \C{4}y'); +} + +# last test 968 +