X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=t%2Fop%2Fpat.t;h=62520dda6bf79ec9518e0825734e2ae107d5bd50;hb=f14c76ed18fcf3fc609cea29294703220581a43a;hp=d9e8c3d43d23142f37b6d3d5cc55ae4df095f7d8;hpb=abd0b5231bd7a22bf5db95c3cd70e965d9b5f4b3;p=p5sagit%2Fp5-mst-13.2.git diff --git a/t/op/pat.t b/t/op/pat.t index d9e8c3d..62520dd 100755 --- a/t/op/pat.t +++ b/t/op/pat.t @@ -6,7 +6,7 @@ $| = 1; -print "1..854\n"; +print "1..968\n"; BEGIN { chdir 't' if -d 't'; @@ -1422,16 +1422,21 @@ print "ok 247\n"; print "ok $test\n"; $test++; } print "# IsASCII\n"; - if ($code le '00007f') { - print "not " unless $char =~ /\p{IsASCII}/; - print "ok $test\n"; $test++; - print "not " if $char =~ /\P{IsASCII}/; - print "ok $test\n"; $test++; + if (ord("A") == 193) { + print "ok $test # Skip: in EBCDIC\n"; $test++; + print "ok $test # Skip: in EBCDIC\n"; $test++; } else { - print "not " if $char =~ /\p{IsASCII}/; - print "ok $test\n"; $test++; - print "not " unless $char =~ /\P{IsASCII}/; - print "ok $test\n"; $test++; + if ($code le '00007f') { + print "not " unless $char =~ /\p{IsASCII}/; + print "ok $test\n"; $test++; + print "not " if $char =~ /\P{IsASCII}/; + print "ok $test\n"; $test++; + } else { + print "not " if $char =~ /\p{IsASCII}/; + print "ok $test\n"; $test++; + print "not " unless $char =~ /\P{IsASCII}/; + print "ok $test\n"; $test++; + } } print "# IsCntrl\n"; if ($class =~ /^C/) { @@ -1892,56 +1897,62 @@ $T="ok 660\n";if ($x =~ /(a([abcdefg]+)(?{$y=$^N})de)(?{$z=$^N})/ and $y eq "bc" # Test the Unicode script classes -print "not " unless chr(0x100) =~ /\p{InLatin}/; # outside Latin-1 +print "not " unless chr(0x100) =~ /\p{IsLatin}/; # outside Latin-1 print "ok 661\n"; -print "not " unless chr(0x212b) =~ /\p{InLatin}/; # Angstrom sign, very outside +print "not " unless chr(0x212b) =~ /\p{IsLatin}/; # Angstrom sign, very outside print "ok 662\n"; -print "not " unless chr(0x5d0) =~ /\p{InHebrew}/; # inside HebrewBlock +print "not " unless chr(0x5d0) =~ /\p{IsHebrew}/; # inside InHebrew print "ok 663\n"; -print "not " unless chr(0xfb4f) =~ /\p{InHebrew}/; # outside HebrewBlock +print "not " unless chr(0xfb4f) =~ /\p{IsHebrew}/; # outside InHebrew print "ok 664\n"; -print "not " unless chr(0xb5) =~ /\p{InGreek}/; # singleton (not in a range) +print "not " unless chr(0xb5) =~ /\p{IsGreek}/; # singleton (not in a range) print "ok 665\n"; -print "not " unless chr(0x37a) =~ /\p{InGreek}/; # singleton +print "not " unless chr(0x37a) =~ /\p{IsGreek}/; # singleton print "ok 666\n"; -print "not " unless chr(0x386) =~ /\p{InGreek}/; # singleton +print "not " unless chr(0x386) =~ /\p{IsGreek}/; # singleton print "ok 667\n"; -print "not " unless chr(0x387) =~ /\P{InGreek}/; # not there +print "not " unless chr(0x387) =~ /\P{IsGreek}/; # not there print "ok 668\n"; -print "not " unless chr(0x388) =~ /\p{InGreek}/; # range +print "not " unless chr(0x388) =~ /\p{IsGreek}/; # range print "ok 669\n"; -print "not " unless chr(0x38a) =~ /\p{InGreek}/; # range +print "not " unless chr(0x38a) =~ /\p{IsGreek}/; # range print "ok 670\n"; -print "not " unless chr(0x38b) =~ /\P{InGreek}/; # not there +print "not " unless chr(0x38b) =~ /\P{IsGreek}/; # not there print "ok 671\n"; -print "not " unless chr(0x38c) =~ /\p{InGreek}/; # singleton +print "not " unless chr(0x38c) =~ /\p{IsGreek}/; # singleton print "ok 672\n"; +if (ord("A") == 65) { ## ## Test [:cntrl:]... ## ## Should probably put in tests for all the POSIX stuff, but not sure how to ## guarantee a specific locale...... ## -$AllBytes = join('', map { chr($_) } 0..255); -($x = $AllBytes) =~ s/[[:cntrl:]]//g; -if ($x ne join('', map { chr($_) } 0x20..0x7E, 0x80..0xFF)) { print "not " }; -print "ok 673\n"; + $AllBytes = join('', map { chr($_) } 0..255); + ($x = $AllBytes) =~ s/[[:cntrl:]]//g; + if ($x ne join('', map { chr($_) } 0x20..0x7E, 0x80..0xFF)) { + print "not "; + } + print "ok 673\n"; -($x = $AllBytes) =~ s/[^[:cntrl:]]//g; -if ($x ne join('', map { chr($_) } 0..0x1F, 0x7F)) { print "not " }; -print "ok 674\n"; + ($x = $AllBytes) =~ s/[^[:cntrl:]]//g; + if ($x ne join('', map { chr($_) } 0..0x1F, 0x7F)) { print "not " } + print "ok 674\n"; +} else { + print "ok $_ # Skip: EBCDIC\n" for 673..674; +} # With /s modifier UTF8 chars were interpreted as bytes { @@ -2232,18 +2243,15 @@ print "# some Unicode properties\n"; } { - print "not " unless "a" =~ /\p{LowercaseLetter}/; + print "not " unless "a" =~ /\p{Lowercase Letter}/; print "ok 745\n"; - print "not " if "A" =~ /\p{ - Lowercase - Letter - }/x; + print "not " if "A" =~ /\p{lowercaseletter}/; print "ok 746\n"; } { - print "not " unless "\x{AC00}" =~ /\p{HangulSyllable}/; + print "not " unless "\x{AC00}" =~ /\p{HangulSyllables}/; print "ok 747\n"; } @@ -2282,7 +2290,7 @@ print "# some Unicode properties\n"; print "not " unless "a\x{100}" =~ /A/i; print "ok 754\n"; - print "not " unless "A\x{100}" =~ /A/i; + print "not " unless "A\x{100}" =~ /a/i; print "ok 755\n"; print "not " unless "a\x{100}" =~ /a/i; @@ -2306,7 +2314,7 @@ print "# some Unicode properties\n"; print "not " unless "a\x{100}" =~ /A\x{100}/i; print "ok 762\n"; - print "not " unless "A\x{100}" =~ /A\x{100}/i; + print "not " unless "A\x{100}" =~ /a\x{100}/i; print "ok 763\n"; print "not " unless "a\x{100}" =~ /a\x{100}/i; @@ -2318,7 +2326,7 @@ print "# some Unicode properties\n"; print "not " unless "a\x{100}" =~ /[A]/i; print "ok 766\n"; - print "not " unless "A\x{100}" =~ /[A]/i; + print "not " unless "A\x{100}" =~ /[a]/i; print "ok 767\n"; print "not " unless "a\x{100}" =~ /[a]/i; @@ -2665,3 +2673,386 @@ print "# some Unicode properties\n"; print "\x{400}AB" =~ /(?<=\x{400}.)B/ ? "ok 853\n" : "not ok 853\n"; print "\x{500\x{600}}B" =~ /(?<=\x{500}.)B/ ? "ok 854\n" : "not ok 854\n"; } + +{ + print "# UTF-8 hash keys and /\$/\n"; + # http://www.xray.mpe.mpg.de/mailing-lists/perl5-porters/2002-01/msg01327.html + + my $u = "a\x{100}"; + my $v = substr($u,0,1); + my $w = substr($u,1,1); + my %u = ( $u => $u, $v => $v, $w => $w ); + my $i = 855; + for (keys %u) { + my $m1 = /^\w*$/ ? 1 : 0; + my $m2 = $u{$_}=~/^\w*$/ ? 1 : 0; + print $m1 == $m2 ? "ok $i\n" : "not ok $i # $m1 $m2\n"; + $i++; + } +} + +{ + print "# [ID 20020124.005]\n"; + # Fixed by #14795. + my $i = 858; + for my $char ("a", "\x{df}", "\x{100}"){ + $x = "$char b $char"; + $x =~ s{($char)}{ + "c" =~ /c/; + "x"; + }ge; + print substr($x,0,1) eq substr($x,-1,1) ? + "ok $i\n" : "not ok $i # debug: $x\n"; + $i++; + } +} + +{ + print "# SEGV in s/// and UTF-8\n"; + $s = "s#\x{100}" x 4; + $s =~ s/[^\w]/ /g; + print $s eq "s \x{100}" x 4 ? "ok 861\n" : "not ok 861\n"; +} + +{ + print "# UTF-8 bug (maybe alreayd known?)\n"; + my $u; + + $u = "foo"; + $u =~ s/./\x{100}/g; + print $u eq "\x{100}\x{100}\x{100}" ? "ok 862\n" : "not ok 862\n"; + + $u = "foobar"; + $u =~ s/[ao]/\x{100}/g; + print $u eq "f\x{100}\x{100}b\x{100}r" ? "ok 863\n" : "not ok 863\n"; + + $u =~ s/\x{100}/e/g; + print $u eq "feeber" ? "ok 864\n" : "not ok 864\n"; +} + +{ + print "# UTF-8 bug with s///\n"; + # check utf8/non-utf8 mixtures + # try to force all float/anchored check combinations + my $c = "\x{100}"; + my $test = 865; + my $subst; + for my $re ( + "xx.*$c", "x.*$c$c", "$c.*xx", "$c$c.*x", "xx.*(?=$c)", "(?=$c).*xx", + ) { + print "xxx" =~ /$re/ ? "not ok $test\n" : "ok $test\n"; + ++$test; + print +($subst = "xxx") =~ s/$re// ? "not ok $test\n" : "ok $test\n"; + ++$test; + } + for my $re ("xx.*$c*", "$c*.*xx") { + print "xxx" =~ /$re/ ? "ok $test\n" : "not ok $test\n"; + ++$test; + ($subst = "xxx") =~ s/$re//; + print $subst eq '' ? "ok $test\n" : "not ok $test\t# $subst\n"; + ++$test; + } + for my $re ("xxy*", "y*xx") { + print "xx$c" =~ /$re/ ? "ok $test\n" : "not ok $test\n"; + ++$test; + ($subst = "xx$c") =~ s/$re//; + print $subst eq $c ? "ok $test\n" : "not ok $test\n"; + ++$test; + print "xy$c" =~ /$re/ ? "not ok $test\n" : "ok $test\n"; + ++$test; + print +($subst = "xy$c") =~ /$re/ ? "not ok $test\n" : "ok $test\n"; + ++$test; + } + for my $re ("xy$c*z", "x$c*yz") { + print "xyz" =~ /$re/ ? "ok $test\n" : "not ok $test\n"; + ++$test; + ($subst = "xyz") =~ s/$re//; + print $subst eq '' ? "ok $test\n" : "not ok $test\n"; + ++$test; + } +} + +{ + print "# qr/.../x\n"; + my $test = 893; + + my $R = qr/ A B C # D E/x; + + print eval {"ABCDE" =~ $R} ? "ok $test\n" : "not ok $test\n"; + $test++; + + print eval {"ABCDE" =~ m/$R/} ? "ok $test\n" : "not ok $test\n"; + $test++; + + print eval {"ABCDE" =~ m/($R)/} ? "ok $test\n" : "not ok $test\n"; + $test++; +} + +{ + print "# illegal Unicode properties\n"; + my $test = 896; + + print eval { "a" =~ /\pq / } ? "not ok $test\n" : "ok $test\n"; + $test++; + + print eval { "a" =~ /\p{qrst} / } ? "not ok $test\n" : "ok $test\n"; + $test++; +} + +{ + print "# [ID 20020412.005] wrong pmop flags checked when empty pattern\n"; + # requires reuse of last successful pattern + my $test = 898; + $test =~ /\d/; + for (0 .. 1) { + my $match = ?? + 0; + if ($match != $_) { + print "ok $test\n"; + } else { + printf "not ok %s\t# 'match once' %s on %s iteration\n", $test, + $match ? 'succeeded' : 'failed', $_ ? 'second' : 'first'; + } + ++$test; + } + $test =~ /(\d)/; + my $result = join '', $test =~ //g; + if ($result eq $test) { + print "ok $test\n"; + } else { + printf "not ok %s\t# expected '%s', got '%s'\n", $test, $test, $result; + } + ++$test; +} + +print "# user-defined character properties\n"; + +sub InKana1 { + return <<'END'; +3040 309F +30A0 30FF +END +} + +sub InKana2 { + return <<'END'; ++utf8::InHiragana ++utf8::InKatakana +END +} + +sub InKana3 { + return <<'END'; ++utf8::InHiragana ++utf8::InKatakana +-utf8::IsCn +END +} + +sub InNotKana { + return <<'END'; +!utf8::InHiragana +-utf8::InKatakana ++utf8::IsCn +END +} + +$test = 901; + +print "\x{3040}" =~ /\p{InKana1}/ ? "ok $test\n" : "not ok $test\n"; $test++; +print "\x{303F}" =~ /\P{InKana1}/ ? "ok $test\n" : "not ok $test\n"; $test++; + +print "\x{3040}" =~ /\p{InKana2}/ ? "ok $test\n" : "not ok $test\n"; $test++; +print "\x{303F}" =~ /\P{InKana2}/ ? "ok $test\n" : "not ok $test\n"; $test++; + +print "\x{3041}" =~ /\p{InKana3}/ ? "ok $test\n" : "not ok $test\n"; $test++; +print "\x{3040}" =~ /\P{InKana3}/ ? "ok $test\n" : "not ok $test\n"; $test++; + +print "\x{3040}" =~ /\p{InNotKana}/ ? "ok $test\n" : "not ok $test\n"; $test++; +print "\x{3041}" =~ /\P{InNotKana}/ ? "ok $test\n" : "not ok $test\n"; $test++; + +sub InConsonant { # Not EBCDIC-aware. + return < fail\n"; + ++$test; + print +(!$r or pos($s) == $len + 1) ? "ok $test\n" + : "not ok $test\t# <$type x $len> pos @{[ pos($s) ]}\n"; + ++$test; + } + } +} + +$test = 923; + +$a = bless qr/foo/, 'Foo'; +print(('goodfood' =~ $a ? '' : 'not '), + "ok $test\t# reblessed qr// matches\n"); +++$test; + +print(($a eq '(?-xism:foo)' ? '' : 'not '), + "ok $test\t# reblessed qr// stringizes\n"); +++$test; + +$x = "\x{3fe}"; +$z=$y = "\317\276"; # $y is byte representation of $x + +$a = qr/$x/; +print(($x =~ $a ? '' : 'not '), "ok $test - utf8 interpolation in qr//\n"); +++$test; + +print(("a$a" =~ $x ? '' : 'not '), + "ok $test - stringifed qr// preserves utf8\n"); +++$test; + +print(("a$x" =~ /^a$a\z/ ? '' : 'not '), + "ok $test - interpolated qr// preserves utf8\n"); +++$test; + +print(("a$x" =~ /^a(??{$a})\z/ ? '' : 'not '), + "ok $test - postponed interpolation of qr// preserves utf8\n"); +++$test; + +print((length(qr/##/x) == 12 ? '' : 'not '), + "ok $test - ## in qr// doesn't corrupt memory [perl #17776]\n"); +++$test; + +{ use re 'eval'; + +print(("$x$x" =~ /^$x(??{$x})\z/ ? '' : 'not '), + "ok $test - postponed utf8 string in utf8 re matches utf8\n"); +++$test; + +print(("$y$x" =~ /^$y(??{$x})\z/ ? '' : 'not '), + "ok $test - postponed utf8 string in non-utf8 re matches utf8\n"); +++$test; + +print(("$y$x" !~ /^$y(??{$y})\z/ ? '' : 'not '), + "ok $test - postponed non-utf8 string in non-utf8 re doesn't match utf8\n"); +++$test; + +print(("$x$x" !~ /^$x(??{$y})\z/ ? '' : 'not '), + "ok $test - postponed non-utf8 string in utf8 re doesn't match utf8\n"); +++$test; + +print(("$y$y" =~ /^$y(??{$y})\z/ ? '' : 'not '), + "ok $test - postponed non-utf8 string in non-utf8 re matches non-utf8\n"); +++$test; + +print(("$x$y" =~ /^$x(??{$y})\z/ ? '' : 'not '), + "ok $test - postponed non-utf8 string in utf8 re matches non-utf8\n"); +++$test; +$y = $z; # reset $y after upgrade + +print(("$x$y" !~ /^$x(??{$x})\z/ ? '' : 'not '), + "ok $test - postponed utf8 string in utf8 re doesn't match non-utf8\n"); +++$test; +$y = $z; # reset $y after upgrade + +print(("$y$y" !~ /^$y(??{$x})\z/ ? '' : 'not '), + "ok $test - postponed utf8 string in non-utf8 re doesn't match non-utf8\n"); +++$test; + +} # no re 'eval' + +print "# more user-defined character properties\n"; + +sub IsSyriac1 { + return <<'END'; +0712 072C +0730 074A +END +} + +print "\x{0712}" =~ /\p{IsSyriac1}/ ? "ok $test\n" : "not ok $test\n"; $test++; +print "\x{072F}" =~ /\P{IsSyriac1}/ ? "ok $test\n" : "not ok $test\n"; $test++; + +sub Syriac1 { + return <<'END'; +0712 072C +0730 074A +END +} + +print "\x{0712}" =~ /\p{Syriac1}/ ? "ok $test\n" : "not ok $test\n"; $test++; +print "\x{072F}" =~ /\P{Syriac1}/ ? "ok $test\n" : "not ok $test\n"; $test++; + +{ + print "# Change #18179\n"; + # previously failed with "panic: end_shift + my $s = "\x{100}" x 5; + my $ok = $s =~ /(\x{100}{4})/; + my($ord, $len) = (ord $1, length $1); + print +($ok && $ord == 0x100 && $len == 4) + ? "ok $test\n" : "not ok $test\t# $ok/$ord/$len\n"; + ++$test; +} + +{ + print "# [perl #15763]\n"; + + $a = "x\x{100}"; + chop $a; # but leaves the UTF-8 flag + $a .= "y"; # 1 byte before "y" + + ok($a =~ /^\C/, 'match one \C on 1-byte UTF-8'); + ok($a =~ /^\C{1}/, 'match \C{1}'); + + ok($a =~ /^\Cy/, 'match \Cy'); + ok($a =~ /^\C{1}y/, 'match \C{1}y'); + + $a = "\x{100}y"; # 2 bytes before "y" + + ok($a =~ /^\C/, 'match one \C on 2-byte UTF-8'); + ok($a =~ /^\C{1}/, 'match \C{1}'); + ok($a =~ /^\C\C/, 'match two \C'); + ok($a =~ /^\C{2}/, 'match \C{2}'); + + ok($a =~ /^\C\C\C/, 'match three \C on 2-byte UTF-8 and a byte'); + ok($a =~ /^\C{3}/, 'match \C{3}'); + + ok($a =~ /^\C\Cy/, 'match two \C'); + ok($a =~ /^\C{2}y/, 'match \C{2}'); + + ok($a !~ /^\C\C\Cy/, 'not match three \Cy'); + ok($a !~ /^\C{2}\Cy/, 'not match \C{3}y'); + + $a = "\x{1000}y"; # 3 bytes before "y" + + ok($a =~ /^\C/, 'match one \C on three-byte UTF-8'); + ok($a =~ /^\C{1}/, 'match \C{1}'); + ok($a =~ /^\C\C/, 'match two \C'); + ok($a =~ /^\C{2}/, 'match \C{2}'); + ok($a =~ /^\C\C\C/, 'match three \C'); + ok($a =~ /^\C{3}/, 'match \C{3}'); + + ok($a =~ /^\C\C\C\C/, 'match four \C on three-byte UTF-8 and a byte'); + ok($a =~ /^\C{4}/, 'match \C{4}'); + + ok($a =~ /^\C\C\Cy/, 'match three \Cy'); + ok($a =~ /^\C{3}y/, 'match \C{3}y'); + + ok($a !~ /^\C\C\C\C\y/, 'not match four \Cy'); + ok($a !~ /^\C{4}y/, 'not match \C{4}y'); +} + +# last test 968 +