$| = 1;
-print "1..854\n";
+print "1..996\n";
BEGIN {
chdir 't' if -d 't';
print "not " unless chr(0x38c) =~ /\p{IsGreek}/; # singleton
print "ok 672\n";
+if (ord("A") == 65) { # only where "A" is code point 65; the else branch reports a skip for EBCDIC
##
## Test [:cntrl:]...
##
## Should probably put in tests for all the POSIX stuff, but not sure how to
## guarantee a specific locale......
##
-$AllBytes = join('', map { chr($_) } 0..255);
-($x = $AllBytes) =~ s/[[:cntrl:]]//g;
-if ($x ne join('', map { chr($_) } 0x20..0x7E, 0x80..0xFF)) { print "not " };
-print "ok 673\n";
+ $AllBytes = join('', map { chr($_) } 0..255);
+ ($x = $AllBytes) =~ s/[[:cntrl:]]//g; # delete every [:cntrl:] character from a copy
+ if ($x ne join('', map { chr($_) } 0x20..0x7E, 0x80..0xFF)) {
+ print "not ";
+ }
+ print "ok 673\n";
-($x = $AllBytes) =~ s/[^[:cntrl:]]//g;
-if ($x ne join('', map { chr($_) } 0..0x1F, 0x7F)) { print "not " };
-print "ok 674\n";
+ ($x = $AllBytes) =~ s/[^[:cntrl:]]//g; # now keep only the [:cntrl:] characters
+ if ($x ne join('', map { chr($_) } 0..0x1F, 0x7F)) { print "not " }
+ print "ok 674\n";
+} else {
+ print "ok $_ # Skip: EBCDIC\n" for 673..674;
+}
# With /s modifier UTF8 chars were interpreted as bytes
{
print "not " unless "a\x{100}" =~ /A/i;
print "ok 754\n";
- print "not " unless "A\x{100}" =~ /A/i;
+ print "not " unless "A\x{100}" =~ /a/i;
print "ok 755\n";
print "not " unless "a\x{100}" =~ /a/i;
print "not " unless "a\x{100}" =~ /A\x{100}/i;
print "ok 762\n";
- print "not " unless "A\x{100}" =~ /A\x{100}/i;
+ print "not " unless "A\x{100}" =~ /a\x{100}/i;
print "ok 763\n";
print "not " unless "a\x{100}" =~ /a\x{100}/i;
print "not " unless "a\x{100}" =~ /[A]/i;
print "ok 766\n";
- print "not " unless "A\x{100}" =~ /[A]/i;
+ print "not " unless "A\x{100}" =~ /[a]/i;
print "ok 767\n";
print "not " unless "a\x{100}" =~ /[a]/i;
print "\x{400}AB" =~ /(?<=\x{400}.)B/ ? "ok 853\n" : "not ok 853\n";
print "\x{500\x{600}}B" =~ /(?<=\x{500}.)B/ ? "ok 854\n" : "not ok 854\n";
}
+
+{
+ print "# UTF-8 hash keys and /\$/\n";
+ # http://www.xray.mpe.mpg.de/mailing-lists/perl5-porters/2002-01/msg01327.html
+
+ my $u = "a\x{100}";
+ my $v = substr($u,0,1); # first character of $u
+ my $w = substr($u,1,1); # second character of $u
+ my %u = ( $u => $u, $v => $v, $w => $w ); # each string is both key and value
+ my $i = 855;
+ for (keys %u) {
+ my $m1 = /^\w*$/ ? 1 : 0; # match against the hash key (in $_)
+ my $m2 = $u{$_}=~/^\w*$/ ? 1 : 0; # same match against the stored value
+ print $m1 == $m2 ? "ok $i\n" : "not ok $i # $m1 $m2\n"; # key and value must agree
+ $i++;
+ }
+}
+
+{
+ print "# [ID 20020124.005]\n";
+ # Fixed by #14795.
+ my $i = 858;
+ for my $char ("a", "\x{df}", "\x{100}"){
+ $x = "$char b $char"; # the same character at both ends
+ $x =~ s{($char)}{
+ "c" =~ /c/;
+ "x";
+ }ge;
+ print substr($x,0,1) eq substr($x,-1,1) ? # first and last character must still be equal
+ "ok $i\n" : "not ok $i # debug: $x\n";
+ $i++;
+ }
+}
+
+{
+ print "# SEGV in s/// and UTF-8\n";
+ $s = "s#\x{100}" x 4;
+ $s =~ s/[^\w]/ /g; # replace every non-word character with a space
+ print $s eq "s \x{100}" x 4 ? "ok 861\n" : "not ok 861\n"; # "x" binds tighter than "eq"
+}
+
+{
+ print "# UTF-8 bug (maybe alreayd known?)\n";
+ my $u;
+
+ $u = "foo";
+ $u =~ s/./\x{100}/g; # every character becomes \x{100}
+ print $u eq "\x{100}\x{100}\x{100}" ? "ok 862\n" : "not ok 862\n";
+
+ $u = "foobar";
+ $u =~ s/[ao]/\x{100}/g; # only "a" and "o" are replaced
+ print $u eq "f\x{100}\x{100}b\x{100}r" ? "ok 863\n" : "not ok 863\n";
+
+ $u =~ s/\x{100}/e/g; # continue from the previous result: every \x{100} becomes "e"
+ print $u eq "feeber" ? "ok 864\n" : "not ok 864\n";
+}
+
+{
+ print "# UTF-8 bug with s///\n";
+ # check utf8/non-utf8 mixtures
+ # try to force all float/anchored check combinations
+ # Every case is exercised twice: as a plain match and through s///.
+ my $c = "\x{100}";
+ my $test = 865;
+ my $subst;
+ # Patterns that require \x{100}: neither form may match the string "xxx".
+ for my $re (
+ "xx.*$c", "x.*$c$c", "$c.*xx", "$c$c.*x", "xx.*(?=$c)", "(?=$c).*xx",
+ ) {
+ print "xxx" =~ /$re/ ? "not ok $test\n" : "ok $test\n";
+ ++$test;
+ print +($subst = "xxx") =~ s/$re// ? "not ok $test\n" : "ok $test\n";
+ ++$test;
+ }
+ # \x{100} is optional here, so "xxx" matches and is removed completely.
+ for my $re ("xx.*$c*", "$c*.*xx") {
+ print "xxx" =~ /$re/ ? "ok $test\n" : "not ok $test\n";
+ ++$test;
+ ($subst = "xxx") =~ s/$re//;
+ print $subst eq '' ? "ok $test\n" : "not ok $test\t# $subst\n";
+ ++$test;
+ }
+ # Patterns without \x{100} applied to strings that contain it.
+ for my $re ("xxy*", "y*xx") {
+ print "xx$c" =~ /$re/ ? "ok $test\n" : "not ok $test\n";
+ ++$test;
+ ($subst = "xx$c") =~ s/$re//;
+ print $subst eq $c ? "ok $test\n" : "not ok $test\n";
+ ++$test;
+ print "xy$c" =~ /$re/ ? "not ok $test\n" : "ok $test\n";
+ ++$test;
+ # Substitution form of the previous negative test: s/// returns false
+ # when nothing was replaced, so a false result is the passing case.
+ print +($subst = "xy$c") =~ s/$re// ? "not ok $test\n" : "ok $test\n";
+ ++$test;
+ }
+ # \x{100} is optional inside the pattern; "xyz" matches in full.
+ for my $re ("xy$c*z", "x$c*yz") {
+ print "xyz" =~ /$re/ ? "ok $test\n" : "not ok $test\n";
+ ++$test;
+ ($subst = "xyz") =~ s/$re//;
+ print $subst eq '' ? "ok $test\n" : "not ok $test\n";
+ ++$test;
+ }
+}
+
+{
+ print "# qr/.../x\n";
+ my $test = 893;
+
+ my $R = qr/ A B C # D E/x; # under /x, "# D E" is a comment inside the pattern
+
+ print eval {"ABCDE" =~ $R} ? "ok $test\n" : "not ok $test\n"; # used directly
+ $test++;
+
+ print eval {"ABCDE" =~ m/$R/} ? "ok $test\n" : "not ok $test\n"; # interpolated
+ $test++;
+
+ print eval {"ABCDE" =~ m/($R)/} ? "ok $test\n" : "not ok $test\n"; # interpolated inside a group
+ $test++;
+}
+
+{
+ print "# illegal Unicode properties\n";
+ my $test = 896;
+
+ print eval { "a" =~ /\pq / } ? "not ok $test\n" : "ok $test\n"; # eval is false if the match dies or fails; only a true result is a failure
+ $test++;
+
+ print eval { "a" =~ /\p{qrst} / } ? "not ok $test\n" : "ok $test\n"; # same check with the braced form
+ $test++;
+}
+
+{
+ print "# [ID 20020412.005] wrong pmop flags checked when empty pattern\n";
+ # requires reuse of last successful pattern
+ my $test = 898;
+ $test =~ /\d/; # establish a last successful pattern
+ for (0 .. 1) {
+ my $match = ?? + 0; # empty "match once" pattern, result forced to a number
+ if ($match != $_) { # passes when it matches on pass 0 and does not match on pass 1
+ print "ok $test\n";
+ } else {
+ printf "not ok %s\t# 'match once' %s on %s iteration\n", $test,
+ $match ? 'succeeded' : 'failed', $_ ? 'second' : 'first';
+ }
+ ++$test;
+ }
+ $test =~ /(\d)/;
+ my $result = join '', $test =~ //g; # empty pattern with /g; result must rebuild $test
+ if ($result eq $test) {
+ print "ok $test\n";
+ } else {
+ printf "not ok %s\t# expected '%s', got '%s'\n", $test, $test, $result;
+ }
+ ++$test;
+}
+
+print "# user-defined character properties\n";
+
+sub InKana1 { # two explicit code point ranges, written in hex
+ return <<'END';
+3040 309F
+30A0 30FF
+END
+}
+
+sub InKana2 { # built from two existing properties, each added with "+"
+ return <<'END';
++utf8::InHiragana
++utf8::InKatakana
+END
+}
+
+sub InKana3 { # as InKana2, with IsCn removed by "-"
+ return <<'END';
++utf8::InHiragana
++utf8::InKatakana
+-utf8::IsCn
+END
+}
+
+sub InNotKana { # "!" inverts InHiragana; InKatakana is then removed and IsCn added
+ return <<'END';
+!utf8::InHiragana
+-utf8::InKatakana
++utf8::IsCn
+END
+}
+
+$test = 901;
+
+print "\x{3040}" =~ /\p{InKana1}/ ? "ok $test\n" : "not ok $test\n"; $test++;
+print "\x{303F}" =~ /\P{InKana1}/ ? "ok $test\n" : "not ok $test\n"; $test++; # U+303F is just below the first range
+
+print "\x{3040}" =~ /\p{InKana2}/ ? "ok $test\n" : "not ok $test\n"; $test++;
+print "\x{303F}" =~ /\P{InKana2}/ ? "ok $test\n" : "not ok $test\n"; $test++;
+
+print "\x{3041}" =~ /\p{InKana3}/ ? "ok $test\n" : "not ok $test\n"; $test++;
+print "\x{3040}" =~ /\P{InKana3}/ ? "ok $test\n" : "not ok $test\n"; $test++; # presumably U+3040 is IsCn and so excluded - TODO confirm
+
+print "\x{3040}" =~ /\p{InNotKana}/ ? "ok $test\n" : "not ok $test\n"; $test++;
+print "\x{3041}" =~ /\P{InNotKana}/ ? "ok $test\n" : "not ok $test\n"; $test++;
+
+sub InConsonant { # Not EBCDIC-aware: hex range 61-7f with five single code points (the lowercase vowels in ASCII) removed.
+ return <<EOF;
+0061 007f
+-0061
+-0065
+-0069
+-006f
+-0075
+EOF
+}
+
+print "d" =~ /\p{InConsonant}/ ? "ok $test\n" : "not ok $test\n"; $test++;
+print "e" =~ /\P{InConsonant}/ ? "ok $test\n" : "not ok $test\n"; $test++; # 0065 is one of the removed code points
+
+{
+ print "# [ID 20020630.002] utf8 regex only matches 32k\n";
+ $test = 911;
+ for ([ 'byte', "\x{ff}" ], [ 'utf8', "\x{1ff}" ]) { # label and leading character for each run
+ my($type, $char) = @$_;
+ for my $len (32000, 32768, 33000) { # below, at and above 32768
+ my $s = $char . "f" x $len;
+ my $r = $s =~ /$char([f]*)/gc;
+ print $r ? "ok $test\n" : "not ok $test\t# <$type x $len> fail\n";
+ ++$test;
+ print +(!$r or pos($s) == $len + 1) ? "ok $test\n" # on a match, pos() must sit after $char plus all $len "f"s
+ : "not ok $test\t# <$type x $len> pos @{[ pos($s) ]}\n";
+ ++$test;
+ }
+ }
+}
+
+$test = 923;
+
+$a = bless qr/foo/, 'Foo'; # rebless the compiled regex into another class
+print(('goodfood' =~ $a ? '' : 'not '),
+ "ok $test\t# reblessed qr// matches\n");
+++$test;
+
+print(($a eq '(?-xism:foo)' ? '' : 'not '), # string form must be the pattern, not a reference address
+ "ok $test\t# reblessed qr// stringizes\n");
+++$test;
+
+$x = "\x{3fe}";
+$z=$y = "\317\276"; # $y is the byte representation of $x; $z keeps a copy used to reset $y later
+
+$a = qr/$x/;
+print(($x =~ $a ? '' : 'not '), "ok $test - utf8 interpolation in qr//\n");
+++$test;
+
+print(("a$a" =~ $x ? '' : 'not '), # here the stringified qr// is the subject and $x the pattern
+ "ok $test - stringifed qr// preserves utf8\n");
+++$test;
+
+print(("a$x" =~ /^a$a\z/ ? '' : 'not '),
+ "ok $test - interpolated qr// preserves utf8\n");
+++$test;
+
+print(("a$x" =~ /^a(??{$a})\z/ ? '' : 'not '),
+ "ok $test - postponed interpolation of qr// preserves utf8\n");
+++$test;
+
+print((length(qr/##/x) == 12 ? '' : 'not '), # checks only the length of the stringified pattern
+ "ok $test - ## in qr// doesn't corrupt memory [perl #17776]\n");
+++$test;
+
+{ use re 'eval'; # the patterns below mix interpolated variables with (??{ }) code
+
+print(("$x$x" =~ /^$x(??{$x})\z/ ? '' : 'not '),
+ "ok $test - postponed utf8 string in utf8 re matches utf8\n");
+++$test;
+
+print(("$y$x" =~ /^$y(??{$x})\z/ ? '' : 'not '),
+ "ok $test - postponed utf8 string in non-utf8 re matches utf8\n");
+++$test;
+
+print(("$y$x" !~ /^$y(??{$y})\z/ ? '' : 'not '),
+ "ok $test - postponed non-utf8 string in non-utf8 re doesn't match utf8\n");
+++$test;
+
+print(("$x$x" !~ /^$x(??{$y})\z/ ? '' : 'not '),
+ "ok $test - postponed non-utf8 string in utf8 re doesn't match utf8\n");
+++$test;
+
+print(("$y$y" =~ /^$y(??{$y})\z/ ? '' : 'not '),
+ "ok $test - postponed non-utf8 string in non-utf8 re matches non-utf8\n");
+++$test;
+
+print(("$x$y" =~ /^$x(??{$y})\z/ ? '' : 'not '),
+ "ok $test - postponed non-utf8 string in utf8 re matches non-utf8\n");
+++$test;
+$y = $z; # reset $y after upgrade (restore the saved byte-string copy)
+
+print(("$x$y" !~ /^$x(??{$x})\z/ ? '' : 'not '),
+ "ok $test - postponed utf8 string in utf8 re doesn't match non-utf8\n");
+++$test;
+$y = $z; # reset $y after upgrade (restore the saved byte-string copy)
+
+print(("$y$y" !~ /^$y(??{$x})\z/ ? '' : 'not '),
+ "ok $test - postponed utf8 string in non-utf8 re doesn't match non-utf8\n");
+++$test;
+
+} # no re 'eval'
+
+print "# more user-defined character properties\n";
+
+sub IsSyriac1 { # user-defined property whose name carries the "Is" prefix
+ return <<'END';
+0712 072C
+0730 074A
+END
+}
+
+print "\x{0712}" =~ /\p{IsSyriac1}/ ? "ok $test\n" : "not ok $test\n"; $test++;
+print "\x{072F}" =~ /\P{IsSyriac1}/ ? "ok $test\n" : "not ok $test\n"; $test++; # U+072F lies in the gap between the two ranges
+
+sub Syriac1 { # same ranges under a name without an "In"/"Is" prefix
+ return <<'END';
+0712 072C
+0730 074A
+END
+}
+
+print "\x{0712}" =~ /\p{Syriac1}/ ? "ok $test\n" : "not ok $test\n"; $test++;
+print "\x{072F}" =~ /\P{Syriac1}/ ? "ok $test\n" : "not ok $test\n"; $test++; # U+072F lies in the gap between the two ranges
+
+{
+ print "# Change #18179\n";
+ # previously failed with "panic: end_shift"
+ my $s = "\x{100}" x 5;
+ my $ok = $s =~ /(\x{100}{4})/; # capture four of the five characters
+ my($ord, $len) = (ord $1, length $1); # first code point and length of the capture
+ print +($ok && $ord == 0x100 && $len == 4)
+ ? "ok $test\n" : "not ok $test\t# $ok/$ord/$len\n";
+ ++$test;
+}
+
+{
+ print "# [perl #15763]\n";
+ # \C matches one byte of the string's internal encoding; each section
+ # below puts a different number of bytes in front of the final "y".
+
+ $a = "x\x{100}";
+ chop $a; # but leaves the UTF-8 flag
+ $a .= "y"; # 1 byte before "y"
+
+ ok($a =~ /^\C/, 'match one \C on 1-byte UTF-8');
+ ok($a =~ /^\C{1}/, 'match \C{1}');
+
+ ok($a =~ /^\Cy/, 'match \Cy');
+ ok($a =~ /^\C{1}y/, 'match \C{1}y');
+
+ $a = "\x{100}y"; # 2 bytes before "y"
+
+ ok($a =~ /^\C/, 'match one \C on 2-byte UTF-8');
+ ok($a =~ /^\C{1}/, 'match \C{1}');
+ ok($a =~ /^\C\C/, 'match two \C');
+ ok($a =~ /^\C{2}/, 'match \C{2}');
+
+ ok($a =~ /^\C\C\C/, 'match three \C on 2-byte UTF-8 and a byte');
+ ok($a =~ /^\C{3}/, 'match \C{3}');
+
+ ok($a =~ /^\C\Cy/, 'match two \C');
+ ok($a =~ /^\C{2}y/, 'match \C{2}');
+
+ ok($a !~ /^\C\C\Cy/, q{don't match three \Cy});
+ ok($a !~ /^\C{2}\Cy/, q{don't match \C{3}y});
+
+ $a = "\x{1000}y"; # 3 bytes before "y"
+
+ ok($a =~ /^\C/, 'match one \C on three-byte UTF-8');
+ ok($a =~ /^\C{1}/, 'match \C{1}');
+ ok($a =~ /^\C\C/, 'match two \C');
+ ok($a =~ /^\C{2}/, 'match \C{2}');
+ ok($a =~ /^\C\C\C/, 'match three \C');
+ ok($a =~ /^\C{3}/, 'match \C{3}');
+
+ ok($a =~ /^\C\C\C\C/, 'match four \C on three-byte UTF-8 and a byte');
+ ok($a =~ /^\C{4}/, 'match \C{4}');
+
+ ok($a =~ /^\C\C\Cy/, 'match three \Cy');
+ ok($a =~ /^\C{3}y/, 'match \C{3}y');
+
+ # Four \C followed by a literal "y" (not the stray escape "\y").
+ ok($a !~ /^\C\C\C\Cy/, q{don't match four \Cy});
+ ok($a !~ /^\C{4}y/, q{don't match \C{4}y});
+}
+
+# Negated \s / \d classes against a string that carries the UTF-8 flag
+# (upgraded, then shortened with chop). $\ is set so output ends in a newline.
+$_ = 'aaaaaaaaaa';
+utf8::upgrade($_); chop $_; $\="\n";
+# Single-quoted descriptions keep the backslashes that "..." would drop.
+ok(/[^\s]+/, 'm/[^\s]/ utf8');
+ok(/[^\d]+/, 'm/[^\d]/ utf8');
+# Substitute on the copy in $a so $_ stays intact for the next test.
+ok(($a = $_, $a =~ s/[^\s]+/./g), 's/[^\s]/ utf8');
+ok(($a = $_, $a =~ s/[^\d]+/./g), 's/[^\d]/ utf8');
+
+ok("\x{100}" =~ /\x{100}/, "[perl #15397]");
+ok("\x{100}" =~ /(\x{100})/, "[perl #15397]");
+ok("\x{100}" =~ /(\x{100}){1}/, "[perl #15397]");
+ok("\x{100}\x{100}" =~ /(\x{100}){2}/, "[perl #15397]");
+ok("\x{100}\x{100}" =~ /(\x{100})(\x{100})/, "[perl #15397]");
+
+$x = "CD";
+$x =~ /(AB)*?CD/; # the group cannot match in "CD", so $1 must stay undefined
+ok(!defined $1, "[perl #7471]");
+
+$x = "CD";
+$x =~ /(AB)*CD/; # same check with the greedy quantifier
+ok(!defined $1, "[perl #7471]");
+
+$pattern = "^(b+?|a){1,2}c";
+ok("bac" =~ /$pattern/ && $1 eq 'a', "[perl #3547]"); # the last iteration of the group captures "a"
+ok("bbac" =~ /$pattern/ && $1 eq 'a', "[perl #3547]");
+ok("bbbac" =~ /$pattern/ && $1 eq 'a', "[perl #3547]");
+ok("bbbbac" =~ /$pattern/ && $1 eq 'a', "[perl #3547]");
+
+{
+ # [perl #18232]
+ "\x{100}" =~ /(.)/;
+ ok( $1 eq "\x{100}", '$1 is utf-8 [perl #18232]' );
+ { 'a' =~ /./; } # a match in an inner block must not disturb the outer $1
+ ok( $1 eq "\x{100}", '$1 is still utf-8' );
+ ok( $1 ne "\xC4\x80", '$1 is not non-utf-8' ); # "\xC4\x80" is the UTF-8 byte encoding of U+0100
+}
+
+{
+ use utf8;
+ my $attr = 'Name-1' ;
+
+ my $NormalChar = qr/[\p{IsDigit}\p{IsLower}\p{IsUpper}]/;
+ my $NormalWord = qr/${NormalChar}+?/;
+ my $PredNameHyphen = qr/^${NormalWord}(\-${NormalWord})*?$/; # words joined by hyphens
+
+ $attr =~ /^$/; # unrelated match run before the real test (presumably part of the trigger - confirm)
+ ok( $attr =~ $PredNameHyphen, "[perl #19767] original test" );
+}
+
+{
+ use utf8;
+ "a" =~ m/[b]/; # unrelated failing match run before the real test
+ ok ( "0" =~ /\p{N}+\z/, "[perl #19767] variant test" );
+}
+
+{
+
+ $p = 1;
+ foreach (1,2,3,4) {
+ $p++ if /(??{ $p })/ # matches only if (??{ }) yields the current $p, which then advances
+ }
+ ok ($p == 5, "[perl #20683] (??{ }) returns stale values");
+ { package P; $a=1; sub TIESCALAR { bless[] } sub FETCH { $a++ } } # FETCH returns 1, 2, 3, ... on successive reads
+ tie $p, P;
+ foreach (1,2,3,4) {
+ /(??{ $p })/
+ }
+ ok ( $p == 5, "(??{ }) returns stale values"); # expects one FETCH per iteration, making this read the fifth
+}
+
+{
+ # Subject: Odd regexp behavior
+ # From: Markus Kuhn <Markus.Kuhn@cl.cam.ac.uk>
+ # Date: Wed, 26 Feb 2003 16:53:12 +0000
+ # Message-Id: <E18o4nw-0008Ly-00@wisbech.cl.cam.ac.uk>
+ # To: perl-unicode@perl.org
+
+ $x = "\x{2019}\nk"; $x =~ s/(\S)\n(\S)/$1 $2/sg; # newline between two non-space characters becomes a space
+ ok($x eq "\x{2019} k", "Markus Kuhn 2003-02-26");
+
+ $x = "b\nk"; $x =~ s/(\S)\n(\S)/$1 $2/sg; # same substitution on a plain ASCII string
+ ok($x eq "b k", "Markus Kuhn 2003-02-26");
+
+ ok("\x{2019}" =~ /\S/, "Markus Kuhn 2003-02-26"); # U+2019 must count as non-space
+}
+
+{
+ my $i;
+ ok('-1-3-5-' eq join('', split /((??{$i++}))/, '-1-3-5-'), # the capture group makes split return the separators too, so join rebuilds the input
+ "[perl #21411] (??{ .. }) corrupts split's stack")
+}
+
+{
+ ok("\x{100}\n" =~ /\x{100}\n$/, "UTF8 length cache and fbm_compile");
+}
+
+{
+ package Str;
+ use overload q/""/ => sub { ${$_[0]}; }; # stringification returns the wrapped scalar
+ sub new { my ($c, $v) = @_; bless \$v, $c; } # object is a blessed reference to the given string
+
+ package main;
+ $_ = Str->new("a\x{100}/\x{100}b"); # match against the overloaded object, not a plain string
+ ok(join(":", /\b(.)\x{100}/g) eq "a:/", "re_intuit_start and PL_bostr");
+}
+
+# last test 996