$| = 1;
-print "1..847\n";
+print "1..858\n";
BEGIN {
chdir 't' if -d 't';
print "ok $test\n"; $test++;
}
print "# IsASCII\n";
- if ($code le '00007f') {
- print "not " unless $char =~ /\p{IsASCII}/;
- print "ok $test\n"; $test++;
- print "not " if $char =~ /\P{IsASCII}/;
- print "ok $test\n"; $test++;
+ if (ord("A") == 193) {
+ print "ok $test # Skip: in EBCDIC\n"; $test++;
+ print "ok $test # Skip: in EBCDIC\n"; $test++;
} else {
- print "not " if $char =~ /\p{IsASCII}/;
- print "ok $test\n"; $test++;
- print "not " unless $char =~ /\P{IsASCII}/;
- print "ok $test\n"; $test++;
+ if ($code le '00007f') {
+ print "not " unless $char =~ /\p{IsASCII}/;
+ print "ok $test\n"; $test++;
+ print "not " if $char =~ /\P{IsASCII}/;
+ print "ok $test\n"; $test++;
+ } else {
+ print "not " if $char =~ /\p{IsASCII}/;
+ print "ok $test\n"; $test++;
+ print "not " unless $char =~ /\P{IsASCII}/;
+ print "ok $test\n"; $test++;
+ }
}
print "# IsCntrl\n";
if ($class =~ /^C/) {
# Test the Unicode script classes
-print "not " unless chr(0x100) =~ /\p{InLatin}/; # outside Latin-1
+print "not " unless chr(0x100) =~ /\p{IsLatin}/; # outside Latin-1
print "ok 661\n";
-print "not " unless chr(0x212b) =~ /\p{InLatin}/; # Angstrom sign, very outside
+print "not " unless chr(0x212b) =~ /\p{IsLatin}/; # Angstrom sign, very outside
print "ok 662\n";
-print "not " unless chr(0x5d0) =~ /\p{InHebrew}/; # inside HebrewBlock
+print "not " unless chr(0x5d0) =~ /\p{IsHebrew}/; # inside InHebrew
print "ok 663\n";
-print "not " unless chr(0xfb4f) =~ /\p{InHebrew}/; # outside HebrewBlock
+print "not " unless chr(0xfb4f) =~ /\p{IsHebrew}/; # outside InHebrew
print "ok 664\n";
-print "not " unless chr(0xb5) =~ /\p{InGreek}/; # singleton (not in a range)
+print "not " unless chr(0xb5) =~ /\p{IsGreek}/; # singleton (not in a range)
print "ok 665\n";
-print "not " unless chr(0x37a) =~ /\p{InGreek}/; # singleton
+print "not " unless chr(0x37a) =~ /\p{IsGreek}/; # singleton
print "ok 666\n";
-print "not " unless chr(0x386) =~ /\p{InGreek}/; # singleton
+print "not " unless chr(0x386) =~ /\p{IsGreek}/; # singleton
print "ok 667\n";
-print "not " unless chr(0x387) =~ /\P{InGreek}/; # not there
+print "not " unless chr(0x387) =~ /\P{IsGreek}/; # not there
print "ok 668\n";
-print "not " unless chr(0x388) =~ /\p{InGreek}/; # range
+print "not " unless chr(0x388) =~ /\p{IsGreek}/; # range
print "ok 669\n";
-print "not " unless chr(0x38a) =~ /\p{InGreek}/; # range
+print "not " unless chr(0x38a) =~ /\p{IsGreek}/; # range
print "ok 670\n";
-print "not " unless chr(0x38b) =~ /\P{InGreek}/; # not there
+print "not " unless chr(0x38b) =~ /\P{IsGreek}/; # not there
print "ok 671\n";
-print "not " unless chr(0x38c) =~ /\p{InGreek}/; # singleton
+print "not " unless chr(0x38c) =~ /\p{IsGreek}/; # singleton
print "ok 672\n";
+if (ord("A") == 65) {
##
## Test [:cntrl:]...
##
## Should probably put in tests for all the POSIX stuff, but not sure how to
## guarantee a specific locale......
##
-$AllBytes = join('', map { chr($_) } 0..255);
-($x = $AllBytes) =~ s/[[:cntrl:]]//g;
-if ($x ne join('', map { chr($_) } 0x20..0x7E, 0x80..0xFF)) { print "not " };
-print "ok 673\n";
+ $AllBytes = join('', map { chr($_) } 0..255);
+ ($x = $AllBytes) =~ s/[[:cntrl:]]//g;
+ if ($x ne join('', map { chr($_) } 0x20..0x7E, 0x80..0xFF)) {
+ print "not ";
+ }
+ print "ok 673\n";
-($x = $AllBytes) =~ s/[^[:cntrl:]]//g;
-if ($x ne join('', map { chr($_) } 0..0x1F, 0x7F)) { print "not " };
-print "ok 674\n";
+ ($x = $AllBytes) =~ s/[^[:cntrl:]]//g;
+ if ($x ne join('', map { chr($_) } 0..0x1F, 0x7F)) { print "not " }
+ print "ok 674\n";
+} else {
+ print "ok $_ # Skip: EBCDIC\n" for 673..674;
+}
# With /s modifier UTF8 chars were interpreted as bytes
{
}
{
- print "not " unless "a" =~ /\p{LowercaseLetter}/;
+ print "not " unless "a" =~ /\p{Lowercase Letter}/;
print "ok 745\n";
- print "not " if "A" =~ /\p{
- Lowercase
- Letter
- }/x;
+ print "not " if "A" =~ /\p{lowercaseletter}/;
print "ok 746\n";
}
{
- print "not " unless "\x{AC00}" =~ /\p{HangulSyllable}/;
+ print "not " unless "\x{AC00}" =~ /\p{HangulSyllables}/;
print "ok 747\n";
}
print "not " unless "a\x{100}" =~ /A/i;
print "ok 754\n";
- print "not " unless "A\x{100}" =~ /A/i;
+ print "not " unless "A\x{100}" =~ /a/i;
print "ok 755\n";
print "not " unless "a\x{100}" =~ /a/i;
print "not " unless "a\x{100}" =~ /A\x{100}/i;
print "ok 762\n";
- print "not " unless "A\x{100}" =~ /A\x{100}/i;
+ print "not " unless "A\x{100}" =~ /a\x{100}/i;
print "ok 763\n";
print "not " unless "a\x{100}" =~ /a\x{100}/i;
print "not " unless "a\x{100}" =~ /[A]/i;
print "ok 766\n";
- print "not " unless "A\x{100}" =~ /[A]/i;
+ print "not " unless "A\x{100}" =~ /[a]/i;
print "ok 767\n";
print "not " unless "a\x{100}" =~ /[a]/i;
print "SS" =~
/\N{LATIN SMALL LETTER SHARP S}/i ? "ok 840\n" : "not ok 840\n";
-# These are a bit tricky. Since the LATIN SMALL LETTER SHARP S is U+00DF,
-# the ANYOF reduces to a byte. The Unicodeness needs to be caught earlier.
-# print "ss" =~
-# /[\N{LATIN SMALL LETTER SHARP S}]/i ? "ok 841\n" : "not ok 841\n";
-#
-# print "SS" =~
-# /[\N{LATIN SMALL LETTER SHARP S}]/i ? "ok 842\n" : "not ok 842\n";
+ print "ss" =~
+ /[\N{LATIN SMALL LETTER SHARP S}]/i ? "ok 841\n" : "not ok 841\n";
+
+ print "SS" =~
+ /[\N{LATIN SMALL LETTER SHARP S}]/i ? "ok 842\n" : "not ok 842\n";
+
+ print "\N{LATIN SMALL LETTER SHARP S}" =~ /ss/i ?
+ "ok 843\n" : "not ok 843\n";
+
+ print "\N{LATIN SMALL LETTER SHARP S}" =~ /SS/i ?
+ "ok 844\n" : "not ok 844\n";
}
{
print "# more whitespace: U+0085, U+2028, U+2029\n";
# U+0085 needs to be forced to be Unicode, the \x{100} does that.
- print "<\x{100}\x{0085}>" =~ /<\x{100}\s>/ ? "ok 841\n" : "not ok 841\n";
- print "<\x{2028}>" =~ /<\s>/ ? "ok 842\n" : "not ok 842\n";
- print "<\x{2029}>" =~ /<\s>/ ? "ok 843\n" : "not ok 843\n";
+ print "<\x{100}\x{0085}>" =~ /<\x{100}\s>/ ? "ok 845\n" : "not ok 845\n";
+ print "<\x{2028}>" =~ /<\s>/ ? "ok 846\n" : "not ok 846\n";
+ print "<\x{2029}>" =~ /<\s>/ ? "ok 847\n" : "not ok 847\n";
}
{
- print "# . with /s should work on characters, not bytes\n";
+ print "# . with /s should work on characters, as opposed to bytes\n";
my $s = "\x{e4}\x{100}";
# This is not expected to match: the point is that
# neither should we get "Malformed UTF-8" warnings.
print $s =~ /\G(.+?)\n/gcs ?
- "not ok 844\n" : "ok 844\n";
+ "not ok 848\n" : "ok 848\n";
my @c;
push @c, $1;
}
- print join("", @c) eq $s ? "ok 845\n" : "not ok 845\n";
+ print join("", @c) eq $s ? "ok 849\n" : "not ok 849\n";
my $t1 = "Q003\n\n\x{e4}\x{f6}\n\nQ004\n\n\x{e7}"; # test only chars < 256
my $r1 = "";
$r2 .= $1 . $2;
}
$r2 =~ s/\x{100}//;
- print $r1 eq $r2 ? "ok 846\n" : "not ok 846\n";
+ print $r1 eq $r2 ? "ok 850\n" : "not ok 850\n";
}
{
print "# Unicode lookbehind\n";
- print "A\x{100}B" =~ /(?<=A.)B/ ? "ok 847\n" : "not ok 847\n";
- print "A\x{200}\x{300}B" =~ /(?<=A..)B/ ? "ok 848\n" : "not ok 848\n";
+ print "A\x{100}B" =~ /(?<=A.)B/ ? "ok 851\n" : "not ok 851\n";
+ print "A\x{200}\x{300}B" =~ /(?<=A..)B/ ? "ok 852\n" : "not ok 852\n";
+ print "\x{400}AB" =~ /(?<=\x{400}.)B/ ? "ok 853\n" : "not ok 853\n";
+ print "\x{500\x{600}}B" =~ /(?<=\x{500}.)B/ ? "ok 854\n" : "not ok 854\n";
+}
+
+{
+ print "# [ID 20020124.005]\n";
+
+ # Fixed by #14795.
+
+ $char = "\x{f00f}";
+ $x = "$char b $char";
+
+ $x =~ s{($char)}{
+ "c" =~ /d/;
+ "x";
+ }ge;
+
+ print $x eq "x b x" ? "ok 855\n" : "not ok 855\n";
+}
+
+{
+ print "# UTF-8 hash keys and /\$/\n";
+ # http://www.xray.mpe.mpg.de/mailing-lists/perl5-porters/2002-01/msg01327.html
+
+ my $u = "a\x{100}";
+ my $v = substr($u,0,1);
+ my $w = substr($u,1,1);
+ my %u = ( $u => $u, $v => $v, $w => $w );
+ my $i = 856;
+ for (keys %u) {
+ my $m1 = /^\w*$/ ? 1 : 0;
+ my $m2 = $u{$_}=~/^\w*$/ ? 1 : 0;
+ print $m1 == $m2 ? "ok $i\n" : "not ok $i # $m1 $m2\n";
+ $i++;
+ }
}