X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=t%2Fop%2Fpat.t;h=5cdb2e5ad221ed6f25b0f524b7487f795f8a863a;hb=d07ddd77a31b1e57c2f358652e4f3f85d2e29ad4;hp=2531d71954559edb4033e178d71f5118a373d738;hpb=395ddfe62c8314bb91a6a28057648356e4dfc9bf;p=p5sagit%2Fp5-mst-13.2.git diff --git a/t/op/pat.t b/t/op/pat.t index 2531d71..5cdb2e5 100755 --- a/t/op/pat.t +++ b/t/op/pat.t @@ -6,7 +6,7 @@ $| = 1; -print "1..634\n"; +print "1..834\n"; BEGIN { chdir 't' if -d 't'; @@ -1129,6 +1129,8 @@ print "not " unless "A \x{263a} B z C" =~ /A . B (??{ "z" }) C/; print "ok $test\n"; $test++; +my $ordA = ord('A'); + $_ = "a\x{100}b"; if (/(.)(\C)(\C)(.)/) { print "ok 232\n"; @@ -1137,15 +1139,32 @@ if (/(.)(\C)(\C)(.)/) { } else { print "not ok 233\n"; } - if ($2 eq "\xC4") { - print "ok 234\n"; - } else { - print "not ok 234\n"; - } - if ($3 eq "\x80") { - print "ok 235\n"; + if ($ordA == 65) { # ASCII (or equivalent), should be UTF-8 + if ($2 eq "\xC4") { + print "ok 234\n"; + } else { + print "not ok 234\n"; + } + if ($3 eq "\x80") { + print "ok 235\n"; + } else { + print "not ok 235\n"; + } + } elsif ($ordA == 193) { # EBCDIC (or equivalent), should be UTF-EBCDIC + if ($2 eq "\x8C") { + print "ok 234\n"; + } else { + print "not ok 234\n"; + } + if ($3 eq "\x41") { + print "ok 235\n"; + } else { + print "not ok 235\n"; + } } else { - print "not ok 235\n"; + for (234..235) { + print "not ok $_ # ord('A') == $ordA\n"; + } } if ($4 eq "b") { print "ok 236\n"; @@ -1161,10 +1180,20 @@ $_ = "\x{100}"; if (/(\C)/g) { print "ok 237\n"; # currently \C are still tagged as UTF-8 - if ($1 eq "\xC4") { - print "ok 238\n"; + if ($ordA == 65) { + if ($1 eq "\xC4") { + print "ok 238\n"; + } else { + print "not ok 238\n"; + } + } elsif ($ordA == 193) { + if ($1 eq "\x8C") { + print "ok 238\n"; + } else { + print "not ok 238\n"; + } } else { - print "not ok 238\n"; + print "not ok 238 # ord('A') == $ordA\n"; } } else { for (237..238) { @@ -1174,10 +1203,20 @@ if (/(\C)/g) { if (/(\C)/g) { print "ok 239\n"; # currently \C are still tagged as UTF-8 - if ($1 eq "\x80") { - print "ok 240\n"; + if ($ordA == 65) { + if ($1 eq "\x80") { + print "ok 240\n"; + } else { + print "not ok 240\n"; + } + } elsif ($ordA == 193) { + if ($1 eq "\x41") { + print "ok 240\n"; + } else { + print "not ok 240\n"; + } } else { - print "not ok 240\n"; + print "not ok 240 # ord('A') == $ordA\n"; } } else { for (239..240) { @@ -1248,7 +1287,7 @@ print "ok 247\n"; "#latin[$latin]\nnot ok $test\n"; $test++; $latin =~ s/stra\337e/straße/; # \303\237 after the 2nd a - use utf8; + use utf8; # needed for the raw UTF-8 $latin =~ s!(s)tr(?:aß|s+e)!$1tr.!; # \303\237 after the a } } @@ -1579,9 +1618,9 @@ EOT { # from Robin Houston - my $x = "\x{12345678}"; + my $x = "\x{10FFFD}"; $x =~ s/(.)/$1/g; - print "not " unless ord($x) == 0x12345678 && length($x) == 1; + print "not " unless ord($x) == 0x10FFFD && length($x) == 1; print "ok 587\n"; } @@ -1796,3 +1835,746 @@ if(test_o('abc','..(.)') eq 'a') { print "not ok 634\n"; } +# 635..639: ID 20010619.003 (only the space character is +# supposed to be [:print:], not the whole isprint()). + +print "not " if "\n" =~ /[[:print:]]/; +print "ok 635\n"; + +print "not " if "\t" =~ /[[:print:]]/; +print "ok 636\n"; + +# Amazingly vertical tabulator is the same in ASCII and EBCDIC. +print "not " if "\014" =~ /[[:print:]]/; +print "ok 637\n"; + +print "not " if "\r" =~ /[[:print:]]/; +print "ok 638\n"; + +print "not " unless " " =~ /[[:print:]]/; +print "ok 639\n"; + +## +## Test basic $^N usage outside of a regex +## +$x = "abcdef"; +$T="ok 640\n";if ($x =~ /cde/ and not defined $^N) {print $T} else {print "not $T"}; +$T="ok 641\n";if ($x =~ /(cde)/ and $^N eq "cde") {print $T} else {print "not $T"}; +$T="ok 642\n";if ($x =~ /(c)(d)(e)/ and $^N eq "e") {print $T} else {print "not $T"}; +$T="ok 643\n";if ($x =~ /(c(d)e)/ and $^N eq "cde") {print $T} else {print "not $T"}; +$T="ok 644\n";if ($x =~ /(foo)|(c(d)e)/ and $^N eq "cde") {print $T} else {print "not $T"}; +$T="ok 645\n";if ($x =~ /(c(d)e)|(foo)/ and $^N eq "cde") {print $T} else {print "not $T"}; +$T="ok 646\n";if ($x =~ /(c(d)e)|(abc)/ and $^N eq "abc") {print $T} else {print "not $T"}; +$T="ok 647\n";if ($x =~ /(c(d)e)|(abc)x/ and $^N eq "cde") {print $T} else {print "not $T"}; +$T="ok 648\n";if ($x =~ /(c(d)e)(abc)?/ and $^N eq "cde") {print $T} else {print "not $T"}; +$T="ok 649\n";if ($x =~ /(?:c(d)e)/ and $^N eq "d" ) {print $T} else {print "not $T"}; +$T="ok 650\n";if ($x =~ /(?:c(d)e)(?:f)/ and $^N eq "d" ) {print $T} else {print "not $T"}; +$T="ok 651\n";if ($x =~ /(?:([abc])|([def]))*/ and $^N eq "f" ){print $T} else {print "not $T"}; +$T="ok 652\n";if ($x =~ /(?:([ace])|([bdf]))*/ and $^N eq "f" ){print $T} else {print "not $T"}; +$T="ok 653\n";if ($x =~ /(([ace])|([bd]))*/ and $^N eq "e" ){print $T} else {print "not $T"}; +{ + $T="ok 654\n";if($x =~ /(([ace])|([bdf]))*/ and $^N eq "f" ){print $T} else {print "not $T"}; +} +## test to see if $^N is automatically localized -- it should now +## have the value set in test 653 +$T="ok 655\n";if ($^N eq "e" ){print $T} else {print "not $T"}; + +## +## Now test inside (?{...}) +## +$T="ok 656\n";if ($x =~ /a([abc])(?{$y=$^N})c/ and $y eq "b" ){print $T} else {print "not $T"}; +$T="ok 657\n";if ($x =~ /a([abc]+)(?{$y=$^N})d/ and $y eq "bc"){print $T} else {print "not $T"}; +$T="ok 658\n";if ($x =~ /a([abcdefg]+)(?{$y=$^N})d/ and $y eq "bc"){print $T} else {print "not $T"}; +$T="ok 659\n";if ($x =~ /(a([abcdefg]+)(?{$y=$^N})d)(?{$z=$^N})e/ and $y eq "bc" and $z eq "abcd") + {print $T} else {print "not $T"}; +$T="ok 660\n";if ($x =~ /(a([abcdefg]+)(?{$y=$^N})de)(?{$z=$^N})/ and $y eq "bc" and $z eq "abcde") + {print $T} else {print "not $T"}; + +# Test the Unicode script classes + +print "not " unless chr(0x100) =~ /\p{InLatin}/; # outside Latin-1 +print "ok 661\n"; + +print "not " unless chr(0x212b) =~ /\p{InLatin}/; # Angstrom sign, very outside +print "ok 662\n"; + +print "not " unless chr(0x5d0) =~ /\p{InHebrew}/; # inside HebrewBlock +print "ok 663\n"; + +print "not " unless chr(0xfb4f) =~ /\p{InHebrew}/; # outside HebrewBlock +print "ok 664\n"; + +print "not " unless chr(0xb5) =~ /\p{InGreek}/; # singleton (not in a range) +print "ok 665\n"; + +print "not " unless chr(0x37a) =~ /\p{InGreek}/; # singleton +print "ok 666\n"; + +print "not " unless chr(0x386) =~ /\p{InGreek}/; # singleton +print "ok 667\n"; + +print "not " unless chr(0x387) =~ /\P{InGreek}/; # not there +print "ok 668\n"; + +print "not " unless chr(0x388) =~ /\p{InGreek}/; # range +print "ok 669\n"; + +print "not " unless chr(0x38a) =~ /\p{InGreek}/; # range +print "ok 670\n"; + +print "not " unless chr(0x38b) =~ /\P{InGreek}/; # not there +print "ok 671\n"; + +print "not " unless chr(0x38c) =~ /\p{InGreek}/; # singleton +print "ok 672\n"; + +## +## Test [:cntrl:]... +## +## Should probably put in tests for all the POSIX stuff, but not sure how to +## guarantee a specific locale...... +## +$AllBytes = join('', map { chr($_) } 0..255); +($x = $AllBytes) =~ s/[[:cntrl:]]//g; +if ($x ne join('', map { chr($_) } 0x20..0x7E, 0x80..0xFF)) { print "not " }; +print "ok 673\n"; + +($x = $AllBytes) =~ s/[^[:cntrl:]]//g; +if ($x ne join('', map { chr($_) } 0..0x1F, 0x7F)) { print "not " }; +print "ok 674\n"; + +# With /s modifier UTF8 chars were interpreted as bytes +{ + my $a = "Hello \x{263A} World"; + + my @a = ($a =~ /./gs); + + print "not " unless $#a == 12; + print "ok 675\n"; +} + +@a = ("foo\nbar" =~ /./g); +print "ok 676\n" if @a == 6 && "@a" eq "f o o b a r"; + +@a = ("foo\nbar" =~ /./gs); +print "ok 677\n" if @a == 7 && "@a" eq "f o o \n b a r"; + +@a = ("foo\nbar" =~ /\C/g); +print "ok 678\n" if @a == 7 && "@a" eq "f o o \n b a r"; + +@a = ("foo\nbar" =~ /\C/gs); +print "ok 679\n" if @a == 7 && "@a" eq "f o o \n b a r"; + +@a = ("foo\n\x{100}bar" =~ /./g); +print "ok 680\n" if @a == 7 && "@a" eq "f o o \x{100} b a r"; + +@a = ("foo\n\x{100}bar" =~ /./gs); +print "ok 681\n" if @a == 8 && "@a" eq "f o o \n \x{100} b a r"; + +($a, $b) = map { chr } ord('A') == 65 ? (0xc4, 0x80) : (0x8c, 0x41); + +@a = ("foo\n\x{100}bar" =~ /\C/g); +print "ok 682\n" if @a == 9 && "@a" eq "f o o \n $a $b b a r"; + +@a = ("foo\n\x{100}bar" =~ /\C/gs); +print "ok 683\n" if @a == 9 && "@a" eq "f o o \n $a $b b a r"; + +{ + # [ID 20010814.004] pos() doesn't work when using =~m// in list context + $_ = "ababacadaea"; + $a = join ":", /b./gc; + $b = join ":", /a./gc; + $c = pos; + print "$a $b $c" eq 'ba:ba ad:ae 10' ? "ok 684\n" : "not ok 684\t# $a $b $c\n"; +} + +{ + # [ID 20010407.006] matching utf8 return values from functions does not work + + package ID_20010407_006; + + sub x { + "a\x{1234}"; + } + + my $x = x; + my $y; + + $x =~ /(..)/; $y = $1; + print "not " unless length($y) == 2 && $y eq $x; + print "ok 685\n"; + + x =~ /(..)/; $y = $1; + print "not " unless length($y) == 2 && $y eq $x; + print "ok 686\n"; +} + + +my $test = 687; + +# Force scalar context on the patern match +sub ok ($$) { + my($ok, $name) = @_; + + printf "%sok %d - %s\n", ($ok ? "" : "not "), $test, $name; + + printf "# Failed test at line %d\n", (caller)[2] unless $ok; + + $test++; + return $ok; +} + +{ + # Check that \x## works. 5.6.1 and 5.005_03 fail some of these. + $x = "\x4e" . "E"; + ok ($x =~ /^\x4EE$/, "Check only 2 bytes of hex are matched."); + + $x = "\x4e" . "i"; + ok ($x =~ /^\x4Ei$/, "Check that invalid hex digit stops it (2)"); + + $x = "\x4" . "j"; + ok ($x =~ /^\x4j$/, "Check that invalid hex digit stops it (1)"); + + $x = "\x0" . "k"; + ok ($x =~ /^\xk$/, "Check that invalid hex digit stops it (0)"); + + $x = "\x0" . "x"; + ok ($x =~ /^\xx$/, "\\xx isn't to be treated as \\0"); + + $x = "\x0" . "xa"; + ok ($x =~ /^\xxa$/, "\\xxa isn't to be treated as \\xa"); + + $x = "\x9" . "_b"; + ok ($x =~ /^\x9_b$/, "\\x9_b isn't to be treated as \\x9b"); + + print "# and now again in [] ranges\n"; + + $x = "\x4e" . "E"; + ok ($x =~ /^[\x4EE]{2}$/, "Check only 2 bytes of hex are matched."); + + $x = "\x4e" . "i"; + ok ($x =~ /^[\x4Ei]{2}$/, "Check that invalid hex digit stops it (2)"); + + $x = "\x4" . "j"; + ok ($x =~ /^[\x4j]{2}$/, "Check that invalid hex digit stops it (1)"); + + $x = "\x0" . "k"; + ok ($x =~ /^[\xk]{2}$/, "Check that invalid hex digit stops it (0)"); + + $x = "\x0" . "x"; + ok ($x =~ /^[\xx]{2}$/, "\\xx isn't to be treated as \\0"); + + $x = "\x0" . "xa"; + ok ($x =~ /^[\xxa]{3}$/, "\\xxa isn't to be treated as \\xa"); + + $x = "\x9" . "_b"; + ok ($x =~ /^[\x9_b]{3}$/, "\\x9_b isn't to be treated as \\x9b"); + +} + +{ + # Check that \x{##} works. 5.6.1 fails quite a few of these. + + $x = "\x9b"; + ok ($x =~ /^\x{9_b}$/, "\\x{9_b} is to be treated as \\x9b"); + + $x = "\x9b" . "y"; + ok ($x =~ /^\x{9_b}y$/, "\\x{9_b} is to be treated as \\x9b (again)"); + + $x = "\x9b" . "y"; + ok ($x =~ /^\x{9b_}y$/, "\\x{9b_} is to be treated as \\x9b"); + + $x = "\x9b" . "y"; + ok ($x =~ /^\x{9_bq}y$/, "\\x{9_bc} is to be treated as \\x9b"); + + $x = "\x0" . "y"; + ok ($x =~ /^\x{x9b}y$/, "\\x{x9b} is to be treated as \\x0"); + + $x = "\x0" . "y"; + ok ($x =~ /^\x{0x9b}y$/, "\\x{0x9b} is to be treated as \\x0"); + + $x = "\x9b" . "y"; + ok ($x =~ /^\x{09b}y$/, "\\x{09b} is to be treated as \\x9b"); + + print "# and now again in [] ranges\n"; + + $x = "\x9b"; + ok ($x =~ /^[\x{9_b}]$/, "\\x{9_b} is to be treated as \\x9b"); + + $x = "\x9b" . "y"; + ok ($x =~ /^[\x{9_b}y]{2}$/, "\\x{9_b} is to be treated as \\x9b (again)"); + + $x = "\x9b" . "y"; + ok ($x =~ /^[\x{9b_}y]{2}$/, "\\x{9b_} is to be treated as \\x9b"); + + $x = "\x9b" . "y"; + ok ($x =~ /^[\x{9_bq}y]{2}$/, "\\x{9_bc} is to be treated as \\x9b"); + + $x = "\x0" . "y"; + ok ($x =~ /^[\x{x9b}y]{2}$/, "\\x{x9b} is to be treated as \\x0"); + + $x = "\x0" . "y"; + ok ($x =~ /^[\x{0x9b}y]{2}$/, "\\x{0x9b} is to be treated as \\x0"); + + $x = "\x9b" . "y"; + ok ($x =~ /^[\x{09b}y]{2}$/, "\\x{09b} is to be treated as \\x9b"); +} + +{ + # high bit bug -- japhy + my $x = "ab\200d"; + $x =~ /.*?\200/ or print "not "; + print "ok 715\n"; +} + +print "# some Unicode properties\n"; + +{ + # Dashes, underbars, case. + print "not " unless "\x80" =~ /\p{in-latin1_SUPPLEMENT}/; + print "ok 716\n"; + + # Complement, leading and trailing whitespace. + print "not " unless "\x80" =~ /\P{ ^ In Latin 1 Supplement }/; + print "ok 717\n"; + + # No ^In, dashes, case, dash, any intervening (word-break) whitespace. + # (well, newlines don't work...) + print "not " unless "\x80" =~ /\p{latin-1 supplement}/; + print "ok 718\n"; +} + +{ + print "not " unless "a" =~ /\pL/; + print "ok 719\n"; + + print "not " unless "a" =~ /\p{IsLl}/; + print "ok 720\n"; + + print "not " if "a" =~ /\p{IsLu}/; + print "ok 721\n"; + + print "not " unless "a" =~ /\p{Ll}/; + print "ok 722\n"; + + print "not " if "a" =~ /\p{Lu}/; + print "ok 723\n"; + + print "not " unless "A" =~ /\pL/; + print "ok 724\n"; + + print "not " unless "A" =~ /\p{IsLu}/; + print "ok 725\n"; + + print "not " if "A" =~ /\p{IsLl}/; + print "ok 726\n"; + + print "not " unless "A" =~ /\p{Lu}/; + print "ok 727\n"; + + print "not " if "A" =~ /\p{Ll}/; + print "ok 728\n"; + + print "not " if "a" =~ /\PL/; + print "ok 729\n"; + + print "not " if "a" =~ /\P{IsLl}/; + print "ok 730\n"; + + print "not " unless "a" =~ /\P{IsLu}/; + print "ok 731\n"; + + print "not " if "a" =~ /\P{Ll}/; + print "ok 732\n"; + + print "not " unless "a" =~ /\P{Lu}/; + print "ok 733\n"; + + print "not " if "A" =~ /\PL/; + print "ok 734\n"; + + print "not " if "A" =~ /\P{IsLu}/; + print "ok 735\n"; + + print "not " unless "A" =~ /\P{IsLl}/; + print "ok 736\n"; + + print "not " if "A" =~ /\P{Lu}/; + print "ok 737\n"; + + print "not " unless "A" =~ /\P{Ll}/; + print "ok 738\n"; + +} + +{ + print "not " if "a" =~ /\p{Common}/; + print "ok 739\n"; + + print "not " unless "1" =~ /\p{Common}/; + print "ok 740\n"; +} + +{ + print "not " if "a" =~ /\p{Inherited}/; + print "ok 741\n"; + + print "not " unless "\x{300}" =~ /\p{Inherited}/; + print "ok 742\n"; +} + +{ + print "not " unless "a" =~ /\p{L&}/; + print "ok 743\n"; + + print "not " if "1" =~ /\p{L&}/; + print "ok 744\n"; +} + +{ + print "not " unless "a" =~ /\p{LowercaseLetter}/; + print "ok 745\n"; + + print "not " if "A" =~ /\p{ + Lowercase + Letter + }/x; + print "ok 746\n"; +} + +{ + print "not " unless "\x{AC00}" =~ /\p{HangulSyllable}/; + print "ok 747\n"; +} + +{ + # Script=, Block=, Category= + + print "not " unless "\x{0100}" =~ /\p{Script=Latin}/; + print "ok 748\n"; + + print "not " unless "\x{0100}" =~ /\p{Block=LatinExtendedA}/; + print "ok 749\n"; + + print "not " unless "\x{0100}" =~ /\p{Category=UppercaseLetter}/; + print "ok 750\n"; +} + +{ + print "# the basic character classes and Unicode \n"; + + # 0100;LATIN CAPITAL LETTER A WITH MACRON;Lu;0;L;0041 0304;;;;N;LATIN CAPITAL LETTER A MACRON;;;0101; + print "not " unless "\x{0100}" =~ /\w/; + print "ok 751\n"; + + # 0660;ARABIC-INDIC DIGIT ZERO;Nd;0;AN;;0;0;0;N;;;;; + print "not " unless "\x{0660}" =~ /\d/; + print "ok 752\n"; + + # 1680;OGHAM SPACE MARK;Zs;0;WS;;;;;N;;;;; + print "not " unless "\x{1680}" =~ /\s/; + print "ok 753\n"; +} + +{ + print "# folding matches and Unicode\n"; + + print "not " unless "a\x{100}" =~ /A/i; + print "ok 754\n"; + + print "not " unless "A\x{100}" =~ /A/i; + print "ok 755\n"; + + print "not " unless "a\x{100}" =~ /a/i; + print "ok 756\n"; + + print "not " unless "A\x{100}" =~ /A/i; + print "ok 757\n"; + + print "not " unless "\x{101}a" =~ /\x{100}/i; + print "ok 758\n"; + + print "not " unless "\x{100}a" =~ /\x{100}/i; + print "ok 759\n"; + + print "not " unless "\x{101}a" =~ /\x{101}/i; + print "ok 760\n"; + + print "not " unless "\x{100}a" =~ /\x{101}/i; + print "ok 761\n"; + + print "not " unless "a\x{100}" =~ /A\x{100}/i; + print "ok 762\n"; + + print "not " unless "A\x{100}" =~ /A\x{100}/i; + print "ok 763\n"; + + print "not " unless "a\x{100}" =~ /a\x{100}/i; + print "ok 764\n"; + + print "not " unless "A\x{100}" =~ /A\x{100}/i; + print "ok 765\n"; + + print "not " unless "a\x{100}" =~ /[A]/i; + print "ok 766\n"; + + print "not " unless "A\x{100}" =~ /[A]/i; + print "ok 767\n"; + + print "not " unless "a\x{100}" =~ /[a]/i; + print "ok 768\n"; + + print "not " unless "A\x{100}" =~ /[A]/i; + print "ok 769\n"; + + print "not " unless "\x{101}a" =~ /[\x{100}]/i; + print "ok 770\n"; + + print "not " unless "\x{100}a" =~ /[\x{100}]/i; + print "ok 771\n"; + + print "not " unless "\x{101}a" =~ /[\x{101}]/i; + print "ok 772\n"; + + print "not " unless "\x{100}a" =~ /[\x{101}]/i; + print "ok 773\n"; + +} + +{ + use charnames ':full'; + + print "# LATIN LETTER A WITH GRAVE\n"; + my $lower = "\N{LATIN SMALL LETTER A WITH GRAVE}"; + my $UPPER = "\N{LATIN CAPITAL LETTER A WITH GRAVE}"; + + print $lower =~ m/$UPPER/i ? "ok 774\n" : "not ok 774\n"; + print $UPPER =~ m/$lower/i ? "ok 775\n" : "not ok 775\n"; + print $lower =~ m/[$UPPER]/i ? "ok 776\n" : "not ok 776\n"; + print $UPPER =~ m/[$lower]/i ? "ok 777\n" : "not ok 777\n"; + + print "# GREEK LETTER ALPHA WITH VRACHY\n"; + + $lower = "\N{GREEK CAPITAL LETTER ALPHA WITH VRACHY}"; + $UPPER = "\N{GREEK SMALL LETTER ALPHA WITH VRACHY}"; + + print $lower =~ m/$UPPER/i ? "ok 778\n" : "not ok 778\n"; + print $UPPER =~ m/$lower/i ? "ok 779\n" : "not ok 779\n"; + print $lower =~ m/[$UPPER]/i ? "ok 780\n" : "not ok 780\n"; + print $UPPER =~ m/[$lower]/i ? "ok 781\n" : "not ok 781\n"; + + print "# LATIN LETTER Y WITH DIAERESIS\n"; + + $lower = "\N{LATIN CAPITAL LETTER Y WITH DIAERESIS}"; + $UPPER = "\N{LATIN SMALL LETTER Y WITH DIAERESIS}"; + print $lower =~ m/$UPPER/i ? "ok 782\n" : "not ok 782\n"; + print $UPPER =~ m/$lower/i ? "ok 783\n" : "not ok 783\n"; + print $lower =~ m/[$UPPER]/i ? "ok 784\n" : "not ok 784\n"; + print $UPPER =~ m/[$lower]/i ? "ok 785\n" : "not ok 785\n"; +} + +{ + use warnings; + use charnames ':full'; + + print "# GREEK CAPITAL LETTER SIGMA vs COMBINING GREEK PERISPOMENI\n"; + + my $SIGMA = "\N{GREEK CAPITAL LETTER SIGMA}"; + my $char = "\N{COMBINING GREEK PERISPOMENI}"; + + # Before #13843 this was failing by matching falsely. + print "_:$char:_" =~ m/_:$SIGMA:_/i ? "not ok 786\n" : "ok 786\n"; +} + +{ + print "# \\X\n"; + + use charnames ':full'; + + print "a!" =~ /^(\X)!/ && $1 eq "a" ? + "ok 787\n" : "not ok 787 # $1\n"; + print "\xDF!" =~ /^(\X)!/ && $1 eq "\xDF" ? + "ok 788\n" : "not ok 788 # $1\n"; + print "\x{100}!" =~ /^(\X)!/ && $1 eq "\x{100}" ? + "ok 789\n" : "not ok 789 # $1\n"; + print "\x{100}\x{300}!" =~ /^(\X)!/ && $1 eq "\x{100}\x{300}" ? + "ok 790\n" : "not ok 790 # $1\n"; + print "\N{LATIN CAPITAL LETTER E}!" =~ /^(\X)!/ && + $1 eq "\N{LATIN CAPITAL LETTER E}" ? + "ok 791\n" : "not ok 791 # $1\n"; + print "\N{LATIN CAPITAL LETTER E}\N{COMBINING GRAVE ACCENT}!" =~ + /^(\X)!/ && + $1 eq "\N{LATIN CAPITAL LETTER E}\N{COMBINING GRAVE ACCENT}" ? + "ok 792\n" : "not ok 792 # $1\n"; +} + +{ + print "#\\C and \\X\n"; + + print "!abc!" =~ /a\Cc/ ? "ok 793\n" : "not ok 793\n"; + print "!abc!" =~ /a\Xc/ ? "ok 794\n" : "not ok 794\n"; +} + +{ + print "# FINAL SIGMA\n"; + + my $SIGMA = "\x{03A3}"; # CAPITAL + my $Sigma = "\x{03C2}"; # SMALL FINAL + my $sigma = "\x{03C3}"; # SMALL + + print $SIGMA =~ /$SIGMA/i ? "ok 795\n" : "not ok 795\n"; + print $SIGMA =~ /$Sigma/i ? "ok 796\n" : "not ok 796\n"; + print $SIGMA =~ /$sigma/i ? "ok 797\n" : "not ok 797\n"; + + print $Sigma =~ /$SIGMA/i ? "ok 798\n" : "not ok 798\n"; + print $Sigma =~ /$Sigma/i ? "ok 799\n" : "not ok 799\n"; + print $Sigma =~ /$sigma/i ? "ok 800\n" : "not ok 800\n"; + + print $sigma =~ /$SIGMA/i ? "ok 801\n" : "not ok 801\n"; + print $sigma =~ /$Sigma/i ? "ok 802\n" : "not ok 802\n"; + print $sigma =~ /$sigma/i ? "ok 803\n" : "not ok 803\n"; + + print $SIGMA =~ /[$SIGMA]/i ? "ok 804\n" : "not ok 804\n"; + print $SIGMA =~ /[$Sigma]/i ? "ok 805\n" : "not ok 805\n"; + print $SIGMA =~ /[$sigma]/i ? "ok 806\n" : "not ok 806\n"; + + print $Sigma =~ /[$SIGMA]/i ? "ok 807\n" : "not ok 807\n"; + print $Sigma =~ /[$Sigma]/i ? "ok 808\n" : "not ok 808\n"; + print $Sigma =~ /[$sigma]/i ? "ok 809\n" : "not ok 809\n"; + + print $sigma =~ /[$SIGMA]/i ? "ok 810\n" : "not ok 810\n"; + print $sigma =~ /[$Sigma]/i ? "ok 811\n" : "not ok 811\n"; + print $sigma =~ /[$sigma]/i ? "ok 812\n" : "not ok 812\n"; +} + +{ + print "# parlez-vous?\n"; + + use charnames ':full'; + + print "fran\N{LATIN SMALL LETTER C}ais" =~ + /fran.ais/ && + $& eq "francais" ? + "ok 813\n" : "not ok 813\n"; + + print "fran\N{LATIN SMALL LETTER C WITH CEDILLA}ais" =~ + /fran.ais/ && + $& eq "fran\N{LATIN SMALL LETTER C WITH CEDILLA}ais" ? + "ok 814\n" : "not ok 814\n"; + + print "fran\N{LATIN SMALL LETTER C}ais" =~ + /fran\Cais/ && + $& eq "francais" ? + "ok 815\n" : "not ok 815\n"; + + print "franc\N{COMBINING CEDILLA}ais" =~ + /franc\C\Cais/ ? # COMBINING CEDILLA is two bytes when encoded + "ok 816\n" : "not ok 816\n"; + + print "fran\N{LATIN SMALL LETTER C}ais" =~ + /fran\Xais/ && + $& eq "francais" ? + "ok 817\n" : "not ok 817\n"; + + print "fran\N{LATIN SMALL LETTER C WITH CEDILLA}ais" =~ + /fran\Xais/ && + $& eq "fran\N{LATIN SMALL LETTER C WITH CEDILLA}ais" ? + "ok 818\n" : "not ok 818\n"; + + print "franc\N{COMBINING CEDILLA}ais" =~ + /fran\Xais/ && + $& eq "franc\N{COMBINING CEDILLA}ais" ? + "ok 819\n" : "not ok 819\n"; + + print "fran\N{LATIN SMALL LETTER C WITH CEDILLA}ais" =~ + /fran\N{LATIN SMALL LETTER C WITH CEDILLA}ais/ && + $& eq "fran\N{LATIN SMALL LETTER C WITH CEDILLA}ais" ? + "ok 820\n" : "not ok 820\n"; + + print "franc\N{COMBINING CEDILLA}ais" =~ + /franc\N{COMBINING CEDILLA}ais/ && + $& eq "franc\N{COMBINING CEDILLA}ais" ? + "ok 821\n" : "not ok 821\n"; + + print "fran\N{LATIN SMALL LETTER C}ais" =~ + /fran(?:c\N{COMBINING CEDILLA}?|\N{LATIN SMALL LETTER C WITH CEDILLA})ais/ && + $& eq "francais" ? + "ok 822\n" : "not ok 822\n"; + + print "fran\N{LATIN SMALL LETTER C}ais" =~ + /fran(?:c\N{COMBINING CEDILLA}?|\N{LATIN SMALL LETTER C WITH CEDILLA})ais/ && + $& eq "francais" ? + "ok 823\n" : "not ok 823\n"; + + print "fran\N{LATIN SMALL LETTER C WITH CEDILLA}ais" =~ + /fran(?:c\N{COMBINING CEDILLA}?|\N{LATIN SMALL LETTER C WITH CEDILLA})ais/ && + $& eq "fran\N{LATIN SMALL LETTER C WITH CEDILLA}ais" ? + "ok 824\n" : "not ok 824\n"; + + print "franc\N{COMBINING CEDILLA}ais" =~ + /fran(?:c\N{COMBINING CEDILLA}?|\N{LATIN SMALL LETTER C WITH CEDILLA})ais/ && + $& eq "franc\N{COMBINING CEDILLA}ais" ? + "ok 825\n" : "not ok 825\n"; +} + +{ + print "# Does lingering (and useless) UTF8 flag mess up /i matching?\n"; + + { + my $regex = "ABcde"; + my $string = "abcDE\x{100}"; + chop($string); + if ($string =~ m/$regex/i) { + print "ok 826\n"; + } else { + print "not ok 826\n"; + } + } + + { + my $regex = "ABcde\x{100}"; + my $string = "abcDE"; + chop($regex); + if ($string =~ m/$regex/i) { + print "ok 827\n"; + } else { + print "not ok 827\n"; + } + } + + { + my $regex = "ABcde\x{100}"; + my $string = "abcDE\x{100}"; + chop($regex); + chop($string); + if ($string =~ m/$regex/i) { + print "ok 828\n"; + } else { + print "not ok 828\n"; + } + } +} + +{ + print "# more SIGMAs\n"; + + my $SIGMA = "\x{03A3}"; # CAPITAL + my $Sigma = "\x{03C2}"; # SMALL FINAL + my $sigma = "\x{03C3}"; # SMALL + + my $S3 = "$SIGMA$Sigma$sigma"; + + print ":$S3:" =~ /:(($SIGMA)+):/i && $1 eq $S3 && $2 eq $sigma ? + "ok 829\n" : "not ok 829\n"; + print ":$S3:" =~ /:(($Sigma)+):/i && $1 eq $S3 && $2 eq $sigma ? + "ok 830\n" : "not ok 830\n"; + print ":$S3:" =~ /:(($sigma)+):/i && $1 eq $S3 && $2 eq $sigma ? + "ok 831\n" : "not ok 831\n"; + + print ":$S3:" =~ /:(([$SIGMA])+):/i && $1 eq $S3 && $2 eq $sigma ? + "ok 832\n" : "not ok 832\n"; + print ":$S3:" =~ /:(([$Sigma])+):/i && $1 eq $S3 && $2 eq $sigma ? + "ok 833\n" : "not ok 833\n"; + print ":$S3:" =~ /:(([$sigma])+):/i && $1 eq $S3 && $2 eq $sigma ? + "ok 834\n" : "not ok 834\n"; +}