}
-
+print "# set PERL_SKIP_PSYCHO_TEST to skip this test\n";
if (!$ENV{PERL_SKIP_PSYCHO_TEST}){
my @normal=qw(these are some normal words);
my $psycho=join "|",@normal,map chr $_,255..20000;
ok($utf8 =~ /(abc|\xe9)/i, "utf8/latin trie");
ok($utf8 =~ /(abc|$latin1)/i, "utf8/latin trie runtime");
- ok("\xe9" =~ /$utf8/i, "# TODO latin/utf8");
+ ok("\xe9" =~ /$utf8/i, "# latin/utf8");
ok("\xe9" =~ /(abc|$utf8)/i, "# latin/utf8 trie");
- ok($latin1 =~ /$utf8/i, "# TODO latin/utf8 runtime");
+ ok($latin1 =~ /$utf8/i, "# latin/utf8 runtime");
ok($latin1 =~ /(abc|$utf8)/i, "# latin/utf8 trie runtime");
}
$s=~s/(?'digits'\d+)\k'digits'/$+{digits}/;
ok($s eq '123456','Named capture (single quotes) s///');
}
+
+{ # Named captures whose group names contain non-ASCII characters
+ my @ary = (
+ pack('U', 0x00F1), # n-tilde
+ '_'.pack('U', 0x00F1), # _ + n-tilde
+ 'c'.pack('U', 0x0327), # c + cedilla
+ pack('U*', 0x00F1, 0x0327), # n-tilde + cedilla
+ 'a'.pack('U', 0x00B2), # a + superscript two
+ pack('U', 0x0391), # ALPHA
+ pack('U', 0x0391).'2', # ALPHA + 2
+ pack('U', 0x0391).'_', # ALPHA + _
+ );
+ for my $uni (@ary) { # each name is spliced into code that is string-eval'ed under "use utf8"
+ my ($r1, $c1, $r2, $c2) = eval qq{
+ use utf8;
+ scalar("..foo foo.." =~ /(?'${uni}'foo) \\k'${uni}'/),
+ \$+{${uni}},
+ scalar("..bar bar.." =~ /(?<${uni}>bar) \\k<${uni}>/),
+ \$+{${uni}};
+ };
+ ok($r1, "Named capture UTF (?'')"); # $r1/$r2 are the match results, $c1/$c2 the $+{name} values
+ ok(defined $c1 && $c1 eq 'foo', "Named capture UTF \%+");
+ ok($r2, "Named capture UTF (?<>)");
+ ok(defined $c2 && $c2 eq 'bar', "Named capture UTF \%+");
+ }
+}
+
sub iseq($$;$) {
my ( $got, $expect, $name)=@_;
';
ok(!$@,'lvalue $+{...} should not throw an exception');
}
-
+{ # %- must list every capture of a repeated group name, in pattern order
+ my $s='foo bar baz'; # NOTE(review): $s is not used anywhere in this block
+ my @res;
+ if ('1234'=~/(?<A>1)(?<B>2)(?<A>3)(?<B>4)/) {
+ foreach my $name (sort keys(%-)) {
+ my $ary = $-{$name}; # array ref: one slot per group that carries this name
+ foreach my $idx (0..$#$ary) {
+ push @res,"$name:$idx:$ary->[$idx]"; # record as name:index:captured-text
+ }
+ }
+ }
+ my @expect=qw(A:0:1 A:1:3 B:0:2 B:1:4);
+ iseq("@res","@expect","Check %-");
+ eval'
+ print for $-{this_key_doesnt_exist};
+ ';
+ ok(!$@,'lvalue $-{...} should not throw an exception'); # "for" aliases its list, so the missing key is taken in lvalue context
+}
# stress test CURLYX/WHILEM.
#
# This test includes varying levels of nesting, and according to
if ($ENV{PERL_SKIP_PSYCHO_TEST}){
printf "ok %d Skip: No psycho tests\n", $test++;
} else {
+ print "# set PERL_SKIP_PSYCHO_TEST to skip this test\n";
my $r = qr/^
(?:
( (?:a|z+)+ )
iseq($^R,'last regexp code result');
}
iseq($^R,'Nothing');
+
+ { # $^R check for a pattern that uses a bare \1 backreference
+ local $^R = "Bad"; # sentinel value that a successful match should replace
+ ok('x foofoo y' =~ m{
+ (foo|bar)\1 # this time without the +
+ (?{"last regexp code result"})
+ }x);
+ iseq($^R,'last regexp code result');
+ }
+ iseq($^R,'Nothing'); # the local above has gone out of scope again
}
{
local $Message="RT#22395";
ok(!$REGMARK);
iseq($REGERROR,'foo');
}
+{ # \K in s///: whatever matched before \K is kept, only the rest is replaced
+ my $x;
+ $x = "abc.def.ghi.jkl";
+ $x =~ s/.*\K\..*//; # drops the last dot and everything after it
+ ok($x eq "abc.def.ghi");
+
+ $x = "one two three four";
+ $x =~ s/o+ \Kthree//g;
+ ok($x eq "one two four");
+
+ $x = "abcde";
+ $x =~ s/(.)\K/$1/g; # zero-length replacement target after each char, so every char is doubled
+ ok($x eq "aabbccddee");
+}
+sub kt # helper called from inside a (??{ }) pattern with the captured digits as its argument
+{
+ return '4' if $_[0] eq '09028623'; # no explicit return value for any other argument
+}
+
+{ # Nested EVAL using PL_curpm (via $1 or friends)
+ my $re;
+ our $grabit = qr/ ([0-6][0-9]{7}) (??{ kt $1 }) [890] /x; # 8 digits (first one 0-6), then whatever kt($1) returns, then one of 8/9/0
+ $re = qr/^ ( (??{ $grabit }) ) $ /x; # anchored wrapper that pulls $grabit in through a second (??{ })
+ my @res = '0902862349' =~ $re;
+ iseq(join("-",@res),"0902862349",
+ 'PL_curpm is set properly on nested eval');
+
+ our $qr = qr/ (o) (??{ $1 }) /x; # the inner (??{ $1 }) must see the "o" just captured
+ ok( 'boob'=~/( b (??{ $qr }) b )/x && 1,
+ "PL_curpm, nested eval");
+}
+
+{ # Unicode property checks for the upper- and lower-case Roman numeral one
+ use charnames ":full"; # enables the \N{...} character names used below
+ ok("\N{ROMAN NUMERAL ONE}" =~ /\p{Alphabetic}/, "I =~ Alphabetic");
+ ok("\N{ROMAN NUMERAL ONE}" =~ /\p{Uppercase}/, "I =~ Uppercase");
+ ok("\N{ROMAN NUMERAL ONE}" !~ /\p{Lowercase}/, "I !~ Lowercase");
+ ok("\N{ROMAN NUMERAL ONE}" =~ /\p{IDStart}/, "I =~ ID_Start");
+ ok("\N{ROMAN NUMERAL ONE}" =~ /\p{IDContinue}/, "I =~ ID_Continue");
+ ok("\N{SMALL ROMAN NUMERAL ONE}" =~ /\p{Alphabetic}/, "i =~ Alphabetic");
+ ok("\N{SMALL ROMAN NUMERAL ONE}" !~ /\p{Uppercase}/, "i !~ Uppercase");
+ ok("\N{SMALL ROMAN NUMERAL ONE}" =~ /\p{Lowercase}/, "i =~ Lowercase");
+ ok("\N{SMALL ROMAN NUMERAL ONE}" =~ /\p{IDStart}/, "i =~ ID_Start");
+ ok("\N{SMALL ROMAN NUMERAL ONE}" =~ /\p{IDContinue}/, "i =~ ID_Continue");
+}
+
+{
+# requirement of Unicode Technical Standard #18, 1.7 Code Points
+# cf. http://www.unicode.org/reports/tr18/#Supplementary_Characters
+ for my $u (0x7FF, 0x800, 0xFFFF, 0x10000) { # each value must fall inside the [\0-\x{10000}] class
+ no warnings 'utf8'; # oops
+ my $c = chr $u;
+ my $x = sprintf '%04X', $u; # hex form of the code point, used only in the test name
+ ok( "A${c}B" =~ /A[\0-\x{10000}]B/, "unicode range - $x");
+ }
+}
+
+{ # (?|...) branch reset where both alternatives use the group name "digit"
+ my $res="";
+
+ if ('1' =~ /(?|(?<digit>1)|(?<digit>2))/) {
+ $res = "@{$- {digit}}"; # every capture recorded under "digit", space-joined
+ }
+ iseq($res,"1",
+ "Check that (?|...) doesnt cause dupe entries in the names array");
+ #---
+ $res="";
+ if ('11' =~ /(?|(?<digit>1)|(?<digit>2))(?&digit)/) { # (?&digit) recurses into the named group
+ $res = "@{$- {digit}}";
+ }
+ iseq($res, "1",
+ "Check that (?&..) to a buffer inside a (?|...) goes to the leftmost");
+}
+
+{ # a \xDF pattern combined with \x{100} must match without raising warnings
+ use warnings;
+ local $Message = "ASCII pattern that really is utf8";
+ my @w;
+ local $SIG{__WARN__}=sub{push @w,"@_"}; # collect warnings instead of printing them
+ my $c=qq(\x{DF});
+ ok($c=~/${c}|\x{100}/);
+ ok(@w==0); # nothing may have been caught by the handler
+}
+{ # captures set through a qr// must be restored when an inner scope ends
+ local $Message = "corruption of match results of qr// across scopes";
+ my $qr=qr/(fo+)(ba+r)/;
+ 'foobar'=~/$qr/;
+ iseq("$1$2","foobar");
+ {
+ 'foooooobaaaaar'=~/$qr/; # same compiled pattern, different string, inner scope
+ iseq("$1$2",'foooooobaaaaar');
+ }
+ iseq("$1$2","foobar"); # back to the captures of the outer match
+}
+{ # \h / \H on a byte string and on the same string upgraded to utf8
+ local $Message = "HORIZWS";
+ local $_="\t \r\n \n \t".chr(11)."\n";
+ s/\H/H/g; # mark every non-horizontal-whitespace char first ...
+ s/\h/h/g; # ... then every horizontal-whitespace char
+ iseq($_,"hhHHhHhhHH");
+ $_="\t \r\n \n \t".chr(11)."\n";
+ utf8::upgrade($_); # second pass: identical input, utf8-flagged
+ s/\H/H/g;
+ s/\h/h/g;
+ iseq($_,"hhHHhHhhHH");
+}
+{ # every listed whitespace string must match \h, \v or \R, before and after utf8 upgrade
+ local $Message = "Various whitespace special patterns";
+ my @h=map { chr( $_ ) } ( # characters expected to match \h
+ 0x09, 0x20, 0xa0, 0x1680, 0x180e, 0x2000, 0x2001, 0x2002,
+ 0x2003, 0x2004, 0x2005, 0x2006, 0x2007, 0x2008, 0x2009, 0x200a,
+ 0x202f, 0x205f, 0x3000
+ );
+ my @v=map { chr( $_ ) } ( 0x0a, 0x0b, 0x0c, 0x0d, 0x85, 0x2028, 0x2029 ); # expected to match \v
+ my @lb=( "\x0D\x0A", # expected to match \R, including the two-char CR LF
+ map { chr( $_ ) } ( 0x0A..0x0D,0x85,0x2028,0x2029 ));
+ foreach my $t ([\@h,qr/\h/,qr/\h+/],[\@v,qr/\v/,qr/\v+/],[\@lb,qr/\R/,qr/\R+/],){
+ my $ary=shift @$t; # strings to test; @$t now holds just the two patterns
+ foreach my $pat (@$t) {
+ foreach my $str (@$ary) {
+ ok($str=~/($pat)/,$pat);
+ iseq($1,$str,$pat); # the whole string must have been captured
+ utf8::upgrade($str); # $str aliases the array element, so it stays upgraded for the next pattern
+ ok($str=~/($pat)/,"Upgraded string - $pat");
+ iseq($1,$str,"Upgraded string - $pat");
+ }
+ }
+ }
+}
+{
+ local $Message = "Check that \\xDF match properly in its various forms";
+ # test that \xDF matches properly. this is pretty hacky stuff,
+ # but it's actually needed. the malarkey with '-' is to prevent
+ # compilation caching from playing any role in the test.
+ my @df= (chr(0xDF),'-',chr(0xDF)); # patterns: \xDF left as is, the '-' spacer, \xDF to be upgraded
+ utf8::upgrade($df[2]);
+ my @strs= ('ss','sS','Ss','SS',chr(0xDF));
+ my @ss= map { ("$_", "$_") } @strs; # two copies of each string ...
+ utf8::upgrade($ss[$_*2+1]) for 0..$#strs; # ... the odd-indexed copy is upgraded to utf8
+
+ for my $ssi (0..$#ss) {
+ for my $dfi (0..$#df) {
+ my $pat= $df[$dfi];
+ my $str= $ss[$ssi];
+ my $utf_df= ($dfi > 1) ? 'utf8' : ''; # only $df[2] was upgraded
+ my $utf_ss= ($ssi % 2) ? 'utf8' : '';
+ (my $sstr=$str)=~s/\xDF/\\xDF/; # printable form of the string for the test name
+
+ if ($utf_df || $utf_ss || length($ss[$ssi])==1) { # a match is expected when either side is utf8 or the string is \xDF itself
+ my $ret= $str=~/$pat/i;
+ next if $pat eq '-'; # the '-' match is executed but never counted as a test
+ ok($ret,
+ "\"$sstr\"=~/\\xDF/i (str is @{[$utf_ss||'latin']}, pat is @{[$utf_df||'latin']})");
+ } else {
+ my $ret= $str !~ /$pat/i;
+ next if $pat eq '-';
+ ok($ret,
+ "\"$sstr\"!~/\\xDF/i (str is @{[$utf_ss||'latin']}, pat is @{[$utf_df||'latin']})");
+ }
+ }
+ }
+}
+{
+ local $Message = "BBC(Bleadperl Breaks CPAN) Today: String::Multibyte";
+ my $re = qr/(?:[\x00-\xFF]{4})/; # any four chars in the \x00-\xFF range
+ my $hyp = "\0\0\0-";
+ my $esc = "\0\0\0\\";
+
+ my $str = "$esc$hyp$hyp$esc$esc";
+ my @a = ($str =~ /\G(?:\Q$esc$esc\E|\Q$esc$hyp\E|$re)/g); # no capture groups, so each element is one whole match
+
+ iseq(0+@a,3);
+ iseq(join('=', @a),"$esc$hyp=$hyp=$esc$esc");
+}
+# test for keys in %+ and %-
+{
+ my $_ = "abcdef";
+ /(?<foo>a)|(?<foo>b)/;
+ iseq( (join ",", sort keys %+), "foo" );
+ iseq( (join ",", sort keys %-), "foo" );
+ iseq( (join ",", sort values %+), "a" );
+ iseq( (join ",", sort map "@$_", values %-), "a " ); # second "foo" group did not take part, leaving an empty slot
+ /(?<bar>a)(?<bar>b)(?<quux>.)/;
+ iseq( (join ",", sort keys %+), "bar,quux" );
+ iseq( (join ",", sort keys %-), "bar,quux" );
+ iseq( (join ",", sort values %+), "a,c" ); # leftmost
+ iseq( (join ",", sort map "@$_", values %-), "a b,c" );
+ /(?<un>a)(?<deux>c)?/; # second buffer won't capture
+ iseq( (join ",", sort keys %+), "un" );
+ iseq( (join ",", sort keys %-), "deux,un" ); # %- still lists the group that did not capture
+ iseq( (join ",", sort values %+), "a" );
+ iseq( (join ",", sort map "@$_", values %-), ",a" );
+}
+
+# length() on captures, the numbered ones end up in Perl_magic_len
+{
+ my $_ = "aoeu \xe6var ook";
+ /^ \w+ \s (?<eek>\S+)/x; # first word, one whitespace char, then the next run of non-space chars
+
+ iseq( length($`), 0, 'length $`' ); # the match is anchored at the start, so there is no prematch
+ iseq( length($'), 4, q[length $'] );
+ iseq( length($&), 9, 'length $&' );
+ iseq( length($1), 4, 'length $1' );
+ iseq( length($+{eek}), 4, 'length $+{eek} == length $1' );
+}
+
+{ # exists() on %- is expected for any group named in the pattern; on %+ only if that group captured
+ my $ok=-1; # -1 means "the match itself failed"
+
+ $ok=exists($-{x}) ? 1 : 0
+ if 'bar'=~/(?<x>foo)|bar/;
+ iseq($ok,1,'$-{x} exists after "bar"=~/(?<x>foo)|bar/');
+ iseq(scalar(%+), 0, 'scalar %+ == 0 after "bar"=~/(?<x>foo)|bar/');
+ iseq(scalar(%-), 1, 'scalar %- == 1 after "bar"=~/(?<x>foo)|bar/');
+
+ $ok=-1;
+ $ok=exists($+{x}) ? 1 : 0
+ if 'bar'=~/(?<x>foo)|bar/;
+ iseq($ok,0,'$+{x} not exists after "bar"=~/(?<x>foo)|bar/');
+ iseq(scalar(%+), 0, 'scalar %+ == 0 after "bar"=~/(?<x>foo)|bar/');
+ iseq(scalar(%-), 1, 'scalar %- == 1 after "bar"=~/(?<x>foo)|bar/');
+
+ $ok=-1;
+ $ok=exists($-{x}) ? 1 : 0
+ if 'foo'=~/(?<x>foo)|bar/;
+ iseq($ok,1,'$-{x} exists after "foo"=~/(?<x>foo)|bar/');
+ iseq(scalar(%+), 1, 'scalar %+ == 1 after "foo"=~/(?<x>foo)|bar/');
+ iseq(scalar(%-), 1, 'scalar %- == 1 after "foo"=~/(?<x>foo)|bar/');
+
+ $ok=-1;
+ $ok=exists($+{x}) ? 1 : 0
+ if 'foo'=~/(?<x>foo)|bar/;
+ iseq($ok,1,'$+{x} exists after "foo"=~/(?<x>foo)|bar/');
+}
+{ # $1 from a /g match must not change when the matched variable is reassigned
+ local $_;
+ ($_ = 'abc')=~/(abc)/g;
+ $_ = '123'; # overwrite the string the captures were taken from
+ iseq("$1",'abc',"/g leads to unsafe match vars: $1");
+}
+{
+ local $Message="Message-ID: <20070818091501.7eff4831@r2d2>";
+ my $str= "";
+ for(0..5){
+ my @x; # expected to be a fresh, empty array on every iteration
+ $str .= "@x"; # this should ALWAYS be the empty string
+ 'a'=~/(a|)/;
+ push @x,1;
+ }
+ iseq(length($str),"0","Trie scope error, string should be empty");
+ $str="";
+ my @foo = ('a')x5;
+ for (@foo) {
+ my @bar;
+ $str .= "@bar";
+ s/a|/push @bar, 1/e; # same check, with the push done from the replacement code
+ }
+ iseq(length($str),"0","Trie scope error, string should be empty");
+}
+{
+# [perl #45605] Regexp failure with utf8-flagged and byte-flagged string
+
+ my $utf_8 = "\xd6schel";
+ utf8::upgrade($utf_8);
+ $utf_8 =~ m{(\xd6|Ã–)schel};
+ iseq($1,"\xd6","#45605"); # the capture must be the single char \xd6
+}
+
+{
+ # Regardless of utf8ness any character matches itself when
+ # doing a case insensitive match. See also [perl #36207]
+ for my $o (0..255) {
+ my @ch=(chr($o),chr($o)); # [0] stays as is, [1] is upgraded to utf8
+ utf8::upgrade($ch[1]);
+ for my $u_str (0,1) { # all four string/pattern utf8ness combinations
+ for my $u_pat (0,1) {
+ ok( $ch[$u_str]=~/\Q$ch[$u_pat]\E/i,
+ "\$c=~/\$c/i : chr($o) : u_str=$u_str u_pat=$u_pat");
+ ok( $ch[$u_str]=~/\Q$ch[$u_pat]\E|xyz/i,
+ "# \$c=~/\$c|xyz/i : chr($o) : u_str=$u_str u_pat=$u_pat");
+ }
+ }
+ }
+}
+
# Test counter is at bottom of file. Put new tests above here.
#-------------------------------------------------------------------
# Keep the following tests last -- they may crash perl
"Regexp /^(??{'(.)'x 100})/ crashes older perls")
or print "# Unexpected outcome: should pass or crash perl\n";
+eval '/\k/'; # a bare \k with no group name
+ok($@=~/\QSequence \k... not terminated in regex;\E/); # the eval must have failed with this message
+
{
local $Message = "substitution with lookahead (possible segv)";
$_="ns1ns1ns1";
iseq($_,"!Bang!1!Bang!2!Bang!3!Bang!");
}
+# [perl #45337] utf8 + "[a]a{2}" + /$.../ = panic: sv_len_utf8 cache
+
+{
+ local ${^UTF8CACHE} = -1; # NOTE(review): -1 presumably enables the cache self-checks — confirm in perlvar
+ my $s="[a]a{2}";
+ utf8::upgrade $s;
+ ok("aaa" =~ /$s/, "#45337"); # pattern interpolated from the upgraded string
+}
+
# Put new tests above the dotted line about a page above this comment
iseq(0+$::test,$::TestCount,"Got the right number of tests!");
# Don't forget to update this!
BEGIN {
- $::TestCount = 1573;
+ $::TestCount = 4013;
print "1..$::TestCount\n";
}
-