From: Hugo van der Sanden Date: Mon, 1 Jul 2002 13:28:04 +0000 (+0100) Subject: Re: [ID 20020630.002] utf8 regex only matches 32k X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=faf11cac614129491d0258772ee4e6f8a3fb39e8;p=p5sagit%2Fp5-mst-13.2.git Re: [ID 20020630.002] utf8 regex only matches 32k Message-Id: <200207011228.g61CS4T06766@crypt.compulink.co.uk> Date: Mon, 01 Jul 2002 13:28:04 +0100 p4raw-id: //depot/perl@17390 --- diff --git a/regexec.c b/regexec.c index 793201c..bdc05af 100644 --- a/regexec.c +++ b/regexec.c @@ -3990,7 +3990,9 @@ S_regrepeat(pTHX_ regnode *p, I32 max) register bool do_utf8 = PL_reg_match_utf8; scan = PL_reginput; - if (max != REG_INFTY && max < loceol - scan) + if (max == REG_INFTY) + max = I32_MAX; + else if (max < loceol - scan) loceol = scan + max; switch (OP(p)) { case REG_ANY: diff --git a/t/op/pat.t b/t/op/pat.t index 5681d6a..767c069 100755 --- a/t/op/pat.t +++ b/t/op/pat.t @@ -6,7 +6,7 @@ $| = 1; -print "1..910\n"; +print "1..922\n"; BEGIN { chdir 't' if -d 't'; @@ -2884,3 +2884,21 @@ EOF print "d" =~ /\p{InConsonant}/ ? "ok $test\n" : "not ok $test\n"; $test++; print "e" =~ /\P{InConsonant}/ ? "ok $test\n" : "not ok $test\n"; $test++; +{ + print "# [ID 20020630.002] utf8 regex only matches 32k\n"; + $test = 911; + for ([ 'byte', "\x{ff}" ], [ 'utf8', "\x{1ff}" ]) { + my($type, $char) = @$_; + for my $len (32000, 32768, 33000) { + my $s = $char . "f" x $len; + my $r = $s =~ /$char([f]*)/gc; + print $r ? "ok $test\n" : "not ok $test\t# <$type x $len> fail\n"; + ++$test; + print +(!$r or pos($s) == $len + 1) ? "ok $test\n" + : "not ok $test\t# <$type x $len> pos @{[ pos($s) ]}\n"; + ++$test; + } + } +} + +$test = 923;