From: Jarkko Hietaniemi
Date: Wed, 9 Jan 2002 01:32:05 +0000 (+0000)
Subject: SANY (. with /s) wasn't happy on Unicode.
X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=def8e4ea8caaa1180102a74b7613845e72bcb26c;p=p5sagit%2Fp5-mst-13.2.git

SANY (. with /s) wasn't happy on Unicode.

p4raw-id: //depot/perl@14143
---

diff --git a/regexec.c b/regexec.c
index 4602b05..06f0d8e 100644
--- a/regexec.c
+++ b/regexec.c
@@ -3877,7 +3877,15 @@ S_regrepeat(pTHX_ regnode *p, I32 max)
         }
         break;
     case SANY:
-        scan = loceol;
+        if (do_utf8) {
+            loceol = PL_regeol;
+            while (scan < loceol) {
+                scan += UTF8SKIP(scan);
+                hardcount++;
+            }
+        }
+        else
+            scan = loceol;
         break;
     case CANY:
         scan = loceol;
diff --git a/t/op/pat.t b/t/op/pat.t
index a504186..6f33f15 100755
--- a/t/op/pat.t
+++ b/t/op/pat.t
@@ -6,7 +6,7 @@
 
 $| = 1;
 
-print "1..843\n";
+print "1..845\n";
 
 BEGIN {
     chdir 't' if -d 't';
@@ -2620,3 +2620,21 @@ print "# some Unicode properties\n";
     print "<\x{2029}>" =~ /<\s>/ ? "ok 843\n" : "not ok 843\n";
 }
 
+{
+    print "# . with /s should work on characters, not bytes\n";
+
+    my $s = "\x{e4}\x{100}";
+
+    # This is not expected to match: the point is that
+    # neither should we get "Malformed UTF-8" warnings.
+    print $s =~ /\G(.+?)\n/gcs ?
+        "not ok 844\n" : "ok 844\n";
+
+    my @c;
+
+    while ($s =~ /\G(.)/gs) {
+        push @c, $1;
+    }
+
+    print join("", @c) eq $s ? "ok 845\n" : "not ok 845\n";
+}