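SANY (`.` under /s) treated UTF-8 strings as bytes rather than characters, which could produce "Malformed UTF-8" warnings; make S_regrepeat advance one whole character at a time when the target is UTF-8.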

diff --git a/regexec.c b/regexec.c

index 4602b05..06f0d8e 100644 (file)
--- a/regexec.c
+++ b/regexec.c
@@ -3877,7 +3877,15 @@ S_regrepeat(pTHX_ regnode *p, I32 max)
        }
        break;
     case SANY:
-       scan = loceol;
+        if (do_utf8) {
+           loceol = PL_regeol;
+           while (scan < loceol) {
+               scan += UTF8SKIP(scan);
+               hardcount++;
+           }
+       }
+       else
+           scan = loceol;
        break;
     case CANY:
        scan = loceol;
diff --git a/t/op/pat.t b/t/op/pat.t

index a504186..6f33f15 100755 (executable)
--- a/t/op/pat.t
+++ b/t/op/pat.t
@@ -6,7 +6,7 @@
 
 $| = 1;
 
-print "1..843\n";
+print "1..845\n";
 
 BEGIN {
     chdir 't' if -d 't';
@@ -2620,3 +2620,21 @@ print "# some Unicode properties\n";
     print "<\x{2029}>" =~ /<\s>/ ? "ok 843\n" : "not ok 843\n";
 }
 
+{
+    print "# . with /s should work on characters, not bytes\n";
+
+    my $s = "\x{e4}\x{100}";
+
+    # This is not expected to match; the point is that the
+    # attempt must not emit "Malformed UTF-8" warnings either.
+    print $s =~ /\G(.+?)\n/gcs ?
+       "not ok 844\n" : "ok 844\n";
+
+    my @c;
+
+    while ($s =~ /\G(.)/gs) {
+       push @c, $1;
+    }
+
+    print join("", @c) eq $s ? "ok 845\n" : "not ok 845\n";
+}
regexec.c		patch \| blob \| blame \| history
t/op/pat.t		patch \| blob \| blame \| history