[FIX] Re: UTF-8 failures (surprise!)
Adrian M. Enache [Fri, 31 Jan 2003 09:20:59 +0000 (11:20 +0200)]
Message-ID: <20030131072059.GB6045@ratsnest.hole>

p4raw-id: //depot/perl@18608

regexec.c
t/op/pat.t

index a6fd6ae..ee53a47 100644 (file)
--- a/regexec.c
+++ b/regexec.c
@@ -97,6 +97,7 @@
 #endif
 
 #define REGINCLASS(p,c)  (ANYOF_FLAGS(p) ? reginclass(p,c,0,0) : ANYOF_BITMAP_TEST(p,*(c)))
+#define REGINCLASS_utf8(p,c)  (ANYOF_FLAGS(p) ? reginclass(p,c,0,1) : ANYOF_BITMAP_TEST(p,*(c)))
 
 /*
  * Forwards.
@@ -4078,25 +4079,8 @@ S_regrepeat(pTHX_ regnode *p, I32 max)
        if (do_utf8) {
            loceol = PL_regeol;
            while (hardcount < max && scan < loceol) {
-                bool cont = FALSE;
-                if (ANYOF_FLAGS(p) & ANYOF_UNICODE) {
-                     if (reginclass(p, (U8*)scan, 0, do_utf8))
-                          cont = TRUE;
-                }
-                else {
-                     U8 c = (U8)scan[0];
-
-                     if (UTF8_IS_INVARIANT(c)) {
-                          if (ANYOF_BITMAP_TEST(p, c))
-                               cont = TRUE;
-                     }
-                     else {
-                          if (reginclass(p, (U8*)scan, 0, do_utf8))
-                               cont = TRUE;
-                     }
-               }
-               if (!cont)
-                    break;
+               if (!REGINCLASS_utf8(p, (U8*)scan))
+                       break;
                scan += UTF8SKIP(scan);
                hardcount++;
            }
index 1621276..7a324db 100755 (executable)
@@ -6,7 +6,7 @@
 
 $| = 1;
 
-print "1..968\n";
+print "1..972\n";
 
 BEGIN {
     chdir 't' if -d 't';
@@ -3054,5 +3054,11 @@ print "\x{072F}" =~ /\P{Syriac1}/ ? "ok $test\n" : "not ok $test\n"; $test++;
     ok($a !~ /^\C{4}y/,     q{don't match \C{4}y});
 }
 
-# last test 968
+$_ = 'aaaaaaaaaa';
+utf8::upgrade($_); chop $_; $\="\n";
+ok(/[^\s]+/, "m/[^\s]/ utf8");
+ok(/[^\d]+/, "m/[^\d]/ utf8");
+ok(($a = $_, $a =~ s/[^\s]+/./g), "s/[^\s]/ utf8");
+ok(($a = $_, $a =~ s/[^\d]+/./g), "s/[^\d]/ utf8");
 
+# last test 972