From: Adrian M. Enache
Date: Fri, 31 Jan 2003 09:20:59 +0000 (+0200)
Subject: [FIX] Re: UTF-8 failures (surprise!)
X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=5dec093fd414e8adffdf65a1d75b0b0d7a2938c3;p=p5sagit%2Fp5-mst-13.2.git

[FIX] Re: UTF-8 failures (surprise!)

Message-ID: <20030131072059.GB6045@ratsnest.hole>
p4raw-id: //depot/perl@18608
---
Review notes (commentary between "---" and the first "diff" line is not
part of the change itself):

 * Line breaks, indentation and blank context lines were reconstructed from
   a whitespace-collapsed copy of this patch, so match whitespace loosely
   when applying it (e.g. GNU `patch -l`).
 * regexec.c: in the do_utf8 branch of S_regrepeat the hand-written
   "cont" logic is replaced by the new REGINCLASS_utf8() macro, which calls
   reginclass(p,c,0,1) whenever ANYOF_FLAGS(p) is non-zero and otherwise
   falls back to ANYOF_BITMAP_TEST on the first byte.
 * t/op/pat.t: the four new tests differ from the original posting:
   descriptions are single-quoted so `\s` / `\d` are printed literally, the
   last description names the `[^\d]` class it actually tests, and the first
   substitution now operates on the copy in $a so that $_ is still the
   upgraded string when the next test copies it.
 * NOTE(review): `$\="\n"` is kept as posted. It changes the output record
   separator for every later print in pat.t -- confirm that is intended.

diff --git a/regexec.c b/regexec.c
index a6fd6ae..ee53a47 100644
--- a/regexec.c
+++ b/regexec.c
@@ -97,6 +97,7 @@
 #endif
 
 #define REGINCLASS(p,c) (ANYOF_FLAGS(p) ? reginclass(p,c,0,0) : ANYOF_BITMAP_TEST(p,*(c)))
+#define REGINCLASS_utf8(p,c) (ANYOF_FLAGS(p) ? reginclass(p,c,0,1) : ANYOF_BITMAP_TEST(p,*(c)))
 
 /*
  * Forwards.
@@ -4078,25 +4079,8 @@ S_regrepeat(pTHX_ regnode *p, I32 max)
         if (do_utf8) {
             loceol = PL_regeol;
             while (hardcount < max && scan < loceol) {
-                bool cont = FALSE;
-                if (ANYOF_FLAGS(p) & ANYOF_UNICODE) {
-                    if (reginclass(p, (U8*)scan, 0, do_utf8))
-                        cont = TRUE;
-                }
-                else {
-                    U8 c = (U8)scan[0];
-
-                    if (UTF8_IS_INVARIANT(c)) {
-                        if (ANYOF_BITMAP_TEST(p, c))
-                            cont = TRUE;
-                    }
-                    else {
-                        if (reginclass(p, (U8*)scan, 0, do_utf8))
-                            cont = TRUE;
-                    }
-                }
-                if (!cont)
-                    break;
+                if (!REGINCLASS_utf8(p, (U8*)scan))
+                    break;
                 scan += UTF8SKIP(scan);
                 hardcount++;
             }
diff --git a/t/op/pat.t b/t/op/pat.t
index 1621276..7a324db 100755
--- a/t/op/pat.t
+++ b/t/op/pat.t
@@ -6,7 +6,7 @@
 
 $| = 1;
 
-print "1..968\n";
+print "1..972\n";
 
 BEGIN {
     chdir 't' if -d 't';
@@ -3054,5 +3054,11 @@ print "\x{072F}" =~ /\P{Syriac1}/ ? "ok $test\n" : "not ok $test\n"; $test++;
     ok($a !~ /^\C{4}y/, q{don't match \C{4}y});
 }
 
-# last test 968
+$_ = 'aaaaaaaaaa';
+utf8::upgrade($_); chop $_; $\="\n";
+ok(/[^\s]+/, 'm/[^\s]/ utf8');
+ok(/[^\d]+/, 'm/[^\d]/ utf8');
+ok(($a = $_, $a =~ s/[^\s]+/./g), 's/[^\s]/ utf8');
+ok(($a = $_, $a =~ s/[^\d]+/./g), 's/[^\d]/ utf8');
+# last test 972
 