From: Hugo van der Sanden Date: Tue, 21 Jan 2003 00:44:20 +0000 (+0000) Subject: integrate #18349 from maint-5.8: X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=388cc4de5f48b02cc9fe9b962f02cf603af02178;p=p5sagit%2Fp5-mst-13.2.git integrate #18349 from maint-5.8: At least partially address [perl #10000] by speeding up both the ASCII case (by about 2-3%) and the UTF-8 case (by about 45%). The major trick is to avoid hitting the costly S_reginclass(). (Even before this patch the speedup since 5.8.0 was about 40-50%.) After this the UTF-8 case is still about 30-60% slower than the ASCII case. (Note that I'm unable to reproduce the 10-fold speed difference of the original bug report; I can see a factor of 2 or 3, but no more.) p4raw-id: //depot/perl@18529 p4raw-integrated: from //depot/maint-5.8/perl@18528 'merge in' regexec.c (@18347..) --- diff --git a/regexec.c b/regexec.c index 4cf8069..f91af17 100644 --- a/regexec.c +++ b/regexec.c @@ -959,25 +959,40 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta /* We know what class it must start with. */ switch (OP(c)) { case ANYOF: - while (s < strend) { - STRLEN skip = do_utf8 ? UTF8SKIP(s) : 1; - - if (do_utf8 ? - reginclass(c, (U8*)s, 0, do_utf8) : - REGINCLASS(c, (U8*)s) || - (ANYOF_FOLD_SHARP_S(c, s, strend) && - /* The assignment of 2 is intentional: - * for the sharp s, the skip is 2. */ - (skip = SHARP_S_SKIP) - )) { - if (tmp && (norun || regtry(prog, s))) - goto got_it; - else - tmp = doevery; - } - else - tmp = 1; - s += skip; + if (do_utf8) { + while (s < strend) { + if ((ANYOF_FLAGS(c) & ANYOF_UNICODE) || + !UTF8_IS_INVARIANT((U8)s[0]) ? + reginclass(c, (U8*)s, 0, do_utf8) : + REGINCLASS(c, (U8*)s)) { + if (tmp && (norun || regtry(prog, s))) + goto got_it; + else + tmp = doevery; + } + else + tmp = 1; + s += UTF8SKIP(s); + } + } + else { + while (s < strend) { + STRLEN skip = 1; + + if (REGINCLASS(c, (U8*)s) || + (ANYOF_FOLD_SHARP_S(c, s, strend) && + /* The assignment of 2 is intentional: + * for the folded sharp s, the skip is 2. */ + (skip = SHARP_S_SKIP))) { + if (tmp && (norun || regtry(prog, s))) + goto got_it; + else + tmp = doevery; + } + else + tmp = 1; + s += skip; + } } break; case CANY: @@ -4053,8 +4068,26 @@ S_regrepeat(pTHX_ regnode *p, I32 max) case ANYOF: if (do_utf8) { loceol = PL_regeol; - while (hardcount < max && scan < loceol && - reginclass(p, (U8*)scan, 0, do_utf8)) { + while (hardcount < max && scan < loceol) { + bool cont = FALSE; + if (ANYOF_FLAGS(p) & ANYOF_UNICODE) { + if (reginclass(p, (U8*)scan, 0, do_utf8)) + cont = TRUE; + } + else { + U8 c = (U8)scan[0]; + + if (UTF8_IS_INVARIANT(c)) { + if (ANYOF_BITMAP_TEST(p, c)) + cont = TRUE; + } + else { + if (reginclass(p, (U8*)scan, 0, do_utf8)) + cont = TRUE; + } + } + if (!cont) + break; scan += UTF8SKIP(scan); hardcount++; }