From: Hugo van der Sanden <hv@crypt.org>
Date: Tue, 21 Jan 2003 00:44:20 +0000 (+0000)
Subject: integrate #18349 from maint-5.8:
X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=388cc4de5f48b02cc9fe9b962f02cf603af02178;p=p5sagit%2Fp5-mst-13.2.git

integrate #18349 from maint-5.8:
At least partially address [perl #10000] by speeding
up both the ASCII case (by about 2-3%) and the UTF-8 case
(by about 45%).  The major trick is to avoid calling the
costly S_reginclass() in the ANYOF branches of
S_find_byclass() and S_regrepeat(): when the class does not
have ANYOF_UNICODE set and the current byte is a UTF-8
invariant, the cheaper REGINCLASS() / ANYOF_BITMAP_TEST()
check is used instead.  (Even before this patch the speedup
since 5.8.0 was about 40-50%.)  After this patch the UTF-8
case is still about 30-60% slower than the ASCII case.  (Note
that I'm unable to reproduce the 10-fold speed difference of
the original bug report; I can see a factor of 2 or 3, but no
more.)

p4raw-id: //depot/perl@18529
p4raw-integrated: from //depot/maint-5.8/perl@18528 'merge in'
	regexec.c (@18347..)
---

diff --git a/regexec.c b/regexec.c
index 4cf8069..f91af17 100644
--- a/regexec.c
+++ b/regexec.c
@@ -959,25 +959,40 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta
 	/* We know what class it must start with. */
 	switch (OP(c)) {
 	case ANYOF:
-	    while (s < strend) {
-		STRLEN skip = do_utf8 ? UTF8SKIP(s) : 1;
-		  
-		if (do_utf8 ?
-		    reginclass(c, (U8*)s, 0, do_utf8) :
-		    REGINCLASS(c, (U8*)s) ||
-		    (ANYOF_FOLD_SHARP_S(c, s, strend) &&
-		     /* The assignment of 2 is intentional:
-		      * for the sharp s, the skip is 2. */
-		     (skip = SHARP_S_SKIP)
-		     )) {
-		    if (tmp && (norun || regtry(prog, s)))
-			goto got_it;
-		    else
-			tmp = doevery;
-		}
-		else 
-		    tmp = 1;
-		s += skip;
+	    if (do_utf8) {
+		 while (s < strend) {
+		      if ((ANYOF_FLAGS(c) & ANYOF_UNICODE) ||
+			  !UTF8_IS_INVARIANT((U8)s[0]) ?
+			  reginclass(c, (U8*)s, 0, do_utf8) :
+			  REGINCLASS(c, (U8*)s)) {
+			   if (tmp && (norun || regtry(prog, s)))
+				goto got_it;
+			   else
+				tmp = doevery;
+		      }
+		      else 
+			   tmp = 1;
+		      s += UTF8SKIP(s);
+		 }
+	    }
+	    else {
+		 while (s < strend) {
+		      STRLEN skip = 1;
+
+		      if (REGINCLASS(c, (U8*)s) ||
+			  (ANYOF_FOLD_SHARP_S(c, s, strend) &&
+			   /* The assignment of 2 is intentional:
+			    * for the folded sharp s, the skip is 2. */
+			   (skip = SHARP_S_SKIP))) {
+			   if (tmp && (norun || regtry(prog, s)))
+				goto got_it;
+			   else
+				tmp = doevery;
+		      }
+		      else 
+			   tmp = 1;
+		      s += skip;
+		 }
 	    }
 	    break;
 	case CANY:
@@ -4053,8 +4068,26 @@ S_regrepeat(pTHX_ regnode *p, I32 max)
     case ANYOF:
 	if (do_utf8) {
 	    loceol = PL_regeol;
-	    while (hardcount < max && scan < loceol &&
-		   reginclass(p, (U8*)scan, 0, do_utf8)) {
+	    while (hardcount < max && scan < loceol) {
+		 bool cont = FALSE;
+		 if (ANYOF_FLAGS(p) & ANYOF_UNICODE) {
+		      if (reginclass(p, (U8*)scan, 0, do_utf8))
+			   cont = TRUE;
+		 }
+		 else {
+		      U8 c = (U8)scan[0];
+
+		      if (UTF8_IS_INVARIANT(c)) {
+			   if (ANYOF_BITMAP_TEST(p, c))
+				cont = TRUE;
+		      }
+		      else {
+			   if (reginclass(p, (U8*)scan, 0, do_utf8))
+				cont = TRUE;
+		      }
+		}
+		if (!cont)
+		     break;
 		scan += UTF8SKIP(scan);
 		hardcount++;
 	    }