From: Jarkko Hietaniemi <jhi@iki.fi>
Date: Tue, 1 Jan 2002 17:29:05 +0000 (+0000)
Subject: Better support for multicharacter foldings.
X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=c7254714b14d9ee5485ea0c63c897c9eb3c2c21e;p=p5sagit%2Fp5-mst-13.2.git

Better support for multicharacter foldings.
Now all but two of the CaseFold.txt cases work -- but
only when the target string is a single character; more
debugging is needed.

p4raw-id: //depot/perl@14001
---

diff --git a/regexec.c b/regexec.c
index 0f738d1..4db4729 100644
--- a/regexec.c
+++ b/regexec.c
@@ -960,7 +960,7 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta
 	    c1 = *(U8*)m;
 	    c2 = PL_fold_locale[c1];
 	  do_exactf:
-	    e = strend - ln;
+	    e = do_utf8 ? s + ln - 1 : strend - ln;
 
 	    if (norun && e < s)
 		e = s;			/* Due to minlen logic of intuit() */
@@ -2406,31 +2406,37 @@ S_regmatch(pTHX_ regnode *prog)
 
 		if (do_utf8 && UTF) {
 		     /* Both the target and the pattern are utf8. */
-		     while (s < e) {
-			  if (l >= PL_regeol)
-			       sayNO;
-			  if (UTF8SKIP(s) != UTF8SKIP(l) ||
-			      memNE(s, (char*)l, UTF8SKIP(s))) {
-			       U8 lfoldbuf[UTF8_MAXLEN_FOLD+1];
-			       STRLEN lfoldlen;
+		     U8 lfoldbuf[UTF8_MAXLEN_FOLD+1], *lf;
+		     U8 sfoldbuf[UTF8_MAXLEN_FOLD+1], *sf;
+		     STRLEN lfoldlen, sfoldlen;
+		     STRLEN llen = 0;
+		     STRLEN slen = 0;
 
-			       /* Try one of them folded. */
+		     while (s < e) {
+			  /* Fold them and walk them characterwise.  */
 
+			  if (llen == 0) {
 			       to_utf8_fold((U8*)l, lfoldbuf, &lfoldlen);
-			       if (UTF8SKIP(s) != lfoldlen ||
-				   memNE(s, (char*)lfoldbuf, lfoldlen)) {
-				    U8 sfoldbuf[UTF8_MAXLEN_FOLD+1];
-				    STRLEN sfoldlen;
-
-				    /* Try both of them folded. */
-
-				    to_utf8_fold((U8*)s, sfoldbuf, &sfoldlen);
-				    if (sfoldlen != lfoldlen ||
-					memNE((char*)sfoldbuf,
-					      (char*)lfoldbuf, lfoldlen))
-				      sayNO;
-			       }
+			       lf   = lfoldbuf;
+			       llen = lfoldlen;
+			  }
+
+			  if (slen == 0) {
+			       to_utf8_fold((U8*)s, sfoldbuf, &sfoldlen);
+			       sf   = sfoldbuf;
+			       slen = sfoldlen;
+			  }
+
+			  while (llen && slen) {
+			       if (UTF8SKIP(lf) != UTF8SKIP(sf) ||
+				   memNE((char*)lf, (char*)sf, UTF8SKIP(lf)))
+				    sayNO;
+			       llen -= UTF8SKIP(lf);
+			       lf   += UTF8SKIP(lf);
+			       slen -= UTF8SKIP(sf);
+			       sf   += UTF8SKIP(sf);
 			  }
+			  
 			  l += UTF8SKIP(l);
 			  s += UTF8SKIP(s);
 		     }