From: Jarkko Hietaniemi <jhi@iki.fi>
Date: Tue, 1 Jan 2002 03:35:01 +0000 (+0000)
Subject: Unnecessary/Lingering UTF8 flag might mess up caseless matching
X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=ffce6cc29631cbe46068af7601780981e4969f2a;p=p5sagit%2Fp5-mst-13.2.git

Unnecessary/Lingering UTF8 flag might mess up caseless matching
(found by Jeffrey Friedl)

p4raw-id: //depot/perl@13992
---

diff --git a/regexec.c b/regexec.c
index 3aed549..0f738d1 100644
--- a/regexec.c
+++ b/regexec.c
@@ -995,7 +995,8 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta
 		        c = utf8_to_uvchr((U8*)s, &len);
 			if ( c == c1
 			     && (ln == len ||
-				 !ibcmp_utf8(s, do_utf8, strend - s,
+				 !ibcmp_utf8(s, do_utf8,
+					     strend - s > ln ? ln : strend - s,
 					     m, UTF, ln))
 			     && (norun || regtry(prog, s)) )
 			    goto got_it;
@@ -1007,7 +1008,8 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta
 				  && (f == c1 || f == c2)
 				  && (ln == foldlen ||
 				      !ibcmp_utf8((char *)foldbuf,
-						  do_utf8, foldlen,
+						  do_utf8,
+						  foldlen > ln ? ln : foldlen,
 						  m, UTF, ln))
 				  && (norun || regtry(prog, s)) )
 				  goto got_it;
@@ -1032,7 +1034,8 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta
 
 			if ( (c == c1 || c == c2)
 			     && (ln == len ||
-				 !ibcmp_utf8(s, do_utf8, strend - s,
+				 !ibcmp_utf8(s, do_utf8,
+					     strend - s > ln ? ln : strend - s,
 					     m, UTF, ln))
 			     && (norun || regtry(prog, s)) )
 			    goto got_it;
@@ -1044,7 +1047,8 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta
 				  && (f == c1 || f == c2)
 				  && (ln == foldlen ||
 				      !ibcmp_utf8((char *)foldbuf,
-						  do_utf8, foldlen,
+						  do_utf8,
+						  foldlen > ln ? ln : foldlen,
 						  m, UTF, ln))
 				  && (norun || regtry(prog, s)) )
 				  goto got_it;
diff --git a/t/op/pat.t b/t/op/pat.t
index 0eda689..b797bdf 100755
--- a/t/op/pat.t
+++ b/t/op/pat.t
@@ -6,7 +6,7 @@
 
 $| = 1;
 
-print "1..825\n";
+print "1..828\n";
 
 BEGIN {
     chdir 't' if -d 't';
@@ -2520,3 +2520,41 @@ print "# some Unicode properties\n";
 	$& eq "franc\N{COMBINING CEDILLA}ais" ?
 	"ok 825\n" : "not ok 825\n";
 }
+
+{
+    print "# Does lingering (and useless) UTF8 flag mess up /i matching?\n";
+
+    {
+	my $regex  = "ABcde";
+	my $string = "abcDE\x{100}";
+	chop($string);
+	if ($string =~ m/$regex/i) {
+	    print "ok 826\n";
+	} else {
+	    print "not ok 826\n";
+	}
+    }
+
+    {
+	my $regex  = "ABcde\x{100}";
+	my $string = "abcDE";
+	chop($regex);
+	if ($string =~ m/$regex/i) {
+	    print "ok 827\n";
+	} else {
+	    print "not ok 827\n";
+	}
+    }
+
+    {
+	my $regex  = "ABcde\x{100}";
+	my $string = "abcDE\x{100}";
+	chop($regex);
+	chop($string);
+	if ($string =~ m/$regex/i) {
+	    print "ok 828\n";
+	} else {
+	    print "not ok 828\n";
+	}
+    }
+}
diff --git a/utf8.c b/utf8.c
index 54ab529..0051796 100644
--- a/utf8.c
+++ b/utf8.c
@@ -1672,9 +1672,9 @@ Perl_ibcmp_utf8(pTHX_ const char *s1, bool u1, register I32 len1, const char *s2
      register U8 *be = b + len2;
      STRLEN la, lb;
      UV ca, cb;
-     STRLEN ulen1, ulen2;
-     U8 tmpbuf1[UTF8_MAXLEN_FOLD+1];
-     U8 tmpbuf2[UTF8_MAXLEN_FOLD+1];
+     STRLEN foldlen1, foldlen2;
+     U8 foldbuf1[UTF8_MAXLEN_FOLD+1];
+     U8 foldbuf2[UTF8_MAXLEN_FOLD+1];
      
      while (a < ae && b < be) {
 	  if (u1) {
@@ -1682,7 +1682,7 @@ Perl_ibcmp_utf8(pTHX_ const char *s1, bool u1, register I32 len1, const char *s2
 		    break;
 	       ca = utf8_to_uvchr((U8*)a, &la);
 	  } else {
-	       ca = *a;
+	       ca = NATIVE_TO_UNI(*a);
 	       la = 1;
 	  }
 	  if (u2) {
@@ -1690,21 +1690,17 @@ Perl_ibcmp_utf8(pTHX_ const char *s1, bool u1, register I32 len1, const char *s2
 		    break;
 	       cb = utf8_to_uvchr((U8*)b, &lb);
 	  } else {
-	       cb = *b;
+	       cb = NATIVE_TO_UNI(*b);
 	       lb = 1;
 	  }
 	  if (ca != cb) {
-	       if (u1)
-		    to_uni_fold(NATIVE_TO_UNI(ca), tmpbuf1, &ulen1);
-	       else
-		    ulen1 = 1;
-	       if (u2)
-		    to_uni_fold(NATIVE_TO_UNI(cb), tmpbuf2, &ulen2);
-	       else
-		    ulen2 = 1;
-	       if (ulen1 != ulen2
-		   || (ca < 256 && cb < 256 && ca != PL_fold[cb])
-		   || memNE((char *)tmpbuf1, (char *)tmpbuf2, ulen1))
+	       to_uni_fold(ca, foldbuf1, &foldlen1);
+	       ca = utf8_to_uvchr(foldbuf1, 0);
+	       
+	       to_uni_fold(cb, foldbuf2, &foldlen2);
+	       cb = utf8_to_uvchr(foldbuf2, 0);
+
+	       if (ca != cb || foldlen1 != foldlen2)
 		    return 1; /* mismatch */
 	  }
 	  a += la;