From: Jarkko Hietaniemi <jhi@iki.fi>
Date: Sat, 22 Dec 2001 02:47:08 +0000 (+0000)
Subject: Unicode casefolding fixes.
X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=a5961de5f4215b5cd376e88c8c5d267c7f7123f6;p=p5sagit%2Fp5-mst-13.2.git

Unicode casefolding fixes.

p4raw-id: //depot/perl@13843
---

diff --git a/op.c b/op.c
index 9b1556e..c733052 100644
--- a/op.c
+++ b/op.c
@@ -3127,12 +3127,16 @@ Perl_pmruntime(pTHX_ OP *o, OP *expr, OP *repl)
 	    p = SvPV(pat, plen);
 	    pm->op_pmflags |= PMf_SKIPWHITE;
 	}
+        if (DO_UTF8(pat) || (PL_hints & HINT_UTF8))
+	    pm->op_pmdynflags |= PMdf_UTF8;
 	PM_SETRE(pm, CALLREGCOMP(aTHX_ p, p + plen, pm));
 	if (strEQ("\\s+", PM_GETRE(pm)->precomp))
 	    pm->op_pmflags |= PMf_WHITE;
 	op_free(expr);
     }
     else {
+        if (PL_hints & HINT_UTF8)
+	    pm->op_pmdynflags |= PMdf_UTF8;
 	if (pm->op_pmflags & PMf_KEEP || !(PL_hints & HINT_RE_EVAL))
 	    expr = newUNOP((!(PL_hints & HINT_RE_EVAL)
 			    ? OP_REGCRESET
diff --git a/regcomp.c b/regcomp.c
index 53d8947..463b778 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -1690,17 +1690,15 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm)
     if (exp == NULL)
 	FAIL("NULL regexp argument");
 
-    /* XXXX This looks very suspicious... */
-    if (pm->op_pmdynflags & PMdf_CMP_UTF8)
-        RExC_utf8 = 1;
-    else
-        RExC_utf8 = 0;
+    RExC_utf8 = pm->op_pmdynflags & PMdf_CMP_UTF8;
 
     RExC_precomp = exp;
-    DEBUG_r(if (!PL_colorset) reginitcolors());
-    DEBUG_r(PerlIO_printf(Perl_debug_log, "%sCompiling REx%s `%s%*s%s'\n",
-		      PL_colors[4],PL_colors[5],PL_colors[0],
-		      (int)(xend - exp), RExC_precomp, PL_colors[1]));
+    DEBUG_r({
+	 if (!PL_colorset) reginitcolors();
+	 PerlIO_printf(Perl_debug_log, "%sCompiling REx%s `%s%*s%s'\n",
+		       PL_colors[4],PL_colors[5],PL_colors[0],
+		       (int)(xend - exp), RExC_precomp, PL_colors[1]);
+    });
     RExC_flags16 = pm->op_pmflags;
     RExC_sawback = 0;
 
@@ -3967,10 +3965,10 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
 		}
 		else
 #endif
-		    for (i = prevvalue; i <= ceilvalue; i++)
-			ANYOF_BITMAP_SET(ret, i);
+		      for (i = prevvalue; i <= ceilvalue; i++)
+			  ANYOF_BITMAP_SET(ret, i);
 	  }
-	  if (value > 255) {
+	  if (value > 255 || UTF) {
 		ANYOF_FLAGS(ret) |= ANYOF_UNICODE;
 		if (prevvalue < value)
 		    Perl_sv_catpvf(aTHX_ listsv, "%04"UVxf"\t%04"UVxf"\n",
diff --git a/regexec.c b/regexec.c
index 35a0a6c..b7528e7 100644
--- a/regexec.c
+++ b/regexec.c
@@ -4110,9 +4110,12 @@ S_reginclass(pTHX_ register regnode *n, register U8* p, register bool do_utf8)
 		    match = TRUE;
 		else if (flags & ANYOF_FOLD) {
 		    STRLEN ulen;
-		    U8 tmpbuf[UTF8_MAXLEN_UCLC+1];
+		    U8 tmpbuf[UTF8_MAXLEN_FOLD+1];
 
-		    toLOWER_utf8(p, tmpbuf, &ulen);
+		    to_utf8_fold(p, tmpbuf, &ulen);
+		    if (swash_fetch(sw, tmpbuf, do_utf8))
+			match = TRUE;
+		    to_utf8_upper(p, tmpbuf, &ulen);
 		    if (swash_fetch(sw, tmpbuf, do_utf8))
 			match = TRUE;
 		}
diff --git a/t/op/pat.t b/t/op/pat.t
index 077b957..ee7a736 100755
--- a/t/op/pat.t
+++ b/t/op/pat.t
@@ -6,7 +6,7 @@
 
 $| = 1;
 
-print "1..757\n";
+print "1..769\n";
 
 BEGIN {
     chdir 't' if -d 't';
@@ -2291,3 +2291,36 @@ print "# some Unicode properties\n";
     print "not " unless "A\x{100}" =~ /A/i;
     print "ok 757\n";
 }
+
+{
+    use charnames ':full';
+
+    print "# LATIN LETTER A WITH GRAVE\n";
+    my $lower = "\N{LATIN SMALL LETTER A WITH GRAVE}";
+    my $UPPER = "\N{LATIN CAPITAL LETTER A WITH GRAVE}";
+
+    print $lower =~ m/$UPPER/i   ? "ok 758\n" : "not ok 758\n";
+    print $UPPER =~ m/$lower/i   ? "ok 759\n" : "not ok 759\n";
+    print $lower =~ m/[$UPPER]/i ? "ok 760\n" : "not ok 760\n";
+    print $UPPER =~ m/[$lower]/i ? "ok 761\n" : "not ok 761\n";
+
+    print "# GREEK LETTER ALPHA WITH VRACHY\n";
+
+    $lower = "\N{GREEK CAPITAL LETTER ALPHA WITH VRACHY}";
+    $UPPER = "\N{GREEK SMALL LETTER ALPHA WITH VRACHY}";
+
+    print $lower =~ m/$UPPER/i   ? "ok 762\n" : "not ok 762\n";
+    print $UPPER =~ m/$lower/i   ? "ok 763\n" : "not ok 763\n";
+    print $lower =~ m/[$UPPER]/i ? "ok 764\n" : "not ok 764\n";
+    print $UPPER =~ m/[$lower]/i ? "ok 765\n" : "not ok 765\n";
+
+    print "# LATIN LETTER Y WITH DIAERESIS\n";
+
+    $lower = "\N{LATIN CAPITAL LETTER Y WITH DIAERESIS}";
+    $UPPER = "\N{LATIN SMALL LETTER Y WITH DIAERESIS}";
+
+    print $lower =~ m/$UPPER/i   ? "ok 766\n" : "not ok 766\n";
+    print $UPPER =~ m/$lower/i   ? "ok 767\n" : "not ok 767\n";
+    print $lower =~ m/[$UPPER]/i ? "ok 768\n" : "not ok 768\n";
+    print $UPPER =~ m/[$lower]/i ? "ok 769\n" : "not ok 769\n";
+}