From: Dave Mitchell <davem@fdisolutions.com>
Date: Fri, 8 Jul 2005 01:43:24 +0000 (+0000)
Subject: [perl #36207] UTF8/Latin 1/i regexp "Malformed character" warning
X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=809e8e66a971d59a948ca995e08b228927d82c66;p=p5sagit%2Fp5-mst-13.2.git

[perl #36207] UTF8/Latin 1/i regexp "Malformed character" warning
$utf8 =~ /latin/i didn't match.
Also added a TODO test for $latin =~ /utf8/i, which also fails.

p4raw-id: //depot/perl@25095
---

diff --git a/t/op/pat.t b/t/op/pat.t
index ce5d7a2..795177b 100755
--- a/t/op/pat.t
+++ b/t/op/pat.t
@@ -6,7 +6,7 @@
 
 $| = 1;
 
-print "1..1178\n";
+print "1..1180\n";
 
 BEGIN {
     chdir 't' if -d 't';
@@ -3364,4 +3364,14 @@ ok(("foba  ba$s" =~ qr/(foo|BaSS|bar)/i)
     my $psycho=join "|",@normal,map chr $_,255..20000;
     ok(('these'=~/($psycho)/) && $1 eq 'these','Pyscho');
 }
-# last test 1178
+
+# [perl #36207] mixed utf8 / latin-1 and case folding
+
+{
+    my $u = "\xe9\x{100}";
+    chop $u;
+    ok($u =~ /\xe9/i, "utf8/latin");
+    ok("\xe9" =~ /$u/i, "# TODO latin/utf8");
+}
+
+# last test 1180
diff --git a/utf8.c b/utf8.c
index b26d5a6..cdf24ce 100644
--- a/utf8.c
+++ b/utf8.c
@@ -2037,7 +2037,7 @@ Perl_ibcmp_utf8(pTHX_ const char *s1, char **pe1, register UV l1, bool u1, const
 	       if (u1)
 		    to_utf8_fold(p1, foldbuf1, &foldlen1);
 	       else {
-		    natbuf[0] = *p1;
+		    uvuni_to_utf8(natbuf, (UV) NATIVE_TO_UNI(((UV)*p1)));
 		    to_utf8_fold(natbuf, foldbuf1, &foldlen1);
 	       }
 	       q1 = foldbuf1;
@@ -2047,7 +2047,7 @@ Perl_ibcmp_utf8(pTHX_ const char *s1, char **pe1, register UV l1, bool u1, const
 	       if (u2)
 		    to_utf8_fold(p2, foldbuf2, &foldlen2);
 	       else {
-		    natbuf[0] = *p2;
+		    uvuni_to_utf8(natbuf, (UV) NATIVE_TO_UNI(((UV)*p2)));
 		    to_utf8_fold(natbuf, foldbuf2, &foldlen2);
 	       }
 	       q2 = foldbuf2;