[perl #36207] UTF8/Latin 1/i regexp "Malformed character" warning
Dave Mitchell [Fri, 8 Jul 2005 01:43:24 +0000 (01:43 +0000)]
$utf8 =~ /latin/i didn't match.
Also added a TODO test for $latin =~ /utf8/i, which also fails.

p4raw-id: //depot/perl@25095

t/op/pat.t
utf8.c

index ce5d7a2..795177b 100755 (executable)
@@ -6,7 +6,7 @@
 
 $| = 1;
 
-print "1..1178\n";
+print "1..1180\n";
 
 BEGIN {
     chdir 't' if -d 't';
@@ -3364,4 +3364,14 @@ ok(("foba  ba$s" =~ qr/(foo|BaSS|bar)/i)
     my $psycho=join "|",@normal,map chr $_,255..20000;
     ok(('these'=~/($psycho)/) && $1 eq 'these','Pyscho');
 }
-# last test 1178
+
+# [perl #36207] mixed utf8 / latin-1 and case folding
+
+{
+    my $u = "\xe9\x{100}";
+    chop $u;
+    ok($u =~ /\xe9/i, "utf8/latin");
+    ok("\xe9" =~ /$u/i, "# TODO latin/utf8");
+}
+
+# last test 1180
diff --git a/utf8.c b/utf8.c
index b26d5a6..cdf24ce 100644 (file)
--- a/utf8.c
+++ b/utf8.c
@@ -2037,7 +2037,7 @@ Perl_ibcmp_utf8(pTHX_ const char *s1, char **pe1, register UV l1, bool u1, const
               if (u1)
                    to_utf8_fold(p1, foldbuf1, &foldlen1);
               else {
-                   natbuf[0] = *p1;
+                   uvuni_to_utf8(natbuf, (UV) NATIVE_TO_UNI(((UV)*p1)));
                    to_utf8_fold(natbuf, foldbuf1, &foldlen1);
               }
               q1 = foldbuf1;
@@ -2047,7 +2047,7 @@ Perl_ibcmp_utf8(pTHX_ const char *s1, char **pe1, register UV l1, bool u1, const
               if (u2)
                    to_utf8_fold(p2, foldbuf2, &foldlen2);
               else {
-                   natbuf[0] = *p2;
+                   uvuni_to_utf8(natbuf, (UV) NATIVE_TO_UNI(((UV)*p2)));
                    to_utf8_fold(natbuf, foldbuf2, &foldlen2);
               }
               q2 = foldbuf2;