Re: [perl #36207] UTF8/Latin 1/i regexp "Malformed character" warning
Yves Orton [Fri, 8 Jul 2005 17:07:26 +0000 (19:07 +0200)]
Message-ID: <9b18b3110507080807f16d1eb@mail.gmail.com>

Fix trie codepath of mixed utf8/latin1 pattern matches

p4raw-id: //depot/perl@25106

regexec.c
t/op/pat.t

index f302259..802ed18 100644 (file)
--- a/regexec.c
+++ b/regexec.c
@@ -2612,7 +2612,7 @@ S_regmatch(pTHX_ regnode *prog)
 
                    if ( base ) {
 
-                       if ( do_utf8 || UTF ) {
+                       if ( do_utf8 ) {
                            if ( foldlen>0 ) {
                                uvc = utf8n_to_uvuni( uscan, UTF8_MAXLEN, &len, uniflags );
                                foldlen -= len;
@@ -2678,7 +2678,7 @@ S_regmatch(pTHX_ regnode *prog)
 
                    if ( base ) {
 
-                       if ( do_utf8 || UTF ) {
+                       if ( do_utf8 ) {
                            uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN, &len, uniflags );
                        } else {
                            uvc = (U32)*uc;
index 795177b..fb0d5fe 100755 (executable)
--- a/t/op/pat.t
+++ b/t/op/pat.t
@@ -6,7 +6,7 @@
 
 $| = 1;
 
-print "1..1180\n";
+print "1..1186\n";
 
 BEGIN {
     chdir 't' if -d 't';
@@ -3368,10 +3368,18 @@ ok(("foba  ba$s" =~ qr/(foo|BaSS|bar)/i)
 # [perl #36207] mixed utf8 / latin-1 and case folding
 
 {
-    my $u = "\xe9\x{100}";
-    chop $u;
-    ok($u =~ /\xe9/i, "utf8/latin");
-    ok("\xe9" =~ /$u/i, "# TODO latin/utf8");
+    my $utf8 = "\xe9\x{100}"; chop $utf8;
+    my $latin1 = "\xe9";
+
+    ok($utf8 =~ /\xe9/i, "utf8/latin");
+    ok($utf8 =~ /$latin1/i, "utf8/latin runtime");
+    ok($utf8 =~ /(abc|\xe9)/i, "utf8/latin trie");
+    ok($utf8 =~ /(abc|$latin1)/i, "utf8/latin trie runtime");
+
+    ok("\xe9" =~ /$utf8/i, "# TODO latin/utf8");
+    ok("\xe9" =~ /(abc|$utf8)/i, "# latin/utf8 trie");
+    ok($latin1 =~ /$utf8/i, "# TODO latin/utf8 runtime");
+    ok($latin1 =~ /(abc|$utf8)/i, "# latin/utf8 trie runtime");
 }
 
-# last test 1180
+# last test 1186