From: Yves Orton
Date: Fri, 8 Jul 2005 17:07:26 +0000 (+0200)
Subject: Re: [perl #36207] UTF8/Latin 1/i regexp "Malformed character" warning
X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=108bb1ada68a74e70f8cfe3683513fea5b30f778;p=p5sagit%2Fp5-mst-13.2.git

Re: [perl #36207] UTF8/Latin 1/i regexp "Malformed character" warning

Message-ID: <9b18b3110507080807f16d1eb@mail.gmail.com>

Fix trie codepath of mixed utf8/latin1 pattern matches

p4raw-id: //depot/perl@25106
---

diff --git a/regexec.c b/regexec.c
index f302259..802ed18 100644
--- a/regexec.c
+++ b/regexec.c
@@ -2612,7 +2612,7 @@ S_regmatch(pTHX_ regnode *prog)
 if ( base ) {
- if ( do_utf8 || UTF ) {
+ if ( do_utf8 ) {
 if ( foldlen>0 ) {
 uvc = utf8n_to_uvuni( uscan, UTF8_MAXLEN, &len, uniflags );
 foldlen -= len;
@@ -2678,7 +2678,7 @@ S_regmatch(pTHX_ regnode *prog)
 if ( base ) {
- if ( do_utf8 || UTF ) {
+ if ( do_utf8 ) {
 uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN, &len, uniflags );
 } else {
 uvc = (U32)*uc;
diff --git a/t/op/pat.t b/t/op/pat.t
index 795177b..fb0d5fe 100755
--- a/t/op/pat.t
+++ b/t/op/pat.t
@@ -6,7 +6,7 @@ $| = 1;
-print "1..1180\n";
+print "1..1186\n";
 BEGIN {
 chdir 't' if -d 't';
@@ -3368,10 +3368,18 @@ ok(("foba ba$s" =~ qr/(foo|BaSS|bar)/i)
 # [perl #36207] mixed utf8 / latin-1 and case folding
 {
- my $u = "\xe9\x{100}";
- chop $u;
- ok($u =~ /\xe9/i, "utf8/latin");
- ok("\xe9" =~ /$u/i, "# TODO latin/utf8");
+ my $utf8 = "\xe9\x{100}"; chop $utf8;
+ my $latin1 = "\xe9";
+
+ ok($utf8 =~ /\xe9/i, "utf8/latin");
+ ok($utf8 =~ /$latin1/i, "utf8/latin runtime");
+ ok($utf8 =~ /(abc|\xe9)/i, "utf8/latin trie");
+ ok($utf8 =~ /(abc|$latin1)/i, "utf8/latin trie runtime");
+
+ ok("\xe9" =~ /$utf8/i, "# TODO latin/utf8");
+ ok("\xe9" =~ /(abc|$utf8)/i, "# latin/utf8 trie");
+ ok($latin1 =~ /$utf8/i, "# TODO latin/utf8 runtime");
+ ok($latin1 =~ /(abc|$utf8)/i, "# latin/utf8 trie runtime");
 }
-# last test 1180
+# last test 1186