From: Tels Date: Sat, 22 Sep 2007 14:27:29 +0000 (+0200) Subject: Re: [perl #45605] Regexp failure with utf8-flagged string and byte-flagged pattern X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=0921ee73ba49a35b0e5ee887dcd21e4312b38dba;p=p5sagit%2Fp5-mst-13.2.git Re: [perl #45605] Regexp failure with utf8-flagged string and byte-flagged pattern Message-Id: <200709221427.30425@bloodgate.com> p4raw-id: //depot/perl@31961 --- diff --git a/regcomp.c b/regcomp.c index f876c54..23148d2 100644 --- a/regcomp.c +++ b/regcomp.c @@ -1405,7 +1405,20 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs /* store the codepoint in the bitmap, and if its ascii also store its folded equivelent. */ TRIE_BITMAP_SET(trie,uvc); - if ( folder ) TRIE_BITMAP_SET(trie,folder[ uvc ]); + + /* store the folded codepoint */ + if ( folder ) TRIE_BITMAP_SET(trie,folder[ uvc ]); + + if ( !UTF ) { + /* store first byte of utf8 representation of + codepoints in the 127 < uvc < 256 range */ + if (127 < uvc && uvc < 192) { + TRIE_BITMAP_SET(trie,194); + } else if (191 < uvc ) { + TRIE_BITMAP_SET(trie,195); + /* && uvc < 256 -- we know uvc is < 256 already */ + } + } set_bit = 0; /* We've done our bit :-) */ } } else { diff --git a/t/op/pat.t b/t/op/pat.t index 00d00e7..2697157 100755 --- a/t/op/pat.t +++ b/t/op/pat.t @@ -4478,6 +4478,14 @@ sub kt } iseq(length($str),"0","Trie scope error, string should be empty"); } +{ +# [perl #45605] Regexp failure with utf8-flagged and byte-flagged string + + my $utf_8 = "\xd6schel"; + utf8::upgrade($utf_8); + $utf_8 =~ m{(\xd6|Ö)schel}; + iseq($1,"\xd6","#45605"); +} # Test counter is at bottom of file. Put new tests above here. #------------------------------------------------------------------- @@ -4537,6 +4545,6 @@ ok($@=~/\QSequence \k... not terminated in regex;\E/); iseq(0+$::test,$::TestCount,"Got the right number of tests!"); # Don't forget to update this! BEGIN { - $::TestCount = 1964; + $::TestCount = 1965; print "1..$::TestCount\n"; }