/* REXEC_TRIE_READ_CHAR: in the trie_utf8_fold case, assigns uvc and charid
 * and updates len, foldlen and uscan. Only part of the body is visible in
 * these hunks. The added lines keep the value returned by utf8n_to_uvuni()
 * in uvc_unfolded and, after the switch, use it as a charmap index when
 * charid is still zero. */
#define REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc, uscan, len, \
uvc, charid, foldlen, foldbuf, uniflags) STMT_START { \
+ UV uvc_unfolded = 0; /* utf8n_to_uvuni() result, kept for the charmap fallback after the switch */ \
switch (trie_type) { \
case trie_utf8_fold: \
if ( foldlen>0 ) { \
- uvc = utf8n_to_uvuni( uscan, UTF8_MAXLEN, &len, uniflags ); \
+ uvc_unfolded = uvc = utf8n_to_uvuni( uscan, UTF8_MAXLEN, &len, uniflags ); /* read from uscan; uvc is not folded again in this branch */ \
foldlen -= len; \
uscan += len; \
len=0; \
} else { \
- uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN, &len, uniflags ); \
+ uvc_unfolded = uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN, &len, uniflags ); /* remember the value before to_uni_fold() replaces uvc on the next line */ \
uvc = to_uni_fold( uvc, foldbuf, &foldlen ); \
foldlen -= UNISKIP( uvc ); \
uscan = foldbuf + UNISKIP( uvc ); \
charid = (U16)SvIV(*svpp); \
} \
} \
+ if (!charid && trie_type == trie_utf8_fold && !UTF) { /* charid still zero: retry with the unfolded value (presumably !UTF means a non-UTF-8 pattern -- confirm) */ \
+ charid = trie->charmap[uvc_unfolded]; /* NOTE(review): no range check on uvc_unfolded in the visible lines -- confirm it cannot exceed the charmap table */ \
+ } \
} STMT_END
#define REXEC_FBC_EXACTISH_CHECK(CoNd) \
$| = 1;
-my $EXPECTED_TESTS = 3865; # Update this when adding/deleting tests.
+my $EXPECTED_TESTS = 3961; # Update this when adding/deleting tests.
BEGIN {
chdir 't' if -d 't';
iseq $1, "\xd6", "Upgrade error";
}
+ {
+# More TRIE/AHOCORASICK problems with mixed utf8 / latin-1 and case folding (code points 160 .. 255).
+ for my $chr (160 .. 255) {
+ my $chr_byte = chr($chr);
+ my $chr_utf8 = chr($chr); utf8::upgrade($chr_utf8); # same character, but this copy is upgraded to utf8
+ my $rx = qr{$chr_byte|X}i; # case-insensitive alternation built from the copy that was not upgraded
+ ok($chr_utf8 =~ $rx, "utf8/latin, codepoint $chr"); # the upgraded copy must still match
+ }
+ }
{
# Regardless of utf8ness any character matches itself when