if (do_utf8) {
STRLEN len;
+ /* ibcmp_utf8() uses to_uni_fold(), which folds Unicode
+ * more correctly than lowercasing does.
+ * However, it does not work fully, since the folding
+ * is a one-to-many mapping and the regex optimizer is
+ * unaware of this, so it may throw out good matches.
+ * Fortunately, not getting this right is allowed
+ * for Unicode Regular Expression Support level 1:
+ * only one-to-one matching is required. --jhi */
if (c1 == c2)
while (s <= e) {
if ( utf8_to_uvchr((U8*)s, &len) == c1
c1 = *(PL_bostr + ln);
}
else { c1 = (U8)*STRING(text_node); }
- if (OP(next) == EXACTF)
+ if (OP(text_node) == EXACTF || OP(text_node) == REFF)
c2 = PL_fold[c1];
- else if (OP(text_node) == EXACTFL)
+ else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
c2 = PL_fold_locale[c1];
else
c2 = c1;
}
else { c1 = (U8)*STRING(text_node); }
- if (OP(text_node) == EXACTF)
+ if (OP(text_node) == EXACTF || OP(text_node) == REFF)
c2 = PL_fold[c1];
- else if (OP(text_node) == EXACTFL)
+ else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
c2 = PL_fold_locale[c1];
else
c2 = c1;
if (!UTF) {
c2 = c1 = *s;
- if (OP(text_node) == EXACTF)
+ if (OP(text_node) == EXACTF || OP(text_node) == REFF)
c2 = PL_fold[c1];
- else if (OP(text_node) == EXACTFL)
+ else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
c2 = PL_fold_locale[c1];
}
else { /* UTF */
- if (OP(text_node) == EXACTF) {
+ if (OP(text_node) == EXACTF || OP(text_node) == REFF) {
STRLEN ulen1, ulen2;
U8 tmpbuf1[UTF8_MAXLEN*2+1];
U8 tmpbuf2[UTF8_MAXLEN*2+1];