Re: [PATCH t/op/pack.t] Cleanup
[p5sagit/p5-mst-13.2.git] / regexec.c
index a8acb06..0d97be3 100644 (file)
--- a/regexec.c
+++ b/regexec.c
@@ -959,6 +959,14 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta
 
            if (do_utf8) {
                STRLEN len;
+               /* The ibcmp_utf8() uses to_uni_fold() which is more
+                * correct folding for Unicode than using lowercase.
+                * However, it doesn't work quite fully since the folding
+                * is a one-to-many mapping and the regex optimizer is
+                * unaware of this, so it may throw out good matches.
+                * Fortunately, not getting this right is allowed
+                * for Unicode Regular Expression Support level 1,
+                * only one-to-one matching is required. --jhi */
                if (c1 == c2)
                    while (s <= e) {
                        if ( utf8_to_uvchr((U8*)s, &len) == c1
@@ -3152,9 +3160,9 @@ S_regmatch(pTHX_ regnode *prog)
                            c1 = *(PL_bostr + ln);
                        }
                        else { c1 = (U8)*STRING(text_node); }
-                       if (OP(next) == EXACTF)
+                       if (OP(text_node) == EXACTF || OP(text_node) == REFF)
                            c2 = PL_fold[c1];
-                       else if (OP(text_node) == EXACTFL)
+                       else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
                            c2 = PL_fold_locale[c1];
                        else
                            c2 = c1;
@@ -3235,9 +3243,9 @@ S_regmatch(pTHX_ regnode *prog)
                            }
                            else { c1 = (U8)*STRING(text_node); }
 
-                           if (OP(text_node) == EXACTF)
+                           if (OP(text_node) == EXACTF || OP(text_node) == REFF)
                                c2 = PL_fold[c1];
-                           else if (OP(text_node) == EXACTFL)
+                           else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
                                c2 = PL_fold_locale[c1];
                            else
                                c2 = c1;
@@ -3346,13 +3354,13 @@ S_regmatch(pTHX_ regnode *prog)
 
                    if (!UTF) {
                        c2 = c1 = *s;
-                       if (OP(text_node) == EXACTF)
+                       if (OP(text_node) == EXACTF || OP(text_node) == REFF)
                            c2 = PL_fold[c1];
-                       else if (OP(text_node) == EXACTFL)
+                       else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
                            c2 = PL_fold_locale[c1];
                    }
                    else { /* UTF */
-                       if (OP(text_node) == EXACTF) {
+                       if (OP(text_node) == EXACTF || OP(text_node) == REFF) {
                             STRLEN ulen1, ulen2;
                             U8 tmpbuf1[UTF8_MAXLEN*2+1];
                             U8 tmpbuf2[UTF8_MAXLEN*2+1];