Skip the RV printing test under threads until fixed.
[p5sagit/p5-mst-13.2.git] / regexec.c
index 6adb9ca..b691162 100644 (file)
--- a/regexec.c
+++ b/regexec.c
  */
 
 #define CHR_SVLEN(sv) (UTF ? sv_len_utf8(sv) : SvCUR(sv))
-#define CHR_DIST(a,b) (PL_reg_sv_utf8 ? utf8_distance(a,b) : a - b)
+#define CHR_DIST(a,b) (PL_reg_match_utf8 ? utf8_distance(a,b) : a - b)
 
 #define reghop_c(pos,off) ((char*)reghop((U8*)pos, off))
 #define reghopmaybe_c(pos,off) ((char*)reghopmaybe((U8*)pos, off))
-#define HOP(pos,off) (PL_reg_sv_utf8 ? reghop((U8*)pos, off) : (U8*)(pos + off))
-#define HOPMAYBE(pos,off) (PL_reg_sv_utf8 ? reghopmaybe((U8*)pos, off) : (U8*)(pos + off))
+#define HOP(pos,off) (PL_reg_match_utf8 ? reghop((U8*)pos, off) : (U8*)(pos + off))
+#define HOPMAYBE(pos,off) (PL_reg_match_utf8 ? reghopmaybe((U8*)pos, off) : (U8*)(pos + off))
 #define HOPc(pos,off) ((char*)HOP(pos,off))
 #define HOPMAYBEc(pos,off) ((char*)HOPMAYBE(pos,off))
 
 #define HOPBACK(pos, off) (            \
-    (UTF && PL_reg_sv_utf8)            \
+    (UTF && PL_reg_match_utf8)         \
        ? reghopmaybe((U8*)pos, -off)   \
     : (pos - off >= PL_bostr)          \
        ? (U8*)(pos - off)              \
 
 #define reghop3_c(pos,off,lim) ((char*)reghop3((U8*)pos, off, (U8*)lim))
 #define reghopmaybe3_c(pos,off,lim) ((char*)reghopmaybe3((U8*)pos, off, (U8*)lim))
-#define HOP3(pos,off,lim) (PL_reg_sv_utf8 ? reghop3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
-#define HOPMAYBE3(pos,off,lim) (PL_reg_sv_utf8 ? reghopmaybe3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
+#define HOP3(pos,off,lim) (PL_reg_match_utf8 ? reghop3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
+#define HOPMAYBE3(pos,off,lim) (PL_reg_match_utf8 ? reghopmaybe3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
 #define HOP3c(pos,off,lim) ((char*)HOP3(pos,off,lim))
 #define HOPMAYBE3c(pos,off,lim) ((char*)HOPMAYBE3(pos,off,lim))
 
 #define LOAD_UTF8_CHARCLASS(a,b) STMT_START { if (!CAT2(PL_utf8_,a)) (void)CAT2(is_utf8_, a)((U8*)b); } STMT_END
 
+/* for use after a quantifier and before an EXACT-like node -- japhy */
+
+/* JUMPABLE(rn): true when node rn is an OPEN, CLOSE, EVAL, SUSPEND or
+ * IFMATCH node, i.e. one of the node types NEXT_IMPT() steps past.
+ * rn is expanded several times, so pass a side-effect-free expression. */
+#define JUMPABLE(rn) ( \
+    OP(rn) == OPEN || OP(rn) == CLOSE || OP(rn) == EVAL || \
+    OP(rn) == SUSPEND || OP(rn) == IFMATCH \
+)
+
+/* NEAR_EXACT(rn): true when rn is itself of regkind EXACT, or is a
+ * JUMPABLE node.  A true result does not guarantee that an EXACT-kind
+ * node follows; callers re-check the kind after NEXT_IMPT(). */
+#define NEAR_EXACT(rn) (PL_regkind[(U8)OP(rn)] == EXACT || JUMPABLE(rn))
+
+/* NEXT_IMPT(rn): advance rn in place (it must be a modifiable lvalue)
+ * for as long as it is JUMPABLE.  SUSPEND and IFMATCH nodes are stepped
+ * over with NEXTOPER applied twice; the other jumpable nodes by adding
+ * NEXT_OFF(rn).  On exit rn is the first non-JUMPABLE node reached. */
+#define NEXT_IMPT(rn) STMT_START { \
+    while (JUMPABLE(rn)) \
+       if (OP(rn) == SUSPEND || OP(rn) == IFMATCH) \
+           rn = NEXTOPER(NEXTOPER(rn)); \
+       else rn += NEXT_OFF(rn); \
+} STMT_END 
+
 static void restore_pos(pTHX_ void *arg);
 
 STATIC CHECKPOINT
@@ -872,7 +887,7 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta
        unsigned int c2;
        char *e;
        register I32 tmp = 1;   /* Scratch variable? */
-       register bool do_utf8 = PL_reg_sv_utf8;
+       register bool do_utf8 = PL_reg_match_utf8;
 
        /* We know what class it must start with. */
        switch (OP(c)) {
@@ -2003,7 +2018,7 @@ S_regmatch(pTHX_ regnode *prog)
 #if 0
     I32 firstcp = PL_savestack_ix;
 #endif
-    register bool do_utf8 = PL_reg_sv_utf8;
+    register bool do_utf8 = PL_reg_match_utf8;
 
 #ifdef DEBUGGING
     PL_regindent++;
@@ -3037,14 +3052,24 @@ S_regmatch(pTHX_ regnode *prog)
                if (ln && l == 0)
                    n = ln;     /* don't backtrack */
                locinput = PL_reginput;
-               if (PL_regkind[(U8)OP(next)] == EXACT) {
-                   c1 = (U8)*STRING(next);
-                   if (OP(next) == EXACTF)
-                       c2 = PL_fold[c1];
-                   else if (OP(next) == EXACTFL)
-                       c2 = PL_fold_locale[c1];
-                   else
-                       c2 = c1;
+               if (NEAR_EXACT(next)) {
+                   regnode *text_node = next;  /* node whose first char is peeked */
+
+                   if (PL_regkind[(U8)OP(next)] != EXACT)
+                       NEXT_IMPT(text_node);   /* step past JUMPABLE nodes */
+
+                   if (PL_regkind[(U8)OP(text_node)] != EXACT) {
+                       c1 = c2 = -1000;        /* same value as the non-NEAR_EXACT case */
+                   }
+                   else {
+                       c1 = (U8)*STRING(text_node);
+                       if (OP(text_node) == EXACTF) /* test text_node, not 'next' */
+                           c2 = PL_fold[c1];
+                       else if (OP(text_node) == EXACTFL)
+                           c2 = PL_fold_locale[c1];
+                       else
+                           c2 = c1;
+                   }
                }
                else
                    c1 = c2 = -1000;
@@ -3096,14 +3121,24 @@ S_regmatch(pTHX_ regnode *prog)
                                  (IV) n, (IV)l)
                    );
                if (n >= ln) {
-                   if (PL_regkind[(U8)OP(next)] == EXACT) {
-                       c1 = (U8)*STRING(next);
-                       if (OP(next) == EXACTF)
-                           c2 = PL_fold[c1];
-                       else if (OP(next) == EXACTFL)
-                           c2 = PL_fold_locale[c1];
-                       else
-                           c2 = c1;
+                   if (NEAR_EXACT(next)) {
+                       regnode *text_node = next;
+
+                       if (PL_regkind[(U8)OP(next)] != EXACT)
+                           NEXT_IMPT(text_node);
+
+                       if (PL_regkind[(U8)OP(text_node)] != EXACT) {
+                           c1 = c2 = -1000;
+                       }
+                       else {
+                           c1 = (U8)*STRING(text_node);
+                           if (OP(text_node) == EXACTF)
+                               c2 = PL_fold[c1];
+                           else if (OP(text_node) == EXACTFL)
+                               c2 = PL_fold_locale[c1];
+                           else
+                               c2 = c1;
+                       }
                    }
                    else
                        c1 = c2 = -1000;
@@ -3173,22 +3208,41 @@ S_regmatch(pTHX_ regnode *prog)
            * Lookahead to avoid useless match attempts
            * when we know what character comes next.
            */
-           if (PL_regkind[(U8)OP(next)] == EXACT) {
-               U8 *s = (U8*)STRING(next);
-               if (!UTF) {
-                   c2 = c1 = *s;
-                   if (OP(next) == EXACTF)
-                       c2 = PL_fold[c1];
-                   else if (OP(next) == EXACTFL)
-                       c2 = PL_fold_locale[c1];
-               }
-               else { /* UTF */
-                   if (OP(next) == EXACTF) {
-                       c1 = to_utf8_lower(s);
-                       c2 = to_utf8_upper(s);
+
+           /*
+           * Used to only do .*x and .*?x, but now it allows
+           * for )'s, ('s and (?{ ... })'s to be in the way
+           * of the quantifier and the EXACT-like node.  -- japhy
+           */
+
+           if (NEAR_EXACT(next)) {
+               U8 *s;
+               regnode *text_node = next;
+
+               if (PL_regkind[(U8)OP(next)] != EXACT)
+                   NEXT_IMPT(text_node);
+
+               if (PL_regkind[(U8)OP(text_node)] != EXACT) {
+                   c1 = c2 = -1000;
+               }
+               else {
+                   s = (U8*)STRING(text_node);
+
+                   if (!UTF) {
+                       c2 = c1 = *s;
+                       if (OP(text_node) == EXACTF)
+                           c2 = PL_fold[c1];
+                       else if (OP(text_node) == EXACTFL)
+                           c2 = PL_fold_locale[c1];
                    }
-                   else {
-                       c2 = c1 = utf8_to_uvchr(s, NULL);
+                   else { /* UTF */
+                       if (OP(text_node) == EXACTF) {
+                           c1 = to_utf8_lower(s);
+                           c2 = to_utf8_upper(s);
+                       }
+                       else {
+                           c2 = c1 = utf8_to_uvchr(s, NULL);
+                       }
                    }
                }
            }
@@ -3228,12 +3282,13 @@ S_regmatch(pTHX_ regnode *prog)
                        /* Find place 'next' could work */
                        if (!do_utf8) {
                            if (c1 == c2) {
-                               while (locinput <= e && *locinput != c1)
+                               while (locinput <= e &&
+                                      UCHARAT(locinput) != c1)
                                    locinput++;
                            } else {
                                while (locinput <= e
-                                      && *locinput != c1
-                                      && *locinput != c2)
+                                      && UCHARAT(locinput) != c1
+                                      && UCHARAT(locinput) != c2)
                                    locinput++;
                            }
                            count = locinput - old;
@@ -3584,7 +3639,7 @@ S_regrepeat(pTHX_ regnode *p, I32 max)
     register I32 c;
     register char *loceol = PL_regeol;
     register I32 hardcount = 0;
-    register bool do_utf8 = PL_reg_sv_utf8;
+    register bool do_utf8 = PL_reg_match_utf8;
 
     scan = PL_reginput;
     if (max != REG_INFTY && max < loceol - scan)
@@ -3823,7 +3878,7 @@ S_regrepeat_hard(pTHX_ regnode *p, I32 max, I32 *lp)
        return 0;
 
     start = PL_reginput;
-    if (PL_reg_sv_utf8) {
+    if (PL_reg_match_utf8) {
        while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
            if (!count++) {
                l = 0;