const declaration fixup
[p5sagit/p5-mst-13.2.git] / regexec.c
index 4cf8069..a6fd6ae 100644 (file)
--- a/regexec.c
+++ b/regexec.c
@@ -541,6 +541,9 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
                            start_shift + (s - strbeg), end_shift, pp, 0);
        else
            goto fail_finish;
+       /* we may be pointing at the wrong string */
+       if (s && RX_MATCH_COPIED(prog))
+           s = prog->subbeg + (s - SvPVX(sv));
        if (data)
            *data->scream_olds = s;
     }
@@ -959,25 +962,40 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta
        /* We know what class it must start with. */
        switch (OP(c)) {
        case ANYOF:
-           while (s < strend) {
-               STRLEN skip = do_utf8 ? UTF8SKIP(s) : 1;
-                 
-               if (do_utf8 ?
-                   reginclass(c, (U8*)s, 0, do_utf8) :
-                   REGINCLASS(c, (U8*)s) ||
-                   (ANYOF_FOLD_SHARP_S(c, s, strend) &&
-                    /* The assignment of 2 is intentional:
-                     * for the sharp s, the skip is 2. */
-                    (skip = SHARP_S_SKIP)
-                    )) {
-                   if (tmp && (norun || regtry(prog, s)))
-                       goto got_it;
-                   else
-                       tmp = doevery;
-               }
-               else 
-                   tmp = 1;
-               s += skip;
+           if (do_utf8) {
+                while (s < strend) {
+                     if ((ANYOF_FLAGS(c) & ANYOF_UNICODE) ||
+                         !UTF8_IS_INVARIANT((U8)s[0]) ?
+                         reginclass(c, (U8*)s, 0, do_utf8) :
+                         REGINCLASS(c, (U8*)s)) {
+                          if (tmp && (norun || regtry(prog, s)))
+                               goto got_it;
+                          else
+                               tmp = doevery;
+                     }
+                     else 
+                          tmp = 1;
+                     s += UTF8SKIP(s);
+                }
+           }
+           else {
+                while (s < strend) {
+                     STRLEN skip = 1;
+
+                     if (REGINCLASS(c, (U8*)s) ||
+                         (ANYOF_FOLD_SHARP_S(c, s, strend) &&
+                          /* The assignment of 2 is intentional:
+                           * for the folded sharp s, the skip is 2. */
+                          (skip = SHARP_S_SKIP))) {
+                          if (tmp && (norun || regtry(prog, s)))
+                               goto got_it;
+                          else
+                               tmp = doevery;
+                     }
+                     else 
+                          tmp = 1;
+                     s += skip;
+                }
            }
            break;
        case CANY:
@@ -1843,6 +1861,9 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *
                 : (s = fbm_instr((unsigned char*)HOP3(s, back_min, strend),
                                  (unsigned char*)strend, must,
                                  PL_multiline ? FBMrf_MULTILINE : 0))) ) {
+           /* we may be pointing at the wrong string */
+           if ((flags & REXEC_SCREAM) && RX_MATCH_COPIED(prog))
+               s = prog->subbeg + (s - SvPVX(sv));
            DEBUG_r( did_match = 1 );
            if (HOPc(s, -back_max) > last1) {
                last1 = HOPc(s, -back_min);
@@ -1929,6 +1950,9 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *
                                   end_shift, &scream_pos, 1); /* last one */
                if (!last)
                    last = scream_olds; /* Only one occurrence. */
+               /* we may be pointing at the wrong string */
+               else if (RX_MATCH_COPIED(prog))
+                   s = prog->subbeg + (s - SvPVX(sv));
            }
            else {
                STRLEN len;
@@ -4053,8 +4077,26 @@ S_regrepeat(pTHX_ regnode *p, I32 max)
     case ANYOF:
        if (do_utf8) {
            loceol = PL_regeol;
-           while (hardcount < max && scan < loceol &&
-                  reginclass(p, (U8*)scan, 0, do_utf8)) {
+           while (hardcount < max && scan < loceol) {
+                bool cont = FALSE;
+                if (ANYOF_FLAGS(p) & ANYOF_UNICODE) {
+                     if (reginclass(p, (U8*)scan, 0, do_utf8))
+                          cont = TRUE;
+                }
+                else {
+                     U8 c = (U8)scan[0];
+
+                     if (UTF8_IS_INVARIANT(c)) {
+                          if (ANYOF_BITMAP_TEST(p, c))
+                               cont = TRUE;
+                     }
+                     else {
+                          if (reginclass(p, (U8*)scan, 0, do_utf8))
+                               cont = TRUE;
+                     }
+               }
+               if (!cont)
+                    break;
                scan += UTF8SKIP(scan);
                hardcount++;
            }