Benchmark notes (from Barrie Slaymaker <barries@slaysys.com>)
[p5sagit/p5-mst-13.2.git] / regexec.c
index 54f5e22..65a3b90 100644 (file)
--- a/regexec.c
+++ b/regexec.c
@@ -278,7 +278,16 @@ S_cache_re(pTHX_ regexp *prog)
 /* A failure to find a constant substring means that there is no need to make
    an expensive call to REx engine, thus we celebrate a failure.  Similarly,
    finding a substring too deep into the string means that less calls to
-   regtry() should be needed. */
+   regtry() should be needed.
+
+   REx compiler's optimizer found 4 possible hints:
+       a) Anchored substring;
+       b) Fixed substring;
+       c) Whether we are anchored (beginning-of-line or \G);
+       d) First node (of those at offset 0) which may distingush positions;
+   We use 'a', 'b', multiline-part of 'c', and try to find a position in the
+   string which does not contradict any of them.
+ */
 
 char *
 Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
@@ -293,6 +302,9 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
     I32 ml_anch;
     char *tmp;
     register char *other_last = Nullch;
+#ifdef DEBUGGING
+    char *i_strpos = strpos;
+#endif
 
     DEBUG_r( if (!PL_colorset) reginitcolors() );
     DEBUG_r(PerlIO_printf(Perl_debug_log,
@@ -420,7 +432,7 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
        goto fail_finish;
 
     /* Finish the diagnostic message */
-    DEBUG_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - strpos)) );
+    DEBUG_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
 
     /* Got a candidate.  Check MBOL anchoring, and the *other* substr.
        Start with the other substr.
@@ -431,11 +443,13 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
      */
 
     if (prog->float_substr && prog->anchored_substr) {
-       /* Take into account the anchored substring. */
+       /* Take into account the "other" substring. */
        /* XXXX May be hopelessly wrong for UTF... */
        if (!other_last)
            other_last = strpos - 1;
        if (check == prog->float_substr) {
+         do_other_anchored:
+           {
                char *last = s - start_shift, *last1, *last2;
                char *s1 = s;
 
@@ -446,7 +460,7 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
                        || (PL_bostr = strpos, /* Used in regcopmaybe() */
                            (t = reghopmaybe_c(s, -(prog->check_offset_max)))
                            && t > strpos)))
-                   ;
+                   /* EMPTY */;
                else
                    t = strpos;
                t += prog->anchored_offset;
@@ -478,7 +492,7 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
                    }
                    DEBUG_r(PerlIO_printf(Perl_debug_log,
                        ", trying floating at offset %ld...\n",
-                       (long)(s1 + 1 - strpos)));
+                       (long)(s1 + 1 - i_strpos)));
                    PL_regeol = strend;                 /* Used in HOP() */
                    other_last = last1 + prog->anchored_offset;
                    s = HOPc(last, 1);
@@ -486,14 +500,15 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
                }
                else {
                    DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
-                         (long)(s - strpos)));
+                         (long)(s - i_strpos)));
                    t = s - prog->anchored_offset;
                    other_last = s - 1;
+                   s = s1;
                    if (t == strpos)
                        goto try_at_start;
-                   s = s1;
                    goto try_at_offset;
                }
+           }
        }
        else {          /* Take into account the floating substring. */
                char *last, *last1;
@@ -529,7 +544,7 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
                    }
                    DEBUG_r(PerlIO_printf(Perl_debug_log,
                        ", trying anchored starting at offset %ld...\n",
-                       (long)(s1 + 1 - strpos)));
+                       (long)(s1 + 1 - i_strpos)));
                    other_last = last;
                    PL_regeol = strend;                 /* Used in HOP() */
                    s = HOPc(t, 1);
@@ -537,11 +552,11 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
                }
                else {
                    DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
-                         (long)(s - strpos)));
+                         (long)(s - i_strpos)));
                    other_last = s - 1;
+                   s = s1;
                    if (t == strpos)
                        goto try_at_start;
-                   s = s1;
                    goto try_at_offset;
                }
        }
@@ -559,18 +574,36 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
           cannot start at strpos. */
       try_at_offset:
        if (ml_anch && t[-1] != '\n') {
-         find_anchor:          /* Eventually fbm_*() should handle this */
+           /* Eventually fbm_*() should handle this, but often
+              anchored_offset is not 0, so this check will not be wasted. */
+           /* XXXX In the code below we prefer to look for "^" even in
+              presence of anchored substrings.  And we search even
+              beyond the found float position.  These pessimizations
+              are historical artefacts only.  */
+         find_anchor:
            while (t < strend - prog->minlen) {
                if (*t == '\n') {
                    if (t < s - prog->check_offset_min) {
+                       if (prog->anchored_substr) {
+                           /* We definitely contradict the found anchored
+                              substr.  Due to the above check we do not
+                              contradict "check" substr.
+                              Thus we can arrive here only if check substr
+                              is float.  Redo checking for "other"=="fixed".
+                            */
+                           strpos = t + 1;                         
+                           DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
+                               PL_colors[0],PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
+                           goto do_other_anchored;
+                       }
                        s = t + 1;
                        DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
-                           PL_colors[0],PL_colors[1], (long)(s - strpos)));
+                           PL_colors[0],PL_colors[1], (long)(s - i_strpos)));
                        goto set_useful;
                    }
                    DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting at offset %ld...\n",
-                       PL_colors[0],PL_colors[1], (long)(t + 1 - strpos)));
-                   s = t + 1;
+                       PL_colors[0],PL_colors[1], (long)(t + 1 - i_strpos)));
+                   strpos = s = t + 1;
                    goto restart;
                }
                t++;
@@ -596,8 +629,12 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
            t = strpos;
            goto find_anchor;
        }
+       DEBUG_r( if (ml_anch)
+           PerlIO_printf(Perl_debug_log, "Does not contradict /%s^%s/m...\n",
+                       PL_colors[0],PL_colors[1]);
+       );
       success_at_start:
-       if (!(prog->reganch & ROPT_NAUGHTY)
+       if (!(prog->reganch & ROPT_NAUGHTY)     /* XXXX If strpos moved? */
            && --BmUSEFUL(prog->check_substr) < 0
            && prog->check_substr == prog->float_substr) { /* boo */
            /* If flags & SOMETHING - do not do it many times on the same match */
@@ -612,7 +649,7 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
     }
 
     DEBUG_r(PerlIO_printf(Perl_debug_log, "%sGuessed:%s match at offset %ld\n",
-                         PL_colors[4], PL_colors[5], (long)(s - strpos)) );
+                         PL_colors[4], PL_colors[5], (long)(s - i_strpos)) );
     return s;
 
   fail_finish:                         /* Substring not found */
@@ -702,19 +739,23 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *
     /* If there is a "must appear" string, look for it. */
     s = startpos;
 
-    if (prog->reganch & ROPT_GPOS_SEEN) {
+    if (prog->reganch & ROPT_GPOS_SEEN) { /* Need to have PL_reg_ganch */
        MAGIC *mg;
 
-       if (!(flags & REXEC_IGNOREPOS) && sv && SvTYPE(sv) >= SVt_PVMG
-           && SvMAGIC(sv) && (mg = mg_find(sv, 'g')) && mg->mg_len >= 0)
-           PL_reg_ganch = strbeg + mg->mg_len;
-       else
+       if (flags & REXEC_IGNOREPOS)    /* Means: check only at start */
            PL_reg_ganch = startpos;
-       if (prog->reganch & ROPT_ANCH_GPOS) {
-           if (s > PL_reg_ganch)
-               goto phooey;
-           s = PL_reg_ganch;
+       else if (sv && SvTYPE(sv) >= SVt_PVMG
+                 && SvMAGIC(sv)
+                 && (mg = mg_find(sv, 'g')) && mg->mg_len >= 0) {
+           PL_reg_ganch = strbeg + mg->mg_len; /* Defined pos() */
+           if (prog->reganch & ROPT_ANCH_GPOS) {
+               if (s > PL_reg_ganch)
+                   goto phooey;
+               s = PL_reg_ganch;
+           }
        }
+       else                            /* pos() not defined */
+           PL_reg_ganch = strbeg;
     }
 
     if (!(flags & REXEC_CHECKED) && prog->check_substr != Nullsv) {
@@ -755,9 +796,12 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *
            end = HOPc(strend, -dontbother) - 1;
            /* for multiline we only have to try after newlines */
            if (prog->check_substr) {
+               if (s == startpos)
+                   goto after_try;
                while (1) {
                    if (regtry(prog, s))
                        goto got_it;
+                 after_try:
                    if (s >= end)
                        goto phooey;
                    s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
@@ -1465,7 +1509,7 @@ S_regtry(pTHX_ regexp *prog, char *startpos)
            }
            PL_reg_magic    = mg;
            PL_reg_oldpos   = mg->mg_len;
-           SAVEDESTRUCTOR(restore_pos, 0);
+           SAVEDESTRUCTOR_X(restore_pos, 0);
         }
        if (!PL_reg_curpm)
            New(22,PL_reg_curpm, 1, PMOP);
@@ -1562,11 +1606,19 @@ S_regmatch(pTHX_ regnode *prog)
 #ifdef DEBUGGING
 #  define sayYES goto yes
 #  define sayNO goto no
+#  define sayYES_FINAL goto yes_final
+#  define sayYES_LOUD  goto yes_loud
+#  define sayNO_FINAL  goto no_final
+#  define sayNO_SILENT goto do_no
 #  define saySAME(x) if (x) goto yes; else goto no
 #  define REPORT_CODE_OFF 24
 #else
 #  define sayYES return 1
 #  define sayNO return 0
+#  define sayYES_FINAL return 1
+#  define sayYES_LOUD  return 1
+#  define sayNO_FINAL  return 0
+#  define sayNO_SILENT return 0
 #  define saySAME(x) return x
 #endif
        DEBUG_r( {
@@ -2162,15 +2214,24 @@ S_regmatch(pTHX_ regnode *prog)
                    PL_reg_maxiter = 0;
 
                    if (regmatch(re->program + 1)) {
+                       /* Even though we succeeded, we need to restore
+                          global variables, since we may be wrapped inside
+                          SUSPEND, thus the match may be not finished yet. */
+
+                       /* XXXX Do this only if SUSPENDed? */
+                       PL_reg_call_cc = state.prev;
+                       PL_regcc = state.cc;
+                       PL_reg_re = state.re;
+                       cache_re(PL_reg_re);
+
+                       /* XXXX This is too dramatic a measure... */
+                       PL_reg_maxiter = 0;
+
+                       /* These are needed even if not SUSPEND. */
                        ReREFCNT_dec(re);
                        regcpblow(cp);
                        sayYES;
                    }
-                   DEBUG_r(
-                       PerlIO_printf(Perl_debug_log,
-                                     "%*s  failed...\n",
-                                     REPORT_CODE_OFF+PL_regindent*2, "")
-                       );
                    ReREFCNT_dec(re);
                    REGCP_UNWIND;
                    regcppop();
@@ -2357,11 +2418,6 @@ S_regmatch(pTHX_ regnode *prog)
                        );
                    if (regmatch(cc->next))
                        sayYES;
-                   DEBUG_r(
-                       PerlIO_printf(Perl_debug_log,
-                                     "%*s  failed...\n",
-                                     REPORT_CODE_OFF+PL_regindent*2, "")
-                       );
                    if (PL_regcc)
                        PL_regcc->cur = ln;
                    PL_regcc = cc;
@@ -2377,11 +2433,6 @@ S_regmatch(pTHX_ regnode *prog)
                        sayYES;
                    cc->cur = n - 1;
                    cc->lastloc = lastloc;
-                   DEBUG_r(
-                       PerlIO_printf(Perl_debug_log,
-                                     "%*s  failed...\n",
-                                     REPORT_CODE_OFF+PL_regindent*2, "")
-                       );
                    sayNO;
                }
 
@@ -2424,7 +2475,7 @@ S_regmatch(pTHX_ regnode *prog)
                                      "%*s  already tried at this position...\n",
                                      REPORT_CODE_OFF+PL_regindent*2, "")
                        );
-                       sayNO;
+                       sayNO_SILENT;
                    }
                    PL_reg_poscache[o] |= (1<<b);
                }
@@ -2474,11 +2525,6 @@ S_regmatch(pTHX_ regnode *prog)
                        regcpblow(cp);
                        sayYES;
                    }
-                   DEBUG_r(
-                       PerlIO_printf(Perl_debug_log,
-                                     "%*s  failed...\n",
-                                     REPORT_CODE_OFF+PL_regindent*2, "")
-                       );
                    REGCP_UNWIND;
                    regcppop();
                    cc->cur = n - 1;
@@ -2520,10 +2566,6 @@ S_regmatch(pTHX_ regnode *prog)
                    ln = PL_regcc->cur;
                if (regmatch(cc->next))
                    sayYES;
-               DEBUG_r(
-                   PerlIO_printf(Perl_debug_log, "%*s  failed...\n",
-                                 REPORT_CODE_OFF+PL_regindent*2, "")
-                   );
                if (PL_regcc)
                    PL_regcc->cur = ln;
                PL_regcc = cc;
@@ -2918,14 +2960,22 @@ S_regmatch(pTHX_ regnode *prog)
                                  "%*s  continuation failed...\n",
                                  REPORT_CODE_OFF+PL_regindent*2, "")
                    );
-               sayNO;
+               sayNO_SILENT;
+           }
+           if (locinput < PL_regtill) {
+               DEBUG_r(PerlIO_printf(Perl_debug_log,
+                                     "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
+                                     PL_colors[4],
+                                     (long)(locinput - PL_reg_starttry),
+                                     (long)(PL_regtill - PL_reg_starttry),
+                                     PL_colors[5]));
+               sayNO_FINAL;            /* Cannot match: too short. */
            }
-           if (locinput < PL_regtill)
-               sayNO;                  /* Cannot match: too short. */
-           /* Fall through */
+           PL_reginput = locinput;     /* put where regtry can find it */
+           sayYES_FINAL;               /* Success! */
        case SUCCEED:
            PL_reginput = locinput;     /* put where regtry can find it */
-           sayYES;                     /* Success! */
+           sayYES_LOUD;                /* Success! */
        case SUSPEND:
            n = 1;
            PL_reginput = locinput;
@@ -3001,7 +3051,7 @@ S_regmatch(pTHX_ regnode *prog)
                next = NULL;
            break;
        default:
-           PerlIO_printf(PerlIO_stderr(), "%lx %d\n",
+           PerlIO_printf(Perl_error_log, "%lx %d\n",
                          (unsigned long)scan, OP(scan));
            Perl_croak(aTHX_ "regexp memory corruption");
        }
@@ -3016,6 +3066,16 @@ S_regmatch(pTHX_ regnode *prog)
     /*NOTREACHED*/
     sayNO;
 
+yes_loud:
+    DEBUG_r(
+       PerlIO_printf(Perl_debug_log,
+                     "%*s  %scould match...%s\n",
+                     REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],PL_colors[5])
+       );
+    goto yes;
+yes_final:
+    DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
+                         PL_colors[4],PL_colors[5]));
 yes:
 #ifdef DEBUGGING
     PL_regindent--;
@@ -3023,6 +3083,14 @@ yes:
     return 1;
 
 no:
+    DEBUG_r(
+       PerlIO_printf(Perl_debug_log,
+                     "%*s  %sfailed...%s\n",
+                     REPORT_CODE_OFF+PL_regindent*2, "",PL_colors[4],PL_colors[5])
+       );
+    goto do_no;
+no_final:
+do_no:
 #ifdef DEBUGGING
     PL_regindent--;
 #endif