Integrate with Sarathy.
[p5sagit/p5-mst-13.2.git] / regexec.c
index d55c5be..fa891c8 100644 (file)
--- a/regexec.c
+++ b/regexec.c
@@ -146,13 +146,13 @@ S_regcppush(pTHX_ I32 parenfloor)
 
 /* These are needed since we do not localize EVAL nodes: */
 #  define REGCP_SET  DEBUG_r(PerlIO_printf(Perl_debug_log,             \
-                            "  Setting an EVAL scope, savestack=%i\n", \
-                            PL_savestack_ix)); lastcp = PL_savestack_ix
+                            "  Setting an EVAL scope, savestack=%"IVdf"\n",    \
+                            (IV)PL_savestack_ix)); lastcp = PL_savestack_ix
 
 #  define REGCP_UNWIND  DEBUG_r(lastcp != PL_savestack_ix ?            \
                                PerlIO_printf(Perl_debug_log,           \
-                               "  Clearing an EVAL scope, savestack=%i..%i\n", \
-                               lastcp, PL_savestack_ix) : 0); regcpblow(lastcp)
+                               "  Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
+                               (IV)lastcp, (IV)PL_savestack_ix) : 0); regcpblow(lastcp)
 
 STATIC char *
 S_regcppop(pTHX)
@@ -176,18 +176,18 @@ S_regcppop(pTHX)
            PL_regendp[paren] = tmps;
        DEBUG_r(
            PerlIO_printf(Perl_debug_log,
-                         "     restoring \\%d to %d(%d)..%d%s\n",
-                         paren, PL_regstartp[paren], 
-                         PL_reg_start_tmp[paren] - PL_bostr,
-                         PL_regendp[paren], 
+                         "     restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
+                         (UV)paren, (IV)PL_regstartp[paren], 
+                         (IV)(PL_reg_start_tmp[paren] - PL_bostr),
+                         (IV)PL_regendp[paren], 
                          (paren > *PL_reglastparen ? "(no)" : ""));
        );
     }
     DEBUG_r(
        if (*PL_reglastparen + 1 <= PL_regnpar) {
            PerlIO_printf(Perl_debug_log,
-                         "     restoring \\%d..\\%d to undef\n",
-                         *PL_reglastparen + 1, PL_regnpar);
+                         "     restoring \\%"IVdf"..\\%"IVdf" to undef\n",
+                         (IV)(*PL_reglastparen + 1), (IV)PL_regnpar);
        }
     );
     for (paren = *PL_reglastparen + 1; paren <= PL_regnpar; paren++) {
@@ -314,7 +314,7 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
                      PL_colors[1],
                      (strlen(prog->precomp) > 60 ? "..." : ""),
                      PL_colors[0],
-                     (strend - strpos > 60 ? 60 : strend - strpos),
+                     (int)(strend - strpos > 60 ? 60 : strend - strpos),
                      strpos, PL_colors[1],
                      (strend - strpos > 60 ? "..." : ""))
        );
@@ -424,7 +424,8 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
                          (s ? "Found" : "Did not find"),
                          ((check == prog->anchored_substr) ? "anchored" : "floating"),
                          PL_colors[0],
-                         SvCUR(check) - (SvTAIL(check)!=0), SvPVX(check),
+                         (int)(SvCUR(check) - (SvTAIL(check)!=0)),
+                         SvPVX(check),
                          PL_colors[1], (SvTAIL(check) ? "$" : ""),
                          (s ? " at offset " : "...\n") ) );
 
@@ -480,8 +481,8 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
                DEBUG_r(PerlIO_printf(Perl_debug_log, "%s anchored substr `%s%.*s%s'%s",
                        (s ? "Found" : "Contradicts"),
                        PL_colors[0],
-                         SvCUR(prog->anchored_substr)
-                         - (SvTAIL(prog->anchored_substr)!=0),
+                         (int)(SvCUR(prog->anchored_substr)
+                         - (SvTAIL(prog->anchored_substr)!=0)),
                          SvPVX(prog->anchored_substr),
                          PL_colors[1], (SvTAIL(prog->anchored_substr) ? "$" : "")));
                if (!s) {
@@ -532,8 +533,8 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
                DEBUG_r(PerlIO_printf(Perl_debug_log, "%s floating substr `%s%.*s%s'%s",
                        (s ? "Found" : "Contradicts"),
                        PL_colors[0],
-                         SvCUR(prog->float_substr)
-                         - (SvTAIL(prog->float_substr)!=0),
+                         (int)(SvCUR(prog->float_substr)
+                         - (SvTAIL(prog->float_substr)!=0)),
                          SvPVX(prog->float_substr),
                          PL_colors[1], (SvTAIL(prog->float_substr) ? "$" : "")));
                if (!s) {
@@ -642,6 +643,9 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
            prog->check_substr = Nullsv;        /* disable */
            prog->float_substr = Nullsv;        /* clear */
            s = strpos;
+           /* XXXX This is a remnant of the old implementation.  It
+                   looks wasteful, since now INTUIT can use many
+                   other heuristics too. */
            prog->reganch &= ~RE_USE_INTUIT;
        }
        else
@@ -739,19 +743,23 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *
     /* If there is a "must appear" string, look for it. */
     s = startpos;
 
-    if (prog->reganch & ROPT_GPOS_SEEN) {
+    if (prog->reganch & ROPT_GPOS_SEEN) { /* Need to have PL_reg_ganch */
        MAGIC *mg;
 
-       if (!(flags & REXEC_IGNOREPOS) && sv && SvTYPE(sv) >= SVt_PVMG
-           && SvMAGIC(sv) && (mg = mg_find(sv, 'g')) && mg->mg_len >= 0)
-           PL_reg_ganch = strbeg + mg->mg_len;
-       else
+       if (flags & REXEC_IGNOREPOS)    /* Means: check only at start */
            PL_reg_ganch = startpos;
-       if (prog->reganch & ROPT_ANCH_GPOS) {
-           if (s > PL_reg_ganch)
-               goto phooey;
-           s = PL_reg_ganch;
+       else if (sv && SvTYPE(sv) >= SVt_PVMG
+                 && SvMAGIC(sv)
+                 && (mg = mg_find(sv, 'g')) && mg->mg_len >= 0) {
+           PL_reg_ganch = strbeg + mg->mg_len; /* Defined pos() */
+           if (prog->reganch & ROPT_ANCH_GPOS) {
+               if (s > PL_reg_ganch)
+                   goto phooey;
+               s = PL_reg_ganch;
+           }
        }
+       else                            /* pos() not defined */
+           PL_reg_ganch = strbeg;
     }
 
     if (!(flags & REXEC_CHECKED) && prog->check_substr != Nullsv) {
@@ -772,7 +780,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *
                      PL_colors[1],
                      (strlen(prog->precomp) > 60 ? "..." : ""),
                      PL_colors[0],
-                     (strend - startpos > 60 ? 60 : strend - startpos),
+                     (int)(strend - startpos > 60 ? 60 : strend - startpos),
                      startpos, PL_colors[1],
                      (strend - startpos > 60 ? "..." : ""))
        );
@@ -800,9 +808,13 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *
                  after_try:
                    if (s >= end)
                        goto phooey;
-                   s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
-                   if (!s)
-                       goto phooey;
+                   if (prog->reganch & RE_USE_INTUIT) {
+                       s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
+                       if (!s)
+                           goto phooey;
+                   }
+                   else
+                       s++;
                }               
            } else {
                if (s > startpos)
@@ -912,7 +924,11 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *
     }
     else if (c = prog->regstclass) {
        I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
-       char *cc;
+       char *m;
+       int ln;
+       int c1;
+       int c2;
+       char *e;
 
        if (minlen)
            dontbother = minlen - 1;
@@ -921,7 +937,6 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *
        /* We know what class it must start with. */
        switch (OP(c)) {
        case ANYOFUTF8:
-           cc = MASK(c);
            while (s < strend) {
                if (REGINCLASSUTF8(c, (U8*)s)) {
                    if (tmp && regtry(prog, s))
@@ -935,9 +950,8 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *
            }
            break;
        case ANYOF:
-           cc = MASK(c);
            while (s < strend) {
-               if (REGINCLASS(cc, *s)) {
+               if (REGINCLASS(c, *s)) {
                    if (tmp && regtry(prog, s))
                        goto got_it;
                    else
@@ -948,6 +962,43 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *
                s++;
            }
            break;
+       case EXACTF:
+           m = STRING(c);
+           ln = STR_LEN(c);
+           c1 = *m;
+           c2 = PL_fold[c1];
+           goto do_exactf;
+       case EXACTFL:
+           m = STRING(c);
+           ln = STR_LEN(c);
+           c1 = *m;
+           c2 = PL_fold_locale[c1];
+         do_exactf:
+           e = strend - ln;
+
+           /* Here it is NOT UTF!  */
+           if (c1 == c2) {
+               while (s <= e) {
+                   if ( *s == c1
+                        && (ln == 1 || (OP(c) == EXACTF
+                                        ? ibcmp(s, m, ln)
+                                        : ibcmp_locale(s, m, ln)))
+                        && regtry(prog, s) )
+                       goto got_it;
+                   s++;
+               }
+           } else {
+               while (s <= e) {
+                   if ( (*s == c1 || *s == c2)
+                        && (ln == 1 || (OP(c) == EXACTF
+                                        ? ibcmp(s, m, ln)
+                                        : ibcmp_locale(s, m, ln)))
+                        && regtry(prog, s) )
+                       goto got_it;
+                   s++;
+               }
+           }
+           break;
        case BOUNDL:
            PL_reg_flags |= RF_tainted;
            /* FALL THROUGH */
@@ -1359,6 +1410,9 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *
                s += UTF8SKIP(s);
            }
            break;
+       default:
+           Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
+           break;
        }
     }
     else {
@@ -1477,8 +1531,8 @@ S_regtry(pTHX_ regexp *prog, char *startpos)
 
        PL_reg_eval_set = RS_init;
        DEBUG_r(DEBUG_s(
-           PerlIO_printf(Perl_debug_log, "  setting stack tmpbase at %i\n",
-                         PL_stack_sp - PL_stack_base);
+           PerlIO_printf(Perl_debug_log, "  setting stack tmpbase at %"IVdf"\n",
+                         (IV)(PL_stack_sp - PL_stack_base));
            ));
        SAVEINT(cxstack[cxstack_ix].blk_oldsp);
        cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
@@ -1505,7 +1559,7 @@ S_regtry(pTHX_ regexp *prog, char *startpos)
            }
            PL_reg_magic    = mg;
            PL_reg_oldpos   = mg->mg_len;
-           SAVEDESTRUCTOR(restore_pos, 0);
+           SAVEDESTRUCTOR_X(restore_pos, 0);
         }
        if (!PL_reg_curpm)
            New(22,PL_reg_curpm, 1, PMOP);
@@ -1602,11 +1656,19 @@ S_regmatch(pTHX_ regnode *prog)
 #ifdef DEBUGGING
 #  define sayYES goto yes
 #  define sayNO goto no
+#  define sayYES_FINAL goto yes_final
+#  define sayYES_LOUD  goto yes_loud
+#  define sayNO_FINAL  goto no_final
+#  define sayNO_SILENT goto do_no
 #  define saySAME(x) if (x) goto yes; else goto no
 #  define REPORT_CODE_OFF 24
 #else
 #  define sayYES return 1
 #  define sayNO return 0
+#  define sayYES_FINAL return 1
+#  define sayYES_LOUD  return 1
+#  define sayNO_FINAL  return 0
+#  define sayNO_SILENT return 0
 #  define saySAME(x) return x
 #endif
        DEBUG_r( {
@@ -1633,8 +1695,8 @@ S_regmatch(pTHX_ regnode *prog)
                pref0_len = pref_len;
            regprop(prop, scan);
            PerlIO_printf(Perl_debug_log, 
-                         "%4i <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|%3d:%*s%s\n",
-                         locinput - PL_bostr, 
+                         "%4"IVdf" <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|%3"IVdf":%*s%s\n",
+                         (IV)(locinput - PL_bostr), 
                          PL_colors[4], pref0_len, 
                          locinput - pref_len, PL_colors[5],
                          PL_colors[2], pref_len - pref0_len, 
@@ -1643,7 +1705,7 @@ S_regmatch(pTHX_ regnode *prog)
                          PL_colors[0], l, locinput, PL_colors[1],
                          15 - l - pref_len + 1,
                          "",
-                         scan - PL_regprogram, PL_regindent*2, "",
+                         (IV)(scan - PL_regprogram), PL_regindent*2, "",
                          SvPVX(prop));
        } );
 
@@ -1789,7 +1851,6 @@ S_regmatch(pTHX_ regnode *prog)
            nextchr = UCHARAT(locinput);
            break;
        case ANYOFUTF8:
-           s = MASK(scan);
            if (!REGINCLASSUTF8(scan, (U8*)locinput))
                sayNO;
            if (locinput >= PL_regeol)
@@ -1798,10 +1859,9 @@ S_regmatch(pTHX_ regnode *prog)
            nextchr = UCHARAT(locinput);
            break;
        case ANYOF:
-           s = MASK(scan);
            if (nextchr < 0)
                nextchr = UCHARAT(locinput);
-           if (!REGINCLASS(s, nextchr))
+           if (!REGINCLASS(scan, nextchr))
                sayNO;
            if (!nextchr && locinput >= PL_regeol)
                sayNO;
@@ -2129,7 +2189,7 @@ S_regmatch(pTHX_ regnode *prog)
            
            n = ARG(scan);
            PL_op = (OP_4tree*)PL_regdata->data[n];
-           DEBUG_r( PerlIO_printf(Perl_debug_log, "  re_eval 0x%x\n", PL_op) );
+           DEBUG_r( PerlIO_printf(Perl_debug_log, "  re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
            PL_curpad = AvARRAY((AV*)PL_regdata->data[n + 2]);
            PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
 
@@ -2220,11 +2280,6 @@ S_regmatch(pTHX_ regnode *prog)
                        regcpblow(cp);
                        sayYES;
                    }
-                   DEBUG_r(
-                       PerlIO_printf(Perl_debug_log,
-                                     "%*s  failed...\n",
-                                     REPORT_CODE_OFF+PL_regindent*2, "")
-                       );
                    ReREFCNT_dec(re);
                    REGCP_UNWIND;
                    regcppop();
@@ -2411,11 +2466,6 @@ S_regmatch(pTHX_ regnode *prog)
                        );
                    if (regmatch(cc->next))
                        sayYES;
-                   DEBUG_r(
-                       PerlIO_printf(Perl_debug_log,
-                                     "%*s  failed...\n",
-                                     REPORT_CODE_OFF+PL_regindent*2, "")
-                       );
                    if (PL_regcc)
                        PL_regcc->cur = ln;
                    PL_regcc = cc;
@@ -2431,11 +2481,6 @@ S_regmatch(pTHX_ regnode *prog)
                        sayYES;
                    cc->cur = n - 1;
                    cc->lastloc = lastloc;
-                   DEBUG_r(
-                       PerlIO_printf(Perl_debug_log,
-                                     "%*s  failed...\n",
-                                     REPORT_CODE_OFF+PL_regindent*2, "")
-                       );
                    sayNO;
                }
 
@@ -2478,7 +2523,7 @@ S_regmatch(pTHX_ regnode *prog)
                                      "%*s  already tried at this position...\n",
                                      REPORT_CODE_OFF+PL_regindent*2, "")
                        );
-                       sayNO;
+                       sayNO_SILENT;
                    }
                    PL_reg_poscache[o] |= (1<<b);
                }
@@ -2528,11 +2573,6 @@ S_regmatch(pTHX_ regnode *prog)
                        regcpblow(cp);
                        sayYES;
                    }
-                   DEBUG_r(
-                       PerlIO_printf(Perl_debug_log,
-                                     "%*s  failed...\n",
-                                     REPORT_CODE_OFF+PL_regindent*2, "")
-                       );
                    REGCP_UNWIND;
                    regcppop();
                    cc->cur = n - 1;
@@ -2574,10 +2614,6 @@ S_regmatch(pTHX_ regnode *prog)
                    ln = PL_regcc->cur;
                if (regmatch(cc->next))
                    sayYES;
-               DEBUG_r(
-                   PerlIO_printf(Perl_debug_log, "%*s  failed...\n",
-                                 REPORT_CODE_OFF+PL_regindent*2, "")
-                   );
                if (PL_regcc)
                    PL_regcc->cur = ln;
                PL_regcc = cc;
@@ -2716,8 +2752,9 @@ S_regmatch(pTHX_ regnode *prog)
                locinput = PL_reginput;
                DEBUG_r(
                    PerlIO_printf(Perl_debug_log,
-                                 "%*s  matched %ld times, len=%ld...\n",
-                                 REPORT_CODE_OFF+PL_regindent*2, "", n, l)
+                                 "%*s  matched %"IVdf" times, len=%"IVdf"...\n",
+                                 (int)(REPORT_CODE_OFF+PL_regindent*2), "",
+                                 (IV) n, (IV)l)
                    );
                if (n >= ln) {
                    if (PL_regkind[(U8)OP(next)] == EXACT) {
@@ -2741,8 +2778,8 @@ S_regmatch(pTHX_ regnode *prog)
                    {
                        DEBUG_r(
                                PerlIO_printf(Perl_debug_log,
-                                             "%*s  trying tail with n=%ld...\n",
-                                             REPORT_CODE_OFF+PL_regindent*2, "", n)
+                                             "%*s  trying tail with n=%"IVdf"...\n",
+                                             (int)(REPORT_CODE_OFF+PL_regindent*2), "", (IV)n)
                            );
                        if (paren) {
                            if (n) {
@@ -2972,14 +3009,22 @@ S_regmatch(pTHX_ regnode *prog)
                                  "%*s  continuation failed...\n",
                                  REPORT_CODE_OFF+PL_regindent*2, "")
                    );
-               sayNO;
+               sayNO_SILENT;
+           }
+           if (locinput < PL_regtill) {
+               DEBUG_r(PerlIO_printf(Perl_debug_log,
+                                     "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
+                                     PL_colors[4],
+                                     (long)(locinput - PL_reg_starttry),
+                                     (long)(PL_regtill - PL_reg_starttry),
+                                     PL_colors[5]));
+               sayNO_FINAL;            /* Cannot match: too short. */
            }
-           if (locinput < PL_regtill)
-               sayNO;                  /* Cannot match: too short. */
-           /* Fall through */
+           PL_reginput = locinput;     /* put where regtry can find it */
+           sayYES_FINAL;               /* Success! */
        case SUCCEED:
            PL_reginput = locinput;     /* put where regtry can find it */
-           sayYES;                     /* Success! */
+           sayYES_LOUD;                /* Success! */
        case SUSPEND:
            n = 1;
            PL_reginput = locinput;
@@ -3055,8 +3100,8 @@ S_regmatch(pTHX_ regnode *prog)
                next = NULL;
            break;
        default:
-           PerlIO_printf(PerlIO_stderr(), "%lx %d\n",
-                         (unsigned long)scan, OP(scan));
+           PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
+                         PTR2UV(scan), OP(scan));
            Perl_croak(aTHX_ "regexp memory corruption");
        }
        scan = next;
@@ -3070,6 +3115,16 @@ S_regmatch(pTHX_ regnode *prog)
     /*NOTREACHED*/
     sayNO;
 
+yes_loud:
+    DEBUG_r(
+       PerlIO_printf(Perl_debug_log,
+                     "%*s  %scould match...%s\n",
+                     REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],PL_colors[5])
+       );
+    goto yes;
+yes_final:
+    DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
+                         PL_colors[4],PL_colors[5]));
 yes:
 #ifdef DEBUGGING
     PL_regindent--;
@@ -3077,6 +3132,14 @@ yes:
     return 1;
 
 no:
+    DEBUG_r(
+       PerlIO_printf(Perl_debug_log,
+                     "%*s  %sfailed...%s\n",
+                     REPORT_CODE_OFF+PL_regindent*2, "",PL_colors[4],PL_colors[5])
+       );
+    goto do_no;
+no_final:
+do_no:
 #ifdef DEBUGGING
     PL_regindent--;
 #endif
@@ -3096,7 +3159,6 @@ S_regrepeat(pTHX_ regnode *p, I32 max)
 {
     dTHR;
     register char *scan;
-    register char *opnd;
     register I32 c;
     register char *loceol = PL_regeol;
     register I32 hardcount = 0;
@@ -3152,8 +3214,7 @@ S_regrepeat(pTHX_ regnode *p, I32 max)
        }
        break;
     case ANYOF:
-       opnd = MASK(p);
-       while (scan < loceol && REGINCLASS(opnd, *scan))
+       while (scan < loceol && REGINCLASS(p, *scan))
            scan++;
        break;
     case ALNUM:
@@ -3292,8 +3353,8 @@ S_regrepeat(pTHX_ regnode *p, I32 max)
 
                regprop(prop, p);
                PerlIO_printf(Perl_debug_log, 
-                             "%*s  %s can match %ld times out of %ld...\n", 
-                             REPORT_CODE_OFF+1, "", SvPVX(prop),c,max);
+                             "%*s  %s can match %"IVdf" times out of %"IVdf"...\n", 
+                             REPORT_CODE_OFF+1, "", SvPVX(prop),(IV)c,(IV)max);
        });
     
     return(c);
@@ -3357,7 +3418,7 @@ S_regrepeat_hard(pTHX_ regnode *p, I32 max, I32 *lp)
  */
 
 STATIC bool
-S_reginclass(pTHX_ register char *p, register I32 c)
+S_reginclass(pTHX_ register regnode *p, register I32 c)
 {
     dTHR;
     char flags = ANYOF_FLAGS(p);
@@ -3495,7 +3556,6 @@ S_reghopmaybe(pTHX_ U8* s, I32 off)
 }
 
 #ifdef PERL_OBJECT
-#define NO_XSLOCKS
 #include "XSUB.h"
 #endif