Re: recursion now removed from the regex engine
[p5sagit/p5-mst-13.2.git] / regexec.c
index 890736c..747b61b 100644 (file)
--- a/regexec.c
+++ b/regexec.c
 */
 
 #ifdef PERL_EXT_RE_BUILD
-/* need to replace pregcomp et al, so enable that */
-#  ifndef PERL_IN_XSUB_RE
-#    define PERL_IN_XSUB_RE
-#  endif
-/* need access to debugger hooks */
-#  if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING)
-#    define DEBUGGING
-#  endif
-#endif
-
-#ifdef PERL_IN_XSUB_RE
-/* We *really* need to overwrite these symbols: */
-#  define Perl_regexec_flags my_regexec
-#  define Perl_regdump my_regdump
-#  define Perl_regprop my_regprop
-#  define Perl_re_intuit_start my_re_intuit_start
-/* *These* symbols are masked to allow static link. */
-#  define Perl_pregexec my_pregexec
-#  define Perl_reginitcolors my_reginitcolors
-#  define Perl_regclass_swash my_regclass_swash
-
-#  define PERL_NO_GET_CONTEXT
+#include "re_top.h"
 #endif
 
 /*
 #define PERL_IN_REGEXEC_C
 #include "perl.h"
 
-#include "regcomp.h"
+#ifdef PERL_IN_XSUB_RE
+#  include "re_comp.h"
+#else
+#  include "regcomp.h"
+#endif
 
 #define RF_tainted     1               /* tainted information used? */
 #define RF_warned      2               /* warned about big count? */
@@ -299,6 +282,7 @@ S_regcppop(pTHX_ const regexp *rex)
  * pregexec and friends
  */
 
+#ifndef PERL_IN_XSUB_RE
 /*
  - pregexec - match a regexp against a string
  */
@@ -314,7 +298,7 @@ Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *stren
        regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
                      nosave ? 0 : REXEC_COPY_STR);
 }
-
+#endif
 
 /*
  * Need to implement the following flags for reg_anch:
@@ -2298,6 +2282,9 @@ typedef union re_unwind_t {
 /* Make sure there is a test for this +1 options in re_tests */
 #define TRIE_INITAL_ACCEPT_BUFFLEN 4;
 
+/* this value indicates that the c1/c2 "next char" test should be skipped */
+#define CHRTEST_VOID -1000
+
 #define SLAB_FIRST(s) (&(s)->states[0])
 #define SLAB_LAST(s)  (&(s)->states[PERL_REGMATCH_SLAB_SLOTS-1])
 
@@ -2306,6 +2293,9 @@ typedef union re_unwind_t {
 STATIC regmatch_state *
 S_push_slab(pTHX)
 {
+#if PERL_VERSION < 9
+    dMY_CXT;
+#endif
     regmatch_slab *s = PL_regmatch_slab->next;
     if (!s) {
        Newx(s, 1, regmatch_slab);
@@ -2473,6 +2463,9 @@ S_push_slab(pTHX)
 STATIC I32                     /* 0 failure, 1 success */
 S_regmatch(pTHX_ const regmatch_info *reginfo, regnode *prog)
 {
+#if PERL_VERSION < 9
+    dMY_CXT;
+#endif
     dVAR;
     register const bool do_utf8 = PL_reg_match_utf8;
     const U32 uniflags = UTF8_ALLOW_DEFAULT;
@@ -3872,94 +3865,109 @@ S_regmatch(pTHX_ const regmatch_info *reginfo, regnode *prog)
                scan += NEXT_OFF(scan); /* Skip former OPEN. */
            PL_reginput = locinput;
            st->u.curlym.maxwanted = st->minmod ? st->ln : n;
-           if (st->u.curlym.maxwanted) {
-               while (PL_reginput < PL_regeol && st->u.curlym.matches < st->u.curlym.maxwanted) {
-                   /* resume to current state on success */
-                   st->u.yes.prev_yes_state = yes_state;
-                   yes_state = st;
-                   REGMATCH(scan, CURLYM1);
-                   yes_state = st->u.yes.prev_yes_state;
-                   /*** all unsaved local vars undefined at this point */
-                   if (!result)
-                       break;
-                   /* on first match, determine length, u.curlym.l */
-                   if (!st->u.curlym.matches++) {
-                       if (PL_reg_match_utf8) {
-                           char *s = locinput;
-                           while (s < PL_reginput) {
-                               st->u.curlym.l++;
-                               s += UTF8SKIP(s);
-                           }
-                       }
-                       else {
-                           st->u.curlym.l = PL_reginput - locinput;
-                       }
-                       if (st->u.curlym.l == 0) {
-                           st->u.curlym.matches = st->u.curlym.maxwanted;
-                           break;
+           while (PL_reginput < PL_regeol && st->u.curlym.matches < st->u.curlym.maxwanted) {
+               /* resume to current state on success */
+               st->u.yes.prev_yes_state = yes_state;
+               yes_state = st;
+               REGMATCH(scan, CURLYM1);
+               yes_state = st->u.yes.prev_yes_state;
+               /*** all unsaved local vars undefined at this point */
+               if (!result)
+                   break;
+               /* on first match, determine length, u.curlym.l */
+               if (!st->u.curlym.matches++) {
+                   if (PL_reg_match_utf8) {
+                       char *s = locinput;
+                       while (s < PL_reginput) {
+                           st->u.curlym.l++;
+                           s += UTF8SKIP(s);
                        }
                    }
-                   locinput = PL_reginput;
+                   else {
+                       st->u.curlym.l = PL_reginput - locinput;
+                   }
+                   if (st->u.curlym.l == 0) {
+                       st->u.curlym.matches = st->u.curlym.maxwanted;
+                       break;
+                   }
                }
+               locinput = PL_reginput;
            }
 
            PL_reginput = locinput;
-
-           if (st->minmod) {
+           if (st->u.curlym.matches < st->ln) {
                st->minmod = 0;
-               if (st->ln && st->u.curlym.matches < st->ln)
-                   sayNO;
-               if (HAS_TEXT(next) || JUMPABLE(next)) {
-                   regnode *text_node = next;
+               sayNO;
+           }
 
-                   if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
+           DEBUG_EXECUTE_r(
+               PerlIO_printf(Perl_debug_log,
+                         "%*s  matched %"IVdf" times, len=%"IVdf"...\n",
+                         (int)(REPORT_CODE_OFF+PL_regindent*2), "",
+                         (IV) st->u.curlym.matches, (IV)st->u.curlym.l)
+           );
+
+           /* calculate c1 and c2 for possible match of 1st char
+            * following curly */
+           st->u.curlym.c1 = st->u.curlym.c2 = CHRTEST_VOID;
+           if (HAS_TEXT(next) || JUMPABLE(next)) {
+               regnode *text_node = next;
+               if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
+               if (HAS_TEXT(text_node)
+                   && PL_regkind[(U8)OP(text_node)] != REF)
+               {
+                   st->u.curlym.c1 = (U8)*STRING(text_node);
+                   st->u.curlym.c2 =
+                       (OP(text_node) == EXACTF || OP(text_node) == REFF)
+                       ? PL_fold[st->u.curlym.c1]
+                       : (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
+                           ? PL_fold_locale[st->u.curlym.c1]
+                           : st->u.curlym.c1;
+               }
+           }
 
-                   if (! HAS_TEXT(text_node)) st->u.curlym.c1 = st->u.curlym.c2 = -1000;
-                   else {
-                       if (PL_regkind[(U8)OP(text_node)] == REF) {
-                           st->u.curlym.c1 = st->u.curlym.c2 = -1000;
-                           goto assume_ok_MM;
+           REGCP_SET(st->u.curlym.lastcp);
+
+           st->u.curlym.minmod = st->minmod;
+           st->minmod = 0;
+           while (st->u.curlym.matches >= st->ln
+               && (st->u.curlym.matches <= n
+                   /* for REG_INFTY, matches could overflow to negative */
+                   || (n == REG_INFTY && st->u.curlym.matches >= 0)))
+           { 
+               /* If it could work, try it. */
+               if (st->u.curlym.c1 == CHRTEST_VOID ||
+                   UCHARAT(PL_reginput) == st->u.curlym.c1 ||
+                   UCHARAT(PL_reginput) == st->u.curlym.c2)
+               {
+                   DEBUG_EXECUTE_r(
+                       PerlIO_printf(Perl_debug_log,
+                           "%*s  trying tail with matches=%"IVdf"...\n",
+                           (int)(REPORT_CODE_OFF+PL_regindent*2),
+                           "", (IV)st->u.curlym.matches)
+                       );
+                   if (st->u.curlym.paren) {
+                       if (st->u.curlym.matches) {
+                           PL_regstartp[st->u.curlym.paren]
+                               = HOPc(PL_reginput, -st->u.curlym.l) - PL_bostr;
+                           PL_regendp[st->u.curlym.paren] = PL_reginput - PL_bostr;
                        }
-                       else { st->u.curlym.c1 = (U8)*STRING(text_node); }
-                       if (OP(text_node) == EXACTF || OP(text_node) == REFF)
-                           st->u.curlym.c2 = PL_fold[st->u.curlym.c1];
-                       else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
-                           st->u.curlym.c2 = PL_fold_locale[st->u.curlym.c1];
                        else
-                           st->u.curlym.c2 = st->u.curlym.c1;
+                           PL_regendp[st->u.curlym.paren] = -1;
                    }
+                   /* resume to current state on success */
+                   st->u.yes.prev_yes_state = yes_state;
+                   yes_state = st;
+                   REGMATCH(next, CURLYM2);
+                   yes_state = st->u.yes.prev_yes_state;
+                   /*** all unsaved local vars undefined at this point */
+                   if (result)
+                       /* XXX tmp sayYES; */
+                       sayYES_FINAL;
+                   REGCP_UNWIND(st->u.curlym.lastcp);
                }
-               else
-                   st->u.curlym.c1 = st->u.curlym.c2 = -1000;
-           assume_ok_MM:
-               REGCP_SET(st->u.curlym.lastcp);
-               while (n >= st->ln || (n == REG_INFTY && st->ln > 0)) { /* ln overflow ? */
-                   /* If it could work, try it. */
-                   if (st->u.curlym.c1 == -1000 ||
-                       UCHARAT(PL_reginput) == st->u.curlym.c1 ||
-                       UCHARAT(PL_reginput) == st->u.curlym.c2)
-                   {
-                       if (st->u.curlym.paren) {
-                           if (st->ln) {
-                               PL_regstartp[st->u.curlym.paren] =
-                                   HOPc(PL_reginput, -st->u.curlym.l) - PL_bostr;
-                               PL_regendp[st->u.curlym.paren] = PL_reginput - PL_bostr;
-                           }
-                           else
-                               PL_regendp[st->u.curlym.paren] = -1;
-                       }
-                       /* resume to current state on success */
-                       st->u.yes.prev_yes_state = yes_state;
-                       yes_state = st;
-                       REGMATCH(next, CURLYM2);
-                       yes_state = st->u.yes.prev_yes_state;
-                       /*** all unsaved local vars undefined at this point */
-                       if (result)
-                           /* XXX tmp sayYES; */
-                           sayYES_FINAL;
-                       REGCP_UNWIND(st->u.curlym.lastcp);
-                   }
-                   /* Couldn't or didn't -- move forward. */
+               /* Couldn't or didn't -- move forward/backward. */
+               if (st->u.curlym.minmod) {
                    PL_reginput = locinput;
                    /* resume to current state on success */
                    st->u.yes.prev_yes_state = yes_state;
@@ -3968,80 +3976,13 @@ S_regmatch(pTHX_ const regmatch_info *reginfo, regnode *prog)
                    yes_state = st->u.yes.prev_yes_state;
                    /*** all unsaved local vars undefined at this point */
                    if (result) {
-                       st->ln++;
+                       st->u.curlym.matches++;
                        locinput = PL_reginput;
                    }
                    else
                        sayNO;
                }
-           }
-           else {
-               DEBUG_EXECUTE_r(
-                   PerlIO_printf(Perl_debug_log,
-                             "%*s  matched %"IVdf" times, len=%"IVdf"...\n",
-                             (int)(REPORT_CODE_OFF+PL_regindent*2), "",
-                             (IV) st->u.curlym.matches, (IV)st->u.curlym.l)
-                   );
-               if (st->u.curlym.matches >= st->ln) {
-                   if (HAS_TEXT(next) || JUMPABLE(next)) {
-                       regnode *text_node = next;
-
-                       if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
-
-                       if (! HAS_TEXT(text_node)) st->u.curlym.c1 = st->u.curlym.c2 = -1000;
-                       else {
-                           if (PL_regkind[(U8)OP(text_node)] == REF) {
-                               st->u.curlym.c1 = st->u.curlym.c2 = -1000;
-                               goto assume_ok_REG;
-                           }
-                           else { st->u.curlym.c1 = (U8)*STRING(text_node); }
-
-                           if (OP(text_node) == EXACTF || OP(text_node) == REFF)
-                               st->u.curlym.c2 = PL_fold[st->u.curlym.c1];
-                           else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
-                               st->u.curlym.c2 = PL_fold_locale[st->u.curlym.c1];
-                           else
-                               st->u.curlym.c2 = st->u.curlym.c1;
-                       }
-                   }
-                   else
-                       st->u.curlym.c1 = st->u.curlym.c2 = -1000;
-               }
-           assume_ok_REG:
-               REGCP_SET(st->u.curlym.lastcp);
-               while (st->u.curlym.matches >= st->ln) {
-                   /* If it could work, try it. */
-                   if (st->u.curlym.c1 == -1000 ||
-                       UCHARAT(PL_reginput) == st->u.curlym.c1 ||
-                       UCHARAT(PL_reginput) == st->u.curlym.c2)
-                   {
-                       DEBUG_EXECUTE_r(
-                           PerlIO_printf(Perl_debug_log,
-                               "%*s  trying tail with matches=%"IVdf"...\n",
-                               (int)(REPORT_CODE_OFF+PL_regindent*2),
-                               "", (IV)st->u.curlym.matches)
-                           );
-                       if (st->u.curlym.paren) {
-                           if (st->u.curlym.matches) {
-                               PL_regstartp[st->u.curlym.paren]
-                                   = HOPc(PL_reginput, -st->u.curlym.l) - PL_bostr;
-                               PL_regendp[st->u.curlym.paren] = PL_reginput - PL_bostr;
-                           }
-                           else
-                               PL_regendp[st->u.curlym.paren] = -1;
-                       }
-                       /* resume to current state on success */
-                       st->u.yes.prev_yes_state = yes_state;
-                       yes_state = st;
-                       REGMATCH(next, CURLYM4);
-                       yes_state = st->u.yes.prev_yes_state;
-                       /*** all unsaved local vars undefined at this point */
-                       if (result)
-                           /* XXX tmp sayYES; */
-                           sayYES_FINAL;
-                       REGCP_UNWIND(st->u.curlym.lastcp);
-                   }
-                   /* Couldn't or didn't -- back up. */
+               else {
                    st->u.curlym.matches--;
                    locinput = HOPc(locinput, -st->u.curlym.l);
                    PL_reginput = locinput;
@@ -4096,10 +4037,11 @@ S_regmatch(pTHX_ const regmatch_info *reginfo, regnode *prog)
 
                if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
 
-               if (! HAS_TEXT(text_node)) st->u.plus.c1 = st->u.plus.c2 = -1000;
+               if (! HAS_TEXT(text_node))
+                   st->u.plus.c1 = st->u.plus.c2 = CHRTEST_VOID;
                else {
                    if (PL_regkind[(U8)OP(text_node)] == REF) {
-                       st->u.plus.c1 = st->u.plus.c2 = -1000;
+                       st->u.plus.c1 = st->u.plus.c2 = CHRTEST_VOID;
                        goto assume_ok_easy;
                    }
                    else { s = (U8*)STRING(text_node); }
@@ -4133,7 +4075,7 @@ S_regmatch(pTHX_ const regmatch_info *reginfo, regnode *prog)
                }
            }
            else
-               st->u.plus.c1 = st->u.plus.c2 = -1000;
+               st->u.plus.c1 = st->u.plus.c2 = CHRTEST_VOID;
        assume_ok_easy:
            PL_reginput = locinput;
            if (st->minmod) {
@@ -4142,7 +4084,7 @@ S_regmatch(pTHX_ const regmatch_info *reginfo, regnode *prog)
                    sayNO;
                locinput = PL_reginput;
                REGCP_SET(st->u.plus.lastcp);
-               if (st->u.plus.c1 != -1000) {
+               if (st->u.plus.c1 != CHRTEST_VOID) {
                    st->u.plus.old = locinput;
                    st->u.plus.count = 0;
 
@@ -4191,11 +4133,11 @@ S_regmatch(pTHX_ const regmatch_info *reginfo, regnode *prog)
                                    st->u.plus.count++;
                                }
                            } else {
-                               STRLEN len;
                                /* count initialised to
                                 * utf8_distance(old, locinput) */
                                while (locinput <= st->u.plus.e) {
-                                   UV c = utf8n_to_uvchr((U8*)locinput,
+                                   STRLEN len;
+                                   const UV c = utf8n_to_uvchr((U8*)locinput,
                                                          UTF8_MAXBYTES, &len,
                                                          uniflags);
                                    if (c == (UV)st->u.plus.c1 || c == (UV)st->u.plus.c2)
@@ -4230,7 +4172,7 @@ S_regmatch(pTHX_ const regmatch_info *reginfo, regnode *prog)
                else
                while (n >= st->ln || (n == REG_INFTY && st->ln > 0)) { /* ln overflow ? */
                    UV c;
-                   if (st->u.plus.c1 != -1000) {
+                   if (st->u.plus.c1 != CHRTEST_VOID) {
                        if (do_utf8)
                            c = utf8n_to_uvchr((U8*)PL_reginput,
                                               UTF8_MAXBYTES, 0,
@@ -4246,7 +4188,7 @@ S_regmatch(pTHX_ const regmatch_info *reginfo, regnode *prog)
                        }
                    }
                    /* If it could work, try it. */
-                   else if (st->u.plus.c1 == -1000)
+                   else if (st->u.plus.c1 == CHRTEST_VOID)
                    {
                        TRYPAREN(st->u.plus.paren, st->ln, PL_reginput, PLUS3);
                        /*** all unsaved local vars undefined at this point */
@@ -4280,7 +4222,7 @@ S_regmatch(pTHX_ const regmatch_info *reginfo, regnode *prog)
                {
                    UV c = 0;
                    while (n >= st->ln) {
-                       if (st->u.plus.c1 != -1000) {
+                       if (st->u.plus.c1 != CHRTEST_VOID) {
                            if (do_utf8)
                                c = utf8n_to_uvchr((U8*)PL_reginput,
                                                   UTF8_MAXBYTES, 0,
@@ -4289,7 +4231,7 @@ S_regmatch(pTHX_ const regmatch_info *reginfo, regnode *prog)
                                c = UCHARAT(PL_reginput);
                        }
                        /* If it could work, try it. */
-                       if (st->u.plus.c1 == -1000 || c == (UV)st->u.plus.c1 || c == (UV)st->u.plus.c2)
+                       if (st->u.plus.c1 == CHRTEST_VOID || c == (UV)st->u.plus.c1 || c == (UV)st->u.plus.c2)
                            {
                                TRYPAREN(st->u.plus.paren, n, PL_reginput, PLUS4);
                                /*** all unsaved local vars undefined at this point */
@@ -4433,8 +4375,9 @@ yes_final:
         * pop to the state marked by yes_state and continue from there */
 
        /*XXX tmp for CURLYM*/
-       regmatch_slab *oslab = PL_regmatch_slab;
-       regmatch_state *ost = st, *oys=yes_state;
+       regmatch_slab * const oslab = PL_regmatch_slab;
+       regmatch_state * const ost = st;
+       regmatch_state * const oys = yes_state;
        int odepth = depth;
 
        assert(st != yes_state);
@@ -4463,7 +4406,7 @@ yes_final:
            /* Restore parens of the caller without popping the
             * savestack */
            {
-               I32 tmp = PL_savestack_ix;
+               const I32 tmp = PL_savestack_ix;
                PL_savestack_ix = st->u.eval.lastcp;
                regcppop(rex);
                PL_savestack_ix = tmp;
@@ -4496,7 +4439,6 @@ yes_final:
        case resume_CURLYM1:
        case resume_CURLYM2:
        case resume_CURLYM3:
-       case resume_CURLYM4:
            PL_regmatch_slab =oslab;
            st = ost;
            PL_regmatch_state = st;
@@ -4550,8 +4492,6 @@ yes:
            goto resume_point_CURLYM2;
        case resume_CURLYM3:
            goto resume_point_CURLYM3;
-       case resume_CURLYM4:
-           goto resume_point_CURLYM4;
        case resume_PLUS1:
            goto resume_point_PLUS1;
        case resume_PLUS2:
@@ -4678,8 +4618,6 @@ do_no:
            goto resume_point_CURLYM2;
        case resume_CURLYM3:
            goto resume_point_CURLYM3;
-       case resume_CURLYM4:
-           goto resume_point_CURLYM4;
        case resume_IFMATCH:
            yes_state = st->u.yes.prev_yes_state;
            if (st->logical) {
@@ -4718,10 +4656,10 @@ final_exit:
 
     /* free all slabs above current one */
     if (orig_slab->next) {
-       regmatch_slab *osl, *sl = orig_slab->next;
+       regmatch_slab *sl = orig_slab->next;
        orig_slab->next = NULL;
        while (sl) {
-           osl = sl;
+           regmatch_slab * const osl = sl;
            sl = sl->next;
            Safefree(osl);
        }
@@ -4980,6 +4918,7 @@ S_regrepeat(pTHX_ const regexp *prog, const regnode *p, I32 max)
 }
 
 
+#ifndef PERL_IN_XSUB_RE
 /*
 - regclass_swash - prepare the utf8 swash
 */
@@ -5027,6 +4966,7 @@ Perl_regclass_swash(pTHX_ const regexp *prog, register const regnode* node, bool
 
     return sw;
 }
+#endif
 
 /*
  - reginclass - determine if a character falls into a character class