regexec.c

   1 /*    regexec.c
   2  */
   3
   4 /*
   5  * "One Ring to rule them all, One Ring to find them..."
   6  */
   7
   8 /* This file contains functions for executing a regular expression.  See
   9  * also regcomp.c which funnily enough, contains functions for compiling
  10  * a regular expression.
  11  *
  12  * This file is also copied at build time to ext/re/re_exec.c, where
  13  * it's built with -DPERL_EXT_RE_BUILD -DPERL_EXT_RE_DEBUG -DPERL_EXT.
  14  * This causes the main functions to be compiled under new names and with
  15  * debugging support added, which makes "use re 'debug'" work.
  16  */
  17
  18 /* NOTE: this is derived from Henry Spencer's regexp code, and should not
  19  * confused with the original package (see point 3 below).  Thanks, Henry!
  20  */
  21
  22 /* Additional note: this code is very heavily munged from Henry's version
  23  * in places.  In some spots I've traded clarity for efficiency, so don't
  24  * blame Henry for some of the lack of readability.
  25  */
  26
  27 /* The names of the functions have been changed from regcomp and
  28  * regexec to  pregcomp and pregexec in order to avoid conflicts
  29  * with the POSIX routines of the same names.
  30 */
  31
  32 #ifdef PERL_EXT_RE_BUILD
  33 #include "re_top.h"
  34 #endif
  35
  36 /*
  37  * pregcomp and pregexec -- regsub and regerror are not used in perl
  38  *
  39  *      Copyright (c) 1986 by University of Toronto.
  40  *      Written by Henry Spencer.  Not derived from licensed software.
  41  *
  42  *      Permission is granted to anyone to use this software for any
  43  *      purpose on any computer system, and to redistribute it freely,
  44  *      subject to the following restrictions:
  45  *
  46  *      1. The author is not responsible for the consequences of use of
  47  *              this software, no matter how awful, even if they arise
  48  *              from defects in it.
  49  *
  50  *      2. The origin of this software must not be misrepresented, either
  51  *              by explicit claim or by omission.
  52  *
  53  *      3. Altered versions must be plainly marked as such, and must not
  54  *              be misrepresented as being the original software.
  55  *
  56  ****    Alterations to Henry's code are...
  57  ****
  58  ****    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
  59  ****    2000, 2001, 2002, 2003, 2004, 2005, 2006, by Larry Wall and others
  60  ****
  61  ****    You may distribute under the terms of either the GNU General Public
  62  ****    License or the Artistic License, as specified in the README file.
  63  *
  64  * Beware that some of this code is subtly aware of the way operator
  65  * precedence is structured in regular expressions.  Serious changes in
  66  * regular-expression syntax might require a total rethink.
  67  */
  68 #include "EXTERN.h"
  69 #define PERL_IN_REGEXEC_C
  70 #include "perl.h"
  71
  72 #ifdef PERL_IN_XSUB_RE
  73 #  include "re_comp.h"
  74 #else
  75 #  include "regcomp.h"
  76 #endif
  77
  78 #define RF_tainted      1               /* tainted information used? */
  79 #define RF_warned       2               /* warned about big count? */
  80
  81 #define RF_utf8         8               /* Pattern contains multibyte chars? */
  82
  83 #define UTF ((PL_reg_flags & RF_utf8) != 0)
  84
  85 #define RS_init         1               /* eval environment created */
  86 #define RS_set          2               /* replsv value is set */
  87
  88 #ifndef STATIC
  89 #define STATIC  static
  90 #endif
  91
  92 #define REGINCLASS(prog,p,c)  (ANYOF_FLAGS(p) ? reginclass(prog,p,c,0,0) : ANYOF_BITMAP_TEST(p,*(c)))
  93
  94 /*
  95  * Forwards.
  96  */
  97
  98 #define CHR_SVLEN(sv) (do_utf8 ? sv_len_utf8(sv) : SvCUR(sv))
  99 #define CHR_DIST(a,b) (PL_reg_match_utf8 ? utf8_distance(a,b) : a - b)
 100
 101 #define HOPc(pos,off) \
 102         (char *)(PL_reg_match_utf8 \
 103             ? reghop3((U8*)pos, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr)) \
 104             : (U8*)(pos + off))
 105 #define HOPBACKc(pos, off) \
 106         (char*)(PL_reg_match_utf8\
 107             ? reghopmaybe3((U8*)pos, -off, (U8*)PL_bostr) \
 108             : (pos - off >= PL_bostr)           \
 109                 ? (U8*)pos - off                \
 110                 : NULL)
 111
 112 #define HOP3(pos,off,lim) (PL_reg_match_utf8 ? reghop3((U8*)(pos), off, (U8*)(lim)) : (U8*)(pos + off))
 113 #define HOP3c(pos,off,lim) ((char*)HOP3(pos,off,lim))
 114
 115 #define LOAD_UTF8_CHARCLASS(class,str) STMT_START { \
 116     if (!CAT2(PL_utf8_,class)) { bool ok; ENTER; save_re_context(); ok=CAT2(is_utf8_,class)((const U8*)str); assert(ok); LEAVE; } } STMT_END
 117 #define LOAD_UTF8_CHARCLASS_ALNUM() LOAD_UTF8_CHARCLASS(alnum,"a")
 118 #define LOAD_UTF8_CHARCLASS_DIGIT() LOAD_UTF8_CHARCLASS(digit,"0")
 119 #define LOAD_UTF8_CHARCLASS_SPACE() LOAD_UTF8_CHARCLASS(space," ")
 120 #define LOAD_UTF8_CHARCLASS_MARK()  LOAD_UTF8_CHARCLASS(mark, "\xcd\x86")
 121
 122 /* TODO: Combine JUMPABLE and HAS_TEXT to cache OP(rn) */
 123
 124 /* for use after a quantifier and before an EXACT-like node -- japhy */
 125 #define JUMPABLE(rn) ( \
 126     OP(rn) == OPEN || OP(rn) == CLOSE || OP(rn) == EVAL || \
 127     OP(rn) == SUSPEND || OP(rn) == IFMATCH || \
 128     OP(rn) == PLUS || OP(rn) == MINMOD || \
 129     (PL_regkind[OP(rn)] == CURLY && ARG1(rn) > 0) \
 130 )
 131
 132 #define HAS_TEXT(rn) ( \
 133     PL_regkind[OP(rn)] == EXACT || PL_regkind[OP(rn)] == REF \
 134 )
 135
 136 /*
 137   Search for mandatory following text node; for lookahead, the text must
 138   follow but for lookbehind (rn->flags != 0) we skip to the next step.
 139 */
 140 #define FIND_NEXT_IMPT(rn) STMT_START { \
 141     while (JUMPABLE(rn)) { \
 142         const OPCODE type = OP(rn); \
 143         if (type == SUSPEND || PL_regkind[type] == CURLY) \
 144             rn = NEXTOPER(NEXTOPER(rn)); \
 145         else if (type == PLUS) \
 146             rn = NEXTOPER(rn); \
 147         else if (type == IFMATCH) \
 148             rn = (rn->flags == 0) ? NEXTOPER(NEXTOPER(rn)) : rn + ARG(rn); \
 149         else rn += NEXT_OFF(rn); \
 150     } \
 151 } STMT_END
 152
 153 static void restore_pos(pTHX_ void *arg);
 154
 155 STATIC CHECKPOINT
 156 S_regcppush(pTHX_ I32 parenfloor)
 157 {
 158     dVAR;
 159     const int retval = PL_savestack_ix;
 160 #define REGCP_PAREN_ELEMS 4
 161     const int paren_elems_to_push = (PL_regsize - parenfloor) * REGCP_PAREN_ELEMS;
 162     int p;
 163     GET_RE_DEBUG_FLAGS_DECL;
 164
 165     if (paren_elems_to_push < 0)
 166         Perl_croak(aTHX_ "panic: paren_elems_to_push < 0");
 167
 168 #define REGCP_OTHER_ELEMS 8
 169     SSGROW(paren_elems_to_push + REGCP_OTHER_ELEMS);
 170     for (p = PL_regsize; p > parenfloor; p--) {
 171 /* REGCP_PARENS_ELEMS are pushed per pairs of parentheses. */
 172         SSPUSHINT(PL_regendp[p]);
 173         SSPUSHINT(PL_regstartp[p]);
 174         SSPUSHPTR(PL_reg_start_tmp[p]);
 175         SSPUSHINT(p);
 176         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 177           "     saving \\%"UVuf" %"IVdf"(%"IVdf")..%"IVdf"\n",
 178                       (UV)p, (IV)PL_regstartp[p],
 179                       (IV)(PL_reg_start_tmp[p] - PL_bostr),
 180                       (IV)PL_regendp[p]
 181         ));
 182     }
 183 /* REGCP_OTHER_ELEMS are pushed in any case, parentheses or no. */
 184     SSPUSHPTR(PL_regstartp);
 185     SSPUSHPTR(PL_regendp);
 186     SSPUSHINT(PL_regsize);
 187     SSPUSHINT(*PL_reglastparen);
 188     SSPUSHINT(*PL_reglastcloseparen);
 189     SSPUSHPTR(PL_reginput);
 190 #define REGCP_FRAME_ELEMS 2
 191 /* REGCP_FRAME_ELEMS are part of the REGCP_OTHER_ELEMS and
 192  * are needed for the regexp context stack bookkeeping. */
 193     SSPUSHINT(paren_elems_to_push + REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
 194     SSPUSHINT(SAVEt_REGCONTEXT); /* Magic cookie. */
 195
 196     return retval;
 197 }
 198
 199 /* These are needed since we do not localize EVAL nodes: */
 200 #define REGCP_SET(cp)                                           \
 201     DEBUG_STATE_r(                                              \
 202             PerlIO_printf(Perl_debug_log,                       \
 203                 "  Setting an EVAL scope, savestack=%"IVdf"\n", \
 204                 (IV)PL_savestack_ix));                          \
 205     cp = PL_savestack_ix
 206
 207 #define REGCP_UNWIND(cp)                                        \
 208     DEBUG_STATE_r(                                              \
 209         if (cp != PL_savestack_ix)                              \
 210             PerlIO_printf(Perl_debug_log,                       \
 211                 "  Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
 212                 (IV)(cp), (IV)PL_savestack_ix));                \
 213     regcpblow(cp)
 214
 215 STATIC char *
 216 S_regcppop(pTHX_ const regexp *rex)
 217 {
 218     dVAR;
 219     I32 i;
 220     char *input;
 221
 222     GET_RE_DEBUG_FLAGS_DECL;
 223
 224     /* Pop REGCP_OTHER_ELEMS before the parentheses loop starts. */
 225     i = SSPOPINT;
 226     assert(i == SAVEt_REGCONTEXT); /* Check that the magic cookie is there. */
 227     i = SSPOPINT; /* Parentheses elements to pop. */
 228     input = (char *) SSPOPPTR;
 229     *PL_reglastcloseparen = SSPOPINT;
 230     *PL_reglastparen = SSPOPINT;
 231     PL_regsize = SSPOPINT;
 232     PL_regendp=(I32 *) SSPOPPTR;
 233     PL_regstartp=(I32 *) SSPOPPTR;
 234
 235
 236     /* Now restore the parentheses context. */
 237     for (i -= (REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
 238          i > 0; i -= REGCP_PAREN_ELEMS) {
 239         I32 tmps;
 240         U32 paren = (U32)SSPOPINT;
 241         PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
 242         PL_regstartp[paren] = SSPOPINT;
 243         tmps = SSPOPINT;
 244         if (paren <= *PL_reglastparen)
 245             PL_regendp[paren] = tmps;
 246         DEBUG_EXECUTE_r(
 247             PerlIO_printf(Perl_debug_log,
 248                           "     restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
 249                           (UV)paren, (IV)PL_regstartp[paren],
 250                           (IV)(PL_reg_start_tmp[paren] - PL_bostr),
 251                           (IV)PL_regendp[paren],
 252                           (paren > *PL_reglastparen ? "(no)" : ""));
 253         );
 254     }
 255     DEBUG_EXECUTE_r(
 256         if (*PL_reglastparen + 1 <= rex->nparens) {
 257             PerlIO_printf(Perl_debug_log,
 258                           "     restoring \\%"IVdf"..\\%"IVdf" to undef\n",
 259                           (IV)(*PL_reglastparen + 1), (IV)rex->nparens);
 260         }
 261     );
 262 #if 1
 263     /* It would seem that the similar code in regtry()
 264      * already takes care of this, and in fact it is in
 265      * a better location to since this code can #if 0-ed out
 266      * but the code in regtry() is needed or otherwise tests
 267      * requiring null fields (pat.t#187 and split.t#{13,14}
 268      * (as of patchlevel 7877)  will fail.  Then again,
 269      * this code seems to be necessary or otherwise
 270      * building DynaLoader will fail:
 271      * "Error: '*' not in typemap in DynaLoader.xs, line 164"
 272      * --jhi */
 273     for (i = *PL_reglastparen + 1; (U32)i <= rex->nparens; i++) {
 274         if (i > PL_regsize)
 275             PL_regstartp[i] = -1;
 276         PL_regendp[i] = -1;
 277     }
 278 #endif
 279     return input;
 280 }
 281
 282 #define regcpblow(cp) LEAVE_SCOPE(cp)   /* Ignores regcppush()ed data. */
 283
 284 /*
 285  * pregexec and friends
 286  */
 287
 288 #ifndef PERL_IN_XSUB_RE
 289 /*
 290  - pregexec - match a regexp against a string
 291  */
 292 I32
 293 Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend,
 294          char *strbeg, I32 minend, SV *screamer, U32 nosave)
 295 /* strend: pointer to null at end of string */
 296 /* strbeg: real beginning of string */
 297 /* minend: end of match must be >=minend after stringarg. */
 298 /* nosave: For optimizations. */
 299 {
 300     return
 301         regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
 302                       nosave ? 0 : REXEC_COPY_STR);
 303 }
 304 #endif
 305
 306 /*
 307  * Need to implement the following flags for reg_anch:
 308  *
 309  * USE_INTUIT_NOML              - Useful to call re_intuit_start() first
 310  * USE_INTUIT_ML
 311  * INTUIT_AUTORITATIVE_NOML     - Can trust a positive answer
 312  * INTUIT_AUTORITATIVE_ML
 313  * INTUIT_ONCE_NOML             - Intuit can match in one location only.
 314  * INTUIT_ONCE_ML
 315  *
 316  * Another flag for this function: SECOND_TIME (so that float substrs
 317  * with giant delta may be not rechecked).
 318  */
 319
 320 /* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
 321
 322 /* If SCREAM, then SvPVX_const(sv) should be compatible with strpos and strend.
 323    Otherwise, only SvCUR(sv) is used to get strbeg. */
 324
 325 /* XXXX We assume that strpos is strbeg unless sv. */
 326
 327 /* XXXX Some places assume that there is a fixed substring.
 328         An update may be needed if optimizer marks as "INTUITable"
 329         RExen without fixed substrings.  Similarly, it is assumed that
 330         lengths of all the strings are no more than minlen, thus they
 331         cannot come from lookahead.
 332         (Or minlen should take into account lookahead.) */
 333
 334 /* A failure to find a constant substring means that there is no need to make
 335    an expensive call to REx engine, thus we celebrate a failure.  Similarly,
 336    finding a substring too deep into the string means that less calls to
 337    regtry() should be needed.
 338
 339    REx compiler's optimizer found 4 possible hints:
 340         a) Anchored substring;
 341         b) Fixed substring;
 342         c) Whether we are anchored (beginning-of-line or \G);
 343         d) First node (of those at offset 0) which may distingush positions;
 344    We use a)b)d) and multiline-part of c), and try to find a position in the
 345    string which does not contradict any of them.
 346  */
 347
 348 /* Most of decisions we do here should have been done at compile time.
 349    The nodes of the REx which we used for the search should have been
 350    deleted from the finite automaton. */
 351
 352 char *
 353 Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
 354                      char *strend, U32 flags, re_scream_pos_data *data)
 355 {
 356     dVAR;
 357     register I32 start_shift = 0;
 358     /* Should be nonnegative! */
 359     register I32 end_shift   = 0;
 360     register char *s;
 361     register SV *check;
 362     char *strbeg;
 363     char *t;
 364     const bool do_utf8 = (sv && SvUTF8(sv)) ? 1 : 0; /* if no sv we have to assume bytes */
 365     I32 ml_anch;
 366     register char *other_last = NULL;   /* other substr checked before this */
 367     char *check_at = NULL;              /* check substr found at this pos */
 368     const I32 multiline = prog->reganch & PMf_MULTILINE;
 369 #ifdef DEBUGGING
 370     const char * const i_strpos = strpos;
 371 #endif
 372
 373     GET_RE_DEBUG_FLAGS_DECL;
 374
 375     RX_MATCH_UTF8_set(prog,do_utf8);
 376
 377     if (prog->reganch & ROPT_UTF8) {
 378         PL_reg_flags |= RF_utf8;
 379     }
 380     DEBUG_EXECUTE_r(
 381         debug_start_match(prog, do_utf8, strpos, strend,
 382             sv ? "Guessing start of match in sv for"
 383                : "Guessing start of match in string for");
 384               );
 385
 386     /* CHR_DIST() would be more correct here but it makes things slow. */
 387     if (prog->minlen > strend - strpos) {
 388         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 389                               "String too short... [re_intuit_start]\n"));
 390         goto fail;
 391     }
 392
 393     strbeg = (sv && SvPOK(sv)) ? strend - SvCUR(sv) : strpos;
 394     PL_regeol = strend;
 395     if (do_utf8) {
 396         if (!prog->check_utf8 && prog->check_substr)
 397             to_utf8_substr(prog);
 398         check = prog->check_utf8;
 399     } else {
 400         if (!prog->check_substr && prog->check_utf8)
 401             to_byte_substr(prog);
 402         check = prog->check_substr;
 403     }
 404     if (check == &PL_sv_undef) {
 405         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 406                 "Non-utf8 string cannot match utf8 check string\n"));
 407         goto fail;
 408     }
 409     if (prog->reganch & ROPT_ANCH) {    /* Match at beg-of-str or after \n */
 410         ml_anch = !( (prog->reganch & ROPT_ANCH_SINGLE)
 411                      || ( (prog->reganch & ROPT_ANCH_BOL)
 412                           && !multiline ) );    /* Check after \n? */
 413
 414         if (!ml_anch) {
 415           if ( !(prog->reganch & (ROPT_ANCH_GPOS /* Checked by the caller */
 416                                   | ROPT_IMPLICIT)) /* not a real BOL */
 417                /* SvCUR is not set on references: SvRV and SvPVX_const overlap */
 418                && sv && !SvROK(sv)
 419                && (strpos != strbeg)) {
 420               DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
 421               goto fail;
 422           }
 423           if (prog->check_offset_min == prog->check_offset_max &&
 424               !(prog->reganch & ROPT_CANY_SEEN)) {
 425             /* Substring at constant offset from beg-of-str... */
 426             I32 slen;
 427
 428             s = HOP3c(strpos, prog->check_offset_min, strend);
 429
 430             if (SvTAIL(check)) {
 431                 slen = SvCUR(check);    /* >= 1 */
 432
 433                 if ( strend - s > slen || strend - s < slen - 1
 434                      || (strend - s == slen && strend[-1] != '\n')) {
 435                     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
 436                     goto fail_finish;
 437                 }
 438                 /* Now should match s[0..slen-2] */
 439                 slen--;
 440                 if (slen && (*SvPVX_const(check) != *s
 441                              || (slen > 1
 442                                  && memNE(SvPVX_const(check), s, slen)))) {
 443                   report_neq:
 444                     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
 445                     goto fail_finish;
 446                 }
 447             }
 448             else if (*SvPVX_const(check) != *s
 449                      || ((slen = SvCUR(check)) > 1
 450                          && memNE(SvPVX_const(check), s, slen)))
 451                 goto report_neq;
 452             check_at = s;
 453             goto success_at_start;
 454           }
 455         }
 456         /* Match is anchored, but substr is not anchored wrt beg-of-str. */
 457         s = strpos;
 458         start_shift = prog->check_offset_min; /* okay to underestimate on CC */
 459         end_shift = prog->check_end_shift;
 460
 461         if (!ml_anch) {
 462             const I32 end = prog->check_offset_max + CHR_SVLEN(check)
 463                                          - (SvTAIL(check) != 0);
 464             const I32 eshift = CHR_DIST((U8*)strend, (U8*)s) - end;
 465
 466             if (end_shift < eshift)
 467                 end_shift = eshift;
 468         }
 469     }
 470     else {                              /* Can match at random position */
 471         ml_anch = 0;
 472         s = strpos;
 473         start_shift = prog->check_offset_min;  /* okay to underestimate on CC */
 474         end_shift = prog->check_end_shift;
 475
 476         /* end shift should be non negative here */
 477     }
 478
 479 #ifdef DEBUGGING        /* 7/99: reports of failure (with the older version) */
 480     if (end_shift < 0)
 481         Perl_croak(aTHX_ "panic: end_shift: %"IVdf" pattern:\n%s\n ",
 482                    (IV)end_shift, prog->precomp);
 483 #endif
 484
 485   restart:
 486     /* Find a possible match in the region s..strend by looking for
 487        the "check" substring in the region corrected by start/end_shift. */
 488
 489     {
 490         I32 srch_start_shift = start_shift;
 491         I32 srch_end_shift = end_shift;
 492         if (srch_start_shift < 0 && strbeg - s > srch_start_shift) {
 493             srch_end_shift -= ((strbeg - s) - srch_start_shift);
 494             srch_start_shift = strbeg - s;
 495         }
 496     DEBUG_OPTIMISE_MORE_r({
 497         PerlIO_printf(Perl_debug_log, "Check offset min: %"IVdf" Start shift: %"IVdf" End shift %"IVdf" Real End Shift: %"IVdf"\n",
 498             (IV)prog->check_offset_min,
 499             (IV)srch_start_shift,
 500             (IV)srch_end_shift,
 501             (IV)prog->check_end_shift);
 502     });
 503
 504     if (flags & REXEC_SCREAM) {
 505         I32 p = -1;                     /* Internal iterator of scream. */
 506         I32 * const pp = data ? data->scream_pos : &p;
 507
 508         if (PL_screamfirst[BmRARE(check)] >= 0
 509             || ( BmRARE(check) == '\n'
 510                  && (BmPREVIOUS(check) == SvCUR(check) - 1)
 511                  && SvTAIL(check) ))
 512             s = screaminstr(sv, check,
 513                             srch_start_shift + (s - strbeg), srch_end_shift, pp, 0);
 514         else
 515             goto fail_finish;
 516         /* we may be pointing at the wrong string */
 517         if (s && RX_MATCH_COPIED(prog))
 518             s = strbeg + (s - SvPVX_const(sv));
 519         if (data)
 520             *data->scream_olds = s;
 521     }
 522     else {
 523         U8* start_point;
 524         U8* end_point;
 525         if (prog->reganch & ROPT_CANY_SEEN) {
 526             start_point= (U8*)(s + srch_start_shift);
 527             end_point= (U8*)(strend - srch_end_shift);
 528         } else {
 529             start_point= HOP3(s, srch_start_shift, srch_start_shift < 0 ? strbeg : strend);
 530             end_point= HOP3(strend, -srch_end_shift, strbeg);
 531         }
 532         DEBUG_OPTIMISE_MORE_r({
 533             PerlIO_printf(Perl_debug_log, "fbm_instr len=%d str=<%.*s>\n",
 534                 (int)(end_point - start_point),
 535                 (int)(end_point - start_point) > 20 ? 20 : (int)(end_point - start_point),
 536                 start_point);
 537         });
 538
 539         s = fbm_instr( start_point, end_point,
 540                       check, multiline ? FBMrf_MULTILINE : 0);
 541     }
 542     }
 543     /* Update the count-of-usability, remove useless subpatterns,
 544         unshift s.  */
 545
 546     DEBUG_EXECUTE_r({
 547         RE_PV_QUOTED_DECL(quoted, do_utf8, PERL_DEBUG_PAD_ZERO(0),
 548             SvPVX_const(check), RE_SV_DUMPLEN(check), 30);
 549         PerlIO_printf(Perl_debug_log, "%s %s substr %s%s%s",
 550                           (s ? "Found" : "Did not find"),
 551             (check == (do_utf8 ? prog->anchored_utf8 : prog->anchored_substr)
 552                 ? "anchored" : "floating"),
 553             quoted,
 554             RE_SV_TAIL(check),
 555             (s ? " at offset " : "...\n") );
 556     });
 557
 558     if (!s)
 559         goto fail_finish;
 560     /* Finish the diagnostic message */
 561     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
 562
 563     /* XXX dmq: first branch is for positive lookbehind...
 564        Our check string is offset from the beginning of the pattern.
 565        So we need to do any stclass tests offset forward from that
 566        point. I think. :-(
 567      */
 568
 569
 570
 571     check_at=s;
 572
 573
 574     /* Got a candidate.  Check MBOL anchoring, and the *other* substr.
 575        Start with the other substr.
 576        XXXX no SCREAM optimization yet - and a very coarse implementation
 577        XXXX /ttx+/ results in anchored="ttx", floating="x".  floating will
 578                 *always* match.  Probably should be marked during compile...
 579        Probably it is right to do no SCREAM here...
 580      */
 581
 582     if (do_utf8 ? (prog->float_utf8 && prog->anchored_utf8)
 583                 : (prog->float_substr && prog->anchored_substr))
 584     {
 585         /* Take into account the "other" substring. */
 586         /* XXXX May be hopelessly wrong for UTF... */
 587         if (!other_last)
 588             other_last = strpos;
 589         if (check == (do_utf8 ? prog->float_utf8 : prog->float_substr)) {
 590           do_other_anchored:
 591             {
 592                 char * const last = HOP3c(s, -start_shift, strbeg);
 593                 char *last1, *last2;
 594                 char * const saved_s = s;
 595                 SV* must;
 596
 597                 t = s - prog->check_offset_max;
 598                 if (s - strpos > prog->check_offset_max  /* signed-corrected t > strpos */
 599                     && (!do_utf8
 600                         || ((t = (char*)reghopmaybe3((U8*)s, -(prog->check_offset_max), (U8*)strpos))
 601                             && t > strpos)))
 602                     NOOP;
 603                 else
 604                     t = strpos;
 605                 t = HOP3c(t, prog->anchored_offset, strend);
 606                 if (t < other_last)     /* These positions already checked */
 607                     t = other_last;
 608                 last2 = last1 = HOP3c(strend, -prog->minlen, strbeg);
 609                 if (last < last1)
 610                     last1 = last;
 611                 /* XXXX It is not documented what units *_offsets are in.
 612                    We assume bytes, but this is clearly wrong.
 613                    Meaning this code needs to be carefully reviewed for errors.
 614                    dmq.
 615                   */
 616
 617                 /* On end-of-str: see comment below. */
 618                 must = do_utf8 ? prog->anchored_utf8 : prog->anchored_substr;
 619                 if (must == &PL_sv_undef) {
 620                     s = (char*)NULL;
 621                     DEBUG_r(must = prog->anchored_utf8);        /* for debug */
 622                 }
 623                 else
 624                     s = fbm_instr(
 625                         (unsigned char*)t,
 626                         HOP3(HOP3(last1, prog->anchored_offset, strend)
 627                                 + SvCUR(must), -(SvTAIL(must)!=0), strbeg),
 628                         must,
 629                         multiline ? FBMrf_MULTILINE : 0
 630                     );
 631                 DEBUG_EXECUTE_r({
 632                     RE_PV_QUOTED_DECL(quoted, do_utf8, PERL_DEBUG_PAD_ZERO(0),
 633                         SvPVX_const(must), RE_SV_DUMPLEN(must), 30);
 634                     PerlIO_printf(Perl_debug_log, "%s anchored substr %s%s",
 635                         (s ? "Found" : "Contradicts"),
 636                         quoted, RE_SV_TAIL(must));
 637                 });
 638
 639
 640                 if (!s) {
 641                     if (last1 >= last2) {
 642                         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 643                                                 ", giving up...\n"));
 644                         goto fail_finish;
 645                     }
 646                     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 647                         ", trying floating at offset %ld...\n",
 648                         (long)(HOP3c(saved_s, 1, strend) - i_strpos)));
 649                     other_last = HOP3c(last1, prog->anchored_offset+1, strend);
 650                     s = HOP3c(last, 1, strend);
 651                     goto restart;
 652                 }
 653                 else {
 654                     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
 655                           (long)(s - i_strpos)));
 656                     t = HOP3c(s, -prog->anchored_offset, strbeg);
 657                     other_last = HOP3c(s, 1, strend);
 658                     s = saved_s;
 659                     if (t == strpos)
 660                         goto try_at_start;
 661                     goto try_at_offset;
 662                 }
 663             }
 664         }
 665         else {          /* Take into account the floating substring. */
 666             char *last, *last1;
 667             char * const saved_s = s;
 668             SV* must;
 669
 670             t = HOP3c(s, -start_shift, strbeg);
 671             last1 = last =
 672                 HOP3c(strend, -prog->minlen + prog->float_min_offset, strbeg);
 673             if (CHR_DIST((U8*)last, (U8*)t) > prog->float_max_offset)
 674                 last = HOP3c(t, prog->float_max_offset, strend);
 675             s = HOP3c(t, prog->float_min_offset, strend);
 676             if (s < other_last)
 677                 s = other_last;
 678  /* XXXX It is not documented what units *_offsets are in.  Assume bytes.  */
 679             must = do_utf8 ? prog->float_utf8 : prog->float_substr;
 680             /* fbm_instr() takes into account exact value of end-of-str
 681                if the check is SvTAIL(ed).  Since false positives are OK,
 682                and end-of-str is not later than strend we are OK. */
 683             if (must == &PL_sv_undef) {
 684                 s = (char*)NULL;
 685                 DEBUG_r(must = prog->float_utf8);       /* for debug message */
 686             }
 687             else
 688                 s = fbm_instr((unsigned char*)s,
 689                               (unsigned char*)last + SvCUR(must)
 690                                   - (SvTAIL(must)!=0),
 691                               must, multiline ? FBMrf_MULTILINE : 0);
 692             DEBUG_EXECUTE_r({
 693                 RE_PV_QUOTED_DECL(quoted, do_utf8, PERL_DEBUG_PAD_ZERO(0),
 694                     SvPVX_const(must), RE_SV_DUMPLEN(must), 30);
 695                 PerlIO_printf(Perl_debug_log, "%s floating substr %s%s",
 696                     (s ? "Found" : "Contradicts"),
 697                     quoted, RE_SV_TAIL(must));
 698             });
 699             if (!s) {
 700                 if (last1 == last) {
 701                     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 702                                             ", giving up...\n"));
 703                     goto fail_finish;
 704                 }
 705                 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 706                     ", trying anchored starting at offset %ld...\n",
 707                     (long)(saved_s + 1 - i_strpos)));
 708                 other_last = last;
 709                 s = HOP3c(t, 1, strend);
 710                 goto restart;
 711             }
 712             else {
 713                 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
 714                       (long)(s - i_strpos)));
 715                 other_last = s; /* Fix this later. --Hugo */
 716                 s = saved_s;
 717                 if (t == strpos)
 718                     goto try_at_start;
 719                 goto try_at_offset;
 720             }
 721         }
 722     }
 723
 724
 725     t= (char*)HOP3( s, -prog->check_offset_max, (prog->check_offset_max<0) ? strend : strpos);
 726
 727     DEBUG_OPTIMISE_MORE_r(
 728         PerlIO_printf(Perl_debug_log,
 729             "Check offset min:%"IVdf" max:%"IVdf" S:%"IVdf" t:%"IVdf" D:%"IVdf" end:%"IVdf"\n",
 730             (IV)prog->check_offset_min,
 731             (IV)prog->check_offset_max,
 732             (IV)(s-strpos),
 733             (IV)(t-strpos),
 734             (IV)(t-s),
 735             (IV)(strend-strpos)
 736         )
 737     );
 738
 739     if (s - strpos > prog->check_offset_max  /* signed-corrected t > strpos */
 740         && (!do_utf8
 741             || ((t = (char*)reghopmaybe3((U8*)s, -prog->check_offset_max, (U8*) ((prog->check_offset_max<0) ? strend : strpos)))
 742                  && t > strpos)))
 743     {
 744         /* Fixed substring is found far enough so that the match
 745            cannot start at strpos. */
 746       try_at_offset:
 747         if (ml_anch && t[-1] != '\n') {
 748             /* Eventually fbm_*() should handle this, but often
 749                anchored_offset is not 0, so this check will not be wasted. */
 750             /* XXXX In the code below we prefer to look for "^" even in
 751                presence of anchored substrings.  And we search even
 752                beyond the found float position.  These pessimizations
 753                are historical artefacts only.  */
 754           find_anchor:
 755             while (t < strend - prog->minlen) {
 756                 if (*t == '\n') {
 757                     if (t < check_at - prog->check_offset_min) {
 758                         if (do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) {
 759                             /* Since we moved from the found position,
 760                                we definitely contradict the found anchored
 761                                substr.  Due to the above check we do not
 762                                contradict "check" substr.
 763                                Thus we can arrive here only if check substr
 764                                is float.  Redo checking for "other"=="fixed".
 765                              */
 766                             strpos = t + 1;
 767                             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
 768                                 PL_colors[0], PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
 769                             goto do_other_anchored;
 770                         }
 771                         /* We don't contradict the found floating substring. */
 772                         /* XXXX Why not check for STCLASS? */
 773                         s = t + 1;
 774                         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
 775                             PL_colors[0], PL_colors[1], (long)(s - i_strpos)));
 776                         goto set_useful;
 777                     }
 778                     /* Position contradicts check-string */
 779                     /* XXXX probably better to look for check-string
 780                        than for "\n", so one should lower the limit for t? */
 781                     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n",
 782                         PL_colors[0], PL_colors[1], (long)(t + 1 - i_strpos)));
 783                     other_last = strpos = s = t + 1;
 784                     goto restart;
 785                 }
 786                 t++;
 787             }
 788             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
 789                         PL_colors[0], PL_colors[1]));
 790             goto fail_finish;
 791         }
 792         else {
 793             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Starting position does not contradict /%s^%s/m...\n",
 794                         PL_colors[0], PL_colors[1]));
 795         }
 796         s = t;
 797       set_useful:
 798         ++BmUSEFUL(do_utf8 ? prog->check_utf8 : prog->check_substr);    /* hooray/5 */
 799     }
 800     else {
 801         /* The found string does not prohibit matching at strpos,
 802            - no optimization of calling REx engine can be performed,
 803            unless it was an MBOL and we are not after MBOL,
 804            or a future STCLASS check will fail this. */
 805       try_at_start:
 806         /* Even in this situation we may use MBOL flag if strpos is offset
 807            wrt the start of the string. */
 808         if (ml_anch && sv && !SvROK(sv) /* See prev comment on SvROK */
 809             && (strpos != strbeg) && strpos[-1] != '\n'
 810             /* May be due to an implicit anchor of m{.*foo}  */
 811             && !(prog->reganch & ROPT_IMPLICIT))
 812         {
 813             t = strpos;
 814             goto find_anchor;
 815         }
 816         DEBUG_EXECUTE_r( if (ml_anch)
 817             PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n",
 818                         (long)(strpos - i_strpos), PL_colors[0], PL_colors[1]);
 819         );
 820       success_at_start:
 821         if (!(prog->reganch & ROPT_NAUGHTY)     /* XXXX If strpos moved? */
 822             && (do_utf8 ? (
 823                 prog->check_utf8                /* Could be deleted already */
 824                 && --BmUSEFUL(prog->check_utf8) < 0
 825                 && (prog->check_utf8 == prog->float_utf8)
 826             ) : (
 827                 prog->check_substr              /* Could be deleted already */
 828                 && --BmUSEFUL(prog->check_substr) < 0
 829                 && (prog->check_substr == prog->float_substr)
 830             )))
 831         {
 832             /* If flags & SOMETHING - do not do it many times on the same match */
 833             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "... Disabling check substring...\n"));
 834             SvREFCNT_dec(do_utf8 ? prog->check_utf8 : prog->check_substr);
 835             if (do_utf8 ? prog->check_substr : prog->check_utf8)
 836                 SvREFCNT_dec(do_utf8 ? prog->check_substr : prog->check_utf8);
 837             prog->check_substr = prog->check_utf8 = NULL;       /* disable */
 838             prog->float_substr = prog->float_utf8 = NULL;       /* clear */
 839             check = NULL;                       /* abort */
 840             s = strpos;
 841             /* XXXX This is a remnant of the old implementation.  It
 842                     looks wasteful, since now INTUIT can use many
 843                     other heuristics. */
 844             prog->reganch &= ~RE_USE_INTUIT;
 845         }
 846         else
 847             s = strpos;
 848     }
 849
 850     /* Last resort... */
 851     /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
 852     /* trie stclasses are too expensive to use here, we are better off to
 853        leave it to regmatch itself */
 854     if (prog->regstclass && PL_regkind[OP(prog->regstclass)]!=TRIE) {
 855         /* minlen == 0 is possible if regstclass is \b or \B,
 856            and the fixed substr is ''$.
 857            Since minlen is already taken into account, s+1 is before strend;
 858            accidentally, minlen >= 1 guaranties no false positives at s + 1
 859            even for \b or \B.  But (minlen? 1 : 0) below assumes that
 860            regstclass does not come from lookahead...  */
 861         /* If regstclass takes bytelength more than 1: If charlength==1, OK.
 862            This leaves EXACTF only, which is dealt with in find_byclass().  */
 863         const U8* const str = (U8*)STRING(prog->regstclass);
 864         const int cl_l = (PL_regkind[OP(prog->regstclass)] == EXACT
 865                     ? CHR_DIST(str+STR_LEN(prog->regstclass), str)
 866                     : 1);
 867         char * endpos;
 868         if (prog->anchored_substr || prog->anchored_utf8 || ml_anch)
 869             endpos= HOP3c(s, (prog->minlen ? cl_l : 0), strend);
 870         else if (prog->float_substr || prog->float_utf8)
 871             endpos= HOP3c(HOP3c(check_at, -start_shift, strbeg), cl_l, strend);
 872         else
 873             endpos= strend;
 874
 875         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "start_shift: %"IVdf" check_at: %d s: %d endpos: %d\n",
 876                                       (IV)start_shift, check_at - strbeg, s - strbeg, endpos - strbeg));
 877
 878         t = s;
 879         s = find_byclass(prog, prog->regstclass, s, endpos, NULL);
 880         if (!s) {
 881 #ifdef DEBUGGING
 882             const char *what = NULL;
 883 #endif
 884             if (endpos == strend) {
 885                 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
 886                                 "Could not match STCLASS...\n") );
 887                 goto fail;
 888             }
 889             DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
 890                                    "This position contradicts STCLASS...\n") );
 891             if ((prog->reganch & ROPT_ANCH) && !ml_anch)
 892                 goto fail;
 893             /* Contradict one of substrings */
 894             if (prog->anchored_substr || prog->anchored_utf8) {
 895                 if ((do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) == check) {
 896                     DEBUG_EXECUTE_r( what = "anchored" );
 897                   hop_and_restart:
 898                     s = HOP3c(t, 1, strend);
 899                     if (s + start_shift + end_shift > strend) {
 900                         /* XXXX Should be taken into account earlier? */
 901                         DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
 902                                                "Could not match STCLASS...\n") );
 903                         goto fail;
 904                     }
 905                     if (!check)
 906                         goto giveup;
 907                     DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
 908                                 "Looking for %s substr starting at offset %ld...\n",
 909                                  what, (long)(s + start_shift - i_strpos)) );
 910                     goto restart;
 911                 }
 912                 /* Have both, check_string is floating */
 913                 if (t + start_shift >= check_at) /* Contradicts floating=check */
 914                     goto retry_floating_check;
 915                 /* Recheck anchored substring, but not floating... */
 916                 s = check_at;
 917                 if (!check)
 918                     goto giveup;
 919                 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
 920                           "Looking for anchored substr starting at offset %ld...\n",
 921                           (long)(other_last - i_strpos)) );
 922                 goto do_other_anchored;
 923             }
 924             /* Another way we could have checked stclass at the
 925                current position only: */
 926             if (ml_anch) {
 927                 s = t = t + 1;
 928                 if (!check)
 929                     goto giveup;
 930                 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
 931                           "Looking for /%s^%s/m starting at offset %ld...\n",
 932                           PL_colors[0], PL_colors[1], (long)(t - i_strpos)) );
 933                 goto try_at_offset;
 934             }
 935             if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))     /* Could have been deleted */
 936                 goto fail;
 937             /* Check is floating subtring. */
 938           retry_floating_check:
 939             t = check_at - start_shift;
 940             DEBUG_EXECUTE_r( what = "floating" );
 941             goto hop_and_restart;
 942         }
 943         if (t != s) {
 944             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 945                         "By STCLASS: moving %ld --> %ld\n",
 946                                   (long)(t - i_strpos), (long)(s - i_strpos))
 947                    );
 948         }
 949         else {
 950             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 951                                   "Does not contradict STCLASS...\n");
 952                    );
 953         }
 954     }
 955   giveup:
 956     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%s%s:%s match at offset %ld\n",
 957                           PL_colors[4], (check ? "Guessed" : "Giving up"),
 958                           PL_colors[5], (long)(s - i_strpos)) );
 959     return s;
 960
 961   fail_finish:                          /* Substring not found */
 962     if (prog->check_substr || prog->check_utf8)         /* could be removed already */
 963         BmUSEFUL(do_utf8 ? prog->check_utf8 : prog->check_substr) += 5; /* hooray */
 964   fail:
 965     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
 966                           PL_colors[4], PL_colors[5]));
 967     return NULL;
 968 }
 969
 970
 971
 972 #define REXEC_TRIE_READ_CHAR(trie_type, trie, uc, uscan, len, uvc, charid,  \
 973 foldlen, foldbuf, uniflags) STMT_START {                                    \
 974     switch (trie_type) {                                                    \
 975     case trie_utf8_fold:                                                    \
 976         if ( foldlen>0 ) {                                                  \
 977             uvc = utf8n_to_uvuni( uscan, UTF8_MAXLEN, &len, uniflags );     \
 978             foldlen -= len;                                                 \
 979             uscan += len;                                                   \
 980             len=0;                                                          \
 981         } else {                                                            \
 982             uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN, &len, uniflags );   \
 983             uvc = to_uni_fold( uvc, foldbuf, &foldlen );                    \
 984             foldlen -= UNISKIP( uvc );                                      \
 985             uscan = foldbuf + UNISKIP( uvc );                               \
 986         }                                                                   \
 987         break;                                                              \
 988     case trie_utf8:                                                         \
 989         uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN, &len, uniflags );       \
 990         break;                                                              \
 991     case trie_plain:                                                        \
 992         uvc = (UV)*uc;                                                      \
 993         len = 1;                                                            \
 994     }                                                                       \
 995                                                                             \
 996     if (uvc < 256) {                                                        \
 997         charid = trie->charmap[ uvc ];                                      \
 998     }                                                                       \
 999     else {                                                                  \
1000         charid = 0;                                                         \
1001         if (trie->widecharmap) {                                            \
1002             SV** const svpp = hv_fetch(trie->widecharmap,                   \
1003                         (char*)&uvc, sizeof(UV), 0);                        \
1004             if (svpp)                                                       \
1005                 charid = (U16)SvIV(*svpp);                                  \
1006         }                                                                   \
1007     }                                                                       \
1008 } STMT_END
1009
1010 #define REXEC_FBC_EXACTISH_CHECK(CoNd)                  \
1011     if ( (CoNd)                                        \
1012          && (ln == len ||                              \
1013              ibcmp_utf8(s, NULL, 0,  do_utf8,          \
1014                         m, NULL, ln, (bool)UTF))       \
1015          && (!reginfo || regtry(reginfo, s)) )         \
1016         goto got_it;                                   \
1017     else {                                             \
1018          U8 foldbuf[UTF8_MAXBYTES_CASE+1];             \
1019          uvchr_to_utf8(tmpbuf, c);                     \
1020          f = to_utf8_fold(tmpbuf, foldbuf, &foldlen);  \
1021          if ( f != c                                   \
1022               && (f == c1 || f == c2)                  \
1023               && (ln == foldlen ||                     \
1024                   !ibcmp_utf8((char *) foldbuf,        \
1025                               NULL, foldlen, do_utf8,  \
1026                               m,                       \
1027                               NULL, ln, (bool)UTF))    \
1028               && (!reginfo || regtry(reginfo, s)) )    \
1029               goto got_it;                             \
1030     }                                                  \
1031     s += len
1032
1033 #define REXEC_FBC_EXACTISH_SCAN(CoNd)                     \
1034 STMT_START {                                              \
1035     while (s <= e) {                                      \
1036         if ( (CoNd)                                       \
1037              && (ln == 1 || !(OP(c) == EXACTF             \
1038                               ? ibcmp(s, m, ln)           \
1039                               : ibcmp_locale(s, m, ln)))  \
1040              && (!reginfo || regtry(reginfo, s)) )        \
1041             goto got_it;                                  \
1042         s++;                                              \
1043     }                                                     \
1044 } STMT_END
1045
1046 #define REXEC_FBC_UTF8_SCAN(CoDe)                     \
1047 STMT_START {                                          \
1048     while (s + (uskip = UTF8SKIP(s)) <= strend) {     \
1049         CoDe                                          \
1050         s += uskip;                                   \
1051     }                                                 \
1052 } STMT_END
1053
1054 #define REXEC_FBC_SCAN(CoDe)                          \
1055 STMT_START {                                          \
1056     while (s < strend) {                              \
1057         CoDe                                          \
1058         s++;                                          \
1059     }                                                 \
1060 } STMT_END
1061
1062 #define REXEC_FBC_UTF8_CLASS_SCAN(CoNd)               \
1063 REXEC_FBC_UTF8_SCAN(                                  \
1064     if (CoNd) {                                       \
1065         if (tmp && (!reginfo || regtry(reginfo, s)))  \
1066             goto got_it;                              \
1067         else                                          \
1068             tmp = doevery;                            \
1069     }                                                 \
1070     else                                              \
1071         tmp = 1;                                      \
1072 )
1073
1074 #define REXEC_FBC_CLASS_SCAN(CoNd)                    \
1075 REXEC_FBC_SCAN(                                       \
1076     if (CoNd) {                                       \
1077         if (tmp && (!reginfo || regtry(reginfo, s)))  \
1078             goto got_it;                              \
1079         else                                          \
1080             tmp = doevery;                            \
1081     }                                                 \
1082     else                                              \
1083         tmp = 1;                                      \
1084 )
1085
1086 #define REXEC_FBC_TRYIT               \
1087 if ((!reginfo || regtry(reginfo, s))) \
1088     goto got_it
1089
1090 #define REXEC_FBC_CSCAN_PRELOAD(UtFpReLoAd,CoNdUtF8,CoNd)      \
1091     if (do_utf8) {                                             \
1092         UtFpReLoAd;                                            \
1093         REXEC_FBC_UTF8_CLASS_SCAN(CoNdUtF8);                   \
1094     }                                                          \
1095     else {                                                     \
1096         REXEC_FBC_CLASS_SCAN(CoNd);                            \
1097     }                                                          \
1098     break
1099
1100 #define REXEC_FBC_CSCAN_TAINT(CoNdUtF8,CoNd)                   \
1101     PL_reg_flags |= RF_tainted;                                \
1102     if (do_utf8) {                                             \
1103         REXEC_FBC_UTF8_CLASS_SCAN(CoNdUtF8);                   \
1104     }                                                          \
1105     else {                                                     \
1106         REXEC_FBC_CLASS_SCAN(CoNd);                            \
1107     }                                                          \
1108     break
1109
1110 #define DUMP_EXEC_POS(li,s,doutf8) \
1111     dump_exec_pos(li,s,(PL_regeol),(PL_bostr),(PL_reg_starttry),doutf8)
1112
1113 /* We know what class REx starts with.  Try to find this position... */
1114 /* if reginfo is NULL, its a dryrun */
1115 /* annoyingly all the vars in this routine have different names from their counterparts
1116    in regmatch. /grrr */
1117
1118 STATIC char *
1119 S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
1120     const char *strend, const regmatch_info *reginfo)
1121 {
1122         dVAR;
1123         const I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
1124         char *m;
1125         STRLEN ln;
1126         STRLEN lnc;
1127         register STRLEN uskip;
1128         unsigned int c1;
1129         unsigned int c2;
1130         char *e;
1131         register I32 tmp = 1;   /* Scratch variable? */
1132         register const bool do_utf8 = PL_reg_match_utf8;
1133
1134         /* We know what class it must start with. */
1135         switch (OP(c)) {
1136         case ANYOF:
1137             if (do_utf8) {
1138                  REXEC_FBC_UTF8_CLASS_SCAN((ANYOF_FLAGS(c) & ANYOF_UNICODE) ||
1139                           !UTF8_IS_INVARIANT((U8)s[0]) ?
1140                           reginclass(prog, c, (U8*)s, 0, do_utf8) :
1141                           REGINCLASS(prog, c, (U8*)s));
1142             }
1143             else {
1144                  while (s < strend) {
1145                       STRLEN skip = 1;
1146
1147                       if (REGINCLASS(prog, c, (U8*)s) ||
1148                           (ANYOF_FOLD_SHARP_S(c, s, strend) &&
1149                            /* The assignment of 2 is intentional:
1150                             * for the folded sharp s, the skip is 2. */
1151                            (skip = SHARP_S_SKIP))) {
1152                            if (tmp && (!reginfo || regtry(reginfo, s)))
1153                                 goto got_it;
1154                            else
1155                                 tmp = doevery;
1156                       }
1157                       else
1158                            tmp = 1;
1159                       s += skip;
1160                  }
1161             }
1162             break;
1163         case CANY:
1164             REXEC_FBC_SCAN(
1165                 if (tmp && (!reginfo || regtry(reginfo, s)))
1166                     goto got_it;
1167                 else
1168                     tmp = doevery;
1169             );
1170             break;
1171         case EXACTF:
1172             m   = STRING(c);
1173             ln  = STR_LEN(c);   /* length to match in octets/bytes */
1174             lnc = (I32) ln;     /* length to match in characters */
1175             if (UTF) {
1176                 STRLEN ulen1, ulen2;
1177                 U8 *sm = (U8 *) m;
1178                 U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
1179                 U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
1180                 const U32 uniflags = UTF8_ALLOW_DEFAULT;
1181
1182                 to_utf8_lower((U8*)m, tmpbuf1, &ulen1);
1183                 to_utf8_upper((U8*)m, tmpbuf2, &ulen2);
1184
1185                 c1 = utf8n_to_uvchr(tmpbuf1, UTF8_MAXBYTES_CASE,
1186                                     0, uniflags);
1187                 c2 = utf8n_to_uvchr(tmpbuf2, UTF8_MAXBYTES_CASE,
1188                                     0, uniflags);
1189                 lnc = 0;
1190                 while (sm < ((U8 *) m + ln)) {
1191                     lnc++;
1192                     sm += UTF8SKIP(sm);
1193                 }
1194             }
1195             else {
1196                 c1 = *(U8*)m;
1197                 c2 = PL_fold[c1];
1198             }
1199             goto do_exactf;
1200         case EXACTFL:
1201             m   = STRING(c);
1202             ln  = STR_LEN(c);
1203             lnc = (I32) ln;
1204             c1 = *(U8*)m;
1205             c2 = PL_fold_locale[c1];
1206           do_exactf:
1207             e = HOP3c(strend, -((I32)lnc), s);
1208
1209             if (!reginfo && e < s)
1210                 e = s;                  /* Due to minlen logic of intuit() */
1211
1212             /* The idea in the EXACTF* cases is to first find the
1213              * first character of the EXACTF* node and then, if
1214              * necessary, case-insensitively compare the full
1215              * text of the node.  The c1 and c2 are the first
1216              * characters (though in Unicode it gets a bit
1217              * more complicated because there are more cases
1218              * than just upper and lower: one needs to use
1219              * the so-called folding case for case-insensitive
1220              * matching (called "loose matching" in Unicode).
1221              * ibcmp_utf8() will do just that. */
1222
1223             if (do_utf8) {
1224                 UV c, f;
1225                 U8 tmpbuf [UTF8_MAXBYTES+1];
1226                 STRLEN len, foldlen;
1227                 const U32 uniflags = UTF8_ALLOW_DEFAULT;
1228                 if (c1 == c2) {
1229                     /* Upper and lower of 1st char are equal -
1230                      * probably not a "letter". */
1231                     while (s <= e) {
1232                         c = utf8n_to_uvchr((U8*)s, UTF8_MAXBYTES, &len,
1233                                            uniflags);
1234                         REXEC_FBC_EXACTISH_CHECK(c == c1);
1235                     }
1236                 }
1237                 else {
1238                     while (s <= e) {
1239                       c = utf8n_to_uvchr((U8*)s, UTF8_MAXBYTES, &len,
1240                                            uniflags);
1241
1242                         /* Handle some of the three Greek sigmas cases.
1243                          * Note that not all the possible combinations
1244                          * are handled here: some of them are handled
1245                          * by the standard folding rules, and some of
1246                          * them (the character class or ANYOF cases)
1247                          * are handled during compiletime in
1248                          * regexec.c:S_regclass(). */
1249                         if (c == (UV)UNICODE_GREEK_CAPITAL_LETTER_SIGMA ||
1250                             c == (UV)UNICODE_GREEK_SMALL_LETTER_FINAL_SIGMA)
1251                             c = (UV)UNICODE_GREEK_SMALL_LETTER_SIGMA;
1252
1253                         REXEC_FBC_EXACTISH_CHECK(c == c1 || c == c2);
1254                     }
1255                 }
1256             }
1257             else {
1258                 if (c1 == c2)
1259                     REXEC_FBC_EXACTISH_SCAN(*(U8*)s == c1);
1260                 else
1261                     REXEC_FBC_EXACTISH_SCAN(*(U8*)s == c1 || *(U8*)s == c2);
1262             }
1263             break;
1264         case BOUNDL:
1265             PL_reg_flags |= RF_tainted;
1266             /* FALL THROUGH */
1267         case BOUND:
1268             if (do_utf8) {
1269                 if (s == PL_bostr)
1270                     tmp = '\n';
1271                 else {
1272                     U8 * const r = reghop3((U8*)s, -1, (U8*)PL_bostr);
1273                     tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, UTF8_ALLOW_DEFAULT);
1274                 }
1275                 tmp = ((OP(c) == BOUND ?
1276                         isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
1277                 LOAD_UTF8_CHARCLASS_ALNUM();
1278                 REXEC_FBC_UTF8_SCAN(
1279                     if (tmp == !(OP(c) == BOUND ?
1280                                  (bool)swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
1281                                  isALNUM_LC_utf8((U8*)s)))
1282                     {
1283                         tmp = !tmp;
1284                         REXEC_FBC_TRYIT;
1285                 }
1286                 );
1287             }
1288             else {
1289                 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
1290                 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1291                 REXEC_FBC_SCAN(
1292                     if (tmp ==
1293                         !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
1294                         tmp = !tmp;
1295                         REXEC_FBC_TRYIT;
1296                 }
1297                 );
1298             }
1299             if ((!prog->minlen && tmp) && (!reginfo || regtry(reginfo, s)))
1300                 goto got_it;
1301             break;
1302         case NBOUNDL:
1303             PL_reg_flags |= RF_tainted;
1304             /* FALL THROUGH */
1305         case NBOUND:
1306             if (do_utf8) {
1307                 if (s == PL_bostr)
1308                     tmp = '\n';
1309                 else {
1310                     U8 * const r = reghop3((U8*)s, -1, (U8*)PL_bostr);
1311                     tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, UTF8_ALLOW_DEFAULT);
1312                 }
1313                 tmp = ((OP(c) == NBOUND ?
1314                         isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
1315                 LOAD_UTF8_CHARCLASS_ALNUM();
1316                 REXEC_FBC_UTF8_SCAN(
1317                     if (tmp == !(OP(c) == NBOUND ?
1318                                  (bool)swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
1319                                  isALNUM_LC_utf8((U8*)s)))
1320                         tmp = !tmp;
1321                     else REXEC_FBC_TRYIT;
1322                 );
1323             }
1324             else {
1325                 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
1326                 tmp = ((OP(c) == NBOUND ?
1327                         isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1328                 REXEC_FBC_SCAN(
1329                     if (tmp ==
1330                         !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
1331                         tmp = !tmp;
1332                     else REXEC_FBC_TRYIT;
1333                 );
1334             }
1335             if ((!prog->minlen && !tmp) && (!reginfo || regtry(reginfo, s)))
1336                 goto got_it;
1337             break;
1338         case ALNUM:
1339             REXEC_FBC_CSCAN_PRELOAD(
1340                 LOAD_UTF8_CHARCLASS_ALNUM(),
1341                 swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8),
1342                 isALNUM(*s)
1343             );
1344         case ALNUML:
1345             REXEC_FBC_CSCAN_TAINT(
1346                 isALNUM_LC_utf8((U8*)s),
1347                 isALNUM_LC(*s)
1348             );
1349         case NALNUM:
1350             REXEC_FBC_CSCAN_PRELOAD(
1351                 LOAD_UTF8_CHARCLASS_ALNUM(),
1352                 !swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8),
1353                 !isALNUM(*s)
1354             );
1355         case NALNUML:
1356             REXEC_FBC_CSCAN_TAINT(
1357                 !isALNUM_LC_utf8((U8*)s),
1358                 !isALNUM_LC(*s)
1359             );
1360         case SPACE:
1361             REXEC_FBC_CSCAN_PRELOAD(
1362                 LOAD_UTF8_CHARCLASS_SPACE(),
1363                 *s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8),
1364                 isSPACE(*s)
1365             );
1366         case SPACEL:
1367             REXEC_FBC_CSCAN_TAINT(
1368                 *s == ' ' || isSPACE_LC_utf8((U8*)s),
1369                 isSPACE_LC(*s)
1370             );
1371         case NSPACE:
1372             REXEC_FBC_CSCAN_PRELOAD(
1373                 LOAD_UTF8_CHARCLASS_SPACE(),
1374                 !(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8)),
1375                 !isSPACE(*s)
1376             );
1377         case NSPACEL:
1378             REXEC_FBC_CSCAN_TAINT(
1379                 !(*s == ' ' || isSPACE_LC_utf8((U8*)s)),
1380                 !isSPACE_LC(*s)
1381             );
1382         case DIGIT:
1383             REXEC_FBC_CSCAN_PRELOAD(
1384                 LOAD_UTF8_CHARCLASS_DIGIT(),
1385                 swash_fetch(PL_utf8_digit,(U8*)s, do_utf8),
1386                 isDIGIT(*s)
1387             );
1388         case DIGITL:
1389             REXEC_FBC_CSCAN_TAINT(
1390                 isDIGIT_LC_utf8((U8*)s),
1391                 isDIGIT_LC(*s)
1392             );
1393         case NDIGIT:
1394             REXEC_FBC_CSCAN_PRELOAD(
1395                 LOAD_UTF8_CHARCLASS_DIGIT(),
1396                 !swash_fetch(PL_utf8_digit,(U8*)s, do_utf8),
1397                 !isDIGIT(*s)
1398             );
1399         case NDIGITL:
1400             REXEC_FBC_CSCAN_TAINT(
1401                 !isDIGIT_LC_utf8((U8*)s),
1402                 !isDIGIT_LC(*s)
1403             );
1404         case AHOCORASICKC:
1405         case AHOCORASICK:
1406             {
1407                 const enum { trie_plain, trie_utf8, trie_utf8_fold }
1408                     trie_type = do_utf8 ?
1409                           (c->flags == EXACT ? trie_utf8 : trie_utf8_fold)
1410                         : trie_plain;
1411                 /* what trie are we using right now */
1412                 reg_ac_data *aho
1413                     = (reg_ac_data*)prog->data->data[ ARG( c ) ];
1414                 reg_trie_data *trie=aho->trie;
1415
1416                 const char *last_start = strend - trie->minlen;
1417 #ifdef DEBUGGING
1418                 const char *real_start = s;
1419 #endif
1420                 STRLEN maxlen = trie->maxlen;
1421                 SV *sv_points;
1422                 U8 **points; /* map of where we were in the input string
1423                                 when reading a given char. For ASCII this
1424                                 is unnecessary overhead as the relationship
1425                                 is always 1:1, but for unicode, especially
1426                                 case folded unicode this is not true. */
1427                 U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ];
1428                 U8 *bitmap=NULL;
1429
1430
1431                 GET_RE_DEBUG_FLAGS_DECL;
1432
1433                 /* We can't just allocate points here. We need to wrap it in
1434                  * an SV so it gets freed properly if there is a croak while
1435                  * running the match */
1436                 ENTER;
1437                 SAVETMPS;
1438                 sv_points=newSV(maxlen * sizeof(U8 *));
1439                 SvCUR_set(sv_points,
1440                     maxlen * sizeof(U8 *));
1441                 SvPOK_on(sv_points);
1442                 sv_2mortal(sv_points);
1443                 points=(U8**)SvPV_nolen(sv_points );
1444                 if ( trie_type != trie_utf8_fold
1445                      && (trie->bitmap || OP(c)==AHOCORASICKC) )
1446                 {
1447                     if (trie->bitmap)
1448                         bitmap=(U8*)trie->bitmap;
1449                     else
1450                         bitmap=(U8*)ANYOF_BITMAP(c);
1451                 }
1452                 /* this is the Aho-Corasick algorithm modified a touch
1453                    to include special handling for long "unknown char"
1454                    sequences. The basic idea being that we use AC as long
1455                    as we are dealing with a possible matching char, when
1456                    we encounter an unknown char (and we have not encountered
1457                    an accepting state) we scan forward until we find a legal
1458                    starting char.
1459                    AC matching is basically that of trie matching, except
1460                    that when we encounter a failing transition, we fall back
1461                    to the current states "fail state", and try the current char
1462                    again, a process we repeat until we reach the root state,
1463                    state 1, or a legal transition. If we fail on the root state
1464                    then we can either terminate if we have reached an accepting
1465                    state previously, or restart the entire process from the beginning
1466                    if we have not.
1467
1468                  */
1469                 while (s <= last_start) {
1470                     const U32 uniflags = UTF8_ALLOW_DEFAULT;
1471                     U8 *uc = (U8*)s;
1472                     U16 charid = 0;
1473                     U32 base = 1;
1474                     U32 state = 1;
1475                     UV uvc = 0;
1476                     STRLEN len = 0;
1477                     STRLEN foldlen = 0;
1478                     U8 *uscan = (U8*)NULL;
1479                     U8 *leftmost = NULL;
1480 #ifdef DEBUGGING
1481                     U32 accepted_word= 0;
1482 #endif
1483                     U32 pointpos = 0;
1484
1485                     while ( state && uc <= (U8*)strend ) {
1486                         int failed=0;
1487                         U32 word = aho->states[ state ].wordnum;
1488
1489                         if( state==1 ) {
1490                             if ( bitmap ) {
1491                                 DEBUG_TRIE_EXECUTE_r(
1492                                     if ( uc <= (U8*)last_start && !BITMAP_TEST(bitmap,*uc) ) {
1493                                         dump_exec_pos( (char *)uc, c, strend, real_start,
1494                                             (char *)uc, do_utf8 );
1495                                         PerlIO_printf( Perl_debug_log,
1496                                             " Scanning for legal start char...\n");
1497                                     }
1498                                 );
1499                                 while ( uc <= (U8*)last_start  && !BITMAP_TEST(bitmap,*uc) ) {
1500                                     uc++;
1501                                 }
1502                                 s= (char *)uc;
1503                             }
1504                             if (uc >(U8*)last_start) break;
1505                         }
1506
1507                         if ( word ) {
1508                             U8 *lpos= points[ (pointpos - trie->wordlen[word-1] ) % maxlen ];
1509                             if (!leftmost || lpos < leftmost) {
1510                                 DEBUG_r(accepted_word=word);
1511                                 leftmost= lpos;
1512                             }
1513                             if (base==0) break;
1514
1515                         }
1516                         points[pointpos++ % maxlen]= uc;
1517                         REXEC_TRIE_READ_CHAR(trie_type, trie, uc, uscan, len,
1518                             uvc, charid, foldlen, foldbuf, uniflags);
1519                         DEBUG_TRIE_EXECUTE_r({
1520                             dump_exec_pos( (char *)uc, c, strend, real_start,
1521                                 s,   do_utf8 );
1522                             PerlIO_printf(Perl_debug_log,
1523                                 " Charid:%3u CP:%4"UVxf" ",
1524                                  charid, uvc);
1525                         });
1526
1527                         do {
1528 #ifdef DEBUGGING
1529                             word = aho->states[ state ].wordnum;
1530 #endif
1531                             base = aho->states[ state ].trans.base;
1532
1533                             DEBUG_TRIE_EXECUTE_r({
1534                                 if (failed)
1535                                     dump_exec_pos( (char *)uc, c, strend, real_start,
1536                                         s,   do_utf8 );
1537                                 PerlIO_printf( Perl_debug_log,
1538                                     "%sState: %4"UVxf", word=%"UVxf,
1539                                     failed ? " Fail transition to " : "",
1540                                     (UV)state, (UV)word);
1541                             });
1542                             if ( base ) {
1543                                 U32 tmp;
1544                                 if (charid &&
1545                                      (base + charid > trie->uniquecharcount )
1546                                      && (base + charid - 1 - trie->uniquecharcount
1547                                             < trie->lasttrans)
1548                                      && trie->trans[base + charid - 1 -
1549                                             trie->uniquecharcount].check == state
1550                                      && (tmp=trie->trans[base + charid - 1 -
1551                                         trie->uniquecharcount ].next))
1552                                 {
1553                                     DEBUG_TRIE_EXECUTE_r(
1554                                         PerlIO_printf( Perl_debug_log," - legal\n"));
1555                                     state = tmp;
1556                                     break;
1557                                 }
1558                                 else {
1559                                     DEBUG_TRIE_EXECUTE_r(
1560                                         PerlIO_printf( Perl_debug_log," - fail\n"));
1561                                     failed = 1;
1562                                     state = aho->fail[state];
1563                                 }
1564                             }
1565                             else {
1566                                 /* we must be accepting here */
1567                                 DEBUG_TRIE_EXECUTE_r(
1568                                         PerlIO_printf( Perl_debug_log," - accepting\n"));
1569                                 failed = 1;
1570                                 break;
1571                             }
1572                         } while(state);
1573                         uc += len;
1574                         if (failed) {
1575                             if (leftmost)
1576                                 break;
1577                             if (!state) state = 1;
1578                         }
1579                     }
1580                     if ( aho->states[ state ].wordnum ) {
1581                         U8 *lpos = points[ (pointpos - trie->wordlen[aho->states[ state ].wordnum-1]) % maxlen ];
1582                         if (!leftmost || lpos < leftmost) {
1583                             DEBUG_r(accepted_word=aho->states[ state ].wordnum);
1584                             leftmost = lpos;
1585                         }
1586                     }
1587                     if (leftmost) {
1588                         s = (char*)leftmost;
1589                         DEBUG_TRIE_EXECUTE_r({
1590                             PerlIO_printf(
1591                                 Perl_debug_log,"Matches word #%"UVxf" at position %d. Trying full pattern...\n",
1592                                 (UV)accepted_word, s - real_start
1593                             );
1594                         });
1595                         if (!reginfo || regtry(reginfo, s)) {
1596                             FREETMPS;
1597                             LEAVE;
1598                             goto got_it;
1599                         }
1600                         s = HOPc(s,1);
1601                         DEBUG_TRIE_EXECUTE_r({
1602                             PerlIO_printf( Perl_debug_log,"Pattern failed. Looking for new start point...\n");
1603                         });
1604                     } else {
1605                         DEBUG_TRIE_EXECUTE_r(
1606                             PerlIO_printf( Perl_debug_log,"No match.\n"));
1607                         break;
1608                     }
1609                 }
1610                 FREETMPS;
1611                 LEAVE;
1612             }
1613             break;
1614         default:
1615             Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
1616             break;
1617         }
1618         return 0;
1619       got_it:
1620         return s;
1621 }
1622
1623 /*
1624  - regexec_flags - match a regexp against a string
1625  */
1626 I32
1627 Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend,
1628               char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
1629 /* strend: pointer to null at end of string */
1630 /* strbeg: real beginning of string */
1631 /* minend: end of match must be >=minend after stringarg. */
1632 /* data: May be used for some additional optimizations. */
1633 /* nosave: For optimizations. */
1634 {
1635     dVAR;
1636     register char *s;
1637     register regnode *c;
1638     register char *startpos = stringarg;
1639     I32 minlen;         /* must match at least this many chars */
1640     I32 dontbother = 0; /* how many characters not to try at end */
1641     I32 end_shift = 0;                  /* Same for the end. */         /* CC */
1642     I32 scream_pos = -1;                /* Internal iterator of scream. */
1643     char *scream_olds = NULL;
1644     SV* const oreplsv = GvSV(PL_replgv);
1645     const bool do_utf8 = (bool)DO_UTF8(sv);
1646     I32 multiline;
1647
1648     regmatch_info reginfo;  /* create some info to pass to regtry etc */
1649
1650     GET_RE_DEBUG_FLAGS_DECL;
1651
1652     PERL_UNUSED_ARG(data);
1653
1654     /* Be paranoid... */
1655     if (prog == NULL || startpos == NULL) {
1656         Perl_croak(aTHX_ "NULL regexp parameter");
1657         return 0;
1658     }
1659
1660     multiline = prog->reganch & PMf_MULTILINE;
1661     reginfo.prog = prog;
1662
1663     RX_MATCH_UTF8_set(prog, do_utf8);
1664     DEBUG_EXECUTE_r(
1665         debug_start_match(prog, do_utf8, startpos, strend,
1666         "Matching");
1667     );
1668
1669     minlen = prog->minlen;
1670
1671     if (strend - startpos < (minlen+(prog->check_offset_min<0?prog->check_offset_min:0))) {
1672         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
1673                               "String too short [regexec_flags]...\n"));
1674         goto phooey;
1675     }
1676
1677
1678     /* Check validity of program. */
1679     if (UCHARAT(prog->program) != REG_MAGIC) {
1680         Perl_croak(aTHX_ "corrupted regexp program");
1681     }
1682
1683     PL_reg_flags = 0;
1684     PL_reg_eval_set = 0;
1685     PL_reg_maxiter = 0;
1686
1687     if (prog->reganch & ROPT_UTF8)
1688         PL_reg_flags |= RF_utf8;
1689
1690     /* Mark beginning of line for ^ and lookbehind. */
1691     reginfo.bol = startpos; /* XXX not used ??? */
1692     PL_bostr  = strbeg;
1693     reginfo.sv = sv;
1694
1695     /* Mark end of line for $ (and such) */
1696     PL_regeol = strend;
1697
1698     /* see how far we have to get to not match where we matched before */
1699     reginfo.till = startpos+minend;
1700
1701     /* If there is a "must appear" string, look for it. */
1702     s = startpos;
1703
1704     if (prog->reganch & ROPT_GPOS_SEEN) { /* Need to set reginfo->ganch */
1705         MAGIC *mg;
1706
1707         if (flags & REXEC_IGNOREPOS)    /* Means: check only at start */
1708             reginfo.ganch = startpos;
1709         else if (sv && SvTYPE(sv) >= SVt_PVMG
1710                   && SvMAGIC(sv)
1711                   && (mg = mg_find(sv, PERL_MAGIC_regex_global))
1712                   && mg->mg_len >= 0) {
1713             reginfo.ganch = strbeg + mg->mg_len;        /* Defined pos() */
1714             if (prog->reganch & ROPT_ANCH_GPOS) {
1715                 if (s > reginfo.ganch)
1716                     goto phooey;
1717                 s = reginfo.ganch;
1718             }
1719         }
1720         else                            /* pos() not defined */
1721             reginfo.ganch = strbeg;
1722     }
1723
1724     if (!(flags & REXEC_CHECKED) && (prog->check_substr != NULL || prog->check_utf8 != NULL)) {
1725         re_scream_pos_data d;
1726
1727         d.scream_olds = &scream_olds;
1728         d.scream_pos = &scream_pos;
1729         s = re_intuit_start(prog, sv, s, strend, flags, &d);
1730         if (!s) {
1731             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Not present...\n"));
1732             goto phooey;        /* not present */
1733         }
1734     }
1735
1736
1737
1738     /* Simplest case:  anchored match need be tried only once. */
1739     /*  [unless only anchor is BOL and multiline is set] */
1740     if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) {
1741         if (s == startpos && regtry(&reginfo, startpos))
1742             goto got_it;
1743         else if (multiline || (prog->reganch & ROPT_IMPLICIT)
1744                  || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */
1745         {
1746             char *end;
1747
1748             if (minlen)
1749                 dontbother = minlen - 1;
1750             end = HOP3c(strend, -dontbother, strbeg) - 1;
1751             /* for multiline we only have to try after newlines */
1752             if (prog->check_substr || prog->check_utf8) {
1753                 if (s == startpos)
1754                     goto after_try;
1755                 while (1) {
1756                     if (regtry(&reginfo, s))
1757                         goto got_it;
1758                   after_try:
1759                     if (s >= end)
1760                         goto phooey;
1761                     if (prog->reganch & RE_USE_INTUIT) {
1762                         s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
1763                         if (!s)
1764                             goto phooey;
1765                     }
1766                     else
1767                         s++;
1768                 }
1769             } else {
1770                 if (s > startpos)
1771                     s--;
1772                 while (s < end) {
1773                     if (*s++ == '\n') { /* don't need PL_utf8skip here */
1774                         if (regtry(&reginfo, s))
1775                             goto got_it;
1776                     }
1777                 }
1778             }
1779         }
1780         goto phooey;
1781     } else if (ROPT_GPOS_CHECK == (prog->reganch & ROPT_GPOS_CHECK))
1782     {
1783         /* the warning about reginfo.ganch being used without intialization
1784            is bogus -- we set it above, when prog->reganch & ROPT_GPOS_SEEN
1785            and we only enter this block when the same bit is set. */
1786         if (regtry(&reginfo, reginfo.ganch))
1787             goto got_it;
1788         goto phooey;
1789     }
1790
1791     /* Messy cases:  unanchored match. */
1792     if ((prog->anchored_substr || prog->anchored_utf8) && prog->reganch & ROPT_SKIP) {
1793         /* we have /x+whatever/ */
1794         /* it must be a one character string (XXXX Except UTF?) */
1795         char ch;
1796 #ifdef DEBUGGING
1797         int did_match = 0;
1798 #endif
1799         if (!(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr))
1800             do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1801         ch = SvPVX_const(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr)[0];
1802
1803         if (do_utf8) {
1804             REXEC_FBC_SCAN(
1805                 if (*s == ch) {
1806                     DEBUG_EXECUTE_r( did_match = 1 );
1807                     if (regtry(&reginfo, s)) goto got_it;
1808                     s += UTF8SKIP(s);
1809                     while (s < strend && *s == ch)
1810                         s += UTF8SKIP(s);
1811                 }
1812             );
1813         }
1814         else {
1815             REXEC_FBC_SCAN(
1816                 if (*s == ch) {
1817                     DEBUG_EXECUTE_r( did_match = 1 );
1818                     if (regtry(&reginfo, s)) goto got_it;
1819                     s++;
1820                     while (s < strend && *s == ch)
1821                         s++;
1822                 }
1823             );
1824         }
1825         DEBUG_EXECUTE_r(if (!did_match)
1826                 PerlIO_printf(Perl_debug_log,
1827                                   "Did not find anchored character...\n")
1828                );
1829     }
1830     else if (prog->anchored_substr != NULL
1831               || prog->anchored_utf8 != NULL
1832               || ((prog->float_substr != NULL || prog->float_utf8 != NULL)
1833                   && prog->float_max_offset < strend - s)) {
1834         SV *must;
1835         I32 back_max;
1836         I32 back_min;
1837         char *last;
1838         char *last1;            /* Last position checked before */
1839 #ifdef DEBUGGING
1840         int did_match = 0;
1841 #endif
1842         if (prog->anchored_substr || prog->anchored_utf8) {
1843             if (!(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr))
1844                 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1845             must = do_utf8 ? prog->anchored_utf8 : prog->anchored_substr;
1846             back_max = back_min = prog->anchored_offset;
1847         } else {
1848             if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))
1849                 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1850             must = do_utf8 ? prog->float_utf8 : prog->float_substr;
1851             back_max = prog->float_max_offset;
1852             back_min = prog->float_min_offset;
1853         }
1854
1855
1856         if (must == &PL_sv_undef)
1857             /* could not downgrade utf8 check substring, so must fail */
1858             goto phooey;
1859
1860         if (back_min<0) {
1861             last = strend;
1862         } else {
1863             last = HOP3c(strend,        /* Cannot start after this */
1864                   -(I32)(CHR_SVLEN(must)
1865                          - (SvTAIL(must) != 0) + back_min), strbeg);
1866         }
1867         if (s > PL_bostr)
1868             last1 = HOPc(s, -1);
1869         else
1870             last1 = s - 1;      /* bogus */
1871
1872         /* XXXX check_substr already used to find "s", can optimize if
1873            check_substr==must. */
1874         scream_pos = -1;
1875         dontbother = end_shift;
1876         strend = HOPc(strend, -dontbother);
1877         while ( (s <= last) &&
1878                 ((flags & REXEC_SCREAM)
1879                  ? (s = screaminstr(sv, must, HOP3c(s, back_min, (back_min<0 ? strbeg : strend)) - strbeg,
1880                                     end_shift, &scream_pos, 0))
1881                  : (s = fbm_instr((unsigned char*)HOP3(s, back_min, (back_min<0 ? strbeg : strend)),
1882                                   (unsigned char*)strend, must,
1883                                   multiline ? FBMrf_MULTILINE : 0))) ) {
1884             /* we may be pointing at the wrong string */
1885             if ((flags & REXEC_SCREAM) && RX_MATCH_COPIED(prog))
1886                 s = strbeg + (s - SvPVX_const(sv));
1887             DEBUG_EXECUTE_r( did_match = 1 );
1888             if (HOPc(s, -back_max) > last1) {
1889                 last1 = HOPc(s, -back_min);
1890                 s = HOPc(s, -back_max);
1891             }
1892             else {
1893                 char * const t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
1894
1895                 last1 = HOPc(s, -back_min);
1896                 s = t;
1897             }
1898             if (do_utf8) {
1899                 while (s <= last1) {
1900                     if (regtry(&reginfo, s))
1901                         goto got_it;
1902                     s += UTF8SKIP(s);
1903                 }
1904             }
1905             else {
1906                 while (s <= last1) {
1907                     if (regtry(&reginfo, s))
1908                         goto got_it;
1909                     s++;
1910                 }
1911             }
1912         }
1913         DEBUG_EXECUTE_r(if (!did_match) {
1914             RE_PV_QUOTED_DECL(quoted, do_utf8, PERL_DEBUG_PAD_ZERO(0),
1915                 SvPVX_const(must), RE_SV_DUMPLEN(must), 30);
1916             PerlIO_printf(Perl_debug_log, "Did not find %s substr %s%s...\n",
1917                               ((must == prog->anchored_substr || must == prog->anchored_utf8)
1918                                ? "anchored" : "floating"),
1919                 quoted, RE_SV_TAIL(must));
1920         });
1921         goto phooey;
1922     }
1923     else if ( (c = prog->regstclass) ) {
1924         if (minlen) {
1925             const OPCODE op = OP(prog->regstclass);
1926             /* don't bother with what can't match */
1927             if (PL_regkind[op] != EXACT && op != CANY && PL_regkind[op] != TRIE)
1928                 strend = HOPc(strend, -(minlen - 1));
1929         }
1930         DEBUG_EXECUTE_r({
1931             SV * const prop = sv_newmortal();
1932             regprop(prog, prop, c);
1933             {
1934                 RE_PV_QUOTED_DECL(quoted,UTF,PERL_DEBUG_PAD_ZERO(1),
1935                     s,strend-s,60);
1936                 PerlIO_printf(Perl_debug_log,
1937                     "Matching stclass %.*s against %s (%d chars)\n",
1938                     (int)SvCUR(prop), SvPVX_const(prop),
1939                      quoted, (int)(strend - s));
1940             }
1941         });
1942         if (find_byclass(prog, c, s, strend, &reginfo))
1943             goto got_it;
1944         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass... [regexec_flags]\n"));
1945     }
1946     else {
1947         dontbother = 0;
1948         if (prog->float_substr != NULL || prog->float_utf8 != NULL) {
1949             /* Trim the end. */
1950             char *last;
1951             SV* float_real;
1952
1953             if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))
1954                 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1955             float_real = do_utf8 ? prog->float_utf8 : prog->float_substr;
1956
1957             if (flags & REXEC_SCREAM) {
1958                 last = screaminstr(sv, float_real, s - strbeg,
1959                                    end_shift, &scream_pos, 1); /* last one */
1960                 if (!last)
1961                     last = scream_olds; /* Only one occurrence. */
1962                 /* we may be pointing at the wrong string */
1963                 else if (RX_MATCH_COPIED(prog))
1964                     s = strbeg + (s - SvPVX_const(sv));
1965             }
1966             else {
1967                 STRLEN len;
1968                 const char * const little = SvPV_const(float_real, len);
1969
1970                 if (SvTAIL(float_real)) {
1971                     if (memEQ(strend - len + 1, little, len - 1))
1972                         last = strend - len + 1;
1973                     else if (!multiline)
1974                         last = memEQ(strend - len, little, len)
1975                             ? strend - len : NULL;
1976                     else
1977                         goto find_last;
1978                 } else {
1979                   find_last:
1980                     if (len)
1981                         last = rninstr(s, strend, little, little + len);
1982                     else
1983                         last = strend;  /* matching "$" */
1984                 }
1985             }
1986             if (last == NULL) {
1987                 DEBUG_EXECUTE_r(
1988                     PerlIO_printf(Perl_debug_log,
1989                         "%sCan't trim the tail, match fails (should not happen)%s\n",
1990                         PL_colors[4], PL_colors[5]));
1991                 goto phooey; /* Should not happen! */
1992             }
1993             dontbother = strend - last + prog->float_min_offset;
1994         }
1995         if (minlen && (dontbother < minlen))
1996             dontbother = minlen - 1;
1997         strend -= dontbother;              /* this one's always in bytes! */
1998         /* We don't know much -- general case. */
1999         if (do_utf8) {
2000             for (;;) {
2001                 if (regtry(&reginfo, s))
2002                     goto got_it;
2003                 if (s >= strend)
2004                     break;
2005                 s += UTF8SKIP(s);
2006             };
2007         }
2008         else {
2009             do {
2010                 if (regtry(&reginfo, s))
2011                     goto got_it;
2012             } while (s++ < strend);
2013         }
2014     }
2015
2016     /* Failure. */
2017     goto phooey;
2018
2019 got_it:
2020     RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
2021
2022     if (PL_reg_eval_set) {
2023         /* Preserve the current value of $^R */
2024         if (oreplsv != GvSV(PL_replgv))
2025             sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
2026                                                   restored, the value remains
2027                                                   the same. */
2028         restore_pos(aTHX_ prog);
2029     }
2030
2031     /* make sure $`, $&, $', and $digit will work later */
2032     if ( !(flags & REXEC_NOT_FIRST) ) {
2033         RX_MATCH_COPY_FREE(prog);
2034         if (flags & REXEC_COPY_STR) {
2035             const I32 i = PL_regeol - startpos + (stringarg - strbeg);
2036 #ifdef PERL_OLD_COPY_ON_WRITE
2037             if ((SvIsCOW(sv)
2038                  || (SvFLAGS(sv) & CAN_COW_MASK) == CAN_COW_FLAGS)) {
2039                 if (DEBUG_C_TEST) {
2040                     PerlIO_printf(Perl_debug_log,
2041                                   "Copy on write: regexp capture, type %d\n",
2042                                   (int) SvTYPE(sv));
2043                 }
2044                 prog->saved_copy = sv_setsv_cow(prog->saved_copy, sv);
2045                 prog->subbeg = (char *)SvPVX_const(prog->saved_copy);
2046                 assert (SvPOKp(prog->saved_copy));
2047             } else
2048 #endif
2049             {
2050                 RX_MATCH_COPIED_on(prog);
2051                 s = savepvn(strbeg, i);
2052                 prog->subbeg = s;
2053             }
2054             prog->sublen = i;
2055         }
2056         else {
2057             prog->subbeg = strbeg;
2058             prog->sublen = PL_regeol - strbeg;  /* strend may have been modified */
2059         }
2060     }
2061
2062     return 1;
2063
2064 phooey:
2065     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n",
2066                           PL_colors[4], PL_colors[5]));
2067     if (PL_reg_eval_set)
2068         restore_pos(aTHX_ prog);
2069     return 0;
2070 }
2071
2072
2073 /*
2074  - regtry - try match at specific point
2075  */
2076 STATIC I32                      /* 0 failure, 1 success */
2077 S_regtry(pTHX_ const regmatch_info *reginfo, char *startpos)
2078 {
2079     dVAR;
2080     register I32 *sp;
2081     register I32 *ep;
2082     CHECKPOINT lastcp;
2083     regexp *prog = reginfo->prog;
2084     GET_RE_DEBUG_FLAGS_DECL;
2085
2086     if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
2087         MAGIC *mg;
2088
2089         PL_reg_eval_set = RS_init;
2090         DEBUG_EXECUTE_r(DEBUG_s(
2091             PerlIO_printf(Perl_debug_log, "  setting stack tmpbase at %"IVdf"\n",
2092                           (IV)(PL_stack_sp - PL_stack_base));
2093             ));
2094         SAVEI32(cxstack[cxstack_ix].blk_oldsp);
2095         cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
2096         /* Otherwise OP_NEXTSTATE will free whatever on stack now.  */
2097         SAVETMPS;
2098         /* Apparently this is not needed, judging by wantarray. */
2099         /* SAVEI8(cxstack[cxstack_ix].blk_gimme);
2100            cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
2101
2102         if (reginfo->sv) {
2103             /* Make $_ available to executed code. */
2104             if (reginfo->sv != DEFSV) {
2105                 SAVE_DEFSV;
2106                 DEFSV = reginfo->sv;
2107             }
2108
2109             if (!(SvTYPE(reginfo->sv) >= SVt_PVMG && SvMAGIC(reginfo->sv)
2110                   && (mg = mg_find(reginfo->sv, PERL_MAGIC_regex_global)))) {
2111                 /* prepare for quick setting of pos */
2112 #ifdef PERL_OLD_COPY_ON_WRITE
2113                 if (SvIsCOW(sv))
2114                     sv_force_normal_flags(sv, 0);
2115 #endif
2116                 mg = sv_magicext(reginfo->sv, NULL, PERL_MAGIC_regex_global,
2117                                  &PL_vtbl_mglob, NULL, 0);
2118                 mg->mg_len = -1;
2119             }
2120             PL_reg_magic    = mg;
2121             PL_reg_oldpos   = mg->mg_len;
2122             SAVEDESTRUCTOR_X(restore_pos, prog);
2123         }
2124         if (!PL_reg_curpm) {
2125             Newxz(PL_reg_curpm, 1, PMOP);
2126 #ifdef USE_ITHREADS
2127             {
2128                 SV* const repointer = newSViv(0);
2129                 /* so we know which PL_regex_padav element is PL_reg_curpm */
2130                 SvFLAGS(repointer) |= SVf_BREAK;
2131                 av_push(PL_regex_padav,repointer);
2132                 PL_reg_curpm->op_pmoffset = av_len(PL_regex_padav);
2133                 PL_regex_pad = AvARRAY(PL_regex_padav);
2134             }
2135 #endif
2136         }
2137         PM_SETRE(PL_reg_curpm, prog);
2138         PL_reg_oldcurpm = PL_curpm;
2139         PL_curpm = PL_reg_curpm;
2140         if (RX_MATCH_COPIED(prog)) {
2141             /*  Here is a serious problem: we cannot rewrite subbeg,
2142                 since it may be needed if this match fails.  Thus
2143                 $` inside (?{}) could fail... */
2144             PL_reg_oldsaved = prog->subbeg;
2145             PL_reg_oldsavedlen = prog->sublen;
2146 #ifdef PERL_OLD_COPY_ON_WRITE
2147             PL_nrs = prog->saved_copy;
2148 #endif
2149             RX_MATCH_COPIED_off(prog);
2150         }
2151         else
2152             PL_reg_oldsaved = NULL;
2153         prog->subbeg = PL_bostr;
2154         prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
2155     }
2156     DEBUG_EXECUTE_r(PL_reg_starttry = startpos);
2157     prog->startp[0] = startpos - PL_bostr;
2158     PL_reginput = startpos;
2159     PL_reglastparen = &prog->lastparen;
2160     PL_reglastcloseparen = &prog->lastcloseparen;
2161     prog->lastparen = 0;
2162     prog->lastcloseparen = 0;
2163     PL_regsize = 0;
2164     PL_regstartp = prog->startp;
2165     PL_regendp = prog->endp;
2166     if (PL_reg_start_tmpl <= prog->nparens) {
2167         PL_reg_start_tmpl = prog->nparens*3/2 + 3;
2168         if(PL_reg_start_tmp)
2169             Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
2170         else
2171             Newx(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
2172     }
2173
2174     /* XXXX What this code is doing here?!!!  There should be no need
2175        to do this again and again, PL_reglastparen should take care of
2176        this!  --ilya*/
2177
2178     /* Tests pat.t#187 and split.t#{13,14} seem to depend on this code.
2179      * Actually, the code in regcppop() (which Ilya may be meaning by
2180      * PL_reglastparen), is not needed at all by the test suite
2181      * (op/regexp, op/pat, op/split), but that code is needed, oddly
2182      * enough, for building DynaLoader, or otherwise this
2183      * "Error: '*' not in typemap in DynaLoader.xs, line 164"
2184      * will happen.  Meanwhile, this code *is* needed for the
2185      * above-mentioned test suite tests to succeed.  The common theme
2186      * on those tests seems to be returning null fields from matches.
2187      * --jhi */
2188 #if 1
2189     sp = prog->startp;
2190     ep = prog->endp;
2191     if (prog->nparens) {
2192         register I32 i;
2193         for (i = prog->nparens; i > (I32)*PL_reglastparen; i--) {
2194             *++sp = -1;
2195             *++ep = -1;
2196         }
2197     }
2198 #endif
2199     REGCP_SET(lastcp);
2200     if (regmatch(reginfo, prog->program + 1)) {
2201         prog->endp[0] = PL_reginput - PL_bostr;
2202         return 1;
2203     }
2204     REGCP_UNWIND(lastcp);
2205     return 0;
2206 }
2207
2208
2209 #define sayYES goto yes
2210 #define sayNO goto no
2211 #define sayNO_SILENT goto no_silent
2212 #define saySAME(x) if (x) goto yes; else goto no
2213
2214 /* we dont use STMT_START/END here because it leads to
2215    "unreachable code" warnings, which are bogus, but distracting. */
2216 #define CACHEsayNO \
2217     if (st->u.whilem.cache_offset | st->u.whilem.cache_bit) \
2218        PL_reg_poscache[st->u.whilem.cache_offset] |= \
2219             (1<<st->u.whilem.cache_bit); \
2220     sayNO
2221
2222 /* this is used to determine how far from the left messages like
2223    'failed...' are printed. It should be set such that messages
2224    are inline with the regop output that created them.
2225 */
2226 #define REPORT_CODE_OFF 32
2227
2228
2229 /* Make sure there is a test for this +1 options in re_tests */
2230 #define TRIE_INITAL_ACCEPT_BUFFLEN 4;
2231
2232 #define CHRTEST_UNINIT -1001 /* c1/c2 haven't been calculated yet */
2233 #define CHRTEST_VOID   -1000 /* the c1/c2 "next char" test should be skipped */
2234
2235 #define SLAB_FIRST(s) (&(s)->states[0])
2236 #define SLAB_LAST(s)  (&(s)->states[PERL_REGMATCH_SLAB_SLOTS-1])
2237
2238 /* grab a new slab and return the first slot in it */
2239
2240 STATIC regmatch_state *
2241 S_push_slab(pTHX)
2242 {
2243 #if PERL_VERSION < 9
2244     dMY_CXT;
2245 #endif
2246     regmatch_slab *s = PL_regmatch_slab->next;
2247     if (!s) {
2248         Newx(s, 1, regmatch_slab);
2249         s->prev = PL_regmatch_slab;
2250         s->next = NULL;
2251         PL_regmatch_slab->next = s;
2252     }
2253     PL_regmatch_slab = s;
2254     return SLAB_FIRST(s);
2255 }
2256
2257 /* simulate a recursive call to regmatch */
2258
2259 #define REGMATCH(ns, where) \
2260     st->scan = scan; \
2261     scan = (ns); \
2262     st->resume_state = resume_##where; \
2263     goto start_recurse; \
2264     resume_point_##where:
2265
2266 /* push a new state then goto it */
2267
2268 #define PUSH_STATE_GOTO(state, node) \
2269     scan = node; \
2270     st->resume_state = state; \
2271     goto push_state;
2272
2273 /* push a new state with success backtracking, then goto it */
2274
2275 #define PUSH_YES_STATE_GOTO(state, node) \
2276     scan = node; \
2277     st->resume_state = state; \
2278     goto push_yes_state;
2279
2280
2281
2282 /*
2283  - regmatch - main matching routine
2284  *
2285  * Conceptually the strategy is simple:  check to see whether the current
2286  * node matches, call self recursively to see whether the rest matches,
2287  * and then act accordingly.  In practice we make some effort to avoid
2288  * recursion, in particular by going through "ordinary" nodes (that don't
2289  * need to know whether the rest of the match failed) by a loop instead of
2290  * by recursion.
2291  */
2292 /* [lwall] I've hoisted the register declarations to the outer block in order to
2293  * maybe save a little bit of pushing and popping on the stack.  It also takes
2294  * advantage of machines that use a register save mask on subroutine entry.
2295  *
2296  * This function used to be heavily recursive, but since this had the
2297  * effect of blowing the CPU stack on complex regexes, it has been
2298  * restructured to be iterative, and to save state onto the heap rather
2299  * than the stack. Essentially whereever regmatch() used to be called, it
2300  * pushes the current state, notes where to return, then jumps back into
2301  * the main loop.
2302  *
2303  * Originally the structure of this function used to look something like
2304
2305     S_regmatch() {
2306         int a = 1, b = 2;
2307         ...
2308         while (scan != NULL) {
2309             a++; // do stuff with a and b
2310             ...
2311             switch (OP(scan)) {
2312                 case FOO: {
2313                     int local = 3;
2314                     ...
2315                     if (regmatch(...))  // recurse
2316                         goto yes;
2317                 }
2318                 ...
2319             }
2320         }
2321         yes:
2322         return 1;
2323     }
2324
2325  * Now it looks something like this:
2326
2327     typedef struct {
2328         int a, b, local;
2329         int resume_state;
2330     } regmatch_state;
2331
2332     S_regmatch() {
2333         regmatch_state *st = new();
2334         int depth=0;
2335         st->a++; // do stuff with a and b
2336         ...
2337         while (scan != NULL) {
2338             ...
2339             switch (OP(scan)) {
2340                 case FOO: {
2341                     st->local = 3;
2342                     ...
2343                     st->scan = scan;
2344                     scan = ...;
2345                     st->resume_state = resume_FOO;
2346                     goto start_recurse; // recurse
2347
2348                     resume_point_FOO:
2349                     if (result)
2350                         goto yes;
2351                 }
2352                 ...
2353             }
2354           start_recurse:
2355             st = new(); push a new state
2356             st->a = 1; st->b = 2;
2357             depth++;
2358         }
2359       yes:
2360         result = 1;
2361         if (depth--) {
2362             st = pop();
2363             switch (resume_state) {
2364             case resume_FOO:
2365                 goto resume_point_FOO;
2366             ...
2367             }
2368         }
2369         return result
2370     }
2371
2372  * WARNING: this means that any line in this function that contains a
2373  * REGMATCH() or TRYPAREN() is actually simulating a recursive call to
2374  * regmatch() using gotos instead. Thus the values of any local variables
2375  * not saved in the regmatch_state structure will have been lost when
2376  * execution resumes on the next line .
2377  *
2378  * States (ie the st pointer) are allocated in slabs of about 4K in size.
2379  * PL_regmatch_state always points to the currently active state, and
2380  * PL_regmatch_slab points to the slab currently containing PL_regmatch_state.
2381  * The first time regmatch is called, the first slab is allocated, and is
2382  * never freed until interpreter desctruction. When the slab is full,
2383  * a new one is allocated chained to the end. At exit from regmatch, slabs
2384  * allocated since entry are freed.
2385  */
2386
2387
2388 #define DEBUG_STATE_pp(pp)                                  \
2389     DEBUG_STATE_r({                                         \
2390         DUMP_EXEC_POS(locinput, scan, do_utf8);             \
2391         PerlIO_printf(Perl_debug_log,                       \
2392             "    %*s"pp" %s\n",                             \
2393             depth*2, "",                                    \
2394             reg_name[st->resume_state] );   \
2395     });
2396
2397
2398 #define REG_NODE_NUM(x) ((x) ? (int)((x)-prog) : -1)
2399
2400 #ifdef DEBUGGING
2401
2402 STATIC void
2403 S_debug_start_match(pTHX_ const regexp *prog, const bool do_utf8,
2404     const char *start, const char *end, const char *blurb)
2405 {
2406     const bool utf8_pat= prog->reganch & ROPT_UTF8 ? 1 : 0;
2407     if (!PL_colorset)
2408             reginitcolors();
2409     {
2410         RE_PV_QUOTED_DECL(s0, utf8_pat, PERL_DEBUG_PAD_ZERO(0),
2411             prog->precomp, prog->prelen, 60);
2412
2413         RE_PV_QUOTED_DECL(s1, do_utf8, PERL_DEBUG_PAD_ZERO(1),
2414             start, end - start, 60);
2415
2416         PerlIO_printf(Perl_debug_log,
2417             "%s%s REx%s %s against %s\n",
2418                        PL_colors[4], blurb, PL_colors[5], s0, s1);
2419
2420         if (do_utf8||utf8_pat)
2421             PerlIO_printf(Perl_debug_log, "UTF-8 %s%s%s...\n",
2422                 utf8_pat ? "pattern" : "",
2423                 utf8_pat && do_utf8 ? " and " : "",
2424                 do_utf8 ? "string" : ""
2425             );
2426     }
2427 }
2428
2429 STATIC void
2430 S_dump_exec_pos(pTHX_ const char *locinput,
2431                       const regnode *scan,
2432                       const char *loc_regeol,
2433                       const char *loc_bostr,
2434                       const char *loc_reg_starttry,
2435                       const bool do_utf8)
2436 {
2437     const int docolor = *PL_colors[0] || *PL_colors[2] || *PL_colors[4];
2438     const int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
2439     int l = (loc_regeol - locinput) > taill ? taill : (loc_regeol - locinput);
2440     /* The part of the string before starttry has one color
2441        (pref0_len chars), between starttry and current
2442        position another one (pref_len - pref0_len chars),
2443        after the current position the third one.
2444        We assume that pref0_len <= pref_len, otherwise we
2445        decrease pref0_len.  */
2446     int pref_len = (locinput - loc_bostr) > (5 + taill) - l
2447         ? (5 + taill) - l : locinput - loc_bostr;
2448     int pref0_len;
2449
2450     while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput - pref_len)))
2451         pref_len++;
2452     pref0_len = pref_len  - (locinput - loc_reg_starttry);
2453     if (l + pref_len < (5 + taill) && l < loc_regeol - locinput)
2454         l = ( loc_regeol - locinput > (5 + taill) - pref_len
2455               ? (5 + taill) - pref_len : loc_regeol - locinput);
2456     while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput + l)))
2457         l--;
2458     if (pref0_len < 0)
2459         pref0_len = 0;
2460     if (pref0_len > pref_len)
2461         pref0_len = pref_len;
2462     {
2463         const int is_uni = (do_utf8 && OP(scan) != CANY) ? 1 : 0;
2464
2465         RE_PV_COLOR_DECL(s0,len0,is_uni,PERL_DEBUG_PAD(0),
2466             (locinput - pref_len),pref0_len, 60, 4, 5);
2467
2468         RE_PV_COLOR_DECL(s1,len1,is_uni,PERL_DEBUG_PAD(1),
2469                     (locinput - pref_len + pref0_len),
2470                     pref_len - pref0_len, 60, 2, 3);
2471
2472         RE_PV_COLOR_DECL(s2,len2,is_uni,PERL_DEBUG_PAD(2),
2473                     locinput, loc_regeol - locinput, 10, 0, 1);
2474
2475         const STRLEN tlen=len0+len1+len2;
2476         PerlIO_printf(Perl_debug_log,
2477                     "%4"IVdf" <%.*s%.*s%s%.*s>%*s|",
2478                     (IV)(locinput - loc_bostr),
2479                     len0, s0,
2480                     len1, s1,
2481                     (docolor ? "" : "> <"),
2482                     len2, s2,
2483                     (int)(tlen > 19 ? 0 :  19 - tlen),
2484                     "");
2485     }
2486 }
2487
2488 #endif
2489
2490 STATIC I32                      /* 0 failure, 1 success */
2491 S_regmatch(pTHX_ const regmatch_info *reginfo, regnode *prog)
2492 {
2493 #if PERL_VERSION < 9
2494     dMY_CXT;
2495 #endif
2496     dVAR;
2497     register const bool do_utf8 = PL_reg_match_utf8;
2498     const U32 uniflags = UTF8_ALLOW_DEFAULT;
2499
2500     regexp *rex = reginfo->prog;
2501
2502     regmatch_slab  *orig_slab;
2503     regmatch_state *orig_state;
2504
2505     /* the current state. This is a cached copy of PL_regmatch_state */
2506     register regmatch_state *st;
2507
2508     /* cache heavy used fields of st in registers */
2509     register regnode *scan;
2510     register regnode *next;
2511     register I32 n = 0; /* initialize to shut up compiler warning */
2512     register char *locinput = PL_reginput;
2513
2514     /* these variables are NOT saved during a recusive RFEGMATCH: */
2515     register I32 nextchr;   /* is always set to UCHARAT(locinput) */
2516     bool result = 0;        /* return value of S_regmatch */
2517     int depth = 0;          /* depth of recursion */
2518     int nochange_depth = 0; /* depth of RECURSE recursion with nochange*/
2519     regmatch_state *yes_state = NULL; /* state to pop to on success of
2520                                                             subpattern */
2521     regmatch_state *cur_eval = NULL; /* most recent EVAL_AB state */
2522     struct regmatch_state  *cur_curlyx = NULL; /* most recent curlyx */
2523     U32 state_num;
2524
2525     I32 parenfloor = 0;
2526
2527 #ifdef DEBUGGING
2528     GET_RE_DEBUG_FLAGS_DECL;
2529 #endif
2530
2531     /* on first ever call to regmatch, allocate first slab */
2532     if (!PL_regmatch_slab) {
2533         Newx(PL_regmatch_slab, 1, regmatch_slab);
2534         PL_regmatch_slab->prev = NULL;
2535         PL_regmatch_slab->next = NULL;
2536         PL_regmatch_state = SLAB_FIRST(PL_regmatch_slab);
2537     }
2538
2539     /* remember current high-water mark for exit */
2540     /* XXX this should be done with SAVE* instead */
2541     orig_slab  = PL_regmatch_slab;
2542     orig_state = PL_regmatch_state;
2543
2544     /* grab next free state slot */
2545     st = ++PL_regmatch_state;
2546     if (st >  SLAB_LAST(PL_regmatch_slab))
2547         st = PL_regmatch_state = S_push_slab(aTHX);
2548
2549     st->minmod = 0;
2550     st->sw = 0;
2551     st->logical = 0;
2552     cur_curlyx = NULL;
2553
2554     /* Note that nextchr is a byte even in UTF */
2555     nextchr = UCHARAT(locinput);
2556     scan = prog;
2557     while (scan != NULL) {
2558
2559         DEBUG_EXECUTE_r( {
2560             SV * const prop = sv_newmortal();
2561             regnode *rnext=regnext(scan);
2562             DUMP_EXEC_POS( locinput, scan, do_utf8 );
2563             regprop(rex, prop, scan);
2564
2565             PerlIO_printf(Perl_debug_log,
2566                     "%3"IVdf":%*s%s(%"IVdf")\n",
2567                     (IV)(scan - rex->program), depth*2, "",
2568                     SvPVX_const(prop),
2569                     (PL_regkind[OP(scan)] == END || !rnext) ?
2570                         0 : (IV)(rnext - rex->program));
2571         });
2572
2573         next = scan + NEXT_OFF(scan);
2574         if (next == scan)
2575             next = NULL;
2576         state_num = OP(scan);
2577
2578       reenter_switch:
2579         switch (state_num) {
2580         case BOL:
2581             if (locinput == PL_bostr)
2582             {
2583                 /* reginfo->till = reginfo->bol; */
2584                 break;
2585             }
2586             sayNO;
2587         case MBOL:
2588             if (locinput == PL_bostr ||
2589                 ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n'))
2590             {
2591                 break;
2592             }
2593             sayNO;
2594         case SBOL:
2595             if (locinput == PL_bostr)
2596                 break;
2597             sayNO;
2598         case GPOS:
2599             if (locinput == reginfo->ganch)
2600                 break;
2601             sayNO;
2602         case EOL:
2603                 goto seol;
2604         case MEOL:
2605             if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
2606                 sayNO;
2607             break;
2608         case SEOL:
2609           seol:
2610             if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
2611                 sayNO;
2612             if (PL_regeol - locinput > 1)
2613                 sayNO;
2614             break;
2615         case EOS:
2616             if (PL_regeol != locinput)
2617                 sayNO;
2618             break;
2619         case SANY:
2620             if (!nextchr && locinput >= PL_regeol)
2621                 sayNO;
2622             if (do_utf8) {
2623                 locinput += PL_utf8skip[nextchr];
2624                 if (locinput > PL_regeol)
2625                     sayNO;
2626                 nextchr = UCHARAT(locinput);
2627             }
2628             else
2629                 nextchr = UCHARAT(++locinput);
2630             break;
2631         case CANY:
2632             if (!nextchr && locinput >= PL_regeol)
2633                 sayNO;
2634             nextchr = UCHARAT(++locinput);
2635             break;
2636         case REG_ANY:
2637             if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
2638                 sayNO;
2639             if (do_utf8) {
2640                 locinput += PL_utf8skip[nextchr];
2641                 if (locinput > PL_regeol)
2642                     sayNO;
2643                 nextchr = UCHARAT(locinput);
2644             }
2645             else
2646                 nextchr = UCHARAT(++locinput);
2647             break;
2648
2649 #undef  ST
2650 #define ST st->u.trie
2651         case TRIEC:
2652             /* In this case the charclass data is available inline so
2653                we can fail fast without a lot of extra overhead.
2654              */
2655             if (scan->flags == EXACT || !do_utf8) {
2656                 if(!ANYOF_BITMAP_TEST(scan, *locinput)) {
2657                     DEBUG_EXECUTE_r(
2658                         PerlIO_printf(Perl_debug_log,
2659                                   "%*s  %sfailed to match trie start class...%s\n",
2660                                   REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5])
2661                     );
2662                     sayNO_SILENT;
2663                     /* NOTREACHED */
2664                 }
2665             }
2666             /* FALL THROUGH */
2667         case TRIE:
2668             {
2669                 /* what type of TRIE am I? (utf8 makes this contextual) */
2670                 const enum { trie_plain, trie_utf8, trie_utf8_fold }
2671                     trie_type = do_utf8 ?
2672                           (scan->flags == EXACT ? trie_utf8 : trie_utf8_fold)
2673                         : trie_plain;
2674
2675                 /* what trie are we using right now */
2676                 reg_trie_data * const trie
2677                     = (reg_trie_data*)rex->data->data[ ARG( scan ) ];
2678                 U32 state = trie->startstate;
2679
2680                 if (trie->bitmap && trie_type != trie_utf8_fold &&
2681                     !TRIE_BITMAP_TEST(trie,*locinput)
2682                 ) {
2683                     if (trie->states[ state ].wordnum) {
2684                          DEBUG_EXECUTE_r(
2685                             PerlIO_printf(Perl_debug_log,
2686                                           "%*s  %smatched empty string...%s\n",
2687                                           REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5])
2688                         );
2689                         break;
2690                     } else {
2691                         DEBUG_EXECUTE_r(
2692                             PerlIO_printf(Perl_debug_log,
2693                                           "%*s  %sfailed to match trie start class...%s\n",
2694                                           REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5])
2695                         );
2696                         sayNO_SILENT;
2697                    }
2698                 }
2699
2700             {
2701                 U8 *uc = ( U8* )locinput;
2702
2703                 STRLEN len = 0;
2704                 STRLEN foldlen = 0;
2705                 U8 *uscan = (U8*)NULL;
2706                 STRLEN bufflen=0;
2707                 SV *sv_accept_buff = NULL;
2708                 U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ];
2709
2710                 ST.accepted = 0; /* how many accepting states we have seen */
2711                 ST.B = next;
2712                 ST.jump = trie->jump;
2713
2714 #ifdef DEBUGGING
2715                 ST.me = scan;
2716 #endif
2717
2718
2719
2720                 /*
2721                    traverse the TRIE keeping track of all accepting states
2722                    we transition through until we get to a failing node.
2723                 */
2724
2725                 while ( state && uc <= (U8*)PL_regeol ) {
2726                     U32 base = trie->states[ state ].trans.base;
2727                     UV uvc = 0;
2728                     U16 charid;
2729                     /* We use charid to hold the wordnum as we don't use it
2730                        for charid until after we have done the wordnum logic.
2731                        We define an alias just so that the wordnum logic reads
2732                        more naturally. */
2733
2734 #define got_wordnum charid
2735                     got_wordnum = trie->states[ state ].wordnum;
2736
2737                     if ( got_wordnum ) {
2738                         if ( ! ST.accepted ) {
2739                             ENTER;
2740                             SAVETMPS;
2741                             bufflen = TRIE_INITAL_ACCEPT_BUFFLEN;
2742                             sv_accept_buff=newSV(bufflen *
2743                                             sizeof(reg_trie_accepted) - 1);
2744                             SvCUR_set(sv_accept_buff, 0);
2745                             SvPOK_on(sv_accept_buff);
2746                             sv_2mortal(sv_accept_buff);
2747                             SAVETMPS;
2748                             ST.accept_buff =
2749                                 (reg_trie_accepted*)SvPV_nolen(sv_accept_buff );
2750                         }
2751                         do {
2752                             if (ST.accepted >= bufflen) {
2753                                 bufflen *= 2;
2754                                 ST.accept_buff =(reg_trie_accepted*)
2755                                     SvGROW(sv_accept_buff,
2756                                         bufflen * sizeof(reg_trie_accepted));
2757                             }
2758                             SvCUR_set(sv_accept_buff,SvCUR(sv_accept_buff)
2759                                 + sizeof(reg_trie_accepted));
2760
2761
2762                             ST.accept_buff[ST.accepted].wordnum = got_wordnum;
2763                             ST.accept_buff[ST.accepted].endpos = uc;
2764                             ++ST.accepted;
2765                         } while (trie->nextword && (got_wordnum= trie->nextword[got_wordnum]));
2766                     }
2767 #undef got_wordnum
2768
2769                     DEBUG_TRIE_EXECUTE_r({
2770                                 DUMP_EXEC_POS( (char *)uc, scan, do_utf8 );
2771                                 PerlIO_printf( Perl_debug_log,
2772                                     "%*s  %sState: %4"UVxf" Accepted: %4"UVxf" ",
2773                                     2+depth * 2, "", PL_colors[4],
2774                                     (UV)state, (UV)ST.accepted );
2775                     });
2776
2777                     if ( base ) {
2778                         REXEC_TRIE_READ_CHAR(trie_type, trie, uc, uscan, len,
2779                             uvc, charid, foldlen, foldbuf, uniflags);
2780
2781                         if (charid &&
2782                              (base + charid > trie->uniquecharcount )
2783                              && (base + charid - 1 - trie->uniquecharcount
2784                                     < trie->lasttrans)
2785                              && trie->trans[base + charid - 1 -
2786                                     trie->uniquecharcount].check == state)
2787                         {
2788                             state = trie->trans[base + charid - 1 -
2789                                 trie->uniquecharcount ].next;
2790                         }
2791                         else {
2792                             state = 0;
2793                         }
2794                         uc += len;
2795
2796                     }
2797                     else {
2798                         state = 0;
2799                     }
2800                     DEBUG_TRIE_EXECUTE_r(
2801                         PerlIO_printf( Perl_debug_log,
2802                             "Charid:%3x CP:%4"UVxf" After State: %4"UVxf"%s\n",
2803                             charid, uvc, (UV)state, PL_colors[5] );
2804                     );
2805                 }
2806                 if (!ST.accepted )
2807                    sayNO;
2808
2809                 DEBUG_EXECUTE_r(
2810                     PerlIO_printf( Perl_debug_log,
2811                         "%*s  %sgot %"IVdf" possible matches%s\n",
2812                         REPORT_CODE_OFF + depth * 2, "",
2813                         PL_colors[4], (IV)ST.accepted, PL_colors[5] );
2814                 );
2815             }}
2816
2817             /* FALL THROUGH */
2818
2819         case TRIE_next_fail: /* we failed - try next alterative */
2820
2821             if ( ST.accepted == 1 ) {
2822                 /* only one choice left - just continue */
2823                 DEBUG_EXECUTE_r({
2824                     reg_trie_data * const trie
2825                         = (reg_trie_data*)rex->data->data[ ARG(ST.me) ];
2826                     SV ** const tmp = RX_DEBUG(reginfo->prog)
2827                                     ? av_fetch( trie->words, ST.accept_buff[ 0 ].wordnum-1, 0 )
2828                                     : NULL;
2829                     PerlIO_printf( Perl_debug_log,
2830                         "%*s  %sonly one match left: #%d <%s>%s\n",
2831                         REPORT_CODE_OFF+depth*2, "", PL_colors[4],
2832                         ST.accept_buff[ 0 ].wordnum,
2833                         tmp ? SvPV_nolen_const( *tmp ) : "not compiled under -Dr",
2834                         PL_colors[5] );
2835                 });
2836                 PL_reginput = (char *)ST.accept_buff[ 0 ].endpos;
2837                 /* in this case we free tmps/leave before we call regmatch
2838                    as we wont be using accept_buff again. */
2839                 FREETMPS;
2840                 LEAVE;
2841                 locinput = PL_reginput;
2842                 nextchr = UCHARAT(locinput);
2843
2844                 if ( !ST.jump )
2845                     scan = ST.B;
2846                 else
2847                     scan = ST.B - ST.jump[ST.accept_buff[0].wordnum];
2848
2849                 continue; /* execute rest of RE */
2850             }
2851
2852             if (!ST.accepted-- ) {
2853                 FREETMPS;
2854                 LEAVE;
2855                 sayNO;
2856             }
2857
2858             /*
2859                There are at least two accepting states left.  Presumably
2860                the number of accepting states is going to be low,
2861                typically two. So we simply scan through to find the one
2862                with lowest wordnum.  Once we find it, we swap the last
2863                state into its place and decrement the size. We then try to
2864                match the rest of the pattern at the point where the word
2865                ends. If we succeed, control just continues along the
2866                regex; if we fail we return here to try the next accepting
2867                state
2868              */
2869
2870             {
2871                 U32 best = 0;
2872                 U32 cur;
2873                 for( cur = 1 ; cur <= ST.accepted ; cur++ ) {
2874                     DEBUG_TRIE_EXECUTE_r(
2875                         PerlIO_printf( Perl_debug_log,
2876                             "%*s  %sgot %"IVdf" (%d) as best, looking at %"IVdf" (%d)%s\n",
2877                             REPORT_CODE_OFF + depth * 2, "", PL_colors[4],
2878                             (IV)best, ST.accept_buff[ best ].wordnum, (IV)cur,
2879                             ST.accept_buff[ cur ].wordnum, PL_colors[5] );
2880                     );
2881
2882                     if (ST.accept_buff[cur].wordnum <
2883                             ST.accept_buff[best].wordnum)
2884                         best = cur;
2885                 }
2886
2887                 DEBUG_EXECUTE_r({
2888                     reg_trie_data * const trie
2889                         = (reg_trie_data*)rex->data->data[ ARG(ST.me) ];
2890                     SV ** const tmp = RX_DEBUG(reginfo->prog)
2891                                 ? av_fetch( trie->words, ST.accept_buff[ best ].wordnum - 1, 0 )
2892                                 : NULL;
2893                     regnode *nextop=!ST.jump ?
2894                                     ST.B :
2895                                     ST.B - ST.jump[ST.accept_buff[best].wordnum];
2896                     PerlIO_printf( Perl_debug_log,
2897                         "%*s  %strying alternation #%d <%s> at node #%d %s\n",
2898                         REPORT_CODE_OFF+depth*2, "", PL_colors[4],
2899                         ST.accept_buff[best].wordnum,
2900                         tmp ? SvPV_nolen_const( *tmp ) : "not compiled under -Dr",
2901                             REG_NODE_NUM(nextop),
2902                         PL_colors[5] );
2903                 });
2904
2905                 if ( best<ST.accepted ) {
2906                     reg_trie_accepted tmp = ST.accept_buff[ best ];
2907                     ST.accept_buff[ best ] = ST.accept_buff[ ST.accepted ];
2908                     ST.accept_buff[ ST.accepted ] = tmp;
2909                     best = ST.accepted;
2910                 }
2911                 PL_reginput = (char *)ST.accept_buff[ best ].endpos;
2912                 if ( !ST.jump ) {
2913                     PUSH_STATE_GOTO(TRIE_next, ST.B);
2914                     /* NOTREACHED */
2915                 } else {
2916                     PUSH_STATE_GOTO(TRIE_next, ST.B - ST.jump[ST.accept_buff[best].wordnum]);
2917                     /* NOTREACHED */
2918                 }
2919                 /* NOTREACHED */
2920             }
2921             /* NOTREACHED */
2922
2923 #undef  ST
2924
2925         case EXACT: {
2926             char *s = STRING(scan);
2927             st->ln = STR_LEN(scan);
2928             if (do_utf8 != UTF) {
2929                 /* The target and the pattern have differing utf8ness. */
2930                 char *l = locinput;
2931                 const char * const e = s + st->ln;
2932
2933                 if (do_utf8) {
2934                     /* The target is utf8, the pattern is not utf8. */
2935                     while (s < e) {
2936                         STRLEN ulen;
2937                         if (l >= PL_regeol)
2938                              sayNO;
2939                         if (NATIVE_TO_UNI(*(U8*)s) !=
2940                             utf8n_to_uvuni((U8*)l, UTF8_MAXBYTES, &ulen,
2941                                             uniflags))
2942                              sayNO;
2943                         l += ulen;
2944                         s ++;
2945                     }
2946                 }
2947                 else {
2948                     /* The target is not utf8, the pattern is utf8. */
2949                     while (s < e) {
2950                         STRLEN ulen;
2951                         if (l >= PL_regeol)
2952                             sayNO;
2953                         if (NATIVE_TO_UNI(*((U8*)l)) !=
2954                             utf8n_to_uvuni((U8*)s, UTF8_MAXBYTES, &ulen,
2955                                            uniflags))
2956                             sayNO;
2957                         s += ulen;
2958                         l ++;
2959                     }
2960                 }
2961                 locinput = l;
2962                 nextchr = UCHARAT(locinput);
2963                 break;
2964             }
2965             /* The target and the pattern have the same utf8ness. */
2966             /* Inline the first character, for speed. */
2967             if (UCHARAT(s) != nextchr)
2968                 sayNO;
2969             if (PL_regeol - locinput < st->ln)
2970                 sayNO;
2971             if (st->ln > 1 && memNE(s, locinput, st->ln))
2972                 sayNO;
2973             locinput += st->ln;
2974             nextchr = UCHARAT(locinput);
2975             break;
2976             }
2977         case EXACTFL:
2978             PL_reg_flags |= RF_tainted;
2979             /* FALL THROUGH */
2980         case EXACTF: {
2981             char * const s = STRING(scan);
2982             st->ln = STR_LEN(scan);
2983
2984             if (do_utf8 || UTF) {
2985               /* Either target or the pattern are utf8. */
2986                 const char * const l = locinput;
2987                 char *e = PL_regeol;
2988
2989                 if (ibcmp_utf8(s, 0,  st->ln, (bool)UTF,
2990                                l, &e, 0,  do_utf8)) {
2991                      /* One more case for the sharp s:
2992                       * pack("U0U*", 0xDF) =~ /ss/i,
2993                       * the 0xC3 0x9F are the UTF-8
2994                       * byte sequence for the U+00DF. */
2995                      if (!(do_utf8 &&
2996                            toLOWER(s[0]) == 's' &&
2997                            st->ln >= 2 &&
2998                            toLOWER(s[1]) == 's' &&
2999                            (U8)l[0] == 0xC3 &&
3000                            e - l >= 2 &&
3001                            (U8)l[1] == 0x9F))
3002                           sayNO;
3003                 }
3004                 locinput = e;
3005                 nextchr = UCHARAT(locinput);
3006                 break;
3007             }
3008
3009             /* Neither the target and the pattern are utf8. */
3010
3011             /* Inline the first character, for speed. */
3012             if (UCHARAT(s) != nextchr &&
3013                 UCHARAT(s) != ((OP(scan) == EXACTF)
3014                                ? PL_fold : PL_fold_locale)[nextchr])
3015                 sayNO;
3016             if (PL_regeol - locinput < st->ln)
3017                 sayNO;
3018             if (st->ln > 1 && (OP(scan) == EXACTF
3019                            ? ibcmp(s, locinput, st->ln)
3020                            : ibcmp_locale(s, locinput, st->ln)))
3021                 sayNO;
3022             locinput += st->ln;
3023             nextchr = UCHARAT(locinput);
3024             break;
3025             }
3026         case ANYOF:
3027             if (do_utf8) {
3028                 STRLEN inclasslen = PL_regeol - locinput;
3029
3030                 if (!reginclass(rex, scan, (U8*)locinput, &inclasslen, do_utf8))
3031                     goto anyof_fail;
3032                 if (locinput >= PL_regeol)
3033                     sayNO;
3034                 locinput += inclasslen ? inclasslen : UTF8SKIP(locinput);
3035                 nextchr = UCHARAT(locinput);
3036                 break;
3037             }
3038             else {
3039                 if (nextchr < 0)
3040                     nextchr = UCHARAT(locinput);
3041                 if (!REGINCLASS(rex, scan, (U8*)locinput))
3042                     goto anyof_fail;
3043                 if (!nextchr && locinput >= PL_regeol)
3044                     sayNO;
3045                 nextchr = UCHARAT(++locinput);
3046                 break;
3047             }
3048         anyof_fail:
3049             /* If we might have the case of the German sharp s
3050              * in a casefolding Unicode character class. */
3051
3052             if (ANYOF_FOLD_SHARP_S(scan, locinput, PL_regeol)) {
3053                  locinput += SHARP_S_SKIP;
3054                  nextchr = UCHARAT(locinput);
3055             }
3056             else
3057                  sayNO;
3058             break;
3059         case ALNUML:
3060             PL_reg_flags |= RF_tainted;
3061             /* FALL THROUGH */
3062         case ALNUM:
3063             if (!nextchr)
3064                 sayNO;
3065             if (do_utf8) {
3066                 LOAD_UTF8_CHARCLASS_ALNUM();
3067                 if (!(OP(scan) == ALNUM
3068                       ? (bool)swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
3069                       : isALNUM_LC_utf8((U8*)locinput)))
3070                 {
3071                     sayNO;
3072                 }
3073                 locinput += PL_utf8skip[nextchr];
3074                 nextchr = UCHARAT(locinput);
3075                 break;
3076             }
3077             if (!(OP(scan) == ALNUM
3078                   ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
3079                 sayNO;
3080             nextchr = UCHARAT(++locinput);
3081             break;
3082         case NALNUML:
3083             PL_reg_flags |= RF_tainted;
3084             /* FALL THROUGH */
3085         case NALNUM:
3086             if (!nextchr && locinput >= PL_regeol)
3087                 sayNO;
3088             if (do_utf8) {
3089                 LOAD_UTF8_CHARCLASS_ALNUM();
3090                 if (OP(scan) == NALNUM
3091                     ? (bool)swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
3092                     : isALNUM_LC_utf8((U8*)locinput))
3093                 {
3094                     sayNO;
3095                 }
3096                 locinput += PL_utf8skip[nextchr];
3097                 nextchr = UCHARAT(locinput);
3098                 break;
3099             }
3100             if (OP(scan) == NALNUM
3101                 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
3102                 sayNO;
3103             nextchr = UCHARAT(++locinput);
3104             break;
3105         case BOUNDL:
3106         case NBOUNDL:
3107             PL_reg_flags |= RF_tainted;
3108             /* FALL THROUGH */
3109         case BOUND:
3110         case NBOUND:
3111             /* was last char in word? */
3112             if (do_utf8) {
3113                 if (locinput == PL_bostr)
3114                     st->ln = '\n';
3115                 else {
3116                     const U8 * const r = reghop3((U8*)locinput, -1, (U8*)PL_bostr);
3117
3118                     st->ln = utf8n_to_uvchr(r, UTF8SKIP(r), 0, uniflags);
3119                 }
3120                 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
3121                     st->ln = isALNUM_uni(st->ln);
3122                     LOAD_UTF8_CHARCLASS_ALNUM();
3123                     n = swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8);
3124                 }
3125                 else {
3126                     st->ln = isALNUM_LC_uvchr(UNI_TO_NATIVE(st->ln));
3127                     n = isALNUM_LC_utf8((U8*)locinput);
3128                 }
3129             }
3130             else {
3131                 st->ln = (locinput != PL_bostr) ?
3132                     UCHARAT(locinput - 1) : '\n';
3133                 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
3134                     st->ln = isALNUM(st->ln);
3135                     n = isALNUM(nextchr);
3136                 }
3137                 else {
3138                     st->ln = isALNUM_LC(st->ln);
3139                     n = isALNUM_LC(nextchr);
3140                 }
3141             }
3142             if (((!st->ln) == (!n)) == (OP(scan) == BOUND ||
3143                                     OP(scan) == BOUNDL))
3144                     sayNO;
3145             break;
3146         case SPACEL:
3147             PL_reg_flags |= RF_tainted;
3148             /* FALL THROUGH */
3149         case SPACE:
3150             if (!nextchr)
3151                 sayNO;
3152             if (do_utf8) {
3153                 if (UTF8_IS_CONTINUED(nextchr)) {
3154                     LOAD_UTF8_CHARCLASS_SPACE();
3155                     if (!(OP(scan) == SPACE
3156                           ? (bool)swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
3157                           : isSPACE_LC_utf8((U8*)locinput)))
3158                     {
3159                         sayNO;
3160                     }
3161                     locinput += PL_utf8skip[nextchr];
3162                     nextchr = UCHARAT(locinput);
3163                     break;
3164                 }
3165                 if (!(OP(scan) == SPACE
3166                       ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
3167                     sayNO;
3168                 nextchr = UCHARAT(++locinput);
3169             }
3170             else {
3171                 if (!(OP(scan) == SPACE
3172                       ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
3173                     sayNO;
3174                 nextchr = UCHARAT(++locinput);
3175             }
3176             break;
3177         case NSPACEL:
3178             PL_reg_flags |= RF_tainted;
3179             /* FALL THROUGH */
3180         case NSPACE:
3181             if (!nextchr && locinput >= PL_regeol)
3182                 sayNO;
3183             if (do_utf8) {
3184                 LOAD_UTF8_CHARCLASS_SPACE();
3185                 if (OP(scan) == NSPACE
3186                     ? (bool)swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
3187                     : isSPACE_LC_utf8((U8*)locinput))
3188                 {
3189                     sayNO;
3190                 }
3191                 locinput += PL_utf8skip[nextchr];
3192                 nextchr = UCHARAT(locinput);
3193                 break;
3194             }
3195             if (OP(scan) == NSPACE
3196                 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
3197                 sayNO;
3198             nextchr = UCHARAT(++locinput);
3199             break;
3200         case DIGITL:
3201             PL_reg_flags |= RF_tainted;
3202             /* FALL THROUGH */
3203         case DIGIT:
3204             if (!nextchr)
3205                 sayNO;
3206             if (do_utf8) {
3207                 LOAD_UTF8_CHARCLASS_DIGIT();
3208                 if (!(OP(scan) == DIGIT
3209                       ? (bool)swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
3210                       : isDIGIT_LC_utf8((U8*)locinput)))
3211                 {
3212                     sayNO;
3213                 }
3214                 locinput += PL_utf8skip[nextchr];
3215                 nextchr = UCHARAT(locinput);
3216                 break;
3217             }
3218             if (!(OP(scan) == DIGIT
3219                   ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
3220                 sayNO;
3221             nextchr = UCHARAT(++locinput);
3222             break;
3223         case NDIGITL:
3224             PL_reg_flags |= RF_tainted;
3225             /* FALL THROUGH */
3226         case NDIGIT:
3227             if (!nextchr && locinput >= PL_regeol)
3228                 sayNO;
3229             if (do_utf8) {
3230                 LOAD_UTF8_CHARCLASS_DIGIT();
3231                 if (OP(scan) == NDIGIT
3232                     ? (bool)swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
3233                     : isDIGIT_LC_utf8((U8*)locinput))
3234                 {
3235                     sayNO;
3236                 }
3237                 locinput += PL_utf8skip[nextchr];
3238                 nextchr = UCHARAT(locinput);
3239                 break;
3240             }
3241             if (OP(scan) == NDIGIT
3242                 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
3243                 sayNO;
3244             nextchr = UCHARAT(++locinput);
3245             break;
3246         case CLUMP:
3247             if (locinput >= PL_regeol)
3248                 sayNO;
3249             if  (do_utf8) {
3250                 LOAD_UTF8_CHARCLASS_MARK();
3251                 if (swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
3252                     sayNO;
3253                 locinput += PL_utf8skip[nextchr];
3254                 while (locinput < PL_regeol &&
3255                        swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
3256                     locinput += UTF8SKIP(locinput);
3257                 if (locinput > PL_regeol)
3258                     sayNO;
3259             }
3260             else
3261                locinput++;
3262             nextchr = UCHARAT(locinput);
3263             break;
3264         case REFFL:
3265             PL_reg_flags |= RF_tainted;
3266             /* FALL THROUGH */
3267         case REF:
3268         case REFF: {
3269             char *s;
3270             n = ARG(scan);  /* which paren pair */
3271             st->ln = PL_regstartp[n];
3272             PL_reg_leftiter = PL_reg_maxiter;           /* Void cache */
3273             if ((I32)*PL_reglastparen < n || st->ln == -1)
3274                 sayNO;                  /* Do not match unless seen CLOSEn. */
3275             if (st->ln == PL_regendp[n])
3276                 break;
3277
3278             s = PL_bostr + st->ln;
3279             if (do_utf8 && OP(scan) != REF) {   /* REF can do byte comparison */
3280                 char *l = locinput;
3281                 const char *e = PL_bostr + PL_regendp[n];
3282                 /*
3283                  * Note that we can't do the "other character" lookup trick as
3284                  * in the 8-bit case (no pun intended) because in Unicode we
3285                  * have to map both upper and title case to lower case.
3286                  */
3287                 if (OP(scan) == REFF) {
3288                     while (s < e) {
3289                         STRLEN ulen1, ulen2;
3290                         U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
3291                         U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
3292
3293                         if (l >= PL_regeol)
3294                             sayNO;
3295                         toLOWER_utf8((U8*)s, tmpbuf1, &ulen1);
3296                         toLOWER_utf8((U8*)l, tmpbuf2, &ulen2);
3297                         if (ulen1 != ulen2 || memNE((char *)tmpbuf1, (char *)tmpbuf2, ulen1))
3298                             sayNO;
3299                         s += ulen1;
3300                         l += ulen2;
3301                     }
3302                 }
3303                 locinput = l;
3304                 nextchr = UCHARAT(locinput);
3305                 break;
3306             }
3307
3308             /* Inline the first character, for speed. */
3309             if (UCHARAT(s) != nextchr &&
3310                 (OP(scan) == REF ||
3311                  (UCHARAT(s) != ((OP(scan) == REFF
3312                                   ? PL_fold : PL_fold_locale)[nextchr]))))
3313                 sayNO;
3314             st->ln = PL_regendp[n] - st->ln;
3315             if (locinput + st->ln > PL_regeol)
3316                 sayNO;
3317             if (st->ln > 1 && (OP(scan) == REF
3318                            ? memNE(s, locinput, st->ln)
3319                            : (OP(scan) == REFF
3320                               ? ibcmp(s, locinput, st->ln)
3321                               : ibcmp_locale(s, locinput, st->ln))))
3322                 sayNO;
3323             locinput += st->ln;
3324             nextchr = UCHARAT(locinput);
3325             break;
3326             }
3327
3328         case NOTHING:
3329         case TAIL:
3330             break;
3331         case BACK:
3332             break;
3333
3334 #undef  ST
3335 #define ST st->u.eval
3336         {
3337             SV *ret;
3338             regexp *re;
3339             regnode *startpoint;
3340
3341         case SRECURSE:
3342         case RECURSE: /*    /(...(?1))/      */
3343             if (cur_eval && cur_eval->locinput==locinput) {
3344                 if (cur_eval->u.eval.close_paren == ARG(scan))
3345                     Perl_croak(aTHX_ "Infinite recursion in RECURSE in regexp");
3346                 if ( ++nochange_depth > MAX_RECURSE_EVAL_NOCHANGE_DEPTH )
3347                     Perl_croak(aTHX_ "RECURSE without pos change exceeded limit in regexp");
3348             } else {
3349                 nochange_depth = 0;
3350             }
3351             re = rex;
3352             (void)ReREFCNT_inc(rex);
3353             if (OP(scan)==RECURSE) {
3354                 startpoint = scan + ARG2L(scan);
3355                 ST.close_paren = ARG(scan);
3356             } else {
3357                 startpoint = re->program+1;
3358                 ST.close_paren = 0;
3359             }
3360             goto eval_recurse_doit;
3361             /* NOTREACHED */
3362         case EVAL:  /*   /(?{A})B/   /(??{A})B/  and /(?(?{A})X|Y)B/   */
3363             if (cur_eval && cur_eval->locinput==locinput) {
3364                 if ( ++nochange_depth > MAX_RECURSE_EVAL_NOCHANGE_DEPTH )
3365                     Perl_croak(aTHX_ "EVAL without pos change exceeded limit in regexp");
3366             } else {
3367                 nochange_depth = 0;
3368             }
3369             {
3370                 /* execute the code in the {...} */
3371                 dSP;
3372                 SV ** const before = SP;
3373                 OP_4tree * const oop = PL_op;
3374                 COP * const ocurcop = PL_curcop;
3375                 PAD *old_comppad;
3376
3377                 n = ARG(scan);
3378                 PL_op = (OP_4tree*)rex->data->data[n];
3379                 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log, "  re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
3380                 PAD_SAVE_LOCAL(old_comppad, (PAD*)rex->data->data[n + 2]);
3381                 PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
3382
3383                 CALLRUNOPS(aTHX);                       /* Scalar context. */
3384                 SPAGAIN;
3385                 if (SP == before)
3386                     ret = &PL_sv_undef;   /* protect against empty (?{}) blocks. */
3387                 else {
3388                     ret = POPs;
3389                     PUTBACK;
3390                 }
3391
3392                 PL_op = oop;
3393                 PAD_RESTORE_LOCAL(old_comppad);
3394                 PL_curcop = ocurcop;
3395                 if (!st->logical) {
3396                     /* /(?{...})/ */
3397                     sv_setsv(save_scalar(PL_replgv), ret);
3398                     break;
3399                 }
3400             }
3401             if (st->logical == 2) { /* Postponed subexpression: /(??{...})/ */
3402
3403                 {
3404                     /* extract RE object from returned value; compiling if
3405                      * necessary */
3406
3407                     MAGIC *mg = NULL;
3408                     const SV *sv;
3409                     if(SvROK(ret) && SvSMAGICAL(sv = SvRV(ret)))
3410                         mg = mg_find(sv, PERL_MAGIC_qr);
3411                     else if (SvSMAGICAL(ret)) {
3412                         if (SvGMAGICAL(ret))
3413                             sv_unmagic(ret, PERL_MAGIC_qr);
3414                         else
3415                             mg = mg_find(ret, PERL_MAGIC_qr);
3416                     }
3417
3418                     if (mg) {
3419                         re = (regexp *)mg->mg_obj;
3420                         (void)ReREFCNT_inc(re);
3421                     }
3422                     else {
3423                         STRLEN len;
3424                         const char * const t = SvPV_const(ret, len);
3425                         PMOP pm;
3426                         const I32 osize = PL_regsize;
3427
3428                         Zero(&pm, 1, PMOP);
3429                         if (DO_UTF8(ret)) pm.op_pmdynflags |= PMdf_DYN_UTF8;
3430                         re = CALLREGCOMP((char*)t, (char*)t + len, &pm);
3431                         if (!(SvFLAGS(ret)
3432                               & (SVs_TEMP | SVs_PADTMP | SVf_READONLY
3433                                 | SVs_GMG)))
3434                             sv_magic(ret,(SV*)ReREFCNT_inc(re),
3435                                         PERL_MAGIC_qr,0,0);
3436                         PL_regsize = osize;
3437                     }
3438                 }
3439                 DEBUG_EXECUTE_r(
3440                     debug_start_match(re, do_utf8, locinput, PL_regeol,
3441                         "Matching embedded");
3442                 );
3443                 startpoint = re->program + 1;
3444                 ST.close_paren = 0; /* only used for RECURSE */
3445                 /* borrowed from regtry */
3446                 if (PL_reg_start_tmpl <= re->nparens) {
3447                     PL_reg_start_tmpl = re->nparens*3/2 + 3;
3448                     if(PL_reg_start_tmp)
3449                         Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
3450                     else
3451                         Newx(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
3452                 }
3453
3454         eval_recurse_doit: /* Share code with RECURSE below this line */
3455                 /* run the pattern returned from (??{...}) */
3456                 ST.cp = regcppush(0);   /* Save *all* the positions. */
3457                 REGCP_SET(ST.lastcp);
3458
3459                 PL_regstartp = re->startp; /* essentially NOOP on RECURSE */
3460                 PL_regendp = re->endp;     /* essentially NOOP on RECURSE */
3461
3462                 *PL_reglastparen = 0;
3463                 *PL_reglastcloseparen = 0;
3464                 PL_reginput = locinput;
3465
3466                 /* XXXX This is too dramatic a measure... */
3467                 PL_reg_maxiter = 0;
3468
3469                 st->logical = 0;
3470                 ST.toggle_reg_flags = PL_reg_flags;
3471                 if (re->reganch & ROPT_UTF8)
3472                     PL_reg_flags |= RF_utf8;
3473                 else
3474                     PL_reg_flags &= ~RF_utf8;
3475                 ST.toggle_reg_flags ^= PL_reg_flags; /* diff of old and new */
3476
3477                 ST.prev_rex = rex;
3478                 ST.prev_curlyx = cur_curlyx;
3479                 rex = re;
3480                 cur_curlyx = NULL;
3481                 ST.B = next;
3482                 ST.prev_eval = cur_eval;
3483                 cur_eval = st;
3484                 /* now continue from first node in postoned RE */
3485                 PUSH_YES_STATE_GOTO(EVAL_AB, startpoint);
3486                 /* NOTREACHED */
3487             }
3488             /* /(?(?{...})X|Y)/ */
3489             st->sw = (bool)SvTRUE(ret);
3490             st->logical = 0;
3491             break;
3492         }
3493
3494         case EVAL_AB: /* cleanup after a successful (??{A})B */
3495             /* note: this is called twice; first after popping B, then A */
3496             PL_reg_flags ^= ST.toggle_reg_flags;
3497             ReREFCNT_dec(rex);
3498             rex = ST.prev_rex;
3499             regcpblow(ST.cp);
3500             cur_eval = ST.prev_eval;
3501             cur_curlyx = ST.prev_curlyx;
3502             /* XXXX This is too dramatic a measure... */
3503             PL_reg_maxiter = 0;
3504             sayYES;
3505
3506
3507         case EVAL_AB_fail: /* unsuccessfully ran A or B in (??{A})B */
3508             /* note: this is called twice; first after popping B, then A */
3509             PL_reg_flags ^= ST.toggle_reg_flags;
3510             ReREFCNT_dec(rex);
3511             rex = ST.prev_rex;
3512             PL_reginput = locinput;
3513             REGCP_UNWIND(ST.lastcp);
3514             regcppop(rex);
3515             cur_eval = ST.prev_eval;
3516             cur_curlyx = ST.prev_curlyx;
3517             /* XXXX This is too dramatic a measure... */
3518             PL_reg_maxiter = 0;
3519             sayNO_SILENT;
3520 #undef ST
3521
3522         case OPEN:
3523             n = ARG(scan);  /* which paren pair */
3524             PL_reg_start_tmp[n] = locinput;
3525             if (n > PL_regsize)
3526                 PL_regsize = n;
3527             break;
3528         case CLOSE:
3529             n = ARG(scan);  /* which paren pair */
3530             PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
3531             PL_regendp[n] = locinput - PL_bostr;
3532             if (n > (I32)*PL_reglastparen)
3533                 *PL_reglastparen = n;
3534             *PL_reglastcloseparen = n;
3535             if (cur_eval && cur_eval->u.eval.close_paren == (U32)n) {
3536                 goto fake_end;
3537             }
3538             break;
3539         case GROUPP:
3540             n = ARG(scan);  /* which paren pair */
3541             st->sw = (bool)((I32)*PL_reglastparen >= n && PL_regendp[n] != -1);
3542             break;
3543         case IFTHEN:
3544             PL_reg_leftiter = PL_reg_maxiter;           /* Void cache */
3545             if (st->sw)
3546                 next = NEXTOPER(NEXTOPER(scan));
3547             else {
3548                 next = scan + ARG(scan);
3549                 if (OP(next) == IFTHEN) /* Fake one. */
3550                     next = NEXTOPER(NEXTOPER(next));
3551             }
3552             break;
3553         case LOGICAL:
3554             st->logical = scan->flags;
3555             break;
3556 /*******************************************************************
3557  cur_curlyx points to the regmatch_state associated with the most recent CURLYX.
3558  This struct contains info about the innermost (...)* loop (an
3559  "infoblock"), and a pointer to the next outer cur_curlyx.
3560
3561  Here is how Y(A)*Z is processed (if it is compiled into CURLYX/WHILEM):
3562
3563    1) After matching Y, regnode for CURLYX is processed;
3564
3565    2) This regnode populates cur_curlyx, and calls regmatch() recursively
3566       with the starting point at WHILEM node;
3567
3568    3) Each hit of WHILEM node tries to match A and Z (in the order
3569       depending on the current iteration, min/max of {min,max} and
3570       greediness).  The information about where are nodes for "A"
3571       and "Z" is read from cur_curlyx, as is info on how many times "A"
3572       was already matched, and greediness.
3573
3574    4) After A matches, the same WHILEM node is hit again.
3575
3576    5) Each time WHILEM is hit, cur_curlyx is the infoblock created by CURLYX
3577       of the same pair.  Thus when WHILEM tries to match Z, it temporarily
3578       resets cur_curlyx, since this Y(A)*Z can be a part of some other loop:
3579       as in (Y(A)*Z)*.  If Z matches, the automaton will hit the WHILEM node
3580       of the external loop.
3581
3582  Currently present infoblocks form a tree with a stem formed by cur_curlyx
3583  and whatever it mentions via ->next, and additional attached trees
3584  corresponding to temporarily unset infoblocks as in "5" above.
3585
3586  In the following picture, infoblocks for outer loop of
3587  (Y(A)*?Z)*?T are denoted O, for inner I.  NULL starting block
3588  is denoted by x.  The matched string is YAAZYAZT.  Temporarily postponed
3589  infoblocks are drawn below the "reset" infoblock.
3590
3591  In fact in the picture below we do not show failed matches for Z and T
3592  by WHILEM blocks.  [We illustrate minimal matches, since for them it is
3593  more obvious *why* one needs to *temporary* unset infoblocks.]
3594
3595   Matched       REx position    InfoBlocks      Comment
3596                 (Y(A)*?Z)*?T    x
3597                 Y(A)*?Z)*?T     x <- O
3598   Y             (A)*?Z)*?T      x <- O
3599   Y             A)*?Z)*?T       x <- O <- I
3600   YA            )*?Z)*?T        x <- O <- I
3601   YA            A)*?Z)*?T       x <- O <- I
3602   YAA           )*?Z)*?T        x <- O <- I
3603   YAA           Z)*?T           x <- O          # Temporary unset I
3604                                      I
3605
3606   YAAZ          Y(A)*?Z)*?T     x <- O
3607                                      I
3608
3609   YAAZY         (A)*?Z)*?T      x <- O
3610                                      I
3611
3612   YAAZY         A)*?Z)*?T       x <- O <- I
3613                                      I
3614
3615   YAAZYA        )*?Z)*?T        x <- O <- I
3616                                      I
3617
3618   YAAZYA        Z)*?T           x <- O          # Temporary unset I
3619                                      I,I
3620
3621   YAAZYAZ       )*?T            x <- O
3622                                      I,I
3623
3624   YAAZYAZ       T               x               # Temporary unset O
3625                                 O
3626                                 I,I
3627
3628   YAAZYAZT                      x
3629                                 O
3630                                 I,I
3631  *******************************************************************/
3632
3633         case CURLYX: {
3634                 /* No need to save/restore up to this paren */
3635                 parenfloor = scan->flags;
3636
3637                 /* Dave says:
3638
3639                    CURLYX and WHILEM are always paired: they're the moral
3640                    equivalent of pp_enteriter anbd pp_iter.
3641
3642                    The only time next could be null is if the node tree is
3643                    corrupt. This was mentioned on p5p a few days ago.
3644
3645                    See http://www.xray.mpe.mpg.de/mailing-lists/perl5-porters/2006-04/msg00556.html
3646                    So we'll assert that this is true:
3647                 */
3648                 assert(next);
3649                 if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
3650                     next += ARG(next);
3651                 /* XXXX Probably it is better to teach regpush to support
3652                    parenfloor > PL_regsize... */
3653                 if (parenfloor > (I32)*PL_reglastparen)
3654                     parenfloor = *PL_reglastparen; /* Pessimization... */
3655
3656                 st->u.curlyx.cp = PL_savestack_ix;
3657                 st->u.curlyx.outercc = cur_curlyx;
3658                 cur_curlyx = st;
3659                 /* these fields contain the state of the current curly.
3660                  * they are accessed by subsequent WHILEMs;
3661                  * cur and lastloc are also updated by WHILEM */
3662                 st->u.curlyx.parenfloor = parenfloor;
3663                 st->u.curlyx.cur = -1; /* this will be updated by WHILEM */
3664                 st->u.curlyx.min = ARG1(scan);
3665                 st->u.curlyx.max  = ARG2(scan);
3666                 st->u.curlyx.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
3667                 st->u.curlyx.lastloc = 0;
3668                 /* st->next and st->minmod are also read by WHILEM */
3669
3670                 PL_reginput = locinput;
3671                 REGMATCH(PREVOPER(next), CURLYX); /* start on the WHILEM */
3672                 /*** all unsaved local vars undefined at this point */
3673                 regcpblow(st->u.curlyx.cp);
3674                 cur_curlyx = st->u.curlyx.outercc;
3675                 saySAME(result);
3676             }
3677             /* NOTREACHED */
3678         case WHILEM: {
3679                 /*
3680                  * This is really hard to understand, because after we match
3681                  * what we're trying to match, we must make sure the rest of
3682                  * the REx is going to match for sure, and to do that we have
3683                  * to go back UP the parse tree by recursing ever deeper.  And
3684                  * if it fails, we have to reset our parent's current state
3685                  * that we can try again after backing off.
3686                  */
3687
3688                 /* Dave says:
3689
3690                    cur_curlyx gets initialised by CURLYX ready for use by WHILEM.
3691                    So again, unless somethings been corrupted, cur_curlyx cannot
3692                    be null at that point in WHILEM.
3693
3694                    See http://www.xray.mpe.mpg.de/mailing-lists/perl5-porters/2006-04/msg00556.html
3695                    So we'll assert that this is true:
3696                 */
3697                 assert(cur_curlyx);
3698                 st->u.whilem.lastloc = cur_curlyx->u.curlyx.lastloc; /* Detection of 0-len. */
3699                 st->u.whilem.cache_offset = 0;
3700                 st->u.whilem.cache_bit = 0;
3701
3702                 n = cur_curlyx->u.curlyx.cur + 1; /* how many we know we matched */
3703                 PL_reginput = locinput;
3704
3705                 DEBUG_EXECUTE_r(
3706                     PerlIO_printf(Perl_debug_log,
3707                                   "%*s  %ld out of %ld..%ld  cc=%"UVxf"\n",
3708                                   REPORT_CODE_OFF+depth*2, "",
3709                                   (long)n, (long)cur_curlyx->u.curlyx.min,
3710                                   (long)cur_curlyx->u.curlyx.max,
3711                                   PTR2UV(cur_curlyx))
3712                     );
3713
3714                 /* If degenerate scan matches "", assume scan done. */
3715
3716                 if (locinput == cur_curlyx->u.curlyx.lastloc && n >=
3717                     cur_curlyx->u.curlyx.min)
3718                 {
3719                     st->u.whilem.savecc = cur_curlyx;
3720                     cur_curlyx = cur_curlyx->u.curlyx.outercc;
3721                     DEBUG_EXECUTE_r(
3722                         PerlIO_printf(Perl_debug_log,
3723                            "%*s  empty match detected, try continuation...\n",
3724                            REPORT_CODE_OFF+depth*2, "")
3725                         );
3726                     REGMATCH(st->u.whilem.savecc->next, WHILEM1);
3727                     /*** all unsaved local vars undefined at this point */
3728                     cur_curlyx = st->u.whilem.savecc;
3729                     if (result)
3730                         sayYES;
3731                     sayNO;
3732                 }
3733
3734                 /* First just match a string of min scans. */
3735
3736                 if (n < cur_curlyx->u.curlyx.min) {
3737                     cur_curlyx->u.curlyx.cur = n;
3738                     cur_curlyx->u.curlyx.lastloc = locinput;
3739                     REGMATCH(cur_curlyx->u.curlyx.scan, WHILEM2);
3740                     /*** all unsaved local vars undefined at this point */
3741                     if (result)
3742                         sayYES;
3743                     cur_curlyx->u.curlyx.cur = n - 1;
3744                     cur_curlyx->u.curlyx.lastloc = st->u.whilem.lastloc;
3745                     sayNO;
3746                 }
3747
3748                 if (scan->flags) {
3749                     /* Check whether we already were at this position.
3750                         Postpone detection until we know the match is not
3751                         *that* much linear. */
3752                 if (!PL_reg_maxiter) {
3753                     PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
3754                     /* possible overflow for long strings and many CURLYX's */
3755                     if (PL_reg_maxiter < 0)
3756                         PL_reg_maxiter = I32_MAX;
3757                     PL_reg_leftiter = PL_reg_maxiter;
3758                 }
3759                 if (PL_reg_leftiter-- == 0) {
3760                     const I32 size = (PL_reg_maxiter + 7)/8;
3761                     if (PL_reg_poscache) {
3762                         if ((I32)PL_reg_poscache_size < size) {
3763                             Renew(PL_reg_poscache, size, char);
3764                             PL_reg_poscache_size = size;
3765                         }
3766                         Zero(PL_reg_poscache, size, char);
3767                     }
3768                     else {
3769                         PL_reg_poscache_size = size;
3770                         Newxz(PL_reg_poscache, size, char);
3771                     }
3772                     DEBUG_EXECUTE_r(
3773                         PerlIO_printf(Perl_debug_log,
3774               "%sDetected a super-linear match, switching on caching%s...\n",
3775                                       PL_colors[4], PL_colors[5])
3776                         );
3777                 }
3778                 if (PL_reg_leftiter < 0) {
3779                     st->u.whilem.cache_offset = locinput - PL_bostr;
3780
3781                     st->u.whilem.cache_offset = (scan->flags & 0xf) - 1
3782                             + st->u.whilem.cache_offset * (scan->flags>>4);
3783                     st->u.whilem.cache_bit = st->u.whilem.cache_offset % 8;
3784                     st->u.whilem.cache_offset /= 8;
3785                     if (PL_reg_poscache[st->u.whilem.cache_offset] & (1<<st->u.whilem.cache_bit)) {
3786                     DEBUG_EXECUTE_r(
3787                         PerlIO_printf(Perl_debug_log,
3788                                       "%*s  already tried at this position...\n",
3789                                       REPORT_CODE_OFF+depth*2, "")
3790                         );
3791                         sayNO; /* cache records failure */
3792                     }
3793                 }
3794                 }
3795
3796                 /* Prefer next over scan for minimal matching. */
3797
3798                 if (cur_curlyx->minmod) {
3799                     st->u.whilem.savecc = cur_curlyx;
3800                     cur_curlyx = cur_curlyx->u.curlyx.outercc;
3801                     st->u.whilem.cp = regcppush(st->u.whilem.savecc->u.curlyx.parenfloor);
3802                     REGCP_SET(st->u.whilem.lastcp);
3803                     REGMATCH(st->u.whilem.savecc->next, WHILEM3);
3804                     /*** all unsaved local vars undefined at this point */
3805                     cur_curlyx = st->u.whilem.savecc;
3806                     if (result) {
3807                         regcpblow(st->u.whilem.cp);
3808                         sayYES; /* All done. */
3809                     }
3810                     REGCP_UNWIND(st->u.whilem.lastcp);
3811                     regcppop(rex);
3812
3813                     if (n >= cur_curlyx->u.curlyx.max) { /* Maximum greed exceeded? */
3814                         if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
3815                             && !(PL_reg_flags & RF_warned)) {
3816                             PL_reg_flags |= RF_warned;
3817                             Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s limit (%d) exceeded",
3818                                  "Complex regular subexpression recursion",
3819                                  REG_INFTY - 1);
3820                         }
3821                         CACHEsayNO;
3822                     }
3823
3824                     DEBUG_EXECUTE_r(
3825                         PerlIO_printf(Perl_debug_log,
3826                                       "%*s  trying longer...\n",
3827                                       REPORT_CODE_OFF+depth*2, "")
3828                         );
3829                     /* Try scanning more and see if it helps. */
3830                     PL_reginput = locinput;
3831                     cur_curlyx->u.curlyx.cur = n;
3832                     cur_curlyx->u.curlyx.lastloc = locinput;
3833                     st->u.whilem.cp = regcppush(cur_curlyx->u.curlyx.parenfloor);
3834                     REGCP_SET(st->u.whilem.lastcp);
3835                     REGMATCH(cur_curlyx->u.curlyx.scan, WHILEM4);
3836                     /*** all unsaved local vars undefined at this point */
3837                     if (result) {
3838                         regcpblow(st->u.whilem.cp);
3839                         sayYES;
3840                     }
3841                     REGCP_UNWIND(st->u.whilem.lastcp);
3842                     regcppop(rex);
3843                     cur_curlyx->u.curlyx.cur = n - 1;
3844                     cur_curlyx->u.curlyx.lastloc = st->u.whilem.lastloc;
3845                     CACHEsayNO;
3846                 }
3847
3848                 /* Prefer scan over next for maximal matching. */
3849
3850                 if (n < cur_curlyx->u.curlyx.max) {     /* More greed allowed? */
3851                     st->u.whilem.cp = regcppush(cur_curlyx->u.curlyx.parenfloor);
3852                     cur_curlyx->u.curlyx.cur = n;
3853                     cur_curlyx->u.curlyx.lastloc = locinput;
3854                     REGCP_SET(st->u.whilem.lastcp);
3855                     REGMATCH(cur_curlyx->u.curlyx.scan, WHILEM5);
3856                     /*** all unsaved local vars undefined at this point */
3857                     if (result) {
3858                         regcpblow(st->u.whilem.cp);
3859                         sayYES;
3860                     }
3861                     REGCP_UNWIND(st->u.whilem.lastcp);
3862                     regcppop(rex);      /* Restore some previous $<digit>s? */
3863                     PL_reginput = locinput;
3864                     DEBUG_EXECUTE_r(
3865                         PerlIO_printf(Perl_debug_log,
3866                                       "%*s  failed, try continuation...\n",
3867                                       REPORT_CODE_OFF+depth*2, "")
3868                         );
3869                 }
3870                 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
3871                         && !(PL_reg_flags & RF_warned)) {
3872                     PL_reg_flags |= RF_warned;
3873                     Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s limit (%d) exceeded",
3874                          "Complex regular subexpression recursion",
3875                          REG_INFTY - 1);
3876                 }
3877
3878                 /* Failed deeper matches of scan, so see if this one works. */
3879                 st->u.whilem.savecc = cur_curlyx;
3880                 cur_curlyx = cur_curlyx->u.curlyx.outercc;
3881                 REGMATCH(st->u.whilem.savecc->next, WHILEM6);
3882                 /*** all unsaved local vars undefined at this point */
3883                 cur_curlyx = st->u.whilem.savecc;
3884                 if (result)
3885                     sayYES;
3886                 cur_curlyx->u.curlyx.cur = n - 1;
3887                 cur_curlyx->u.curlyx.lastloc = st->u.whilem.lastloc;
3888                 CACHEsayNO;
3889             }
3890             /* NOTREACHED */
3891
3892 #undef  ST
3893 #define ST st->u.branch
3894
3895         case BRANCHJ:       /*  /(...|A|...)/ with long next pointer */
3896             next = scan + ARG(scan);
3897             if (next == scan)
3898                 next = NULL;
3899             scan = NEXTOPER(scan);
3900             /* FALL THROUGH */
3901
3902         case BRANCH:        /*  /(...|A|...)/ */
3903             scan = NEXTOPER(scan); /* scan now points to inner node */
3904             if (!next || (OP(next) != BRANCH && OP(next) != BRANCHJ))
3905                 /* last branch; skip state push and jump direct to node */
3906                 continue;
3907             ST.lastparen = *PL_reglastparen;
3908             ST.next_branch = next;
3909             REGCP_SET(ST.cp);
3910             PL_reginput = locinput;
3911
3912             /* Now go into the branch */
3913             PUSH_STATE_GOTO(BRANCH_next, scan);
3914             /* NOTREACHED */
3915
3916         case BRANCH_next_fail: /* that branch failed; try the next, if any */
3917             REGCP_UNWIND(ST.cp);
3918             for (n = *PL_reglastparen; n > ST.lastparen; n--)
3919                 PL_regendp[n] = -1;
3920             *PL_reglastparen = n;
3921             scan = ST.next_branch;
3922             /* no more branches? */
3923             if (!scan || (OP(scan) != BRANCH && OP(scan) != BRANCHJ))
3924                 sayNO;
3925             continue; /* execute next BRANCH[J] op */
3926             /* NOTREACHED */
3927
3928         case MINMOD:
3929             st->minmod = 1;
3930             break;
3931
3932 #undef  ST
3933 #define ST st->u.curlym
3934
3935         case CURLYM:    /* /A{m,n}B/ where A is fixed-length */
3936
3937             /* This is an optimisation of CURLYX that enables us to push
3938              * only a single backtracking state, no matter now many matches
3939              * there are in {m,n}. It relies on the pattern being constant
3940              * length, with no parens to influence future backrefs
3941              */
3942
3943             ST.me = scan;
3944             scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
3945
3946             /* if paren positive, emulate an OPEN/CLOSE around A */
3947             if (ST.me->flags) {
3948                 I32 paren = ST.me->flags;
3949                 if (paren > PL_regsize)
3950                     PL_regsize = paren;
3951                 if (paren > (I32)*PL_reglastparen)
3952                     *PL_reglastparen = paren;
3953                 scan += NEXT_OFF(scan); /* Skip former OPEN. */
3954             }
3955             ST.A = scan;
3956             ST.B = next;
3957             ST.alen = 0;
3958             ST.count = 0;
3959             ST.minmod = st->minmod;
3960             st->minmod = 0;
3961             ST.c1 = CHRTEST_UNINIT;
3962             REGCP_SET(ST.cp);
3963
3964             if (!(ST.minmod ? ARG1(ST.me) : ARG2(ST.me))) /* min/max */
3965                 goto curlym_do_B;
3966
3967           curlym_do_A: /* execute the A in /A{m,n}B/  */
3968             PL_reginput = locinput;
3969             PUSH_YES_STATE_GOTO(CURLYM_A, ST.A); /* match A */
3970             /* NOTREACHED */
3971
3972         case CURLYM_A: /* we've just matched an A */
3973             locinput = st->locinput;
3974             nextchr = UCHARAT(locinput);
3975
3976             ST.count++;
3977             /* after first match, determine A's length: u.curlym.alen */
3978             if (ST.count == 1) {
3979                 if (PL_reg_match_utf8) {
3980                     char *s = locinput;
3981                     while (s < PL_reginput) {
3982                         ST.alen++;
3983                         s += UTF8SKIP(s);
3984                     }
3985                 }
3986                 else {
3987                     ST.alen = PL_reginput - locinput;
3988                 }
3989                 if (ST.alen == 0)
3990                     ST.count = ST.minmod ? ARG1(ST.me) : ARG2(ST.me);
3991             }
3992             DEBUG_EXECUTE_r(
3993                 PerlIO_printf(Perl_debug_log,
3994                           "%*s  CURLYM now matched %"IVdf" times, len=%"IVdf"...\n",
3995                           (int)(REPORT_CODE_OFF+(depth*2)), "",
3996                           (IV) ST.count, (IV)ST.alen)
3997             );
3998
3999             locinput = PL_reginput;
4000             if (ST.count < (ST.minmod ? ARG1(ST.me) : ARG2(ST.me)))
4001                 goto curlym_do_A; /* try to match another A */
4002             goto curlym_do_B; /* try to match B */
4003
4004         case CURLYM_A_fail: /* just failed to match an A */
4005             REGCP_UNWIND(ST.cp);
4006             if (ST.minmod || ST.count < ARG1(ST.me) /* min*/ )
4007                 sayNO;
4008
4009           curlym_do_B: /* execute the B in /A{m,n}B/  */
4010             PL_reginput = locinput;
4011             if (ST.c1 == CHRTEST_UNINIT) {
4012                 /* calculate c1 and c2 for possible match of 1st char
4013                  * following curly */
4014                 ST.c1 = ST.c2 = CHRTEST_VOID;
4015                 if (HAS_TEXT(ST.B) || JUMPABLE(ST.B)) {
4016                     regnode *text_node = ST.B;
4017                     if (! HAS_TEXT(text_node))
4018                         FIND_NEXT_IMPT(text_node);
4019                     if (HAS_TEXT(text_node)
4020                         && PL_regkind[OP(text_node)] != REF)
4021                     {
4022                         ST.c1 = (U8)*STRING(text_node);
4023                         ST.c2 =
4024                             (OP(text_node) == EXACTF || OP(text_node) == REFF)
4025                             ? PL_fold[ST.c1]
4026                             : (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
4027                                 ? PL_fold_locale[ST.c1]
4028                                 : ST.c1;
4029                     }
4030                 }
4031             }
4032
4033             DEBUG_EXECUTE_r(
4034                 PerlIO_printf(Perl_debug_log,
4035                     "%*s  CURLYM trying tail with matches=%"IVdf"...\n",
4036                     (int)(REPORT_CODE_OFF+(depth*2)),
4037                     "", (IV)ST.count)
4038                 );
4039             if (ST.c1 != CHRTEST_VOID
4040                     && UCHARAT(PL_reginput) != ST.c1
4041                     && UCHARAT(PL_reginput) != ST.c2)
4042             {
4043                 /* simulate B failing */
4044                 state_num = CURLYM_B_fail;
4045                 goto reenter_switch;
4046             }
4047
4048             if (ST.me->flags) {
4049                 /* mark current A as captured */
4050                 I32 paren = ST.me->flags;
4051                 if (ST.count) {
4052                     PL_regstartp[paren]
4053                         = HOPc(PL_reginput, -ST.alen) - PL_bostr;
4054                     PL_regendp[paren] = PL_reginput - PL_bostr;
4055                 }
4056                 else
4057                     PL_regendp[paren] = -1;
4058             }
4059             PUSH_STATE_GOTO(CURLYM_B, ST.B); /* match B */
4060             /* NOTREACHED */
4061
4062         case CURLYM_B_fail: /* just failed to match a B */
4063             REGCP_UNWIND(ST.cp);
4064             if (ST.minmod) {
4065                 if (ST.count == ARG2(ST.me) /* max */)
4066                     sayNO;
4067                 goto curlym_do_A; /* try to match a further A */
4068             }
4069             /* backtrack one A */
4070             if (ST.count == ARG1(ST.me) /* min */)
4071                 sayNO;
4072             ST.count--;
4073             locinput = HOPc(locinput, -ST.alen);
4074             goto curlym_do_B; /* try to match B */
4075
4076 #undef ST
4077 #define ST st->u.curly
4078
4079 #define CURLY_SETPAREN(paren, success) \
4080     if (paren) { \
4081         if (success) { \
4082             PL_regstartp[paren] = HOPc(locinput, -1) - PL_bostr; \
4083             PL_regendp[paren] = locinput - PL_bostr; \
4084         } \
4085         else \
4086             PL_regendp[paren] = -1; \
4087     }
4088
4089         case STAR:              /*  /A*B/ where A is width 1 */
4090             ST.paren = 0;
4091             ST.min = 0;
4092             ST.max = REG_INFTY;
4093             scan = NEXTOPER(scan);
4094             goto repeat;
4095         case PLUS:              /*  /A+B/ where A is width 1 */
4096             ST.paren = 0;
4097             ST.min = 1;
4098             ST.max = REG_INFTY;
4099             scan = NEXTOPER(scan);
4100             goto repeat;
4101         case CURLYN:            /*  /(A){m,n}B/ where A is width 1 */
4102             ST.paren = scan->flags;     /* Which paren to set */
4103             if (ST.paren > PL_regsize)
4104                 PL_regsize = ST.paren;
4105             if (ST.paren > (I32)*PL_reglastparen)
4106                 *PL_reglastparen = ST.paren;
4107             ST.min = ARG1(scan);  /* min to match */
4108             ST.max = ARG2(scan);  /* max to match */
4109             scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
4110             goto repeat;
4111         case CURLY:             /*  /A{m,n}B/ where A is width 1 */
4112             ST.paren = 0;
4113             ST.min = ARG1(scan);  /* min to match */
4114             ST.max = ARG2(scan);  /* max to match */
4115             scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
4116           repeat:
4117             /*
4118             * Lookahead to avoid useless match attempts
4119             * when we know what character comes next.
4120             *
4121             * Used to only do .*x and .*?x, but now it allows
4122             * for )'s, ('s and (?{ ... })'s to be in the way
4123             * of the quantifier and the EXACT-like node.  -- japhy
4124             */
4125
4126             if (ST.min > ST.max) /* XXX make this a compile-time check? */
4127                 sayNO;
4128             if (HAS_TEXT(next) || JUMPABLE(next)) {
4129                 U8 *s;
4130                 regnode *text_node = next;
4131
4132                 if (! HAS_TEXT(text_node))
4133                     FIND_NEXT_IMPT(text_node);
4134
4135                 if (! HAS_TEXT(text_node))
4136                     ST.c1 = ST.c2 = CHRTEST_VOID;
4137                 else {
4138                     if (PL_regkind[OP(text_node)] == REF) {
4139                         ST.c1 = ST.c2 = CHRTEST_VOID;
4140                         goto assume_ok_easy;
4141                     }
4142                     else
4143                         s = (U8*)STRING(text_node);
4144
4145                     if (!UTF) {
4146                         ST.c2 = ST.c1 = *s;
4147                         if (OP(text_node) == EXACTF || OP(text_node) == REFF)
4148                             ST.c2 = PL_fold[ST.c1];
4149                         else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
4150                             ST.c2 = PL_fold_locale[ST.c1];
4151                     }
4152                     else { /* UTF */
4153                         if (OP(text_node) == EXACTF || OP(text_node) == REFF) {
4154                              STRLEN ulen1, ulen2;
4155                              U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
4156                              U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
4157
4158                              to_utf8_lower((U8*)s, tmpbuf1, &ulen1);
4159                              to_utf8_upper((U8*)s, tmpbuf2, &ulen2);
4160 #ifdef EBCDIC
4161                              ST.c1 = utf8n_to_uvchr(tmpbuf1, UTF8_MAXLEN, 0,
4162                                                     ckWARN(WARN_UTF8) ?
4163                                                     0 : UTF8_ALLOW_ANY);
4164                              ST.c2 = utf8n_to_uvchr(tmpbuf2, UTF8_MAXLEN, 0,
4165                                                     ckWARN(WARN_UTF8) ?
4166                                                     0 : UTF8_ALLOW_ANY);
4167 #else
4168                              ST.c1 = utf8n_to_uvuni(tmpbuf1, UTF8_MAXBYTES, 0,
4169                                                     uniflags);
4170                              ST.c2 = utf8n_to_uvuni(tmpbuf2, UTF8_MAXBYTES, 0,
4171                                                     uniflags);
4172 #endif
4173                         }
4174                         else {
4175                             ST.c2 = ST.c1 = utf8n_to_uvchr(s, UTF8_MAXBYTES, 0,
4176                                                      uniflags);
4177                         }
4178                     }
4179                 }
4180             }
4181             else
4182                 ST.c1 = ST.c2 = CHRTEST_VOID;
4183         assume_ok_easy:
4184
4185             ST.A = scan;
4186             ST.B = next;
4187             PL_reginput = locinput;
4188             if (st->minmod) {
4189                 st->minmod = 0;
4190                 if (ST.min && regrepeat(rex, ST.A, ST.min) < ST.min)
4191                     sayNO;
4192                 ST.count = ST.min;
4193                 locinput = PL_reginput;
4194                 REGCP_SET(ST.cp);
4195                 if (ST.c1 == CHRTEST_VOID)
4196                     goto curly_try_B_min;
4197
4198                 ST.oldloc = locinput;
4199
4200                 /* set ST.maxpos to the furthest point along the
4201                  * string that could possibly match */
4202                 if  (ST.max == REG_INFTY) {
4203                     ST.maxpos = PL_regeol - 1;
4204                     if (do_utf8)
4205                         while (UTF8_IS_CONTINUATION(*(U8*)ST.maxpos))
4206                             ST.maxpos--;
4207                 }
4208                 else if (do_utf8) {
4209                     int m = ST.max - ST.min;
4210                     for (ST.maxpos = locinput;
4211                          m >0 && ST.maxpos + UTF8SKIP(ST.maxpos) <= PL_regeol; m--)
4212                         ST.maxpos += UTF8SKIP(ST.maxpos);
4213                 }
4214                 else {
4215                     ST.maxpos = locinput + ST.max - ST.min;
4216                     if (ST.maxpos >= PL_regeol)
4217                         ST.maxpos = PL_regeol - 1;
4218                 }
4219                 goto curly_try_B_min_known;
4220
4221             }
4222             else {
4223                 ST.count = regrepeat(rex, ST.A, ST.max);
4224                 locinput = PL_reginput;
4225                 if (ST.count < ST.min)
4226                     sayNO;
4227                 if ((ST.count > ST.min)
4228                     && (PL_regkind[OP(ST.B)] == EOL) && (OP(ST.B) != MEOL))
4229                 {
4230                     /* A{m,n} must come at the end of the string, there's
4231                      * no point in backing off ... */
4232                     ST.min = ST.count;
4233                     /* ...except that $ and \Z can match before *and* after
4234                        newline at the end.  Consider "\n\n" =~ /\n+\Z\n/.
4235                        We may back off by one in this case. */
4236                     if (UCHARAT(PL_reginput - 1) == '\n' && OP(ST.B) != EOS)
4237                         ST.min--;
4238                 }
4239                 REGCP_SET(ST.cp);
4240                 goto curly_try_B_max;
4241             }
4242             /* NOTREACHED */
4243
4244
4245         case CURLY_B_min_known_fail:
4246             /* failed to find B in a non-greedy match where c1,c2 valid */
4247             if (ST.paren && ST.count)
4248                 PL_regendp[ST.paren] = -1;
4249
4250             PL_reginput = locinput;     /* Could be reset... */
4251             REGCP_UNWIND(ST.cp);
4252             /* Couldn't or didn't -- move forward. */
4253             ST.oldloc = locinput;
4254             if (do_utf8)
4255                 locinput += UTF8SKIP(locinput);
4256             else
4257                 locinput++;
4258             ST.count++;
4259           curly_try_B_min_known:
4260              /* find the next place where 'B' could work, then call B */
4261             {
4262                 int n;
4263                 if (do_utf8) {
4264                     n = (ST.oldloc == locinput) ? 0 : 1;
4265                     if (ST.c1 == ST.c2) {
4266                         STRLEN len;
4267                         /* set n to utf8_distance(oldloc, locinput) */
4268                         while (locinput <= ST.maxpos &&
4269                                utf8n_to_uvchr((U8*)locinput,
4270                                               UTF8_MAXBYTES, &len,
4271                                               uniflags) != (UV)ST.c1) {
4272                             locinput += len;
4273                             n++;
4274                         }
4275                     }
4276                     else {
4277                         /* set n to utf8_distance(oldloc, locinput) */
4278                         while (locinput <= ST.maxpos) {
4279                             STRLEN len;
4280                             const UV c = utf8n_to_uvchr((U8*)locinput,
4281                                                   UTF8_MAXBYTES, &len,
4282                                                   uniflags);
4283                             if (c == (UV)ST.c1 || c == (UV)ST.c2)
4284                                 break;
4285                             locinput += len;
4286                             n++;
4287                         }
4288                     }
4289                 }
4290                 else {
4291                     if (ST.c1 == ST.c2) {
4292                         while (locinput <= ST.maxpos &&
4293                                UCHARAT(locinput) != ST.c1)
4294                             locinput++;
4295                     }
4296                     else {
4297                         while (locinput <= ST.maxpos
4298                                && UCHARAT(locinput) != ST.c1
4299                                && UCHARAT(locinput) != ST.c2)
4300                             locinput++;
4301                     }
4302                     n = locinput - ST.oldloc;
4303                 }
4304                 if (locinput > ST.maxpos)
4305                     sayNO;
4306                 /* PL_reginput == oldloc now */
4307                 if (n) {
4308                     ST.count += n;
4309                     if (regrepeat(rex, ST.A, n) < n)
4310                         sayNO;
4311                 }
4312                 PL_reginput = locinput;
4313                 CURLY_SETPAREN(ST.paren, ST.count);
4314                 PUSH_STATE_GOTO(CURLY_B_min_known, ST.B);
4315             }
4316             /* NOTREACHED */
4317
4318
4319         case CURLY_B_min_fail:
4320             /* failed to find B in a non-greedy match where c1,c2 invalid */
4321             if (ST.paren && ST.count)
4322                 PL_regendp[ST.paren] = -1;
4323
4324             REGCP_UNWIND(ST.cp);
4325             /* failed -- move forward one */
4326             PL_reginput = locinput;
4327             if (regrepeat(rex, ST.A, 1)) {
4328                 ST.count++;
4329                 locinput = PL_reginput;
4330                 if (ST.count <= ST.max || (ST.max == REG_INFTY &&
4331                         ST.count > 0)) /* count overflow ? */
4332                 {
4333                   curly_try_B_min:
4334                     CURLY_SETPAREN(ST.paren, ST.count);
4335                     PUSH_STATE_GOTO(CURLY_B_min, ST.B);
4336                 }
4337             }
4338             sayNO;
4339             /* NOTREACHED */
4340
4341
4342         curly_try_B_max:
4343             /* a successful greedy match: now try to match B */
4344             {
4345                 UV c = 0;
4346                 if (ST.c1 != CHRTEST_VOID)
4347                     c = do_utf8 ? utf8n_to_uvchr((U8*)PL_reginput,
4348                                            UTF8_MAXBYTES, 0, uniflags)
4349                                 : (UV) UCHARAT(PL_reginput);
4350                 /* If it could work, try it. */
4351                 if (ST.c1 == CHRTEST_VOID || c == (UV)ST.c1 || c == (UV)ST.c2) {
4352                     CURLY_SETPAREN(ST.paren, ST.count);
4353                     PUSH_STATE_GOTO(CURLY_B_max, ST.B);
4354                     /* NOTREACHED */
4355                 }
4356             }
4357             /* FALL THROUGH */
4358         case CURLY_B_max_fail:
4359             /* failed to find B in a greedy match */
4360             if (ST.paren && ST.count)
4361                 PL_regendp[ST.paren] = -1;
4362
4363             REGCP_UNWIND(ST.cp);
4364             /*  back up. */
4365             if (--ST.count < ST.min)
4366                 sayNO;
4367             PL_reginput = locinput = HOPc(locinput, -1);
4368             goto curly_try_B_max;
4369
4370 #undef ST
4371
4372
4373         case END:
4374             fake_end:
4375             if (cur_eval) {
4376                 /* we've just finished A in /(??{A})B/; now continue with B */
4377                 I32 tmpix;
4378
4379
4380                 st->u.eval.toggle_reg_flags
4381                             = cur_eval->u.eval.toggle_reg_flags;
4382                 PL_reg_flags ^= st->u.eval.toggle_reg_flags;
4383
4384                 st->u.eval.prev_rex = rex;              /* inner */
4385                 rex    = cur_eval->u.eval.prev_rex;     /* outer */
4386                 cur_curlyx = cur_eval->u.eval.prev_curlyx;
4387                 ReREFCNT_inc(rex);
4388                 st->u.eval.cp = regcppush(0);   /* Save *all* the positions. */
4389                 REGCP_SET(st->u.eval.lastcp);
4390                 PL_reginput = locinput;
4391
4392                 /* Restore parens of the outer rex without popping the
4393                  * savestack */
4394                 tmpix = PL_savestack_ix;
4395                 PL_savestack_ix = cur_eval->u.eval.lastcp;
4396                 regcppop(rex);
4397                 PL_savestack_ix = tmpix;
4398
4399                 st->u.eval.prev_eval = cur_eval;
4400                 cur_eval = cur_eval->u.eval.prev_eval;
4401                 DEBUG_EXECUTE_r(
4402                     PerlIO_printf(Perl_debug_log, "%*s  EVAL trying tail ... %x\n",
4403                                       REPORT_CODE_OFF+depth*2, "",(int)cur_eval););
4404                 PUSH_YES_STATE_GOTO(EVAL_AB,
4405                         st->u.eval.prev_eval->u.eval.B); /* match B */
4406             }
4407
4408             if (locinput < reginfo->till) {
4409                 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
4410                                       "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
4411                                       PL_colors[4],
4412                                       (long)(locinput - PL_reg_starttry),
4413                                       (long)(reginfo->till - PL_reg_starttry),
4414                                       PL_colors[5]));
4415                 sayNO_SILENT;           /* Cannot match: too short. */
4416             }
4417             PL_reginput = locinput;     /* put where regtry can find it */
4418             sayYES;                     /* Success! */
4419
4420         case SUCCEED: /* successful SUSPEND/UNLESSM/IFMATCH/CURLYM */
4421             DEBUG_EXECUTE_r(
4422             PerlIO_printf(Perl_debug_log,
4423                 "%*s  %ssubpattern success...%s\n",
4424                 REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5]));
4425             PL_reginput = locinput;     /* put where regtry can find it */
4426             sayYES;                     /* Success! */
4427
4428 #undef  ST
4429 #define ST st->u.ifmatch
4430
4431         case SUSPEND:   /* (?>A) */
4432             ST.wanted = 1;
4433             PL_reginput = locinput;
4434             goto do_ifmatch;
4435
4436         case UNLESSM:   /* -ve lookaround: (?!A), or with flags, (?<!A) */
4437             ST.wanted = 0;
4438             goto ifmatch_trivial_fail_test;
4439
4440         case IFMATCH:   /* +ve lookaround: (?=A), or with flags, (?<=A) */
4441             ST.wanted = 1;
4442           ifmatch_trivial_fail_test:
4443             if (scan->flags) {
4444                 char * const s = HOPBACKc(locinput, scan->flags);
4445                 if (!s) {
4446                     /* trivial fail */
4447                     if (st->logical) {
4448                         st->logical = 0;
4449                         st->sw = 1 - (bool)ST.wanted;
4450                     }
4451                     else if (ST.wanted)
4452                         sayNO;
4453                     next = scan + ARG(scan);
4454                     if (next == scan)
4455                         next = NULL;
4456                     break;
4457                 }
4458                 PL_reginput = s;
4459             }
4460             else
4461                 PL_reginput = locinput;
4462
4463           do_ifmatch:
4464             ST.me = scan;
4465             /* execute body of (?...A) */
4466             PUSH_YES_STATE_GOTO(IFMATCH_A, NEXTOPER(NEXTOPER(scan)));
4467             /* NOTREACHED */
4468
4469         case IFMATCH_A_fail: /* body of (?...A) failed */
4470             ST.wanted = !ST.wanted;
4471             /* FALL THROUGH */
4472
4473         case IFMATCH_A: /* body of (?...A) succeeded */
4474             if (st->logical) {
4475                 st->logical = 0;
4476                 st->sw = (bool)ST.wanted;
4477             }
4478             else if (!ST.wanted)
4479                 sayNO;
4480
4481             if (OP(ST.me) == SUSPEND)
4482                 locinput = PL_reginput;
4483             else {
4484                 locinput = PL_reginput = st->locinput;
4485                 nextchr = UCHARAT(locinput);
4486             }
4487             scan = ST.me + ARG(ST.me);
4488             if (scan == ST.me)
4489                 scan = NULL;
4490             continue; /* execute B */
4491
4492 #undef ST
4493
4494         case LONGJMP:
4495             next = scan + ARG(scan);
4496             if (next == scan)
4497                 next = NULL;
4498             break;
4499         default:
4500             PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
4501                           PTR2UV(scan), OP(scan));
4502             Perl_croak(aTHX_ "regexp memory corruption");
4503         }
4504
4505         scan = next;
4506         continue;
4507         /* NOTREACHED */
4508
4509       push_yes_state:
4510         /* push a state that backtracks on success */
4511         st->u.yes.prev_yes_state = yes_state;
4512         yes_state = st;
4513         /* FALL THROUGH */
4514       push_state:
4515         /* push a new regex state, then continue at scan  */
4516         {
4517             regmatch_state *newst;
4518
4519             DEBUG_STATE_pp("push");
4520             depth++;
4521             st->locinput = locinput;
4522             newst = st+1;
4523             if (newst >  SLAB_LAST(PL_regmatch_slab))
4524                 newst = S_push_slab(aTHX);
4525             PL_regmatch_state = newst;
4526             /* XXX probably don't need to initialise these */
4527             newst->minmod = 0;
4528             newst->sw = 0;
4529             newst->logical = 0;
4530
4531
4532             locinput = PL_reginput;
4533             nextchr = UCHARAT(locinput);
4534             st = newst;
4535             continue;
4536             /* NOTREACHED */
4537         }
4538
4539         /* simulate recursively calling regmatch(), but without actually
4540          * recursing - ie save the current state on the heap rather than on
4541          * the stack, then re-enter the loop. This avoids complex regexes
4542          * blowing the processor stack */
4543
4544       start_recurse:
4545         {
4546             /* push new state */
4547             regmatch_state *oldst = st;
4548
4549             DEBUG_STATE_pp("push");
4550             depth++;
4551             st->u.yes.prev_yes_state = yes_state;
4552             yes_state = st;
4553
4554             /* grab the next free state slot */
4555             st++;
4556             if (st >  SLAB_LAST(PL_regmatch_slab))
4557                 st = S_push_slab(aTHX);
4558             PL_regmatch_state = st;
4559
4560             oldst->next = next;
4561             oldst->n = n;
4562             oldst->locinput = locinput;
4563
4564             locinput = PL_reginput;
4565             nextchr = UCHARAT(locinput);
4566             st->minmod = 0;
4567             st->sw = 0;
4568             st->logical = 0;
4569
4570         }
4571     }
4572
4573     /*
4574     * We get here only if there's trouble -- normally "case END" is
4575     * the terminating point.
4576     */
4577     Perl_croak(aTHX_ "corrupted regexp pointers");
4578     /*NOTREACHED*/
4579     sayNO;
4580
4581 yes:
4582     if (yes_state) {
4583         /* we have successfully completed a subexpression, but we must now
4584          * pop to the state marked by yes_state and continue from there */
4585         assert(st != yes_state);
4586 #ifdef DEBUGGING
4587         while (st != yes_state) {
4588             st--;
4589             if (st < SLAB_FIRST(PL_regmatch_slab)) {
4590                 PL_regmatch_slab = PL_regmatch_slab->prev;
4591                 st = SLAB_LAST(PL_regmatch_slab);
4592             }
4593             DEBUG_STATE_pp("pop (yes)");
4594             depth--;
4595         }
4596 #else
4597         while (yes_state < SLAB_FIRST(PL_regmatch_slab)
4598             || yes_state > SLAB_LAST(PL_regmatch_slab))
4599         {
4600             /* not in this slab, pop slab */
4601             depth -= (st - SLAB_FIRST(PL_regmatch_slab) + 1);
4602             PL_regmatch_slab = PL_regmatch_slab->prev;
4603             st = SLAB_LAST(PL_regmatch_slab);
4604         }
4605         depth -= (st - yes_state);
4606 #endif
4607         st = yes_state;
4608         yes_state = st->u.yes.prev_yes_state;
4609         PL_regmatch_state = st;
4610
4611         switch (st->resume_state) {
4612         case resume_CURLYX:
4613         case resume_WHILEM1:
4614         case resume_WHILEM2:
4615         case resume_WHILEM3:
4616         case resume_WHILEM4:
4617         case resume_WHILEM5:
4618         case resume_WHILEM6:
4619             result = 1;
4620             /* restore previous state and re-enter */
4621             scan        = st->scan;
4622             next        = st->next;
4623             n   = st->n;
4624             locinput= st->locinput;
4625             nextchr = UCHARAT(locinput);
4626             switch (st->resume_state) {
4627             case resume_CURLYX:
4628                 goto resume_point_CURLYX;
4629             case resume_WHILEM1:
4630                 goto resume_point_WHILEM1;
4631             case resume_WHILEM2:
4632                 goto resume_point_WHILEM2;
4633             case resume_WHILEM3:
4634                 goto resume_point_WHILEM3;
4635             case resume_WHILEM4:
4636                 goto resume_point_WHILEM4;
4637             case resume_WHILEM5:
4638                 goto resume_point_WHILEM5;
4639             case resume_WHILEM6:
4640                 goto resume_point_WHILEM6;
4641             }
4642             Perl_croak(aTHX_ "unexpected whilem resume state");
4643
4644         case EVAL_AB:
4645         case IFMATCH_A:
4646         case CURLYM_A:
4647             state_num = st->resume_state;
4648             goto reenter_switch;
4649
4650         case CURLYM_B:
4651         case BRANCH_next:
4652         case TRIE_next:
4653         case CURLY_B_max:
4654         default:
4655             Perl_croak(aTHX_ "unexpected yes resume state");
4656         }
4657     }
4658
4659     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
4660                           PL_colors[4], PL_colors[5]));
4661
4662     result = 1;
4663     goto final_exit;
4664
4665 no:
4666     DEBUG_EXECUTE_r(
4667         PerlIO_printf(Perl_debug_log,
4668             "%*s  %sfailed...%s\n",
4669             REPORT_CODE_OFF+depth*2, "",
4670             PL_colors[4], PL_colors[5])
4671         );
4672
4673 no_silent:
4674     result = 0;
4675
4676     if (depth) {
4677         /* there's a previous state to backtrack to */
4678         st--;
4679         if (st < SLAB_FIRST(PL_regmatch_slab)) {
4680             PL_regmatch_slab = PL_regmatch_slab->prev;
4681             st = SLAB_LAST(PL_regmatch_slab);
4682         }
4683         PL_regmatch_state = st;
4684         scan    = st->scan;
4685         next    = st->next;
4686         n       = st->n;
4687         locinput= st->locinput;
4688         nextchr = UCHARAT(locinput);
4689
4690         DEBUG_STATE_pp("pop");
4691         depth--;
4692         if (yes_state == st)
4693             yes_state = st->u.yes.prev_yes_state;
4694
4695         switch (st->resume_state) {
4696         case resume_CURLYX:
4697             goto resume_point_CURLYX;
4698         case resume_WHILEM1:
4699             goto resume_point_WHILEM1;
4700         case resume_WHILEM2:
4701             goto resume_point_WHILEM2;
4702         case resume_WHILEM3:
4703             goto resume_point_WHILEM3;
4704         case resume_WHILEM4:
4705             goto resume_point_WHILEM4;
4706         case resume_WHILEM5:
4707             goto resume_point_WHILEM5;
4708         case resume_WHILEM6:
4709             goto resume_point_WHILEM6;
4710
4711         case TRIE_next:
4712         case EVAL_AB:
4713         case BRANCH_next:
4714         case CURLYM_A:
4715         case CURLYM_B:
4716         case IFMATCH_A:
4717         case CURLY_B_max:
4718         case CURLY_B_min:
4719         case CURLY_B_min_known:
4720             state_num = st->resume_state + 1; /* failure = success + 1 */
4721             goto reenter_switch;
4722
4723         default:
4724             Perl_croak(aTHX_ "regexp resume memory corruption");
4725         }
4726     }
4727
4728   final_exit:
4729
4730     /* restore original high-water mark */
4731     PL_regmatch_slab  = orig_slab;
4732     PL_regmatch_state = orig_state;
4733
4734     /* free all slabs above current one */
4735     if (orig_slab->next) {
4736         regmatch_slab *sl = orig_slab->next;
4737         orig_slab->next = NULL;
4738         while (sl) {
4739             regmatch_slab * const osl = sl;
4740             sl = sl->next;
4741             Safefree(osl);
4742         }
4743     }
4744
4745     return result;
4746 }
4747
4748 /*
4749  - regrepeat - repeatedly match something simple, report how many
4750  */
4751 /*
4752  * [This routine now assumes that it will only match on things of length 1.
4753  * That was true before, but now we assume scan - reginput is the count,
4754  * rather than incrementing count on every character.  [Er, except utf8.]]
4755  */
4756 STATIC I32
4757 S_regrepeat(pTHX_ const regexp *prog, const regnode *p, I32 max)
4758 {
4759     dVAR;
4760     register char *scan;
4761     register I32 c;
4762     register char *loceol = PL_regeol;
4763     register I32 hardcount = 0;
4764     register bool do_utf8 = PL_reg_match_utf8;
4765
4766     scan = PL_reginput;
4767     if (max == REG_INFTY)
4768         max = I32_MAX;
4769     else if (max < loceol - scan)
4770         loceol = scan + max;
4771     switch (OP(p)) {
4772     case REG_ANY:
4773         if (do_utf8) {
4774             loceol = PL_regeol;
4775             while (scan < loceol && hardcount < max && *scan != '\n') {
4776                 scan += UTF8SKIP(scan);
4777                 hardcount++;
4778             }
4779         } else {
4780             while (scan < loceol && *scan != '\n')
4781                 scan++;
4782         }
4783         break;
4784     case SANY:
4785         if (do_utf8) {
4786             loceol = PL_regeol;
4787             while (scan < loceol && hardcount < max) {
4788                 scan += UTF8SKIP(scan);
4789                 hardcount++;
4790             }
4791         }
4792         else
4793             scan = loceol;
4794         break;
4795     case CANY:
4796         scan = loceol;
4797         break;
4798     case EXACT:         /* length of string is 1 */
4799         c = (U8)*STRING(p);
4800         while (scan < loceol && UCHARAT(scan) == c)
4801             scan++;
4802         break;
4803     case EXACTF:        /* length of string is 1 */
4804         c = (U8)*STRING(p);
4805         while (scan < loceol &&
4806                (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
4807             scan++;
4808         break;
4809     case EXACTFL:       /* length of string is 1 */
4810         PL_reg_flags |= RF_tainted;
4811         c = (U8)*STRING(p);
4812         while (scan < loceol &&
4813                (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
4814             scan++;
4815         break;
4816     case ANYOF:
4817         if (do_utf8) {
4818             loceol = PL_regeol;
4819             while (hardcount < max && scan < loceol &&
4820                    reginclass(prog, p, (U8*)scan, 0, do_utf8)) {
4821                 scan += UTF8SKIP(scan);
4822                 hardcount++;
4823             }
4824         } else {
4825             while (scan < loceol && REGINCLASS(prog, p, (U8*)scan))
4826                 scan++;
4827         }
4828         break;
4829     case ALNUM:
4830         if (do_utf8) {
4831             loceol = PL_regeol;
4832             LOAD_UTF8_CHARCLASS_ALNUM();
4833             while (hardcount < max && scan < loceol &&
4834                    swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
4835                 scan += UTF8SKIP(scan);
4836                 hardcount++;
4837             }
4838         } else {
4839             while (scan < loceol && isALNUM(*scan))
4840                 scan++;
4841         }
4842         break;
4843     case ALNUML:
4844         PL_reg_flags |= RF_tainted;
4845         if (do_utf8) {
4846             loceol = PL_regeol;
4847             while (hardcount < max && scan < loceol &&
4848                    isALNUM_LC_utf8((U8*)scan)) {
4849                 scan += UTF8SKIP(scan);
4850                 hardcount++;
4851             }
4852         } else {
4853             while (scan < loceol && isALNUM_LC(*scan))
4854                 scan++;
4855         }
4856         break;
4857     case NALNUM:
4858         if (do_utf8) {
4859             loceol = PL_regeol;
4860             LOAD_UTF8_CHARCLASS_ALNUM();
4861             while (hardcount < max && scan < loceol &&
4862                    !swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
4863                 scan += UTF8SKIP(scan);
4864                 hardcount++;
4865             }
4866         } else {
4867             while (scan < loceol && !isALNUM(*scan))
4868                 scan++;
4869         }
4870         break;
4871     case NALNUML:
4872         PL_reg_flags |= RF_tainted;
4873         if (do_utf8) {
4874             loceol = PL_regeol;
4875             while (hardcount < max && scan < loceol &&
4876                    !isALNUM_LC_utf8((U8*)scan)) {
4877                 scan += UTF8SKIP(scan);
4878                 hardcount++;
4879             }
4880         } else {
4881             while (scan < loceol && !isALNUM_LC(*scan))
4882                 scan++;
4883         }
4884         break;
4885     case SPACE:
4886         if (do_utf8) {
4887             loceol = PL_regeol;
4888             LOAD_UTF8_CHARCLASS_SPACE();
4889             while (hardcount < max && scan < loceol &&
4890                    (*scan == ' ' ||
4891                     swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
4892                 scan += UTF8SKIP(scan);
4893                 hardcount++;
4894             }
4895         } else {
4896             while (scan < loceol && isSPACE(*scan))
4897                 scan++;
4898         }
4899         break;
4900     case SPACEL:
4901         PL_reg_flags |= RF_tainted;
4902         if (do_utf8) {
4903             loceol = PL_regeol;
4904             while (hardcount < max && scan < loceol &&
4905                    (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
4906                 scan += UTF8SKIP(scan);
4907                 hardcount++;
4908             }
4909         } else {
4910             while (scan < loceol && isSPACE_LC(*scan))
4911                 scan++;
4912         }
4913         break;
4914     case NSPACE:
4915         if (do_utf8) {
4916             loceol = PL_regeol;
4917             LOAD_UTF8_CHARCLASS_SPACE();
4918             while (hardcount < max && scan < loceol &&
4919                    !(*scan == ' ' ||
4920                      swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
4921                 scan += UTF8SKIP(scan);
4922                 hardcount++;
4923             }
4924         } else {
4925             while (scan < loceol && !isSPACE(*scan))
4926                 scan++;
4927             break;
4928         }
4929     case NSPACEL:
4930         PL_reg_flags |= RF_tainted;
4931         if (do_utf8) {
4932             loceol = PL_regeol;
4933             while (hardcount < max && scan < loceol &&
4934                    !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
4935                 scan += UTF8SKIP(scan);
4936                 hardcount++;
4937             }
4938         } else {
4939             while (scan < loceol && !isSPACE_LC(*scan))
4940                 scan++;
4941         }
4942         break;
4943     case DIGIT:
4944         if (do_utf8) {
4945             loceol = PL_regeol;
4946             LOAD_UTF8_CHARCLASS_DIGIT();
4947             while (hardcount < max && scan < loceol &&
4948                    swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
4949                 scan += UTF8SKIP(scan);
4950                 hardcount++;
4951             }
4952         } else {
4953             while (scan < loceol && isDIGIT(*scan))
4954                 scan++;
4955         }
4956         break;
4957     case NDIGIT:
4958         if (do_utf8) {
4959             loceol = PL_regeol;
4960             LOAD_UTF8_CHARCLASS_DIGIT();
4961             while (hardcount < max && scan < loceol &&
4962                    !swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
4963                 scan += UTF8SKIP(scan);
4964                 hardcount++;
4965             }
4966         } else {
4967             while (scan < loceol && !isDIGIT(*scan))
4968                 scan++;
4969         }
4970         break;
4971     default:            /* Called on something of 0 width. */
4972         break;          /* So match right here or not at all. */
4973     }
4974
4975     if (hardcount)
4976         c = hardcount;
4977     else
4978         c = scan - PL_reginput;
4979     PL_reginput = scan;
4980
4981     DEBUG_r({
4982         GET_RE_DEBUG_FLAGS_DECL;
4983         DEBUG_EXECUTE_r({
4984             SV * const prop = sv_newmortal();
4985             regprop(prog, prop, p);
4986             PerlIO_printf(Perl_debug_log,
4987                         "%*s  %s can match %"IVdf" times out of %"IVdf"...\n",
4988                         REPORT_CODE_OFF+1, "", SvPVX_const(prop),(IV)c,(IV)max);
4989         });
4990     });
4991
4992     return(c);
4993 }
4994
4995
4996 #if !defined(PERL_IN_XSUB_RE) || defined(PLUGGABLE_RE_EXTENSION)
4997 /*
4998 - regclass_swash - prepare the utf8 swash
4999 */
5000
5001 SV *
5002 Perl_regclass_swash(pTHX_ const regexp *prog, register const regnode* node, bool doinit, SV** listsvp, SV **altsvp)
5003 {
5004     dVAR;
5005     SV *sw  = NULL;
5006     SV *si  = NULL;
5007     SV *alt = NULL;
5008     const struct reg_data * const data = prog ? prog->data : NULL;
5009
5010     if (data && data->count) {
5011         const U32 n = ARG(node);
5012
5013         if (data->what[n] == 's') {
5014             SV * const rv = (SV*)data->data[n];
5015             AV * const av = (AV*)SvRV((SV*)rv);
5016             SV **const ary = AvARRAY(av);
5017             SV **a, **b;
5018
5019             /* See the end of regcomp.c:S_regclass() for
5020              * documentation of these array elements. */
5021
5022             si = *ary;
5023             a  = SvROK(ary[1]) ? &ary[1] : 0;
5024             b  = SvTYPE(ary[2]) == SVt_PVAV ? &ary[2] : 0;
5025
5026             if (a)
5027                 sw = *a;
5028             else if (si && doinit) {
5029                 sw = swash_init("utf8", "", si, 1, 0);
5030                 (void)av_store(av, 1, sw);
5031             }
5032             if (b)
5033                 alt = *b;
5034         }
5035     }
5036
5037     if (listsvp)
5038         *listsvp = si;
5039     if (altsvp)
5040         *altsvp  = alt;
5041
5042     return sw;
5043 }
5044 #endif
5045
5046 /*
5047  - reginclass - determine if a character falls into a character class
5048
5049   The n is the ANYOF regnode, the p is the target string, lenp
5050   is pointer to the maximum length of how far to go in the p
5051   (if the lenp is zero, UTF8SKIP(p) is used),
5052   do_utf8 tells whether the target string is in UTF-8.
5053
5054  */
5055
5056 STATIC bool
5057 S_reginclass(pTHX_ const regexp *prog, register const regnode *n, register const U8* p, STRLEN* lenp, register bool do_utf8)
5058 {
5059     dVAR;
5060     const char flags = ANYOF_FLAGS(n);
5061     bool match = FALSE;
5062     UV c = *p;
5063     STRLEN len = 0;
5064     STRLEN plen;
5065
5066     if (do_utf8 && !UTF8_IS_INVARIANT(c)) {
5067         c = utf8n_to_uvchr(p, UTF8_MAXBYTES, &len,
5068                 (UTF8_ALLOW_DEFAULT & UTF8_ALLOW_ANYUV) | UTF8_CHECK_ONLY);
5069                 /* see [perl #37836] for UTF8_ALLOW_ANYUV */
5070         if (len == (STRLEN)-1)
5071             Perl_croak(aTHX_ "Malformed UTF-8 character (fatal)");
5072     }
5073
5074     plen = lenp ? *lenp : UNISKIP(NATIVE_TO_UNI(c));
5075     if (do_utf8 || (flags & ANYOF_UNICODE)) {
5076         if (lenp)
5077             *lenp = 0;
5078         if (do_utf8 && !ANYOF_RUNTIME(n)) {
5079             if (len != (STRLEN)-1 && c < 256 && ANYOF_BITMAP_TEST(n, c))
5080                 match = TRUE;
5081         }
5082         if (!match && do_utf8 && (flags & ANYOF_UNICODE_ALL) && c >= 256)
5083             match = TRUE;
5084         if (!match) {
5085             AV *av;
5086             SV * const sw = regclass_swash(prog, n, TRUE, 0, (SV**)&av);
5087
5088             if (sw) {
5089                 if (swash_fetch(sw, p, do_utf8))
5090                     match = TRUE;
5091                 else if (flags & ANYOF_FOLD) {
5092                     if (!match && lenp && av) {
5093                         I32 i;
5094                         for (i = 0; i <= av_len(av); i++) {
5095                             SV* const sv = *av_fetch(av, i, FALSE);
5096                             STRLEN len;
5097                             const char * const s = SvPV_const(sv, len);
5098
5099                             if (len <= plen && memEQ(s, (char*)p, len)) {
5100                                 *lenp = len;
5101                                 match = TRUE;
5102                                 break;
5103                             }
5104                         }
5105                     }
5106                     if (!match) {
5107                         U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
5108                         STRLEN tmplen;
5109
5110                         to_utf8_fold(p, tmpbuf, &tmplen);
5111                         if (swash_fetch(sw, tmpbuf, do_utf8))
5112                             match = TRUE;
5113                     }
5114                 }
5115             }
5116         }
5117         if (match && lenp && *lenp == 0)
5118             *lenp = UNISKIP(NATIVE_TO_UNI(c));
5119     }
5120     if (!match && c < 256) {
5121         if (ANYOF_BITMAP_TEST(n, c))
5122             match = TRUE;
5123         else if (flags & ANYOF_FOLD) {
5124             U8 f;
5125
5126             if (flags & ANYOF_LOCALE) {
5127                 PL_reg_flags |= RF_tainted;
5128                 f = PL_fold_locale[c];
5129             }
5130             else
5131                 f = PL_fold[c];
5132             if (f != c && ANYOF_BITMAP_TEST(n, f))
5133                 match = TRUE;
5134         }
5135
5136         if (!match && (flags & ANYOF_CLASS)) {
5137             PL_reg_flags |= RF_tainted;
5138             if (
5139                 (ANYOF_CLASS_TEST(n, ANYOF_ALNUM)   &&  isALNUM_LC(c))  ||
5140                 (ANYOF_CLASS_TEST(n, ANYOF_NALNUM)  && !isALNUM_LC(c))  ||
5141                 (ANYOF_CLASS_TEST(n, ANYOF_SPACE)   &&  isSPACE_LC(c))  ||
5142                 (ANYOF_CLASS_TEST(n, ANYOF_NSPACE)  && !isSPACE_LC(c))  ||
5143                 (ANYOF_CLASS_TEST(n, ANYOF_DIGIT)   &&  isDIGIT_LC(c))  ||
5144                 (ANYOF_CLASS_TEST(n, ANYOF_NDIGIT)  && !isDIGIT_LC(c))  ||
5145                 (ANYOF_CLASS_TEST(n, ANYOF_ALNUMC)  &&  isALNUMC_LC(c)) ||
5146                 (ANYOF_CLASS_TEST(n, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
5147                 (ANYOF_CLASS_TEST(n, ANYOF_ALPHA)   &&  isALPHA_LC(c))  ||
5148                 (ANYOF_CLASS_TEST(n, ANYOF_NALPHA)  && !isALPHA_LC(c))  ||
5149                 (ANYOF_CLASS_TEST(n, ANYOF_ASCII)   &&  isASCII(c))     ||
5150                 (ANYOF_CLASS_TEST(n, ANYOF_NASCII)  && !isASCII(c))     ||
5151                 (ANYOF_CLASS_TEST(n, ANYOF_CNTRL)   &&  isCNTRL_LC(c))  ||
5152                 (ANYOF_CLASS_TEST(n, ANYOF_NCNTRL)  && !isCNTRL_LC(c))  ||
5153                 (ANYOF_CLASS_TEST(n, ANYOF_GRAPH)   &&  isGRAPH_LC(c))  ||
5154                 (ANYOF_CLASS_TEST(n, ANYOF_NGRAPH)  && !isGRAPH_LC(c))  ||
5155                 (ANYOF_CLASS_TEST(n, ANYOF_LOWER)   &&  isLOWER_LC(c))  ||
5156                 (ANYOF_CLASS_TEST(n, ANYOF_NLOWER)  && !isLOWER_LC(c))  ||
5157                 (ANYOF_CLASS_TEST(n, ANYOF_PRINT)   &&  isPRINT_LC(c))  ||
5158                 (ANYOF_CLASS_TEST(n, ANYOF_NPRINT)  && !isPRINT_LC(c))  ||
5159                 (ANYOF_CLASS_TEST(n, ANYOF_PUNCT)   &&  isPUNCT_LC(c))  ||
5160                 (ANYOF_CLASS_TEST(n, ANYOF_NPUNCT)  && !isPUNCT_LC(c))  ||
5161                 (ANYOF_CLASS_TEST(n, ANYOF_UPPER)   &&  isUPPER_LC(c))  ||
5162                 (ANYOF_CLASS_TEST(n, ANYOF_NUPPER)  && !isUPPER_LC(c))  ||
5163                 (ANYOF_CLASS_TEST(n, ANYOF_XDIGIT)  &&  isXDIGIT(c))    ||
5164                 (ANYOF_CLASS_TEST(n, ANYOF_NXDIGIT) && !isXDIGIT(c))    ||
5165                 (ANYOF_CLASS_TEST(n, ANYOF_PSXSPC)  &&  isPSXSPC(c))    ||
5166                 (ANYOF_CLASS_TEST(n, ANYOF_NPSXSPC) && !isPSXSPC(c))    ||
5167                 (ANYOF_CLASS_TEST(n, ANYOF_BLANK)   &&  isBLANK(c))     ||
5168                 (ANYOF_CLASS_TEST(n, ANYOF_NBLANK)  && !isBLANK(c))
5169                 ) /* How's that for a conditional? */
5170             {
5171                 match = TRUE;
5172             }
5173         }
5174     }
5175
5176     return (flags & ANYOF_INVERT) ? !match : match;
5177 }
5178
5179 STATIC U8 *
5180 S_reghop3(U8 *s, I32 off, const U8* lim)
5181 {
5182     dVAR;
5183     if (off >= 0) {
5184         while (off-- && s < lim) {
5185             /* XXX could check well-formedness here */
5186             s += UTF8SKIP(s);
5187         }
5188     }
5189     else {
5190         while (off++ && s > lim) {
5191             s--;
5192             if (UTF8_IS_CONTINUED(*s)) {
5193                 while (s > lim && UTF8_IS_CONTINUATION(*s))
5194                     s--;
5195             }
5196             /* XXX could check well-formedness here */
5197         }
5198     }
5199     return s;
5200 }
5201
5202 #ifdef XXX_dmq
5203 /* there are a bunch of places where we use two reghop3's that should
5204    be replaced with this routine. but since thats not done yet
5205    we ifdef it out - dmq
5206 */
5207 STATIC U8 *
5208 S_reghop4(U8 *s, I32 off, const U8* llim, const U8* rlim)
5209 {
5210     dVAR;
5211     if (off >= 0) {
5212         while (off-- && s < rlim) {
5213             /* XXX could check well-formedness here */
5214             s += UTF8SKIP(s);
5215         }
5216     }
5217     else {
5218         while (off++ && s > llim) {
5219             s--;
5220             if (UTF8_IS_CONTINUED(*s)) {
5221                 while (s > llim && UTF8_IS_CONTINUATION(*s))
5222                     s--;
5223             }
5224             /* XXX could check well-formedness here */
5225         }
5226     }
5227     return s;
5228 }
5229 #endif
5230
5231 STATIC U8 *
5232 S_reghopmaybe3(U8* s, I32 off, const U8* lim)
5233 {
5234     dVAR;
5235     if (off >= 0) {
5236         while (off-- && s < lim) {
5237             /* XXX could check well-formedness here */
5238             s += UTF8SKIP(s);
5239         }
5240         if (off >= 0)
5241             return NULL;
5242     }
5243     else {
5244         while (off++ && s > lim) {
5245             s--;
5246             if (UTF8_IS_CONTINUED(*s)) {
5247                 while (s > lim && UTF8_IS_CONTINUATION(*s))
5248                     s--;
5249             }
5250             /* XXX could check well-formedness here */
5251         }
5252         if (off <= 0)
5253             return NULL;
5254     }
5255     return s;
5256 }
5257
5258 static void
5259 restore_pos(pTHX_ void *arg)
5260 {
5261     dVAR;
5262     regexp * const rex = (regexp *)arg;
5263     if (PL_reg_eval_set) {
5264         if (PL_reg_oldsaved) {
5265             rex->subbeg = PL_reg_oldsaved;
5266             rex->sublen = PL_reg_oldsavedlen;
5267 #ifdef PERL_OLD_COPY_ON_WRITE
5268             rex->saved_copy = PL_nrs;
5269 #endif
5270             RX_MATCH_COPIED_on(rex);
5271         }
5272         PL_reg_magic->mg_len = PL_reg_oldpos;
5273         PL_reg_eval_set = 0;
5274         PL_curpm = PL_reg_oldcurpm;
5275     }
5276 }
5277
5278 STATIC void
5279 S_to_utf8_substr(pTHX_ register regexp *prog)
5280 {
5281     if (prog->float_substr && !prog->float_utf8) {
5282         SV* const sv = newSVsv(prog->float_substr);
5283         prog->float_utf8 = sv;
5284         sv_utf8_upgrade(sv);
5285         if (SvTAIL(prog->float_substr))
5286             SvTAIL_on(sv);
5287         if (prog->float_substr == prog->check_substr)
5288             prog->check_utf8 = sv;
5289     }
5290     if (prog->anchored_substr && !prog->anchored_utf8) {
5291         SV* const sv = newSVsv(prog->anchored_substr);
5292         prog->anchored_utf8 = sv;
5293         sv_utf8_upgrade(sv);
5294         if (SvTAIL(prog->anchored_substr))
5295             SvTAIL_on(sv);
5296         if (prog->anchored_substr == prog->check_substr)
5297             prog->check_utf8 = sv;
5298     }
5299 }
5300
5301 STATIC void
5302 S_to_byte_substr(pTHX_ register regexp *prog)
5303 {
5304     dVAR;
5305     if (prog->float_utf8 && !prog->float_substr) {
5306         SV* sv = newSVsv(prog->float_utf8);
5307         prog->float_substr = sv;
5308         if (sv_utf8_downgrade(sv, TRUE)) {
5309             if (SvTAIL(prog->float_utf8))
5310                 SvTAIL_on(sv);
5311         } else {
5312             SvREFCNT_dec(sv);
5313             prog->float_substr = sv = &PL_sv_undef;
5314         }
5315         if (prog->float_utf8 == prog->check_utf8)
5316             prog->check_substr = sv;
5317     }
5318     if (prog->anchored_utf8 && !prog->anchored_substr) {
5319         SV* sv = newSVsv(prog->anchored_utf8);
5320         prog->anchored_substr = sv;
5321         if (sv_utf8_downgrade(sv, TRUE)) {
5322             if (SvTAIL(prog->anchored_utf8))
5323                 SvTAIL_on(sv);
5324         } else {
5325             SvREFCNT_dec(sv);
5326             prog->anchored_substr = sv = &PL_sv_undef;
5327         }
5328         if (prog->anchored_utf8 == prog->check_utf8)
5329             prog->check_substr = sv;
5330     }
5331 }
5332
5333 /*
5334  * Local variables:
5335  * c-indentation-style: bsd
5336  * c-basic-offset: 4
5337  * indent-tabs-mode: t
5338  * End:
5339  *
5340  * ex: set ts=8 sts=4 sw=4 noet:
5341  */