regexec.c

   1 /*    regexec.c
   2  */
   3
   4 /*
   5  * "One Ring to rule them all, One Ring to find them..."
   6  */
   7
   8 /* This file contains functions for executing a regular expression.  See
   9  * also regcomp.c which funnily enough, contains functions for compiling
  10  * a regular expression.
  11  *
  12  * This file is also copied at build time to ext/re/re_exec.c, where
  13  * it's built with -DPERL_EXT_RE_BUILD -DPERL_EXT_RE_DEBUG -DPERL_EXT.
  14  * This causes the main functions to be compiled under new names and with
  15  * debugging support added, which makes "use re 'debug'" work.
  16  */
  17
  18 /* NOTE: this is derived from Henry Spencer's regexp code, and should not
  19  * confused with the original package (see point 3 below).  Thanks, Henry!
  20  */
  21
  22 /* Additional note: this code is very heavily munged from Henry's version
  23  * in places.  In some spots I've traded clarity for efficiency, so don't
  24  * blame Henry for some of the lack of readability.
  25  */
  26
  27 /* The names of the functions have been changed from regcomp and
  28  * regexec to  pregcomp and pregexec in order to avoid conflicts
  29  * with the POSIX routines of the same names.
  30 */
  31
  32 #ifdef PERL_EXT_RE_BUILD
  33 #include "re_top.h"
  34 #endif
  35
  36 /*
  37  * pregcomp and pregexec -- regsub and regerror are not used in perl
  38  *
  39  *      Copyright (c) 1986 by University of Toronto.
  40  *      Written by Henry Spencer.  Not derived from licensed software.
  41  *
  42  *      Permission is granted to anyone to use this software for any
  43  *      purpose on any computer system, and to redistribute it freely,
  44  *      subject to the following restrictions:
  45  *
  46  *      1. The author is not responsible for the consequences of use of
  47  *              this software, no matter how awful, even if they arise
  48  *              from defects in it.
  49  *
  50  *      2. The origin of this software must not be misrepresented, either
  51  *              by explicit claim or by omission.
  52  *
  53  *      3. Altered versions must be plainly marked as such, and must not
  54  *              be misrepresented as being the original software.
  55  *
  56  ****    Alterations to Henry's code are...
  57  ****
  58  ****    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
  59  ****    2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007 by Larry Wall and others
  60  ****
  61  ****    You may distribute under the terms of either the GNU General Public
  62  ****    License or the Artistic License, as specified in the README file.
  63  *
  64  * Beware that some of this code is subtly aware of the way operator
  65  * precedence is structured in regular expressions.  Serious changes in
  66  * regular-expression syntax might require a total rethink.
  67  */
  68 #include "EXTERN.h"
  69 #define PERL_IN_REGEXEC_C
  70 #include "perl.h"
  71
  72 #ifdef PERL_IN_XSUB_RE
  73 #  include "re_comp.h"
  74 #else
  75 #  include "regcomp.h"
  76 #endif
  77
  78 #define RF_tainted      1               /* tainted information used? */
  79 #define RF_warned       2               /* warned about big count? */
  80
  81 #define RF_utf8         8               /* Pattern contains multibyte chars? */
  82
  83 #define UTF ((PL_reg_flags & RF_utf8) != 0)
  84
  85 #define RS_init         1               /* eval environment created */
  86 #define RS_set          2               /* replsv value is set */
  87
  88 #ifndef STATIC
  89 #define STATIC  static
  90 #endif
  91
  92 #define REGINCLASS(prog,p,c)  (ANYOF_FLAGS(p) ? reginclass(prog,p,c,0,0) : ANYOF_BITMAP_TEST(p,*(c)))
  93
  94 /*
  95  * Forwards.
  96  */
  97
  98 #define CHR_SVLEN(sv) (do_utf8 ? sv_len_utf8(sv) : SvCUR(sv))
  99 #define CHR_DIST(a,b) (PL_reg_match_utf8 ? utf8_distance(a,b) : a - b)
 100
 101 #define HOPc(pos,off) \
 102         (char *)(PL_reg_match_utf8 \
 103             ? reghop3((U8*)pos, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr)) \
 104             : (U8*)(pos + off))
 105 #define HOPBACKc(pos, off) \
 106         (char*)(PL_reg_match_utf8\
 107             ? reghopmaybe3((U8*)pos, -off, (U8*)PL_bostr) \
 108             : (pos - off >= PL_bostr)           \
 109                 ? (U8*)pos - off                \
 110                 : NULL)
 111
 112 #define HOP3(pos,off,lim) (PL_reg_match_utf8 ? reghop3((U8*)(pos), off, (U8*)(lim)) : (U8*)(pos + off))
 113 #define HOP3c(pos,off,lim) ((char*)HOP3(pos,off,lim))
 114
 115 #define LOAD_UTF8_CHARCLASS(class,str) STMT_START { \
 116     if (!CAT2(PL_utf8_,class)) { bool ok; ENTER; save_re_context(); ok=CAT2(is_utf8_,class)((const U8*)str); assert(ok); LEAVE; } } STMT_END
 117 #define LOAD_UTF8_CHARCLASS_ALNUM() LOAD_UTF8_CHARCLASS(alnum,"a")
 118 #define LOAD_UTF8_CHARCLASS_DIGIT() LOAD_UTF8_CHARCLASS(digit,"0")
 119 #define LOAD_UTF8_CHARCLASS_SPACE() LOAD_UTF8_CHARCLASS(space," ")
 120 #define LOAD_UTF8_CHARCLASS_MARK()  LOAD_UTF8_CHARCLASS(mark, "\xcd\x86")
 121
 122 /* TODO: Combine JUMPABLE and HAS_TEXT to cache OP(rn) */
 123
 124 /* for use after a quantifier and before an EXACT-like node -- japhy */
 125 #define JUMPABLE(rn) ( \
 126     OP(rn) == OPEN || OP(rn) == CLOSE || OP(rn) == EVAL || \
 127     OP(rn) == SUSPEND || OP(rn) == IFMATCH || \
 128     OP(rn) == PLUS || OP(rn) == MINMOD || \
 129     (PL_regkind[OP(rn)] == CURLY && ARG1(rn) > 0) \
 130 )
 131
 132 #define HAS_TEXT(rn) ( \
 133     PL_regkind[OP(rn)] == EXACT || PL_regkind[OP(rn)] == REF \
 134 )
 135
 136 /*
 137   Search for mandatory following text node; for lookahead, the text must
 138   follow but for lookbehind (rn->flags != 0) we skip to the next step.
 139 */
 140 #define FIND_NEXT_IMPT(rn) STMT_START { \
 141     while (JUMPABLE(rn)) { \
 142         const OPCODE type = OP(rn); \
 143         if (type == SUSPEND || PL_regkind[type] == CURLY) \
 144             rn = NEXTOPER(NEXTOPER(rn)); \
 145         else if (type == PLUS) \
 146             rn = NEXTOPER(rn); \
 147         else if (type == IFMATCH) \
 148             rn = (rn->flags == 0) ? NEXTOPER(NEXTOPER(rn)) : rn + ARG(rn); \
 149         else rn += NEXT_OFF(rn); \
 150     } \
 151 } STMT_END
 152
 153
 154 static void restore_pos(pTHX_ void *arg);
 155
 156 STATIC CHECKPOINT
 157 S_regcppush(pTHX_ I32 parenfloor)
 158 {
 159     dVAR;
 160     const int retval = PL_savestack_ix;
 161 #define REGCP_PAREN_ELEMS 4
 162     const int paren_elems_to_push = (PL_regsize - parenfloor) * REGCP_PAREN_ELEMS;
 163     int p;
 164     GET_RE_DEBUG_FLAGS_DECL;
 165
 166     if (paren_elems_to_push < 0)
 167         Perl_croak(aTHX_ "panic: paren_elems_to_push < 0");
 168
 169 #define REGCP_OTHER_ELEMS 8
 170     SSGROW(paren_elems_to_push + REGCP_OTHER_ELEMS);
 171
 172     for (p = PL_regsize; p > parenfloor; p--) {
 173 /* REGCP_PARENS_ELEMS are pushed per pairs of parentheses. */
 174         SSPUSHINT(PL_regendp[p]);
 175         SSPUSHINT(PL_regstartp[p]);
 176         SSPUSHPTR(PL_reg_start_tmp[p]);
 177         SSPUSHINT(p);
 178         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 179           "     saving \\%"UVuf" %"IVdf"(%"IVdf")..%"IVdf"\n",
 180                       (UV)p, (IV)PL_regstartp[p],
 181                       (IV)(PL_reg_start_tmp[p] - PL_bostr),
 182                       (IV)PL_regendp[p]
 183         ));
 184     }
 185 /* REGCP_OTHER_ELEMS are pushed in any case, parentheses or no. */
 186     SSPUSHPTR(PL_regstartp);
 187     SSPUSHPTR(PL_regendp);
 188     SSPUSHINT(PL_regsize);
 189     SSPUSHINT(*PL_reglastparen);
 190     SSPUSHINT(*PL_reglastcloseparen);
 191     SSPUSHPTR(PL_reginput);
 192 #define REGCP_FRAME_ELEMS 2
 193 /* REGCP_FRAME_ELEMS are part of the REGCP_OTHER_ELEMS and
 194  * are needed for the regexp context stack bookkeeping. */
 195     SSPUSHINT(paren_elems_to_push + REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
 196     SSPUSHINT(SAVEt_REGCONTEXT); /* Magic cookie. */
 197
 198     return retval;
 199 }
 200
 201 /* These are needed since we do not localize EVAL nodes: */
 202 #define REGCP_SET(cp)                                           \
 203     DEBUG_STATE_r(                                              \
 204             PerlIO_printf(Perl_debug_log,                       \
 205                 "  Setting an EVAL scope, savestack=%"IVdf"\n", \
 206                 (IV)PL_savestack_ix));                          \
 207     cp = PL_savestack_ix
 208
 209 #define REGCP_UNWIND(cp)                                        \
 210     DEBUG_STATE_r(                                              \
 211         if (cp != PL_savestack_ix)                              \
 212             PerlIO_printf(Perl_debug_log,                       \
 213                 "  Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
 214                 (IV)(cp), (IV)PL_savestack_ix));                \
 215     regcpblow(cp)
 216
 217 STATIC char *
 218 S_regcppop(pTHX_ const regexp *rex)
 219 {
 220     dVAR;
 221     U32 i;
 222     char *input;
 223
 224     GET_RE_DEBUG_FLAGS_DECL;
 225
 226     /* Pop REGCP_OTHER_ELEMS before the parentheses loop starts. */
 227     i = SSPOPINT;
 228     assert(i == SAVEt_REGCONTEXT); /* Check that the magic cookie is there. */
 229     i = SSPOPINT; /* Parentheses elements to pop. */
 230     input = (char *) SSPOPPTR;
 231     *PL_reglastcloseparen = SSPOPINT;
 232     *PL_reglastparen = SSPOPINT;
 233     PL_regsize = SSPOPINT;
 234     PL_regendp=(I32 *) SSPOPPTR;
 235     PL_regstartp=(I32 *) SSPOPPTR;
 236
 237
 238     /* Now restore the parentheses context. */
 239     for (i -= (REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
 240          i > 0; i -= REGCP_PAREN_ELEMS) {
 241         I32 tmps;
 242         U32 paren = (U32)SSPOPINT;
 243         PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
 244         PL_regstartp[paren] = SSPOPINT;
 245         tmps = SSPOPINT;
 246         if (paren <= *PL_reglastparen)
 247             PL_regendp[paren] = tmps;
 248         DEBUG_EXECUTE_r(
 249             PerlIO_printf(Perl_debug_log,
 250                           "     restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
 251                           (UV)paren, (IV)PL_regstartp[paren],
 252                           (IV)(PL_reg_start_tmp[paren] - PL_bostr),
 253                           (IV)PL_regendp[paren],
 254                           (paren > *PL_reglastparen ? "(no)" : ""));
 255         );
 256     }
 257     DEBUG_EXECUTE_r(
 258         if (*PL_reglastparen + 1 <= rex->nparens) {
 259             PerlIO_printf(Perl_debug_log,
 260                           "     restoring \\%"IVdf"..\\%"IVdf" to undef\n",
 261                           (IV)(*PL_reglastparen + 1), (IV)rex->nparens);
 262         }
 263     );
 264 #if 1
 265     /* It would seem that the similar code in regtry()
 266      * already takes care of this, and in fact it is in
 267      * a better location to since this code can #if 0-ed out
 268      * but the code in regtry() is needed or otherwise tests
 269      * requiring null fields (pat.t#187 and split.t#{13,14}
 270      * (as of patchlevel 7877)  will fail.  Then again,
 271      * this code seems to be necessary or otherwise
 272      * building DynaLoader will fail:
 273      * "Error: '*' not in typemap in DynaLoader.xs, line 164"
 274      * --jhi */
 275     for (i = *PL_reglastparen + 1; i <= rex->nparens; i++) {
 276         if (i > PL_regsize)
 277             PL_regstartp[i] = -1;
 278         PL_regendp[i] = -1;
 279     }
 280 #endif
 281     return input;
 282 }
 283
 284 #define regcpblow(cp) LEAVE_SCOPE(cp)   /* Ignores regcppush()ed data. */
 285
 286 /*
 287  * pregexec and friends
 288  */
 289
 290 #ifndef PERL_IN_XSUB_RE
 291 /*
 292  - pregexec - match a regexp against a string
 293  */
 294 I32
 295 Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend,
 296          char *strbeg, I32 minend, SV *screamer, U32 nosave)
 297 /* strend: pointer to null at end of string */
 298 /* strbeg: real beginning of string */
 299 /* minend: end of match must be >=minend after stringarg. */
 300 /* nosave: For optimizations. */
 301 {
 302     return
 303         regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
 304                       nosave ? 0 : REXEC_COPY_STR);
 305 }
 306 #endif
 307
 308 /*
 309  * Need to implement the following flags for reg_anch:
 310  *
 311  * USE_INTUIT_NOML              - Useful to call re_intuit_start() first
 312  * USE_INTUIT_ML
 313  * INTUIT_AUTORITATIVE_NOML     - Can trust a positive answer
 314  * INTUIT_AUTORITATIVE_ML
 315  * INTUIT_ONCE_NOML             - Intuit can match in one location only.
 316  * INTUIT_ONCE_ML
 317  *
 318  * Another flag for this function: SECOND_TIME (so that float substrs
 319  * with giant delta may be not rechecked).
 320  */
 321
 322 /* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
 323
 324 /* If SCREAM, then SvPVX_const(sv) should be compatible with strpos and strend.
 325    Otherwise, only SvCUR(sv) is used to get strbeg. */
 326
 327 /* XXXX We assume that strpos is strbeg unless sv. */
 328
 329 /* XXXX Some places assume that there is a fixed substring.
 330         An update may be needed if optimizer marks as "INTUITable"
 331         RExen without fixed substrings.  Similarly, it is assumed that
 332         lengths of all the strings are no more than minlen, thus they
 333         cannot come from lookahead.
 334         (Or minlen should take into account lookahead.)
 335   NOTE: Some of this comment is not correct. minlen does now take account
 336   of lookahead/behind. Further research is required. -- demerphq
 337
 338 */
 339
 340 /* A failure to find a constant substring means that there is no need to make
 341    an expensive call to REx engine, thus we celebrate a failure.  Similarly,
 342    finding a substring too deep into the string means that less calls to
 343    regtry() should be needed.
 344
 345    REx compiler's optimizer found 4 possible hints:
 346         a) Anchored substring;
 347         b) Fixed substring;
 348         c) Whether we are anchored (beginning-of-line or \G);
 349         d) First node (of those at offset 0) which may distingush positions;
 350    We use a)b)d) and multiline-part of c), and try to find a position in the
 351    string which does not contradict any of them.
 352  */
 353
 354 /* Most of decisions we do here should have been done at compile time.
 355    The nodes of the REx which we used for the search should have been
 356    deleted from the finite automaton. */
 357
 358 char *
 359 Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
 360                      char *strend, U32 flags, re_scream_pos_data *data)
 361 {
 362     dVAR;
 363     register I32 start_shift = 0;
 364     /* Should be nonnegative! */
 365     register I32 end_shift   = 0;
 366     register char *s;
 367     register SV *check;
 368     char *strbeg;
 369     char *t;
 370     const bool do_utf8 = (sv && SvUTF8(sv)) ? 1 : 0; /* if no sv we have to assume bytes */
 371     I32 ml_anch;
 372     register char *other_last = NULL;   /* other substr checked before this */
 373     char *check_at = NULL;              /* check substr found at this pos */
 374     const I32 multiline = prog->extflags & RXf_PMf_MULTILINE;
 375     RXi_GET_DECL(prog,progi);
 376 #ifdef DEBUGGING
 377     const char * const i_strpos = strpos;
 378 #endif
 379
 380     GET_RE_DEBUG_FLAGS_DECL;
 381
 382     RX_MATCH_UTF8_set(prog,do_utf8);
 383
 384     if (prog->extflags & RXf_UTF8) {
 385         PL_reg_flags |= RF_utf8;
 386     }
 387     DEBUG_EXECUTE_r(
 388         debug_start_match(prog, do_utf8, strpos, strend,
 389             sv ? "Guessing start of match in sv for"
 390                : "Guessing start of match in string for");
 391               );
 392
 393     /* CHR_DIST() would be more correct here but it makes things slow. */
 394     if (prog->minlen > strend - strpos) {
 395         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 396                               "String too short... [re_intuit_start]\n"));
 397         goto fail;
 398     }
 399
 400     strbeg = (sv && SvPOK(sv)) ? strend - SvCUR(sv) : strpos;
 401     PL_regeol = strend;
 402     if (do_utf8) {
 403         if (!prog->check_utf8 && prog->check_substr)
 404             to_utf8_substr(prog);
 405         check = prog->check_utf8;
 406     } else {
 407         if (!prog->check_substr && prog->check_utf8)
 408             to_byte_substr(prog);
 409         check = prog->check_substr;
 410     }
 411     if (check == &PL_sv_undef) {
 412         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 413                 "Non-utf8 string cannot match utf8 check string\n"));
 414         goto fail;
 415     }
 416     if (prog->extflags & RXf_ANCH) {    /* Match at beg-of-str or after \n */
 417         ml_anch = !( (prog->extflags & RXf_ANCH_SINGLE)
 418                      || ( (prog->extflags & RXf_ANCH_BOL)
 419                           && !multiline ) );    /* Check after \n? */
 420
 421         if (!ml_anch) {
 422           if ( !(prog->extflags & RXf_ANCH_GPOS) /* Checked by the caller */
 423                 && !(prog->intflags & PREGf_IMPLICIT) /* not a real BOL */
 424                /* SvCUR is not set on references: SvRV and SvPVX_const overlap */
 425                && sv && !SvROK(sv)
 426                && (strpos != strbeg)) {
 427               DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
 428               goto fail;
 429           }
 430           if (prog->check_offset_min == prog->check_offset_max &&
 431               !(prog->extflags & RXf_CANY_SEEN)) {
 432             /* Substring at constant offset from beg-of-str... */
 433             I32 slen;
 434
 435             s = HOP3c(strpos, prog->check_offset_min, strend);
 436
 437             if (SvTAIL(check)) {
 438                 slen = SvCUR(check);    /* >= 1 */
 439
 440                 if ( strend - s > slen || strend - s < slen - 1
 441                      || (strend - s == slen && strend[-1] != '\n')) {
 442                     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
 443                     goto fail_finish;
 444                 }
 445                 /* Now should match s[0..slen-2] */
 446                 slen--;
 447                 if (slen && (*SvPVX_const(check) != *s
 448                              || (slen > 1
 449                                  && memNE(SvPVX_const(check), s, slen)))) {
 450                   report_neq:
 451                     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
 452                     goto fail_finish;
 453                 }
 454             }
 455             else if (*SvPVX_const(check) != *s
 456                      || ((slen = SvCUR(check)) > 1
 457                          && memNE(SvPVX_const(check), s, slen)))
 458                 goto report_neq;
 459             check_at = s;
 460             goto success_at_start;
 461           }
 462         }
 463         /* Match is anchored, but substr is not anchored wrt beg-of-str. */
 464         s = strpos;
 465         start_shift = prog->check_offset_min; /* okay to underestimate on CC */
 466         end_shift = prog->check_end_shift;
 467
 468         if (!ml_anch) {
 469             const I32 end = prog->check_offset_max + CHR_SVLEN(check)
 470                                          - (SvTAIL(check) != 0);
 471             const I32 eshift = CHR_DIST((U8*)strend, (U8*)s) - end;
 472
 473             if (end_shift < eshift)
 474                 end_shift = eshift;
 475         }
 476     }
 477     else {                              /* Can match at random position */
 478         ml_anch = 0;
 479         s = strpos;
 480         start_shift = prog->check_offset_min;  /* okay to underestimate on CC */
 481         end_shift = prog->check_end_shift;
 482
 483         /* end shift should be non negative here */
 484     }
 485
 486 #ifdef DEBUGGING        /* 7/99: reports of failure (with the older version) */
 487     if (end_shift < 0)
 488         Perl_croak(aTHX_ "panic: end_shift: %"IVdf" pattern:\n%s\n ",
 489                    (IV)end_shift, prog->precomp);
 490 #endif
 491
 492   restart:
 493     /* Find a possible match in the region s..strend by looking for
 494        the "check" substring in the region corrected by start/end_shift. */
 495
 496     {
 497         I32 srch_start_shift = start_shift;
 498         I32 srch_end_shift = end_shift;
 499         if (srch_start_shift < 0 && strbeg - s > srch_start_shift) {
 500             srch_end_shift -= ((strbeg - s) - srch_start_shift);
 501             srch_start_shift = strbeg - s;
 502         }
 503     DEBUG_OPTIMISE_MORE_r({
 504         PerlIO_printf(Perl_debug_log, "Check offset min: %"IVdf" Start shift: %"IVdf" End shift %"IVdf" Real End Shift: %"IVdf"\n",
 505             (IV)prog->check_offset_min,
 506             (IV)srch_start_shift,
 507             (IV)srch_end_shift,
 508             (IV)prog->check_end_shift);
 509     });
 510
 511     if (flags & REXEC_SCREAM) {
 512         I32 p = -1;                     /* Internal iterator of scream. */
 513         I32 * const pp = data ? data->scream_pos : &p;
 514
 515         if (PL_screamfirst[BmRARE(check)] >= 0
 516             || ( BmRARE(check) == '\n'
 517                  && (BmPREVIOUS(check) == SvCUR(check) - 1)
 518                  && SvTAIL(check) ))
 519             s = screaminstr(sv, check,
 520                             srch_start_shift + (s - strbeg), srch_end_shift, pp, 0);
 521         else
 522             goto fail_finish;
 523         /* we may be pointing at the wrong string */
 524         if (s && RX_MATCH_COPIED(prog))
 525             s = strbeg + (s - SvPVX_const(sv));
 526         if (data)
 527             *data->scream_olds = s;
 528     }
 529     else {
 530         U8* start_point;
 531         U8* end_point;
 532         if (prog->extflags & RXf_CANY_SEEN) {
 533             start_point= (U8*)(s + srch_start_shift);
 534             end_point= (U8*)(strend - srch_end_shift);
 535         } else {
 536             start_point= HOP3(s, srch_start_shift, srch_start_shift < 0 ? strbeg : strend);
 537             end_point= HOP3(strend, -srch_end_shift, strbeg);
 538         }
 539         DEBUG_OPTIMISE_MORE_r({
 540             PerlIO_printf(Perl_debug_log, "fbm_instr len=%d str=<%.*s>\n",
 541                 (int)(end_point - start_point),
 542                 (int)(end_point - start_point) > 20 ? 20 : (int)(end_point - start_point),
 543                 start_point);
 544         });
 545
 546         s = fbm_instr( start_point, end_point,
 547                       check, multiline ? FBMrf_MULTILINE : 0);
 548     }
 549     }
 550     /* Update the count-of-usability, remove useless subpatterns,
 551         unshift s.  */
 552
 553     DEBUG_EXECUTE_r({
 554         RE_PV_QUOTED_DECL(quoted, do_utf8, PERL_DEBUG_PAD_ZERO(0),
 555             SvPVX_const(check), RE_SV_DUMPLEN(check), 30);
 556         PerlIO_printf(Perl_debug_log, "%s %s substr %s%s%s",
 557                           (s ? "Found" : "Did not find"),
 558             (check == (do_utf8 ? prog->anchored_utf8 : prog->anchored_substr)
 559                 ? "anchored" : "floating"),
 560             quoted,
 561             RE_SV_TAIL(check),
 562             (s ? " at offset " : "...\n") );
 563     });
 564
 565     if (!s)
 566         goto fail_finish;
 567     /* Finish the diagnostic message */
 568     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
 569
 570     /* XXX dmq: first branch is for positive lookbehind...
 571        Our check string is offset from the beginning of the pattern.
 572        So we need to do any stclass tests offset forward from that
 573        point. I think. :-(
 574      */
 575
 576
 577
 578     check_at=s;
 579
 580
 581     /* Got a candidate.  Check MBOL anchoring, and the *other* substr.
 582        Start with the other substr.
 583        XXXX no SCREAM optimization yet - and a very coarse implementation
 584        XXXX /ttx+/ results in anchored="ttx", floating="x".  floating will
 585                 *always* match.  Probably should be marked during compile...
 586        Probably it is right to do no SCREAM here...
 587      */
 588
 589     if (do_utf8 ? (prog->float_utf8 && prog->anchored_utf8)
 590                 : (prog->float_substr && prog->anchored_substr))
 591     {
 592         /* Take into account the "other" substring. */
 593         /* XXXX May be hopelessly wrong for UTF... */
 594         if (!other_last)
 595             other_last = strpos;
 596         if (check == (do_utf8 ? prog->float_utf8 : prog->float_substr)) {
 597           do_other_anchored:
 598             {
 599                 char * const last = HOP3c(s, -start_shift, strbeg);
 600                 char *last1, *last2;
 601                 char * const saved_s = s;
 602                 SV* must;
 603
 604                 t = s - prog->check_offset_max;
 605                 if (s - strpos > prog->check_offset_max  /* signed-corrected t > strpos */
 606                     && (!do_utf8
 607                         || ((t = (char*)reghopmaybe3((U8*)s, -(prog->check_offset_max), (U8*)strpos))
 608                             && t > strpos)))
 609                     NOOP;
 610                 else
 611                     t = strpos;
 612                 t = HOP3c(t, prog->anchored_offset, strend);
 613                 if (t < other_last)     /* These positions already checked */
 614                     t = other_last;
 615                 last2 = last1 = HOP3c(strend, -prog->minlen, strbeg);
 616                 if (last < last1)
 617                     last1 = last;
 618                 /* XXXX It is not documented what units *_offsets are in.
 619                    We assume bytes, but this is clearly wrong.
 620                    Meaning this code needs to be carefully reviewed for errors.
 621                    dmq.
 622                   */
 623
 624                 /* On end-of-str: see comment below. */
 625                 must = do_utf8 ? prog->anchored_utf8 : prog->anchored_substr;
 626                 if (must == &PL_sv_undef) {
 627                     s = (char*)NULL;
 628                     DEBUG_r(must = prog->anchored_utf8);        /* for debug */
 629                 }
 630                 else
 631                     s = fbm_instr(
 632                         (unsigned char*)t,
 633                         HOP3(HOP3(last1, prog->anchored_offset, strend)
 634                                 + SvCUR(must), -(SvTAIL(must)!=0), strbeg),
 635                         must,
 636                         multiline ? FBMrf_MULTILINE : 0
 637                     );
 638                 DEBUG_EXECUTE_r({
 639                     RE_PV_QUOTED_DECL(quoted, do_utf8, PERL_DEBUG_PAD_ZERO(0),
 640                         SvPVX_const(must), RE_SV_DUMPLEN(must), 30);
 641                     PerlIO_printf(Perl_debug_log, "%s anchored substr %s%s",
 642                         (s ? "Found" : "Contradicts"),
 643                         quoted, RE_SV_TAIL(must));
 644                 });
 645
 646
 647                 if (!s) {
 648                     if (last1 >= last2) {
 649                         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 650                                                 ", giving up...\n"));
 651                         goto fail_finish;
 652                     }
 653                     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 654                         ", trying floating at offset %ld...\n",
 655                         (long)(HOP3c(saved_s, 1, strend) - i_strpos)));
 656                     other_last = HOP3c(last1, prog->anchored_offset+1, strend);
 657                     s = HOP3c(last, 1, strend);
 658                     goto restart;
 659                 }
 660                 else {
 661                     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
 662                           (long)(s - i_strpos)));
 663                     t = HOP3c(s, -prog->anchored_offset, strbeg);
 664                     other_last = HOP3c(s, 1, strend);
 665                     s = saved_s;
 666                     if (t == strpos)
 667                         goto try_at_start;
 668                     goto try_at_offset;
 669                 }
 670             }
 671         }
 672         else {          /* Take into account the floating substring. */
 673             char *last, *last1;
 674             char * const saved_s = s;
 675             SV* must;
 676
 677             t = HOP3c(s, -start_shift, strbeg);
 678             last1 = last =
 679                 HOP3c(strend, -prog->minlen + prog->float_min_offset, strbeg);
 680             if (CHR_DIST((U8*)last, (U8*)t) > prog->float_max_offset)
 681                 last = HOP3c(t, prog->float_max_offset, strend);
 682             s = HOP3c(t, prog->float_min_offset, strend);
 683             if (s < other_last)
 684                 s = other_last;
 685  /* XXXX It is not documented what units *_offsets are in.  Assume bytes.  */
 686             must = do_utf8 ? prog->float_utf8 : prog->float_substr;
 687             /* fbm_instr() takes into account exact value of end-of-str
 688                if the check is SvTAIL(ed).  Since false positives are OK,
 689                and end-of-str is not later than strend we are OK. */
 690             if (must == &PL_sv_undef) {
 691                 s = (char*)NULL;
 692                 DEBUG_r(must = prog->float_utf8);       /* for debug message */
 693             }
 694             else
 695                 s = fbm_instr((unsigned char*)s,
 696                               (unsigned char*)last + SvCUR(must)
 697                                   - (SvTAIL(must)!=0),
 698                               must, multiline ? FBMrf_MULTILINE : 0);
 699             DEBUG_EXECUTE_r({
 700                 RE_PV_QUOTED_DECL(quoted, do_utf8, PERL_DEBUG_PAD_ZERO(0),
 701                     SvPVX_const(must), RE_SV_DUMPLEN(must), 30);
 702                 PerlIO_printf(Perl_debug_log, "%s floating substr %s%s",
 703                     (s ? "Found" : "Contradicts"),
 704                     quoted, RE_SV_TAIL(must));
 705             });
 706             if (!s) {
 707                 if (last1 == last) {
 708                     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 709                                             ", giving up...\n"));
 710                     goto fail_finish;
 711                 }
 712                 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 713                     ", trying anchored starting at offset %ld...\n",
 714                     (long)(saved_s + 1 - i_strpos)));
 715                 other_last = last;
 716                 s = HOP3c(t, 1, strend);
 717                 goto restart;
 718             }
 719             else {
 720                 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
 721                       (long)(s - i_strpos)));
 722                 other_last = s; /* Fix this later. --Hugo */
 723                 s = saved_s;
 724                 if (t == strpos)
 725                     goto try_at_start;
 726                 goto try_at_offset;
 727             }
 728         }
 729     }
 730
 731
 732     t= (char*)HOP3( s, -prog->check_offset_max, (prog->check_offset_max<0) ? strend : strpos);
 733
 734     DEBUG_OPTIMISE_MORE_r(
 735         PerlIO_printf(Perl_debug_log,
 736             "Check offset min:%"IVdf" max:%"IVdf" S:%"IVdf" t:%"IVdf" D:%"IVdf" end:%"IVdf"\n",
 737             (IV)prog->check_offset_min,
 738             (IV)prog->check_offset_max,
 739             (IV)(s-strpos),
 740             (IV)(t-strpos),
 741             (IV)(t-s),
 742             (IV)(strend-strpos)
 743         )
 744     );
 745
 746     if (s - strpos > prog->check_offset_max  /* signed-corrected t > strpos */
 747         && (!do_utf8
 748             || ((t = (char*)reghopmaybe3((U8*)s, -prog->check_offset_max, (U8*) ((prog->check_offset_max<0) ? strend : strpos)))
 749                  && t > strpos)))
 750     {
 751         /* Fixed substring is found far enough so that the match
 752            cannot start at strpos. */
 753       try_at_offset:
 754         if (ml_anch && t[-1] != '\n') {
 755             /* Eventually fbm_*() should handle this, but often
 756                anchored_offset is not 0, so this check will not be wasted. */
 757             /* XXXX In the code below we prefer to look for "^" even in
 758                presence of anchored substrings.  And we search even
 759                beyond the found float position.  These pessimizations
 760                are historical artefacts only.  */
 761           find_anchor:
 762             while (t < strend - prog->minlen) {
 763                 if (*t == '\n') {
 764                     if (t < check_at - prog->check_offset_min) {
 765                         if (do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) {
 766                             /* Since we moved from the found position,
 767                                we definitely contradict the found anchored
 768                                substr.  Due to the above check we do not
 769                                contradict "check" substr.
 770                                Thus we can arrive here only if check substr
 771                                is float.  Redo checking for "other"=="fixed".
 772                              */
 773                             strpos = t + 1;
 774                             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
 775                                 PL_colors[0], PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
 776                             goto do_other_anchored;
 777                         }
 778                         /* We don't contradict the found floating substring. */
 779                         /* XXXX Why not check for STCLASS? */
 780                         s = t + 1;
 781                         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
 782                             PL_colors[0], PL_colors[1], (long)(s - i_strpos)));
 783                         goto set_useful;
 784                     }
 785                     /* Position contradicts check-string */
 786                     /* XXXX probably better to look for check-string
 787                        than for "\n", so one should lower the limit for t? */
 788                     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n",
 789                         PL_colors[0], PL_colors[1], (long)(t + 1 - i_strpos)));
 790                     other_last = strpos = s = t + 1;
 791                     goto restart;
 792                 }
 793                 t++;
 794             }
 795             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
 796                         PL_colors[0], PL_colors[1]));
 797             goto fail_finish;
 798         }
 799         else {
 800             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Starting position does not contradict /%s^%s/m...\n",
 801                         PL_colors[0], PL_colors[1]));
 802         }
 803         s = t;
 804       set_useful:
 805         ++BmUSEFUL(do_utf8 ? prog->check_utf8 : prog->check_substr);    /* hooray/5 */
 806     }
 807     else {
 808         /* The found string does not prohibit matching at strpos,
 809            - no optimization of calling REx engine can be performed,
 810            unless it was an MBOL and we are not after MBOL,
 811            or a future STCLASS check will fail this. */
 812       try_at_start:
 813         /* Even in this situation we may use MBOL flag if strpos is offset
 814            wrt the start of the string. */
 815         if (ml_anch && sv && !SvROK(sv) /* See prev comment on SvROK */
 816             && (strpos != strbeg) && strpos[-1] != '\n'
 817             /* May be due to an implicit anchor of m{.*foo}  */
 818             && !(prog->intflags & PREGf_IMPLICIT))
 819         {
 820             t = strpos;
 821             goto find_anchor;
 822         }
 823         DEBUG_EXECUTE_r( if (ml_anch)
 824             PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n",
 825                           (long)(strpos - i_strpos), PL_colors[0], PL_colors[1]);
 826         );
 827       success_at_start:
 828         if (!(prog->intflags & PREGf_NAUGHTY)   /* XXXX If strpos moved? */
 829             && (do_utf8 ? (
 830                 prog->check_utf8                /* Could be deleted already */
 831                 && --BmUSEFUL(prog->check_utf8) < 0
 832                 && (prog->check_utf8 == prog->float_utf8)
 833             ) : (
 834                 prog->check_substr              /* Could be deleted already */
 835                 && --BmUSEFUL(prog->check_substr) < 0
 836                 && (prog->check_substr == prog->float_substr)
 837             )))
 838         {
 839             /* If flags & SOMETHING - do not do it many times on the same match */
 840             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "... Disabling check substring...\n"));
 841             SvREFCNT_dec(do_utf8 ? prog->check_utf8 : prog->check_substr);
 842             if (do_utf8 ? prog->check_substr : prog->check_utf8)
 843                 SvREFCNT_dec(do_utf8 ? prog->check_substr : prog->check_utf8);
 844             prog->check_substr = prog->check_utf8 = NULL;       /* disable */
 845             prog->float_substr = prog->float_utf8 = NULL;       /* clear */
 846             check = NULL;                       /* abort */
 847             s = strpos;
 848             /* XXXX This is a remnant of the old implementation.  It
 849                     looks wasteful, since now INTUIT can use many
 850                     other heuristics. */
 851             prog->extflags &= ~RXf_USE_INTUIT;
 852         }
 853         else
 854             s = strpos;
 855     }
 856
 857     /* Last resort... */
 858     /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
 859     /* trie stclasses are too expensive to use here, we are better off to
 860        leave it to regmatch itself */
 861     if (progi->regstclass && PL_regkind[OP(progi->regstclass)]!=TRIE) {
 862         /* minlen == 0 is possible if regstclass is \b or \B,
 863            and the fixed substr is ''$.
 864            Since minlen is already taken into account, s+1 is before strend;
 865            accidentally, minlen >= 1 guaranties no false positives at s + 1
 866            even for \b or \B.  But (minlen? 1 : 0) below assumes that
 867            regstclass does not come from lookahead...  */
 868         /* If regstclass takes bytelength more than 1: If charlength==1, OK.
 869            This leaves EXACTF only, which is dealt with in find_byclass().  */
 870         const U8* const str = (U8*)STRING(progi->regstclass);
 871         const int cl_l = (PL_regkind[OP(progi->regstclass)] == EXACT
 872                     ? CHR_DIST(str+STR_LEN(progi->regstclass), str)
 873                     : 1);
 874         char * endpos;
 875         if (prog->anchored_substr || prog->anchored_utf8 || ml_anch)
 876             endpos= HOP3c(s, (prog->minlen ? cl_l : 0), strend);
 877         else if (prog->float_substr || prog->float_utf8)
 878             endpos= HOP3c(HOP3c(check_at, -start_shift, strbeg), cl_l, strend);
 879         else
 880             endpos= strend;
 881
 882         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "start_shift: %"IVdf" check_at: %"IVdf" s: %"IVdf" endpos: %"IVdf"\n",
 883                                       (IV)start_shift, (IV)(check_at - strbeg), (IV)(s - strbeg), (IV)(endpos - strbeg)));
 884
 885         t = s;
 886         s = find_byclass(prog, progi->regstclass, s, endpos, NULL);
 887         if (!s) {
 888 #ifdef DEBUGGING
 889             const char *what = NULL;
 890 #endif
 891             if (endpos == strend) {
 892                 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
 893                                 "Could not match STCLASS...\n") );
 894                 goto fail;
 895             }
 896             DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
 897                                    "This position contradicts STCLASS...\n") );
 898             if ((prog->extflags & RXf_ANCH) && !ml_anch)
 899                 goto fail;
 900             /* Contradict one of substrings */
 901             if (prog->anchored_substr || prog->anchored_utf8) {
 902                 if ((do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) == check) {
 903                     DEBUG_EXECUTE_r( what = "anchored" );
 904                   hop_and_restart:
 905                     s = HOP3c(t, 1, strend);
 906                     if (s + start_shift + end_shift > strend) {
 907                         /* XXXX Should be taken into account earlier? */
 908                         DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
 909                                                "Could not match STCLASS...\n") );
 910                         goto fail;
 911                     }
 912                     if (!check)
 913                         goto giveup;
 914                     DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
 915                                 "Looking for %s substr starting at offset %ld...\n",
 916                                  what, (long)(s + start_shift - i_strpos)) );
 917                     goto restart;
 918                 }
 919                 /* Have both, check_string is floating */
 920                 if (t + start_shift >= check_at) /* Contradicts floating=check */
 921                     goto retry_floating_check;
 922                 /* Recheck anchored substring, but not floating... */
 923                 s = check_at;
 924                 if (!check)
 925                     goto giveup;
 926                 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
 927                           "Looking for anchored substr starting at offset %ld...\n",
 928                           (long)(other_last - i_strpos)) );
 929                 goto do_other_anchored;
 930             }
 931             /* Another way we could have checked stclass at the
 932                current position only: */
 933             if (ml_anch) {
 934                 s = t = t + 1;
 935                 if (!check)
 936                     goto giveup;
 937                 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
 938                           "Looking for /%s^%s/m starting at offset %ld...\n",
 939                           PL_colors[0], PL_colors[1], (long)(t - i_strpos)) );
 940                 goto try_at_offset;
 941             }
 942             if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))     /* Could have been deleted */
 943                 goto fail;
 944             /* Check is floating subtring. */
 945           retry_floating_check:
 946             t = check_at - start_shift;
 947             DEBUG_EXECUTE_r( what = "floating" );
 948             goto hop_and_restart;
 949         }
 950         if (t != s) {
 951             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 952                         "By STCLASS: moving %ld --> %ld\n",
 953                                   (long)(t - i_strpos), (long)(s - i_strpos))
 954                    );
 955         }
 956         else {
 957             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 958                                   "Does not contradict STCLASS...\n");
 959                    );
 960         }
 961     }
 962   giveup:
 963     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%s%s:%s match at offset %ld\n",
 964                           PL_colors[4], (check ? "Guessed" : "Giving up"),
 965                           PL_colors[5], (long)(s - i_strpos)) );
 966     return s;
 967
 968   fail_finish:                          /* Substring not found */
 969     if (prog->check_substr || prog->check_utf8)         /* could be removed already */
 970         BmUSEFUL(do_utf8 ? prog->check_utf8 : prog->check_substr) += 5; /* hooray */
 971   fail:
 972     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
 973                           PL_colors[4], PL_colors[5]));
 974     return NULL;
 975 }
 976
 977
 978
 979 #define REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc, uscan, len,  \
 980 uvc, charid, foldlen, foldbuf, uniflags) STMT_START {                       \
 981     switch (trie_type) {                                                    \
 982     case trie_utf8_fold:                                                    \
 983         if ( foldlen>0 ) {                                                  \
 984             uvc = utf8n_to_uvuni( uscan, UTF8_MAXLEN, &len, uniflags );     \
 985             foldlen -= len;                                                 \
 986             uscan += len;                                                   \
 987             len=0;                                                          \
 988         } else {                                                            \
 989             uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN, &len, uniflags );   \
 990             uvc = to_uni_fold( uvc, foldbuf, &foldlen );                    \
 991             foldlen -= UNISKIP( uvc );                                      \
 992             uscan = foldbuf + UNISKIP( uvc );                               \
 993         }                                                                   \
 994         break;                                                              \
 995     case trie_utf8:                                                         \
 996         uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN, &len, uniflags );       \
 997         break;                                                              \
 998     case trie_plain:                                                        \
 999         uvc = (UV)*uc;                                                      \
1000         len = 1;                                                            \
1001     }                                                                       \
1002                                                                             \
1003     if (uvc < 256) {                                                        \
1004         charid = trie->charmap[ uvc ];                                      \
1005     }                                                                       \
1006     else {                                                                  \
1007         charid = 0;                                                         \
1008         if (widecharmap) {                                                  \
1009             SV** const svpp = hv_fetch(widecharmap,                         \
1010                         (char*)&uvc, sizeof(UV), 0);                        \
1011             if (svpp)                                                       \
1012                 charid = (U16)SvIV(*svpp);                                  \
1013         }                                                                   \
1014     }                                                                       \
1015 } STMT_END
1016
1017 #define REXEC_FBC_EXACTISH_CHECK(CoNd)                  \
1018     if ( (CoNd)                                        \
1019          && (ln == len ||                              \
1020              ibcmp_utf8(s, NULL, 0,  do_utf8,          \
1021                         m, NULL, ln, (bool)UTF))       \
1022          && (!reginfo || regtry(reginfo, &s)) )         \
1023         goto got_it;                                   \
1024     else {                                             \
1025          U8 foldbuf[UTF8_MAXBYTES_CASE+1];             \
1026          uvchr_to_utf8(tmpbuf, c);                     \
1027          f = to_utf8_fold(tmpbuf, foldbuf, &foldlen);  \
1028          if ( f != c                                   \
1029               && (f == c1 || f == c2)                  \
1030               && (ln == foldlen ||                     \
1031                   !ibcmp_utf8((char *) foldbuf,        \
1032                               NULL, foldlen, do_utf8,  \
1033                               m,                       \
1034                               NULL, ln, (bool)UTF))    \
1035               && (!reginfo || regtry(reginfo, &s)) )    \
1036               goto got_it;                             \
1037     }                                                  \
1038     s += len
1039
1040 #define REXEC_FBC_EXACTISH_SCAN(CoNd)                     \
1041 STMT_START {                                              \
1042     while (s <= e) {                                      \
1043         if ( (CoNd)                                       \
1044              && (ln == 1 || !(OP(c) == EXACTF             \
1045                               ? ibcmp(s, m, ln)           \
1046                               : ibcmp_locale(s, m, ln)))  \
1047              && (!reginfo || regtry(reginfo, &s)) )        \
1048             goto got_it;                                  \
1049         s++;                                              \
1050     }                                                     \
1051 } STMT_END
1052
1053 #define REXEC_FBC_UTF8_SCAN(CoDe)                     \
1054 STMT_START {                                          \
1055     while (s + (uskip = UTF8SKIP(s)) <= strend) {     \
1056         CoDe                                          \
1057         s += uskip;                                   \
1058     }                                                 \
1059 } STMT_END
1060
1061 #define REXEC_FBC_SCAN(CoDe)                          \
1062 STMT_START {                                          \
1063     while (s < strend) {                              \
1064         CoDe                                          \
1065         s++;                                          \
1066     }                                                 \
1067 } STMT_END
1068
1069 #define REXEC_FBC_UTF8_CLASS_SCAN(CoNd)               \
1070 REXEC_FBC_UTF8_SCAN(                                  \
1071     if (CoNd) {                                       \
1072         if (tmp && (!reginfo || regtry(reginfo, &s)))  \
1073             goto got_it;                              \
1074         else                                          \
1075             tmp = doevery;                            \
1076     }                                                 \
1077     else                                              \
1078         tmp = 1;                                      \
1079 )
1080
1081 #define REXEC_FBC_CLASS_SCAN(CoNd)                    \
1082 REXEC_FBC_SCAN(                                       \
1083     if (CoNd) {                                       \
1084         if (tmp && (!reginfo || regtry(reginfo, &s)))  \
1085             goto got_it;                              \
1086         else                                          \
1087             tmp = doevery;                            \
1088     }                                                 \
1089     else                                              \
1090         tmp = 1;                                      \
1091 )
1092
1093 #define REXEC_FBC_TRYIT               \
1094 if ((!reginfo || regtry(reginfo, &s))) \
1095     goto got_it
1096
1097 #define REXEC_FBC_CSCAN_PRELOAD(UtFpReLoAd,CoNdUtF8,CoNd)      \
1098     if (do_utf8) {                                             \
1099         UtFpReLoAd;                                            \
1100         REXEC_FBC_UTF8_CLASS_SCAN(CoNdUtF8);                   \
1101     }                                                          \
1102     else {                                                     \
1103         REXEC_FBC_CLASS_SCAN(CoNd);                            \
1104     }                                                          \
1105     break
1106
1107 #define REXEC_FBC_CSCAN_TAINT(CoNdUtF8,CoNd)                   \
1108     PL_reg_flags |= RF_tainted;                                \
1109     if (do_utf8) {                                             \
1110         REXEC_FBC_UTF8_CLASS_SCAN(CoNdUtF8);                   \
1111     }                                                          \
1112     else {                                                     \
1113         REXEC_FBC_CLASS_SCAN(CoNd);                            \
1114     }                                                          \
1115     break
1116
1117 #define DUMP_EXEC_POS(li,s,doutf8) \
1118     dump_exec_pos(li,s,(PL_regeol),(PL_bostr),(PL_reg_starttry),doutf8)
1119
1120 /* We know what class REx starts with.  Try to find this position... */
1121 /* if reginfo is NULL, its a dryrun */
1122 /* annoyingly all the vars in this routine have different names from their counterparts
1123    in regmatch. /grrr */
1124
1125 STATIC char *
1126 S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
1127     const char *strend, regmatch_info *reginfo)
1128 {
1129         dVAR;
1130         const I32 doevery = (prog->intflags & PREGf_SKIP) == 0;
1131         char *m;
1132         STRLEN ln;
1133         STRLEN lnc;
1134         register STRLEN uskip;
1135         unsigned int c1;
1136         unsigned int c2;
1137         char *e;
1138         register I32 tmp = 1;   /* Scratch variable? */
1139         register const bool do_utf8 = PL_reg_match_utf8;
1140         RXi_GET_DECL(prog,progi);
1141
1142         /* We know what class it must start with. */
1143         switch (OP(c)) {
1144         case ANYOF:
1145             if (do_utf8) {
1146                  REXEC_FBC_UTF8_CLASS_SCAN((ANYOF_FLAGS(c) & ANYOF_UNICODE) ||
1147                           !UTF8_IS_INVARIANT((U8)s[0]) ?
1148                           reginclass(prog, c, (U8*)s, 0, do_utf8) :
1149                           REGINCLASS(prog, c, (U8*)s));
1150             }
1151             else {
1152                  while (s < strend) {
1153                       STRLEN skip = 1;
1154
1155                       if (REGINCLASS(prog, c, (U8*)s) ||
1156                           (ANYOF_FOLD_SHARP_S(c, s, strend) &&
1157                            /* The assignment of 2 is intentional:
1158                             * for the folded sharp s, the skip is 2. */
1159                            (skip = SHARP_S_SKIP))) {
1160                            if (tmp && (!reginfo || regtry(reginfo, &s)))
1161                                 goto got_it;
1162                            else
1163                                 tmp = doevery;
1164                       }
1165                       else
1166                            tmp = 1;
1167                       s += skip;
1168                  }
1169             }
1170             break;
1171         case CANY:
1172             REXEC_FBC_SCAN(
1173                 if (tmp && (!reginfo || regtry(reginfo, &s)))
1174                     goto got_it;
1175                 else
1176                     tmp = doevery;
1177             );
1178             break;
1179         case EXACTF:
1180             m   = STRING(c);
1181             ln  = STR_LEN(c);   /* length to match in octets/bytes */
1182             lnc = (I32) ln;     /* length to match in characters */
1183             if (UTF) {
1184                 STRLEN ulen1, ulen2;
1185                 U8 *sm = (U8 *) m;
1186                 U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
1187                 U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
1188                 const U32 uniflags = UTF8_ALLOW_DEFAULT;
1189
1190                 to_utf8_lower((U8*)m, tmpbuf1, &ulen1);
1191                 to_utf8_upper((U8*)m, tmpbuf2, &ulen2);
1192
1193                 c1 = utf8n_to_uvchr(tmpbuf1, UTF8_MAXBYTES_CASE,
1194                                     0, uniflags);
1195                 c2 = utf8n_to_uvchr(tmpbuf2, UTF8_MAXBYTES_CASE,
1196                                     0, uniflags);
1197                 lnc = 0;
1198                 while (sm < ((U8 *) m + ln)) {
1199                     lnc++;
1200                     sm += UTF8SKIP(sm);
1201                 }
1202             }
1203             else {
1204                 c1 = *(U8*)m;
1205                 c2 = PL_fold[c1];
1206             }
1207             goto do_exactf;
1208         case EXACTFL:
1209             m   = STRING(c);
1210             ln  = STR_LEN(c);
1211             lnc = (I32) ln;
1212             c1 = *(U8*)m;
1213             c2 = PL_fold_locale[c1];
1214           do_exactf:
1215             e = HOP3c(strend, -((I32)lnc), s);
1216
1217             if (!reginfo && e < s)
1218                 e = s;                  /* Due to minlen logic of intuit() */
1219
1220             /* The idea in the EXACTF* cases is to first find the
1221              * first character of the EXACTF* node and then, if
1222              * necessary, case-insensitively compare the full
1223              * text of the node.  The c1 and c2 are the first
1224              * characters (though in Unicode it gets a bit
1225              * more complicated because there are more cases
1226              * than just upper and lower: one needs to use
1227              * the so-called folding case for case-insensitive
1228              * matching (called "loose matching" in Unicode).
1229              * ibcmp_utf8() will do just that. */
1230
1231             if (do_utf8) {
1232                 UV c, f;
1233                 U8 tmpbuf [UTF8_MAXBYTES+1];
1234                 STRLEN len, foldlen;
1235                 const U32 uniflags = UTF8_ALLOW_DEFAULT;
1236                 if (c1 == c2) {
1237                     /* Upper and lower of 1st char are equal -
1238                      * probably not a "letter". */
1239                     while (s <= e) {
1240                         c = utf8n_to_uvchr((U8*)s, UTF8_MAXBYTES, &len,
1241                                            uniflags);
1242                         REXEC_FBC_EXACTISH_CHECK(c == c1);
1243                     }
1244                 }
1245                 else {
1246                     while (s <= e) {
1247                       c = utf8n_to_uvchr((U8*)s, UTF8_MAXBYTES, &len,
1248                                            uniflags);
1249
1250                         /* Handle some of the three Greek sigmas cases.
1251                          * Note that not all the possible combinations
1252                          * are handled here: some of them are handled
1253                          * by the standard folding rules, and some of
1254                          * them (the character class or ANYOF cases)
1255                          * are handled during compiletime in
1256                          * regexec.c:S_regclass(). */
1257                         if (c == (UV)UNICODE_GREEK_CAPITAL_LETTER_SIGMA ||
1258                             c == (UV)UNICODE_GREEK_SMALL_LETTER_FINAL_SIGMA)
1259                             c = (UV)UNICODE_GREEK_SMALL_LETTER_SIGMA;
1260
1261                         REXEC_FBC_EXACTISH_CHECK(c == c1 || c == c2);
1262                     }
1263                 }
1264             }
1265             else {
1266                 if (c1 == c2)
1267                     REXEC_FBC_EXACTISH_SCAN(*(U8*)s == c1);
1268                 else
1269                     REXEC_FBC_EXACTISH_SCAN(*(U8*)s == c1 || *(U8*)s == c2);
1270             }
1271             break;
1272         case BOUNDL:
1273             PL_reg_flags |= RF_tainted;
1274             /* FALL THROUGH */
1275         case BOUND:
1276             if (do_utf8) {
1277                 if (s == PL_bostr)
1278                     tmp = '\n';
1279                 else {
1280                     U8 * const r = reghop3((U8*)s, -1, (U8*)PL_bostr);
1281                     tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, UTF8_ALLOW_DEFAULT);
1282                 }
1283                 tmp = ((OP(c) == BOUND ?
1284                         isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
1285                 LOAD_UTF8_CHARCLASS_ALNUM();
1286                 REXEC_FBC_UTF8_SCAN(
1287                     if (tmp == !(OP(c) == BOUND ?
1288                                  (bool)swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
1289                                  isALNUM_LC_utf8((U8*)s)))
1290                     {
1291                         tmp = !tmp;
1292                         REXEC_FBC_TRYIT;
1293                 }
1294                 );
1295             }
1296             else {
1297                 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
1298                 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1299                 REXEC_FBC_SCAN(
1300                     if (tmp ==
1301                         !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
1302                         tmp = !tmp;
1303                         REXEC_FBC_TRYIT;
1304                 }
1305                 );
1306             }
1307             if ((!prog->minlen && tmp) && (!reginfo || regtry(reginfo, &s)))
1308                 goto got_it;
1309             break;
1310         case NBOUNDL:
1311             PL_reg_flags |= RF_tainted;
1312             /* FALL THROUGH */
1313         case NBOUND:
1314             if (do_utf8) {
1315                 if (s == PL_bostr)
1316                     tmp = '\n';
1317                 else {
1318                     U8 * const r = reghop3((U8*)s, -1, (U8*)PL_bostr);
1319                     tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, UTF8_ALLOW_DEFAULT);
1320                 }
1321                 tmp = ((OP(c) == NBOUND ?
1322                         isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
1323                 LOAD_UTF8_CHARCLASS_ALNUM();
1324                 REXEC_FBC_UTF8_SCAN(
1325                     if (tmp == !(OP(c) == NBOUND ?
1326                                  (bool)swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
1327                                  isALNUM_LC_utf8((U8*)s)))
1328                         tmp = !tmp;
1329                     else REXEC_FBC_TRYIT;
1330                 );
1331             }
1332             else {
1333                 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
1334                 tmp = ((OP(c) == NBOUND ?
1335                         isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1336                 REXEC_FBC_SCAN(
1337                     if (tmp ==
1338                         !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
1339                         tmp = !tmp;
1340                     else REXEC_FBC_TRYIT;
1341                 );
1342             }
1343             if ((!prog->minlen && !tmp) && (!reginfo || regtry(reginfo, &s)))
1344                 goto got_it;
1345             break;
1346         case ALNUM:
1347             REXEC_FBC_CSCAN_PRELOAD(
1348                 LOAD_UTF8_CHARCLASS_ALNUM(),
1349                 swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8),
1350                 isALNUM(*s)
1351             );
1352         case ALNUML:
1353             REXEC_FBC_CSCAN_TAINT(
1354                 isALNUM_LC_utf8((U8*)s),
1355                 isALNUM_LC(*s)
1356             );
1357         case NALNUM:
1358             REXEC_FBC_CSCAN_PRELOAD(
1359                 LOAD_UTF8_CHARCLASS_ALNUM(),
1360                 !swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8),
1361                 !isALNUM(*s)
1362             );
1363         case NALNUML:
1364             REXEC_FBC_CSCAN_TAINT(
1365                 !isALNUM_LC_utf8((U8*)s),
1366                 !isALNUM_LC(*s)
1367             );
1368         case SPACE:
1369             REXEC_FBC_CSCAN_PRELOAD(
1370                 LOAD_UTF8_CHARCLASS_SPACE(),
1371                 *s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8),
1372                 isSPACE(*s)
1373             );
1374         case SPACEL:
1375             REXEC_FBC_CSCAN_TAINT(
1376                 *s == ' ' || isSPACE_LC_utf8((U8*)s),
1377                 isSPACE_LC(*s)
1378             );
1379         case NSPACE:
1380             REXEC_FBC_CSCAN_PRELOAD(
1381                 LOAD_UTF8_CHARCLASS_SPACE(),
1382                 !(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8)),
1383                 !isSPACE(*s)
1384             );
1385         case NSPACEL:
1386             REXEC_FBC_CSCAN_TAINT(
1387                 !(*s == ' ' || isSPACE_LC_utf8((U8*)s)),
1388                 !isSPACE_LC(*s)
1389             );
1390         case DIGIT:
1391             REXEC_FBC_CSCAN_PRELOAD(
1392                 LOAD_UTF8_CHARCLASS_DIGIT(),
1393                 swash_fetch(PL_utf8_digit,(U8*)s, do_utf8),
1394                 isDIGIT(*s)
1395             );
1396         case DIGITL:
1397             REXEC_FBC_CSCAN_TAINT(
1398                 isDIGIT_LC_utf8((U8*)s),
1399                 isDIGIT_LC(*s)
1400             );
1401         case NDIGIT:
1402             REXEC_FBC_CSCAN_PRELOAD(
1403                 LOAD_UTF8_CHARCLASS_DIGIT(),
1404                 !swash_fetch(PL_utf8_digit,(U8*)s, do_utf8),
1405                 !isDIGIT(*s)
1406             );
1407         case NDIGITL:
1408             REXEC_FBC_CSCAN_TAINT(
1409                 !isDIGIT_LC_utf8((U8*)s),
1410                 !isDIGIT_LC(*s)
1411             );
1412         case AHOCORASICKC:
1413         case AHOCORASICK:
1414             {
1415                 const enum { trie_plain, trie_utf8, trie_utf8_fold }
1416                     trie_type = do_utf8 ?
1417                           (c->flags == EXACT ? trie_utf8 : trie_utf8_fold)
1418                         : trie_plain;
1419                 /* what trie are we using right now */
1420                 reg_ac_data *aho
1421                     = (reg_ac_data*)progi->data->data[ ARG( c ) ];
1422                 reg_trie_data *trie
1423                     = (reg_trie_data*)progi->data->data[ aho->trie ];
1424                 HV *widecharmap = (HV*) progi->data->data[ aho->trie + 1 ];
1425
1426                 const char *last_start = strend - trie->minlen;
1427 #ifdef DEBUGGING
1428                 const char *real_start = s;
1429 #endif
1430                 STRLEN maxlen = trie->maxlen;
1431                 SV *sv_points;
1432                 U8 **points; /* map of where we were in the input string
1433                                 when reading a given char. For ASCII this
1434                                 is unnecessary overhead as the relationship
1435                                 is always 1:1, but for unicode, especially
1436                                 case folded unicode this is not true. */
1437                 U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ];
1438                 U8 *bitmap=NULL;
1439
1440
1441                 GET_RE_DEBUG_FLAGS_DECL;
1442
1443                 /* We can't just allocate points here. We need to wrap it in
1444                  * an SV so it gets freed properly if there is a croak while
1445                  * running the match */
1446                 ENTER;
1447                 SAVETMPS;
1448                 sv_points=newSV(maxlen * sizeof(U8 *));
1449                 SvCUR_set(sv_points,
1450                     maxlen * sizeof(U8 *));
1451                 SvPOK_on(sv_points);
1452                 sv_2mortal(sv_points);
1453                 points=(U8**)SvPV_nolen(sv_points );
1454                 if ( trie_type != trie_utf8_fold
1455                      && (trie->bitmap || OP(c)==AHOCORASICKC) )
1456                 {
1457                     if (trie->bitmap)
1458                         bitmap=(U8*)trie->bitmap;
1459                     else
1460                         bitmap=(U8*)ANYOF_BITMAP(c);
1461                 }
1462                 /* this is the Aho-Corasick algorithm modified a touch
1463                    to include special handling for long "unknown char"
1464                    sequences. The basic idea being that we use AC as long
1465                    as we are dealing with a possible matching char, when
1466                    we encounter an unknown char (and we have not encountered
1467                    an accepting state) we scan forward until we find a legal
1468                    starting char.
1469                    AC matching is basically that of trie matching, except
1470                    that when we encounter a failing transition, we fall back
1471                    to the current states "fail state", and try the current char
1472                    again, a process we repeat until we reach the root state,
1473                    state 1, or a legal transition. If we fail on the root state
1474                    then we can either terminate if we have reached an accepting
1475                    state previously, or restart the entire process from the beginning
1476                    if we have not.
1477
1478                  */
1479                 while (s <= last_start) {
1480                     const U32 uniflags = UTF8_ALLOW_DEFAULT;
1481                     U8 *uc = (U8*)s;
1482                     U16 charid = 0;
1483                     U32 base = 1;
1484                     U32 state = 1;
1485                     UV uvc = 0;
1486                     STRLEN len = 0;
1487                     STRLEN foldlen = 0;
1488                     U8 *uscan = (U8*)NULL;
1489                     U8 *leftmost = NULL;
1490 #ifdef DEBUGGING
1491                     U32 accepted_word= 0;
1492 #endif
1493                     U32 pointpos = 0;
1494
1495                     while ( state && uc <= (U8*)strend ) {
1496                         int failed=0;
1497                         U32 word = aho->states[ state ].wordnum;
1498
1499                         if( state==1 ) {
1500                             if ( bitmap ) {
1501                                 DEBUG_TRIE_EXECUTE_r(
1502                                     if ( uc <= (U8*)last_start && !BITMAP_TEST(bitmap,*uc) ) {
1503                                         dump_exec_pos( (char *)uc, c, strend, real_start,
1504                                             (char *)uc, do_utf8 );
1505                                         PerlIO_printf( Perl_debug_log,
1506                                             " Scanning for legal start char...\n");
1507                                     }
1508                                 );
1509                                 while ( uc <= (U8*)last_start  && !BITMAP_TEST(bitmap,*uc) ) {
1510                                     uc++;
1511                                 }
1512                                 s= (char *)uc;
1513                             }
1514                             if (uc >(U8*)last_start) break;
1515                         }
1516
1517                         if ( word ) {
1518                             U8 *lpos= points[ (pointpos - trie->wordlen[word-1] ) % maxlen ];
1519                             if (!leftmost || lpos < leftmost) {
1520                                 DEBUG_r(accepted_word=word);
1521                                 leftmost= lpos;
1522                             }
1523                             if (base==0) break;
1524
1525                         }
1526                         points[pointpos++ % maxlen]= uc;
1527                         REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc,
1528                                              uscan, len, uvc, charid, foldlen,
1529                                              foldbuf, uniflags);
1530                         DEBUG_TRIE_EXECUTE_r({
1531                             dump_exec_pos( (char *)uc, c, strend, real_start,
1532                                 s,   do_utf8 );
1533                             PerlIO_printf(Perl_debug_log,
1534                                 " Charid:%3u CP:%4"UVxf" ",
1535                                  charid, uvc);
1536                         });
1537
1538                         do {
1539 #ifdef DEBUGGING
1540                             word = aho->states[ state ].wordnum;
1541 #endif
1542                             base = aho->states[ state ].trans.base;
1543
1544                             DEBUG_TRIE_EXECUTE_r({
1545                                 if (failed)
1546                                     dump_exec_pos( (char *)uc, c, strend, real_start,
1547                                         s,   do_utf8 );
1548                                 PerlIO_printf( Perl_debug_log,
1549                                     "%sState: %4"UVxf", word=%"UVxf,
1550                                     failed ? " Fail transition to " : "",
1551                                     (UV)state, (UV)word);
1552                             });
1553                             if ( base ) {
1554                                 U32 tmp;
1555                                 if (charid &&
1556                                      (base + charid > trie->uniquecharcount )
1557                                      && (base + charid - 1 - trie->uniquecharcount
1558                                             < trie->lasttrans)
1559                                      && trie->trans[base + charid - 1 -
1560                                             trie->uniquecharcount].check == state
1561                                      && (tmp=trie->trans[base + charid - 1 -
1562                                         trie->uniquecharcount ].next))
1563                                 {
1564                                     DEBUG_TRIE_EXECUTE_r(
1565                                         PerlIO_printf( Perl_debug_log," - legal\n"));
1566                                     state = tmp;
1567                                     break;
1568                                 }
1569                                 else {
1570                                     DEBUG_TRIE_EXECUTE_r(
1571                                         PerlIO_printf( Perl_debug_log," - fail\n"));
1572                                     failed = 1;
1573                                     state = aho->fail[state];
1574                                 }
1575                             }
1576                             else {
1577                                 /* we must be accepting here */
1578                                 DEBUG_TRIE_EXECUTE_r(
1579                                         PerlIO_printf( Perl_debug_log," - accepting\n"));
1580                                 failed = 1;
1581                                 break;
1582                             }
1583                         } while(state);
1584                         uc += len;
1585                         if (failed) {
1586                             if (leftmost)
1587                                 break;
1588                             if (!state) state = 1;
1589                         }
1590                     }
1591                     if ( aho->states[ state ].wordnum ) {
1592                         U8 *lpos = points[ (pointpos - trie->wordlen[aho->states[ state ].wordnum-1]) % maxlen ];
1593                         if (!leftmost || lpos < leftmost) {
1594                             DEBUG_r(accepted_word=aho->states[ state ].wordnum);
1595                             leftmost = lpos;
1596                         }
1597                     }
1598                     if (leftmost) {
1599                         s = (char*)leftmost;
1600                         DEBUG_TRIE_EXECUTE_r({
1601                             PerlIO_printf(
1602                                 Perl_debug_log,"Matches word #%"UVxf" at position %"IVdf". Trying full pattern...\n",
1603                                 (UV)accepted_word, (IV)(s - real_start)
1604                             );
1605                         });
1606                         if (!reginfo || regtry(reginfo, &s)) {
1607                             FREETMPS;
1608                             LEAVE;
1609                             goto got_it;
1610                         }
1611                         s = HOPc(s,1);
1612                         DEBUG_TRIE_EXECUTE_r({
1613                             PerlIO_printf( Perl_debug_log,"Pattern failed. Looking for new start point...\n");
1614                         });
1615                     } else {
1616                         DEBUG_TRIE_EXECUTE_r(
1617                             PerlIO_printf( Perl_debug_log,"No match.\n"));
1618                         break;
1619                     }
1620                 }
1621                 FREETMPS;
1622                 LEAVE;
1623             }
1624             break;
1625         default:
1626             Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
1627             break;
1628         }
1629         return 0;
1630       got_it:
1631         return s;
1632 }
1633
1634 /*
1635  - regexec_flags - match a regexp against a string
1636  */
1637 I32
1638 Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend,
1639               char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
1640 /* strend: pointer to null at end of string */
1641 /* strbeg: real beginning of string */
1642 /* minend: end of match must be >=minend after stringarg. */
1643 /* data: May be used for some additional optimizations.
1644          Currently its only used, with a U32 cast, for transmitting
1645          the ganch offset when doing a /g match. This will change */
1646 /* nosave: For optimizations. */
1647 {
1648     dVAR;
1649     /*register*/ char *s;
1650     register regnode *c;
1651     /*register*/ char *startpos = stringarg;
1652     I32 minlen;         /* must match at least this many chars */
1653     I32 dontbother = 0; /* how many characters not to try at end */
1654     I32 end_shift = 0;                  /* Same for the end. */         /* CC */
1655     I32 scream_pos = -1;                /* Internal iterator of scream. */
1656     char *scream_olds = NULL;
1657     SV* const oreplsv = GvSV(PL_replgv);
1658     const bool do_utf8 = (bool)DO_UTF8(sv);
1659     I32 multiline;
1660     RXi_GET_DECL(prog,progi);
1661     regmatch_info reginfo;  /* create some info to pass to regtry etc */
1662
1663     GET_RE_DEBUG_FLAGS_DECL;
1664
1665     PERL_UNUSED_ARG(data);
1666
1667     /* Be paranoid... */
1668     if (prog == NULL || startpos == NULL) {
1669         Perl_croak(aTHX_ "NULL regexp parameter");
1670         return 0;
1671     }
1672
1673     multiline = prog->extflags & RXf_PMf_MULTILINE;
1674     reginfo.prog = prog;
1675
1676     RX_MATCH_UTF8_set(prog, do_utf8);
1677     DEBUG_EXECUTE_r(
1678         debug_start_match(prog, do_utf8, startpos, strend,
1679         "Matching");
1680     );
1681
1682     minlen = prog->minlen;
1683
1684     if (strend - startpos < (minlen+(prog->check_offset_min<0?prog->check_offset_min:0))) {
1685         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
1686                               "String too short [regexec_flags]...\n"));
1687         goto phooey;
1688     }
1689
1690
1691     /* Check validity of program. */
1692     if (UCHARAT(progi->program) != REG_MAGIC) {
1693         Perl_croak(aTHX_ "corrupted regexp program");
1694     }
1695
1696     PL_reg_flags = 0;
1697     PL_reg_eval_set = 0;
1698     PL_reg_maxiter = 0;
1699
1700     if (prog->extflags & RXf_UTF8)
1701         PL_reg_flags |= RF_utf8;
1702
1703     /* Mark beginning of line for ^ and lookbehind. */
1704     reginfo.bol = startpos; /* XXX not used ??? */
1705     PL_bostr  = strbeg;
1706     reginfo.sv = sv;
1707
1708     /* Mark end of line for $ (and such) */
1709     PL_regeol = strend;
1710
1711     /* see how far we have to get to not match where we matched before */
1712     reginfo.till = startpos+minend;
1713
1714     /* If there is a "must appear" string, look for it. */
1715     s = startpos;
1716
1717     if (prog->extflags & RXf_GPOS_SEEN) { /* Need to set reginfo->ganch */
1718         MAGIC *mg;
1719
1720         if (flags & REXEC_IGNOREPOS)    /* Means: check only at start */
1721             reginfo.ganch = startpos + prog->gofs;
1722         else if (sv && SvTYPE(sv) >= SVt_PVMG
1723                   && SvMAGIC(sv)
1724                   && (mg = mg_find(sv, PERL_MAGIC_regex_global))
1725                   && mg->mg_len >= 0) {
1726             reginfo.ganch = strbeg + mg->mg_len;        /* Defined pos() */
1727             if (prog->extflags & RXf_ANCH_GPOS) {
1728                 if (s > reginfo.ganch)
1729                     goto phooey;
1730                 s = reginfo.ganch - prog->gofs;
1731             }
1732         }
1733         else if (data) {
1734             reginfo.ganch = strbeg + PTR2UV(data);
1735         } else                          /* pos() not defined */
1736             reginfo.ganch = strbeg;
1737     }
1738     if (PL_curpm && (PM_GETRE(PL_curpm) == prog)) {
1739         I32 *t;
1740         if (!progi->swap) {
1741         /* We have to be careful. If the previous successful match
1742            was from this regex we don't want a subsequent paritally
1743            successful match to clobber the old results.
1744            So when we detect this possibility we add a swap buffer
1745            to the re, and switch the buffer each match. If we fail
1746            we switch it back, otherwise we leave it swapped.
1747         */
1748             Newxz(progi->swap, 1, regexp_paren_ofs);
1749             /* no need to copy these */
1750             Newxz(progi->swap->startp, prog->nparens + 1, I32);
1751             Newxz(progi->swap->endp, prog->nparens + 1, I32);
1752         }
1753         t = progi->swap->startp;
1754         progi->swap->startp = prog->startp;
1755         prog->startp = t;
1756         t = progi->swap->endp;
1757         progi->swap->endp = prog->endp;
1758         prog->endp = t;
1759     }
1760     if (!(flags & REXEC_CHECKED) && (prog->check_substr != NULL || prog->check_utf8 != NULL)) {
1761         re_scream_pos_data d;
1762
1763         d.scream_olds = &scream_olds;
1764         d.scream_pos = &scream_pos;
1765         s = re_intuit_start(prog, sv, s, strend, flags, &d);
1766         if (!s) {
1767             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Not present...\n"));
1768             goto phooey;        /* not present */
1769         }
1770     }
1771
1772
1773
1774     /* Simplest case:  anchored match need be tried only once. */
1775     /*  [unless only anchor is BOL and multiline is set] */
1776     if (prog->extflags & (RXf_ANCH & ~RXf_ANCH_GPOS)) {
1777         if (s == startpos && regtry(&reginfo, &startpos))
1778             goto got_it;
1779         else if (multiline || (prog->intflags & PREGf_IMPLICIT)
1780                  || (prog->extflags & RXf_ANCH_MBOL)) /* XXXX SBOL? */
1781         {
1782             char *end;
1783
1784             if (minlen)
1785                 dontbother = minlen - 1;
1786             end = HOP3c(strend, -dontbother, strbeg) - 1;
1787             /* for multiline we only have to try after newlines */
1788             if (prog->check_substr || prog->check_utf8) {
1789                 if (s == startpos)
1790                     goto after_try;
1791                 while (1) {
1792                     if (regtry(&reginfo, &s))
1793                         goto got_it;
1794                   after_try:
1795                     if (s >= end)
1796                         goto phooey;
1797                     if (prog->extflags & RXf_USE_INTUIT) {
1798                         s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
1799                         if (!s)
1800                             goto phooey;
1801                     }
1802                     else
1803                         s++;
1804                 }
1805             } else {
1806                 if (s > startpos)
1807                     s--;
1808                 while (s < end) {
1809                     if (*s++ == '\n') { /* don't need PL_utf8skip here */
1810                         if (regtry(&reginfo, &s))
1811                             goto got_it;
1812                     }
1813                 }
1814             }
1815         }
1816         goto phooey;
1817     } else if (RXf_GPOS_CHECK == (prog->extflags & RXf_GPOS_CHECK))
1818     {
1819         /* the warning about reginfo.ganch being used without intialization
1820            is bogus -- we set it above, when prog->extflags & RXf_GPOS_SEEN
1821            and we only enter this block when the same bit is set. */
1822         char *tmp_s = reginfo.ganch - prog->gofs;
1823         if (regtry(&reginfo, &tmp_s))
1824             goto got_it;
1825         goto phooey;
1826     }
1827
1828     /* Messy cases:  unanchored match. */
1829     if ((prog->anchored_substr || prog->anchored_utf8) && prog->intflags & PREGf_SKIP) {
1830         /* we have /x+whatever/ */
1831         /* it must be a one character string (XXXX Except UTF?) */
1832         char ch;
1833 #ifdef DEBUGGING
1834         int did_match = 0;
1835 #endif
1836         if (!(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr))
1837             do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1838         ch = SvPVX_const(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr)[0];
1839
1840         if (do_utf8) {
1841             REXEC_FBC_SCAN(
1842                 if (*s == ch) {
1843                     DEBUG_EXECUTE_r( did_match = 1 );
1844                     if (regtry(&reginfo, &s)) goto got_it;
1845                     s += UTF8SKIP(s);
1846                     while (s < strend && *s == ch)
1847                         s += UTF8SKIP(s);
1848                 }
1849             );
1850         }
1851         else {
1852             REXEC_FBC_SCAN(
1853                 if (*s == ch) {
1854                     DEBUG_EXECUTE_r( did_match = 1 );
1855                     if (regtry(&reginfo, &s)) goto got_it;
1856                     s++;
1857                     while (s < strend && *s == ch)
1858                         s++;
1859                 }
1860             );
1861         }
1862         DEBUG_EXECUTE_r(if (!did_match)
1863                 PerlIO_printf(Perl_debug_log,
1864                                   "Did not find anchored character...\n")
1865                );
1866     }
1867     else if (prog->anchored_substr != NULL
1868               || prog->anchored_utf8 != NULL
1869               || ((prog->float_substr != NULL || prog->float_utf8 != NULL)
1870                   && prog->float_max_offset < strend - s)) {
1871         SV *must;
1872         I32 back_max;
1873         I32 back_min;
1874         char *last;
1875         char *last1;            /* Last position checked before */
1876 #ifdef DEBUGGING
1877         int did_match = 0;
1878 #endif
1879         if (prog->anchored_substr || prog->anchored_utf8) {
1880             if (!(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr))
1881                 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1882             must = do_utf8 ? prog->anchored_utf8 : prog->anchored_substr;
1883             back_max = back_min = prog->anchored_offset;
1884         } else {
1885             if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))
1886                 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1887             must = do_utf8 ? prog->float_utf8 : prog->float_substr;
1888             back_max = prog->float_max_offset;
1889             back_min = prog->float_min_offset;
1890         }
1891
1892
1893         if (must == &PL_sv_undef)
1894             /* could not downgrade utf8 check substring, so must fail */
1895             goto phooey;
1896
1897         if (back_min<0) {
1898             last = strend;
1899         } else {
1900             last = HOP3c(strend,        /* Cannot start after this */
1901                   -(I32)(CHR_SVLEN(must)
1902                          - (SvTAIL(must) != 0) + back_min), strbeg);
1903         }
1904         if (s > PL_bostr)
1905             last1 = HOPc(s, -1);
1906         else
1907             last1 = s - 1;      /* bogus */
1908
1909         /* XXXX check_substr already used to find "s", can optimize if
1910            check_substr==must. */
1911         scream_pos = -1;
1912         dontbother = end_shift;
1913         strend = HOPc(strend, -dontbother);
1914         while ( (s <= last) &&
1915                 ((flags & REXEC_SCREAM)
1916                  ? (s = screaminstr(sv, must, HOP3c(s, back_min, (back_min<0 ? strbeg : strend)) - strbeg,
1917                                     end_shift, &scream_pos, 0))
1918                  : (s = fbm_instr((unsigned char*)HOP3(s, back_min, (back_min<0 ? strbeg : strend)),
1919                                   (unsigned char*)strend, must,
1920                                   multiline ? FBMrf_MULTILINE : 0))) ) {
1921             /* we may be pointing at the wrong string */
1922             if ((flags & REXEC_SCREAM) && RX_MATCH_COPIED(prog))
1923                 s = strbeg + (s - SvPVX_const(sv));
1924             DEBUG_EXECUTE_r( did_match = 1 );
1925             if (HOPc(s, -back_max) > last1) {
1926                 last1 = HOPc(s, -back_min);
1927                 s = HOPc(s, -back_max);
1928             }
1929             else {
1930                 char * const t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
1931
1932                 last1 = HOPc(s, -back_min);
1933                 s = t;
1934             }
1935             if (do_utf8) {
1936                 while (s <= last1) {
1937                     if (regtry(&reginfo, &s))
1938                         goto got_it;
1939                     s += UTF8SKIP(s);
1940                 }
1941             }
1942             else {
1943                 while (s <= last1) {
1944                     if (regtry(&reginfo, &s))
1945                         goto got_it;
1946                     s++;
1947                 }
1948             }
1949         }
1950         DEBUG_EXECUTE_r(if (!did_match) {
1951             RE_PV_QUOTED_DECL(quoted, do_utf8, PERL_DEBUG_PAD_ZERO(0),
1952                 SvPVX_const(must), RE_SV_DUMPLEN(must), 30);
1953             PerlIO_printf(Perl_debug_log, "Did not find %s substr %s%s...\n",
1954                               ((must == prog->anchored_substr || must == prog->anchored_utf8)
1955                                ? "anchored" : "floating"),
1956                 quoted, RE_SV_TAIL(must));
1957         });
1958         goto phooey;
1959     }
1960     else if ( (c = progi->regstclass) ) {
1961         if (minlen) {
1962             const OPCODE op = OP(progi->regstclass);
1963             /* don't bother with what can't match */
1964             if (PL_regkind[op] != EXACT && op != CANY && PL_regkind[op] != TRIE)
1965                 strend = HOPc(strend, -(minlen - 1));
1966         }
1967         DEBUG_EXECUTE_r({
1968             SV * const prop = sv_newmortal();
1969             regprop(prog, prop, c);
1970             {
1971                 RE_PV_QUOTED_DECL(quoted,UTF,PERL_DEBUG_PAD_ZERO(1),
1972                     s,strend-s,60);
1973                 PerlIO_printf(Perl_debug_log,
1974                     "Matching stclass %.*s against %s (%d chars)\n",
1975                     (int)SvCUR(prop), SvPVX_const(prop),
1976                      quoted, (int)(strend - s));
1977             }
1978         });
1979         if (find_byclass(prog, c, s, strend, &reginfo))
1980             goto got_it;
1981         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass... [regexec_flags]\n"));
1982     }
1983     else {
1984         dontbother = 0;
1985         if (prog->float_substr != NULL || prog->float_utf8 != NULL) {
1986             /* Trim the end. */
1987             char *last;
1988             SV* float_real;
1989
1990             if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))
1991                 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1992             float_real = do_utf8 ? prog->float_utf8 : prog->float_substr;
1993
1994             if (flags & REXEC_SCREAM) {
1995                 last = screaminstr(sv, float_real, s - strbeg,
1996                                    end_shift, &scream_pos, 1); /* last one */
1997                 if (!last)
1998                     last = scream_olds; /* Only one occurrence. */
1999                 /* we may be pointing at the wrong string */
2000                 else if (RX_MATCH_COPIED(prog))
2001                     s = strbeg + (s - SvPVX_const(sv));
2002             }
2003             else {
2004                 STRLEN len;
2005                 const char * const little = SvPV_const(float_real, len);
2006
2007                 if (SvTAIL(float_real)) {
2008                     if (memEQ(strend - len + 1, little, len - 1))
2009                         last = strend - len + 1;
2010                     else if (!multiline)
2011                         last = memEQ(strend - len, little, len)
2012                             ? strend - len : NULL;
2013                     else
2014                         goto find_last;
2015                 } else {
2016                   find_last:
2017                     if (len)
2018                         last = rninstr(s, strend, little, little + len);
2019                     else
2020                         last = strend;  /* matching "$" */
2021                 }
2022             }
2023             if (last == NULL) {
2024                 DEBUG_EXECUTE_r(
2025                     PerlIO_printf(Perl_debug_log,
2026                         "%sCan't trim the tail, match fails (should not happen)%s\n",
2027                         PL_colors[4], PL_colors[5]));
2028                 goto phooey; /* Should not happen! */
2029             }
2030             dontbother = strend - last + prog->float_min_offset;
2031         }
2032         if (minlen && (dontbother < minlen))
2033             dontbother = minlen - 1;
2034         strend -= dontbother;              /* this one's always in bytes! */
2035         /* We don't know much -- general case. */
2036         if (do_utf8) {
2037             for (;;) {
2038                 if (regtry(&reginfo, &s))
2039                     goto got_it;
2040                 if (s >= strend)
2041                     break;
2042                 s += UTF8SKIP(s);
2043             };
2044         }
2045         else {
2046             do {
2047                 if (regtry(&reginfo, &s))
2048                     goto got_it;
2049             } while (s++ < strend);
2050         }
2051     }
2052
2053     /* Failure. */
2054     goto phooey;
2055
2056 got_it:
2057     RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
2058
2059     if (PL_reg_eval_set) {
2060         /* Preserve the current value of $^R */
2061         if (oreplsv != GvSV(PL_replgv))
2062             sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
2063                                                   restored, the value remains
2064                                                   the same. */
2065         restore_pos(aTHX_ prog);
2066     }
2067     if (prog->paren_names)
2068         (void)hv_iterinit(prog->paren_names);
2069
2070     /* make sure $`, $&, $', and $digit will work later */
2071     if ( !(flags & REXEC_NOT_FIRST) ) {
2072         RX_MATCH_COPY_FREE(prog);
2073         if (flags & REXEC_COPY_STR) {
2074             const I32 i = PL_regeol - startpos + (stringarg - strbeg);
2075 #ifdef PERL_OLD_COPY_ON_WRITE
2076             if ((SvIsCOW(sv)
2077                  || (SvFLAGS(sv) & CAN_COW_MASK) == CAN_COW_FLAGS)) {
2078                 if (DEBUG_C_TEST) {
2079                     PerlIO_printf(Perl_debug_log,
2080                                   "Copy on write: regexp capture, type %d\n",
2081                                   (int) SvTYPE(sv));
2082                 }
2083                 prog->saved_copy = sv_setsv_cow(prog->saved_copy, sv);
2084                 prog->subbeg = (char *)SvPVX_const(prog->saved_copy);
2085                 assert (SvPOKp(prog->saved_copy));
2086             } else
2087 #endif
2088             {
2089                 RX_MATCH_COPIED_on(prog);
2090                 s = savepvn(strbeg, i);
2091                 prog->subbeg = s;
2092             }
2093             prog->sublen = i;
2094         }
2095         else {
2096             prog->subbeg = strbeg;
2097             prog->sublen = PL_regeol - strbeg;  /* strend may have been modified */
2098         }
2099     }
2100
2101     return 1;
2102
2103 phooey:
2104     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n",
2105                           PL_colors[4], PL_colors[5]));
2106     if (PL_reg_eval_set)
2107         restore_pos(aTHX_ prog);
2108     if (progi->swap) {
2109         /* we failed :-( roll it back */
2110         I32 *t;
2111         t = progi->swap->startp;
2112         progi->swap->startp = prog->startp;
2113         prog->startp = t;
2114         t = progi->swap->endp;
2115         progi->swap->endp = prog->endp;
2116         prog->endp = t;
2117     }
2118     return 0;
2119 }
2120
2121
2122 /*
2123  - regtry - try match at specific point
2124  */
2125 STATIC I32                      /* 0 failure, 1 success */
2126 S_regtry(pTHX_ regmatch_info *reginfo, char **startpos)
2127 {
2128     dVAR;
2129     register I32 *sp;
2130     register I32 *ep;
2131     CHECKPOINT lastcp;
2132     regexp *prog = reginfo->prog;
2133     RXi_GET_DECL(prog,progi);
2134     GET_RE_DEBUG_FLAGS_DECL;
2135     reginfo->cutpoint=NULL;
2136
2137     if ((prog->extflags & RXf_EVAL_SEEN) && !PL_reg_eval_set) {
2138         MAGIC *mg;
2139
2140         PL_reg_eval_set = RS_init;
2141         DEBUG_EXECUTE_r(DEBUG_s(
2142             PerlIO_printf(Perl_debug_log, "  setting stack tmpbase at %"IVdf"\n",
2143                           (IV)(PL_stack_sp - PL_stack_base));
2144             ));
2145         SAVESTACK_CXPOS();
2146         cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
2147         /* Otherwise OP_NEXTSTATE will free whatever on stack now.  */
2148         SAVETMPS;
2149         /* Apparently this is not needed, judging by wantarray. */
2150         /* SAVEI8(cxstack[cxstack_ix].blk_gimme);
2151            cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
2152
2153         if (reginfo->sv) {
2154             /* Make $_ available to executed code. */
2155             if (reginfo->sv != DEFSV) {
2156                 SAVE_DEFSV;
2157                 DEFSV = reginfo->sv;
2158             }
2159
2160             if (!(SvTYPE(reginfo->sv) >= SVt_PVMG && SvMAGIC(reginfo->sv)
2161                   && (mg = mg_find(reginfo->sv, PERL_MAGIC_regex_global)))) {
2162                 /* prepare for quick setting of pos */
2163 #ifdef PERL_OLD_COPY_ON_WRITE
2164                 if (SvIsCOW(reginfo->sv))
2165                     sv_force_normal_flags(reginfo->sv, 0);
2166 #endif
2167                 mg = sv_magicext(reginfo->sv, NULL, PERL_MAGIC_regex_global,
2168                                  &PL_vtbl_mglob, NULL, 0);
2169                 mg->mg_len = -1;
2170             }
2171             PL_reg_magic    = mg;
2172             PL_reg_oldpos   = mg->mg_len;
2173             SAVEDESTRUCTOR_X(restore_pos, prog);
2174         }
2175         if (!PL_reg_curpm) {
2176             Newxz(PL_reg_curpm, 1, PMOP);
2177 #ifdef USE_ITHREADS
2178             {
2179                 SV* const repointer = newSViv(0);
2180                 /* so we know which PL_regex_padav element is PL_reg_curpm */
2181                 SvFLAGS(repointer) |= SVf_BREAK;
2182                 av_push(PL_regex_padav,repointer);
2183                 PL_reg_curpm->op_pmoffset = av_len(PL_regex_padav);
2184                 PL_regex_pad = AvARRAY(PL_regex_padav);
2185             }
2186 #endif
2187         }
2188         PM_SETRE(PL_reg_curpm, prog);
2189         PL_reg_oldcurpm = PL_curpm;
2190         PL_curpm = PL_reg_curpm;
2191         if (RX_MATCH_COPIED(prog)) {
2192             /*  Here is a serious problem: we cannot rewrite subbeg,
2193                 since it may be needed if this match fails.  Thus
2194                 $` inside (?{}) could fail... */
2195             PL_reg_oldsaved = prog->subbeg;
2196             PL_reg_oldsavedlen = prog->sublen;
2197 #ifdef PERL_OLD_COPY_ON_WRITE
2198             PL_nrs = prog->saved_copy;
2199 #endif
2200             RX_MATCH_COPIED_off(prog);
2201         }
2202         else
2203             PL_reg_oldsaved = NULL;
2204         prog->subbeg = PL_bostr;
2205         prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
2206     }
2207     DEBUG_EXECUTE_r(PL_reg_starttry = *startpos);
2208     prog->startp[0] = *startpos - PL_bostr;
2209     PL_reginput = *startpos;
2210     PL_reglastparen = &prog->lastparen;
2211     PL_reglastcloseparen = &prog->lastcloseparen;
2212     prog->lastparen = 0;
2213     prog->lastcloseparen = 0;
2214     PL_regsize = 0;
2215     PL_regstartp = prog->startp;
2216     PL_regendp = prog->endp;
2217     if (PL_reg_start_tmpl <= prog->nparens) {
2218         PL_reg_start_tmpl = prog->nparens*3/2 + 3;
2219         if(PL_reg_start_tmp)
2220             Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
2221         else
2222             Newx(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
2223     }
2224
2225     /* XXXX What this code is doing here?!!!  There should be no need
2226        to do this again and again, PL_reglastparen should take care of
2227        this!  --ilya*/
2228
2229     /* Tests pat.t#187 and split.t#{13,14} seem to depend on this code.
2230      * Actually, the code in regcppop() (which Ilya may be meaning by
2231      * PL_reglastparen), is not needed at all by the test suite
2232      * (op/regexp, op/pat, op/split), but that code is needed, oddly
2233      * enough, for building DynaLoader, or otherwise this
2234      * "Error: '*' not in typemap in DynaLoader.xs, line 164"
2235      * will happen.  Meanwhile, this code *is* needed for the
2236      * above-mentioned test suite tests to succeed.  The common theme
2237      * on those tests seems to be returning null fields from matches.
2238      * --jhi */
2239 #if 1
2240     sp = PL_regstartp;
2241     ep = PL_regendp;
2242     if (prog->nparens) {
2243         register I32 i;
2244         for (i = prog->nparens; i > (I32)*PL_reglastparen; i--) {
2245             *++sp = -1;
2246             *++ep = -1;
2247         }
2248     }
2249 #endif
2250     REGCP_SET(lastcp);
2251     if (regmatch(reginfo, progi->program + 1)) {
2252         PL_regendp[0] = PL_reginput - PL_bostr;
2253         return 1;
2254     }
2255     if (reginfo->cutpoint)
2256         *startpos= reginfo->cutpoint;
2257     REGCP_UNWIND(lastcp);
2258     return 0;
2259 }
2260
2261
2262 #define sayYES goto yes
2263 #define sayNO goto no
2264 #define sayNO_SILENT goto no_silent
2265
2266 /* we dont use STMT_START/END here because it leads to
2267    "unreachable code" warnings, which are bogus, but distracting. */
2268 #define CACHEsayNO \
2269     if (ST.cache_mask) \
2270        PL_reg_poscache[ST.cache_offset] |= ST.cache_mask; \
2271     sayNO
2272
2273 /* this is used to determine how far from the left messages like
2274    'failed...' are printed. It should be set such that messages
2275    are inline with the regop output that created them.
2276 */
2277 #define REPORT_CODE_OFF 32
2278
2279
2280 /* Make sure there is a test for this +1 options in re_tests */
2281 #define TRIE_INITAL_ACCEPT_BUFFLEN 4;
2282
2283 #define CHRTEST_UNINIT -1001 /* c1/c2 haven't been calculated yet */
2284 #define CHRTEST_VOID   -1000 /* the c1/c2 "next char" test should be skipped */
2285
2286 #define SLAB_FIRST(s) (&(s)->states[0])
2287 #define SLAB_LAST(s)  (&(s)->states[PERL_REGMATCH_SLAB_SLOTS-1])
2288
2289 /* grab a new slab and return the first slot in it */
2290
2291 STATIC regmatch_state *
2292 S_push_slab(pTHX)
2293 {
2294 #if PERL_VERSION < 9
2295     dMY_CXT;
2296 #endif
2297     regmatch_slab *s = PL_regmatch_slab->next;
2298     if (!s) {
2299         Newx(s, 1, regmatch_slab);
2300         s->prev = PL_regmatch_slab;
2301         s->next = NULL;
2302         PL_regmatch_slab->next = s;
2303     }
2304     PL_regmatch_slab = s;
2305     return SLAB_FIRST(s);
2306 }
2307
2308
2309 /* push a new state then goto it */
2310
2311 #define PUSH_STATE_GOTO(state, node) \
2312     scan = node; \
2313     st->resume_state = state; \
2314     goto push_state;
2315
2316 /* push a new state with success backtracking, then goto it */
2317
2318 #define PUSH_YES_STATE_GOTO(state, node) \
2319     scan = node; \
2320     st->resume_state = state; \
2321     goto push_yes_state;
2322
2323
2324
2325 /*
2326
2327 regmatch() - main matching routine
2328
2329 This is basically one big switch statement in a loop. We execute an op,
2330 set 'next' to point the next op, and continue. If we come to a point which
2331 we may need to backtrack to on failure such as (A|B|C), we push a
2332 backtrack state onto the backtrack stack. On failure, we pop the top
2333 state, and re-enter the loop at the state indicated. If there are no more
2334 states to pop, we return failure.
2335
2336 Sometimes we also need to backtrack on success; for example /A+/, where
2337 after successfully matching one A, we need to go back and try to
2338 match another one; similarly for lookahead assertions: if the assertion
2339 completes successfully, we backtrack to the state just before the assertion
2340 and then carry on.  In these cases, the pushed state is marked as
2341 'backtrack on success too'. This marking is in fact done by a chain of
2342 pointers, each pointing to the previous 'yes' state. On success, we pop to
2343 the nearest yes state, discarding any intermediate failure-only states.
2344 Sometimes a yes state is pushed just to force some cleanup code to be
2345 called at the end of a successful match or submatch; e.g. (??{$re}) uses
2346 it to free the inner regex.
2347
2348 Note that failure backtracking rewinds the cursor position, while
2349 success backtracking leaves it alone.
2350
2351 A pattern is complete when the END op is executed, while a subpattern
2352 such as (?=foo) is complete when the SUCCESS op is executed. Both of these
2353 ops trigger the "pop to last yes state if any, otherwise return true"
2354 behaviour.
2355
2356 A common convention in this function is to use A and B to refer to the two
2357 subpatterns (or to the first nodes thereof) in patterns like /A*B/: so A is
2358 the subpattern to be matched possibly multiple times, while B is the entire
2359 rest of the pattern. Variable and state names reflect this convention.
2360
2361 The states in the main switch are the union of ops and failure/success of
2362 substates associated with with that op.  For example, IFMATCH is the op
2363 that does lookahead assertions /(?=A)B/ and so the IFMATCH state means
2364 'execute IFMATCH'; while IFMATCH_A is a state saying that we have just
2365 successfully matched A and IFMATCH_A_fail is a state saying that we have
2366 just failed to match A. Resume states always come in pairs. The backtrack
2367 state we push is marked as 'IFMATCH_A', but when that is popped, we resume
2368 at IFMATCH_A or IFMATCH_A_fail, depending on whether we are backtracking
2369 on success or failure.
2370
2371 The struct that holds a backtracking state is actually a big union, with
2372 one variant for each major type of op. The variable st points to the
2373 top-most backtrack struct. To make the code clearer, within each
2374 block of code we #define ST to alias the relevant union.
2375
2376 Here's a concrete example of a (vastly oversimplified) IFMATCH
2377 implementation:
2378
2379     switch (state) {
2380     ....
2381
2382 #define ST st->u.ifmatch
2383
2384     case IFMATCH: // we are executing the IFMATCH op, (?=A)B
2385         ST.foo = ...; // some state we wish to save
2386         ...
2387         // push a yes backtrack state with a resume value of
2388         // IFMATCH_A/IFMATCH_A_fail, then continue execution at the
2389         // first node of A:
2390         PUSH_YES_STATE_GOTO(IFMATCH_A, A);
2391         // NOTREACHED
2392
2393     case IFMATCH_A: // we have successfully executed A; now continue with B
2394         next = B;
2395         bar = ST.foo; // do something with the preserved value
2396         break;
2397
2398     case IFMATCH_A_fail: // A failed, so the assertion failed
2399         ...;   // do some housekeeping, then ...
2400         sayNO; // propagate the failure
2401
2402 #undef ST
2403
2404     ...
2405     }
2406
2407 For any old-timers reading this who are familiar with the old recursive
2408 approach, the code above is equivalent to:
2409
2410     case IFMATCH: // we are executing the IFMATCH op, (?=A)B
2411     {
2412         int foo = ...
2413         ...
2414         if (regmatch(A)) {
2415             next = B;
2416             bar = foo;
2417             break;
2418         }
2419         ...;   // do some housekeeping, then ...
2420         sayNO; // propagate the failure
2421     }
2422
2423 The topmost backtrack state, pointed to by st, is usually free. If you
2424 want to claim it, populate any ST.foo fields in it with values you wish to
2425 save, then do one of
2426
2427         PUSH_STATE_GOTO(resume_state, node);
2428         PUSH_YES_STATE_GOTO(resume_state, node);
2429
2430 which sets that backtrack state's resume value to 'resume_state', pushes a
2431 new free entry to the top of the backtrack stack, then goes to 'node'.
2432 On backtracking, the free slot is popped, and the saved state becomes the
2433 new free state. An ST.foo field in this new top state can be temporarily
2434 accessed to retrieve values, but once the main loop is re-entered, it
2435 becomes available for reuse.
2436
2437 Note that the depth of the backtrack stack constantly increases during the
2438 left-to-right execution of the pattern, rather than going up and down with
2439 the pattern nesting. For example the stack is at its maximum at Z at the
2440 end of the pattern, rather than at X in the following:
2441
2442     /(((X)+)+)+....(Y)+....Z/
2443
2444 The only exceptions to this are lookahead/behind assertions and the cut,
2445 (?>A), which pop all the backtrack states associated with A before
2446 continuing.
2447
2448 Bascktrack state structs are allocated in slabs of about 4K in size.
2449 PL_regmatch_state and st always point to the currently active state,
2450 and PL_regmatch_slab points to the slab currently containing
2451 PL_regmatch_state.  The first time regmatch() is called, the first slab is
2452 allocated, and is never freed until interpreter destruction. When the slab
2453 is full, a new one is allocated and chained to the end. At exit from
2454 regmatch(), slabs allocated since entry are freed.
2455
2456 */
2457
2458
2459 #define DEBUG_STATE_pp(pp)                                  \
2460     DEBUG_STATE_r({                                         \
2461         DUMP_EXEC_POS(locinput, scan, do_utf8);             \
2462         PerlIO_printf(Perl_debug_log,                       \
2463             "    %*s"pp" %s%s%s%s%s\n",                     \
2464             depth*2, "",                                    \
2465             reg_name[st->resume_state],                     \
2466             ((st==yes_state||st==mark_state) ? "[" : ""),   \
2467             ((st==yes_state) ? "Y" : ""),                   \
2468             ((st==mark_state) ? "M" : ""),                  \
2469             ((st==yes_state||st==mark_state) ? "]" : "")    \
2470         );                                                  \
2471     });
2472
2473
2474 #define REG_NODE_NUM(x) ((x) ? (int)((x)-prog) : -1)
2475
2476 #ifdef DEBUGGING
2477
2478 STATIC void
2479 S_debug_start_match(pTHX_ const regexp *prog, const bool do_utf8,
2480     const char *start, const char *end, const char *blurb)
2481 {
2482     const bool utf8_pat= prog->extflags & RXf_UTF8 ? 1 : 0;
2483     if (!PL_colorset)
2484             reginitcolors();
2485     {
2486         RE_PV_QUOTED_DECL(s0, utf8_pat, PERL_DEBUG_PAD_ZERO(0),
2487             prog->precomp, prog->prelen, 60);
2488
2489         RE_PV_QUOTED_DECL(s1, do_utf8, PERL_DEBUG_PAD_ZERO(1),
2490             start, end - start, 60);
2491
2492         PerlIO_printf(Perl_debug_log,
2493             "%s%s REx%s %s against %s\n",
2494                        PL_colors[4], blurb, PL_colors[5], s0, s1);
2495
2496         if (do_utf8||utf8_pat)
2497             PerlIO_printf(Perl_debug_log, "UTF-8 %s%s%s...\n",
2498                 utf8_pat ? "pattern" : "",
2499                 utf8_pat && do_utf8 ? " and " : "",
2500                 do_utf8 ? "string" : ""
2501             );
2502     }
2503 }
2504
2505 STATIC void
2506 S_dump_exec_pos(pTHX_ const char *locinput,
2507                       const regnode *scan,
2508                       const char *loc_regeol,
2509                       const char *loc_bostr,
2510                       const char *loc_reg_starttry,
2511                       const bool do_utf8)
2512 {
2513     const int docolor = *PL_colors[0] || *PL_colors[2] || *PL_colors[4];
2514     const int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
2515     int l = (loc_regeol - locinput) > taill ? taill : (loc_regeol - locinput);
2516     /* The part of the string before starttry has one color
2517        (pref0_len chars), between starttry and current
2518        position another one (pref_len - pref0_len chars),
2519        after the current position the third one.
2520        We assume that pref0_len <= pref_len, otherwise we
2521        decrease pref0_len.  */
2522     int pref_len = (locinput - loc_bostr) > (5 + taill) - l
2523         ? (5 + taill) - l : locinput - loc_bostr;
2524     int pref0_len;
2525
2526     while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput - pref_len)))
2527         pref_len++;
2528     pref0_len = pref_len  - (locinput - loc_reg_starttry);
2529     if (l + pref_len < (5 + taill) && l < loc_regeol - locinput)
2530         l = ( loc_regeol - locinput > (5 + taill) - pref_len
2531               ? (5 + taill) - pref_len : loc_regeol - locinput);
2532     while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput + l)))
2533         l--;
2534     if (pref0_len < 0)
2535         pref0_len = 0;
2536     if (pref0_len > pref_len)
2537         pref0_len = pref_len;
2538     {
2539         const int is_uni = (do_utf8 && OP(scan) != CANY) ? 1 : 0;
2540
2541         RE_PV_COLOR_DECL(s0,len0,is_uni,PERL_DEBUG_PAD(0),
2542             (locinput - pref_len),pref0_len, 60, 4, 5);
2543
2544         RE_PV_COLOR_DECL(s1,len1,is_uni,PERL_DEBUG_PAD(1),
2545                     (locinput - pref_len + pref0_len),
2546                     pref_len - pref0_len, 60, 2, 3);
2547
2548         RE_PV_COLOR_DECL(s2,len2,is_uni,PERL_DEBUG_PAD(2),
2549                     locinput, loc_regeol - locinput, 10, 0, 1);
2550
2551         const STRLEN tlen=len0+len1+len2;
2552         PerlIO_printf(Perl_debug_log,
2553                     "%4"IVdf" <%.*s%.*s%s%.*s>%*s|",
2554                     (IV)(locinput - loc_bostr),
2555                     len0, s0,
2556                     len1, s1,
2557                     (docolor ? "" : "> <"),
2558                     len2, s2,
2559                     (int)(tlen > 19 ? 0 :  19 - tlen),
2560                     "");
2561     }
2562 }
2563
2564 #endif
2565
2566 /* reg_check_named_buff_matched()
2567  * Checks to see if a named buffer has matched. The data array of
2568  * buffer numbers corresponding to the buffer is expected to reside
2569  * in the regexp->data->data array in the slot stored in the ARG() of
2570  * node involved. Note that this routine doesn't actually care about the
2571  * name, that information is not preserved from compilation to execution.
2572  * Returns the index of the leftmost defined buffer with the given name
2573  * or 0 if non of the buffers matched.
2574  */
2575 STATIC I32
2576 S_reg_check_named_buff_matched(pTHX_ const regexp *rex, const regnode *scan) {
2577     I32 n;
2578     RXi_GET_DECL(rex,rexi);
2579     SV *sv_dat=(SV*)rexi->data->data[ ARG( scan ) ];
2580     I32 *nums=(I32*)SvPVX(sv_dat);
2581     for ( n=0; n<SvIVX(sv_dat); n++ ) {
2582         if ((I32)*PL_reglastparen >= nums[n] &&
2583             PL_regendp[nums[n]] != -1)
2584         {
2585             return nums[n];
2586         }
2587     }
2588     return 0;
2589 }
2590
2591 STATIC I32                      /* 0 failure, 1 success */
2592 S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
2593 {
2594 #if PERL_VERSION < 9
2595     dMY_CXT;
2596 #endif
2597     dVAR;
2598     register const bool do_utf8 = PL_reg_match_utf8;
2599     const U32 uniflags = UTF8_ALLOW_DEFAULT;
2600
2601     regexp *rex = reginfo->prog;
2602     RXi_GET_DECL(rex,rexi);
2603
2604     regmatch_slab  *orig_slab;
2605     regmatch_state *orig_state;
2606
2607     /* the current state. This is a cached copy of PL_regmatch_state */
2608     register regmatch_state *st;
2609
2610     /* cache heavy used fields of st in registers */
2611     register regnode *scan;
2612     register regnode *next;
2613     register U32 n = 0; /* general value; init to avoid compiler warning */
2614     register I32 ln = 0; /* len or last;  init to avoid compiler warning */
2615     register char *locinput = PL_reginput;
2616     register I32 nextchr;   /* is always set to UCHARAT(locinput) */
2617
2618     bool result = 0;        /* return value of S_regmatch */
2619     int depth = 0;          /* depth of backtrack stack */
2620     int nochange_depth = 0; /* depth of GOSUB recursion with nochange*/
2621     regmatch_state *yes_state = NULL; /* state to pop to on success of
2622                                                             subpattern */
2623     /* mark_state piggy backs on the yes_state logic so that when we unwind
2624        the stack on success we can update the mark_state as we go */
2625     regmatch_state *mark_state = NULL; /* last mark state we have seen */
2626
2627     regmatch_state *cur_eval = NULL; /* most recent EVAL_AB state */
2628     struct regmatch_state  *cur_curlyx = NULL; /* most recent curlyx */
2629     U32 state_num;
2630     bool no_final = 0;      /* prevent failure from backtracking? */
2631     bool do_cutgroup = 0;   /* no_final only until next branch/trie entry */
2632     char *startpoint = PL_reginput;
2633     SV *popmark = NULL;     /* are we looking for a mark? */
2634     SV *sv_commit = NULL;   /* last mark name seen in failure */
2635     SV *sv_yes_mark = NULL; /* last mark name we have seen
2636                                during a successfull match */
2637     U32 lastopen = 0;       /* last open we saw */
2638     bool has_cutgroup = RX_HAS_CUTGROUP(rex) ? 1 : 0;
2639
2640
2641     /* these three flags are set by various ops to signal information to
2642      * the very next op. They have a useful lifetime of exactly one loop
2643      * iteration, and are not preserved or restored by state pushes/pops
2644      */
2645     bool sw = 0;            /* the condition value in (?(cond)a|b) */
2646     bool minmod = 0;        /* the next "{n,m}" is a "{n,m}?" */
2647     int logical = 0;        /* the following EVAL is:
2648                                 0: (?{...})
2649                                 1: (?(?{...})X|Y)
2650                                 2: (??{...})
2651                                or the following IFMATCH/UNLESSM is:
2652                                 false: plain (?=foo)
2653                                 true:  used as a condition: (?(?=foo))
2654                             */
2655
2656 #ifdef DEBUGGING
2657     GET_RE_DEBUG_FLAGS_DECL;
2658 #endif
2659
2660     DEBUG_OPTIMISE_r( {
2661             PerlIO_printf(Perl_debug_log,"regmatch start\n");
2662     });
2663     /* on first ever call to regmatch, allocate first slab */
2664     if (!PL_regmatch_slab) {
2665         Newx(PL_regmatch_slab, 1, regmatch_slab);
2666         PL_regmatch_slab->prev = NULL;
2667         PL_regmatch_slab->next = NULL;
2668         PL_regmatch_state = SLAB_FIRST(PL_regmatch_slab);
2669     }
2670
2671     /* remember current high-water mark for exit */
2672     /* XXX this should be done with SAVE* instead */
2673     orig_slab  = PL_regmatch_slab;
2674     orig_state = PL_regmatch_state;
2675
2676     /* grab next free state slot */
2677     st = ++PL_regmatch_state;
2678     if (st >  SLAB_LAST(PL_regmatch_slab))
2679         st = PL_regmatch_state = S_push_slab(aTHX);
2680
2681     /* Note that nextchr is a byte even in UTF */
2682     nextchr = UCHARAT(locinput);
2683     scan = prog;
2684     while (scan != NULL) {
2685
2686         DEBUG_EXECUTE_r( {
2687             SV * const prop = sv_newmortal();
2688             regnode *rnext=regnext(scan);
2689             DUMP_EXEC_POS( locinput, scan, do_utf8 );
2690             regprop(rex, prop, scan);
2691
2692             PerlIO_printf(Perl_debug_log,
2693                     "%3"IVdf":%*s%s(%"IVdf")\n",
2694                     (IV)(scan - rexi->program), depth*2, "",
2695                     SvPVX_const(prop),
2696                     (PL_regkind[OP(scan)] == END || !rnext) ?
2697                         0 : (IV)(rnext - rexi->program));
2698         });
2699
2700         next = scan + NEXT_OFF(scan);
2701         if (next == scan)
2702             next = NULL;
2703         state_num = OP(scan);
2704
2705       reenter_switch:
2706         switch (state_num) {
2707         case BOL:
2708             if (locinput == PL_bostr)
2709             {
2710                 /* reginfo->till = reginfo->bol; */
2711                 break;
2712             }
2713             sayNO;
2714         case MBOL:
2715             if (locinput == PL_bostr ||
2716                 ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n'))
2717             {
2718                 break;
2719             }
2720             sayNO;
2721         case SBOL:
2722             if (locinput == PL_bostr)
2723                 break;
2724             sayNO;
2725         case GPOS:
2726             if (locinput == reginfo->ganch)
2727                 break;
2728             sayNO;
2729         case EOL:
2730                 goto seol;
2731         case MEOL:
2732             if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
2733                 sayNO;
2734             break;
2735         case SEOL:
2736           seol:
2737             if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
2738                 sayNO;
2739             if (PL_regeol - locinput > 1)
2740                 sayNO;
2741             break;
2742         case EOS:
2743             if (PL_regeol != locinput)
2744                 sayNO;
2745             break;
2746         case SANY:
2747             if (!nextchr && locinput >= PL_regeol)
2748                 sayNO;
2749             if (do_utf8) {
2750                 locinput += PL_utf8skip[nextchr];
2751                 if (locinput > PL_regeol)
2752                     sayNO;
2753                 nextchr = UCHARAT(locinput);
2754             }
2755             else
2756                 nextchr = UCHARAT(++locinput);
2757             break;
2758         case CANY:
2759             if (!nextchr && locinput >= PL_regeol)
2760                 sayNO;
2761             nextchr = UCHARAT(++locinput);
2762             break;
2763         case REG_ANY:
2764             if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
2765                 sayNO;
2766             if (do_utf8) {
2767                 locinput += PL_utf8skip[nextchr];
2768                 if (locinput > PL_regeol)
2769                     sayNO;
2770                 nextchr = UCHARAT(locinput);
2771             }
2772             else
2773                 nextchr = UCHARAT(++locinput);
2774             break;
2775
2776 #undef  ST
2777 #define ST st->u.trie
2778         case TRIEC:
2779             /* In this case the charclass data is available inline so
2780                we can fail fast without a lot of extra overhead.
2781              */
2782             if (scan->flags == EXACT || !do_utf8) {
2783                 if(!ANYOF_BITMAP_TEST(scan, *locinput)) {
2784                     DEBUG_EXECUTE_r(
2785                         PerlIO_printf(Perl_debug_log,
2786                                   "%*s  %sfailed to match trie start class...%s\n",
2787                                   REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5])
2788                     );
2789                     sayNO_SILENT;
2790                     /* NOTREACHED */
2791                 }
2792             }
2793             /* FALL THROUGH */
2794         case TRIE:
2795             {
2796                 /* what type of TRIE am I? (utf8 makes this contextual) */
2797                 const enum { trie_plain, trie_utf8, trie_utf8_fold }
2798                     trie_type = do_utf8 ?
2799                           (scan->flags == EXACT ? trie_utf8 : trie_utf8_fold)
2800                         : trie_plain;
2801
2802                 /* what trie are we using right now */
2803                 reg_trie_data * const trie
2804                     = (reg_trie_data*)rexi->data->data[ ARG( scan ) ];
2805                 HV * widecharmap = (HV *)rexi->data->data[ ARG( scan ) + 1 ];
2806                 U32 state = trie->startstate;
2807
2808                 if (trie->bitmap && trie_type != trie_utf8_fold &&
2809                     !TRIE_BITMAP_TEST(trie,*locinput)
2810                 ) {
2811                     if (trie->states[ state ].wordnum) {
2812                          DEBUG_EXECUTE_r(
2813                             PerlIO_printf(Perl_debug_log,
2814                                           "%*s  %smatched empty string...%s\n",
2815                                           REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5])
2816                         );
2817                         break;
2818                     } else {
2819                         DEBUG_EXECUTE_r(
2820                             PerlIO_printf(Perl_debug_log,
2821                                           "%*s  %sfailed to match trie start class...%s\n",
2822                                           REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5])
2823                         );
2824                         sayNO_SILENT;
2825                    }
2826                 }
2827
2828             {
2829                 U8 *uc = ( U8* )locinput;
2830
2831                 STRLEN len = 0;
2832                 STRLEN foldlen = 0;
2833                 U8 *uscan = (U8*)NULL;
2834                 STRLEN bufflen=0;
2835                 SV *sv_accept_buff = NULL;
2836                 U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ];
2837
2838                 ST.accepted = 0; /* how many accepting states we have seen */
2839                 ST.B = next;
2840                 ST.jump = trie->jump;
2841                 ST.me = scan;
2842
2843                 /*
2844                    traverse the TRIE keeping track of all accepting states
2845                    we transition through until we get to a failing node.
2846                 */
2847
2848                 while ( state && uc <= (U8*)PL_regeol ) {
2849                     U32 base = trie->states[ state ].trans.base;
2850                     UV uvc = 0;
2851                     U16 charid;
2852                     /* We use charid to hold the wordnum as we don't use it
2853                        for charid until after we have done the wordnum logic.
2854                        We define an alias just so that the wordnum logic reads
2855                        more naturally. */
2856
2857 #define got_wordnum charid
2858                     got_wordnum = trie->states[ state ].wordnum;
2859
2860                     if ( got_wordnum ) {
2861                         if ( ! ST.accepted ) {
2862                             ENTER;
2863                             SAVETMPS;
2864                             bufflen = TRIE_INITAL_ACCEPT_BUFFLEN;
2865                             sv_accept_buff=newSV(bufflen *
2866                                             sizeof(reg_trie_accepted) - 1);
2867                             SvCUR_set(sv_accept_buff, 0);
2868                             SvPOK_on(sv_accept_buff);
2869                             sv_2mortal(sv_accept_buff);
2870                             SAVETMPS;
2871                             ST.accept_buff =
2872                                 (reg_trie_accepted*)SvPV_nolen(sv_accept_buff );
2873                         }
2874                         do {
2875                             if (ST.accepted >= bufflen) {
2876                                 bufflen *= 2;
2877                                 ST.accept_buff =(reg_trie_accepted*)
2878                                     SvGROW(sv_accept_buff,
2879                                         bufflen * sizeof(reg_trie_accepted));
2880                             }
2881                             SvCUR_set(sv_accept_buff,SvCUR(sv_accept_buff)
2882                                 + sizeof(reg_trie_accepted));
2883
2884
2885                             ST.accept_buff[ST.accepted].wordnum = got_wordnum;
2886                             ST.accept_buff[ST.accepted].endpos = uc;
2887                             ++ST.accepted;
2888                         } while (trie->nextword && (got_wordnum= trie->nextword[got_wordnum]));
2889                     }
2890 #undef got_wordnum
2891
2892                     DEBUG_TRIE_EXECUTE_r({
2893                                 DUMP_EXEC_POS( (char *)uc, scan, do_utf8 );
2894                                 PerlIO_printf( Perl_debug_log,
2895                                     "%*s  %sState: %4"UVxf" Accepted: %4"UVxf" ",
2896                                     2+depth * 2, "", PL_colors[4],
2897                                     (UV)state, (UV)ST.accepted );
2898                     });
2899
2900                     if ( base ) {
2901                         REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc,
2902                                              uscan, len, uvc, charid, foldlen,
2903                                              foldbuf, uniflags);
2904
2905                         if (charid &&
2906                              (base + charid > trie->uniquecharcount )
2907                              && (base + charid - 1 - trie->uniquecharcount
2908                                     < trie->lasttrans)
2909                              && trie->trans[base + charid - 1 -
2910                                     trie->uniquecharcount].check == state)
2911                         {
2912                             state = trie->trans[base + charid - 1 -
2913                                 trie->uniquecharcount ].next;
2914                         }
2915                         else {
2916                             state = 0;
2917                         }
2918                         uc += len;
2919
2920                     }
2921                     else {
2922                         state = 0;
2923                     }
2924                     DEBUG_TRIE_EXECUTE_r(
2925                         PerlIO_printf( Perl_debug_log,
2926                             "Charid:%3x CP:%4"UVxf" After State: %4"UVxf"%s\n",
2927                             charid, uvc, (UV)state, PL_colors[5] );
2928                     );
2929                 }
2930                 if (!ST.accepted )
2931                    sayNO;
2932
2933                 DEBUG_EXECUTE_r(
2934                     PerlIO_printf( Perl_debug_log,
2935                         "%*s  %sgot %"IVdf" possible matches%s\n",
2936                         REPORT_CODE_OFF + depth * 2, "",
2937                         PL_colors[4], (IV)ST.accepted, PL_colors[5] );
2938                 );
2939             }}
2940
2941             /* FALL THROUGH */
2942         case TRIE_next_fail: /* we failed - try next alterative */
2943             if (do_cutgroup) {
2944                 do_cutgroup = 0;
2945                 no_final = 0;
2946             }
2947             if ( ST.accepted == 1 ) {
2948                 /* only one choice left - just continue */
2949                 DEBUG_EXECUTE_r({
2950                     AV *const trie_words
2951                         = (AV *) rexi->data->data[ARG(ST.me)+TRIE_WORDS_OFFSET];
2952                     SV ** const tmp = av_fetch( trie_words,
2953                         ST.accept_buff[ 0 ].wordnum-1, 0 );
2954                     SV *sv= tmp ? sv_newmortal() : NULL;
2955
2956                     PerlIO_printf( Perl_debug_log,
2957                         "%*s  %sonly one match left: #%d <%s>%s\n",
2958                         REPORT_CODE_OFF+depth*2, "", PL_colors[4],
2959                         ST.accept_buff[ 0 ].wordnum,
2960                         tmp ? pv_pretty(sv, SvPV_nolen_const(*tmp), SvCUR(*tmp), 0,
2961                                 PL_colors[0], PL_colors[1],
2962                                 (SvUTF8(*tmp) ? PERL_PV_ESCAPE_UNI : 0)
2963                             )
2964                         : "not compiled under -Dr",
2965                         PL_colors[5] );
2966                 });
2967                 PL_reginput = (char *)ST.accept_buff[ 0 ].endpos;
2968                 /* in this case we free tmps/leave before we call regmatch
2969                    as we wont be using accept_buff again. */
2970
2971                 locinput = PL_reginput;
2972                 nextchr = UCHARAT(locinput);
2973                 if ( !ST.jump || !ST.jump[ST.accept_buff[0].wordnum])
2974                     scan = ST.B;
2975                 else
2976                     scan = ST.me + ST.jump[ST.accept_buff[0].wordnum];
2977                 if (!has_cutgroup) {
2978                     FREETMPS;
2979                     LEAVE;
2980                 } else {
2981                     ST.accepted--;
2982                     PUSH_YES_STATE_GOTO(TRIE_next, scan);
2983                 }
2984
2985                 continue; /* execute rest of RE */
2986             }
2987
2988             if (!ST.accepted-- ) {
2989                 DEBUG_EXECUTE_r({
2990                     PerlIO_printf( Perl_debug_log,
2991                         "%*s  %sTRIE failed...%s\n",
2992                         REPORT_CODE_OFF+depth*2, "",
2993                         PL_colors[4],
2994                         PL_colors[5] );
2995                 });
2996                 FREETMPS;
2997                 LEAVE;
2998                 sayNO_SILENT;
2999             }
3000
3001             /*
3002                There are at least two accepting states left.  Presumably
3003                the number of accepting states is going to be low,
3004                typically two. So we simply scan through to find the one
3005                with lowest wordnum.  Once we find it, we swap the last
3006                state into its place and decrement the size. We then try to
3007                match the rest of the pattern at the point where the word
3008                ends. If we succeed, control just continues along the
3009                regex; if we fail we return here to try the next accepting
3010                state
3011              */
3012
3013             {
3014                 U32 best = 0;
3015                 U32 cur;
3016                 for( cur = 1 ; cur <= ST.accepted ; cur++ ) {
3017                     DEBUG_TRIE_EXECUTE_r(
3018                         PerlIO_printf( Perl_debug_log,
3019                             "%*s  %sgot %"IVdf" (%d) as best, looking at %"IVdf" (%d)%s\n",
3020                             REPORT_CODE_OFF + depth * 2, "", PL_colors[4],
3021                             (IV)best, ST.accept_buff[ best ].wordnum, (IV)cur,
3022                             ST.accept_buff[ cur ].wordnum, PL_colors[5] );
3023                     );
3024
3025                     if (ST.accept_buff[cur].wordnum <
3026                             ST.accept_buff[best].wordnum)
3027                         best = cur;
3028                 }
3029
3030                 DEBUG_EXECUTE_r({
3031                     AV *const trie_words
3032                         = (AV *) rexi->data->data[ARG(ST.me)+TRIE_WORDS_OFFSET];
3033                     SV ** const tmp = av_fetch( trie_words,
3034                         ST.accept_buff[ best ].wordnum - 1, 0 );
3035                     regnode *nextop=(!ST.jump || !ST.jump[ST.accept_buff[best].wordnum]) ?
3036                                     ST.B :
3037                                     ST.me + ST.jump[ST.accept_buff[best].wordnum];
3038                     SV *sv= tmp ? sv_newmortal() : NULL;
3039
3040                     PerlIO_printf( Perl_debug_log,
3041                         "%*s  %strying alternation #%d <%s> at node #%d %s\n",
3042                         REPORT_CODE_OFF+depth*2, "", PL_colors[4],
3043                         ST.accept_buff[best].wordnum,
3044                         tmp ? pv_pretty(sv, SvPV_nolen_const(*tmp), SvCUR(*tmp), 0,
3045                                 PL_colors[0], PL_colors[1],
3046                                 (SvUTF8(*tmp) ? PERL_PV_ESCAPE_UNI : 0)
3047                             ) : "not compiled under -Dr",
3048                             REG_NODE_NUM(nextop),
3049                         PL_colors[5] );
3050                 });
3051
3052                 if ( best<ST.accepted ) {
3053                     reg_trie_accepted tmp = ST.accept_buff[ best ];
3054                     ST.accept_buff[ best ] = ST.accept_buff[ ST.accepted ];
3055                     ST.accept_buff[ ST.accepted ] = tmp;
3056                     best = ST.accepted;
3057                 }
3058                 PL_reginput = (char *)ST.accept_buff[ best ].endpos;
3059                 if ( !ST.jump || !ST.jump[ST.accept_buff[best].wordnum]) {
3060                     scan = ST.B;
3061                     /* NOTREACHED */
3062                 } else {
3063                     scan = ST.me + ST.jump[ST.accept_buff[best].wordnum];
3064                     /* NOTREACHED */
3065                 }
3066                 if (has_cutgroup) {
3067                     PUSH_YES_STATE_GOTO(TRIE_next, scan);
3068                     /* NOTREACHED */
3069                 } else {
3070                     PUSH_STATE_GOTO(TRIE_next, scan);
3071                     /* NOTREACHED */
3072                 }
3073                 /* NOTREACHED */
3074             }
3075             /* NOTREACHED */
3076         case TRIE_next:
3077             FREETMPS;
3078             LEAVE;
3079             sayYES;
3080 #undef  ST
3081
3082         case EXACT: {
3083             char *s = STRING(scan);
3084             ln = STR_LEN(scan);
3085             if (do_utf8 != UTF) {
3086                 /* The target and the pattern have differing utf8ness. */
3087                 char *l = locinput;
3088                 const char * const e = s + ln;
3089
3090                 if (do_utf8) {
3091                     /* The target is utf8, the pattern is not utf8. */
3092                     while (s < e) {
3093                         STRLEN ulen;
3094                         if (l >= PL_regeol)
3095                              sayNO;
3096                         if (NATIVE_TO_UNI(*(U8*)s) !=
3097                             utf8n_to_uvuni((U8*)l, UTF8_MAXBYTES, &ulen,
3098                                             uniflags))
3099                              sayNO;
3100                         l += ulen;
3101                         s ++;
3102                     }
3103                 }
3104                 else {
3105                     /* The target is not utf8, the pattern is utf8. */
3106                     while (s < e) {
3107                         STRLEN ulen;
3108                         if (l >= PL_regeol)
3109                             sayNO;
3110                         if (NATIVE_TO_UNI(*((U8*)l)) !=
3111                             utf8n_to_uvuni((U8*)s, UTF8_MAXBYTES, &ulen,
3112                                            uniflags))
3113                             sayNO;
3114                         s += ulen;
3115                         l ++;
3116                     }
3117                 }
3118                 locinput = l;
3119                 nextchr = UCHARAT(locinput);
3120                 break;
3121             }
3122             /* The target and the pattern have the same utf8ness. */
3123             /* Inline the first character, for speed. */
3124             if (UCHARAT(s) != nextchr)
3125                 sayNO;
3126             if (PL_regeol - locinput < ln)
3127                 sayNO;
3128             if (ln > 1 && memNE(s, locinput, ln))
3129                 sayNO;
3130             locinput += ln;
3131             nextchr = UCHARAT(locinput);
3132             break;
3133             }
3134         case EXACTFL:
3135             PL_reg_flags |= RF_tainted;
3136             /* FALL THROUGH */
3137         case EXACTF: {
3138             char * const s = STRING(scan);
3139             ln = STR_LEN(scan);
3140
3141             if (do_utf8 || UTF) {
3142               /* Either target or the pattern are utf8. */
3143                 const char * const l = locinput;
3144                 char *e = PL_regeol;
3145
3146                 if (ibcmp_utf8(s, 0,  ln, (bool)UTF,
3147                                l, &e, 0,  do_utf8)) {
3148                      /* One more case for the sharp s:
3149                       * pack("U0U*", 0xDF) =~ /ss/i,
3150                       * the 0xC3 0x9F are the UTF-8
3151                       * byte sequence for the U+00DF. */
3152                      if (!(do_utf8 &&
3153                            toLOWER(s[0]) == 's' &&
3154                            ln >= 2 &&
3155                            toLOWER(s[1]) == 's' &&
3156                            (U8)l[0] == 0xC3 &&
3157                            e - l >= 2 &&
3158                            (U8)l[1] == 0x9F))
3159                           sayNO;
3160                 }
3161                 locinput = e;
3162                 nextchr = UCHARAT(locinput);
3163                 break;
3164             }
3165
3166             /* Neither the target and the pattern are utf8. */
3167
3168             /* Inline the first character, for speed. */
3169             if (UCHARAT(s) != nextchr &&
3170                 UCHARAT(s) != ((OP(scan) == EXACTF)
3171                                ? PL_fold : PL_fold_locale)[nextchr])
3172                 sayNO;
3173             if (PL_regeol - locinput < ln)
3174                 sayNO;
3175             if (ln > 1 && (OP(scan) == EXACTF
3176                            ? ibcmp(s, locinput, ln)
3177                            : ibcmp_locale(s, locinput, ln)))
3178                 sayNO;
3179             locinput += ln;
3180             nextchr = UCHARAT(locinput);
3181             break;
3182             }
3183         case ANYOF:
3184             if (do_utf8) {
3185                 STRLEN inclasslen = PL_regeol - locinput;
3186
3187                 if (!reginclass(rex, scan, (U8*)locinput, &inclasslen, do_utf8))
3188                     goto anyof_fail;
3189                 if (locinput >= PL_regeol)
3190                     sayNO;
3191                 locinput += inclasslen ? inclasslen : UTF8SKIP(locinput);
3192                 nextchr = UCHARAT(locinput);
3193                 break;
3194             }
3195             else {
3196                 if (nextchr < 0)
3197                     nextchr = UCHARAT(locinput);
3198                 if (!REGINCLASS(rex, scan, (U8*)locinput))
3199                     goto anyof_fail;
3200                 if (!nextchr && locinput >= PL_regeol)
3201                     sayNO;
3202                 nextchr = UCHARAT(++locinput);
3203                 break;
3204             }
3205         anyof_fail:
3206             /* If we might have the case of the German sharp s
3207              * in a casefolding Unicode character class. */
3208
3209             if (ANYOF_FOLD_SHARP_S(scan, locinput, PL_regeol)) {
3210                  locinput += SHARP_S_SKIP;
3211                  nextchr = UCHARAT(locinput);
3212             }
3213             else
3214                  sayNO;
3215             break;
3216         case ALNUML:
3217             PL_reg_flags |= RF_tainted;
3218             /* FALL THROUGH */
3219         case ALNUM:
3220             if (!nextchr)
3221                 sayNO;
3222             if (do_utf8) {
3223                 LOAD_UTF8_CHARCLASS_ALNUM();
3224                 if (!(OP(scan) == ALNUM
3225                       ? (bool)swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
3226                       : isALNUM_LC_utf8((U8*)locinput)))
3227                 {
3228                     sayNO;
3229                 }
3230                 locinput += PL_utf8skip[nextchr];
3231                 nextchr = UCHARAT(locinput);
3232                 break;
3233             }
3234             if (!(OP(scan) == ALNUM
3235                   ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
3236                 sayNO;
3237             nextchr = UCHARAT(++locinput);
3238             break;
3239         case NALNUML:
3240             PL_reg_flags |= RF_tainted;
3241             /* FALL THROUGH */
3242         case NALNUM:
3243             if (!nextchr && locinput >= PL_regeol)
3244                 sayNO;
3245             if (do_utf8) {
3246                 LOAD_UTF8_CHARCLASS_ALNUM();
3247                 if (OP(scan) == NALNUM
3248                     ? (bool)swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
3249                     : isALNUM_LC_utf8((U8*)locinput))
3250                 {
3251                     sayNO;
3252                 }
3253                 locinput += PL_utf8skip[nextchr];
3254                 nextchr = UCHARAT(locinput);
3255                 break;
3256             }
3257             if (OP(scan) == NALNUM
3258                 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
3259                 sayNO;
3260             nextchr = UCHARAT(++locinput);
3261             break;
3262         case BOUNDL:
3263         case NBOUNDL:
3264             PL_reg_flags |= RF_tainted;
3265             /* FALL THROUGH */
3266         case BOUND:
3267         case NBOUND:
3268             /* was last char in word? */
3269             if (do_utf8) {
3270                 if (locinput == PL_bostr)
3271                     ln = '\n';
3272                 else {
3273                     const U8 * const r = reghop3((U8*)locinput, -1, (U8*)PL_bostr);
3274
3275                     ln = utf8n_to_uvchr(r, UTF8SKIP(r), 0, uniflags);
3276                 }
3277                 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
3278                     ln = isALNUM_uni(ln);
3279                     LOAD_UTF8_CHARCLASS_ALNUM();
3280                     n = swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8);
3281                 }
3282                 else {
3283                     ln = isALNUM_LC_uvchr(UNI_TO_NATIVE(ln));
3284                     n = isALNUM_LC_utf8((U8*)locinput);
3285                 }
3286             }
3287             else {
3288                 ln = (locinput != PL_bostr) ?
3289                     UCHARAT(locinput - 1) : '\n';
3290                 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
3291                     ln = isALNUM(ln);
3292                     n = isALNUM(nextchr);
3293                 }
3294                 else {
3295                     ln = isALNUM_LC(ln);
3296                     n = isALNUM_LC(nextchr);
3297                 }
3298             }
3299             if (((!ln) == (!n)) == (OP(scan) == BOUND ||
3300                                     OP(scan) == BOUNDL))
3301                     sayNO;
3302             break;
3303         case SPACEL:
3304             PL_reg_flags |= RF_tainted;
3305             /* FALL THROUGH */
3306         case SPACE:
3307             if (!nextchr)
3308                 sayNO;
3309             if (do_utf8) {
3310                 if (UTF8_IS_CONTINUED(nextchr)) {
3311                     LOAD_UTF8_CHARCLASS_SPACE();
3312                     if (!(OP(scan) == SPACE
3313                           ? (bool)swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
3314                           : isSPACE_LC_utf8((U8*)locinput)))
3315                     {
3316                         sayNO;
3317                     }
3318                     locinput += PL_utf8skip[nextchr];
3319                     nextchr = UCHARAT(locinput);
3320                     break;
3321                 }
3322                 if (!(OP(scan) == SPACE
3323                       ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
3324                     sayNO;
3325                 nextchr = UCHARAT(++locinput);
3326             }
3327             else {
3328                 if (!(OP(scan) == SPACE
3329                       ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
3330                     sayNO;
3331                 nextchr = UCHARAT(++locinput);
3332             }
3333             break;
3334         case NSPACEL:
3335             PL_reg_flags |= RF_tainted;
3336             /* FALL THROUGH */
3337         case NSPACE:
3338             if (!nextchr && locinput >= PL_regeol)
3339                 sayNO;
3340             if (do_utf8) {
3341                 LOAD_UTF8_CHARCLASS_SPACE();
3342                 if (OP(scan) == NSPACE
3343                     ? (bool)swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
3344                     : isSPACE_LC_utf8((U8*)locinput))
3345                 {
3346                     sayNO;
3347                 }
3348                 locinput += PL_utf8skip[nextchr];
3349                 nextchr = UCHARAT(locinput);
3350                 break;
3351             }
3352             if (OP(scan) == NSPACE
3353                 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
3354                 sayNO;
3355             nextchr = UCHARAT(++locinput);
3356             break;
3357         case DIGITL:
3358             PL_reg_flags |= RF_tainted;
3359             /* FALL THROUGH */
3360         case DIGIT:
3361             if (!nextchr)
3362                 sayNO;
3363             if (do_utf8) {
3364                 LOAD_UTF8_CHARCLASS_DIGIT();
3365                 if (!(OP(scan) == DIGIT
3366                       ? (bool)swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
3367                       : isDIGIT_LC_utf8((U8*)locinput)))
3368                 {
3369                     sayNO;
3370                 }
3371                 locinput += PL_utf8skip[nextchr];
3372                 nextchr = UCHARAT(locinput);
3373                 break;
3374             }
3375             if (!(OP(scan) == DIGIT
3376                   ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
3377                 sayNO;
3378             nextchr = UCHARAT(++locinput);
3379             break;
3380         case NDIGITL:
3381             PL_reg_flags |= RF_tainted;
3382             /* FALL THROUGH */
3383         case NDIGIT:
3384             if (!nextchr && locinput >= PL_regeol)
3385                 sayNO;
3386             if (do_utf8) {
3387                 LOAD_UTF8_CHARCLASS_DIGIT();
3388                 if (OP(scan) == NDIGIT
3389                     ? (bool)swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
3390                     : isDIGIT_LC_utf8((U8*)locinput))
3391                 {
3392                     sayNO;
3393                 }
3394                 locinput += PL_utf8skip[nextchr];
3395                 nextchr = UCHARAT(locinput);
3396                 break;
3397             }
3398             if (OP(scan) == NDIGIT
3399                 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
3400                 sayNO;
3401             nextchr = UCHARAT(++locinput);
3402             break;
3403         case CLUMP:
3404             if (locinput >= PL_regeol)
3405                 sayNO;
3406             if  (do_utf8) {
3407                 LOAD_UTF8_CHARCLASS_MARK();
3408                 if (swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
3409                     sayNO;
3410                 locinput += PL_utf8skip[nextchr];
3411                 while (locinput < PL_regeol &&
3412                        swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
3413                     locinput += UTF8SKIP(locinput);
3414                 if (locinput > PL_regeol)
3415                     sayNO;
3416             }
3417             else
3418                locinput++;
3419             nextchr = UCHARAT(locinput);
3420             break;
3421
3422         case NREFFL:
3423         {
3424             char *s;
3425             char type;
3426             PL_reg_flags |= RF_tainted;
3427             /* FALL THROUGH */
3428         case NREF:
3429         case NREFF:
3430             type = OP(scan);
3431             n = reg_check_named_buff_matched(rex,scan);
3432
3433             if ( n ) {
3434                 type = REF + ( type - NREF );
3435                 goto do_ref;
3436             } else {
3437                 sayNO;
3438             }
3439             /* unreached */
3440         case REFFL:
3441             PL_reg_flags |= RF_tainted;
3442             /* FALL THROUGH */
3443         case REF:
3444         case REFF:
3445             n = ARG(scan);  /* which paren pair */
3446             type = OP(scan);
3447           do_ref:
3448             ln = PL_regstartp[n];
3449             PL_reg_leftiter = PL_reg_maxiter;           /* Void cache */
3450             if (*PL_reglastparen < n || ln == -1)
3451                 sayNO;                  /* Do not match unless seen CLOSEn. */
3452             if (ln == PL_regendp[n])
3453                 break;
3454
3455             s = PL_bostr + ln;
3456             if (do_utf8 && type != REF) {       /* REF can do byte comparison */
3457                 char *l = locinput;
3458                 const char *e = PL_bostr + PL_regendp[n];
3459                 /*
3460                  * Note that we can't do the "other character" lookup trick as
3461                  * in the 8-bit case (no pun intended) because in Unicode we
3462                  * have to map both upper and title case to lower case.
3463                  */
3464                 if (type == REFF) {
3465                     while (s < e) {
3466                         STRLEN ulen1, ulen2;
3467                         U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
3468                         U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
3469
3470                         if (l >= PL_regeol)
3471                             sayNO;
3472                         toLOWER_utf8((U8*)s, tmpbuf1, &ulen1);
3473                         toLOWER_utf8((U8*)l, tmpbuf2, &ulen2);
3474                         if (ulen1 != ulen2 || memNE((char *)tmpbuf1, (char *)tmpbuf2, ulen1))
3475                             sayNO;
3476                         s += ulen1;
3477                         l += ulen2;
3478                     }
3479                 }
3480                 locinput = l;
3481                 nextchr = UCHARAT(locinput);
3482                 break;
3483             }
3484
3485             /* Inline the first character, for speed. */
3486             if (UCHARAT(s) != nextchr &&
3487                 (type == REF ||
3488                  (UCHARAT(s) != (type == REFF
3489                                   ? PL_fold : PL_fold_locale)[nextchr])))
3490                 sayNO;
3491             ln = PL_regendp[n] - ln;
3492             if (locinput + ln > PL_regeol)
3493                 sayNO;
3494             if (ln > 1 && (type == REF
3495                            ? memNE(s, locinput, ln)
3496                            : (type == REFF
3497                               ? ibcmp(s, locinput, ln)
3498                               : ibcmp_locale(s, locinput, ln))))
3499                 sayNO;
3500             locinput += ln;
3501             nextchr = UCHARAT(locinput);
3502             break;
3503         }
3504         case NOTHING:
3505         case TAIL:
3506             break;
3507         case BACK:
3508             break;
3509
3510 #undef  ST
3511 #define ST st->u.eval
3512         {
3513             SV *ret;
3514             regexp *re;
3515             regexp_internal *rei;
3516             regnode *startpoint;
3517
3518         case GOSTART:
3519         case GOSUB: /*    /(...(?1))/      */
3520             if (cur_eval && cur_eval->locinput==locinput) {
3521                 if (cur_eval->u.eval.close_paren == (U32)ARG(scan))
3522                     Perl_croak(aTHX_ "Infinite recursion in regex");
3523                 if ( ++nochange_depth > MAX_RECURSE_EVAL_NOCHANGE_DEPTH )
3524                     Perl_croak(aTHX_
3525                         "Pattern subroutine nesting without pos change"
3526                         " exceeded limit in regex");
3527             } else {
3528                 nochange_depth = 0;
3529             }
3530             re = rex;
3531             rei = rexi;
3532             (void)ReREFCNT_inc(rex);
3533             if (OP(scan)==GOSUB) {
3534                 startpoint = scan + ARG2L(scan);
3535                 ST.close_paren = ARG(scan);
3536             } else {
3537                 startpoint = rei->program+1;
3538                 ST.close_paren = 0;
3539             }
3540             goto eval_recurse_doit;
3541             /* NOTREACHED */
3542         case EVAL:  /*   /(?{A})B/   /(??{A})B/  and /(?(?{A})X|Y)B/   */
3543             if (cur_eval && cur_eval->locinput==locinput) {
3544                 if ( ++nochange_depth > MAX_RECURSE_EVAL_NOCHANGE_DEPTH )
3545                     Perl_croak(aTHX_ "EVAL without pos change exceeded limit in regex");
3546             } else {
3547                 nochange_depth = 0;
3548             }
3549             {
3550                 /* execute the code in the {...} */
3551                 dSP;
3552                 SV ** const before = SP;
3553                 OP_4tree * const oop = PL_op;
3554                 COP * const ocurcop = PL_curcop;
3555                 PAD *old_comppad;
3556
3557                 n = ARG(scan);
3558                 PL_op = (OP_4tree*)rexi->data->data[n];
3559                 DEBUG_STATE_r( PerlIO_printf(Perl_debug_log,
3560                     "  re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
3561                 PAD_SAVE_LOCAL(old_comppad, (PAD*)rexi->data->data[n + 2]);
3562                 PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
3563
3564                 if (sv_yes_mark) {
3565                     SV *sv_mrk = get_sv("REGMARK", 1);
3566                     sv_setsv(sv_mrk, sv_yes_mark);
3567                 }
3568
3569                 CALLRUNOPS(aTHX);                       /* Scalar context. */
3570                 SPAGAIN;
3571                 if (SP == before)
3572                     ret = &PL_sv_undef;   /* protect against empty (?{}) blocks. */
3573                 else {
3574                     ret = POPs;
3575                     PUTBACK;
3576                 }
3577
3578                 PL_op = oop;
3579                 PAD_RESTORE_LOCAL(old_comppad);
3580                 PL_curcop = ocurcop;
3581                 if (!logical) {
3582                     /* /(?{...})/ */
3583                     sv_setsv(save_scalar(PL_replgv), ret);
3584                     break;
3585                 }
3586             }
3587             if (logical == 2) { /* Postponed subexpression: /(??{...})/ */
3588                 logical = 0;
3589                 {
3590                     /* extract RE object from returned value; compiling if
3591                      * necessary */
3592
3593                     MAGIC *mg = NULL;
3594                     const SV *sv;
3595                     if(SvROK(ret) && SvSMAGICAL(sv = SvRV(ret)))
3596                         mg = mg_find(sv, PERL_MAGIC_qr);
3597                     else if (SvSMAGICAL(ret)) {
3598                         if (SvGMAGICAL(ret))
3599                             sv_unmagic(ret, PERL_MAGIC_qr);
3600                         else
3601                             mg = mg_find(ret, PERL_MAGIC_qr);
3602                     }
3603
3604                     if (mg) {
3605                         re = (regexp *)mg->mg_obj;
3606                         (void)ReREFCNT_inc(re);
3607                     }
3608                     else {
3609                         STRLEN len;
3610                         const char * const t = SvPV_const(ret, len);
3611                         PMOP pm;
3612                         const I32 osize = PL_regsize;
3613
3614                         Zero(&pm, 1, PMOP);
3615                         if (DO_UTF8(ret)) pm.op_pmdynflags |= PMdf_DYN_UTF8;
3616                         re = CALLREGCOMP((char*)t, (char*)t + len, &pm);
3617                         if (!(SvFLAGS(ret)
3618                               & (SVs_TEMP | SVs_PADTMP | SVf_READONLY
3619                                 | SVs_GMG)))
3620                             sv_magic(ret,(SV*)ReREFCNT_inc(re),
3621                                         PERL_MAGIC_qr,0,0);
3622                         PL_regsize = osize;
3623                     }
3624                 }
3625                 rei = RXi_GET(re);
3626                 DEBUG_EXECUTE_r(
3627                     debug_start_match(re, do_utf8, locinput, PL_regeol,
3628                         "Matching embedded");
3629                 );
3630                 startpoint = rei->program + 1;
3631                 ST.close_paren = 0; /* only used for GOSUB */
3632                 /* borrowed from regtry */
3633                 if (PL_reg_start_tmpl <= re->nparens) {
3634                     PL_reg_start_tmpl = re->nparens*3/2 + 3;
3635                     if(PL_reg_start_tmp)
3636                         Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
3637                     else
3638                         Newx(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
3639                 }
3640
3641         eval_recurse_doit: /* Share code with GOSUB below this line */
3642                 /* run the pattern returned from (??{...}) */
3643                 ST.cp = regcppush(0);   /* Save *all* the positions. */
3644                 REGCP_SET(ST.lastcp);
3645
3646                 PL_regstartp = re->startp; /* essentially NOOP on GOSUB */
3647                 PL_regendp = re->endp;     /* essentially NOOP on GOSUB */
3648
3649                 *PL_reglastparen = 0;
3650                 *PL_reglastcloseparen = 0;
3651                 PL_reginput = locinput;
3652                 PL_regsize = 0;
3653
3654                 /* XXXX This is too dramatic a measure... */
3655                 PL_reg_maxiter = 0;
3656
3657                 ST.toggle_reg_flags = PL_reg_flags;
3658                 if (re->extflags & RXf_UTF8)
3659                     PL_reg_flags |= RF_utf8;
3660                 else
3661                     PL_reg_flags &= ~RF_utf8;
3662                 ST.toggle_reg_flags ^= PL_reg_flags; /* diff of old and new */
3663
3664                 ST.prev_rex = rex;
3665                 ST.prev_curlyx = cur_curlyx;
3666                 rex = re;
3667                 rexi = rei;
3668                 cur_curlyx = NULL;
3669                 ST.B = next;
3670                 ST.prev_eval = cur_eval;
3671                 cur_eval = st;
3672                 /* now continue from first node in postoned RE */
3673                 PUSH_YES_STATE_GOTO(EVAL_AB, startpoint);
3674                 /* NOTREACHED */
3675             }
3676             /* logical is 1,   /(?(?{...})X|Y)/ */
3677             sw = (bool)SvTRUE(ret);
3678             logical = 0;
3679             break;
3680         }
3681
3682         case EVAL_AB: /* cleanup after a successful (??{A})B */
3683             /* note: this is called twice; first after popping B, then A */
3684             PL_reg_flags ^= ST.toggle_reg_flags;
3685             ReREFCNT_dec(rex);
3686             rex = ST.prev_rex;
3687             rexi = RXi_GET(rex);
3688             regcpblow(ST.cp);
3689             cur_eval = ST.prev_eval;
3690             cur_curlyx = ST.prev_curlyx;
3691             /* XXXX This is too dramatic a measure... */
3692             PL_reg_maxiter = 0;
3693             sayYES;
3694
3695
3696         case EVAL_AB_fail: /* unsuccessfully ran A or B in (??{A})B */
3697             /* note: this is called twice; first after popping B, then A */
3698             PL_reg_flags ^= ST.toggle_reg_flags;
3699             ReREFCNT_dec(rex);
3700             rex = ST.prev_rex;
3701             rexi = RXi_GET(rex);
3702             PL_reginput = locinput;
3703             REGCP_UNWIND(ST.lastcp);
3704             regcppop(rex);
3705             cur_eval = ST.prev_eval;
3706             cur_curlyx = ST.prev_curlyx;
3707             /* XXXX This is too dramatic a measure... */
3708             PL_reg_maxiter = 0;
3709             sayNO_SILENT;
3710 #undef ST
3711
3712         case OPEN:
3713             n = ARG(scan);  /* which paren pair */
3714             PL_reg_start_tmp[n] = locinput;
3715             if (n > PL_regsize)
3716                 PL_regsize = n;
3717             lastopen = n;
3718             break;
3719         case CLOSE:
3720             n = ARG(scan);  /* which paren pair */
3721             PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
3722             PL_regendp[n] = locinput - PL_bostr;
3723             /*if (n > PL_regsize)
3724                 PL_regsize = n;*/
3725             if (n > *PL_reglastparen)
3726                 *PL_reglastparen = n;
3727             *PL_reglastcloseparen = n;
3728             if (cur_eval && cur_eval->u.eval.close_paren == n) {
3729                 goto fake_end;
3730             }
3731             break;
3732         case ACCEPT:
3733             if (ARG(scan)){
3734                 regnode *cursor;
3735                 for (cursor=scan;
3736                      cursor && OP(cursor)!=END;
3737                      cursor=regnext(cursor))
3738                 {
3739                     if ( OP(cursor)==CLOSE ){
3740                         n = ARG(cursor);
3741                         if ( n <= lastopen ) {
3742                             PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
3743                             PL_regendp[n] = locinput - PL_bostr;
3744                             /*if (n > PL_regsize)
3745                             PL_regsize = n;*/
3746                             if (n > *PL_reglastparen)
3747                                 *PL_reglastparen = n;
3748                             *PL_reglastcloseparen = n;
3749                             if ( n == ARG(scan) || (cur_eval &&
3750                                 cur_eval->u.eval.close_paren == n))
3751                                 break;
3752                         }
3753                     }
3754                 }
3755             }
3756             goto fake_end;
3757             /*NOTREACHED*/
3758         case GROUPP:
3759             n = ARG(scan);  /* which paren pair */
3760             sw = (bool)(*PL_reglastparen >= n && PL_regendp[n] != -1);
3761             break;
3762         case NGROUPP:
3763             /* reg_check_named_buff_matched returns 0 for no match */
3764             sw = (bool)(0 < reg_check_named_buff_matched(rex,scan));
3765             break;
3766         case INSUBP:
3767             n = ARG(scan);
3768             sw = (cur_eval && (!n || cur_eval->u.eval.close_paren == n));
3769             break;
3770         case DEFINEP:
3771             sw = 0;
3772             break;
3773         case IFTHEN:
3774             PL_reg_leftiter = PL_reg_maxiter;           /* Void cache */
3775             if (sw)
3776                 next = NEXTOPER(NEXTOPER(scan));
3777             else {
3778                 next = scan + ARG(scan);
3779                 if (OP(next) == IFTHEN) /* Fake one. */
3780                     next = NEXTOPER(NEXTOPER(next));
3781             }
3782             break;
3783         case LOGICAL:
3784             logical = scan->flags;
3785             break;
3786
3787 /*******************************************************************
3788
3789 The CURLYX/WHILEM pair of ops handle the most generic case of the /A*B/
3790 pattern, where A and B are subpatterns. (For simple A, CURLYM or
3791 STAR/PLUS/CURLY/CURLYN are used instead.)
3792
3793 A*B is compiled as <CURLYX><A><WHILEM><B>
3794
3795 On entry to the subpattern, CURLYX is called. This pushes a CURLYX
3796 state, which contains the current count, initialised to -1. It also sets
3797 cur_curlyx to point to this state, with any previous value saved in the
3798 state block.
3799
3800 CURLYX then jumps straight to the WHILEM op, rather than executing A,
3801 since the pattern may possibly match zero times (i.e. it's a while {} loop
3802 rather than a do {} while loop).
3803
3804 Each entry to WHILEM represents a successful match of A. The count in the
3805 CURLYX block is incremented, another WHILEM state is pushed, and execution
3806 passes to A or B depending on greediness and the current count.
3807
3808 For example, if matching against the string a1a2a3b (where the aN are
3809 substrings that match /A/), then the match progresses as follows: (the
3810 pushed states are interspersed with the bits of strings matched so far):
3811
3812     <CURLYX cnt=-1>
3813     <CURLYX cnt=0><WHILEM>
3814     <CURLYX cnt=1><WHILEM> a1 <WHILEM>
3815     <CURLYX cnt=2><WHILEM> a1 <WHILEM> a2 <WHILEM>
3816     <CURLYX cnt=3><WHILEM> a1 <WHILEM> a2 <WHILEM> a3 <WHILEM>
3817     <CURLYX cnt=3><WHILEM> a1 <WHILEM> a2 <WHILEM> a3 <WHILEM> b
3818
3819 (Contrast this with something like CURLYM, which maintains only a single
3820 backtrack state:
3821
3822     <CURLYM cnt=0> a1
3823     a1 <CURLYM cnt=1> a2
3824     a1 a2 <CURLYM cnt=2> a3
3825     a1 a2 a3 <CURLYM cnt=3> b
3826 )
3827
3828 Each WHILEM state block marks a point to backtrack to upon partial failure
3829 of A or B, and also contains some minor state data related to that
3830 iteration.  The CURLYX block, pointed to by cur_curlyx, contains the
3831 overall state, such as the count, and pointers to the A and B ops.
3832
3833 This is complicated slightly by nested CURLYX/WHILEM's. Since cur_curlyx
3834 must always point to the *current* CURLYX block, the rules are:
3835
3836 When executing CURLYX, save the old cur_curlyx in the CURLYX state block,
3837 and set cur_curlyx to point the new block.
3838
3839 When popping the CURLYX block after a successful or unsuccessful match,
3840 restore the previous cur_curlyx.
3841
3842 When WHILEM is about to execute B, save the current cur_curlyx, and set it
3843 to the outer one saved in the CURLYX block.
3844
3845 When popping the WHILEM block after a successful or unsuccessful B match,
3846 restore the previous cur_curlyx.
3847
3848 Here's an example for the pattern (AI* BI)*BO
3849 I and O refer to inner and outer, C and W refer to CURLYX and WHILEM:
3850
3851 cur_
3852 curlyx backtrack stack
3853 ------ ---------------
3854 NULL
3855 CO     <CO prev=NULL> <WO>
3856 CI     <CO prev=NULL> <WO> <CI prev=CO> <WI> ai
3857 CO     <CO prev=NULL> <WO> <CI prev=CO> <WI> ai <WI prev=CI> bi
3858 NULL   <CO prev=NULL> <WO> <CI prev=CO> <WI> ai <WI prev=CI> bi <WO prev=CO> bo
3859
3860 At this point the pattern succeeds, and we work back down the stack to
3861 clean up, restoring as we go:
3862
3863 CO     <CO prev=NULL> <WO> <CI prev=CO> <WI> ai <WI prev=CI> bi
3864 CI     <CO prev=NULL> <WO> <CI prev=CO> <WI> ai
3865 CO     <CO prev=NULL> <WO>
3866 NULL
3867
3868 *******************************************************************/
3869
3870 #define ST st->u.curlyx
3871
3872         case CURLYX:    /* start of /A*B/  (for complex A) */
3873         {
3874             /* No need to save/restore up to this paren */
3875             I32 parenfloor = scan->flags;
3876
3877             assert(next); /* keep Coverity happy */
3878             if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
3879                 next += ARG(next);
3880
3881             /* XXXX Probably it is better to teach regpush to support
3882                parenfloor > PL_regsize... */
3883             if (parenfloor > (I32)*PL_reglastparen)
3884                 parenfloor = *PL_reglastparen; /* Pessimization... */
3885
3886             ST.prev_curlyx= cur_curlyx;
3887             cur_curlyx = st;
3888             ST.cp = PL_savestack_ix;
3889
3890             /* these fields contain the state of the current curly.
3891              * they are accessed by subsequent WHILEMs */
3892             ST.parenfloor = parenfloor;
3893             ST.min = ARG1(scan);
3894             ST.max = ARG2(scan);
3895             ST.A = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
3896             ST.B = next;
3897             ST.minmod = minmod;
3898             minmod = 0;
3899             ST.count = -1;      /* this will be updated by WHILEM */
3900             ST.lastloc = NULL;  /* this will be updated by WHILEM */
3901
3902             PL_reginput = locinput;
3903             PUSH_YES_STATE_GOTO(CURLYX_end, PREVOPER(next));
3904             /* NOTREACHED */
3905         }
3906
3907         case CURLYX_end: /* just finished matching all of A*B */
3908             if (PL_reg_eval_set){
3909                 SV *pres= GvSV(PL_replgv);
3910                 SvREFCNT_inc(pres);
3911                 regcpblow(ST.cp);
3912                 sv_setsv(GvSV(PL_replgv), pres);
3913                 SvREFCNT_dec(pres);
3914             } else {
3915                 regcpblow(ST.cp);
3916             }
3917             cur_curlyx = ST.prev_curlyx;
3918             sayYES;
3919             /* NOTREACHED */
3920
3921         case CURLYX_end_fail: /* just failed to match all of A*B */
3922             regcpblow(ST.cp);
3923             cur_curlyx = ST.prev_curlyx;
3924             sayNO;
3925             /* NOTREACHED */
3926
3927
3928 #undef ST
3929 #define ST st->u.whilem
3930
3931         case WHILEM:     /* just matched an A in /A*B/  (for complex A) */
3932         {
3933             /* see the discussion above about CURLYX/WHILEM */
3934             I32 n;
3935             assert(cur_curlyx); /* keep Coverity happy */
3936             n = ++cur_curlyx->u.curlyx.count; /* how many A's matched */
3937             ST.save_lastloc = cur_curlyx->u.curlyx.lastloc;
3938             ST.cache_offset = 0;
3939             ST.cache_mask = 0;
3940
3941             PL_reginput = locinput;
3942
3943             DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
3944                   "%*s  whilem: matched %ld out of %ld..%ld\n",
3945                   REPORT_CODE_OFF+depth*2, "", (long)n,
3946                   (long)cur_curlyx->u.curlyx.min,
3947                   (long)cur_curlyx->u.curlyx.max)
3948             );
3949
3950             /* First just match a string of min A's. */
3951
3952             if (n < cur_curlyx->u.curlyx.min) {
3953                 cur_curlyx->u.curlyx.lastloc = locinput;
3954                 PUSH_STATE_GOTO(WHILEM_A_pre, cur_curlyx->u.curlyx.A);
3955                 /* NOTREACHED */
3956             }
3957
3958             /* If degenerate A matches "", assume A done. */
3959
3960             if (locinput == cur_curlyx->u.curlyx.lastloc) {
3961                 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
3962                    "%*s  whilem: empty match detected, trying continuation...\n",
3963                    REPORT_CODE_OFF+depth*2, "")
3964                 );
3965                 goto do_whilem_B_max;
3966             }
3967
3968             /* super-linear cache processing */
3969
3970             if (scan->flags) {
3971
3972                 if (!PL_reg_maxiter) {
3973                     /* start the countdown: Postpone detection until we
3974                      * know the match is not *that* much linear. */
3975                     PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
3976                     /* possible overflow for long strings and many CURLYX's */
3977                     if (PL_reg_maxiter < 0)
3978                         PL_reg_maxiter = I32_MAX;
3979                     PL_reg_leftiter = PL_reg_maxiter;
3980                 }
3981
3982                 if (PL_reg_leftiter-- == 0) {
3983                     /* initialise cache */
3984                     const I32 size = (PL_reg_maxiter + 7)/8;
3985                     if (PL_reg_poscache) {
3986                         if ((I32)PL_reg_poscache_size < size) {
3987                             Renew(PL_reg_poscache, size, char);
3988                             PL_reg_poscache_size = size;
3989                         }
3990                         Zero(PL_reg_poscache, size, char);
3991                     }
3992                     else {
3993                         PL_reg_poscache_size = size;
3994                         Newxz(PL_reg_poscache, size, char);
3995                     }
3996                     DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
3997       "%swhilem: Detected a super-linear match, switching on caching%s...\n",
3998                               PL_colors[4], PL_colors[5])
3999                     );
4000                 }
4001
4002                 if (PL_reg_leftiter < 0) {
4003                     /* have we already failed at this position? */
4004                     I32 offset, mask;
4005                     offset  = (scan->flags & 0xf) - 1
4006                                 + (locinput - PL_bostr)  * (scan->flags>>4);
4007                     mask    = 1 << (offset % 8);
4008                     offset /= 8;
4009                     if (PL_reg_poscache[offset] & mask) {
4010                         DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
4011                             "%*s  whilem: (cache) already tried at this position...\n",
4012                             REPORT_CODE_OFF+depth*2, "")
4013                         );
4014                         sayNO; /* cache records failure */
4015                     }
4016                     ST.cache_offset = offset;
4017                     ST.cache_mask   = mask;
4018                 }
4019             }
4020
4021             /* Prefer B over A for minimal matching. */
4022
4023             if (cur_curlyx->u.curlyx.minmod) {
4024                 ST.save_curlyx = cur_curlyx;
4025                 cur_curlyx = cur_curlyx->u.curlyx.prev_curlyx;
4026                 ST.cp = regcppush(ST.save_curlyx->u.curlyx.parenfloor);
4027                 REGCP_SET(ST.lastcp);
4028                 PUSH_YES_STATE_GOTO(WHILEM_B_min, ST.save_curlyx->u.curlyx.B);
4029                 /* NOTREACHED */
4030             }
4031
4032             /* Prefer A over B for maximal matching. */
4033
4034             if (n < cur_curlyx->u.curlyx.max) { /* More greed allowed? */
4035                 ST.cp = regcppush(cur_curlyx->u.curlyx.parenfloor);
4036                 cur_curlyx->u.curlyx.lastloc = locinput;
4037                 REGCP_SET(ST.lastcp);
4038                 PUSH_STATE_GOTO(WHILEM_A_max, cur_curlyx->u.curlyx.A);
4039                 /* NOTREACHED */
4040             }
4041             goto do_whilem_B_max;
4042         }
4043         /* NOTREACHED */
4044
4045         case WHILEM_B_min: /* just matched B in a minimal match */
4046         case WHILEM_B_max: /* just matched B in a maximal match */
4047             cur_curlyx = ST.save_curlyx;
4048             sayYES;
4049             /* NOTREACHED */
4050
4051         case WHILEM_B_max_fail: /* just failed to match B in a maximal match */
4052             cur_curlyx = ST.save_curlyx;
4053             cur_curlyx->u.curlyx.lastloc = ST.save_lastloc;
4054             cur_curlyx->u.curlyx.count--;
4055             CACHEsayNO;
4056             /* NOTREACHED */
4057
4058         case WHILEM_A_min_fail: /* just failed to match A in a minimal match */
4059             REGCP_UNWIND(ST.lastcp);
4060             regcppop(rex);
4061             /* FALL THROUGH */
4062         case WHILEM_A_pre_fail: /* just failed to match even minimal A */
4063             cur_curlyx->u.curlyx.lastloc = ST.save_lastloc;
4064             cur_curlyx->u.curlyx.count--;
4065             CACHEsayNO;
4066             /* NOTREACHED */
4067
4068         case WHILEM_A_max_fail: /* just failed to match A in a maximal match */
4069             REGCP_UNWIND(ST.lastcp);
4070             regcppop(rex);      /* Restore some previous $<digit>s? */
4071             PL_reginput = locinput;
4072             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
4073                 "%*s  whilem: failed, trying continuation...\n",
4074                 REPORT_CODE_OFF+depth*2, "")
4075             );
4076           do_whilem_B_max:
4077             if (cur_curlyx->u.curlyx.count >= REG_INFTY
4078                 && ckWARN(WARN_REGEXP)
4079                 && !(PL_reg_flags & RF_warned))
4080             {
4081                 PL_reg_flags |= RF_warned;
4082                 Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s limit (%d) exceeded",
4083                      "Complex regular subexpression recursion",
4084                      REG_INFTY - 1);
4085             }
4086
4087             /* now try B */
4088             ST.save_curlyx = cur_curlyx;
4089             cur_curlyx = cur_curlyx->u.curlyx.prev_curlyx;
4090             PUSH_YES_STATE_GOTO(WHILEM_B_max, ST.save_curlyx->u.curlyx.B);
4091             /* NOTREACHED */
4092
4093         case WHILEM_B_min_fail: /* just failed to match B in a minimal match */
4094             cur_curlyx = ST.save_curlyx;
4095             REGCP_UNWIND(ST.lastcp);
4096             regcppop(rex);
4097
4098             if (cur_curlyx->u.curlyx.count >= cur_curlyx->u.curlyx.max) {
4099                 /* Maximum greed exceeded */
4100                 if (cur_curlyx->u.curlyx.count >= REG_INFTY
4101                     && ckWARN(WARN_REGEXP)
4102                     && !(PL_reg_flags & RF_warned))
4103                 {
4104                     PL_reg_flags |= RF_warned;
4105                     Perl_warner(aTHX_ packWARN(WARN_REGEXP),
4106                         "%s limit (%d) exceeded",
4107                         "Complex regular subexpression recursion",
4108                         REG_INFTY - 1);
4109                 }
4110                 cur_curlyx->u.curlyx.count--;
4111                 CACHEsayNO;
4112             }
4113
4114             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
4115                 "%*s  trying longer...\n", REPORT_CODE_OFF+depth*2, "")
4116             );
4117             /* Try grabbing another A and see if it helps. */
4118             PL_reginput = locinput;
4119             cur_curlyx->u.curlyx.lastloc = locinput;
4120             ST.cp = regcppush(cur_curlyx->u.curlyx.parenfloor);
4121             REGCP_SET(ST.lastcp);
4122             PUSH_STATE_GOTO(WHILEM_A_min, ST.save_curlyx->u.curlyx.A);
4123             /* NOTREACHED */
4124
4125 #undef  ST
4126 #define ST st->u.branch
4127
4128         case BRANCHJ:       /*  /(...|A|...)/ with long next pointer */
4129             next = scan + ARG(scan);
4130             if (next == scan)
4131                 next = NULL;
4132             scan = NEXTOPER(scan);
4133             /* FALL THROUGH */
4134
4135         case BRANCH:        /*  /(...|A|...)/ */
4136             scan = NEXTOPER(scan); /* scan now points to inner node */
4137             if ((!next || (OP(next) != BRANCH && OP(next) != BRANCHJ))
4138                 && !has_cutgroup)
4139             {
4140                 /* last branch; skip state push and jump direct to node */
4141                 continue;
4142             }
4143             ST.lastparen = *PL_reglastparen;
4144             ST.next_branch = next;
4145             REGCP_SET(ST.cp);
4146             PL_reginput = locinput;
4147
4148             /* Now go into the branch */
4149             if (has_cutgroup) {
4150                 PUSH_YES_STATE_GOTO(BRANCH_next, scan);
4151             } else {
4152                 PUSH_STATE_GOTO(BRANCH_next, scan);
4153             }
4154             /* NOTREACHED */
4155         case CUTGROUP:
4156             PL_reginput = locinput;
4157             sv_yes_mark = st->u.mark.mark_name = scan->flags ? NULL :
4158                 (SV*)rexi->data->data[ ARG( scan ) ];
4159             PUSH_STATE_GOTO(CUTGROUP_next,next);
4160             /* NOTREACHED */
4161         case CUTGROUP_next_fail:
4162             do_cutgroup = 1;
4163             no_final = 1;
4164             if (st->u.mark.mark_name)
4165                 sv_commit = st->u.mark.mark_name;
4166             sayNO;
4167             /* NOTREACHED */
4168         case BRANCH_next:
4169             sayYES;
4170             /* NOTREACHED */
4171         case BRANCH_next_fail: /* that branch failed; try the next, if any */
4172             if (do_cutgroup) {
4173                 do_cutgroup = 0;
4174                 no_final = 0;
4175             }
4176             REGCP_UNWIND(ST.cp);
4177             for (n = *PL_reglastparen; n > ST.lastparen; n--)
4178                 PL_regendp[n] = -1;
4179             *PL_reglastparen = n;
4180             /*dmq: *PL_reglastcloseparen = n; */
4181             scan = ST.next_branch;
4182             /* no more branches? */
4183             if (!scan || (OP(scan) != BRANCH && OP(scan) != BRANCHJ)) {
4184                 DEBUG_EXECUTE_r({
4185                     PerlIO_printf( Perl_debug_log,
4186                         "%*s  %sBRANCH failed...%s\n",
4187                         REPORT_CODE_OFF+depth*2, "",
4188                         PL_colors[4],
4189                         PL_colors[5] );
4190                 });
4191                 sayNO_SILENT;
4192             }
4193             continue; /* execute next BRANCH[J] op */
4194             /* NOTREACHED */
4195
4196         case MINMOD:
4197             minmod = 1;
4198             break;
4199
4200 #undef  ST
4201 #define ST st->u.curlym
4202
4203         case CURLYM:    /* /A{m,n}B/ where A is fixed-length */
4204
4205             /* This is an optimisation of CURLYX that enables us to push
4206              * only a single backtracking state, no matter now many matches
4207              * there are in {m,n}. It relies on the pattern being constant
4208              * length, with no parens to influence future backrefs
4209              */
4210
4211             ST.me = scan;
4212             scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
4213
4214             /* if paren positive, emulate an OPEN/CLOSE around A */
4215             if (ST.me->flags) {
4216                 U32 paren = ST.me->flags;
4217                 if (paren > PL_regsize)
4218                     PL_regsize = paren;
4219                 if (paren > *PL_reglastparen)
4220                     *PL_reglastparen = paren;
4221                 scan += NEXT_OFF(scan); /* Skip former OPEN. */
4222             }
4223             ST.A = scan;
4224             ST.B = next;
4225             ST.alen = 0;
4226             ST.count = 0;
4227             ST.minmod = minmod;
4228             minmod = 0;
4229             ST.c1 = CHRTEST_UNINIT;
4230             REGCP_SET(ST.cp);
4231
4232             if (!(ST.minmod ? ARG1(ST.me) : ARG2(ST.me))) /* min/max */
4233                 goto curlym_do_B;
4234
4235           curlym_do_A: /* execute the A in /A{m,n}B/  */
4236             PL_reginput = locinput;
4237             PUSH_YES_STATE_GOTO(CURLYM_A, ST.A); /* match A */
4238             /* NOTREACHED */
4239
4240         case CURLYM_A: /* we've just matched an A */
4241             locinput = st->locinput;
4242             nextchr = UCHARAT(locinput);
4243
4244             ST.count++;
4245             /* after first match, determine A's length: u.curlym.alen */
4246             if (ST.count == 1) {
4247                 if (PL_reg_match_utf8) {
4248                     char *s = locinput;
4249                     while (s < PL_reginput) {
4250                         ST.alen++;
4251                         s += UTF8SKIP(s);
4252                     }
4253                 }
4254                 else {
4255                     ST.alen = PL_reginput - locinput;
4256                 }
4257                 if (ST.alen == 0)
4258                     ST.count = ST.minmod ? ARG1(ST.me) : ARG2(ST.me);
4259             }
4260             DEBUG_EXECUTE_r(
4261                 PerlIO_printf(Perl_debug_log,
4262                           "%*s  CURLYM now matched %"IVdf" times, len=%"IVdf"...\n",
4263                           (int)(REPORT_CODE_OFF+(depth*2)), "",
4264                           (IV) ST.count, (IV)ST.alen)
4265             );
4266
4267             locinput = PL_reginput;
4268
4269             if (cur_eval && cur_eval->u.eval.close_paren &&
4270                 cur_eval->u.eval.close_paren == (U32)ST.me->flags)
4271                 goto fake_end;
4272
4273             if ( ST.count < (ST.minmod ? ARG1(ST.me) : ARG2(ST.me)) )
4274                 goto curlym_do_A; /* try to match another A */
4275             goto curlym_do_B; /* try to match B */
4276
4277         case CURLYM_A_fail: /* just failed to match an A */
4278             REGCP_UNWIND(ST.cp);
4279
4280             if (ST.minmod || ST.count < ARG1(ST.me) /* min*/
4281                 || (cur_eval && cur_eval->u.eval.close_paren &&
4282                     cur_eval->u.eval.close_paren == (U32)ST.me->flags))
4283                 sayNO;
4284
4285           curlym_do_B: /* execute the B in /A{m,n}B/  */
4286             PL_reginput = locinput;
4287             if (ST.c1 == CHRTEST_UNINIT) {
4288                 /* calculate c1 and c2 for possible match of 1st char
4289                  * following curly */
4290                 ST.c1 = ST.c2 = CHRTEST_VOID;
4291                 if (HAS_TEXT(ST.B) || JUMPABLE(ST.B)) {
4292                     regnode *text_node = ST.B;
4293                     if (! HAS_TEXT(text_node))
4294                         FIND_NEXT_IMPT(text_node);
4295                     if (HAS_TEXT(text_node)
4296                         && PL_regkind[OP(text_node)] != REF)
4297                     {
4298                         ST.c1 = (U8)*STRING(text_node);
4299                         ST.c2 =
4300                             (OP(text_node) == EXACTF || OP(text_node) == REFF)
4301                             ? PL_fold[ST.c1]
4302                             : (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
4303                                 ? PL_fold_locale[ST.c1]
4304                                 : ST.c1;
4305                     }
4306                 }
4307             }
4308
4309             DEBUG_EXECUTE_r(
4310                 PerlIO_printf(Perl_debug_log,
4311                     "%*s  CURLYM trying tail with matches=%"IVdf"...\n",
4312                     (int)(REPORT_CODE_OFF+(depth*2)),
4313                     "", (IV)ST.count)
4314                 );
4315             if (ST.c1 != CHRTEST_VOID
4316                     && UCHARAT(PL_reginput) != ST.c1
4317                     && UCHARAT(PL_reginput) != ST.c2)
4318             {
4319                 /* simulate B failing */
4320                 state_num = CURLYM_B_fail;
4321                 goto reenter_switch;
4322             }
4323
4324             if (ST.me->flags) {
4325                 /* mark current A as captured */
4326                 I32 paren = ST.me->flags;
4327                 if (ST.count) {
4328                     PL_regstartp[paren]
4329                         = HOPc(PL_reginput, -ST.alen) - PL_bostr;
4330                     PL_regendp[paren] = PL_reginput - PL_bostr;
4331                     /*dmq: *PL_reglastcloseparen = paren; */
4332                 }
4333                 else
4334                     PL_regendp[paren] = -1;
4335                 if (cur_eval && cur_eval->u.eval.close_paren &&
4336                     cur_eval->u.eval.close_paren == (U32)ST.me->flags)
4337                 {
4338                     if (ST.count)
4339                         goto fake_end;
4340                     else
4341                         sayNO;
4342                 }
4343             }
4344
4345             PUSH_STATE_GOTO(CURLYM_B, ST.B); /* match B */
4346             /* NOTREACHED */
4347
4348         case CURLYM_B_fail: /* just failed to match a B */
4349             REGCP_UNWIND(ST.cp);
4350             if (ST.minmod) {
4351                 if (ST.count == ARG2(ST.me) /* max */)
4352                     sayNO;
4353                 goto curlym_do_A; /* try to match a further A */
4354             }
4355             /* backtrack one A */
4356             if (ST.count == ARG1(ST.me) /* min */)
4357                 sayNO;
4358             ST.count--;
4359             locinput = HOPc(locinput, -ST.alen);
4360             goto curlym_do_B; /* try to match B */
4361
4362 #undef ST
4363 #define ST st->u.curly
4364
4365 #define CURLY_SETPAREN(paren, success) \
4366     if (paren) { \
4367         if (success) { \
4368             PL_regstartp[paren] = HOPc(locinput, -1) - PL_bostr; \
4369             PL_regendp[paren] = locinput - PL_bostr; \
4370             *PL_reglastcloseparen = paren; \
4371         } \
4372         else \
4373             PL_regendp[paren] = -1; \
4374     }
4375
4376         case STAR:              /*  /A*B/ where A is width 1 */
4377             ST.paren = 0;
4378             ST.min = 0;
4379             ST.max = REG_INFTY;
4380             scan = NEXTOPER(scan);
4381             goto repeat;
4382         case PLUS:              /*  /A+B/ where A is width 1 */
4383             ST.paren = 0;
4384             ST.min = 1;
4385             ST.max = REG_INFTY;
4386             scan = NEXTOPER(scan);
4387             goto repeat;
4388         case CURLYN:            /*  /(A){m,n}B/ where A is width 1 */
4389             ST.paren = scan->flags;     /* Which paren to set */
4390             if (ST.paren > PL_regsize)
4391                 PL_regsize = ST.paren;
4392             if (ST.paren > *PL_reglastparen)
4393                 *PL_reglastparen = ST.paren;
4394             ST.min = ARG1(scan);  /* min to match */
4395             ST.max = ARG2(scan);  /* max to match */
4396             if (cur_eval && cur_eval->u.eval.close_paren &&
4397                 cur_eval->u.eval.close_paren == (U32)ST.paren) {
4398                 ST.min=1;
4399                 ST.max=1;
4400             }
4401             scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
4402             goto repeat;
4403         case CURLY:             /*  /A{m,n}B/ where A is width 1 */
4404             ST.paren = 0;
4405             ST.min = ARG1(scan);  /* min to match */
4406             ST.max = ARG2(scan);  /* max to match */
4407             scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
4408           repeat:
4409             /*
4410             * Lookahead to avoid useless match attempts
4411             * when we know what character comes next.
4412             *
4413             * Used to only do .*x and .*?x, but now it allows
4414             * for )'s, ('s and (?{ ... })'s to be in the way
4415             * of the quantifier and the EXACT-like node.  -- japhy
4416             */
4417
4418             if (ST.min > ST.max) /* XXX make this a compile-time check? */
4419                 sayNO;
4420             if (HAS_TEXT(next) || JUMPABLE(next)) {
4421                 U8 *s;
4422                 regnode *text_node = next;
4423
4424                 if (! HAS_TEXT(text_node))
4425                     FIND_NEXT_IMPT(text_node);
4426
4427                 if (! HAS_TEXT(text_node))
4428                     ST.c1 = ST.c2 = CHRTEST_VOID;
4429                 else {
4430                     if (PL_regkind[OP(text_node)] == REF) {
4431                         ST.c1 = ST.c2 = CHRTEST_VOID;
4432                         goto assume_ok_easy;
4433                     }
4434                     else
4435                         s = (U8*)STRING(text_node);
4436
4437                     if (!UTF) {
4438                         ST.c2 = ST.c1 = *s;
4439                         if (OP(text_node) == EXACTF || OP(text_node) == REFF)
4440                             ST.c2 = PL_fold[ST.c1];
4441                         else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
4442                             ST.c2 = PL_fold_locale[ST.c1];
4443                     }
4444                     else { /* UTF */
4445                         if (OP(text_node) == EXACTF || OP(text_node) == REFF) {
4446                              STRLEN ulen1, ulen2;
4447                              U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
4448                              U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
4449
4450                              to_utf8_lower((U8*)s, tmpbuf1, &ulen1);
4451                              to_utf8_upper((U8*)s, tmpbuf2, &ulen2);
4452 #ifdef EBCDIC
4453                              ST.c1 = utf8n_to_uvchr(tmpbuf1, UTF8_MAXLEN, 0,
4454                                                     ckWARN(WARN_UTF8) ?
4455                                                     0 : UTF8_ALLOW_ANY);
4456                              ST.c2 = utf8n_to_uvchr(tmpbuf2, UTF8_MAXLEN, 0,
4457                                                     ckWARN(WARN_UTF8) ?
4458                                                     0 : UTF8_ALLOW_ANY);
4459 #else
4460                              ST.c1 = utf8n_to_uvuni(tmpbuf1, UTF8_MAXBYTES, 0,
4461                                                     uniflags);
4462                              ST.c2 = utf8n_to_uvuni(tmpbuf2, UTF8_MAXBYTES, 0,
4463                                                     uniflags);
4464 #endif
4465                         }
4466                         else {
4467                             ST.c2 = ST.c1 = utf8n_to_uvchr(s, UTF8_MAXBYTES, 0,
4468                                                      uniflags);
4469                         }
4470                     }
4471                 }
4472             }
4473             else
4474                 ST.c1 = ST.c2 = CHRTEST_VOID;
4475         assume_ok_easy:
4476
4477             ST.A = scan;
4478             ST.B = next;
4479             PL_reginput = locinput;
4480             if (minmod) {
4481                 minmod = 0;
4482                 if (ST.min && regrepeat(rex, ST.A, ST.min, depth) < ST.min)
4483                     sayNO;
4484                 ST.count = ST.min;
4485                 locinput = PL_reginput;
4486                 REGCP_SET(ST.cp);
4487                 if (ST.c1 == CHRTEST_VOID)
4488                     goto curly_try_B_min;
4489
4490                 ST.oldloc = locinput;
4491
4492                 /* set ST.maxpos to the furthest point along the
4493                  * string that could possibly match */
4494                 if  (ST.max == REG_INFTY) {
4495                     ST.maxpos = PL_regeol - 1;
4496                     if (do_utf8)
4497                         while (UTF8_IS_CONTINUATION(*(U8*)ST.maxpos))
4498                             ST.maxpos--;
4499                 }
4500                 else if (do_utf8) {
4501                     int m = ST.max - ST.min;
4502                     for (ST.maxpos = locinput;
4503                          m >0 && ST.maxpos + UTF8SKIP(ST.maxpos) <= PL_regeol; m--)
4504                         ST.maxpos += UTF8SKIP(ST.maxpos);
4505                 }
4506                 else {
4507                     ST.maxpos = locinput + ST.max - ST.min;
4508                     if (ST.maxpos >= PL_regeol)
4509                         ST.maxpos = PL_regeol - 1;
4510                 }
4511                 goto curly_try_B_min_known;
4512
4513             }
4514             else {
4515                 ST.count = regrepeat(rex, ST.A, ST.max, depth);
4516                 locinput = PL_reginput;
4517                 if (ST.count < ST.min)
4518                     sayNO;
4519                 if ((ST.count > ST.min)
4520                     && (PL_regkind[OP(ST.B)] == EOL) && (OP(ST.B) != MEOL))
4521                 {
4522                     /* A{m,n} must come at the end of the string, there's
4523                      * no point in backing off ... */
4524                     ST.min = ST.count;
4525                     /* ...except that $ and \Z can match before *and* after
4526                        newline at the end.  Consider "\n\n" =~ /\n+\Z\n/.
4527                        We may back off by one in this case. */
4528                     if (UCHARAT(PL_reginput - 1) == '\n' && OP(ST.B) != EOS)
4529                         ST.min--;
4530                 }
4531                 REGCP_SET(ST.cp);
4532                 goto curly_try_B_max;
4533             }
4534             /* NOTREACHED */
4535
4536
4537         case CURLY_B_min_known_fail:
4538             /* failed to find B in a non-greedy match where c1,c2 valid */
4539             if (ST.paren && ST.count)
4540                 PL_regendp[ST.paren] = -1;
4541
4542             PL_reginput = locinput;     /* Could be reset... */
4543             REGCP_UNWIND(ST.cp);
4544             /* Couldn't or didn't -- move forward. */
4545             ST.oldloc = locinput;
4546             if (do_utf8)
4547                 locinput += UTF8SKIP(locinput);
4548             else
4549                 locinput++;
4550             ST.count++;
4551           curly_try_B_min_known:
4552              /* find the next place where 'B' could work, then call B */
4553             {
4554                 int n;
4555                 if (do_utf8) {
4556                     n = (ST.oldloc == locinput) ? 0 : 1;
4557                     if (ST.c1 == ST.c2) {
4558                         STRLEN len;
4559                         /* set n to utf8_distance(oldloc, locinput) */
4560                         while (locinput <= ST.maxpos &&
4561                                utf8n_to_uvchr((U8*)locinput,
4562                                               UTF8_MAXBYTES, &len,
4563                                               uniflags) != (UV)ST.c1) {
4564                             locinput += len;
4565                             n++;
4566                         }
4567                     }
4568                     else {
4569                         /* set n to utf8_distance(oldloc, locinput) */
4570                         while (locinput <= ST.maxpos) {
4571                             STRLEN len;
4572                             const UV c = utf8n_to_uvchr((U8*)locinput,
4573                                                   UTF8_MAXBYTES, &len,
4574                                                   uniflags);
4575                             if (c == (UV)ST.c1 || c == (UV)ST.c2)
4576                                 break;
4577                             locinput += len;
4578                             n++;
4579                         }
4580                     }
4581                 }
4582                 else {
4583                     if (ST.c1 == ST.c2) {
4584                         while (locinput <= ST.maxpos &&
4585                                UCHARAT(locinput) != ST.c1)
4586                             locinput++;
4587                     }
4588                     else {
4589                         while (locinput <= ST.maxpos
4590                                && UCHARAT(locinput) != ST.c1
4591                                && UCHARAT(locinput) != ST.c2)
4592                             locinput++;
4593                     }
4594                     n = locinput - ST.oldloc;
4595                 }
4596                 if (locinput > ST.maxpos)
4597                     sayNO;
4598                 /* PL_reginput == oldloc now */
4599                 if (n) {
4600                     ST.count += n;
4601                     if (regrepeat(rex, ST.A, n, depth) < n)
4602                         sayNO;
4603                 }
4604                 PL_reginput = locinput;
4605                 CURLY_SETPAREN(ST.paren, ST.count);
4606                 if (cur_eval && cur_eval->u.eval.close_paren &&
4607                     cur_eval->u.eval.close_paren == (U32)ST.paren) {
4608                     goto fake_end;
4609                 }
4610                 PUSH_STATE_GOTO(CURLY_B_min_known, ST.B);
4611             }
4612             /* NOTREACHED */
4613
4614
4615         case CURLY_B_min_fail:
4616             /* failed to find B in a non-greedy match where c1,c2 invalid */
4617             if (ST.paren && ST.count)
4618                 PL_regendp[ST.paren] = -1;
4619
4620             REGCP_UNWIND(ST.cp);
4621             /* failed -- move forward one */
4622             PL_reginput = locinput;
4623             if (regrepeat(rex, ST.A, 1, depth)) {
4624                 ST.count++;
4625                 locinput = PL_reginput;
4626                 if (ST.count <= ST.max || (ST.max == REG_INFTY &&
4627                         ST.count > 0)) /* count overflow ? */
4628                 {
4629                   curly_try_B_min:
4630                     CURLY_SETPAREN(ST.paren, ST.count);
4631                     if (cur_eval && cur_eval->u.eval.close_paren &&
4632                         cur_eval->u.eval.close_paren == (U32)ST.paren) {
4633                         goto fake_end;
4634                     }
4635                     PUSH_STATE_GOTO(CURLY_B_min, ST.B);
4636                 }
4637             }
4638             sayNO;
4639             /* NOTREACHED */
4640
4641
4642         curly_try_B_max:
4643             /* a successful greedy match: now try to match B */
4644             if (cur_eval && cur_eval->u.eval.close_paren &&
4645                 cur_eval->u.eval.close_paren == (U32)ST.paren) {
4646                 goto fake_end;
4647             }
4648             {
4649                 UV c = 0;
4650                 if (ST.c1 != CHRTEST_VOID)
4651                     c = do_utf8 ? utf8n_to_uvchr((U8*)PL_reginput,
4652                                            UTF8_MAXBYTES, 0, uniflags)
4653                                 : (UV) UCHARAT(PL_reginput);
4654                 /* If it could work, try it. */
4655                 if (ST.c1 == CHRTEST_VOID || c == (UV)ST.c1 || c == (UV)ST.c2) {
4656                     CURLY_SETPAREN(ST.paren, ST.count);
4657                     PUSH_STATE_GOTO(CURLY_B_max, ST.B);
4658                     /* NOTREACHED */
4659                 }
4660             }
4661             /* FALL THROUGH */
4662         case CURLY_B_max_fail:
4663             /* failed to find B in a greedy match */
4664             if (ST.paren && ST.count)
4665                 PL_regendp[ST.paren] = -1;
4666
4667             REGCP_UNWIND(ST.cp);
4668             /*  back up. */
4669             if (--ST.count < ST.min)
4670                 sayNO;
4671             PL_reginput = locinput = HOPc(locinput, -1);
4672             goto curly_try_B_max;
4673
4674 #undef ST
4675
4676         case END:
4677             fake_end:
4678             if (cur_eval) {
4679                 /* we've just finished A in /(??{A})B/; now continue with B */
4680                 I32 tmpix;
4681
4682
4683                 st->u.eval.toggle_reg_flags
4684                             = cur_eval->u.eval.toggle_reg_flags;
4685                 PL_reg_flags ^= st->u.eval.toggle_reg_flags;
4686
4687                 st->u.eval.prev_rex = rex;              /* inner */
4688                 rex  = cur_eval->u.eval.prev_rex;       /* outer */
4689                 rexi = RXi_GET(rex);
4690                 cur_curlyx = cur_eval->u.eval.prev_curlyx;
4691                 ReREFCNT_inc(rex);
4692                 st->u.eval.cp = regcppush(0);   /* Save *all* the positions. */
4693                 REGCP_SET(st->u.eval.lastcp);
4694                 PL_reginput = locinput;
4695
4696                 /* Restore parens of the outer rex without popping the
4697                  * savestack */
4698                 tmpix = PL_savestack_ix;
4699                 PL_savestack_ix = cur_eval->u.eval.lastcp;
4700                 regcppop(rex);
4701                 PL_savestack_ix = tmpix;
4702
4703                 st->u.eval.prev_eval = cur_eval;
4704                 cur_eval = cur_eval->u.eval.prev_eval;
4705                 DEBUG_EXECUTE_r(
4706                     PerlIO_printf(Perl_debug_log, "%*s  EVAL trying tail ... %"UVxf"\n",
4707                                       REPORT_CODE_OFF+depth*2, "",PTR2UV(cur_eval)););
4708                 PUSH_YES_STATE_GOTO(EVAL_AB,
4709                         st->u.eval.prev_eval->u.eval.B); /* match B */
4710             }
4711
4712             if (locinput < reginfo->till) {
4713                 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
4714                                       "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
4715                                       PL_colors[4],
4716                                       (long)(locinput - PL_reg_starttry),
4717                                       (long)(reginfo->till - PL_reg_starttry),
4718                                       PL_colors[5]));
4719
4720                 sayNO_SILENT;           /* Cannot match: too short. */
4721             }
4722             PL_reginput = locinput;     /* put where regtry can find it */
4723             sayYES;                     /* Success! */
4724
4725         case SUCCEED: /* successful SUSPEND/UNLESSM/IFMATCH/CURLYM */
4726             DEBUG_EXECUTE_r(
4727             PerlIO_printf(Perl_debug_log,
4728                 "%*s  %ssubpattern success...%s\n",
4729                 REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5]));
4730             PL_reginput = locinput;     /* put where regtry can find it */
4731             sayYES;                     /* Success! */
4732
4733 #undef  ST
4734 #define ST st->u.ifmatch
4735
4736         case SUSPEND:   /* (?>A) */
4737             ST.wanted = 1;
4738             PL_reginput = locinput;
4739             goto do_ifmatch;
4740
4741         case UNLESSM:   /* -ve lookaround: (?!A), or with flags, (?<!A) */
4742             ST.wanted = 0;
4743             goto ifmatch_trivial_fail_test;
4744
4745         case IFMATCH:   /* +ve lookaround: (?=A), or with flags, (?<=A) */
4746             ST.wanted = 1;
4747           ifmatch_trivial_fail_test:
4748             if (scan->flags) {
4749                 char * const s = HOPBACKc(locinput, scan->flags);
4750                 if (!s) {
4751                     /* trivial fail */
4752                     if (logical) {
4753                         logical = 0;
4754                         sw = 1 - (bool)ST.wanted;
4755                     }
4756                     else if (ST.wanted)
4757                         sayNO;
4758                     next = scan + ARG(scan);
4759                     if (next == scan)
4760                         next = NULL;
4761                     break;
4762                 }
4763                 PL_reginput = s;
4764             }
4765             else
4766                 PL_reginput = locinput;
4767
4768           do_ifmatch:
4769             ST.me = scan;
4770             ST.logical = logical;
4771             /* execute body of (?...A) */
4772             PUSH_YES_STATE_GOTO(IFMATCH_A, NEXTOPER(NEXTOPER(scan)));
4773             /* NOTREACHED */
4774
4775         case IFMATCH_A_fail: /* body of (?...A) failed */
4776             ST.wanted = !ST.wanted;
4777             /* FALL THROUGH */
4778
4779         case IFMATCH_A: /* body of (?...A) succeeded */
4780             if (ST.logical) {
4781                 sw = (bool)ST.wanted;
4782             }
4783             else if (!ST.wanted)
4784                 sayNO;
4785
4786             if (OP(ST.me) == SUSPEND)
4787                 locinput = PL_reginput;
4788             else {
4789                 locinput = PL_reginput = st->locinput;
4790                 nextchr = UCHARAT(locinput);
4791             }
4792             scan = ST.me + ARG(ST.me);
4793             if (scan == ST.me)
4794                 scan = NULL;
4795             continue; /* execute B */
4796
4797 #undef ST
4798
4799         case LONGJMP:
4800             next = scan + ARG(scan);
4801             if (next == scan)
4802                 next = NULL;
4803             break;
4804         case COMMIT:
4805             reginfo->cutpoint = PL_regeol;
4806             /* FALLTHROUGH */
4807         case PRUNE:
4808             PL_reginput = locinput;
4809             if (!scan->flags)
4810                 sv_yes_mark = sv_commit = (SV*)rexi->data->data[ ARG( scan ) ];
4811             PUSH_STATE_GOTO(COMMIT_next,next);
4812             /* NOTREACHED */
4813         case COMMIT_next_fail:
4814             no_final = 1;
4815             /* FALLTHROUGH */
4816         case OPFAIL:
4817             sayNO;
4818             /* NOTREACHED */
4819
4820 #define ST st->u.mark
4821         case MARKPOINT:
4822             ST.prev_mark = mark_state;
4823             ST.mark_name = sv_commit = sv_yes_mark
4824                 = (SV*)rexi->data->data[ ARG( scan ) ];
4825             mark_state = st;
4826             ST.mark_loc = PL_reginput = locinput;
4827             PUSH_YES_STATE_GOTO(MARKPOINT_next,next);
4828             /* NOTREACHED */
4829         case MARKPOINT_next:
4830             mark_state = ST.prev_mark;
4831             sayYES;
4832             /* NOTREACHED */
4833         case MARKPOINT_next_fail:
4834             if (popmark && sv_eq(ST.mark_name,popmark))
4835             {
4836                 if (ST.mark_loc > startpoint)
4837                     reginfo->cutpoint = HOPBACKc(ST.mark_loc, 1);
4838                 popmark = NULL; /* we found our mark */
4839                 sv_commit = ST.mark_name;
4840
4841                 DEBUG_EXECUTE_r({
4842                         PerlIO_printf(Perl_debug_log,
4843                             "%*s  %ssetting cutpoint to mark:%"SVf"...%s\n",
4844                             REPORT_CODE_OFF+depth*2, "",
4845                             PL_colors[4], SVfARG(sv_commit), PL_colors[5]);
4846                 });
4847             }
4848             mark_state = ST.prev_mark;
4849             sv_yes_mark = mark_state ?
4850                 mark_state->u.mark.mark_name : NULL;
4851             sayNO;
4852             /* NOTREACHED */
4853         case SKIP:
4854             PL_reginput = locinput;
4855             if (scan->flags) {
4856                 /* (*SKIP) : if we fail we cut here*/
4857                 ST.mark_name = NULL;
4858                 ST.mark_loc = locinput;
4859                 PUSH_STATE_GOTO(SKIP_next,next);
4860             } else {
4861                 /* (*SKIP:NAME) : if there is a (*MARK:NAME) fail where it was,
4862                    otherwise do nothing.  Meaning we need to scan
4863                  */
4864                 regmatch_state *cur = mark_state;
4865                 SV *find = (SV*)rexi->data->data[ ARG( scan ) ];
4866
4867                 while (cur) {
4868                     if ( sv_eq( cur->u.mark.mark_name,
4869                                 find ) )
4870                     {
4871                         ST.mark_name = find;
4872                         PUSH_STATE_GOTO( SKIP_next, next );
4873                     }
4874                     cur = cur->u.mark.prev_mark;
4875                 }
4876             }
4877             /* Didn't find our (*MARK:NAME) so ignore this (*SKIP:NAME) */
4878             break;
4879         case SKIP_next_fail:
4880             if (ST.mark_name) {
4881                 /* (*CUT:NAME) - Set up to search for the name as we
4882                    collapse the stack*/
4883                 popmark = ST.mark_name;
4884             } else {
4885                 /* (*CUT) - No name, we cut here.*/
4886                 if (ST.mark_loc > startpoint)
4887                     reginfo->cutpoint = HOPBACKc(ST.mark_loc, 1);
4888                 /* but we set sv_commit to latest mark_name if there
4889                    is one so they can test to see how things lead to this
4890                    cut */
4891                 if (mark_state)
4892                     sv_commit=mark_state->u.mark.mark_name;
4893             }
4894             no_final = 1;
4895             sayNO;
4896             /* NOTREACHED */
4897 #undef ST
4898
4899         default:
4900             PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
4901                           PTR2UV(scan), OP(scan));
4902             Perl_croak(aTHX_ "regexp memory corruption");
4903
4904         } /* end switch */
4905
4906         /* switch break jumps here */
4907         scan = next; /* prepare to execute the next op and ... */
4908         continue;    /* ... jump back to the top, reusing st */
4909         /* NOTREACHED */
4910
4911       push_yes_state:
4912         /* push a state that backtracks on success */
4913         st->u.yes.prev_yes_state = yes_state;
4914         yes_state = st;
4915         /* FALL THROUGH */
4916       push_state:
4917         /* push a new regex state, then continue at scan  */
4918         {
4919             regmatch_state *newst;
4920
4921             DEBUG_STACK_r({
4922                 regmatch_state *cur = st;
4923                 regmatch_state *curyes = yes_state;
4924                 int curd = depth;
4925                 regmatch_slab *slab = PL_regmatch_slab;
4926                 for (;curd > -1;cur--,curd--) {
4927                     if (cur < SLAB_FIRST(slab)) {
4928                         slab = slab->prev;
4929                         cur = SLAB_LAST(slab);
4930                     }
4931                     PerlIO_printf(Perl_error_log, "%*s#%-3d %-10s %s\n",
4932                         REPORT_CODE_OFF + 2 + depth * 2,"",
4933                         curd, reg_name[cur->resume_state],
4934                         (curyes == cur) ? "yes" : ""
4935                     );
4936                     if (curyes == cur)
4937                         curyes = cur->u.yes.prev_yes_state;
4938                 }
4939             } else
4940                 DEBUG_STATE_pp("push")
4941             );
4942             depth++;
4943             st->locinput = locinput;
4944             newst = st+1;
4945             if (newst >  SLAB_LAST(PL_regmatch_slab))
4946                 newst = S_push_slab(aTHX);
4947             PL_regmatch_state = newst;
4948
4949             locinput = PL_reginput;
4950             nextchr = UCHARAT(locinput);
4951             st = newst;
4952             continue;
4953             /* NOTREACHED */
4954         }
4955     }
4956
4957     /*
4958     * We get here only if there's trouble -- normally "case END" is
4959     * the terminating point.
4960     */
4961     Perl_croak(aTHX_ "corrupted regexp pointers");
4962     /*NOTREACHED*/
4963     sayNO;
4964
4965 yes:
4966     if (yes_state) {
4967         /* we have successfully completed a subexpression, but we must now
4968          * pop to the state marked by yes_state and continue from there */
4969         assert(st != yes_state);
4970 #ifdef DEBUGGING
4971         while (st != yes_state) {
4972             st--;
4973             if (st < SLAB_FIRST(PL_regmatch_slab)) {
4974                 PL_regmatch_slab = PL_regmatch_slab->prev;
4975                 st = SLAB_LAST(PL_regmatch_slab);
4976             }
4977             DEBUG_STATE_r({
4978                 if (no_final) {
4979                     DEBUG_STATE_pp("pop (no final)");
4980                 } else {
4981                     DEBUG_STATE_pp("pop (yes)");
4982                 }
4983             });
4984             depth--;
4985         }
4986 #else
4987         while (yes_state < SLAB_FIRST(PL_regmatch_slab)
4988             || yes_state > SLAB_LAST(PL_regmatch_slab))
4989         {
4990             /* not in this slab, pop slab */
4991             depth -= (st - SLAB_FIRST(PL_regmatch_slab) + 1);
4992             PL_regmatch_slab = PL_regmatch_slab->prev;
4993             st = SLAB_LAST(PL_regmatch_slab);
4994         }
4995         depth -= (st - yes_state);
4996 #endif
4997         st = yes_state;
4998         yes_state = st->u.yes.prev_yes_state;
4999         PL_regmatch_state = st;
5000
5001         if (no_final) {
5002             locinput= st->locinput;
5003             nextchr = UCHARAT(locinput);
5004         }
5005         state_num = st->resume_state + no_final;
5006         goto reenter_switch;
5007     }
5008
5009     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
5010                           PL_colors[4], PL_colors[5]));
5011
5012     result = 1;
5013     goto final_exit;
5014
5015 no:
5016     DEBUG_EXECUTE_r(
5017         PerlIO_printf(Perl_debug_log,
5018             "%*s  %sfailed...%s\n",
5019             REPORT_CODE_OFF+depth*2, "",
5020             PL_colors[4], PL_colors[5])
5021         );
5022
5023 no_silent:
5024     if (no_final) {
5025         if (yes_state) {
5026             goto yes;
5027         } else {
5028             goto final_exit;
5029         }
5030     }
5031     if (depth) {
5032         /* there's a previous state to backtrack to */
5033         st--;
5034         if (st < SLAB_FIRST(PL_regmatch_slab)) {
5035             PL_regmatch_slab = PL_regmatch_slab->prev;
5036             st = SLAB_LAST(PL_regmatch_slab);
5037         }
5038         PL_regmatch_state = st;
5039         locinput= st->locinput;
5040         nextchr = UCHARAT(locinput);
5041
5042         DEBUG_STATE_pp("pop");
5043         depth--;
5044         if (yes_state == st)
5045             yes_state = st->u.yes.prev_yes_state;
5046
5047         state_num = st->resume_state + 1; /* failure = success + 1 */
5048         goto reenter_switch;
5049     }
5050     result = 0;
5051
5052   final_exit:
5053     if (rex->intflags & PREGf_VERBARG_SEEN) {
5054         SV *sv_err = get_sv("REGERROR", 1);
5055         SV *sv_mrk = get_sv("REGMARK", 1);
5056         if (result) {
5057             sv_commit = &PL_sv_no;
5058             if (!sv_yes_mark)
5059                 sv_yes_mark = &PL_sv_yes;
5060         } else {
5061             if (!sv_commit)
5062                 sv_commit = &PL_sv_yes;
5063             sv_yes_mark = &PL_sv_no;
5064         }
5065         sv_setsv(sv_err, sv_commit);
5066         sv_setsv(sv_mrk, sv_yes_mark);
5067     }
5068     /* restore original high-water mark */
5069     PL_regmatch_slab  = orig_slab;
5070     PL_regmatch_state = orig_state;
5071
5072     /* free all slabs above current one */
5073     if (orig_slab->next) {
5074         regmatch_slab *sl = orig_slab->next;
5075         orig_slab->next = NULL;
5076         while (sl) {
5077             regmatch_slab * const osl = sl;
5078             sl = sl->next;
5079             Safefree(osl);
5080         }
5081     }
5082
5083     return result;
5084 }
5085
5086 /*
5087  - regrepeat - repeatedly match something simple, report how many
5088  */
5089 /*
5090  * [This routine now assumes that it will only match on things of length 1.
5091  * That was true before, but now we assume scan - reginput is the count,
5092  * rather than incrementing count on every character.  [Er, except utf8.]]
5093  */
5094 STATIC I32
5095 S_regrepeat(pTHX_ const regexp *prog, const regnode *p, I32 max, int depth)
5096 {
5097     dVAR;
5098     register char *scan;
5099     register I32 c;
5100     register char *loceol = PL_regeol;
5101     register I32 hardcount = 0;
5102     register bool do_utf8 = PL_reg_match_utf8;
5103
5104     scan = PL_reginput;
5105     if (max == REG_INFTY)
5106         max = I32_MAX;
5107     else if (max < loceol - scan)
5108         loceol = scan + max;
5109     switch (OP(p)) {
5110     case REG_ANY:
5111         if (do_utf8) {
5112             loceol = PL_regeol;
5113             while (scan < loceol && hardcount < max && *scan != '\n') {
5114                 scan += UTF8SKIP(scan);
5115                 hardcount++;
5116             }
5117         } else {
5118             while (scan < loceol && *scan != '\n')
5119                 scan++;
5120         }
5121         break;
5122     case SANY:
5123         if (do_utf8) {
5124             loceol = PL_regeol;
5125             while (scan < loceol && hardcount < max) {
5126                 scan += UTF8SKIP(scan);
5127                 hardcount++;
5128             }
5129         }
5130         else
5131             scan = loceol;
5132         break;
5133     case CANY:
5134         scan = loceol;
5135         break;
5136     case EXACT:         /* length of string is 1 */
5137         c = (U8)*STRING(p);
5138         while (scan < loceol && UCHARAT(scan) == c)
5139             scan++;
5140         break;
5141     case EXACTF:        /* length of string is 1 */
5142         c = (U8)*STRING(p);
5143         while (scan < loceol &&
5144                (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
5145             scan++;
5146         break;
5147     case EXACTFL:       /* length of string is 1 */
5148         PL_reg_flags |= RF_tainted;
5149         c = (U8)*STRING(p);
5150         while (scan < loceol &&
5151                (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
5152             scan++;
5153         break;
5154     case ANYOF:
5155         if (do_utf8) {
5156             loceol = PL_regeol;
5157             while (hardcount < max && scan < loceol &&
5158                    reginclass(prog, p, (U8*)scan, 0, do_utf8)) {
5159                 scan += UTF8SKIP(scan);
5160                 hardcount++;
5161             }
5162         } else {
5163             while (scan < loceol && REGINCLASS(prog, p, (U8*)scan))
5164                 scan++;
5165         }
5166         break;
5167     case ALNUM:
5168         if (do_utf8) {
5169             loceol = PL_regeol;
5170             LOAD_UTF8_CHARCLASS_ALNUM();
5171             while (hardcount < max && scan < loceol &&
5172                    swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
5173                 scan += UTF8SKIP(scan);
5174                 hardcount++;
5175             }
5176         } else {
5177             while (scan < loceol && isALNUM(*scan))
5178                 scan++;
5179         }
5180         break;
5181     case ALNUML:
5182         PL_reg_flags |= RF_tainted;
5183         if (do_utf8) {
5184             loceol = PL_regeol;
5185             while (hardcount < max && scan < loceol &&
5186                    isALNUM_LC_utf8((U8*)scan)) {
5187                 scan += UTF8SKIP(scan);
5188                 hardcount++;
5189             }
5190         } else {
5191             while (scan < loceol && isALNUM_LC(*scan))
5192                 scan++;
5193         }
5194         break;
5195     case NALNUM:
5196         if (do_utf8) {
5197             loceol = PL_regeol;
5198             LOAD_UTF8_CHARCLASS_ALNUM();
5199             while (hardcount < max && scan < loceol &&
5200                    !swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
5201                 scan += UTF8SKIP(scan);
5202                 hardcount++;
5203             }
5204         } else {
5205             while (scan < loceol && !isALNUM(*scan))
5206                 scan++;
5207         }
5208         break;
5209     case NALNUML:
5210         PL_reg_flags |= RF_tainted;
5211         if (do_utf8) {
5212             loceol = PL_regeol;
5213             while (hardcount < max && scan < loceol &&
5214                    !isALNUM_LC_utf8((U8*)scan)) {
5215                 scan += UTF8SKIP(scan);
5216                 hardcount++;
5217             }
5218         } else {
5219             while (scan < loceol && !isALNUM_LC(*scan))
5220                 scan++;
5221         }
5222         break;
5223     case SPACE:
5224         if (do_utf8) {
5225             loceol = PL_regeol;
5226             LOAD_UTF8_CHARCLASS_SPACE();
5227             while (hardcount < max && scan < loceol &&
5228                    (*scan == ' ' ||
5229                     swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
5230                 scan += UTF8SKIP(scan);
5231                 hardcount++;
5232             }
5233         } else {
5234             while (scan < loceol && isSPACE(*scan))
5235                 scan++;
5236         }
5237         break;
5238     case SPACEL:
5239         PL_reg_flags |= RF_tainted;
5240         if (do_utf8) {
5241             loceol = PL_regeol;
5242             while (hardcount < max && scan < loceol &&
5243                    (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
5244                 scan += UTF8SKIP(scan);
5245                 hardcount++;
5246             }
5247         } else {
5248             while (scan < loceol && isSPACE_LC(*scan))
5249                 scan++;
5250         }
5251         break;
5252     case NSPACE:
5253         if (do_utf8) {
5254             loceol = PL_regeol;
5255             LOAD_UTF8_CHARCLASS_SPACE();
5256             while (hardcount < max && scan < loceol &&
5257                    !(*scan == ' ' ||
5258                      swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
5259                 scan += UTF8SKIP(scan);
5260                 hardcount++;
5261             }
5262         } else {
5263             while (scan < loceol && !isSPACE(*scan))
5264                 scan++;
5265             break;
5266         }
5267     case NSPACEL:
5268         PL_reg_flags |= RF_tainted;
5269         if (do_utf8) {
5270             loceol = PL_regeol;
5271             while (hardcount < max && scan < loceol &&
5272                    !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
5273                 scan += UTF8SKIP(scan);
5274                 hardcount++;
5275             }
5276         } else {
5277             while (scan < loceol && !isSPACE_LC(*scan))
5278                 scan++;
5279         }
5280         break;
5281     case DIGIT:
5282         if (do_utf8) {
5283             loceol = PL_regeol;
5284             LOAD_UTF8_CHARCLASS_DIGIT();
5285             while (hardcount < max && scan < loceol &&
5286                    swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
5287                 scan += UTF8SKIP(scan);
5288                 hardcount++;
5289             }
5290         } else {
5291             while (scan < loceol && isDIGIT(*scan))
5292                 scan++;
5293         }
5294         break;
5295     case NDIGIT:
5296         if (do_utf8) {
5297             loceol = PL_regeol;
5298             LOAD_UTF8_CHARCLASS_DIGIT();
5299             while (hardcount < max && scan < loceol &&
5300                    !swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
5301                 scan += UTF8SKIP(scan);
5302                 hardcount++;
5303             }
5304         } else {
5305             while (scan < loceol && !isDIGIT(*scan))
5306                 scan++;
5307         }
5308         break;
5309     default:            /* Called on something of 0 width. */
5310         break;          /* So match right here or not at all. */
5311     }
5312
5313     if (hardcount)
5314         c = hardcount;
5315     else
5316         c = scan - PL_reginput;
5317     PL_reginput = scan;
5318
5319     DEBUG_r({
5320         GET_RE_DEBUG_FLAGS_DECL;
5321         DEBUG_EXECUTE_r({
5322             SV * const prop = sv_newmortal();
5323             regprop(prog, prop, p);
5324             PerlIO_printf(Perl_debug_log,
5325                         "%*s  %s can match %"IVdf" times out of %"IVdf"...\n",
5326                         REPORT_CODE_OFF + depth*2, "", SvPVX_const(prop),(IV)c,(IV)max);
5327         });
5328     });
5329
5330     return(c);
5331 }
5332
5333
5334 #if !defined(PERL_IN_XSUB_RE) || defined(PLUGGABLE_RE_EXTENSION)
5335 /*
5336 - regclass_swash - prepare the utf8 swash
5337 */
5338
5339 SV *
5340 Perl_regclass_swash(pTHX_ const regexp *prog, register const regnode* node, bool doinit, SV** listsvp, SV **altsvp)
5341 {
5342     dVAR;
5343     SV *sw  = NULL;
5344     SV *si  = NULL;
5345     SV *alt = NULL;
5346     RXi_GET_DECL(prog,progi);
5347     const struct reg_data * const data = prog ? progi->data : NULL;
5348
5349     if (data && data->count) {
5350         const U32 n = ARG(node);
5351
5352         if (data->what[n] == 's') {
5353             SV * const rv = (SV*)data->data[n];
5354             AV * const av = (AV*)SvRV((SV*)rv);
5355             SV **const ary = AvARRAY(av);
5356             SV **a, **b;
5357
5358             /* See the end of regcomp.c:S_regclass() for
5359              * documentation of these array elements. */
5360
5361             si = *ary;
5362             a  = SvROK(ary[1]) ? &ary[1] : 0;
5363             b  = SvTYPE(ary[2]) == SVt_PVAV ? &ary[2] : 0;
5364
5365             if (a)
5366                 sw = *a;
5367             else if (si && doinit) {
5368                 sw = swash_init("utf8", "", si, 1, 0);
5369                 (void)av_store(av, 1, sw);
5370             }
5371             if (b)
5372                 alt = *b;
5373         }
5374     }
5375
5376     if (listsvp)
5377         *listsvp = si;
5378     if (altsvp)
5379         *altsvp  = alt;
5380
5381     return sw;
5382 }
5383 #endif
5384
5385 /*
5386  - reginclass - determine if a character falls into a character class
5387
5388   The n is the ANYOF regnode, the p is the target string, lenp
5389   is pointer to the maximum length of how far to go in the p
5390   (if the lenp is zero, UTF8SKIP(p) is used),
5391   do_utf8 tells whether the target string is in UTF-8.
5392
5393  */
5394
5395 STATIC bool
5396 S_reginclass(pTHX_ const regexp *prog, register const regnode *n, register const U8* p, STRLEN* lenp, register bool do_utf8)
5397 {
5398     dVAR;
5399     const char flags = ANYOF_FLAGS(n);
5400     bool match = FALSE;
5401     UV c = *p;
5402     STRLEN len = 0;
5403     STRLEN plen;
5404
5405     if (do_utf8 && !UTF8_IS_INVARIANT(c)) {
5406         c = utf8n_to_uvchr(p, UTF8_MAXBYTES, &len,
5407                 (UTF8_ALLOW_DEFAULT & UTF8_ALLOW_ANYUV) | UTF8_CHECK_ONLY);
5408                 /* see [perl #37836] for UTF8_ALLOW_ANYUV */
5409         if (len == (STRLEN)-1)
5410             Perl_croak(aTHX_ "Malformed UTF-8 character (fatal)");
5411     }
5412
5413     plen = lenp ? *lenp : UNISKIP(NATIVE_TO_UNI(c));
5414     if (do_utf8 || (flags & ANYOF_UNICODE)) {
5415         if (lenp)
5416             *lenp = 0;
5417         if (do_utf8 && !ANYOF_RUNTIME(n)) {
5418             if (len != (STRLEN)-1 && c < 256 && ANYOF_BITMAP_TEST(n, c))
5419                 match = TRUE;
5420         }
5421         if (!match && do_utf8 && (flags & ANYOF_UNICODE_ALL) && c >= 256)
5422             match = TRUE;
5423         if (!match) {
5424             AV *av;
5425             SV * const sw = regclass_swash(prog, n, TRUE, 0, (SV**)&av);
5426
5427             if (sw) {
5428                 if (swash_fetch(sw, p, do_utf8))
5429                     match = TRUE;
5430                 else if (flags & ANYOF_FOLD) {
5431                     if (!match && lenp && av) {
5432                         I32 i;
5433                         for (i = 0; i <= av_len(av); i++) {
5434                             SV* const sv = *av_fetch(av, i, FALSE);
5435                             STRLEN len;
5436                             const char * const s = SvPV_const(sv, len);
5437
5438                             if (len <= plen && memEQ(s, (char*)p, len)) {
5439                                 *lenp = len;
5440                                 match = TRUE;
5441                                 break;
5442                             }
5443                         }
5444                     }
5445                     if (!match) {
5446                         U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
5447                         STRLEN tmplen;
5448
5449                         to_utf8_fold(p, tmpbuf, &tmplen);
5450                         if (swash_fetch(sw, tmpbuf, do_utf8))
5451                             match = TRUE;
5452                     }
5453                 }
5454             }
5455         }
5456         if (match && lenp && *lenp == 0)
5457             *lenp = UNISKIP(NATIVE_TO_UNI(c));
5458     }
5459     if (!match && c < 256) {
5460         if (ANYOF_BITMAP_TEST(n, c))
5461             match = TRUE;
5462         else if (flags & ANYOF_FOLD) {
5463             U8 f;
5464
5465             if (flags & ANYOF_LOCALE) {
5466                 PL_reg_flags |= RF_tainted;
5467                 f = PL_fold_locale[c];
5468             }
5469             else
5470                 f = PL_fold[c];
5471             if (f != c && ANYOF_BITMAP_TEST(n, f))
5472                 match = TRUE;
5473         }
5474
5475         if (!match && (flags & ANYOF_CLASS)) {
5476             PL_reg_flags |= RF_tainted;
5477             if (
5478                 (ANYOF_CLASS_TEST(n, ANYOF_ALNUM)   &&  isALNUM_LC(c))  ||
5479                 (ANYOF_CLASS_TEST(n, ANYOF_NALNUM)  && !isALNUM_LC(c))  ||
5480                 (ANYOF_CLASS_TEST(n, ANYOF_SPACE)   &&  isSPACE_LC(c))  ||
5481                 (ANYOF_CLASS_TEST(n, ANYOF_NSPACE)  && !isSPACE_LC(c))  ||
5482                 (ANYOF_CLASS_TEST(n, ANYOF_DIGIT)   &&  isDIGIT_LC(c))  ||
5483                 (ANYOF_CLASS_TEST(n, ANYOF_NDIGIT)  && !isDIGIT_LC(c))  ||
5484                 (ANYOF_CLASS_TEST(n, ANYOF_ALNUMC)  &&  isALNUMC_LC(c)) ||
5485                 (ANYOF_CLASS_TEST(n, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
5486                 (ANYOF_CLASS_TEST(n, ANYOF_ALPHA)   &&  isALPHA_LC(c))  ||
5487                 (ANYOF_CLASS_TEST(n, ANYOF_NALPHA)  && !isALPHA_LC(c))  ||
5488                 (ANYOF_CLASS_TEST(n, ANYOF_ASCII)   &&  isASCII(c))     ||
5489                 (ANYOF_CLASS_TEST(n, ANYOF_NASCII)  && !isASCII(c))     ||
5490                 (ANYOF_CLASS_TEST(n, ANYOF_CNTRL)   &&  isCNTRL_LC(c))  ||
5491                 (ANYOF_CLASS_TEST(n, ANYOF_NCNTRL)  && !isCNTRL_LC(c))  ||
5492                 (ANYOF_CLASS_TEST(n, ANYOF_GRAPH)   &&  isGRAPH_LC(c))  ||
5493                 (ANYOF_CLASS_TEST(n, ANYOF_NGRAPH)  && !isGRAPH_LC(c))  ||
5494                 (ANYOF_CLASS_TEST(n, ANYOF_LOWER)   &&  isLOWER_LC(c))  ||
5495                 (ANYOF_CLASS_TEST(n, ANYOF_NLOWER)  && !isLOWER_LC(c))  ||
5496                 (ANYOF_CLASS_TEST(n, ANYOF_PRINT)   &&  isPRINT_LC(c))  ||
5497                 (ANYOF_CLASS_TEST(n, ANYOF_NPRINT)  && !isPRINT_LC(c))  ||
5498                 (ANYOF_CLASS_TEST(n, ANYOF_PUNCT)   &&  isPUNCT_LC(c))  ||
5499                 (ANYOF_CLASS_TEST(n, ANYOF_NPUNCT)  && !isPUNCT_LC(c))  ||
5500                 (ANYOF_CLASS_TEST(n, ANYOF_UPPER)   &&  isUPPER_LC(c))  ||
5501                 (ANYOF_CLASS_TEST(n, ANYOF_NUPPER)  && !isUPPER_LC(c))  ||
5502                 (ANYOF_CLASS_TEST(n, ANYOF_XDIGIT)  &&  isXDIGIT(c))    ||
5503                 (ANYOF_CLASS_TEST(n, ANYOF_NXDIGIT) && !isXDIGIT(c))    ||
5504                 (ANYOF_CLASS_TEST(n, ANYOF_PSXSPC)  &&  isPSXSPC(c))    ||
5505                 (ANYOF_CLASS_TEST(n, ANYOF_NPSXSPC) && !isPSXSPC(c))    ||
5506                 (ANYOF_CLASS_TEST(n, ANYOF_BLANK)   &&  isBLANK(c))     ||
5507                 (ANYOF_CLASS_TEST(n, ANYOF_NBLANK)  && !isBLANK(c))
5508                 ) /* How's that for a conditional? */
5509             {
5510                 match = TRUE;
5511             }
5512         }
5513     }
5514
5515     return (flags & ANYOF_INVERT) ? !match : match;
5516 }
5517
5518 STATIC U8 *
5519 S_reghop3(U8 *s, I32 off, const U8* lim)
5520 {
5521     dVAR;
5522     if (off >= 0) {
5523         while (off-- && s < lim) {
5524             /* XXX could check well-formedness here */
5525             s += UTF8SKIP(s);
5526         }
5527     }
5528     else {
5529         while (off++ && s > lim) {
5530             s--;
5531             if (UTF8_IS_CONTINUED(*s)) {
5532                 while (s > lim && UTF8_IS_CONTINUATION(*s))
5533                     s--;
5534             }
5535             /* XXX could check well-formedness here */
5536         }
5537     }
5538     return s;
5539 }
5540
5541 #ifdef XXX_dmq
5542 /* there are a bunch of places where we use two reghop3's that should
5543    be replaced with this routine. but since thats not done yet
5544    we ifdef it out - dmq
5545 */
5546 STATIC U8 *
5547 S_reghop4(U8 *s, I32 off, const U8* llim, const U8* rlim)
5548 {
5549     dVAR;
5550     if (off >= 0) {
5551         while (off-- && s < rlim) {
5552             /* XXX could check well-formedness here */
5553             s += UTF8SKIP(s);
5554         }
5555     }
5556     else {
5557         while (off++ && s > llim) {
5558             s--;
5559             if (UTF8_IS_CONTINUED(*s)) {
5560                 while (s > llim && UTF8_IS_CONTINUATION(*s))
5561                     s--;
5562             }
5563             /* XXX could check well-formedness here */
5564         }
5565     }
5566     return s;
5567 }
5568 #endif
5569
5570 STATIC U8 *
5571 S_reghopmaybe3(U8* s, I32 off, const U8* lim)
5572 {
5573     dVAR;
5574     if (off >= 0) {
5575         while (off-- && s < lim) {
5576             /* XXX could check well-formedness here */
5577             s += UTF8SKIP(s);
5578         }
5579         if (off >= 0)
5580             return NULL;
5581     }
5582     else {
5583         while (off++ && s > lim) {
5584             s--;
5585             if (UTF8_IS_CONTINUED(*s)) {
5586                 while (s > lim && UTF8_IS_CONTINUATION(*s))
5587                     s--;
5588             }
5589             /* XXX could check well-formedness here */
5590         }
5591         if (off <= 0)
5592             return NULL;
5593     }
5594     return s;
5595 }
5596
5597 static void
5598 restore_pos(pTHX_ void *arg)
5599 {
5600     dVAR;
5601     regexp * const rex = (regexp *)arg;
5602     if (PL_reg_eval_set) {
5603         if (PL_reg_oldsaved) {
5604             rex->subbeg = PL_reg_oldsaved;
5605             rex->sublen = PL_reg_oldsavedlen;
5606 #ifdef PERL_OLD_COPY_ON_WRITE
5607             rex->saved_copy = PL_nrs;
5608 #endif
5609             RX_MATCH_COPIED_on(rex);
5610         }
5611         PL_reg_magic->mg_len = PL_reg_oldpos;
5612         PL_reg_eval_set = 0;
5613         PL_curpm = PL_reg_oldcurpm;
5614     }
5615 }
5616
5617 STATIC void
5618 S_to_utf8_substr(pTHX_ register regexp *prog)
5619 {
5620     int i = 1;
5621     do {
5622         if (prog->substrs->data[i].substr
5623             && !prog->substrs->data[i].utf8_substr) {
5624             SV* const sv = newSVsv(prog->substrs->data[i].substr);
5625             prog->substrs->data[i].utf8_substr = sv;
5626             sv_utf8_upgrade(sv);
5627             if (SvVALID(prog->substrs->data[i].substr))
5628                 fbm_compile(sv, 0);
5629             if (SvTAIL(prog->substrs->data[i].substr))
5630                 SvTAIL_on(sv);
5631             if (prog->substrs->data[i].substr == prog->check_substr)
5632                 prog->check_utf8 = sv;
5633         }
5634     } while (i--);
5635 }
5636
5637 STATIC void
5638 S_to_byte_substr(pTHX_ register regexp *prog)
5639 {
5640     dVAR;
5641     int i = 1;
5642     do {
5643         if (prog->substrs->data[i].utf8_substr
5644             && !prog->substrs->data[i].substr) {
5645             SV* sv = newSVsv(prog->substrs->data[i].utf8_substr);
5646             if (sv_utf8_downgrade(sv, TRUE)) {
5647                 if (SvVALID(prog->substrs->data[i].utf8_substr))
5648                     fbm_compile(sv, 0);
5649                 if (SvTAIL(prog->substrs->data[i].utf8_substr))
5650                     SvTAIL_on(sv);
5651             } else {
5652                 SvREFCNT_dec(sv);
5653                 sv = &PL_sv_undef;
5654             }
5655             prog->substrs->data[i].substr = sv;
5656             if (prog->substrs->data[i].utf8_substr == prog->check_utf8)
5657                 prog->check_substr = sv;
5658         }
5659     } while (i--);
5660 }
5661
5662 /*
5663  * Local variables:
5664  * c-indentation-style: bsd
5665  * c-basic-offset: 4
5666  * indent-tabs-mode: t
5667  * End:
5668  *
5669  * ex: set ts=8 sts=4 sw=4 noet:
5670  */