regexec.c

   1 /*    regexec.c
   2  */
   3
   4 /*
   5  * "One Ring to rule them all, One Ring to find them..."
   6  */
   7
   8 /* This file contains functions for executing a regular expression.  See
   9  * also regcomp.c which funnily enough, contains functions for compiling
  10  * a regular expression.
  11  *
  12  * This file is also copied at build time to ext/re/re_exec.c, where
  13  * it's built with -DPERL_EXT_RE_BUILD -DPERL_EXT_RE_DEBUG -DPERL_EXT.
  14  * This causes the main functions to be compiled under new names and with
  15  * debugging support added, which makes "use re 'debug'" work.
  16  */
  17
  18 /* NOTE: this is derived from Henry Spencer's regexp code, and should not
  19  * confused with the original package (see point 3 below).  Thanks, Henry!
  20  */
  21
  22 /* Additional note: this code is very heavily munged from Henry's version
  23  * in places.  In some spots I've traded clarity for efficiency, so don't
  24  * blame Henry for some of the lack of readability.
  25  */
  26
  27 /* The names of the functions have been changed from regcomp and
  28  * regexec to  pregcomp and pregexec in order to avoid conflicts
  29  * with the POSIX routines of the same names.
  30 */
  31
  32 #ifdef PERL_EXT_RE_BUILD
  33 #include "re_top.h"
  34 #endif
  35
  36 /*
  37  * pregcomp and pregexec -- regsub and regerror are not used in perl
  38  *
  39  *      Copyright (c) 1986 by University of Toronto.
  40  *      Written by Henry Spencer.  Not derived from licensed software.
  41  *
  42  *      Permission is granted to anyone to use this software for any
  43  *      purpose on any computer system, and to redistribute it freely,
  44  *      subject to the following restrictions:
  45  *
  46  *      1. The author is not responsible for the consequences of use of
  47  *              this software, no matter how awful, even if they arise
  48  *              from defects in it.
  49  *
  50  *      2. The origin of this software must not be misrepresented, either
  51  *              by explicit claim or by omission.
  52  *
  53  *      3. Altered versions must be plainly marked as such, and must not
  54  *              be misrepresented as being the original software.
  55  *
  56  ****    Alterations to Henry's code are...
  57  ****
  58  ****    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
  59  ****    2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007 by Larry Wall and others
  60  ****
  61  ****    You may distribute under the terms of either the GNU General Public
  62  ****    License or the Artistic License, as specified in the README file.
  63  *
  64  * Beware that some of this code is subtly aware of the way operator
  65  * precedence is structured in regular expressions.  Serious changes in
  66  * regular-expression syntax might require a total rethink.
  67  */
  68 #include "EXTERN.h"
  69 #define PERL_IN_REGEXEC_C
  70 #include "perl.h"
  71
  72 #ifdef PERL_IN_XSUB_RE
  73 #  include "re_comp.h"
  74 #else
  75 #  include "regcomp.h"
  76 #endif
  77
  78 #define RF_tainted      1               /* tainted information used? */
  79 #define RF_warned       2               /* warned about big count? */
  80
  81 #define RF_utf8         8               /* Pattern contains multibyte chars? */
  82
  83 #define UTF ((PL_reg_flags & RF_utf8) != 0)
  84
  85 #define RS_init         1               /* eval environment created */
  86 #define RS_set          2               /* replsv value is set */
  87
  88 #ifndef STATIC
  89 #define STATIC  static
  90 #endif
  91
  92 #define REGINCLASS(prog,p,c)  (ANYOF_FLAGS(p) ? reginclass(prog,p,c,0,0) : ANYOF_BITMAP_TEST(p,*(c)))
  93
  94 /*
  95  * Forwards.
  96  */
  97
  98 #define CHR_SVLEN(sv) (do_utf8 ? sv_len_utf8(sv) : SvCUR(sv))
  99 #define CHR_DIST(a,b) (PL_reg_match_utf8 ? utf8_distance(a,b) : a - b)
 100
 101 #define HOPc(pos,off) \
 102         (char *)(PL_reg_match_utf8 \
 103             ? reghop3((U8*)pos, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr)) \
 104             : (U8*)(pos + off))
 105 #define HOPBACKc(pos, off) \
 106         (char*)(PL_reg_match_utf8\
 107             ? reghopmaybe3((U8*)pos, -off, (U8*)PL_bostr) \
 108             : (pos - off >= PL_bostr)           \
 109                 ? (U8*)pos - off                \
 110                 : NULL)
 111
 112 #define HOP3(pos,off,lim) (PL_reg_match_utf8 ? reghop3((U8*)(pos), off, (U8*)(lim)) : (U8*)(pos + off))
 113 #define HOP3c(pos,off,lim) ((char*)HOP3(pos,off,lim))
 114
 115 #define LOAD_UTF8_CHARCLASS(class,str) STMT_START { \
 116     if (!CAT2(PL_utf8_,class)) { bool ok; ENTER; save_re_context(); ok=CAT2(is_utf8_,class)((const U8*)str); assert(ok); LEAVE; } } STMT_END
 117 #define LOAD_UTF8_CHARCLASS_ALNUM() LOAD_UTF8_CHARCLASS(alnum,"a")
 118 #define LOAD_UTF8_CHARCLASS_DIGIT() LOAD_UTF8_CHARCLASS(digit,"0")
 119 #define LOAD_UTF8_CHARCLASS_SPACE() LOAD_UTF8_CHARCLASS(space," ")
 120 #define LOAD_UTF8_CHARCLASS_MARK()  LOAD_UTF8_CHARCLASS(mark, "\xcd\x86")
 121
 122 /* TODO: Combine JUMPABLE and HAS_TEXT to cache OP(rn) */
 123
 124 /* for use after a quantifier and before an EXACT-like node -- japhy */
 125 /* it would be nice to rework regcomp.sym to generate this stuff. sigh */
 126 #define JUMPABLE(rn) (      \
 127     OP(rn) == OPEN ||       \
 128     (OP(rn) == CLOSE && (!cur_eval || cur_eval->u.eval.close_paren != ARG(rn))) || \
 129     OP(rn) == EVAL ||   \
 130     OP(rn) == SUSPEND || OP(rn) == IFMATCH || \
 131     OP(rn) == PLUS || OP(rn) == MINMOD || \
 132     OP(rn) == KEEPS || (PL_regkind[OP(rn)] == VERB) || \
 133     (PL_regkind[OP(rn)] == CURLY && ARG1(rn) > 0) \
 134 )
 135 #define IS_EXACT(rn) (PL_regkind[OP(rn)] == EXACT)
 136
 137 #define HAS_TEXT(rn) ( IS_EXACT(rn) || PL_regkind[OP(rn)] == REF )
 138
 139 #if 0
 140 /* Currently these are only used when PL_regkind[OP(rn)] == EXACT so
 141    we don't need this definition. */
 142 #define IS_TEXT(rn)   ( OP(rn)==EXACT   || OP(rn)==REF   || OP(rn)==NREF   )
 143 #define IS_TEXTF(rn)  ( OP(rn)==EXACTF  || OP(rn)==REFF  || OP(rn)==NREFF  )
 144 #define IS_TEXTFL(rn) ( OP(rn)==EXACTFL || OP(rn)==REFFL || OP(rn)==NREFFL )
 145
 146 #else
 147 /* ... so we use this as its faster. */
 148 #define IS_TEXT(rn)   ( OP(rn)==EXACT   )
 149 #define IS_TEXTF(rn)  ( OP(rn)==EXACTF  )
 150 #define IS_TEXTFL(rn) ( OP(rn)==EXACTFL )
 151
 152 #endif
 153
 154 /*
 155   Search for mandatory following text node; for lookahead, the text must
 156   follow but for lookbehind (rn->flags != 0) we skip to the next step.
 157 */
 158 #define FIND_NEXT_IMPT(rn) STMT_START { \
 159     while (JUMPABLE(rn)) { \
 160         const OPCODE type = OP(rn); \
 161         if (type == SUSPEND || PL_regkind[type] == CURLY) \
 162             rn = NEXTOPER(NEXTOPER(rn)); \
 163         else if (type == PLUS) \
 164             rn = NEXTOPER(rn); \
 165         else if (type == IFMATCH) \
 166             rn = (rn->flags == 0) ? NEXTOPER(NEXTOPER(rn)) : rn + ARG(rn); \
 167         else rn += NEXT_OFF(rn); \
 168     } \
 169 } STMT_END
 170
 171
 172 static void restore_pos(pTHX_ void *arg);
 173
 174 STATIC CHECKPOINT
 175 S_regcppush(pTHX_ I32 parenfloor)
 176 {
 177     dVAR;
 178     const int retval = PL_savestack_ix;
 179 #define REGCP_PAREN_ELEMS 4
 180     const int paren_elems_to_push = (PL_regsize - parenfloor) * REGCP_PAREN_ELEMS;
 181     int p;
 182     GET_RE_DEBUG_FLAGS_DECL;
 183
 184     if (paren_elems_to_push < 0)
 185         Perl_croak(aTHX_ "panic: paren_elems_to_push < 0");
 186
 187 #define REGCP_OTHER_ELEMS 8
 188     SSGROW(paren_elems_to_push + REGCP_OTHER_ELEMS);
 189
 190     for (p = PL_regsize; p > parenfloor; p--) {
 191 /* REGCP_PARENS_ELEMS are pushed per pairs of parentheses. */
 192         SSPUSHINT(PL_regendp[p]);
 193         SSPUSHINT(PL_regstartp[p]);
 194         SSPUSHPTR(PL_reg_start_tmp[p]);
 195         SSPUSHINT(p);
 196         DEBUG_BUFFERS_r(PerlIO_printf(Perl_debug_log,
 197           "     saving \\%"UVuf" %"IVdf"(%"IVdf")..%"IVdf"\n",
 198                       (UV)p, (IV)PL_regstartp[p],
 199                       (IV)(PL_reg_start_tmp[p] - PL_bostr),
 200                       (IV)PL_regendp[p]
 201         ));
 202     }
 203 /* REGCP_OTHER_ELEMS are pushed in any case, parentheses or no. */
 204     SSPUSHPTR(PL_regstartp);
 205     SSPUSHPTR(PL_regendp);
 206     SSPUSHINT(PL_regsize);
 207     SSPUSHINT(*PL_reglastparen);
 208     SSPUSHINT(*PL_reglastcloseparen);
 209     SSPUSHPTR(PL_reginput);
 210 #define REGCP_FRAME_ELEMS 2
 211 /* REGCP_FRAME_ELEMS are part of the REGCP_OTHER_ELEMS and
 212  * are needed for the regexp context stack bookkeeping. */
 213     SSPUSHINT(paren_elems_to_push + REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
 214     SSPUSHINT(SAVEt_REGCONTEXT); /* Magic cookie. */
 215
 216     return retval;
 217 }
 218
 219 /* These are needed since we do not localize EVAL nodes: */
 220 #define REGCP_SET(cp)                                           \
 221     DEBUG_STATE_r(                                              \
 222             PerlIO_printf(Perl_debug_log,                       \
 223                 "  Setting an EVAL scope, savestack=%"IVdf"\n", \
 224                 (IV)PL_savestack_ix));                          \
 225     cp = PL_savestack_ix
 226
 227 #define REGCP_UNWIND(cp)                                        \
 228     DEBUG_STATE_r(                                              \
 229         if (cp != PL_savestack_ix)                              \
 230             PerlIO_printf(Perl_debug_log,                       \
 231                 "  Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
 232                 (IV)(cp), (IV)PL_savestack_ix));                \
 233     regcpblow(cp)
 234
 235 STATIC char *
 236 S_regcppop(pTHX_ const regexp *rex)
 237 {
 238     dVAR;
 239     U32 i;
 240     char *input;
 241
 242     GET_RE_DEBUG_FLAGS_DECL;
 243
 244     /* Pop REGCP_OTHER_ELEMS before the parentheses loop starts. */
 245     i = SSPOPINT;
 246     assert(i == SAVEt_REGCONTEXT); /* Check that the magic cookie is there. */
 247     i = SSPOPINT; /* Parentheses elements to pop. */
 248     input = (char *) SSPOPPTR;
 249     *PL_reglastcloseparen = SSPOPINT;
 250     *PL_reglastparen = SSPOPINT;
 251     PL_regsize = SSPOPINT;
 252     PL_regendp=(I32 *) SSPOPPTR;
 253     PL_regstartp=(I32 *) SSPOPPTR;
 254
 255
 256     /* Now restore the parentheses context. */
 257     for (i -= (REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
 258          i > 0; i -= REGCP_PAREN_ELEMS) {
 259         I32 tmps;
 260         U32 paren = (U32)SSPOPINT;
 261         PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
 262         PL_regstartp[paren] = SSPOPINT;
 263         tmps = SSPOPINT;
 264         if (paren <= *PL_reglastparen)
 265             PL_regendp[paren] = tmps;
 266         DEBUG_BUFFERS_r(
 267             PerlIO_printf(Perl_debug_log,
 268                           "     restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
 269                           (UV)paren, (IV)PL_regstartp[paren],
 270                           (IV)(PL_reg_start_tmp[paren] - PL_bostr),
 271                           (IV)PL_regendp[paren],
 272                           (paren > *PL_reglastparen ? "(no)" : ""));
 273         );
 274     }
 275     DEBUG_BUFFERS_r(
 276         if (*PL_reglastparen + 1 <= rex->nparens) {
 277             PerlIO_printf(Perl_debug_log,
 278                           "     restoring \\%"IVdf"..\\%"IVdf" to undef\n",
 279                           (IV)(*PL_reglastparen + 1), (IV)rex->nparens);
 280         }
 281     );
 282 #if 1
 283     /* It would seem that the similar code in regtry()
 284      * already takes care of this, and in fact it is in
 285      * a better location to since this code can #if 0-ed out
 286      * but the code in regtry() is needed or otherwise tests
 287      * requiring null fields (pat.t#187 and split.t#{13,14}
 288      * (as of patchlevel 7877)  will fail.  Then again,
 289      * this code seems to be necessary or otherwise
 290      * building DynaLoader will fail:
 291      * "Error: '*' not in typemap in DynaLoader.xs, line 164"
 292      * --jhi */
 293     for (i = *PL_reglastparen + 1; i <= rex->nparens; i++) {
 294         if (i > PL_regsize)
 295             PL_regstartp[i] = -1;
 296         PL_regendp[i] = -1;
 297     }
 298 #endif
 299     return input;
 300 }
 301
 302 #define regcpblow(cp) LEAVE_SCOPE(cp)   /* Ignores regcppush()ed data. */
 303
 304 /*
 305  * pregexec and friends
 306  */
 307
 308 #ifndef PERL_IN_XSUB_RE
 309 /*
 310  - pregexec - match a regexp against a string
 311  */
 312 I32
 313 Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend,
 314          char *strbeg, I32 minend, SV *screamer, U32 nosave)
 315 /* strend: pointer to null at end of string */
 316 /* strbeg: real beginning of string */
 317 /* minend: end of match must be >=minend after stringarg. */
 318 /* nosave: For optimizations. */
 319 {
 320     return
 321         regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
 322                       nosave ? 0 : REXEC_COPY_STR);
 323 }
 324 #endif
 325
 326 /*
 327  * Need to implement the following flags for reg_anch:
 328  *
 329  * USE_INTUIT_NOML              - Useful to call re_intuit_start() first
 330  * USE_INTUIT_ML
 331  * INTUIT_AUTORITATIVE_NOML     - Can trust a positive answer
 332  * INTUIT_AUTORITATIVE_ML
 333  * INTUIT_ONCE_NOML             - Intuit can match in one location only.
 334  * INTUIT_ONCE_ML
 335  *
 336  * Another flag for this function: SECOND_TIME (so that float substrs
 337  * with giant delta may be not rechecked).
 338  */
 339
 340 /* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
 341
 342 /* If SCREAM, then SvPVX_const(sv) should be compatible with strpos and strend.
 343    Otherwise, only SvCUR(sv) is used to get strbeg. */
 344
 345 /* XXXX We assume that strpos is strbeg unless sv. */
 346
 347 /* XXXX Some places assume that there is a fixed substring.
 348         An update may be needed if optimizer marks as "INTUITable"
 349         RExen without fixed substrings.  Similarly, it is assumed that
 350         lengths of all the strings are no more than minlen, thus they
 351         cannot come from lookahead.
 352         (Or minlen should take into account lookahead.)
 353   NOTE: Some of this comment is not correct. minlen does now take account
 354   of lookahead/behind. Further research is required. -- demerphq
 355
 356 */
 357
 358 /* A failure to find a constant substring means that there is no need to make
 359    an expensive call to REx engine, thus we celebrate a failure.  Similarly,
 360    finding a substring too deep into the string means that less calls to
 361    regtry() should be needed.
 362
 363    REx compiler's optimizer found 4 possible hints:
 364         a) Anchored substring;
 365         b) Fixed substring;
 366         c) Whether we are anchored (beginning-of-line or \G);
 367         d) First node (of those at offset 0) which may distingush positions;
 368    We use a)b)d) and multiline-part of c), and try to find a position in the
 369    string which does not contradict any of them.
 370  */
 371
 372 /* Most of decisions we do here should have been done at compile time.
 373    The nodes of the REx which we used for the search should have been
 374    deleted from the finite automaton. */
 375
 376 char *
 377 Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
 378                      char *strend, U32 flags, re_scream_pos_data *data)
 379 {
 380     dVAR;
 381     register I32 start_shift = 0;
 382     /* Should be nonnegative! */
 383     register I32 end_shift   = 0;
 384     register char *s;
 385     register SV *check;
 386     char *strbeg;
 387     char *t;
 388     const bool do_utf8 = (sv && SvUTF8(sv)) ? 1 : 0; /* if no sv we have to assume bytes */
 389     I32 ml_anch;
 390     register char *other_last = NULL;   /* other substr checked before this */
 391     char *check_at = NULL;              /* check substr found at this pos */
 392     const I32 multiline = prog->extflags & RXf_PMf_MULTILINE;
 393     RXi_GET_DECL(prog,progi);
 394 #ifdef DEBUGGING
 395     const char * const i_strpos = strpos;
 396 #endif
 397
 398     GET_RE_DEBUG_FLAGS_DECL;
 399
 400     RX_MATCH_UTF8_set(prog,do_utf8);
 401
 402     if (prog->extflags & RXf_UTF8) {
 403         PL_reg_flags |= RF_utf8;
 404     }
 405     DEBUG_EXECUTE_r(
 406         debug_start_match(prog, do_utf8, strpos, strend,
 407             sv ? "Guessing start of match in sv for"
 408                : "Guessing start of match in string for");
 409               );
 410
 411     /* CHR_DIST() would be more correct here but it makes things slow. */
 412     if (prog->minlen > strend - strpos) {
 413         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 414                               "String too short... [re_intuit_start]\n"));
 415         goto fail;
 416     }
 417
 418     strbeg = (sv && SvPOK(sv)) ? strend - SvCUR(sv) : strpos;
 419     PL_regeol = strend;
 420     if (do_utf8) {
 421         if (!prog->check_utf8 && prog->check_substr)
 422             to_utf8_substr(prog);
 423         check = prog->check_utf8;
 424     } else {
 425         if (!prog->check_substr && prog->check_utf8)
 426             to_byte_substr(prog);
 427         check = prog->check_substr;
 428     }
 429     if (check == &PL_sv_undef) {
 430         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 431                 "Non-utf8 string cannot match utf8 check string\n"));
 432         goto fail;
 433     }
 434     if (prog->extflags & RXf_ANCH) {    /* Match at beg-of-str or after \n */
 435         ml_anch = !( (prog->extflags & RXf_ANCH_SINGLE)
 436                      || ( (prog->extflags & RXf_ANCH_BOL)
 437                           && !multiline ) );    /* Check after \n? */
 438
 439         if (!ml_anch) {
 440           if ( !(prog->extflags & RXf_ANCH_GPOS) /* Checked by the caller */
 441                 && !(prog->intflags & PREGf_IMPLICIT) /* not a real BOL */
 442                /* SvCUR is not set on references: SvRV and SvPVX_const overlap */
 443                && sv && !SvROK(sv)
 444                && (strpos != strbeg)) {
 445               DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
 446               goto fail;
 447           }
 448           if (prog->check_offset_min == prog->check_offset_max &&
 449               !(prog->extflags & RXf_CANY_SEEN)) {
 450             /* Substring at constant offset from beg-of-str... */
 451             I32 slen;
 452
 453             s = HOP3c(strpos, prog->check_offset_min, strend);
 454
 455             if (SvTAIL(check)) {
 456                 slen = SvCUR(check);    /* >= 1 */
 457
 458                 if ( strend - s > slen || strend - s < slen - 1
 459                      || (strend - s == slen && strend[-1] != '\n')) {
 460                     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
 461                     goto fail_finish;
 462                 }
 463                 /* Now should match s[0..slen-2] */
 464                 slen--;
 465                 if (slen && (*SvPVX_const(check) != *s
 466                              || (slen > 1
 467                                  && memNE(SvPVX_const(check), s, slen)))) {
 468                   report_neq:
 469                     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
 470                     goto fail_finish;
 471                 }
 472             }
 473             else if (*SvPVX_const(check) != *s
 474                      || ((slen = SvCUR(check)) > 1
 475                          && memNE(SvPVX_const(check), s, slen)))
 476                 goto report_neq;
 477             check_at = s;
 478             goto success_at_start;
 479           }
 480         }
 481         /* Match is anchored, but substr is not anchored wrt beg-of-str. */
 482         s = strpos;
 483         start_shift = prog->check_offset_min; /* okay to underestimate on CC */
 484         end_shift = prog->check_end_shift;
 485
 486         if (!ml_anch) {
 487             const I32 end = prog->check_offset_max + CHR_SVLEN(check)
 488                                          - (SvTAIL(check) != 0);
 489             const I32 eshift = CHR_DIST((U8*)strend, (U8*)s) - end;
 490
 491             if (end_shift < eshift)
 492                 end_shift = eshift;
 493         }
 494     }
 495     else {                              /* Can match at random position */
 496         ml_anch = 0;
 497         s = strpos;
 498         start_shift = prog->check_offset_min;  /* okay to underestimate on CC */
 499         end_shift = prog->check_end_shift;
 500
 501         /* end shift should be non negative here */
 502     }
 503
 504 #ifdef QDEBUGGING       /* 7/99: reports of failure (with the older version) */
 505     if (end_shift < 0)
 506         Perl_croak(aTHX_ "panic: end_shift: %"IVdf" pattern:\n%s\n ",
 507                    (IV)end_shift, prog->precomp);
 508 #endif
 509
 510   restart:
 511     /* Find a possible match in the region s..strend by looking for
 512        the "check" substring in the region corrected by start/end_shift. */
 513
 514     {
 515         I32 srch_start_shift = start_shift;
 516         I32 srch_end_shift = end_shift;
 517         if (srch_start_shift < 0 && strbeg - s > srch_start_shift) {
 518             srch_end_shift -= ((strbeg - s) - srch_start_shift);
 519             srch_start_shift = strbeg - s;
 520         }
 521     DEBUG_OPTIMISE_MORE_r({
 522         PerlIO_printf(Perl_debug_log, "Check offset min: %"IVdf" Start shift: %"IVdf" End shift %"IVdf" Real End Shift: %"IVdf"\n",
 523             (IV)prog->check_offset_min,
 524             (IV)srch_start_shift,
 525             (IV)srch_end_shift,
 526             (IV)prog->check_end_shift);
 527     });
 528
 529     if (flags & REXEC_SCREAM) {
 530         I32 p = -1;                     /* Internal iterator of scream. */
 531         I32 * const pp = data ? data->scream_pos : &p;
 532
 533         if (PL_screamfirst[BmRARE(check)] >= 0
 534             || ( BmRARE(check) == '\n'
 535                  && (BmPREVIOUS(check) == SvCUR(check) - 1)
 536                  && SvTAIL(check) ))
 537             s = screaminstr(sv, check,
 538                             srch_start_shift + (s - strbeg), srch_end_shift, pp, 0);
 539         else
 540             goto fail_finish;
 541         /* we may be pointing at the wrong string */
 542         if (s && RX_MATCH_COPIED(prog))
 543             s = strbeg + (s - SvPVX_const(sv));
 544         if (data)
 545             *data->scream_olds = s;
 546     }
 547     else {
 548         U8* start_point;
 549         U8* end_point;
 550         if (prog->extflags & RXf_CANY_SEEN) {
 551             start_point= (U8*)(s + srch_start_shift);
 552             end_point= (U8*)(strend - srch_end_shift);
 553         } else {
 554             start_point= HOP3(s, srch_start_shift, srch_start_shift < 0 ? strbeg : strend);
 555             end_point= HOP3(strend, -srch_end_shift, strbeg);
 556         }
 557         DEBUG_OPTIMISE_MORE_r({
 558             PerlIO_printf(Perl_debug_log, "fbm_instr len=%d str=<%.*s>\n",
 559                 (int)(end_point - start_point),
 560                 (int)(end_point - start_point) > 20 ? 20 : (int)(end_point - start_point),
 561                 start_point);
 562         });
 563
 564         s = fbm_instr( start_point, end_point,
 565                       check, multiline ? FBMrf_MULTILINE : 0);
 566     }
 567     }
 568     /* Update the count-of-usability, remove useless subpatterns,
 569         unshift s.  */
 570
 571     DEBUG_EXECUTE_r({
 572         RE_PV_QUOTED_DECL(quoted, do_utf8, PERL_DEBUG_PAD_ZERO(0),
 573             SvPVX_const(check), RE_SV_DUMPLEN(check), 30);
 574         PerlIO_printf(Perl_debug_log, "%s %s substr %s%s%s",
 575                           (s ? "Found" : "Did not find"),
 576             (check == (do_utf8 ? prog->anchored_utf8 : prog->anchored_substr)
 577                 ? "anchored" : "floating"),
 578             quoted,
 579             RE_SV_TAIL(check),
 580             (s ? " at offset " : "...\n") );
 581     });
 582
 583     if (!s)
 584         goto fail_finish;
 585     /* Finish the diagnostic message */
 586     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
 587
 588     /* XXX dmq: first branch is for positive lookbehind...
 589        Our check string is offset from the beginning of the pattern.
 590        So we need to do any stclass tests offset forward from that
 591        point. I think. :-(
 592      */
 593
 594
 595
 596     check_at=s;
 597
 598
 599     /* Got a candidate.  Check MBOL anchoring, and the *other* substr.
 600        Start with the other substr.
 601        XXXX no SCREAM optimization yet - and a very coarse implementation
 602        XXXX /ttx+/ results in anchored="ttx", floating="x".  floating will
 603                 *always* match.  Probably should be marked during compile...
 604        Probably it is right to do no SCREAM here...
 605      */
 606
 607     if (do_utf8 ? (prog->float_utf8 && prog->anchored_utf8)
 608                 : (prog->float_substr && prog->anchored_substr))
 609     {
 610         /* Take into account the "other" substring. */
 611         /* XXXX May be hopelessly wrong for UTF... */
 612         if (!other_last)
 613             other_last = strpos;
 614         if (check == (do_utf8 ? prog->float_utf8 : prog->float_substr)) {
 615           do_other_anchored:
 616             {
 617                 char * const last = HOP3c(s, -start_shift, strbeg);
 618                 char *last1, *last2;
 619                 char * const saved_s = s;
 620                 SV* must;
 621
 622                 t = s - prog->check_offset_max;
 623                 if (s - strpos > prog->check_offset_max  /* signed-corrected t > strpos */
 624                     && (!do_utf8
 625                         || ((t = (char*)reghopmaybe3((U8*)s, -(prog->check_offset_max), (U8*)strpos))
 626                             && t > strpos)))
 627                     NOOP;
 628                 else
 629                     t = strpos;
 630                 t = HOP3c(t, prog->anchored_offset, strend);
 631                 if (t < other_last)     /* These positions already checked */
 632                     t = other_last;
 633                 last2 = last1 = HOP3c(strend, -prog->minlen, strbeg);
 634                 if (last < last1)
 635                     last1 = last;
 636                 /* XXXX It is not documented what units *_offsets are in.
 637                    We assume bytes, but this is clearly wrong.
 638                    Meaning this code needs to be carefully reviewed for errors.
 639                    dmq.
 640                   */
 641
 642                 /* On end-of-str: see comment below. */
 643                 must = do_utf8 ? prog->anchored_utf8 : prog->anchored_substr;
 644                 if (must == &PL_sv_undef) {
 645                     s = (char*)NULL;
 646                     DEBUG_r(must = prog->anchored_utf8);        /* for debug */
 647                 }
 648                 else
 649                     s = fbm_instr(
 650                         (unsigned char*)t,
 651                         HOP3(HOP3(last1, prog->anchored_offset, strend)
 652                                 + SvCUR(must), -(SvTAIL(must)!=0), strbeg),
 653                         must,
 654                         multiline ? FBMrf_MULTILINE : 0
 655                     );
 656                 DEBUG_EXECUTE_r({
 657                     RE_PV_QUOTED_DECL(quoted, do_utf8, PERL_DEBUG_PAD_ZERO(0),
 658                         SvPVX_const(must), RE_SV_DUMPLEN(must), 30);
 659                     PerlIO_printf(Perl_debug_log, "%s anchored substr %s%s",
 660                         (s ? "Found" : "Contradicts"),
 661                         quoted, RE_SV_TAIL(must));
 662                 });
 663
 664
 665                 if (!s) {
 666                     if (last1 >= last2) {
 667                         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 668                                                 ", giving up...\n"));
 669                         goto fail_finish;
 670                     }
 671                     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 672                         ", trying floating at offset %ld...\n",
 673                         (long)(HOP3c(saved_s, 1, strend) - i_strpos)));
 674                     other_last = HOP3c(last1, prog->anchored_offset+1, strend);
 675                     s = HOP3c(last, 1, strend);
 676                     goto restart;
 677                 }
 678                 else {
 679                     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
 680                           (long)(s - i_strpos)));
 681                     t = HOP3c(s, -prog->anchored_offset, strbeg);
 682                     other_last = HOP3c(s, 1, strend);
 683                     s = saved_s;
 684                     if (t == strpos)
 685                         goto try_at_start;
 686                     goto try_at_offset;
 687                 }
 688             }
 689         }
 690         else {          /* Take into account the floating substring. */
 691             char *last, *last1;
 692             char * const saved_s = s;
 693             SV* must;
 694
 695             t = HOP3c(s, -start_shift, strbeg);
 696             last1 = last =
 697                 HOP3c(strend, -prog->minlen + prog->float_min_offset, strbeg);
 698             if (CHR_DIST((U8*)last, (U8*)t) > prog->float_max_offset)
 699                 last = HOP3c(t, prog->float_max_offset, strend);
 700             s = HOP3c(t, prog->float_min_offset, strend);
 701             if (s < other_last)
 702                 s = other_last;
 703  /* XXXX It is not documented what units *_offsets are in.  Assume bytes.  */
 704             must = do_utf8 ? prog->float_utf8 : prog->float_substr;
 705             /* fbm_instr() takes into account exact value of end-of-str
 706                if the check is SvTAIL(ed).  Since false positives are OK,
 707                and end-of-str is not later than strend we are OK. */
 708             if (must == &PL_sv_undef) {
 709                 s = (char*)NULL;
 710                 DEBUG_r(must = prog->float_utf8);       /* for debug message */
 711             }
 712             else
 713                 s = fbm_instr((unsigned char*)s,
 714                               (unsigned char*)last + SvCUR(must)
 715                                   - (SvTAIL(must)!=0),
 716                               must, multiline ? FBMrf_MULTILINE : 0);
 717             DEBUG_EXECUTE_r({
 718                 RE_PV_QUOTED_DECL(quoted, do_utf8, PERL_DEBUG_PAD_ZERO(0),
 719                     SvPVX_const(must), RE_SV_DUMPLEN(must), 30);
 720                 PerlIO_printf(Perl_debug_log, "%s floating substr %s%s",
 721                     (s ? "Found" : "Contradicts"),
 722                     quoted, RE_SV_TAIL(must));
 723             });
 724             if (!s) {
 725                 if (last1 == last) {
 726                     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 727                                             ", giving up...\n"));
 728                     goto fail_finish;
 729                 }
 730                 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 731                     ", trying anchored starting at offset %ld...\n",
 732                     (long)(saved_s + 1 - i_strpos)));
 733                 other_last = last;
 734                 s = HOP3c(t, 1, strend);
 735                 goto restart;
 736             }
 737             else {
 738                 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
 739                       (long)(s - i_strpos)));
 740                 other_last = s; /* Fix this later. --Hugo */
 741                 s = saved_s;
 742                 if (t == strpos)
 743                     goto try_at_start;
 744                 goto try_at_offset;
 745             }
 746         }
 747     }
 748
 749
 750     t= (char*)HOP3( s, -prog->check_offset_max, (prog->check_offset_max<0) ? strend : strpos);
 751
 752     DEBUG_OPTIMISE_MORE_r(
 753         PerlIO_printf(Perl_debug_log,
 754             "Check offset min:%"IVdf" max:%"IVdf" S:%"IVdf" t:%"IVdf" D:%"IVdf" end:%"IVdf"\n",
 755             (IV)prog->check_offset_min,
 756             (IV)prog->check_offset_max,
 757             (IV)(s-strpos),
 758             (IV)(t-strpos),
 759             (IV)(t-s),
 760             (IV)(strend-strpos)
 761         )
 762     );
 763
 764     if (s - strpos > prog->check_offset_max  /* signed-corrected t > strpos */
 765         && (!do_utf8
 766             || ((t = (char*)reghopmaybe3((U8*)s, -prog->check_offset_max, (U8*) ((prog->check_offset_max<0) ? strend : strpos)))
 767                  && t > strpos)))
 768     {
 769         /* Fixed substring is found far enough so that the match
 770            cannot start at strpos. */
 771       try_at_offset:
 772         if (ml_anch && t[-1] != '\n') {
 773             /* Eventually fbm_*() should handle this, but often
 774                anchored_offset is not 0, so this check will not be wasted. */
 775             /* XXXX In the code below we prefer to look for "^" even in
 776                presence of anchored substrings.  And we search even
 777                beyond the found float position.  These pessimizations
 778                are historical artefacts only.  */
 779           find_anchor:
 780             while (t < strend - prog->minlen) {
 781                 if (*t == '\n') {
 782                     if (t < check_at - prog->check_offset_min) {
 783                         if (do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) {
 784                             /* Since we moved from the found position,
 785                                we definitely contradict the found anchored
 786                                substr.  Due to the above check we do not
 787                                contradict "check" substr.
 788                                Thus we can arrive here only if check substr
 789                                is float.  Redo checking for "other"=="fixed".
 790                              */
 791                             strpos = t + 1;
 792                             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
 793                                 PL_colors[0], PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
 794                             goto do_other_anchored;
 795                         }
 796                         /* We don't contradict the found floating substring. */
 797                         /* XXXX Why not check for STCLASS? */
 798                         s = t + 1;
 799                         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
 800                             PL_colors[0], PL_colors[1], (long)(s - i_strpos)));
 801                         goto set_useful;
 802                     }
 803                     /* Position contradicts check-string */
 804                     /* XXXX probably better to look for check-string
 805                        than for "\n", so one should lower the limit for t? */
 806                     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n",
 807                         PL_colors[0], PL_colors[1], (long)(t + 1 - i_strpos)));
 808                     other_last = strpos = s = t + 1;
 809                     goto restart;
 810                 }
 811                 t++;
 812             }
 813             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
 814                         PL_colors[0], PL_colors[1]));
 815             goto fail_finish;
 816         }
 817         else {
 818             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Starting position does not contradict /%s^%s/m...\n",
 819                         PL_colors[0], PL_colors[1]));
 820         }
 821         s = t;
 822       set_useful:
 823         ++BmUSEFUL(do_utf8 ? prog->check_utf8 : prog->check_substr);    /* hooray/5 */
 824     }
 825     else {
 826         /* The found string does not prohibit matching at strpos,
 827            - no optimization of calling REx engine can be performed,
 828            unless it was an MBOL and we are not after MBOL,
 829            or a future STCLASS check will fail this. */
 830       try_at_start:
 831         /* Even in this situation we may use MBOL flag if strpos is offset
 832            wrt the start of the string. */
 833         if (ml_anch && sv && !SvROK(sv) /* See prev comment on SvROK */
 834             && (strpos != strbeg) && strpos[-1] != '\n'
 835             /* May be due to an implicit anchor of m{.*foo}  */
 836             && !(prog->intflags & PREGf_IMPLICIT))
 837         {
 838             t = strpos;
 839             goto find_anchor;
 840         }
 841         DEBUG_EXECUTE_r( if (ml_anch)
 842             PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n",
 843                           (long)(strpos - i_strpos), PL_colors[0], PL_colors[1]);
 844         );
 845       success_at_start:
 846         if (!(prog->intflags & PREGf_NAUGHTY)   /* XXXX If strpos moved? */
 847             && (do_utf8 ? (
 848                 prog->check_utf8                /* Could be deleted already */
 849                 && --BmUSEFUL(prog->check_utf8) < 0
 850                 && (prog->check_utf8 == prog->float_utf8)
 851             ) : (
 852                 prog->check_substr              /* Could be deleted already */
 853                 && --BmUSEFUL(prog->check_substr) < 0
 854                 && (prog->check_substr == prog->float_substr)
 855             )))
 856         {
 857             /* If flags & SOMETHING - do not do it many times on the same match */
 858             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "... Disabling check substring...\n"));
 859             SvREFCNT_dec(do_utf8 ? prog->check_utf8 : prog->check_substr);
 860             if (do_utf8 ? prog->check_substr : prog->check_utf8)
 861                 SvREFCNT_dec(do_utf8 ? prog->check_substr : prog->check_utf8);
 862             prog->check_substr = prog->check_utf8 = NULL;       /* disable */
 863             prog->float_substr = prog->float_utf8 = NULL;       /* clear */
 864             check = NULL;                       /* abort */
 865             s = strpos;
 866             /* XXXX This is a remnant of the old implementation.  It
 867                     looks wasteful, since now INTUIT can use many
 868                     other heuristics. */
 869             prog->extflags &= ~RXf_USE_INTUIT;
 870         }
 871         else
 872             s = strpos;
 873     }
 874
 875     /* Last resort... */
 876     /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
 877     /* trie stclasses are too expensive to use here, we are better off to
 878        leave it to regmatch itself */
 879     if (progi->regstclass && PL_regkind[OP(progi->regstclass)]!=TRIE) {
 880         /* minlen == 0 is possible if regstclass is \b or \B,
 881            and the fixed substr is ''$.
 882            Since minlen is already taken into account, s+1 is before strend;
 883            accidentally, minlen >= 1 guaranties no false positives at s + 1
 884            even for \b or \B.  But (minlen? 1 : 0) below assumes that
 885            regstclass does not come from lookahead...  */
 886         /* If regstclass takes bytelength more than 1: If charlength==1, OK.
 887            This leaves EXACTF only, which is dealt with in find_byclass().  */
 888         const U8* const str = (U8*)STRING(progi->regstclass);
 889         const int cl_l = (PL_regkind[OP(progi->regstclass)] == EXACT
 890                     ? CHR_DIST(str+STR_LEN(progi->regstclass), str)
 891                     : 1);
 892         char * endpos;
 893         if (prog->anchored_substr || prog->anchored_utf8 || ml_anch)
 894             endpos= HOP3c(s, (prog->minlen ? cl_l : 0), strend);
 895         else if (prog->float_substr || prog->float_utf8)
 896             endpos= HOP3c(HOP3c(check_at, -start_shift, strbeg), cl_l, strend);
 897         else
 898             endpos= strend;
 899
 900         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "start_shift: %"IVdf" check_at: %"IVdf" s: %"IVdf" endpos: %"IVdf"\n",
 901                                       (IV)start_shift, (IV)(check_at - strbeg), (IV)(s - strbeg), (IV)(endpos - strbeg)));
 902
 903         t = s;
 904         s = find_byclass(prog, progi->regstclass, s, endpos, NULL);
 905         if (!s) {
 906 #ifdef DEBUGGING
 907             const char *what = NULL;
 908 #endif
 909             if (endpos == strend) {
 910                 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
 911                                 "Could not match STCLASS...\n") );
 912                 goto fail;
 913             }
 914             DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
 915                                    "This position contradicts STCLASS...\n") );
 916             if ((prog->extflags & RXf_ANCH) && !ml_anch)
 917                 goto fail;
 918             /* Contradict one of substrings */
 919             if (prog->anchored_substr || prog->anchored_utf8) {
 920                 if ((do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) == check) {
 921                     DEBUG_EXECUTE_r( what = "anchored" );
 922                   hop_and_restart:
 923                     s = HOP3c(t, 1, strend);
 924                     if (s + start_shift + end_shift > strend) {
 925                         /* XXXX Should be taken into account earlier? */
 926                         DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
 927                                                "Could not match STCLASS...\n") );
 928                         goto fail;
 929                     }
 930                     if (!check)
 931                         goto giveup;
 932                     DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
 933                                 "Looking for %s substr starting at offset %ld...\n",
 934                                  what, (long)(s + start_shift - i_strpos)) );
 935                     goto restart;
 936                 }
 937                 /* Have both, check_string is floating */
 938                 if (t + start_shift >= check_at) /* Contradicts floating=check */
 939                     goto retry_floating_check;
 940                 /* Recheck anchored substring, but not floating... */
 941                 s = check_at;
 942                 if (!check)
 943                     goto giveup;
 944                 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
 945                           "Looking for anchored substr starting at offset %ld...\n",
 946                           (long)(other_last - i_strpos)) );
 947                 goto do_other_anchored;
 948             }
 949             /* Another way we could have checked stclass at the
 950                current position only: */
 951             if (ml_anch) {
 952                 s = t = t + 1;
 953                 if (!check)
 954                     goto giveup;
 955                 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
 956                           "Looking for /%s^%s/m starting at offset %ld...\n",
 957                           PL_colors[0], PL_colors[1], (long)(t - i_strpos)) );
 958                 goto try_at_offset;
 959             }
 960             if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))     /* Could have been deleted */
 961                 goto fail;
 962             /* Check is floating subtring. */
 963           retry_floating_check:
 964             t = check_at - start_shift;
 965             DEBUG_EXECUTE_r( what = "floating" );
 966             goto hop_and_restart;
 967         }
 968         if (t != s) {
 969             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 970                         "By STCLASS: moving %ld --> %ld\n",
 971                                   (long)(t - i_strpos), (long)(s - i_strpos))
 972                    );
 973         }
 974         else {
 975             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 976                                   "Does not contradict STCLASS...\n");
 977                    );
 978         }
 979     }
 980   giveup:
 981     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%s%s:%s match at offset %ld\n",
 982                           PL_colors[4], (check ? "Guessed" : "Giving up"),
 983                           PL_colors[5], (long)(s - i_strpos)) );
 984     return s;
 985
 986   fail_finish:                          /* Substring not found */
 987     if (prog->check_substr || prog->check_utf8)         /* could be removed already */
 988         BmUSEFUL(do_utf8 ? prog->check_utf8 : prog->check_substr) += 5; /* hooray */
 989   fail:
 990     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
 991                           PL_colors[4], PL_colors[5]));
 992     return NULL;
 993 }
 994
 995
 996
 997 #define REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc, uscan, len,  \
 998 uvc, charid, foldlen, foldbuf, uniflags) STMT_START {                       \
 999     switch (trie_type) {                                                    \
1000     case trie_utf8_fold:                                                    \
1001         if ( foldlen>0 ) {                                                  \
1002             uvc = utf8n_to_uvuni( uscan, UTF8_MAXLEN, &len, uniflags );     \
1003             foldlen -= len;                                                 \
1004             uscan += len;                                                   \
1005             len=0;                                                          \
1006         } else {                                                            \
1007             uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN, &len, uniflags );   \
1008             uvc = to_uni_fold( uvc, foldbuf, &foldlen );                    \
1009             foldlen -= UNISKIP( uvc );                                      \
1010             uscan = foldbuf + UNISKIP( uvc );                               \
1011         }                                                                   \
1012         break;                                                              \
1013     case trie_utf8:                                                         \
1014         uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN, &len, uniflags );       \
1015         break;                                                              \
1016     case trie_plain:                                                        \
1017         uvc = (UV)*uc;                                                      \
1018         len = 1;                                                            \
1019     }                                                                       \
1020                                                                             \
1021     if (uvc < 256) {                                                        \
1022         charid = trie->charmap[ uvc ];                                      \
1023     }                                                                       \
1024     else {                                                                  \
1025         charid = 0;                                                         \
1026         if (widecharmap) {                                                  \
1027             SV** const svpp = hv_fetch(widecharmap,                         \
1028                         (char*)&uvc, sizeof(UV), 0);                        \
1029             if (svpp)                                                       \
1030                 charid = (U16)SvIV(*svpp);                                  \
1031         }                                                                   \
1032     }                                                                       \
1033 } STMT_END
1034
1035 #define REXEC_FBC_EXACTISH_CHECK(CoNd)                  \
1036     if ( (CoNd)                                        \
1037          && (ln == len ||                              \
1038              ibcmp_utf8(s, NULL, 0,  do_utf8,          \
1039                         m, NULL, ln, (bool)UTF))       \
1040          && (!reginfo || regtry(reginfo, &s)) )         \
1041         goto got_it;                                   \
1042     else {                                             \
1043          U8 foldbuf[UTF8_MAXBYTES_CASE+1];             \
1044          uvchr_to_utf8(tmpbuf, c);                     \
1045          f = to_utf8_fold(tmpbuf, foldbuf, &foldlen);  \
1046          if ( f != c                                   \
1047               && (f == c1 || f == c2)                  \
1048               && (ln == foldlen ||                     \
1049                   !ibcmp_utf8((char *) foldbuf,        \
1050                               NULL, foldlen, do_utf8,  \
1051                               m,                       \
1052                               NULL, ln, (bool)UTF))    \
1053               && (!reginfo || regtry(reginfo, &s)) )    \
1054               goto got_it;                             \
1055     }                                                  \
1056     s += len
1057
1058 #define REXEC_FBC_EXACTISH_SCAN(CoNd)                     \
1059 STMT_START {                                              \
1060     while (s <= e) {                                      \
1061         if ( (CoNd)                                       \
1062              && (ln == 1 || !(OP(c) == EXACTF             \
1063                               ? ibcmp(s, m, ln)           \
1064                               : ibcmp_locale(s, m, ln)))  \
1065              && (!reginfo || regtry(reginfo, &s)) )        \
1066             goto got_it;                                  \
1067         s++;                                              \
1068     }                                                     \
1069 } STMT_END
1070
1071 #define REXEC_FBC_UTF8_SCAN(CoDe)                     \
1072 STMT_START {                                          \
1073     while (s + (uskip = UTF8SKIP(s)) <= strend) {     \
1074         CoDe                                          \
1075         s += uskip;                                   \
1076     }                                                 \
1077 } STMT_END
1078
1079 #define REXEC_FBC_SCAN(CoDe)                          \
1080 STMT_START {                                          \
1081     while (s < strend) {                              \
1082         CoDe                                          \
1083         s++;                                          \
1084     }                                                 \
1085 } STMT_END
1086
1087 #define REXEC_FBC_UTF8_CLASS_SCAN(CoNd)               \
1088 REXEC_FBC_UTF8_SCAN(                                  \
1089     if (CoNd) {                                       \
1090         if (tmp && (!reginfo || regtry(reginfo, &s)))  \
1091             goto got_it;                              \
1092         else                                          \
1093             tmp = doevery;                            \
1094     }                                                 \
1095     else                                              \
1096         tmp = 1;                                      \
1097 )
1098
1099 #define REXEC_FBC_CLASS_SCAN(CoNd)                    \
1100 REXEC_FBC_SCAN(                                       \
1101     if (CoNd) {                                       \
1102         if (tmp && (!reginfo || regtry(reginfo, &s)))  \
1103             goto got_it;                              \
1104         else                                          \
1105             tmp = doevery;                            \
1106     }                                                 \
1107     else                                              \
1108         tmp = 1;                                      \
1109 )
1110
1111 #define REXEC_FBC_TRYIT               \
1112 if ((!reginfo || regtry(reginfo, &s))) \
1113     goto got_it
1114
1115 #define REXEC_FBC_CSCAN_PRELOAD(UtFpReLoAd,CoNdUtF8,CoNd)      \
1116     if (do_utf8) {                                             \
1117         UtFpReLoAd;                                            \
1118         REXEC_FBC_UTF8_CLASS_SCAN(CoNdUtF8);                   \
1119     }                                                          \
1120     else {                                                     \
1121         REXEC_FBC_CLASS_SCAN(CoNd);                            \
1122     }                                                          \
1123     break
1124
1125 #define REXEC_FBC_CSCAN_TAINT(CoNdUtF8,CoNd)                   \
1126     PL_reg_flags |= RF_tainted;                                \
1127     if (do_utf8) {                                             \
1128         REXEC_FBC_UTF8_CLASS_SCAN(CoNdUtF8);                   \
1129     }                                                          \
1130     else {                                                     \
1131         REXEC_FBC_CLASS_SCAN(CoNd);                            \
1132     }                                                          \
1133     break
1134
1135 #define DUMP_EXEC_POS(li,s,doutf8) \
1136     dump_exec_pos(li,s,(PL_regeol),(PL_bostr),(PL_reg_starttry),doutf8)
1137
1138 /* We know what class REx starts with.  Try to find this position... */
1139 /* if reginfo is NULL, its a dryrun */
1140 /* annoyingly all the vars in this routine have different names from their counterparts
1141    in regmatch. /grrr */
1142
1143 STATIC char *
1144 S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
1145     const char *strend, regmatch_info *reginfo)
1146 {
1147         dVAR;
1148         const I32 doevery = (prog->intflags & PREGf_SKIP) == 0;
1149         char *m;
1150         STRLEN ln;
1151         STRLEN lnc;
1152         register STRLEN uskip;
1153         unsigned int c1;
1154         unsigned int c2;
1155         char *e;
1156         register I32 tmp = 1;   /* Scratch variable? */
1157         register const bool do_utf8 = PL_reg_match_utf8;
1158         RXi_GET_DECL(prog,progi);
1159
1160         /* We know what class it must start with. */
1161         switch (OP(c)) {
1162         case ANYOF:
1163             if (do_utf8) {
1164                  REXEC_FBC_UTF8_CLASS_SCAN((ANYOF_FLAGS(c) & ANYOF_UNICODE) ||
1165                           !UTF8_IS_INVARIANT((U8)s[0]) ?
1166                           reginclass(prog, c, (U8*)s, 0, do_utf8) :
1167                           REGINCLASS(prog, c, (U8*)s));
1168             }
1169             else {
1170                  while (s < strend) {
1171                       STRLEN skip = 1;
1172
1173                       if (REGINCLASS(prog, c, (U8*)s) ||
1174                           (ANYOF_FOLD_SHARP_S(c, s, strend) &&
1175                            /* The assignment of 2 is intentional:
1176                             * for the folded sharp s, the skip is 2. */
1177                            (skip = SHARP_S_SKIP))) {
1178                            if (tmp && (!reginfo || regtry(reginfo, &s)))
1179                                 goto got_it;
1180                            else
1181                                 tmp = doevery;
1182                       }
1183                       else
1184                            tmp = 1;
1185                       s += skip;
1186                  }
1187             }
1188             break;
1189         case CANY:
1190             REXEC_FBC_SCAN(
1191                 if (tmp && (!reginfo || regtry(reginfo, &s)))
1192                     goto got_it;
1193                 else
1194                     tmp = doevery;
1195             );
1196             break;
1197         case EXACTF:
1198             m   = STRING(c);
1199             ln  = STR_LEN(c);   /* length to match in octets/bytes */
1200             lnc = (I32) ln;     /* length to match in characters */
1201             if (UTF) {
1202                 STRLEN ulen1, ulen2;
1203                 U8 *sm = (U8 *) m;
1204                 U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
1205                 U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
1206                 const U32 uniflags = UTF8_ALLOW_DEFAULT;
1207
1208                 to_utf8_lower((U8*)m, tmpbuf1, &ulen1);
1209                 to_utf8_upper((U8*)m, tmpbuf2, &ulen2);
1210
1211                 c1 = utf8n_to_uvchr(tmpbuf1, UTF8_MAXBYTES_CASE,
1212                                     0, uniflags);
1213                 c2 = utf8n_to_uvchr(tmpbuf2, UTF8_MAXBYTES_CASE,
1214                                     0, uniflags);
1215                 lnc = 0;
1216                 while (sm < ((U8 *) m + ln)) {
1217                     lnc++;
1218                     sm += UTF8SKIP(sm);
1219                 }
1220             }
1221             else {
1222                 c1 = *(U8*)m;
1223                 c2 = PL_fold[c1];
1224             }
1225             goto do_exactf;
1226         case EXACTFL:
1227             m   = STRING(c);
1228             ln  = STR_LEN(c);
1229             lnc = (I32) ln;
1230             c1 = *(U8*)m;
1231             c2 = PL_fold_locale[c1];
1232           do_exactf:
1233             e = HOP3c(strend, -((I32)lnc), s);
1234
1235             if (!reginfo && e < s)
1236                 e = s;                  /* Due to minlen logic of intuit() */
1237
1238             /* The idea in the EXACTF* cases is to first find the
1239              * first character of the EXACTF* node and then, if
1240              * necessary, case-insensitively compare the full
1241              * text of the node.  The c1 and c2 are the first
1242              * characters (though in Unicode it gets a bit
1243              * more complicated because there are more cases
1244              * than just upper and lower: one needs to use
1245              * the so-called folding case for case-insensitive
1246              * matching (called "loose matching" in Unicode).
1247              * ibcmp_utf8() will do just that. */
1248
1249             if (do_utf8) {
1250                 UV c, f;
1251                 U8 tmpbuf [UTF8_MAXBYTES+1];
1252                 STRLEN len, foldlen;
1253                 const U32 uniflags = UTF8_ALLOW_DEFAULT;
1254                 if (c1 == c2) {
1255                     /* Upper and lower of 1st char are equal -
1256                      * probably not a "letter". */
1257                     while (s <= e) {
1258                         c = utf8n_to_uvchr((U8*)s, UTF8_MAXBYTES, &len,
1259                                            uniflags);
1260                         REXEC_FBC_EXACTISH_CHECK(c == c1);
1261                     }
1262                 }
1263                 else {
1264                     while (s <= e) {
1265                       c = utf8n_to_uvchr((U8*)s, UTF8_MAXBYTES, &len,
1266                                            uniflags);
1267
1268                         /* Handle some of the three Greek sigmas cases.
1269                          * Note that not all the possible combinations
1270                          * are handled here: some of them are handled
1271                          * by the standard folding rules, and some of
1272                          * them (the character class or ANYOF cases)
1273                          * are handled during compiletime in
1274                          * regexec.c:S_regclass(). */
1275                         if (c == (UV)UNICODE_GREEK_CAPITAL_LETTER_SIGMA ||
1276                             c == (UV)UNICODE_GREEK_SMALL_LETTER_FINAL_SIGMA)
1277                             c = (UV)UNICODE_GREEK_SMALL_LETTER_SIGMA;
1278
1279                         REXEC_FBC_EXACTISH_CHECK(c == c1 || c == c2);
1280                     }
1281                 }
1282             }
1283             else {
1284                 if (c1 == c2)
1285                     REXEC_FBC_EXACTISH_SCAN(*(U8*)s == c1);
1286                 else
1287                     REXEC_FBC_EXACTISH_SCAN(*(U8*)s == c1 || *(U8*)s == c2);
1288             }
1289             break;
1290         case BOUNDL:
1291             PL_reg_flags |= RF_tainted;
1292             /* FALL THROUGH */
1293         case BOUND:
1294             if (do_utf8) {
1295                 if (s == PL_bostr)
1296                     tmp = '\n';
1297                 else {
1298                     U8 * const r = reghop3((U8*)s, -1, (U8*)PL_bostr);
1299                     tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, UTF8_ALLOW_DEFAULT);
1300                 }
1301                 tmp = ((OP(c) == BOUND ?
1302                         isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
1303                 LOAD_UTF8_CHARCLASS_ALNUM();
1304                 REXEC_FBC_UTF8_SCAN(
1305                     if (tmp == !(OP(c) == BOUND ?
1306                                  (bool)swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
1307                                  isALNUM_LC_utf8((U8*)s)))
1308                     {
1309                         tmp = !tmp;
1310                         REXEC_FBC_TRYIT;
1311                 }
1312                 );
1313             }
1314             else {
1315                 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
1316                 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1317                 REXEC_FBC_SCAN(
1318                     if (tmp ==
1319                         !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
1320                         tmp = !tmp;
1321                         REXEC_FBC_TRYIT;
1322                 }
1323                 );
1324             }
1325             if ((!prog->minlen && tmp) && (!reginfo || regtry(reginfo, &s)))
1326                 goto got_it;
1327             break;
1328         case NBOUNDL:
1329             PL_reg_flags |= RF_tainted;
1330             /* FALL THROUGH */
1331         case NBOUND:
1332             if (do_utf8) {
1333                 if (s == PL_bostr)
1334                     tmp = '\n';
1335                 else {
1336                     U8 * const r = reghop3((U8*)s, -1, (U8*)PL_bostr);
1337                     tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, UTF8_ALLOW_DEFAULT);
1338                 }
1339                 tmp = ((OP(c) == NBOUND ?
1340                         isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
1341                 LOAD_UTF8_CHARCLASS_ALNUM();
1342                 REXEC_FBC_UTF8_SCAN(
1343                     if (tmp == !(OP(c) == NBOUND ?
1344                                  (bool)swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
1345                                  isALNUM_LC_utf8((U8*)s)))
1346                         tmp = !tmp;
1347                     else REXEC_FBC_TRYIT;
1348                 );
1349             }
1350             else {
1351                 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
1352                 tmp = ((OP(c) == NBOUND ?
1353                         isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1354                 REXEC_FBC_SCAN(
1355                     if (tmp ==
1356                         !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
1357                         tmp = !tmp;
1358                     else REXEC_FBC_TRYIT;
1359                 );
1360             }
1361             if ((!prog->minlen && !tmp) && (!reginfo || regtry(reginfo, &s)))
1362                 goto got_it;
1363             break;
1364         case ALNUM:
1365             REXEC_FBC_CSCAN_PRELOAD(
1366                 LOAD_UTF8_CHARCLASS_ALNUM(),
1367                 swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8),
1368                 isALNUM(*s)
1369             );
1370         case ALNUML:
1371             REXEC_FBC_CSCAN_TAINT(
1372                 isALNUM_LC_utf8((U8*)s),
1373                 isALNUM_LC(*s)
1374             );
1375         case NALNUM:
1376             REXEC_FBC_CSCAN_PRELOAD(
1377                 LOAD_UTF8_CHARCLASS_ALNUM(),
1378                 !swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8),
1379                 !isALNUM(*s)
1380             );
1381         case NALNUML:
1382             REXEC_FBC_CSCAN_TAINT(
1383                 !isALNUM_LC_utf8((U8*)s),
1384                 !isALNUM_LC(*s)
1385             );
1386         case SPACE:
1387             REXEC_FBC_CSCAN_PRELOAD(
1388                 LOAD_UTF8_CHARCLASS_SPACE(),
1389                 *s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8),
1390                 isSPACE(*s)
1391             );
1392         case SPACEL:
1393             REXEC_FBC_CSCAN_TAINT(
1394                 *s == ' ' || isSPACE_LC_utf8((U8*)s),
1395                 isSPACE_LC(*s)
1396             );
1397         case NSPACE:
1398             REXEC_FBC_CSCAN_PRELOAD(
1399                 LOAD_UTF8_CHARCLASS_SPACE(),
1400                 !(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8)),
1401                 !isSPACE(*s)
1402             );
1403         case NSPACEL:
1404             REXEC_FBC_CSCAN_TAINT(
1405                 !(*s == ' ' || isSPACE_LC_utf8((U8*)s)),
1406                 !isSPACE_LC(*s)
1407             );
1408         case DIGIT:
1409             REXEC_FBC_CSCAN_PRELOAD(
1410                 LOAD_UTF8_CHARCLASS_DIGIT(),
1411                 swash_fetch(PL_utf8_digit,(U8*)s, do_utf8),
1412                 isDIGIT(*s)
1413             );
1414         case DIGITL:
1415             REXEC_FBC_CSCAN_TAINT(
1416                 isDIGIT_LC_utf8((U8*)s),
1417                 isDIGIT_LC(*s)
1418             );
1419         case NDIGIT:
1420             REXEC_FBC_CSCAN_PRELOAD(
1421                 LOAD_UTF8_CHARCLASS_DIGIT(),
1422                 !swash_fetch(PL_utf8_digit,(U8*)s, do_utf8),
1423                 !isDIGIT(*s)
1424             );
1425         case NDIGITL:
1426             REXEC_FBC_CSCAN_TAINT(
1427                 !isDIGIT_LC_utf8((U8*)s),
1428                 !isDIGIT_LC(*s)
1429             );
1430         case AHOCORASICKC:
1431         case AHOCORASICK:
1432             {
1433                 const enum { trie_plain, trie_utf8, trie_utf8_fold }
1434                     trie_type = do_utf8 ?
1435                           (c->flags == EXACT ? trie_utf8 : trie_utf8_fold)
1436                         : trie_plain;
1437                 /* what trie are we using right now */
1438                 reg_ac_data *aho
1439                     = (reg_ac_data*)progi->data->data[ ARG( c ) ];
1440                 reg_trie_data *trie
1441                     = (reg_trie_data*)progi->data->data[ aho->trie ];
1442                 HV *widecharmap = (HV*) progi->data->data[ aho->trie + 1 ];
1443
1444                 const char *last_start = strend - trie->minlen;
1445 #ifdef DEBUGGING
1446                 const char *real_start = s;
1447 #endif
1448                 STRLEN maxlen = trie->maxlen;
1449                 SV *sv_points;
1450                 U8 **points; /* map of where we were in the input string
1451                                 when reading a given char. For ASCII this
1452                                 is unnecessary overhead as the relationship
1453                                 is always 1:1, but for unicode, especially
1454                                 case folded unicode this is not true. */
1455                 U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ];
1456                 U8 *bitmap=NULL;
1457
1458
1459                 GET_RE_DEBUG_FLAGS_DECL;
1460
1461                 /* We can't just allocate points here. We need to wrap it in
1462                  * an SV so it gets freed properly if there is a croak while
1463                  * running the match */
1464                 ENTER;
1465                 SAVETMPS;
1466                 sv_points=newSV(maxlen * sizeof(U8 *));
1467                 SvCUR_set(sv_points,
1468                     maxlen * sizeof(U8 *));
1469                 SvPOK_on(sv_points);
1470                 sv_2mortal(sv_points);
1471                 points=(U8**)SvPV_nolen(sv_points );
1472                 if ( trie_type != trie_utf8_fold
1473                      && (trie->bitmap || OP(c)==AHOCORASICKC) )
1474                 {
1475                     if (trie->bitmap)
1476                         bitmap=(U8*)trie->bitmap;
1477                     else
1478                         bitmap=(U8*)ANYOF_BITMAP(c);
1479                 }
1480                 /* this is the Aho-Corasick algorithm modified a touch
1481                    to include special handling for long "unknown char"
1482                    sequences. The basic idea being that we use AC as long
1483                    as we are dealing with a possible matching char, when
1484                    we encounter an unknown char (and we have not encountered
1485                    an accepting state) we scan forward until we find a legal
1486                    starting char.
1487                    AC matching is basically that of trie matching, except
1488                    that when we encounter a failing transition, we fall back
1489                    to the current states "fail state", and try the current char
1490                    again, a process we repeat until we reach the root state,
1491                    state 1, or a legal transition. If we fail on the root state
1492                    then we can either terminate if we have reached an accepting
1493                    state previously, or restart the entire process from the beginning
1494                    if we have not.
1495
1496                  */
1497                 while (s <= last_start) {
1498                     const U32 uniflags = UTF8_ALLOW_DEFAULT;
1499                     U8 *uc = (U8*)s;
1500                     U16 charid = 0;
1501                     U32 base = 1;
1502                     U32 state = 1;
1503                     UV uvc = 0;
1504                     STRLEN len = 0;
1505                     STRLEN foldlen = 0;
1506                     U8 *uscan = (U8*)NULL;
1507                     U8 *leftmost = NULL;
1508 #ifdef DEBUGGING
1509                     U32 accepted_word= 0;
1510 #endif
1511                     U32 pointpos = 0;
1512
1513                     while ( state && uc <= (U8*)strend ) {
1514                         int failed=0;
1515                         U32 word = aho->states[ state ].wordnum;
1516
1517                         if( state==1 ) {
1518                             if ( bitmap ) {
1519                                 DEBUG_TRIE_EXECUTE_r(
1520                                     if ( uc <= (U8*)last_start && !BITMAP_TEST(bitmap,*uc) ) {
1521                                         dump_exec_pos( (char *)uc, c, strend, real_start,
1522                                             (char *)uc, do_utf8 );
1523                                         PerlIO_printf( Perl_debug_log,
1524                                             " Scanning for legal start char...\n");
1525                                     }
1526                                 );
1527                                 while ( uc <= (U8*)last_start  && !BITMAP_TEST(bitmap,*uc) ) {
1528                                     uc++;
1529                                 }
1530                                 s= (char *)uc;
1531                             }
1532                             if (uc >(U8*)last_start) break;
1533                         }
1534
1535                         if ( word ) {
1536                             U8 *lpos= points[ (pointpos - trie->wordlen[word-1] ) % maxlen ];
1537                             if (!leftmost || lpos < leftmost) {
1538                                 DEBUG_r(accepted_word=word);
1539                                 leftmost= lpos;
1540                             }
1541                             if (base==0) break;
1542
1543                         }
1544                         points[pointpos++ % maxlen]= uc;
1545                         REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc,
1546                                              uscan, len, uvc, charid, foldlen,
1547                                              foldbuf, uniflags);
1548                         DEBUG_TRIE_EXECUTE_r({
1549                             dump_exec_pos( (char *)uc, c, strend, real_start,
1550                                 s,   do_utf8 );
1551                             PerlIO_printf(Perl_debug_log,
1552                                 " Charid:%3u CP:%4"UVxf" ",
1553                                  charid, uvc);
1554                         });
1555
1556                         do {
1557 #ifdef DEBUGGING
1558                             word = aho->states[ state ].wordnum;
1559 #endif
1560                             base = aho->states[ state ].trans.base;
1561
1562                             DEBUG_TRIE_EXECUTE_r({
1563                                 if (failed)
1564                                     dump_exec_pos( (char *)uc, c, strend, real_start,
1565                                         s,   do_utf8 );
1566                                 PerlIO_printf( Perl_debug_log,
1567                                     "%sState: %4"UVxf", word=%"UVxf,
1568                                     failed ? " Fail transition to " : "",
1569                                     (UV)state, (UV)word);
1570                             });
1571                             if ( base ) {
1572                                 U32 tmp;
1573                                 if (charid &&
1574                                      (base + charid > trie->uniquecharcount )
1575                                      && (base + charid - 1 - trie->uniquecharcount
1576                                             < trie->lasttrans)
1577                                      && trie->trans[base + charid - 1 -
1578                                             trie->uniquecharcount].check == state
1579                                      && (tmp=trie->trans[base + charid - 1 -
1580                                         trie->uniquecharcount ].next))
1581                                 {
1582                                     DEBUG_TRIE_EXECUTE_r(
1583                                         PerlIO_printf( Perl_debug_log," - legal\n"));
1584                                     state = tmp;
1585                                     break;
1586                                 }
1587                                 else {
1588                                     DEBUG_TRIE_EXECUTE_r(
1589                                         PerlIO_printf( Perl_debug_log," - fail\n"));
1590                                     failed = 1;
1591                                     state = aho->fail[state];
1592                                 }
1593                             }
1594                             else {
1595                                 /* we must be accepting here */
1596                                 DEBUG_TRIE_EXECUTE_r(
1597                                         PerlIO_printf( Perl_debug_log," - accepting\n"));
1598                                 failed = 1;
1599                                 break;
1600                             }
1601                         } while(state);
1602                         uc += len;
1603                         if (failed) {
1604                             if (leftmost)
1605                                 break;
1606                             if (!state) state = 1;
1607                         }
1608                     }
1609                     if ( aho->states[ state ].wordnum ) {
1610                         U8 *lpos = points[ (pointpos - trie->wordlen[aho->states[ state ].wordnum-1]) % maxlen ];
1611                         if (!leftmost || lpos < leftmost) {
1612                             DEBUG_r(accepted_word=aho->states[ state ].wordnum);
1613                             leftmost = lpos;
1614                         }
1615                     }
1616                     if (leftmost) {
1617                         s = (char*)leftmost;
1618                         DEBUG_TRIE_EXECUTE_r({
1619                             PerlIO_printf(
1620                                 Perl_debug_log,"Matches word #%"UVxf" at position %"IVdf". Trying full pattern...\n",
1621                                 (UV)accepted_word, (IV)(s - real_start)
1622                             );
1623                         });
1624                         if (!reginfo || regtry(reginfo, &s)) {
1625                             FREETMPS;
1626                             LEAVE;
1627                             goto got_it;
1628                         }
1629                         s = HOPc(s,1);
1630                         DEBUG_TRIE_EXECUTE_r({
1631                             PerlIO_printf( Perl_debug_log,"Pattern failed. Looking for new start point...\n");
1632                         });
1633                     } else {
1634                         DEBUG_TRIE_EXECUTE_r(
1635                             PerlIO_printf( Perl_debug_log,"No match.\n"));
1636                         break;
1637                     }
1638                 }
1639                 FREETMPS;
1640                 LEAVE;
1641             }
1642             break;
1643         default:
1644             Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
1645             break;
1646         }
1647         return 0;
1648       got_it:
1649         return s;
1650 }
1651
1652 static void
1653 S_swap_match_buff (pTHX_ regexp *prog) {
1654     I32 *t;
1655
1656     if (!prog->swap) {
1657     /* We have to be careful. If the previous successful match
1658        was from this regex we don't want a subsequent paritally
1659        successful match to clobber the old results.
1660        So when we detect this possibility we add a swap buffer
1661        to the re, and switch the buffer each match. If we fail
1662        we switch it back, otherwise we leave it swapped.
1663     */
1664         Newxz(prog->swap, 1, regexp_paren_ofs);
1665         /* no need to copy these */
1666         Newxz(prog->swap->startp, 2 * (prog->nparens + 1), I32);
1667         prog->swap->endp = prog->swap->startp + prog->nparens + 1;
1668     }
1669     t = prog->swap->startp;
1670     prog->swap->startp = prog->startp;
1671     prog->startp = t;
1672     t = prog->swap->endp;
1673     prog->swap->endp = prog->endp;
1674     prog->endp = t;
1675 }
1676
1677
1678 /*
1679  - regexec_flags - match a regexp against a string
1680  */
1681 I32
1682 Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend,
1683               char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
1684 /* strend: pointer to null at end of string */
1685 /* strbeg: real beginning of string */
1686 /* minend: end of match must be >=minend after stringarg. */
1687 /* data: May be used for some additional optimizations.
1688          Currently its only used, with a U32 cast, for transmitting
1689          the ganch offset when doing a /g match. This will change */
1690 /* nosave: For optimizations. */
1691 {
1692     dVAR;
1693     /*register*/ char *s;
1694     register regnode *c;
1695     /*register*/ char *startpos = stringarg;
1696     I32 minlen;         /* must match at least this many chars */
1697     I32 dontbother = 0; /* how many characters not to try at end */
1698     I32 end_shift = 0;                  /* Same for the end. */         /* CC */
1699     I32 scream_pos = -1;                /* Internal iterator of scream. */
1700     char *scream_olds = NULL;
1701     SV* const oreplsv = GvSV(PL_replgv);
1702     const bool do_utf8 = (bool)DO_UTF8(sv);
1703     I32 multiline;
1704     RXi_GET_DECL(prog,progi);
1705     regmatch_info reginfo;  /* create some info to pass to regtry etc */
1706     bool swap_on_fail = 0;
1707
1708     GET_RE_DEBUG_FLAGS_DECL;
1709
1710     PERL_UNUSED_ARG(data);
1711
1712     /* Be paranoid... */
1713     if (prog == NULL || startpos == NULL) {
1714         Perl_croak(aTHX_ "NULL regexp parameter");
1715         return 0;
1716     }
1717
1718     multiline = prog->extflags & RXf_PMf_MULTILINE;
1719     reginfo.prog = prog;
1720
1721     RX_MATCH_UTF8_set(prog, do_utf8);
1722     DEBUG_EXECUTE_r(
1723         debug_start_match(prog, do_utf8, startpos, strend,
1724         "Matching");
1725     );
1726
1727     minlen = prog->minlen;
1728
1729     if (strend - startpos < (minlen+(prog->check_offset_min<0?prog->check_offset_min:0))) {
1730         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
1731                               "String too short [regexec_flags]...\n"));
1732         goto phooey;
1733     }
1734
1735
1736     /* Check validity of program. */
1737     if (UCHARAT(progi->program) != REG_MAGIC) {
1738         Perl_croak(aTHX_ "corrupted regexp program");
1739     }
1740
1741     PL_reg_flags = 0;
1742     PL_reg_eval_set = 0;
1743     PL_reg_maxiter = 0;
1744
1745     if (prog->extflags & RXf_UTF8)
1746         PL_reg_flags |= RF_utf8;
1747
1748     /* Mark beginning of line for ^ and lookbehind. */
1749     reginfo.bol = startpos; /* XXX not used ??? */
1750     PL_bostr  = strbeg;
1751     reginfo.sv = sv;
1752
1753     /* Mark end of line for $ (and such) */
1754     PL_regeol = strend;
1755
1756     /* see how far we have to get to not match where we matched before */
1757     reginfo.till = startpos+minend;
1758
1759     /* If there is a "must appear" string, look for it. */
1760     s = startpos;
1761
1762     if (prog->extflags & RXf_GPOS_SEEN) { /* Need to set reginfo->ganch */
1763         MAGIC *mg;
1764
1765         if (flags & REXEC_IGNOREPOS)    /* Means: check only at start */
1766             reginfo.ganch = startpos + prog->gofs;
1767         else if (sv && SvTYPE(sv) >= SVt_PVMG
1768                   && SvMAGIC(sv)
1769                   && (mg = mg_find(sv, PERL_MAGIC_regex_global))
1770                   && mg->mg_len >= 0) {
1771             reginfo.ganch = strbeg + mg->mg_len;        /* Defined pos() */
1772             if (prog->extflags & RXf_ANCH_GPOS) {
1773                 if (s > reginfo.ganch)
1774                     goto phooey;
1775                 s = reginfo.ganch - prog->gofs;
1776             }
1777         }
1778         else if (data) {
1779             reginfo.ganch = strbeg + PTR2UV(data);
1780         } else                          /* pos() not defined */
1781             reginfo.ganch = strbeg;
1782     }
1783     if (PL_curpm && (PM_GETRE(PL_curpm) == prog)) {
1784         swap_on_fail = 1;
1785         swap_match_buff(prog); /* do we need a save destructor here for
1786                                   eval dies? */
1787     }
1788     if (!(flags & REXEC_CHECKED) && (prog->check_substr != NULL || prog->check_utf8 != NULL)) {
1789         re_scream_pos_data d;
1790
1791         d.scream_olds = &scream_olds;
1792         d.scream_pos = &scream_pos;
1793         s = re_intuit_start(prog, sv, s, strend, flags, &d);
1794         if (!s) {
1795             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Not present...\n"));
1796             goto phooey;        /* not present */
1797         }
1798     }
1799
1800
1801
1802     /* Simplest case:  anchored match need be tried only once. */
1803     /*  [unless only anchor is BOL and multiline is set] */
1804     if (prog->extflags & (RXf_ANCH & ~RXf_ANCH_GPOS)) {
1805         if (s == startpos && regtry(&reginfo, &startpos))
1806             goto got_it;
1807         else if (multiline || (prog->intflags & PREGf_IMPLICIT)
1808                  || (prog->extflags & RXf_ANCH_MBOL)) /* XXXX SBOL? */
1809         {
1810             char *end;
1811
1812             if (minlen)
1813                 dontbother = minlen - 1;
1814             end = HOP3c(strend, -dontbother, strbeg) - 1;
1815             /* for multiline we only have to try after newlines */
1816             if (prog->check_substr || prog->check_utf8) {
1817                 if (s == startpos)
1818                     goto after_try;
1819                 while (1) {
1820                     if (regtry(&reginfo, &s))
1821                         goto got_it;
1822                   after_try:
1823                     if (s >= end)
1824                         goto phooey;
1825                     if (prog->extflags & RXf_USE_INTUIT) {
1826                         s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
1827                         if (!s)
1828                             goto phooey;
1829                     }
1830                     else
1831                         s++;
1832                 }
1833             } else {
1834                 if (s > startpos)
1835                     s--;
1836                 while (s < end) {
1837                     if (*s++ == '\n') { /* don't need PL_utf8skip here */
1838                         if (regtry(&reginfo, &s))
1839                             goto got_it;
1840                     }
1841                 }
1842             }
1843         }
1844         goto phooey;
1845     } else if (RXf_GPOS_CHECK == (prog->extflags & RXf_GPOS_CHECK))
1846     {
1847         /* the warning about reginfo.ganch being used without intialization
1848            is bogus -- we set it above, when prog->extflags & RXf_GPOS_SEEN
1849            and we only enter this block when the same bit is set. */
1850         char *tmp_s = reginfo.ganch - prog->gofs;
1851         if (regtry(&reginfo, &tmp_s))
1852             goto got_it;
1853         goto phooey;
1854     }
1855
1856     /* Messy cases:  unanchored match. */
1857     if ((prog->anchored_substr || prog->anchored_utf8) && prog->intflags & PREGf_SKIP) {
1858         /* we have /x+whatever/ */
1859         /* it must be a one character string (XXXX Except UTF?) */
1860         char ch;
1861 #ifdef DEBUGGING
1862         int did_match = 0;
1863 #endif
1864         if (!(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr))
1865             do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1866         ch = SvPVX_const(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr)[0];
1867
1868         if (do_utf8) {
1869             REXEC_FBC_SCAN(
1870                 if (*s == ch) {
1871                     DEBUG_EXECUTE_r( did_match = 1 );
1872                     if (regtry(&reginfo, &s)) goto got_it;
1873                     s += UTF8SKIP(s);
1874                     while (s < strend && *s == ch)
1875                         s += UTF8SKIP(s);
1876                 }
1877             );
1878         }
1879         else {
1880             REXEC_FBC_SCAN(
1881                 if (*s == ch) {
1882                     DEBUG_EXECUTE_r( did_match = 1 );
1883                     if (regtry(&reginfo, &s)) goto got_it;
1884                     s++;
1885                     while (s < strend && *s == ch)
1886                         s++;
1887                 }
1888             );
1889         }
1890         DEBUG_EXECUTE_r(if (!did_match)
1891                 PerlIO_printf(Perl_debug_log,
1892                                   "Did not find anchored character...\n")
1893                );
1894     }
1895     else if (prog->anchored_substr != NULL
1896               || prog->anchored_utf8 != NULL
1897               || ((prog->float_substr != NULL || prog->float_utf8 != NULL)
1898                   && prog->float_max_offset < strend - s)) {
1899         SV *must;
1900         I32 back_max;
1901         I32 back_min;
1902         char *last;
1903         char *last1;            /* Last position checked before */
1904 #ifdef DEBUGGING
1905         int did_match = 0;
1906 #endif
1907         if (prog->anchored_substr || prog->anchored_utf8) {
1908             if (!(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr))
1909                 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1910             must = do_utf8 ? prog->anchored_utf8 : prog->anchored_substr;
1911             back_max = back_min = prog->anchored_offset;
1912         } else {
1913             if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))
1914                 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1915             must = do_utf8 ? prog->float_utf8 : prog->float_substr;
1916             back_max = prog->float_max_offset;
1917             back_min = prog->float_min_offset;
1918         }
1919
1920
1921         if (must == &PL_sv_undef)
1922             /* could not downgrade utf8 check substring, so must fail */
1923             goto phooey;
1924
1925         if (back_min<0) {
1926             last = strend;
1927         } else {
1928             last = HOP3c(strend,        /* Cannot start after this */
1929                   -(I32)(CHR_SVLEN(must)
1930                          - (SvTAIL(must) != 0) + back_min), strbeg);
1931         }
1932         if (s > PL_bostr)
1933             last1 = HOPc(s, -1);
1934         else
1935             last1 = s - 1;      /* bogus */
1936
1937         /* XXXX check_substr already used to find "s", can optimize if
1938            check_substr==must. */
1939         scream_pos = -1;
1940         dontbother = end_shift;
1941         strend = HOPc(strend, -dontbother);
1942         while ( (s <= last) &&
1943                 ((flags & REXEC_SCREAM)
1944                  ? (s = screaminstr(sv, must, HOP3c(s, back_min, (back_min<0 ? strbeg : strend)) - strbeg,
1945                                     end_shift, &scream_pos, 0))
1946                  : (s = fbm_instr((unsigned char*)HOP3(s, back_min, (back_min<0 ? strbeg : strend)),
1947                                   (unsigned char*)strend, must,
1948                                   multiline ? FBMrf_MULTILINE : 0))) ) {
1949             /* we may be pointing at the wrong string */
1950             if ((flags & REXEC_SCREAM) && RX_MATCH_COPIED(prog))
1951                 s = strbeg + (s - SvPVX_const(sv));
1952             DEBUG_EXECUTE_r( did_match = 1 );
1953             if (HOPc(s, -back_max) > last1) {
1954                 last1 = HOPc(s, -back_min);
1955                 s = HOPc(s, -back_max);
1956             }
1957             else {
1958                 char * const t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
1959
1960                 last1 = HOPc(s, -back_min);
1961                 s = t;
1962             }
1963             if (do_utf8) {
1964                 while (s <= last1) {
1965                     if (regtry(&reginfo, &s))
1966                         goto got_it;
1967                     s += UTF8SKIP(s);
1968                 }
1969             }
1970             else {
1971                 while (s <= last1) {
1972                     if (regtry(&reginfo, &s))
1973                         goto got_it;
1974                     s++;
1975                 }
1976             }
1977         }
1978         DEBUG_EXECUTE_r(if (!did_match) {
1979             RE_PV_QUOTED_DECL(quoted, do_utf8, PERL_DEBUG_PAD_ZERO(0),
1980                 SvPVX_const(must), RE_SV_DUMPLEN(must), 30);
1981             PerlIO_printf(Perl_debug_log, "Did not find %s substr %s%s...\n",
1982                               ((must == prog->anchored_substr || must == prog->anchored_utf8)
1983                                ? "anchored" : "floating"),
1984                 quoted, RE_SV_TAIL(must));
1985         });
1986         goto phooey;
1987     }
1988     else if ( (c = progi->regstclass) ) {
1989         if (minlen) {
1990             const OPCODE op = OP(progi->regstclass);
1991             /* don't bother with what can't match */
1992             if (PL_regkind[op] != EXACT && op != CANY && PL_regkind[op] != TRIE)
1993                 strend = HOPc(strend, -(minlen - 1));
1994         }
1995         DEBUG_EXECUTE_r({
1996             SV * const prop = sv_newmortal();
1997             regprop(prog, prop, c);
1998             {
1999                 RE_PV_QUOTED_DECL(quoted,do_utf8,PERL_DEBUG_PAD_ZERO(1),
2000                     s,strend-s,60);
2001                 PerlIO_printf(Perl_debug_log,
2002                     "Matching stclass %.*s against %s (%d chars)\n",
2003                     (int)SvCUR(prop), SvPVX_const(prop),
2004                      quoted, (int)(strend - s));
2005             }
2006         });
2007         if (find_byclass(prog, c, s, strend, &reginfo))
2008             goto got_it;
2009         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass... [regexec_flags]\n"));
2010     }
2011     else {
2012         dontbother = 0;
2013         if (prog->float_substr != NULL || prog->float_utf8 != NULL) {
2014             /* Trim the end. */
2015             char *last;
2016             SV* float_real;
2017
2018             if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))
2019                 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
2020             float_real = do_utf8 ? prog->float_utf8 : prog->float_substr;
2021
2022             if (flags & REXEC_SCREAM) {
2023                 last = screaminstr(sv, float_real, s - strbeg,
2024                                    end_shift, &scream_pos, 1); /* last one */
2025                 if (!last)
2026                     last = scream_olds; /* Only one occurrence. */
2027                 /* we may be pointing at the wrong string */
2028                 else if (RX_MATCH_COPIED(prog))
2029                     s = strbeg + (s - SvPVX_const(sv));
2030             }
2031             else {
2032                 STRLEN len;
2033                 const char * const little = SvPV_const(float_real, len);
2034
2035                 if (SvTAIL(float_real)) {
2036                     if (memEQ(strend - len + 1, little, len - 1))
2037                         last = strend - len + 1;
2038                     else if (!multiline)
2039                         last = memEQ(strend - len, little, len)
2040                             ? strend - len : NULL;
2041                     else
2042                         goto find_last;
2043                 } else {
2044                   find_last:
2045                     if (len)
2046                         last = rninstr(s, strend, little, little + len);
2047                     else
2048                         last = strend;  /* matching "$" */
2049                 }
2050             }
2051             if (last == NULL) {
2052                 DEBUG_EXECUTE_r(
2053                     PerlIO_printf(Perl_debug_log,
2054                         "%sCan't trim the tail, match fails (should not happen)%s\n",
2055                         PL_colors[4], PL_colors[5]));
2056                 goto phooey; /* Should not happen! */
2057             }
2058             dontbother = strend - last + prog->float_min_offset;
2059         }
2060         if (minlen && (dontbother < minlen))
2061             dontbother = minlen - 1;
2062         strend -= dontbother;              /* this one's always in bytes! */
2063         /* We don't know much -- general case. */
2064         if (do_utf8) {
2065             for (;;) {
2066                 if (regtry(&reginfo, &s))
2067                     goto got_it;
2068                 if (s >= strend)
2069                     break;
2070                 s += UTF8SKIP(s);
2071             };
2072         }
2073         else {
2074             do {
2075                 if (regtry(&reginfo, &s))
2076                     goto got_it;
2077             } while (s++ < strend);
2078         }
2079     }
2080
2081     /* Failure. */
2082     goto phooey;
2083
2084 got_it:
2085     RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
2086
2087     if (PL_reg_eval_set) {
2088         /* Preserve the current value of $^R */
2089         if (oreplsv != GvSV(PL_replgv))
2090             sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
2091                                                   restored, the value remains
2092                                                   the same. */
2093         restore_pos(aTHX_ prog);
2094     }
2095     if (prog->paren_names)
2096         (void)hv_iterinit(prog->paren_names);
2097
2098     /* make sure $`, $&, $', and $digit will work later */
2099     if ( !(flags & REXEC_NOT_FIRST) ) {
2100         RX_MATCH_COPY_FREE(prog);
2101         if (flags & REXEC_COPY_STR) {
2102             const I32 i = PL_regeol - startpos + (stringarg - strbeg);
2103 #ifdef PERL_OLD_COPY_ON_WRITE
2104             if ((SvIsCOW(sv)
2105                  || (SvFLAGS(sv) & CAN_COW_MASK) == CAN_COW_FLAGS)) {
2106                 if (DEBUG_C_TEST) {
2107                     PerlIO_printf(Perl_debug_log,
2108                                   "Copy on write: regexp capture, type %d\n",
2109                                   (int) SvTYPE(sv));
2110                 }
2111                 prog->saved_copy = sv_setsv_cow(prog->saved_copy, sv);
2112                 prog->subbeg = (char *)SvPVX_const(prog->saved_copy);
2113                 assert (SvPOKp(prog->saved_copy));
2114             } else
2115 #endif
2116             {
2117                 RX_MATCH_COPIED_on(prog);
2118                 s = savepvn(strbeg, i);
2119                 prog->subbeg = s;
2120             }
2121             prog->sublen = i;
2122         }
2123         else {
2124             prog->subbeg = strbeg;
2125             prog->sublen = PL_regeol - strbeg;  /* strend may have been modified */
2126         }
2127     }
2128
2129     return 1;
2130
2131 phooey:
2132     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n",
2133                           PL_colors[4], PL_colors[5]));
2134     if (PL_reg_eval_set)
2135         restore_pos(aTHX_ prog);
2136     if (swap_on_fail)
2137         /* we failed :-( roll it back */
2138         swap_match_buff(prog);
2139
2140     return 0;
2141 }
2142
2143
2144 /*
2145  - regtry - try match at specific point
2146  */
2147 STATIC I32                      /* 0 failure, 1 success */
2148 S_regtry(pTHX_ regmatch_info *reginfo, char **startpos)
2149 {
2150     dVAR;
2151     register I32 *sp;
2152     register I32 *ep;
2153     CHECKPOINT lastcp;
2154     regexp *prog = reginfo->prog;
2155     RXi_GET_DECL(prog,progi);
2156     GET_RE_DEBUG_FLAGS_DECL;
2157     reginfo->cutpoint=NULL;
2158
2159     if ((prog->extflags & RXf_EVAL_SEEN) && !PL_reg_eval_set) {
2160         MAGIC *mg;
2161
2162         PL_reg_eval_set = RS_init;
2163         DEBUG_EXECUTE_r(DEBUG_s(
2164             PerlIO_printf(Perl_debug_log, "  setting stack tmpbase at %"IVdf"\n",
2165                           (IV)(PL_stack_sp - PL_stack_base));
2166             ));
2167         SAVESTACK_CXPOS();
2168         cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
2169         /* Otherwise OP_NEXTSTATE will free whatever on stack now.  */
2170         SAVETMPS;
2171         /* Apparently this is not needed, judging by wantarray. */
2172         /* SAVEI8(cxstack[cxstack_ix].blk_gimme);
2173            cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
2174
2175         if (reginfo->sv) {
2176             /* Make $_ available to executed code. */
2177             if (reginfo->sv != DEFSV) {
2178                 SAVE_DEFSV;
2179                 DEFSV = reginfo->sv;
2180             }
2181
2182             if (!(SvTYPE(reginfo->sv) >= SVt_PVMG && SvMAGIC(reginfo->sv)
2183                   && (mg = mg_find(reginfo->sv, PERL_MAGIC_regex_global)))) {
2184                 /* prepare for quick setting of pos */
2185 #ifdef PERL_OLD_COPY_ON_WRITE
2186                 if (SvIsCOW(reginfo->sv))
2187                     sv_force_normal_flags(reginfo->sv, 0);
2188 #endif
2189                 mg = sv_magicext(reginfo->sv, NULL, PERL_MAGIC_regex_global,
2190                                  &PL_vtbl_mglob, NULL, 0);
2191                 mg->mg_len = -1;
2192             }
2193             PL_reg_magic    = mg;
2194             PL_reg_oldpos   = mg->mg_len;
2195             SAVEDESTRUCTOR_X(restore_pos, prog);
2196         }
2197         if (!PL_reg_curpm) {
2198             Newxz(PL_reg_curpm, 1, PMOP);
2199 #ifdef USE_ITHREADS
2200             {
2201                 SV* const repointer = newSViv(0);
2202                 /* so we know which PL_regex_padav element is PL_reg_curpm */
2203                 SvFLAGS(repointer) |= SVf_BREAK;
2204                 av_push(PL_regex_padav,repointer);
2205                 PL_reg_curpm->op_pmoffset = av_len(PL_regex_padav);
2206                 PL_regex_pad = AvARRAY(PL_regex_padav);
2207             }
2208 #endif
2209         }
2210         PM_SETRE(PL_reg_curpm, prog);
2211         PL_reg_oldcurpm = PL_curpm;
2212         PL_curpm = PL_reg_curpm;
2213         if (RX_MATCH_COPIED(prog)) {
2214             /*  Here is a serious problem: we cannot rewrite subbeg,
2215                 since it may be needed if this match fails.  Thus
2216                 $` inside (?{}) could fail... */
2217             PL_reg_oldsaved = prog->subbeg;
2218             PL_reg_oldsavedlen = prog->sublen;
2219 #ifdef PERL_OLD_COPY_ON_WRITE
2220             PL_nrs = prog->saved_copy;
2221 #endif
2222             RX_MATCH_COPIED_off(prog);
2223         }
2224         else
2225             PL_reg_oldsaved = NULL;
2226         prog->subbeg = PL_bostr;
2227         prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
2228     }
2229     DEBUG_EXECUTE_r(PL_reg_starttry = *startpos);
2230     prog->startp[0] = *startpos - PL_bostr;
2231     PL_reginput = *startpos;
2232     PL_reglastparen = &prog->lastparen;
2233     PL_reglastcloseparen = &prog->lastcloseparen;
2234     prog->lastparen = 0;
2235     prog->lastcloseparen = 0;
2236     PL_regsize = 0;
2237     PL_regstartp = prog->startp;
2238     PL_regendp = prog->endp;
2239     if (PL_reg_start_tmpl <= prog->nparens) {
2240         PL_reg_start_tmpl = prog->nparens*3/2 + 3;
2241         if(PL_reg_start_tmp)
2242             Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
2243         else
2244             Newx(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
2245     }
2246
2247     /* XXXX What this code is doing here?!!!  There should be no need
2248        to do this again and again, PL_reglastparen should take care of
2249        this!  --ilya*/
2250
2251     /* Tests pat.t#187 and split.t#{13,14} seem to depend on this code.
2252      * Actually, the code in regcppop() (which Ilya may be meaning by
2253      * PL_reglastparen), is not needed at all by the test suite
2254      * (op/regexp, op/pat, op/split), but that code is needed, oddly
2255      * enough, for building DynaLoader, or otherwise this
2256      * "Error: '*' not in typemap in DynaLoader.xs, line 164"
2257      * will happen.  Meanwhile, this code *is* needed for the
2258      * above-mentioned test suite tests to succeed.  The common theme
2259      * on those tests seems to be returning null fields from matches.
2260      * --jhi */
2261 #if 1
2262     sp = PL_regstartp;
2263     ep = PL_regendp;
2264     if (prog->nparens) {
2265         register I32 i;
2266         for (i = prog->nparens; i > (I32)*PL_reglastparen; i--) {
2267             *++sp = -1;
2268             *++ep = -1;
2269         }
2270     }
2271 #endif
2272     REGCP_SET(lastcp);
2273     if (regmatch(reginfo, progi->program + 1)) {
2274         PL_regendp[0] = PL_reginput - PL_bostr;
2275         return 1;
2276     }
2277     if (reginfo->cutpoint)
2278         *startpos= reginfo->cutpoint;
2279     REGCP_UNWIND(lastcp);
2280     return 0;
2281 }
2282
2283
2284 #define sayYES goto yes
2285 #define sayNO goto no
2286 #define sayNO_SILENT goto no_silent
2287
2288 /* we dont use STMT_START/END here because it leads to
2289    "unreachable code" warnings, which are bogus, but distracting. */
2290 #define CACHEsayNO \
2291     if (ST.cache_mask) \
2292        PL_reg_poscache[ST.cache_offset] |= ST.cache_mask; \
2293     sayNO
2294
2295 /* this is used to determine how far from the left messages like
2296    'failed...' are printed. It should be set such that messages
2297    are inline with the regop output that created them.
2298 */
2299 #define REPORT_CODE_OFF 32
2300
2301
2302 /* Make sure there is a test for this +1 options in re_tests */
2303 #define TRIE_INITAL_ACCEPT_BUFFLEN 4;
2304
2305 #define CHRTEST_UNINIT -1001 /* c1/c2 haven't been calculated yet */
2306 #define CHRTEST_VOID   -1000 /* the c1/c2 "next char" test should be skipped */
2307
2308 #define SLAB_FIRST(s) (&(s)->states[0])
2309 #define SLAB_LAST(s)  (&(s)->states[PERL_REGMATCH_SLAB_SLOTS-1])
2310
2311 /* grab a new slab and return the first slot in it */
2312
2313 STATIC regmatch_state *
2314 S_push_slab(pTHX)
2315 {
2316 #if PERL_VERSION < 9 && !defined(PERL_CORE)
2317     dMY_CXT;
2318 #endif
2319     regmatch_slab *s = PL_regmatch_slab->next;
2320     if (!s) {
2321         Newx(s, 1, regmatch_slab);
2322         s->prev = PL_regmatch_slab;
2323         s->next = NULL;
2324         PL_regmatch_slab->next = s;
2325     }
2326     PL_regmatch_slab = s;
2327     return SLAB_FIRST(s);
2328 }
2329
2330
2331 /* push a new state then goto it */
2332
2333 #define PUSH_STATE_GOTO(state, node) \
2334     scan = node; \
2335     st->resume_state = state; \
2336     goto push_state;
2337
2338 /* push a new state with success backtracking, then goto it */
2339
2340 #define PUSH_YES_STATE_GOTO(state, node) \
2341     scan = node; \
2342     st->resume_state = state; \
2343     goto push_yes_state;
2344
2345
2346
2347 /*
2348
2349 regmatch() - main matching routine
2350
2351 This is basically one big switch statement in a loop. We execute an op,
2352 set 'next' to point the next op, and continue. If we come to a point which
2353 we may need to backtrack to on failure such as (A|B|C), we push a
2354 backtrack state onto the backtrack stack. On failure, we pop the top
2355 state, and re-enter the loop at the state indicated. If there are no more
2356 states to pop, we return failure.
2357
2358 Sometimes we also need to backtrack on success; for example /A+/, where
2359 after successfully matching one A, we need to go back and try to
2360 match another one; similarly for lookahead assertions: if the assertion
2361 completes successfully, we backtrack to the state just before the assertion
2362 and then carry on.  In these cases, the pushed state is marked as
2363 'backtrack on success too'. This marking is in fact done by a chain of
2364 pointers, each pointing to the previous 'yes' state. On success, we pop to
2365 the nearest yes state, discarding any intermediate failure-only states.
2366 Sometimes a yes state is pushed just to force some cleanup code to be
2367 called at the end of a successful match or submatch; e.g. (??{$re}) uses
2368 it to free the inner regex.
2369
2370 Note that failure backtracking rewinds the cursor position, while
2371 success backtracking leaves it alone.
2372
2373 A pattern is complete when the END op is executed, while a subpattern
2374 such as (?=foo) is complete when the SUCCESS op is executed. Both of these
2375 ops trigger the "pop to last yes state if any, otherwise return true"
2376 behaviour.
2377
2378 A common convention in this function is to use A and B to refer to the two
2379 subpatterns (or to the first nodes thereof) in patterns like /A*B/: so A is
2380 the subpattern to be matched possibly multiple times, while B is the entire
2381 rest of the pattern. Variable and state names reflect this convention.
2382
2383 The states in the main switch are the union of ops and failure/success of
2384 substates associated with with that op.  For example, IFMATCH is the op
2385 that does lookahead assertions /(?=A)B/ and so the IFMATCH state means
2386 'execute IFMATCH'; while IFMATCH_A is a state saying that we have just
2387 successfully matched A and IFMATCH_A_fail is a state saying that we have
2388 just failed to match A. Resume states always come in pairs. The backtrack
2389 state we push is marked as 'IFMATCH_A', but when that is popped, we resume
2390 at IFMATCH_A or IFMATCH_A_fail, depending on whether we are backtracking
2391 on success or failure.
2392
2393 The struct that holds a backtracking state is actually a big union, with
2394 one variant for each major type of op. The variable st points to the
2395 top-most backtrack struct. To make the code clearer, within each
2396 block of code we #define ST to alias the relevant union.
2397
2398 Here's a concrete example of a (vastly oversimplified) IFMATCH
2399 implementation:
2400
2401     switch (state) {
2402     ....
2403
2404 #define ST st->u.ifmatch
2405
2406     case IFMATCH: // we are executing the IFMATCH op, (?=A)B
2407         ST.foo = ...; // some state we wish to save
2408         ...
2409         // push a yes backtrack state with a resume value of
2410         // IFMATCH_A/IFMATCH_A_fail, then continue execution at the
2411         // first node of A:
2412         PUSH_YES_STATE_GOTO(IFMATCH_A, A);
2413         // NOTREACHED
2414
2415     case IFMATCH_A: // we have successfully executed A; now continue with B
2416         next = B;
2417         bar = ST.foo; // do something with the preserved value
2418         break;
2419
2420     case IFMATCH_A_fail: // A failed, so the assertion failed
2421         ...;   // do some housekeeping, then ...
2422         sayNO; // propagate the failure
2423
2424 #undef ST
2425
2426     ...
2427     }
2428
2429 For any old-timers reading this who are familiar with the old recursive
2430 approach, the code above is equivalent to:
2431
2432     case IFMATCH: // we are executing the IFMATCH op, (?=A)B
2433     {
2434         int foo = ...
2435         ...
2436         if (regmatch(A)) {
2437             next = B;
2438             bar = foo;
2439             break;
2440         }
2441         ...;   // do some housekeeping, then ...
2442         sayNO; // propagate the failure
2443     }
2444
2445 The topmost backtrack state, pointed to by st, is usually free. If you
2446 want to claim it, populate any ST.foo fields in it with values you wish to
2447 save, then do one of
2448
2449         PUSH_STATE_GOTO(resume_state, node);
2450         PUSH_YES_STATE_GOTO(resume_state, node);
2451
2452 which sets that backtrack state's resume value to 'resume_state', pushes a
2453 new free entry to the top of the backtrack stack, then goes to 'node'.
2454 On backtracking, the free slot is popped, and the saved state becomes the
2455 new free state. An ST.foo field in this new top state can be temporarily
2456 accessed to retrieve values, but once the main loop is re-entered, it
2457 becomes available for reuse.
2458
2459 Note that the depth of the backtrack stack constantly increases during the
2460 left-to-right execution of the pattern, rather than going up and down with
2461 the pattern nesting. For example the stack is at its maximum at Z at the
2462 end of the pattern, rather than at X in the following:
2463
2464     /(((X)+)+)+....(Y)+....Z/
2465
2466 The only exceptions to this are lookahead/behind assertions and the cut,
2467 (?>A), which pop all the backtrack states associated with A before
2468 continuing.
2469
2470 Bascktrack state structs are allocated in slabs of about 4K in size.
2471 PL_regmatch_state and st always point to the currently active state,
2472 and PL_regmatch_slab points to the slab currently containing
2473 PL_regmatch_state.  The first time regmatch() is called, the first slab is
2474 allocated, and is never freed until interpreter destruction. When the slab
2475 is full, a new one is allocated and chained to the end. At exit from
2476 regmatch(), slabs allocated since entry are freed.
2477
2478 */
2479
2480
2481 #define DEBUG_STATE_pp(pp)                                  \
2482     DEBUG_STATE_r({                                         \
2483         DUMP_EXEC_POS(locinput, scan, do_utf8);             \
2484         PerlIO_printf(Perl_debug_log,                       \
2485             "    %*s"pp" %s%s%s%s%s\n",                     \
2486             depth*2, "",                                    \
2487             PL_reg_name[st->resume_state],                     \
2488             ((st==yes_state||st==mark_state) ? "[" : ""),   \
2489             ((st==yes_state) ? "Y" : ""),                   \
2490             ((st==mark_state) ? "M" : ""),                  \
2491             ((st==yes_state||st==mark_state) ? "]" : "")    \
2492         );                                                  \
2493     });
2494
2495
2496 #define REG_NODE_NUM(x) ((x) ? (int)((x)-prog) : -1)
2497
2498 #ifdef DEBUGGING
2499
2500 STATIC void
2501 S_debug_start_match(pTHX_ const regexp *prog, const bool do_utf8,
2502     const char *start, const char *end, const char *blurb)
2503 {
2504     const bool utf8_pat= prog->extflags & RXf_UTF8 ? 1 : 0;
2505     if (!PL_colorset)
2506             reginitcolors();
2507     {
2508         RE_PV_QUOTED_DECL(s0, utf8_pat, PERL_DEBUG_PAD_ZERO(0),
2509             prog->precomp, prog->prelen, 60);
2510
2511         RE_PV_QUOTED_DECL(s1, do_utf8, PERL_DEBUG_PAD_ZERO(1),
2512             start, end - start, 60);
2513
2514         PerlIO_printf(Perl_debug_log,
2515             "%s%s REx%s %s against %s\n",
2516                        PL_colors[4], blurb, PL_colors[5], s0, s1);
2517
2518         if (do_utf8||utf8_pat)
2519             PerlIO_printf(Perl_debug_log, "UTF-8 %s%s%s...\n",
2520                 utf8_pat ? "pattern" : "",
2521                 utf8_pat && do_utf8 ? " and " : "",
2522                 do_utf8 ? "string" : ""
2523             );
2524     }
2525 }
2526
2527 STATIC void
2528 S_dump_exec_pos(pTHX_ const char *locinput,
2529                       const regnode *scan,
2530                       const char *loc_regeol,
2531                       const char *loc_bostr,
2532                       const char *loc_reg_starttry,
2533                       const bool do_utf8)
2534 {
2535     const int docolor = *PL_colors[0] || *PL_colors[2] || *PL_colors[4];
2536     const int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
2537     int l = (loc_regeol - locinput) > taill ? taill : (loc_regeol - locinput);
2538     /* The part of the string before starttry has one color
2539        (pref0_len chars), between starttry and current
2540        position another one (pref_len - pref0_len chars),
2541        after the current position the third one.
2542        We assume that pref0_len <= pref_len, otherwise we
2543        decrease pref0_len.  */
2544     int pref_len = (locinput - loc_bostr) > (5 + taill) - l
2545         ? (5 + taill) - l : locinput - loc_bostr;
2546     int pref0_len;
2547
2548     while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput - pref_len)))
2549         pref_len++;
2550     pref0_len = pref_len  - (locinput - loc_reg_starttry);
2551     if (l + pref_len < (5 + taill) && l < loc_regeol - locinput)
2552         l = ( loc_regeol - locinput > (5 + taill) - pref_len
2553               ? (5 + taill) - pref_len : loc_regeol - locinput);
2554     while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput + l)))
2555         l--;
2556     if (pref0_len < 0)
2557         pref0_len = 0;
2558     if (pref0_len > pref_len)
2559         pref0_len = pref_len;
2560     {
2561         const int is_uni = (do_utf8 && OP(scan) != CANY) ? 1 : 0;
2562
2563         RE_PV_COLOR_DECL(s0,len0,is_uni,PERL_DEBUG_PAD(0),
2564             (locinput - pref_len),pref0_len, 60, 4, 5);
2565
2566         RE_PV_COLOR_DECL(s1,len1,is_uni,PERL_DEBUG_PAD(1),
2567                     (locinput - pref_len + pref0_len),
2568                     pref_len - pref0_len, 60, 2, 3);
2569
2570         RE_PV_COLOR_DECL(s2,len2,is_uni,PERL_DEBUG_PAD(2),
2571                     locinput, loc_regeol - locinput, 10, 0, 1);
2572
2573         const STRLEN tlen=len0+len1+len2;
2574         PerlIO_printf(Perl_debug_log,
2575                     "%4"IVdf" <%.*s%.*s%s%.*s>%*s|",
2576                     (IV)(locinput - loc_bostr),
2577                     len0, s0,
2578                     len1, s1,
2579                     (docolor ? "" : "> <"),
2580                     len2, s2,
2581                     (int)(tlen > 19 ? 0 :  19 - tlen),
2582                     "");
2583     }
2584 }
2585
2586 #endif
2587
2588 /* reg_check_named_buff_matched()
2589  * Checks to see if a named buffer has matched. The data array of
2590  * buffer numbers corresponding to the buffer is expected to reside
2591  * in the regexp->data->data array in the slot stored in the ARG() of
2592  * node involved. Note that this routine doesn't actually care about the
2593  * name, that information is not preserved from compilation to execution.
2594  * Returns the index of the leftmost defined buffer with the given name
2595  * or 0 if non of the buffers matched.
2596  */
2597 STATIC I32
2598 S_reg_check_named_buff_matched(pTHX_ const regexp *rex, const regnode *scan) {
2599     I32 n;
2600     RXi_GET_DECL(rex,rexi);
2601     SV *sv_dat=(SV*)rexi->data->data[ ARG( scan ) ];
2602     I32 *nums=(I32*)SvPVX(sv_dat);
2603     for ( n=0; n<SvIVX(sv_dat); n++ ) {
2604         if ((I32)*PL_reglastparen >= nums[n] &&
2605             PL_regendp[nums[n]] != -1)
2606         {
2607             return nums[n];
2608         }
2609     }
2610     return 0;
2611 }
2612
2613 #define SETREX(Re1,Re2) \
2614     if (PL_reg_eval_set) PM_SETRE((PL_reg_curpm), (Re2)); \
2615     Re1 = (Re2)
2616
2617 STATIC I32                      /* 0 failure, 1 success */
2618 S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
2619 {
2620 #if PERL_VERSION < 9 && !defined(PERL_CORE)
2621     dMY_CXT;
2622 #endif
2623     dVAR;
2624     register const bool do_utf8 = PL_reg_match_utf8;
2625     const U32 uniflags = UTF8_ALLOW_DEFAULT;
2626
2627     regexp *rex = reginfo->prog;
2628     RXi_GET_DECL(rex,rexi);
2629
2630     regmatch_slab  *orig_slab;
2631     regmatch_state *orig_state;
2632
2633     /* the current state. This is a cached copy of PL_regmatch_state */
2634     register regmatch_state *st;
2635
2636     /* cache heavy used fields of st in registers */
2637     register regnode *scan;
2638     register regnode *next;
2639     register U32 n = 0; /* general value; init to avoid compiler warning */
2640     register I32 ln = 0; /* len or last;  init to avoid compiler warning */
2641     register char *locinput = PL_reginput;
2642     register I32 nextchr;   /* is always set to UCHARAT(locinput) */
2643
2644     bool result = 0;        /* return value of S_regmatch */
2645     int depth = 0;          /* depth of backtrack stack */
2646     U32 nochange_depth = 0; /* depth of GOSUB recursion with nochange */
2647     const U32 max_nochange_depth =
2648         (3 * rex->nparens > MAX_RECURSE_EVAL_NOCHANGE_DEPTH) ?
2649         3 * rex->nparens : MAX_RECURSE_EVAL_NOCHANGE_DEPTH;
2650
2651     regmatch_state *yes_state = NULL; /* state to pop to on success of
2652                                                             subpattern */
2653     /* mark_state piggy backs on the yes_state logic so that when we unwind
2654        the stack on success we can update the mark_state as we go */
2655     regmatch_state *mark_state = NULL; /* last mark state we have seen */
2656
2657     regmatch_state *cur_eval = NULL; /* most recent EVAL_AB state */
2658     struct regmatch_state  *cur_curlyx = NULL; /* most recent curlyx */
2659     U32 state_num;
2660     bool no_final = 0;      /* prevent failure from backtracking? */
2661     bool do_cutgroup = 0;   /* no_final only until next branch/trie entry */
2662     char *startpoint = PL_reginput;
2663     SV *popmark = NULL;     /* are we looking for a mark? */
2664     SV *sv_commit = NULL;   /* last mark name seen in failure */
2665     SV *sv_yes_mark = NULL; /* last mark name we have seen
2666                                during a successfull match */
2667     U32 lastopen = 0;       /* last open we saw */
2668     bool has_cutgroup = RX_HAS_CUTGROUP(rex) ? 1 : 0;
2669
2670
2671     /* these three flags are set by various ops to signal information to
2672      * the very next op. They have a useful lifetime of exactly one loop
2673      * iteration, and are not preserved or restored by state pushes/pops
2674      */
2675     bool sw = 0;            /* the condition value in (?(cond)a|b) */
2676     bool minmod = 0;        /* the next "{n,m}" is a "{n,m}?" */
2677     int logical = 0;        /* the following EVAL is:
2678                                 0: (?{...})
2679                                 1: (?(?{...})X|Y)
2680                                 2: (??{...})
2681                                or the following IFMATCH/UNLESSM is:
2682                                 false: plain (?=foo)
2683                                 true:  used as a condition: (?(?=foo))
2684                             */
2685
2686 #ifdef DEBUGGING
2687     GET_RE_DEBUG_FLAGS_DECL;
2688 #endif
2689
2690     DEBUG_OPTIMISE_r( DEBUG_EXECUTE_r({
2691             PerlIO_printf(Perl_debug_log,"regmatch start\n");
2692     }));
2693     /* on first ever call to regmatch, allocate first slab */
2694     if (!PL_regmatch_slab) {
2695         Newx(PL_regmatch_slab, 1, regmatch_slab);
2696         PL_regmatch_slab->prev = NULL;
2697         PL_regmatch_slab->next = NULL;
2698         PL_regmatch_state = SLAB_FIRST(PL_regmatch_slab);
2699     }
2700
2701     /* remember current high-water mark for exit */
2702     /* XXX this should be done with SAVE* instead */
2703     orig_slab  = PL_regmatch_slab;
2704     orig_state = PL_regmatch_state;
2705
2706     /* grab next free state slot */
2707     st = ++PL_regmatch_state;
2708     if (st >  SLAB_LAST(PL_regmatch_slab))
2709         st = PL_regmatch_state = S_push_slab(aTHX);
2710
2711     /* Note that nextchr is a byte even in UTF */
2712     nextchr = UCHARAT(locinput);
2713     scan = prog;
2714     while (scan != NULL) {
2715
2716         DEBUG_EXECUTE_r( {
2717             SV * const prop = sv_newmortal();
2718             regnode *rnext=regnext(scan);
2719             DUMP_EXEC_POS( locinput, scan, do_utf8 );
2720             regprop(rex, prop, scan);
2721
2722             PerlIO_printf(Perl_debug_log,
2723                     "%3"IVdf":%*s%s(%"IVdf")\n",
2724                     (IV)(scan - rexi->program), depth*2, "",
2725                     SvPVX_const(prop),
2726                     (PL_regkind[OP(scan)] == END || !rnext) ?
2727                         0 : (IV)(rnext - rexi->program));
2728         });
2729
2730         next = scan + NEXT_OFF(scan);
2731         if (next == scan)
2732             next = NULL;
2733         state_num = OP(scan);
2734
2735       reenter_switch:
2736         switch (state_num) {
2737         case BOL:
2738             if (locinput == PL_bostr)
2739             {
2740                 /* reginfo->till = reginfo->bol; */
2741                 break;
2742             }
2743             sayNO;
2744         case MBOL:
2745             if (locinput == PL_bostr ||
2746                 ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n'))
2747             {
2748                 break;
2749             }
2750             sayNO;
2751         case SBOL:
2752             if (locinput == PL_bostr)
2753                 break;
2754             sayNO;
2755         case GPOS:
2756             if (locinput == reginfo->ganch)
2757                 break;
2758             sayNO;
2759
2760         case KEEPS:
2761             /* update the startpoint */
2762             st->u.keeper.val = PL_regstartp[0];
2763             PL_reginput = locinput;
2764             PL_regstartp[0] = locinput - PL_bostr;
2765             PUSH_STATE_GOTO(KEEPS_next, next);
2766             /*NOT-REACHED*/
2767         case KEEPS_next_fail:
2768             /* rollback the start point change */
2769             PL_regstartp[0] = st->u.keeper.val;
2770             sayNO_SILENT;
2771             /*NOT-REACHED*/
2772         case EOL:
2773                 goto seol;
2774         case MEOL:
2775             if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
2776                 sayNO;
2777             break;
2778         case SEOL:
2779           seol:
2780             if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
2781                 sayNO;
2782             if (PL_regeol - locinput > 1)
2783                 sayNO;
2784             break;
2785         case EOS:
2786             if (PL_regeol != locinput)
2787                 sayNO;
2788             break;
2789         case SANY:
2790             if (!nextchr && locinput >= PL_regeol)
2791                 sayNO;
2792             if (do_utf8) {
2793                 locinput += PL_utf8skip[nextchr];
2794                 if (locinput > PL_regeol)
2795                     sayNO;
2796                 nextchr = UCHARAT(locinput);
2797             }
2798             else
2799                 nextchr = UCHARAT(++locinput);
2800             break;
2801         case CANY:
2802             if (!nextchr && locinput >= PL_regeol)
2803                 sayNO;
2804             nextchr = UCHARAT(++locinput);
2805             break;
2806         case REG_ANY:
2807             if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
2808                 sayNO;
2809             if (do_utf8) {
2810                 locinput += PL_utf8skip[nextchr];
2811                 if (locinput > PL_regeol)
2812                     sayNO;
2813                 nextchr = UCHARAT(locinput);
2814             }
2815             else
2816                 nextchr = UCHARAT(++locinput);
2817             break;
2818
2819 #undef  ST
2820 #define ST st->u.trie
2821         case TRIEC:
2822             /* In this case the charclass data is available inline so
2823                we can fail fast without a lot of extra overhead.
2824              */
2825             if (scan->flags == EXACT || !do_utf8) {
2826                 if(!ANYOF_BITMAP_TEST(scan, *locinput)) {
2827                     DEBUG_EXECUTE_r(
2828                         PerlIO_printf(Perl_debug_log,
2829                                   "%*s  %sfailed to match trie start class...%s\n",
2830                                   REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5])
2831                     );
2832                     sayNO_SILENT;
2833                     /* NOTREACHED */
2834                 }
2835             }
2836             /* FALL THROUGH */
2837         case TRIE:
2838             {
2839                 /* what type of TRIE am I? (utf8 makes this contextual) */
2840                 const enum { trie_plain, trie_utf8, trie_utf8_fold }
2841                     trie_type = do_utf8 ?
2842                           (scan->flags == EXACT ? trie_utf8 : trie_utf8_fold)
2843                         : trie_plain;
2844
2845                 /* what trie are we using right now */
2846                 reg_trie_data * const trie
2847                     = (reg_trie_data*)rexi->data->data[ ARG( scan ) ];
2848                 HV * widecharmap = (HV *)rexi->data->data[ ARG( scan ) + 1 ];
2849                 U32 state = trie->startstate;
2850
2851                 if (trie->bitmap && trie_type != trie_utf8_fold &&
2852                     !TRIE_BITMAP_TEST(trie,*locinput)
2853                 ) {
2854                     if (trie->states[ state ].wordnum) {
2855                          DEBUG_EXECUTE_r(
2856                             PerlIO_printf(Perl_debug_log,
2857                                           "%*s  %smatched empty string...%s\n",
2858                                           REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5])
2859                         );
2860                         break;
2861                     } else {
2862                         DEBUG_EXECUTE_r(
2863                             PerlIO_printf(Perl_debug_log,
2864                                           "%*s  %sfailed to match trie start class...%s\n",
2865                                           REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5])
2866                         );
2867                         sayNO_SILENT;
2868                    }
2869                 }
2870
2871             {
2872                 U8 *uc = ( U8* )locinput;
2873
2874                 STRLEN len = 0;
2875                 STRLEN foldlen = 0;
2876                 U8 *uscan = (U8*)NULL;
2877                 STRLEN bufflen=0;
2878                 SV *sv_accept_buff = NULL;
2879                 U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ];
2880
2881                 ST.accepted = 0; /* how many accepting states we have seen */
2882                 ST.B = next;
2883                 ST.jump = trie->jump;
2884                 ST.me = scan;
2885                 /*
2886                    traverse the TRIE keeping track of all accepting states
2887                    we transition through until we get to a failing node.
2888                 */
2889
2890                 while ( state && uc <= (U8*)PL_regeol ) {
2891                     U32 base = trie->states[ state ].trans.base;
2892                     UV uvc = 0;
2893                     U16 charid;
2894                     /* We use charid to hold the wordnum as we don't use it
2895                        for charid until after we have done the wordnum logic.
2896                        We define an alias just so that the wordnum logic reads
2897                        more naturally. */
2898
2899 #define got_wordnum charid
2900                     got_wordnum = trie->states[ state ].wordnum;
2901
2902                     if ( got_wordnum ) {
2903                         if ( ! ST.accepted ) {
2904                             ENTER;
2905                             SAVETMPS;
2906                             bufflen = TRIE_INITAL_ACCEPT_BUFFLEN;
2907                             sv_accept_buff=newSV(bufflen *
2908                                             sizeof(reg_trie_accepted) - 1);
2909                             SvCUR_set(sv_accept_buff, 0);
2910                             SvPOK_on(sv_accept_buff);
2911                             sv_2mortal(sv_accept_buff);
2912                             SAVETMPS;
2913                             ST.accept_buff =
2914                                 (reg_trie_accepted*)SvPV_nolen(sv_accept_buff );
2915                         }
2916                         do {
2917                             if (ST.accepted >= bufflen) {
2918                                 bufflen *= 2;
2919                                 ST.accept_buff =(reg_trie_accepted*)
2920                                     SvGROW(sv_accept_buff,
2921                                         bufflen * sizeof(reg_trie_accepted));
2922                             }
2923                             SvCUR_set(sv_accept_buff,SvCUR(sv_accept_buff)
2924                                 + sizeof(reg_trie_accepted));
2925
2926
2927                             ST.accept_buff[ST.accepted].wordnum = got_wordnum;
2928                             ST.accept_buff[ST.accepted].endpos = uc;
2929                             ++ST.accepted;
2930                         } while (trie->nextword && (got_wordnum= trie->nextword[got_wordnum]));
2931                     }
2932 #undef got_wordnum
2933
2934                     DEBUG_TRIE_EXECUTE_r({
2935                                 DUMP_EXEC_POS( (char *)uc, scan, do_utf8 );
2936                                 PerlIO_printf( Perl_debug_log,
2937                                     "%*s  %sState: %4"UVxf" Accepted: %4"UVxf" ",
2938                                     2+depth * 2, "", PL_colors[4],
2939                                     (UV)state, (UV)ST.accepted );
2940                     });
2941
2942                     if ( base ) {
2943                         REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc,
2944                                              uscan, len, uvc, charid, foldlen,
2945                                              foldbuf, uniflags);
2946
2947                         if (charid &&
2948                              (base + charid > trie->uniquecharcount )
2949                              && (base + charid - 1 - trie->uniquecharcount
2950                                     < trie->lasttrans)
2951                              && trie->trans[base + charid - 1 -
2952                                     trie->uniquecharcount].check == state)
2953                         {
2954                             state = trie->trans[base + charid - 1 -
2955                                 trie->uniquecharcount ].next;
2956                         }
2957                         else {
2958                             state = 0;
2959                         }
2960                         uc += len;
2961
2962                     }
2963                     else {
2964                         state = 0;
2965                     }
2966                     DEBUG_TRIE_EXECUTE_r(
2967                         PerlIO_printf( Perl_debug_log,
2968                             "Charid:%3x CP:%4"UVxf" After State: %4"UVxf"%s\n",
2969                             charid, uvc, (UV)state, PL_colors[5] );
2970                     );
2971                 }
2972                 if (!ST.accepted )
2973                    sayNO;
2974
2975                 DEBUG_EXECUTE_r(
2976                     PerlIO_printf( Perl_debug_log,
2977                         "%*s  %sgot %"IVdf" possible matches%s\n",
2978                         REPORT_CODE_OFF + depth * 2, "",
2979                         PL_colors[4], (IV)ST.accepted, PL_colors[5] );
2980                 );
2981             }}
2982             goto trie_first_try; /* jump into the fail handler */
2983             /* NOTREACHED */
2984         case TRIE_next_fail: /* we failed - try next alterative */
2985             if ( ST.jump) {
2986                 REGCP_UNWIND(ST.cp);
2987                 for (n = *PL_reglastparen; n > ST.lastparen; n--)
2988                     PL_regendp[n] = -1;
2989                 *PL_reglastparen = n;
2990             }
2991           trie_first_try:
2992             if (do_cutgroup) {
2993                 do_cutgroup = 0;
2994                 no_final = 0;
2995             }
2996
2997             if ( ST.jump) {
2998                 ST.lastparen = *PL_reglastparen;
2999                 REGCP_SET(ST.cp);
3000             }
3001             if ( ST.accepted == 1 ) {
3002                 /* only one choice left - just continue */
3003                 DEBUG_EXECUTE_r({
3004                     AV *const trie_words
3005                         = (AV *) rexi->data->data[ARG(ST.me)+TRIE_WORDS_OFFSET];
3006                     SV ** const tmp = av_fetch( trie_words,
3007                         ST.accept_buff[ 0 ].wordnum-1, 0 );
3008                     SV *sv= tmp ? sv_newmortal() : NULL;
3009
3010                     PerlIO_printf( Perl_debug_log,
3011                         "%*s  %sonly one match left: #%d <%s>%s\n",
3012                         REPORT_CODE_OFF+depth*2, "", PL_colors[4],
3013                         ST.accept_buff[ 0 ].wordnum,
3014                         tmp ? pv_pretty(sv, SvPV_nolen_const(*tmp), SvCUR(*tmp), 0,
3015                                 PL_colors[0], PL_colors[1],
3016                                 (SvUTF8(*tmp) ? PERL_PV_ESCAPE_UNI : 0)
3017                             )
3018                         : "not compiled under -Dr",
3019                         PL_colors[5] );
3020                 });
3021                 PL_reginput = (char *)ST.accept_buff[ 0 ].endpos;
3022                 /* in this case we free tmps/leave before we call regmatch
3023                    as we wont be using accept_buff again. */
3024
3025                 locinput = PL_reginput;
3026                 nextchr = UCHARAT(locinput);
3027                 if ( !ST.jump || !ST.jump[ST.accept_buff[0].wordnum])
3028                     scan = ST.B;
3029                 else
3030                     scan = ST.me + ST.jump[ST.accept_buff[0].wordnum];
3031                 if (!has_cutgroup) {
3032                     FREETMPS;
3033                     LEAVE;
3034                 } else {
3035                     ST.accepted--;
3036                     PUSH_YES_STATE_GOTO(TRIE_next, scan);
3037                 }
3038
3039                 continue; /* execute rest of RE */
3040             }
3041
3042             if ( !ST.accepted-- ) {
3043                 DEBUG_EXECUTE_r({
3044                     PerlIO_printf( Perl_debug_log,
3045                         "%*s  %sTRIE failed...%s\n",
3046                         REPORT_CODE_OFF+depth*2, "",
3047                         PL_colors[4],
3048                         PL_colors[5] );
3049                 });
3050                 FREETMPS;
3051                 LEAVE;
3052                 sayNO_SILENT;
3053                 /*NOTREACHED*/
3054             }
3055
3056             /*
3057                There are at least two accepting states left.  Presumably
3058                the number of accepting states is going to be low,
3059                typically two. So we simply scan through to find the one
3060                with lowest wordnum.  Once we find it, we swap the last
3061                state into its place and decrement the size. We then try to
3062                match the rest of the pattern at the point where the word
3063                ends. If we succeed, control just continues along the
3064                regex; if we fail we return here to try the next accepting
3065                state
3066              */
3067
3068             {
3069                 U32 best = 0;
3070                 U32 cur;
3071                 for( cur = 1 ; cur <= ST.accepted ; cur++ ) {
3072                     DEBUG_TRIE_EXECUTE_r(
3073                         PerlIO_printf( Perl_debug_log,
3074                             "%*s  %sgot %"IVdf" (%d) as best, looking at %"IVdf" (%d)%s\n",
3075                             REPORT_CODE_OFF + depth * 2, "", PL_colors[4],
3076                             (IV)best, ST.accept_buff[ best ].wordnum, (IV)cur,
3077                             ST.accept_buff[ cur ].wordnum, PL_colors[5] );
3078                     );
3079
3080                     if (ST.accept_buff[cur].wordnum <
3081                             ST.accept_buff[best].wordnum)
3082                         best = cur;
3083                 }
3084
3085                 DEBUG_EXECUTE_r({
3086                     AV *const trie_words
3087                         = (AV *) rexi->data->data[ARG(ST.me)+TRIE_WORDS_OFFSET];
3088                     SV ** const tmp = av_fetch( trie_words,
3089                         ST.accept_buff[ best ].wordnum - 1, 0 );
3090                     regnode *nextop=(!ST.jump || !ST.jump[ST.accept_buff[best].wordnum]) ?
3091                                     ST.B :
3092                                     ST.me + ST.jump[ST.accept_buff[best].wordnum];
3093                     SV *sv= tmp ? sv_newmortal() : NULL;
3094
3095                     PerlIO_printf( Perl_debug_log,
3096                         "%*s  %strying alternation #%d <%s> at node #%d %s\n",
3097                         REPORT_CODE_OFF+depth*2, "", PL_colors[4],
3098                         ST.accept_buff[best].wordnum,
3099                         tmp ? pv_pretty(sv, SvPV_nolen_const(*tmp), SvCUR(*tmp), 0,
3100                                 PL_colors[0], PL_colors[1],
3101                                 (SvUTF8(*tmp) ? PERL_PV_ESCAPE_UNI : 0)
3102                             ) : "not compiled under -Dr",
3103                             REG_NODE_NUM(nextop),
3104                         PL_colors[5] );
3105                 });
3106
3107                 if ( best<ST.accepted ) {
3108                     reg_trie_accepted tmp = ST.accept_buff[ best ];
3109                     ST.accept_buff[ best ] = ST.accept_buff[ ST.accepted ];
3110                     ST.accept_buff[ ST.accepted ] = tmp;
3111                     best = ST.accepted;
3112                 }
3113                 PL_reginput = (char *)ST.accept_buff[ best ].endpos;
3114                 if ( !ST.jump || !ST.jump[ST.accept_buff[best].wordnum]) {
3115                     scan = ST.B;
3116                     /* NOTREACHED */
3117                 } else {
3118                     scan = ST.me + ST.jump[ST.accept_buff[best].wordnum];
3119                     /* NOTREACHED */
3120                 }
3121                 if (has_cutgroup) {
3122                     PUSH_YES_STATE_GOTO(TRIE_next, scan);
3123                     /* NOTREACHED */
3124                 } else {
3125                     PUSH_STATE_GOTO(TRIE_next, scan);
3126                     /* NOTREACHED */
3127                 }
3128                 /* NOTREACHED */
3129             }
3130             /* NOTREACHED */
3131         case TRIE_next:
3132             FREETMPS;
3133             LEAVE;
3134             sayYES;
3135 #undef  ST
3136
3137         case EXACT: {
3138             char *s = STRING(scan);
3139             ln = STR_LEN(scan);
3140             if (do_utf8 != UTF) {
3141                 /* The target and the pattern have differing utf8ness. */
3142                 char *l = locinput;
3143                 const char * const e = s + ln;
3144
3145                 if (do_utf8) {
3146                     /* The target is utf8, the pattern is not utf8. */
3147                     while (s < e) {
3148                         STRLEN ulen;
3149                         if (l >= PL_regeol)
3150                              sayNO;
3151                         if (NATIVE_TO_UNI(*(U8*)s) !=
3152                             utf8n_to_uvuni((U8*)l, UTF8_MAXBYTES, &ulen,
3153                                             uniflags))
3154                              sayNO;
3155                         l += ulen;
3156                         s ++;
3157                     }
3158                 }
3159                 else {
3160                     /* The target is not utf8, the pattern is utf8. */
3161                     while (s < e) {
3162                         STRLEN ulen;
3163                         if (l >= PL_regeol)
3164                             sayNO;
3165                         if (NATIVE_TO_UNI(*((U8*)l)) !=
3166                             utf8n_to_uvuni((U8*)s, UTF8_MAXBYTES, &ulen,
3167                                            uniflags))
3168                             sayNO;
3169                         s += ulen;
3170                         l ++;
3171                     }
3172                 }
3173                 locinput = l;
3174                 nextchr = UCHARAT(locinput);
3175                 break;
3176             }
3177             /* The target and the pattern have the same utf8ness. */
3178             /* Inline the first character, for speed. */
3179             if (UCHARAT(s) != nextchr)
3180                 sayNO;
3181             if (PL_regeol - locinput < ln)
3182                 sayNO;
3183             if (ln > 1 && memNE(s, locinput, ln))
3184                 sayNO;
3185             locinput += ln;
3186             nextchr = UCHARAT(locinput);
3187             break;
3188             }
3189         case EXACTFL:
3190             PL_reg_flags |= RF_tainted;
3191             /* FALL THROUGH */
3192         case EXACTF: {
3193             char * const s = STRING(scan);
3194             ln = STR_LEN(scan);
3195
3196             if (do_utf8 || UTF) {
3197               /* Either target or the pattern are utf8. */
3198                 const char * const l = locinput;
3199                 char *e = PL_regeol;
3200
3201                 if (ibcmp_utf8(s, 0,  ln, (bool)UTF,
3202                                l, &e, 0,  do_utf8)) {
3203                      /* One more case for the sharp s:
3204                       * pack("U0U*", 0xDF) =~ /ss/i,
3205                       * the 0xC3 0x9F are the UTF-8
3206                       * byte sequence for the U+00DF. */
3207                      if (!(do_utf8 &&
3208                            toLOWER(s[0]) == 's' &&
3209                            ln >= 2 &&
3210                            toLOWER(s[1]) == 's' &&
3211                            (U8)l[0] == 0xC3 &&
3212                            e - l >= 2 &&
3213                            (U8)l[1] == 0x9F))
3214                           sayNO;
3215                 }
3216                 locinput = e;
3217                 nextchr = UCHARAT(locinput);
3218                 break;
3219             }
3220
3221             /* Neither the target and the pattern are utf8. */
3222
3223             /* Inline the first character, for speed. */
3224             if (UCHARAT(s) != nextchr &&
3225                 UCHARAT(s) != ((OP(scan) == EXACTF)
3226                                ? PL_fold : PL_fold_locale)[nextchr])
3227                 sayNO;
3228             if (PL_regeol - locinput < ln)
3229                 sayNO;
3230             if (ln > 1 && (OP(scan) == EXACTF
3231                            ? ibcmp(s, locinput, ln)
3232                            : ibcmp_locale(s, locinput, ln)))
3233                 sayNO;
3234             locinput += ln;
3235             nextchr = UCHARAT(locinput);
3236             break;
3237             }
3238         case ANYOF:
3239             if (do_utf8) {
3240                 STRLEN inclasslen = PL_regeol - locinput;
3241
3242                 if (!reginclass(rex, scan, (U8*)locinput, &inclasslen, do_utf8))
3243                     goto anyof_fail;
3244                 if (locinput >= PL_regeol)
3245                     sayNO;
3246                 locinput += inclasslen ? inclasslen : UTF8SKIP(locinput);
3247                 nextchr = UCHARAT(locinput);
3248                 break;
3249             }
3250             else {
3251                 if (nextchr < 0)
3252                     nextchr = UCHARAT(locinput);
3253                 if (!REGINCLASS(rex, scan, (U8*)locinput))
3254                     goto anyof_fail;
3255                 if (!nextchr && locinput >= PL_regeol)
3256                     sayNO;
3257                 nextchr = UCHARAT(++locinput);
3258                 break;
3259             }
3260         anyof_fail:
3261             /* If we might have the case of the German sharp s
3262              * in a casefolding Unicode character class. */
3263
3264             if (ANYOF_FOLD_SHARP_S(scan, locinput, PL_regeol)) {
3265                  locinput += SHARP_S_SKIP;
3266                  nextchr = UCHARAT(locinput);
3267             }
3268             else
3269                  sayNO;
3270             break;
3271         case ALNUML:
3272             PL_reg_flags |= RF_tainted;
3273             /* FALL THROUGH */
3274         case ALNUM:
3275             if (!nextchr)
3276                 sayNO;
3277             if (do_utf8) {
3278                 LOAD_UTF8_CHARCLASS_ALNUM();
3279                 if (!(OP(scan) == ALNUM
3280                       ? (bool)swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
3281                       : isALNUM_LC_utf8((U8*)locinput)))
3282                 {
3283                     sayNO;
3284                 }
3285                 locinput += PL_utf8skip[nextchr];
3286                 nextchr = UCHARAT(locinput);
3287                 break;
3288             }
3289             if (!(OP(scan) == ALNUM
3290                   ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
3291                 sayNO;
3292             nextchr = UCHARAT(++locinput);
3293             break;
3294         case NALNUML:
3295             PL_reg_flags |= RF_tainted;
3296             /* FALL THROUGH */
3297         case NALNUM:
3298             if (!nextchr && locinput >= PL_regeol)
3299                 sayNO;
3300             if (do_utf8) {
3301                 LOAD_UTF8_CHARCLASS_ALNUM();
3302                 if (OP(scan) == NALNUM
3303                     ? (bool)swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
3304                     : isALNUM_LC_utf8((U8*)locinput))
3305                 {
3306                     sayNO;
3307                 }
3308                 locinput += PL_utf8skip[nextchr];
3309                 nextchr = UCHARAT(locinput);
3310                 break;
3311             }
3312             if (OP(scan) == NALNUM
3313                 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
3314                 sayNO;
3315             nextchr = UCHARAT(++locinput);
3316             break;
3317         case BOUNDL:
3318         case NBOUNDL:
3319             PL_reg_flags |= RF_tainted;
3320             /* FALL THROUGH */
3321         case BOUND:
3322         case NBOUND:
3323             /* was last char in word? */
3324             if (do_utf8) {
3325                 if (locinput == PL_bostr)
3326                     ln = '\n';
3327                 else {
3328                     const U8 * const r = reghop3((U8*)locinput, -1, (U8*)PL_bostr);
3329
3330                     ln = utf8n_to_uvchr(r, UTF8SKIP(r), 0, uniflags);
3331                 }
3332                 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
3333                     ln = isALNUM_uni(ln);
3334                     LOAD_UTF8_CHARCLASS_ALNUM();
3335                     n = swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8);
3336                 }
3337                 else {
3338                     ln = isALNUM_LC_uvchr(UNI_TO_NATIVE(ln));
3339                     n = isALNUM_LC_utf8((U8*)locinput);
3340                 }
3341             }
3342             else {
3343                 ln = (locinput != PL_bostr) ?
3344                     UCHARAT(locinput - 1) : '\n';
3345                 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
3346                     ln = isALNUM(ln);
3347                     n = isALNUM(nextchr);
3348                 }
3349                 else {
3350                     ln = isALNUM_LC(ln);
3351                     n = isALNUM_LC(nextchr);
3352                 }
3353             }
3354             if (((!ln) == (!n)) == (OP(scan) == BOUND ||
3355                                     OP(scan) == BOUNDL))
3356                     sayNO;
3357             break;
3358         case SPACEL:
3359             PL_reg_flags |= RF_tainted;
3360             /* FALL THROUGH */
3361         case SPACE:
3362             if (!nextchr)
3363                 sayNO;
3364             if (do_utf8) {
3365                 if (UTF8_IS_CONTINUED(nextchr)) {
3366                     LOAD_UTF8_CHARCLASS_SPACE();
3367                     if (!(OP(scan) == SPACE
3368                           ? (bool)swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
3369                           : isSPACE_LC_utf8((U8*)locinput)))
3370                     {
3371                         sayNO;
3372                     }
3373                     locinput += PL_utf8skip[nextchr];
3374                     nextchr = UCHARAT(locinput);
3375                     break;
3376                 }
3377                 if (!(OP(scan) == SPACE
3378                       ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
3379                     sayNO;
3380                 nextchr = UCHARAT(++locinput);
3381             }
3382             else {
3383                 if (!(OP(scan) == SPACE
3384                       ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
3385                     sayNO;
3386                 nextchr = UCHARAT(++locinput);
3387             }
3388             break;
3389         case NSPACEL:
3390             PL_reg_flags |= RF_tainted;
3391             /* FALL THROUGH */
3392         case NSPACE:
3393             if (!nextchr && locinput >= PL_regeol)
3394                 sayNO;
3395             if (do_utf8) {
3396                 LOAD_UTF8_CHARCLASS_SPACE();
3397                 if (OP(scan) == NSPACE
3398                     ? (bool)swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
3399                     : isSPACE_LC_utf8((U8*)locinput))
3400                 {
3401                     sayNO;
3402                 }
3403                 locinput += PL_utf8skip[nextchr];
3404                 nextchr = UCHARAT(locinput);
3405                 break;
3406             }
3407             if (OP(scan) == NSPACE
3408                 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
3409                 sayNO;
3410             nextchr = UCHARAT(++locinput);
3411             break;
3412         case DIGITL:
3413             PL_reg_flags |= RF_tainted;
3414             /* FALL THROUGH */
3415         case DIGIT:
3416             if (!nextchr)
3417                 sayNO;
3418             if (do_utf8) {
3419                 LOAD_UTF8_CHARCLASS_DIGIT();
3420                 if (!(OP(scan) == DIGIT
3421                       ? (bool)swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
3422                       : isDIGIT_LC_utf8((U8*)locinput)))
3423                 {
3424                     sayNO;
3425                 }
3426                 locinput += PL_utf8skip[nextchr];
3427                 nextchr = UCHARAT(locinput);
3428                 break;
3429             }
3430             if (!(OP(scan) == DIGIT
3431                   ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
3432                 sayNO;
3433             nextchr = UCHARAT(++locinput);
3434             break;
3435         case NDIGITL:
3436             PL_reg_flags |= RF_tainted;
3437             /* FALL THROUGH */
3438         case NDIGIT:
3439             if (!nextchr && locinput >= PL_regeol)
3440                 sayNO;
3441             if (do_utf8) {
3442                 LOAD_UTF8_CHARCLASS_DIGIT();
3443                 if (OP(scan) == NDIGIT
3444                     ? (bool)swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
3445                     : isDIGIT_LC_utf8((U8*)locinput))
3446                 {
3447                     sayNO;
3448                 }
3449                 locinput += PL_utf8skip[nextchr];
3450                 nextchr = UCHARAT(locinput);
3451                 break;
3452             }
3453             if (OP(scan) == NDIGIT
3454                 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
3455                 sayNO;
3456             nextchr = UCHARAT(++locinput);
3457             break;
3458         case CLUMP:
3459             if (locinput >= PL_regeol)
3460                 sayNO;
3461             if  (do_utf8) {
3462                 LOAD_UTF8_CHARCLASS_MARK();
3463                 if (swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
3464                     sayNO;
3465                 locinput += PL_utf8skip[nextchr];
3466                 while (locinput < PL_regeol &&
3467                        swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
3468                     locinput += UTF8SKIP(locinput);
3469                 if (locinput > PL_regeol)
3470                     sayNO;
3471             }
3472             else
3473                locinput++;
3474             nextchr = UCHARAT(locinput);
3475             break;
3476
3477         case NREFFL:
3478         {
3479             char *s;
3480             char type;
3481             PL_reg_flags |= RF_tainted;
3482             /* FALL THROUGH */
3483         case NREF:
3484         case NREFF:
3485             type = OP(scan);
3486             n = reg_check_named_buff_matched(rex,scan);
3487
3488             if ( n ) {
3489                 type = REF + ( type - NREF );
3490                 goto do_ref;
3491             } else {
3492                 sayNO;
3493             }
3494             /* unreached */
3495         case REFFL:
3496             PL_reg_flags |= RF_tainted;
3497             /* FALL THROUGH */
3498         case REF:
3499         case REFF:
3500             n = ARG(scan);  /* which paren pair */
3501             type = OP(scan);
3502           do_ref:
3503             ln = PL_regstartp[n];
3504             PL_reg_leftiter = PL_reg_maxiter;           /* Void cache */
3505             if (*PL_reglastparen < n || ln == -1)
3506                 sayNO;                  /* Do not match unless seen CLOSEn. */
3507             if (ln == PL_regendp[n])
3508                 break;
3509
3510             s = PL_bostr + ln;
3511             if (do_utf8 && type != REF) {       /* REF can do byte comparison */
3512                 char *l = locinput;
3513                 const char *e = PL_bostr + PL_regendp[n];
3514                 /*
3515                  * Note that we can't do the "other character" lookup trick as
3516                  * in the 8-bit case (no pun intended) because in Unicode we
3517                  * have to map both upper and title case to lower case.
3518                  */
3519                 if (type == REFF) {
3520                     while (s < e) {
3521                         STRLEN ulen1, ulen2;
3522                         U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
3523                         U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
3524
3525                         if (l >= PL_regeol)
3526                             sayNO;
3527                         toLOWER_utf8((U8*)s, tmpbuf1, &ulen1);
3528                         toLOWER_utf8((U8*)l, tmpbuf2, &ulen2);
3529                         if (ulen1 != ulen2 || memNE((char *)tmpbuf1, (char *)tmpbuf2, ulen1))
3530                             sayNO;
3531                         s += ulen1;
3532                         l += ulen2;
3533                     }
3534                 }
3535                 locinput = l;
3536                 nextchr = UCHARAT(locinput);
3537                 break;
3538             }
3539
3540             /* Inline the first character, for speed. */
3541             if (UCHARAT(s) != nextchr &&
3542                 (type == REF ||
3543                  (UCHARAT(s) != (type == REFF
3544                                   ? PL_fold : PL_fold_locale)[nextchr])))
3545                 sayNO;
3546             ln = PL_regendp[n] - ln;
3547             if (locinput + ln > PL_regeol)
3548                 sayNO;
3549             if (ln > 1 && (type == REF
3550                            ? memNE(s, locinput, ln)
3551                            : (type == REFF
3552                               ? ibcmp(s, locinput, ln)
3553                               : ibcmp_locale(s, locinput, ln))))
3554                 sayNO;
3555             locinput += ln;
3556             nextchr = UCHARAT(locinput);
3557             break;
3558         }
3559         case NOTHING:
3560         case TAIL:
3561             break;
3562         case BACK:
3563             break;
3564
3565 #undef  ST
3566 #define ST st->u.eval
3567         {
3568             SV *ret;
3569             regexp *re;
3570             regexp_internal *rei;
3571             regnode *startpoint;
3572
3573         case GOSTART:
3574         case GOSUB: /*    /(...(?1))/   /(...(?&foo))/   */
3575             if (cur_eval && cur_eval->locinput==locinput) {
3576                 if (cur_eval->u.eval.close_paren == (U32)ARG(scan))
3577                     Perl_croak(aTHX_ "Infinite recursion in regex");
3578                 if ( ++nochange_depth > max_nochange_depth )
3579                     Perl_croak(aTHX_
3580                         "Pattern subroutine nesting without pos change"
3581                         " exceeded limit in regex");
3582             } else {
3583                 nochange_depth = 0;
3584             }
3585             re = rex;
3586             rei = rexi;
3587             (void)ReREFCNT_inc(rex);
3588             if (OP(scan)==GOSUB) {
3589                 startpoint = scan + ARG2L(scan);
3590                 ST.close_paren = ARG(scan);
3591             } else {
3592                 startpoint = rei->program+1;
3593                 ST.close_paren = 0;
3594             }
3595             goto eval_recurse_doit;
3596             /* NOTREACHED */
3597         case EVAL:  /*   /(?{A})B/   /(??{A})B/  and /(?(?{A})X|Y)B/   */
3598             if (cur_eval && cur_eval->locinput==locinput) {
3599                 if ( ++nochange_depth > max_nochange_depth )
3600                     Perl_croak(aTHX_ "EVAL without pos change exceeded limit in regex");
3601             } else {
3602                 nochange_depth = 0;
3603             }
3604             {
3605                 /* execute the code in the {...} */
3606                 dSP;
3607                 SV ** const before = SP;
3608                 OP_4tree * const oop = PL_op;
3609                 COP * const ocurcop = PL_curcop;
3610                 PAD *old_comppad;
3611
3612                 n = ARG(scan);
3613                 PL_op = (OP_4tree*)rexi->data->data[n];
3614                 DEBUG_STATE_r( PerlIO_printf(Perl_debug_log,
3615                     "  re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
3616                 PAD_SAVE_LOCAL(old_comppad, (PAD*)rexi->data->data[n + 2]);
3617                 PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
3618
3619                 if (sv_yes_mark) {
3620                     SV *sv_mrk = get_sv("REGMARK", 1);
3621                     sv_setsv(sv_mrk, sv_yes_mark);
3622                 }
3623
3624                 CALLRUNOPS(aTHX);                       /* Scalar context. */
3625                 SPAGAIN;
3626                 if (SP == before)
3627                     ret = &PL_sv_undef;   /* protect against empty (?{}) blocks. */
3628                 else {
3629                     ret = POPs;
3630                     PUTBACK;
3631                 }
3632
3633                 PL_op = oop;
3634                 PAD_RESTORE_LOCAL(old_comppad);
3635                 PL_curcop = ocurcop;
3636                 if (!logical) {
3637                     /* /(?{...})/ */
3638                     sv_setsv(save_scalar(PL_replgv), ret);
3639                     break;
3640                 }
3641             }
3642             if (logical == 2) { /* Postponed subexpression: /(??{...})/ */
3643                 logical = 0;
3644                 {
3645                     /* extract RE object from returned value; compiling if
3646                      * necessary */
3647
3648                     MAGIC *mg = NULL;
3649                     const SV *sv;
3650                     if(SvROK(ret) && SvSMAGICAL(sv = SvRV(ret)))
3651                         mg = mg_find(sv, PERL_MAGIC_qr);
3652                     else if (SvSMAGICAL(ret)) {
3653                         if (SvGMAGICAL(ret))
3654                             sv_unmagic(ret, PERL_MAGIC_qr);
3655                         else
3656                             mg = mg_find(ret, PERL_MAGIC_qr);
3657                     }
3658
3659                     if (mg) {
3660                         re = reg_temp_copy((regexp *)mg->mg_obj); /*XXX:dmq*/
3661                     }
3662                     else {
3663                         STRLEN len;
3664                         const char * const t = SvPV_const(ret, len);
3665                         PMOP pm;
3666                         const I32 osize = PL_regsize;
3667
3668                         Zero(&pm, 1, PMOP);
3669                         if (DO_UTF8(ret)) pm.op_pmdynflags |= PMdf_DYN_UTF8;
3670                         re = CALLREGCOMP((char*)t, (char*)t + len, &pm);
3671                         if (!(SvFLAGS(ret)
3672                               & (SVs_TEMP | SVs_PADTMP | SVf_READONLY
3673                                 | SVs_GMG)))
3674                             sv_magic(ret,(SV*)ReREFCNT_inc(re),
3675                                         PERL_MAGIC_qr,0,0);
3676                         PL_regsize = osize;
3677                     }
3678                 }
3679                 RX_MATCH_COPIED_off(re);
3680                 re->subbeg = rex->subbeg;
3681                 re->sublen = rex->sublen;
3682                 rei = RXi_GET(re);
3683                 DEBUG_EXECUTE_r(
3684                     debug_start_match(re, do_utf8, locinput, PL_regeol,
3685                         "Matching embedded");
3686                 );
3687                 startpoint = rei->program + 1;
3688                 ST.close_paren = 0; /* only used for GOSUB */
3689                 /* borrowed from regtry */
3690                 if (PL_reg_start_tmpl <= re->nparens) {
3691                     PL_reg_start_tmpl = re->nparens*3/2 + 3;
3692                     if(PL_reg_start_tmp)
3693                         Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
3694                     else
3695                         Newx(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
3696                 }
3697
3698         eval_recurse_doit: /* Share code with GOSUB below this line */
3699                 /* run the pattern returned from (??{...}) */
3700                 ST.cp = regcppush(0);   /* Save *all* the positions. */
3701                 REGCP_SET(ST.lastcp);
3702
3703                 PL_regstartp = re->startp; /* essentially NOOP on GOSUB */
3704                 PL_regendp = re->endp;     /* essentially NOOP on GOSUB */
3705
3706                 *PL_reglastparen = 0;
3707                 *PL_reglastcloseparen = 0;
3708                 PL_reginput = locinput;
3709                 PL_regsize = 0;
3710
3711                 /* XXXX This is too dramatic a measure... */
3712                 PL_reg_maxiter = 0;
3713
3714                 ST.toggle_reg_flags = PL_reg_flags;
3715                 if (re->extflags & RXf_UTF8)
3716                     PL_reg_flags |= RF_utf8;
3717                 else
3718                     PL_reg_flags &= ~RF_utf8;
3719                 ST.toggle_reg_flags ^= PL_reg_flags; /* diff of old and new */
3720
3721                 ST.prev_rex = rex;
3722                 ST.prev_curlyx = cur_curlyx;
3723                 SETREX(rex,re);
3724                 rexi = rei;
3725                 cur_curlyx = NULL;
3726                 ST.B = next;
3727                 ST.prev_eval = cur_eval;
3728                 cur_eval = st;
3729                 /* now continue from first node in postoned RE */
3730                 PUSH_YES_STATE_GOTO(EVAL_AB, startpoint);
3731                 /* NOTREACHED */
3732             }
3733             /* logical is 1,   /(?(?{...})X|Y)/ */
3734             sw = (bool)SvTRUE(ret);
3735             logical = 0;
3736             break;
3737         }
3738
3739         case EVAL_AB: /* cleanup after a successful (??{A})B */
3740             /* note: this is called twice; first after popping B, then A */
3741             PL_reg_flags ^= ST.toggle_reg_flags;
3742             ReREFCNT_dec(rex);
3743             SETREX(rex,ST.prev_rex);
3744             rexi = RXi_GET(rex);
3745             regcpblow(ST.cp);
3746             cur_eval = ST.prev_eval;
3747             cur_curlyx = ST.prev_curlyx;
3748             /* XXXX This is too dramatic a measure... */
3749             PL_reg_maxiter = 0;
3750             if ( nochange_depth )
3751                 nochange_depth--;
3752             sayYES;
3753
3754
3755         case EVAL_AB_fail: /* unsuccessfully ran A or B in (??{A})B */
3756             /* note: this is called twice; first after popping B, then A */
3757             PL_reg_flags ^= ST.toggle_reg_flags;
3758             ReREFCNT_dec(rex);
3759             SETREX(rex,ST.prev_rex);
3760             rexi = RXi_GET(rex);
3761             PL_reginput = locinput;
3762             REGCP_UNWIND(ST.lastcp);
3763             regcppop(rex);
3764             cur_eval = ST.prev_eval;
3765             cur_curlyx = ST.prev_curlyx;
3766             /* XXXX This is too dramatic a measure... */
3767             PL_reg_maxiter = 0;
3768             if ( nochange_depth )
3769                 nochange_depth--;
3770             sayNO_SILENT;
3771 #undef ST
3772
3773         case OPEN:
3774             n = ARG(scan);  /* which paren pair */
3775             PL_reg_start_tmp[n] = locinput;
3776             if (n > PL_regsize)
3777                 PL_regsize = n;
3778             lastopen = n;
3779             break;
3780         case CLOSE:
3781             n = ARG(scan);  /* which paren pair */
3782             PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
3783             PL_regendp[n] = locinput - PL_bostr;
3784             /*if (n > PL_regsize)
3785                 PL_regsize = n;*/
3786             if (n > *PL_reglastparen)
3787                 *PL_reglastparen = n;
3788             *PL_reglastcloseparen = n;
3789             if (cur_eval && cur_eval->u.eval.close_paren == n) {
3790                 goto fake_end;
3791             }
3792             break;
3793         case ACCEPT:
3794             if (ARG(scan)){
3795                 regnode *cursor;
3796                 for (cursor=scan;
3797                      cursor && OP(cursor)!=END;
3798                      cursor=regnext(cursor))
3799                 {
3800                     if ( OP(cursor)==CLOSE ){
3801                         n = ARG(cursor);
3802                         if ( n <= lastopen ) {
3803                             PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
3804                             PL_regendp[n] = locinput - PL_bostr;
3805                             /*if (n > PL_regsize)
3806                             PL_regsize = n;*/
3807                             if (n > *PL_reglastparen)
3808                                 *PL_reglastparen = n;
3809                             *PL_reglastcloseparen = n;
3810                             if ( n == ARG(scan) || (cur_eval &&
3811                                 cur_eval->u.eval.close_paren == n))
3812                                 break;
3813                         }
3814                     }
3815                 }
3816             }
3817             goto fake_end;
3818             /*NOTREACHED*/
3819         case GROUPP:
3820             n = ARG(scan);  /* which paren pair */
3821             sw = (bool)(*PL_reglastparen >= n && PL_regendp[n] != -1);
3822             break;
3823         case NGROUPP:
3824             /* reg_check_named_buff_matched returns 0 for no match */
3825             sw = (bool)(0 < reg_check_named_buff_matched(rex,scan));
3826             break;
3827         case INSUBP:
3828             n = ARG(scan);
3829             sw = (cur_eval && (!n || cur_eval->u.eval.close_paren == n));
3830             break;
3831         case DEFINEP:
3832             sw = 0;
3833             break;
3834         case IFTHEN:
3835             PL_reg_leftiter = PL_reg_maxiter;           /* Void cache */
3836             if (sw)
3837                 next = NEXTOPER(NEXTOPER(scan));
3838             else {
3839                 next = scan + ARG(scan);
3840                 if (OP(next) == IFTHEN) /* Fake one. */
3841                     next = NEXTOPER(NEXTOPER(next));
3842             }
3843             break;
3844         case LOGICAL:
3845             logical = scan->flags;
3846             break;
3847
3848 /*******************************************************************
3849
3850 The CURLYX/WHILEM pair of ops handle the most generic case of the /A*B/
3851 pattern, where A and B are subpatterns. (For simple A, CURLYM or
3852 STAR/PLUS/CURLY/CURLYN are used instead.)
3853
3854 A*B is compiled as <CURLYX><A><WHILEM><B>
3855
3856 On entry to the subpattern, CURLYX is called. This pushes a CURLYX
3857 state, which contains the current count, initialised to -1. It also sets
3858 cur_curlyx to point to this state, with any previous value saved in the
3859 state block.
3860
3861 CURLYX then jumps straight to the WHILEM op, rather than executing A,
3862 since the pattern may possibly match zero times (i.e. it's a while {} loop
3863 rather than a do {} while loop).
3864
3865 Each entry to WHILEM represents a successful match of A. The count in the
3866 CURLYX block is incremented, another WHILEM state is pushed, and execution
3867 passes to A or B depending on greediness and the current count.
3868
3869 For example, if matching against the string a1a2a3b (where the aN are
3870 substrings that match /A/), then the match progresses as follows: (the
3871 pushed states are interspersed with the bits of strings matched so far):
3872
3873     <CURLYX cnt=-1>
3874     <CURLYX cnt=0><WHILEM>
3875     <CURLYX cnt=1><WHILEM> a1 <WHILEM>
3876     <CURLYX cnt=2><WHILEM> a1 <WHILEM> a2 <WHILEM>
3877     <CURLYX cnt=3><WHILEM> a1 <WHILEM> a2 <WHILEM> a3 <WHILEM>
3878     <CURLYX cnt=3><WHILEM> a1 <WHILEM> a2 <WHILEM> a3 <WHILEM> b
3879
3880 (Contrast this with something like CURLYM, which maintains only a single
3881 backtrack state:
3882
3883     <CURLYM cnt=0> a1
3884     a1 <CURLYM cnt=1> a2
3885     a1 a2 <CURLYM cnt=2> a3
3886     a1 a2 a3 <CURLYM cnt=3> b
3887 )
3888
3889 Each WHILEM state block marks a point to backtrack to upon partial failure
3890 of A or B, and also contains some minor state data related to that
3891 iteration.  The CURLYX block, pointed to by cur_curlyx, contains the
3892 overall state, such as the count, and pointers to the A and B ops.
3893
3894 This is complicated slightly by nested CURLYX/WHILEM's. Since cur_curlyx
3895 must always point to the *current* CURLYX block, the rules are:
3896
3897 When executing CURLYX, save the old cur_curlyx in the CURLYX state block,
3898 and set cur_curlyx to point the new block.
3899
3900 When popping the CURLYX block after a successful or unsuccessful match,
3901 restore the previous cur_curlyx.
3902
3903 When WHILEM is about to execute B, save the current cur_curlyx, and set it
3904 to the outer one saved in the CURLYX block.
3905
3906 When popping the WHILEM block after a successful or unsuccessful B match,
3907 restore the previous cur_curlyx.
3908
3909 Here's an example for the pattern (AI* BI)*BO
3910 I and O refer to inner and outer, C and W refer to CURLYX and WHILEM:
3911
3912 cur_
3913 curlyx backtrack stack
3914 ------ ---------------
3915 NULL
3916 CO     <CO prev=NULL> <WO>
3917 CI     <CO prev=NULL> <WO> <CI prev=CO> <WI> ai
3918 CO     <CO prev=NULL> <WO> <CI prev=CO> <WI> ai <WI prev=CI> bi
3919 NULL   <CO prev=NULL> <WO> <CI prev=CO> <WI> ai <WI prev=CI> bi <WO prev=CO> bo
3920
3921 At this point the pattern succeeds, and we work back down the stack to
3922 clean up, restoring as we go:
3923
3924 CO     <CO prev=NULL> <WO> <CI prev=CO> <WI> ai <WI prev=CI> bi
3925 CI     <CO prev=NULL> <WO> <CI prev=CO> <WI> ai
3926 CO     <CO prev=NULL> <WO>
3927 NULL
3928
3929 *******************************************************************/
3930
3931 #define ST st->u.curlyx
3932
3933         case CURLYX:    /* start of /A*B/  (for complex A) */
3934         {
3935             /* No need to save/restore up to this paren */
3936             I32 parenfloor = scan->flags;
3937
3938             assert(next); /* keep Coverity happy */
3939             if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
3940                 next += ARG(next);
3941
3942             /* XXXX Probably it is better to teach regpush to support
3943                parenfloor > PL_regsize... */
3944             if (parenfloor > (I32)*PL_reglastparen)
3945                 parenfloor = *PL_reglastparen; /* Pessimization... */
3946
3947             ST.prev_curlyx= cur_curlyx;
3948             cur_curlyx = st;
3949             ST.cp = PL_savestack_ix;
3950
3951             /* these fields contain the state of the current curly.
3952              * they are accessed by subsequent WHILEMs */
3953             ST.parenfloor = parenfloor;
3954             ST.min = ARG1(scan);
3955             ST.max = ARG2(scan);
3956             ST.A = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
3957             ST.B = next;
3958             ST.minmod = minmod;
3959             minmod = 0;
3960             ST.count = -1;      /* this will be updated by WHILEM */
3961             ST.lastloc = NULL;  /* this will be updated by WHILEM */
3962
3963             PL_reginput = locinput;
3964             PUSH_YES_STATE_GOTO(CURLYX_end, PREVOPER(next));
3965             /* NOTREACHED */
3966         }
3967
3968         case CURLYX_end: /* just finished matching all of A*B */
3969             if (PL_reg_eval_set){
3970                 SV *pres= GvSV(PL_replgv);
3971                 SvREFCNT_inc(pres);
3972                 regcpblow(ST.cp);
3973                 sv_setsv(GvSV(PL_replgv), pres);
3974                 SvREFCNT_dec(pres);
3975             } else {
3976                 regcpblow(ST.cp);
3977             }
3978             cur_curlyx = ST.prev_curlyx;
3979             sayYES;
3980             /* NOTREACHED */
3981
3982         case CURLYX_end_fail: /* just failed to match all of A*B */
3983             regcpblow(ST.cp);
3984             cur_curlyx = ST.prev_curlyx;
3985             sayNO;
3986             /* NOTREACHED */
3987
3988
3989 #undef ST
3990 #define ST st->u.whilem
3991
3992         case WHILEM:     /* just matched an A in /A*B/  (for complex A) */
3993         {
3994             /* see the discussion above about CURLYX/WHILEM */
3995             I32 n;
3996             assert(cur_curlyx); /* keep Coverity happy */
3997             n = ++cur_curlyx->u.curlyx.count; /* how many A's matched */
3998             ST.save_lastloc = cur_curlyx->u.curlyx.lastloc;
3999             ST.cache_offset = 0;
4000             ST.cache_mask = 0;
4001
4002             PL_reginput = locinput;
4003
4004             DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
4005                   "%*s  whilem: matched %ld out of %ld..%ld\n",
4006                   REPORT_CODE_OFF+depth*2, "", (long)n,
4007                   (long)cur_curlyx->u.curlyx.min,
4008                   (long)cur_curlyx->u.curlyx.max)
4009             );
4010
4011             /* First just match a string of min A's. */
4012
4013             if (n < cur_curlyx->u.curlyx.min) {
4014                 cur_curlyx->u.curlyx.lastloc = locinput;
4015                 PUSH_STATE_GOTO(WHILEM_A_pre, cur_curlyx->u.curlyx.A);
4016                 /* NOTREACHED */
4017             }
4018
4019             /* If degenerate A matches "", assume A done. */
4020
4021             if (locinput == cur_curlyx->u.curlyx.lastloc) {
4022                 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
4023                    "%*s  whilem: empty match detected, trying continuation...\n",
4024                    REPORT_CODE_OFF+depth*2, "")
4025                 );
4026                 goto do_whilem_B_max;
4027             }
4028
4029             /* super-linear cache processing */
4030
4031             if (scan->flags) {
4032
4033                 if (!PL_reg_maxiter) {
4034                     /* start the countdown: Postpone detection until we
4035                      * know the match is not *that* much linear. */
4036                     PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
4037                     /* possible overflow for long strings and many CURLYX's */
4038                     if (PL_reg_maxiter < 0)
4039                         PL_reg_maxiter = I32_MAX;
4040                     PL_reg_leftiter = PL_reg_maxiter;
4041                 }
4042
4043                 if (PL_reg_leftiter-- == 0) {
4044                     /* initialise cache */
4045                     const I32 size = (PL_reg_maxiter + 7)/8;
4046                     if (PL_reg_poscache) {
4047                         if ((I32)PL_reg_poscache_size < size) {
4048                             Renew(PL_reg_poscache, size, char);
4049                             PL_reg_poscache_size = size;
4050                         }
4051                         Zero(PL_reg_poscache, size, char);
4052                     }
4053                     else {
4054                         PL_reg_poscache_size = size;
4055                         Newxz(PL_reg_poscache, size, char);
4056                     }
4057                     DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
4058       "%swhilem: Detected a super-linear match, switching on caching%s...\n",
4059                               PL_colors[4], PL_colors[5])
4060                     );
4061                 }
4062
4063                 if (PL_reg_leftiter < 0) {
4064                     /* have we already failed at this position? */
4065                     I32 offset, mask;
4066                     offset  = (scan->flags & 0xf) - 1
4067                                 + (locinput - PL_bostr)  * (scan->flags>>4);
4068                     mask    = 1 << (offset % 8);
4069                     offset /= 8;
4070                     if (PL_reg_poscache[offset] & mask) {
4071                         DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
4072                             "%*s  whilem: (cache) already tried at this position...\n",
4073                             REPORT_CODE_OFF+depth*2, "")
4074                         );
4075                         sayNO; /* cache records failure */
4076                     }
4077                     ST.cache_offset = offset;
4078                     ST.cache_mask   = mask;
4079                 }
4080             }
4081
4082             /* Prefer B over A for minimal matching. */
4083
4084             if (cur_curlyx->u.curlyx.minmod) {
4085                 ST.save_curlyx = cur_curlyx;
4086                 cur_curlyx = cur_curlyx->u.curlyx.prev_curlyx;
4087                 ST.cp = regcppush(ST.save_curlyx->u.curlyx.parenfloor);
4088                 REGCP_SET(ST.lastcp);
4089                 PUSH_YES_STATE_GOTO(WHILEM_B_min, ST.save_curlyx->u.curlyx.B);
4090                 /* NOTREACHED */
4091             }
4092
4093             /* Prefer A over B for maximal matching. */
4094
4095             if (n < cur_curlyx->u.curlyx.max) { /* More greed allowed? */
4096                 ST.cp = regcppush(cur_curlyx->u.curlyx.parenfloor);
4097                 cur_curlyx->u.curlyx.lastloc = locinput;
4098                 REGCP_SET(ST.lastcp);
4099                 PUSH_STATE_GOTO(WHILEM_A_max, cur_curlyx->u.curlyx.A);
4100                 /* NOTREACHED */
4101             }
4102             goto do_whilem_B_max;
4103         }
4104         /* NOTREACHED */
4105
4106         case WHILEM_B_min: /* just matched B in a minimal match */
4107         case WHILEM_B_max: /* just matched B in a maximal match */
4108             cur_curlyx = ST.save_curlyx;
4109             sayYES;
4110             /* NOTREACHED */
4111
4112         case WHILEM_B_max_fail: /* just failed to match B in a maximal match */
4113             cur_curlyx = ST.save_curlyx;
4114             cur_curlyx->u.curlyx.lastloc = ST.save_lastloc;
4115             cur_curlyx->u.curlyx.count--;
4116             CACHEsayNO;
4117             /* NOTREACHED */
4118
4119         case WHILEM_A_min_fail: /* just failed to match A in a minimal match */
4120             REGCP_UNWIND(ST.lastcp);
4121             regcppop(rex);
4122             /* FALL THROUGH */
4123         case WHILEM_A_pre_fail: /* just failed to match even minimal A */
4124             cur_curlyx->u.curlyx.lastloc = ST.save_lastloc;
4125             cur_curlyx->u.curlyx.count--;
4126             CACHEsayNO;
4127             /* NOTREACHED */
4128
4129         case WHILEM_A_max_fail: /* just failed to match A in a maximal match */
4130             REGCP_UNWIND(ST.lastcp);
4131             regcppop(rex);      /* Restore some previous $<digit>s? */
4132             PL_reginput = locinput;
4133             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
4134                 "%*s  whilem: failed, trying continuation...\n",
4135                 REPORT_CODE_OFF+depth*2, "")
4136             );
4137           do_whilem_B_max:
4138             if (cur_curlyx->u.curlyx.count >= REG_INFTY
4139                 && ckWARN(WARN_REGEXP)
4140                 && !(PL_reg_flags & RF_warned))
4141             {
4142                 PL_reg_flags |= RF_warned;
4143                 Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s limit (%d) exceeded",
4144                      "Complex regular subexpression recursion",
4145                      REG_INFTY - 1);
4146             }
4147
4148             /* now try B */
4149             ST.save_curlyx = cur_curlyx;
4150             cur_curlyx = cur_curlyx->u.curlyx.prev_curlyx;
4151             PUSH_YES_STATE_GOTO(WHILEM_B_max, ST.save_curlyx->u.curlyx.B);
4152             /* NOTREACHED */
4153
4154         case WHILEM_B_min_fail: /* just failed to match B in a minimal match */
4155             cur_curlyx = ST.save_curlyx;
4156             REGCP_UNWIND(ST.lastcp);
4157             regcppop(rex);
4158
4159             if (cur_curlyx->u.curlyx.count >= cur_curlyx->u.curlyx.max) {
4160                 /* Maximum greed exceeded */
4161                 if (cur_curlyx->u.curlyx.count >= REG_INFTY
4162                     && ckWARN(WARN_REGEXP)
4163                     && !(PL_reg_flags & RF_warned))
4164                 {
4165                     PL_reg_flags |= RF_warned;
4166                     Perl_warner(aTHX_ packWARN(WARN_REGEXP),
4167                         "%s limit (%d) exceeded",
4168                         "Complex regular subexpression recursion",
4169                         REG_INFTY - 1);
4170                 }
4171                 cur_curlyx->u.curlyx.count--;
4172                 CACHEsayNO;
4173             }
4174
4175             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
4176                 "%*s  trying longer...\n", REPORT_CODE_OFF+depth*2, "")
4177             );
4178             /* Try grabbing another A and see if it helps. */
4179             PL_reginput = locinput;
4180             cur_curlyx->u.curlyx.lastloc = locinput;
4181             ST.cp = regcppush(cur_curlyx->u.curlyx.parenfloor);
4182             REGCP_SET(ST.lastcp);
4183             PUSH_STATE_GOTO(WHILEM_A_min, ST.save_curlyx->u.curlyx.A);
4184             /* NOTREACHED */
4185
4186 #undef  ST
4187 #define ST st->u.branch
4188
4189         case BRANCHJ:       /*  /(...|A|...)/ with long next pointer */
4190             next = scan + ARG(scan);
4191             if (next == scan)
4192                 next = NULL;
4193             scan = NEXTOPER(scan);
4194             /* FALL THROUGH */
4195
4196         case BRANCH:        /*  /(...|A|...)/ */
4197             scan = NEXTOPER(scan); /* scan now points to inner node */
4198             if ((!next || (OP(next) != BRANCH && OP(next) != BRANCHJ))
4199                 && !has_cutgroup)
4200             {
4201                 /* last branch; skip state push and jump direct to node */
4202                 continue;
4203             }
4204             ST.lastparen = *PL_reglastparen;
4205             ST.next_branch = next;
4206             REGCP_SET(ST.cp);
4207             PL_reginput = locinput;
4208
4209             /* Now go into the branch */
4210             if (has_cutgroup) {
4211                 PUSH_YES_STATE_GOTO(BRANCH_next, scan);
4212             } else {
4213                 PUSH_STATE_GOTO(BRANCH_next, scan);
4214             }
4215             /* NOTREACHED */
4216         case CUTGROUP:
4217             PL_reginput = locinput;
4218             sv_yes_mark = st->u.mark.mark_name = scan->flags ? NULL :
4219                 (SV*)rexi->data->data[ ARG( scan ) ];
4220             PUSH_STATE_GOTO(CUTGROUP_next,next);
4221             /* NOTREACHED */
4222         case CUTGROUP_next_fail:
4223             do_cutgroup = 1;
4224             no_final = 1;
4225             if (st->u.mark.mark_name)
4226                 sv_commit = st->u.mark.mark_name;
4227             sayNO;
4228             /* NOTREACHED */
4229         case BRANCH_next:
4230             sayYES;
4231             /* NOTREACHED */
4232         case BRANCH_next_fail: /* that branch failed; try the next, if any */
4233             if (do_cutgroup) {
4234                 do_cutgroup = 0;
4235                 no_final = 0;
4236             }
4237             REGCP_UNWIND(ST.cp);
4238             for (n = *PL_reglastparen; n > ST.lastparen; n--)
4239                 PL_regendp[n] = -1;
4240             *PL_reglastparen = n;
4241             /*dmq: *PL_reglastcloseparen = n; */
4242             scan = ST.next_branch;
4243             /* no more branches? */
4244             if (!scan || (OP(scan) != BRANCH && OP(scan) != BRANCHJ)) {
4245                 DEBUG_EXECUTE_r({
4246                     PerlIO_printf( Perl_debug_log,
4247                         "%*s  %sBRANCH failed...%s\n",
4248                         REPORT_CODE_OFF+depth*2, "",
4249                         PL_colors[4],
4250                         PL_colors[5] );
4251                 });
4252                 sayNO_SILENT;
4253             }
4254             continue; /* execute next BRANCH[J] op */
4255             /* NOTREACHED */
4256
4257         case MINMOD:
4258             minmod = 1;
4259             break;
4260
4261 #undef  ST
4262 #define ST st->u.curlym
4263
4264         case CURLYM:    /* /A{m,n}B/ where A is fixed-length */
4265
4266             /* This is an optimisation of CURLYX that enables us to push
4267              * only a single backtracking state, no matter now many matches
4268              * there are in {m,n}. It relies on the pattern being constant
4269              * length, with no parens to influence future backrefs
4270              */
4271
4272             ST.me = scan;
4273             scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
4274
4275             /* if paren positive, emulate an OPEN/CLOSE around A */
4276             if (ST.me->flags) {
4277                 U32 paren = ST.me->flags;
4278                 if (paren > PL_regsize)
4279                     PL_regsize = paren;
4280                 if (paren > *PL_reglastparen)
4281                     *PL_reglastparen = paren;
4282                 scan += NEXT_OFF(scan); /* Skip former OPEN. */
4283             }
4284             ST.A = scan;
4285             ST.B = next;
4286             ST.alen = 0;
4287             ST.count = 0;
4288             ST.minmod = minmod;
4289             minmod = 0;
4290             ST.c1 = CHRTEST_UNINIT;
4291             REGCP_SET(ST.cp);
4292
4293             if (!(ST.minmod ? ARG1(ST.me) : ARG2(ST.me))) /* min/max */
4294                 goto curlym_do_B;
4295
4296           curlym_do_A: /* execute the A in /A{m,n}B/  */
4297             PL_reginput = locinput;
4298             PUSH_YES_STATE_GOTO(CURLYM_A, ST.A); /* match A */
4299             /* NOTREACHED */
4300
4301         case CURLYM_A: /* we've just matched an A */
4302             locinput = st->locinput;
4303             nextchr = UCHARAT(locinput);
4304
4305             ST.count++;
4306             /* after first match, determine A's length: u.curlym.alen */
4307             if (ST.count == 1) {
4308                 if (PL_reg_match_utf8) {
4309                     char *s = locinput;
4310                     while (s < PL_reginput) {
4311                         ST.alen++;
4312                         s += UTF8SKIP(s);
4313                     }
4314                 }
4315                 else {
4316                     ST.alen = PL_reginput - locinput;
4317                 }
4318                 if (ST.alen == 0)
4319                     ST.count = ST.minmod ? ARG1(ST.me) : ARG2(ST.me);
4320             }
4321             DEBUG_EXECUTE_r(
4322                 PerlIO_printf(Perl_debug_log,
4323                           "%*s  CURLYM now matched %"IVdf" times, len=%"IVdf"...\n",
4324                           (int)(REPORT_CODE_OFF+(depth*2)), "",
4325                           (IV) ST.count, (IV)ST.alen)
4326             );
4327
4328             locinput = PL_reginput;
4329
4330             if (cur_eval && cur_eval->u.eval.close_paren &&
4331                 cur_eval->u.eval.close_paren == (U32)ST.me->flags)
4332                 goto fake_end;
4333
4334             if ( ST.count < (ST.minmod ? ARG1(ST.me) : ARG2(ST.me)) )
4335                 goto curlym_do_A; /* try to match another A */
4336             goto curlym_do_B; /* try to match B */
4337
4338         case CURLYM_A_fail: /* just failed to match an A */
4339             REGCP_UNWIND(ST.cp);
4340
4341             if (ST.minmod || ST.count < ARG1(ST.me) /* min*/
4342                 || (cur_eval && cur_eval->u.eval.close_paren &&
4343                     cur_eval->u.eval.close_paren == (U32)ST.me->flags))
4344                 sayNO;
4345
4346           curlym_do_B: /* execute the B in /A{m,n}B/  */
4347             PL_reginput = locinput;
4348             if (ST.c1 == CHRTEST_UNINIT) {
4349                 /* calculate c1 and c2 for possible match of 1st char
4350                  * following curly */
4351                 ST.c1 = ST.c2 = CHRTEST_VOID;
4352                 if (HAS_TEXT(ST.B) || JUMPABLE(ST.B)) {
4353                     regnode *text_node = ST.B;
4354                     if (! HAS_TEXT(text_node))
4355                         FIND_NEXT_IMPT(text_node);
4356                     /* this used to be
4357
4358                         (HAS_TEXT(text_node) && PL_regkind[OP(text_node)] == EXACT)
4359
4360                         But the former is redundant in light of the latter.
4361
4362                         if this changes back then the macro for
4363                         IS_TEXT and friends need to change.
4364                      */
4365                     if (PL_regkind[OP(text_node)] == EXACT)
4366                     {
4367
4368                         ST.c1 = (U8)*STRING(text_node);
4369                         ST.c2 =
4370                             (IS_TEXTF(text_node))
4371                             ? PL_fold[ST.c1]
4372                             : (IS_TEXTFL(text_node))
4373                                 ? PL_fold_locale[ST.c1]
4374                                 : ST.c1;
4375                     }
4376                 }
4377             }
4378
4379             DEBUG_EXECUTE_r(
4380                 PerlIO_printf(Perl_debug_log,
4381                     "%*s  CURLYM trying tail with matches=%"IVdf"...\n",
4382                     (int)(REPORT_CODE_OFF+(depth*2)),
4383                     "", (IV)ST.count)
4384                 );
4385             if (ST.c1 != CHRTEST_VOID
4386                     && UCHARAT(PL_reginput) != ST.c1
4387                     && UCHARAT(PL_reginput) != ST.c2)
4388             {
4389                 /* simulate B failing */
4390                 DEBUG_OPTIMISE_r(
4391                     PerlIO_printf(Perl_debug_log,
4392                         "%*s  CURLYM Fast bail c1=%"IVdf" c2=%"IVdf"\n",
4393                         (int)(REPORT_CODE_OFF+(depth*2)),"",
4394                         (IV)ST.c1,(IV)ST.c2
4395                 ));
4396                 state_num = CURLYM_B_fail;
4397                 goto reenter_switch;
4398             }
4399
4400             if (ST.me->flags) {
4401                 /* mark current A as captured */
4402                 I32 paren = ST.me->flags;
4403                 if (ST.count) {
4404                     PL_regstartp[paren]
4405                         = HOPc(PL_reginput, -ST.alen) - PL_bostr;
4406                     PL_regendp[paren] = PL_reginput - PL_bostr;
4407                     /*dmq: *PL_reglastcloseparen = paren; */
4408                 }
4409                 else
4410                     PL_regendp[paren] = -1;
4411                 if (cur_eval && cur_eval->u.eval.close_paren &&
4412                     cur_eval->u.eval.close_paren == (U32)ST.me->flags)
4413                 {
4414                     if (ST.count)
4415                         goto fake_end;
4416                     else
4417                         sayNO;
4418                 }
4419             }
4420
4421             PUSH_STATE_GOTO(CURLYM_B, ST.B); /* match B */
4422             /* NOTREACHED */
4423
4424         case CURLYM_B_fail: /* just failed to match a B */
4425             REGCP_UNWIND(ST.cp);
4426             if (ST.minmod) {
4427                 if (ST.count == ARG2(ST.me) /* max */)
4428                     sayNO;
4429                 goto curlym_do_A; /* try to match a further A */
4430             }
4431             /* backtrack one A */
4432             if (ST.count == ARG1(ST.me) /* min */)
4433                 sayNO;
4434             ST.count--;
4435             locinput = HOPc(locinput, -ST.alen);
4436             goto curlym_do_B; /* try to match B */
4437
4438 #undef ST
4439 #define ST st->u.curly
4440
4441 #define CURLY_SETPAREN(paren, success) \
4442     if (paren) { \
4443         if (success) { \
4444             PL_regstartp[paren] = HOPc(locinput, -1) - PL_bostr; \
4445             PL_regendp[paren] = locinput - PL_bostr; \
4446             *PL_reglastcloseparen = paren; \
4447         } \
4448         else \
4449             PL_regendp[paren] = -1; \
4450     }
4451
4452         case STAR:              /*  /A*B/ where A is width 1 */
4453             ST.paren = 0;
4454             ST.min = 0;
4455             ST.max = REG_INFTY;
4456             scan = NEXTOPER(scan);
4457             goto repeat;
4458         case PLUS:              /*  /A+B/ where A is width 1 */
4459             ST.paren = 0;
4460             ST.min = 1;
4461             ST.max = REG_INFTY;
4462             scan = NEXTOPER(scan);
4463             goto repeat;
4464         case CURLYN:            /*  /(A){m,n}B/ where A is width 1 */
4465             ST.paren = scan->flags;     /* Which paren to set */
4466             if (ST.paren > PL_regsize)
4467                 PL_regsize = ST.paren;
4468             if (ST.paren > *PL_reglastparen)
4469                 *PL_reglastparen = ST.paren;
4470             ST.min = ARG1(scan);  /* min to match */
4471             ST.max = ARG2(scan);  /* max to match */
4472             if (cur_eval && cur_eval->u.eval.close_paren &&
4473                 cur_eval->u.eval.close_paren == (U32)ST.paren) {
4474                 ST.min=1;
4475                 ST.max=1;
4476             }
4477             scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
4478             goto repeat;
4479         case CURLY:             /*  /A{m,n}B/ where A is width 1 */
4480             ST.paren = 0;
4481             ST.min = ARG1(scan);  /* min to match */
4482             ST.max = ARG2(scan);  /* max to match */
4483             scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
4484           repeat:
4485             /*
4486             * Lookahead to avoid useless match attempts
4487             * when we know what character comes next.
4488             *
4489             * Used to only do .*x and .*?x, but now it allows
4490             * for )'s, ('s and (?{ ... })'s to be in the way
4491             * of the quantifier and the EXACT-like node.  -- japhy
4492             */
4493
4494             if (ST.min > ST.max) /* XXX make this a compile-time check? */
4495                 sayNO;
4496             if (HAS_TEXT(next) || JUMPABLE(next)) {
4497                 U8 *s;
4498                 regnode *text_node = next;
4499
4500                 if (! HAS_TEXT(text_node))
4501                     FIND_NEXT_IMPT(text_node);
4502
4503                 if (! HAS_TEXT(text_node))
4504                     ST.c1 = ST.c2 = CHRTEST_VOID;
4505                 else {
4506                     if ( PL_regkind[OP(text_node)] != EXACT ) {
4507                         ST.c1 = ST.c2 = CHRTEST_VOID;
4508                         goto assume_ok_easy;
4509                     }
4510                     else
4511                         s = (U8*)STRING(text_node);
4512
4513                     /*  Currently we only get here when
4514
4515                         PL_rekind[OP(text_node)] == EXACT
4516
4517                         if this changes back then the macro for IS_TEXT and
4518                         friends need to change. */
4519                     if (!UTF) {
4520                         ST.c2 = ST.c1 = *s;
4521                         if (IS_TEXTF(text_node))
4522                             ST.c2 = PL_fold[ST.c1];
4523                         else if (IS_TEXTFL(text_node))
4524                             ST.c2 = PL_fold_locale[ST.c1];
4525                     }
4526                     else { /* UTF */
4527                         if (IS_TEXTF(text_node)) {
4528                              STRLEN ulen1, ulen2;
4529                              U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
4530                              U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
4531
4532                              to_utf8_lower((U8*)s, tmpbuf1, &ulen1);
4533                              to_utf8_upper((U8*)s, tmpbuf2, &ulen2);
4534 #ifdef EBCDIC
4535                              ST.c1 = utf8n_to_uvchr(tmpbuf1, UTF8_MAXLEN, 0,
4536                                                     ckWARN(WARN_UTF8) ?
4537                                                     0 : UTF8_ALLOW_ANY);
4538                              ST.c2 = utf8n_to_uvchr(tmpbuf2, UTF8_MAXLEN, 0,
4539                                                     ckWARN(WARN_UTF8) ?
4540                                                     0 : UTF8_ALLOW_ANY);
4541 #else
4542                              ST.c1 = utf8n_to_uvuni(tmpbuf1, UTF8_MAXBYTES, 0,
4543                                                     uniflags);
4544                              ST.c2 = utf8n_to_uvuni(tmpbuf2, UTF8_MAXBYTES, 0,
4545                                                     uniflags);
4546 #endif
4547                         }
4548                         else {
4549                             ST.c2 = ST.c1 = utf8n_to_uvchr(s, UTF8_MAXBYTES, 0,
4550                                                      uniflags);
4551                         }
4552                     }
4553                 }
4554             }
4555             else
4556                 ST.c1 = ST.c2 = CHRTEST_VOID;
4557         assume_ok_easy:
4558
4559             ST.A = scan;
4560             ST.B = next;
4561             PL_reginput = locinput;
4562             if (minmod) {
4563                 minmod = 0;
4564                 if (ST.min && regrepeat(rex, ST.A, ST.min, depth) < ST.min)
4565                     sayNO;
4566                 ST.count = ST.min;
4567                 locinput = PL_reginput;
4568                 REGCP_SET(ST.cp);
4569                 if (ST.c1 == CHRTEST_VOID)
4570                     goto curly_try_B_min;
4571
4572                 ST.oldloc = locinput;
4573
4574                 /* set ST.maxpos to the furthest point along the
4575                  * string that could possibly match */
4576                 if  (ST.max == REG_INFTY) {
4577                     ST.maxpos = PL_regeol - 1;
4578                     if (do_utf8)
4579                         while (UTF8_IS_CONTINUATION(*(U8*)ST.maxpos))
4580                             ST.maxpos--;
4581                 }
4582                 else if (do_utf8) {
4583                     int m = ST.max - ST.min;
4584                     for (ST.maxpos = locinput;
4585                          m >0 && ST.maxpos + UTF8SKIP(ST.maxpos) <= PL_regeol; m--)
4586                         ST.maxpos += UTF8SKIP(ST.maxpos);
4587                 }
4588                 else {
4589                     ST.maxpos = locinput + ST.max - ST.min;
4590                     if (ST.maxpos >= PL_regeol)
4591                         ST.maxpos = PL_regeol - 1;
4592                 }
4593                 goto curly_try_B_min_known;
4594
4595             }
4596             else {
4597                 ST.count = regrepeat(rex, ST.A, ST.max, depth);
4598                 locinput = PL_reginput;
4599                 if (ST.count < ST.min)
4600                     sayNO;
4601                 if ((ST.count > ST.min)
4602                     && (PL_regkind[OP(ST.B)] == EOL) && (OP(ST.B) != MEOL))
4603                 {
4604                     /* A{m,n} must come at the end of the string, there's
4605                      * no point in backing off ... */
4606                     ST.min = ST.count;
4607                     /* ...except that $ and \Z can match before *and* after
4608                        newline at the end.  Consider "\n\n" =~ /\n+\Z\n/.
4609                        We may back off by one in this case. */
4610                     if (UCHARAT(PL_reginput - 1) == '\n' && OP(ST.B) != EOS)
4611                         ST.min--;
4612                 }
4613                 REGCP_SET(ST.cp);
4614                 goto curly_try_B_max;
4615             }
4616             /* NOTREACHED */
4617
4618
4619         case CURLY_B_min_known_fail:
4620             /* failed to find B in a non-greedy match where c1,c2 valid */
4621             if (ST.paren && ST.count)
4622                 PL_regendp[ST.paren] = -1;
4623
4624             PL_reginput = locinput;     /* Could be reset... */
4625             REGCP_UNWIND(ST.cp);
4626             /* Couldn't or didn't -- move forward. */
4627             ST.oldloc = locinput;
4628             if (do_utf8)
4629                 locinput += UTF8SKIP(locinput);
4630             else
4631                 locinput++;
4632             ST.count++;
4633           curly_try_B_min_known:
4634              /* find the next place where 'B' could work, then call B */
4635             {
4636                 int n;
4637                 if (do_utf8) {
4638                     n = (ST.oldloc == locinput) ? 0 : 1;
4639                     if (ST.c1 == ST.c2) {
4640                         STRLEN len;
4641                         /* set n to utf8_distance(oldloc, locinput) */
4642                         while (locinput <= ST.maxpos &&
4643                                utf8n_to_uvchr((U8*)locinput,
4644                                               UTF8_MAXBYTES, &len,
4645                                               uniflags) != (UV)ST.c1) {
4646                             locinput += len;
4647                             n++;
4648                         }
4649                     }
4650                     else {
4651                         /* set n to utf8_distance(oldloc, locinput) */
4652                         while (locinput <= ST.maxpos) {
4653                             STRLEN len;
4654                             const UV c = utf8n_to_uvchr((U8*)locinput,
4655                                                   UTF8_MAXBYTES, &len,
4656                                                   uniflags);
4657                             if (c == (UV)ST.c1 || c == (UV)ST.c2)
4658                                 break;
4659                             locinput += len;
4660                             n++;
4661                         }
4662                     }
4663                 }
4664                 else {
4665                     if (ST.c1 == ST.c2) {
4666                         while (locinput <= ST.maxpos &&
4667                                UCHARAT(locinput) != ST.c1)
4668                             locinput++;
4669                     }
4670                     else {
4671                         while (locinput <= ST.maxpos
4672                                && UCHARAT(locinput) != ST.c1
4673                                && UCHARAT(locinput) != ST.c2)
4674                             locinput++;
4675                     }
4676                     n = locinput - ST.oldloc;
4677                 }
4678                 if (locinput > ST.maxpos)
4679                     sayNO;
4680                 /* PL_reginput == oldloc now */
4681                 if (n) {
4682                     ST.count += n;
4683                     if (regrepeat(rex, ST.A, n, depth) < n)
4684                         sayNO;
4685                 }
4686                 PL_reginput = locinput;
4687                 CURLY_SETPAREN(ST.paren, ST.count);
4688                 if (cur_eval && cur_eval->u.eval.close_paren &&
4689                     cur_eval->u.eval.close_paren == (U32)ST.paren) {
4690                     goto fake_end;
4691                 }
4692                 PUSH_STATE_GOTO(CURLY_B_min_known, ST.B);
4693             }
4694             /* NOTREACHED */
4695
4696
4697         case CURLY_B_min_fail:
4698             /* failed to find B in a non-greedy match where c1,c2 invalid */
4699             if (ST.paren && ST.count)
4700                 PL_regendp[ST.paren] = -1;
4701
4702             REGCP_UNWIND(ST.cp);
4703             /* failed -- move forward one */
4704             PL_reginput = locinput;
4705             if (regrepeat(rex, ST.A, 1, depth)) {
4706                 ST.count++;
4707                 locinput = PL_reginput;
4708                 if (ST.count <= ST.max || (ST.max == REG_INFTY &&
4709                         ST.count > 0)) /* count overflow ? */
4710                 {
4711                   curly_try_B_min:
4712                     CURLY_SETPAREN(ST.paren, ST.count);
4713                     if (cur_eval && cur_eval->u.eval.close_paren &&
4714                         cur_eval->u.eval.close_paren == (U32)ST.paren) {
4715                         goto fake_end;
4716                     }
4717                     PUSH_STATE_GOTO(CURLY_B_min, ST.B);
4718                 }
4719             }
4720             sayNO;
4721             /* NOTREACHED */
4722
4723
4724         curly_try_B_max:
4725             /* a successful greedy match: now try to match B */
4726             if (cur_eval && cur_eval->u.eval.close_paren &&
4727                 cur_eval->u.eval.close_paren == (U32)ST.paren) {
4728                 goto fake_end;
4729             }
4730             {
4731                 UV c = 0;
4732                 if (ST.c1 != CHRTEST_VOID)
4733                     c = do_utf8 ? utf8n_to_uvchr((U8*)PL_reginput,
4734                                            UTF8_MAXBYTES, 0, uniflags)
4735                                 : (UV) UCHARAT(PL_reginput);
4736                 /* If it could work, try it. */
4737                 if (ST.c1 == CHRTEST_VOID || c == (UV)ST.c1 || c == (UV)ST.c2) {
4738                     CURLY_SETPAREN(ST.paren, ST.count);
4739                     PUSH_STATE_GOTO(CURLY_B_max, ST.B);
4740                     /* NOTREACHED */
4741                 }
4742             }
4743             /* FALL THROUGH */
4744         case CURLY_B_max_fail:
4745             /* failed to find B in a greedy match */
4746             if (ST.paren && ST.count)
4747                 PL_regendp[ST.paren] = -1;
4748
4749             REGCP_UNWIND(ST.cp);
4750             /*  back up. */
4751             if (--ST.count < ST.min)
4752                 sayNO;
4753             PL_reginput = locinput = HOPc(locinput, -1);
4754             goto curly_try_B_max;
4755
4756 #undef ST
4757
4758         case END:
4759             fake_end:
4760             if (cur_eval) {
4761                 /* we've just finished A in /(??{A})B/; now continue with B */
4762                 I32 tmpix;
4763                 st->u.eval.toggle_reg_flags
4764                             = cur_eval->u.eval.toggle_reg_flags;
4765                 PL_reg_flags ^= st->u.eval.toggle_reg_flags;
4766
4767                 st->u.eval.prev_rex = rex;              /* inner */
4768                 SETREX(rex,cur_eval->u.eval.prev_rex);
4769                 rexi = RXi_GET(rex);
4770                 cur_curlyx = cur_eval->u.eval.prev_curlyx;
4771                 ReREFCNT_inc(rex);
4772                 st->u.eval.cp = regcppush(0);   /* Save *all* the positions. */
4773                 REGCP_SET(st->u.eval.lastcp);
4774                 PL_reginput = locinput;
4775
4776                 /* Restore parens of the outer rex without popping the
4777                  * savestack */
4778                 tmpix = PL_savestack_ix;
4779                 PL_savestack_ix = cur_eval->u.eval.lastcp;
4780                 regcppop(rex);
4781                 PL_savestack_ix = tmpix;
4782
4783                 st->u.eval.prev_eval = cur_eval;
4784                 cur_eval = cur_eval->u.eval.prev_eval;
4785                 DEBUG_EXECUTE_r(
4786                     PerlIO_printf(Perl_debug_log, "%*s  EVAL trying tail ... %"UVxf"\n",
4787                                       REPORT_CODE_OFF+depth*2, "",PTR2UV(cur_eval)););
4788                 if ( nochange_depth )
4789                     nochange_depth--;
4790
4791                 PUSH_YES_STATE_GOTO(EVAL_AB,
4792                         st->u.eval.prev_eval->u.eval.B); /* match B */
4793             }
4794
4795             if (locinput < reginfo->till) {
4796                 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
4797                                       "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
4798                                       PL_colors[4],
4799                                       (long)(locinput - PL_reg_starttry),
4800                                       (long)(reginfo->till - PL_reg_starttry),
4801                                       PL_colors[5]));
4802
4803                 sayNO_SILENT;           /* Cannot match: too short. */
4804             }
4805             PL_reginput = locinput;     /* put where regtry can find it */
4806             sayYES;                     /* Success! */
4807
4808         case SUCCEED: /* successful SUSPEND/UNLESSM/IFMATCH/CURLYM */
4809             DEBUG_EXECUTE_r(
4810             PerlIO_printf(Perl_debug_log,
4811                 "%*s  %ssubpattern success...%s\n",
4812                 REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5]));
4813             PL_reginput = locinput;     /* put where regtry can find it */
4814             sayYES;                     /* Success! */
4815
4816 #undef  ST
4817 #define ST st->u.ifmatch
4818
4819         case SUSPEND:   /* (?>A) */
4820             ST.wanted = 1;
4821             PL_reginput = locinput;
4822             goto do_ifmatch;
4823
4824         case UNLESSM:   /* -ve lookaround: (?!A), or with flags, (?<!A) */
4825             ST.wanted = 0;
4826             goto ifmatch_trivial_fail_test;
4827
4828         case IFMATCH:   /* +ve lookaround: (?=A), or with flags, (?<=A) */
4829             ST.wanted = 1;
4830           ifmatch_trivial_fail_test:
4831             if (scan->flags) {
4832                 char * const s = HOPBACKc(locinput, scan->flags);
4833                 if (!s) {
4834                     /* trivial fail */
4835                     if (logical) {
4836                         logical = 0;
4837                         sw = 1 - (bool)ST.wanted;
4838                     }
4839                     else if (ST.wanted)
4840                         sayNO;
4841                     next = scan + ARG(scan);
4842                     if (next == scan)
4843                         next = NULL;
4844                     break;
4845                 }
4846                 PL_reginput = s;
4847             }
4848             else
4849                 PL_reginput = locinput;
4850
4851           do_ifmatch:
4852             ST.me = scan;
4853             ST.logical = logical;
4854             /* execute body of (?...A) */
4855             PUSH_YES_STATE_GOTO(IFMATCH_A, NEXTOPER(NEXTOPER(scan)));
4856             /* NOTREACHED */
4857
4858         case IFMATCH_A_fail: /* body of (?...A) failed */
4859             ST.wanted = !ST.wanted;
4860             /* FALL THROUGH */
4861
4862         case IFMATCH_A: /* body of (?...A) succeeded */
4863             if (ST.logical) {
4864                 sw = (bool)ST.wanted;
4865             }
4866             else if (!ST.wanted)
4867                 sayNO;
4868
4869             if (OP(ST.me) == SUSPEND)
4870                 locinput = PL_reginput;
4871             else {
4872                 locinput = PL_reginput = st->locinput;
4873                 nextchr = UCHARAT(locinput);
4874             }
4875             scan = ST.me + ARG(ST.me);
4876             if (scan == ST.me)
4877                 scan = NULL;
4878             continue; /* execute B */
4879
4880 #undef ST
4881
4882         case LONGJMP:
4883             next = scan + ARG(scan);
4884             if (next == scan)
4885                 next = NULL;
4886             break;
4887         case COMMIT:
4888             reginfo->cutpoint = PL_regeol;
4889             /* FALLTHROUGH */
4890         case PRUNE:
4891             PL_reginput = locinput;
4892             if (!scan->flags)
4893                 sv_yes_mark = sv_commit = (SV*)rexi->data->data[ ARG( scan ) ];
4894             PUSH_STATE_GOTO(COMMIT_next,next);
4895             /* NOTREACHED */
4896         case COMMIT_next_fail:
4897             no_final = 1;
4898             /* FALLTHROUGH */
4899         case OPFAIL:
4900             sayNO;
4901             /* NOTREACHED */
4902
4903 #define ST st->u.mark
4904         case MARKPOINT:
4905             ST.prev_mark = mark_state;
4906             ST.mark_name = sv_commit = sv_yes_mark
4907                 = (SV*)rexi->data->data[ ARG( scan ) ];
4908             mark_state = st;
4909             ST.mark_loc = PL_reginput = locinput;
4910             PUSH_YES_STATE_GOTO(MARKPOINT_next,next);
4911             /* NOTREACHED */
4912         case MARKPOINT_next:
4913             mark_state = ST.prev_mark;
4914             sayYES;
4915             /* NOTREACHED */
4916         case MARKPOINT_next_fail:
4917             if (popmark && sv_eq(ST.mark_name,popmark))
4918             {
4919                 if (ST.mark_loc > startpoint)
4920                     reginfo->cutpoint = HOPBACKc(ST.mark_loc, 1);
4921                 popmark = NULL; /* we found our mark */
4922                 sv_commit = ST.mark_name;
4923
4924                 DEBUG_EXECUTE_r({
4925                         PerlIO_printf(Perl_debug_log,
4926                             "%*s  %ssetting cutpoint to mark:%"SVf"...%s\n",
4927                             REPORT_CODE_OFF+depth*2, "",
4928                             PL_colors[4], SVfARG(sv_commit), PL_colors[5]);
4929                 });
4930             }
4931             mark_state = ST.prev_mark;
4932             sv_yes_mark = mark_state ?
4933                 mark_state->u.mark.mark_name : NULL;
4934             sayNO;
4935             /* NOTREACHED */
4936         case SKIP:
4937             PL_reginput = locinput;
4938             if (scan->flags) {
4939                 /* (*SKIP) : if we fail we cut here*/
4940                 ST.mark_name = NULL;
4941                 ST.mark_loc = locinput;
4942                 PUSH_STATE_GOTO(SKIP_next,next);
4943             } else {
4944                 /* (*SKIP:NAME) : if there is a (*MARK:NAME) fail where it was,
4945                    otherwise do nothing.  Meaning we need to scan
4946                  */
4947                 regmatch_state *cur = mark_state;
4948                 SV *find = (SV*)rexi->data->data[ ARG( scan ) ];
4949
4950                 while (cur) {
4951                     if ( sv_eq( cur->u.mark.mark_name,
4952                                 find ) )
4953                     {
4954                         ST.mark_name = find;
4955                         PUSH_STATE_GOTO( SKIP_next, next );
4956                     }
4957                     cur = cur->u.mark.prev_mark;
4958                 }
4959             }
4960             /* Didn't find our (*MARK:NAME) so ignore this (*SKIP:NAME) */
4961             break;
4962         case SKIP_next_fail:
4963             if (ST.mark_name) {
4964                 /* (*CUT:NAME) - Set up to search for the name as we
4965                    collapse the stack*/
4966                 popmark = ST.mark_name;
4967             } else {
4968                 /* (*CUT) - No name, we cut here.*/
4969                 if (ST.mark_loc > startpoint)
4970                     reginfo->cutpoint = HOPBACKc(ST.mark_loc, 1);
4971                 /* but we set sv_commit to latest mark_name if there
4972                    is one so they can test to see how things lead to this
4973                    cut */
4974                 if (mark_state)
4975                     sv_commit=mark_state->u.mark.mark_name;
4976             }
4977             no_final = 1;
4978             sayNO;
4979             /* NOTREACHED */
4980 #undef ST
4981
4982         default:
4983             PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
4984                           PTR2UV(scan), OP(scan));
4985             Perl_croak(aTHX_ "regexp memory corruption");
4986
4987         } /* end switch */
4988
4989         /* switch break jumps here */
4990         scan = next; /* prepare to execute the next op and ... */
4991         continue;    /* ... jump back to the top, reusing st */
4992         /* NOTREACHED */
4993
4994       push_yes_state:
4995         /* push a state that backtracks on success */
4996         st->u.yes.prev_yes_state = yes_state;
4997         yes_state = st;
4998         /* FALL THROUGH */
4999       push_state:
5000         /* push a new regex state, then continue at scan  */
5001         {
5002             regmatch_state *newst;
5003
5004             DEBUG_STACK_r({
5005                 regmatch_state *cur = st;
5006                 regmatch_state *curyes = yes_state;
5007                 int curd = depth;
5008                 regmatch_slab *slab = PL_regmatch_slab;
5009                 for (;curd > -1;cur--,curd--) {
5010                     if (cur < SLAB_FIRST(slab)) {
5011                         slab = slab->prev;
5012                         cur = SLAB_LAST(slab);
5013                     }
5014                     PerlIO_printf(Perl_error_log, "%*s#%-3d %-10s %s\n",
5015                         REPORT_CODE_OFF + 2 + depth * 2,"",
5016                         curd, PL_reg_name[cur->resume_state],
5017                         (curyes == cur) ? "yes" : ""
5018                     );
5019                     if (curyes == cur)
5020                         curyes = cur->u.yes.prev_yes_state;
5021                 }
5022             } else
5023                 DEBUG_STATE_pp("push")
5024             );
5025             depth++;
5026             st->locinput = locinput;
5027             newst = st+1;
5028             if (newst >  SLAB_LAST(PL_regmatch_slab))
5029                 newst = S_push_slab(aTHX);
5030             PL_regmatch_state = newst;
5031
5032             locinput = PL_reginput;
5033             nextchr = UCHARAT(locinput);
5034             st = newst;
5035             continue;
5036             /* NOTREACHED */
5037         }
5038     }
5039
5040     /*
5041     * We get here only if there's trouble -- normally "case END" is
5042     * the terminating point.
5043     */
5044     Perl_croak(aTHX_ "corrupted regexp pointers");
5045     /*NOTREACHED*/
5046     sayNO;
5047
5048 yes:
5049     if (yes_state) {
5050         /* we have successfully completed a subexpression, but we must now
5051          * pop to the state marked by yes_state and continue from there */
5052         assert(st != yes_state);
5053 #ifdef DEBUGGING
5054         while (st != yes_state) {
5055             st--;
5056             if (st < SLAB_FIRST(PL_regmatch_slab)) {
5057                 PL_regmatch_slab = PL_regmatch_slab->prev;
5058                 st = SLAB_LAST(PL_regmatch_slab);
5059             }
5060             DEBUG_STATE_r({
5061                 if (no_final) {
5062                     DEBUG_STATE_pp("pop (no final)");
5063                 } else {
5064                     DEBUG_STATE_pp("pop (yes)");
5065                 }
5066             });
5067             depth--;
5068         }
5069 #else
5070         while (yes_state < SLAB_FIRST(PL_regmatch_slab)
5071             || yes_state > SLAB_LAST(PL_regmatch_slab))
5072         {
5073             /* not in this slab, pop slab */
5074             depth -= (st - SLAB_FIRST(PL_regmatch_slab) + 1);
5075             PL_regmatch_slab = PL_regmatch_slab->prev;
5076             st = SLAB_LAST(PL_regmatch_slab);
5077         }
5078         depth -= (st - yes_state);
5079 #endif
5080         st = yes_state;
5081         yes_state = st->u.yes.prev_yes_state;
5082         PL_regmatch_state = st;
5083
5084         if (no_final) {
5085             locinput= st->locinput;
5086             nextchr = UCHARAT(locinput);
5087         }
5088         state_num = st->resume_state + no_final;
5089         goto reenter_switch;
5090     }
5091
5092     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
5093                           PL_colors[4], PL_colors[5]));
5094
5095     result = 1;
5096     goto final_exit;
5097
5098 no:
5099     DEBUG_EXECUTE_r(
5100         PerlIO_printf(Perl_debug_log,
5101             "%*s  %sfailed...%s\n",
5102             REPORT_CODE_OFF+depth*2, "",
5103             PL_colors[4], PL_colors[5])
5104         );
5105
5106 no_silent:
5107     if (no_final) {
5108         if (yes_state) {
5109             goto yes;
5110         } else {
5111             goto final_exit;
5112         }
5113     }
5114     if (depth) {
5115         /* there's a previous state to backtrack to */
5116         st--;
5117         if (st < SLAB_FIRST(PL_regmatch_slab)) {
5118             PL_regmatch_slab = PL_regmatch_slab->prev;
5119             st = SLAB_LAST(PL_regmatch_slab);
5120         }
5121         PL_regmatch_state = st;
5122         locinput= st->locinput;
5123         nextchr = UCHARAT(locinput);
5124
5125         DEBUG_STATE_pp("pop");
5126         depth--;
5127         if (yes_state == st)
5128             yes_state = st->u.yes.prev_yes_state;
5129
5130         state_num = st->resume_state + 1; /* failure = success + 1 */
5131         goto reenter_switch;
5132     }
5133     result = 0;
5134
5135   final_exit:
5136     if (rex->intflags & PREGf_VERBARG_SEEN) {
5137         SV *sv_err = get_sv("REGERROR", 1);
5138         SV *sv_mrk = get_sv("REGMARK", 1);
5139         if (result) {
5140             sv_commit = &PL_sv_no;
5141             if (!sv_yes_mark)
5142                 sv_yes_mark = &PL_sv_yes;
5143         } else {
5144             if (!sv_commit)
5145                 sv_commit = &PL_sv_yes;
5146             sv_yes_mark = &PL_sv_no;
5147         }
5148         sv_setsv(sv_err, sv_commit);
5149         sv_setsv(sv_mrk, sv_yes_mark);
5150     }
5151     /* restore original high-water mark */
5152     PL_regmatch_slab  = orig_slab;
5153     PL_regmatch_state = orig_state;
5154
5155     /* free all slabs above current one */
5156     if (orig_slab->next) {
5157         regmatch_slab *sl = orig_slab->next;
5158         orig_slab->next = NULL;
5159         while (sl) {
5160             regmatch_slab * const osl = sl;
5161             sl = sl->next;
5162             Safefree(osl);
5163         }
5164     }
5165
5166     return result;
5167 }
5168
5169 /*
5170  - regrepeat - repeatedly match something simple, report how many
5171  */
5172 /*
5173  * [This routine now assumes that it will only match on things of length 1.
5174  * That was true before, but now we assume scan - reginput is the count,
5175  * rather than incrementing count on every character.  [Er, except utf8.]]
5176  */
5177 STATIC I32
5178 S_regrepeat(pTHX_ const regexp *prog, const regnode *p, I32 max, int depth)
5179 {
5180     dVAR;
5181     register char *scan;
5182     register I32 c;
5183     register char *loceol = PL_regeol;
5184     register I32 hardcount = 0;
5185     register bool do_utf8 = PL_reg_match_utf8;
5186 #ifndef DEBUGGING
5187     PERL_UNUSED_ARG(depth);
5188 #endif
5189
5190     scan = PL_reginput;
5191     if (max == REG_INFTY)
5192         max = I32_MAX;
5193     else if (max < loceol - scan)
5194         loceol = scan + max;
5195     switch (OP(p)) {
5196     case REG_ANY:
5197         if (do_utf8) {
5198             loceol = PL_regeol;
5199             while (scan < loceol && hardcount < max && *scan != '\n') {
5200                 scan += UTF8SKIP(scan);
5201                 hardcount++;
5202             }
5203         } else {
5204             while (scan < loceol && *scan != '\n')
5205                 scan++;
5206         }
5207         break;
5208     case SANY:
5209         if (do_utf8) {
5210             loceol = PL_regeol;
5211             while (scan < loceol && hardcount < max) {
5212                 scan += UTF8SKIP(scan);
5213                 hardcount++;
5214             }
5215         }
5216         else
5217             scan = loceol;
5218         break;
5219     case CANY:
5220         scan = loceol;
5221         break;
5222     case EXACT:         /* length of string is 1 */
5223         c = (U8)*STRING(p);
5224         while (scan < loceol && UCHARAT(scan) == c)
5225             scan++;
5226         break;
5227     case EXACTF:        /* length of string is 1 */
5228         c = (U8)*STRING(p);
5229         while (scan < loceol &&
5230                (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
5231             scan++;
5232         break;
5233     case EXACTFL:       /* length of string is 1 */
5234         PL_reg_flags |= RF_tainted;
5235         c = (U8)*STRING(p);
5236         while (scan < loceol &&
5237                (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
5238             scan++;
5239         break;
5240     case ANYOF:
5241         if (do_utf8) {
5242             loceol = PL_regeol;
5243             while (hardcount < max && scan < loceol &&
5244                    reginclass(prog, p, (U8*)scan, 0, do_utf8)) {
5245                 scan += UTF8SKIP(scan);
5246                 hardcount++;
5247             }
5248         } else {
5249             while (scan < loceol && REGINCLASS(prog, p, (U8*)scan))
5250                 scan++;
5251         }
5252         break;
5253     case ALNUM:
5254         if (do_utf8) {
5255             loceol = PL_regeol;
5256             LOAD_UTF8_CHARCLASS_ALNUM();
5257             while (hardcount < max && scan < loceol &&
5258                    swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
5259                 scan += UTF8SKIP(scan);
5260                 hardcount++;
5261             }
5262         } else {
5263             while (scan < loceol && isALNUM(*scan))
5264                 scan++;
5265         }
5266         break;
5267     case ALNUML:
5268         PL_reg_flags |= RF_tainted;
5269         if (do_utf8) {
5270             loceol = PL_regeol;
5271             while (hardcount < max && scan < loceol &&
5272                    isALNUM_LC_utf8((U8*)scan)) {
5273                 scan += UTF8SKIP(scan);
5274                 hardcount++;
5275             }
5276         } else {
5277             while (scan < loceol && isALNUM_LC(*scan))
5278                 scan++;
5279         }
5280         break;
5281     case NALNUM:
5282         if (do_utf8) {
5283             loceol = PL_regeol;
5284             LOAD_UTF8_CHARCLASS_ALNUM();
5285             while (hardcount < max && scan < loceol &&
5286                    !swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
5287                 scan += UTF8SKIP(scan);
5288                 hardcount++;
5289             }
5290         } else {
5291             while (scan < loceol && !isALNUM(*scan))
5292                 scan++;
5293         }
5294         break;
5295     case NALNUML:
5296         PL_reg_flags |= RF_tainted;
5297         if (do_utf8) {
5298             loceol = PL_regeol;
5299             while (hardcount < max && scan < loceol &&
5300                    !isALNUM_LC_utf8((U8*)scan)) {
5301                 scan += UTF8SKIP(scan);
5302                 hardcount++;
5303             }
5304         } else {
5305             while (scan < loceol && !isALNUM_LC(*scan))
5306                 scan++;
5307         }
5308         break;
5309     case SPACE:
5310         if (do_utf8) {
5311             loceol = PL_regeol;
5312             LOAD_UTF8_CHARCLASS_SPACE();
5313             while (hardcount < max && scan < loceol &&
5314                    (*scan == ' ' ||
5315                     swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
5316                 scan += UTF8SKIP(scan);
5317                 hardcount++;
5318             }
5319         } else {
5320             while (scan < loceol && isSPACE(*scan))
5321                 scan++;
5322         }
5323         break;
5324     case SPACEL:
5325         PL_reg_flags |= RF_tainted;
5326         if (do_utf8) {
5327             loceol = PL_regeol;
5328             while (hardcount < max && scan < loceol &&
5329                    (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
5330                 scan += UTF8SKIP(scan);
5331                 hardcount++;
5332             }
5333         } else {
5334             while (scan < loceol && isSPACE_LC(*scan))
5335                 scan++;
5336         }
5337         break;
5338     case NSPACE:
5339         if (do_utf8) {
5340             loceol = PL_regeol;
5341             LOAD_UTF8_CHARCLASS_SPACE();
5342             while (hardcount < max && scan < loceol &&
5343                    !(*scan == ' ' ||
5344                      swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
5345                 scan += UTF8SKIP(scan);
5346                 hardcount++;
5347             }
5348         } else {
5349             while (scan < loceol && !isSPACE(*scan))
5350                 scan++;
5351             break;
5352         }
5353     case NSPACEL:
5354         PL_reg_flags |= RF_tainted;
5355         if (do_utf8) {
5356             loceol = PL_regeol;
5357             while (hardcount < max && scan < loceol &&
5358                    !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
5359                 scan += UTF8SKIP(scan);
5360                 hardcount++;
5361             }
5362         } else {
5363             while (scan < loceol && !isSPACE_LC(*scan))
5364                 scan++;
5365         }
5366         break;
5367     case DIGIT:
5368         if (do_utf8) {
5369             loceol = PL_regeol;
5370             LOAD_UTF8_CHARCLASS_DIGIT();
5371             while (hardcount < max && scan < loceol &&
5372                    swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
5373                 scan += UTF8SKIP(scan);
5374                 hardcount++;
5375             }
5376         } else {
5377             while (scan < loceol && isDIGIT(*scan))
5378                 scan++;
5379         }
5380         break;
5381     case NDIGIT:
5382         if (do_utf8) {
5383             loceol = PL_regeol;
5384             LOAD_UTF8_CHARCLASS_DIGIT();
5385             while (hardcount < max && scan < loceol &&
5386                    !swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
5387                 scan += UTF8SKIP(scan);
5388                 hardcount++;
5389             }
5390         } else {
5391             while (scan < loceol && !isDIGIT(*scan))
5392                 scan++;
5393         }
5394         break;
5395     default:            /* Called on something of 0 width. */
5396         break;          /* So match right here or not at all. */
5397     }
5398
5399     if (hardcount)
5400         c = hardcount;
5401     else
5402         c = scan - PL_reginput;
5403     PL_reginput = scan;
5404
5405     DEBUG_r({
5406         GET_RE_DEBUG_FLAGS_DECL;
5407         DEBUG_EXECUTE_r({
5408             SV * const prop = sv_newmortal();
5409             regprop(prog, prop, p);
5410             PerlIO_printf(Perl_debug_log,
5411                         "%*s  %s can match %"IVdf" times out of %"IVdf"...\n",
5412                         REPORT_CODE_OFF + depth*2, "", SvPVX_const(prop),(IV)c,(IV)max);
5413         });
5414     });
5415
5416     return(c);
5417 }
5418
5419
5420 #if !defined(PERL_IN_XSUB_RE) || defined(PLUGGABLE_RE_EXTENSION)
5421 /*
5422 - regclass_swash - prepare the utf8 swash
5423 */
5424
5425 SV *
5426 Perl_regclass_swash(pTHX_ const regexp *prog, register const regnode* node, bool doinit, SV** listsvp, SV **altsvp)
5427 {
5428     dVAR;
5429     SV *sw  = NULL;
5430     SV *si  = NULL;
5431     SV *alt = NULL;
5432     RXi_GET_DECL(prog,progi);
5433     const struct reg_data * const data = prog ? progi->data : NULL;
5434
5435     if (data && data->count) {
5436         const U32 n = ARG(node);
5437
5438         if (data->what[n] == 's') {
5439             SV * const rv = (SV*)data->data[n];
5440             AV * const av = (AV*)SvRV((SV*)rv);
5441             SV **const ary = AvARRAY(av);
5442             SV **a, **b;
5443
5444             /* See the end of regcomp.c:S_regclass() for
5445              * documentation of these array elements. */
5446
5447             si = *ary;
5448             a  = SvROK(ary[1]) ? &ary[1] : 0;
5449             b  = SvTYPE(ary[2]) == SVt_PVAV ? &ary[2] : 0;
5450
5451             if (a)
5452                 sw = *a;
5453             else if (si && doinit) {
5454                 sw = swash_init("utf8", "", si, 1, 0);
5455                 (void)av_store(av, 1, sw);
5456             }
5457             if (b)
5458                 alt = *b;
5459         }
5460     }
5461
5462     if (listsvp)
5463         *listsvp = si;
5464     if (altsvp)
5465         *altsvp  = alt;
5466
5467     return sw;
5468 }
5469 #endif
5470
5471 /*
5472  - reginclass - determine if a character falls into a character class
5473
5474   The n is the ANYOF regnode, the p is the target string, lenp
5475   is pointer to the maximum length of how far to go in the p
5476   (if the lenp is zero, UTF8SKIP(p) is used),
5477   do_utf8 tells whether the target string is in UTF-8.
5478
5479  */
5480
5481 STATIC bool
5482 S_reginclass(pTHX_ const regexp *prog, register const regnode *n, register const U8* p, STRLEN* lenp, register bool do_utf8)
5483 {
5484     dVAR;
5485     const char flags = ANYOF_FLAGS(n);
5486     bool match = FALSE;
5487     UV c = *p;
5488     STRLEN len = 0;
5489     STRLEN plen;
5490
5491     if (do_utf8 && !UTF8_IS_INVARIANT(c)) {
5492         c = utf8n_to_uvchr(p, UTF8_MAXBYTES, &len,
5493                 (UTF8_ALLOW_DEFAULT & UTF8_ALLOW_ANYUV) | UTF8_CHECK_ONLY);
5494                 /* see [perl #37836] for UTF8_ALLOW_ANYUV */
5495         if (len == (STRLEN)-1)
5496             Perl_croak(aTHX_ "Malformed UTF-8 character (fatal)");
5497     }
5498
5499     plen = lenp ? *lenp : UNISKIP(NATIVE_TO_UNI(c));
5500     if (do_utf8 || (flags & ANYOF_UNICODE)) {
5501         if (lenp)
5502             *lenp = 0;
5503         if (do_utf8 && !ANYOF_RUNTIME(n)) {
5504             if (len != (STRLEN)-1 && c < 256 && ANYOF_BITMAP_TEST(n, c))
5505                 match = TRUE;
5506         }
5507         if (!match && do_utf8 && (flags & ANYOF_UNICODE_ALL) && c >= 256)
5508             match = TRUE;
5509         if (!match) {
5510             AV *av;
5511             SV * const sw = regclass_swash(prog, n, TRUE, 0, (SV**)&av);
5512
5513             if (sw) {
5514                 if (swash_fetch(sw, p, do_utf8))
5515                     match = TRUE;
5516                 else if (flags & ANYOF_FOLD) {
5517                     if (!match && lenp && av) {
5518                         I32 i;
5519                         for (i = 0; i <= av_len(av); i++) {
5520                             SV* const sv = *av_fetch(av, i, FALSE);
5521                             STRLEN len;
5522                             const char * const s = SvPV_const(sv, len);
5523
5524                             if (len <= plen && memEQ(s, (char*)p, len)) {
5525                                 *lenp = len;
5526                                 match = TRUE;
5527                                 break;
5528                             }
5529                         }
5530                     }
5531                     if (!match) {
5532                         U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
5533                         STRLEN tmplen;
5534
5535                         to_utf8_fold(p, tmpbuf, &tmplen);
5536                         if (swash_fetch(sw, tmpbuf, do_utf8))
5537                             match = TRUE;
5538                     }
5539                 }
5540             }
5541         }
5542         if (match && lenp && *lenp == 0)
5543             *lenp = UNISKIP(NATIVE_TO_UNI(c));
5544     }
5545     if (!match && c < 256) {
5546         if (ANYOF_BITMAP_TEST(n, c))
5547             match = TRUE;
5548         else if (flags & ANYOF_FOLD) {
5549             U8 f;
5550
5551             if (flags & ANYOF_LOCALE) {
5552                 PL_reg_flags |= RF_tainted;
5553                 f = PL_fold_locale[c];
5554             }
5555             else
5556                 f = PL_fold[c];
5557             if (f != c && ANYOF_BITMAP_TEST(n, f))
5558                 match = TRUE;
5559         }
5560
5561         if (!match && (flags & ANYOF_CLASS)) {
5562             PL_reg_flags |= RF_tainted;
5563             if (
5564                 (ANYOF_CLASS_TEST(n, ANYOF_ALNUM)   &&  isALNUM_LC(c))  ||
5565                 (ANYOF_CLASS_TEST(n, ANYOF_NALNUM)  && !isALNUM_LC(c))  ||
5566                 (ANYOF_CLASS_TEST(n, ANYOF_SPACE)   &&  isSPACE_LC(c))  ||
5567                 (ANYOF_CLASS_TEST(n, ANYOF_NSPACE)  && !isSPACE_LC(c))  ||
5568                 (ANYOF_CLASS_TEST(n, ANYOF_DIGIT)   &&  isDIGIT_LC(c))  ||
5569                 (ANYOF_CLASS_TEST(n, ANYOF_NDIGIT)  && !isDIGIT_LC(c))  ||
5570                 (ANYOF_CLASS_TEST(n, ANYOF_ALNUMC)  &&  isALNUMC_LC(c)) ||
5571                 (ANYOF_CLASS_TEST(n, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
5572                 (ANYOF_CLASS_TEST(n, ANYOF_ALPHA)   &&  isALPHA_LC(c))  ||
5573                 (ANYOF_CLASS_TEST(n, ANYOF_NALPHA)  && !isALPHA_LC(c))  ||
5574                 (ANYOF_CLASS_TEST(n, ANYOF_ASCII)   &&  isASCII(c))     ||
5575                 (ANYOF_CLASS_TEST(n, ANYOF_NASCII)  && !isASCII(c))     ||
5576                 (ANYOF_CLASS_TEST(n, ANYOF_CNTRL)   &&  isCNTRL_LC(c))  ||
5577                 (ANYOF_CLASS_TEST(n, ANYOF_NCNTRL)  && !isCNTRL_LC(c))  ||
5578                 (ANYOF_CLASS_TEST(n, ANYOF_GRAPH)   &&  isGRAPH_LC(c))  ||
5579                 (ANYOF_CLASS_TEST(n, ANYOF_NGRAPH)  && !isGRAPH_LC(c))  ||
5580                 (ANYOF_CLASS_TEST(n, ANYOF_LOWER)   &&  isLOWER_LC(c))  ||
5581                 (ANYOF_CLASS_TEST(n, ANYOF_NLOWER)  && !isLOWER_LC(c))  ||
5582                 (ANYOF_CLASS_TEST(n, ANYOF_PRINT)   &&  isPRINT_LC(c))  ||
5583                 (ANYOF_CLASS_TEST(n, ANYOF_NPRINT)  && !isPRINT_LC(c))  ||
5584                 (ANYOF_CLASS_TEST(n, ANYOF_PUNCT)   &&  isPUNCT_LC(c))  ||
5585                 (ANYOF_CLASS_TEST(n, ANYOF_NPUNCT)  && !isPUNCT_LC(c))  ||
5586                 (ANYOF_CLASS_TEST(n, ANYOF_UPPER)   &&  isUPPER_LC(c))  ||
5587                 (ANYOF_CLASS_TEST(n, ANYOF_NUPPER)  && !isUPPER_LC(c))  ||
5588                 (ANYOF_CLASS_TEST(n, ANYOF_XDIGIT)  &&  isXDIGIT(c))    ||
5589                 (ANYOF_CLASS_TEST(n, ANYOF_NXDIGIT) && !isXDIGIT(c))    ||
5590                 (ANYOF_CLASS_TEST(n, ANYOF_PSXSPC)  &&  isPSXSPC(c))    ||
5591                 (ANYOF_CLASS_TEST(n, ANYOF_NPSXSPC) && !isPSXSPC(c))    ||
5592                 (ANYOF_CLASS_TEST(n, ANYOF_BLANK)   &&  isBLANK(c))     ||
5593                 (ANYOF_CLASS_TEST(n, ANYOF_NBLANK)  && !isBLANK(c))
5594                 ) /* How's that for a conditional? */
5595             {
5596                 match = TRUE;
5597             }
5598         }
5599     }
5600
5601     return (flags & ANYOF_INVERT) ? !match : match;
5602 }
5603
5604 STATIC U8 *
5605 S_reghop3(U8 *s, I32 off, const U8* lim)
5606 {
5607     dVAR;
5608     if (off >= 0) {
5609         while (off-- && s < lim) {
5610             /* XXX could check well-formedness here */
5611             s += UTF8SKIP(s);
5612         }
5613     }
5614     else {
5615         while (off++ && s > lim) {
5616             s--;
5617             if (UTF8_IS_CONTINUED(*s)) {
5618                 while (s > lim && UTF8_IS_CONTINUATION(*s))
5619                     s--;
5620             }
5621             /* XXX could check well-formedness here */
5622         }
5623     }
5624     return s;
5625 }
5626
5627 #ifdef XXX_dmq
5628 /* there are a bunch of places where we use two reghop3's that should
5629    be replaced with this routine. but since thats not done yet
5630    we ifdef it out - dmq
5631 */
5632 STATIC U8 *
5633 S_reghop4(U8 *s, I32 off, const U8* llim, const U8* rlim)
5634 {
5635     dVAR;
5636     if (off >= 0) {
5637         while (off-- && s < rlim) {
5638             /* XXX could check well-formedness here */
5639             s += UTF8SKIP(s);
5640         }
5641     }
5642     else {
5643         while (off++ && s > llim) {
5644             s--;
5645             if (UTF8_IS_CONTINUED(*s)) {
5646                 while (s > llim && UTF8_IS_CONTINUATION(*s))
5647                     s--;
5648             }
5649             /* XXX could check well-formedness here */
5650         }
5651     }
5652     return s;
5653 }
5654 #endif
5655
5656 STATIC U8 *
5657 S_reghopmaybe3(U8* s, I32 off, const U8* lim)
5658 {
5659     dVAR;
5660     if (off >= 0) {
5661         while (off-- && s < lim) {
5662             /* XXX could check well-formedness here */
5663             s += UTF8SKIP(s);
5664         }
5665         if (off >= 0)
5666             return NULL;
5667     }
5668     else {
5669         while (off++ && s > lim) {
5670             s--;
5671             if (UTF8_IS_CONTINUED(*s)) {
5672                 while (s > lim && UTF8_IS_CONTINUATION(*s))
5673                     s--;
5674             }
5675             /* XXX could check well-formedness here */
5676         }
5677         if (off <= 0)
5678             return NULL;
5679     }
5680     return s;
5681 }
5682
5683 static void
5684 restore_pos(pTHX_ void *arg)
5685 {
5686     dVAR;
5687     regexp * const rex = (regexp *)arg;
5688     if (PL_reg_eval_set) {
5689         if (PL_reg_oldsaved) {
5690             rex->subbeg = PL_reg_oldsaved;
5691             rex->sublen = PL_reg_oldsavedlen;
5692 #ifdef PERL_OLD_COPY_ON_WRITE
5693             rex->saved_copy = PL_nrs;
5694 #endif
5695             RX_MATCH_COPIED_on(rex);
5696         }
5697         PL_reg_magic->mg_len = PL_reg_oldpos;
5698         PL_reg_eval_set = 0;
5699         PL_curpm = PL_reg_oldcurpm;
5700     }
5701 }
5702
5703 STATIC void
5704 S_to_utf8_substr(pTHX_ register regexp *prog)
5705 {
5706     int i = 1;
5707     do {
5708         if (prog->substrs->data[i].substr
5709             && !prog->substrs->data[i].utf8_substr) {
5710             SV* const sv = newSVsv(prog->substrs->data[i].substr);
5711             prog->substrs->data[i].utf8_substr = sv;
5712             sv_utf8_upgrade(sv);
5713             if (SvVALID(prog->substrs->data[i].substr)) {
5714                 const U8 flags = BmFLAGS(prog->substrs->data[i].substr);
5715                 if (flags & FBMcf_TAIL) {
5716                     /* Trim the trailing \n that fbm_compile added last
5717                        time.  */
5718                     SvCUR_set(sv, SvCUR(sv) - 1);
5719                     /* Whilst this makes the SV technically "invalid" (as its
5720                        buffer is no longer followed by "\0") when fbm_compile()
5721                        adds the "\n" back, a "\0" is restored.  */
5722                 }
5723                 fbm_compile(sv, flags);
5724             }
5725             if (prog->substrs->data[i].substr == prog->check_substr)
5726                 prog->check_utf8 = sv;
5727         }
5728     } while (i--);
5729 }
5730
5731 STATIC void
5732 S_to_byte_substr(pTHX_ register regexp *prog)
5733 {
5734     dVAR;
5735     int i = 1;
5736     do {
5737         if (prog->substrs->data[i].utf8_substr
5738             && !prog->substrs->data[i].substr) {
5739             SV* sv = newSVsv(prog->substrs->data[i].utf8_substr);
5740             if (sv_utf8_downgrade(sv, TRUE)) {
5741                 if (SvVALID(prog->substrs->data[i].utf8_substr)) {
5742                     const U8 flags
5743                         = BmFLAGS(prog->substrs->data[i].utf8_substr);
5744                     if (flags & FBMcf_TAIL) {
5745                         /* Trim the trailing \n that fbm_compile added last
5746                            time.  */
5747                         SvCUR_set(sv, SvCUR(sv) - 1);
5748                     }
5749                     fbm_compile(sv, flags);
5750                 }
5751             } else {
5752                 SvREFCNT_dec(sv);
5753                 sv = &PL_sv_undef;
5754             }
5755             prog->substrs->data[i].substr = sv;
5756             if (prog->substrs->data[i].utf8_substr == prog->check_utf8)
5757                 prog->check_substr = sv;
5758         }
5759     } while (i--);
5760 }
5761
5762 /*
5763  * Local variables:
5764  * c-indentation-style: bsd
5765  * c-basic-offset: 4
5766  * indent-tabs-mode: t
5767  * End:
5768  *
5769  * ex: set ts=8 sts=4 sw=4 noet:
5770  */