regexec.c

   1 /*    regexec.c
   2  */
   3
   4 /*
   5  *      One Ring to rule them all, One Ring to find them
   6  &
   7  *     [p.v of _The Lord of the Rings_, opening poem]
   8  *     [p.50 of _The Lord of the Rings_, I/iii: "The Shadow of the Past"]
   9  *     [p.254 of _The Lord of the Rings_, II/ii: "The Council of Elrond"]
  10  */
  11
  12 /* This file contains functions for executing a regular expression.  See
  13  * also regcomp.c which funnily enough, contains functions for compiling
  14  * a regular expression.
  15  *
  16  * This file is also copied at build time to ext/re/re_exec.c, where
  17  * it's built with -DPERL_EXT_RE_BUILD -DPERL_EXT_RE_DEBUG -DPERL_EXT.
  18  * This causes the main functions to be compiled under new names and with
  19  * debugging support added, which makes "use re 'debug'" work.
  20  */
  21
  22 /* NOTE: this is derived from Henry Spencer's regexp code, and should not
  23  * confused with the original package (see point 3 below).  Thanks, Henry!
  24  */
  25
  26 /* Additional note: this code is very heavily munged from Henry's version
  27  * in places.  In some spots I've traded clarity for efficiency, so don't
  28  * blame Henry for some of the lack of readability.
  29  */
  30
  31 /* The names of the functions have been changed from regcomp and
  32  * regexec to  pregcomp and pregexec in order to avoid conflicts
  33  * with the POSIX routines of the same names.
  34 */
  35
  36 #ifdef PERL_EXT_RE_BUILD
  37 #include "re_top.h"
  38 #endif
  39
  40 /*
  41  * pregcomp and pregexec -- regsub and regerror are not used in perl
  42  *
  43  *      Copyright (c) 1986 by University of Toronto.
  44  *      Written by Henry Spencer.  Not derived from licensed software.
  45  *
  46  *      Permission is granted to anyone to use this software for any
  47  *      purpose on any computer system, and to redistribute it freely,
  48  *      subject to the following restrictions:
  49  *
  50  *      1. The author is not responsible for the consequences of use of
  51  *              this software, no matter how awful, even if they arise
  52  *              from defects in it.
  53  *
  54  *      2. The origin of this software must not be misrepresented, either
  55  *              by explicit claim or by omission.
  56  *
  57  *      3. Altered versions must be plainly marked as such, and must not
  58  *              be misrepresented as being the original software.
  59  *
  60  ****    Alterations to Henry's code are...
  61  ****
  62  ****    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
  63  ****    2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008
  64  ****    by Larry Wall and others
  65  ****
  66  ****    You may distribute under the terms of either the GNU General Public
  67  ****    License or the Artistic License, as specified in the README file.
  68  *
  69  * Beware that some of this code is subtly aware of the way operator
  70  * precedence is structured in regular expressions.  Serious changes in
  71  * regular-expression syntax might require a total rethink.
  72  */
  73 #include "EXTERN.h"
  74 #define PERL_IN_REGEXEC_C
  75 #include "perl.h"
  76
  77 #ifdef PERL_IN_XSUB_RE
  78 #  include "re_comp.h"
  79 #else
  80 #  include "regcomp.h"
  81 #endif
  82
  83 #define RF_tainted      1               /* tainted information used? */
  84 #define RF_warned       2               /* warned about big count? */
  85
  86 #define RF_utf8         8               /* Pattern contains multibyte chars? */
  87
  88 #define UTF ((PL_reg_flags & RF_utf8) != 0)
  89
  90 #define RS_init         1               /* eval environment created */
  91 #define RS_set          2               /* replsv value is set */
  92
  93 #ifndef STATIC
  94 #define STATIC  static
  95 #endif
  96
  97 #define REGINCLASS(prog,p,c)  (ANYOF_FLAGS(p) ? reginclass(prog,p,c,0,0) : ANYOF_BITMAP_TEST(p,*(c)))
  98
  99 /*
 100  * Forwards.
 101  */
 102
 103 #define CHR_SVLEN(sv) (do_utf8 ? sv_len_utf8(sv) : SvCUR(sv))
 104 #define CHR_DIST(a,b) (PL_reg_match_utf8 ? utf8_distance(a,b) : a - b)
 105
 106 #define HOPc(pos,off) \
 107         (char *)(PL_reg_match_utf8 \
 108             ? reghop3((U8*)pos, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr)) \
 109             : (U8*)(pos + off))
 110 #define HOPBACKc(pos, off) \
 111         (char*)(PL_reg_match_utf8\
 112             ? reghopmaybe3((U8*)pos, -off, (U8*)PL_bostr) \
 113             : (pos - off >= PL_bostr)           \
 114                 ? (U8*)pos - off                \
 115                 : NULL)
 116
 117 #define HOP3(pos,off,lim) (PL_reg_match_utf8 ? reghop3((U8*)(pos), off, (U8*)(lim)) : (U8*)(pos + off))
 118 #define HOP3c(pos,off,lim) ((char*)HOP3(pos,off,lim))
 119
 120 #define LOAD_UTF8_CHARCLASS(class,str) STMT_START { \
 121     if (!CAT2(PL_utf8_,class)) { bool ok; ENTER; save_re_context(); ok=CAT2(is_utf8_,class)((const U8*)str); assert(ok); LEAVE; } } STMT_END
 122 #define LOAD_UTF8_CHARCLASS_ALNUM() LOAD_UTF8_CHARCLASS(alnum,"a")
 123 #define LOAD_UTF8_CHARCLASS_DIGIT() LOAD_UTF8_CHARCLASS(digit,"0")
 124 #define LOAD_UTF8_CHARCLASS_SPACE() LOAD_UTF8_CHARCLASS(space," ")
 125 #define LOAD_UTF8_CHARCLASS_MARK()  LOAD_UTF8_CHARCLASS(mark, "\xcd\x86")
 126
 127 /* TODO: Combine JUMPABLE and HAS_TEXT to cache OP(rn) */
 128
 129 /* for use after a quantifier and before an EXACT-like node -- japhy */
 130 /* it would be nice to rework regcomp.sym to generate this stuff. sigh */
 131 #define JUMPABLE(rn) (      \
 132     OP(rn) == OPEN ||       \
 133     (OP(rn) == CLOSE && (!cur_eval || cur_eval->u.eval.close_paren != ARG(rn))) || \
 134     OP(rn) == EVAL ||   \
 135     OP(rn) == SUSPEND || OP(rn) == IFMATCH || \
 136     OP(rn) == PLUS || OP(rn) == MINMOD || \
 137     OP(rn) == KEEPS || (PL_regkind[OP(rn)] == VERB) || \
 138     (PL_regkind[OP(rn)] == CURLY && ARG1(rn) > 0) \
 139 )
 140 #define IS_EXACT(rn) (PL_regkind[OP(rn)] == EXACT)
 141
 142 #define HAS_TEXT(rn) ( IS_EXACT(rn) || PL_regkind[OP(rn)] == REF )
 143
 144 #if 0
 145 /* Currently these are only used when PL_regkind[OP(rn)] == EXACT so
 146    we don't need this definition. */
 147 #define IS_TEXT(rn)   ( OP(rn)==EXACT   || OP(rn)==REF   || OP(rn)==NREF   )
 148 #define IS_TEXTF(rn)  ( OP(rn)==EXACTF  || OP(rn)==REFF  || OP(rn)==NREFF  )
 149 #define IS_TEXTFL(rn) ( OP(rn)==EXACTFL || OP(rn)==REFFL || OP(rn)==NREFFL )
 150
 151 #else
 152 /* ... so we use this as its faster. */
 153 #define IS_TEXT(rn)   ( OP(rn)==EXACT   )
 154 #define IS_TEXTF(rn)  ( OP(rn)==EXACTF  )
 155 #define IS_TEXTFL(rn) ( OP(rn)==EXACTFL )
 156
 157 #endif
 158
 159 /*
 160   Search for mandatory following text node; for lookahead, the text must
 161   follow but for lookbehind (rn->flags != 0) we skip to the next step.
 162 */
 163 #define FIND_NEXT_IMPT(rn) STMT_START { \
 164     while (JUMPABLE(rn)) { \
 165         const OPCODE type = OP(rn); \
 166         if (type == SUSPEND || PL_regkind[type] == CURLY) \
 167             rn = NEXTOPER(NEXTOPER(rn)); \
 168         else if (type == PLUS) \
 169             rn = NEXTOPER(rn); \
 170         else if (type == IFMATCH) \
 171             rn = (rn->flags == 0) ? NEXTOPER(NEXTOPER(rn)) : rn + ARG(rn); \
 172         else rn += NEXT_OFF(rn); \
 173     } \
 174 } STMT_END
 175
 176
 177 static void restore_pos(pTHX_ void *arg);
 178
 179 STATIC CHECKPOINT
 180 S_regcppush(pTHX_ I32 parenfloor)
 181 {
 182     dVAR;
 183     const int retval = PL_savestack_ix;
 184 #define REGCP_PAREN_ELEMS 4
 185     const int paren_elems_to_push = (PL_regsize - parenfloor) * REGCP_PAREN_ELEMS;
 186     int p;
 187     GET_RE_DEBUG_FLAGS_DECL;
 188
 189     if (paren_elems_to_push < 0)
 190         Perl_croak(aTHX_ "panic: paren_elems_to_push < 0");
 191
 192 #define REGCP_OTHER_ELEMS 7
 193     SSGROW(paren_elems_to_push + REGCP_OTHER_ELEMS);
 194
 195     for (p = PL_regsize; p > parenfloor; p--) {
 196 /* REGCP_PARENS_ELEMS are pushed per pairs of parentheses. */
 197         SSPUSHINT(PL_regoffs[p].end);
 198         SSPUSHINT(PL_regoffs[p].start);
 199         SSPUSHPTR(PL_reg_start_tmp[p]);
 200         SSPUSHINT(p);
 201         DEBUG_BUFFERS_r(PerlIO_printf(Perl_debug_log,
 202           "     saving \\%"UVuf" %"IVdf"(%"IVdf")..%"IVdf"\n",
 203                       (UV)p, (IV)PL_regoffs[p].start,
 204                       (IV)(PL_reg_start_tmp[p] - PL_bostr),
 205                       (IV)PL_regoffs[p].end
 206         ));
 207     }
 208 /* REGCP_OTHER_ELEMS are pushed in any case, parentheses or no. */
 209     SSPUSHPTR(PL_regoffs);
 210     SSPUSHINT(PL_regsize);
 211     SSPUSHINT(*PL_reglastparen);
 212     SSPUSHINT(*PL_reglastcloseparen);
 213     SSPUSHPTR(PL_reginput);
 214 #define REGCP_FRAME_ELEMS 2
 215 /* REGCP_FRAME_ELEMS are part of the REGCP_OTHER_ELEMS and
 216  * are needed for the regexp context stack bookkeeping. */
 217     SSPUSHINT(paren_elems_to_push + REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
 218     SSPUSHINT(SAVEt_REGCONTEXT); /* Magic cookie. */
 219
 220     return retval;
 221 }
 222
 223 /* These are needed since we do not localize EVAL nodes: */
 224 #define REGCP_SET(cp)                                           \
 225     DEBUG_STATE_r(                                              \
 226             PerlIO_printf(Perl_debug_log,                       \
 227                 "  Setting an EVAL scope, savestack=%"IVdf"\n", \
 228                 (IV)PL_savestack_ix));                          \
 229     cp = PL_savestack_ix
 230
 231 #define REGCP_UNWIND(cp)                                        \
 232     DEBUG_STATE_r(                                              \
 233         if (cp != PL_savestack_ix)                              \
 234             PerlIO_printf(Perl_debug_log,                       \
 235                 "  Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
 236                 (IV)(cp), (IV)PL_savestack_ix));                \
 237     regcpblow(cp)
 238
 239 STATIC char *
 240 S_regcppop(pTHX_ const regexp *rex)
 241 {
 242     dVAR;
 243     U32 i;
 244     char *input;
 245     GET_RE_DEBUG_FLAGS_DECL;
 246
 247     PERL_ARGS_ASSERT_REGCPPOP;
 248
 249     /* Pop REGCP_OTHER_ELEMS before the parentheses loop starts. */
 250     i = SSPOPINT;
 251     assert(i == SAVEt_REGCONTEXT); /* Check that the magic cookie is there. */
 252     i = SSPOPINT; /* Parentheses elements to pop. */
 253     input = (char *) SSPOPPTR;
 254     *PL_reglastcloseparen = SSPOPINT;
 255     *PL_reglastparen = SSPOPINT;
 256     PL_regsize = SSPOPINT;
 257     PL_regoffs=(regexp_paren_pair *) SSPOPPTR;
 258
 259
 260     /* Now restore the parentheses context. */
 261     for (i -= (REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
 262          i > 0; i -= REGCP_PAREN_ELEMS) {
 263         I32 tmps;
 264         U32 paren = (U32)SSPOPINT;
 265         PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
 266         PL_regoffs[paren].start = SSPOPINT;
 267         tmps = SSPOPINT;
 268         if (paren <= *PL_reglastparen)
 269             PL_regoffs[paren].end = tmps;
 270         DEBUG_BUFFERS_r(
 271             PerlIO_printf(Perl_debug_log,
 272                           "     restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
 273                           (UV)paren, (IV)PL_regoffs[paren].start,
 274                           (IV)(PL_reg_start_tmp[paren] - PL_bostr),
 275                           (IV)PL_regoffs[paren].end,
 276                           (paren > *PL_reglastparen ? "(no)" : ""));
 277         );
 278     }
 279     DEBUG_BUFFERS_r(
 280         if (*PL_reglastparen + 1 <= rex->nparens) {
 281             PerlIO_printf(Perl_debug_log,
 282                           "     restoring \\%"IVdf"..\\%"IVdf" to undef\n",
 283                           (IV)(*PL_reglastparen + 1), (IV)rex->nparens);
 284         }
 285     );
 286 #if 1
 287     /* It would seem that the similar code in regtry()
 288      * already takes care of this, and in fact it is in
 289      * a better location to since this code can #if 0-ed out
 290      * but the code in regtry() is needed or otherwise tests
 291      * requiring null fields (pat.t#187 and split.t#{13,14}
 292      * (as of patchlevel 7877)  will fail.  Then again,
 293      * this code seems to be necessary or otherwise
 294      * this erroneously leaves $1 defined: "1" =~ /^(?:(\d)x)?\d$/
 295      * --jhi updated by dapm */
 296     for (i = *PL_reglastparen + 1; i <= rex->nparens; i++) {
 297         if (i > PL_regsize)
 298             PL_regoffs[i].start = -1;
 299         PL_regoffs[i].end = -1;
 300     }
 301 #endif
 302     return input;
 303 }
 304
 305 #define regcpblow(cp) LEAVE_SCOPE(cp)   /* Ignores regcppush()ed data. */
 306
 307 /*
 308  * pregexec and friends
 309  */
 310
 311 #ifndef PERL_IN_XSUB_RE
 312 /*
 313  - pregexec - match a regexp against a string
 314  */
 315 I32
 316 Perl_pregexec(pTHX_ REGEXP * const prog, char* stringarg, register char *strend,
 317          char *strbeg, I32 minend, SV *screamer, U32 nosave)
 318 /* strend: pointer to null at end of string */
 319 /* strbeg: real beginning of string */
 320 /* minend: end of match must be >=minend after stringarg. */
 321 /* nosave: For optimizations. */
 322 {
 323     PERL_ARGS_ASSERT_PREGEXEC;
 324
 325     return
 326         regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
 327                       nosave ? 0 : REXEC_COPY_STR);
 328 }
 329 #endif
 330
 331 /*
 332  * Need to implement the following flags for reg_anch:
 333  *
 334  * USE_INTUIT_NOML              - Useful to call re_intuit_start() first
 335  * USE_INTUIT_ML
 336  * INTUIT_AUTORITATIVE_NOML     - Can trust a positive answer
 337  * INTUIT_AUTORITATIVE_ML
 338  * INTUIT_ONCE_NOML             - Intuit can match in one location only.
 339  * INTUIT_ONCE_ML
 340  *
 341  * Another flag for this function: SECOND_TIME (so that float substrs
 342  * with giant delta may be not rechecked).
 343  */
 344
 345 /* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
 346
 347 /* If SCREAM, then SvPVX_const(sv) should be compatible with strpos and strend.
 348    Otherwise, only SvCUR(sv) is used to get strbeg. */
 349
 350 /* XXXX We assume that strpos is strbeg unless sv. */
 351
 352 /* XXXX Some places assume that there is a fixed substring.
 353         An update may be needed if optimizer marks as "INTUITable"
 354         RExen without fixed substrings.  Similarly, it is assumed that
 355         lengths of all the strings are no more than minlen, thus they
 356         cannot come from lookahead.
 357         (Or minlen should take into account lookahead.)
 358   NOTE: Some of this comment is not correct. minlen does now take account
 359   of lookahead/behind. Further research is required. -- demerphq
 360
 361 */
 362
 363 /* A failure to find a constant substring means that there is no need to make
 364    an expensive call to REx engine, thus we celebrate a failure.  Similarly,
 365    finding a substring too deep into the string means that less calls to
 366    regtry() should be needed.
 367
 368    REx compiler's optimizer found 4 possible hints:
 369         a) Anchored substring;
 370         b) Fixed substring;
 371         c) Whether we are anchored (beginning-of-line or \G);
 372         d) First node (of those at offset 0) which may distingush positions;
 373    We use a)b)d) and multiline-part of c), and try to find a position in the
 374    string which does not contradict any of them.
 375  */
 376
 377 /* Most of decisions we do here should have been done at compile time.
 378    The nodes of the REx which we used for the search should have been
 379    deleted from the finite automaton. */
 380
 381 char *
 382 Perl_re_intuit_start(pTHX_ REGEXP * const rx, SV *sv, char *strpos,
 383                      char *strend, const U32 flags, re_scream_pos_data *data)
 384 {
 385     dVAR;
 386     struct regexp *const prog = (struct regexp *)SvANY(rx);
 387     register I32 start_shift = 0;
 388     /* Should be nonnegative! */
 389     register I32 end_shift   = 0;
 390     register char *s;
 391     register SV *check;
 392     char *strbeg;
 393     char *t;
 394     const bool do_utf8 = (sv && SvUTF8(sv)) ? 1 : 0; /* if no sv we have to assume bytes */
 395     I32 ml_anch;
 396     register char *other_last = NULL;   /* other substr checked before this */
 397     char *check_at = NULL;              /* check substr found at this pos */
 398     const I32 multiline = prog->extflags & RXf_PMf_MULTILINE;
 399     RXi_GET_DECL(prog,progi);
 400 #ifdef DEBUGGING
 401     const char * const i_strpos = strpos;
 402 #endif
 403     GET_RE_DEBUG_FLAGS_DECL;
 404
 405     PERL_ARGS_ASSERT_RE_INTUIT_START;
 406
 407     RX_MATCH_UTF8_set(rx,do_utf8);
 408
 409     if (RX_UTF8(rx)) {
 410         PL_reg_flags |= RF_utf8;
 411     }
 412     DEBUG_EXECUTE_r(
 413         debug_start_match(rx, do_utf8, strpos, strend,
 414             sv ? "Guessing start of match in sv for"
 415                : "Guessing start of match in string for");
 416               );
 417
 418     /* CHR_DIST() would be more correct here but it makes things slow. */
 419     if (prog->minlen > strend - strpos) {
 420         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 421                               "String too short... [re_intuit_start]\n"));
 422         goto fail;
 423     }
 424
 425     strbeg = (sv && SvPOK(sv)) ? strend - SvCUR(sv) : strpos;
 426     PL_regeol = strend;
 427     if (do_utf8) {
 428         if (!prog->check_utf8 && prog->check_substr)
 429             to_utf8_substr(prog);
 430         check = prog->check_utf8;
 431     } else {
 432         if (!prog->check_substr && prog->check_utf8)
 433             to_byte_substr(prog);
 434         check = prog->check_substr;
 435     }
 436     if (check == &PL_sv_undef) {
 437         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 438                 "Non-utf8 string cannot match utf8 check string\n"));
 439         goto fail;
 440     }
 441     if (prog->extflags & RXf_ANCH) {    /* Match at beg-of-str or after \n */
 442         ml_anch = !( (prog->extflags & RXf_ANCH_SINGLE)
 443                      || ( (prog->extflags & RXf_ANCH_BOL)
 444                           && !multiline ) );    /* Check after \n? */
 445
 446         if (!ml_anch) {
 447           if ( !(prog->extflags & RXf_ANCH_GPOS) /* Checked by the caller */
 448                 && !(prog->intflags & PREGf_IMPLICIT) /* not a real BOL */
 449                /* SvCUR is not set on references: SvRV and SvPVX_const overlap */
 450                && sv && !SvROK(sv)
 451                && (strpos != strbeg)) {
 452               DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
 453               goto fail;
 454           }
 455           if (prog->check_offset_min == prog->check_offset_max &&
 456               !(prog->extflags & RXf_CANY_SEEN)) {
 457             /* Substring at constant offset from beg-of-str... */
 458             I32 slen;
 459
 460             s = HOP3c(strpos, prog->check_offset_min, strend);
 461
 462             if (SvTAIL(check)) {
 463                 slen = SvCUR(check);    /* >= 1 */
 464
 465                 if ( strend - s > slen || strend - s < slen - 1
 466                      || (strend - s == slen && strend[-1] != '\n')) {
 467                     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
 468                     goto fail_finish;
 469                 }
 470                 /* Now should match s[0..slen-2] */
 471                 slen--;
 472                 if (slen && (*SvPVX_const(check) != *s
 473                              || (slen > 1
 474                                  && memNE(SvPVX_const(check), s, slen)))) {
 475                   report_neq:
 476                     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
 477                     goto fail_finish;
 478                 }
 479             }
 480             else if (*SvPVX_const(check) != *s
 481                      || ((slen = SvCUR(check)) > 1
 482                          && memNE(SvPVX_const(check), s, slen)))
 483                 goto report_neq;
 484             check_at = s;
 485             goto success_at_start;
 486           }
 487         }
 488         /* Match is anchored, but substr is not anchored wrt beg-of-str. */
 489         s = strpos;
 490         start_shift = prog->check_offset_min; /* okay to underestimate on CC */
 491         end_shift = prog->check_end_shift;
 492
 493         if (!ml_anch) {
 494             const I32 end = prog->check_offset_max + CHR_SVLEN(check)
 495                                          - (SvTAIL(check) != 0);
 496             const I32 eshift = CHR_DIST((U8*)strend, (U8*)s) - end;
 497
 498             if (end_shift < eshift)
 499                 end_shift = eshift;
 500         }
 501     }
 502     else {                              /* Can match at random position */
 503         ml_anch = 0;
 504         s = strpos;
 505         start_shift = prog->check_offset_min;  /* okay to underestimate on CC */
 506         end_shift = prog->check_end_shift;
 507
 508         /* end shift should be non negative here */
 509     }
 510
 511 #ifdef QDEBUGGING       /* 7/99: reports of failure (with the older version) */
 512     if (end_shift < 0)
 513         Perl_croak(aTHX_ "panic: end_shift: %"IVdf" pattern:\n%s\n ",
 514                    (IV)end_shift, RX_PRECOMP(prog));
 515 #endif
 516
 517   restart:
 518     /* Find a possible match in the region s..strend by looking for
 519        the "check" substring in the region corrected by start/end_shift. */
 520
 521     {
 522         I32 srch_start_shift = start_shift;
 523         I32 srch_end_shift = end_shift;
 524         if (srch_start_shift < 0 && strbeg - s > srch_start_shift) {
 525             srch_end_shift -= ((strbeg - s) - srch_start_shift);
 526             srch_start_shift = strbeg - s;
 527         }
 528     DEBUG_OPTIMISE_MORE_r({
 529         PerlIO_printf(Perl_debug_log, "Check offset min: %"IVdf" Start shift: %"IVdf" End shift %"IVdf" Real End Shift: %"IVdf"\n",
 530             (IV)prog->check_offset_min,
 531             (IV)srch_start_shift,
 532             (IV)srch_end_shift,
 533             (IV)prog->check_end_shift);
 534     });
 535
 536     if (flags & REXEC_SCREAM) {
 537         I32 p = -1;                     /* Internal iterator of scream. */
 538         I32 * const pp = data ? data->scream_pos : &p;
 539
 540         if (PL_screamfirst[BmRARE(check)] >= 0
 541             || ( BmRARE(check) == '\n'
 542                  && (BmPREVIOUS(check) == SvCUR(check) - 1)
 543                  && SvTAIL(check) ))
 544             s = screaminstr(sv, check,
 545                             srch_start_shift + (s - strbeg), srch_end_shift, pp, 0);
 546         else
 547             goto fail_finish;
 548         /* we may be pointing at the wrong string */
 549         if (s && RXp_MATCH_COPIED(prog))
 550             s = strbeg + (s - SvPVX_const(sv));
 551         if (data)
 552             *data->scream_olds = s;
 553     }
 554     else {
 555         U8* start_point;
 556         U8* end_point;
 557         if (prog->extflags & RXf_CANY_SEEN) {
 558             start_point= (U8*)(s + srch_start_shift);
 559             end_point= (U8*)(strend - srch_end_shift);
 560         } else {
 561             start_point= HOP3(s, srch_start_shift, srch_start_shift < 0 ? strbeg : strend);
 562             end_point= HOP3(strend, -srch_end_shift, strbeg);
 563         }
 564         DEBUG_OPTIMISE_MORE_r({
 565             PerlIO_printf(Perl_debug_log, "fbm_instr len=%d str=<%.*s>\n",
 566                 (int)(end_point - start_point),
 567                 (int)(end_point - start_point) > 20 ? 20 : (int)(end_point - start_point),
 568                 start_point);
 569         });
 570
 571         s = fbm_instr( start_point, end_point,
 572                       check, multiline ? FBMrf_MULTILINE : 0);
 573     }
 574     }
 575     /* Update the count-of-usability, remove useless subpatterns,
 576         unshift s.  */
 577
 578     DEBUG_EXECUTE_r({
 579         RE_PV_QUOTED_DECL(quoted, do_utf8, PERL_DEBUG_PAD_ZERO(0),
 580             SvPVX_const(check), RE_SV_DUMPLEN(check), 30);
 581         PerlIO_printf(Perl_debug_log, "%s %s substr %s%s%s",
 582                           (s ? "Found" : "Did not find"),
 583             (check == (do_utf8 ? prog->anchored_utf8 : prog->anchored_substr)
 584                 ? "anchored" : "floating"),
 585             quoted,
 586             RE_SV_TAIL(check),
 587             (s ? " at offset " : "...\n") );
 588     });
 589
 590     if (!s)
 591         goto fail_finish;
 592     /* Finish the diagnostic message */
 593     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
 594
 595     /* XXX dmq: first branch is for positive lookbehind...
 596        Our check string is offset from the beginning of the pattern.
 597        So we need to do any stclass tests offset forward from that
 598        point. I think. :-(
 599      */
 600
 601
 602
 603     check_at=s;
 604
 605
 606     /* Got a candidate.  Check MBOL anchoring, and the *other* substr.
 607        Start with the other substr.
 608        XXXX no SCREAM optimization yet - and a very coarse implementation
 609        XXXX /ttx+/ results in anchored="ttx", floating="x".  floating will
 610                 *always* match.  Probably should be marked during compile...
 611        Probably it is right to do no SCREAM here...
 612      */
 613
 614     if (do_utf8 ? (prog->float_utf8 && prog->anchored_utf8)
 615                 : (prog->float_substr && prog->anchored_substr))
 616     {
 617         /* Take into account the "other" substring. */
 618         /* XXXX May be hopelessly wrong for UTF... */
 619         if (!other_last)
 620             other_last = strpos;
 621         if (check == (do_utf8 ? prog->float_utf8 : prog->float_substr)) {
 622           do_other_anchored:
 623             {
 624                 char * const last = HOP3c(s, -start_shift, strbeg);
 625                 char *last1, *last2;
 626                 char * const saved_s = s;
 627                 SV* must;
 628
 629                 t = s - prog->check_offset_max;
 630                 if (s - strpos > prog->check_offset_max  /* signed-corrected t > strpos */
 631                     && (!do_utf8
 632                         || ((t = (char*)reghopmaybe3((U8*)s, -(prog->check_offset_max), (U8*)strpos))
 633                             && t > strpos)))
 634                     NOOP;
 635                 else
 636                     t = strpos;
 637                 t = HOP3c(t, prog->anchored_offset, strend);
 638                 if (t < other_last)     /* These positions already checked */
 639                     t = other_last;
 640                 last2 = last1 = HOP3c(strend, -prog->minlen, strbeg);
 641                 if (last < last1)
 642                     last1 = last;
 643                 /* XXXX It is not documented what units *_offsets are in.
 644                    We assume bytes, but this is clearly wrong.
 645                    Meaning this code needs to be carefully reviewed for errors.
 646                    dmq.
 647                   */
 648
 649                 /* On end-of-str: see comment below. */
 650                 must = do_utf8 ? prog->anchored_utf8 : prog->anchored_substr;
 651                 if (must == &PL_sv_undef) {
 652                     s = (char*)NULL;
 653                     DEBUG_r(must = prog->anchored_utf8);        /* for debug */
 654                 }
 655                 else
 656                     s = fbm_instr(
 657                         (unsigned char*)t,
 658                         HOP3(HOP3(last1, prog->anchored_offset, strend)
 659                                 + SvCUR(must), -(SvTAIL(must)!=0), strbeg),
 660                         must,
 661                         multiline ? FBMrf_MULTILINE : 0
 662                     );
 663                 DEBUG_EXECUTE_r({
 664                     RE_PV_QUOTED_DECL(quoted, do_utf8, PERL_DEBUG_PAD_ZERO(0),
 665                         SvPVX_const(must), RE_SV_DUMPLEN(must), 30);
 666                     PerlIO_printf(Perl_debug_log, "%s anchored substr %s%s",
 667                         (s ? "Found" : "Contradicts"),
 668                         quoted, RE_SV_TAIL(must));
 669                 });
 670
 671
 672                 if (!s) {
 673                     if (last1 >= last2) {
 674                         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 675                                                 ", giving up...\n"));
 676                         goto fail_finish;
 677                     }
 678                     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 679                         ", trying floating at offset %ld...\n",
 680                         (long)(HOP3c(saved_s, 1, strend) - i_strpos)));
 681                     other_last = HOP3c(last1, prog->anchored_offset+1, strend);
 682                     s = HOP3c(last, 1, strend);
 683                     goto restart;
 684                 }
 685                 else {
 686                     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
 687                           (long)(s - i_strpos)));
 688                     t = HOP3c(s, -prog->anchored_offset, strbeg);
 689                     other_last = HOP3c(s, 1, strend);
 690                     s = saved_s;
 691                     if (t == strpos)
 692                         goto try_at_start;
 693                     goto try_at_offset;
 694                 }
 695             }
 696         }
 697         else {          /* Take into account the floating substring. */
 698             char *last, *last1;
 699             char * const saved_s = s;
 700             SV* must;
 701
 702             t = HOP3c(s, -start_shift, strbeg);
 703             last1 = last =
 704                 HOP3c(strend, -prog->minlen + prog->float_min_offset, strbeg);
 705             if (CHR_DIST((U8*)last, (U8*)t) > prog->float_max_offset)
 706                 last = HOP3c(t, prog->float_max_offset, strend);
 707             s = HOP3c(t, prog->float_min_offset, strend);
 708             if (s < other_last)
 709                 s = other_last;
 710  /* XXXX It is not documented what units *_offsets are in.  Assume bytes.  */
 711             must = do_utf8 ? prog->float_utf8 : prog->float_substr;
 712             /* fbm_instr() takes into account exact value of end-of-str
 713                if the check is SvTAIL(ed).  Since false positives are OK,
 714                and end-of-str is not later than strend we are OK. */
 715             if (must == &PL_sv_undef) {
 716                 s = (char*)NULL;
 717                 DEBUG_r(must = prog->float_utf8);       /* for debug message */
 718             }
 719             else
 720                 s = fbm_instr((unsigned char*)s,
 721                               (unsigned char*)last + SvCUR(must)
 722                                   - (SvTAIL(must)!=0),
 723                               must, multiline ? FBMrf_MULTILINE : 0);
 724             DEBUG_EXECUTE_r({
 725                 RE_PV_QUOTED_DECL(quoted, do_utf8, PERL_DEBUG_PAD_ZERO(0),
 726                     SvPVX_const(must), RE_SV_DUMPLEN(must), 30);
 727                 PerlIO_printf(Perl_debug_log, "%s floating substr %s%s",
 728                     (s ? "Found" : "Contradicts"),
 729                     quoted, RE_SV_TAIL(must));
 730             });
 731             if (!s) {
 732                 if (last1 == last) {
 733                     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 734                                             ", giving up...\n"));
 735                     goto fail_finish;
 736                 }
 737                 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 738                     ", trying anchored starting at offset %ld...\n",
 739                     (long)(saved_s + 1 - i_strpos)));
 740                 other_last = last;
 741                 s = HOP3c(t, 1, strend);
 742                 goto restart;
 743             }
 744             else {
 745                 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
 746                       (long)(s - i_strpos)));
 747                 other_last = s; /* Fix this later. --Hugo */
 748                 s = saved_s;
 749                 if (t == strpos)
 750                     goto try_at_start;
 751                 goto try_at_offset;
 752             }
 753         }
 754     }
 755
 756
 757     t= (char*)HOP3( s, -prog->check_offset_max, (prog->check_offset_max<0) ? strend : strpos);
 758
 759     DEBUG_OPTIMISE_MORE_r(
 760         PerlIO_printf(Perl_debug_log,
 761             "Check offset min:%"IVdf" max:%"IVdf" S:%"IVdf" t:%"IVdf" D:%"IVdf" end:%"IVdf"\n",
 762             (IV)prog->check_offset_min,
 763             (IV)prog->check_offset_max,
 764             (IV)(s-strpos),
 765             (IV)(t-strpos),
 766             (IV)(t-s),
 767             (IV)(strend-strpos)
 768         )
 769     );
 770
 771     if (s - strpos > prog->check_offset_max  /* signed-corrected t > strpos */
 772         && (!do_utf8
 773             || ((t = (char*)reghopmaybe3((U8*)s, -prog->check_offset_max, (U8*) ((prog->check_offset_max<0) ? strend : strpos)))
 774                  && t > strpos)))
 775     {
 776         /* Fixed substring is found far enough so that the match
 777            cannot start at strpos. */
 778       try_at_offset:
 779         if (ml_anch && t[-1] != '\n') {
 780             /* Eventually fbm_*() should handle this, but often
 781                anchored_offset is not 0, so this check will not be wasted. */
 782             /* XXXX In the code below we prefer to look for "^" even in
 783                presence of anchored substrings.  And we search even
 784                beyond the found float position.  These pessimizations
 785                are historical artefacts only.  */
 786           find_anchor:
 787             while (t < strend - prog->minlen) {
 788                 if (*t == '\n') {
 789                     if (t < check_at - prog->check_offset_min) {
 790                         if (do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) {
 791                             /* Since we moved from the found position,
 792                                we definitely contradict the found anchored
 793                                substr.  Due to the above check we do not
 794                                contradict "check" substr.
 795                                Thus we can arrive here only if check substr
 796                                is float.  Redo checking for "other"=="fixed".
 797                              */
 798                             strpos = t + 1;
 799                             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
 800                                 PL_colors[0], PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
 801                             goto do_other_anchored;
 802                         }
 803                         /* We don't contradict the found floating substring. */
 804                         /* XXXX Why not check for STCLASS? */
 805                         s = t + 1;
 806                         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
 807                             PL_colors[0], PL_colors[1], (long)(s - i_strpos)));
 808                         goto set_useful;
 809                     }
 810                     /* Position contradicts check-string */
 811                     /* XXXX probably better to look for check-string
 812                        than for "\n", so one should lower the limit for t? */
 813                     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n",
 814                         PL_colors[0], PL_colors[1], (long)(t + 1 - i_strpos)));
 815                     other_last = strpos = s = t + 1;
 816                     goto restart;
 817                 }
 818                 t++;
 819             }
 820             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
 821                         PL_colors[0], PL_colors[1]));
 822             goto fail_finish;
 823         }
 824         else {
 825             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Starting position does not contradict /%s^%s/m...\n",
 826                         PL_colors[0], PL_colors[1]));
 827         }
 828         s = t;
 829       set_useful:
 830         ++BmUSEFUL(do_utf8 ? prog->check_utf8 : prog->check_substr);    /* hooray/5 */
 831     }
 832     else {
 833         /* The found string does not prohibit matching at strpos,
 834            - no optimization of calling REx engine can be performed,
 835            unless it was an MBOL and we are not after MBOL,
 836            or a future STCLASS check will fail this. */
 837       try_at_start:
 838         /* Even in this situation we may use MBOL flag if strpos is offset
 839            wrt the start of the string. */
 840         if (ml_anch && sv && !SvROK(sv) /* See prev comment on SvROK */
 841             && (strpos != strbeg) && strpos[-1] != '\n'
 842             /* May be due to an implicit anchor of m{.*foo}  */
 843             && !(prog->intflags & PREGf_IMPLICIT))
 844         {
 845             t = strpos;
 846             goto find_anchor;
 847         }
 848         DEBUG_EXECUTE_r( if (ml_anch)
 849             PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n",
 850                           (long)(strpos - i_strpos), PL_colors[0], PL_colors[1]);
 851         );
 852       success_at_start:
 853         if (!(prog->intflags & PREGf_NAUGHTY)   /* XXXX If strpos moved? */
 854             && (do_utf8 ? (
 855                 prog->check_utf8                /* Could be deleted already */
 856                 && --BmUSEFUL(prog->check_utf8) < 0
 857                 && (prog->check_utf8 == prog->float_utf8)
 858             ) : (
 859                 prog->check_substr              /* Could be deleted already */
 860                 && --BmUSEFUL(prog->check_substr) < 0
 861                 && (prog->check_substr == prog->float_substr)
 862             )))
 863         {
 864             /* If flags & SOMETHING - do not do it many times on the same match */
 865             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "... Disabling check substring...\n"));
 866             SvREFCNT_dec(do_utf8 ? prog->check_utf8 : prog->check_substr);
 867             if (do_utf8 ? prog->check_substr : prog->check_utf8)
 868                 SvREFCNT_dec(do_utf8 ? prog->check_substr : prog->check_utf8);
 869             prog->check_substr = prog->check_utf8 = NULL;       /* disable */
 870             prog->float_substr = prog->float_utf8 = NULL;       /* clear */
 871             check = NULL;                       /* abort */
 872             s = strpos;
 873             /* XXXX This is a remnant of the old implementation.  It
 874                     looks wasteful, since now INTUIT can use many
 875                     other heuristics. */
 876             prog->extflags &= ~RXf_USE_INTUIT;
 877         }
 878         else
 879             s = strpos;
 880     }
 881
 882     /* Last resort... */
 883     /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
 884     /* trie stclasses are too expensive to use here, we are better off to
 885        leave it to regmatch itself */
 886     if (progi->regstclass && PL_regkind[OP(progi->regstclass)]!=TRIE) {
 887         /* minlen == 0 is possible if regstclass is \b or \B,
 888            and the fixed substr is ''$.
 889            Since minlen is already taken into account, s+1 is before strend;
 890            accidentally, minlen >= 1 guaranties no false positives at s + 1
 891            even for \b or \B.  But (minlen? 1 : 0) below assumes that
 892            regstclass does not come from lookahead...  */
 893         /* If regstclass takes bytelength more than 1: If charlength==1, OK.
 894            This leaves EXACTF only, which is dealt with in find_byclass().  */
 895         const U8* const str = (U8*)STRING(progi->regstclass);
 896         const int cl_l = (PL_regkind[OP(progi->regstclass)] == EXACT
 897                     ? CHR_DIST(str+STR_LEN(progi->regstclass), str)
 898                     : 1);
 899         char * endpos;
 900         if (prog->anchored_substr || prog->anchored_utf8 || ml_anch)
 901             endpos= HOP3c(s, (prog->minlen ? cl_l : 0), strend);
 902         else if (prog->float_substr || prog->float_utf8)
 903             endpos= HOP3c(HOP3c(check_at, -start_shift, strbeg), cl_l, strend);
 904         else
 905             endpos= strend;
 906
 907         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "start_shift: %"IVdf" check_at: %"IVdf" s: %"IVdf" endpos: %"IVdf"\n",
 908                                       (IV)start_shift, (IV)(check_at - strbeg), (IV)(s - strbeg), (IV)(endpos - strbeg)));
 909
 910         t = s;
 911         s = find_byclass(prog, progi->regstclass, s, endpos, NULL);
 912         if (!s) {
 913 #ifdef DEBUGGING
 914             const char *what = NULL;
 915 #endif
 916             if (endpos == strend) {
 917                 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
 918                                 "Could not match STCLASS...\n") );
 919                 goto fail;
 920             }
 921             DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
 922                                    "This position contradicts STCLASS...\n") );
 923             if ((prog->extflags & RXf_ANCH) && !ml_anch)
 924                 goto fail;
 925             /* Contradict one of substrings */
 926             if (prog->anchored_substr || prog->anchored_utf8) {
 927                 if ((do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) == check) {
 928                     DEBUG_EXECUTE_r( what = "anchored" );
 929                   hop_and_restart:
 930                     s = HOP3c(t, 1, strend);
 931                     if (s + start_shift + end_shift > strend) {
 932                         /* XXXX Should be taken into account earlier? */
 933                         DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
 934                                                "Could not match STCLASS...\n") );
 935                         goto fail;
 936                     }
 937                     if (!check)
 938                         goto giveup;
 939                     DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
 940                                 "Looking for %s substr starting at offset %ld...\n",
 941                                  what, (long)(s + start_shift - i_strpos)) );
 942                     goto restart;
 943                 }
 944                 /* Have both, check_string is floating */
 945                 if (t + start_shift >= check_at) /* Contradicts floating=check */
 946                     goto retry_floating_check;
 947                 /* Recheck anchored substring, but not floating... */
 948                 s = check_at;
 949                 if (!check)
 950                     goto giveup;
 951                 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
 952                           "Looking for anchored substr starting at offset %ld...\n",
 953                           (long)(other_last - i_strpos)) );
 954                 goto do_other_anchored;
 955             }
 956             /* Another way we could have checked stclass at the
 957                current position only: */
 958             if (ml_anch) {
 959                 s = t = t + 1;
 960                 if (!check)
 961                     goto giveup;
 962                 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
 963                           "Looking for /%s^%s/m starting at offset %ld...\n",
 964                           PL_colors[0], PL_colors[1], (long)(t - i_strpos)) );
 965                 goto try_at_offset;
 966             }
 967             if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))     /* Could have been deleted */
 968                 goto fail;
 969             /* Check is floating subtring. */
 970           retry_floating_check:
 971             t = check_at - start_shift;
 972             DEBUG_EXECUTE_r( what = "floating" );
 973             goto hop_and_restart;
 974         }
 975         if (t != s) {
 976             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 977                         "By STCLASS: moving %ld --> %ld\n",
 978                                   (long)(t - i_strpos), (long)(s - i_strpos))
 979                    );
 980         }
 981         else {
 982             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 983                                   "Does not contradict STCLASS...\n");
 984                    );
 985         }
 986     }
 987   giveup:
 988     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%s%s:%s match at offset %ld\n",
 989                           PL_colors[4], (check ? "Guessed" : "Giving up"),
 990                           PL_colors[5], (long)(s - i_strpos)) );
 991     return s;
 992
 993   fail_finish:                          /* Substring not found */
 994     if (prog->check_substr || prog->check_utf8)         /* could be removed already */
 995         BmUSEFUL(do_utf8 ? prog->check_utf8 : prog->check_substr) += 5; /* hooray */
 996   fail:
 997     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
 998                           PL_colors[4], PL_colors[5]));
 999     return NULL;
1000 }
1001
1002 #define DECL_TRIE_TYPE(scan) \
1003     const enum { trie_plain, trie_utf8, trie_utf8_fold, trie_latin_utf8_fold } \
1004                     trie_type = (scan->flags != EXACT) \
1005                               ? (do_utf8 ? trie_utf8_fold : (UTF ? trie_latin_utf8_fold : trie_plain)) \
1006                               : (do_utf8 ? trie_utf8 : trie_plain)
1007
1008 #define REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc, uscan, len,  \
1009 uvc, charid, foldlen, foldbuf, uniflags) STMT_START {                       \
1010     UV uvc_unfolded = 0;                                                    \
1011     switch (trie_type) {                                                    \
1012     case trie_utf8_fold:                                                    \
1013         if ( foldlen>0 ) {                                                  \
1014             uvc_unfolded = uvc = utf8n_to_uvuni( uscan, UTF8_MAXLEN, &len, uniflags ); \
1015             foldlen -= len;                                                 \
1016             uscan += len;                                                   \
1017             len=0;                                                          \
1018         } else {                                                            \
1019             uvc_unfolded = uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN, &len, uniflags ); \
1020             uvc = to_uni_fold( uvc, foldbuf, &foldlen );                    \
1021             foldlen -= UNISKIP( uvc );                                      \
1022             uscan = foldbuf + UNISKIP( uvc );                               \
1023         }                                                                   \
1024         break;                                                              \
1025     case trie_latin_utf8_fold:                                              \
1026         if ( foldlen>0 ) {                                                  \
1027             uvc = utf8n_to_uvuni( uscan, UTF8_MAXLEN, &len, uniflags );     \
1028             foldlen -= len;                                                 \
1029             uscan += len;                                                   \
1030             len=0;                                                          \
1031         } else {                                                            \
1032             len = 1;                                                        \
1033             uvc = to_uni_fold( *(U8*)uc, foldbuf, &foldlen );               \
1034             foldlen -= UNISKIP( uvc );                                      \
1035             uscan = foldbuf + UNISKIP( uvc );                               \
1036         }                                                                   \
1037         break;                                                              \
1038     case trie_utf8:                                                         \
1039         uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN, &len, uniflags );       \
1040         break;                                                              \
1041     case trie_plain:                                                        \
1042         uvc = (UV)*uc;                                                      \
1043         len = 1;                                                            \
1044     }                                                                       \
1045                                                                             \
1046     if (uvc < 256) {                                                        \
1047         charid = trie->charmap[ uvc ];                                      \
1048     }                                                                       \
1049     else {                                                                  \
1050         charid = 0;                                                         \
1051         if (widecharmap) {                                                  \
1052             SV** const svpp = hv_fetch(widecharmap,                         \
1053                         (char*)&uvc, sizeof(UV), 0);                        \
1054             if (svpp)                                                       \
1055                 charid = (U16)SvIV(*svpp);                                  \
1056         }                                                                   \
1057     }                                                                       \
1058     if (!charid && trie_type == trie_utf8_fold && !UTF) {                   \
1059         charid = trie->charmap[uvc_unfolded];                               \
1060     }                                                                       \
1061 } STMT_END
1062
1063 #define REXEC_FBC_EXACTISH_CHECK(CoNd)                 \
1064 {                                                      \
1065     char *my_strend= (char *)strend;                   \
1066     if ( (CoNd)                                        \
1067          && (ln == len ||                              \
1068              !ibcmp_utf8(s, &my_strend, 0,  do_utf8,   \
1069                         m, NULL, ln, (bool)UTF))       \
1070          && (!reginfo || regtry(reginfo, &s)) )        \
1071         goto got_it;                                   \
1072     else {                                             \
1073          U8 foldbuf[UTF8_MAXBYTES_CASE+1];             \
1074          uvchr_to_utf8(tmpbuf, c);                     \
1075          f = to_utf8_fold(tmpbuf, foldbuf, &foldlen);  \
1076          if ( f != c                                   \
1077               && (f == c1 || f == c2)                  \
1078               && (ln == len ||                         \
1079                 !ibcmp_utf8(s, &my_strend, 0,  do_utf8,\
1080                               m, NULL, ln, (bool)UTF)) \
1081               && (!reginfo || regtry(reginfo, &s)) )   \
1082               goto got_it;                             \
1083     }                                                  \
1084 }                                                      \
1085 s += len
1086
1087 #define REXEC_FBC_EXACTISH_SCAN(CoNd)                     \
1088 STMT_START {                                              \
1089     while (s <= e) {                                      \
1090         if ( (CoNd)                                       \
1091              && (ln == 1 || !(OP(c) == EXACTF             \
1092                               ? ibcmp(s, m, ln)           \
1093                               : ibcmp_locale(s, m, ln)))  \
1094              && (!reginfo || regtry(reginfo, &s)) )        \
1095             goto got_it;                                  \
1096         s++;                                              \
1097     }                                                     \
1098 } STMT_END
1099
1100 #define REXEC_FBC_UTF8_SCAN(CoDe)                     \
1101 STMT_START {                                          \
1102     while (s + (uskip = UTF8SKIP(s)) <= strend) {     \
1103         CoDe                                          \
1104         s += uskip;                                   \
1105     }                                                 \
1106 } STMT_END
1107
1108 #define REXEC_FBC_SCAN(CoDe)                          \
1109 STMT_START {                                          \
1110     while (s < strend) {                              \
1111         CoDe                                          \
1112         s++;                                          \
1113     }                                                 \
1114 } STMT_END
1115
1116 #define REXEC_FBC_UTF8_CLASS_SCAN(CoNd)               \
1117 REXEC_FBC_UTF8_SCAN(                                  \
1118     if (CoNd) {                                       \
1119         if (tmp && (!reginfo || regtry(reginfo, &s)))  \
1120             goto got_it;                              \
1121         else                                          \
1122             tmp = doevery;                            \
1123     }                                                 \
1124     else                                              \
1125         tmp = 1;                                      \
1126 )
1127
1128 #define REXEC_FBC_CLASS_SCAN(CoNd)                    \
1129 REXEC_FBC_SCAN(                                       \
1130     if (CoNd) {                                       \
1131         if (tmp && (!reginfo || regtry(reginfo, &s)))  \
1132             goto got_it;                              \
1133         else                                          \
1134             tmp = doevery;                            \
1135     }                                                 \
1136     else                                              \
1137         tmp = 1;                                      \
1138 )
1139
1140 #define REXEC_FBC_TRYIT               \
1141 if ((!reginfo || regtry(reginfo, &s))) \
1142     goto got_it
1143
1144 #define REXEC_FBC_CSCAN(CoNdUtF8,CoNd)                         \
1145     if (do_utf8) {                                             \
1146         REXEC_FBC_UTF8_CLASS_SCAN(CoNdUtF8);                   \
1147     }                                                          \
1148     else {                                                     \
1149         REXEC_FBC_CLASS_SCAN(CoNd);                            \
1150     }                                                          \
1151     break
1152
1153 #define REXEC_FBC_CSCAN_PRELOAD(UtFpReLoAd,CoNdUtF8,CoNd)      \
1154     if (do_utf8) {                                             \
1155         UtFpReLoAd;                                            \
1156         REXEC_FBC_UTF8_CLASS_SCAN(CoNdUtF8);                   \
1157     }                                                          \
1158     else {                                                     \
1159         REXEC_FBC_CLASS_SCAN(CoNd);                            \
1160     }                                                          \
1161     break
1162
1163 #define REXEC_FBC_CSCAN_TAINT(CoNdUtF8,CoNd)                   \
1164     PL_reg_flags |= RF_tainted;                                \
1165     if (do_utf8) {                                             \
1166         REXEC_FBC_UTF8_CLASS_SCAN(CoNdUtF8);                   \
1167     }                                                          \
1168     else {                                                     \
1169         REXEC_FBC_CLASS_SCAN(CoNd);                            \
1170     }                                                          \
1171     break
1172
1173 #define DUMP_EXEC_POS(li,s,doutf8) \
1174     dump_exec_pos(li,s,(PL_regeol),(PL_bostr),(PL_reg_starttry),doutf8)
1175
1176 /* We know what class REx starts with.  Try to find this position... */
1177 /* if reginfo is NULL, its a dryrun */
1178 /* annoyingly all the vars in this routine have different names from their counterparts
1179    in regmatch. /grrr */
1180
1181 STATIC char *
1182 S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
1183     const char *strend, regmatch_info *reginfo)
1184 {
1185         dVAR;
1186         const I32 doevery = (prog->intflags & PREGf_SKIP) == 0;
1187         char *m;
1188         STRLEN ln;
1189         STRLEN lnc;
1190         register STRLEN uskip;
1191         unsigned int c1;
1192         unsigned int c2;
1193         char *e;
1194         register I32 tmp = 1;   /* Scratch variable? */
1195         register const bool do_utf8 = PL_reg_match_utf8;
1196         RXi_GET_DECL(prog,progi);
1197
1198         PERL_ARGS_ASSERT_FIND_BYCLASS;
1199
1200         /* We know what class it must start with. */
1201         switch (OP(c)) {
1202         case ANYOF:
1203             if (do_utf8) {
1204                  REXEC_FBC_UTF8_CLASS_SCAN((ANYOF_FLAGS(c) & ANYOF_UNICODE) ||
1205                           !UTF8_IS_INVARIANT((U8)s[0]) ?
1206                           reginclass(prog, c, (U8*)s, 0, do_utf8) :
1207                           REGINCLASS(prog, c, (U8*)s));
1208             }
1209             else {
1210                  while (s < strend) {
1211                       STRLEN skip = 1;
1212
1213                       if (REGINCLASS(prog, c, (U8*)s) ||
1214                           (ANYOF_FOLD_SHARP_S(c, s, strend) &&
1215                            /* The assignment of 2 is intentional:
1216                             * for the folded sharp s, the skip is 2. */
1217                            (skip = SHARP_S_SKIP))) {
1218                            if (tmp && (!reginfo || regtry(reginfo, &s)))
1219                                 goto got_it;
1220                            else
1221                                 tmp = doevery;
1222                       }
1223                       else
1224                            tmp = 1;
1225                       s += skip;
1226                  }
1227             }
1228             break;
1229         case CANY:
1230             REXEC_FBC_SCAN(
1231                 if (tmp && (!reginfo || regtry(reginfo, &s)))
1232                     goto got_it;
1233                 else
1234                     tmp = doevery;
1235             );
1236             break;
1237         case EXACTF:
1238             m   = STRING(c);
1239             ln  = STR_LEN(c);   /* length to match in octets/bytes */
1240             lnc = (I32) ln;     /* length to match in characters */
1241             if (UTF) {
1242                 STRLEN ulen1, ulen2;
1243                 U8 *sm = (U8 *) m;
1244                 U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
1245                 U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
1246                 /* used by commented-out code below */
1247                 /*const U32 uniflags = UTF8_ALLOW_DEFAULT;*/
1248
1249                 /* XXX: Since the node will be case folded at compile
1250                    time this logic is a little odd, although im not
1251                    sure that its actually wrong. --dmq */
1252
1253                 c1 = to_utf8_lower((U8*)m, tmpbuf1, &ulen1);
1254                 c2 = to_utf8_upper((U8*)m, tmpbuf2, &ulen2);
1255
1256                 /* XXX: This is kinda strange. to_utf8_XYZ returns the
1257                    codepoint of the first character in the converted
1258                    form, yet originally we did the extra step.
1259                    No tests fail by commenting this code out however
1260                    so Ive left it out. -- dmq.
1261
1262                 c1 = utf8n_to_uvchr(tmpbuf1, UTF8_MAXBYTES_CASE,
1263                                     0, uniflags);
1264                 c2 = utf8n_to_uvchr(tmpbuf2, UTF8_MAXBYTES_CASE,
1265                                     0, uniflags);
1266                 */
1267
1268                 lnc = 0;
1269                 while (sm < ((U8 *) m + ln)) {
1270                     lnc++;
1271                     sm += UTF8SKIP(sm);
1272                 }
1273             }
1274             else {
1275                 c1 = *(U8*)m;
1276                 c2 = PL_fold[c1];
1277             }
1278             goto do_exactf;
1279         case EXACTFL:
1280             m   = STRING(c);
1281             ln  = STR_LEN(c);
1282             lnc = (I32) ln;
1283             c1 = *(U8*)m;
1284             c2 = PL_fold_locale[c1];
1285           do_exactf:
1286             e = HOP3c(strend, -((I32)lnc), s);
1287
1288             if (!reginfo && e < s)
1289                 e = s;                  /* Due to minlen logic of intuit() */
1290
1291             /* The idea in the EXACTF* cases is to first find the
1292              * first character of the EXACTF* node and then, if
1293              * necessary, case-insensitively compare the full
1294              * text of the node.  The c1 and c2 are the first
1295              * characters (though in Unicode it gets a bit
1296              * more complicated because there are more cases
1297              * than just upper and lower: one needs to use
1298              * the so-called folding case for case-insensitive
1299              * matching (called "loose matching" in Unicode).
1300              * ibcmp_utf8() will do just that. */
1301
1302             if (do_utf8 || UTF) {
1303                 UV c, f;
1304                 U8 tmpbuf [UTF8_MAXBYTES+1];
1305                 STRLEN len = 1;
1306                 STRLEN foldlen;
1307                 const U32 uniflags = UTF8_ALLOW_DEFAULT;
1308                 if (c1 == c2) {
1309                     /* Upper and lower of 1st char are equal -
1310                      * probably not a "letter". */
1311                     while (s <= e) {
1312                         if (do_utf8) {
1313                             c = utf8n_to_uvchr((U8*)s, UTF8_MAXBYTES, &len,
1314                                            uniflags);
1315                         } else {
1316                             c = *((U8*)s);
1317                         }
1318                         REXEC_FBC_EXACTISH_CHECK(c == c1);
1319                     }
1320                 }
1321                 else {
1322                     while (s <= e) {
1323                         if (do_utf8) {
1324                             c = utf8n_to_uvchr((U8*)s, UTF8_MAXBYTES, &len,
1325                                            uniflags);
1326                         } else {
1327                             c = *((U8*)s);
1328                         }
1329
1330                         /* Handle some of the three Greek sigmas cases.
1331                          * Note that not all the possible combinations
1332                          * are handled here: some of them are handled
1333                          * by the standard folding rules, and some of
1334                          * them (the character class or ANYOF cases)
1335                          * are handled during compiletime in
1336                          * regexec.c:S_regclass(). */
1337                         if (c == (UV)UNICODE_GREEK_CAPITAL_LETTER_SIGMA ||
1338                             c == (UV)UNICODE_GREEK_SMALL_LETTER_FINAL_SIGMA)
1339                             c = (UV)UNICODE_GREEK_SMALL_LETTER_SIGMA;
1340
1341                         REXEC_FBC_EXACTISH_CHECK(c == c1 || c == c2);
1342                     }
1343                 }
1344             }
1345             else {
1346                 /* Neither pattern nor string are UTF8 */
1347                 if (c1 == c2)
1348                     REXEC_FBC_EXACTISH_SCAN(*(U8*)s == c1);
1349                 else
1350                     REXEC_FBC_EXACTISH_SCAN(*(U8*)s == c1 || *(U8*)s == c2);
1351             }
1352             break;
1353         case BOUNDL:
1354             PL_reg_flags |= RF_tainted;
1355             /* FALL THROUGH */
1356         case BOUND:
1357             if (do_utf8) {
1358                 if (s == PL_bostr)
1359                     tmp = '\n';
1360                 else {
1361                     U8 * const r = reghop3((U8*)s, -1, (U8*)PL_bostr);
1362                     tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, UTF8_ALLOW_DEFAULT);
1363                 }
1364                 tmp = ((OP(c) == BOUND ?
1365                         isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
1366                 LOAD_UTF8_CHARCLASS_ALNUM();
1367                 REXEC_FBC_UTF8_SCAN(
1368                     if (tmp == !(OP(c) == BOUND ?
1369                                  (bool)swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
1370                                  isALNUM_LC_utf8((U8*)s)))
1371                     {
1372                         tmp = !tmp;
1373                         REXEC_FBC_TRYIT;
1374                 }
1375                 );
1376             }
1377             else {
1378                 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
1379                 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1380                 REXEC_FBC_SCAN(
1381                     if (tmp ==
1382                         !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
1383                         tmp = !tmp;
1384                         REXEC_FBC_TRYIT;
1385                 }
1386                 );
1387             }
1388             if ((!prog->minlen && tmp) && (!reginfo || regtry(reginfo, &s)))
1389                 goto got_it;
1390             break;
1391         case NBOUNDL:
1392             PL_reg_flags |= RF_tainted;
1393             /* FALL THROUGH */
1394         case NBOUND:
1395             if (do_utf8) {
1396                 if (s == PL_bostr)
1397                     tmp = '\n';
1398                 else {
1399                     U8 * const r = reghop3((U8*)s, -1, (U8*)PL_bostr);
1400                     tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, UTF8_ALLOW_DEFAULT);
1401                 }
1402                 tmp = ((OP(c) == NBOUND ?
1403                         isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
1404                 LOAD_UTF8_CHARCLASS_ALNUM();
1405                 REXEC_FBC_UTF8_SCAN(
1406                     if (tmp == !(OP(c) == NBOUND ?
1407                                  (bool)swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
1408                                  isALNUM_LC_utf8((U8*)s)))
1409                         tmp = !tmp;
1410                     else REXEC_FBC_TRYIT;
1411                 );
1412             }
1413             else {
1414                 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
1415                 tmp = ((OP(c) == NBOUND ?
1416                         isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1417                 REXEC_FBC_SCAN(
1418                     if (tmp ==
1419                         !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
1420                         tmp = !tmp;
1421                     else REXEC_FBC_TRYIT;
1422                 );
1423             }
1424             if ((!prog->minlen && !tmp) && (!reginfo || regtry(reginfo, &s)))
1425                 goto got_it;
1426             break;
1427         case ALNUM:
1428             REXEC_FBC_CSCAN_PRELOAD(
1429                 LOAD_UTF8_CHARCLASS_ALNUM(),
1430                 swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8),
1431                 isALNUM(*s)
1432             );
1433         case ALNUML:
1434             REXEC_FBC_CSCAN_TAINT(
1435                 isALNUM_LC_utf8((U8*)s),
1436                 isALNUM_LC(*s)
1437             );
1438         case NALNUM:
1439             REXEC_FBC_CSCAN_PRELOAD(
1440                 LOAD_UTF8_CHARCLASS_ALNUM(),
1441                 !swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8),
1442                 !isALNUM(*s)
1443             );
1444         case NALNUML:
1445             REXEC_FBC_CSCAN_TAINT(
1446                 !isALNUM_LC_utf8((U8*)s),
1447                 !isALNUM_LC(*s)
1448             );
1449         case SPACE:
1450             REXEC_FBC_CSCAN_PRELOAD(
1451                 LOAD_UTF8_CHARCLASS_SPACE(),
1452                 *s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8),
1453                 isSPACE(*s)
1454             );
1455         case SPACEL:
1456             REXEC_FBC_CSCAN_TAINT(
1457                 *s == ' ' || isSPACE_LC_utf8((U8*)s),
1458                 isSPACE_LC(*s)
1459             );
1460         case NSPACE:
1461             REXEC_FBC_CSCAN_PRELOAD(
1462                 LOAD_UTF8_CHARCLASS_SPACE(),
1463                 !(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8)),
1464                 !isSPACE(*s)
1465             );
1466         case NSPACEL:
1467             REXEC_FBC_CSCAN_TAINT(
1468                 !(*s == ' ' || isSPACE_LC_utf8((U8*)s)),
1469                 !isSPACE_LC(*s)
1470             );
1471         case DIGIT:
1472             REXEC_FBC_CSCAN_PRELOAD(
1473                 LOAD_UTF8_CHARCLASS_DIGIT(),
1474                 swash_fetch(PL_utf8_digit,(U8*)s, do_utf8),
1475                 isDIGIT(*s)
1476             );
1477         case DIGITL:
1478             REXEC_FBC_CSCAN_TAINT(
1479                 isDIGIT_LC_utf8((U8*)s),
1480                 isDIGIT_LC(*s)
1481             );
1482         case NDIGIT:
1483             REXEC_FBC_CSCAN_PRELOAD(
1484                 LOAD_UTF8_CHARCLASS_DIGIT(),
1485                 !swash_fetch(PL_utf8_digit,(U8*)s, do_utf8),
1486                 !isDIGIT(*s)
1487             );
1488         case NDIGITL:
1489             REXEC_FBC_CSCAN_TAINT(
1490                 !isDIGIT_LC_utf8((U8*)s),
1491                 !isDIGIT_LC(*s)
1492             );
1493         case LNBREAK:
1494             REXEC_FBC_CSCAN(
1495                 is_LNBREAK_utf8(s),
1496                 is_LNBREAK_latin1(s)
1497             );
1498         case VERTWS:
1499             REXEC_FBC_CSCAN(
1500                 is_VERTWS_utf8(s),
1501                 is_VERTWS_latin1(s)
1502             );
1503         case NVERTWS:
1504             REXEC_FBC_CSCAN(
1505                 !is_VERTWS_utf8(s),
1506                 !is_VERTWS_latin1(s)
1507             );
1508         case HORIZWS:
1509             REXEC_FBC_CSCAN(
1510                 is_HORIZWS_utf8(s),
1511                 is_HORIZWS_latin1(s)
1512             );
1513         case NHORIZWS:
1514             REXEC_FBC_CSCAN(
1515                 !is_HORIZWS_utf8(s),
1516                 !is_HORIZWS_latin1(s)
1517             );
1518         case AHOCORASICKC:
1519         case AHOCORASICK:
1520             {
1521                 DECL_TRIE_TYPE(c);
1522                 /* what trie are we using right now */
1523                 reg_ac_data *aho
1524                     = (reg_ac_data*)progi->data->data[ ARG( c ) ];
1525                 reg_trie_data *trie
1526                     = (reg_trie_data*)progi->data->data[ aho->trie ];
1527                 HV *widecharmap = MUTABLE_HV(progi->data->data[ aho->trie + 1 ]);
1528
1529                 const char *last_start = strend - trie->minlen;
1530 #ifdef DEBUGGING
1531                 const char *real_start = s;
1532 #endif
1533                 STRLEN maxlen = trie->maxlen;
1534                 SV *sv_points;
1535                 U8 **points; /* map of where we were in the input string
1536                                 when reading a given char. For ASCII this
1537                                 is unnecessary overhead as the relationship
1538                                 is always 1:1, but for Unicode, especially
1539                                 case folded Unicode this is not true. */
1540                 U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ];
1541                 U8 *bitmap=NULL;
1542
1543
1544                 GET_RE_DEBUG_FLAGS_DECL;
1545
1546                 /* We can't just allocate points here. We need to wrap it in
1547                  * an SV so it gets freed properly if there is a croak while
1548                  * running the match */
1549                 ENTER;
1550                 SAVETMPS;
1551                 sv_points=newSV(maxlen * sizeof(U8 *));
1552                 SvCUR_set(sv_points,
1553                     maxlen * sizeof(U8 *));
1554                 SvPOK_on(sv_points);
1555                 sv_2mortal(sv_points);
1556                 points=(U8**)SvPV_nolen(sv_points );
1557                 if ( trie_type != trie_utf8_fold
1558                      && (trie->bitmap || OP(c)==AHOCORASICKC) )
1559                 {
1560                     if (trie->bitmap)
1561                         bitmap=(U8*)trie->bitmap;
1562                     else
1563                         bitmap=(U8*)ANYOF_BITMAP(c);
1564                 }
1565                 /* this is the Aho-Corasick algorithm modified a touch
1566                    to include special handling for long "unknown char"
1567                    sequences. The basic idea being that we use AC as long
1568                    as we are dealing with a possible matching char, when
1569                    we encounter an unknown char (and we have not encountered
1570                    an accepting state) we scan forward until we find a legal
1571                    starting char.
1572                    AC matching is basically that of trie matching, except
1573                    that when we encounter a failing transition, we fall back
1574                    to the current states "fail state", and try the current char
1575                    again, a process we repeat until we reach the root state,
1576                    state 1, or a legal transition. If we fail on the root state
1577                    then we can either terminate if we have reached an accepting
1578                    state previously, or restart the entire process from the beginning
1579                    if we have not.
1580
1581                  */
1582                 while (s <= last_start) {
1583                     const U32 uniflags = UTF8_ALLOW_DEFAULT;
1584                     U8 *uc = (U8*)s;
1585                     U16 charid = 0;
1586                     U32 base = 1;
1587                     U32 state = 1;
1588                     UV uvc = 0;
1589                     STRLEN len = 0;
1590                     STRLEN foldlen = 0;
1591                     U8 *uscan = (U8*)NULL;
1592                     U8 *leftmost = NULL;
1593 #ifdef DEBUGGING
1594                     U32 accepted_word= 0;
1595 #endif
1596                     U32 pointpos = 0;
1597
1598                     while ( state && uc <= (U8*)strend ) {
1599                         int failed=0;
1600                         U32 word = aho->states[ state ].wordnum;
1601
1602                         if( state==1 ) {
1603                             if ( bitmap ) {
1604                                 DEBUG_TRIE_EXECUTE_r(
1605                                     if ( uc <= (U8*)last_start && !BITMAP_TEST(bitmap,*uc) ) {
1606                                         dump_exec_pos( (char *)uc, c, strend, real_start,
1607                                             (char *)uc, do_utf8 );
1608                                         PerlIO_printf( Perl_debug_log,
1609                                             " Scanning for legal start char...\n");
1610                                     }
1611                                 );
1612                                 while ( uc <= (U8*)last_start  && !BITMAP_TEST(bitmap,*uc) ) {
1613                                     uc++;
1614                                 }
1615                                 s= (char *)uc;
1616                             }
1617                             if (uc >(U8*)last_start) break;
1618                         }
1619
1620                         if ( word ) {
1621                             U8 *lpos= points[ (pointpos - trie->wordlen[word-1] ) % maxlen ];
1622                             if (!leftmost || lpos < leftmost) {
1623                                 DEBUG_r(accepted_word=word);
1624                                 leftmost= lpos;
1625                             }
1626                             if (base==0) break;
1627
1628                         }
1629                         points[pointpos++ % maxlen]= uc;
1630                         REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc,
1631                                              uscan, len, uvc, charid, foldlen,
1632                                              foldbuf, uniflags);
1633                         DEBUG_TRIE_EXECUTE_r({
1634                             dump_exec_pos( (char *)uc, c, strend, real_start,
1635                                 s,   do_utf8 );
1636                             PerlIO_printf(Perl_debug_log,
1637                                 " Charid:%3u CP:%4"UVxf" ",
1638                                  charid, uvc);
1639                         });
1640
1641                         do {
1642 #ifdef DEBUGGING
1643                             word = aho->states[ state ].wordnum;
1644 #endif
1645                             base = aho->states[ state ].trans.base;
1646
1647                             DEBUG_TRIE_EXECUTE_r({
1648                                 if (failed)
1649                                     dump_exec_pos( (char *)uc, c, strend, real_start,
1650                                         s,   do_utf8 );
1651                                 PerlIO_printf( Perl_debug_log,
1652                                     "%sState: %4"UVxf", word=%"UVxf,
1653                                     failed ? " Fail transition to " : "",
1654                                     (UV)state, (UV)word);
1655                             });
1656                             if ( base ) {
1657                                 U32 tmp;
1658                                 if (charid &&
1659                                      (base + charid > trie->uniquecharcount )
1660                                      && (base + charid - 1 - trie->uniquecharcount
1661                                             < trie->lasttrans)
1662                                      && trie->trans[base + charid - 1 -
1663                                             trie->uniquecharcount].check == state
1664                                      && (tmp=trie->trans[base + charid - 1 -
1665                                         trie->uniquecharcount ].next))
1666                                 {
1667                                     DEBUG_TRIE_EXECUTE_r(
1668                                         PerlIO_printf( Perl_debug_log," - legal\n"));
1669                                     state = tmp;
1670                                     break;
1671                                 }
1672                                 else {
1673                                     DEBUG_TRIE_EXECUTE_r(
1674                                         PerlIO_printf( Perl_debug_log," - fail\n"));
1675                                     failed = 1;
1676                                     state = aho->fail[state];
1677                                 }
1678                             }
1679                             else {
1680                                 /* we must be accepting here */
1681                                 DEBUG_TRIE_EXECUTE_r(
1682                                         PerlIO_printf( Perl_debug_log," - accepting\n"));
1683                                 failed = 1;
1684                                 break;
1685                             }
1686                         } while(state);
1687                         uc += len;
1688                         if (failed) {
1689                             if (leftmost)
1690                                 break;
1691                             if (!state) state = 1;
1692                         }
1693                     }
1694                     if ( aho->states[ state ].wordnum ) {
1695                         U8 *lpos = points[ (pointpos - trie->wordlen[aho->states[ state ].wordnum-1]) % maxlen ];
1696                         if (!leftmost || lpos < leftmost) {
1697                             DEBUG_r(accepted_word=aho->states[ state ].wordnum);
1698                             leftmost = lpos;
1699                         }
1700                     }
1701                     if (leftmost) {
1702                         s = (char*)leftmost;
1703                         DEBUG_TRIE_EXECUTE_r({
1704                             PerlIO_printf(
1705                                 Perl_debug_log,"Matches word #%"UVxf" at position %"IVdf". Trying full pattern...\n",
1706                                 (UV)accepted_word, (IV)(s - real_start)
1707                             );
1708                         });
1709                         if (!reginfo || regtry(reginfo, &s)) {
1710                             FREETMPS;
1711                             LEAVE;
1712                             goto got_it;
1713                         }
1714                         s = HOPc(s,1);
1715                         DEBUG_TRIE_EXECUTE_r({
1716                             PerlIO_printf( Perl_debug_log,"Pattern failed. Looking for new start point...\n");
1717                         });
1718                     } else {
1719                         DEBUG_TRIE_EXECUTE_r(
1720                             PerlIO_printf( Perl_debug_log,"No match.\n"));
1721                         break;
1722                     }
1723                 }
1724                 FREETMPS;
1725                 LEAVE;
1726             }
1727             break;
1728         default:
1729             Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
1730             break;
1731         }
1732         return 0;
1733       got_it:
1734         return s;
1735 }
1736
1737
1738 /*
1739  - regexec_flags - match a regexp against a string
1740  */
1741 I32
1742 Perl_regexec_flags(pTHX_ REGEXP * const rx, char *stringarg, register char *strend,
1743               char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
1744 /* strend: pointer to null at end of string */
1745 /* strbeg: real beginning of string */
1746 /* minend: end of match must be >=minend after stringarg. */
1747 /* data: May be used for some additional optimizations.
1748          Currently its only used, with a U32 cast, for transmitting
1749          the ganch offset when doing a /g match. This will change */
1750 /* nosave: For optimizations. */
1751 {
1752     dVAR;
1753     struct regexp *const prog = (struct regexp *)SvANY(rx);
1754     /*register*/ char *s;
1755     register regnode *c;
1756     /*register*/ char *startpos = stringarg;
1757     I32 minlen;         /* must match at least this many chars */
1758     I32 dontbother = 0; /* how many characters not to try at end */
1759     I32 end_shift = 0;                  /* Same for the end. */         /* CC */
1760     I32 scream_pos = -1;                /* Internal iterator of scream. */
1761     char *scream_olds = NULL;
1762     const bool do_utf8 = (bool)DO_UTF8(sv);
1763     I32 multiline;
1764     RXi_GET_DECL(prog,progi);
1765     regmatch_info reginfo;  /* create some info to pass to regtry etc */
1766     regexp_paren_pair *swap = NULL;
1767     GET_RE_DEBUG_FLAGS_DECL;
1768
1769     PERL_ARGS_ASSERT_REGEXEC_FLAGS;
1770     PERL_UNUSED_ARG(data);
1771
1772     /* Be paranoid... */
1773     if (prog == NULL || startpos == NULL) {
1774         Perl_croak(aTHX_ "NULL regexp parameter");
1775         return 0;
1776     }
1777
1778     multiline = prog->extflags & RXf_PMf_MULTILINE;
1779     reginfo.prog = rx;   /* Yes, sorry that this is confusing.  */
1780
1781     RX_MATCH_UTF8_set(rx, do_utf8);
1782     DEBUG_EXECUTE_r(
1783         debug_start_match(rx, do_utf8, startpos, strend,
1784         "Matching");
1785     );
1786
1787     minlen = prog->minlen;
1788
1789     if (strend - startpos < (minlen+(prog->check_offset_min<0?prog->check_offset_min:0))) {
1790         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
1791                               "String too short [regexec_flags]...\n"));
1792         goto phooey;
1793     }
1794
1795
1796     /* Check validity of program. */
1797     if (UCHARAT(progi->program) != REG_MAGIC) {
1798         Perl_croak(aTHX_ "corrupted regexp program");
1799     }
1800
1801     PL_reg_flags = 0;
1802     PL_reg_eval_set = 0;
1803     PL_reg_maxiter = 0;
1804
1805     if (RX_UTF8(rx))
1806         PL_reg_flags |= RF_utf8;
1807
1808     /* Mark beginning of line for ^ and lookbehind. */
1809     reginfo.bol = startpos; /* XXX not used ??? */
1810     PL_bostr  = strbeg;
1811     reginfo.sv = sv;
1812
1813     /* Mark end of line for $ (and such) */
1814     PL_regeol = strend;
1815
1816     /* see how far we have to get to not match where we matched before */
1817     reginfo.till = startpos+minend;
1818
1819     /* If there is a "must appear" string, look for it. */
1820     s = startpos;
1821
1822     if (prog->extflags & RXf_GPOS_SEEN) { /* Need to set reginfo->ganch */
1823         MAGIC *mg;
1824         if (flags & REXEC_IGNOREPOS){   /* Means: check only at start */
1825             reginfo.ganch = startpos + prog->gofs;
1826             DEBUG_GPOS_r(PerlIO_printf(Perl_debug_log,
1827               "GPOS IGNOREPOS: reginfo.ganch = startpos + %"UVxf"\n",(UV)prog->gofs));
1828         } else if (sv && SvTYPE(sv) >= SVt_PVMG
1829                   && SvMAGIC(sv)
1830                   && (mg = mg_find(sv, PERL_MAGIC_regex_global))
1831                   && mg->mg_len >= 0) {
1832             reginfo.ganch = strbeg + mg->mg_len;        /* Defined pos() */
1833             DEBUG_GPOS_r(PerlIO_printf(Perl_debug_log,
1834                 "GPOS MAGIC: reginfo.ganch = strbeg + %"IVdf"\n",(IV)mg->mg_len));
1835
1836             if (prog->extflags & RXf_ANCH_GPOS) {
1837                 if (s > reginfo.ganch)
1838                     goto phooey;
1839                 s = reginfo.ganch - prog->gofs;
1840                 DEBUG_GPOS_r(PerlIO_printf(Perl_debug_log,
1841                      "GPOS ANCH_GPOS: s = ganch - %"UVxf"\n",(UV)prog->gofs));
1842                 if (s < strbeg)
1843                     goto phooey;
1844             }
1845         }
1846         else if (data) {
1847             reginfo.ganch = strbeg + PTR2UV(data);
1848             DEBUG_GPOS_r(PerlIO_printf(Perl_debug_log,
1849                  "GPOS DATA: reginfo.ganch= strbeg + %"UVxf"\n",PTR2UV(data)));
1850
1851         } else {                                /* pos() not defined */
1852             reginfo.ganch = strbeg;
1853             DEBUG_GPOS_r(PerlIO_printf(Perl_debug_log,
1854                  "GPOS: reginfo.ganch = strbeg\n"));
1855         }
1856     }
1857     if (PL_curpm && (PM_GETRE(PL_curpm) == rx)) {
1858         /* We have to be careful. If the previous successful match
1859            was from this regex we don't want a subsequent partially
1860            successful match to clobber the old results.
1861            So when we detect this possibility we add a swap buffer
1862            to the re, and switch the buffer each match. If we fail
1863            we switch it back, otherwise we leave it swapped.
1864         */
1865         swap = prog->offs;
1866         /* do we need a save destructor here for eval dies? */
1867         Newxz(prog->offs, (prog->nparens + 1), regexp_paren_pair);
1868     }
1869     if (!(flags & REXEC_CHECKED) && (prog->check_substr != NULL || prog->check_utf8 != NULL)) {
1870         re_scream_pos_data d;
1871
1872         d.scream_olds = &scream_olds;
1873         d.scream_pos = &scream_pos;
1874         s = re_intuit_start(rx, sv, s, strend, flags, &d);
1875         if (!s) {
1876             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Not present...\n"));
1877             goto phooey;        /* not present */
1878         }
1879     }
1880
1881
1882
1883     /* Simplest case:  anchored match need be tried only once. */
1884     /*  [unless only anchor is BOL and multiline is set] */
1885     if (prog->extflags & (RXf_ANCH & ~RXf_ANCH_GPOS)) {
1886         if (s == startpos && regtry(&reginfo, &startpos))
1887             goto got_it;
1888         else if (multiline || (prog->intflags & PREGf_IMPLICIT)
1889                  || (prog->extflags & RXf_ANCH_MBOL)) /* XXXX SBOL? */
1890         {
1891             char *end;
1892
1893             if (minlen)
1894                 dontbother = minlen - 1;
1895             end = HOP3c(strend, -dontbother, strbeg) - 1;
1896             /* for multiline we only have to try after newlines */
1897             if (prog->check_substr || prog->check_utf8) {
1898                 if (s == startpos)
1899                     goto after_try;
1900                 while (1) {
1901                     if (regtry(&reginfo, &s))
1902                         goto got_it;
1903                   after_try:
1904                     if (s > end)
1905                         goto phooey;
1906                     if (prog->extflags & RXf_USE_INTUIT) {
1907                         s = re_intuit_start(rx, sv, s + 1, strend, flags, NULL);
1908                         if (!s)
1909                             goto phooey;
1910                     }
1911                     else
1912                         s++;
1913                 }
1914             } else {
1915                 if (s > startpos)
1916                     s--;
1917                 while (s < end) {
1918                     if (*s++ == '\n') { /* don't need PL_utf8skip here */
1919                         if (regtry(&reginfo, &s))
1920                             goto got_it;
1921                     }
1922                 }
1923             }
1924         }
1925         goto phooey;
1926     } else if (RXf_GPOS_CHECK == (prog->extflags & RXf_GPOS_CHECK))
1927     {
1928         /* the warning about reginfo.ganch being used without intialization
1929            is bogus -- we set it above, when prog->extflags & RXf_GPOS_SEEN
1930            and we only enter this block when the same bit is set. */
1931         char *tmp_s = reginfo.ganch - prog->gofs;
1932
1933         if (tmp_s >= strbeg && regtry(&reginfo, &tmp_s))
1934             goto got_it;
1935         goto phooey;
1936     }
1937
1938     /* Messy cases:  unanchored match. */
1939     if ((prog->anchored_substr || prog->anchored_utf8) && prog->intflags & PREGf_SKIP) {
1940         /* we have /x+whatever/ */
1941         /* it must be a one character string (XXXX Except UTF?) */
1942         char ch;
1943 #ifdef DEBUGGING
1944         int did_match = 0;
1945 #endif
1946         if (!(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr))
1947             do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1948         ch = SvPVX_const(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr)[0];
1949
1950         if (do_utf8) {
1951             REXEC_FBC_SCAN(
1952                 if (*s == ch) {
1953                     DEBUG_EXECUTE_r( did_match = 1 );
1954                     if (regtry(&reginfo, &s)) goto got_it;
1955                     s += UTF8SKIP(s);
1956                     while (s < strend && *s == ch)
1957                         s += UTF8SKIP(s);
1958                 }
1959             );
1960         }
1961         else {
1962             REXEC_FBC_SCAN(
1963                 if (*s == ch) {
1964                     DEBUG_EXECUTE_r( did_match = 1 );
1965                     if (regtry(&reginfo, &s)) goto got_it;
1966                     s++;
1967                     while (s < strend && *s == ch)
1968                         s++;
1969                 }
1970             );
1971         }
1972         DEBUG_EXECUTE_r(if (!did_match)
1973                 PerlIO_printf(Perl_debug_log,
1974                                   "Did not find anchored character...\n")
1975                );
1976     }
1977     else if (prog->anchored_substr != NULL
1978               || prog->anchored_utf8 != NULL
1979               || ((prog->float_substr != NULL || prog->float_utf8 != NULL)
1980                   && prog->float_max_offset < strend - s)) {
1981         SV *must;
1982         I32 back_max;
1983         I32 back_min;
1984         char *last;
1985         char *last1;            /* Last position checked before */
1986 #ifdef DEBUGGING
1987         int did_match = 0;
1988 #endif
1989         if (prog->anchored_substr || prog->anchored_utf8) {
1990             if (!(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr))
1991                 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1992             must = do_utf8 ? prog->anchored_utf8 : prog->anchored_substr;
1993             back_max = back_min = prog->anchored_offset;
1994         } else {
1995             if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))
1996                 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1997             must = do_utf8 ? prog->float_utf8 : prog->float_substr;
1998             back_max = prog->float_max_offset;
1999             back_min = prog->float_min_offset;
2000         }
2001
2002
2003         if (must == &PL_sv_undef)
2004             /* could not downgrade utf8 check substring, so must fail */
2005             goto phooey;
2006
2007         if (back_min<0) {
2008             last = strend;
2009         } else {
2010             last = HOP3c(strend,        /* Cannot start after this */
2011                   -(I32)(CHR_SVLEN(must)
2012                          - (SvTAIL(must) != 0) + back_min), strbeg);
2013         }
2014         if (s > PL_bostr)
2015             last1 = HOPc(s, -1);
2016         else
2017             last1 = s - 1;      /* bogus */
2018
2019         /* XXXX check_substr already used to find "s", can optimize if
2020            check_substr==must. */
2021         scream_pos = -1;
2022         dontbother = end_shift;
2023         strend = HOPc(strend, -dontbother);
2024         while ( (s <= last) &&
2025                 ((flags & REXEC_SCREAM)
2026                  ? (s = screaminstr(sv, must, HOP3c(s, back_min, (back_min<0 ? strbeg : strend)) - strbeg,
2027                                     end_shift, &scream_pos, 0))
2028                  : (s = fbm_instr((unsigned char*)HOP3(s, back_min, (back_min<0 ? strbeg : strend)),
2029                                   (unsigned char*)strend, must,
2030                                   multiline ? FBMrf_MULTILINE : 0))) ) {
2031             /* we may be pointing at the wrong string */
2032             if ((flags & REXEC_SCREAM) && RXp_MATCH_COPIED(prog))
2033                 s = strbeg + (s - SvPVX_const(sv));
2034             DEBUG_EXECUTE_r( did_match = 1 );
2035             if (HOPc(s, -back_max) > last1) {
2036                 last1 = HOPc(s, -back_min);
2037                 s = HOPc(s, -back_max);
2038             }
2039             else {
2040                 char * const t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
2041
2042                 last1 = HOPc(s, -back_min);
2043                 s = t;
2044             }
2045             if (do_utf8) {
2046                 while (s <= last1) {
2047                     if (regtry(&reginfo, &s))
2048                         goto got_it;
2049                     s += UTF8SKIP(s);
2050                 }
2051             }
2052             else {
2053                 while (s <= last1) {
2054                     if (regtry(&reginfo, &s))
2055                         goto got_it;
2056                     s++;
2057                 }
2058             }
2059         }
2060         DEBUG_EXECUTE_r(if (!did_match) {
2061             RE_PV_QUOTED_DECL(quoted, do_utf8, PERL_DEBUG_PAD_ZERO(0),
2062                 SvPVX_const(must), RE_SV_DUMPLEN(must), 30);
2063             PerlIO_printf(Perl_debug_log, "Did not find %s substr %s%s...\n",
2064                               ((must == prog->anchored_substr || must == prog->anchored_utf8)
2065                                ? "anchored" : "floating"),
2066                 quoted, RE_SV_TAIL(must));
2067         });
2068         goto phooey;
2069     }
2070     else if ( (c = progi->regstclass) ) {
2071         if (minlen) {
2072             const OPCODE op = OP(progi->regstclass);
2073             /* don't bother with what can't match */
2074             if (PL_regkind[op] != EXACT && op != CANY && PL_regkind[op] != TRIE)
2075                 strend = HOPc(strend, -(minlen - 1));
2076         }
2077         DEBUG_EXECUTE_r({
2078             SV * const prop = sv_newmortal();
2079             regprop(prog, prop, c);
2080             {
2081                 RE_PV_QUOTED_DECL(quoted,do_utf8,PERL_DEBUG_PAD_ZERO(1),
2082                     s,strend-s,60);
2083                 PerlIO_printf(Perl_debug_log,
2084                     "Matching stclass %.*s against %s (%d chars)\n",
2085                     (int)SvCUR(prop), SvPVX_const(prop),
2086                      quoted, (int)(strend - s));
2087             }
2088         });
2089         if (find_byclass(prog, c, s, strend, &reginfo))
2090             goto got_it;
2091         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass... [regexec_flags]\n"));
2092     }
2093     else {
2094         dontbother = 0;
2095         if (prog->float_substr != NULL || prog->float_utf8 != NULL) {
2096             /* Trim the end. */
2097             char *last;
2098             SV* float_real;
2099
2100             if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))
2101                 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
2102             float_real = do_utf8 ? prog->float_utf8 : prog->float_substr;
2103
2104             if (flags & REXEC_SCREAM) {
2105                 last = screaminstr(sv, float_real, s - strbeg,
2106                                    end_shift, &scream_pos, 1); /* last one */
2107                 if (!last)
2108                     last = scream_olds; /* Only one occurrence. */
2109                 /* we may be pointing at the wrong string */
2110                 else if (RXp_MATCH_COPIED(prog))
2111                     s = strbeg + (s - SvPVX_const(sv));
2112             }
2113             else {
2114                 STRLEN len;
2115                 const char * const little = SvPV_const(float_real, len);
2116
2117                 if (SvTAIL(float_real)) {
2118                     if (memEQ(strend - len + 1, little, len - 1))
2119                         last = strend - len + 1;
2120                     else if (!multiline)
2121                         last = memEQ(strend - len, little, len)
2122                             ? strend - len : NULL;
2123                     else
2124                         goto find_last;
2125                 } else {
2126                   find_last:
2127                     if (len)
2128                         last = rninstr(s, strend, little, little + len);
2129                     else
2130                         last = strend;  /* matching "$" */
2131                 }
2132             }
2133             if (last == NULL) {
2134                 DEBUG_EXECUTE_r(
2135                     PerlIO_printf(Perl_debug_log,
2136                         "%sCan't trim the tail, match fails (should not happen)%s\n",
2137                         PL_colors[4], PL_colors[5]));
2138                 goto phooey; /* Should not happen! */
2139             }
2140             dontbother = strend - last + prog->float_min_offset;
2141         }
2142         if (minlen && (dontbother < minlen))
2143             dontbother = minlen - 1;
2144         strend -= dontbother;              /* this one's always in bytes! */
2145         /* We don't know much -- general case. */
2146         if (do_utf8) {
2147             for (;;) {
2148                 if (regtry(&reginfo, &s))
2149                     goto got_it;
2150                 if (s >= strend)
2151                     break;
2152                 s += UTF8SKIP(s);
2153             };
2154         }
2155         else {
2156             do {
2157                 if (regtry(&reginfo, &s))
2158                     goto got_it;
2159             } while (s++ < strend);
2160         }
2161     }
2162
2163     /* Failure. */
2164     goto phooey;
2165
2166 got_it:
2167     Safefree(swap);
2168     RX_MATCH_TAINTED_set(rx, PL_reg_flags & RF_tainted);
2169
2170     if (PL_reg_eval_set)
2171         restore_pos(aTHX_ prog);
2172     if (RXp_PAREN_NAMES(prog))
2173         (void)hv_iterinit(RXp_PAREN_NAMES(prog));
2174
2175     /* make sure $`, $&, $', and $digit will work later */
2176     if ( !(flags & REXEC_NOT_FIRST) ) {
2177         RX_MATCH_COPY_FREE(rx);
2178         if (flags & REXEC_COPY_STR) {
2179             const I32 i = PL_regeol - startpos + (stringarg - strbeg);
2180 #ifdef PERL_OLD_COPY_ON_WRITE
2181             if ((SvIsCOW(sv)
2182                  || (SvFLAGS(sv) & CAN_COW_MASK) == CAN_COW_FLAGS)) {
2183                 if (DEBUG_C_TEST) {
2184                     PerlIO_printf(Perl_debug_log,
2185                                   "Copy on write: regexp capture, type %d\n",
2186                                   (int) SvTYPE(sv));
2187                 }
2188                 prog->saved_copy = sv_setsv_cow(prog->saved_copy, sv);
2189                 prog->subbeg = (char *)SvPVX_const(prog->saved_copy);
2190                 assert (SvPOKp(prog->saved_copy));
2191             } else
2192 #endif
2193             {
2194                 RX_MATCH_COPIED_on(rx);
2195                 s = savepvn(strbeg, i);
2196                 prog->subbeg = s;
2197             }
2198             prog->sublen = i;
2199         }
2200         else {
2201             prog->subbeg = strbeg;
2202             prog->sublen = PL_regeol - strbeg;  /* strend may have been modified */
2203         }
2204     }
2205
2206     return 1;
2207
2208 phooey:
2209     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n",
2210                           PL_colors[4], PL_colors[5]));
2211     if (PL_reg_eval_set)
2212         restore_pos(aTHX_ prog);
2213     if (swap) {
2214         /* we failed :-( roll it back */
2215         Safefree(prog->offs);
2216         prog->offs = swap;
2217     }
2218
2219     return 0;
2220 }
2221
2222
2223 /*
2224  - regtry - try match at specific point
2225  */
2226 STATIC I32                      /* 0 failure, 1 success */
2227 S_regtry(pTHX_ regmatch_info *reginfo, char **startpos)
2228 {
2229     dVAR;
2230     CHECKPOINT lastcp;
2231     REGEXP *const rx = reginfo->prog;
2232     regexp *const prog = (struct regexp *)SvANY(rx);
2233     RXi_GET_DECL(prog,progi);
2234     GET_RE_DEBUG_FLAGS_DECL;
2235
2236     PERL_ARGS_ASSERT_REGTRY;
2237
2238     reginfo->cutpoint=NULL;
2239
2240     if ((prog->extflags & RXf_EVAL_SEEN) && !PL_reg_eval_set) {
2241         MAGIC *mg;
2242
2243         PL_reg_eval_set = RS_init;
2244         DEBUG_EXECUTE_r(DEBUG_s(
2245             PerlIO_printf(Perl_debug_log, "  setting stack tmpbase at %"IVdf"\n",
2246                           (IV)(PL_stack_sp - PL_stack_base));
2247             ));
2248         SAVESTACK_CXPOS();
2249         cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
2250         /* Otherwise OP_NEXTSTATE will free whatever on stack now.  */
2251         SAVETMPS;
2252         /* Apparently this is not needed, judging by wantarray. */
2253         /* SAVEI8(cxstack[cxstack_ix].blk_gimme);
2254            cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
2255
2256         if (reginfo->sv) {
2257             /* Make $_ available to executed code. */
2258             if (reginfo->sv != DEFSV) {
2259                 SAVE_DEFSV;
2260                 DEFSV_set(reginfo->sv);
2261             }
2262
2263             if (!(SvTYPE(reginfo->sv) >= SVt_PVMG && SvMAGIC(reginfo->sv)
2264                   && (mg = mg_find(reginfo->sv, PERL_MAGIC_regex_global)))) {
2265                 /* prepare for quick setting of pos */
2266 #ifdef PERL_OLD_COPY_ON_WRITE
2267                 if (SvIsCOW(reginfo->sv))
2268                     sv_force_normal_flags(reginfo->sv, 0);
2269 #endif
2270                 mg = sv_magicext(reginfo->sv, NULL, PERL_MAGIC_regex_global,
2271                                  &PL_vtbl_mglob, NULL, 0);
2272                 mg->mg_len = -1;
2273             }
2274             PL_reg_magic    = mg;
2275             PL_reg_oldpos   = mg->mg_len;
2276             SAVEDESTRUCTOR_X(restore_pos, prog);
2277         }
2278         if (!PL_reg_curpm) {
2279             Newxz(PL_reg_curpm, 1, PMOP);
2280 #ifdef USE_ITHREADS
2281             {
2282                 SV* const repointer = &PL_sv_undef;
2283                 /* this regexp is also owned by the new PL_reg_curpm, which
2284                    will try to free it.  */
2285                 av_push(PL_regex_padav, repointer);
2286                 PL_reg_curpm->op_pmoffset = av_len(PL_regex_padav);
2287                 PL_regex_pad = AvARRAY(PL_regex_padav);
2288             }
2289 #endif
2290         }
2291 #ifdef USE_ITHREADS
2292         /* It seems that non-ithreads works both with and without this code.
2293            So for efficiency reasons it seems best not to have the code
2294            compiled when it is not needed.  */
2295         /* This is safe against NULLs: */
2296         ReREFCNT_dec(PM_GETRE(PL_reg_curpm));
2297         /* PM_reg_curpm owns a reference to this regexp.  */
2298         ReREFCNT_inc(rx);
2299 #endif
2300         PM_SETRE(PL_reg_curpm, rx);
2301         PL_reg_oldcurpm = PL_curpm;
2302         PL_curpm = PL_reg_curpm;
2303         if (RXp_MATCH_COPIED(prog)) {
2304             /*  Here is a serious problem: we cannot rewrite subbeg,
2305                 since it may be needed if this match fails.  Thus
2306                 $` inside (?{}) could fail... */
2307             PL_reg_oldsaved = prog->subbeg;
2308             PL_reg_oldsavedlen = prog->sublen;
2309 #ifdef PERL_OLD_COPY_ON_WRITE
2310             PL_nrs = prog->saved_copy;
2311 #endif
2312             RXp_MATCH_COPIED_off(prog);
2313         }
2314         else
2315             PL_reg_oldsaved = NULL;
2316         prog->subbeg = PL_bostr;
2317         prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
2318     }
2319     DEBUG_EXECUTE_r(PL_reg_starttry = *startpos);
2320     prog->offs[0].start = *startpos - PL_bostr;
2321     PL_reginput = *startpos;
2322     PL_reglastparen = &prog->lastparen;
2323     PL_reglastcloseparen = &prog->lastcloseparen;
2324     prog->lastparen = 0;
2325     prog->lastcloseparen = 0;
2326     PL_regsize = 0;
2327     PL_regoffs = prog->offs;
2328     if (PL_reg_start_tmpl <= prog->nparens) {
2329         PL_reg_start_tmpl = prog->nparens*3/2 + 3;
2330         if(PL_reg_start_tmp)
2331             Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
2332         else
2333             Newx(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
2334     }
2335
2336     /* XXXX What this code is doing here?!!!  There should be no need
2337        to do this again and again, PL_reglastparen should take care of
2338        this!  --ilya*/
2339
2340     /* Tests pat.t#187 and split.t#{13,14} seem to depend on this code.
2341      * Actually, the code in regcppop() (which Ilya may be meaning by
2342      * PL_reglastparen), is not needed at all by the test suite
2343      * (op/regexp, op/pat, op/split), but that code is needed otherwise
2344      * this erroneously leaves $1 defined: "1" =~ /^(?:(\d)x)?\d$/
2345      * Meanwhile, this code *is* needed for the
2346      * above-mentioned test suite tests to succeed.  The common theme
2347      * on those tests seems to be returning null fields from matches.
2348      * --jhi updated by dapm */
2349 #if 1
2350     if (prog->nparens) {
2351         regexp_paren_pair *pp = PL_regoffs;
2352         register I32 i;
2353         for (i = prog->nparens; i > (I32)*PL_reglastparen; i--) {
2354             ++pp;
2355             pp->start = -1;
2356             pp->end = -1;
2357         }
2358     }
2359 #endif
2360     REGCP_SET(lastcp);
2361     if (regmatch(reginfo, progi->program + 1)) {
2362         PL_regoffs[0].end = PL_reginput - PL_bostr;
2363         return 1;
2364     }
2365     if (reginfo->cutpoint)
2366         *startpos= reginfo->cutpoint;
2367     REGCP_UNWIND(lastcp);
2368     return 0;
2369 }
2370
2371
2372 #define sayYES goto yes
2373 #define sayNO goto no
2374 #define sayNO_SILENT goto no_silent
2375
2376 /* we dont use STMT_START/END here because it leads to
2377    "unreachable code" warnings, which are bogus, but distracting. */
2378 #define CACHEsayNO \
2379     if (ST.cache_mask) \
2380        PL_reg_poscache[ST.cache_offset] |= ST.cache_mask; \
2381     sayNO
2382
2383 /* this is used to determine how far from the left messages like
2384    'failed...' are printed. It should be set such that messages
2385    are inline with the regop output that created them.
2386 */
2387 #define REPORT_CODE_OFF 32
2388
2389
2390 /* Make sure there is a test for this +1 options in re_tests */
2391 #define TRIE_INITAL_ACCEPT_BUFFLEN 4;
2392
2393 #define CHRTEST_UNINIT -1001 /* c1/c2 haven't been calculated yet */
2394 #define CHRTEST_VOID   -1000 /* the c1/c2 "next char" test should be skipped */
2395
2396 #define SLAB_FIRST(s) (&(s)->states[0])
2397 #define SLAB_LAST(s)  (&(s)->states[PERL_REGMATCH_SLAB_SLOTS-1])
2398
2399 /* grab a new slab and return the first slot in it */
2400
2401 STATIC regmatch_state *
2402 S_push_slab(pTHX)
2403 {
2404 #if PERL_VERSION < 9 && !defined(PERL_CORE)
2405     dMY_CXT;
2406 #endif
2407     regmatch_slab *s = PL_regmatch_slab->next;
2408     if (!s) {
2409         Newx(s, 1, regmatch_slab);
2410         s->prev = PL_regmatch_slab;
2411         s->next = NULL;
2412         PL_regmatch_slab->next = s;
2413     }
2414     PL_regmatch_slab = s;
2415     return SLAB_FIRST(s);
2416 }
2417
2418
2419 /* push a new state then goto it */
2420
2421 #define PUSH_STATE_GOTO(state, node) \
2422     scan = node; \
2423     st->resume_state = state; \
2424     goto push_state;
2425
2426 /* push a new state with success backtracking, then goto it */
2427
2428 #define PUSH_YES_STATE_GOTO(state, node) \
2429     scan = node; \
2430     st->resume_state = state; \
2431     goto push_yes_state;
2432
2433
2434
2435 /*
2436
2437 regmatch() - main matching routine
2438
2439 This is basically one big switch statement in a loop. We execute an op,
2440 set 'next' to point the next op, and continue. If we come to a point which
2441 we may need to backtrack to on failure such as (A|B|C), we push a
2442 backtrack state onto the backtrack stack. On failure, we pop the top
2443 state, and re-enter the loop at the state indicated. If there are no more
2444 states to pop, we return failure.
2445
2446 Sometimes we also need to backtrack on success; for example /A+/, where
2447 after successfully matching one A, we need to go back and try to
2448 match another one; similarly for lookahead assertions: if the assertion
2449 completes successfully, we backtrack to the state just before the assertion
2450 and then carry on.  In these cases, the pushed state is marked as
2451 'backtrack on success too'. This marking is in fact done by a chain of
2452 pointers, each pointing to the previous 'yes' state. On success, we pop to
2453 the nearest yes state, discarding any intermediate failure-only states.
2454 Sometimes a yes state is pushed just to force some cleanup code to be
2455 called at the end of a successful match or submatch; e.g. (??{$re}) uses
2456 it to free the inner regex.
2457
2458 Note that failure backtracking rewinds the cursor position, while
2459 success backtracking leaves it alone.
2460
2461 A pattern is complete when the END op is executed, while a subpattern
2462 such as (?=foo) is complete when the SUCCESS op is executed. Both of these
2463 ops trigger the "pop to last yes state if any, otherwise return true"
2464 behaviour.
2465
2466 A common convention in this function is to use A and B to refer to the two
2467 subpatterns (or to the first nodes thereof) in patterns like /A*B/: so A is
2468 the subpattern to be matched possibly multiple times, while B is the entire
2469 rest of the pattern. Variable and state names reflect this convention.
2470
2471 The states in the main switch are the union of ops and failure/success of
2472 substates associated with with that op.  For example, IFMATCH is the op
2473 that does lookahead assertions /(?=A)B/ and so the IFMATCH state means
2474 'execute IFMATCH'; while IFMATCH_A is a state saying that we have just
2475 successfully matched A and IFMATCH_A_fail is a state saying that we have
2476 just failed to match A. Resume states always come in pairs. The backtrack
2477 state we push is marked as 'IFMATCH_A', but when that is popped, we resume
2478 at IFMATCH_A or IFMATCH_A_fail, depending on whether we are backtracking
2479 on success or failure.
2480
2481 The struct that holds a backtracking state is actually a big union, with
2482 one variant for each major type of op. The variable st points to the
2483 top-most backtrack struct. To make the code clearer, within each
2484 block of code we #define ST to alias the relevant union.
2485
2486 Here's a concrete example of a (vastly oversimplified) IFMATCH
2487 implementation:
2488
2489     switch (state) {
2490     ....
2491
2492 #define ST st->u.ifmatch
2493
2494     case IFMATCH: // we are executing the IFMATCH op, (?=A)B
2495         ST.foo = ...; // some state we wish to save
2496         ...
2497         // push a yes backtrack state with a resume value of
2498         // IFMATCH_A/IFMATCH_A_fail, then continue execution at the
2499         // first node of A:
2500         PUSH_YES_STATE_GOTO(IFMATCH_A, A);
2501         // NOTREACHED
2502
2503     case IFMATCH_A: // we have successfully executed A; now continue with B
2504         next = B;
2505         bar = ST.foo; // do something with the preserved value
2506         break;
2507
2508     case IFMATCH_A_fail: // A failed, so the assertion failed
2509         ...;   // do some housekeeping, then ...
2510         sayNO; // propagate the failure
2511
2512 #undef ST
2513
2514     ...
2515     }
2516
2517 For any old-timers reading this who are familiar with the old recursive
2518 approach, the code above is equivalent to:
2519
2520     case IFMATCH: // we are executing the IFMATCH op, (?=A)B
2521     {
2522         int foo = ...
2523         ...
2524         if (regmatch(A)) {
2525             next = B;
2526             bar = foo;
2527             break;
2528         }
2529         ...;   // do some housekeeping, then ...
2530         sayNO; // propagate the failure
2531     }
2532
2533 The topmost backtrack state, pointed to by st, is usually free. If you
2534 want to claim it, populate any ST.foo fields in it with values you wish to
2535 save, then do one of
2536
2537         PUSH_STATE_GOTO(resume_state, node);
2538         PUSH_YES_STATE_GOTO(resume_state, node);
2539
2540 which sets that backtrack state's resume value to 'resume_state', pushes a
2541 new free entry to the top of the backtrack stack, then goes to 'node'.
2542 On backtracking, the free slot is popped, and the saved state becomes the
2543 new free state. An ST.foo field in this new top state can be temporarily
2544 accessed to retrieve values, but once the main loop is re-entered, it
2545 becomes available for reuse.
2546
2547 Note that the depth of the backtrack stack constantly increases during the
2548 left-to-right execution of the pattern, rather than going up and down with
2549 the pattern nesting. For example the stack is at its maximum at Z at the
2550 end of the pattern, rather than at X in the following:
2551
2552     /(((X)+)+)+....(Y)+....Z/
2553
2554 The only exceptions to this are lookahead/behind assertions and the cut,
2555 (?>A), which pop all the backtrack states associated with A before
2556 continuing.
2557
2558 Bascktrack state structs are allocated in slabs of about 4K in size.
2559 PL_regmatch_state and st always point to the currently active state,
2560 and PL_regmatch_slab points to the slab currently containing
2561 PL_regmatch_state.  The first time regmatch() is called, the first slab is
2562 allocated, and is never freed until interpreter destruction. When the slab
2563 is full, a new one is allocated and chained to the end. At exit from
2564 regmatch(), slabs allocated since entry are freed.
2565
2566 */
2567
2568
2569 #define DEBUG_STATE_pp(pp)                                  \
2570     DEBUG_STATE_r({                                         \
2571         DUMP_EXEC_POS(locinput, scan, do_utf8);             \
2572         PerlIO_printf(Perl_debug_log,                       \
2573             "    %*s"pp" %s%s%s%s%s\n",                     \
2574             depth*2, "",                                    \
2575             PL_reg_name[st->resume_state],                     \
2576             ((st==yes_state||st==mark_state) ? "[" : ""),   \
2577             ((st==yes_state) ? "Y" : ""),                   \
2578             ((st==mark_state) ? "M" : ""),                  \
2579             ((st==yes_state||st==mark_state) ? "]" : "")    \
2580         );                                                  \
2581     });
2582
2583
2584 #define REG_NODE_NUM(x) ((x) ? (int)((x)-prog) : -1)
2585
2586 #ifdef DEBUGGING
2587
2588 STATIC void
2589 S_debug_start_match(pTHX_ const REGEXP *prog, const bool do_utf8,
2590     const char *start, const char *end, const char *blurb)
2591 {
2592     const bool utf8_pat = RX_UTF8(prog) ? 1 : 0;
2593
2594     PERL_ARGS_ASSERT_DEBUG_START_MATCH;
2595
2596     if (!PL_colorset)
2597             reginitcolors();
2598     {
2599         RE_PV_QUOTED_DECL(s0, utf8_pat, PERL_DEBUG_PAD_ZERO(0),
2600             RX_PRECOMP_const(prog), RX_PRELEN(prog), 60);
2601
2602         RE_PV_QUOTED_DECL(s1, do_utf8, PERL_DEBUG_PAD_ZERO(1),
2603             start, end - start, 60);
2604
2605         PerlIO_printf(Perl_debug_log,
2606             "%s%s REx%s %s against %s\n",
2607                        PL_colors[4], blurb, PL_colors[5], s0, s1);
2608
2609         if (do_utf8||utf8_pat)
2610             PerlIO_printf(Perl_debug_log, "UTF-8 %s%s%s...\n",
2611                 utf8_pat ? "pattern" : "",
2612                 utf8_pat && do_utf8 ? " and " : "",
2613                 do_utf8 ? "string" : ""
2614             );
2615     }
2616 }
2617
2618 STATIC void
2619 S_dump_exec_pos(pTHX_ const char *locinput,
2620                       const regnode *scan,
2621                       const char *loc_regeol,
2622                       const char *loc_bostr,
2623                       const char *loc_reg_starttry,
2624                       const bool do_utf8)
2625 {
2626     const int docolor = *PL_colors[0] || *PL_colors[2] || *PL_colors[4];
2627     const int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
2628     int l = (loc_regeol - locinput) > taill ? taill : (loc_regeol - locinput);
2629     /* The part of the string before starttry has one color
2630        (pref0_len chars), between starttry and current
2631        position another one (pref_len - pref0_len chars),
2632        after the current position the third one.
2633        We assume that pref0_len <= pref_len, otherwise we
2634        decrease pref0_len.  */
2635     int pref_len = (locinput - loc_bostr) > (5 + taill) - l
2636         ? (5 + taill) - l : locinput - loc_bostr;
2637     int pref0_len;
2638
2639     PERL_ARGS_ASSERT_DUMP_EXEC_POS;
2640
2641     while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput - pref_len)))
2642         pref_len++;
2643     pref0_len = pref_len  - (locinput - loc_reg_starttry);
2644     if (l + pref_len < (5 + taill) && l < loc_regeol - locinput)
2645         l = ( loc_regeol - locinput > (5 + taill) - pref_len
2646               ? (5 + taill) - pref_len : loc_regeol - locinput);
2647     while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput + l)))
2648         l--;
2649     if (pref0_len < 0)
2650         pref0_len = 0;
2651     if (pref0_len > pref_len)
2652         pref0_len = pref_len;
2653     {
2654         const int is_uni = (do_utf8 && OP(scan) != CANY) ? 1 : 0;
2655
2656         RE_PV_COLOR_DECL(s0,len0,is_uni,PERL_DEBUG_PAD(0),
2657             (locinput - pref_len),pref0_len, 60, 4, 5);
2658
2659         RE_PV_COLOR_DECL(s1,len1,is_uni,PERL_DEBUG_PAD(1),
2660                     (locinput - pref_len + pref0_len),
2661                     pref_len - pref0_len, 60, 2, 3);
2662
2663         RE_PV_COLOR_DECL(s2,len2,is_uni,PERL_DEBUG_PAD(2),
2664                     locinput, loc_regeol - locinput, 10, 0, 1);
2665
2666         const STRLEN tlen=len0+len1+len2;
2667         PerlIO_printf(Perl_debug_log,
2668                     "%4"IVdf" <%.*s%.*s%s%.*s>%*s|",
2669                     (IV)(locinput - loc_bostr),
2670                     len0, s0,
2671                     len1, s1,
2672                     (docolor ? "" : "> <"),
2673                     len2, s2,
2674                     (int)(tlen > 19 ? 0 :  19 - tlen),
2675                     "");
2676     }
2677 }
2678
2679 #endif
2680
2681 /* reg_check_named_buff_matched()
2682  * Checks to see if a named buffer has matched. The data array of
2683  * buffer numbers corresponding to the buffer is expected to reside
2684  * in the regexp->data->data array in the slot stored in the ARG() of
2685  * node involved. Note that this routine doesn't actually care about the
2686  * name, that information is not preserved from compilation to execution.
2687  * Returns the index of the leftmost defined buffer with the given name
2688  * or 0 if non of the buffers matched.
2689  */
2690 STATIC I32
2691 S_reg_check_named_buff_matched(pTHX_ const regexp *rex, const regnode *scan)
2692 {
2693     I32 n;
2694     RXi_GET_DECL(rex,rexi);
2695     SV *sv_dat= MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
2696     I32 *nums=(I32*)SvPVX(sv_dat);
2697
2698     PERL_ARGS_ASSERT_REG_CHECK_NAMED_BUFF_MATCHED;
2699
2700     for ( n=0; n<SvIVX(sv_dat); n++ ) {
2701         if ((I32)*PL_reglastparen >= nums[n] &&
2702             PL_regoffs[nums[n]].end != -1)
2703         {
2704             return nums[n];
2705         }
2706     }
2707     return 0;
2708 }
2709
2710
2711 /* free all slabs above current one  - called during LEAVE_SCOPE */
2712
2713 STATIC void
2714 S_clear_backtrack_stack(pTHX_ void *p)
2715 {
2716     regmatch_slab *s = PL_regmatch_slab->next;
2717     PERL_UNUSED_ARG(p);
2718
2719     if (!s)
2720         return;
2721     PL_regmatch_slab->next = NULL;
2722     while (s) {
2723         regmatch_slab * const osl = s;
2724         s = s->next;
2725         Safefree(osl);
2726     }
2727 }
2728
2729
2730 #define SETREX(Re1,Re2) \
2731     if (PL_reg_eval_set) PM_SETRE((PL_reg_curpm), (Re2)); \
2732     Re1 = (Re2)
2733
2734 STATIC I32                      /* 0 failure, 1 success */
2735 S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
2736 {
2737 #if PERL_VERSION < 9 && !defined(PERL_CORE)
2738     dMY_CXT;
2739 #endif
2740     dVAR;
2741     register const bool do_utf8 = PL_reg_match_utf8;
2742     const U32 uniflags = UTF8_ALLOW_DEFAULT;
2743     REGEXP *rex_sv = reginfo->prog;
2744     regexp *rex = (struct regexp *)SvANY(rex_sv);
2745     RXi_GET_DECL(rex,rexi);
2746     I32 oldsave;
2747     /* the current state. This is a cached copy of PL_regmatch_state */
2748     register regmatch_state *st;
2749     /* cache heavy used fields of st in registers */
2750     register regnode *scan;
2751     register regnode *next;
2752     register U32 n = 0; /* general value; init to avoid compiler warning */
2753     register I32 ln = 0; /* len or last;  init to avoid compiler warning */
2754     register char *locinput = PL_reginput;
2755     register I32 nextchr;   /* is always set to UCHARAT(locinput) */
2756
2757     bool result = 0;        /* return value of S_regmatch */
2758     int depth = 0;          /* depth of backtrack stack */
2759     U32 nochange_depth = 0; /* depth of GOSUB recursion with nochange */
2760     const U32 max_nochange_depth =
2761         (3 * rex->nparens > MAX_RECURSE_EVAL_NOCHANGE_DEPTH) ?
2762         3 * rex->nparens : MAX_RECURSE_EVAL_NOCHANGE_DEPTH;
2763     regmatch_state *yes_state = NULL; /* state to pop to on success of
2764                                                             subpattern */
2765     /* mark_state piggy backs on the yes_state logic so that when we unwind
2766        the stack on success we can update the mark_state as we go */
2767     regmatch_state *mark_state = NULL; /* last mark state we have seen */
2768     regmatch_state *cur_eval = NULL; /* most recent EVAL_AB state */
2769     struct regmatch_state  *cur_curlyx = NULL; /* most recent curlyx */
2770     U32 state_num;
2771     bool no_final = 0;      /* prevent failure from backtracking? */
2772     bool do_cutgroup = 0;   /* no_final only until next branch/trie entry */
2773     char *startpoint = PL_reginput;
2774     SV *popmark = NULL;     /* are we looking for a mark? */
2775     SV *sv_commit = NULL;   /* last mark name seen in failure */
2776     SV *sv_yes_mark = NULL; /* last mark name we have seen
2777                                during a successfull match */
2778     U32 lastopen = 0;       /* last open we saw */
2779     bool has_cutgroup = RX_HAS_CUTGROUP(rex) ? 1 : 0;
2780     SV* const oreplsv = GvSV(PL_replgv);
2781     /* these three flags are set by various ops to signal information to
2782      * the very next op. They have a useful lifetime of exactly one loop
2783      * iteration, and are not preserved or restored by state pushes/pops
2784      */
2785     bool sw = 0;            /* the condition value in (?(cond)a|b) */
2786     bool minmod = 0;        /* the next "{n,m}" is a "{n,m}?" */
2787     int logical = 0;        /* the following EVAL is:
2788                                 0: (?{...})
2789                                 1: (?(?{...})X|Y)
2790                                 2: (??{...})
2791                                or the following IFMATCH/UNLESSM is:
2792                                 false: plain (?=foo)
2793                                 true:  used as a condition: (?(?=foo))
2794                             */
2795 #ifdef DEBUGGING
2796     GET_RE_DEBUG_FLAGS_DECL;
2797 #endif
2798
2799     PERL_ARGS_ASSERT_REGMATCH;
2800
2801     DEBUG_OPTIMISE_r( DEBUG_EXECUTE_r({
2802             PerlIO_printf(Perl_debug_log,"regmatch start\n");
2803     }));
2804     /* on first ever call to regmatch, allocate first slab */
2805     if (!PL_regmatch_slab) {
2806         Newx(PL_regmatch_slab, 1, regmatch_slab);
2807         PL_regmatch_slab->prev = NULL;
2808         PL_regmatch_slab->next = NULL;
2809         PL_regmatch_state = SLAB_FIRST(PL_regmatch_slab);
2810     }
2811
2812     oldsave = PL_savestack_ix;
2813     SAVEDESTRUCTOR_X(S_clear_backtrack_stack, NULL);
2814     SAVEVPTR(PL_regmatch_slab);
2815     SAVEVPTR(PL_regmatch_state);
2816
2817     /* grab next free state slot */
2818     st = ++PL_regmatch_state;
2819     if (st >  SLAB_LAST(PL_regmatch_slab))
2820         st = PL_regmatch_state = S_push_slab(aTHX);
2821
2822     /* Note that nextchr is a byte even in UTF */
2823     nextchr = UCHARAT(locinput);
2824     scan = prog;
2825     while (scan != NULL) {
2826
2827         DEBUG_EXECUTE_r( {
2828             SV * const prop = sv_newmortal();
2829             regnode *rnext=regnext(scan);
2830             DUMP_EXEC_POS( locinput, scan, do_utf8 );
2831             regprop(rex, prop, scan);
2832
2833             PerlIO_printf(Perl_debug_log,
2834                     "%3"IVdf":%*s%s(%"IVdf")\n",
2835                     (IV)(scan - rexi->program), depth*2, "",
2836                     SvPVX_const(prop),
2837                     (PL_regkind[OP(scan)] == END || !rnext) ?
2838                         0 : (IV)(rnext - rexi->program));
2839         });
2840
2841         next = scan + NEXT_OFF(scan);
2842         if (next == scan)
2843             next = NULL;
2844         state_num = OP(scan);
2845
2846       reenter_switch:
2847
2848         assert(PL_reglastparen == &rex->lastparen);
2849         assert(PL_reglastcloseparen == &rex->lastcloseparen);
2850         assert(PL_regoffs == rex->offs);
2851
2852         switch (state_num) {
2853         case BOL:
2854             if (locinput == PL_bostr)
2855             {
2856                 /* reginfo->till = reginfo->bol; */
2857                 break;
2858             }
2859             sayNO;
2860         case MBOL:
2861             if (locinput == PL_bostr ||
2862                 ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n'))
2863             {
2864                 break;
2865             }
2866             sayNO;
2867         case SBOL:
2868             if (locinput == PL_bostr)
2869                 break;
2870             sayNO;
2871         case GPOS:
2872             if (locinput == reginfo->ganch)
2873                 break;
2874             sayNO;
2875
2876         case KEEPS:
2877             /* update the startpoint */
2878             st->u.keeper.val = PL_regoffs[0].start;
2879             PL_reginput = locinput;
2880             PL_regoffs[0].start = locinput - PL_bostr;
2881             PUSH_STATE_GOTO(KEEPS_next, next);
2882             /*NOT-REACHED*/
2883         case KEEPS_next_fail:
2884             /* rollback the start point change */
2885             PL_regoffs[0].start = st->u.keeper.val;
2886             sayNO_SILENT;
2887             /*NOT-REACHED*/
2888         case EOL:
2889                 goto seol;
2890         case MEOL:
2891             if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
2892                 sayNO;
2893             break;
2894         case SEOL:
2895           seol:
2896             if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
2897                 sayNO;
2898             if (PL_regeol - locinput > 1)
2899                 sayNO;
2900             break;
2901         case EOS:
2902             if (PL_regeol != locinput)
2903                 sayNO;
2904             break;
2905         case SANY:
2906             if (!nextchr && locinput >= PL_regeol)
2907                 sayNO;
2908             if (do_utf8) {
2909                 locinput += PL_utf8skip[nextchr];
2910                 if (locinput > PL_regeol)
2911                     sayNO;
2912                 nextchr = UCHARAT(locinput);
2913             }
2914             else
2915                 nextchr = UCHARAT(++locinput);
2916             break;
2917         case CANY:
2918             if (!nextchr && locinput >= PL_regeol)
2919                 sayNO;
2920             nextchr = UCHARAT(++locinput);
2921             break;
2922         case REG_ANY:
2923             if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
2924                 sayNO;
2925             if (do_utf8) {
2926                 locinput += PL_utf8skip[nextchr];
2927                 if (locinput > PL_regeol)
2928                     sayNO;
2929                 nextchr = UCHARAT(locinput);
2930             }
2931             else
2932                 nextchr = UCHARAT(++locinput);
2933             break;
2934
2935 #undef  ST
2936 #define ST st->u.trie
2937         case TRIEC:
2938             /* In this case the charclass data is available inline so
2939                we can fail fast without a lot of extra overhead.
2940              */
2941             if (scan->flags == EXACT || !do_utf8) {
2942                 if(!ANYOF_BITMAP_TEST(scan, *locinput)) {
2943                     DEBUG_EXECUTE_r(
2944                         PerlIO_printf(Perl_debug_log,
2945                                   "%*s  %sfailed to match trie start class...%s\n",
2946                                   REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5])
2947                     );
2948                     sayNO_SILENT;
2949                     /* NOTREACHED */
2950                 }
2951             }
2952             /* FALL THROUGH */
2953         case TRIE:
2954             {
2955                 /* what type of TRIE am I? (utf8 makes this contextual) */
2956                 DECL_TRIE_TYPE(scan);
2957
2958                 /* what trie are we using right now */
2959                 reg_trie_data * const trie
2960                     = (reg_trie_data*)rexi->data->data[ ARG( scan ) ];
2961                 HV * widecharmap = MUTABLE_HV(rexi->data->data[ ARG( scan ) + 1 ]);
2962                 U32 state = trie->startstate;
2963
2964                 if (trie->bitmap && trie_type != trie_utf8_fold &&
2965                     !TRIE_BITMAP_TEST(trie,*locinput)
2966                 ) {
2967                     if (trie->states[ state ].wordnum) {
2968                          DEBUG_EXECUTE_r(
2969                             PerlIO_printf(Perl_debug_log,
2970                                           "%*s  %smatched empty string...%s\n",
2971                                           REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5])
2972                         );
2973                         break;
2974                     } else {
2975                         DEBUG_EXECUTE_r(
2976                             PerlIO_printf(Perl_debug_log,
2977                                           "%*s  %sfailed to match trie start class...%s\n",
2978                                           REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5])
2979                         );
2980                         sayNO_SILENT;
2981                    }
2982                 }
2983
2984             {
2985                 U8 *uc = ( U8* )locinput;
2986
2987                 STRLEN len = 0;
2988                 STRLEN foldlen = 0;
2989                 U8 *uscan = (U8*)NULL;
2990                 STRLEN bufflen=0;
2991                 SV *sv_accept_buff = NULL;
2992                 U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ];
2993
2994                 ST.accepted = 0; /* how many accepting states we have seen */
2995                 ST.B = next;
2996                 ST.jump = trie->jump;
2997                 ST.me = scan;
2998                 /*
2999                    traverse the TRIE keeping track of all accepting states
3000                    we transition through until we get to a failing node.
3001                 */
3002
3003                 while ( state && uc <= (U8*)PL_regeol ) {
3004                     U32 base = trie->states[ state ].trans.base;
3005                     UV uvc = 0;
3006                     U16 charid;
3007                     /* We use charid to hold the wordnum as we don't use it
3008                        for charid until after we have done the wordnum logic.
3009                        We define an alias just so that the wordnum logic reads
3010                        more naturally. */
3011
3012 #define got_wordnum charid
3013                     got_wordnum = trie->states[ state ].wordnum;
3014
3015                     if ( got_wordnum ) {
3016                         if ( ! ST.accepted ) {
3017                             ENTER;
3018                             SAVETMPS; /* XXX is this necessary? dmq */
3019                             bufflen = TRIE_INITAL_ACCEPT_BUFFLEN;
3020                             sv_accept_buff=newSV(bufflen *
3021                                             sizeof(reg_trie_accepted) - 1);
3022                             SvCUR_set(sv_accept_buff, 0);
3023                             SvPOK_on(sv_accept_buff);
3024                             sv_2mortal(sv_accept_buff);
3025                             SAVETMPS;
3026                             ST.accept_buff =
3027                                 (reg_trie_accepted*)SvPV_nolen(sv_accept_buff );
3028                         }
3029                         do {
3030                             if (ST.accepted >= bufflen) {
3031                                 bufflen *= 2;
3032                                 ST.accept_buff =(reg_trie_accepted*)
3033                                     SvGROW(sv_accept_buff,
3034                                         bufflen * sizeof(reg_trie_accepted));
3035                             }
3036                             SvCUR_set(sv_accept_buff,SvCUR(sv_accept_buff)
3037                                 + sizeof(reg_trie_accepted));
3038
3039
3040                             ST.accept_buff[ST.accepted].wordnum = got_wordnum;
3041                             ST.accept_buff[ST.accepted].endpos = uc;
3042                             ++ST.accepted;
3043                         } while (trie->nextword && (got_wordnum= trie->nextword[got_wordnum]));
3044                     }
3045 #undef got_wordnum
3046
3047                     DEBUG_TRIE_EXECUTE_r({
3048                                 DUMP_EXEC_POS( (char *)uc, scan, do_utf8 );
3049                                 PerlIO_printf( Perl_debug_log,
3050                                     "%*s  %sState: %4"UVxf" Accepted: %4"UVxf" ",
3051                                     2+depth * 2, "", PL_colors[4],
3052                                     (UV)state, (UV)ST.accepted );
3053                     });
3054
3055                     if ( base ) {
3056                         REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc,
3057                                              uscan, len, uvc, charid, foldlen,
3058                                              foldbuf, uniflags);
3059
3060                         if (charid &&
3061                              (base + charid > trie->uniquecharcount )
3062                              && (base + charid - 1 - trie->uniquecharcount
3063                                     < trie->lasttrans)
3064                              && trie->trans[base + charid - 1 -
3065                                     trie->uniquecharcount].check == state)
3066                         {
3067                             state = trie->trans[base + charid - 1 -
3068                                 trie->uniquecharcount ].next;
3069                         }
3070                         else {
3071                             state = 0;
3072                         }
3073                         uc += len;
3074
3075                     }
3076                     else {
3077                         state = 0;
3078                     }
3079                     DEBUG_TRIE_EXECUTE_r(
3080                         PerlIO_printf( Perl_debug_log,
3081                             "Charid:%3x CP:%4"UVxf" After State: %4"UVxf"%s\n",
3082                             charid, uvc, (UV)state, PL_colors[5] );
3083                     );
3084                 }
3085                 if (!ST.accepted )
3086                    sayNO;
3087
3088                 DEBUG_EXECUTE_r(
3089                     PerlIO_printf( Perl_debug_log,
3090                         "%*s  %sgot %"IVdf" possible matches%s\n",
3091                         REPORT_CODE_OFF + depth * 2, "",
3092                         PL_colors[4], (IV)ST.accepted, PL_colors[5] );
3093                 );
3094             }}
3095             goto trie_first_try; /* jump into the fail handler */
3096             /* NOTREACHED */
3097         case TRIE_next_fail: /* we failed - try next alterative */
3098             if ( ST.jump) {
3099                 REGCP_UNWIND(ST.cp);
3100                 for (n = *PL_reglastparen; n > ST.lastparen; n--)
3101                     PL_regoffs[n].end = -1;
3102                 *PL_reglastparen = n;
3103             }
3104           trie_first_try:
3105             if (do_cutgroup) {
3106                 do_cutgroup = 0;
3107                 no_final = 0;
3108             }
3109
3110             if ( ST.jump) {
3111                 ST.lastparen = *PL_reglastparen;
3112                 REGCP_SET(ST.cp);
3113             }
3114             if ( ST.accepted == 1 ) {
3115                 /* only one choice left - just continue */
3116                 DEBUG_EXECUTE_r({
3117                     AV *const trie_words
3118                         = MUTABLE_AV(rexi->data->data[ARG(ST.me)+TRIE_WORDS_OFFSET]);
3119                     SV ** const tmp = av_fetch( trie_words,
3120                         ST.accept_buff[ 0 ].wordnum-1, 0 );
3121                     SV *sv= tmp ? sv_newmortal() : NULL;
3122
3123                     PerlIO_printf( Perl_debug_log,
3124                         "%*s  %sonly one match left: #%d <%s>%s\n",
3125                         REPORT_CODE_OFF+depth*2, "", PL_colors[4],
3126                         ST.accept_buff[ 0 ].wordnum,
3127                         tmp ? pv_pretty(sv, SvPV_nolen_const(*tmp), SvCUR(*tmp), 0,
3128                                 PL_colors[0], PL_colors[1],
3129                                 (SvUTF8(*tmp) ? PERL_PV_ESCAPE_UNI : 0)
3130                             )
3131                         : "not compiled under -Dr",
3132                         PL_colors[5] );
3133                 });
3134                 PL_reginput = (char *)ST.accept_buff[ 0 ].endpos;
3135                 /* in this case we free tmps/leave before we call regmatch
3136                    as we wont be using accept_buff again. */
3137
3138                 locinput = PL_reginput;
3139                 nextchr = UCHARAT(locinput);
3140                 if ( !ST.jump || !ST.jump[ST.accept_buff[0].wordnum])
3141                     scan = ST.B;
3142                 else
3143                     scan = ST.me + ST.jump[ST.accept_buff[0].wordnum];
3144                 if (!has_cutgroup) {
3145                     FREETMPS;
3146                     LEAVE;
3147                 } else {
3148                     ST.accepted--;
3149                     PUSH_YES_STATE_GOTO(TRIE_next, scan);
3150                 }
3151
3152                 continue; /* execute rest of RE */
3153             }
3154
3155             if ( !ST.accepted-- ) {
3156                 DEBUG_EXECUTE_r({
3157                     PerlIO_printf( Perl_debug_log,
3158                         "%*s  %sTRIE failed...%s\n",
3159                         REPORT_CODE_OFF+depth*2, "",
3160                         PL_colors[4],
3161                         PL_colors[5] );
3162                 });
3163                 FREETMPS;
3164                 LEAVE;
3165                 sayNO_SILENT;
3166                 /*NOTREACHED*/
3167             }
3168
3169             /*
3170                There are at least two accepting states left.  Presumably
3171                the number of accepting states is going to be low,
3172                typically two. So we simply scan through to find the one
3173                with lowest wordnum.  Once we find it, we swap the last
3174                state into its place and decrement the size. We then try to
3175                match the rest of the pattern at the point where the word
3176                ends. If we succeed, control just continues along the
3177                regex; if we fail we return here to try the next accepting
3178                state
3179              */
3180
3181             {
3182                 U32 best = 0;
3183                 U32 cur;
3184                 for( cur = 1 ; cur <= ST.accepted ; cur++ ) {
3185                     DEBUG_TRIE_EXECUTE_r(
3186                         PerlIO_printf( Perl_debug_log,
3187                             "%*s  %sgot %"IVdf" (%d) as best, looking at %"IVdf" (%d)%s\n",
3188                             REPORT_CODE_OFF + depth * 2, "", PL_colors[4],
3189                             (IV)best, ST.accept_buff[ best ].wordnum, (IV)cur,
3190                             ST.accept_buff[ cur ].wordnum, PL_colors[5] );
3191                     );
3192
3193                     if (ST.accept_buff[cur].wordnum <
3194                             ST.accept_buff[best].wordnum)
3195                         best = cur;
3196                 }
3197
3198                 DEBUG_EXECUTE_r({
3199                     AV *const trie_words
3200                         = MUTABLE_AV(rexi->data->data[ARG(ST.me)+TRIE_WORDS_OFFSET]);
3201                     SV ** const tmp = av_fetch( trie_words,
3202                         ST.accept_buff[ best ].wordnum - 1, 0 );
3203                     regnode *nextop=(!ST.jump || !ST.jump[ST.accept_buff[best].wordnum]) ?
3204                                     ST.B :
3205                                     ST.me + ST.jump[ST.accept_buff[best].wordnum];
3206                     SV *sv= tmp ? sv_newmortal() : NULL;
3207
3208                     PerlIO_printf( Perl_debug_log,
3209                         "%*s  %strying alternation #%d <%s> at node #%d %s\n",
3210                         REPORT_CODE_OFF+depth*2, "", PL_colors[4],
3211                         ST.accept_buff[best].wordnum,
3212                         tmp ? pv_pretty(sv, SvPV_nolen_const(*tmp), SvCUR(*tmp), 0,
3213                                 PL_colors[0], PL_colors[1],
3214                                 (SvUTF8(*tmp) ? PERL_PV_ESCAPE_UNI : 0)
3215                             ) : "not compiled under -Dr",
3216                             REG_NODE_NUM(nextop),
3217                         PL_colors[5] );
3218                 });
3219
3220                 if ( best<ST.accepted ) {
3221                     reg_trie_accepted tmp = ST.accept_buff[ best ];
3222                     ST.accept_buff[ best ] = ST.accept_buff[ ST.accepted ];
3223                     ST.accept_buff[ ST.accepted ] = tmp;
3224                     best = ST.accepted;
3225                 }
3226                 PL_reginput = (char *)ST.accept_buff[ best ].endpos;
3227                 if ( !ST.jump || !ST.jump[ST.accept_buff[best].wordnum]) {
3228                     scan = ST.B;
3229                 } else {
3230                     scan = ST.me + ST.jump[ST.accept_buff[best].wordnum];
3231                 }
3232                 PUSH_YES_STATE_GOTO(TRIE_next, scan);
3233                 /* NOTREACHED */
3234             }
3235             /* NOTREACHED */
3236         case TRIE_next:
3237             /* we dont want to throw this away, see bug 57042*/
3238             if (oreplsv != GvSV(PL_replgv))
3239                 sv_setsv(oreplsv, GvSV(PL_replgv));
3240             FREETMPS;
3241             LEAVE;
3242             sayYES;
3243 #undef  ST
3244
3245         case EXACT: {
3246             char *s = STRING(scan);
3247             ln = STR_LEN(scan);
3248             if (do_utf8 != UTF) {
3249                 /* The target and the pattern have differing utf8ness. */
3250                 char *l = locinput;
3251                 const char * const e = s + ln;
3252
3253                 if (do_utf8) {
3254                     /* The target is utf8, the pattern is not utf8. */
3255                     while (s < e) {
3256                         STRLEN ulen;
3257                         if (l >= PL_regeol)
3258                              sayNO;
3259                         if (NATIVE_TO_UNI(*(U8*)s) !=
3260                             utf8n_to_uvuni((U8*)l, UTF8_MAXBYTES, &ulen,
3261                                             uniflags))
3262                              sayNO;
3263                         l += ulen;
3264                         s ++;
3265                     }
3266                 }
3267                 else {
3268                     /* The target is not utf8, the pattern is utf8. */
3269                     while (s < e) {
3270                         STRLEN ulen;
3271                         if (l >= PL_regeol)
3272                             sayNO;
3273                         if (NATIVE_TO_UNI(*((U8*)l)) !=
3274                             utf8n_to_uvuni((U8*)s, UTF8_MAXBYTES, &ulen,
3275                                            uniflags))
3276                             sayNO;
3277                         s += ulen;
3278                         l ++;
3279                     }
3280                 }
3281                 locinput = l;
3282                 nextchr = UCHARAT(locinput);
3283                 break;
3284             }
3285             /* The target and the pattern have the same utf8ness. */
3286             /* Inline the first character, for speed. */
3287             if (UCHARAT(s) != nextchr)
3288                 sayNO;
3289             if (PL_regeol - locinput < ln)
3290                 sayNO;
3291             if (ln > 1 && memNE(s, locinput, ln))
3292                 sayNO;
3293             locinput += ln;
3294             nextchr = UCHARAT(locinput);
3295             break;
3296             }
3297         case EXACTFL:
3298             PL_reg_flags |= RF_tainted;
3299             /* FALL THROUGH */
3300         case EXACTF: {
3301             char * const s = STRING(scan);
3302             ln = STR_LEN(scan);
3303
3304             if (do_utf8 || UTF) {
3305               /* Either target or the pattern are utf8. */
3306                 const char * const l = locinput;
3307                 char *e = PL_regeol;
3308
3309                 if (ibcmp_utf8(s, 0,  ln, (bool)UTF,
3310                                l, &e, 0,  do_utf8)) {
3311                      /* One more case for the sharp s:
3312                       * pack("U0U*", 0xDF) =~ /ss/i,
3313                       * the 0xC3 0x9F are the UTF-8
3314                       * byte sequence for the U+00DF. */
3315
3316                      if (!(do_utf8 &&
3317                            toLOWER(s[0]) == 's' &&
3318                            ln >= 2 &&
3319                            toLOWER(s[1]) == 's' &&
3320                            (U8)l[0] == 0xC3 &&
3321                            e - l >= 2 &&
3322                            (U8)l[1] == 0x9F))
3323                           sayNO;
3324                 }
3325                 locinput = e;
3326                 nextchr = UCHARAT(locinput);
3327                 break;
3328             }
3329
3330             /* Neither the target and the pattern are utf8. */
3331
3332             /* Inline the first character, for speed. */
3333             if (UCHARAT(s) != nextchr &&
3334                 UCHARAT(s) != ((OP(scan) == EXACTF)
3335                                ? PL_fold : PL_fold_locale)[nextchr])
3336                 sayNO;
3337             if (PL_regeol - locinput < ln)
3338                 sayNO;
3339             if (ln > 1 && (OP(scan) == EXACTF
3340                            ? ibcmp(s, locinput, ln)
3341                            : ibcmp_locale(s, locinput, ln)))
3342                 sayNO;
3343             locinput += ln;
3344             nextchr = UCHARAT(locinput);
3345             break;
3346             }
3347         case ANYOF:
3348             if (do_utf8) {
3349                 STRLEN inclasslen = PL_regeol - locinput;
3350
3351                 if (!reginclass(rex, scan, (U8*)locinput, &inclasslen, do_utf8))
3352                     goto anyof_fail;
3353                 if (locinput >= PL_regeol)
3354                     sayNO;
3355                 locinput += inclasslen ? inclasslen : UTF8SKIP(locinput);
3356                 nextchr = UCHARAT(locinput);
3357                 break;
3358             }
3359             else {
3360                 if (nextchr < 0)
3361                     nextchr = UCHARAT(locinput);
3362                 if (!REGINCLASS(rex, scan, (U8*)locinput))
3363                     goto anyof_fail;
3364                 if (!nextchr && locinput >= PL_regeol)
3365                     sayNO;
3366                 nextchr = UCHARAT(++locinput);
3367                 break;
3368             }
3369         anyof_fail:
3370             /* If we might have the case of the German sharp s
3371              * in a casefolding Unicode character class. */
3372
3373             if (ANYOF_FOLD_SHARP_S(scan, locinput, PL_regeol)) {
3374                  locinput += SHARP_S_SKIP;
3375                  nextchr = UCHARAT(locinput);
3376             }
3377             else
3378                  sayNO;
3379             break;
3380         case ALNUML:
3381             PL_reg_flags |= RF_tainted;
3382             /* FALL THROUGH */
3383         case ALNUM:
3384             if (!nextchr)
3385                 sayNO;
3386             if (do_utf8) {
3387                 LOAD_UTF8_CHARCLASS_ALNUM();
3388                 if (!(OP(scan) == ALNUM
3389                       ? (bool)swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
3390                       : isALNUM_LC_utf8((U8*)locinput)))
3391                 {
3392                     sayNO;
3393                 }
3394                 locinput += PL_utf8skip[nextchr];
3395                 nextchr = UCHARAT(locinput);
3396                 break;
3397             }
3398             if (!(OP(scan) == ALNUM
3399                   ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
3400                 sayNO;
3401             nextchr = UCHARAT(++locinput);
3402             break;
3403         case NALNUML:
3404             PL_reg_flags |= RF_tainted;
3405             /* FALL THROUGH */
3406         case NALNUM:
3407             if (!nextchr && locinput >= PL_regeol)
3408                 sayNO;
3409             if (do_utf8) {
3410                 LOAD_UTF8_CHARCLASS_ALNUM();
3411                 if (OP(scan) == NALNUM
3412                     ? (bool)swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
3413                     : isALNUM_LC_utf8((U8*)locinput))
3414                 {
3415                     sayNO;
3416                 }
3417                 locinput += PL_utf8skip[nextchr];
3418                 nextchr = UCHARAT(locinput);
3419                 break;
3420             }
3421             if (OP(scan) == NALNUM
3422                 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
3423                 sayNO;
3424             nextchr = UCHARAT(++locinput);
3425             break;
3426         case BOUNDL:
3427         case NBOUNDL:
3428             PL_reg_flags |= RF_tainted;
3429             /* FALL THROUGH */
3430         case BOUND:
3431         case NBOUND:
3432             /* was last char in word? */
3433             if (do_utf8) {
3434                 if (locinput == PL_bostr)
3435                     ln = '\n';
3436                 else {
3437                     const U8 * const r = reghop3((U8*)locinput, -1, (U8*)PL_bostr);
3438
3439                     ln = utf8n_to_uvchr(r, UTF8SKIP(r), 0, uniflags);
3440                 }
3441                 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
3442                     ln = isALNUM_uni(ln);
3443                     LOAD_UTF8_CHARCLASS_ALNUM();
3444                     n = swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8);
3445                 }
3446                 else {
3447                     ln = isALNUM_LC_uvchr(UNI_TO_NATIVE(ln));
3448                     n = isALNUM_LC_utf8((U8*)locinput);
3449                 }
3450             }
3451             else {
3452                 ln = (locinput != PL_bostr) ?
3453                     UCHARAT(locinput - 1) : '\n';
3454                 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
3455                     ln = isALNUM(ln);
3456                     n = isALNUM(nextchr);
3457                 }
3458                 else {
3459                     ln = isALNUM_LC(ln);
3460                     n = isALNUM_LC(nextchr);
3461                 }
3462             }
3463             if (((!ln) == (!n)) == (OP(scan) == BOUND ||
3464                                     OP(scan) == BOUNDL))
3465                     sayNO;
3466             break;
3467         case SPACEL:
3468             PL_reg_flags |= RF_tainted;
3469             /* FALL THROUGH */
3470         case SPACE:
3471             if (!nextchr)
3472                 sayNO;
3473             if (do_utf8) {
3474                 if (UTF8_IS_CONTINUED(nextchr)) {
3475                     LOAD_UTF8_CHARCLASS_SPACE();
3476                     if (!(OP(scan) == SPACE
3477                           ? (bool)swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
3478                           : isSPACE_LC_utf8((U8*)locinput)))
3479                     {
3480                         sayNO;
3481                     }
3482                     locinput += PL_utf8skip[nextchr];
3483                     nextchr = UCHARAT(locinput);
3484                     break;
3485                 }
3486                 if (!(OP(scan) == SPACE
3487                       ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
3488                     sayNO;
3489                 nextchr = UCHARAT(++locinput);
3490             }
3491             else {
3492                 if (!(OP(scan) == SPACE
3493                       ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
3494                     sayNO;
3495                 nextchr = UCHARAT(++locinput);
3496             }
3497             break;
3498         case NSPACEL:
3499             PL_reg_flags |= RF_tainted;
3500             /* FALL THROUGH */
3501         case NSPACE:
3502             if (!nextchr && locinput >= PL_regeol)
3503                 sayNO;
3504             if (do_utf8) {
3505                 LOAD_UTF8_CHARCLASS_SPACE();
3506                 if (OP(scan) == NSPACE
3507                     ? (bool)swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
3508                     : isSPACE_LC_utf8((U8*)locinput))
3509                 {
3510                     sayNO;
3511                 }
3512                 locinput += PL_utf8skip[nextchr];
3513                 nextchr = UCHARAT(locinput);
3514                 break;
3515             }
3516             if (OP(scan) == NSPACE
3517                 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
3518                 sayNO;
3519             nextchr = UCHARAT(++locinput);
3520             break;
3521         case DIGITL:
3522             PL_reg_flags |= RF_tainted;
3523             /* FALL THROUGH */
3524         case DIGIT:
3525             if (!nextchr)
3526                 sayNO;
3527             if (do_utf8) {
3528                 LOAD_UTF8_CHARCLASS_DIGIT();
3529                 if (!(OP(scan) == DIGIT
3530                       ? (bool)swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
3531                       : isDIGIT_LC_utf8((U8*)locinput)))
3532                 {
3533                     sayNO;
3534                 }
3535                 locinput += PL_utf8skip[nextchr];
3536                 nextchr = UCHARAT(locinput);
3537                 break;
3538             }
3539             if (!(OP(scan) == DIGIT
3540                   ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
3541                 sayNO;
3542             nextchr = UCHARAT(++locinput);
3543             break;
3544         case NDIGITL:
3545             PL_reg_flags |= RF_tainted;
3546             /* FALL THROUGH */
3547         case NDIGIT:
3548             if (!nextchr && locinput >= PL_regeol)
3549                 sayNO;
3550             if (do_utf8) {
3551                 LOAD_UTF8_CHARCLASS_DIGIT();
3552                 if (OP(scan) == NDIGIT
3553                     ? (bool)swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
3554                     : isDIGIT_LC_utf8((U8*)locinput))
3555                 {
3556                     sayNO;
3557                 }
3558                 locinput += PL_utf8skip[nextchr];
3559                 nextchr = UCHARAT(locinput);
3560                 break;
3561             }
3562             if (OP(scan) == NDIGIT
3563                 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
3564                 sayNO;
3565             nextchr = UCHARAT(++locinput);
3566             break;
3567         case CLUMP:
3568             if (locinput >= PL_regeol)
3569                 sayNO;
3570             if  (do_utf8) {
3571                 LOAD_UTF8_CHARCLASS_MARK();
3572                 if (swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
3573                     sayNO;
3574                 locinput += PL_utf8skip[nextchr];
3575                 while (locinput < PL_regeol &&
3576                        swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
3577                     locinput += UTF8SKIP(locinput);
3578                 if (locinput > PL_regeol)
3579                     sayNO;
3580             }
3581             else
3582                locinput++;
3583             nextchr = UCHARAT(locinput);
3584             break;
3585
3586         case NREFFL:
3587         {
3588             char *s;
3589             char type;
3590             PL_reg_flags |= RF_tainted;
3591             /* FALL THROUGH */
3592         case NREF:
3593         case NREFF:
3594             type = OP(scan);
3595             n = reg_check_named_buff_matched(rex,scan);
3596
3597             if ( n ) {
3598                 type = REF + ( type - NREF );
3599                 goto do_ref;
3600             } else {
3601                 sayNO;
3602             }
3603             /* unreached */
3604         case REFFL:
3605             PL_reg_flags |= RF_tainted;
3606             /* FALL THROUGH */
3607         case REF:
3608         case REFF:
3609             n = ARG(scan);  /* which paren pair */
3610             type = OP(scan);
3611           do_ref:
3612             ln = PL_regoffs[n].start;
3613             PL_reg_leftiter = PL_reg_maxiter;           /* Void cache */
3614             if (*PL_reglastparen < n || ln == -1)
3615                 sayNO;                  /* Do not match unless seen CLOSEn. */
3616             if (ln == PL_regoffs[n].end)
3617                 break;
3618
3619             s = PL_bostr + ln;
3620             if (do_utf8 && type != REF) {       /* REF can do byte comparison */
3621                 char *l = locinput;
3622                 const char *e = PL_bostr + PL_regoffs[n].end;
3623                 /*
3624                  * Note that we can't do the "other character" lookup trick as
3625                  * in the 8-bit case (no pun intended) because in Unicode we
3626                  * have to map both upper and title case to lower case.
3627                  */
3628                 if (type == REFF) {
3629                     while (s < e) {
3630                         STRLEN ulen1, ulen2;
3631                         U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
3632                         U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
3633
3634                         if (l >= PL_regeol)
3635                             sayNO;
3636                         toLOWER_utf8((U8*)s, tmpbuf1, &ulen1);
3637                         toLOWER_utf8((U8*)l, tmpbuf2, &ulen2);
3638                         if (ulen1 != ulen2 || memNE((char *)tmpbuf1, (char *)tmpbuf2, ulen1))
3639                             sayNO;
3640                         s += ulen1;
3641                         l += ulen2;
3642                     }
3643                 }
3644                 locinput = l;
3645                 nextchr = UCHARAT(locinput);
3646                 break;
3647             }
3648
3649             /* Inline the first character, for speed. */
3650             if (UCHARAT(s) != nextchr &&
3651                 (type == REF ||
3652                  (UCHARAT(s) != (type == REFF
3653                                   ? PL_fold : PL_fold_locale)[nextchr])))
3654                 sayNO;
3655             ln = PL_regoffs[n].end - ln;
3656             if (locinput + ln > PL_regeol)
3657                 sayNO;
3658             if (ln > 1 && (type == REF
3659                            ? memNE(s, locinput, ln)
3660                            : (type == REFF
3661                               ? ibcmp(s, locinput, ln)
3662                               : ibcmp_locale(s, locinput, ln))))
3663                 sayNO;
3664             locinput += ln;
3665             nextchr = UCHARAT(locinput);
3666             break;
3667         }
3668         case NOTHING:
3669         case TAIL:
3670             break;
3671         case BACK:
3672             break;
3673
3674 #undef  ST
3675 #define ST st->u.eval
3676         {
3677             SV *ret;
3678             REGEXP *re_sv;
3679             regexp *re;
3680             regexp_internal *rei;
3681             regnode *startpoint;
3682
3683         case GOSTART:
3684         case GOSUB: /*    /(...(?1))/   /(...(?&foo))/   */
3685             if (cur_eval && cur_eval->locinput==locinput) {
3686                 if (cur_eval->u.eval.close_paren == (U32)ARG(scan))
3687                     Perl_croak(aTHX_ "Infinite recursion in regex");
3688                 if ( ++nochange_depth > max_nochange_depth )
3689                     Perl_croak(aTHX_
3690                         "Pattern subroutine nesting without pos change"
3691                         " exceeded limit in regex");
3692             } else {
3693                 nochange_depth = 0;
3694             }
3695             re_sv = rex_sv;
3696             re = rex;
3697             rei = rexi;
3698             (void)ReREFCNT_inc(rex_sv);
3699             if (OP(scan)==GOSUB) {
3700                 startpoint = scan + ARG2L(scan);
3701                 ST.close_paren = ARG(scan);
3702             } else {
3703                 startpoint = rei->program+1;
3704                 ST.close_paren = 0;
3705             }
3706             goto eval_recurse_doit;
3707             /* NOTREACHED */
3708         case EVAL:  /*   /(?{A})B/   /(??{A})B/  and /(?(?{A})X|Y)B/   */
3709             if (cur_eval && cur_eval->locinput==locinput) {
3710                 if ( ++nochange_depth > max_nochange_depth )
3711                     Perl_croak(aTHX_ "EVAL without pos change exceeded limit in regex");
3712             } else {
3713                 nochange_depth = 0;
3714             }
3715             {
3716                 /* execute the code in the {...} */
3717                 dSP;
3718                 SV ** const before = SP;
3719                 OP_4tree * const oop = PL_op;
3720                 COP * const ocurcop = PL_curcop;
3721                 PAD *old_comppad;
3722                 char *saved_regeol = PL_regeol;
3723
3724                 n = ARG(scan);
3725                 PL_op = (OP_4tree*)rexi->data->data[n];
3726                 DEBUG_STATE_r( PerlIO_printf(Perl_debug_log,
3727                     "  re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
3728                 PAD_SAVE_LOCAL(old_comppad, (PAD*)rexi->data->data[n + 2]);
3729                 PL_regoffs[0].end = PL_reg_magic->mg_len = locinput - PL_bostr;
3730
3731                 if (sv_yes_mark) {
3732                     SV *sv_mrk = get_sv("REGMARK", 1);
3733                     sv_setsv(sv_mrk, sv_yes_mark);
3734                 }
3735
3736                 CALLRUNOPS(aTHX);                       /* Scalar context. */
3737                 SPAGAIN;
3738                 if (SP == before)
3739                     ret = &PL_sv_undef;   /* protect against empty (?{}) blocks. */
3740                 else {
3741                     ret = POPs;
3742                     PUTBACK;
3743                 }
3744
3745                 PL_op = oop;
3746                 PAD_RESTORE_LOCAL(old_comppad);
3747                 PL_curcop = ocurcop;
3748                 PL_regeol = saved_regeol;
3749                 if (!logical) {
3750                     /* /(?{...})/ */
3751                     sv_setsv(save_scalar(PL_replgv), ret);
3752                     break;
3753                 }
3754             }
3755             if (logical == 2) { /* Postponed subexpression: /(??{...})/ */
3756                 logical = 0;
3757                 {
3758                     /* extract RE object from returned value; compiling if
3759                      * necessary */
3760                     MAGIC *mg = NULL;
3761                     REGEXP *rx = NULL;
3762
3763                     if (SvROK(ret)) {
3764                         SV *const sv = SvRV(ret);
3765
3766                         if (SvTYPE(sv) == SVt_REGEXP) {
3767                             rx = (REGEXP*) sv;
3768                         } else if (SvSMAGICAL(sv)) {
3769                             mg = mg_find(sv, PERL_MAGIC_qr);
3770                             assert(mg);
3771                         }
3772                     } else if (SvTYPE(ret) == SVt_REGEXP) {
3773                         rx = (REGEXP*) ret;
3774                     } else if (SvSMAGICAL(ret)) {
3775                         if (SvGMAGICAL(ret)) {
3776                             /* I don't believe that there is ever qr magic
3777                                here.  */
3778                             assert(!mg_find(ret, PERL_MAGIC_qr));
3779                             sv_unmagic(ret, PERL_MAGIC_qr);
3780                         }
3781                         else {
3782                             mg = mg_find(ret, PERL_MAGIC_qr);
3783                             /* testing suggests mg only ends up non-NULL for
3784                                scalars who were upgraded and compiled in the
3785                                else block below. In turn, this is only
3786                                triggered in the "postponed utf8 string" tests
3787                                in t/op/pat.t  */
3788                         }
3789                     }
3790
3791                     if (mg) {
3792                         rx = (REGEXP *) mg->mg_obj; /*XXX:dmq*/
3793                         assert(rx);
3794                     }
3795                     if (rx) {
3796                         rx = reg_temp_copy(rx);
3797                     }
3798                     else {
3799                         U32 pm_flags = 0;
3800                         const I32 osize = PL_regsize;
3801
3802                         if (DO_UTF8(ret)) {
3803                             assert (SvUTF8(ret));
3804                         } else if (SvUTF8(ret)) {
3805                             /* Not doing UTF-8, despite what the SV says. Is
3806                                this only if we're trapped in use 'bytes'?  */
3807                             /* Make a copy of the octet sequence, but without
3808                                the flag on, as the compiler now honours the
3809                                SvUTF8 flag on ret.  */
3810                             STRLEN len;
3811                             const char *const p = SvPV(ret, len);
3812                             ret = newSVpvn_flags(p, len, SVs_TEMP);
3813                         }
3814                         rx = CALLREGCOMP(ret, pm_flags);
3815                         if (!(SvFLAGS(ret)
3816                               & (SVs_TEMP | SVs_PADTMP | SVf_READONLY
3817                                  | SVs_GMG))) {
3818                             /* This isn't a first class regexp. Instead, it's
3819                                caching a regexp onto an existing, Perl visible
3820                                scalar.  */
3821                             sv_magic(ret, MUTABLE_SV(rx), PERL_MAGIC_qr, 0, 0);
3822                         }
3823                         PL_regsize = osize;
3824                     }
3825                     re_sv = rx;
3826                     re = (struct regexp *)SvANY(rx);
3827                 }
3828                 RXp_MATCH_COPIED_off(re);
3829                 re->subbeg = rex->subbeg;
3830                 re->sublen = rex->sublen;
3831                 rei = RXi_GET(re);
3832                 DEBUG_EXECUTE_r(
3833                     debug_start_match(re_sv, do_utf8, locinput, PL_regeol,
3834                         "Matching embedded");
3835                 );
3836                 startpoint = rei->program + 1;
3837                 ST.close_paren = 0; /* only used for GOSUB */
3838                 /* borrowed from regtry */
3839                 if (PL_reg_start_tmpl <= re->nparens) {
3840                     PL_reg_start_tmpl = re->nparens*3/2 + 3;
3841                     if(PL_reg_start_tmp)
3842                         Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
3843                     else
3844                         Newx(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
3845                 }
3846
3847         eval_recurse_doit: /* Share code with GOSUB below this line */
3848                 /* run the pattern returned from (??{...}) */
3849                 ST.cp = regcppush(0);   /* Save *all* the positions. */
3850                 REGCP_SET(ST.lastcp);
3851
3852                 PL_regoffs = re->offs; /* essentially NOOP on GOSUB */
3853
3854                 /* see regtry, specifically PL_reglast(?:close)?paren is a pointer! (i dont know why) :dmq */
3855                 PL_reglastparen = &re->lastparen;
3856                 PL_reglastcloseparen = &re->lastcloseparen;
3857                 re->lastparen = 0;
3858                 re->lastcloseparen = 0;
3859
3860                 PL_reginput = locinput;
3861                 PL_regsize = 0;
3862
3863                 /* XXXX This is too dramatic a measure... */
3864                 PL_reg_maxiter = 0;
3865
3866                 ST.toggle_reg_flags = PL_reg_flags;
3867                 if (RX_UTF8(re_sv))
3868                     PL_reg_flags |= RF_utf8;
3869                 else
3870                     PL_reg_flags &= ~RF_utf8;
3871                 ST.toggle_reg_flags ^= PL_reg_flags; /* diff of old and new */
3872
3873                 ST.prev_rex = rex_sv;
3874                 ST.prev_curlyx = cur_curlyx;
3875                 SETREX(rex_sv,re_sv);
3876                 rex = re;
3877                 rexi = rei;
3878                 cur_curlyx = NULL;
3879                 ST.B = next;
3880                 ST.prev_eval = cur_eval;
3881                 cur_eval = st;
3882                 /* now continue from first node in postoned RE */
3883                 PUSH_YES_STATE_GOTO(EVAL_AB, startpoint);
3884                 /* NOTREACHED */
3885             }
3886             /* logical is 1,   /(?(?{...})X|Y)/ */
3887             sw = (bool)SvTRUE(ret);
3888             logical = 0;
3889             break;
3890         }
3891
3892         case EVAL_AB: /* cleanup after a successful (??{A})B */
3893             /* note: this is called twice; first after popping B, then A */
3894             PL_reg_flags ^= ST.toggle_reg_flags;
3895             ReREFCNT_dec(rex_sv);
3896             SETREX(rex_sv,ST.prev_rex);
3897             rex = (struct regexp *)SvANY(rex_sv);
3898             rexi = RXi_GET(rex);
3899             regcpblow(ST.cp);
3900             cur_eval = ST.prev_eval;
3901             cur_curlyx = ST.prev_curlyx;
3902
3903             /* rex was changed so update the pointer in PL_reglastparen and PL_reglastcloseparen */
3904             PL_reglastparen = &rex->lastparen;
3905             PL_reglastcloseparen = &rex->lastcloseparen;
3906             /* also update PL_regoffs */
3907             PL_regoffs = rex->offs;
3908
3909             /* XXXX This is too dramatic a measure... */
3910             PL_reg_maxiter = 0;
3911             if ( nochange_depth )
3912                 nochange_depth--;
3913             sayYES;
3914
3915
3916         case EVAL_AB_fail: /* unsuccessfully ran A or B in (??{A})B */
3917             /* note: this is called twice; first after popping B, then A */
3918             PL_reg_flags ^= ST.toggle_reg_flags;
3919             ReREFCNT_dec(rex_sv);
3920             SETREX(rex_sv,ST.prev_rex);
3921             rex = (struct regexp *)SvANY(rex_sv);
3922             rexi = RXi_GET(rex);
3923             /* rex was changed so update the pointer in PL_reglastparen and PL_reglastcloseparen */
3924             PL_reglastparen = &rex->lastparen;
3925             PL_reglastcloseparen = &rex->lastcloseparen;
3926
3927             PL_reginput = locinput;
3928             REGCP_UNWIND(ST.lastcp);
3929             regcppop(rex);
3930             cur_eval = ST.prev_eval;
3931             cur_curlyx = ST.prev_curlyx;
3932             /* XXXX This is too dramatic a measure... */
3933             PL_reg_maxiter = 0;
3934             if ( nochange_depth )
3935                 nochange_depth--;
3936             sayNO_SILENT;
3937 #undef ST
3938
3939         case OPEN:
3940             n = ARG(scan);  /* which paren pair */
3941             PL_reg_start_tmp[n] = locinput;
3942             if (n > PL_regsize)
3943                 PL_regsize = n;
3944             lastopen = n;
3945             break;
3946         case CLOSE:
3947             n = ARG(scan);  /* which paren pair */
3948             PL_regoffs[n].start = PL_reg_start_tmp[n] - PL_bostr;
3949             PL_regoffs[n].end = locinput - PL_bostr;
3950             /*if (n > PL_regsize)
3951                 PL_regsize = n;*/
3952             if (n > *PL_reglastparen)
3953                 *PL_reglastparen = n;
3954             *PL_reglastcloseparen = n;
3955             if (cur_eval && cur_eval->u.eval.close_paren == n) {
3956                 goto fake_end;
3957             }
3958             break;
3959         case ACCEPT:
3960             if (ARG(scan)){
3961                 regnode *cursor;
3962                 for (cursor=scan;
3963                      cursor && OP(cursor)!=END;
3964                      cursor=regnext(cursor))
3965                 {
3966                     if ( OP(cursor)==CLOSE ){
3967                         n = ARG(cursor);
3968                         if ( n <= lastopen ) {
3969                             PL_regoffs[n].start
3970                                 = PL_reg_start_tmp[n] - PL_bostr;
3971                             PL_regoffs[n].end = locinput - PL_bostr;
3972                             /*if (n > PL_regsize)
3973                             PL_regsize = n;*/
3974                             if (n > *PL_reglastparen)
3975                                 *PL_reglastparen = n;
3976                             *PL_reglastcloseparen = n;
3977                             if ( n == ARG(scan) || (cur_eval &&
3978                                 cur_eval->u.eval.close_paren == n))
3979                                 break;
3980                         }
3981                     }
3982                 }
3983             }
3984             goto fake_end;
3985             /*NOTREACHED*/
3986         case GROUPP:
3987             n = ARG(scan);  /* which paren pair */
3988             sw = (bool)(*PL_reglastparen >= n && PL_regoffs[n].end != -1);
3989             break;
3990         case NGROUPP:
3991             /* reg_check_named_buff_matched returns 0 for no match */
3992             sw = (bool)(0 < reg_check_named_buff_matched(rex,scan));
3993             break;
3994         case INSUBP:
3995             n = ARG(scan);
3996             sw = (cur_eval && (!n || cur_eval->u.eval.close_paren == n));
3997             break;
3998         case DEFINEP:
3999             sw = 0;
4000             break;
4001         case IFTHEN:
4002             PL_reg_leftiter = PL_reg_maxiter;           /* Void cache */
4003             if (sw)
4004                 next = NEXTOPER(NEXTOPER(scan));
4005             else {
4006                 next = scan + ARG(scan);
4007                 if (OP(next) == IFTHEN) /* Fake one. */
4008                     next = NEXTOPER(NEXTOPER(next));
4009             }
4010             break;
4011         case LOGICAL:
4012             logical = scan->flags;
4013             break;
4014
4015 /*******************************************************************
4016
4017 The CURLYX/WHILEM pair of ops handle the most generic case of the /A*B/
4018 pattern, where A and B are subpatterns. (For simple A, CURLYM or
4019 STAR/PLUS/CURLY/CURLYN are used instead.)
4020
4021 A*B is compiled as <CURLYX><A><WHILEM><B>
4022
4023 On entry to the subpattern, CURLYX is called. This pushes a CURLYX
4024 state, which contains the current count, initialised to -1. It also sets
4025 cur_curlyx to point to this state, with any previous value saved in the
4026 state block.
4027
4028 CURLYX then jumps straight to the WHILEM op, rather than executing A,
4029 since the pattern may possibly match zero times (i.e. it's a while {} loop
4030 rather than a do {} while loop).
4031
4032 Each entry to WHILEM represents a successful match of A. The count in the
4033 CURLYX block is incremented, another WHILEM state is pushed, and execution
4034 passes to A or B depending on greediness and the current count.
4035
4036 For example, if matching against the string a1a2a3b (where the aN are
4037 substrings that match /A/), then the match progresses as follows: (the
4038 pushed states are interspersed with the bits of strings matched so far):
4039
4040     <CURLYX cnt=-1>
4041     <CURLYX cnt=0><WHILEM>
4042     <CURLYX cnt=1><WHILEM> a1 <WHILEM>
4043     <CURLYX cnt=2><WHILEM> a1 <WHILEM> a2 <WHILEM>
4044     <CURLYX cnt=3><WHILEM> a1 <WHILEM> a2 <WHILEM> a3 <WHILEM>
4045     <CURLYX cnt=3><WHILEM> a1 <WHILEM> a2 <WHILEM> a3 <WHILEM> b
4046
4047 (Contrast this with something like CURLYM, which maintains only a single
4048 backtrack state:
4049
4050     <CURLYM cnt=0> a1
4051     a1 <CURLYM cnt=1> a2
4052     a1 a2 <CURLYM cnt=2> a3
4053     a1 a2 a3 <CURLYM cnt=3> b
4054 )
4055
4056 Each WHILEM state block marks a point to backtrack to upon partial failure
4057 of A or B, and also contains some minor state data related to that
4058 iteration.  The CURLYX block, pointed to by cur_curlyx, contains the
4059 overall state, such as the count, and pointers to the A and B ops.
4060
4061 This is complicated slightly by nested CURLYX/WHILEM's. Since cur_curlyx
4062 must always point to the *current* CURLYX block, the rules are:
4063
4064 When executing CURLYX, save the old cur_curlyx in the CURLYX state block,
4065 and set cur_curlyx to point the new block.
4066
4067 When popping the CURLYX block after a successful or unsuccessful match,
4068 restore the previous cur_curlyx.
4069
4070 When WHILEM is about to execute B, save the current cur_curlyx, and set it
4071 to the outer one saved in the CURLYX block.
4072
4073 When popping the WHILEM block after a successful or unsuccessful B match,
4074 restore the previous cur_curlyx.
4075
4076 Here's an example for the pattern (AI* BI)*BO
4077 I and O refer to inner and outer, C and W refer to CURLYX and WHILEM:
4078
4079 cur_
4080 curlyx backtrack stack
4081 ------ ---------------
4082 NULL
4083 CO     <CO prev=NULL> <WO>
4084 CI     <CO prev=NULL> <WO> <CI prev=CO> <WI> ai
4085 CO     <CO prev=NULL> <WO> <CI prev=CO> <WI> ai <WI prev=CI> bi
4086 NULL   <CO prev=NULL> <WO> <CI prev=CO> <WI> ai <WI prev=CI> bi <WO prev=CO> bo
4087
4088 At this point the pattern succeeds, and we work back down the stack to
4089 clean up, restoring as we go:
4090
4091 CO     <CO prev=NULL> <WO> <CI prev=CO> <WI> ai <WI prev=CI> bi
4092 CI     <CO prev=NULL> <WO> <CI prev=CO> <WI> ai
4093 CO     <CO prev=NULL> <WO>
4094 NULL
4095
4096 *******************************************************************/
4097
4098 #define ST st->u.curlyx
4099
4100         case CURLYX:    /* start of /A*B/  (for complex A) */
4101         {
4102             /* No need to save/restore up to this paren */
4103             I32 parenfloor = scan->flags;
4104
4105             assert(next); /* keep Coverity happy */
4106             if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
4107                 next += ARG(next);
4108
4109             /* XXXX Probably it is better to teach regpush to support
4110                parenfloor > PL_regsize... */
4111             if (parenfloor > (I32)*PL_reglastparen)
4112                 parenfloor = *PL_reglastparen; /* Pessimization... */
4113
4114             ST.prev_curlyx= cur_curlyx;
4115             cur_curlyx = st;
4116             ST.cp = PL_savestack_ix;
4117
4118             /* these fields contain the state of the current curly.
4119              * they are accessed by subsequent WHILEMs */
4120             ST.parenfloor = parenfloor;
4121             ST.min = ARG1(scan);
4122             ST.max = ARG2(scan);
4123             ST.A = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
4124             ST.B = next;
4125             ST.minmod = minmod;
4126             minmod = 0;
4127             ST.count = -1;      /* this will be updated by WHILEM */
4128             ST.lastloc = NULL;  /* this will be updated by WHILEM */
4129
4130             PL_reginput = locinput;
4131             PUSH_YES_STATE_GOTO(CURLYX_end, PREVOPER(next));
4132             /* NOTREACHED */
4133         }
4134
4135         case CURLYX_end: /* just finished matching all of A*B */
4136             cur_curlyx = ST.prev_curlyx;
4137             sayYES;
4138             /* NOTREACHED */
4139
4140         case CURLYX_end_fail: /* just failed to match all of A*B */
4141             regcpblow(ST.cp);
4142             cur_curlyx = ST.prev_curlyx;
4143             sayNO;
4144             /* NOTREACHED */
4145
4146
4147 #undef ST
4148 #define ST st->u.whilem
4149
4150         case WHILEM:     /* just matched an A in /A*B/  (for complex A) */
4151         {
4152             /* see the discussion above about CURLYX/WHILEM */
4153             I32 n;
4154             assert(cur_curlyx); /* keep Coverity happy */
4155             n = ++cur_curlyx->u.curlyx.count; /* how many A's matched */
4156             ST.save_lastloc = cur_curlyx->u.curlyx.lastloc;
4157             ST.cache_offset = 0;
4158             ST.cache_mask = 0;
4159
4160             PL_reginput = locinput;
4161
4162             DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
4163                   "%*s  whilem: matched %ld out of %ld..%ld\n",
4164                   REPORT_CODE_OFF+depth*2, "", (long)n,
4165                   (long)cur_curlyx->u.curlyx.min,
4166                   (long)cur_curlyx->u.curlyx.max)
4167             );
4168
4169             /* First just match a string of min A's. */
4170
4171             if (n < cur_curlyx->u.curlyx.min) {
4172                 cur_curlyx->u.curlyx.lastloc = locinput;
4173                 PUSH_STATE_GOTO(WHILEM_A_pre, cur_curlyx->u.curlyx.A);
4174                 /* NOTREACHED */
4175             }
4176
4177             /* If degenerate A matches "", assume A done. */
4178
4179             if (locinput == cur_curlyx->u.curlyx.lastloc) {
4180                 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
4181                    "%*s  whilem: empty match detected, trying continuation...\n",
4182                    REPORT_CODE_OFF+depth*2, "")
4183                 );
4184                 goto do_whilem_B_max;
4185             }
4186
4187             /* super-linear cache processing */
4188
4189             if (scan->flags) {
4190
4191                 if (!PL_reg_maxiter) {
4192                     /* start the countdown: Postpone detection until we
4193                      * know the match is not *that* much linear. */
4194                     PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
4195                     /* possible overflow for long strings and many CURLYX's */
4196                     if (PL_reg_maxiter < 0)
4197                         PL_reg_maxiter = I32_MAX;
4198                     PL_reg_leftiter = PL_reg_maxiter;
4199                 }
4200
4201                 if (PL_reg_leftiter-- == 0) {
4202                     /* initialise cache */
4203                     const I32 size = (PL_reg_maxiter + 7)/8;
4204                     if (PL_reg_poscache) {
4205                         if ((I32)PL_reg_poscache_size < size) {
4206                             Renew(PL_reg_poscache, size, char);
4207                             PL_reg_poscache_size = size;
4208                         }
4209                         Zero(PL_reg_poscache, size, char);
4210                     }
4211                     else {
4212                         PL_reg_poscache_size = size;
4213                         Newxz(PL_reg_poscache, size, char);
4214                     }
4215                     DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
4216       "%swhilem: Detected a super-linear match, switching on caching%s...\n",
4217                               PL_colors[4], PL_colors[5])
4218                     );
4219                 }
4220
4221                 if (PL_reg_leftiter < 0) {
4222                     /* have we already failed at this position? */
4223                     I32 offset, mask;
4224                     offset  = (scan->flags & 0xf) - 1
4225                                 + (locinput - PL_bostr)  * (scan->flags>>4);
4226                     mask    = 1 << (offset % 8);
4227                     offset /= 8;
4228                     if (PL_reg_poscache[offset] & mask) {
4229                         DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
4230                             "%*s  whilem: (cache) already tried at this position...\n",
4231                             REPORT_CODE_OFF+depth*2, "")
4232                         );
4233                         sayNO; /* cache records failure */
4234                     }
4235                     ST.cache_offset = offset;
4236                     ST.cache_mask   = mask;
4237                 }
4238             }
4239
4240             /* Prefer B over A for minimal matching. */
4241
4242             if (cur_curlyx->u.curlyx.minmod) {
4243                 ST.save_curlyx = cur_curlyx;
4244                 cur_curlyx = cur_curlyx->u.curlyx.prev_curlyx;
4245                 ST.cp = regcppush(ST.save_curlyx->u.curlyx.parenfloor);
4246                 REGCP_SET(ST.lastcp);
4247                 PUSH_YES_STATE_GOTO(WHILEM_B_min, ST.save_curlyx->u.curlyx.B);
4248                 /* NOTREACHED */
4249             }
4250
4251             /* Prefer A over B for maximal matching. */
4252
4253             if (n < cur_curlyx->u.curlyx.max) { /* More greed allowed? */
4254                 ST.cp = regcppush(cur_curlyx->u.curlyx.parenfloor);
4255                 cur_curlyx->u.curlyx.lastloc = locinput;
4256                 REGCP_SET(ST.lastcp);
4257                 PUSH_STATE_GOTO(WHILEM_A_max, cur_curlyx->u.curlyx.A);
4258                 /* NOTREACHED */
4259             }
4260             goto do_whilem_B_max;
4261         }
4262         /* NOTREACHED */
4263
4264         case WHILEM_B_min: /* just matched B in a minimal match */
4265         case WHILEM_B_max: /* just matched B in a maximal match */
4266             cur_curlyx = ST.save_curlyx;
4267             sayYES;
4268             /* NOTREACHED */
4269
4270         case WHILEM_B_max_fail: /* just failed to match B in a maximal match */
4271             cur_curlyx = ST.save_curlyx;
4272             cur_curlyx->u.curlyx.lastloc = ST.save_lastloc;
4273             cur_curlyx->u.curlyx.count--;
4274             CACHEsayNO;
4275             /* NOTREACHED */
4276
4277         case WHILEM_A_min_fail: /* just failed to match A in a minimal match */
4278             REGCP_UNWIND(ST.lastcp);
4279             regcppop(rex);
4280             /* FALL THROUGH */
4281         case WHILEM_A_pre_fail: /* just failed to match even minimal A */
4282             cur_curlyx->u.curlyx.lastloc = ST.save_lastloc;
4283             cur_curlyx->u.curlyx.count--;
4284             CACHEsayNO;
4285             /* NOTREACHED */
4286
4287         case WHILEM_A_max_fail: /* just failed to match A in a maximal match */
4288             REGCP_UNWIND(ST.lastcp);
4289             regcppop(rex);      /* Restore some previous $<digit>s? */
4290             PL_reginput = locinput;
4291             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
4292                 "%*s  whilem: failed, trying continuation...\n",
4293                 REPORT_CODE_OFF+depth*2, "")
4294             );
4295           do_whilem_B_max:
4296             if (cur_curlyx->u.curlyx.count >= REG_INFTY
4297                 && ckWARN(WARN_REGEXP)
4298                 && !(PL_reg_flags & RF_warned))
4299             {
4300                 PL_reg_flags |= RF_warned;
4301                 Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s limit (%d) exceeded",
4302                      "Complex regular subexpression recursion",
4303                      REG_INFTY - 1);
4304             }
4305
4306             /* now try B */
4307             ST.save_curlyx = cur_curlyx;
4308             cur_curlyx = cur_curlyx->u.curlyx.prev_curlyx;
4309             PUSH_YES_STATE_GOTO(WHILEM_B_max, ST.save_curlyx->u.curlyx.B);
4310             /* NOTREACHED */
4311
4312         case WHILEM_B_min_fail: /* just failed to match B in a minimal match */
4313             cur_curlyx = ST.save_curlyx;
4314             REGCP_UNWIND(ST.lastcp);
4315             regcppop(rex);
4316
4317             if (cur_curlyx->u.curlyx.count >= cur_curlyx->u.curlyx.max) {
4318                 /* Maximum greed exceeded */
4319                 if (cur_curlyx->u.curlyx.count >= REG_INFTY
4320                     && ckWARN(WARN_REGEXP)
4321                     && !(PL_reg_flags & RF_warned))
4322                 {
4323                     PL_reg_flags |= RF_warned;
4324                     Perl_warner(aTHX_ packWARN(WARN_REGEXP),
4325                         "%s limit (%d) exceeded",
4326                         "Complex regular subexpression recursion",
4327                         REG_INFTY - 1);
4328                 }
4329                 cur_curlyx->u.curlyx.count--;
4330                 CACHEsayNO;
4331             }
4332
4333             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
4334                 "%*s  trying longer...\n", REPORT_CODE_OFF+depth*2, "")
4335             );
4336             /* Try grabbing another A and see if it helps. */
4337             PL_reginput = locinput;
4338             cur_curlyx->u.curlyx.lastloc = locinput;
4339             ST.cp = regcppush(cur_curlyx->u.curlyx.parenfloor);
4340             REGCP_SET(ST.lastcp);
4341             PUSH_STATE_GOTO(WHILEM_A_min, ST.save_curlyx->u.curlyx.A);
4342             /* NOTREACHED */
4343
4344 #undef  ST
4345 #define ST st->u.branch
4346
4347         case BRANCHJ:       /*  /(...|A|...)/ with long next pointer */
4348             next = scan + ARG(scan);
4349             if (next == scan)
4350                 next = NULL;
4351             scan = NEXTOPER(scan);
4352             /* FALL THROUGH */
4353
4354         case BRANCH:        /*  /(...|A|...)/ */
4355             scan = NEXTOPER(scan); /* scan now points to inner node */
4356             ST.lastparen = *PL_reglastparen;
4357             ST.next_branch = next;
4358             REGCP_SET(ST.cp);
4359             PL_reginput = locinput;
4360
4361             /* Now go into the branch */
4362             if (has_cutgroup) {
4363                 PUSH_YES_STATE_GOTO(BRANCH_next, scan);
4364             } else {
4365                 PUSH_STATE_GOTO(BRANCH_next, scan);
4366             }
4367             /* NOTREACHED */
4368         case CUTGROUP:
4369             PL_reginput = locinput;
4370             sv_yes_mark = st->u.mark.mark_name = scan->flags ? NULL :
4371                 MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
4372             PUSH_STATE_GOTO(CUTGROUP_next,next);
4373             /* NOTREACHED */
4374         case CUTGROUP_next_fail:
4375             do_cutgroup = 1;
4376             no_final = 1;
4377             if (st->u.mark.mark_name)
4378                 sv_commit = st->u.mark.mark_name;
4379             sayNO;
4380             /* NOTREACHED */
4381         case BRANCH_next:
4382             sayYES;
4383             /* NOTREACHED */
4384         case BRANCH_next_fail: /* that branch failed; try the next, if any */
4385             if (do_cutgroup) {
4386                 do_cutgroup = 0;
4387                 no_final = 0;
4388             }
4389             REGCP_UNWIND(ST.cp);
4390             for (n = *PL_reglastparen; n > ST.lastparen; n--)
4391                 PL_regoffs[n].end = -1;
4392             *PL_reglastparen = n;
4393             /*dmq: *PL_reglastcloseparen = n; */
4394             scan = ST.next_branch;
4395             /* no more branches? */
4396             if (!scan || (OP(scan) != BRANCH && OP(scan) != BRANCHJ)) {
4397                 DEBUG_EXECUTE_r({
4398                     PerlIO_printf( Perl_debug_log,
4399                         "%*s  %sBRANCH failed...%s\n",
4400                         REPORT_CODE_OFF+depth*2, "",
4401                         PL_colors[4],
4402                         PL_colors[5] );
4403                 });
4404                 sayNO_SILENT;
4405             }
4406             continue; /* execute next BRANCH[J] op */
4407             /* NOTREACHED */
4408
4409         case MINMOD:
4410             minmod = 1;
4411             break;
4412
4413 #undef  ST
4414 #define ST st->u.curlym
4415
4416         case CURLYM:    /* /A{m,n}B/ where A is fixed-length */
4417
4418             /* This is an optimisation of CURLYX that enables us to push
4419              * only a single backtracking state, no matter how many matches
4420              * there are in {m,n}. It relies on the pattern being constant
4421              * length, with no parens to influence future backrefs
4422              */
4423
4424             ST.me = scan;
4425             scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
4426
4427             /* if paren positive, emulate an OPEN/CLOSE around A */
4428             if (ST.me->flags) {
4429                 U32 paren = ST.me->flags;
4430                 if (paren > PL_regsize)
4431                     PL_regsize = paren;
4432                 if (paren > *PL_reglastparen)
4433                     *PL_reglastparen = paren;
4434                 scan += NEXT_OFF(scan); /* Skip former OPEN. */
4435             }
4436             ST.A = scan;
4437             ST.B = next;
4438             ST.alen = 0;
4439             ST.count = 0;
4440             ST.minmod = minmod;
4441             minmod = 0;
4442             ST.c1 = CHRTEST_UNINIT;
4443             REGCP_SET(ST.cp);
4444
4445             if (!(ST.minmod ? ARG1(ST.me) : ARG2(ST.me))) /* min/max */
4446                 goto curlym_do_B;
4447
4448           curlym_do_A: /* execute the A in /A{m,n}B/  */
4449             PL_reginput = locinput;
4450             PUSH_YES_STATE_GOTO(CURLYM_A, ST.A); /* match A */
4451             /* NOTREACHED */
4452
4453         case CURLYM_A: /* we've just matched an A */
4454             locinput = st->locinput;
4455             nextchr = UCHARAT(locinput);
4456
4457             ST.count++;
4458             /* after first match, determine A's length: u.curlym.alen */
4459             if (ST.count == 1) {
4460                 if (PL_reg_match_utf8) {
4461                     char *s = locinput;
4462                     while (s < PL_reginput) {
4463                         ST.alen++;
4464                         s += UTF8SKIP(s);
4465                     }
4466                 }
4467                 else {
4468                     ST.alen = PL_reginput - locinput;
4469                 }
4470                 if (ST.alen == 0)
4471                     ST.count = ST.minmod ? ARG1(ST.me) : ARG2(ST.me);
4472             }
4473             DEBUG_EXECUTE_r(
4474                 PerlIO_printf(Perl_debug_log,
4475                           "%*s  CURLYM now matched %"IVdf" times, len=%"IVdf"...\n",
4476                           (int)(REPORT_CODE_OFF+(depth*2)), "",
4477                           (IV) ST.count, (IV)ST.alen)
4478             );
4479
4480             locinput = PL_reginput;
4481
4482             if (cur_eval && cur_eval->u.eval.close_paren &&
4483                 cur_eval->u.eval.close_paren == (U32)ST.me->flags)
4484                 goto fake_end;
4485
4486             {
4487                 I32 max = (ST.minmod ? ARG1(ST.me) : ARG2(ST.me));
4488                 if ( max == REG_INFTY || ST.count < max )
4489                     goto curlym_do_A; /* try to match another A */
4490             }
4491             goto curlym_do_B; /* try to match B */
4492
4493         case CURLYM_A_fail: /* just failed to match an A */
4494             REGCP_UNWIND(ST.cp);
4495
4496             if (ST.minmod || ST.count < ARG1(ST.me) /* min*/
4497                 || (cur_eval && cur_eval->u.eval.close_paren &&
4498                     cur_eval->u.eval.close_paren == (U32)ST.me->flags))
4499                 sayNO;
4500
4501           curlym_do_B: /* execute the B in /A{m,n}B/  */
4502             PL_reginput = locinput;
4503             if (ST.c1 == CHRTEST_UNINIT) {
4504                 /* calculate c1 and c2 for possible match of 1st char
4505                  * following curly */
4506                 ST.c1 = ST.c2 = CHRTEST_VOID;
4507                 if (HAS_TEXT(ST.B) || JUMPABLE(ST.B)) {
4508                     regnode *text_node = ST.B;
4509                     if (! HAS_TEXT(text_node))
4510                         FIND_NEXT_IMPT(text_node);
4511                     /* this used to be
4512
4513                         (HAS_TEXT(text_node) && PL_regkind[OP(text_node)] == EXACT)
4514
4515                         But the former is redundant in light of the latter.
4516
4517                         if this changes back then the macro for
4518                         IS_TEXT and friends need to change.
4519                      */
4520                     if (PL_regkind[OP(text_node)] == EXACT)
4521                     {
4522
4523                         ST.c1 = (U8)*STRING(text_node);
4524                         ST.c2 =
4525                             (IS_TEXTF(text_node))
4526                             ? PL_fold[ST.c1]
4527                             : (IS_TEXTFL(text_node))
4528                                 ? PL_fold_locale[ST.c1]
4529                                 : ST.c1;
4530                     }
4531                 }
4532             }
4533
4534             DEBUG_EXECUTE_r(
4535                 PerlIO_printf(Perl_debug_log,
4536                     "%*s  CURLYM trying tail with matches=%"IVdf"...\n",
4537                     (int)(REPORT_CODE_OFF+(depth*2)),
4538                     "", (IV)ST.count)
4539                 );
4540             if (ST.c1 != CHRTEST_VOID
4541                     && UCHARAT(PL_reginput) != ST.c1
4542                     && UCHARAT(PL_reginput) != ST.c2)
4543             {
4544                 /* simulate B failing */
4545                 DEBUG_OPTIMISE_r(
4546                     PerlIO_printf(Perl_debug_log,
4547                         "%*s  CURLYM Fast bail c1=%"IVdf" c2=%"IVdf"\n",
4548                         (int)(REPORT_CODE_OFF+(depth*2)),"",
4549                         (IV)ST.c1,(IV)ST.c2
4550                 ));
4551                 state_num = CURLYM_B_fail;
4552                 goto reenter_switch;
4553             }
4554
4555             if (ST.me->flags) {
4556                 /* mark current A as captured */
4557                 I32 paren = ST.me->flags;
4558                 if (ST.count) {
4559                     PL_regoffs[paren].start
4560                         = HOPc(PL_reginput, -ST.alen) - PL_bostr;
4561                     PL_regoffs[paren].end = PL_reginput - PL_bostr;
4562                     /*dmq: *PL_reglastcloseparen = paren; */
4563                 }
4564                 else
4565                     PL_regoffs[paren].end = -1;
4566                 if (cur_eval && cur_eval->u.eval.close_paren &&
4567                     cur_eval->u.eval.close_paren == (U32)ST.me->flags)
4568                 {
4569                     if (ST.count)
4570                         goto fake_end;
4571                     else
4572                         sayNO;
4573                 }
4574             }
4575
4576             PUSH_STATE_GOTO(CURLYM_B, ST.B); /* match B */
4577             /* NOTREACHED */
4578
4579         case CURLYM_B_fail: /* just failed to match a B */
4580             REGCP_UNWIND(ST.cp);
4581             if (ST.minmod) {
4582                 I32 max = ARG2(ST.me);
4583                 if (max != REG_INFTY && ST.count == max)
4584                     sayNO;
4585                 goto curlym_do_A; /* try to match a further A */
4586             }
4587             /* backtrack one A */
4588             if (ST.count == ARG1(ST.me) /* min */)
4589                 sayNO;
4590             ST.count--;
4591             locinput = HOPc(locinput, -ST.alen);
4592             goto curlym_do_B; /* try to match B */
4593
4594 #undef ST
4595 #define ST st->u.curly
4596
4597 #define CURLY_SETPAREN(paren, success) \
4598     if (paren) { \
4599         if (success) { \
4600             PL_regoffs[paren].start = HOPc(locinput, -1) - PL_bostr; \
4601             PL_regoffs[paren].end = locinput - PL_bostr; \
4602             *PL_reglastcloseparen = paren; \
4603         } \
4604         else \
4605             PL_regoffs[paren].end = -1; \
4606     }
4607
4608         case STAR:              /*  /A*B/ where A is width 1 */
4609             ST.paren = 0;
4610             ST.min = 0;
4611             ST.max = REG_INFTY;
4612             scan = NEXTOPER(scan);
4613             goto repeat;
4614         case PLUS:              /*  /A+B/ where A is width 1 */
4615             ST.paren = 0;
4616             ST.min = 1;
4617             ST.max = REG_INFTY;
4618             scan = NEXTOPER(scan);
4619             goto repeat;
4620         case CURLYN:            /*  /(A){m,n}B/ where A is width 1 */
4621             ST.paren = scan->flags;     /* Which paren to set */
4622             if (ST.paren > PL_regsize)
4623                 PL_regsize = ST.paren;
4624             if (ST.paren > *PL_reglastparen)
4625                 *PL_reglastparen = ST.paren;
4626             ST.min = ARG1(scan);  /* min to match */
4627             ST.max = ARG2(scan);  /* max to match */
4628             if (cur_eval && cur_eval->u.eval.close_paren &&
4629                 cur_eval->u.eval.close_paren == (U32)ST.paren) {
4630                 ST.min=1;
4631                 ST.max=1;
4632             }
4633             scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
4634             goto repeat;
4635         case CURLY:             /*  /A{m,n}B/ where A is width 1 */
4636             ST.paren = 0;
4637             ST.min = ARG1(scan);  /* min to match */
4638             ST.max = ARG2(scan);  /* max to match */
4639             scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
4640           repeat:
4641             /*
4642             * Lookahead to avoid useless match attempts
4643             * when we know what character comes next.
4644             *
4645             * Used to only do .*x and .*?x, but now it allows
4646             * for )'s, ('s and (?{ ... })'s to be in the way
4647             * of the quantifier and the EXACT-like node.  -- japhy
4648             */
4649
4650             if (ST.min > ST.max) /* XXX make this a compile-time check? */
4651                 sayNO;
4652             if (HAS_TEXT(next) || JUMPABLE(next)) {
4653                 U8 *s;
4654                 regnode *text_node = next;
4655
4656                 if (! HAS_TEXT(text_node))
4657                     FIND_NEXT_IMPT(text_node);
4658
4659                 if (! HAS_TEXT(text_node))
4660                     ST.c1 = ST.c2 = CHRTEST_VOID;
4661                 else {
4662                     if ( PL_regkind[OP(text_node)] != EXACT ) {
4663                         ST.c1 = ST.c2 = CHRTEST_VOID;
4664                         goto assume_ok_easy;
4665                     }
4666                     else
4667                         s = (U8*)STRING(text_node);
4668
4669                     /*  Currently we only get here when
4670
4671                         PL_rekind[OP(text_node)] == EXACT
4672
4673                         if this changes back then the macro for IS_TEXT and
4674                         friends need to change. */
4675                     if (!UTF) {
4676                         ST.c2 = ST.c1 = *s;
4677                         if (IS_TEXTF(text_node))
4678                             ST.c2 = PL_fold[ST.c1];
4679                         else if (IS_TEXTFL(text_node))
4680                             ST.c2 = PL_fold_locale[ST.c1];
4681                     }
4682                     else { /* UTF */
4683                         if (IS_TEXTF(text_node)) {
4684                              STRLEN ulen1, ulen2;
4685                              U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
4686                              U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
4687
4688                              to_utf8_lower((U8*)s, tmpbuf1, &ulen1);
4689                              to_utf8_upper((U8*)s, tmpbuf2, &ulen2);
4690 #ifdef EBCDIC
4691                              ST.c1 = utf8n_to_uvchr(tmpbuf1, UTF8_MAXLEN, 0,
4692                                                     ckWARN(WARN_UTF8) ?
4693                                                     0 : UTF8_ALLOW_ANY);
4694                              ST.c2 = utf8n_to_uvchr(tmpbuf2, UTF8_MAXLEN, 0,
4695                                                     ckWARN(WARN_UTF8) ?
4696                                                     0 : UTF8_ALLOW_ANY);
4697 #else
4698                              ST.c1 = utf8n_to_uvuni(tmpbuf1, UTF8_MAXBYTES, 0,
4699                                                     uniflags);
4700                              ST.c2 = utf8n_to_uvuni(tmpbuf2, UTF8_MAXBYTES, 0,
4701                                                     uniflags);
4702 #endif
4703                         }
4704                         else {
4705                             ST.c2 = ST.c1 = utf8n_to_uvchr(s, UTF8_MAXBYTES, 0,
4706                                                      uniflags);
4707                         }
4708                     }
4709                 }
4710             }
4711             else
4712                 ST.c1 = ST.c2 = CHRTEST_VOID;
4713         assume_ok_easy:
4714
4715             ST.A = scan;
4716             ST.B = next;
4717             PL_reginput = locinput;
4718             if (minmod) {
4719                 minmod = 0;
4720                 if (ST.min && regrepeat(rex, ST.A, ST.min, depth) < ST.min)
4721                     sayNO;
4722                 ST.count = ST.min;
4723                 locinput = PL_reginput;
4724                 REGCP_SET(ST.cp);
4725                 if (ST.c1 == CHRTEST_VOID)
4726                     goto curly_try_B_min;
4727
4728                 ST.oldloc = locinput;
4729
4730                 /* set ST.maxpos to the furthest point along the
4731                  * string that could possibly match */
4732                 if  (ST.max == REG_INFTY) {
4733                     ST.maxpos = PL_regeol - 1;
4734                     if (do_utf8)
4735                         while (UTF8_IS_CONTINUATION(*(U8*)ST.maxpos))
4736                             ST.maxpos--;
4737                 }
4738                 else if (do_utf8) {
4739                     int m = ST.max - ST.min;
4740                     for (ST.maxpos = locinput;
4741                          m >0 && ST.maxpos + UTF8SKIP(ST.maxpos) <= PL_regeol; m--)
4742                         ST.maxpos += UTF8SKIP(ST.maxpos);
4743                 }
4744                 else {
4745                     ST.maxpos = locinput + ST.max - ST.min;
4746                     if (ST.maxpos >= PL_regeol)
4747                         ST.maxpos = PL_regeol - 1;
4748                 }
4749                 goto curly_try_B_min_known;
4750
4751             }
4752             else {
4753                 ST.count = regrepeat(rex, ST.A, ST.max, depth);
4754                 locinput = PL_reginput;
4755                 if (ST.count < ST.min)
4756                     sayNO;
4757                 if ((ST.count > ST.min)
4758                     && (PL_regkind[OP(ST.B)] == EOL) && (OP(ST.B) != MEOL))
4759                 {
4760                     /* A{m,n} must come at the end of the string, there's
4761                      * no point in backing off ... */
4762                     ST.min = ST.count;
4763                     /* ...except that $ and \Z can match before *and* after
4764                        newline at the end.  Consider "\n\n" =~ /\n+\Z\n/.
4765                        We may back off by one in this case. */
4766                     if (UCHARAT(PL_reginput - 1) == '\n' && OP(ST.B) != EOS)
4767                         ST.min--;
4768                 }
4769                 REGCP_SET(ST.cp);
4770                 goto curly_try_B_max;
4771             }
4772             /* NOTREACHED */
4773
4774
4775         case CURLY_B_min_known_fail:
4776             /* failed to find B in a non-greedy match where c1,c2 valid */
4777             if (ST.paren && ST.count)
4778                 PL_regoffs[ST.paren].end = -1;
4779
4780             PL_reginput = locinput;     /* Could be reset... */
4781             REGCP_UNWIND(ST.cp);
4782             /* Couldn't or didn't -- move forward. */
4783             ST.oldloc = locinput;
4784             if (do_utf8)
4785                 locinput += UTF8SKIP(locinput);
4786             else
4787                 locinput++;
4788             ST.count++;
4789           curly_try_B_min_known:
4790              /* find the next place where 'B' could work, then call B */
4791             {
4792                 int n;
4793                 if (do_utf8) {
4794                     n = (ST.oldloc == locinput) ? 0 : 1;
4795                     if (ST.c1 == ST.c2) {
4796                         STRLEN len;
4797                         /* set n to utf8_distance(oldloc, locinput) */
4798                         while (locinput <= ST.maxpos &&
4799                                utf8n_to_uvchr((U8*)locinput,
4800                                               UTF8_MAXBYTES, &len,
4801                                               uniflags) != (UV)ST.c1) {
4802                             locinput += len;
4803                             n++;
4804                         }
4805                     }
4806                     else {
4807                         /* set n to utf8_distance(oldloc, locinput) */
4808                         while (locinput <= ST.maxpos) {
4809                             STRLEN len;
4810                             const UV c = utf8n_to_uvchr((U8*)locinput,
4811                                                   UTF8_MAXBYTES, &len,
4812                                                   uniflags);
4813                             if (c == (UV)ST.c1 || c == (UV)ST.c2)
4814                                 break;
4815                             locinput += len;
4816                             n++;
4817                         }
4818                     }
4819                 }
4820                 else {
4821                     if (ST.c1 == ST.c2) {
4822                         while (locinput <= ST.maxpos &&
4823                                UCHARAT(locinput) != ST.c1)
4824                             locinput++;
4825                     }
4826                     else {
4827                         while (locinput <= ST.maxpos
4828                                && UCHARAT(locinput) != ST.c1
4829                                && UCHARAT(locinput) != ST.c2)
4830                             locinput++;
4831                     }
4832                     n = locinput - ST.oldloc;
4833                 }
4834                 if (locinput > ST.maxpos)
4835                     sayNO;
4836                 /* PL_reginput == oldloc now */
4837                 if (n) {
4838                     ST.count += n;
4839                     if (regrepeat(rex, ST.A, n, depth) < n)
4840                         sayNO;
4841                 }
4842                 PL_reginput = locinput;
4843                 CURLY_SETPAREN(ST.paren, ST.count);
4844                 if (cur_eval && cur_eval->u.eval.close_paren &&
4845                     cur_eval->u.eval.close_paren == (U32)ST.paren) {
4846                     goto fake_end;
4847                 }
4848                 PUSH_STATE_GOTO(CURLY_B_min_known, ST.B);
4849             }
4850             /* NOTREACHED */
4851
4852
4853         case CURLY_B_min_fail:
4854             /* failed to find B in a non-greedy match where c1,c2 invalid */
4855             if (ST.paren && ST.count)
4856                 PL_regoffs[ST.paren].end = -1;
4857
4858             REGCP_UNWIND(ST.cp);
4859             /* failed -- move forward one */
4860             PL_reginput = locinput;
4861             if (regrepeat(rex, ST.A, 1, depth)) {
4862                 ST.count++;
4863                 locinput = PL_reginput;
4864                 if (ST.count <= ST.max || (ST.max == REG_INFTY &&
4865                         ST.count > 0)) /* count overflow ? */
4866                 {
4867                   curly_try_B_min:
4868                     CURLY_SETPAREN(ST.paren, ST.count);
4869                     if (cur_eval && cur_eval->u.eval.close_paren &&
4870                         cur_eval->u.eval.close_paren == (U32)ST.paren) {
4871                         goto fake_end;
4872                     }
4873                     PUSH_STATE_GOTO(CURLY_B_min, ST.B);
4874                 }
4875             }
4876             sayNO;
4877             /* NOTREACHED */
4878
4879
4880         curly_try_B_max:
4881             /* a successful greedy match: now try to match B */
4882             if (cur_eval && cur_eval->u.eval.close_paren &&
4883                 cur_eval->u.eval.close_paren == (U32)ST.paren) {
4884                 goto fake_end;
4885             }
4886             {
4887                 UV c = 0;
4888                 if (ST.c1 != CHRTEST_VOID)
4889                     c = do_utf8 ? utf8n_to_uvchr((U8*)PL_reginput,
4890                                            UTF8_MAXBYTES, 0, uniflags)
4891                                 : (UV) UCHARAT(PL_reginput);
4892                 /* If it could work, try it. */
4893                 if (ST.c1 == CHRTEST_VOID || c == (UV)ST.c1 || c == (UV)ST.c2) {
4894                     CURLY_SETPAREN(ST.paren, ST.count);
4895                     PUSH_STATE_GOTO(CURLY_B_max, ST.B);
4896                     /* NOTREACHED */
4897                 }
4898             }
4899             /* FALL THROUGH */
4900         case CURLY_B_max_fail:
4901             /* failed to find B in a greedy match */
4902             if (ST.paren && ST.count)
4903                 PL_regoffs[ST.paren].end = -1;
4904
4905             REGCP_UNWIND(ST.cp);
4906             /*  back up. */
4907             if (--ST.count < ST.min)
4908                 sayNO;
4909             PL_reginput = locinput = HOPc(locinput, -1);
4910             goto curly_try_B_max;
4911
4912 #undef ST
4913
4914         case END:
4915             fake_end:
4916             if (cur_eval) {
4917                 /* we've just finished A in /(??{A})B/; now continue with B */
4918                 I32 tmpix;
4919                 st->u.eval.toggle_reg_flags
4920                             = cur_eval->u.eval.toggle_reg_flags;
4921                 PL_reg_flags ^= st->u.eval.toggle_reg_flags;
4922
4923                 st->u.eval.prev_rex = rex_sv;           /* inner */
4924                 SETREX(rex_sv,cur_eval->u.eval.prev_rex);
4925                 rex = (struct regexp *)SvANY(rex_sv);
4926                 rexi = RXi_GET(rex);
4927                 cur_curlyx = cur_eval->u.eval.prev_curlyx;
4928                 ReREFCNT_inc(rex_sv);
4929                 st->u.eval.cp = regcppush(0);   /* Save *all* the positions. */
4930
4931                 /* rex was changed so update the pointer in PL_reglastparen and PL_reglastcloseparen */
4932                 PL_reglastparen = &rex->lastparen;
4933                 PL_reglastcloseparen = &rex->lastcloseparen;
4934
4935                 REGCP_SET(st->u.eval.lastcp);
4936                 PL_reginput = locinput;
4937
4938                 /* Restore parens of the outer rex without popping the
4939                  * savestack */
4940                 tmpix = PL_savestack_ix;
4941                 PL_savestack_ix = cur_eval->u.eval.lastcp;
4942                 regcppop(rex);
4943                 PL_savestack_ix = tmpix;
4944
4945                 st->u.eval.prev_eval = cur_eval;
4946                 cur_eval = cur_eval->u.eval.prev_eval;
4947                 DEBUG_EXECUTE_r(
4948                     PerlIO_printf(Perl_debug_log, "%*s  EVAL trying tail ... %"UVxf"\n",
4949                                       REPORT_CODE_OFF+depth*2, "",PTR2UV(cur_eval)););
4950                 if ( nochange_depth )
4951                     nochange_depth--;
4952
4953                 PUSH_YES_STATE_GOTO(EVAL_AB,
4954                         st->u.eval.prev_eval->u.eval.B); /* match B */
4955             }
4956
4957             if (locinput < reginfo->till) {
4958                 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
4959                                       "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
4960                                       PL_colors[4],
4961                                       (long)(locinput - PL_reg_starttry),
4962                                       (long)(reginfo->till - PL_reg_starttry),
4963                                       PL_colors[5]));
4964
4965                 sayNO_SILENT;           /* Cannot match: too short. */
4966             }
4967             PL_reginput = locinput;     /* put where regtry can find it */
4968             sayYES;                     /* Success! */
4969
4970         case SUCCEED: /* successful SUSPEND/UNLESSM/IFMATCH/CURLYM */
4971             DEBUG_EXECUTE_r(
4972             PerlIO_printf(Perl_debug_log,
4973                 "%*s  %ssubpattern success...%s\n",
4974                 REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5]));
4975             PL_reginput = locinput;     /* put where regtry can find it */
4976             sayYES;                     /* Success! */
4977
4978 #undef  ST
4979 #define ST st->u.ifmatch
4980
4981         case SUSPEND:   /* (?>A) */
4982             ST.wanted = 1;
4983             PL_reginput = locinput;
4984             goto do_ifmatch;
4985
4986         case UNLESSM:   /* -ve lookaround: (?!A), or with flags, (?<!A) */
4987             ST.wanted = 0;
4988             goto ifmatch_trivial_fail_test;
4989
4990         case IFMATCH:   /* +ve lookaround: (?=A), or with flags, (?<=A) */
4991             ST.wanted = 1;
4992           ifmatch_trivial_fail_test:
4993             if (scan->flags) {
4994                 char * const s = HOPBACKc(locinput, scan->flags);
4995                 if (!s) {
4996                     /* trivial fail */
4997                     if (logical) {
4998                         logical = 0;
4999                         sw = 1 - (bool)ST.wanted;
5000                     }
5001                     else if (ST.wanted)
5002                         sayNO;
5003                     next = scan + ARG(scan);
5004                     if (next == scan)
5005                         next = NULL;
5006                     break;
5007                 }
5008                 PL_reginput = s;
5009             }
5010             else
5011                 PL_reginput = locinput;
5012
5013           do_ifmatch:
5014             ST.me = scan;
5015             ST.logical = logical;
5016             logical = 0; /* XXX: reset state of logical once it has been saved into ST */
5017
5018             /* execute body of (?...A) */
5019             PUSH_YES_STATE_GOTO(IFMATCH_A, NEXTOPER(NEXTOPER(scan)));
5020             /* NOTREACHED */
5021
5022         case IFMATCH_A_fail: /* body of (?...A) failed */
5023             ST.wanted = !ST.wanted;
5024             /* FALL THROUGH */
5025
5026         case IFMATCH_A: /* body of (?...A) succeeded */
5027             if (ST.logical) {
5028                 sw = (bool)ST.wanted;
5029             }
5030             else if (!ST.wanted)
5031                 sayNO;
5032
5033             if (OP(ST.me) == SUSPEND)
5034                 locinput = PL_reginput;
5035             else {
5036                 locinput = PL_reginput = st->locinput;
5037                 nextchr = UCHARAT(locinput);
5038             }
5039             scan = ST.me + ARG(ST.me);
5040             if (scan == ST.me)
5041                 scan = NULL;
5042             continue; /* execute B */
5043
5044 #undef ST
5045
5046         case LONGJMP:
5047             next = scan + ARG(scan);
5048             if (next == scan)
5049                 next = NULL;
5050             break;
5051         case COMMIT:
5052             reginfo->cutpoint = PL_regeol;
5053             /* FALLTHROUGH */
5054         case PRUNE:
5055             PL_reginput = locinput;
5056             if (!scan->flags)
5057                 sv_yes_mark = sv_commit = MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
5058             PUSH_STATE_GOTO(COMMIT_next,next);
5059             /* NOTREACHED */
5060         case COMMIT_next_fail:
5061             no_final = 1;
5062             /* FALLTHROUGH */
5063         case OPFAIL:
5064             sayNO;
5065             /* NOTREACHED */
5066
5067 #define ST st->u.mark
5068         case MARKPOINT:
5069             ST.prev_mark = mark_state;
5070             ST.mark_name = sv_commit = sv_yes_mark
5071                 = MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
5072             mark_state = st;
5073             ST.mark_loc = PL_reginput = locinput;
5074             PUSH_YES_STATE_GOTO(MARKPOINT_next,next);
5075             /* NOTREACHED */
5076         case MARKPOINT_next:
5077             mark_state = ST.prev_mark;
5078             sayYES;
5079             /* NOTREACHED */
5080         case MARKPOINT_next_fail:
5081             if (popmark && sv_eq(ST.mark_name,popmark))
5082             {
5083                 if (ST.mark_loc > startpoint)
5084                     reginfo->cutpoint = HOPBACKc(ST.mark_loc, 1);
5085                 popmark = NULL; /* we found our mark */
5086                 sv_commit = ST.mark_name;
5087
5088                 DEBUG_EXECUTE_r({
5089                         PerlIO_printf(Perl_debug_log,
5090                             "%*s  %ssetting cutpoint to mark:%"SVf"...%s\n",
5091                             REPORT_CODE_OFF+depth*2, "",
5092                             PL_colors[4], SVfARG(sv_commit), PL_colors[5]);
5093                 });
5094             }
5095             mark_state = ST.prev_mark;
5096             sv_yes_mark = mark_state ?
5097                 mark_state->u.mark.mark_name : NULL;
5098             sayNO;
5099             /* NOTREACHED */
5100         case SKIP:
5101             PL_reginput = locinput;
5102             if (scan->flags) {
5103                 /* (*SKIP) : if we fail we cut here*/
5104                 ST.mark_name = NULL;
5105                 ST.mark_loc = locinput;
5106                 PUSH_STATE_GOTO(SKIP_next,next);
5107             } else {
5108                 /* (*SKIP:NAME) : if there is a (*MARK:NAME) fail where it was,
5109                    otherwise do nothing.  Meaning we need to scan
5110                  */
5111                 regmatch_state *cur = mark_state;
5112                 SV *find = MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
5113
5114                 while (cur) {
5115                     if ( sv_eq( cur->u.mark.mark_name,
5116                                 find ) )
5117                     {
5118                         ST.mark_name = find;
5119                         PUSH_STATE_GOTO( SKIP_next, next );
5120                     }
5121                     cur = cur->u.mark.prev_mark;
5122                 }
5123             }
5124             /* Didn't find our (*MARK:NAME) so ignore this (*SKIP:NAME) */
5125             break;
5126         case SKIP_next_fail:
5127             if (ST.mark_name) {
5128                 /* (*CUT:NAME) - Set up to search for the name as we
5129                    collapse the stack*/
5130                 popmark = ST.mark_name;
5131             } else {
5132                 /* (*CUT) - No name, we cut here.*/
5133                 if (ST.mark_loc > startpoint)
5134                     reginfo->cutpoint = HOPBACKc(ST.mark_loc, 1);
5135                 /* but we set sv_commit to latest mark_name if there
5136                    is one so they can test to see how things lead to this
5137                    cut */
5138                 if (mark_state)
5139                     sv_commit=mark_state->u.mark.mark_name;
5140             }
5141             no_final = 1;
5142             sayNO;
5143             /* NOTREACHED */
5144 #undef ST
5145         case FOLDCHAR:
5146             n = ARG(scan);
5147             if ( n == (U32)what_len_TRICKYFOLD(locinput,do_utf8,ln) ) {
5148                 locinput += ln;
5149             } else if ( 0xDF == n && !do_utf8 && !UTF ) {
5150                 sayNO;
5151             } else  {
5152                 U8 folded[UTF8_MAXBYTES_CASE+1];
5153                 STRLEN foldlen;
5154                 const char * const l = locinput;
5155                 char *e = PL_regeol;
5156                 to_uni_fold(n, folded, &foldlen);
5157
5158                 if (ibcmp_utf8((const char*) folded, 0,  foldlen, 1,
5159                                l, &e, 0,  do_utf8)) {
5160                         sayNO;
5161                 }
5162                 locinput = e;
5163             }
5164             nextchr = UCHARAT(locinput);
5165             break;
5166         case LNBREAK:
5167             if ((n=is_LNBREAK(locinput,do_utf8))) {
5168                 locinput += n;
5169                 nextchr = UCHARAT(locinput);
5170             } else
5171                 sayNO;
5172             break;
5173
5174 #define CASE_CLASS(nAmE)                              \
5175         case nAmE:                                    \
5176             if ((n=is_##nAmE(locinput,do_utf8))) {    \
5177                 locinput += n;                        \
5178                 nextchr = UCHARAT(locinput);          \
5179             } else                                    \
5180                 sayNO;                                \
5181             break;                                    \
5182         case N##nAmE:                                 \
5183             if ((n=is_##nAmE(locinput,do_utf8))) {    \
5184                 sayNO;                                \
5185             } else {                                  \
5186                 locinput += UTF8SKIP(locinput);       \
5187                 nextchr = UCHARAT(locinput);          \
5188             }                                         \
5189             break
5190
5191         CASE_CLASS(VERTWS);
5192         CASE_CLASS(HORIZWS);
5193 #undef CASE_CLASS
5194
5195         default:
5196             PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
5197                           PTR2UV(scan), OP(scan));
5198             Perl_croak(aTHX_ "regexp memory corruption");
5199
5200         } /* end switch */
5201
5202         /* switch break jumps here */
5203         scan = next; /* prepare to execute the next op and ... */
5204         continue;    /* ... jump back to the top, reusing st */
5205         /* NOTREACHED */
5206
5207       push_yes_state:
5208         /* push a state that backtracks on success */
5209         st->u.yes.prev_yes_state = yes_state;
5210         yes_state = st;
5211         /* FALL THROUGH */
5212       push_state:
5213         /* push a new regex state, then continue at scan  */
5214         {
5215             regmatch_state *newst;
5216
5217             DEBUG_STACK_r({
5218                 regmatch_state *cur = st;
5219                 regmatch_state *curyes = yes_state;
5220                 int curd = depth;
5221                 regmatch_slab *slab = PL_regmatch_slab;
5222                 for (;curd > -1;cur--,curd--) {
5223                     if (cur < SLAB_FIRST(slab)) {
5224                         slab = slab->prev;
5225                         cur = SLAB_LAST(slab);
5226                     }
5227                     PerlIO_printf(Perl_error_log, "%*s#%-3d %-10s %s\n",
5228                         REPORT_CODE_OFF + 2 + depth * 2,"",
5229                         curd, PL_reg_name[cur->resume_state],
5230                         (curyes == cur) ? "yes" : ""
5231                     );
5232                     if (curyes == cur)
5233                         curyes = cur->u.yes.prev_yes_state;
5234                 }
5235             } else
5236                 DEBUG_STATE_pp("push")
5237             );
5238             depth++;
5239             st->locinput = locinput;
5240             newst = st+1;
5241             if (newst >  SLAB_LAST(PL_regmatch_slab))
5242                 newst = S_push_slab(aTHX);
5243             PL_regmatch_state = newst;
5244
5245             locinput = PL_reginput;
5246             nextchr = UCHARAT(locinput);
5247             st = newst;
5248             continue;
5249             /* NOTREACHED */
5250         }
5251     }
5252
5253     /*
5254     * We get here only if there's trouble -- normally "case END" is
5255     * the terminating point.
5256     */
5257     Perl_croak(aTHX_ "corrupted regexp pointers");
5258     /*NOTREACHED*/
5259     sayNO;
5260
5261 yes:
5262     if (yes_state) {
5263         /* we have successfully completed a subexpression, but we must now
5264          * pop to the state marked by yes_state and continue from there */
5265         assert(st != yes_state);
5266 #ifdef DEBUGGING
5267         while (st != yes_state) {
5268             st--;
5269             if (st < SLAB_FIRST(PL_regmatch_slab)) {
5270                 PL_regmatch_slab = PL_regmatch_slab->prev;
5271                 st = SLAB_LAST(PL_regmatch_slab);
5272             }
5273             DEBUG_STATE_r({
5274                 if (no_final) {
5275                     DEBUG_STATE_pp("pop (no final)");
5276                 } else {
5277                     DEBUG_STATE_pp("pop (yes)");
5278                 }
5279             });
5280             depth--;
5281         }
5282 #else
5283         while (yes_state < SLAB_FIRST(PL_regmatch_slab)
5284             || yes_state > SLAB_LAST(PL_regmatch_slab))
5285         {
5286             /* not in this slab, pop slab */
5287             depth -= (st - SLAB_FIRST(PL_regmatch_slab) + 1);
5288             PL_regmatch_slab = PL_regmatch_slab->prev;
5289             st = SLAB_LAST(PL_regmatch_slab);
5290         }
5291         depth -= (st - yes_state);
5292 #endif
5293         st = yes_state;
5294         yes_state = st->u.yes.prev_yes_state;
5295         PL_regmatch_state = st;
5296
5297         if (no_final) {
5298             locinput= st->locinput;
5299             nextchr = UCHARAT(locinput);
5300         }
5301         state_num = st->resume_state + no_final;
5302         goto reenter_switch;
5303     }
5304
5305     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
5306                           PL_colors[4], PL_colors[5]));
5307
5308     if (PL_reg_eval_set) {
5309         /* each successfully executed (?{...}) block does the equivalent of
5310          *   local $^R = do {...}
5311          * When popping the save stack, all these locals would be undone;
5312          * bypass this by setting the outermost saved $^R to the latest
5313          * value */
5314         if (oreplsv != GvSV(PL_replgv))
5315             sv_setsv(oreplsv, GvSV(PL_replgv));
5316     }
5317     result = 1;
5318     goto final_exit;
5319
5320 no:
5321     DEBUG_EXECUTE_r(
5322         PerlIO_printf(Perl_debug_log,
5323             "%*s  %sfailed...%s\n",
5324             REPORT_CODE_OFF+depth*2, "",
5325             PL_colors[4], PL_colors[5])
5326         );
5327
5328 no_silent:
5329     if (no_final) {
5330         if (yes_state) {
5331             goto yes;
5332         } else {
5333             goto final_exit;
5334         }
5335     }
5336     if (depth) {
5337         /* there's a previous state to backtrack to */
5338         st--;
5339         if (st < SLAB_FIRST(PL_regmatch_slab)) {
5340             PL_regmatch_slab = PL_regmatch_slab->prev;
5341             st = SLAB_LAST(PL_regmatch_slab);
5342         }
5343         PL_regmatch_state = st;
5344         locinput= st->locinput;
5345         nextchr = UCHARAT(locinput);
5346
5347         DEBUG_STATE_pp("pop");
5348         depth--;
5349         if (yes_state == st)
5350             yes_state = st->u.yes.prev_yes_state;
5351
5352         state_num = st->resume_state + 1; /* failure = success + 1 */
5353         goto reenter_switch;
5354     }
5355     result = 0;
5356
5357   final_exit:
5358     if (rex->intflags & PREGf_VERBARG_SEEN) {
5359         SV *sv_err = get_sv("REGERROR", 1);
5360         SV *sv_mrk = get_sv("REGMARK", 1);
5361         if (result) {
5362             sv_commit = &PL_sv_no;
5363             if (!sv_yes_mark)
5364                 sv_yes_mark = &PL_sv_yes;
5365         } else {
5366             if (!sv_commit)
5367                 sv_commit = &PL_sv_yes;
5368             sv_yes_mark = &PL_sv_no;
5369         }
5370         sv_setsv(sv_err, sv_commit);
5371         sv_setsv(sv_mrk, sv_yes_mark);
5372     }
5373
5374     /* clean up; in particular, free all slabs above current one */
5375     LEAVE_SCOPE(oldsave);
5376
5377     return result;
5378 }
5379
5380 /*
5381  - regrepeat - repeatedly match something simple, report how many
5382  */
5383 /*
5384  * [This routine now assumes that it will only match on things of length 1.
5385  * That was true before, but now we assume scan - reginput is the count,
5386  * rather than incrementing count on every character.  [Er, except utf8.]]
5387  */
5388 STATIC I32
5389 S_regrepeat(pTHX_ const regexp *prog, const regnode *p, I32 max, int depth)
5390 {
5391     dVAR;
5392     register char *scan;
5393     register I32 c;
5394     register char *loceol = PL_regeol;
5395     register I32 hardcount = 0;
5396     register bool do_utf8 = PL_reg_match_utf8;
5397 #ifndef DEBUGGING
5398     PERL_UNUSED_ARG(depth);
5399 #endif
5400
5401     PERL_ARGS_ASSERT_REGREPEAT;
5402
5403     scan = PL_reginput;
5404     if (max == REG_INFTY)
5405         max = I32_MAX;
5406     else if (max < loceol - scan)
5407         loceol = scan + max;
5408     switch (OP(p)) {
5409     case REG_ANY:
5410         if (do_utf8) {
5411             loceol = PL_regeol;
5412             while (scan < loceol && hardcount < max && *scan != '\n') {
5413                 scan += UTF8SKIP(scan);
5414                 hardcount++;
5415             }
5416         } else {
5417             while (scan < loceol && *scan != '\n')
5418                 scan++;
5419         }
5420         break;
5421     case SANY:
5422         if (do_utf8) {
5423             loceol = PL_regeol;
5424             while (scan < loceol && hardcount < max) {
5425                 scan += UTF8SKIP(scan);
5426                 hardcount++;
5427             }
5428         }
5429         else
5430             scan = loceol;
5431         break;
5432     case CANY:
5433         scan = loceol;
5434         break;
5435     case EXACT:         /* length of string is 1 */
5436         c = (U8)*STRING(p);
5437         while (scan < loceol && UCHARAT(scan) == c)
5438             scan++;
5439         break;
5440     case EXACTF:        /* length of string is 1 */
5441         c = (U8)*STRING(p);
5442         while (scan < loceol &&
5443                (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
5444             scan++;
5445         break;
5446     case EXACTFL:       /* length of string is 1 */
5447         PL_reg_flags |= RF_tainted;
5448         c = (U8)*STRING(p);
5449         while (scan < loceol &&
5450                (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
5451             scan++;
5452         break;
5453     case ANYOF:
5454         if (do_utf8) {
5455             loceol = PL_regeol;
5456             while (hardcount < max && scan < loceol &&
5457                    reginclass(prog, p, (U8*)scan, 0, do_utf8)) {
5458                 scan += UTF8SKIP(scan);
5459                 hardcount++;
5460             }
5461         } else {
5462             while (scan < loceol && REGINCLASS(prog, p, (U8*)scan))
5463                 scan++;
5464         }
5465         break;
5466     case ALNUM:
5467         if (do_utf8) {
5468             loceol = PL_regeol;
5469             LOAD_UTF8_CHARCLASS_ALNUM();
5470             while (hardcount < max && scan < loceol &&
5471                    swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
5472                 scan += UTF8SKIP(scan);
5473                 hardcount++;
5474             }
5475         } else {
5476             while (scan < loceol && isALNUM(*scan))
5477                 scan++;
5478         }
5479         break;
5480     case ALNUML:
5481         PL_reg_flags |= RF_tainted;
5482         if (do_utf8) {
5483             loceol = PL_regeol;
5484             while (hardcount < max && scan < loceol &&
5485                    isALNUM_LC_utf8((U8*)scan)) {
5486                 scan += UTF8SKIP(scan);
5487                 hardcount++;
5488             }
5489         } else {
5490             while (scan < loceol && isALNUM_LC(*scan))
5491                 scan++;
5492         }
5493         break;
5494     case NALNUM:
5495         if (do_utf8) {
5496             loceol = PL_regeol;
5497             LOAD_UTF8_CHARCLASS_ALNUM();
5498             while (hardcount < max && scan < loceol &&
5499                    !swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
5500                 scan += UTF8SKIP(scan);
5501                 hardcount++;
5502             }
5503         } else {
5504             while (scan < loceol && !isALNUM(*scan))
5505                 scan++;
5506         }
5507         break;
5508     case NALNUML:
5509         PL_reg_flags |= RF_tainted;
5510         if (do_utf8) {
5511             loceol = PL_regeol;
5512             while (hardcount < max && scan < loceol &&
5513                    !isALNUM_LC_utf8((U8*)scan)) {
5514                 scan += UTF8SKIP(scan);
5515                 hardcount++;
5516             }
5517         } else {
5518             while (scan < loceol && !isALNUM_LC(*scan))
5519                 scan++;
5520         }
5521         break;
5522     case SPACE:
5523         if (do_utf8) {
5524             loceol = PL_regeol;
5525             LOAD_UTF8_CHARCLASS_SPACE();
5526             while (hardcount < max && scan < loceol &&
5527                    (*scan == ' ' ||
5528                     swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
5529                 scan += UTF8SKIP(scan);
5530                 hardcount++;
5531             }
5532         } else {
5533             while (scan < loceol && isSPACE(*scan))
5534                 scan++;
5535         }
5536         break;
5537     case SPACEL:
5538         PL_reg_flags |= RF_tainted;
5539         if (do_utf8) {
5540             loceol = PL_regeol;
5541             while (hardcount < max && scan < loceol &&
5542                    (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
5543                 scan += UTF8SKIP(scan);
5544                 hardcount++;
5545             }
5546         } else {
5547             while (scan < loceol && isSPACE_LC(*scan))
5548                 scan++;
5549         }
5550         break;
5551     case NSPACE:
5552         if (do_utf8) {
5553             loceol = PL_regeol;
5554             LOAD_UTF8_CHARCLASS_SPACE();
5555             while (hardcount < max && scan < loceol &&
5556                    !(*scan == ' ' ||
5557                      swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
5558                 scan += UTF8SKIP(scan);
5559                 hardcount++;
5560             }
5561         } else {
5562             while (scan < loceol && !isSPACE(*scan))
5563                 scan++;
5564         }
5565         break;
5566     case NSPACEL:
5567         PL_reg_flags |= RF_tainted;
5568         if (do_utf8) {
5569             loceol = PL_regeol;
5570             while (hardcount < max && scan < loceol &&
5571                    !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
5572                 scan += UTF8SKIP(scan);
5573                 hardcount++;
5574             }
5575         } else {
5576             while (scan < loceol && !isSPACE_LC(*scan))
5577                 scan++;
5578         }
5579         break;
5580     case DIGIT:
5581         if (do_utf8) {
5582             loceol = PL_regeol;
5583             LOAD_UTF8_CHARCLASS_DIGIT();
5584             while (hardcount < max && scan < loceol &&
5585                    swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
5586                 scan += UTF8SKIP(scan);
5587                 hardcount++;
5588             }
5589         } else {
5590             while (scan < loceol && isDIGIT(*scan))
5591                 scan++;
5592         }
5593         break;
5594     case NDIGIT:
5595         if (do_utf8) {
5596             loceol = PL_regeol;
5597             LOAD_UTF8_CHARCLASS_DIGIT();
5598             while (hardcount < max && scan < loceol &&
5599                    !swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
5600                 scan += UTF8SKIP(scan);
5601                 hardcount++;
5602             }
5603         } else {
5604             while (scan < loceol && !isDIGIT(*scan))
5605                 scan++;
5606         }
5607     case LNBREAK:
5608         if (do_utf8) {
5609             loceol = PL_regeol;
5610             while (hardcount < max && scan < loceol && (c=is_LNBREAK_utf8(scan))) {
5611                 scan += c;
5612                 hardcount++;
5613             }
5614         } else {
5615             /*
5616               LNBREAK can match two latin chars, which is ok,
5617               because we have a null terminated string, but we
5618               have to use hardcount in this situation
5619             */
5620             while (scan < loceol && (c=is_LNBREAK_latin1(scan)))  {
5621                 scan+=c;
5622                 hardcount++;
5623             }
5624         }
5625         break;
5626     case HORIZWS:
5627         if (do_utf8) {
5628             loceol = PL_regeol;
5629             while (hardcount < max && scan < loceol && (c=is_HORIZWS_utf8(scan))) {
5630                 scan += c;
5631                 hardcount++;
5632             }
5633         } else {
5634             while (scan < loceol && is_HORIZWS_latin1(scan))
5635                 scan++;
5636         }
5637         break;
5638     case NHORIZWS:
5639         if (do_utf8) {
5640             loceol = PL_regeol;
5641             while (hardcount < max && scan < loceol && !is_HORIZWS_utf8(scan)) {
5642                 scan += UTF8SKIP(scan);
5643                 hardcount++;
5644             }
5645         } else {
5646             while (scan < loceol && !is_HORIZWS_latin1(scan))
5647                 scan++;
5648
5649         }
5650         break;
5651     case VERTWS:
5652         if (do_utf8) {
5653             loceol = PL_regeol;
5654             while (hardcount < max && scan < loceol && (c=is_VERTWS_utf8(scan))) {
5655                 scan += c;
5656                 hardcount++;
5657             }
5658         } else {
5659             while (scan < loceol && is_VERTWS_latin1(scan))
5660                 scan++;
5661
5662         }
5663         break;
5664     case NVERTWS:
5665         if (do_utf8) {
5666             loceol = PL_regeol;
5667             while (hardcount < max && scan < loceol && !is_VERTWS_utf8(scan)) {
5668                 scan += UTF8SKIP(scan);
5669                 hardcount++;
5670             }
5671         } else {
5672             while (scan < loceol && !is_VERTWS_latin1(scan))
5673                 scan++;
5674
5675         }
5676         break;
5677
5678     default:            /* Called on something of 0 width. */
5679         break;          /* So match right here or not at all. */
5680     }
5681
5682     if (hardcount)
5683         c = hardcount;
5684     else
5685         c = scan - PL_reginput;
5686     PL_reginput = scan;
5687
5688     DEBUG_r({
5689         GET_RE_DEBUG_FLAGS_DECL;
5690         DEBUG_EXECUTE_r({
5691             SV * const prop = sv_newmortal();
5692             regprop(prog, prop, p);
5693             PerlIO_printf(Perl_debug_log,
5694                         "%*s  %s can match %"IVdf" times out of %"IVdf"...\n",
5695                         REPORT_CODE_OFF + depth*2, "", SvPVX_const(prop),(IV)c,(IV)max);
5696         });
5697     });
5698
5699     return(c);
5700 }
5701
5702
5703 #if !defined(PERL_IN_XSUB_RE) || defined(PLUGGABLE_RE_EXTENSION)
5704 /*
5705 - regclass_swash - prepare the utf8 swash
5706 */
5707
5708 SV *
5709 Perl_regclass_swash(pTHX_ const regexp *prog, register const regnode* node, bool doinit, SV** listsvp, SV **altsvp)
5710 {
5711     dVAR;
5712     SV *sw  = NULL;
5713     SV *si  = NULL;
5714     SV *alt = NULL;
5715     RXi_GET_DECL(prog,progi);
5716     const struct reg_data * const data = prog ? progi->data : NULL;
5717
5718     PERL_ARGS_ASSERT_REGCLASS_SWASH;
5719
5720     if (data && data->count) {
5721         const U32 n = ARG(node);
5722
5723         if (data->what[n] == 's') {
5724             SV * const rv = MUTABLE_SV(data->data[n]);
5725             AV * const av = MUTABLE_AV(SvRV(rv));
5726             SV **const ary = AvARRAY(av);
5727             SV **a, **b;
5728
5729             /* See the end of regcomp.c:S_regclass() for
5730              * documentation of these array elements. */
5731
5732             si = *ary;
5733             a  = SvROK(ary[1]) ? &ary[1] : NULL;
5734             b  = SvTYPE(ary[2]) == SVt_PVAV ? &ary[2] : NULL;
5735
5736             if (a)
5737                 sw = *a;
5738             else if (si && doinit) {
5739                 sw = swash_init("utf8", "", si, 1, 0);
5740                 (void)av_store(av, 1, sw);
5741             }
5742             if (b)
5743                 alt = *b;
5744         }
5745     }
5746
5747     if (listsvp)
5748         *listsvp = si;
5749     if (altsvp)
5750         *altsvp  = alt;
5751
5752     return sw;
5753 }
5754 #endif
5755
5756 /*
5757  - reginclass - determine if a character falls into a character class
5758
5759   The n is the ANYOF regnode, the p is the target string, lenp
5760   is pointer to the maximum length of how far to go in the p
5761   (if the lenp is zero, UTF8SKIP(p) is used),
5762   do_utf8 tells whether the target string is in UTF-8.
5763
5764  */
5765
5766 STATIC bool
5767 S_reginclass(pTHX_ const regexp *prog, register const regnode *n, register const U8* p, STRLEN* lenp, register bool do_utf8)
5768 {
5769     dVAR;
5770     const char flags = ANYOF_FLAGS(n);
5771     bool match = FALSE;
5772     UV c = *p;
5773     STRLEN len = 0;
5774     STRLEN plen;
5775
5776     PERL_ARGS_ASSERT_REGINCLASS;
5777
5778     if (do_utf8 && !UTF8_IS_INVARIANT(c)) {
5779         c = utf8n_to_uvchr(p, UTF8_MAXBYTES, &len,
5780                 (UTF8_ALLOW_DEFAULT & UTF8_ALLOW_ANYUV) | UTF8_CHECK_ONLY);
5781                 /* see [perl #37836] for UTF8_ALLOW_ANYUV */
5782         if (len == (STRLEN)-1)
5783             Perl_croak(aTHX_ "Malformed UTF-8 character (fatal)");
5784     }
5785
5786     plen = lenp ? *lenp : UNISKIP(NATIVE_TO_UNI(c));
5787     if (do_utf8 || (flags & ANYOF_UNICODE)) {
5788         if (lenp)
5789             *lenp = 0;
5790         if (do_utf8 && !ANYOF_RUNTIME(n)) {
5791             if (len != (STRLEN)-1 && c < 256 && ANYOF_BITMAP_TEST(n, c))
5792                 match = TRUE;
5793         }
5794         if (!match && do_utf8 && (flags & ANYOF_UNICODE_ALL) && c >= 256)
5795             match = TRUE;
5796         if (!match) {
5797             AV *av;
5798             SV * const sw = regclass_swash(prog, n, TRUE, 0, (SV**)&av);
5799
5800             if (sw) {
5801                 U8 * utf8_p;
5802                 if (do_utf8) {
5803                     utf8_p = (U8 *) p;
5804                 } else {
5805                     STRLEN len = 1;
5806                     utf8_p = bytes_to_utf8(p, &len);
5807                 }
5808                 if (swash_fetch(sw, utf8_p, 1))
5809                     match = TRUE;
5810                 else if (flags & ANYOF_FOLD) {
5811                     if (!match && lenp && av) {
5812                         I32 i;
5813                         for (i = 0; i <= av_len(av); i++) {
5814                             SV* const sv = *av_fetch(av, i, FALSE);
5815                             STRLEN len;
5816                             const char * const s = SvPV_const(sv, len);
5817                             if (len <= plen && memEQ(s, (char*)utf8_p, len)) {
5818                                 *lenp = len;
5819                                 match = TRUE;
5820                                 break;
5821                             }
5822                         }
5823                     }
5824                     if (!match) {
5825                         U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
5826
5827                         STRLEN tmplen;
5828                         to_utf8_fold(utf8_p, tmpbuf, &tmplen);
5829                         if (swash_fetch(sw, tmpbuf, 1))
5830                             match = TRUE;
5831                     }
5832                 }
5833
5834                 /* If we allocated a string above, free it */
5835                 if (! do_utf8) Safefree(utf8_p);
5836             }
5837         }
5838         if (match && lenp && *lenp == 0)
5839             *lenp = UNISKIP(NATIVE_TO_UNI(c));
5840     }
5841     if (!match && c < 256) {
5842         if (ANYOF_BITMAP_TEST(n, c))
5843             match = TRUE;
5844         else if (flags & ANYOF_FOLD) {
5845             U8 f;
5846
5847             if (flags & ANYOF_LOCALE) {
5848                 PL_reg_flags |= RF_tainted;
5849                 f = PL_fold_locale[c];
5850             }
5851             else
5852                 f = PL_fold[c];
5853             if (f != c && ANYOF_BITMAP_TEST(n, f))
5854                 match = TRUE;
5855         }
5856
5857         if (!match && (flags & ANYOF_CLASS)) {
5858             PL_reg_flags |= RF_tainted;
5859             if (
5860                 (ANYOF_CLASS_TEST(n, ANYOF_ALNUM)   &&  isALNUM_LC(c))  ||
5861                 (ANYOF_CLASS_TEST(n, ANYOF_NALNUM)  && !isALNUM_LC(c))  ||
5862                 (ANYOF_CLASS_TEST(n, ANYOF_SPACE)   &&  isSPACE_LC(c))  ||
5863                 (ANYOF_CLASS_TEST(n, ANYOF_NSPACE)  && !isSPACE_LC(c))  ||
5864                 (ANYOF_CLASS_TEST(n, ANYOF_DIGIT)   &&  isDIGIT_LC(c))  ||
5865                 (ANYOF_CLASS_TEST(n, ANYOF_NDIGIT)  && !isDIGIT_LC(c))  ||
5866                 (ANYOF_CLASS_TEST(n, ANYOF_ALNUMC)  &&  isALNUMC_LC(c)) ||
5867                 (ANYOF_CLASS_TEST(n, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
5868                 (ANYOF_CLASS_TEST(n, ANYOF_ALPHA)   &&  isALPHA_LC(c))  ||
5869                 (ANYOF_CLASS_TEST(n, ANYOF_NALPHA)  && !isALPHA_LC(c))  ||
5870                 (ANYOF_CLASS_TEST(n, ANYOF_ASCII)   &&  isASCII(c))     ||
5871                 (ANYOF_CLASS_TEST(n, ANYOF_NASCII)  && !isASCII(c))     ||
5872                 (ANYOF_CLASS_TEST(n, ANYOF_CNTRL)   &&  isCNTRL_LC(c))  ||
5873                 (ANYOF_CLASS_TEST(n, ANYOF_NCNTRL)  && !isCNTRL_LC(c))  ||
5874                 (ANYOF_CLASS_TEST(n, ANYOF_GRAPH)   &&  isGRAPH_LC(c))  ||
5875                 (ANYOF_CLASS_TEST(n, ANYOF_NGRAPH)  && !isGRAPH_LC(c))  ||
5876                 (ANYOF_CLASS_TEST(n, ANYOF_LOWER)   &&  isLOWER_LC(c))  ||
5877                 (ANYOF_CLASS_TEST(n, ANYOF_NLOWER)  && !isLOWER_LC(c))  ||
5878                 (ANYOF_CLASS_TEST(n, ANYOF_PRINT)   &&  isPRINT_LC(c))  ||
5879                 (ANYOF_CLASS_TEST(n, ANYOF_NPRINT)  && !isPRINT_LC(c))  ||
5880                 (ANYOF_CLASS_TEST(n, ANYOF_PUNCT)   &&  isPUNCT_LC(c))  ||
5881                 (ANYOF_CLASS_TEST(n, ANYOF_NPUNCT)  && !isPUNCT_LC(c))  ||
5882                 (ANYOF_CLASS_TEST(n, ANYOF_UPPER)   &&  isUPPER_LC(c))  ||
5883                 (ANYOF_CLASS_TEST(n, ANYOF_NUPPER)  && !isUPPER_LC(c))  ||
5884                 (ANYOF_CLASS_TEST(n, ANYOF_XDIGIT)  &&  isXDIGIT(c))    ||
5885                 (ANYOF_CLASS_TEST(n, ANYOF_NXDIGIT) && !isXDIGIT(c))    ||
5886                 (ANYOF_CLASS_TEST(n, ANYOF_PSXSPC)  &&  isPSXSPC(c))    ||
5887                 (ANYOF_CLASS_TEST(n, ANYOF_NPSXSPC) && !isPSXSPC(c))    ||
5888                 (ANYOF_CLASS_TEST(n, ANYOF_BLANK)   &&  isBLANK(c))     ||
5889                 (ANYOF_CLASS_TEST(n, ANYOF_NBLANK)  && !isBLANK(c))
5890                 ) /* How's that for a conditional? */
5891             {
5892                 match = TRUE;
5893             }
5894         }
5895     }
5896
5897     return (flags & ANYOF_INVERT) ? !match : match;
5898 }
5899
5900 STATIC U8 *
5901 S_reghop3(U8 *s, I32 off, const U8* lim)
5902 {
5903     dVAR;
5904
5905     PERL_ARGS_ASSERT_REGHOP3;
5906
5907     if (off >= 0) {
5908         while (off-- && s < lim) {
5909             /* XXX could check well-formedness here */
5910             s += UTF8SKIP(s);
5911         }
5912     }
5913     else {
5914         while (off++ && s > lim) {
5915             s--;
5916             if (UTF8_IS_CONTINUED(*s)) {
5917                 while (s > lim && UTF8_IS_CONTINUATION(*s))
5918                     s--;
5919             }
5920             /* XXX could check well-formedness here */
5921         }
5922     }
5923     return s;
5924 }
5925
5926 #ifdef XXX_dmq
5927 /* there are a bunch of places where we use two reghop3's that should
5928    be replaced with this routine. but since thats not done yet
5929    we ifdef it out - dmq
5930 */
5931 STATIC U8 *
5932 S_reghop4(U8 *s, I32 off, const U8* llim, const U8* rlim)
5933 {
5934     dVAR;
5935
5936     PERL_ARGS_ASSERT_REGHOP4;
5937
5938     if (off >= 0) {
5939         while (off-- && s < rlim) {
5940             /* XXX could check well-formedness here */
5941             s += UTF8SKIP(s);
5942         }
5943     }
5944     else {
5945         while (off++ && s > llim) {
5946             s--;
5947             if (UTF8_IS_CONTINUED(*s)) {
5948                 while (s > llim && UTF8_IS_CONTINUATION(*s))
5949                     s--;
5950             }
5951             /* XXX could check well-formedness here */
5952         }
5953     }
5954     return s;
5955 }
5956 #endif
5957
5958 STATIC U8 *
5959 S_reghopmaybe3(U8* s, I32 off, const U8* lim)
5960 {
5961     dVAR;
5962
5963     PERL_ARGS_ASSERT_REGHOPMAYBE3;
5964
5965     if (off >= 0) {
5966         while (off-- && s < lim) {
5967             /* XXX could check well-formedness here */
5968             s += UTF8SKIP(s);
5969         }
5970         if (off >= 0)
5971             return NULL;
5972     }
5973     else {
5974         while (off++ && s > lim) {
5975             s--;
5976             if (UTF8_IS_CONTINUED(*s)) {
5977                 while (s > lim && UTF8_IS_CONTINUATION(*s))
5978                     s--;
5979             }
5980             /* XXX could check well-formedness here */
5981         }
5982         if (off <= 0)
5983             return NULL;
5984     }
5985     return s;
5986 }
5987
5988 static void
5989 restore_pos(pTHX_ void *arg)
5990 {
5991     dVAR;
5992     regexp * const rex = (regexp *)arg;
5993     if (PL_reg_eval_set) {
5994         if (PL_reg_oldsaved) {
5995             rex->subbeg = PL_reg_oldsaved;
5996             rex->sublen = PL_reg_oldsavedlen;
5997 #ifdef PERL_OLD_COPY_ON_WRITE
5998             rex->saved_copy = PL_nrs;
5999 #endif
6000             RXp_MATCH_COPIED_on(rex);
6001         }
6002         PL_reg_magic->mg_len = PL_reg_oldpos;
6003         PL_reg_eval_set = 0;
6004         PL_curpm = PL_reg_oldcurpm;
6005     }
6006 }
6007
6008 STATIC void
6009 S_to_utf8_substr(pTHX_ register regexp *prog)
6010 {
6011     int i = 1;
6012
6013     PERL_ARGS_ASSERT_TO_UTF8_SUBSTR;
6014
6015     do {
6016         if (prog->substrs->data[i].substr
6017             && !prog->substrs->data[i].utf8_substr) {
6018             SV* const sv = newSVsv(prog->substrs->data[i].substr);
6019             prog->substrs->data[i].utf8_substr = sv;
6020             sv_utf8_upgrade(sv);
6021             if (SvVALID(prog->substrs->data[i].substr)) {
6022                 const U8 flags = BmFLAGS(prog->substrs->data[i].substr);
6023                 if (flags & FBMcf_TAIL) {
6024                     /* Trim the trailing \n that fbm_compile added last
6025                        time.  */
6026                     SvCUR_set(sv, SvCUR(sv) - 1);
6027                     /* Whilst this makes the SV technically "invalid" (as its
6028                        buffer is no longer followed by "\0") when fbm_compile()
6029                        adds the "\n" back, a "\0" is restored.  */
6030                 }
6031                 fbm_compile(sv, flags);
6032             }
6033             if (prog->substrs->data[i].substr == prog->check_substr)
6034                 prog->check_utf8 = sv;
6035         }
6036     } while (i--);
6037 }
6038
6039 STATIC void
6040 S_to_byte_substr(pTHX_ register regexp *prog)
6041 {
6042     dVAR;
6043     int i = 1;
6044
6045     PERL_ARGS_ASSERT_TO_BYTE_SUBSTR;
6046
6047     do {
6048         if (prog->substrs->data[i].utf8_substr
6049             && !prog->substrs->data[i].substr) {
6050             SV* sv = newSVsv(prog->substrs->data[i].utf8_substr);
6051             if (sv_utf8_downgrade(sv, TRUE)) {
6052                 if (SvVALID(prog->substrs->data[i].utf8_substr)) {
6053                     const U8 flags
6054                         = BmFLAGS(prog->substrs->data[i].utf8_substr);
6055                     if (flags & FBMcf_TAIL) {
6056                         /* Trim the trailing \n that fbm_compile added last
6057                            time.  */
6058                         SvCUR_set(sv, SvCUR(sv) - 1);
6059                     }
6060                     fbm_compile(sv, flags);
6061                 }
6062             } else {
6063                 SvREFCNT_dec(sv);
6064                 sv = &PL_sv_undef;
6065             }
6066             prog->substrs->data[i].substr = sv;
6067             if (prog->substrs->data[i].utf8_substr == prog->check_utf8)
6068                 prog->check_substr = sv;
6069         }
6070     } while (i--);
6071 }
6072
6073 /*
6074  * Local variables:
6075  * c-indentation-style: bsd
6076  * c-basic-offset: 4
6077  * indent-tabs-mode: t
6078  * End:
6079  *
6080  * ex: set ts=8 sts=4 sw=4 noet:
6081  */