regexec.c

   1 /*    regexec.c
   2  */
   3
   4 /*
   5  * "One Ring to rule them all, One Ring to find them..."
   6  */
   7
   8 /* This file contains functions for executing a regular expression.  See
   9  * also regcomp.c which funnily enough, contains functions for compiling
  10  * a regular expression.
  11  *
  12  * This file is also copied at build time to ext/re/re_exec.c, where
  13  * it's built with -DPERL_EXT_RE_BUILD -DPERL_EXT_RE_DEBUG -DPERL_EXT.
  14  * This causes the main functions to be compiled under new names and with
  15  * debugging support added, which makes "use re 'debug'" work.
  16  */
  17
  18 /* NOTE: this is derived from Henry Spencer's regexp code, and should not
  19  * confused with the original package (see point 3 below).  Thanks, Henry!
  20  */
  21
  22 /* Additional note: this code is very heavily munged from Henry's version
  23  * in places.  In some spots I've traded clarity for efficiency, so don't
  24  * blame Henry for some of the lack of readability.
  25  */
  26
  27 /* The names of the functions have been changed from regcomp and
  28  * regexec to  pregcomp and pregexec in order to avoid conflicts
  29  * with the POSIX routines of the same names.
  30 */
  31
  32 #ifdef PERL_EXT_RE_BUILD
  33 #include "re_top.h"
  34 #endif
  35
  36 /*
  37  * pregcomp and pregexec -- regsub and regerror are not used in perl
  38  *
  39  *      Copyright (c) 1986 by University of Toronto.
  40  *      Written by Henry Spencer.  Not derived from licensed software.
  41  *
  42  *      Permission is granted to anyone to use this software for any
  43  *      purpose on any computer system, and to redistribute it freely,
  44  *      subject to the following restrictions:
  45  *
  46  *      1. The author is not responsible for the consequences of use of
  47  *              this software, no matter how awful, even if they arise
  48  *              from defects in it.
  49  *
  50  *      2. The origin of this software must not be misrepresented, either
  51  *              by explicit claim or by omission.
  52  *
  53  *      3. Altered versions must be plainly marked as such, and must not
  54  *              be misrepresented as being the original software.
  55  *
  56  ****    Alterations to Henry's code are...
  57  ****
  58  ****    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
  59  ****    2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007 by Larry Wall and others
  60  ****
  61  ****    You may distribute under the terms of either the GNU General Public
  62  ****    License or the Artistic License, as specified in the README file.
  63  *
  64  * Beware that some of this code is subtly aware of the way operator
  65  * precedence is structured in regular expressions.  Serious changes in
  66  * regular-expression syntax might require a total rethink.
  67  */
  68 #include "EXTERN.h"
  69 #define PERL_IN_REGEXEC_C
  70 #include "perl.h"
  71
  72 #ifdef PERL_IN_XSUB_RE
  73 #  include "re_comp.h"
  74 #else
  75 #  include "regcomp.h"
  76 #endif
  77
  78 #define RF_tainted      1               /* tainted information used? */
  79 #define RF_warned       2               /* warned about big count? */
  80
  81 #define RF_utf8         8               /* Pattern contains multibyte chars? */
  82
  83 #define UTF ((PL_reg_flags & RF_utf8) != 0)
  84
  85 #define RS_init         1               /* eval environment created */
  86 #define RS_set          2               /* replsv value is set */
  87
  88 #ifndef STATIC
  89 #define STATIC  static
  90 #endif
  91
  92 #define REGINCLASS(prog,p,c)  (ANYOF_FLAGS(p) ? reginclass(prog,p,c,0,0) : ANYOF_BITMAP_TEST(p,*(c)))
  93
  94 /*
  95  * Forwards.
  96  */
  97
  98 #define CHR_SVLEN(sv) (do_utf8 ? sv_len_utf8(sv) : SvCUR(sv))
  99 #define CHR_DIST(a,b) (PL_reg_match_utf8 ? utf8_distance(a,b) : a - b)
 100
 101 #define HOPc(pos,off) \
 102         (char *)(PL_reg_match_utf8 \
 103             ? reghop3((U8*)pos, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr)) \
 104             : (U8*)(pos + off))
 105 #define HOPBACKc(pos, off) \
 106         (char*)(PL_reg_match_utf8\
 107             ? reghopmaybe3((U8*)pos, -off, (U8*)PL_bostr) \
 108             : (pos - off >= PL_bostr)           \
 109                 ? (U8*)pos - off                \
 110                 : NULL)
 111
 112 #define HOP3(pos,off,lim) (PL_reg_match_utf8 ? reghop3((U8*)(pos), off, (U8*)(lim)) : (U8*)(pos + off))
 113 #define HOP3c(pos,off,lim) ((char*)HOP3(pos,off,lim))
 114
 115 #define LOAD_UTF8_CHARCLASS(class,str) STMT_START { \
 116     if (!CAT2(PL_utf8_,class)) { bool ok; ENTER; save_re_context(); ok=CAT2(is_utf8_,class)((const U8*)str); assert(ok); LEAVE; } } STMT_END
 117 #define LOAD_UTF8_CHARCLASS_ALNUM() LOAD_UTF8_CHARCLASS(alnum,"a")
 118 #define LOAD_UTF8_CHARCLASS_DIGIT() LOAD_UTF8_CHARCLASS(digit,"0")
 119 #define LOAD_UTF8_CHARCLASS_SPACE() LOAD_UTF8_CHARCLASS(space," ")
 120 #define LOAD_UTF8_CHARCLASS_MARK()  LOAD_UTF8_CHARCLASS(mark, "\xcd\x86")
 121
 122 /* TODO: Combine JUMPABLE and HAS_TEXT to cache OP(rn) */
 123
 124 /* for use after a quantifier and before an EXACT-like node -- japhy */
 125 /* it would be nice to rework regcomp.sym to generate this stuff. sigh */
 126 #define JUMPABLE(rn) (      \
 127     OP(rn) == OPEN ||       \
 128     (OP(rn) == CLOSE && (!cur_eval || cur_eval->u.eval.close_paren != ARG(rn))) || \
 129     OP(rn) == EVAL ||   \
 130     OP(rn) == SUSPEND || OP(rn) == IFMATCH || \
 131     OP(rn) == PLUS || OP(rn) == MINMOD || \
 132     OP(rn) == KEEPS || (PL_regkind[OP(rn)] == VERB) || \
 133     (PL_regkind[OP(rn)] == CURLY && ARG1(rn) > 0) \
 134 )
 135 #define IS_EXACT(rn) (PL_regkind[OP(rn)] == EXACT)
 136
 137 #define HAS_TEXT(rn) ( IS_EXACT(rn) || PL_regkind[OP(rn)] == REF )
 138
 139 #if 0
 140 /* Currently these are only used when PL_regkind[OP(rn)] == EXACT so
 141    we don't need this definition. */
 142 #define IS_TEXT(rn)   ( OP(rn)==EXACT   || OP(rn)==REF   || OP(rn)==NREF   )
 143 #define IS_TEXTF(rn)  ( OP(rn)==EXACTF  || OP(rn)==REFF  || OP(rn)==NREFF  )
 144 #define IS_TEXTFL(rn) ( OP(rn)==EXACTFL || OP(rn)==REFFL || OP(rn)==NREFFL )
 145
 146 #else
 147 /* ... so we use this as its faster. */
 148 #define IS_TEXT(rn)   ( OP(rn)==EXACT   )
 149 #define IS_TEXTF(rn)  ( OP(rn)==EXACTF  )
 150 #define IS_TEXTFL(rn) ( OP(rn)==EXACTFL )
 151
 152 #endif
 153
 154 /*
 155   Search for mandatory following text node; for lookahead, the text must
 156   follow but for lookbehind (rn->flags != 0) we skip to the next step.
 157 */
 158 #define FIND_NEXT_IMPT(rn) STMT_START { \
 159     while (JUMPABLE(rn)) { \
 160         const OPCODE type = OP(rn); \
 161         if (type == SUSPEND || PL_regkind[type] == CURLY) \
 162             rn = NEXTOPER(NEXTOPER(rn)); \
 163         else if (type == PLUS) \
 164             rn = NEXTOPER(rn); \
 165         else if (type == IFMATCH) \
 166             rn = (rn->flags == 0) ? NEXTOPER(NEXTOPER(rn)) : rn + ARG(rn); \
 167         else rn += NEXT_OFF(rn); \
 168     } \
 169 } STMT_END
 170
 171
 172 static void restore_pos(pTHX_ void *arg);
 173
 174 STATIC CHECKPOINT
 175 S_regcppush(pTHX_ I32 parenfloor)
 176 {
 177     dVAR;
 178     const int retval = PL_savestack_ix;
 179 #define REGCP_PAREN_ELEMS 4
 180     const int paren_elems_to_push = (PL_regsize - parenfloor) * REGCP_PAREN_ELEMS;
 181     int p;
 182     GET_RE_DEBUG_FLAGS_DECL;
 183
 184     if (paren_elems_to_push < 0)
 185         Perl_croak(aTHX_ "panic: paren_elems_to_push < 0");
 186
 187 #define REGCP_OTHER_ELEMS 7
 188     SSGROW(paren_elems_to_push + REGCP_OTHER_ELEMS);
 189
 190     for (p = PL_regsize; p > parenfloor; p--) {
 191 /* REGCP_PARENS_ELEMS are pushed per pairs of parentheses. */
 192         SSPUSHINT(PL_regoffs[p].end);
 193         SSPUSHINT(PL_regoffs[p].start);
 194         SSPUSHPTR(PL_reg_start_tmp[p]);
 195         SSPUSHINT(p);
 196         DEBUG_BUFFERS_r(PerlIO_printf(Perl_debug_log,
 197           "     saving \\%"UVuf" %"IVdf"(%"IVdf")..%"IVdf"\n",
 198                       (UV)p, (IV)PL_regoffs[p].start,
 199                       (IV)(PL_reg_start_tmp[p] - PL_bostr),
 200                       (IV)PL_regoffs[p].end
 201         ));
 202     }
 203 /* REGCP_OTHER_ELEMS are pushed in any case, parentheses or no. */
 204     SSPUSHPTR(PL_regoffs);
 205     SSPUSHINT(PL_regsize);
 206     SSPUSHINT(*PL_reglastparen);
 207     SSPUSHINT(*PL_reglastcloseparen);
 208     SSPUSHPTR(PL_reginput);
 209 #define REGCP_FRAME_ELEMS 2
 210 /* REGCP_FRAME_ELEMS are part of the REGCP_OTHER_ELEMS and
 211  * are needed for the regexp context stack bookkeeping. */
 212     SSPUSHINT(paren_elems_to_push + REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
 213     SSPUSHINT(SAVEt_REGCONTEXT); /* Magic cookie. */
 214
 215     return retval;
 216 }
 217
 218 /* These are needed since we do not localize EVAL nodes: */
 219 #define REGCP_SET(cp)                                           \
 220     DEBUG_STATE_r(                                              \
 221             PerlIO_printf(Perl_debug_log,                       \
 222                 "  Setting an EVAL scope, savestack=%"IVdf"\n", \
 223                 (IV)PL_savestack_ix));                          \
 224     cp = PL_savestack_ix
 225
 226 #define REGCP_UNWIND(cp)                                        \
 227     DEBUG_STATE_r(                                              \
 228         if (cp != PL_savestack_ix)                              \
 229             PerlIO_printf(Perl_debug_log,                       \
 230                 "  Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
 231                 (IV)(cp), (IV)PL_savestack_ix));                \
 232     regcpblow(cp)
 233
 234 STATIC char *
 235 S_regcppop(pTHX_ const regexp *rex)
 236 {
 237     dVAR;
 238     U32 i;
 239     char *input;
 240
 241     GET_RE_DEBUG_FLAGS_DECL;
 242
 243     /* Pop REGCP_OTHER_ELEMS before the parentheses loop starts. */
 244     i = SSPOPINT;
 245     assert(i == SAVEt_REGCONTEXT); /* Check that the magic cookie is there. */
 246     i = SSPOPINT; /* Parentheses elements to pop. */
 247     input = (char *) SSPOPPTR;
 248     *PL_reglastcloseparen = SSPOPINT;
 249     *PL_reglastparen = SSPOPINT;
 250     PL_regsize = SSPOPINT;
 251     PL_regoffs=(regexp_paren_pair *) SSPOPPTR;
 252
 253
 254     /* Now restore the parentheses context. */
 255     for (i -= (REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
 256          i > 0; i -= REGCP_PAREN_ELEMS) {
 257         I32 tmps;
 258         U32 paren = (U32)SSPOPINT;
 259         PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
 260         PL_regoffs[paren].start = SSPOPINT;
 261         tmps = SSPOPINT;
 262         if (paren <= *PL_reglastparen)
 263             PL_regoffs[paren].end = tmps;
 264         DEBUG_BUFFERS_r(
 265             PerlIO_printf(Perl_debug_log,
 266                           "     restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
 267                           (UV)paren, (IV)PL_regoffs[paren].start,
 268                           (IV)(PL_reg_start_tmp[paren] - PL_bostr),
 269                           (IV)PL_regoffs[paren].end,
 270                           (paren > *PL_reglastparen ? "(no)" : ""));
 271         );
 272     }
 273     DEBUG_BUFFERS_r(
 274         if (*PL_reglastparen + 1 <= rex->nparens) {
 275             PerlIO_printf(Perl_debug_log,
 276                           "     restoring \\%"IVdf"..\\%"IVdf" to undef\n",
 277                           (IV)(*PL_reglastparen + 1), (IV)rex->nparens);
 278         }
 279     );
 280 #if 1
 281     /* It would seem that the similar code in regtry()
 282      * already takes care of this, and in fact it is in
 283      * a better location to since this code can #if 0-ed out
 284      * but the code in regtry() is needed or otherwise tests
 285      * requiring null fields (pat.t#187 and split.t#{13,14}
 286      * (as of patchlevel 7877)  will fail.  Then again,
 287      * this code seems to be necessary or otherwise
 288      * this erroneously leaves $1 defined: "1" =~ /^(?:(\d)x)?\d$/
 289      * --jhi updated by dapm */
 290     for (i = *PL_reglastparen + 1; i <= rex->nparens; i++) {
 291         if (i > PL_regsize)
 292             PL_regoffs[i].start = -1;
 293         PL_regoffs[i].end = -1;
 294     }
 295 #endif
 296     return input;
 297 }
 298
 299 #define regcpblow(cp) LEAVE_SCOPE(cp)   /* Ignores regcppush()ed data. */
 300
 301 /*
 302  * pregexec and friends
 303  */
 304
 305 #ifndef PERL_IN_XSUB_RE
 306 /*
 307  - pregexec - match a regexp against a string
 308  */
 309 I32
 310 Perl_pregexec(pTHX_ REGEXP * const prog, char* stringarg, register char *strend,
 311          char *strbeg, I32 minend, SV *screamer, U32 nosave)
 312 /* strend: pointer to null at end of string */
 313 /* strbeg: real beginning of string */
 314 /* minend: end of match must be >=minend after stringarg. */
 315 /* nosave: For optimizations. */
 316 {
 317     return
 318         regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
 319                       nosave ? 0 : REXEC_COPY_STR);
 320 }
 321 #endif
 322
 323 /*
 324  * Need to implement the following flags for reg_anch:
 325  *
 326  * USE_INTUIT_NOML              - Useful to call re_intuit_start() first
 327  * USE_INTUIT_ML
 328  * INTUIT_AUTORITATIVE_NOML     - Can trust a positive answer
 329  * INTUIT_AUTORITATIVE_ML
 330  * INTUIT_ONCE_NOML             - Intuit can match in one location only.
 331  * INTUIT_ONCE_ML
 332  *
 333  * Another flag for this function: SECOND_TIME (so that float substrs
 334  * with giant delta may be not rechecked).
 335  */
 336
 337 /* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
 338
 339 /* If SCREAM, then SvPVX_const(sv) should be compatible with strpos and strend.
 340    Otherwise, only SvCUR(sv) is used to get strbeg. */
 341
 342 /* XXXX We assume that strpos is strbeg unless sv. */
 343
 344 /* XXXX Some places assume that there is a fixed substring.
 345         An update may be needed if optimizer marks as "INTUITable"
 346         RExen without fixed substrings.  Similarly, it is assumed that
 347         lengths of all the strings are no more than minlen, thus they
 348         cannot come from lookahead.
 349         (Or minlen should take into account lookahead.)
 350   NOTE: Some of this comment is not correct. minlen does now take account
 351   of lookahead/behind. Further research is required. -- demerphq
 352
 353 */
 354
 355 /* A failure to find a constant substring means that there is no need to make
 356    an expensive call to REx engine, thus we celebrate a failure.  Similarly,
 357    finding a substring too deep into the string means that less calls to
 358    regtry() should be needed.
 359
 360    REx compiler's optimizer found 4 possible hints:
 361         a) Anchored substring;
 362         b) Fixed substring;
 363         c) Whether we are anchored (beginning-of-line or \G);
 364         d) First node (of those at offset 0) which may distingush positions;
 365    We use a)b)d) and multiline-part of c), and try to find a position in the
 366    string which does not contradict any of them.
 367  */
 368
 369 /* Most of decisions we do here should have been done at compile time.
 370    The nodes of the REx which we used for the search should have been
 371    deleted from the finite automaton. */
 372
 373 char *
 374 Perl_re_intuit_start(pTHX_ REGEXP * const prog, SV *sv, char *strpos,
 375                      char *strend, const U32 flags, re_scream_pos_data *data)
 376 {
 377     dVAR;
 378     register I32 start_shift = 0;
 379     /* Should be nonnegative! */
 380     register I32 end_shift   = 0;
 381     register char *s;
 382     register SV *check;
 383     char *strbeg;
 384     char *t;
 385     const bool do_utf8 = (sv && SvUTF8(sv)) ? 1 : 0; /* if no sv we have to assume bytes */
 386     I32 ml_anch;
 387     register char *other_last = NULL;   /* other substr checked before this */
 388     char *check_at = NULL;              /* check substr found at this pos */
 389     const I32 multiline = prog->extflags & RXf_PMf_MULTILINE;
 390     RXi_GET_DECL(prog,progi);
 391 #ifdef DEBUGGING
 392     const char * const i_strpos = strpos;
 393 #endif
 394
 395     GET_RE_DEBUG_FLAGS_DECL;
 396
 397     RX_MATCH_UTF8_set(prog,do_utf8);
 398
 399     if (prog->extflags & RXf_UTF8) {
 400         PL_reg_flags |= RF_utf8;
 401     }
 402     DEBUG_EXECUTE_r(
 403         debug_start_match(prog, do_utf8, strpos, strend,
 404             sv ? "Guessing start of match in sv for"
 405                : "Guessing start of match in string for");
 406               );
 407
 408     /* CHR_DIST() would be more correct here but it makes things slow. */
 409     if (prog->minlen > strend - strpos) {
 410         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 411                               "String too short... [re_intuit_start]\n"));
 412         goto fail;
 413     }
 414
 415     strbeg = (sv && SvPOK(sv)) ? strend - SvCUR(sv) : strpos;
 416     PL_regeol = strend;
 417     if (do_utf8) {
 418         if (!prog->check_utf8 && prog->check_substr)
 419             to_utf8_substr(prog);
 420         check = prog->check_utf8;
 421     } else {
 422         if (!prog->check_substr && prog->check_utf8)
 423             to_byte_substr(prog);
 424         check = prog->check_substr;
 425     }
 426     if (check == &PL_sv_undef) {
 427         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 428                 "Non-utf8 string cannot match utf8 check string\n"));
 429         goto fail;
 430     }
 431     if (prog->extflags & RXf_ANCH) {    /* Match at beg-of-str or after \n */
 432         ml_anch = !( (prog->extflags & RXf_ANCH_SINGLE)
 433                      || ( (prog->extflags & RXf_ANCH_BOL)
 434                           && !multiline ) );    /* Check after \n? */
 435
 436         if (!ml_anch) {
 437           if ( !(prog->extflags & RXf_ANCH_GPOS) /* Checked by the caller */
 438                 && !(prog->intflags & PREGf_IMPLICIT) /* not a real BOL */
 439                /* SvCUR is not set on references: SvRV and SvPVX_const overlap */
 440                && sv && !SvROK(sv)
 441                && (strpos != strbeg)) {
 442               DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
 443               goto fail;
 444           }
 445           if (prog->check_offset_min == prog->check_offset_max &&
 446               !(prog->extflags & RXf_CANY_SEEN)) {
 447             /* Substring at constant offset from beg-of-str... */
 448             I32 slen;
 449
 450             s = HOP3c(strpos, prog->check_offset_min, strend);
 451
 452             if (SvTAIL(check)) {
 453                 slen = SvCUR(check);    /* >= 1 */
 454
 455                 if ( strend - s > slen || strend - s < slen - 1
 456                      || (strend - s == slen && strend[-1] != '\n')) {
 457                     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
 458                     goto fail_finish;
 459                 }
 460                 /* Now should match s[0..slen-2] */
 461                 slen--;
 462                 if (slen && (*SvPVX_const(check) != *s
 463                              || (slen > 1
 464                                  && memNE(SvPVX_const(check), s, slen)))) {
 465                   report_neq:
 466                     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
 467                     goto fail_finish;
 468                 }
 469             }
 470             else if (*SvPVX_const(check) != *s
 471                      || ((slen = SvCUR(check)) > 1
 472                          && memNE(SvPVX_const(check), s, slen)))
 473                 goto report_neq;
 474             check_at = s;
 475             goto success_at_start;
 476           }
 477         }
 478         /* Match is anchored, but substr is not anchored wrt beg-of-str. */
 479         s = strpos;
 480         start_shift = prog->check_offset_min; /* okay to underestimate on CC */
 481         end_shift = prog->check_end_shift;
 482
 483         if (!ml_anch) {
 484             const I32 end = prog->check_offset_max + CHR_SVLEN(check)
 485                                          - (SvTAIL(check) != 0);
 486             const I32 eshift = CHR_DIST((U8*)strend, (U8*)s) - end;
 487
 488             if (end_shift < eshift)
 489                 end_shift = eshift;
 490         }
 491     }
 492     else {                              /* Can match at random position */
 493         ml_anch = 0;
 494         s = strpos;
 495         start_shift = prog->check_offset_min;  /* okay to underestimate on CC */
 496         end_shift = prog->check_end_shift;
 497
 498         /* end shift should be non negative here */
 499     }
 500
 501 #ifdef QDEBUGGING       /* 7/99: reports of failure (with the older version) */
 502     if (end_shift < 0)
 503         Perl_croak(aTHX_ "panic: end_shift: %"IVdf" pattern:\n%s\n ",
 504                    (IV)end_shift, prog->precomp);
 505 #endif
 506
 507   restart:
 508     /* Find a possible match in the region s..strend by looking for
 509        the "check" substring in the region corrected by start/end_shift. */
 510
 511     {
 512         I32 srch_start_shift = start_shift;
 513         I32 srch_end_shift = end_shift;
 514         if (srch_start_shift < 0 && strbeg - s > srch_start_shift) {
 515             srch_end_shift -= ((strbeg - s) - srch_start_shift);
 516             srch_start_shift = strbeg - s;
 517         }
 518     DEBUG_OPTIMISE_MORE_r({
 519         PerlIO_printf(Perl_debug_log, "Check offset min: %"IVdf" Start shift: %"IVdf" End shift %"IVdf" Real End Shift: %"IVdf"\n",
 520             (IV)prog->check_offset_min,
 521             (IV)srch_start_shift,
 522             (IV)srch_end_shift,
 523             (IV)prog->check_end_shift);
 524     });
 525
 526     if (flags & REXEC_SCREAM) {
 527         I32 p = -1;                     /* Internal iterator of scream. */
 528         I32 * const pp = data ? data->scream_pos : &p;
 529
 530         if (PL_screamfirst[BmRARE(check)] >= 0
 531             || ( BmRARE(check) == '\n'
 532                  && (BmPREVIOUS(check) == SvCUR(check) - 1)
 533                  && SvTAIL(check) ))
 534             s = screaminstr(sv, check,
 535                             srch_start_shift + (s - strbeg), srch_end_shift, pp, 0);
 536         else
 537             goto fail_finish;
 538         /* we may be pointing at the wrong string */
 539         if (s && RX_MATCH_COPIED(prog))
 540             s = strbeg + (s - SvPVX_const(sv));
 541         if (data)
 542             *data->scream_olds = s;
 543     }
 544     else {
 545         U8* start_point;
 546         U8* end_point;
 547         if (prog->extflags & RXf_CANY_SEEN) {
 548             start_point= (U8*)(s + srch_start_shift);
 549             end_point= (U8*)(strend - srch_end_shift);
 550         } else {
 551             start_point= HOP3(s, srch_start_shift, srch_start_shift < 0 ? strbeg : strend);
 552             end_point= HOP3(strend, -srch_end_shift, strbeg);
 553         }
 554         DEBUG_OPTIMISE_MORE_r({
 555             PerlIO_printf(Perl_debug_log, "fbm_instr len=%d str=<%.*s>\n",
 556                 (int)(end_point - start_point),
 557                 (int)(end_point - start_point) > 20 ? 20 : (int)(end_point - start_point),
 558                 start_point);
 559         });
 560
 561         s = fbm_instr( start_point, end_point,
 562                       check, multiline ? FBMrf_MULTILINE : 0);
 563     }
 564     }
 565     /* Update the count-of-usability, remove useless subpatterns,
 566         unshift s.  */
 567
 568     DEBUG_EXECUTE_r({
 569         RE_PV_QUOTED_DECL(quoted, do_utf8, PERL_DEBUG_PAD_ZERO(0),
 570             SvPVX_const(check), RE_SV_DUMPLEN(check), 30);
 571         PerlIO_printf(Perl_debug_log, "%s %s substr %s%s%s",
 572                           (s ? "Found" : "Did not find"),
 573             (check == (do_utf8 ? prog->anchored_utf8 : prog->anchored_substr)
 574                 ? "anchored" : "floating"),
 575             quoted,
 576             RE_SV_TAIL(check),
 577             (s ? " at offset " : "...\n") );
 578     });
 579
 580     if (!s)
 581         goto fail_finish;
 582     /* Finish the diagnostic message */
 583     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
 584
 585     /* XXX dmq: first branch is for positive lookbehind...
 586        Our check string is offset from the beginning of the pattern.
 587        So we need to do any stclass tests offset forward from that
 588        point. I think. :-(
 589      */
 590
 591
 592
 593     check_at=s;
 594
 595
 596     /* Got a candidate.  Check MBOL anchoring, and the *other* substr.
 597        Start with the other substr.
 598        XXXX no SCREAM optimization yet - and a very coarse implementation
 599        XXXX /ttx+/ results in anchored="ttx", floating="x".  floating will
 600                 *always* match.  Probably should be marked during compile...
 601        Probably it is right to do no SCREAM here...
 602      */
 603
 604     if (do_utf8 ? (prog->float_utf8 && prog->anchored_utf8)
 605                 : (prog->float_substr && prog->anchored_substr))
 606     {
 607         /* Take into account the "other" substring. */
 608         /* XXXX May be hopelessly wrong for UTF... */
 609         if (!other_last)
 610             other_last = strpos;
 611         if (check == (do_utf8 ? prog->float_utf8 : prog->float_substr)) {
 612           do_other_anchored:
 613             {
 614                 char * const last = HOP3c(s, -start_shift, strbeg);
 615                 char *last1, *last2;
 616                 char * const saved_s = s;
 617                 SV* must;
 618
 619                 t = s - prog->check_offset_max;
 620                 if (s - strpos > prog->check_offset_max  /* signed-corrected t > strpos */
 621                     && (!do_utf8
 622                         || ((t = (char*)reghopmaybe3((U8*)s, -(prog->check_offset_max), (U8*)strpos))
 623                             && t > strpos)))
 624                     NOOP;
 625                 else
 626                     t = strpos;
 627                 t = HOP3c(t, prog->anchored_offset, strend);
 628                 if (t < other_last)     /* These positions already checked */
 629                     t = other_last;
 630                 last2 = last1 = HOP3c(strend, -prog->minlen, strbeg);
 631                 if (last < last1)
 632                     last1 = last;
 633                 /* XXXX It is not documented what units *_offsets are in.
 634                    We assume bytes, but this is clearly wrong.
 635                    Meaning this code needs to be carefully reviewed for errors.
 636                    dmq.
 637                   */
 638
 639                 /* On end-of-str: see comment below. */
 640                 must = do_utf8 ? prog->anchored_utf8 : prog->anchored_substr;
 641                 if (must == &PL_sv_undef) {
 642                     s = (char*)NULL;
 643                     DEBUG_r(must = prog->anchored_utf8);        /* for debug */
 644                 }
 645                 else
 646                     s = fbm_instr(
 647                         (unsigned char*)t,
 648                         HOP3(HOP3(last1, prog->anchored_offset, strend)
 649                                 + SvCUR(must), -(SvTAIL(must)!=0), strbeg),
 650                         must,
 651                         multiline ? FBMrf_MULTILINE : 0
 652                     );
 653                 DEBUG_EXECUTE_r({
 654                     RE_PV_QUOTED_DECL(quoted, do_utf8, PERL_DEBUG_PAD_ZERO(0),
 655                         SvPVX_const(must), RE_SV_DUMPLEN(must), 30);
 656                     PerlIO_printf(Perl_debug_log, "%s anchored substr %s%s",
 657                         (s ? "Found" : "Contradicts"),
 658                         quoted, RE_SV_TAIL(must));
 659                 });
 660
 661
 662                 if (!s) {
 663                     if (last1 >= last2) {
 664                         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 665                                                 ", giving up...\n"));
 666                         goto fail_finish;
 667                     }
 668                     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 669                         ", trying floating at offset %ld...\n",
 670                         (long)(HOP3c(saved_s, 1, strend) - i_strpos)));
 671                     other_last = HOP3c(last1, prog->anchored_offset+1, strend);
 672                     s = HOP3c(last, 1, strend);
 673                     goto restart;
 674                 }
 675                 else {
 676                     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
 677                           (long)(s - i_strpos)));
 678                     t = HOP3c(s, -prog->anchored_offset, strbeg);
 679                     other_last = HOP3c(s, 1, strend);
 680                     s = saved_s;
 681                     if (t == strpos)
 682                         goto try_at_start;
 683                     goto try_at_offset;
 684                 }
 685             }
 686         }
 687         else {          /* Take into account the floating substring. */
 688             char *last, *last1;
 689             char * const saved_s = s;
 690             SV* must;
 691
 692             t = HOP3c(s, -start_shift, strbeg);
 693             last1 = last =
 694                 HOP3c(strend, -prog->minlen + prog->float_min_offset, strbeg);
 695             if (CHR_DIST((U8*)last, (U8*)t) > prog->float_max_offset)
 696                 last = HOP3c(t, prog->float_max_offset, strend);
 697             s = HOP3c(t, prog->float_min_offset, strend);
 698             if (s < other_last)
 699                 s = other_last;
 700  /* XXXX It is not documented what units *_offsets are in.  Assume bytes.  */
 701             must = do_utf8 ? prog->float_utf8 : prog->float_substr;
 702             /* fbm_instr() takes into account exact value of end-of-str
 703                if the check is SvTAIL(ed).  Since false positives are OK,
 704                and end-of-str is not later than strend we are OK. */
 705             if (must == &PL_sv_undef) {
 706                 s = (char*)NULL;
 707                 DEBUG_r(must = prog->float_utf8);       /* for debug message */
 708             }
 709             else
 710                 s = fbm_instr((unsigned char*)s,
 711                               (unsigned char*)last + SvCUR(must)
 712                                   - (SvTAIL(must)!=0),
 713                               must, multiline ? FBMrf_MULTILINE : 0);
 714             DEBUG_EXECUTE_r({
 715                 RE_PV_QUOTED_DECL(quoted, do_utf8, PERL_DEBUG_PAD_ZERO(0),
 716                     SvPVX_const(must), RE_SV_DUMPLEN(must), 30);
 717                 PerlIO_printf(Perl_debug_log, "%s floating substr %s%s",
 718                     (s ? "Found" : "Contradicts"),
 719                     quoted, RE_SV_TAIL(must));
 720             });
 721             if (!s) {
 722                 if (last1 == last) {
 723                     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 724                                             ", giving up...\n"));
 725                     goto fail_finish;
 726                 }
 727                 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 728                     ", trying anchored starting at offset %ld...\n",
 729                     (long)(saved_s + 1 - i_strpos)));
 730                 other_last = last;
 731                 s = HOP3c(t, 1, strend);
 732                 goto restart;
 733             }
 734             else {
 735                 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
 736                       (long)(s - i_strpos)));
 737                 other_last = s; /* Fix this later. --Hugo */
 738                 s = saved_s;
 739                 if (t == strpos)
 740                     goto try_at_start;
 741                 goto try_at_offset;
 742             }
 743         }
 744     }
 745
 746
 747     t= (char*)HOP3( s, -prog->check_offset_max, (prog->check_offset_max<0) ? strend : strpos);
 748
 749     DEBUG_OPTIMISE_MORE_r(
 750         PerlIO_printf(Perl_debug_log,
 751             "Check offset min:%"IVdf" max:%"IVdf" S:%"IVdf" t:%"IVdf" D:%"IVdf" end:%"IVdf"\n",
 752             (IV)prog->check_offset_min,
 753             (IV)prog->check_offset_max,
 754             (IV)(s-strpos),
 755             (IV)(t-strpos),
 756             (IV)(t-s),
 757             (IV)(strend-strpos)
 758         )
 759     );
 760
 761     if (s - strpos > prog->check_offset_max  /* signed-corrected t > strpos */
 762         && (!do_utf8
 763             || ((t = (char*)reghopmaybe3((U8*)s, -prog->check_offset_max, (U8*) ((prog->check_offset_max<0) ? strend : strpos)))
 764                  && t > strpos)))
 765     {
 766         /* Fixed substring is found far enough so that the match
 767            cannot start at strpos. */
 768       try_at_offset:
 769         if (ml_anch && t[-1] != '\n') {
 770             /* Eventually fbm_*() should handle this, but often
 771                anchored_offset is not 0, so this check will not be wasted. */
 772             /* XXXX In the code below we prefer to look for "^" even in
 773                presence of anchored substrings.  And we search even
 774                beyond the found float position.  These pessimizations
 775                are historical artefacts only.  */
 776           find_anchor:
 777             while (t < strend - prog->minlen) {
 778                 if (*t == '\n') {
 779                     if (t < check_at - prog->check_offset_min) {
 780                         if (do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) {
 781                             /* Since we moved from the found position,
 782                                we definitely contradict the found anchored
 783                                substr.  Due to the above check we do not
 784                                contradict "check" substr.
 785                                Thus we can arrive here only if check substr
 786                                is float.  Redo checking for "other"=="fixed".
 787                              */
 788                             strpos = t + 1;
 789                             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
 790                                 PL_colors[0], PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
 791                             goto do_other_anchored;
 792                         }
 793                         /* We don't contradict the found floating substring. */
 794                         /* XXXX Why not check for STCLASS? */
 795                         s = t + 1;
 796                         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
 797                             PL_colors[0], PL_colors[1], (long)(s - i_strpos)));
 798                         goto set_useful;
 799                     }
 800                     /* Position contradicts check-string */
 801                     /* XXXX probably better to look for check-string
 802                        than for "\n", so one should lower the limit for t? */
 803                     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n",
 804                         PL_colors[0], PL_colors[1], (long)(t + 1 - i_strpos)));
 805                     other_last = strpos = s = t + 1;
 806                     goto restart;
 807                 }
 808                 t++;
 809             }
 810             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
 811                         PL_colors[0], PL_colors[1]));
 812             goto fail_finish;
 813         }
 814         else {
 815             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Starting position does not contradict /%s^%s/m...\n",
 816                         PL_colors[0], PL_colors[1]));
 817         }
 818         s = t;
 819       set_useful:
 820         ++BmUSEFUL(do_utf8 ? prog->check_utf8 : prog->check_substr);    /* hooray/5 */
 821     }
 822     else {
 823         /* The found string does not prohibit matching at strpos,
 824            - no optimization of calling REx engine can be performed,
 825            unless it was an MBOL and we are not after MBOL,
 826            or a future STCLASS check will fail this. */
 827       try_at_start:
 828         /* Even in this situation we may use MBOL flag if strpos is offset
 829            wrt the start of the string. */
 830         if (ml_anch && sv && !SvROK(sv) /* See prev comment on SvROK */
 831             && (strpos != strbeg) && strpos[-1] != '\n'
 832             /* May be due to an implicit anchor of m{.*foo}  */
 833             && !(prog->intflags & PREGf_IMPLICIT))
 834         {
 835             t = strpos;
 836             goto find_anchor;
 837         }
 838         DEBUG_EXECUTE_r( if (ml_anch)
 839             PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n",
 840                           (long)(strpos - i_strpos), PL_colors[0], PL_colors[1]);
 841         );
 842       success_at_start:
 843         if (!(prog->intflags & PREGf_NAUGHTY)   /* XXXX If strpos moved? */
 844             && (do_utf8 ? (
 845                 prog->check_utf8                /* Could be deleted already */
 846                 && --BmUSEFUL(prog->check_utf8) < 0
 847                 && (prog->check_utf8 == prog->float_utf8)
 848             ) : (
 849                 prog->check_substr              /* Could be deleted already */
 850                 && --BmUSEFUL(prog->check_substr) < 0
 851                 && (prog->check_substr == prog->float_substr)
 852             )))
 853         {
 854             /* If flags & SOMETHING - do not do it many times on the same match */
 855             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "... Disabling check substring...\n"));
 856             SvREFCNT_dec(do_utf8 ? prog->check_utf8 : prog->check_substr);
 857             if (do_utf8 ? prog->check_substr : prog->check_utf8)
 858                 SvREFCNT_dec(do_utf8 ? prog->check_substr : prog->check_utf8);
 859             prog->check_substr = prog->check_utf8 = NULL;       /* disable */
 860             prog->float_substr = prog->float_utf8 = NULL;       /* clear */
 861             check = NULL;                       /* abort */
 862             s = strpos;
 863             /* XXXX This is a remnant of the old implementation.  It
 864                     looks wasteful, since now INTUIT can use many
 865                     other heuristics. */
 866             prog->extflags &= ~RXf_USE_INTUIT;
 867         }
 868         else
 869             s = strpos;
 870     }
 871
 872     /* Last resort... */
 873     /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
 874     /* trie stclasses are too expensive to use here, we are better off to
 875        leave it to regmatch itself */
 876     if (progi->regstclass && PL_regkind[OP(progi->regstclass)]!=TRIE) {
 877         /* minlen == 0 is possible if regstclass is \b or \B,
 878            and the fixed substr is ''$.
 879            Since minlen is already taken into account, s+1 is before strend;
 880            accidentally, minlen >= 1 guaranties no false positives at s + 1
 881            even for \b or \B.  But (minlen? 1 : 0) below assumes that
 882            regstclass does not come from lookahead...  */
 883         /* If regstclass takes bytelength more than 1: If charlength==1, OK.
 884            This leaves EXACTF only, which is dealt with in find_byclass().  */
 885         const U8* const str = (U8*)STRING(progi->regstclass);
 886         const int cl_l = (PL_regkind[OP(progi->regstclass)] == EXACT
 887                     ? CHR_DIST(str+STR_LEN(progi->regstclass), str)
 888                     : 1);
 889         char * endpos;
 890         if (prog->anchored_substr || prog->anchored_utf8 || ml_anch)
 891             endpos= HOP3c(s, (prog->minlen ? cl_l : 0), strend);
 892         else if (prog->float_substr || prog->float_utf8)
 893             endpos= HOP3c(HOP3c(check_at, -start_shift, strbeg), cl_l, strend);
 894         else
 895             endpos= strend;
 896
 897         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "start_shift: %"IVdf" check_at: %"IVdf" s: %"IVdf" endpos: %"IVdf"\n",
 898                                       (IV)start_shift, (IV)(check_at - strbeg), (IV)(s - strbeg), (IV)(endpos - strbeg)));
 899
 900         t = s;
 901         s = find_byclass(prog, progi->regstclass, s, endpos, NULL);
 902         if (!s) {
 903 #ifdef DEBUGGING
 904             const char *what = NULL;
 905 #endif
 906             if (endpos == strend) {
 907                 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
 908                                 "Could not match STCLASS...\n") );
 909                 goto fail;
 910             }
 911             DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
 912                                    "This position contradicts STCLASS...\n") );
 913             if ((prog->extflags & RXf_ANCH) && !ml_anch)
 914                 goto fail;
 915             /* Contradict one of substrings */
 916             if (prog->anchored_substr || prog->anchored_utf8) {
 917                 if ((do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) == check) {
 918                     DEBUG_EXECUTE_r( what = "anchored" );
 919                   hop_and_restart:
 920                     s = HOP3c(t, 1, strend);
 921                     if (s + start_shift + end_shift > strend) {
 922                         /* XXXX Should be taken into account earlier? */
 923                         DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
 924                                                "Could not match STCLASS...\n") );
 925                         goto fail;
 926                     }
 927                     if (!check)
 928                         goto giveup;
 929                     DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
 930                                 "Looking for %s substr starting at offset %ld...\n",
 931                                  what, (long)(s + start_shift - i_strpos)) );
 932                     goto restart;
 933                 }
 934                 /* Have both, check_string is floating */
 935                 if (t + start_shift >= check_at) /* Contradicts floating=check */
 936                     goto retry_floating_check;
 937                 /* Recheck anchored substring, but not floating... */
 938                 s = check_at;
 939                 if (!check)
 940                     goto giveup;
 941                 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
 942                           "Looking for anchored substr starting at offset %ld...\n",
 943                           (long)(other_last - i_strpos)) );
 944                 goto do_other_anchored;
 945             }
 946             /* Another way we could have checked stclass at the
 947                current position only: */
 948             if (ml_anch) {
 949                 s = t = t + 1;
 950                 if (!check)
 951                     goto giveup;
 952                 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
 953                           "Looking for /%s^%s/m starting at offset %ld...\n",
 954                           PL_colors[0], PL_colors[1], (long)(t - i_strpos)) );
 955                 goto try_at_offset;
 956             }
 957             if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))     /* Could have been deleted */
 958                 goto fail;
 959             /* Check is floating subtring. */
 960           retry_floating_check:
 961             t = check_at - start_shift;
 962             DEBUG_EXECUTE_r( what = "floating" );
 963             goto hop_and_restart;
 964         }
 965         if (t != s) {
 966             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 967                         "By STCLASS: moving %ld --> %ld\n",
 968                                   (long)(t - i_strpos), (long)(s - i_strpos))
 969                    );
 970         }
 971         else {
 972             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 973                                   "Does not contradict STCLASS...\n");
 974                    );
 975         }
 976     }
 977   giveup:
 978     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%s%s:%s match at offset %ld\n",
 979                           PL_colors[4], (check ? "Guessed" : "Giving up"),
 980                           PL_colors[5], (long)(s - i_strpos)) );
 981     return s;
 982
 983   fail_finish:                          /* Substring not found */
 984     if (prog->check_substr || prog->check_utf8)         /* could be removed already */
 985         BmUSEFUL(do_utf8 ? prog->check_utf8 : prog->check_substr) += 5; /* hooray */
 986   fail:
 987     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
 988                           PL_colors[4], PL_colors[5]));
 989     return NULL;
 990 }
 991
 992
 993
 994 #define REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc, uscan, len,  \
 995 uvc, charid, foldlen, foldbuf, uniflags) STMT_START {                       \
 996     switch (trie_type) {                                                    \
 997     case trie_utf8_fold:                                                    \
 998         if ( foldlen>0 ) {                                                  \
 999             uvc = utf8n_to_uvuni( uscan, UTF8_MAXLEN, &len, uniflags );     \
1000             foldlen -= len;                                                 \
1001             uscan += len;                                                   \
1002             len=0;                                                          \
1003         } else {                                                            \
1004             uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN, &len, uniflags );   \
1005             uvc = to_uni_fold( uvc, foldbuf, &foldlen );                    \
1006             foldlen -= UNISKIP( uvc );                                      \
1007             uscan = foldbuf + UNISKIP( uvc );                               \
1008         }                                                                   \
1009         break;                                                              \
1010     case trie_utf8:                                                         \
1011         uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN, &len, uniflags );       \
1012         break;                                                              \
1013     case trie_plain:                                                        \
1014         uvc = (UV)*uc;                                                      \
1015         len = 1;                                                            \
1016     }                                                                       \
1017                                                                             \
1018     if (uvc < 256) {                                                        \
1019         charid = trie->charmap[ uvc ];                                      \
1020     }                                                                       \
1021     else {                                                                  \
1022         charid = 0;                                                         \
1023         if (widecharmap) {                                                  \
1024             SV** const svpp = hv_fetch(widecharmap,                         \
1025                         (char*)&uvc, sizeof(UV), 0);                        \
1026             if (svpp)                                                       \
1027                 charid = (U16)SvIV(*svpp);                                  \
1028         }                                                                   \
1029     }                                                                       \
1030 } STMT_END
1031
1032 #define REXEC_FBC_EXACTISH_CHECK(CoNd)                  \
1033     if ( (CoNd)                                        \
1034          && (ln == len ||                              \
1035              ibcmp_utf8(s, NULL, 0,  do_utf8,          \
1036                         m, NULL, ln, (bool)UTF))       \
1037          && (!reginfo || regtry(reginfo, &s)) )         \
1038         goto got_it;                                   \
1039     else {                                             \
1040          U8 foldbuf[UTF8_MAXBYTES_CASE+1];             \
1041          uvchr_to_utf8(tmpbuf, c);                     \
1042          f = to_utf8_fold(tmpbuf, foldbuf, &foldlen);  \
1043          if ( f != c                                   \
1044               && (f == c1 || f == c2)                  \
1045               && (ln == foldlen ||                     \
1046                   !ibcmp_utf8((char *) foldbuf,        \
1047                               NULL, foldlen, do_utf8,  \
1048                               m,                       \
1049                               NULL, ln, (bool)UTF))    \
1050               && (!reginfo || regtry(reginfo, &s)) )    \
1051               goto got_it;                             \
1052     }                                                  \
1053     s += len
1054
1055 #define REXEC_FBC_EXACTISH_SCAN(CoNd)                     \
1056 STMT_START {                                              \
1057     while (s <= e) {                                      \
1058         if ( (CoNd)                                       \
1059              && (ln == 1 || !(OP(c) == EXACTF             \
1060                               ? ibcmp(s, m, ln)           \
1061                               : ibcmp_locale(s, m, ln)))  \
1062              && (!reginfo || regtry(reginfo, &s)) )        \
1063             goto got_it;                                  \
1064         s++;                                              \
1065     }                                                     \
1066 } STMT_END
1067
1068 #define REXEC_FBC_UTF8_SCAN(CoDe)                     \
1069 STMT_START {                                          \
1070     while (s + (uskip = UTF8SKIP(s)) <= strend) {     \
1071         CoDe                                          \
1072         s += uskip;                                   \
1073     }                                                 \
1074 } STMT_END
1075
1076 #define REXEC_FBC_SCAN(CoDe)                          \
1077 STMT_START {                                          \
1078     while (s < strend) {                              \
1079         CoDe                                          \
1080         s++;                                          \
1081     }                                                 \
1082 } STMT_END
1083
1084 #define REXEC_FBC_UTF8_CLASS_SCAN(CoNd)               \
1085 REXEC_FBC_UTF8_SCAN(                                  \
1086     if (CoNd) {                                       \
1087         if (tmp && (!reginfo || regtry(reginfo, &s)))  \
1088             goto got_it;                              \
1089         else                                          \
1090             tmp = doevery;                            \
1091     }                                                 \
1092     else                                              \
1093         tmp = 1;                                      \
1094 )
1095
1096 #define REXEC_FBC_CLASS_SCAN(CoNd)                    \
1097 REXEC_FBC_SCAN(                                       \
1098     if (CoNd) {                                       \
1099         if (tmp && (!reginfo || regtry(reginfo, &s)))  \
1100             goto got_it;                              \
1101         else                                          \
1102             tmp = doevery;                            \
1103     }                                                 \
1104     else                                              \
1105         tmp = 1;                                      \
1106 )
1107
1108 #define REXEC_FBC_TRYIT               \
1109 if ((!reginfo || regtry(reginfo, &s))) \
1110     goto got_it
1111
1112 #define REXEC_FBC_CSCAN(CoNdUtF8,CoNd)                         \
1113     if (do_utf8) {                                             \
1114         REXEC_FBC_UTF8_CLASS_SCAN(CoNdUtF8);                   \
1115     }                                                          \
1116     else {                                                     \
1117         REXEC_FBC_CLASS_SCAN(CoNd);                            \
1118     }                                                          \
1119     break
1120
1121 #define REXEC_FBC_CSCAN_PRELOAD(UtFpReLoAd,CoNdUtF8,CoNd)      \
1122     if (do_utf8) {                                             \
1123         UtFpReLoAd;                                            \
1124         REXEC_FBC_UTF8_CLASS_SCAN(CoNdUtF8);                   \
1125     }                                                          \
1126     else {                                                     \
1127         REXEC_FBC_CLASS_SCAN(CoNd);                            \
1128     }                                                          \
1129     break
1130
1131 #define REXEC_FBC_CSCAN_TAINT(CoNdUtF8,CoNd)                   \
1132     PL_reg_flags |= RF_tainted;                                \
1133     if (do_utf8) {                                             \
1134         REXEC_FBC_UTF8_CLASS_SCAN(CoNdUtF8);                   \
1135     }                                                          \
1136     else {                                                     \
1137         REXEC_FBC_CLASS_SCAN(CoNd);                            \
1138     }                                                          \
1139     break
1140
1141 #define DUMP_EXEC_POS(li,s,doutf8) \
1142     dump_exec_pos(li,s,(PL_regeol),(PL_bostr),(PL_reg_starttry),doutf8)
1143
1144 /* We know what class REx starts with.  Try to find this position... */
1145 /* if reginfo is NULL, its a dryrun */
1146 /* annoyingly all the vars in this routine have different names from their counterparts
1147    in regmatch. /grrr */
1148
1149 STATIC char *
1150 S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
1151     const char *strend, regmatch_info *reginfo)
1152 {
1153         dVAR;
1154         const I32 doevery = (prog->intflags & PREGf_SKIP) == 0;
1155         char *m;
1156         STRLEN ln;
1157         STRLEN lnc;
1158         register STRLEN uskip;
1159         unsigned int c1;
1160         unsigned int c2;
1161         char *e;
1162         register I32 tmp = 1;   /* Scratch variable? */
1163         register const bool do_utf8 = PL_reg_match_utf8;
1164         RXi_GET_DECL(prog,progi);
1165
1166         /* We know what class it must start with. */
1167         switch (OP(c)) {
1168         case ANYOF:
1169             if (do_utf8) {
1170                  REXEC_FBC_UTF8_CLASS_SCAN((ANYOF_FLAGS(c) & ANYOF_UNICODE) ||
1171                           !UTF8_IS_INVARIANT((U8)s[0]) ?
1172                           reginclass(prog, c, (U8*)s, 0, do_utf8) :
1173                           REGINCLASS(prog, c, (U8*)s));
1174             }
1175             else {
1176                  while (s < strend) {
1177                       STRLEN skip = 1;
1178
1179                       if (REGINCLASS(prog, c, (U8*)s) ||
1180                           (ANYOF_FOLD_SHARP_S(c, s, strend) &&
1181                            /* The assignment of 2 is intentional:
1182                             * for the folded sharp s, the skip is 2. */
1183                            (skip = SHARP_S_SKIP))) {
1184                            if (tmp && (!reginfo || regtry(reginfo, &s)))
1185                                 goto got_it;
1186                            else
1187                                 tmp = doevery;
1188                       }
1189                       else
1190                            tmp = 1;
1191                       s += skip;
1192                  }
1193             }
1194             break;
1195         case CANY:
1196             REXEC_FBC_SCAN(
1197                 if (tmp && (!reginfo || regtry(reginfo, &s)))
1198                     goto got_it;
1199                 else
1200                     tmp = doevery;
1201             );
1202             break;
1203         case EXACTF:
1204             m   = STRING(c);
1205             ln  = STR_LEN(c);   /* length to match in octets/bytes */
1206             lnc = (I32) ln;     /* length to match in characters */
1207             if (UTF) {
1208                 STRLEN ulen1, ulen2;
1209                 U8 *sm = (U8 *) m;
1210                 U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
1211                 U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
1212                 const U32 uniflags = UTF8_ALLOW_DEFAULT;
1213
1214                 to_utf8_lower((U8*)m, tmpbuf1, &ulen1);
1215                 to_utf8_upper((U8*)m, tmpbuf2, &ulen2);
1216
1217                 c1 = utf8n_to_uvchr(tmpbuf1, UTF8_MAXBYTES_CASE,
1218                                     0, uniflags);
1219                 c2 = utf8n_to_uvchr(tmpbuf2, UTF8_MAXBYTES_CASE,
1220                                     0, uniflags);
1221                 lnc = 0;
1222                 while (sm < ((U8 *) m + ln)) {
1223                     lnc++;
1224                     sm += UTF8SKIP(sm);
1225                 }
1226             }
1227             else {
1228                 c1 = *(U8*)m;
1229                 c2 = PL_fold[c1];
1230             }
1231             goto do_exactf;
1232         case EXACTFL:
1233             m   = STRING(c);
1234             ln  = STR_LEN(c);
1235             lnc = (I32) ln;
1236             c1 = *(U8*)m;
1237             c2 = PL_fold_locale[c1];
1238           do_exactf:
1239             e = HOP3c(strend, -((I32)lnc), s);
1240
1241             if (!reginfo && e < s)
1242                 e = s;                  /* Due to minlen logic of intuit() */
1243
1244             /* The idea in the EXACTF* cases is to first find the
1245              * first character of the EXACTF* node and then, if
1246              * necessary, case-insensitively compare the full
1247              * text of the node.  The c1 and c2 are the first
1248              * characters (though in Unicode it gets a bit
1249              * more complicated because there are more cases
1250              * than just upper and lower: one needs to use
1251              * the so-called folding case for case-insensitive
1252              * matching (called "loose matching" in Unicode).
1253              * ibcmp_utf8() will do just that. */
1254
1255             if (do_utf8) {
1256                 UV c, f;
1257                 U8 tmpbuf [UTF8_MAXBYTES+1];
1258                 STRLEN len, foldlen;
1259                 const U32 uniflags = UTF8_ALLOW_DEFAULT;
1260                 if (c1 == c2) {
1261                     /* Upper and lower of 1st char are equal -
1262                      * probably not a "letter". */
1263                     while (s <= e) {
1264                         c = utf8n_to_uvchr((U8*)s, UTF8_MAXBYTES, &len,
1265                                            uniflags);
1266                         REXEC_FBC_EXACTISH_CHECK(c == c1);
1267                     }
1268                 }
1269                 else {
1270                     while (s <= e) {
1271                       c = utf8n_to_uvchr((U8*)s, UTF8_MAXBYTES, &len,
1272                                            uniflags);
1273
1274                         /* Handle some of the three Greek sigmas cases.
1275                          * Note that not all the possible combinations
1276                          * are handled here: some of them are handled
1277                          * by the standard folding rules, and some of
1278                          * them (the character class or ANYOF cases)
1279                          * are handled during compiletime in
1280                          * regexec.c:S_regclass(). */
1281                         if (c == (UV)UNICODE_GREEK_CAPITAL_LETTER_SIGMA ||
1282                             c == (UV)UNICODE_GREEK_SMALL_LETTER_FINAL_SIGMA)
1283                             c = (UV)UNICODE_GREEK_SMALL_LETTER_SIGMA;
1284
1285                         REXEC_FBC_EXACTISH_CHECK(c == c1 || c == c2);
1286                     }
1287                 }
1288             }
1289             else {
1290                 if (c1 == c2)
1291                     REXEC_FBC_EXACTISH_SCAN(*(U8*)s == c1);
1292                 else
1293                     REXEC_FBC_EXACTISH_SCAN(*(U8*)s == c1 || *(U8*)s == c2);
1294             }
1295             break;
1296         case BOUNDL:
1297             PL_reg_flags |= RF_tainted;
1298             /* FALL THROUGH */
1299         case BOUND:
1300             if (do_utf8) {
1301                 if (s == PL_bostr)
1302                     tmp = '\n';
1303                 else {
1304                     U8 * const r = reghop3((U8*)s, -1, (U8*)PL_bostr);
1305                     tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, UTF8_ALLOW_DEFAULT);
1306                 }
1307                 tmp = ((OP(c) == BOUND ?
1308                         isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
1309                 LOAD_UTF8_CHARCLASS_ALNUM();
1310                 REXEC_FBC_UTF8_SCAN(
1311                     if (tmp == !(OP(c) == BOUND ?
1312                                  (bool)swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
1313                                  isALNUM_LC_utf8((U8*)s)))
1314                     {
1315                         tmp = !tmp;
1316                         REXEC_FBC_TRYIT;
1317                 }
1318                 );
1319             }
1320             else {
1321                 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
1322                 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1323                 REXEC_FBC_SCAN(
1324                     if (tmp ==
1325                         !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
1326                         tmp = !tmp;
1327                         REXEC_FBC_TRYIT;
1328                 }
1329                 );
1330             }
1331             if ((!prog->minlen && tmp) && (!reginfo || regtry(reginfo, &s)))
1332                 goto got_it;
1333             break;
1334         case NBOUNDL:
1335             PL_reg_flags |= RF_tainted;
1336             /* FALL THROUGH */
1337         case NBOUND:
1338             if (do_utf8) {
1339                 if (s == PL_bostr)
1340                     tmp = '\n';
1341                 else {
1342                     U8 * const r = reghop3((U8*)s, -1, (U8*)PL_bostr);
1343                     tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, UTF8_ALLOW_DEFAULT);
1344                 }
1345                 tmp = ((OP(c) == NBOUND ?
1346                         isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
1347                 LOAD_UTF8_CHARCLASS_ALNUM();
1348                 REXEC_FBC_UTF8_SCAN(
1349                     if (tmp == !(OP(c) == NBOUND ?
1350                                  (bool)swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
1351                                  isALNUM_LC_utf8((U8*)s)))
1352                         tmp = !tmp;
1353                     else REXEC_FBC_TRYIT;
1354                 );
1355             }
1356             else {
1357                 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
1358                 tmp = ((OP(c) == NBOUND ?
1359                         isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1360                 REXEC_FBC_SCAN(
1361                     if (tmp ==
1362                         !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
1363                         tmp = !tmp;
1364                     else REXEC_FBC_TRYIT;
1365                 );
1366             }
1367             if ((!prog->minlen && !tmp) && (!reginfo || regtry(reginfo, &s)))
1368                 goto got_it;
1369             break;
1370         case ALNUM:
1371             REXEC_FBC_CSCAN_PRELOAD(
1372                 LOAD_UTF8_CHARCLASS_ALNUM(),
1373                 swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8),
1374                 isALNUM(*s)
1375             );
1376         case ALNUML:
1377             REXEC_FBC_CSCAN_TAINT(
1378                 isALNUM_LC_utf8((U8*)s),
1379                 isALNUM_LC(*s)
1380             );
1381         case NALNUM:
1382             REXEC_FBC_CSCAN_PRELOAD(
1383                 LOAD_UTF8_CHARCLASS_ALNUM(),
1384                 !swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8),
1385                 !isALNUM(*s)
1386             );
1387         case NALNUML:
1388             REXEC_FBC_CSCAN_TAINT(
1389                 !isALNUM_LC_utf8((U8*)s),
1390                 !isALNUM_LC(*s)
1391             );
1392         case SPACE:
1393             REXEC_FBC_CSCAN_PRELOAD(
1394                 LOAD_UTF8_CHARCLASS_SPACE(),
1395                 *s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8),
1396                 isSPACE(*s)
1397             );
1398         case SPACEL:
1399             REXEC_FBC_CSCAN_TAINT(
1400                 *s == ' ' || isSPACE_LC_utf8((U8*)s),
1401                 isSPACE_LC(*s)
1402             );
1403         case NSPACE:
1404             REXEC_FBC_CSCAN_PRELOAD(
1405                 LOAD_UTF8_CHARCLASS_SPACE(),
1406                 !(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8)),
1407                 !isSPACE(*s)
1408             );
1409         case NSPACEL:
1410             REXEC_FBC_CSCAN_TAINT(
1411                 !(*s == ' ' || isSPACE_LC_utf8((U8*)s)),
1412                 !isSPACE_LC(*s)
1413             );
1414         case DIGIT:
1415             REXEC_FBC_CSCAN_PRELOAD(
1416                 LOAD_UTF8_CHARCLASS_DIGIT(),
1417                 swash_fetch(PL_utf8_digit,(U8*)s, do_utf8),
1418                 isDIGIT(*s)
1419             );
1420         case DIGITL:
1421             REXEC_FBC_CSCAN_TAINT(
1422                 isDIGIT_LC_utf8((U8*)s),
1423                 isDIGIT_LC(*s)
1424             );
1425         case NDIGIT:
1426             REXEC_FBC_CSCAN_PRELOAD(
1427                 LOAD_UTF8_CHARCLASS_DIGIT(),
1428                 !swash_fetch(PL_utf8_digit,(U8*)s, do_utf8),
1429                 !isDIGIT(*s)
1430             );
1431         case NDIGITL:
1432             REXEC_FBC_CSCAN_TAINT(
1433                 !isDIGIT_LC_utf8((U8*)s),
1434                 !isDIGIT_LC(*s)
1435             );
1436         case LNBREAK:
1437             REXEC_FBC_CSCAN(
1438                 is_LNBREAK_utf8(s),
1439                 is_LNBREAK_latin1(s)
1440             );
1441         case VERTWS:
1442             REXEC_FBC_CSCAN(
1443                 is_VERTWS_utf8(s),
1444                 is_VERTWS_latin1(s)
1445             );
1446         case NVERTWS:
1447             REXEC_FBC_CSCAN(
1448                 !is_VERTWS_utf8(s),
1449                 !is_VERTWS_latin1(s)
1450             );
1451         case HORIZWS:
1452             REXEC_FBC_CSCAN(
1453                 is_HORIZWS_utf8(s),
1454                 is_HORIZWS_latin1(s)
1455             );
1456         case NHORIZWS:
1457             REXEC_FBC_CSCAN(
1458                 !is_HORIZWS_utf8(s),
1459                 !is_HORIZWS_latin1(s)
1460             );
1461         case AHOCORASICKC:
1462         case AHOCORASICK:
1463             {
1464                 const enum { trie_plain, trie_utf8, trie_utf8_fold }
1465                     trie_type = do_utf8 ?
1466                           (c->flags == EXACT ? trie_utf8 : trie_utf8_fold)
1467                         : trie_plain;
1468                 /* what trie are we using right now */
1469                 reg_ac_data *aho
1470                     = (reg_ac_data*)progi->data->data[ ARG( c ) ];
1471                 reg_trie_data *trie
1472                     = (reg_trie_data*)progi->data->data[ aho->trie ];
1473                 HV *widecharmap = (HV*) progi->data->data[ aho->trie + 1 ];
1474
1475                 const char *last_start = strend - trie->minlen;
1476 #ifdef DEBUGGING
1477                 const char *real_start = s;
1478 #endif
1479                 STRLEN maxlen = trie->maxlen;
1480                 SV *sv_points;
1481                 U8 **points; /* map of where we were in the input string
1482                                 when reading a given char. For ASCII this
1483                                 is unnecessary overhead as the relationship
1484                                 is always 1:1, but for Unicode, especially
1485                                 case folded Unicode this is not true. */
1486                 U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ];
1487                 U8 *bitmap=NULL;
1488
1489
1490                 GET_RE_DEBUG_FLAGS_DECL;
1491
1492                 /* We can't just allocate points here. We need to wrap it in
1493                  * an SV so it gets freed properly if there is a croak while
1494                  * running the match */
1495                 ENTER;
1496                 SAVETMPS;
1497                 sv_points=newSV(maxlen * sizeof(U8 *));
1498                 SvCUR_set(sv_points,
1499                     maxlen * sizeof(U8 *));
1500                 SvPOK_on(sv_points);
1501                 sv_2mortal(sv_points);
1502                 points=(U8**)SvPV_nolen(sv_points );
1503                 if ( trie_type != trie_utf8_fold
1504                      && (trie->bitmap || OP(c)==AHOCORASICKC) )
1505                 {
1506                     if (trie->bitmap)
1507                         bitmap=(U8*)trie->bitmap;
1508                     else
1509                         bitmap=(U8*)ANYOF_BITMAP(c);
1510                 }
1511                 /* this is the Aho-Corasick algorithm modified a touch
1512                    to include special handling for long "unknown char"
1513                    sequences. The basic idea being that we use AC as long
1514                    as we are dealing with a possible matching char, when
1515                    we encounter an unknown char (and we have not encountered
1516                    an accepting state) we scan forward until we find a legal
1517                    starting char.
1518                    AC matching is basically that of trie matching, except
1519                    that when we encounter a failing transition, we fall back
1520                    to the current states "fail state", and try the current char
1521                    again, a process we repeat until we reach the root state,
1522                    state 1, or a legal transition. If we fail on the root state
1523                    then we can either terminate if we have reached an accepting
1524                    state previously, or restart the entire process from the beginning
1525                    if we have not.
1526
1527                  */
1528                 while (s <= last_start) {
1529                     const U32 uniflags = UTF8_ALLOW_DEFAULT;
1530                     U8 *uc = (U8*)s;
1531                     U16 charid = 0;
1532                     U32 base = 1;
1533                     U32 state = 1;
1534                     UV uvc = 0;
1535                     STRLEN len = 0;
1536                     STRLEN foldlen = 0;
1537                     U8 *uscan = (U8*)NULL;
1538                     U8 *leftmost = NULL;
1539 #ifdef DEBUGGING
1540                     U32 accepted_word= 0;
1541 #endif
1542                     U32 pointpos = 0;
1543
1544                     while ( state && uc <= (U8*)strend ) {
1545                         int failed=0;
1546                         U32 word = aho->states[ state ].wordnum;
1547
1548                         if( state==1 ) {
1549                             if ( bitmap ) {
1550                                 DEBUG_TRIE_EXECUTE_r(
1551                                     if ( uc <= (U8*)last_start && !BITMAP_TEST(bitmap,*uc) ) {
1552                                         dump_exec_pos( (char *)uc, c, strend, real_start,
1553                                             (char *)uc, do_utf8 );
1554                                         PerlIO_printf( Perl_debug_log,
1555                                             " Scanning for legal start char...\n");
1556                                     }
1557                                 );
1558                                 while ( uc <= (U8*)last_start  && !BITMAP_TEST(bitmap,*uc) ) {
1559                                     uc++;
1560                                 }
1561                                 s= (char *)uc;
1562                             }
1563                             if (uc >(U8*)last_start) break;
1564                         }
1565
1566                         if ( word ) {
1567                             U8 *lpos= points[ (pointpos - trie->wordlen[word-1] ) % maxlen ];
1568                             if (!leftmost || lpos < leftmost) {
1569                                 DEBUG_r(accepted_word=word);
1570                                 leftmost= lpos;
1571                             }
1572                             if (base==0) break;
1573
1574                         }
1575                         points[pointpos++ % maxlen]= uc;
1576                         REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc,
1577                                              uscan, len, uvc, charid, foldlen,
1578                                              foldbuf, uniflags);
1579                         DEBUG_TRIE_EXECUTE_r({
1580                             dump_exec_pos( (char *)uc, c, strend, real_start,
1581                                 s,   do_utf8 );
1582                             PerlIO_printf(Perl_debug_log,
1583                                 " Charid:%3u CP:%4"UVxf" ",
1584                                  charid, uvc);
1585                         });
1586
1587                         do {
1588 #ifdef DEBUGGING
1589                             word = aho->states[ state ].wordnum;
1590 #endif
1591                             base = aho->states[ state ].trans.base;
1592
1593                             DEBUG_TRIE_EXECUTE_r({
1594                                 if (failed)
1595                                     dump_exec_pos( (char *)uc, c, strend, real_start,
1596                                         s,   do_utf8 );
1597                                 PerlIO_printf( Perl_debug_log,
1598                                     "%sState: %4"UVxf", word=%"UVxf,
1599                                     failed ? " Fail transition to " : "",
1600                                     (UV)state, (UV)word);
1601                             });
1602                             if ( base ) {
1603                                 U32 tmp;
1604                                 if (charid &&
1605                                      (base + charid > trie->uniquecharcount )
1606                                      && (base + charid - 1 - trie->uniquecharcount
1607                                             < trie->lasttrans)
1608                                      && trie->trans[base + charid - 1 -
1609                                             trie->uniquecharcount].check == state
1610                                      && (tmp=trie->trans[base + charid - 1 -
1611                                         trie->uniquecharcount ].next))
1612                                 {
1613                                     DEBUG_TRIE_EXECUTE_r(
1614                                         PerlIO_printf( Perl_debug_log," - legal\n"));
1615                                     state = tmp;
1616                                     break;
1617                                 }
1618                                 else {
1619                                     DEBUG_TRIE_EXECUTE_r(
1620                                         PerlIO_printf( Perl_debug_log," - fail\n"));
1621                                     failed = 1;
1622                                     state = aho->fail[state];
1623                                 }
1624                             }
1625                             else {
1626                                 /* we must be accepting here */
1627                                 DEBUG_TRIE_EXECUTE_r(
1628                                         PerlIO_printf( Perl_debug_log," - accepting\n"));
1629                                 failed = 1;
1630                                 break;
1631                             }
1632                         } while(state);
1633                         uc += len;
1634                         if (failed) {
1635                             if (leftmost)
1636                                 break;
1637                             if (!state) state = 1;
1638                         }
1639                     }
1640                     if ( aho->states[ state ].wordnum ) {
1641                         U8 *lpos = points[ (pointpos - trie->wordlen[aho->states[ state ].wordnum-1]) % maxlen ];
1642                         if (!leftmost || lpos < leftmost) {
1643                             DEBUG_r(accepted_word=aho->states[ state ].wordnum);
1644                             leftmost = lpos;
1645                         }
1646                     }
1647                     if (leftmost) {
1648                         s = (char*)leftmost;
1649                         DEBUG_TRIE_EXECUTE_r({
1650                             PerlIO_printf(
1651                                 Perl_debug_log,"Matches word #%"UVxf" at position %"IVdf". Trying full pattern...\n",
1652                                 (UV)accepted_word, (IV)(s - real_start)
1653                             );
1654                         });
1655                         if (!reginfo || regtry(reginfo, &s)) {
1656                             FREETMPS;
1657                             LEAVE;
1658                             goto got_it;
1659                         }
1660                         s = HOPc(s,1);
1661                         DEBUG_TRIE_EXECUTE_r({
1662                             PerlIO_printf( Perl_debug_log,"Pattern failed. Looking for new start point...\n");
1663                         });
1664                     } else {
1665                         DEBUG_TRIE_EXECUTE_r(
1666                             PerlIO_printf( Perl_debug_log,"No match.\n"));
1667                         break;
1668                     }
1669                 }
1670                 FREETMPS;
1671                 LEAVE;
1672             }
1673             break;
1674         default:
1675             Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
1676             break;
1677         }
1678         return 0;
1679       got_it:
1680         return s;
1681 }
1682
1683 static void
1684 S_swap_match_buff (pTHX_ regexp *prog) {
1685     regexp_paren_pair *t;
1686
1687     if (!prog->swap) {
1688     /* We have to be careful. If the previous successful match
1689        was from this regex we don't want a subsequent paritally
1690        successful match to clobber the old results.
1691        So when we detect this possibility we add a swap buffer
1692        to the re, and switch the buffer each match. If we fail
1693        we switch it back, otherwise we leave it swapped.
1694     */
1695         Newxz(prog->swap, (prog->nparens + 1), regexp_paren_pair);
1696     }
1697     t = prog->swap;
1698     prog->swap = prog->offs;
1699     prog->offs = t;
1700 }
1701
1702
1703 /*
1704  - regexec_flags - match a regexp against a string
1705  */
1706 I32
1707 Perl_regexec_flags(pTHX_ REGEXP * const prog, char *stringarg, register char *strend,
1708               char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
1709 /* strend: pointer to null at end of string */
1710 /* strbeg: real beginning of string */
1711 /* minend: end of match must be >=minend after stringarg. */
1712 /* data: May be used for some additional optimizations.
1713          Currently its only used, with a U32 cast, for transmitting
1714          the ganch offset when doing a /g match. This will change */
1715 /* nosave: For optimizations. */
1716 {
1717     dVAR;
1718     /*register*/ char *s;
1719     register regnode *c;
1720     /*register*/ char *startpos = stringarg;
1721     I32 minlen;         /* must match at least this many chars */
1722     I32 dontbother = 0; /* how many characters not to try at end */
1723     I32 end_shift = 0;                  /* Same for the end. */         /* CC */
1724     I32 scream_pos = -1;                /* Internal iterator of scream. */
1725     char *scream_olds = NULL;
1726     const bool do_utf8 = (bool)DO_UTF8(sv);
1727     I32 multiline;
1728     RXi_GET_DECL(prog,progi);
1729     regmatch_info reginfo;  /* create some info to pass to regtry etc */
1730     bool swap_on_fail = 0;
1731
1732     GET_RE_DEBUG_FLAGS_DECL;
1733
1734     PERL_UNUSED_ARG(data);
1735
1736     /* Be paranoid... */
1737     if (prog == NULL || startpos == NULL) {
1738         Perl_croak(aTHX_ "NULL regexp parameter");
1739         return 0;
1740     }
1741
1742     multiline = prog->extflags & RXf_PMf_MULTILINE;
1743     reginfo.prog = prog;
1744
1745     RX_MATCH_UTF8_set(prog, do_utf8);
1746     DEBUG_EXECUTE_r(
1747         debug_start_match(prog, do_utf8, startpos, strend,
1748         "Matching");
1749     );
1750
1751     minlen = prog->minlen;
1752
1753     if (strend - startpos < (minlen+(prog->check_offset_min<0?prog->check_offset_min:0))) {
1754         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
1755                               "String too short [regexec_flags]...\n"));
1756         goto phooey;
1757     }
1758
1759
1760     /* Check validity of program. */
1761     if (UCHARAT(progi->program) != REG_MAGIC) {
1762         Perl_croak(aTHX_ "corrupted regexp program");
1763     }
1764
1765     PL_reg_flags = 0;
1766     PL_reg_eval_set = 0;
1767     PL_reg_maxiter = 0;
1768
1769     if (prog->extflags & RXf_UTF8)
1770         PL_reg_flags |= RF_utf8;
1771
1772     /* Mark beginning of line for ^ and lookbehind. */
1773     reginfo.bol = startpos; /* XXX not used ??? */
1774     PL_bostr  = strbeg;
1775     reginfo.sv = sv;
1776
1777     /* Mark end of line for $ (and such) */
1778     PL_regeol = strend;
1779
1780     /* see how far we have to get to not match where we matched before */
1781     reginfo.till = startpos+minend;
1782
1783     /* If there is a "must appear" string, look for it. */
1784     s = startpos;
1785
1786     if (prog->extflags & RXf_GPOS_SEEN) { /* Need to set reginfo->ganch */
1787         MAGIC *mg;
1788
1789         if (flags & REXEC_IGNOREPOS)    /* Means: check only at start */
1790             reginfo.ganch = startpos + prog->gofs;
1791         else if (sv && SvTYPE(sv) >= SVt_PVMG
1792                   && SvMAGIC(sv)
1793                   && (mg = mg_find(sv, PERL_MAGIC_regex_global))
1794                   && mg->mg_len >= 0) {
1795             reginfo.ganch = strbeg + mg->mg_len;        /* Defined pos() */
1796             if (prog->extflags & RXf_ANCH_GPOS) {
1797                 if (s > reginfo.ganch)
1798                     goto phooey;
1799                 s = reginfo.ganch - prog->gofs;
1800             }
1801         }
1802         else if (data) {
1803             reginfo.ganch = strbeg + PTR2UV(data);
1804         } else                          /* pos() not defined */
1805             reginfo.ganch = strbeg;
1806     }
1807     if (PL_curpm && (PM_GETRE(PL_curpm) == prog)) {
1808         swap_on_fail = 1;
1809         swap_match_buff(prog); /* do we need a save destructor here for
1810                                   eval dies? */
1811     }
1812     if (!(flags & REXEC_CHECKED) && (prog->check_substr != NULL || prog->check_utf8 != NULL)) {
1813         re_scream_pos_data d;
1814
1815         d.scream_olds = &scream_olds;
1816         d.scream_pos = &scream_pos;
1817         s = re_intuit_start(prog, sv, s, strend, flags, &d);
1818         if (!s) {
1819             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Not present...\n"));
1820             goto phooey;        /* not present */
1821         }
1822     }
1823
1824
1825
1826     /* Simplest case:  anchored match need be tried only once. */
1827     /*  [unless only anchor is BOL and multiline is set] */
1828     if (prog->extflags & (RXf_ANCH & ~RXf_ANCH_GPOS)) {
1829         if (s == startpos && regtry(&reginfo, &startpos))
1830             goto got_it;
1831         else if (multiline || (prog->intflags & PREGf_IMPLICIT)
1832                  || (prog->extflags & RXf_ANCH_MBOL)) /* XXXX SBOL? */
1833         {
1834             char *end;
1835
1836             if (minlen)
1837                 dontbother = minlen - 1;
1838             end = HOP3c(strend, -dontbother, strbeg) - 1;
1839             /* for multiline we only have to try after newlines */
1840             if (prog->check_substr || prog->check_utf8) {
1841                 if (s == startpos)
1842                     goto after_try;
1843                 while (1) {
1844                     if (regtry(&reginfo, &s))
1845                         goto got_it;
1846                   after_try:
1847                     if (s > end)
1848                         goto phooey;
1849                     if (prog->extflags & RXf_USE_INTUIT) {
1850                         s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
1851                         if (!s)
1852                             goto phooey;
1853                     }
1854                     else
1855                         s++;
1856                 }
1857             } else {
1858                 if (s > startpos)
1859                     s--;
1860                 while (s < end) {
1861                     if (*s++ == '\n') { /* don't need PL_utf8skip here */
1862                         if (regtry(&reginfo, &s))
1863                             goto got_it;
1864                     }
1865                 }
1866             }
1867         }
1868         goto phooey;
1869     } else if (RXf_GPOS_CHECK == (prog->extflags & RXf_GPOS_CHECK))
1870     {
1871         /* the warning about reginfo.ganch being used without intialization
1872            is bogus -- we set it above, when prog->extflags & RXf_GPOS_SEEN
1873            and we only enter this block when the same bit is set. */
1874         char *tmp_s = reginfo.ganch - prog->gofs;
1875         if (regtry(&reginfo, &tmp_s))
1876             goto got_it;
1877         goto phooey;
1878     }
1879
1880     /* Messy cases:  unanchored match. */
1881     if ((prog->anchored_substr || prog->anchored_utf8) && prog->intflags & PREGf_SKIP) {
1882         /* we have /x+whatever/ */
1883         /* it must be a one character string (XXXX Except UTF?) */
1884         char ch;
1885 #ifdef DEBUGGING
1886         int did_match = 0;
1887 #endif
1888         if (!(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr))
1889             do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1890         ch = SvPVX_const(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr)[0];
1891
1892         if (do_utf8) {
1893             REXEC_FBC_SCAN(
1894                 if (*s == ch) {
1895                     DEBUG_EXECUTE_r( did_match = 1 );
1896                     if (regtry(&reginfo, &s)) goto got_it;
1897                     s += UTF8SKIP(s);
1898                     while (s < strend && *s == ch)
1899                         s += UTF8SKIP(s);
1900                 }
1901             );
1902         }
1903         else {
1904             REXEC_FBC_SCAN(
1905                 if (*s == ch) {
1906                     DEBUG_EXECUTE_r( did_match = 1 );
1907                     if (regtry(&reginfo, &s)) goto got_it;
1908                     s++;
1909                     while (s < strend && *s == ch)
1910                         s++;
1911                 }
1912             );
1913         }
1914         DEBUG_EXECUTE_r(if (!did_match)
1915                 PerlIO_printf(Perl_debug_log,
1916                                   "Did not find anchored character...\n")
1917                );
1918     }
1919     else if (prog->anchored_substr != NULL
1920               || prog->anchored_utf8 != NULL
1921               || ((prog->float_substr != NULL || prog->float_utf8 != NULL)
1922                   && prog->float_max_offset < strend - s)) {
1923         SV *must;
1924         I32 back_max;
1925         I32 back_min;
1926         char *last;
1927         char *last1;            /* Last position checked before */
1928 #ifdef DEBUGGING
1929         int did_match = 0;
1930 #endif
1931         if (prog->anchored_substr || prog->anchored_utf8) {
1932             if (!(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr))
1933                 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1934             must = do_utf8 ? prog->anchored_utf8 : prog->anchored_substr;
1935             back_max = back_min = prog->anchored_offset;
1936         } else {
1937             if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))
1938                 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1939             must = do_utf8 ? prog->float_utf8 : prog->float_substr;
1940             back_max = prog->float_max_offset;
1941             back_min = prog->float_min_offset;
1942         }
1943
1944
1945         if (must == &PL_sv_undef)
1946             /* could not downgrade utf8 check substring, so must fail */
1947             goto phooey;
1948
1949         if (back_min<0) {
1950             last = strend;
1951         } else {
1952             last = HOP3c(strend,        /* Cannot start after this */
1953                   -(I32)(CHR_SVLEN(must)
1954                          - (SvTAIL(must) != 0) + back_min), strbeg);
1955         }
1956         if (s > PL_bostr)
1957             last1 = HOPc(s, -1);
1958         else
1959             last1 = s - 1;      /* bogus */
1960
1961         /* XXXX check_substr already used to find "s", can optimize if
1962            check_substr==must. */
1963         scream_pos = -1;
1964         dontbother = end_shift;
1965         strend = HOPc(strend, -dontbother);
1966         while ( (s <= last) &&
1967                 ((flags & REXEC_SCREAM)
1968                  ? (s = screaminstr(sv, must, HOP3c(s, back_min, (back_min<0 ? strbeg : strend)) - strbeg,
1969                                     end_shift, &scream_pos, 0))
1970                  : (s = fbm_instr((unsigned char*)HOP3(s, back_min, (back_min<0 ? strbeg : strend)),
1971                                   (unsigned char*)strend, must,
1972                                   multiline ? FBMrf_MULTILINE : 0))) ) {
1973             /* we may be pointing at the wrong string */
1974             if ((flags & REXEC_SCREAM) && RX_MATCH_COPIED(prog))
1975                 s = strbeg + (s - SvPVX_const(sv));
1976             DEBUG_EXECUTE_r( did_match = 1 );
1977             if (HOPc(s, -back_max) > last1) {
1978                 last1 = HOPc(s, -back_min);
1979                 s = HOPc(s, -back_max);
1980             }
1981             else {
1982                 char * const t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
1983
1984                 last1 = HOPc(s, -back_min);
1985                 s = t;
1986             }
1987             if (do_utf8) {
1988                 while (s <= last1) {
1989                     if (regtry(&reginfo, &s))
1990                         goto got_it;
1991                     s += UTF8SKIP(s);
1992                 }
1993             }
1994             else {
1995                 while (s <= last1) {
1996                     if (regtry(&reginfo, &s))
1997                         goto got_it;
1998                     s++;
1999                 }
2000             }
2001         }
2002         DEBUG_EXECUTE_r(if (!did_match) {
2003             RE_PV_QUOTED_DECL(quoted, do_utf8, PERL_DEBUG_PAD_ZERO(0),
2004                 SvPVX_const(must), RE_SV_DUMPLEN(must), 30);
2005             PerlIO_printf(Perl_debug_log, "Did not find %s substr %s%s...\n",
2006                               ((must == prog->anchored_substr || must == prog->anchored_utf8)
2007                                ? "anchored" : "floating"),
2008                 quoted, RE_SV_TAIL(must));
2009         });
2010         goto phooey;
2011     }
2012     else if ( (c = progi->regstclass) ) {
2013         if (minlen) {
2014             const OPCODE op = OP(progi->regstclass);
2015             /* don't bother with what can't match */
2016             if (PL_regkind[op] != EXACT && op != CANY && PL_regkind[op] != TRIE)
2017                 strend = HOPc(strend, -(minlen - 1));
2018         }
2019         DEBUG_EXECUTE_r({
2020             SV * const prop = sv_newmortal();
2021             regprop(prog, prop, c);
2022             {
2023                 RE_PV_QUOTED_DECL(quoted,do_utf8,PERL_DEBUG_PAD_ZERO(1),
2024                     s,strend-s,60);
2025                 PerlIO_printf(Perl_debug_log,
2026                     "Matching stclass %.*s against %s (%d chars)\n",
2027                     (int)SvCUR(prop), SvPVX_const(prop),
2028                      quoted, (int)(strend - s));
2029             }
2030         });
2031         if (find_byclass(prog, c, s, strend, &reginfo))
2032             goto got_it;
2033         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass... [regexec_flags]\n"));
2034     }
2035     else {
2036         dontbother = 0;
2037         if (prog->float_substr != NULL || prog->float_utf8 != NULL) {
2038             /* Trim the end. */
2039             char *last;
2040             SV* float_real;
2041
2042             if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))
2043                 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
2044             float_real = do_utf8 ? prog->float_utf8 : prog->float_substr;
2045
2046             if (flags & REXEC_SCREAM) {
2047                 last = screaminstr(sv, float_real, s - strbeg,
2048                                    end_shift, &scream_pos, 1); /* last one */
2049                 if (!last)
2050                     last = scream_olds; /* Only one occurrence. */
2051                 /* we may be pointing at the wrong string */
2052                 else if (RX_MATCH_COPIED(prog))
2053                     s = strbeg + (s - SvPVX_const(sv));
2054             }
2055             else {
2056                 STRLEN len;
2057                 const char * const little = SvPV_const(float_real, len);
2058
2059                 if (SvTAIL(float_real)) {
2060                     if (memEQ(strend - len + 1, little, len - 1))
2061                         last = strend - len + 1;
2062                     else if (!multiline)
2063                         last = memEQ(strend - len, little, len)
2064                             ? strend - len : NULL;
2065                     else
2066                         goto find_last;
2067                 } else {
2068                   find_last:
2069                     if (len)
2070                         last = rninstr(s, strend, little, little + len);
2071                     else
2072                         last = strend;  /* matching "$" */
2073                 }
2074             }
2075             if (last == NULL) {
2076                 DEBUG_EXECUTE_r(
2077                     PerlIO_printf(Perl_debug_log,
2078                         "%sCan't trim the tail, match fails (should not happen)%s\n",
2079                         PL_colors[4], PL_colors[5]));
2080                 goto phooey; /* Should not happen! */
2081             }
2082             dontbother = strend - last + prog->float_min_offset;
2083         }
2084         if (minlen && (dontbother < minlen))
2085             dontbother = minlen - 1;
2086         strend -= dontbother;              /* this one's always in bytes! */
2087         /* We don't know much -- general case. */
2088         if (do_utf8) {
2089             for (;;) {
2090                 if (regtry(&reginfo, &s))
2091                     goto got_it;
2092                 if (s >= strend)
2093                     break;
2094                 s += UTF8SKIP(s);
2095             };
2096         }
2097         else {
2098             do {
2099                 if (regtry(&reginfo, &s))
2100                     goto got_it;
2101             } while (s++ < strend);
2102         }
2103     }
2104
2105     /* Failure. */
2106     goto phooey;
2107
2108 got_it:
2109     RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
2110
2111     if (PL_reg_eval_set)
2112         restore_pos(aTHX_ prog);
2113     if (prog->paren_names)
2114         (void)hv_iterinit(prog->paren_names);
2115
2116     /* make sure $`, $&, $', and $digit will work later */
2117     if ( !(flags & REXEC_NOT_FIRST) ) {
2118         RX_MATCH_COPY_FREE(prog);
2119         if (flags & REXEC_COPY_STR) {
2120             const I32 i = PL_regeol - startpos + (stringarg - strbeg);
2121 #ifdef PERL_OLD_COPY_ON_WRITE
2122             if ((SvIsCOW(sv)
2123                  || (SvFLAGS(sv) & CAN_COW_MASK) == CAN_COW_FLAGS)) {
2124                 if (DEBUG_C_TEST) {
2125                     PerlIO_printf(Perl_debug_log,
2126                                   "Copy on write: regexp capture, type %d\n",
2127                                   (int) SvTYPE(sv));
2128                 }
2129                 prog->saved_copy = sv_setsv_cow(prog->saved_copy, sv);
2130                 prog->subbeg = (char *)SvPVX_const(prog->saved_copy);
2131                 assert (SvPOKp(prog->saved_copy));
2132             } else
2133 #endif
2134             {
2135                 RX_MATCH_COPIED_on(prog);
2136                 s = savepvn(strbeg, i);
2137                 prog->subbeg = s;
2138             }
2139             prog->sublen = i;
2140         }
2141         else {
2142             prog->subbeg = strbeg;
2143             prog->sublen = PL_regeol - strbeg;  /* strend may have been modified */
2144         }
2145     }
2146
2147     return 1;
2148
2149 phooey:
2150     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n",
2151                           PL_colors[4], PL_colors[5]));
2152     if (PL_reg_eval_set)
2153         restore_pos(aTHX_ prog);
2154     if (swap_on_fail)
2155         /* we failed :-( roll it back */
2156         swap_match_buff(prog);
2157
2158     return 0;
2159 }
2160
2161
2162 /*
2163  - regtry - try match at specific point
2164  */
2165 STATIC I32                      /* 0 failure, 1 success */
2166 S_regtry(pTHX_ regmatch_info *reginfo, char **startpos)
2167 {
2168     dVAR;
2169     CHECKPOINT lastcp;
2170     regexp *prog = reginfo->prog;
2171     RXi_GET_DECL(prog,progi);
2172     GET_RE_DEBUG_FLAGS_DECL;
2173     reginfo->cutpoint=NULL;
2174
2175     if ((prog->extflags & RXf_EVAL_SEEN) && !PL_reg_eval_set) {
2176         MAGIC *mg;
2177
2178         PL_reg_eval_set = RS_init;
2179         DEBUG_EXECUTE_r(DEBUG_s(
2180             PerlIO_printf(Perl_debug_log, "  setting stack tmpbase at %"IVdf"\n",
2181                           (IV)(PL_stack_sp - PL_stack_base));
2182             ));
2183         SAVESTACK_CXPOS();
2184         cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
2185         /* Otherwise OP_NEXTSTATE will free whatever on stack now.  */
2186         SAVETMPS;
2187         /* Apparently this is not needed, judging by wantarray. */
2188         /* SAVEI8(cxstack[cxstack_ix].blk_gimme);
2189            cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
2190
2191         if (reginfo->sv) {
2192             /* Make $_ available to executed code. */
2193             if (reginfo->sv != DEFSV) {
2194                 SAVE_DEFSV;
2195                 DEFSV = reginfo->sv;
2196             }
2197
2198             if (!(SvTYPE(reginfo->sv) >= SVt_PVMG && SvMAGIC(reginfo->sv)
2199                   && (mg = mg_find(reginfo->sv, PERL_MAGIC_regex_global)))) {
2200                 /* prepare for quick setting of pos */
2201 #ifdef PERL_OLD_COPY_ON_WRITE
2202                 if (SvIsCOW(reginfo->sv))
2203                     sv_force_normal_flags(reginfo->sv, 0);
2204 #endif
2205                 mg = sv_magicext(reginfo->sv, NULL, PERL_MAGIC_regex_global,
2206                                  &PL_vtbl_mglob, NULL, 0);
2207                 mg->mg_len = -1;
2208             }
2209             PL_reg_magic    = mg;
2210             PL_reg_oldpos   = mg->mg_len;
2211             SAVEDESTRUCTOR_X(restore_pos, prog);
2212         }
2213         if (!PL_reg_curpm) {
2214             Newxz(PL_reg_curpm, 1, PMOP);
2215 #ifdef USE_ITHREADS
2216             {
2217                 SV* const repointer = newSViv(0);
2218                 /* so we know which PL_regex_padav element is PL_reg_curpm */
2219                 SvFLAGS(repointer) |= SVf_BREAK;
2220                 av_push(PL_regex_padav,repointer);
2221                 PL_reg_curpm->op_pmoffset = av_len(PL_regex_padav);
2222                 PL_regex_pad = AvARRAY(PL_regex_padav);
2223             }
2224 #endif
2225         }
2226         PM_SETRE(PL_reg_curpm, prog);
2227         PL_reg_oldcurpm = PL_curpm;
2228         PL_curpm = PL_reg_curpm;
2229         if (RX_MATCH_COPIED(prog)) {
2230             /*  Here is a serious problem: we cannot rewrite subbeg,
2231                 since it may be needed if this match fails.  Thus
2232                 $` inside (?{}) could fail... */
2233             PL_reg_oldsaved = prog->subbeg;
2234             PL_reg_oldsavedlen = prog->sublen;
2235 #ifdef PERL_OLD_COPY_ON_WRITE
2236             PL_nrs = prog->saved_copy;
2237 #endif
2238             RX_MATCH_COPIED_off(prog);
2239         }
2240         else
2241             PL_reg_oldsaved = NULL;
2242         prog->subbeg = PL_bostr;
2243         prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
2244     }
2245     DEBUG_EXECUTE_r(PL_reg_starttry = *startpos);
2246     prog->offs[0].start = *startpos - PL_bostr;
2247     PL_reginput = *startpos;
2248     PL_reglastparen = &prog->lastparen;
2249     PL_reglastcloseparen = &prog->lastcloseparen;
2250     prog->lastparen = 0;
2251     prog->lastcloseparen = 0;
2252     PL_regsize = 0;
2253     PL_regoffs = prog->offs;
2254     if (PL_reg_start_tmpl <= prog->nparens) {
2255         PL_reg_start_tmpl = prog->nparens*3/2 + 3;
2256         if(PL_reg_start_tmp)
2257             Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
2258         else
2259             Newx(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
2260     }
2261
2262     /* XXXX What this code is doing here?!!!  There should be no need
2263        to do this again and again, PL_reglastparen should take care of
2264        this!  --ilya*/
2265
2266     /* Tests pat.t#187 and split.t#{13,14} seem to depend on this code.
2267      * Actually, the code in regcppop() (which Ilya may be meaning by
2268      * PL_reglastparen), is not needed at all by the test suite
2269      * (op/regexp, op/pat, op/split), but that code is needed otherwise
2270      * this erroneously leaves $1 defined: "1" =~ /^(?:(\d)x)?\d$/
2271      * Meanwhile, this code *is* needed for the
2272      * above-mentioned test suite tests to succeed.  The common theme
2273      * on those tests seems to be returning null fields from matches.
2274      * --jhi updated by dapm */
2275 #if 1
2276     if (prog->nparens) {
2277         regexp_paren_pair *pp = PL_regoffs;
2278         register I32 i;
2279         for (i = prog->nparens; i > (I32)*PL_reglastparen; i--) {
2280             ++pp;
2281             pp->start = -1;
2282             pp->end = -1;
2283         }
2284     }
2285 #endif
2286     REGCP_SET(lastcp);
2287     if (regmatch(reginfo, progi->program + 1)) {
2288         PL_regoffs[0].end = PL_reginput - PL_bostr;
2289         return 1;
2290     }
2291     if (reginfo->cutpoint)
2292         *startpos= reginfo->cutpoint;
2293     REGCP_UNWIND(lastcp);
2294     return 0;
2295 }
2296
2297
2298 #define sayYES goto yes
2299 #define sayNO goto no
2300 #define sayNO_SILENT goto no_silent
2301
2302 /* we dont use STMT_START/END here because it leads to
2303    "unreachable code" warnings, which are bogus, but distracting. */
2304 #define CACHEsayNO \
2305     if (ST.cache_mask) \
2306        PL_reg_poscache[ST.cache_offset] |= ST.cache_mask; \
2307     sayNO
2308
2309 /* this is used to determine how far from the left messages like
2310    'failed...' are printed. It should be set such that messages
2311    are inline with the regop output that created them.
2312 */
2313 #define REPORT_CODE_OFF 32
2314
2315
2316 /* Make sure there is a test for this +1 options in re_tests */
2317 #define TRIE_INITAL_ACCEPT_BUFFLEN 4;
2318
2319 #define CHRTEST_UNINIT -1001 /* c1/c2 haven't been calculated yet */
2320 #define CHRTEST_VOID   -1000 /* the c1/c2 "next char" test should be skipped */
2321
2322 #define SLAB_FIRST(s) (&(s)->states[0])
2323 #define SLAB_LAST(s)  (&(s)->states[PERL_REGMATCH_SLAB_SLOTS-1])
2324
2325 /* grab a new slab and return the first slot in it */
2326
2327 STATIC regmatch_state *
2328 S_push_slab(pTHX)
2329 {
2330 #if PERL_VERSION < 9 && !defined(PERL_CORE)
2331     dMY_CXT;
2332 #endif
2333     regmatch_slab *s = PL_regmatch_slab->next;
2334     if (!s) {
2335         Newx(s, 1, regmatch_slab);
2336         s->prev = PL_regmatch_slab;
2337         s->next = NULL;
2338         PL_regmatch_slab->next = s;
2339     }
2340     PL_regmatch_slab = s;
2341     return SLAB_FIRST(s);
2342 }
2343
2344
2345 /* push a new state then goto it */
2346
2347 #define PUSH_STATE_GOTO(state, node) \
2348     scan = node; \
2349     st->resume_state = state; \
2350     goto push_state;
2351
2352 /* push a new state with success backtracking, then goto it */
2353
2354 #define PUSH_YES_STATE_GOTO(state, node) \
2355     scan = node; \
2356     st->resume_state = state; \
2357     goto push_yes_state;
2358
2359
2360
2361 /*
2362
2363 regmatch() - main matching routine
2364
2365 This is basically one big switch statement in a loop. We execute an op,
2366 set 'next' to point the next op, and continue. If we come to a point which
2367 we may need to backtrack to on failure such as (A|B|C), we push a
2368 backtrack state onto the backtrack stack. On failure, we pop the top
2369 state, and re-enter the loop at the state indicated. If there are no more
2370 states to pop, we return failure.
2371
2372 Sometimes we also need to backtrack on success; for example /A+/, where
2373 after successfully matching one A, we need to go back and try to
2374 match another one; similarly for lookahead assertions: if the assertion
2375 completes successfully, we backtrack to the state just before the assertion
2376 and then carry on.  In these cases, the pushed state is marked as
2377 'backtrack on success too'. This marking is in fact done by a chain of
2378 pointers, each pointing to the previous 'yes' state. On success, we pop to
2379 the nearest yes state, discarding any intermediate failure-only states.
2380 Sometimes a yes state is pushed just to force some cleanup code to be
2381 called at the end of a successful match or submatch; e.g. (??{$re}) uses
2382 it to free the inner regex.
2383
2384 Note that failure backtracking rewinds the cursor position, while
2385 success backtracking leaves it alone.
2386
2387 A pattern is complete when the END op is executed, while a subpattern
2388 such as (?=foo) is complete when the SUCCESS op is executed. Both of these
2389 ops trigger the "pop to last yes state if any, otherwise return true"
2390 behaviour.
2391
2392 A common convention in this function is to use A and B to refer to the two
2393 subpatterns (or to the first nodes thereof) in patterns like /A*B/: so A is
2394 the subpattern to be matched possibly multiple times, while B is the entire
2395 rest of the pattern. Variable and state names reflect this convention.
2396
2397 The states in the main switch are the union of ops and failure/success of
2398 substates associated with with that op.  For example, IFMATCH is the op
2399 that does lookahead assertions /(?=A)B/ and so the IFMATCH state means
2400 'execute IFMATCH'; while IFMATCH_A is a state saying that we have just
2401 successfully matched A and IFMATCH_A_fail is a state saying that we have
2402 just failed to match A. Resume states always come in pairs. The backtrack
2403 state we push is marked as 'IFMATCH_A', but when that is popped, we resume
2404 at IFMATCH_A or IFMATCH_A_fail, depending on whether we are backtracking
2405 on success or failure.
2406
2407 The struct that holds a backtracking state is actually a big union, with
2408 one variant for each major type of op. The variable st points to the
2409 top-most backtrack struct. To make the code clearer, within each
2410 block of code we #define ST to alias the relevant union.
2411
2412 Here's a concrete example of a (vastly oversimplified) IFMATCH
2413 implementation:
2414
2415     switch (state) {
2416     ....
2417
2418 #define ST st->u.ifmatch
2419
2420     case IFMATCH: // we are executing the IFMATCH op, (?=A)B
2421         ST.foo = ...; // some state we wish to save
2422         ...
2423         // push a yes backtrack state with a resume value of
2424         // IFMATCH_A/IFMATCH_A_fail, then continue execution at the
2425         // first node of A:
2426         PUSH_YES_STATE_GOTO(IFMATCH_A, A);
2427         // NOTREACHED
2428
2429     case IFMATCH_A: // we have successfully executed A; now continue with B
2430         next = B;
2431         bar = ST.foo; // do something with the preserved value
2432         break;
2433
2434     case IFMATCH_A_fail: // A failed, so the assertion failed
2435         ...;   // do some housekeeping, then ...
2436         sayNO; // propagate the failure
2437
2438 #undef ST
2439
2440     ...
2441     }
2442
2443 For any old-timers reading this who are familiar with the old recursive
2444 approach, the code above is equivalent to:
2445
2446     case IFMATCH: // we are executing the IFMATCH op, (?=A)B
2447     {
2448         int foo = ...
2449         ...
2450         if (regmatch(A)) {
2451             next = B;
2452             bar = foo;
2453             break;
2454         }
2455         ...;   // do some housekeeping, then ...
2456         sayNO; // propagate the failure
2457     }
2458
2459 The topmost backtrack state, pointed to by st, is usually free. If you
2460 want to claim it, populate any ST.foo fields in it with values you wish to
2461 save, then do one of
2462
2463         PUSH_STATE_GOTO(resume_state, node);
2464         PUSH_YES_STATE_GOTO(resume_state, node);
2465
2466 which sets that backtrack state's resume value to 'resume_state', pushes a
2467 new free entry to the top of the backtrack stack, then goes to 'node'.
2468 On backtracking, the free slot is popped, and the saved state becomes the
2469 new free state. An ST.foo field in this new top state can be temporarily
2470 accessed to retrieve values, but once the main loop is re-entered, it
2471 becomes available for reuse.
2472
2473 Note that the depth of the backtrack stack constantly increases during the
2474 left-to-right execution of the pattern, rather than going up and down with
2475 the pattern nesting. For example the stack is at its maximum at Z at the
2476 end of the pattern, rather than at X in the following:
2477
2478     /(((X)+)+)+....(Y)+....Z/
2479
2480 The only exceptions to this are lookahead/behind assertions and the cut,
2481 (?>A), which pop all the backtrack states associated with A before
2482 continuing.
2483
2484 Bascktrack state structs are allocated in slabs of about 4K in size.
2485 PL_regmatch_state and st always point to the currently active state,
2486 and PL_regmatch_slab points to the slab currently containing
2487 PL_regmatch_state.  The first time regmatch() is called, the first slab is
2488 allocated, and is never freed until interpreter destruction. When the slab
2489 is full, a new one is allocated and chained to the end. At exit from
2490 regmatch(), slabs allocated since entry are freed.
2491
2492 */
2493
2494
2495 #define DEBUG_STATE_pp(pp)                                  \
2496     DEBUG_STATE_r({                                         \
2497         DUMP_EXEC_POS(locinput, scan, do_utf8);             \
2498         PerlIO_printf(Perl_debug_log,                       \
2499             "    %*s"pp" %s%s%s%s%s\n",                     \
2500             depth*2, "",                                    \
2501             PL_reg_name[st->resume_state],                     \
2502             ((st==yes_state||st==mark_state) ? "[" : ""),   \
2503             ((st==yes_state) ? "Y" : ""),                   \
2504             ((st==mark_state) ? "M" : ""),                  \
2505             ((st==yes_state||st==mark_state) ? "]" : "")    \
2506         );                                                  \
2507     });
2508
2509
2510 #define REG_NODE_NUM(x) ((x) ? (int)((x)-prog) : -1)
2511
2512 #ifdef DEBUGGING
2513
2514 STATIC void
2515 S_debug_start_match(pTHX_ const regexp *prog, const bool do_utf8,
2516     const char *start, const char *end, const char *blurb)
2517 {
2518     const bool utf8_pat= prog->extflags & RXf_UTF8 ? 1 : 0;
2519     if (!PL_colorset)
2520             reginitcolors();
2521     {
2522         RE_PV_QUOTED_DECL(s0, utf8_pat, PERL_DEBUG_PAD_ZERO(0),
2523             prog->precomp, prog->prelen, 60);
2524
2525         RE_PV_QUOTED_DECL(s1, do_utf8, PERL_DEBUG_PAD_ZERO(1),
2526             start, end - start, 60);
2527
2528         PerlIO_printf(Perl_debug_log,
2529             "%s%s REx%s %s against %s\n",
2530                        PL_colors[4], blurb, PL_colors[5], s0, s1);
2531
2532         if (do_utf8||utf8_pat)
2533             PerlIO_printf(Perl_debug_log, "UTF-8 %s%s%s...\n",
2534                 utf8_pat ? "pattern" : "",
2535                 utf8_pat && do_utf8 ? " and " : "",
2536                 do_utf8 ? "string" : ""
2537             );
2538     }
2539 }
2540
2541 STATIC void
2542 S_dump_exec_pos(pTHX_ const char *locinput,
2543                       const regnode *scan,
2544                       const char *loc_regeol,
2545                       const char *loc_bostr,
2546                       const char *loc_reg_starttry,
2547                       const bool do_utf8)
2548 {
2549     const int docolor = *PL_colors[0] || *PL_colors[2] || *PL_colors[4];
2550     const int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
2551     int l = (loc_regeol - locinput) > taill ? taill : (loc_regeol - locinput);
2552     /* The part of the string before starttry has one color
2553        (pref0_len chars), between starttry and current
2554        position another one (pref_len - pref0_len chars),
2555        after the current position the third one.
2556        We assume that pref0_len <= pref_len, otherwise we
2557        decrease pref0_len.  */
2558     int pref_len = (locinput - loc_bostr) > (5 + taill) - l
2559         ? (5 + taill) - l : locinput - loc_bostr;
2560     int pref0_len;
2561
2562     while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput - pref_len)))
2563         pref_len++;
2564     pref0_len = pref_len  - (locinput - loc_reg_starttry);
2565     if (l + pref_len < (5 + taill) && l < loc_regeol - locinput)
2566         l = ( loc_regeol - locinput > (5 + taill) - pref_len
2567               ? (5 + taill) - pref_len : loc_regeol - locinput);
2568     while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput + l)))
2569         l--;
2570     if (pref0_len < 0)
2571         pref0_len = 0;
2572     if (pref0_len > pref_len)
2573         pref0_len = pref_len;
2574     {
2575         const int is_uni = (do_utf8 && OP(scan) != CANY) ? 1 : 0;
2576
2577         RE_PV_COLOR_DECL(s0,len0,is_uni,PERL_DEBUG_PAD(0),
2578             (locinput - pref_len),pref0_len, 60, 4, 5);
2579
2580         RE_PV_COLOR_DECL(s1,len1,is_uni,PERL_DEBUG_PAD(1),
2581                     (locinput - pref_len + pref0_len),
2582                     pref_len - pref0_len, 60, 2, 3);
2583
2584         RE_PV_COLOR_DECL(s2,len2,is_uni,PERL_DEBUG_PAD(2),
2585                     locinput, loc_regeol - locinput, 10, 0, 1);
2586
2587         const STRLEN tlen=len0+len1+len2;
2588         PerlIO_printf(Perl_debug_log,
2589                     "%4"IVdf" <%.*s%.*s%s%.*s>%*s|",
2590                     (IV)(locinput - loc_bostr),
2591                     len0, s0,
2592                     len1, s1,
2593                     (docolor ? "" : "> <"),
2594                     len2, s2,
2595                     (int)(tlen > 19 ? 0 :  19 - tlen),
2596                     "");
2597     }
2598 }
2599
2600 #endif
2601
2602 /* reg_check_named_buff_matched()
2603  * Checks to see if a named buffer has matched. The data array of
2604  * buffer numbers corresponding to the buffer is expected to reside
2605  * in the regexp->data->data array in the slot stored in the ARG() of
2606  * node involved. Note that this routine doesn't actually care about the
2607  * name, that information is not preserved from compilation to execution.
2608  * Returns the index of the leftmost defined buffer with the given name
2609  * or 0 if non of the buffers matched.
2610  */
2611 STATIC I32
2612 S_reg_check_named_buff_matched(pTHX_ const regexp *rex, const regnode *scan) {
2613     I32 n;
2614     RXi_GET_DECL(rex,rexi);
2615     SV *sv_dat=(SV*)rexi->data->data[ ARG( scan ) ];
2616     I32 *nums=(I32*)SvPVX(sv_dat);
2617     for ( n=0; n<SvIVX(sv_dat); n++ ) {
2618         if ((I32)*PL_reglastparen >= nums[n] &&
2619             PL_regoffs[nums[n]].end != -1)
2620         {
2621             return nums[n];
2622         }
2623     }
2624     return 0;
2625 }
2626
2627
2628 /* free all slabs above current one  - called during LEAVE_SCOPE */
2629
2630 STATIC void
2631 S_clear_backtrack_stack(pTHX_ void *p)
2632 {
2633     regmatch_slab *s = PL_regmatch_slab->next;
2634     PERL_UNUSED_ARG(p);
2635
2636     if (!s)
2637         return;
2638     PL_regmatch_slab->next = NULL;
2639     while (s) {
2640         regmatch_slab * const osl = s;
2641         s = s->next;
2642         Safefree(osl);
2643     }
2644 }
2645
2646
2647 #define SETREX(Re1,Re2) \
2648     if (PL_reg_eval_set) PM_SETRE((PL_reg_curpm), (Re2)); \
2649     Re1 = (Re2)
2650
2651 STATIC I32                      /* 0 failure, 1 success */
2652 S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
2653 {
2654 #if PERL_VERSION < 9 && !defined(PERL_CORE)
2655     dMY_CXT;
2656 #endif
2657     dVAR;
2658     register const bool do_utf8 = PL_reg_match_utf8;
2659     const U32 uniflags = UTF8_ALLOW_DEFAULT;
2660
2661     regexp *rex = reginfo->prog;
2662     RXi_GET_DECL(rex,rexi);
2663
2664     I32 oldsave;
2665
2666     /* the current state. This is a cached copy of PL_regmatch_state */
2667     register regmatch_state *st;
2668
2669     /* cache heavy used fields of st in registers */
2670     register regnode *scan;
2671     register regnode *next;
2672     register U32 n = 0; /* general value; init to avoid compiler warning */
2673     register I32 ln = 0; /* len or last;  init to avoid compiler warning */
2674     register char *locinput = PL_reginput;
2675     register I32 nextchr;   /* is always set to UCHARAT(locinput) */
2676
2677     bool result = 0;        /* return value of S_regmatch */
2678     int depth = 0;          /* depth of backtrack stack */
2679     U32 nochange_depth = 0; /* depth of GOSUB recursion with nochange */
2680     const U32 max_nochange_depth =
2681         (3 * rex->nparens > MAX_RECURSE_EVAL_NOCHANGE_DEPTH) ?
2682         3 * rex->nparens : MAX_RECURSE_EVAL_NOCHANGE_DEPTH;
2683
2684     regmatch_state *yes_state = NULL; /* state to pop to on success of
2685                                                             subpattern */
2686     /* mark_state piggy backs on the yes_state logic so that when we unwind
2687        the stack on success we can update the mark_state as we go */
2688     regmatch_state *mark_state = NULL; /* last mark state we have seen */
2689
2690     regmatch_state *cur_eval = NULL; /* most recent EVAL_AB state */
2691     struct regmatch_state  *cur_curlyx = NULL; /* most recent curlyx */
2692     U32 state_num;
2693     bool no_final = 0;      /* prevent failure from backtracking? */
2694     bool do_cutgroup = 0;   /* no_final only until next branch/trie entry */
2695     char *startpoint = PL_reginput;
2696     SV *popmark = NULL;     /* are we looking for a mark? */
2697     SV *sv_commit = NULL;   /* last mark name seen in failure */
2698     SV *sv_yes_mark = NULL; /* last mark name we have seen
2699                                during a successfull match */
2700     U32 lastopen = 0;       /* last open we saw */
2701     bool has_cutgroup = RX_HAS_CUTGROUP(rex) ? 1 : 0;
2702
2703     SV* const oreplsv = GvSV(PL_replgv);
2704
2705
2706     /* these three flags are set by various ops to signal information to
2707      * the very next op. They have a useful lifetime of exactly one loop
2708      * iteration, and are not preserved or restored by state pushes/pops
2709      */
2710     bool sw = 0;            /* the condition value in (?(cond)a|b) */
2711     bool minmod = 0;        /* the next "{n,m}" is a "{n,m}?" */
2712     int logical = 0;        /* the following EVAL is:
2713                                 0: (?{...})
2714                                 1: (?(?{...})X|Y)
2715                                 2: (??{...})
2716                                or the following IFMATCH/UNLESSM is:
2717                                 false: plain (?=foo)
2718                                 true:  used as a condition: (?(?=foo))
2719                             */
2720
2721 #ifdef DEBUGGING
2722     GET_RE_DEBUG_FLAGS_DECL;
2723 #endif
2724
2725     DEBUG_OPTIMISE_r( DEBUG_EXECUTE_r({
2726             PerlIO_printf(Perl_debug_log,"regmatch start\n");
2727     }));
2728     /* on first ever call to regmatch, allocate first slab */
2729     if (!PL_regmatch_slab) {
2730         Newx(PL_regmatch_slab, 1, regmatch_slab);
2731         PL_regmatch_slab->prev = NULL;
2732         PL_regmatch_slab->next = NULL;
2733         PL_regmatch_state = SLAB_FIRST(PL_regmatch_slab);
2734     }
2735
2736     oldsave = PL_savestack_ix;
2737     SAVEDESTRUCTOR_X(S_clear_backtrack_stack, NULL);
2738     SAVEVPTR(PL_regmatch_slab);
2739     SAVEVPTR(PL_regmatch_state);
2740
2741     /* grab next free state slot */
2742     st = ++PL_regmatch_state;
2743     if (st >  SLAB_LAST(PL_regmatch_slab))
2744         st = PL_regmatch_state = S_push_slab(aTHX);
2745
2746     /* Note that nextchr is a byte even in UTF */
2747     nextchr = UCHARAT(locinput);
2748     scan = prog;
2749     while (scan != NULL) {
2750
2751         DEBUG_EXECUTE_r( {
2752             SV * const prop = sv_newmortal();
2753             regnode *rnext=regnext(scan);
2754             DUMP_EXEC_POS( locinput, scan, do_utf8 );
2755             regprop(rex, prop, scan);
2756
2757             PerlIO_printf(Perl_debug_log,
2758                     "%3"IVdf":%*s%s(%"IVdf")\n",
2759                     (IV)(scan - rexi->program), depth*2, "",
2760                     SvPVX_const(prop),
2761                     (PL_regkind[OP(scan)] == END || !rnext) ?
2762                         0 : (IV)(rnext - rexi->program));
2763         });
2764
2765         next = scan + NEXT_OFF(scan);
2766         if (next == scan)
2767             next = NULL;
2768         state_num = OP(scan);
2769
2770       reenter_switch:
2771         switch (state_num) {
2772         case BOL:
2773             if (locinput == PL_bostr)
2774             {
2775                 /* reginfo->till = reginfo->bol; */
2776                 break;
2777             }
2778             sayNO;
2779         case MBOL:
2780             if (locinput == PL_bostr ||
2781                 ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n'))
2782             {
2783                 break;
2784             }
2785             sayNO;
2786         case SBOL:
2787             if (locinput == PL_bostr)
2788                 break;
2789             sayNO;
2790         case GPOS:
2791             if (locinput == reginfo->ganch)
2792                 break;
2793             sayNO;
2794
2795         case KEEPS:
2796             /* update the startpoint */
2797             st->u.keeper.val = PL_regoffs[0].start;
2798             PL_reginput = locinput;
2799             PL_regoffs[0].start = locinput - PL_bostr;
2800             PUSH_STATE_GOTO(KEEPS_next, next);
2801             /*NOT-REACHED*/
2802         case KEEPS_next_fail:
2803             /* rollback the start point change */
2804             PL_regoffs[0].start = st->u.keeper.val;
2805             sayNO_SILENT;
2806             /*NOT-REACHED*/
2807         case EOL:
2808                 goto seol;
2809         case MEOL:
2810             if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
2811                 sayNO;
2812             break;
2813         case SEOL:
2814           seol:
2815             if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
2816                 sayNO;
2817             if (PL_regeol - locinput > 1)
2818                 sayNO;
2819             break;
2820         case EOS:
2821             if (PL_regeol != locinput)
2822                 sayNO;
2823             break;
2824         case SANY:
2825             if (!nextchr && locinput >= PL_regeol)
2826                 sayNO;
2827             if (do_utf8) {
2828                 locinput += PL_utf8skip[nextchr];
2829                 if (locinput > PL_regeol)
2830                     sayNO;
2831                 nextchr = UCHARAT(locinput);
2832             }
2833             else
2834                 nextchr = UCHARAT(++locinput);
2835             break;
2836         case CANY:
2837             if (!nextchr && locinput >= PL_regeol)
2838                 sayNO;
2839             nextchr = UCHARAT(++locinput);
2840             break;
2841         case REG_ANY:
2842             if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
2843                 sayNO;
2844             if (do_utf8) {
2845                 locinput += PL_utf8skip[nextchr];
2846                 if (locinput > PL_regeol)
2847                     sayNO;
2848                 nextchr = UCHARAT(locinput);
2849             }
2850             else
2851                 nextchr = UCHARAT(++locinput);
2852             break;
2853
2854 #undef  ST
2855 #define ST st->u.trie
2856         case TRIEC:
2857             /* In this case the charclass data is available inline so
2858                we can fail fast without a lot of extra overhead.
2859              */
2860             if (scan->flags == EXACT || !do_utf8) {
2861                 if(!ANYOF_BITMAP_TEST(scan, *locinput)) {
2862                     DEBUG_EXECUTE_r(
2863                         PerlIO_printf(Perl_debug_log,
2864                                   "%*s  %sfailed to match trie start class...%s\n",
2865                                   REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5])
2866                     );
2867                     sayNO_SILENT;
2868                     /* NOTREACHED */
2869                 }
2870             }
2871             /* FALL THROUGH */
2872         case TRIE:
2873             {
2874                 /* what type of TRIE am I? (utf8 makes this contextual) */
2875                 const enum { trie_plain, trie_utf8, trie_utf8_fold }
2876                     trie_type = do_utf8 ?
2877                           (scan->flags == EXACT ? trie_utf8 : trie_utf8_fold)
2878                         : trie_plain;
2879
2880                 /* what trie are we using right now */
2881                 reg_trie_data * const trie
2882                     = (reg_trie_data*)rexi->data->data[ ARG( scan ) ];
2883                 HV * widecharmap = (HV *)rexi->data->data[ ARG( scan ) + 1 ];
2884                 U32 state = trie->startstate;
2885
2886                 if (trie->bitmap && trie_type != trie_utf8_fold &&
2887                     !TRIE_BITMAP_TEST(trie,*locinput)
2888                 ) {
2889                     if (trie->states[ state ].wordnum) {
2890                          DEBUG_EXECUTE_r(
2891                             PerlIO_printf(Perl_debug_log,
2892                                           "%*s  %smatched empty string...%s\n",
2893                                           REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5])
2894                         );
2895                         break;
2896                     } else {
2897                         DEBUG_EXECUTE_r(
2898                             PerlIO_printf(Perl_debug_log,
2899                                           "%*s  %sfailed to match trie start class...%s\n",
2900                                           REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5])
2901                         );
2902                         sayNO_SILENT;
2903                    }
2904                 }
2905
2906             {
2907                 U8 *uc = ( U8* )locinput;
2908
2909                 STRLEN len = 0;
2910                 STRLEN foldlen = 0;
2911                 U8 *uscan = (U8*)NULL;
2912                 STRLEN bufflen=0;
2913                 SV *sv_accept_buff = NULL;
2914                 U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ];
2915
2916                 ST.accepted = 0; /* how many accepting states we have seen */
2917                 ST.B = next;
2918                 ST.jump = trie->jump;
2919                 ST.me = scan;
2920                 /*
2921                    traverse the TRIE keeping track of all accepting states
2922                    we transition through until we get to a failing node.
2923                 */
2924
2925                 while ( state && uc <= (U8*)PL_regeol ) {
2926                     U32 base = trie->states[ state ].trans.base;
2927                     UV uvc = 0;
2928                     U16 charid;
2929                     /* We use charid to hold the wordnum as we don't use it
2930                        for charid until after we have done the wordnum logic.
2931                        We define an alias just so that the wordnum logic reads
2932                        more naturally. */
2933
2934 #define got_wordnum charid
2935                     got_wordnum = trie->states[ state ].wordnum;
2936
2937                     if ( got_wordnum ) {
2938                         if ( ! ST.accepted ) {
2939                             ENTER;
2940                             /* SAVETMPS; */ /* XXX is this necessary? dmq */
2941                             bufflen = TRIE_INITAL_ACCEPT_BUFFLEN;
2942                             sv_accept_buff=newSV(bufflen *
2943                                             sizeof(reg_trie_accepted) - 1);
2944                             SvCUR_set(sv_accept_buff, 0);
2945                             SvPOK_on(sv_accept_buff);
2946                             sv_2mortal(sv_accept_buff);
2947                             SAVETMPS;
2948                             ST.accept_buff =
2949                                 (reg_trie_accepted*)SvPV_nolen(sv_accept_buff );
2950                         }
2951                         do {
2952                             if (ST.accepted >= bufflen) {
2953                                 bufflen *= 2;
2954                                 ST.accept_buff =(reg_trie_accepted*)
2955                                     SvGROW(sv_accept_buff,
2956                                         bufflen * sizeof(reg_trie_accepted));
2957                             }
2958                             SvCUR_set(sv_accept_buff,SvCUR(sv_accept_buff)
2959                                 + sizeof(reg_trie_accepted));
2960
2961
2962                             ST.accept_buff[ST.accepted].wordnum = got_wordnum;
2963                             ST.accept_buff[ST.accepted].endpos = uc;
2964                             ++ST.accepted;
2965                         } while (trie->nextword && (got_wordnum= trie->nextword[got_wordnum]));
2966                     }
2967 #undef got_wordnum
2968
2969                     DEBUG_TRIE_EXECUTE_r({
2970                                 DUMP_EXEC_POS( (char *)uc, scan, do_utf8 );
2971                                 PerlIO_printf( Perl_debug_log,
2972                                     "%*s  %sState: %4"UVxf" Accepted: %4"UVxf" ",
2973                                     2+depth * 2, "", PL_colors[4],
2974                                     (UV)state, (UV)ST.accepted );
2975                     });
2976
2977                     if ( base ) {
2978                         REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc,
2979                                              uscan, len, uvc, charid, foldlen,
2980                                              foldbuf, uniflags);
2981
2982                         if (charid &&
2983                              (base + charid > trie->uniquecharcount )
2984                              && (base + charid - 1 - trie->uniquecharcount
2985                                     < trie->lasttrans)
2986                              && trie->trans[base + charid - 1 -
2987                                     trie->uniquecharcount].check == state)
2988                         {
2989                             state = trie->trans[base + charid - 1 -
2990                                 trie->uniquecharcount ].next;
2991                         }
2992                         else {
2993                             state = 0;
2994                         }
2995                         uc += len;
2996
2997                     }
2998                     else {
2999                         state = 0;
3000                     }
3001                     DEBUG_TRIE_EXECUTE_r(
3002                         PerlIO_printf( Perl_debug_log,
3003                             "Charid:%3x CP:%4"UVxf" After State: %4"UVxf"%s\n",
3004                             charid, uvc, (UV)state, PL_colors[5] );
3005                     );
3006                 }
3007                 if (!ST.accepted )
3008                    sayNO;
3009
3010                 DEBUG_EXECUTE_r(
3011                     PerlIO_printf( Perl_debug_log,
3012                         "%*s  %sgot %"IVdf" possible matches%s\n",
3013                         REPORT_CODE_OFF + depth * 2, "",
3014                         PL_colors[4], (IV)ST.accepted, PL_colors[5] );
3015                 );
3016             }}
3017             goto trie_first_try; /* jump into the fail handler */
3018             /* NOTREACHED */
3019         case TRIE_next_fail: /* we failed - try next alterative */
3020             if ( ST.jump) {
3021                 REGCP_UNWIND(ST.cp);
3022                 for (n = *PL_reglastparen; n > ST.lastparen; n--)
3023                     PL_regoffs[n].end = -1;
3024                 *PL_reglastparen = n;
3025             }
3026           trie_first_try:
3027             if (do_cutgroup) {
3028                 do_cutgroup = 0;
3029                 no_final = 0;
3030             }
3031
3032             if ( ST.jump) {
3033                 ST.lastparen = *PL_reglastparen;
3034                 REGCP_SET(ST.cp);
3035             }
3036             if ( ST.accepted == 1 ) {
3037                 /* only one choice left - just continue */
3038                 DEBUG_EXECUTE_r({
3039                     AV *const trie_words
3040                         = (AV *) rexi->data->data[ARG(ST.me)+TRIE_WORDS_OFFSET];
3041                     SV ** const tmp = av_fetch( trie_words,
3042                         ST.accept_buff[ 0 ].wordnum-1, 0 );
3043                     SV *sv= tmp ? sv_newmortal() : NULL;
3044
3045                     PerlIO_printf( Perl_debug_log,
3046                         "%*s  %sonly one match left: #%d <%s>%s\n",
3047                         REPORT_CODE_OFF+depth*2, "", PL_colors[4],
3048                         ST.accept_buff[ 0 ].wordnum,
3049                         tmp ? pv_pretty(sv, SvPV_nolen_const(*tmp), SvCUR(*tmp), 0,
3050                                 PL_colors[0], PL_colors[1],
3051                                 (SvUTF8(*tmp) ? PERL_PV_ESCAPE_UNI : 0)
3052                             )
3053                         : "not compiled under -Dr",
3054                         PL_colors[5] );
3055                 });
3056                 PL_reginput = (char *)ST.accept_buff[ 0 ].endpos;
3057                 /* in this case we free tmps/leave before we call regmatch
3058                    as we wont be using accept_buff again. */
3059
3060                 locinput = PL_reginput;
3061                 nextchr = UCHARAT(locinput);
3062                 if ( !ST.jump || !ST.jump[ST.accept_buff[0].wordnum])
3063                     scan = ST.B;
3064                 else
3065                     scan = ST.me + ST.jump[ST.accept_buff[0].wordnum];
3066                 if (!has_cutgroup) {
3067                     FREETMPS;
3068                     LEAVE;
3069                 } else {
3070                     ST.accepted--;
3071                     PUSH_YES_STATE_GOTO(TRIE_next, scan);
3072                 }
3073
3074                 continue; /* execute rest of RE */
3075             }
3076
3077             if ( !ST.accepted-- ) {
3078                 DEBUG_EXECUTE_r({
3079                     PerlIO_printf( Perl_debug_log,
3080                         "%*s  %sTRIE failed...%s\n",
3081                         REPORT_CODE_OFF+depth*2, "",
3082                         PL_colors[4],
3083                         PL_colors[5] );
3084                 });
3085                 FREETMPS;
3086                 LEAVE;
3087                 sayNO_SILENT;
3088                 /*NOTREACHED*/
3089             }
3090
3091             /*
3092                There are at least two accepting states left.  Presumably
3093                the number of accepting states is going to be low,
3094                typically two. So we simply scan through to find the one
3095                with lowest wordnum.  Once we find it, we swap the last
3096                state into its place and decrement the size. We then try to
3097                match the rest of the pattern at the point where the word
3098                ends. If we succeed, control just continues along the
3099                regex; if we fail we return here to try the next accepting
3100                state
3101              */
3102
3103             {
3104                 U32 best = 0;
3105                 U32 cur;
3106                 for( cur = 1 ; cur <= ST.accepted ; cur++ ) {
3107                     DEBUG_TRIE_EXECUTE_r(
3108                         PerlIO_printf( Perl_debug_log,
3109                             "%*s  %sgot %"IVdf" (%d) as best, looking at %"IVdf" (%d)%s\n",
3110                             REPORT_CODE_OFF + depth * 2, "", PL_colors[4],
3111                             (IV)best, ST.accept_buff[ best ].wordnum, (IV)cur,
3112                             ST.accept_buff[ cur ].wordnum, PL_colors[5] );
3113                     );
3114
3115                     if (ST.accept_buff[cur].wordnum <
3116                             ST.accept_buff[best].wordnum)
3117                         best = cur;
3118                 }
3119
3120                 DEBUG_EXECUTE_r({
3121                     AV *const trie_words
3122                         = (AV *) rexi->data->data[ARG(ST.me)+TRIE_WORDS_OFFSET];
3123                     SV ** const tmp = av_fetch( trie_words,
3124                         ST.accept_buff[ best ].wordnum - 1, 0 );
3125                     regnode *nextop=(!ST.jump || !ST.jump[ST.accept_buff[best].wordnum]) ?
3126                                     ST.B :
3127                                     ST.me + ST.jump[ST.accept_buff[best].wordnum];
3128                     SV *sv= tmp ? sv_newmortal() : NULL;
3129
3130                     PerlIO_printf( Perl_debug_log,
3131                         "%*s  %strying alternation #%d <%s> at node #%d %s\n",
3132                         REPORT_CODE_OFF+depth*2, "", PL_colors[4],
3133                         ST.accept_buff[best].wordnum,
3134                         tmp ? pv_pretty(sv, SvPV_nolen_const(*tmp), SvCUR(*tmp), 0,
3135                                 PL_colors[0], PL_colors[1],
3136                                 (SvUTF8(*tmp) ? PERL_PV_ESCAPE_UNI : 0)
3137                             ) : "not compiled under -Dr",
3138                             REG_NODE_NUM(nextop),
3139                         PL_colors[5] );
3140                 });
3141
3142                 if ( best<ST.accepted ) {
3143                     reg_trie_accepted tmp = ST.accept_buff[ best ];
3144                     ST.accept_buff[ best ] = ST.accept_buff[ ST.accepted ];
3145                     ST.accept_buff[ ST.accepted ] = tmp;
3146                     best = ST.accepted;
3147                 }
3148                 PL_reginput = (char *)ST.accept_buff[ best ].endpos;
3149                 if ( !ST.jump || !ST.jump[ST.accept_buff[best].wordnum]) {
3150                     scan = ST.B;
3151                 } else {
3152                     scan = ST.me + ST.jump[ST.accept_buff[best].wordnum];
3153                 }
3154                 PUSH_YES_STATE_GOTO(TRIE_next, scan);
3155                 /* NOTREACHED */
3156             }
3157             /* NOTREACHED */
3158         case TRIE_next:
3159             FREETMPS;
3160             LEAVE;
3161             sayYES;
3162 #undef  ST
3163
3164         case EXACT: {
3165             char *s = STRING(scan);
3166             ln = STR_LEN(scan);
3167             if (do_utf8 != UTF) {
3168                 /* The target and the pattern have differing utf8ness. */
3169                 char *l = locinput;
3170                 const char * const e = s + ln;
3171
3172                 if (do_utf8) {
3173                     /* The target is utf8, the pattern is not utf8. */
3174                     while (s < e) {
3175                         STRLEN ulen;
3176                         if (l >= PL_regeol)
3177                              sayNO;
3178                         if (NATIVE_TO_UNI(*(U8*)s) !=
3179                             utf8n_to_uvuni((U8*)l, UTF8_MAXBYTES, &ulen,
3180                                             uniflags))
3181                              sayNO;
3182                         l += ulen;
3183                         s ++;
3184                     }
3185                 }
3186                 else {
3187                     /* The target is not utf8, the pattern is utf8. */
3188                     while (s < e) {
3189                         STRLEN ulen;
3190                         if (l >= PL_regeol)
3191                             sayNO;
3192                         if (NATIVE_TO_UNI(*((U8*)l)) !=
3193                             utf8n_to_uvuni((U8*)s, UTF8_MAXBYTES, &ulen,
3194                                            uniflags))
3195                             sayNO;
3196                         s += ulen;
3197                         l ++;
3198                     }
3199                 }
3200                 locinput = l;
3201                 nextchr = UCHARAT(locinput);
3202                 break;
3203             }
3204             /* The target and the pattern have the same utf8ness. */
3205             /* Inline the first character, for speed. */
3206             if (UCHARAT(s) != nextchr)
3207                 sayNO;
3208             if (PL_regeol - locinput < ln)
3209                 sayNO;
3210             if (ln > 1 && memNE(s, locinput, ln))
3211                 sayNO;
3212             locinput += ln;
3213             nextchr = UCHARAT(locinput);
3214             break;
3215             }
3216         case EXACTFL:
3217             PL_reg_flags |= RF_tainted;
3218             /* FALL THROUGH */
3219         case EXACTF: {
3220             char * const s = STRING(scan);
3221             ln = STR_LEN(scan);
3222
3223             if (do_utf8 || UTF) {
3224               /* Either target or the pattern are utf8. */
3225                 const char * const l = locinput;
3226                 char *e = PL_regeol;
3227
3228                 if (ibcmp_utf8(s, 0,  ln, (bool)UTF,
3229                                l, &e, 0,  do_utf8)) {
3230                      /* One more case for the sharp s:
3231                       * pack("U0U*", 0xDF) =~ /ss/i,
3232                       * the 0xC3 0x9F are the UTF-8
3233                       * byte sequence for the U+00DF. */
3234
3235                      if (!(do_utf8 &&
3236                            toLOWER(s[0]) == 's' &&
3237                            ln >= 2 &&
3238                            toLOWER(s[1]) == 's' &&
3239                            (U8)l[0] == 0xC3 &&
3240                            e - l >= 2 &&
3241                            (U8)l[1] == 0x9F))
3242                           sayNO;
3243                 }
3244                 locinput = e;
3245                 nextchr = UCHARAT(locinput);
3246                 break;
3247             }
3248
3249             /* Neither the target and the pattern are utf8. */
3250
3251             /* Inline the first character, for speed. */
3252             if (UCHARAT(s) != nextchr &&
3253                 UCHARAT(s) != ((OP(scan) == EXACTF)
3254                                ? PL_fold : PL_fold_locale)[nextchr])
3255                 sayNO;
3256             if (PL_regeol - locinput < ln)
3257                 sayNO;
3258             if (ln > 1 && (OP(scan) == EXACTF
3259                            ? ibcmp(s, locinput, ln)
3260                            : ibcmp_locale(s, locinput, ln)))
3261                 sayNO;
3262             locinput += ln;
3263             nextchr = UCHARAT(locinput);
3264             break;
3265             }
3266         case ANYOF:
3267             if (do_utf8) {
3268                 STRLEN inclasslen = PL_regeol - locinput;
3269
3270                 if (!reginclass(rex, scan, (U8*)locinput, &inclasslen, do_utf8))
3271                     goto anyof_fail;
3272                 if (locinput >= PL_regeol)
3273                     sayNO;
3274                 locinput += inclasslen ? inclasslen : UTF8SKIP(locinput);
3275                 nextchr = UCHARAT(locinput);
3276                 break;
3277             }
3278             else {
3279                 if (nextchr < 0)
3280                     nextchr = UCHARAT(locinput);
3281                 if (!REGINCLASS(rex, scan, (U8*)locinput))
3282                     goto anyof_fail;
3283                 if (!nextchr && locinput >= PL_regeol)
3284                     sayNO;
3285                 nextchr = UCHARAT(++locinput);
3286                 break;
3287             }
3288         anyof_fail:
3289             /* If we might have the case of the German sharp s
3290              * in a casefolding Unicode character class. */
3291
3292             if (ANYOF_FOLD_SHARP_S(scan, locinput, PL_regeol)) {
3293                  locinput += SHARP_S_SKIP;
3294                  nextchr = UCHARAT(locinput);
3295             }
3296             else
3297                  sayNO;
3298             break;
3299         case ALNUML:
3300             PL_reg_flags |= RF_tainted;
3301             /* FALL THROUGH */
3302         case ALNUM:
3303             if (!nextchr)
3304                 sayNO;
3305             if (do_utf8) {
3306                 LOAD_UTF8_CHARCLASS_ALNUM();
3307                 if (!(OP(scan) == ALNUM
3308                       ? (bool)swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
3309                       : isALNUM_LC_utf8((U8*)locinput)))
3310                 {
3311                     sayNO;
3312                 }
3313                 locinput += PL_utf8skip[nextchr];
3314                 nextchr = UCHARAT(locinput);
3315                 break;
3316             }
3317             if (!(OP(scan) == ALNUM
3318                   ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
3319                 sayNO;
3320             nextchr = UCHARAT(++locinput);
3321             break;
3322         case NALNUML:
3323             PL_reg_flags |= RF_tainted;
3324             /* FALL THROUGH */
3325         case NALNUM:
3326             if (!nextchr && locinput >= PL_regeol)
3327                 sayNO;
3328             if (do_utf8) {
3329                 LOAD_UTF8_CHARCLASS_ALNUM();
3330                 if (OP(scan) == NALNUM
3331                     ? (bool)swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
3332                     : isALNUM_LC_utf8((U8*)locinput))
3333                 {
3334                     sayNO;
3335                 }
3336                 locinput += PL_utf8skip[nextchr];
3337                 nextchr = UCHARAT(locinput);
3338                 break;
3339             }
3340             if (OP(scan) == NALNUM
3341                 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
3342                 sayNO;
3343             nextchr = UCHARAT(++locinput);
3344             break;
3345         case BOUNDL:
3346         case NBOUNDL:
3347             PL_reg_flags |= RF_tainted;
3348             /* FALL THROUGH */
3349         case BOUND:
3350         case NBOUND:
3351             /* was last char in word? */
3352             if (do_utf8) {
3353                 if (locinput == PL_bostr)
3354                     ln = '\n';
3355                 else {
3356                     const U8 * const r = reghop3((U8*)locinput, -1, (U8*)PL_bostr);
3357
3358                     ln = utf8n_to_uvchr(r, UTF8SKIP(r), 0, uniflags);
3359                 }
3360                 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
3361                     ln = isALNUM_uni(ln);
3362                     LOAD_UTF8_CHARCLASS_ALNUM();
3363                     n = swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8);
3364                 }
3365                 else {
3366                     ln = isALNUM_LC_uvchr(UNI_TO_NATIVE(ln));
3367                     n = isALNUM_LC_utf8((U8*)locinput);
3368                 }
3369             }
3370             else {
3371                 ln = (locinput != PL_bostr) ?
3372                     UCHARAT(locinput - 1) : '\n';
3373                 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
3374                     ln = isALNUM(ln);
3375                     n = isALNUM(nextchr);
3376                 }
3377                 else {
3378                     ln = isALNUM_LC(ln);
3379                     n = isALNUM_LC(nextchr);
3380                 }
3381             }
3382             if (((!ln) == (!n)) == (OP(scan) == BOUND ||
3383                                     OP(scan) == BOUNDL))
3384                     sayNO;
3385             break;
3386         case SPACEL:
3387             PL_reg_flags |= RF_tainted;
3388             /* FALL THROUGH */
3389         case SPACE:
3390             if (!nextchr)
3391                 sayNO;
3392             if (do_utf8) {
3393                 if (UTF8_IS_CONTINUED(nextchr)) {
3394                     LOAD_UTF8_CHARCLASS_SPACE();
3395                     if (!(OP(scan) == SPACE
3396                           ? (bool)swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
3397                           : isSPACE_LC_utf8((U8*)locinput)))
3398                     {
3399                         sayNO;
3400                     }
3401                     locinput += PL_utf8skip[nextchr];
3402                     nextchr = UCHARAT(locinput);
3403                     break;
3404                 }
3405                 if (!(OP(scan) == SPACE
3406                       ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
3407                     sayNO;
3408                 nextchr = UCHARAT(++locinput);
3409             }
3410             else {
3411                 if (!(OP(scan) == SPACE
3412                       ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
3413                     sayNO;
3414                 nextchr = UCHARAT(++locinput);
3415             }
3416             break;
3417         case NSPACEL:
3418             PL_reg_flags |= RF_tainted;
3419             /* FALL THROUGH */
3420         case NSPACE:
3421             if (!nextchr && locinput >= PL_regeol)
3422                 sayNO;
3423             if (do_utf8) {
3424                 LOAD_UTF8_CHARCLASS_SPACE();
3425                 if (OP(scan) == NSPACE
3426                     ? (bool)swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
3427                     : isSPACE_LC_utf8((U8*)locinput))
3428                 {
3429                     sayNO;
3430                 }
3431                 locinput += PL_utf8skip[nextchr];
3432                 nextchr = UCHARAT(locinput);
3433                 break;
3434             }
3435             if (OP(scan) == NSPACE
3436                 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
3437                 sayNO;
3438             nextchr = UCHARAT(++locinput);
3439             break;
3440         case DIGITL:
3441             PL_reg_flags |= RF_tainted;
3442             /* FALL THROUGH */
3443         case DIGIT:
3444             if (!nextchr)
3445                 sayNO;
3446             if (do_utf8) {
3447                 LOAD_UTF8_CHARCLASS_DIGIT();
3448                 if (!(OP(scan) == DIGIT
3449                       ? (bool)swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
3450                       : isDIGIT_LC_utf8((U8*)locinput)))
3451                 {
3452                     sayNO;
3453                 }
3454                 locinput += PL_utf8skip[nextchr];
3455                 nextchr = UCHARAT(locinput);
3456                 break;
3457             }
3458             if (!(OP(scan) == DIGIT
3459                   ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
3460                 sayNO;
3461             nextchr = UCHARAT(++locinput);
3462             break;
3463         case NDIGITL:
3464             PL_reg_flags |= RF_tainted;
3465             /* FALL THROUGH */
3466         case NDIGIT:
3467             if (!nextchr && locinput >= PL_regeol)
3468                 sayNO;
3469             if (do_utf8) {
3470                 LOAD_UTF8_CHARCLASS_DIGIT();
3471                 if (OP(scan) == NDIGIT
3472                     ? (bool)swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
3473                     : isDIGIT_LC_utf8((U8*)locinput))
3474                 {
3475                     sayNO;
3476                 }
3477                 locinput += PL_utf8skip[nextchr];
3478                 nextchr = UCHARAT(locinput);
3479                 break;
3480             }
3481             if (OP(scan) == NDIGIT
3482                 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
3483                 sayNO;
3484             nextchr = UCHARAT(++locinput);
3485             break;
3486         case CLUMP:
3487             if (locinput >= PL_regeol)
3488                 sayNO;
3489             if  (do_utf8) {
3490                 LOAD_UTF8_CHARCLASS_MARK();
3491                 if (swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
3492                     sayNO;
3493                 locinput += PL_utf8skip[nextchr];
3494                 while (locinput < PL_regeol &&
3495                        swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
3496                     locinput += UTF8SKIP(locinput);
3497                 if (locinput > PL_regeol)
3498                     sayNO;
3499             }
3500             else
3501                locinput++;
3502             nextchr = UCHARAT(locinput);
3503             break;
3504
3505         case NREFFL:
3506         {
3507             char *s;
3508             char type;
3509             PL_reg_flags |= RF_tainted;
3510             /* FALL THROUGH */
3511         case NREF:
3512         case NREFF:
3513             type = OP(scan);
3514             n = reg_check_named_buff_matched(rex,scan);
3515
3516             if ( n ) {
3517                 type = REF + ( type - NREF );
3518                 goto do_ref;
3519             } else {
3520                 sayNO;
3521             }
3522             /* unreached */
3523         case REFFL:
3524             PL_reg_flags |= RF_tainted;
3525             /* FALL THROUGH */
3526         case REF:
3527         case REFF:
3528             n = ARG(scan);  /* which paren pair */
3529             type = OP(scan);
3530           do_ref:
3531             ln = PL_regoffs[n].start;
3532             PL_reg_leftiter = PL_reg_maxiter;           /* Void cache */
3533             if (*PL_reglastparen < n || ln == -1)
3534                 sayNO;                  /* Do not match unless seen CLOSEn. */
3535             if (ln == PL_regoffs[n].end)
3536                 break;
3537
3538             s = PL_bostr + ln;
3539             if (do_utf8 && type != REF) {       /* REF can do byte comparison */
3540                 char *l = locinput;
3541                 const char *e = PL_bostr + PL_regoffs[n].end;
3542                 /*
3543                  * Note that we can't do the "other character" lookup trick as
3544                  * in the 8-bit case (no pun intended) because in Unicode we
3545                  * have to map both upper and title case to lower case.
3546                  */
3547                 if (type == REFF) {
3548                     while (s < e) {
3549                         STRLEN ulen1, ulen2;
3550                         U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
3551                         U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
3552
3553                         if (l >= PL_regeol)
3554                             sayNO;
3555                         toLOWER_utf8((U8*)s, tmpbuf1, &ulen1);
3556                         toLOWER_utf8((U8*)l, tmpbuf2, &ulen2);
3557                         if (ulen1 != ulen2 || memNE((char *)tmpbuf1, (char *)tmpbuf2, ulen1))
3558                             sayNO;
3559                         s += ulen1;
3560                         l += ulen2;
3561                     }
3562                 }
3563                 locinput = l;
3564                 nextchr = UCHARAT(locinput);
3565                 break;
3566             }
3567
3568             /* Inline the first character, for speed. */
3569             if (UCHARAT(s) != nextchr &&
3570                 (type == REF ||
3571                  (UCHARAT(s) != (type == REFF
3572                                   ? PL_fold : PL_fold_locale)[nextchr])))
3573                 sayNO;
3574             ln = PL_regoffs[n].end - ln;
3575             if (locinput + ln > PL_regeol)
3576                 sayNO;
3577             if (ln > 1 && (type == REF
3578                            ? memNE(s, locinput, ln)
3579                            : (type == REFF
3580                               ? ibcmp(s, locinput, ln)
3581                               : ibcmp_locale(s, locinput, ln))))
3582                 sayNO;
3583             locinput += ln;
3584             nextchr = UCHARAT(locinput);
3585             break;
3586         }
3587         case NOTHING:
3588         case TAIL:
3589             break;
3590         case BACK:
3591             break;
3592
3593 #undef  ST
3594 #define ST st->u.eval
3595         {
3596             SV *ret;
3597             regexp *re;
3598             regexp_internal *rei;
3599             regnode *startpoint;
3600
3601         case GOSTART:
3602         case GOSUB: /*    /(...(?1))/   /(...(?&foo))/   */
3603             if (cur_eval && cur_eval->locinput==locinput) {
3604                 if (cur_eval->u.eval.close_paren == (U32)ARG(scan))
3605                     Perl_croak(aTHX_ "Infinite recursion in regex");
3606                 if ( ++nochange_depth > max_nochange_depth )
3607                     Perl_croak(aTHX_
3608                         "Pattern subroutine nesting without pos change"
3609                         " exceeded limit in regex");
3610             } else {
3611                 nochange_depth = 0;
3612             }
3613             re = rex;
3614             rei = rexi;
3615             (void)ReREFCNT_inc(rex);
3616             if (OP(scan)==GOSUB) {
3617                 startpoint = scan + ARG2L(scan);
3618                 ST.close_paren = ARG(scan);
3619             } else {
3620                 startpoint = rei->program+1;
3621                 ST.close_paren = 0;
3622             }
3623             goto eval_recurse_doit;
3624             /* NOTREACHED */
3625         case EVAL:  /*   /(?{A})B/   /(??{A})B/  and /(?(?{A})X|Y)B/   */
3626             if (cur_eval && cur_eval->locinput==locinput) {
3627                 if ( ++nochange_depth > max_nochange_depth )
3628                     Perl_croak(aTHX_ "EVAL without pos change exceeded limit in regex");
3629             } else {
3630                 nochange_depth = 0;
3631             }
3632             {
3633                 /* execute the code in the {...} */
3634                 dSP;
3635                 SV ** const before = SP;
3636                 OP_4tree * const oop = PL_op;
3637                 COP * const ocurcop = PL_curcop;
3638                 PAD *old_comppad;
3639
3640                 n = ARG(scan);
3641                 PL_op = (OP_4tree*)rexi->data->data[n];
3642                 DEBUG_STATE_r( PerlIO_printf(Perl_debug_log,
3643                     "  re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
3644                 PAD_SAVE_LOCAL(old_comppad, (PAD*)rexi->data->data[n + 2]);
3645                 PL_regoffs[0].end = PL_reg_magic->mg_len = locinput - PL_bostr;
3646
3647                 if (sv_yes_mark) {
3648                     SV *sv_mrk = get_sv("REGMARK", 1);
3649                     sv_setsv(sv_mrk, sv_yes_mark);
3650                 }
3651
3652                 CALLRUNOPS(aTHX);                       /* Scalar context. */
3653                 SPAGAIN;
3654                 if (SP == before)
3655                     ret = &PL_sv_undef;   /* protect against empty (?{}) blocks. */
3656                 else {
3657                     ret = POPs;
3658                     PUTBACK;
3659                 }
3660
3661                 PL_op = oop;
3662                 PAD_RESTORE_LOCAL(old_comppad);
3663                 PL_curcop = ocurcop;
3664                 if (!logical) {
3665                     /* /(?{...})/ */
3666                     sv_setsv(save_scalar(PL_replgv), ret);
3667                     break;
3668                 }
3669             }
3670             if (logical == 2) { /* Postponed subexpression: /(??{...})/ */
3671                 logical = 0;
3672                 {
3673                     /* extract RE object from returned value; compiling if
3674                      * necessary */
3675
3676                     MAGIC *mg = NULL;
3677                     const SV *sv;
3678                     if(SvROK(ret) && SvSMAGICAL(sv = SvRV(ret)))
3679                         mg = mg_find(sv, PERL_MAGIC_qr);
3680                     else if (SvSMAGICAL(ret)) {
3681                         if (SvGMAGICAL(ret))
3682                             sv_unmagic(ret, PERL_MAGIC_qr);
3683                         else
3684                             mg = mg_find(ret, PERL_MAGIC_qr);
3685                     }
3686
3687                     if (mg) {
3688                         re = reg_temp_copy((regexp *)mg->mg_obj); /*XXX:dmq*/
3689                     }
3690                     else {
3691                         U32 pm_flags = 0;
3692                         const I32 osize = PL_regsize;
3693
3694                         if (DO_UTF8(ret)) pm_flags |= RXf_UTF8;
3695                         re = CALLREGCOMP(ret, pm_flags);
3696                         if (!(SvFLAGS(ret)
3697                               & (SVs_TEMP | SVs_PADTMP | SVf_READONLY
3698                                 | SVs_GMG)))
3699                             sv_magic(ret,(SV*)ReREFCNT_inc(re),
3700                                         PERL_MAGIC_qr,0,0);
3701                         PL_regsize = osize;
3702                     }
3703                 }
3704                 RX_MATCH_COPIED_off(re);
3705                 re->subbeg = rex->subbeg;
3706                 re->sublen = rex->sublen;
3707                 rei = RXi_GET(re);
3708                 DEBUG_EXECUTE_r(
3709                     debug_start_match(re, do_utf8, locinput, PL_regeol,
3710                         "Matching embedded");
3711                 );
3712                 startpoint = rei->program + 1;
3713                 ST.close_paren = 0; /* only used for GOSUB */
3714                 /* borrowed from regtry */
3715                 if (PL_reg_start_tmpl <= re->nparens) {
3716                     PL_reg_start_tmpl = re->nparens*3/2 + 3;
3717                     if(PL_reg_start_tmp)
3718                         Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
3719                     else
3720                         Newx(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
3721                 }
3722
3723         eval_recurse_doit: /* Share code with GOSUB below this line */
3724                 /* run the pattern returned from (??{...}) */
3725                 ST.cp = regcppush(0);   /* Save *all* the positions. */
3726                 REGCP_SET(ST.lastcp);
3727
3728                 PL_regoffs = re->offs; /* essentially NOOP on GOSUB */
3729
3730                 *PL_reglastparen = 0;
3731                 *PL_reglastcloseparen = 0;
3732                 PL_reginput = locinput;
3733                 PL_regsize = 0;
3734
3735                 /* XXXX This is too dramatic a measure... */
3736                 PL_reg_maxiter = 0;
3737
3738                 ST.toggle_reg_flags = PL_reg_flags;
3739                 if (re->extflags & RXf_UTF8)
3740                     PL_reg_flags |= RF_utf8;
3741                 else
3742                     PL_reg_flags &= ~RF_utf8;
3743                 ST.toggle_reg_flags ^= PL_reg_flags; /* diff of old and new */
3744
3745                 ST.prev_rex = rex;
3746                 ST.prev_curlyx = cur_curlyx;
3747                 SETREX(rex,re);
3748                 rexi = rei;
3749                 cur_curlyx = NULL;
3750                 ST.B = next;
3751                 ST.prev_eval = cur_eval;
3752                 cur_eval = st;
3753                 /* now continue from first node in postoned RE */
3754                 PUSH_YES_STATE_GOTO(EVAL_AB, startpoint);
3755                 /* NOTREACHED */
3756             }
3757             /* logical is 1,   /(?(?{...})X|Y)/ */
3758             sw = (bool)SvTRUE(ret);
3759             logical = 0;
3760             break;
3761         }
3762
3763         case EVAL_AB: /* cleanup after a successful (??{A})B */
3764             /* note: this is called twice; first after popping B, then A */
3765             PL_reg_flags ^= ST.toggle_reg_flags;
3766             ReREFCNT_dec(rex);
3767             SETREX(rex,ST.prev_rex);
3768             rexi = RXi_GET(rex);
3769             regcpblow(ST.cp);
3770             cur_eval = ST.prev_eval;
3771             cur_curlyx = ST.prev_curlyx;
3772             /* XXXX This is too dramatic a measure... */
3773             PL_reg_maxiter = 0;
3774             if ( nochange_depth )
3775                 nochange_depth--;
3776             sayYES;
3777
3778
3779         case EVAL_AB_fail: /* unsuccessfully ran A or B in (??{A})B */
3780             /* note: this is called twice; first after popping B, then A */
3781             PL_reg_flags ^= ST.toggle_reg_flags;
3782             ReREFCNT_dec(rex);
3783             SETREX(rex,ST.prev_rex);
3784             rexi = RXi_GET(rex);
3785             PL_reginput = locinput;
3786             REGCP_UNWIND(ST.lastcp);
3787             regcppop(rex);
3788             cur_eval = ST.prev_eval;
3789             cur_curlyx = ST.prev_curlyx;
3790             /* XXXX This is too dramatic a measure... */
3791             PL_reg_maxiter = 0;
3792             if ( nochange_depth )
3793                 nochange_depth--;
3794             sayNO_SILENT;
3795 #undef ST
3796
3797         case OPEN:
3798             n = ARG(scan);  /* which paren pair */
3799             PL_reg_start_tmp[n] = locinput;
3800             if (n > PL_regsize)
3801                 PL_regsize = n;
3802             lastopen = n;
3803             break;
3804         case CLOSE:
3805             n = ARG(scan);  /* which paren pair */
3806             PL_regoffs[n].start = PL_reg_start_tmp[n] - PL_bostr;
3807             PL_regoffs[n].end = locinput - PL_bostr;
3808             /*if (n > PL_regsize)
3809                 PL_regsize = n;*/
3810             if (n > *PL_reglastparen)
3811                 *PL_reglastparen = n;
3812             *PL_reglastcloseparen = n;
3813             if (cur_eval && cur_eval->u.eval.close_paren == n) {
3814                 goto fake_end;
3815             }
3816             break;
3817         case ACCEPT:
3818             if (ARG(scan)){
3819                 regnode *cursor;
3820                 for (cursor=scan;
3821                      cursor && OP(cursor)!=END;
3822                      cursor=regnext(cursor))
3823                 {
3824                     if ( OP(cursor)==CLOSE ){
3825                         n = ARG(cursor);
3826                         if ( n <= lastopen ) {
3827                             PL_regoffs[n].start
3828                                 = PL_reg_start_tmp[n] - PL_bostr;
3829                             PL_regoffs[n].end = locinput - PL_bostr;
3830                             /*if (n > PL_regsize)
3831                             PL_regsize = n;*/
3832                             if (n > *PL_reglastparen)
3833                                 *PL_reglastparen = n;
3834                             *PL_reglastcloseparen = n;
3835                             if ( n == ARG(scan) || (cur_eval &&
3836                                 cur_eval->u.eval.close_paren == n))
3837                                 break;
3838                         }
3839                     }
3840                 }
3841             }
3842             goto fake_end;
3843             /*NOTREACHED*/
3844         case GROUPP:
3845             n = ARG(scan);  /* which paren pair */
3846             sw = (bool)(*PL_reglastparen >= n && PL_regoffs[n].end != -1);
3847             break;
3848         case NGROUPP:
3849             /* reg_check_named_buff_matched returns 0 for no match */
3850             sw = (bool)(0 < reg_check_named_buff_matched(rex,scan));
3851             break;
3852         case INSUBP:
3853             n = ARG(scan);
3854             sw = (cur_eval && (!n || cur_eval->u.eval.close_paren == n));
3855             break;
3856         case DEFINEP:
3857             sw = 0;
3858             break;
3859         case IFTHEN:
3860             PL_reg_leftiter = PL_reg_maxiter;           /* Void cache */
3861             if (sw)
3862                 next = NEXTOPER(NEXTOPER(scan));
3863             else {
3864                 next = scan + ARG(scan);
3865                 if (OP(next) == IFTHEN) /* Fake one. */
3866                     next = NEXTOPER(NEXTOPER(next));
3867             }
3868             break;
3869         case LOGICAL:
3870             logical = scan->flags;
3871             break;
3872
3873 /*******************************************************************
3874
3875 The CURLYX/WHILEM pair of ops handle the most generic case of the /A*B/
3876 pattern, where A and B are subpatterns. (For simple A, CURLYM or
3877 STAR/PLUS/CURLY/CURLYN are used instead.)
3878
3879 A*B is compiled as <CURLYX><A><WHILEM><B>
3880
3881 On entry to the subpattern, CURLYX is called. This pushes a CURLYX
3882 state, which contains the current count, initialised to -1. It also sets
3883 cur_curlyx to point to this state, with any previous value saved in the
3884 state block.
3885
3886 CURLYX then jumps straight to the WHILEM op, rather than executing A,
3887 since the pattern may possibly match zero times (i.e. it's a while {} loop
3888 rather than a do {} while loop).
3889
3890 Each entry to WHILEM represents a successful match of A. The count in the
3891 CURLYX block is incremented, another WHILEM state is pushed, and execution
3892 passes to A or B depending on greediness and the current count.
3893
3894 For example, if matching against the string a1a2a3b (where the aN are
3895 substrings that match /A/), then the match progresses as follows: (the
3896 pushed states are interspersed with the bits of strings matched so far):
3897
3898     <CURLYX cnt=-1>
3899     <CURLYX cnt=0><WHILEM>
3900     <CURLYX cnt=1><WHILEM> a1 <WHILEM>
3901     <CURLYX cnt=2><WHILEM> a1 <WHILEM> a2 <WHILEM>
3902     <CURLYX cnt=3><WHILEM> a1 <WHILEM> a2 <WHILEM> a3 <WHILEM>
3903     <CURLYX cnt=3><WHILEM> a1 <WHILEM> a2 <WHILEM> a3 <WHILEM> b
3904
3905 (Contrast this with something like CURLYM, which maintains only a single
3906 backtrack state:
3907
3908     <CURLYM cnt=0> a1
3909     a1 <CURLYM cnt=1> a2
3910     a1 a2 <CURLYM cnt=2> a3
3911     a1 a2 a3 <CURLYM cnt=3> b
3912 )
3913
3914 Each WHILEM state block marks a point to backtrack to upon partial failure
3915 of A or B, and also contains some minor state data related to that
3916 iteration.  The CURLYX block, pointed to by cur_curlyx, contains the
3917 overall state, such as the count, and pointers to the A and B ops.
3918
3919 This is complicated slightly by nested CURLYX/WHILEM's. Since cur_curlyx
3920 must always point to the *current* CURLYX block, the rules are:
3921
3922 When executing CURLYX, save the old cur_curlyx in the CURLYX state block,
3923 and set cur_curlyx to point the new block.
3924
3925 When popping the CURLYX block after a successful or unsuccessful match,
3926 restore the previous cur_curlyx.
3927
3928 When WHILEM is about to execute B, save the current cur_curlyx, and set it
3929 to the outer one saved in the CURLYX block.
3930
3931 When popping the WHILEM block after a successful or unsuccessful B match,
3932 restore the previous cur_curlyx.
3933
3934 Here's an example for the pattern (AI* BI)*BO
3935 I and O refer to inner and outer, C and W refer to CURLYX and WHILEM:
3936
3937 cur_
3938 curlyx backtrack stack
3939 ------ ---------------
3940 NULL
3941 CO     <CO prev=NULL> <WO>
3942 CI     <CO prev=NULL> <WO> <CI prev=CO> <WI> ai
3943 CO     <CO prev=NULL> <WO> <CI prev=CO> <WI> ai <WI prev=CI> bi
3944 NULL   <CO prev=NULL> <WO> <CI prev=CO> <WI> ai <WI prev=CI> bi <WO prev=CO> bo
3945
3946 At this point the pattern succeeds, and we work back down the stack to
3947 clean up, restoring as we go:
3948
3949 CO     <CO prev=NULL> <WO> <CI prev=CO> <WI> ai <WI prev=CI> bi
3950 CI     <CO prev=NULL> <WO> <CI prev=CO> <WI> ai
3951 CO     <CO prev=NULL> <WO>
3952 NULL
3953
3954 *******************************************************************/
3955
3956 #define ST st->u.curlyx
3957
3958         case CURLYX:    /* start of /A*B/  (for complex A) */
3959         {
3960             /* No need to save/restore up to this paren */
3961             I32 parenfloor = scan->flags;
3962
3963             assert(next); /* keep Coverity happy */
3964             if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
3965                 next += ARG(next);
3966
3967             /* XXXX Probably it is better to teach regpush to support
3968                parenfloor > PL_regsize... */
3969             if (parenfloor > (I32)*PL_reglastparen)
3970                 parenfloor = *PL_reglastparen; /* Pessimization... */
3971
3972             ST.prev_curlyx= cur_curlyx;
3973             cur_curlyx = st;
3974             ST.cp = PL_savestack_ix;
3975
3976             /* these fields contain the state of the current curly.
3977              * they are accessed by subsequent WHILEMs */
3978             ST.parenfloor = parenfloor;
3979             ST.min = ARG1(scan);
3980             ST.max = ARG2(scan);
3981             ST.A = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
3982             ST.B = next;
3983             ST.minmod = minmod;
3984             minmod = 0;
3985             ST.count = -1;      /* this will be updated by WHILEM */
3986             ST.lastloc = NULL;  /* this will be updated by WHILEM */
3987
3988             PL_reginput = locinput;
3989             PUSH_YES_STATE_GOTO(CURLYX_end, PREVOPER(next));
3990             /* NOTREACHED */
3991         }
3992
3993         case CURLYX_end: /* just finished matching all of A*B */
3994             cur_curlyx = ST.prev_curlyx;
3995             sayYES;
3996             /* NOTREACHED */
3997
3998         case CURLYX_end_fail: /* just failed to match all of A*B */
3999             regcpblow(ST.cp);
4000             cur_curlyx = ST.prev_curlyx;
4001             sayNO;
4002             /* NOTREACHED */
4003
4004
4005 #undef ST
4006 #define ST st->u.whilem
4007
4008         case WHILEM:     /* just matched an A in /A*B/  (for complex A) */
4009         {
4010             /* see the discussion above about CURLYX/WHILEM */
4011             I32 n;
4012             assert(cur_curlyx); /* keep Coverity happy */
4013             n = ++cur_curlyx->u.curlyx.count; /* how many A's matched */
4014             ST.save_lastloc = cur_curlyx->u.curlyx.lastloc;
4015             ST.cache_offset = 0;
4016             ST.cache_mask = 0;
4017
4018             PL_reginput = locinput;
4019
4020             DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
4021                   "%*s  whilem: matched %ld out of %ld..%ld\n",
4022                   REPORT_CODE_OFF+depth*2, "", (long)n,
4023                   (long)cur_curlyx->u.curlyx.min,
4024                   (long)cur_curlyx->u.curlyx.max)
4025             );
4026
4027             /* First just match a string of min A's. */
4028
4029             if (n < cur_curlyx->u.curlyx.min) {
4030                 cur_curlyx->u.curlyx.lastloc = locinput;
4031                 PUSH_STATE_GOTO(WHILEM_A_pre, cur_curlyx->u.curlyx.A);
4032                 /* NOTREACHED */
4033             }
4034
4035             /* If degenerate A matches "", assume A done. */
4036
4037             if (locinput == cur_curlyx->u.curlyx.lastloc) {
4038                 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
4039                    "%*s  whilem: empty match detected, trying continuation...\n",
4040                    REPORT_CODE_OFF+depth*2, "")
4041                 );
4042                 goto do_whilem_B_max;
4043             }
4044
4045             /* super-linear cache processing */
4046
4047             if (scan->flags) {
4048
4049                 if (!PL_reg_maxiter) {
4050                     /* start the countdown: Postpone detection until we
4051                      * know the match is not *that* much linear. */
4052                     PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
4053                     /* possible overflow for long strings and many CURLYX's */
4054                     if (PL_reg_maxiter < 0)
4055                         PL_reg_maxiter = I32_MAX;
4056                     PL_reg_leftiter = PL_reg_maxiter;
4057                 }
4058
4059                 if (PL_reg_leftiter-- == 0) {
4060                     /* initialise cache */
4061                     const I32 size = (PL_reg_maxiter + 7)/8;
4062                     if (PL_reg_poscache) {
4063                         if ((I32)PL_reg_poscache_size < size) {
4064                             Renew(PL_reg_poscache, size, char);
4065                             PL_reg_poscache_size = size;
4066                         }
4067                         Zero(PL_reg_poscache, size, char);
4068                     }
4069                     else {
4070                         PL_reg_poscache_size = size;
4071                         Newxz(PL_reg_poscache, size, char);
4072                     }
4073                     DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
4074       "%swhilem: Detected a super-linear match, switching on caching%s...\n",
4075                               PL_colors[4], PL_colors[5])
4076                     );
4077                 }
4078
4079                 if (PL_reg_leftiter < 0) {
4080                     /* have we already failed at this position? */
4081                     I32 offset, mask;
4082                     offset  = (scan->flags & 0xf) - 1
4083                                 + (locinput - PL_bostr)  * (scan->flags>>4);
4084                     mask    = 1 << (offset % 8);
4085                     offset /= 8;
4086                     if (PL_reg_poscache[offset] & mask) {
4087                         DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
4088                             "%*s  whilem: (cache) already tried at this position...\n",
4089                             REPORT_CODE_OFF+depth*2, "")
4090                         );
4091                         sayNO; /* cache records failure */
4092                     }
4093                     ST.cache_offset = offset;
4094                     ST.cache_mask   = mask;
4095                 }
4096             }
4097
4098             /* Prefer B over A for minimal matching. */
4099
4100             if (cur_curlyx->u.curlyx.minmod) {
4101                 ST.save_curlyx = cur_curlyx;
4102                 cur_curlyx = cur_curlyx->u.curlyx.prev_curlyx;
4103                 ST.cp = regcppush(ST.save_curlyx->u.curlyx.parenfloor);
4104                 REGCP_SET(ST.lastcp);
4105                 PUSH_YES_STATE_GOTO(WHILEM_B_min, ST.save_curlyx->u.curlyx.B);
4106                 /* NOTREACHED */
4107             }
4108
4109             /* Prefer A over B for maximal matching. */
4110
4111             if (n < cur_curlyx->u.curlyx.max) { /* More greed allowed? */
4112                 ST.cp = regcppush(cur_curlyx->u.curlyx.parenfloor);
4113                 cur_curlyx->u.curlyx.lastloc = locinput;
4114                 REGCP_SET(ST.lastcp);
4115                 PUSH_STATE_GOTO(WHILEM_A_max, cur_curlyx->u.curlyx.A);
4116                 /* NOTREACHED */
4117             }
4118             goto do_whilem_B_max;
4119         }
4120         /* NOTREACHED */
4121
4122         case WHILEM_B_min: /* just matched B in a minimal match */
4123         case WHILEM_B_max: /* just matched B in a maximal match */
4124             cur_curlyx = ST.save_curlyx;
4125             sayYES;
4126             /* NOTREACHED */
4127
4128         case WHILEM_B_max_fail: /* just failed to match B in a maximal match */
4129             cur_curlyx = ST.save_curlyx;
4130             cur_curlyx->u.curlyx.lastloc = ST.save_lastloc;
4131             cur_curlyx->u.curlyx.count--;
4132             CACHEsayNO;
4133             /* NOTREACHED */
4134
4135         case WHILEM_A_min_fail: /* just failed to match A in a minimal match */
4136             REGCP_UNWIND(ST.lastcp);
4137             regcppop(rex);
4138             /* FALL THROUGH */
4139         case WHILEM_A_pre_fail: /* just failed to match even minimal A */
4140             cur_curlyx->u.curlyx.lastloc = ST.save_lastloc;
4141             cur_curlyx->u.curlyx.count--;
4142             CACHEsayNO;
4143             /* NOTREACHED */
4144
4145         case WHILEM_A_max_fail: /* just failed to match A in a maximal match */
4146             REGCP_UNWIND(ST.lastcp);
4147             regcppop(rex);      /* Restore some previous $<digit>s? */
4148             PL_reginput = locinput;
4149             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
4150                 "%*s  whilem: failed, trying continuation...\n",
4151                 REPORT_CODE_OFF+depth*2, "")
4152             );
4153           do_whilem_B_max:
4154             if (cur_curlyx->u.curlyx.count >= REG_INFTY
4155                 && ckWARN(WARN_REGEXP)
4156                 && !(PL_reg_flags & RF_warned))
4157             {
4158                 PL_reg_flags |= RF_warned;
4159                 Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s limit (%d) exceeded",
4160                      "Complex regular subexpression recursion",
4161                      REG_INFTY - 1);
4162             }
4163
4164             /* now try B */
4165             ST.save_curlyx = cur_curlyx;
4166             cur_curlyx = cur_curlyx->u.curlyx.prev_curlyx;
4167             PUSH_YES_STATE_GOTO(WHILEM_B_max, ST.save_curlyx->u.curlyx.B);
4168             /* NOTREACHED */
4169
4170         case WHILEM_B_min_fail: /* just failed to match B in a minimal match */
4171             cur_curlyx = ST.save_curlyx;
4172             REGCP_UNWIND(ST.lastcp);
4173             regcppop(rex);
4174
4175             if (cur_curlyx->u.curlyx.count >= cur_curlyx->u.curlyx.max) {
4176                 /* Maximum greed exceeded */
4177                 if (cur_curlyx->u.curlyx.count >= REG_INFTY
4178                     && ckWARN(WARN_REGEXP)
4179                     && !(PL_reg_flags & RF_warned))
4180                 {
4181                     PL_reg_flags |= RF_warned;
4182                     Perl_warner(aTHX_ packWARN(WARN_REGEXP),
4183                         "%s limit (%d) exceeded",
4184                         "Complex regular subexpression recursion",
4185                         REG_INFTY - 1);
4186                 }
4187                 cur_curlyx->u.curlyx.count--;
4188                 CACHEsayNO;
4189             }
4190
4191             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
4192                 "%*s  trying longer...\n", REPORT_CODE_OFF+depth*2, "")
4193             );
4194             /* Try grabbing another A and see if it helps. */
4195             PL_reginput = locinput;
4196             cur_curlyx->u.curlyx.lastloc = locinput;
4197             ST.cp = regcppush(cur_curlyx->u.curlyx.parenfloor);
4198             REGCP_SET(ST.lastcp);
4199             PUSH_STATE_GOTO(WHILEM_A_min, ST.save_curlyx->u.curlyx.A);
4200             /* NOTREACHED */
4201
4202 #undef  ST
4203 #define ST st->u.branch
4204
4205         case BRANCHJ:       /*  /(...|A|...)/ with long next pointer */
4206             next = scan + ARG(scan);
4207             if (next == scan)
4208                 next = NULL;
4209             scan = NEXTOPER(scan);
4210             /* FALL THROUGH */
4211
4212         case BRANCH:        /*  /(...|A|...)/ */
4213             scan = NEXTOPER(scan); /* scan now points to inner node */
4214             ST.lastparen = *PL_reglastparen;
4215             ST.next_branch = next;
4216             REGCP_SET(ST.cp);
4217             PL_reginput = locinput;
4218
4219             /* Now go into the branch */
4220             if (has_cutgroup) {
4221                 PUSH_YES_STATE_GOTO(BRANCH_next, scan);
4222             } else {
4223                 PUSH_STATE_GOTO(BRANCH_next, scan);
4224             }
4225             /* NOTREACHED */
4226         case CUTGROUP:
4227             PL_reginput = locinput;
4228             sv_yes_mark = st->u.mark.mark_name = scan->flags ? NULL :
4229                 (SV*)rexi->data->data[ ARG( scan ) ];
4230             PUSH_STATE_GOTO(CUTGROUP_next,next);
4231             /* NOTREACHED */
4232         case CUTGROUP_next_fail:
4233             do_cutgroup = 1;
4234             no_final = 1;
4235             if (st->u.mark.mark_name)
4236                 sv_commit = st->u.mark.mark_name;
4237             sayNO;
4238             /* NOTREACHED */
4239         case BRANCH_next:
4240             sayYES;
4241             /* NOTREACHED */
4242         case BRANCH_next_fail: /* that branch failed; try the next, if any */
4243             if (do_cutgroup) {
4244                 do_cutgroup = 0;
4245                 no_final = 0;
4246             }
4247             REGCP_UNWIND(ST.cp);
4248             for (n = *PL_reglastparen; n > ST.lastparen; n--)
4249                 PL_regoffs[n].end = -1;
4250             *PL_reglastparen = n;
4251             /*dmq: *PL_reglastcloseparen = n; */
4252             scan = ST.next_branch;
4253             /* no more branches? */
4254             if (!scan || (OP(scan) != BRANCH && OP(scan) != BRANCHJ)) {
4255                 DEBUG_EXECUTE_r({
4256                     PerlIO_printf( Perl_debug_log,
4257                         "%*s  %sBRANCH failed...%s\n",
4258                         REPORT_CODE_OFF+depth*2, "",
4259                         PL_colors[4],
4260                         PL_colors[5] );
4261                 });
4262                 sayNO_SILENT;
4263             }
4264             continue; /* execute next BRANCH[J] op */
4265             /* NOTREACHED */
4266
4267         case MINMOD:
4268             minmod = 1;
4269             break;
4270
4271 #undef  ST
4272 #define ST st->u.curlym
4273
4274         case CURLYM:    /* /A{m,n}B/ where A is fixed-length */
4275
4276             /* This is an optimisation of CURLYX that enables us to push
4277              * only a single backtracking state, no matter now many matches
4278              * there are in {m,n}. It relies on the pattern being constant
4279              * length, with no parens to influence future backrefs
4280              */
4281
4282             ST.me = scan;
4283             scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
4284
4285             /* if paren positive, emulate an OPEN/CLOSE around A */
4286             if (ST.me->flags) {
4287                 U32 paren = ST.me->flags;
4288                 if (paren > PL_regsize)
4289                     PL_regsize = paren;
4290                 if (paren > *PL_reglastparen)
4291                     *PL_reglastparen = paren;
4292                 scan += NEXT_OFF(scan); /* Skip former OPEN. */
4293             }
4294             ST.A = scan;
4295             ST.B = next;
4296             ST.alen = 0;
4297             ST.count = 0;
4298             ST.minmod = minmod;
4299             minmod = 0;
4300             ST.c1 = CHRTEST_UNINIT;
4301             REGCP_SET(ST.cp);
4302
4303             if (!(ST.minmod ? ARG1(ST.me) : ARG2(ST.me))) /* min/max */
4304                 goto curlym_do_B;
4305
4306           curlym_do_A: /* execute the A in /A{m,n}B/  */
4307             PL_reginput = locinput;
4308             PUSH_YES_STATE_GOTO(CURLYM_A, ST.A); /* match A */
4309             /* NOTREACHED */
4310
4311         case CURLYM_A: /* we've just matched an A */
4312             locinput = st->locinput;
4313             nextchr = UCHARAT(locinput);
4314
4315             ST.count++;
4316             /* after first match, determine A's length: u.curlym.alen */
4317             if (ST.count == 1) {
4318                 if (PL_reg_match_utf8) {
4319                     char *s = locinput;
4320                     while (s < PL_reginput) {
4321                         ST.alen++;
4322                         s += UTF8SKIP(s);
4323                     }
4324                 }
4325                 else {
4326                     ST.alen = PL_reginput - locinput;
4327                 }
4328                 if (ST.alen == 0)
4329                     ST.count = ST.minmod ? ARG1(ST.me) : ARG2(ST.me);
4330             }
4331             DEBUG_EXECUTE_r(
4332                 PerlIO_printf(Perl_debug_log,
4333                           "%*s  CURLYM now matched %"IVdf" times, len=%"IVdf"...\n",
4334                           (int)(REPORT_CODE_OFF+(depth*2)), "",
4335                           (IV) ST.count, (IV)ST.alen)
4336             );
4337
4338             locinput = PL_reginput;
4339
4340             if (cur_eval && cur_eval->u.eval.close_paren &&
4341                 cur_eval->u.eval.close_paren == (U32)ST.me->flags)
4342                 goto fake_end;
4343
4344             if ( ST.count < (ST.minmod ? ARG1(ST.me) : ARG2(ST.me)) )
4345                 goto curlym_do_A; /* try to match another A */
4346             goto curlym_do_B; /* try to match B */
4347
4348         case CURLYM_A_fail: /* just failed to match an A */
4349             REGCP_UNWIND(ST.cp);
4350
4351             if (ST.minmod || ST.count < ARG1(ST.me) /* min*/
4352                 || (cur_eval && cur_eval->u.eval.close_paren &&
4353                     cur_eval->u.eval.close_paren == (U32)ST.me->flags))
4354                 sayNO;
4355
4356           curlym_do_B: /* execute the B in /A{m,n}B/  */
4357             PL_reginput = locinput;
4358             if (ST.c1 == CHRTEST_UNINIT) {
4359                 /* calculate c1 and c2 for possible match of 1st char
4360                  * following curly */
4361                 ST.c1 = ST.c2 = CHRTEST_VOID;
4362                 if (HAS_TEXT(ST.B) || JUMPABLE(ST.B)) {
4363                     regnode *text_node = ST.B;
4364                     if (! HAS_TEXT(text_node))
4365                         FIND_NEXT_IMPT(text_node);
4366                     /* this used to be
4367
4368                         (HAS_TEXT(text_node) && PL_regkind[OP(text_node)] == EXACT)
4369
4370                         But the former is redundant in light of the latter.
4371
4372                         if this changes back then the macro for
4373                         IS_TEXT and friends need to change.
4374                      */
4375                     if (PL_regkind[OP(text_node)] == EXACT)
4376                     {
4377
4378                         ST.c1 = (U8)*STRING(text_node);
4379                         ST.c2 =
4380                             (IS_TEXTF(text_node))
4381                             ? PL_fold[ST.c1]
4382                             : (IS_TEXTFL(text_node))
4383                                 ? PL_fold_locale[ST.c1]
4384                                 : ST.c1;
4385                     }
4386                 }
4387             }
4388
4389             DEBUG_EXECUTE_r(
4390                 PerlIO_printf(Perl_debug_log,
4391                     "%*s  CURLYM trying tail with matches=%"IVdf"...\n",
4392                     (int)(REPORT_CODE_OFF+(depth*2)),
4393                     "", (IV)ST.count)
4394                 );
4395             if (ST.c1 != CHRTEST_VOID
4396                     && UCHARAT(PL_reginput) != ST.c1
4397                     && UCHARAT(PL_reginput) != ST.c2)
4398             {
4399                 /* simulate B failing */
4400                 DEBUG_OPTIMISE_r(
4401                     PerlIO_printf(Perl_debug_log,
4402                         "%*s  CURLYM Fast bail c1=%"IVdf" c2=%"IVdf"\n",
4403                         (int)(REPORT_CODE_OFF+(depth*2)),"",
4404                         (IV)ST.c1,(IV)ST.c2
4405                 ));
4406                 state_num = CURLYM_B_fail;
4407                 goto reenter_switch;
4408             }
4409
4410             if (ST.me->flags) {
4411                 /* mark current A as captured */
4412                 I32 paren = ST.me->flags;
4413                 if (ST.count) {
4414                     PL_regoffs[paren].start
4415                         = HOPc(PL_reginput, -ST.alen) - PL_bostr;
4416                     PL_regoffs[paren].end = PL_reginput - PL_bostr;
4417                     /*dmq: *PL_reglastcloseparen = paren; */
4418                 }
4419                 else
4420                     PL_regoffs[paren].end = -1;
4421                 if (cur_eval && cur_eval->u.eval.close_paren &&
4422                     cur_eval->u.eval.close_paren == (U32)ST.me->flags)
4423                 {
4424                     if (ST.count)
4425                         goto fake_end;
4426                     else
4427                         sayNO;
4428                 }
4429             }
4430
4431             PUSH_STATE_GOTO(CURLYM_B, ST.B); /* match B */
4432             /* NOTREACHED */
4433
4434         case CURLYM_B_fail: /* just failed to match a B */
4435             REGCP_UNWIND(ST.cp);
4436             if (ST.minmod) {
4437                 if (ST.count == ARG2(ST.me) /* max */)
4438                     sayNO;
4439                 goto curlym_do_A; /* try to match a further A */
4440             }
4441             /* backtrack one A */
4442             if (ST.count == ARG1(ST.me) /* min */)
4443                 sayNO;
4444             ST.count--;
4445             locinput = HOPc(locinput, -ST.alen);
4446             goto curlym_do_B; /* try to match B */
4447
4448 #undef ST
4449 #define ST st->u.curly
4450
4451 #define CURLY_SETPAREN(paren, success) \
4452     if (paren) { \
4453         if (success) { \
4454             PL_regoffs[paren].start = HOPc(locinput, -1) - PL_bostr; \
4455             PL_regoffs[paren].end = locinput - PL_bostr; \
4456             *PL_reglastcloseparen = paren; \
4457         } \
4458         else \
4459             PL_regoffs[paren].end = -1; \
4460     }
4461
4462         case STAR:              /*  /A*B/ where A is width 1 */
4463             ST.paren = 0;
4464             ST.min = 0;
4465             ST.max = REG_INFTY;
4466             scan = NEXTOPER(scan);
4467             goto repeat;
4468         case PLUS:              /*  /A+B/ where A is width 1 */
4469             ST.paren = 0;
4470             ST.min = 1;
4471             ST.max = REG_INFTY;
4472             scan = NEXTOPER(scan);
4473             goto repeat;
4474         case CURLYN:            /*  /(A){m,n}B/ where A is width 1 */
4475             ST.paren = scan->flags;     /* Which paren to set */
4476             if (ST.paren > PL_regsize)
4477                 PL_regsize = ST.paren;
4478             if (ST.paren > *PL_reglastparen)
4479                 *PL_reglastparen = ST.paren;
4480             ST.min = ARG1(scan);  /* min to match */
4481             ST.max = ARG2(scan);  /* max to match */
4482             if (cur_eval && cur_eval->u.eval.close_paren &&
4483                 cur_eval->u.eval.close_paren == (U32)ST.paren) {
4484                 ST.min=1;
4485                 ST.max=1;
4486             }
4487             scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
4488             goto repeat;
4489         case CURLY:             /*  /A{m,n}B/ where A is width 1 */
4490             ST.paren = 0;
4491             ST.min = ARG1(scan);  /* min to match */
4492             ST.max = ARG2(scan);  /* max to match */
4493             scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
4494           repeat:
4495             /*
4496             * Lookahead to avoid useless match attempts
4497             * when we know what character comes next.
4498             *
4499             * Used to only do .*x and .*?x, but now it allows
4500             * for )'s, ('s and (?{ ... })'s to be in the way
4501             * of the quantifier and the EXACT-like node.  -- japhy
4502             */
4503
4504             if (ST.min > ST.max) /* XXX make this a compile-time check? */
4505                 sayNO;
4506             if (HAS_TEXT(next) || JUMPABLE(next)) {
4507                 U8 *s;
4508                 regnode *text_node = next;
4509
4510                 if (! HAS_TEXT(text_node))
4511                     FIND_NEXT_IMPT(text_node);
4512
4513                 if (! HAS_TEXT(text_node))
4514                     ST.c1 = ST.c2 = CHRTEST_VOID;
4515                 else {
4516                     if ( PL_regkind[OP(text_node)] != EXACT ) {
4517                         ST.c1 = ST.c2 = CHRTEST_VOID;
4518                         goto assume_ok_easy;
4519                     }
4520                     else
4521                         s = (U8*)STRING(text_node);
4522
4523                     /*  Currently we only get here when
4524
4525                         PL_rekind[OP(text_node)] == EXACT
4526
4527                         if this changes back then the macro for IS_TEXT and
4528                         friends need to change. */
4529                     if (!UTF) {
4530                         ST.c2 = ST.c1 = *s;
4531                         if (IS_TEXTF(text_node))
4532                             ST.c2 = PL_fold[ST.c1];
4533                         else if (IS_TEXTFL(text_node))
4534                             ST.c2 = PL_fold_locale[ST.c1];
4535                     }
4536                     else { /* UTF */
4537                         if (IS_TEXTF(text_node)) {
4538                              STRLEN ulen1, ulen2;
4539                              U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
4540                              U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
4541
4542                              to_utf8_lower((U8*)s, tmpbuf1, &ulen1);
4543                              to_utf8_upper((U8*)s, tmpbuf2, &ulen2);
4544 #ifdef EBCDIC
4545                              ST.c1 = utf8n_to_uvchr(tmpbuf1, UTF8_MAXLEN, 0,
4546                                                     ckWARN(WARN_UTF8) ?
4547                                                     0 : UTF8_ALLOW_ANY);
4548                              ST.c2 = utf8n_to_uvchr(tmpbuf2, UTF8_MAXLEN, 0,
4549                                                     ckWARN(WARN_UTF8) ?
4550                                                     0 : UTF8_ALLOW_ANY);
4551 #else
4552                              ST.c1 = utf8n_to_uvuni(tmpbuf1, UTF8_MAXBYTES, 0,
4553                                                     uniflags);
4554                              ST.c2 = utf8n_to_uvuni(tmpbuf2, UTF8_MAXBYTES, 0,
4555                                                     uniflags);
4556 #endif
4557                         }
4558                         else {
4559                             ST.c2 = ST.c1 = utf8n_to_uvchr(s, UTF8_MAXBYTES, 0,
4560                                                      uniflags);
4561                         }
4562                     }
4563                 }
4564             }
4565             else
4566                 ST.c1 = ST.c2 = CHRTEST_VOID;
4567         assume_ok_easy:
4568
4569             ST.A = scan;
4570             ST.B = next;
4571             PL_reginput = locinput;
4572             if (minmod) {
4573                 minmod = 0;
4574                 if (ST.min && regrepeat(rex, ST.A, ST.min, depth) < ST.min)
4575                     sayNO;
4576                 ST.count = ST.min;
4577                 locinput = PL_reginput;
4578                 REGCP_SET(ST.cp);
4579                 if (ST.c1 == CHRTEST_VOID)
4580                     goto curly_try_B_min;
4581
4582                 ST.oldloc = locinput;
4583
4584                 /* set ST.maxpos to the furthest point along the
4585                  * string that could possibly match */
4586                 if  (ST.max == REG_INFTY) {
4587                     ST.maxpos = PL_regeol - 1;
4588                     if (do_utf8)
4589                         while (UTF8_IS_CONTINUATION(*(U8*)ST.maxpos))
4590                             ST.maxpos--;
4591                 }
4592                 else if (do_utf8) {
4593                     int m = ST.max - ST.min;
4594                     for (ST.maxpos = locinput;
4595                          m >0 && ST.maxpos + UTF8SKIP(ST.maxpos) <= PL_regeol; m--)
4596                         ST.maxpos += UTF8SKIP(ST.maxpos);
4597                 }
4598                 else {
4599                     ST.maxpos = locinput + ST.max - ST.min;
4600                     if (ST.maxpos >= PL_regeol)
4601                         ST.maxpos = PL_regeol - 1;
4602                 }
4603                 goto curly_try_B_min_known;
4604
4605             }
4606             else {
4607                 ST.count = regrepeat(rex, ST.A, ST.max, depth);
4608                 locinput = PL_reginput;
4609                 if (ST.count < ST.min)
4610                     sayNO;
4611                 if ((ST.count > ST.min)
4612                     && (PL_regkind[OP(ST.B)] == EOL) && (OP(ST.B) != MEOL))
4613                 {
4614                     /* A{m,n} must come at the end of the string, there's
4615                      * no point in backing off ... */
4616                     ST.min = ST.count;
4617                     /* ...except that $ and \Z can match before *and* after
4618                        newline at the end.  Consider "\n\n" =~ /\n+\Z\n/.
4619                        We may back off by one in this case. */
4620                     if (UCHARAT(PL_reginput - 1) == '\n' && OP(ST.B) != EOS)
4621                         ST.min--;
4622                 }
4623                 REGCP_SET(ST.cp);
4624                 goto curly_try_B_max;
4625             }
4626             /* NOTREACHED */
4627
4628
4629         case CURLY_B_min_known_fail:
4630             /* failed to find B in a non-greedy match where c1,c2 valid */
4631             if (ST.paren && ST.count)
4632                 PL_regoffs[ST.paren].end = -1;
4633
4634             PL_reginput = locinput;     /* Could be reset... */
4635             REGCP_UNWIND(ST.cp);
4636             /* Couldn't or didn't -- move forward. */
4637             ST.oldloc = locinput;
4638             if (do_utf8)
4639                 locinput += UTF8SKIP(locinput);
4640             else
4641                 locinput++;
4642             ST.count++;
4643           curly_try_B_min_known:
4644              /* find the next place where 'B' could work, then call B */
4645             {
4646                 int n;
4647                 if (do_utf8) {
4648                     n = (ST.oldloc == locinput) ? 0 : 1;
4649                     if (ST.c1 == ST.c2) {
4650                         STRLEN len;
4651                         /* set n to utf8_distance(oldloc, locinput) */
4652                         while (locinput <= ST.maxpos &&
4653                                utf8n_to_uvchr((U8*)locinput,
4654                                               UTF8_MAXBYTES, &len,
4655                                               uniflags) != (UV)ST.c1) {
4656                             locinput += len;
4657                             n++;
4658                         }
4659                     }
4660                     else {
4661                         /* set n to utf8_distance(oldloc, locinput) */
4662                         while (locinput <= ST.maxpos) {
4663                             STRLEN len;
4664                             const UV c = utf8n_to_uvchr((U8*)locinput,
4665                                                   UTF8_MAXBYTES, &len,
4666                                                   uniflags);
4667                             if (c == (UV)ST.c1 || c == (UV)ST.c2)
4668                                 break;
4669                             locinput += len;
4670                             n++;
4671                         }
4672                     }
4673                 }
4674                 else {
4675                     if (ST.c1 == ST.c2) {
4676                         while (locinput <= ST.maxpos &&
4677                                UCHARAT(locinput) != ST.c1)
4678                             locinput++;
4679                     }
4680                     else {
4681                         while (locinput <= ST.maxpos
4682                                && UCHARAT(locinput) != ST.c1
4683                                && UCHARAT(locinput) != ST.c2)
4684                             locinput++;
4685                     }
4686                     n = locinput - ST.oldloc;
4687                 }
4688                 if (locinput > ST.maxpos)
4689                     sayNO;
4690                 /* PL_reginput == oldloc now */
4691                 if (n) {
4692                     ST.count += n;
4693                     if (regrepeat(rex, ST.A, n, depth) < n)
4694                         sayNO;
4695                 }
4696                 PL_reginput = locinput;
4697                 CURLY_SETPAREN(ST.paren, ST.count);
4698                 if (cur_eval && cur_eval->u.eval.close_paren &&
4699                     cur_eval->u.eval.close_paren == (U32)ST.paren) {
4700                     goto fake_end;
4701                 }
4702                 PUSH_STATE_GOTO(CURLY_B_min_known, ST.B);
4703             }
4704             /* NOTREACHED */
4705
4706
4707         case CURLY_B_min_fail:
4708             /* failed to find B in a non-greedy match where c1,c2 invalid */
4709             if (ST.paren && ST.count)
4710                 PL_regoffs[ST.paren].end = -1;
4711
4712             REGCP_UNWIND(ST.cp);
4713             /* failed -- move forward one */
4714             PL_reginput = locinput;
4715             if (regrepeat(rex, ST.A, 1, depth)) {
4716                 ST.count++;
4717                 locinput = PL_reginput;
4718                 if (ST.count <= ST.max || (ST.max == REG_INFTY &&
4719                         ST.count > 0)) /* count overflow ? */
4720                 {
4721                   curly_try_B_min:
4722                     CURLY_SETPAREN(ST.paren, ST.count);
4723                     if (cur_eval && cur_eval->u.eval.close_paren &&
4724                         cur_eval->u.eval.close_paren == (U32)ST.paren) {
4725                         goto fake_end;
4726                     }
4727                     PUSH_STATE_GOTO(CURLY_B_min, ST.B);
4728                 }
4729             }
4730             sayNO;
4731             /* NOTREACHED */
4732
4733
4734         curly_try_B_max:
4735             /* a successful greedy match: now try to match B */
4736             if (cur_eval && cur_eval->u.eval.close_paren &&
4737                 cur_eval->u.eval.close_paren == (U32)ST.paren) {
4738                 goto fake_end;
4739             }
4740             {
4741                 UV c = 0;
4742                 if (ST.c1 != CHRTEST_VOID)
4743                     c = do_utf8 ? utf8n_to_uvchr((U8*)PL_reginput,
4744                                            UTF8_MAXBYTES, 0, uniflags)
4745                                 : (UV) UCHARAT(PL_reginput);
4746                 /* If it could work, try it. */
4747                 if (ST.c1 == CHRTEST_VOID || c == (UV)ST.c1 || c == (UV)ST.c2) {
4748                     CURLY_SETPAREN(ST.paren, ST.count);
4749                     PUSH_STATE_GOTO(CURLY_B_max, ST.B);
4750                     /* NOTREACHED */
4751                 }
4752             }
4753             /* FALL THROUGH */
4754         case CURLY_B_max_fail:
4755             /* failed to find B in a greedy match */
4756             if (ST.paren && ST.count)
4757                 PL_regoffs[ST.paren].end = -1;
4758
4759             REGCP_UNWIND(ST.cp);
4760             /*  back up. */
4761             if (--ST.count < ST.min)
4762                 sayNO;
4763             PL_reginput = locinput = HOPc(locinput, -1);
4764             goto curly_try_B_max;
4765
4766 #undef ST
4767
4768         case END:
4769             fake_end:
4770             if (cur_eval) {
4771                 /* we've just finished A in /(??{A})B/; now continue with B */
4772                 I32 tmpix;
4773                 st->u.eval.toggle_reg_flags
4774                             = cur_eval->u.eval.toggle_reg_flags;
4775                 PL_reg_flags ^= st->u.eval.toggle_reg_flags;
4776
4777                 st->u.eval.prev_rex = rex;              /* inner */
4778                 SETREX(rex,cur_eval->u.eval.prev_rex);
4779                 rexi = RXi_GET(rex);
4780                 cur_curlyx = cur_eval->u.eval.prev_curlyx;
4781                 ReREFCNT_inc(rex);
4782                 st->u.eval.cp = regcppush(0);   /* Save *all* the positions. */
4783                 REGCP_SET(st->u.eval.lastcp);
4784                 PL_reginput = locinput;
4785
4786                 /* Restore parens of the outer rex without popping the
4787                  * savestack */
4788                 tmpix = PL_savestack_ix;
4789                 PL_savestack_ix = cur_eval->u.eval.lastcp;
4790                 regcppop(rex);
4791                 PL_savestack_ix = tmpix;
4792
4793                 st->u.eval.prev_eval = cur_eval;
4794                 cur_eval = cur_eval->u.eval.prev_eval;
4795                 DEBUG_EXECUTE_r(
4796                     PerlIO_printf(Perl_debug_log, "%*s  EVAL trying tail ... %"UVxf"\n",
4797                                       REPORT_CODE_OFF+depth*2, "",PTR2UV(cur_eval)););
4798                 if ( nochange_depth )
4799                     nochange_depth--;
4800
4801                 PUSH_YES_STATE_GOTO(EVAL_AB,
4802                         st->u.eval.prev_eval->u.eval.B); /* match B */
4803             }
4804
4805             if (locinput < reginfo->till) {
4806                 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
4807                                       "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
4808                                       PL_colors[4],
4809                                       (long)(locinput - PL_reg_starttry),
4810                                       (long)(reginfo->till - PL_reg_starttry),
4811                                       PL_colors[5]));
4812
4813                 sayNO_SILENT;           /* Cannot match: too short. */
4814             }
4815             PL_reginput = locinput;     /* put where regtry can find it */
4816             sayYES;                     /* Success! */
4817
4818         case SUCCEED: /* successful SUSPEND/UNLESSM/IFMATCH/CURLYM */
4819             DEBUG_EXECUTE_r(
4820             PerlIO_printf(Perl_debug_log,
4821                 "%*s  %ssubpattern success...%s\n",
4822                 REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5]));
4823             PL_reginput = locinput;     /* put where regtry can find it */
4824             sayYES;                     /* Success! */
4825
4826 #undef  ST
4827 #define ST st->u.ifmatch
4828
4829         case SUSPEND:   /* (?>A) */
4830             ST.wanted = 1;
4831             PL_reginput = locinput;
4832             goto do_ifmatch;
4833
4834         case UNLESSM:   /* -ve lookaround: (?!A), or with flags, (?<!A) */
4835             ST.wanted = 0;
4836             goto ifmatch_trivial_fail_test;
4837
4838         case IFMATCH:   /* +ve lookaround: (?=A), or with flags, (?<=A) */
4839             ST.wanted = 1;
4840           ifmatch_trivial_fail_test:
4841             if (scan->flags) {
4842                 char * const s = HOPBACKc(locinput, scan->flags);
4843                 if (!s) {
4844                     /* trivial fail */
4845                     if (logical) {
4846                         logical = 0;
4847                         sw = 1 - (bool)ST.wanted;
4848                     }
4849                     else if (ST.wanted)
4850                         sayNO;
4851                     next = scan + ARG(scan);
4852                     if (next == scan)
4853                         next = NULL;
4854                     break;
4855                 }
4856                 PL_reginput = s;
4857             }
4858             else
4859                 PL_reginput = locinput;
4860
4861           do_ifmatch:
4862             ST.me = scan;
4863             ST.logical = logical;
4864             /* execute body of (?...A) */
4865             PUSH_YES_STATE_GOTO(IFMATCH_A, NEXTOPER(NEXTOPER(scan)));
4866             /* NOTREACHED */
4867
4868         case IFMATCH_A_fail: /* body of (?...A) failed */
4869             ST.wanted = !ST.wanted;
4870             /* FALL THROUGH */
4871
4872         case IFMATCH_A: /* body of (?...A) succeeded */
4873             if (ST.logical) {
4874                 sw = (bool)ST.wanted;
4875             }
4876             else if (!ST.wanted)
4877                 sayNO;
4878
4879             if (OP(ST.me) == SUSPEND)
4880                 locinput = PL_reginput;
4881             else {
4882                 locinput = PL_reginput = st->locinput;
4883                 nextchr = UCHARAT(locinput);
4884             }
4885             scan = ST.me + ARG(ST.me);
4886             if (scan == ST.me)
4887                 scan = NULL;
4888             continue; /* execute B */
4889
4890 #undef ST
4891
4892         case LONGJMP:
4893             next = scan + ARG(scan);
4894             if (next == scan)
4895                 next = NULL;
4896             break;
4897         case COMMIT:
4898             reginfo->cutpoint = PL_regeol;
4899             /* FALLTHROUGH */
4900         case PRUNE:
4901             PL_reginput = locinput;
4902             if (!scan->flags)
4903                 sv_yes_mark = sv_commit = (SV*)rexi->data->data[ ARG( scan ) ];
4904             PUSH_STATE_GOTO(COMMIT_next,next);
4905             /* NOTREACHED */
4906         case COMMIT_next_fail:
4907             no_final = 1;
4908             /* FALLTHROUGH */
4909         case OPFAIL:
4910             sayNO;
4911             /* NOTREACHED */
4912
4913 #define ST st->u.mark
4914         case MARKPOINT:
4915             ST.prev_mark = mark_state;
4916             ST.mark_name = sv_commit = sv_yes_mark
4917                 = (SV*)rexi->data->data[ ARG( scan ) ];
4918             mark_state = st;
4919             ST.mark_loc = PL_reginput = locinput;
4920             PUSH_YES_STATE_GOTO(MARKPOINT_next,next);
4921             /* NOTREACHED */
4922         case MARKPOINT_next:
4923             mark_state = ST.prev_mark;
4924             sayYES;
4925             /* NOTREACHED */
4926         case MARKPOINT_next_fail:
4927             if (popmark && sv_eq(ST.mark_name,popmark))
4928             {
4929                 if (ST.mark_loc > startpoint)
4930                     reginfo->cutpoint = HOPBACKc(ST.mark_loc, 1);
4931                 popmark = NULL; /* we found our mark */
4932                 sv_commit = ST.mark_name;
4933
4934                 DEBUG_EXECUTE_r({
4935                         PerlIO_printf(Perl_debug_log,
4936                             "%*s  %ssetting cutpoint to mark:%"SVf"...%s\n",
4937                             REPORT_CODE_OFF+depth*2, "",
4938                             PL_colors[4], SVfARG(sv_commit), PL_colors[5]);
4939                 });
4940             }
4941             mark_state = ST.prev_mark;
4942             sv_yes_mark = mark_state ?
4943                 mark_state->u.mark.mark_name : NULL;
4944             sayNO;
4945             /* NOTREACHED */
4946         case SKIP:
4947             PL_reginput = locinput;
4948             if (scan->flags) {
4949                 /* (*SKIP) : if we fail we cut here*/
4950                 ST.mark_name = NULL;
4951                 ST.mark_loc = locinput;
4952                 PUSH_STATE_GOTO(SKIP_next,next);
4953             } else {
4954                 /* (*SKIP:NAME) : if there is a (*MARK:NAME) fail where it was,
4955                    otherwise do nothing.  Meaning we need to scan
4956                  */
4957                 regmatch_state *cur = mark_state;
4958                 SV *find = (SV*)rexi->data->data[ ARG( scan ) ];
4959
4960                 while (cur) {
4961                     if ( sv_eq( cur->u.mark.mark_name,
4962                                 find ) )
4963                     {
4964                         ST.mark_name = find;
4965                         PUSH_STATE_GOTO( SKIP_next, next );
4966                     }
4967                     cur = cur->u.mark.prev_mark;
4968                 }
4969             }
4970             /* Didn't find our (*MARK:NAME) so ignore this (*SKIP:NAME) */
4971             break;
4972         case SKIP_next_fail:
4973             if (ST.mark_name) {
4974                 /* (*CUT:NAME) - Set up to search for the name as we
4975                    collapse the stack*/
4976                 popmark = ST.mark_name;
4977             } else {
4978                 /* (*CUT) - No name, we cut here.*/
4979                 if (ST.mark_loc > startpoint)
4980                     reginfo->cutpoint = HOPBACKc(ST.mark_loc, 1);
4981                 /* but we set sv_commit to latest mark_name if there
4982                    is one so they can test to see how things lead to this
4983                    cut */
4984                 if (mark_state)
4985                     sv_commit=mark_state->u.mark.mark_name;
4986             }
4987             no_final = 1;
4988             sayNO;
4989             /* NOTREACHED */
4990 #undef ST
4991         case FOLDCHAR:
4992             n = ARG(scan);
4993             if ( n == (U32)what_len_TRICKYFOLD(locinput,do_utf8,ln) ) {
4994                 locinput += ln;
4995             } else if ( 0xDF == n && !do_utf8 && !UTF ) {
4996                 sayNO;
4997             } else  {
4998                 U8 folded[UTF8_MAXBYTES_CASE+1];
4999                 STRLEN foldlen;
5000                 const char * const l = locinput;
5001                 char *e = PL_regeol;
5002                 to_uni_fold(n, folded, &foldlen);
5003
5004                 if (ibcmp_utf8((const char*) folded, 0,  foldlen, 1,
5005                                l, &e, 0,  do_utf8)) {
5006                         sayNO;
5007                 }
5008                 locinput = e;
5009             }
5010             nextchr = UCHARAT(locinput);
5011             break;
5012         case LNBREAK:
5013             if ((n=is_LNBREAK(locinput,do_utf8))) {
5014                 locinput += n;
5015                 nextchr = UCHARAT(locinput);
5016             } else
5017                 sayNO;
5018             break;
5019
5020 #define CASE_CLASS(nAmE)                              \
5021         case nAmE:                                    \
5022             if ((n=is_##nAmE(locinput,do_utf8))) {    \
5023                 locinput += n;                        \
5024                 nextchr = UCHARAT(locinput);          \
5025             } else                                    \
5026                 sayNO;                                \
5027             break;                                    \
5028         case N##nAmE:                                 \
5029             if ((n=is_##nAmE(locinput,do_utf8))) {    \
5030                 sayNO;                                \
5031             } else {                                  \
5032                 locinput += UTF8SKIP(locinput);       \
5033                 nextchr = UCHARAT(locinput);          \
5034             }                                         \
5035             break
5036
5037         CASE_CLASS(VERTWS);
5038         CASE_CLASS(HORIZWS);
5039 #undef CASE_CLASS
5040
5041         default:
5042             PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
5043                           PTR2UV(scan), OP(scan));
5044             Perl_croak(aTHX_ "regexp memory corruption");
5045
5046         } /* end switch */
5047
5048         /* switch break jumps here */
5049         scan = next; /* prepare to execute the next op and ... */
5050         continue;    /* ... jump back to the top, reusing st */
5051         /* NOTREACHED */
5052
5053       push_yes_state:
5054         /* push a state that backtracks on success */
5055         st->u.yes.prev_yes_state = yes_state;
5056         yes_state = st;
5057         /* FALL THROUGH */
5058       push_state:
5059         /* push a new regex state, then continue at scan  */
5060         {
5061             regmatch_state *newst;
5062
5063             DEBUG_STACK_r({
5064                 regmatch_state *cur = st;
5065                 regmatch_state *curyes = yes_state;
5066                 int curd = depth;
5067                 regmatch_slab *slab = PL_regmatch_slab;
5068                 for (;curd > -1;cur--,curd--) {
5069                     if (cur < SLAB_FIRST(slab)) {
5070                         slab = slab->prev;
5071                         cur = SLAB_LAST(slab);
5072                     }
5073                     PerlIO_printf(Perl_error_log, "%*s#%-3d %-10s %s\n",
5074                         REPORT_CODE_OFF + 2 + depth * 2,"",
5075                         curd, PL_reg_name[cur->resume_state],
5076                         (curyes == cur) ? "yes" : ""
5077                     );
5078                     if (curyes == cur)
5079                         curyes = cur->u.yes.prev_yes_state;
5080                 }
5081             } else
5082                 DEBUG_STATE_pp("push")
5083             );
5084             depth++;
5085             st->locinput = locinput;
5086             newst = st+1;
5087             if (newst >  SLAB_LAST(PL_regmatch_slab))
5088                 newst = S_push_slab(aTHX);
5089             PL_regmatch_state = newst;
5090
5091             locinput = PL_reginput;
5092             nextchr = UCHARAT(locinput);
5093             st = newst;
5094             continue;
5095             /* NOTREACHED */
5096         }
5097     }
5098
5099     /*
5100     * We get here only if there's trouble -- normally "case END" is
5101     * the terminating point.
5102     */
5103     Perl_croak(aTHX_ "corrupted regexp pointers");
5104     /*NOTREACHED*/
5105     sayNO;
5106
5107 yes:
5108     if (yes_state) {
5109         /* we have successfully completed a subexpression, but we must now
5110          * pop to the state marked by yes_state and continue from there */
5111         assert(st != yes_state);
5112 #ifdef DEBUGGING
5113         while (st != yes_state) {
5114             st--;
5115             if (st < SLAB_FIRST(PL_regmatch_slab)) {
5116                 PL_regmatch_slab = PL_regmatch_slab->prev;
5117                 st = SLAB_LAST(PL_regmatch_slab);
5118             }
5119             DEBUG_STATE_r({
5120                 if (no_final) {
5121                     DEBUG_STATE_pp("pop (no final)");
5122                 } else {
5123                     DEBUG_STATE_pp("pop (yes)");
5124                 }
5125             });
5126             depth--;
5127         }
5128 #else
5129         while (yes_state < SLAB_FIRST(PL_regmatch_slab)
5130             || yes_state > SLAB_LAST(PL_regmatch_slab))
5131         {
5132             /* not in this slab, pop slab */
5133             depth -= (st - SLAB_FIRST(PL_regmatch_slab) + 1);
5134             PL_regmatch_slab = PL_regmatch_slab->prev;
5135             st = SLAB_LAST(PL_regmatch_slab);
5136         }
5137         depth -= (st - yes_state);
5138 #endif
5139         st = yes_state;
5140         yes_state = st->u.yes.prev_yes_state;
5141         PL_regmatch_state = st;
5142
5143         if (no_final) {
5144             locinput= st->locinput;
5145             nextchr = UCHARAT(locinput);
5146         }
5147         state_num = st->resume_state + no_final;
5148         goto reenter_switch;
5149     }
5150
5151     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
5152                           PL_colors[4], PL_colors[5]));
5153
5154     if (PL_reg_eval_set) {
5155         /* each successfully executed (?{...}) block does the equivalent of
5156          *   local $^R = do {...}
5157          * When popping the save stack, all these locals would be undone;
5158          * bypass this by setting the outermost saved $^R to the latest
5159          * value */
5160         if (oreplsv != GvSV(PL_replgv))
5161             sv_setsv(oreplsv, GvSV(PL_replgv));
5162     }
5163     result = 1;
5164     goto final_exit;
5165
5166 no:
5167     DEBUG_EXECUTE_r(
5168         PerlIO_printf(Perl_debug_log,
5169             "%*s  %sfailed...%s\n",
5170             REPORT_CODE_OFF+depth*2, "",
5171             PL_colors[4], PL_colors[5])
5172         );
5173
5174 no_silent:
5175     if (no_final) {
5176         if (yes_state) {
5177             goto yes;
5178         } else {
5179             goto final_exit;
5180         }
5181     }
5182     if (depth) {
5183         /* there's a previous state to backtrack to */
5184         st--;
5185         if (st < SLAB_FIRST(PL_regmatch_slab)) {
5186             PL_regmatch_slab = PL_regmatch_slab->prev;
5187             st = SLAB_LAST(PL_regmatch_slab);
5188         }
5189         PL_regmatch_state = st;
5190         locinput= st->locinput;
5191         nextchr = UCHARAT(locinput);
5192
5193         DEBUG_STATE_pp("pop");
5194         depth--;
5195         if (yes_state == st)
5196             yes_state = st->u.yes.prev_yes_state;
5197
5198         state_num = st->resume_state + 1; /* failure = success + 1 */
5199         goto reenter_switch;
5200     }
5201     result = 0;
5202
5203   final_exit:
5204     if (rex->intflags & PREGf_VERBARG_SEEN) {
5205         SV *sv_err = get_sv("REGERROR", 1);
5206         SV *sv_mrk = get_sv("REGMARK", 1);
5207         if (result) {
5208             sv_commit = &PL_sv_no;
5209             if (!sv_yes_mark)
5210                 sv_yes_mark = &PL_sv_yes;
5211         } else {
5212             if (!sv_commit)
5213                 sv_commit = &PL_sv_yes;
5214             sv_yes_mark = &PL_sv_no;
5215         }
5216         sv_setsv(sv_err, sv_commit);
5217         sv_setsv(sv_mrk, sv_yes_mark);
5218     }
5219
5220     /* clean up; in particular, free all slabs above current one */
5221     LEAVE_SCOPE(oldsave);
5222
5223     return result;
5224 }
5225
5226 /*
5227  - regrepeat - repeatedly match something simple, report how many
5228  */
5229 /*
5230  * [This routine now assumes that it will only match on things of length 1.
5231  * That was true before, but now we assume scan - reginput is the count,
5232  * rather than incrementing count on every character.  [Er, except utf8.]]
5233  */
5234 STATIC I32
5235 S_regrepeat(pTHX_ const regexp *prog, const regnode *p, I32 max, int depth)
5236 {
5237     dVAR;
5238     register char *scan;
5239     register I32 c;
5240     register char *loceol = PL_regeol;
5241     register I32 hardcount = 0;
5242     register bool do_utf8 = PL_reg_match_utf8;
5243 #ifndef DEBUGGING
5244     PERL_UNUSED_ARG(depth);
5245 #endif
5246
5247     scan = PL_reginput;
5248     if (max == REG_INFTY)
5249         max = I32_MAX;
5250     else if (max < loceol - scan)
5251         loceol = scan + max;
5252     switch (OP(p)) {
5253     case REG_ANY:
5254         if (do_utf8) {
5255             loceol = PL_regeol;
5256             while (scan < loceol && hardcount < max && *scan != '\n') {
5257                 scan += UTF8SKIP(scan);
5258                 hardcount++;
5259             }
5260         } else {
5261             while (scan < loceol && *scan != '\n')
5262                 scan++;
5263         }
5264         break;
5265     case SANY:
5266         if (do_utf8) {
5267             loceol = PL_regeol;
5268             while (scan < loceol && hardcount < max) {
5269                 scan += UTF8SKIP(scan);
5270                 hardcount++;
5271             }
5272         }
5273         else
5274             scan = loceol;
5275         break;
5276     case CANY:
5277         scan = loceol;
5278         break;
5279     case EXACT:         /* length of string is 1 */
5280         c = (U8)*STRING(p);
5281         while (scan < loceol && UCHARAT(scan) == c)
5282             scan++;
5283         break;
5284     case EXACTF:        /* length of string is 1 */
5285         c = (U8)*STRING(p);
5286         while (scan < loceol &&
5287                (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
5288             scan++;
5289         break;
5290     case EXACTFL:       /* length of string is 1 */
5291         PL_reg_flags |= RF_tainted;
5292         c = (U8)*STRING(p);
5293         while (scan < loceol &&
5294                (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
5295             scan++;
5296         break;
5297     case ANYOF:
5298         if (do_utf8) {
5299             loceol = PL_regeol;
5300             while (hardcount < max && scan < loceol &&
5301                    reginclass(prog, p, (U8*)scan, 0, do_utf8)) {
5302                 scan += UTF8SKIP(scan);
5303                 hardcount++;
5304             }
5305         } else {
5306             while (scan < loceol && REGINCLASS(prog, p, (U8*)scan))
5307                 scan++;
5308         }
5309         break;
5310     case ALNUM:
5311         if (do_utf8) {
5312             loceol = PL_regeol;
5313             LOAD_UTF8_CHARCLASS_ALNUM();
5314             while (hardcount < max && scan < loceol &&
5315                    swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
5316                 scan += UTF8SKIP(scan);
5317                 hardcount++;
5318             }
5319         } else {
5320             while (scan < loceol && isALNUM(*scan))
5321                 scan++;
5322         }
5323         break;
5324     case ALNUML:
5325         PL_reg_flags |= RF_tainted;
5326         if (do_utf8) {
5327             loceol = PL_regeol;
5328             while (hardcount < max && scan < loceol &&
5329                    isALNUM_LC_utf8((U8*)scan)) {
5330                 scan += UTF8SKIP(scan);
5331                 hardcount++;
5332             }
5333         } else {
5334             while (scan < loceol && isALNUM_LC(*scan))
5335                 scan++;
5336         }
5337         break;
5338     case NALNUM:
5339         if (do_utf8) {
5340             loceol = PL_regeol;
5341             LOAD_UTF8_CHARCLASS_ALNUM();
5342             while (hardcount < max && scan < loceol &&
5343                    !swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
5344                 scan += UTF8SKIP(scan);
5345                 hardcount++;
5346             }
5347         } else {
5348             while (scan < loceol && !isALNUM(*scan))
5349                 scan++;
5350         }
5351         break;
5352     case NALNUML:
5353         PL_reg_flags |= RF_tainted;
5354         if (do_utf8) {
5355             loceol = PL_regeol;
5356             while (hardcount < max && scan < loceol &&
5357                    !isALNUM_LC_utf8((U8*)scan)) {
5358                 scan += UTF8SKIP(scan);
5359                 hardcount++;
5360             }
5361         } else {
5362             while (scan < loceol && !isALNUM_LC(*scan))
5363                 scan++;
5364         }
5365         break;
5366     case SPACE:
5367         if (do_utf8) {
5368             loceol = PL_regeol;
5369             LOAD_UTF8_CHARCLASS_SPACE();
5370             while (hardcount < max && scan < loceol &&
5371                    (*scan == ' ' ||
5372                     swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
5373                 scan += UTF8SKIP(scan);
5374                 hardcount++;
5375             }
5376         } else {
5377             while (scan < loceol && isSPACE(*scan))
5378                 scan++;
5379         }
5380         break;
5381     case SPACEL:
5382         PL_reg_flags |= RF_tainted;
5383         if (do_utf8) {
5384             loceol = PL_regeol;
5385             while (hardcount < max && scan < loceol &&
5386                    (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
5387                 scan += UTF8SKIP(scan);
5388                 hardcount++;
5389             }
5390         } else {
5391             while (scan < loceol && isSPACE_LC(*scan))
5392                 scan++;
5393         }
5394         break;
5395     case NSPACE:
5396         if (do_utf8) {
5397             loceol = PL_regeol;
5398             LOAD_UTF8_CHARCLASS_SPACE();
5399             while (hardcount < max && scan < loceol &&
5400                    !(*scan == ' ' ||
5401                      swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
5402                 scan += UTF8SKIP(scan);
5403                 hardcount++;
5404             }
5405         } else {
5406             while (scan < loceol && !isSPACE(*scan))
5407                 scan++;
5408             break;
5409         }
5410     case NSPACEL:
5411         PL_reg_flags |= RF_tainted;
5412         if (do_utf8) {
5413             loceol = PL_regeol;
5414             while (hardcount < max && scan < loceol &&
5415                    !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
5416                 scan += UTF8SKIP(scan);
5417                 hardcount++;
5418             }
5419         } else {
5420             while (scan < loceol && !isSPACE_LC(*scan))
5421                 scan++;
5422         }
5423         break;
5424     case DIGIT:
5425         if (do_utf8) {
5426             loceol = PL_regeol;
5427             LOAD_UTF8_CHARCLASS_DIGIT();
5428             while (hardcount < max && scan < loceol &&
5429                    swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
5430                 scan += UTF8SKIP(scan);
5431                 hardcount++;
5432             }
5433         } else {
5434             while (scan < loceol && isDIGIT(*scan))
5435                 scan++;
5436         }
5437         break;
5438     case NDIGIT:
5439         if (do_utf8) {
5440             loceol = PL_regeol;
5441             LOAD_UTF8_CHARCLASS_DIGIT();
5442             while (hardcount < max && scan < loceol &&
5443                    !swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
5444                 scan += UTF8SKIP(scan);
5445                 hardcount++;
5446             }
5447         } else {
5448             while (scan < loceol && !isDIGIT(*scan))
5449                 scan++;
5450         }
5451     case LNBREAK:
5452         if (do_utf8) {
5453             loceol = PL_regeol;
5454             while (hardcount < max && scan < loceol && (c=is_LNBREAK_utf8(scan))) {
5455                 scan += c;
5456                 hardcount++;
5457             }
5458         } else {
5459             /*
5460               LNBREAK can match two latin chars, which is ok,
5461               because we have a null terminated string, but we
5462               have to use hardcount in this situation
5463             */
5464             while (scan < loceol && (c=is_LNBREAK_latin1(scan)))  {
5465                 scan+=c;
5466                 hardcount++;
5467             }
5468         }
5469         break;
5470     case HORIZWS:
5471         if (do_utf8) {
5472             loceol = PL_regeol;
5473             while (hardcount < max && scan < loceol && (c=is_HORIZWS_utf8(scan))) {
5474                 scan += c;
5475                 hardcount++;
5476             }
5477         } else {
5478             while (scan < loceol && is_HORIZWS_latin1(scan))
5479                 scan++;
5480         }
5481         break;
5482     case NHORIZWS:
5483         if (do_utf8) {
5484             loceol = PL_regeol;
5485             while (hardcount < max && scan < loceol && !is_HORIZWS_utf8(scan)) {
5486                 scan += UTF8SKIP(scan);
5487                 hardcount++;
5488             }
5489         } else {
5490             while (scan < loceol && !is_HORIZWS_latin1(scan))
5491                 scan++;
5492
5493         }
5494         break;
5495     case VERTWS:
5496         if (do_utf8) {
5497             loceol = PL_regeol;
5498             while (hardcount < max && scan < loceol && (c=is_VERTWS_utf8(scan))) {
5499                 scan += c;
5500                 hardcount++;
5501             }
5502         } else {
5503             while (scan < loceol && is_VERTWS_latin1(scan))
5504                 scan++;
5505
5506         }
5507         break;
5508     case NVERTWS:
5509         if (do_utf8) {
5510             loceol = PL_regeol;
5511             while (hardcount < max && scan < loceol && !is_VERTWS_utf8(scan)) {
5512                 scan += UTF8SKIP(scan);
5513                 hardcount++;
5514             }
5515         } else {
5516             while (scan < loceol && !is_VERTWS_latin1(scan))
5517                 scan++;
5518
5519         }
5520         break;
5521
5522     default:            /* Called on something of 0 width. */
5523         break;          /* So match right here or not at all. */
5524     }
5525
5526     if (hardcount)
5527         c = hardcount;
5528     else
5529         c = scan - PL_reginput;
5530     PL_reginput = scan;
5531
5532     DEBUG_r({
5533         GET_RE_DEBUG_FLAGS_DECL;
5534         DEBUG_EXECUTE_r({
5535             SV * const prop = sv_newmortal();
5536             regprop(prog, prop, p);
5537             PerlIO_printf(Perl_debug_log,
5538                         "%*s  %s can match %"IVdf" times out of %"IVdf"...\n",
5539                         REPORT_CODE_OFF + depth*2, "", SvPVX_const(prop),(IV)c,(IV)max);
5540         });
5541     });
5542
5543     return(c);
5544 }
5545
5546
5547 #if !defined(PERL_IN_XSUB_RE) || defined(PLUGGABLE_RE_EXTENSION)
5548 /*
5549 - regclass_swash - prepare the utf8 swash
5550 */
5551
5552 SV *
5553 Perl_regclass_swash(pTHX_ const regexp *prog, register const regnode* node, bool doinit, SV** listsvp, SV **altsvp)
5554 {
5555     dVAR;
5556     SV *sw  = NULL;
5557     SV *si  = NULL;
5558     SV *alt = NULL;
5559     RXi_GET_DECL(prog,progi);
5560     const struct reg_data * const data = prog ? progi->data : NULL;
5561
5562     if (data && data->count) {
5563         const U32 n = ARG(node);
5564
5565         if (data->what[n] == 's') {
5566             SV * const rv = (SV*)data->data[n];
5567             AV * const av = (AV*)SvRV((SV*)rv);
5568             SV **const ary = AvARRAY(av);
5569             SV **a, **b;
5570
5571             /* See the end of regcomp.c:S_regclass() for
5572              * documentation of these array elements. */
5573
5574             si = *ary;
5575             a  = SvROK(ary[1]) ? &ary[1] : NULL;
5576             b  = SvTYPE(ary[2]) == SVt_PVAV ? &ary[2] : NULL;
5577
5578             if (a)
5579                 sw = *a;
5580             else if (si && doinit) {
5581                 sw = swash_init("utf8", "", si, 1, 0);
5582                 (void)av_store(av, 1, sw);
5583             }
5584             if (b)
5585                 alt = *b;
5586         }
5587     }
5588
5589     if (listsvp)
5590         *listsvp = si;
5591     if (altsvp)
5592         *altsvp  = alt;
5593
5594     return sw;
5595 }
5596 #endif
5597
5598 /*
5599  - reginclass - determine if a character falls into a character class
5600
5601   The n is the ANYOF regnode, the p is the target string, lenp
5602   is pointer to the maximum length of how far to go in the p
5603   (if the lenp is zero, UTF8SKIP(p) is used),
5604   do_utf8 tells whether the target string is in UTF-8.
5605
5606  */
5607
5608 STATIC bool
5609 S_reginclass(pTHX_ const regexp *prog, register const regnode *n, register const U8* p, STRLEN* lenp, register bool do_utf8)
5610 {
5611     dVAR;
5612     const char flags = ANYOF_FLAGS(n);
5613     bool match = FALSE;
5614     UV c = *p;
5615     STRLEN len = 0;
5616     STRLEN plen;
5617
5618     if (do_utf8 && !UTF8_IS_INVARIANT(c)) {
5619         c = utf8n_to_uvchr(p, UTF8_MAXBYTES, &len,
5620                 (UTF8_ALLOW_DEFAULT & UTF8_ALLOW_ANYUV) | UTF8_CHECK_ONLY);
5621                 /* see [perl #37836] for UTF8_ALLOW_ANYUV */
5622         if (len == (STRLEN)-1)
5623             Perl_croak(aTHX_ "Malformed UTF-8 character (fatal)");
5624     }
5625
5626     plen = lenp ? *lenp : UNISKIP(NATIVE_TO_UNI(c));
5627     if (do_utf8 || (flags & ANYOF_UNICODE)) {
5628         if (lenp)
5629             *lenp = 0;
5630         if (do_utf8 && !ANYOF_RUNTIME(n)) {
5631             if (len != (STRLEN)-1 && c < 256 && ANYOF_BITMAP_TEST(n, c))
5632                 match = TRUE;
5633         }
5634         if (!match && do_utf8 && (flags & ANYOF_UNICODE_ALL) && c >= 256)
5635             match = TRUE;
5636         if (!match) {
5637             AV *av;
5638             SV * const sw = regclass_swash(prog, n, TRUE, 0, (SV**)&av);
5639
5640             if (sw) {
5641                 if (swash_fetch(sw, p, do_utf8))
5642                     match = TRUE;
5643                 else if (flags & ANYOF_FOLD) {
5644                     if (!match && lenp && av) {
5645                         I32 i;
5646                         for (i = 0; i <= av_len(av); i++) {
5647                             SV* const sv = *av_fetch(av, i, FALSE);
5648                             STRLEN len;
5649                             const char * const s = SvPV_const(sv, len);
5650
5651                             if (len <= plen && memEQ(s, (char*)p, len)) {
5652                                 *lenp = len;
5653                                 match = TRUE;
5654                                 break;
5655                             }
5656                         }
5657                     }
5658                     if (!match) {
5659                         U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
5660                         STRLEN tmplen;
5661
5662                         to_utf8_fold(p, tmpbuf, &tmplen);
5663                         if (swash_fetch(sw, tmpbuf, do_utf8))
5664                             match = TRUE;
5665                     }
5666                 }
5667             }
5668         }
5669         if (match && lenp && *lenp == 0)
5670             *lenp = UNISKIP(NATIVE_TO_UNI(c));
5671     }
5672     if (!match && c < 256) {
5673         if (ANYOF_BITMAP_TEST(n, c))
5674             match = TRUE;
5675         else if (flags & ANYOF_FOLD) {
5676             U8 f;
5677
5678             if (flags & ANYOF_LOCALE) {
5679                 PL_reg_flags |= RF_tainted;
5680                 f = PL_fold_locale[c];
5681             }
5682             else
5683                 f = PL_fold[c];
5684             if (f != c && ANYOF_BITMAP_TEST(n, f))
5685                 match = TRUE;
5686         }
5687
5688         if (!match && (flags & ANYOF_CLASS)) {
5689             PL_reg_flags |= RF_tainted;
5690             if (
5691                 (ANYOF_CLASS_TEST(n, ANYOF_ALNUM)   &&  isALNUM_LC(c))  ||
5692                 (ANYOF_CLASS_TEST(n, ANYOF_NALNUM)  && !isALNUM_LC(c))  ||
5693                 (ANYOF_CLASS_TEST(n, ANYOF_SPACE)   &&  isSPACE_LC(c))  ||
5694                 (ANYOF_CLASS_TEST(n, ANYOF_NSPACE)  && !isSPACE_LC(c))  ||
5695                 (ANYOF_CLASS_TEST(n, ANYOF_DIGIT)   &&  isDIGIT_LC(c))  ||
5696                 (ANYOF_CLASS_TEST(n, ANYOF_NDIGIT)  && !isDIGIT_LC(c))  ||
5697                 (ANYOF_CLASS_TEST(n, ANYOF_ALNUMC)  &&  isALNUMC_LC(c)) ||
5698                 (ANYOF_CLASS_TEST(n, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
5699                 (ANYOF_CLASS_TEST(n, ANYOF_ALPHA)   &&  isALPHA_LC(c))  ||
5700                 (ANYOF_CLASS_TEST(n, ANYOF_NALPHA)  && !isALPHA_LC(c))  ||
5701                 (ANYOF_CLASS_TEST(n, ANYOF_ASCII)   &&  isASCII(c))     ||
5702                 (ANYOF_CLASS_TEST(n, ANYOF_NASCII)  && !isASCII(c))     ||
5703                 (ANYOF_CLASS_TEST(n, ANYOF_CNTRL)   &&  isCNTRL_LC(c))  ||
5704                 (ANYOF_CLASS_TEST(n, ANYOF_NCNTRL)  && !isCNTRL_LC(c))  ||
5705                 (ANYOF_CLASS_TEST(n, ANYOF_GRAPH)   &&  isGRAPH_LC(c))  ||
5706                 (ANYOF_CLASS_TEST(n, ANYOF_NGRAPH)  && !isGRAPH_LC(c))  ||
5707                 (ANYOF_CLASS_TEST(n, ANYOF_LOWER)   &&  isLOWER_LC(c))  ||
5708                 (ANYOF_CLASS_TEST(n, ANYOF_NLOWER)  && !isLOWER_LC(c))  ||
5709                 (ANYOF_CLASS_TEST(n, ANYOF_PRINT)   &&  isPRINT_LC(c))  ||
5710                 (ANYOF_CLASS_TEST(n, ANYOF_NPRINT)  && !isPRINT_LC(c))  ||
5711                 (ANYOF_CLASS_TEST(n, ANYOF_PUNCT)   &&  isPUNCT_LC(c))  ||
5712                 (ANYOF_CLASS_TEST(n, ANYOF_NPUNCT)  && !isPUNCT_LC(c))  ||
5713                 (ANYOF_CLASS_TEST(n, ANYOF_UPPER)   &&  isUPPER_LC(c))  ||
5714                 (ANYOF_CLASS_TEST(n, ANYOF_NUPPER)  && !isUPPER_LC(c))  ||
5715                 (ANYOF_CLASS_TEST(n, ANYOF_XDIGIT)  &&  isXDIGIT(c))    ||
5716                 (ANYOF_CLASS_TEST(n, ANYOF_NXDIGIT) && !isXDIGIT(c))    ||
5717                 (ANYOF_CLASS_TEST(n, ANYOF_PSXSPC)  &&  isPSXSPC(c))    ||
5718                 (ANYOF_CLASS_TEST(n, ANYOF_NPSXSPC) && !isPSXSPC(c))    ||
5719                 (ANYOF_CLASS_TEST(n, ANYOF_BLANK)   &&  isBLANK(c))     ||
5720                 (ANYOF_CLASS_TEST(n, ANYOF_NBLANK)  && !isBLANK(c))
5721                 ) /* How's that for a conditional? */
5722             {
5723                 match = TRUE;
5724             }
5725         }
5726     }
5727
5728     return (flags & ANYOF_INVERT) ? !match : match;
5729 }
5730
5731 STATIC U8 *
5732 S_reghop3(U8 *s, I32 off, const U8* lim)
5733 {
5734     dVAR;
5735     if (off >= 0) {
5736         while (off-- && s < lim) {
5737             /* XXX could check well-formedness here */
5738             s += UTF8SKIP(s);
5739         }
5740     }
5741     else {
5742         while (off++ && s > lim) {
5743             s--;
5744             if (UTF8_IS_CONTINUED(*s)) {
5745                 while (s > lim && UTF8_IS_CONTINUATION(*s))
5746                     s--;
5747             }
5748             /* XXX could check well-formedness here */
5749         }
5750     }
5751     return s;
5752 }
5753
5754 #ifdef XXX_dmq
5755 /* there are a bunch of places where we use two reghop3's that should
5756    be replaced with this routine. but since thats not done yet
5757    we ifdef it out - dmq
5758 */
5759 STATIC U8 *
5760 S_reghop4(U8 *s, I32 off, const U8* llim, const U8* rlim)
5761 {
5762     dVAR;
5763     if (off >= 0) {
5764         while (off-- && s < rlim) {
5765             /* XXX could check well-formedness here */
5766             s += UTF8SKIP(s);
5767         }
5768     }
5769     else {
5770         while (off++ && s > llim) {
5771             s--;
5772             if (UTF8_IS_CONTINUED(*s)) {
5773                 while (s > llim && UTF8_IS_CONTINUATION(*s))
5774                     s--;
5775             }
5776             /* XXX could check well-formedness here */
5777         }
5778     }
5779     return s;
5780 }
5781 #endif
5782
5783 STATIC U8 *
5784 S_reghopmaybe3(U8* s, I32 off, const U8* lim)
5785 {
5786     dVAR;
5787     if (off >= 0) {
5788         while (off-- && s < lim) {
5789             /* XXX could check well-formedness here */
5790             s += UTF8SKIP(s);
5791         }
5792         if (off >= 0)
5793             return NULL;
5794     }
5795     else {
5796         while (off++ && s > lim) {
5797             s--;
5798             if (UTF8_IS_CONTINUED(*s)) {
5799                 while (s > lim && UTF8_IS_CONTINUATION(*s))
5800                     s--;
5801             }
5802             /* XXX could check well-formedness here */
5803         }
5804         if (off <= 0)
5805             return NULL;
5806     }
5807     return s;
5808 }
5809
5810 static void
5811 restore_pos(pTHX_ void *arg)
5812 {
5813     dVAR;
5814     regexp * const rex = (regexp *)arg;
5815     if (PL_reg_eval_set) {
5816         if (PL_reg_oldsaved) {
5817             rex->subbeg = PL_reg_oldsaved;
5818             rex->sublen = PL_reg_oldsavedlen;
5819 #ifdef PERL_OLD_COPY_ON_WRITE
5820             rex->saved_copy = PL_nrs;
5821 #endif
5822             RX_MATCH_COPIED_on(rex);
5823         }
5824         PL_reg_magic->mg_len = PL_reg_oldpos;
5825         PL_reg_eval_set = 0;
5826         PL_curpm = PL_reg_oldcurpm;
5827     }
5828 }
5829
5830 STATIC void
5831 S_to_utf8_substr(pTHX_ register regexp *prog)
5832 {
5833     int i = 1;
5834     do {
5835         if (prog->substrs->data[i].substr
5836             && !prog->substrs->data[i].utf8_substr) {
5837             SV* const sv = newSVsv(prog->substrs->data[i].substr);
5838             prog->substrs->data[i].utf8_substr = sv;
5839             sv_utf8_upgrade(sv);
5840             if (SvVALID(prog->substrs->data[i].substr)) {
5841                 const U8 flags = BmFLAGS(prog->substrs->data[i].substr);
5842                 if (flags & FBMcf_TAIL) {
5843                     /* Trim the trailing \n that fbm_compile added last
5844                        time.  */
5845                     SvCUR_set(sv, SvCUR(sv) - 1);
5846                     /* Whilst this makes the SV technically "invalid" (as its
5847                        buffer is no longer followed by "\0") when fbm_compile()
5848                        adds the "\n" back, a "\0" is restored.  */
5849                 }
5850                 fbm_compile(sv, flags);
5851             }
5852             if (prog->substrs->data[i].substr == prog->check_substr)
5853                 prog->check_utf8 = sv;
5854         }
5855     } while (i--);
5856 }
5857
5858 STATIC void
5859 S_to_byte_substr(pTHX_ register regexp *prog)
5860 {
5861     dVAR;
5862     int i = 1;
5863     do {
5864         if (prog->substrs->data[i].utf8_substr
5865             && !prog->substrs->data[i].substr) {
5866             SV* sv = newSVsv(prog->substrs->data[i].utf8_substr);
5867             if (sv_utf8_downgrade(sv, TRUE)) {
5868                 if (SvVALID(prog->substrs->data[i].utf8_substr)) {
5869                     const U8 flags
5870                         = BmFLAGS(prog->substrs->data[i].utf8_substr);
5871                     if (flags & FBMcf_TAIL) {
5872                         /* Trim the trailing \n that fbm_compile added last
5873                            time.  */
5874                         SvCUR_set(sv, SvCUR(sv) - 1);
5875                     }
5876                     fbm_compile(sv, flags);
5877                 }
5878             } else {
5879                 SvREFCNT_dec(sv);
5880                 sv = &PL_sv_undef;
5881             }
5882             prog->substrs->data[i].substr = sv;
5883             if (prog->substrs->data[i].utf8_substr == prog->check_utf8)
5884                 prog->check_substr = sv;
5885         }
5886     } while (i--);
5887 }
5888
5889 /*
5890  * Local variables:
5891  * c-indentation-style: bsd
5892  * c-basic-offset: 4
5893  * indent-tabs-mode: t
5894  * End:
5895  *
5896  * ex: set ts=8 sts=4 sw=4 noet:
5897  */