regexec.c

   1 /*    regexec.c
   2  */
   3
   4 /*
   5  * "One Ring to rule them all, One Ring to find them..."
   6  */
   7
   8 /* This file contains functions for executing a regular expression.  See
   9  * also regcomp.c which funnily enough, contains functions for compiling
  10  * a regular expression.
  11  *
  12  * This file is also copied at build time to ext/re/re_exec.c, where
  13  * it's built with -DPERL_EXT_RE_BUILD -DPERL_EXT_RE_DEBUG -DPERL_EXT.
  14  * This causes the main functions to be compiled under new names and with
  15  * debugging support added, which makes "use re 'debug'" work.
  16  */
  17
  18 /* NOTE: this is derived from Henry Spencer's regexp code, and should not
  19  * confused with the original package (see point 3 below).  Thanks, Henry!
  20  */
  21
  22 /* Additional note: this code is very heavily munged from Henry's version
  23  * in places.  In some spots I've traded clarity for efficiency, so don't
  24  * blame Henry for some of the lack of readability.
  25  */
  26
  27 /* The names of the functions have been changed from regcomp and
  28  * regexec to  pregcomp and pregexec in order to avoid conflicts
  29  * with the POSIX routines of the same names.
  30 */
  31
  32 #ifdef PERL_EXT_RE_BUILD
  33 #include "re_top.h"
  34 #endif
  35
  36 /*
  37  * pregcomp and pregexec -- regsub and regerror are not used in perl
  38  *
  39  *      Copyright (c) 1986 by University of Toronto.
  40  *      Written by Henry Spencer.  Not derived from licensed software.
  41  *
  42  *      Permission is granted to anyone to use this software for any
  43  *      purpose on any computer system, and to redistribute it freely,
  44  *      subject to the following restrictions:
  45  *
  46  *      1. The author is not responsible for the consequences of use of
  47  *              this software, no matter how awful, even if they arise
  48  *              from defects in it.
  49  *
  50  *      2. The origin of this software must not be misrepresented, either
  51  *              by explicit claim or by omission.
  52  *
  53  *      3. Altered versions must be plainly marked as such, and must not
  54  *              be misrepresented as being the original software.
  55  *
  56  ****    Alterations to Henry's code are...
  57  ****
  58  ****    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
  59  ****    2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007 by Larry Wall and others
  60  ****
  61  ****    You may distribute under the terms of either the GNU General Public
  62  ****    License or the Artistic License, as specified in the README file.
  63  *
  64  * Beware that some of this code is subtly aware of the way operator
  65  * precedence is structured in regular expressions.  Serious changes in
  66  * regular-expression syntax might require a total rethink.
  67  */
  68 #include "EXTERN.h"
  69 #define PERL_IN_REGEXEC_C
  70 #include "perl.h"
  71
  72 #ifdef PERL_IN_XSUB_RE
  73 #  include "re_comp.h"
  74 #else
  75 #  include "regcomp.h"
  76 #endif
  77
  78 #define RF_tainted      1               /* tainted information used? */
  79 #define RF_warned       2               /* warned about big count? */
  80
  81 #define RF_utf8         8               /* Pattern contains multibyte chars? */
  82
  83 #define UTF ((PL_reg_flags & RF_utf8) != 0)
  84
  85 #define RS_init         1               /* eval environment created */
  86 #define RS_set          2               /* replsv value is set */
  87
  88 #ifndef STATIC
  89 #define STATIC  static
  90 #endif
  91
  92 #define REGINCLASS(prog,p,c)  (ANYOF_FLAGS(p) ? reginclass(prog,p,c,0,0) : ANYOF_BITMAP_TEST(p,*(c)))
  93
  94 /*
  95  * Forwards.
  96  */
  97
  98 #define CHR_SVLEN(sv) (do_utf8 ? sv_len_utf8(sv) : SvCUR(sv))
  99 #define CHR_DIST(a,b) (PL_reg_match_utf8 ? utf8_distance(a,b) : a - b)
 100
 101 #define HOPc(pos,off) \
 102         (char *)(PL_reg_match_utf8 \
 103             ? reghop3((U8*)pos, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr)) \
 104             : (U8*)(pos + off))
 105 #define HOPBACKc(pos, off) \
 106         (char*)(PL_reg_match_utf8\
 107             ? reghopmaybe3((U8*)pos, -off, (U8*)PL_bostr) \
 108             : (pos - off >= PL_bostr)           \
 109                 ? (U8*)pos - off                \
 110                 : NULL)
 111
 112 #define HOP3(pos,off,lim) (PL_reg_match_utf8 ? reghop3((U8*)(pos), off, (U8*)(lim)) : (U8*)(pos + off))
 113 #define HOP3c(pos,off,lim) ((char*)HOP3(pos,off,lim))
 114
 115 #define LOAD_UTF8_CHARCLASS(class,str) STMT_START { \
 116     if (!CAT2(PL_utf8_,class)) { bool ok; ENTER; save_re_context(); ok=CAT2(is_utf8_,class)((const U8*)str); assert(ok); LEAVE; } } STMT_END
 117 #define LOAD_UTF8_CHARCLASS_ALNUM() LOAD_UTF8_CHARCLASS(alnum,"a")
 118 #define LOAD_UTF8_CHARCLASS_DIGIT() LOAD_UTF8_CHARCLASS(digit,"0")
 119 #define LOAD_UTF8_CHARCLASS_SPACE() LOAD_UTF8_CHARCLASS(space," ")
 120 #define LOAD_UTF8_CHARCLASS_MARK()  LOAD_UTF8_CHARCLASS(mark, "\xcd\x86")
 121
 122 /* TODO: Combine JUMPABLE and HAS_TEXT to cache OP(rn) */
 123
 124 /* for use after a quantifier and before an EXACT-like node -- japhy */
 125 /* it would be nice to rework regcomp.sym to generate this stuff. sigh */
 126 #define JUMPABLE(rn) (      \
 127     OP(rn) == OPEN ||       \
 128     (OP(rn) == CLOSE && (!cur_eval || cur_eval->u.eval.close_paren != ARG(rn))) || \
 129     OP(rn) == EVAL ||   \
 130     OP(rn) == SUSPEND || OP(rn) == IFMATCH || \
 131     OP(rn) == PLUS || OP(rn) == MINMOD || \
 132     OP(rn) == KEEPS || (PL_regkind[OP(rn)] == VERB) || \
 133     (PL_regkind[OP(rn)] == CURLY && ARG1(rn) > 0) \
 134 )
 135 #define IS_EXACT(rn) (PL_regkind[OP(rn)] == EXACT)
 136
 137 #define HAS_TEXT(rn) ( IS_EXACT(rn) || PL_regkind[OP(rn)] == REF )
 138
 139 #if 0
 140 /* Currently these are only used when PL_regkind[OP(rn)] == EXACT so
 141    we don't need this definition. */
 142 #define IS_TEXT(rn)   ( OP(rn)==EXACT   || OP(rn)==REF   || OP(rn)==NREF   )
 143 #define IS_TEXTF(rn)  ( OP(rn)==EXACTF  || OP(rn)==REFF  || OP(rn)==NREFF  )
 144 #define IS_TEXTFL(rn) ( OP(rn)==EXACTFL || OP(rn)==REFFL || OP(rn)==NREFFL )
 145
 146 #else
 147 /* ... so we use this as its faster. */
 148 #define IS_TEXT(rn)   ( OP(rn)==EXACT   )
 149 #define IS_TEXTF(rn)  ( OP(rn)==EXACTF  )
 150 #define IS_TEXTFL(rn) ( OP(rn)==EXACTFL )
 151
 152 #endif
 153
 154 /*
 155   Search for mandatory following text node; for lookahead, the text must
 156   follow but for lookbehind (rn->flags != 0) we skip to the next step.
 157 */
 158 #define FIND_NEXT_IMPT(rn) STMT_START { \
 159     while (JUMPABLE(rn)) { \
 160         const OPCODE type = OP(rn); \
 161         if (type == SUSPEND || PL_regkind[type] == CURLY) \
 162             rn = NEXTOPER(NEXTOPER(rn)); \
 163         else if (type == PLUS) \
 164             rn = NEXTOPER(rn); \
 165         else if (type == IFMATCH) \
 166             rn = (rn->flags == 0) ? NEXTOPER(NEXTOPER(rn)) : rn + ARG(rn); \
 167         else rn += NEXT_OFF(rn); \
 168     } \
 169 } STMT_END
 170
 171
 172 static void restore_pos(pTHX_ void *arg);
 173
 174 STATIC CHECKPOINT
 175 S_regcppush(pTHX_ I32 parenfloor)
 176 {
 177     dVAR;
 178     const int retval = PL_savestack_ix;
 179 #define REGCP_PAREN_ELEMS 4
 180     const int paren_elems_to_push = (PL_regsize - parenfloor) * REGCP_PAREN_ELEMS;
 181     int p;
 182     GET_RE_DEBUG_FLAGS_DECL;
 183
 184     if (paren_elems_to_push < 0)
 185         Perl_croak(aTHX_ "panic: paren_elems_to_push < 0");
 186
 187 #define REGCP_OTHER_ELEMS 7
 188     SSGROW(paren_elems_to_push + REGCP_OTHER_ELEMS);
 189
 190     for (p = PL_regsize; p > parenfloor; p--) {
 191 /* REGCP_PARENS_ELEMS are pushed per pairs of parentheses. */
 192         SSPUSHINT(PL_regoffs[p].end);
 193         SSPUSHINT(PL_regoffs[p].start);
 194         SSPUSHPTR(PL_reg_start_tmp[p]);
 195         SSPUSHINT(p);
 196         DEBUG_BUFFERS_r(PerlIO_printf(Perl_debug_log,
 197           "     saving \\%"UVuf" %"IVdf"(%"IVdf")..%"IVdf"\n",
 198                       (UV)p, (IV)PL_regoffs[p].start,
 199                       (IV)(PL_reg_start_tmp[p] - PL_bostr),
 200                       (IV)PL_regoffs[p].end
 201         ));
 202     }
 203 /* REGCP_OTHER_ELEMS are pushed in any case, parentheses or no. */
 204     SSPUSHPTR(PL_regoffs);
 205     SSPUSHINT(PL_regsize);
 206     SSPUSHINT(*PL_reglastparen);
 207     SSPUSHINT(*PL_reglastcloseparen);
 208     SSPUSHPTR(PL_reginput);
 209 #define REGCP_FRAME_ELEMS 2
 210 /* REGCP_FRAME_ELEMS are part of the REGCP_OTHER_ELEMS and
 211  * are needed for the regexp context stack bookkeeping. */
 212     SSPUSHINT(paren_elems_to_push + REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
 213     SSPUSHINT(SAVEt_REGCONTEXT); /* Magic cookie. */
 214
 215     return retval;
 216 }
 217
 218 /* These are needed since we do not localize EVAL nodes: */
 219 #define REGCP_SET(cp)                                           \
 220     DEBUG_STATE_r(                                              \
 221             PerlIO_printf(Perl_debug_log,                       \
 222                 "  Setting an EVAL scope, savestack=%"IVdf"\n", \
 223                 (IV)PL_savestack_ix));                          \
 224     cp = PL_savestack_ix
 225
 226 #define REGCP_UNWIND(cp)                                        \
 227     DEBUG_STATE_r(                                              \
 228         if (cp != PL_savestack_ix)                              \
 229             PerlIO_printf(Perl_debug_log,                       \
 230                 "  Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
 231                 (IV)(cp), (IV)PL_savestack_ix));                \
 232     regcpblow(cp)
 233
 234 STATIC char *
 235 S_regcppop(pTHX_ const regexp *rex)
 236 {
 237     dVAR;
 238     U32 i;
 239     char *input;
 240     GET_RE_DEBUG_FLAGS_DECL;
 241
 242     PERL_ARGS_ASSERT_REGCPPOP;
 243
 244     /* Pop REGCP_OTHER_ELEMS before the parentheses loop starts. */
 245     i = SSPOPINT;
 246     assert(i == SAVEt_REGCONTEXT); /* Check that the magic cookie is there. */
 247     i = SSPOPINT; /* Parentheses elements to pop. */
 248     input = (char *) SSPOPPTR;
 249     *PL_reglastcloseparen = SSPOPINT;
 250     *PL_reglastparen = SSPOPINT;
 251     PL_regsize = SSPOPINT;
 252     PL_regoffs=(regexp_paren_pair *) SSPOPPTR;
 253
 254
 255     /* Now restore the parentheses context. */
 256     for (i -= (REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
 257          i > 0; i -= REGCP_PAREN_ELEMS) {
 258         I32 tmps;
 259         U32 paren = (U32)SSPOPINT;
 260         PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
 261         PL_regoffs[paren].start = SSPOPINT;
 262         tmps = SSPOPINT;
 263         if (paren <= *PL_reglastparen)
 264             PL_regoffs[paren].end = tmps;
 265         DEBUG_BUFFERS_r(
 266             PerlIO_printf(Perl_debug_log,
 267                           "     restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
 268                           (UV)paren, (IV)PL_regoffs[paren].start,
 269                           (IV)(PL_reg_start_tmp[paren] - PL_bostr),
 270                           (IV)PL_regoffs[paren].end,
 271                           (paren > *PL_reglastparen ? "(no)" : ""));
 272         );
 273     }
 274     DEBUG_BUFFERS_r(
 275         if (*PL_reglastparen + 1 <= rex->nparens) {
 276             PerlIO_printf(Perl_debug_log,
 277                           "     restoring \\%"IVdf"..\\%"IVdf" to undef\n",
 278                           (IV)(*PL_reglastparen + 1), (IV)rex->nparens);
 279         }
 280     );
 281 #if 1
 282     /* It would seem that the similar code in regtry()
 283      * already takes care of this, and in fact it is in
 284      * a better location to since this code can #if 0-ed out
 285      * but the code in regtry() is needed or otherwise tests
 286      * requiring null fields (pat.t#187 and split.t#{13,14}
 287      * (as of patchlevel 7877)  will fail.  Then again,
 288      * this code seems to be necessary or otherwise
 289      * this erroneously leaves $1 defined: "1" =~ /^(?:(\d)x)?\d$/
 290      * --jhi updated by dapm */
 291     for (i = *PL_reglastparen + 1; i <= rex->nparens; i++) {
 292         if (i > PL_regsize)
 293             PL_regoffs[i].start = -1;
 294         PL_regoffs[i].end = -1;
 295     }
 296 #endif
 297     return input;
 298 }
 299
 300 #define regcpblow(cp) LEAVE_SCOPE(cp)   /* Ignores regcppush()ed data. */
 301
 302 /*
 303  * pregexec and friends
 304  */
 305
 306 #ifndef PERL_IN_XSUB_RE
 307 /*
 308  - pregexec - match a regexp against a string
 309  */
 310 I32
 311 Perl_pregexec(pTHX_ REGEXP * const prog, char* stringarg, register char *strend,
 312          char *strbeg, I32 minend, SV *screamer, U32 nosave)
 313 /* strend: pointer to null at end of string */
 314 /* strbeg: real beginning of string */
 315 /* minend: end of match must be >=minend after stringarg. */
 316 /* nosave: For optimizations. */
 317 {
 318     PERL_ARGS_ASSERT_PREGEXEC;
 319
 320     return
 321         regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
 322                       nosave ? 0 : REXEC_COPY_STR);
 323 }
 324 #endif
 325
 326 /*
 327  * Need to implement the following flags for reg_anch:
 328  *
 329  * USE_INTUIT_NOML              - Useful to call re_intuit_start() first
 330  * USE_INTUIT_ML
 331  * INTUIT_AUTORITATIVE_NOML     - Can trust a positive answer
 332  * INTUIT_AUTORITATIVE_ML
 333  * INTUIT_ONCE_NOML             - Intuit can match in one location only.
 334  * INTUIT_ONCE_ML
 335  *
 336  * Another flag for this function: SECOND_TIME (so that float substrs
 337  * with giant delta may be not rechecked).
 338  */
 339
 340 /* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
 341
 342 /* If SCREAM, then SvPVX_const(sv) should be compatible with strpos and strend.
 343    Otherwise, only SvCUR(sv) is used to get strbeg. */
 344
 345 /* XXXX We assume that strpos is strbeg unless sv. */
 346
 347 /* XXXX Some places assume that there is a fixed substring.
 348         An update may be needed if optimizer marks as "INTUITable"
 349         RExen without fixed substrings.  Similarly, it is assumed that
 350         lengths of all the strings are no more than minlen, thus they
 351         cannot come from lookahead.
 352         (Or minlen should take into account lookahead.)
 353   NOTE: Some of this comment is not correct. minlen does now take account
 354   of lookahead/behind. Further research is required. -- demerphq
 355
 356 */
 357
 358 /* A failure to find a constant substring means that there is no need to make
 359    an expensive call to REx engine, thus we celebrate a failure.  Similarly,
 360    finding a substring too deep into the string means that less calls to
 361    regtry() should be needed.
 362
 363    REx compiler's optimizer found 4 possible hints:
 364         a) Anchored substring;
 365         b) Fixed substring;
 366         c) Whether we are anchored (beginning-of-line or \G);
 367         d) First node (of those at offset 0) which may distingush positions;
 368    We use a)b)d) and multiline-part of c), and try to find a position in the
 369    string which does not contradict any of them.
 370  */
 371
 372 /* Most of decisions we do here should have been done at compile time.
 373    The nodes of the REx which we used for the search should have been
 374    deleted from the finite automaton. */
 375
 376 char *
 377 Perl_re_intuit_start(pTHX_ REGEXP * const rx, SV *sv, char *strpos,
 378                      char *strend, const U32 flags, re_scream_pos_data *data)
 379 {
 380     dVAR;
 381     struct regexp *const prog = (struct regexp *)SvANY(rx);
 382     register I32 start_shift = 0;
 383     /* Should be nonnegative! */
 384     register I32 end_shift   = 0;
 385     register char *s;
 386     register SV *check;
 387     char *strbeg;
 388     char *t;
 389     const bool do_utf8 = (sv && SvUTF8(sv)) ? 1 : 0; /* if no sv we have to assume bytes */
 390     I32 ml_anch;
 391     register char *other_last = NULL;   /* other substr checked before this */
 392     char *check_at = NULL;              /* check substr found at this pos */
 393     const I32 multiline = prog->extflags & RXf_PMf_MULTILINE;
 394     RXi_GET_DECL(prog,progi);
 395 #ifdef DEBUGGING
 396     const char * const i_strpos = strpos;
 397 #endif
 398     GET_RE_DEBUG_FLAGS_DECL;
 399
 400     PERL_ARGS_ASSERT_RE_INTUIT_START;
 401
 402     RX_MATCH_UTF8_set(rx,do_utf8);
 403
 404     if (RX_UTF8(rx)) {
 405         PL_reg_flags |= RF_utf8;
 406     }
 407     DEBUG_EXECUTE_r(
 408         debug_start_match(rx, do_utf8, strpos, strend,
 409             sv ? "Guessing start of match in sv for"
 410                : "Guessing start of match in string for");
 411               );
 412
 413     /* CHR_DIST() would be more correct here but it makes things slow. */
 414     if (prog->minlen > strend - strpos) {
 415         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 416                               "String too short... [re_intuit_start]\n"));
 417         goto fail;
 418     }
 419
 420     strbeg = (sv && SvPOK(sv)) ? strend - SvCUR(sv) : strpos;
 421     PL_regeol = strend;
 422     if (do_utf8) {
 423         if (!prog->check_utf8 && prog->check_substr)
 424             to_utf8_substr(prog);
 425         check = prog->check_utf8;
 426     } else {
 427         if (!prog->check_substr && prog->check_utf8)
 428             to_byte_substr(prog);
 429         check = prog->check_substr;
 430     }
 431     if (check == &PL_sv_undef) {
 432         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 433                 "Non-utf8 string cannot match utf8 check string\n"));
 434         goto fail;
 435     }
 436     if (prog->extflags & RXf_ANCH) {    /* Match at beg-of-str or after \n */
 437         ml_anch = !( (prog->extflags & RXf_ANCH_SINGLE)
 438                      || ( (prog->extflags & RXf_ANCH_BOL)
 439                           && !multiline ) );    /* Check after \n? */
 440
 441         if (!ml_anch) {
 442           if ( !(prog->extflags & RXf_ANCH_GPOS) /* Checked by the caller */
 443                 && !(prog->intflags & PREGf_IMPLICIT) /* not a real BOL */
 444                /* SvCUR is not set on references: SvRV and SvPVX_const overlap */
 445                && sv && !SvROK(sv)
 446                && (strpos != strbeg)) {
 447               DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
 448               goto fail;
 449           }
 450           if (prog->check_offset_min == prog->check_offset_max &&
 451               !(prog->extflags & RXf_CANY_SEEN)) {
 452             /* Substring at constant offset from beg-of-str... */
 453             I32 slen;
 454
 455             s = HOP3c(strpos, prog->check_offset_min, strend);
 456
 457             if (SvTAIL(check)) {
 458                 slen = SvCUR(check);    /* >= 1 */
 459
 460                 if ( strend - s > slen || strend - s < slen - 1
 461                      || (strend - s == slen && strend[-1] != '\n')) {
 462                     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
 463                     goto fail_finish;
 464                 }
 465                 /* Now should match s[0..slen-2] */
 466                 slen--;
 467                 if (slen && (*SvPVX_const(check) != *s
 468                              || (slen > 1
 469                                  && memNE(SvPVX_const(check), s, slen)))) {
 470                   report_neq:
 471                     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
 472                     goto fail_finish;
 473                 }
 474             }
 475             else if (*SvPVX_const(check) != *s
 476                      || ((slen = SvCUR(check)) > 1
 477                          && memNE(SvPVX_const(check), s, slen)))
 478                 goto report_neq;
 479             check_at = s;
 480             goto success_at_start;
 481           }
 482         }
 483         /* Match is anchored, but substr is not anchored wrt beg-of-str. */
 484         s = strpos;
 485         start_shift = prog->check_offset_min; /* okay to underestimate on CC */
 486         end_shift = prog->check_end_shift;
 487
 488         if (!ml_anch) {
 489             const I32 end = prog->check_offset_max + CHR_SVLEN(check)
 490                                          - (SvTAIL(check) != 0);
 491             const I32 eshift = CHR_DIST((U8*)strend, (U8*)s) - end;
 492
 493             if (end_shift < eshift)
 494                 end_shift = eshift;
 495         }
 496     }
 497     else {                              /* Can match at random position */
 498         ml_anch = 0;
 499         s = strpos;
 500         start_shift = prog->check_offset_min;  /* okay to underestimate on CC */
 501         end_shift = prog->check_end_shift;
 502
 503         /* end shift should be non negative here */
 504     }
 505
 506 #ifdef QDEBUGGING       /* 7/99: reports of failure (with the older version) */
 507     if (end_shift < 0)
 508         Perl_croak(aTHX_ "panic: end_shift: %"IVdf" pattern:\n%s\n ",
 509                    (IV)end_shift, RX_PRECOMP(prog));
 510 #endif
 511
 512   restart:
 513     /* Find a possible match in the region s..strend by looking for
 514        the "check" substring in the region corrected by start/end_shift. */
 515
 516     {
 517         I32 srch_start_shift = start_shift;
 518         I32 srch_end_shift = end_shift;
 519         if (srch_start_shift < 0 && strbeg - s > srch_start_shift) {
 520             srch_end_shift -= ((strbeg - s) - srch_start_shift);
 521             srch_start_shift = strbeg - s;
 522         }
 523     DEBUG_OPTIMISE_MORE_r({
 524         PerlIO_printf(Perl_debug_log, "Check offset min: %"IVdf" Start shift: %"IVdf" End shift %"IVdf" Real End Shift: %"IVdf"\n",
 525             (IV)prog->check_offset_min,
 526             (IV)srch_start_shift,
 527             (IV)srch_end_shift,
 528             (IV)prog->check_end_shift);
 529     });
 530
 531     if (flags & REXEC_SCREAM) {
 532         I32 p = -1;                     /* Internal iterator of scream. */
 533         I32 * const pp = data ? data->scream_pos : &p;
 534
 535         if (PL_screamfirst[BmRARE(check)] >= 0
 536             || ( BmRARE(check) == '\n'
 537                  && (BmPREVIOUS(check) == SvCUR(check) - 1)
 538                  && SvTAIL(check) ))
 539             s = screaminstr(sv, check,
 540                             srch_start_shift + (s - strbeg), srch_end_shift, pp, 0);
 541         else
 542             goto fail_finish;
 543         /* we may be pointing at the wrong string */
 544         if (s && RXp_MATCH_COPIED(prog))
 545             s = strbeg + (s - SvPVX_const(sv));
 546         if (data)
 547             *data->scream_olds = s;
 548     }
 549     else {
 550         U8* start_point;
 551         U8* end_point;
 552         if (prog->extflags & RXf_CANY_SEEN) {
 553             start_point= (U8*)(s + srch_start_shift);
 554             end_point= (U8*)(strend - srch_end_shift);
 555         } else {
 556             start_point= HOP3(s, srch_start_shift, srch_start_shift < 0 ? strbeg : strend);
 557             end_point= HOP3(strend, -srch_end_shift, strbeg);
 558         }
 559         DEBUG_OPTIMISE_MORE_r({
 560             PerlIO_printf(Perl_debug_log, "fbm_instr len=%d str=<%.*s>\n",
 561                 (int)(end_point - start_point),
 562                 (int)(end_point - start_point) > 20 ? 20 : (int)(end_point - start_point),
 563                 start_point);
 564         });
 565
 566         s = fbm_instr( start_point, end_point,
 567                       check, multiline ? FBMrf_MULTILINE : 0);
 568     }
 569     }
 570     /* Update the count-of-usability, remove useless subpatterns,
 571         unshift s.  */
 572
 573     DEBUG_EXECUTE_r({
 574         RE_PV_QUOTED_DECL(quoted, do_utf8, PERL_DEBUG_PAD_ZERO(0),
 575             SvPVX_const(check), RE_SV_DUMPLEN(check), 30);
 576         PerlIO_printf(Perl_debug_log, "%s %s substr %s%s%s",
 577                           (s ? "Found" : "Did not find"),
 578             (check == (do_utf8 ? prog->anchored_utf8 : prog->anchored_substr)
 579                 ? "anchored" : "floating"),
 580             quoted,
 581             RE_SV_TAIL(check),
 582             (s ? " at offset " : "...\n") );
 583     });
 584
 585     if (!s)
 586         goto fail_finish;
 587     /* Finish the diagnostic message */
 588     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
 589
 590     /* XXX dmq: first branch is for positive lookbehind...
 591        Our check string is offset from the beginning of the pattern.
 592        So we need to do any stclass tests offset forward from that
 593        point. I think. :-(
 594      */
 595
 596
 597
 598     check_at=s;
 599
 600
 601     /* Got a candidate.  Check MBOL anchoring, and the *other* substr.
 602        Start with the other substr.
 603        XXXX no SCREAM optimization yet - and a very coarse implementation
 604        XXXX /ttx+/ results in anchored="ttx", floating="x".  floating will
 605                 *always* match.  Probably should be marked during compile...
 606        Probably it is right to do no SCREAM here...
 607      */
 608
 609     if (do_utf8 ? (prog->float_utf8 && prog->anchored_utf8)
 610                 : (prog->float_substr && prog->anchored_substr))
 611     {
 612         /* Take into account the "other" substring. */
 613         /* XXXX May be hopelessly wrong for UTF... */
 614         if (!other_last)
 615             other_last = strpos;
 616         if (check == (do_utf8 ? prog->float_utf8 : prog->float_substr)) {
 617           do_other_anchored:
 618             {
 619                 char * const last = HOP3c(s, -start_shift, strbeg);
 620                 char *last1, *last2;
 621                 char * const saved_s = s;
 622                 SV* must;
 623
 624                 t = s - prog->check_offset_max;
 625                 if (s - strpos > prog->check_offset_max  /* signed-corrected t > strpos */
 626                     && (!do_utf8
 627                         || ((t = (char*)reghopmaybe3((U8*)s, -(prog->check_offset_max), (U8*)strpos))
 628                             && t > strpos)))
 629                     NOOP;
 630                 else
 631                     t = strpos;
 632                 t = HOP3c(t, prog->anchored_offset, strend);
 633                 if (t < other_last)     /* These positions already checked */
 634                     t = other_last;
 635                 last2 = last1 = HOP3c(strend, -prog->minlen, strbeg);
 636                 if (last < last1)
 637                     last1 = last;
 638                 /* XXXX It is not documented what units *_offsets are in.
 639                    We assume bytes, but this is clearly wrong.
 640                    Meaning this code needs to be carefully reviewed for errors.
 641                    dmq.
 642                   */
 643
 644                 /* On end-of-str: see comment below. */
 645                 must = do_utf8 ? prog->anchored_utf8 : prog->anchored_substr;
 646                 if (must == &PL_sv_undef) {
 647                     s = (char*)NULL;
 648                     DEBUG_r(must = prog->anchored_utf8);        /* for debug */
 649                 }
 650                 else
 651                     s = fbm_instr(
 652                         (unsigned char*)t,
 653                         HOP3(HOP3(last1, prog->anchored_offset, strend)
 654                                 + SvCUR(must), -(SvTAIL(must)!=0), strbeg),
 655                         must,
 656                         multiline ? FBMrf_MULTILINE : 0
 657                     );
 658                 DEBUG_EXECUTE_r({
 659                     RE_PV_QUOTED_DECL(quoted, do_utf8, PERL_DEBUG_PAD_ZERO(0),
 660                         SvPVX_const(must), RE_SV_DUMPLEN(must), 30);
 661                     PerlIO_printf(Perl_debug_log, "%s anchored substr %s%s",
 662                         (s ? "Found" : "Contradicts"),
 663                         quoted, RE_SV_TAIL(must));
 664                 });
 665
 666
 667                 if (!s) {
 668                     if (last1 >= last2) {
 669                         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 670                                                 ", giving up...\n"));
 671                         goto fail_finish;
 672                     }
 673                     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 674                         ", trying floating at offset %ld...\n",
 675                         (long)(HOP3c(saved_s, 1, strend) - i_strpos)));
 676                     other_last = HOP3c(last1, prog->anchored_offset+1, strend);
 677                     s = HOP3c(last, 1, strend);
 678                     goto restart;
 679                 }
 680                 else {
 681                     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
 682                           (long)(s - i_strpos)));
 683                     t = HOP3c(s, -prog->anchored_offset, strbeg);
 684                     other_last = HOP3c(s, 1, strend);
 685                     s = saved_s;
 686                     if (t == strpos)
 687                         goto try_at_start;
 688                     goto try_at_offset;
 689                 }
 690             }
 691         }
 692         else {          /* Take into account the floating substring. */
 693             char *last, *last1;
 694             char * const saved_s = s;
 695             SV* must;
 696
 697             t = HOP3c(s, -start_shift, strbeg);
 698             last1 = last =
 699                 HOP3c(strend, -prog->minlen + prog->float_min_offset, strbeg);
 700             if (CHR_DIST((U8*)last, (U8*)t) > prog->float_max_offset)
 701                 last = HOP3c(t, prog->float_max_offset, strend);
 702             s = HOP3c(t, prog->float_min_offset, strend);
 703             if (s < other_last)
 704                 s = other_last;
 705  /* XXXX It is not documented what units *_offsets are in.  Assume bytes.  */
 706             must = do_utf8 ? prog->float_utf8 : prog->float_substr;
 707             /* fbm_instr() takes into account exact value of end-of-str
 708                if the check is SvTAIL(ed).  Since false positives are OK,
 709                and end-of-str is not later than strend we are OK. */
 710             if (must == &PL_sv_undef) {
 711                 s = (char*)NULL;
 712                 DEBUG_r(must = prog->float_utf8);       /* for debug message */
 713             }
 714             else
 715                 s = fbm_instr((unsigned char*)s,
 716                               (unsigned char*)last + SvCUR(must)
 717                                   - (SvTAIL(must)!=0),
 718                               must, multiline ? FBMrf_MULTILINE : 0);
 719             DEBUG_EXECUTE_r({
 720                 RE_PV_QUOTED_DECL(quoted, do_utf8, PERL_DEBUG_PAD_ZERO(0),
 721                     SvPVX_const(must), RE_SV_DUMPLEN(must), 30);
 722                 PerlIO_printf(Perl_debug_log, "%s floating substr %s%s",
 723                     (s ? "Found" : "Contradicts"),
 724                     quoted, RE_SV_TAIL(must));
 725             });
 726             if (!s) {
 727                 if (last1 == last) {
 728                     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 729                                             ", giving up...\n"));
 730                     goto fail_finish;
 731                 }
 732                 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 733                     ", trying anchored starting at offset %ld...\n",
 734                     (long)(saved_s + 1 - i_strpos)));
 735                 other_last = last;
 736                 s = HOP3c(t, 1, strend);
 737                 goto restart;
 738             }
 739             else {
 740                 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
 741                       (long)(s - i_strpos)));
 742                 other_last = s; /* Fix this later. --Hugo */
 743                 s = saved_s;
 744                 if (t == strpos)
 745                     goto try_at_start;
 746                 goto try_at_offset;
 747             }
 748         }
 749     }
 750
 751
 752     t= (char*)HOP3( s, -prog->check_offset_max, (prog->check_offset_max<0) ? strend : strpos);
 753
 754     DEBUG_OPTIMISE_MORE_r(
 755         PerlIO_printf(Perl_debug_log,
 756             "Check offset min:%"IVdf" max:%"IVdf" S:%"IVdf" t:%"IVdf" D:%"IVdf" end:%"IVdf"\n",
 757             (IV)prog->check_offset_min,
 758             (IV)prog->check_offset_max,
 759             (IV)(s-strpos),
 760             (IV)(t-strpos),
 761             (IV)(t-s),
 762             (IV)(strend-strpos)
 763         )
 764     );
 765
 766     if (s - strpos > prog->check_offset_max  /* signed-corrected t > strpos */
 767         && (!do_utf8
 768             || ((t = (char*)reghopmaybe3((U8*)s, -prog->check_offset_max, (U8*) ((prog->check_offset_max<0) ? strend : strpos)))
 769                  && t > strpos)))
 770     {
 771         /* Fixed substring is found far enough so that the match
 772            cannot start at strpos. */
 773       try_at_offset:
 774         if (ml_anch && t[-1] != '\n') {
 775             /* Eventually fbm_*() should handle this, but often
 776                anchored_offset is not 0, so this check will not be wasted. */
 777             /* XXXX In the code below we prefer to look for "^" even in
 778                presence of anchored substrings.  And we search even
 779                beyond the found float position.  These pessimizations
 780                are historical artefacts only.  */
 781           find_anchor:
 782             while (t < strend - prog->minlen) {
 783                 if (*t == '\n') {
 784                     if (t < check_at - prog->check_offset_min) {
 785                         if (do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) {
 786                             /* Since we moved from the found position,
 787                                we definitely contradict the found anchored
 788                                substr.  Due to the above check we do not
 789                                contradict "check" substr.
 790                                Thus we can arrive here only if check substr
 791                                is float.  Redo checking for "other"=="fixed".
 792                              */
 793                             strpos = t + 1;
 794                             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
 795                                 PL_colors[0], PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
 796                             goto do_other_anchored;
 797                         }
 798                         /* We don't contradict the found floating substring. */
 799                         /* XXXX Why not check for STCLASS? */
 800                         s = t + 1;
 801                         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
 802                             PL_colors[0], PL_colors[1], (long)(s - i_strpos)));
 803                         goto set_useful;
 804                     }
 805                     /* Position contradicts check-string */
 806                     /* XXXX probably better to look for check-string
 807                        than for "\n", so one should lower the limit for t? */
 808                     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n",
 809                         PL_colors[0], PL_colors[1], (long)(t + 1 - i_strpos)));
 810                     other_last = strpos = s = t + 1;
 811                     goto restart;
 812                 }
 813                 t++;
 814             }
 815             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
 816                         PL_colors[0], PL_colors[1]));
 817             goto fail_finish;
 818         }
 819         else {
 820             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Starting position does not contradict /%s^%s/m...\n",
 821                         PL_colors[0], PL_colors[1]));
 822         }
 823         s = t;
 824       set_useful:
 825         ++BmUSEFUL(do_utf8 ? prog->check_utf8 : prog->check_substr);    /* hooray/5 */
 826     }
 827     else {
 828         /* The found string does not prohibit matching at strpos,
 829            - no optimization of calling REx engine can be performed,
 830            unless it was an MBOL and we are not after MBOL,
 831            or a future STCLASS check will fail this. */
 832       try_at_start:
 833         /* Even in this situation we may use MBOL flag if strpos is offset
 834            wrt the start of the string. */
 835         if (ml_anch && sv && !SvROK(sv) /* See prev comment on SvROK */
 836             && (strpos != strbeg) && strpos[-1] != '\n'
 837             /* May be due to an implicit anchor of m{.*foo}  */
 838             && !(prog->intflags & PREGf_IMPLICIT))
 839         {
 840             t = strpos;
 841             goto find_anchor;
 842         }
 843         DEBUG_EXECUTE_r( if (ml_anch)
 844             PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n",
 845                           (long)(strpos - i_strpos), PL_colors[0], PL_colors[1]);
 846         );
 847       success_at_start:
 848         if (!(prog->intflags & PREGf_NAUGHTY)   /* XXXX If strpos moved? */
 849             && (do_utf8 ? (
 850                 prog->check_utf8                /* Could be deleted already */
 851                 && --BmUSEFUL(prog->check_utf8) < 0
 852                 && (prog->check_utf8 == prog->float_utf8)
 853             ) : (
 854                 prog->check_substr              /* Could be deleted already */
 855                 && --BmUSEFUL(prog->check_substr) < 0
 856                 && (prog->check_substr == prog->float_substr)
 857             )))
 858         {
 859             /* If flags & SOMETHING - do not do it many times on the same match */
 860             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "... Disabling check substring...\n"));
 861             SvREFCNT_dec(do_utf8 ? prog->check_utf8 : prog->check_substr);
 862             if (do_utf8 ? prog->check_substr : prog->check_utf8)
 863                 SvREFCNT_dec(do_utf8 ? prog->check_substr : prog->check_utf8);
 864             prog->check_substr = prog->check_utf8 = NULL;       /* disable */
 865             prog->float_substr = prog->float_utf8 = NULL;       /* clear */
 866             check = NULL;                       /* abort */
 867             s = strpos;
 868             /* XXXX This is a remnant of the old implementation.  It
 869                     looks wasteful, since now INTUIT can use many
 870                     other heuristics. */
 871             prog->extflags &= ~RXf_USE_INTUIT;
 872         }
 873         else
 874             s = strpos;
 875     }
 876
 877     /* Last resort... */
 878     /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
 879     /* trie stclasses are too expensive to use here, we are better off to
 880        leave it to regmatch itself */
 881     if (progi->regstclass && PL_regkind[OP(progi->regstclass)]!=TRIE) {
 882         /* minlen == 0 is possible if regstclass is \b or \B,
 883            and the fixed substr is ''$.
 884            Since minlen is already taken into account, s+1 is before strend;
 885            accidentally, minlen >= 1 guaranties no false positives at s + 1
 886            even for \b or \B.  But (minlen? 1 : 0) below assumes that
 887            regstclass does not come from lookahead...  */
 888         /* If regstclass takes bytelength more than 1: If charlength==1, OK.
 889            This leaves EXACTF only, which is dealt with in find_byclass().  */
 890         const U8* const str = (U8*)STRING(progi->regstclass);
 891         const int cl_l = (PL_regkind[OP(progi->regstclass)] == EXACT
 892                     ? CHR_DIST(str+STR_LEN(progi->regstclass), str)
 893                     : 1);
 894         char * endpos;
 895         if (prog->anchored_substr || prog->anchored_utf8 || ml_anch)
 896             endpos= HOP3c(s, (prog->minlen ? cl_l : 0), strend);
 897         else if (prog->float_substr || prog->float_utf8)
 898             endpos= HOP3c(HOP3c(check_at, -start_shift, strbeg), cl_l, strend);
 899         else
 900             endpos= strend;
 901
 902         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "start_shift: %"IVdf" check_at: %"IVdf" s: %"IVdf" endpos: %"IVdf"\n",
 903                                       (IV)start_shift, (IV)(check_at - strbeg), (IV)(s - strbeg), (IV)(endpos - strbeg)));
 904
 905         t = s;
 906         s = find_byclass(prog, progi->regstclass, s, endpos, NULL);
 907         if (!s) {
 908 #ifdef DEBUGGING
 909             const char *what = NULL;
 910 #endif
 911             if (endpos == strend) {
 912                 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
 913                                 "Could not match STCLASS...\n") );
 914                 goto fail;
 915             }
 916             DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
 917                                    "This position contradicts STCLASS...\n") );
 918             if ((prog->extflags & RXf_ANCH) && !ml_anch)
 919                 goto fail;
 920             /* Contradict one of substrings */
 921             if (prog->anchored_substr || prog->anchored_utf8) {
 922                 if ((do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) == check) {
 923                     DEBUG_EXECUTE_r( what = "anchored" );
 924                   hop_and_restart:
 925                     s = HOP3c(t, 1, strend);
 926                     if (s + start_shift + end_shift > strend) {
 927                         /* XXXX Should be taken into account earlier? */
 928                         DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
 929                                                "Could not match STCLASS...\n") );
 930                         goto fail;
 931                     }
 932                     if (!check)
 933                         goto giveup;
 934                     DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
 935                                 "Looking for %s substr starting at offset %ld...\n",
 936                                  what, (long)(s + start_shift - i_strpos)) );
 937                     goto restart;
 938                 }
 939                 /* Have both, check_string is floating */
 940                 if (t + start_shift >= check_at) /* Contradicts floating=check */
 941                     goto retry_floating_check;
 942                 /* Recheck anchored substring, but not floating... */
 943                 s = check_at;
 944                 if (!check)
 945                     goto giveup;
 946                 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
 947                           "Looking for anchored substr starting at offset %ld...\n",
 948                           (long)(other_last - i_strpos)) );
 949                 goto do_other_anchored;
 950             }
 951             /* Another way we could have checked stclass at the
 952                current position only: */
 953             if (ml_anch) {
 954                 s = t = t + 1;
 955                 if (!check)
 956                     goto giveup;
 957                 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
 958                           "Looking for /%s^%s/m starting at offset %ld...\n",
 959                           PL_colors[0], PL_colors[1], (long)(t - i_strpos)) );
 960                 goto try_at_offset;
 961             }
 962             if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))     /* Could have been deleted */
 963                 goto fail;
 964             /* Check is floating subtring. */
 965           retry_floating_check:
 966             t = check_at - start_shift;
 967             DEBUG_EXECUTE_r( what = "floating" );
 968             goto hop_and_restart;
 969         }
 970         if (t != s) {
 971             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 972                         "By STCLASS: moving %ld --> %ld\n",
 973                                   (long)(t - i_strpos), (long)(s - i_strpos))
 974                    );
 975         }
 976         else {
 977             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 978                                   "Does not contradict STCLASS...\n");
 979                    );
 980         }
 981     }
 982   giveup:
 983     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%s%s:%s match at offset %ld\n",
 984                           PL_colors[4], (check ? "Guessed" : "Giving up"),
 985                           PL_colors[5], (long)(s - i_strpos)) );
 986     return s;
 987
 988   fail_finish:                          /* Substring not found */
 989     if (prog->check_substr || prog->check_utf8)         /* could be removed already */
 990         BmUSEFUL(do_utf8 ? prog->check_utf8 : prog->check_substr) += 5; /* hooray */
 991   fail:
 992     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
 993                           PL_colors[4], PL_colors[5]));
 994     return NULL;
 995 }
 996
 997 #define DECL_TRIE_TYPE(scan) \
 998     const enum { trie_plain, trie_utf8, trie_utf8_fold, trie_latin_utf8_fold } \
 999                     trie_type = (scan->flags != EXACT) \
1000                               ? (do_utf8 ? trie_utf8_fold : (UTF ? trie_latin_utf8_fold : trie_plain)) \
1001                               : (do_utf8 ? trie_utf8 : trie_plain)
1002
1003 #define REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc, uscan, len,  \
1004 uvc, charid, foldlen, foldbuf, uniflags) STMT_START {                       \
1005     switch (trie_type) {                                                    \
1006     case trie_utf8_fold:                                                    \
1007         if ( foldlen>0 ) {                                                  \
1008             uvc = utf8n_to_uvuni( uscan, UTF8_MAXLEN, &len, uniflags );     \
1009             foldlen -= len;                                                 \
1010             uscan += len;                                                   \
1011             len=0;                                                          \
1012         } else {                                                            \
1013             uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN, &len, uniflags );   \
1014             uvc = to_uni_fold( uvc, foldbuf, &foldlen );                    \
1015             foldlen -= UNISKIP( uvc );                                      \
1016             uscan = foldbuf + UNISKIP( uvc );                               \
1017         }                                                                   \
1018         break;                                                              \
1019     case trie_latin_utf8_fold:                                              \
1020         if ( foldlen>0 ) {                                                  \
1021             uvc = utf8n_to_uvuni( uscan, UTF8_MAXLEN, &len, uniflags );     \
1022             foldlen -= len;                                                 \
1023             uscan += len;                                                   \
1024             len=0;                                                          \
1025         } else {                                                            \
1026             len = 1;                                                        \
1027             uvc = to_uni_fold( *(U8*)uc, foldbuf, &foldlen );               \
1028             foldlen -= UNISKIP( uvc );                                      \
1029             uscan = foldbuf + UNISKIP( uvc );                               \
1030         }                                                                   \
1031         break;                                                              \
1032     case trie_utf8:                                                         \
1033         uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN, &len, uniflags );       \
1034         break;                                                              \
1035     case trie_plain:                                                        \
1036         uvc = (UV)*uc;                                                      \
1037         len = 1;                                                            \
1038     }                                                                       \
1039                                                                             \
1040     if (uvc < 256) {                                                        \
1041         charid = trie->charmap[ uvc ];                                      \
1042     }                                                                       \
1043     else {                                                                  \
1044         charid = 0;                                                         \
1045         if (widecharmap) {                                                  \
1046             SV** const svpp = hv_fetch(widecharmap,                         \
1047                         (char*)&uvc, sizeof(UV), 0);                        \
1048             if (svpp)                                                       \
1049                 charid = (U16)SvIV(*svpp);                                  \
1050         }                                                                   \
1051     }                                                                       \
1052 } STMT_END
1053
1054 #define REXEC_FBC_EXACTISH_CHECK(CoNd)                 \
1055 {                                                      \
1056     char *my_strend= (char *)strend;                   \
1057     if ( (CoNd)                                        \
1058          && (ln == len ||                              \
1059              !ibcmp_utf8(s, &my_strend, 0,  do_utf8,   \
1060                         m, NULL, ln, (bool)UTF))       \
1061          && (!reginfo || regtry(reginfo, &s)) )        \
1062         goto got_it;                                   \
1063     else {                                             \
1064          U8 foldbuf[UTF8_MAXBYTES_CASE+1];             \
1065          uvchr_to_utf8(tmpbuf, c);                     \
1066          f = to_utf8_fold(tmpbuf, foldbuf, &foldlen);  \
1067          if ( f != c                                   \
1068               && (f == c1 || f == c2)                  \
1069               && (ln == len ||                         \
1070                 !ibcmp_utf8(s, &my_strend, 0,  do_utf8,\
1071                               m, NULL, ln, (bool)UTF)) \
1072               && (!reginfo || regtry(reginfo, &s)) )   \
1073               goto got_it;                             \
1074     }                                                  \
1075 }                                                      \
1076 s += len
1077
1078 #define REXEC_FBC_EXACTISH_SCAN(CoNd)                     \
1079 STMT_START {                                              \
1080     while (s <= e) {                                      \
1081         if ( (CoNd)                                       \
1082              && (ln == 1 || !(OP(c) == EXACTF             \
1083                               ? ibcmp(s, m, ln)           \
1084                               : ibcmp_locale(s, m, ln)))  \
1085              && (!reginfo || regtry(reginfo, &s)) )        \
1086             goto got_it;                                  \
1087         s++;                                              \
1088     }                                                     \
1089 } STMT_END
1090
1091 #define REXEC_FBC_UTF8_SCAN(CoDe)                     \
1092 STMT_START {                                          \
1093     while (s + (uskip = UTF8SKIP(s)) <= strend) {     \
1094         CoDe                                          \
1095         s += uskip;                                   \
1096     }                                                 \
1097 } STMT_END
1098
1099 #define REXEC_FBC_SCAN(CoDe)                          \
1100 STMT_START {                                          \
1101     while (s < strend) {                              \
1102         CoDe                                          \
1103         s++;                                          \
1104     }                                                 \
1105 } STMT_END
1106
1107 #define REXEC_FBC_UTF8_CLASS_SCAN(CoNd)               \
1108 REXEC_FBC_UTF8_SCAN(                                  \
1109     if (CoNd) {                                       \
1110         if (tmp && (!reginfo || regtry(reginfo, &s)))  \
1111             goto got_it;                              \
1112         else                                          \
1113             tmp = doevery;                            \
1114     }                                                 \
1115     else                                              \
1116         tmp = 1;                                      \
1117 )
1118
1119 #define REXEC_FBC_CLASS_SCAN(CoNd)                    \
1120 REXEC_FBC_SCAN(                                       \
1121     if (CoNd) {                                       \
1122         if (tmp && (!reginfo || regtry(reginfo, &s)))  \
1123             goto got_it;                              \
1124         else                                          \
1125             tmp = doevery;                            \
1126     }                                                 \
1127     else                                              \
1128         tmp = 1;                                      \
1129 )
1130
1131 #define REXEC_FBC_TRYIT               \
1132 if ((!reginfo || regtry(reginfo, &s))) \
1133     goto got_it
1134
1135 #define REXEC_FBC_CSCAN(CoNdUtF8,CoNd)                         \
1136     if (do_utf8) {                                             \
1137         REXEC_FBC_UTF8_CLASS_SCAN(CoNdUtF8);                   \
1138     }                                                          \
1139     else {                                                     \
1140         REXEC_FBC_CLASS_SCAN(CoNd);                            \
1141     }                                                          \
1142     break
1143
1144 #define REXEC_FBC_CSCAN_PRELOAD(UtFpReLoAd,CoNdUtF8,CoNd)      \
1145     if (do_utf8) {                                             \
1146         UtFpReLoAd;                                            \
1147         REXEC_FBC_UTF8_CLASS_SCAN(CoNdUtF8);                   \
1148     }                                                          \
1149     else {                                                     \
1150         REXEC_FBC_CLASS_SCAN(CoNd);                            \
1151     }                                                          \
1152     break
1153
1154 #define REXEC_FBC_CSCAN_TAINT(CoNdUtF8,CoNd)                   \
1155     PL_reg_flags |= RF_tainted;                                \
1156     if (do_utf8) {                                             \
1157         REXEC_FBC_UTF8_CLASS_SCAN(CoNdUtF8);                   \
1158     }                                                          \
1159     else {                                                     \
1160         REXEC_FBC_CLASS_SCAN(CoNd);                            \
1161     }                                                          \
1162     break
1163
1164 #define DUMP_EXEC_POS(li,s,doutf8) \
1165     dump_exec_pos(li,s,(PL_regeol),(PL_bostr),(PL_reg_starttry),doutf8)
1166
1167 /* We know what class REx starts with.  Try to find this position... */
1168 /* if reginfo is NULL, its a dryrun */
1169 /* annoyingly all the vars in this routine have different names from their counterparts
1170    in regmatch. /grrr */
1171
1172 STATIC char *
1173 S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
1174     const char *strend, regmatch_info *reginfo)
1175 {
1176         dVAR;
1177         const I32 doevery = (prog->intflags & PREGf_SKIP) == 0;
1178         char *m;
1179         STRLEN ln;
1180         STRLEN lnc;
1181         register STRLEN uskip;
1182         unsigned int c1;
1183         unsigned int c2;
1184         char *e;
1185         register I32 tmp = 1;   /* Scratch variable? */
1186         register const bool do_utf8 = PL_reg_match_utf8;
1187         RXi_GET_DECL(prog,progi);
1188
1189         PERL_ARGS_ASSERT_FIND_BYCLASS;
1190
1191         /* We know what class it must start with. */
1192         switch (OP(c)) {
1193         case ANYOF:
1194             if (do_utf8) {
1195                  REXEC_FBC_UTF8_CLASS_SCAN((ANYOF_FLAGS(c) & ANYOF_UNICODE) ||
1196                           !UTF8_IS_INVARIANT((U8)s[0]) ?
1197                           reginclass(prog, c, (U8*)s, 0, do_utf8) :
1198                           REGINCLASS(prog, c, (U8*)s));
1199             }
1200             else {
1201                  while (s < strend) {
1202                       STRLEN skip = 1;
1203
1204                       if (REGINCLASS(prog, c, (U8*)s) ||
1205                           (ANYOF_FOLD_SHARP_S(c, s, strend) &&
1206                            /* The assignment of 2 is intentional:
1207                             * for the folded sharp s, the skip is 2. */
1208                            (skip = SHARP_S_SKIP))) {
1209                            if (tmp && (!reginfo || regtry(reginfo, &s)))
1210                                 goto got_it;
1211                            else
1212                                 tmp = doevery;
1213                       }
1214                       else
1215                            tmp = 1;
1216                       s += skip;
1217                  }
1218             }
1219             break;
1220         case CANY:
1221             REXEC_FBC_SCAN(
1222                 if (tmp && (!reginfo || regtry(reginfo, &s)))
1223                     goto got_it;
1224                 else
1225                     tmp = doevery;
1226             );
1227             break;
1228         case EXACTF:
1229             m   = STRING(c);
1230             ln  = STR_LEN(c);   /* length to match in octets/bytes */
1231             lnc = (I32) ln;     /* length to match in characters */
1232             if (UTF) {
1233                 STRLEN ulen1, ulen2;
1234                 U8 *sm = (U8 *) m;
1235                 U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
1236                 U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
1237                 /* used by commented-out code below */
1238                 /*const U32 uniflags = UTF8_ALLOW_DEFAULT;*/
1239
1240                 /* XXX: Since the node will be case folded at compile
1241                    time this logic is a little odd, although im not
1242                    sure that its actually wrong. --dmq */
1243
1244                 c1 = to_utf8_lower((U8*)m, tmpbuf1, &ulen1);
1245                 c2 = to_utf8_upper((U8*)m, tmpbuf2, &ulen2);
1246
1247                 /* XXX: This is kinda strange. to_utf8_XYZ returns the
1248                    codepoint of the first character in the converted
1249                    form, yet originally we did the extra step.
1250                    No tests fail by commenting this code out however
1251                    so Ive left it out. -- dmq.
1252
1253                 c1 = utf8n_to_uvchr(tmpbuf1, UTF8_MAXBYTES_CASE,
1254                                     0, uniflags);
1255                 c2 = utf8n_to_uvchr(tmpbuf2, UTF8_MAXBYTES_CASE,
1256                                     0, uniflags);
1257                 */
1258
1259                 lnc = 0;
1260                 while (sm < ((U8 *) m + ln)) {
1261                     lnc++;
1262                     sm += UTF8SKIP(sm);
1263                 }
1264             }
1265             else {
1266                 c1 = *(U8*)m;
1267                 c2 = PL_fold[c1];
1268             }
1269             goto do_exactf;
1270         case EXACTFL:
1271             m   = STRING(c);
1272             ln  = STR_LEN(c);
1273             lnc = (I32) ln;
1274             c1 = *(U8*)m;
1275             c2 = PL_fold_locale[c1];
1276           do_exactf:
1277             e = HOP3c(strend, -((I32)lnc), s);
1278
1279             if (!reginfo && e < s)
1280                 e = s;                  /* Due to minlen logic of intuit() */
1281
1282             /* The idea in the EXACTF* cases is to first find the
1283              * first character of the EXACTF* node and then, if
1284              * necessary, case-insensitively compare the full
1285              * text of the node.  The c1 and c2 are the first
1286              * characters (though in Unicode it gets a bit
1287              * more complicated because there are more cases
1288              * than just upper and lower: one needs to use
1289              * the so-called folding case for case-insensitive
1290              * matching (called "loose matching" in Unicode).
1291              * ibcmp_utf8() will do just that. */
1292
1293             if (do_utf8 || UTF) {
1294                 UV c, f;
1295                 U8 tmpbuf [UTF8_MAXBYTES+1];
1296                 STRLEN len = 1;
1297                 STRLEN foldlen;
1298                 const U32 uniflags = UTF8_ALLOW_DEFAULT;
1299                 if (c1 == c2) {
1300                     /* Upper and lower of 1st char are equal -
1301                      * probably not a "letter". */
1302                     while (s <= e) {
1303                         if (do_utf8) {
1304                             c = utf8n_to_uvchr((U8*)s, UTF8_MAXBYTES, &len,
1305                                            uniflags);
1306                         } else {
1307                             c = *((U8*)s);
1308                         }
1309                         REXEC_FBC_EXACTISH_CHECK(c == c1);
1310                     }
1311                 }
1312                 else {
1313                     while (s <= e) {
1314                         if (do_utf8) {
1315                             c = utf8n_to_uvchr((U8*)s, UTF8_MAXBYTES, &len,
1316                                            uniflags);
1317                         } else {
1318                             c = *((U8*)s);
1319                         }
1320
1321                         /* Handle some of the three Greek sigmas cases.
1322                          * Note that not all the possible combinations
1323                          * are handled here: some of them are handled
1324                          * by the standard folding rules, and some of
1325                          * them (the character class or ANYOF cases)
1326                          * are handled during compiletime in
1327                          * regexec.c:S_regclass(). */
1328                         if (c == (UV)UNICODE_GREEK_CAPITAL_LETTER_SIGMA ||
1329                             c == (UV)UNICODE_GREEK_SMALL_LETTER_FINAL_SIGMA)
1330                             c = (UV)UNICODE_GREEK_SMALL_LETTER_SIGMA;
1331
1332                         REXEC_FBC_EXACTISH_CHECK(c == c1 || c == c2);
1333                     }
1334                 }
1335             }
1336             else {
1337                 /* Neither pattern nor string are UTF8 */
1338                 if (c1 == c2)
1339                     REXEC_FBC_EXACTISH_SCAN(*(U8*)s == c1);
1340                 else
1341                     REXEC_FBC_EXACTISH_SCAN(*(U8*)s == c1 || *(U8*)s == c2);
1342             }
1343             break;
1344         case BOUNDL:
1345             PL_reg_flags |= RF_tainted;
1346             /* FALL THROUGH */
1347         case BOUND:
1348             if (do_utf8) {
1349                 if (s == PL_bostr)
1350                     tmp = '\n';
1351                 else {
1352                     U8 * const r = reghop3((U8*)s, -1, (U8*)PL_bostr);
1353                     tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, UTF8_ALLOW_DEFAULT);
1354                 }
1355                 tmp = ((OP(c) == BOUND ?
1356                         isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
1357                 LOAD_UTF8_CHARCLASS_ALNUM();
1358                 REXEC_FBC_UTF8_SCAN(
1359                     if (tmp == !(OP(c) == BOUND ?
1360                                  (bool)swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
1361                                  isALNUM_LC_utf8((U8*)s)))
1362                     {
1363                         tmp = !tmp;
1364                         REXEC_FBC_TRYIT;
1365                 }
1366                 );
1367             }
1368             else {
1369                 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
1370                 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1371                 REXEC_FBC_SCAN(
1372                     if (tmp ==
1373                         !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
1374                         tmp = !tmp;
1375                         REXEC_FBC_TRYIT;
1376                 }
1377                 );
1378             }
1379             if ((!prog->minlen && tmp) && (!reginfo || regtry(reginfo, &s)))
1380                 goto got_it;
1381             break;
1382         case NBOUNDL:
1383             PL_reg_flags |= RF_tainted;
1384             /* FALL THROUGH */
1385         case NBOUND:
1386             if (do_utf8) {
1387                 if (s == PL_bostr)
1388                     tmp = '\n';
1389                 else {
1390                     U8 * const r = reghop3((U8*)s, -1, (U8*)PL_bostr);
1391                     tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, UTF8_ALLOW_DEFAULT);
1392                 }
1393                 tmp = ((OP(c) == NBOUND ?
1394                         isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
1395                 LOAD_UTF8_CHARCLASS_ALNUM();
1396                 REXEC_FBC_UTF8_SCAN(
1397                     if (tmp == !(OP(c) == NBOUND ?
1398                                  (bool)swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
1399                                  isALNUM_LC_utf8((U8*)s)))
1400                         tmp = !tmp;
1401                     else REXEC_FBC_TRYIT;
1402                 );
1403             }
1404             else {
1405                 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
1406                 tmp = ((OP(c) == NBOUND ?
1407                         isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1408                 REXEC_FBC_SCAN(
1409                     if (tmp ==
1410                         !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
1411                         tmp = !tmp;
1412                     else REXEC_FBC_TRYIT;
1413                 );
1414             }
1415             if ((!prog->minlen && !tmp) && (!reginfo || regtry(reginfo, &s)))
1416                 goto got_it;
1417             break;
1418         case ALNUM:
1419             REXEC_FBC_CSCAN_PRELOAD(
1420                 LOAD_UTF8_CHARCLASS_ALNUM(),
1421                 swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8),
1422                 isALNUM(*s)
1423             );
1424         case ALNUML:
1425             REXEC_FBC_CSCAN_TAINT(
1426                 isALNUM_LC_utf8((U8*)s),
1427                 isALNUM_LC(*s)
1428             );
1429         case NALNUM:
1430             REXEC_FBC_CSCAN_PRELOAD(
1431                 LOAD_UTF8_CHARCLASS_ALNUM(),
1432                 !swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8),
1433                 !isALNUM(*s)
1434             );
1435         case NALNUML:
1436             REXEC_FBC_CSCAN_TAINT(
1437                 !isALNUM_LC_utf8((U8*)s),
1438                 !isALNUM_LC(*s)
1439             );
1440         case SPACE:
1441             REXEC_FBC_CSCAN_PRELOAD(
1442                 LOAD_UTF8_CHARCLASS_SPACE(),
1443                 *s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8),
1444                 isSPACE(*s)
1445             );
1446         case SPACEL:
1447             REXEC_FBC_CSCAN_TAINT(
1448                 *s == ' ' || isSPACE_LC_utf8((U8*)s),
1449                 isSPACE_LC(*s)
1450             );
1451         case NSPACE:
1452             REXEC_FBC_CSCAN_PRELOAD(
1453                 LOAD_UTF8_CHARCLASS_SPACE(),
1454                 !(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8)),
1455                 !isSPACE(*s)
1456             );
1457         case NSPACEL:
1458             REXEC_FBC_CSCAN_TAINT(
1459                 !(*s == ' ' || isSPACE_LC_utf8((U8*)s)),
1460                 !isSPACE_LC(*s)
1461             );
1462         case DIGIT:
1463             REXEC_FBC_CSCAN_PRELOAD(
1464                 LOAD_UTF8_CHARCLASS_DIGIT(),
1465                 swash_fetch(PL_utf8_digit,(U8*)s, do_utf8),
1466                 isDIGIT(*s)
1467             );
1468         case DIGITL:
1469             REXEC_FBC_CSCAN_TAINT(
1470                 isDIGIT_LC_utf8((U8*)s),
1471                 isDIGIT_LC(*s)
1472             );
1473         case NDIGIT:
1474             REXEC_FBC_CSCAN_PRELOAD(
1475                 LOAD_UTF8_CHARCLASS_DIGIT(),
1476                 !swash_fetch(PL_utf8_digit,(U8*)s, do_utf8),
1477                 !isDIGIT(*s)
1478             );
1479         case NDIGITL:
1480             REXEC_FBC_CSCAN_TAINT(
1481                 !isDIGIT_LC_utf8((U8*)s),
1482                 !isDIGIT_LC(*s)
1483             );
1484         case LNBREAK:
1485             REXEC_FBC_CSCAN(
1486                 is_LNBREAK_utf8(s),
1487                 is_LNBREAK_latin1(s)
1488             );
1489         case VERTWS:
1490             REXEC_FBC_CSCAN(
1491                 is_VERTWS_utf8(s),
1492                 is_VERTWS_latin1(s)
1493             );
1494         case NVERTWS:
1495             REXEC_FBC_CSCAN(
1496                 !is_VERTWS_utf8(s),
1497                 !is_VERTWS_latin1(s)
1498             );
1499         case HORIZWS:
1500             REXEC_FBC_CSCAN(
1501                 is_HORIZWS_utf8(s),
1502                 is_HORIZWS_latin1(s)
1503             );
1504         case NHORIZWS:
1505             REXEC_FBC_CSCAN(
1506                 !is_HORIZWS_utf8(s),
1507                 !is_HORIZWS_latin1(s)
1508             );
1509         case AHOCORASICKC:
1510         case AHOCORASICK:
1511             {
1512                 DECL_TRIE_TYPE(c);
1513                 /* what trie are we using right now */
1514                 reg_ac_data *aho
1515                     = (reg_ac_data*)progi->data->data[ ARG( c ) ];
1516                 reg_trie_data *trie
1517                     = (reg_trie_data*)progi->data->data[ aho->trie ];
1518                 HV *widecharmap = (HV*) progi->data->data[ aho->trie + 1 ];
1519
1520                 const char *last_start = strend - trie->minlen;
1521 #ifdef DEBUGGING
1522                 const char *real_start = s;
1523 #endif
1524                 STRLEN maxlen = trie->maxlen;
1525                 SV *sv_points;
1526                 U8 **points; /* map of where we were in the input string
1527                                 when reading a given char. For ASCII this
1528                                 is unnecessary overhead as the relationship
1529                                 is always 1:1, but for Unicode, especially
1530                                 case folded Unicode this is not true. */
1531                 U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ];
1532                 U8 *bitmap=NULL;
1533
1534
1535                 GET_RE_DEBUG_FLAGS_DECL;
1536
1537                 /* We can't just allocate points here. We need to wrap it in
1538                  * an SV so it gets freed properly if there is a croak while
1539                  * running the match */
1540                 ENTER;
1541                 SAVETMPS;
1542                 sv_points=newSV(maxlen * sizeof(U8 *));
1543                 SvCUR_set(sv_points,
1544                     maxlen * sizeof(U8 *));
1545                 SvPOK_on(sv_points);
1546                 sv_2mortal(sv_points);
1547                 points=(U8**)SvPV_nolen(sv_points );
1548                 if ( trie_type != trie_utf8_fold
1549                      && (trie->bitmap || OP(c)==AHOCORASICKC) )
1550                 {
1551                     if (trie->bitmap)
1552                         bitmap=(U8*)trie->bitmap;
1553                     else
1554                         bitmap=(U8*)ANYOF_BITMAP(c);
1555                 }
1556                 /* this is the Aho-Corasick algorithm modified a touch
1557                    to include special handling for long "unknown char"
1558                    sequences. The basic idea being that we use AC as long
1559                    as we are dealing with a possible matching char, when
1560                    we encounter an unknown char (and we have not encountered
1561                    an accepting state) we scan forward until we find a legal
1562                    starting char.
1563                    AC matching is basically that of trie matching, except
1564                    that when we encounter a failing transition, we fall back
1565                    to the current states "fail state", and try the current char
1566                    again, a process we repeat until we reach the root state,
1567                    state 1, or a legal transition. If we fail on the root state
1568                    then we can either terminate if we have reached an accepting
1569                    state previously, or restart the entire process from the beginning
1570                    if we have not.
1571
1572                  */
1573                 while (s <= last_start) {
1574                     const U32 uniflags = UTF8_ALLOW_DEFAULT;
1575                     U8 *uc = (U8*)s;
1576                     U16 charid = 0;
1577                     U32 base = 1;
1578                     U32 state = 1;
1579                     UV uvc = 0;
1580                     STRLEN len = 0;
1581                     STRLEN foldlen = 0;
1582                     U8 *uscan = (U8*)NULL;
1583                     U8 *leftmost = NULL;
1584 #ifdef DEBUGGING
1585                     U32 accepted_word= 0;
1586 #endif
1587                     U32 pointpos = 0;
1588
1589                     while ( state && uc <= (U8*)strend ) {
1590                         int failed=0;
1591                         U32 word = aho->states[ state ].wordnum;
1592
1593                         if( state==1 ) {
1594                             if ( bitmap ) {
1595                                 DEBUG_TRIE_EXECUTE_r(
1596                                     if ( uc <= (U8*)last_start && !BITMAP_TEST(bitmap,*uc) ) {
1597                                         dump_exec_pos( (char *)uc, c, strend, real_start,
1598                                             (char *)uc, do_utf8 );
1599                                         PerlIO_printf( Perl_debug_log,
1600                                             " Scanning for legal start char...\n");
1601                                     }
1602                                 );
1603                                 while ( uc <= (U8*)last_start  && !BITMAP_TEST(bitmap,*uc) ) {
1604                                     uc++;
1605                                 }
1606                                 s= (char *)uc;
1607                             }
1608                             if (uc >(U8*)last_start) break;
1609                         }
1610
1611                         if ( word ) {
1612                             U8 *lpos= points[ (pointpos - trie->wordlen[word-1] ) % maxlen ];
1613                             if (!leftmost || lpos < leftmost) {
1614                                 DEBUG_r(accepted_word=word);
1615                                 leftmost= lpos;
1616                             }
1617                             if (base==0) break;
1618
1619                         }
1620                         points[pointpos++ % maxlen]= uc;
1621                         REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc,
1622                                              uscan, len, uvc, charid, foldlen,
1623                                              foldbuf, uniflags);
1624                         DEBUG_TRIE_EXECUTE_r({
1625                             dump_exec_pos( (char *)uc, c, strend, real_start,
1626                                 s,   do_utf8 );
1627                             PerlIO_printf(Perl_debug_log,
1628                                 " Charid:%3u CP:%4"UVxf" ",
1629                                  charid, uvc);
1630                         });
1631
1632                         do {
1633 #ifdef DEBUGGING
1634                             word = aho->states[ state ].wordnum;
1635 #endif
1636                             base = aho->states[ state ].trans.base;
1637
1638                             DEBUG_TRIE_EXECUTE_r({
1639                                 if (failed)
1640                                     dump_exec_pos( (char *)uc, c, strend, real_start,
1641                                         s,   do_utf8 );
1642                                 PerlIO_printf( Perl_debug_log,
1643                                     "%sState: %4"UVxf", word=%"UVxf,
1644                                     failed ? " Fail transition to " : "",
1645                                     (UV)state, (UV)word);
1646                             });
1647                             if ( base ) {
1648                                 U32 tmp;
1649                                 if (charid &&
1650                                      (base + charid > trie->uniquecharcount )
1651                                      && (base + charid - 1 - trie->uniquecharcount
1652                                             < trie->lasttrans)
1653                                      && trie->trans[base + charid - 1 -
1654                                             trie->uniquecharcount].check == state
1655                                      && (tmp=trie->trans[base + charid - 1 -
1656                                         trie->uniquecharcount ].next))
1657                                 {
1658                                     DEBUG_TRIE_EXECUTE_r(
1659                                         PerlIO_printf( Perl_debug_log," - legal\n"));
1660                                     state = tmp;
1661                                     break;
1662                                 }
1663                                 else {
1664                                     DEBUG_TRIE_EXECUTE_r(
1665                                         PerlIO_printf( Perl_debug_log," - fail\n"));
1666                                     failed = 1;
1667                                     state = aho->fail[state];
1668                                 }
1669                             }
1670                             else {
1671                                 /* we must be accepting here */
1672                                 DEBUG_TRIE_EXECUTE_r(
1673                                         PerlIO_printf( Perl_debug_log," - accepting\n"));
1674                                 failed = 1;
1675                                 break;
1676                             }
1677                         } while(state);
1678                         uc += len;
1679                         if (failed) {
1680                             if (leftmost)
1681                                 break;
1682                             if (!state) state = 1;
1683                         }
1684                     }
1685                     if ( aho->states[ state ].wordnum ) {
1686                         U8 *lpos = points[ (pointpos - trie->wordlen[aho->states[ state ].wordnum-1]) % maxlen ];
1687                         if (!leftmost || lpos < leftmost) {
1688                             DEBUG_r(accepted_word=aho->states[ state ].wordnum);
1689                             leftmost = lpos;
1690                         }
1691                     }
1692                     if (leftmost) {
1693                         s = (char*)leftmost;
1694                         DEBUG_TRIE_EXECUTE_r({
1695                             PerlIO_printf(
1696                                 Perl_debug_log,"Matches word #%"UVxf" at position %"IVdf". Trying full pattern...\n",
1697                                 (UV)accepted_word, (IV)(s - real_start)
1698                             );
1699                         });
1700                         if (!reginfo || regtry(reginfo, &s)) {
1701                             FREETMPS;
1702                             LEAVE;
1703                             goto got_it;
1704                         }
1705                         s = HOPc(s,1);
1706                         DEBUG_TRIE_EXECUTE_r({
1707                             PerlIO_printf( Perl_debug_log,"Pattern failed. Looking for new start point...\n");
1708                         });
1709                     } else {
1710                         DEBUG_TRIE_EXECUTE_r(
1711                             PerlIO_printf( Perl_debug_log,"No match.\n"));
1712                         break;
1713                     }
1714                 }
1715                 FREETMPS;
1716                 LEAVE;
1717             }
1718             break;
1719         default:
1720             Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
1721             break;
1722         }
1723         return 0;
1724       got_it:
1725         return s;
1726 }
1727
1728 static void
1729 S_swap_match_buff (pTHX_ regexp *prog)
1730 {
1731     regexp_paren_pair *t;
1732
1733     PERL_ARGS_ASSERT_SWAP_MATCH_BUFF;
1734
1735     if (!prog->swap) {
1736     /* We have to be careful. If the previous successful match
1737        was from this regex we don't want a subsequent paritally
1738        successful match to clobber the old results.
1739        So when we detect this possibility we add a swap buffer
1740        to the re, and switch the buffer each match. If we fail
1741        we switch it back, otherwise we leave it swapped.
1742     */
1743         Newxz(prog->swap, (prog->nparens + 1), regexp_paren_pair);
1744     }
1745     t = prog->swap;
1746     prog->swap = prog->offs;
1747     prog->offs = t;
1748 }
1749
1750
1751 /*
1752  - regexec_flags - match a regexp against a string
1753  */
1754 I32
1755 Perl_regexec_flags(pTHX_ REGEXP * const rx, char *stringarg, register char *strend,
1756               char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
1757 /* strend: pointer to null at end of string */
1758 /* strbeg: real beginning of string */
1759 /* minend: end of match must be >=minend after stringarg. */
1760 /* data: May be used for some additional optimizations.
1761          Currently its only used, with a U32 cast, for transmitting
1762          the ganch offset when doing a /g match. This will change */
1763 /* nosave: For optimizations. */
1764 {
1765     dVAR;
1766     struct regexp *const prog = (struct regexp *)SvANY(rx);
1767     /*register*/ char *s;
1768     register regnode *c;
1769     /*register*/ char *startpos = stringarg;
1770     I32 minlen;         /* must match at least this many chars */
1771     I32 dontbother = 0; /* how many characters not to try at end */
1772     I32 end_shift = 0;                  /* Same for the end. */         /* CC */
1773     I32 scream_pos = -1;                /* Internal iterator of scream. */
1774     char *scream_olds = NULL;
1775     const bool do_utf8 = (bool)DO_UTF8(sv);
1776     I32 multiline;
1777     RXi_GET_DECL(prog,progi);
1778     regmatch_info reginfo;  /* create some info to pass to regtry etc */
1779     bool swap_on_fail = 0;
1780     GET_RE_DEBUG_FLAGS_DECL;
1781
1782     PERL_ARGS_ASSERT_REGEXEC_FLAGS;
1783     PERL_UNUSED_ARG(data);
1784
1785     /* Be paranoid... */
1786     if (prog == NULL || startpos == NULL) {
1787         Perl_croak(aTHX_ "NULL regexp parameter");
1788         return 0;
1789     }
1790
1791     multiline = prog->extflags & RXf_PMf_MULTILINE;
1792     reginfo.prog = rx;   /* Yes, sorry that this is confusing.  */
1793
1794     RX_MATCH_UTF8_set(rx, do_utf8);
1795     DEBUG_EXECUTE_r(
1796         debug_start_match(rx, do_utf8, startpos, strend,
1797         "Matching");
1798     );
1799
1800     minlen = prog->minlen;
1801
1802     if (strend - startpos < (minlen+(prog->check_offset_min<0?prog->check_offset_min:0))) {
1803         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
1804                               "String too short [regexec_flags]...\n"));
1805         goto phooey;
1806     }
1807
1808
1809     /* Check validity of program. */
1810     if (UCHARAT(progi->program) != REG_MAGIC) {
1811         Perl_croak(aTHX_ "corrupted regexp program");
1812     }
1813
1814     PL_reg_flags = 0;
1815     PL_reg_eval_set = 0;
1816     PL_reg_maxiter = 0;
1817
1818     if (RX_UTF8(rx))
1819         PL_reg_flags |= RF_utf8;
1820
1821     /* Mark beginning of line for ^ and lookbehind. */
1822     reginfo.bol = startpos; /* XXX not used ??? */
1823     PL_bostr  = strbeg;
1824     reginfo.sv = sv;
1825
1826     /* Mark end of line for $ (and such) */
1827     PL_regeol = strend;
1828
1829     /* see how far we have to get to not match where we matched before */
1830     reginfo.till = startpos+minend;
1831
1832     /* If there is a "must appear" string, look for it. */
1833     s = startpos;
1834
1835     if (prog->extflags & RXf_GPOS_SEEN) { /* Need to set reginfo->ganch */
1836         MAGIC *mg;
1837
1838         if (flags & REXEC_IGNOREPOS)    /* Means: check only at start */
1839             reginfo.ganch = startpos + prog->gofs;
1840         else if (sv && SvTYPE(sv) >= SVt_PVMG
1841                   && SvMAGIC(sv)
1842                   && (mg = mg_find(sv, PERL_MAGIC_regex_global))
1843                   && mg->mg_len >= 0) {
1844             reginfo.ganch = strbeg + mg->mg_len;        /* Defined pos() */
1845             if (prog->extflags & RXf_ANCH_GPOS) {
1846                 if (s > reginfo.ganch)
1847                     goto phooey;
1848                 s = reginfo.ganch - prog->gofs;
1849             }
1850         }
1851         else if (data) {
1852             reginfo.ganch = strbeg + PTR2UV(data);
1853         } else                          /* pos() not defined */
1854             reginfo.ganch = strbeg;
1855     }
1856     if (PL_curpm && (PM_GETRE(PL_curpm) == rx)) {
1857         swap_on_fail = 1;
1858         swap_match_buff(prog); /* do we need a save destructor here for
1859                                   eval dies? */
1860     }
1861     if (!(flags & REXEC_CHECKED) && (prog->check_substr != NULL || prog->check_utf8 != NULL)) {
1862         re_scream_pos_data d;
1863
1864         d.scream_olds = &scream_olds;
1865         d.scream_pos = &scream_pos;
1866         s = re_intuit_start(rx, sv, s, strend, flags, &d);
1867         if (!s) {
1868             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Not present...\n"));
1869             goto phooey;        /* not present */
1870         }
1871     }
1872
1873
1874
1875     /* Simplest case:  anchored match need be tried only once. */
1876     /*  [unless only anchor is BOL and multiline is set] */
1877     if (prog->extflags & (RXf_ANCH & ~RXf_ANCH_GPOS)) {
1878         if (s == startpos && regtry(&reginfo, &startpos))
1879             goto got_it;
1880         else if (multiline || (prog->intflags & PREGf_IMPLICIT)
1881                  || (prog->extflags & RXf_ANCH_MBOL)) /* XXXX SBOL? */
1882         {
1883             char *end;
1884
1885             if (minlen)
1886                 dontbother = minlen - 1;
1887             end = HOP3c(strend, -dontbother, strbeg) - 1;
1888             /* for multiline we only have to try after newlines */
1889             if (prog->check_substr || prog->check_utf8) {
1890                 if (s == startpos)
1891                     goto after_try;
1892                 while (1) {
1893                     if (regtry(&reginfo, &s))
1894                         goto got_it;
1895                   after_try:
1896                     if (s > end)
1897                         goto phooey;
1898                     if (prog->extflags & RXf_USE_INTUIT) {
1899                         s = re_intuit_start(rx, sv, s + 1, strend, flags, NULL);
1900                         if (!s)
1901                             goto phooey;
1902                     }
1903                     else
1904                         s++;
1905                 }
1906             } else {
1907                 if (s > startpos)
1908                     s--;
1909                 while (s < end) {
1910                     if (*s++ == '\n') { /* don't need PL_utf8skip here */
1911                         if (regtry(&reginfo, &s))
1912                             goto got_it;
1913                     }
1914                 }
1915             }
1916         }
1917         goto phooey;
1918     } else if (RXf_GPOS_CHECK == (prog->extflags & RXf_GPOS_CHECK))
1919     {
1920         /* the warning about reginfo.ganch being used without intialization
1921            is bogus -- we set it above, when prog->extflags & RXf_GPOS_SEEN
1922            and we only enter this block when the same bit is set. */
1923         char *tmp_s = reginfo.ganch - prog->gofs;
1924         if (regtry(&reginfo, &tmp_s))
1925             goto got_it;
1926         goto phooey;
1927     }
1928
1929     /* Messy cases:  unanchored match. */
1930     if ((prog->anchored_substr || prog->anchored_utf8) && prog->intflags & PREGf_SKIP) {
1931         /* we have /x+whatever/ */
1932         /* it must be a one character string (XXXX Except UTF?) */
1933         char ch;
1934 #ifdef DEBUGGING
1935         int did_match = 0;
1936 #endif
1937         if (!(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr))
1938             do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1939         ch = SvPVX_const(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr)[0];
1940
1941         if (do_utf8) {
1942             REXEC_FBC_SCAN(
1943                 if (*s == ch) {
1944                     DEBUG_EXECUTE_r( did_match = 1 );
1945                     if (regtry(&reginfo, &s)) goto got_it;
1946                     s += UTF8SKIP(s);
1947                     while (s < strend && *s == ch)
1948                         s += UTF8SKIP(s);
1949                 }
1950             );
1951         }
1952         else {
1953             REXEC_FBC_SCAN(
1954                 if (*s == ch) {
1955                     DEBUG_EXECUTE_r( did_match = 1 );
1956                     if (regtry(&reginfo, &s)) goto got_it;
1957                     s++;
1958                     while (s < strend && *s == ch)
1959                         s++;
1960                 }
1961             );
1962         }
1963         DEBUG_EXECUTE_r(if (!did_match)
1964                 PerlIO_printf(Perl_debug_log,
1965                                   "Did not find anchored character...\n")
1966                );
1967     }
1968     else if (prog->anchored_substr != NULL
1969               || prog->anchored_utf8 != NULL
1970               || ((prog->float_substr != NULL || prog->float_utf8 != NULL)
1971                   && prog->float_max_offset < strend - s)) {
1972         SV *must;
1973         I32 back_max;
1974         I32 back_min;
1975         char *last;
1976         char *last1;            /* Last position checked before */
1977 #ifdef DEBUGGING
1978         int did_match = 0;
1979 #endif
1980         if (prog->anchored_substr || prog->anchored_utf8) {
1981             if (!(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr))
1982                 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1983             must = do_utf8 ? prog->anchored_utf8 : prog->anchored_substr;
1984             back_max = back_min = prog->anchored_offset;
1985         } else {
1986             if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))
1987                 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1988             must = do_utf8 ? prog->float_utf8 : prog->float_substr;
1989             back_max = prog->float_max_offset;
1990             back_min = prog->float_min_offset;
1991         }
1992
1993
1994         if (must == &PL_sv_undef)
1995             /* could not downgrade utf8 check substring, so must fail */
1996             goto phooey;
1997
1998         if (back_min<0) {
1999             last = strend;
2000         } else {
2001             last = HOP3c(strend,        /* Cannot start after this */
2002                   -(I32)(CHR_SVLEN(must)
2003                          - (SvTAIL(must) != 0) + back_min), strbeg);
2004         }
2005         if (s > PL_bostr)
2006             last1 = HOPc(s, -1);
2007         else
2008             last1 = s - 1;      /* bogus */
2009
2010         /* XXXX check_substr already used to find "s", can optimize if
2011            check_substr==must. */
2012         scream_pos = -1;
2013         dontbother = end_shift;
2014         strend = HOPc(strend, -dontbother);
2015         while ( (s <= last) &&
2016                 ((flags & REXEC_SCREAM)
2017                  ? (s = screaminstr(sv, must, HOP3c(s, back_min, (back_min<0 ? strbeg : strend)) - strbeg,
2018                                     end_shift, &scream_pos, 0))
2019                  : (s = fbm_instr((unsigned char*)HOP3(s, back_min, (back_min<0 ? strbeg : strend)),
2020                                   (unsigned char*)strend, must,
2021                                   multiline ? FBMrf_MULTILINE : 0))) ) {
2022             /* we may be pointing at the wrong string */
2023             if ((flags & REXEC_SCREAM) && RXp_MATCH_COPIED(prog))
2024                 s = strbeg + (s - SvPVX_const(sv));
2025             DEBUG_EXECUTE_r( did_match = 1 );
2026             if (HOPc(s, -back_max) > last1) {
2027                 last1 = HOPc(s, -back_min);
2028                 s = HOPc(s, -back_max);
2029             }
2030             else {
2031                 char * const t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
2032
2033                 last1 = HOPc(s, -back_min);
2034                 s = t;
2035             }
2036             if (do_utf8) {
2037                 while (s <= last1) {
2038                     if (regtry(&reginfo, &s))
2039                         goto got_it;
2040                     s += UTF8SKIP(s);
2041                 }
2042             }
2043             else {
2044                 while (s <= last1) {
2045                     if (regtry(&reginfo, &s))
2046                         goto got_it;
2047                     s++;
2048                 }
2049             }
2050         }
2051         DEBUG_EXECUTE_r(if (!did_match) {
2052             RE_PV_QUOTED_DECL(quoted, do_utf8, PERL_DEBUG_PAD_ZERO(0),
2053                 SvPVX_const(must), RE_SV_DUMPLEN(must), 30);
2054             PerlIO_printf(Perl_debug_log, "Did not find %s substr %s%s...\n",
2055                               ((must == prog->anchored_substr || must == prog->anchored_utf8)
2056                                ? "anchored" : "floating"),
2057                 quoted, RE_SV_TAIL(must));
2058         });
2059         goto phooey;
2060     }
2061     else if ( (c = progi->regstclass) ) {
2062         if (minlen) {
2063             const OPCODE op = OP(progi->regstclass);
2064             /* don't bother with what can't match */
2065             if (PL_regkind[op] != EXACT && op != CANY && PL_regkind[op] != TRIE)
2066                 strend = HOPc(strend, -(minlen - 1));
2067         }
2068         DEBUG_EXECUTE_r({
2069             SV * const prop = sv_newmortal();
2070             regprop(prog, prop, c);
2071             {
2072                 RE_PV_QUOTED_DECL(quoted,do_utf8,PERL_DEBUG_PAD_ZERO(1),
2073                     s,strend-s,60);
2074                 PerlIO_printf(Perl_debug_log,
2075                     "Matching stclass %.*s against %s (%d chars)\n",
2076                     (int)SvCUR(prop), SvPVX_const(prop),
2077                      quoted, (int)(strend - s));
2078             }
2079         });
2080         if (find_byclass(prog, c, s, strend, &reginfo))
2081             goto got_it;
2082         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass... [regexec_flags]\n"));
2083     }
2084     else {
2085         dontbother = 0;
2086         if (prog->float_substr != NULL || prog->float_utf8 != NULL) {
2087             /* Trim the end. */
2088             char *last;
2089             SV* float_real;
2090
2091             if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))
2092                 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
2093             float_real = do_utf8 ? prog->float_utf8 : prog->float_substr;
2094
2095             if (flags & REXEC_SCREAM) {
2096                 last = screaminstr(sv, float_real, s - strbeg,
2097                                    end_shift, &scream_pos, 1); /* last one */
2098                 if (!last)
2099                     last = scream_olds; /* Only one occurrence. */
2100                 /* we may be pointing at the wrong string */
2101                 else if (RXp_MATCH_COPIED(prog))
2102                     s = strbeg + (s - SvPVX_const(sv));
2103             }
2104             else {
2105                 STRLEN len;
2106                 const char * const little = SvPV_const(float_real, len);
2107
2108                 if (SvTAIL(float_real)) {
2109                     if (memEQ(strend - len + 1, little, len - 1))
2110                         last = strend - len + 1;
2111                     else if (!multiline)
2112                         last = memEQ(strend - len, little, len)
2113                             ? strend - len : NULL;
2114                     else
2115                         goto find_last;
2116                 } else {
2117                   find_last:
2118                     if (len)
2119                         last = rninstr(s, strend, little, little + len);
2120                     else
2121                         last = strend;  /* matching "$" */
2122                 }
2123             }
2124             if (last == NULL) {
2125                 DEBUG_EXECUTE_r(
2126                     PerlIO_printf(Perl_debug_log,
2127                         "%sCan't trim the tail, match fails (should not happen)%s\n",
2128                         PL_colors[4], PL_colors[5]));
2129                 goto phooey; /* Should not happen! */
2130             }
2131             dontbother = strend - last + prog->float_min_offset;
2132         }
2133         if (minlen && (dontbother < minlen))
2134             dontbother = minlen - 1;
2135         strend -= dontbother;              /* this one's always in bytes! */
2136         /* We don't know much -- general case. */
2137         if (do_utf8) {
2138             for (;;) {
2139                 if (regtry(&reginfo, &s))
2140                     goto got_it;
2141                 if (s >= strend)
2142                     break;
2143                 s += UTF8SKIP(s);
2144             };
2145         }
2146         else {
2147             do {
2148                 if (regtry(&reginfo, &s))
2149                     goto got_it;
2150             } while (s++ < strend);
2151         }
2152     }
2153
2154     /* Failure. */
2155     goto phooey;
2156
2157 got_it:
2158     RX_MATCH_TAINTED_set(rx, PL_reg_flags & RF_tainted);
2159
2160     if (PL_reg_eval_set)
2161         restore_pos(aTHX_ prog);
2162     if (RXp_PAREN_NAMES(prog))
2163         (void)hv_iterinit(RXp_PAREN_NAMES(prog));
2164
2165     /* make sure $`, $&, $', and $digit will work later */
2166     if ( !(flags & REXEC_NOT_FIRST) ) {
2167         RX_MATCH_COPY_FREE(rx);
2168         if (flags & REXEC_COPY_STR) {
2169             const I32 i = PL_regeol - startpos + (stringarg - strbeg);
2170 #ifdef PERL_OLD_COPY_ON_WRITE
2171             if ((SvIsCOW(sv)
2172                  || (SvFLAGS(sv) & CAN_COW_MASK) == CAN_COW_FLAGS)) {
2173                 if (DEBUG_C_TEST) {
2174                     PerlIO_printf(Perl_debug_log,
2175                                   "Copy on write: regexp capture, type %d\n",
2176                                   (int) SvTYPE(sv));
2177                 }
2178                 prog->saved_copy = sv_setsv_cow(prog->saved_copy, sv);
2179                 prog->subbeg = (char *)SvPVX_const(prog->saved_copy);
2180                 assert (SvPOKp(prog->saved_copy));
2181             } else
2182 #endif
2183             {
2184                 RX_MATCH_COPIED_on(rx);
2185                 s = savepvn(strbeg, i);
2186                 prog->subbeg = s;
2187             }
2188             prog->sublen = i;
2189         }
2190         else {
2191             prog->subbeg = strbeg;
2192             prog->sublen = PL_regeol - strbeg;  /* strend may have been modified */
2193         }
2194     }
2195
2196     return 1;
2197
2198 phooey:
2199     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n",
2200                           PL_colors[4], PL_colors[5]));
2201     if (PL_reg_eval_set)
2202         restore_pos(aTHX_ prog);
2203     if (swap_on_fail)
2204         /* we failed :-( roll it back */
2205         swap_match_buff(prog);
2206
2207     return 0;
2208 }
2209
2210
2211 /*
2212  - regtry - try match at specific point
2213  */
2214 STATIC I32                      /* 0 failure, 1 success */
2215 S_regtry(pTHX_ regmatch_info *reginfo, char **startpos)
2216 {
2217     dVAR;
2218     CHECKPOINT lastcp;
2219     REGEXP *const rx = reginfo->prog;
2220     regexp *const prog = (struct regexp *)SvANY(rx);
2221     RXi_GET_DECL(prog,progi);
2222     GET_RE_DEBUG_FLAGS_DECL;
2223
2224     PERL_ARGS_ASSERT_REGTRY;
2225
2226     reginfo->cutpoint=NULL;
2227
2228     if ((prog->extflags & RXf_EVAL_SEEN) && !PL_reg_eval_set) {
2229         MAGIC *mg;
2230
2231         PL_reg_eval_set = RS_init;
2232         DEBUG_EXECUTE_r(DEBUG_s(
2233             PerlIO_printf(Perl_debug_log, "  setting stack tmpbase at %"IVdf"\n",
2234                           (IV)(PL_stack_sp - PL_stack_base));
2235             ));
2236         SAVESTACK_CXPOS();
2237         cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
2238         /* Otherwise OP_NEXTSTATE will free whatever on stack now.  */
2239         SAVETMPS;
2240         /* Apparently this is not needed, judging by wantarray. */
2241         /* SAVEI8(cxstack[cxstack_ix].blk_gimme);
2242            cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
2243
2244         if (reginfo->sv) {
2245             /* Make $_ available to executed code. */
2246             if (reginfo->sv != DEFSV) {
2247                 SAVE_DEFSV;
2248                 DEFSV = reginfo->sv;
2249             }
2250
2251             if (!(SvTYPE(reginfo->sv) >= SVt_PVMG && SvMAGIC(reginfo->sv)
2252                   && (mg = mg_find(reginfo->sv, PERL_MAGIC_regex_global)))) {
2253                 /* prepare for quick setting of pos */
2254 #ifdef PERL_OLD_COPY_ON_WRITE
2255                 if (SvIsCOW(reginfo->sv))
2256                     sv_force_normal_flags(reginfo->sv, 0);
2257 #endif
2258                 mg = sv_magicext(reginfo->sv, NULL, PERL_MAGIC_regex_global,
2259                                  &PL_vtbl_mglob, NULL, 0);
2260                 mg->mg_len = -1;
2261             }
2262             PL_reg_magic    = mg;
2263             PL_reg_oldpos   = mg->mg_len;
2264             SAVEDESTRUCTOR_X(restore_pos, prog);
2265         }
2266         if (!PL_reg_curpm) {
2267             Newxz(PL_reg_curpm, 1, PMOP);
2268 #ifdef USE_ITHREADS
2269             {
2270                 SV* const repointer = &PL_sv_undef;
2271                 /* this regexp is also owned by the new PL_reg_curpm, which
2272                    will try to free it.  */
2273                 av_push(PL_regex_padav, repointer);
2274                 PL_reg_curpm->op_pmoffset = av_len(PL_regex_padav);
2275                 PL_regex_pad = AvARRAY(PL_regex_padav);
2276             }
2277 #endif
2278         }
2279 #ifdef USE_ITHREADS
2280         /* It seems that non-ithreads works both with and without this code.
2281            So for efficiency reasons it seems best not to have the code
2282            compiled when it is not needed.  */
2283         /* This is safe against NULLs: */
2284         ReREFCNT_dec(PM_GETRE(PL_reg_curpm));
2285         /* PM_reg_curpm owns a reference to this regexp.  */
2286         ReREFCNT_inc(rx);
2287 #endif
2288         PM_SETRE(PL_reg_curpm, rx);
2289         PL_reg_oldcurpm = PL_curpm;
2290         PL_curpm = PL_reg_curpm;
2291         if (RXp_MATCH_COPIED(prog)) {
2292             /*  Here is a serious problem: we cannot rewrite subbeg,
2293                 since it may be needed if this match fails.  Thus
2294                 $` inside (?{}) could fail... */
2295             PL_reg_oldsaved = prog->subbeg;
2296             PL_reg_oldsavedlen = prog->sublen;
2297 #ifdef PERL_OLD_COPY_ON_WRITE
2298             PL_nrs = prog->saved_copy;
2299 #endif
2300             RXp_MATCH_COPIED_off(prog);
2301         }
2302         else
2303             PL_reg_oldsaved = NULL;
2304         prog->subbeg = PL_bostr;
2305         prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
2306     }
2307     DEBUG_EXECUTE_r(PL_reg_starttry = *startpos);
2308     prog->offs[0].start = *startpos - PL_bostr;
2309     PL_reginput = *startpos;
2310     PL_reglastparen = &prog->lastparen;
2311     PL_reglastcloseparen = &prog->lastcloseparen;
2312     prog->lastparen = 0;
2313     prog->lastcloseparen = 0;
2314     PL_regsize = 0;
2315     PL_regoffs = prog->offs;
2316     if (PL_reg_start_tmpl <= prog->nparens) {
2317         PL_reg_start_tmpl = prog->nparens*3/2 + 3;
2318         if(PL_reg_start_tmp)
2319             Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
2320         else
2321             Newx(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
2322     }
2323
2324     /* XXXX What this code is doing here?!!!  There should be no need
2325        to do this again and again, PL_reglastparen should take care of
2326        this!  --ilya*/
2327
2328     /* Tests pat.t#187 and split.t#{13,14} seem to depend on this code.
2329      * Actually, the code in regcppop() (which Ilya may be meaning by
2330      * PL_reglastparen), is not needed at all by the test suite
2331      * (op/regexp, op/pat, op/split), but that code is needed otherwise
2332      * this erroneously leaves $1 defined: "1" =~ /^(?:(\d)x)?\d$/
2333      * Meanwhile, this code *is* needed for the
2334      * above-mentioned test suite tests to succeed.  The common theme
2335      * on those tests seems to be returning null fields from matches.
2336      * --jhi updated by dapm */
2337 #if 1
2338     if (prog->nparens) {
2339         regexp_paren_pair *pp = PL_regoffs;
2340         register I32 i;
2341         for (i = prog->nparens; i > (I32)*PL_reglastparen; i--) {
2342             ++pp;
2343             pp->start = -1;
2344             pp->end = -1;
2345         }
2346     }
2347 #endif
2348     REGCP_SET(lastcp);
2349     if (regmatch(reginfo, progi->program + 1)) {
2350         PL_regoffs[0].end = PL_reginput - PL_bostr;
2351         return 1;
2352     }
2353     if (reginfo->cutpoint)
2354         *startpos= reginfo->cutpoint;
2355     REGCP_UNWIND(lastcp);
2356     return 0;
2357 }
2358
2359
2360 #define sayYES goto yes
2361 #define sayNO goto no
2362 #define sayNO_SILENT goto no_silent
2363
2364 /* we dont use STMT_START/END here because it leads to
2365    "unreachable code" warnings, which are bogus, but distracting. */
2366 #define CACHEsayNO \
2367     if (ST.cache_mask) \
2368        PL_reg_poscache[ST.cache_offset] |= ST.cache_mask; \
2369     sayNO
2370
2371 /* this is used to determine how far from the left messages like
2372    'failed...' are printed. It should be set such that messages
2373    are inline with the regop output that created them.
2374 */
2375 #define REPORT_CODE_OFF 32
2376
2377
2378 /* Make sure there is a test for this +1 options in re_tests */
2379 #define TRIE_INITAL_ACCEPT_BUFFLEN 4;
2380
2381 #define CHRTEST_UNINIT -1001 /* c1/c2 haven't been calculated yet */
2382 #define CHRTEST_VOID   -1000 /* the c1/c2 "next char" test should be skipped */
2383
2384 #define SLAB_FIRST(s) (&(s)->states[0])
2385 #define SLAB_LAST(s)  (&(s)->states[PERL_REGMATCH_SLAB_SLOTS-1])
2386
2387 /* grab a new slab and return the first slot in it */
2388
2389 STATIC regmatch_state *
2390 S_push_slab(pTHX)
2391 {
2392 #if PERL_VERSION < 9 && !defined(PERL_CORE)
2393     dMY_CXT;
2394 #endif
2395     regmatch_slab *s = PL_regmatch_slab->next;
2396     if (!s) {
2397         Newx(s, 1, regmatch_slab);
2398         s->prev = PL_regmatch_slab;
2399         s->next = NULL;
2400         PL_regmatch_slab->next = s;
2401     }
2402     PL_regmatch_slab = s;
2403     return SLAB_FIRST(s);
2404 }
2405
2406
2407 /* push a new state then goto it */
2408
2409 #define PUSH_STATE_GOTO(state, node) \
2410     scan = node; \
2411     st->resume_state = state; \
2412     goto push_state;
2413
2414 /* push a new state with success backtracking, then goto it */
2415
2416 #define PUSH_YES_STATE_GOTO(state, node) \
2417     scan = node; \
2418     st->resume_state = state; \
2419     goto push_yes_state;
2420
2421
2422
2423 /*
2424
2425 regmatch() - main matching routine
2426
2427 This is basically one big switch statement in a loop. We execute an op,
2428 set 'next' to point the next op, and continue. If we come to a point which
2429 we may need to backtrack to on failure such as (A|B|C), we push a
2430 backtrack state onto the backtrack stack. On failure, we pop the top
2431 state, and re-enter the loop at the state indicated. If there are no more
2432 states to pop, we return failure.
2433
2434 Sometimes we also need to backtrack on success; for example /A+/, where
2435 after successfully matching one A, we need to go back and try to
2436 match another one; similarly for lookahead assertions: if the assertion
2437 completes successfully, we backtrack to the state just before the assertion
2438 and then carry on.  In these cases, the pushed state is marked as
2439 'backtrack on success too'. This marking is in fact done by a chain of
2440 pointers, each pointing to the previous 'yes' state. On success, we pop to
2441 the nearest yes state, discarding any intermediate failure-only states.
2442 Sometimes a yes state is pushed just to force some cleanup code to be
2443 called at the end of a successful match or submatch; e.g. (??{$re}) uses
2444 it to free the inner regex.
2445
2446 Note that failure backtracking rewinds the cursor position, while
2447 success backtracking leaves it alone.
2448
2449 A pattern is complete when the END op is executed, while a subpattern
2450 such as (?=foo) is complete when the SUCCESS op is executed. Both of these
2451 ops trigger the "pop to last yes state if any, otherwise return true"
2452 behaviour.
2453
2454 A common convention in this function is to use A and B to refer to the two
2455 subpatterns (or to the first nodes thereof) in patterns like /A*B/: so A is
2456 the subpattern to be matched possibly multiple times, while B is the entire
2457 rest of the pattern. Variable and state names reflect this convention.
2458
2459 The states in the main switch are the union of ops and failure/success of
2460 substates associated with with that op.  For example, IFMATCH is the op
2461 that does lookahead assertions /(?=A)B/ and so the IFMATCH state means
2462 'execute IFMATCH'; while IFMATCH_A is a state saying that we have just
2463 successfully matched A and IFMATCH_A_fail is a state saying that we have
2464 just failed to match A. Resume states always come in pairs. The backtrack
2465 state we push is marked as 'IFMATCH_A', but when that is popped, we resume
2466 at IFMATCH_A or IFMATCH_A_fail, depending on whether we are backtracking
2467 on success or failure.
2468
2469 The struct that holds a backtracking state is actually a big union, with
2470 one variant for each major type of op. The variable st points to the
2471 top-most backtrack struct. To make the code clearer, within each
2472 block of code we #define ST to alias the relevant union.
2473
2474 Here's a concrete example of a (vastly oversimplified) IFMATCH
2475 implementation:
2476
2477     switch (state) {
2478     ....
2479
2480 #define ST st->u.ifmatch
2481
2482     case IFMATCH: // we are executing the IFMATCH op, (?=A)B
2483         ST.foo = ...; // some state we wish to save
2484         ...
2485         // push a yes backtrack state with a resume value of
2486         // IFMATCH_A/IFMATCH_A_fail, then continue execution at the
2487         // first node of A:
2488         PUSH_YES_STATE_GOTO(IFMATCH_A, A);
2489         // NOTREACHED
2490
2491     case IFMATCH_A: // we have successfully executed A; now continue with B
2492         next = B;
2493         bar = ST.foo; // do something with the preserved value
2494         break;
2495
2496     case IFMATCH_A_fail: // A failed, so the assertion failed
2497         ...;   // do some housekeeping, then ...
2498         sayNO; // propagate the failure
2499
2500 #undef ST
2501
2502     ...
2503     }
2504
2505 For any old-timers reading this who are familiar with the old recursive
2506 approach, the code above is equivalent to:
2507
2508     case IFMATCH: // we are executing the IFMATCH op, (?=A)B
2509     {
2510         int foo = ...
2511         ...
2512         if (regmatch(A)) {
2513             next = B;
2514             bar = foo;
2515             break;
2516         }
2517         ...;   // do some housekeeping, then ...
2518         sayNO; // propagate the failure
2519     }
2520
2521 The topmost backtrack state, pointed to by st, is usually free. If you
2522 want to claim it, populate any ST.foo fields in it with values you wish to
2523 save, then do one of
2524
2525         PUSH_STATE_GOTO(resume_state, node);
2526         PUSH_YES_STATE_GOTO(resume_state, node);
2527
2528 which sets that backtrack state's resume value to 'resume_state', pushes a
2529 new free entry to the top of the backtrack stack, then goes to 'node'.
2530 On backtracking, the free slot is popped, and the saved state becomes the
2531 new free state. An ST.foo field in this new top state can be temporarily
2532 accessed to retrieve values, but once the main loop is re-entered, it
2533 becomes available for reuse.
2534
2535 Note that the depth of the backtrack stack constantly increases during the
2536 left-to-right execution of the pattern, rather than going up and down with
2537 the pattern nesting. For example the stack is at its maximum at Z at the
2538 end of the pattern, rather than at X in the following:
2539
2540     /(((X)+)+)+....(Y)+....Z/
2541
2542 The only exceptions to this are lookahead/behind assertions and the cut,
2543 (?>A), which pop all the backtrack states associated with A before
2544 continuing.
2545
2546 Bascktrack state structs are allocated in slabs of about 4K in size.
2547 PL_regmatch_state and st always point to the currently active state,
2548 and PL_regmatch_slab points to the slab currently containing
2549 PL_regmatch_state.  The first time regmatch() is called, the first slab is
2550 allocated, and is never freed until interpreter destruction. When the slab
2551 is full, a new one is allocated and chained to the end. At exit from
2552 regmatch(), slabs allocated since entry are freed.
2553
2554 */
2555
2556
2557 #define DEBUG_STATE_pp(pp)                                  \
2558     DEBUG_STATE_r({                                         \
2559         DUMP_EXEC_POS(locinput, scan, do_utf8);             \
2560         PerlIO_printf(Perl_debug_log,                       \
2561             "    %*s"pp" %s%s%s%s%s\n",                     \
2562             depth*2, "",                                    \
2563             PL_reg_name[st->resume_state],                     \
2564             ((st==yes_state||st==mark_state) ? "[" : ""),   \
2565             ((st==yes_state) ? "Y" : ""),                   \
2566             ((st==mark_state) ? "M" : ""),                  \
2567             ((st==yes_state||st==mark_state) ? "]" : "")    \
2568         );                                                  \
2569     });
2570
2571
2572 #define REG_NODE_NUM(x) ((x) ? (int)((x)-prog) : -1)
2573
2574 #ifdef DEBUGGING
2575
2576 STATIC void
2577 S_debug_start_match(pTHX_ const REGEXP *prog, const bool do_utf8,
2578     const char *start, const char *end, const char *blurb)
2579 {
2580     const bool utf8_pat = RX_UTF8(prog) ? 1 : 0;
2581
2582     PERL_ARGS_ASSERT_DEBUG_START_MATCH;
2583
2584     if (!PL_colorset)
2585             reginitcolors();
2586     {
2587         RE_PV_QUOTED_DECL(s0, utf8_pat, PERL_DEBUG_PAD_ZERO(0),
2588             RX_PRECOMP(prog), RX_PRELEN(prog), 60);
2589
2590         RE_PV_QUOTED_DECL(s1, do_utf8, PERL_DEBUG_PAD_ZERO(1),
2591             start, end - start, 60);
2592
2593         PerlIO_printf(Perl_debug_log,
2594             "%s%s REx%s %s against %s\n",
2595                        PL_colors[4], blurb, PL_colors[5], s0, s1);
2596
2597         if (do_utf8||utf8_pat)
2598             PerlIO_printf(Perl_debug_log, "UTF-8 %s%s%s...\n",
2599                 utf8_pat ? "pattern" : "",
2600                 utf8_pat && do_utf8 ? " and " : "",
2601                 do_utf8 ? "string" : ""
2602             );
2603     }
2604 }
2605
2606 STATIC void
2607 S_dump_exec_pos(pTHX_ const char *locinput,
2608                       const regnode *scan,
2609                       const char *loc_regeol,
2610                       const char *loc_bostr,
2611                       const char *loc_reg_starttry,
2612                       const bool do_utf8)
2613 {
2614     const int docolor = *PL_colors[0] || *PL_colors[2] || *PL_colors[4];
2615     const int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
2616     int l = (loc_regeol - locinput) > taill ? taill : (loc_regeol - locinput);
2617     /* The part of the string before starttry has one color
2618        (pref0_len chars), between starttry and current
2619        position another one (pref_len - pref0_len chars),
2620        after the current position the third one.
2621        We assume that pref0_len <= pref_len, otherwise we
2622        decrease pref0_len.  */
2623     int pref_len = (locinput - loc_bostr) > (5 + taill) - l
2624         ? (5 + taill) - l : locinput - loc_bostr;
2625     int pref0_len;
2626
2627     PERL_ARGS_ASSERT_DUMP_EXEC_POS;
2628
2629     while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput - pref_len)))
2630         pref_len++;
2631     pref0_len = pref_len  - (locinput - loc_reg_starttry);
2632     if (l + pref_len < (5 + taill) && l < loc_regeol - locinput)
2633         l = ( loc_regeol - locinput > (5 + taill) - pref_len
2634               ? (5 + taill) - pref_len : loc_regeol - locinput);
2635     while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput + l)))
2636         l--;
2637     if (pref0_len < 0)
2638         pref0_len = 0;
2639     if (pref0_len > pref_len)
2640         pref0_len = pref_len;
2641     {
2642         const int is_uni = (do_utf8 && OP(scan) != CANY) ? 1 : 0;
2643
2644         RE_PV_COLOR_DECL(s0,len0,is_uni,PERL_DEBUG_PAD(0),
2645             (locinput - pref_len),pref0_len, 60, 4, 5);
2646
2647         RE_PV_COLOR_DECL(s1,len1,is_uni,PERL_DEBUG_PAD(1),
2648                     (locinput - pref_len + pref0_len),
2649                     pref_len - pref0_len, 60, 2, 3);
2650
2651         RE_PV_COLOR_DECL(s2,len2,is_uni,PERL_DEBUG_PAD(2),
2652                     locinput, loc_regeol - locinput, 10, 0, 1);
2653
2654         const STRLEN tlen=len0+len1+len2;
2655         PerlIO_printf(Perl_debug_log,
2656                     "%4"IVdf" <%.*s%.*s%s%.*s>%*s|",
2657                     (IV)(locinput - loc_bostr),
2658                     len0, s0,
2659                     len1, s1,
2660                     (docolor ? "" : "> <"),
2661                     len2, s2,
2662                     (int)(tlen > 19 ? 0 :  19 - tlen),
2663                     "");
2664     }
2665 }
2666
2667 #endif
2668
2669 /* reg_check_named_buff_matched()
2670  * Checks to see if a named buffer has matched. The data array of
2671  * buffer numbers corresponding to the buffer is expected to reside
2672  * in the regexp->data->data array in the slot stored in the ARG() of
2673  * node involved. Note that this routine doesn't actually care about the
2674  * name, that information is not preserved from compilation to execution.
2675  * Returns the index of the leftmost defined buffer with the given name
2676  * or 0 if non of the buffers matched.
2677  */
2678 STATIC I32
2679 S_reg_check_named_buff_matched(pTHX_ const regexp *rex, const regnode *scan)
2680 {
2681     I32 n;
2682     RXi_GET_DECL(rex,rexi);
2683     SV *sv_dat=(SV*)rexi->data->data[ ARG( scan ) ];
2684     I32 *nums=(I32*)SvPVX(sv_dat);
2685
2686     PERL_ARGS_ASSERT_REG_CHECK_NAMED_BUFF_MATCHED;
2687
2688     for ( n=0; n<SvIVX(sv_dat); n++ ) {
2689         if ((I32)*PL_reglastparen >= nums[n] &&
2690             PL_regoffs[nums[n]].end != -1)
2691         {
2692             return nums[n];
2693         }
2694     }
2695     return 0;
2696 }
2697
2698
2699 /* free all slabs above current one  - called during LEAVE_SCOPE */
2700
2701 STATIC void
2702 S_clear_backtrack_stack(pTHX_ void *p)
2703 {
2704     regmatch_slab *s = PL_regmatch_slab->next;
2705     PERL_UNUSED_ARG(p);
2706
2707     if (!s)
2708         return;
2709     PL_regmatch_slab->next = NULL;
2710     while (s) {
2711         regmatch_slab * const osl = s;
2712         s = s->next;
2713         Safefree(osl);
2714     }
2715 }
2716
2717
2718 #define SETREX(Re1,Re2) \
2719     if (PL_reg_eval_set) PM_SETRE((PL_reg_curpm), (Re2)); \
2720     Re1 = (Re2)
2721
2722 STATIC I32                      /* 0 failure, 1 success */
2723 S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
2724 {
2725 #if PERL_VERSION < 9 && !defined(PERL_CORE)
2726     dMY_CXT;
2727 #endif
2728     dVAR;
2729     register const bool do_utf8 = PL_reg_match_utf8;
2730     const U32 uniflags = UTF8_ALLOW_DEFAULT;
2731     REGEXP *rex_sv = reginfo->prog;
2732     regexp *rex = (struct regexp *)SvANY(rex_sv);
2733     RXi_GET_DECL(rex,rexi);
2734     I32 oldsave;
2735     /* the current state. This is a cached copy of PL_regmatch_state */
2736     register regmatch_state *st;
2737     /* cache heavy used fields of st in registers */
2738     register regnode *scan;
2739     register regnode *next;
2740     register U32 n = 0; /* general value; init to avoid compiler warning */
2741     register I32 ln = 0; /* len or last;  init to avoid compiler warning */
2742     register char *locinput = PL_reginput;
2743     register I32 nextchr;   /* is always set to UCHARAT(locinput) */
2744
2745     bool result = 0;        /* return value of S_regmatch */
2746     int depth = 0;          /* depth of backtrack stack */
2747     U32 nochange_depth = 0; /* depth of GOSUB recursion with nochange */
2748     const U32 max_nochange_depth =
2749         (3 * rex->nparens > MAX_RECURSE_EVAL_NOCHANGE_DEPTH) ?
2750         3 * rex->nparens : MAX_RECURSE_EVAL_NOCHANGE_DEPTH;
2751     regmatch_state *yes_state = NULL; /* state to pop to on success of
2752                                                             subpattern */
2753     /* mark_state piggy backs on the yes_state logic so that when we unwind
2754        the stack on success we can update the mark_state as we go */
2755     regmatch_state *mark_state = NULL; /* last mark state we have seen */
2756     regmatch_state *cur_eval = NULL; /* most recent EVAL_AB state */
2757     struct regmatch_state  *cur_curlyx = NULL; /* most recent curlyx */
2758     U32 state_num;
2759     bool no_final = 0;      /* prevent failure from backtracking? */
2760     bool do_cutgroup = 0;   /* no_final only until next branch/trie entry */
2761     char *startpoint = PL_reginput;
2762     SV *popmark = NULL;     /* are we looking for a mark? */
2763     SV *sv_commit = NULL;   /* last mark name seen in failure */
2764     SV *sv_yes_mark = NULL; /* last mark name we have seen
2765                                during a successfull match */
2766     U32 lastopen = 0;       /* last open we saw */
2767     bool has_cutgroup = RX_HAS_CUTGROUP(rex) ? 1 : 0;
2768     SV* const oreplsv = GvSV(PL_replgv);
2769     /* these three flags are set by various ops to signal information to
2770      * the very next op. They have a useful lifetime of exactly one loop
2771      * iteration, and are not preserved or restored by state pushes/pops
2772      */
2773     bool sw = 0;            /* the condition value in (?(cond)a|b) */
2774     bool minmod = 0;        /* the next "{n,m}" is a "{n,m}?" */
2775     int logical = 0;        /* the following EVAL is:
2776                                 0: (?{...})
2777                                 1: (?(?{...})X|Y)
2778                                 2: (??{...})
2779                                or the following IFMATCH/UNLESSM is:
2780                                 false: plain (?=foo)
2781                                 true:  used as a condition: (?(?=foo))
2782                             */
2783 #ifdef DEBUGGING
2784     GET_RE_DEBUG_FLAGS_DECL;
2785 #endif
2786
2787     PERL_ARGS_ASSERT_REGMATCH;
2788
2789     DEBUG_OPTIMISE_r( DEBUG_EXECUTE_r({
2790             PerlIO_printf(Perl_debug_log,"regmatch start\n");
2791     }));
2792     /* on first ever call to regmatch, allocate first slab */
2793     if (!PL_regmatch_slab) {
2794         Newx(PL_regmatch_slab, 1, regmatch_slab);
2795         PL_regmatch_slab->prev = NULL;
2796         PL_regmatch_slab->next = NULL;
2797         PL_regmatch_state = SLAB_FIRST(PL_regmatch_slab);
2798     }
2799
2800     oldsave = PL_savestack_ix;
2801     SAVEDESTRUCTOR_X(S_clear_backtrack_stack, NULL);
2802     SAVEVPTR(PL_regmatch_slab);
2803     SAVEVPTR(PL_regmatch_state);
2804
2805     /* grab next free state slot */
2806     st = ++PL_regmatch_state;
2807     if (st >  SLAB_LAST(PL_regmatch_slab))
2808         st = PL_regmatch_state = S_push_slab(aTHX);
2809
2810     /* Note that nextchr is a byte even in UTF */
2811     nextchr = UCHARAT(locinput);
2812     scan = prog;
2813     while (scan != NULL) {
2814
2815         DEBUG_EXECUTE_r( {
2816             SV * const prop = sv_newmortal();
2817             regnode *rnext=regnext(scan);
2818             DUMP_EXEC_POS( locinput, scan, do_utf8 );
2819             regprop(rex, prop, scan);
2820
2821             PerlIO_printf(Perl_debug_log,
2822                     "%3"IVdf":%*s%s(%"IVdf")\n",
2823                     (IV)(scan - rexi->program), depth*2, "",
2824                     SvPVX_const(prop),
2825                     (PL_regkind[OP(scan)] == END || !rnext) ?
2826                         0 : (IV)(rnext - rexi->program));
2827         });
2828
2829         next = scan + NEXT_OFF(scan);
2830         if (next == scan)
2831             next = NULL;
2832         state_num = OP(scan);
2833
2834       reenter_switch:
2835         switch (state_num) {
2836         case BOL:
2837             if (locinput == PL_bostr)
2838             {
2839                 /* reginfo->till = reginfo->bol; */
2840                 break;
2841             }
2842             sayNO;
2843         case MBOL:
2844             if (locinput == PL_bostr ||
2845                 ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n'))
2846             {
2847                 break;
2848             }
2849             sayNO;
2850         case SBOL:
2851             if (locinput == PL_bostr)
2852                 break;
2853             sayNO;
2854         case GPOS:
2855             if (locinput == reginfo->ganch)
2856                 break;
2857             sayNO;
2858
2859         case KEEPS:
2860             /* update the startpoint */
2861             st->u.keeper.val = PL_regoffs[0].start;
2862             PL_reginput = locinput;
2863             PL_regoffs[0].start = locinput - PL_bostr;
2864             PUSH_STATE_GOTO(KEEPS_next, next);
2865             /*NOT-REACHED*/
2866         case KEEPS_next_fail:
2867             /* rollback the start point change */
2868             PL_regoffs[0].start = st->u.keeper.val;
2869             sayNO_SILENT;
2870             /*NOT-REACHED*/
2871         case EOL:
2872                 goto seol;
2873         case MEOL:
2874             if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
2875                 sayNO;
2876             break;
2877         case SEOL:
2878           seol:
2879             if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
2880                 sayNO;
2881             if (PL_regeol - locinput > 1)
2882                 sayNO;
2883             break;
2884         case EOS:
2885             if (PL_regeol != locinput)
2886                 sayNO;
2887             break;
2888         case SANY:
2889             if (!nextchr && locinput >= PL_regeol)
2890                 sayNO;
2891             if (do_utf8) {
2892                 locinput += PL_utf8skip[nextchr];
2893                 if (locinput > PL_regeol)
2894                     sayNO;
2895                 nextchr = UCHARAT(locinput);
2896             }
2897             else
2898                 nextchr = UCHARAT(++locinput);
2899             break;
2900         case CANY:
2901             if (!nextchr && locinput >= PL_regeol)
2902                 sayNO;
2903             nextchr = UCHARAT(++locinput);
2904             break;
2905         case REG_ANY:
2906             if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
2907                 sayNO;
2908             if (do_utf8) {
2909                 locinput += PL_utf8skip[nextchr];
2910                 if (locinput > PL_regeol)
2911                     sayNO;
2912                 nextchr = UCHARAT(locinput);
2913             }
2914             else
2915                 nextchr = UCHARAT(++locinput);
2916             break;
2917
2918 #undef  ST
2919 #define ST st->u.trie
2920         case TRIEC:
2921             /* In this case the charclass data is available inline so
2922                we can fail fast without a lot of extra overhead.
2923              */
2924             if (scan->flags == EXACT || !do_utf8) {
2925                 if(!ANYOF_BITMAP_TEST(scan, *locinput)) {
2926                     DEBUG_EXECUTE_r(
2927                         PerlIO_printf(Perl_debug_log,
2928                                   "%*s  %sfailed to match trie start class...%s\n",
2929                                   REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5])
2930                     );
2931                     sayNO_SILENT;
2932                     /* NOTREACHED */
2933                 }
2934             }
2935             /* FALL THROUGH */
2936         case TRIE:
2937             {
2938                 /* what type of TRIE am I? (utf8 makes this contextual) */
2939                 DECL_TRIE_TYPE(scan);
2940
2941                 /* what trie are we using right now */
2942                 reg_trie_data * const trie
2943                     = (reg_trie_data*)rexi->data->data[ ARG( scan ) ];
2944                 HV * widecharmap = (HV *)rexi->data->data[ ARG( scan ) + 1 ];
2945                 U32 state = trie->startstate;
2946
2947                 if (trie->bitmap && trie_type != trie_utf8_fold &&
2948                     !TRIE_BITMAP_TEST(trie,*locinput)
2949                 ) {
2950                     if (trie->states[ state ].wordnum) {
2951                          DEBUG_EXECUTE_r(
2952                             PerlIO_printf(Perl_debug_log,
2953                                           "%*s  %smatched empty string...%s\n",
2954                                           REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5])
2955                         );
2956                         break;
2957                     } else {
2958                         DEBUG_EXECUTE_r(
2959                             PerlIO_printf(Perl_debug_log,
2960                                           "%*s  %sfailed to match trie start class...%s\n",
2961                                           REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5])
2962                         );
2963                         sayNO_SILENT;
2964                    }
2965                 }
2966
2967             {
2968                 U8 *uc = ( U8* )locinput;
2969
2970                 STRLEN len = 0;
2971                 STRLEN foldlen = 0;
2972                 U8 *uscan = (U8*)NULL;
2973                 STRLEN bufflen=0;
2974                 SV *sv_accept_buff = NULL;
2975                 U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ];
2976
2977                 ST.accepted = 0; /* how many accepting states we have seen */
2978                 ST.B = next;
2979                 ST.jump = trie->jump;
2980                 ST.me = scan;
2981                 /*
2982                    traverse the TRIE keeping track of all accepting states
2983                    we transition through until we get to a failing node.
2984                 */
2985
2986                 while ( state && uc <= (U8*)PL_regeol ) {
2987                     U32 base = trie->states[ state ].trans.base;
2988                     UV uvc = 0;
2989                     U16 charid;
2990                     /* We use charid to hold the wordnum as we don't use it
2991                        for charid until after we have done the wordnum logic.
2992                        We define an alias just so that the wordnum logic reads
2993                        more naturally. */
2994
2995 #define got_wordnum charid
2996                     got_wordnum = trie->states[ state ].wordnum;
2997
2998                     if ( got_wordnum ) {
2999                         if ( ! ST.accepted ) {
3000                             ENTER;
3001                             /* SAVETMPS; */ /* XXX is this necessary? dmq */
3002                             bufflen = TRIE_INITAL_ACCEPT_BUFFLEN;
3003                             sv_accept_buff=newSV(bufflen *
3004                                             sizeof(reg_trie_accepted) - 1);
3005                             SvCUR_set(sv_accept_buff, 0);
3006                             SvPOK_on(sv_accept_buff);
3007                             sv_2mortal(sv_accept_buff);
3008                             SAVETMPS;
3009                             ST.accept_buff =
3010                                 (reg_trie_accepted*)SvPV_nolen(sv_accept_buff );
3011                         }
3012                         do {
3013                             if (ST.accepted >= bufflen) {
3014                                 bufflen *= 2;
3015                                 ST.accept_buff =(reg_trie_accepted*)
3016                                     SvGROW(sv_accept_buff,
3017                                         bufflen * sizeof(reg_trie_accepted));
3018                             }
3019                             SvCUR_set(sv_accept_buff,SvCUR(sv_accept_buff)
3020                                 + sizeof(reg_trie_accepted));
3021
3022
3023                             ST.accept_buff[ST.accepted].wordnum = got_wordnum;
3024                             ST.accept_buff[ST.accepted].endpos = uc;
3025                             ++ST.accepted;
3026                         } while (trie->nextword && (got_wordnum= trie->nextword[got_wordnum]));
3027                     }
3028 #undef got_wordnum
3029
3030                     DEBUG_TRIE_EXECUTE_r({
3031                                 DUMP_EXEC_POS( (char *)uc, scan, do_utf8 );
3032                                 PerlIO_printf( Perl_debug_log,
3033                                     "%*s  %sState: %4"UVxf" Accepted: %4"UVxf" ",
3034                                     2+depth * 2, "", PL_colors[4],
3035                                     (UV)state, (UV)ST.accepted );
3036                     });
3037
3038                     if ( base ) {
3039                         REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc,
3040                                              uscan, len, uvc, charid, foldlen,
3041                                              foldbuf, uniflags);
3042
3043                         if (charid &&
3044                              (base + charid > trie->uniquecharcount )
3045                              && (base + charid - 1 - trie->uniquecharcount
3046                                     < trie->lasttrans)
3047                              && trie->trans[base + charid - 1 -
3048                                     trie->uniquecharcount].check == state)
3049                         {
3050                             state = trie->trans[base + charid - 1 -
3051                                 trie->uniquecharcount ].next;
3052                         }
3053                         else {
3054                             state = 0;
3055                         }
3056                         uc += len;
3057
3058                     }
3059                     else {
3060                         state = 0;
3061                     }
3062                     DEBUG_TRIE_EXECUTE_r(
3063                         PerlIO_printf( Perl_debug_log,
3064                             "Charid:%3x CP:%4"UVxf" After State: %4"UVxf"%s\n",
3065                             charid, uvc, (UV)state, PL_colors[5] );
3066                     );
3067                 }
3068                 if (!ST.accepted )
3069                    sayNO;
3070
3071                 DEBUG_EXECUTE_r(
3072                     PerlIO_printf( Perl_debug_log,
3073                         "%*s  %sgot %"IVdf" possible matches%s\n",
3074                         REPORT_CODE_OFF + depth * 2, "",
3075                         PL_colors[4], (IV)ST.accepted, PL_colors[5] );
3076                 );
3077             }}
3078             goto trie_first_try; /* jump into the fail handler */
3079             /* NOTREACHED */
3080         case TRIE_next_fail: /* we failed - try next alterative */
3081             if ( ST.jump) {
3082                 REGCP_UNWIND(ST.cp);
3083                 for (n = *PL_reglastparen; n > ST.lastparen; n--)
3084                     PL_regoffs[n].end = -1;
3085                 *PL_reglastparen = n;
3086             }
3087           trie_first_try:
3088             if (do_cutgroup) {
3089                 do_cutgroup = 0;
3090                 no_final = 0;
3091             }
3092
3093             if ( ST.jump) {
3094                 ST.lastparen = *PL_reglastparen;
3095                 REGCP_SET(ST.cp);
3096             }
3097             if ( ST.accepted == 1 ) {
3098                 /* only one choice left - just continue */
3099                 DEBUG_EXECUTE_r({
3100                     AV *const trie_words
3101                         = (AV *) rexi->data->data[ARG(ST.me)+TRIE_WORDS_OFFSET];
3102                     SV ** const tmp = av_fetch( trie_words,
3103                         ST.accept_buff[ 0 ].wordnum-1, 0 );
3104                     SV *sv= tmp ? sv_newmortal() : NULL;
3105
3106                     PerlIO_printf( Perl_debug_log,
3107                         "%*s  %sonly one match left: #%d <%s>%s\n",
3108                         REPORT_CODE_OFF+depth*2, "", PL_colors[4],
3109                         ST.accept_buff[ 0 ].wordnum,
3110                         tmp ? pv_pretty(sv, SvPV_nolen_const(*tmp), SvCUR(*tmp), 0,
3111                                 PL_colors[0], PL_colors[1],
3112                                 (SvUTF8(*tmp) ? PERL_PV_ESCAPE_UNI : 0)
3113                             )
3114                         : "not compiled under -Dr",
3115                         PL_colors[5] );
3116                 });
3117                 PL_reginput = (char *)ST.accept_buff[ 0 ].endpos;
3118                 /* in this case we free tmps/leave before we call regmatch
3119                    as we wont be using accept_buff again. */
3120
3121                 locinput = PL_reginput;
3122                 nextchr = UCHARAT(locinput);
3123                 if ( !ST.jump || !ST.jump[ST.accept_buff[0].wordnum])
3124                     scan = ST.B;
3125                 else
3126                     scan = ST.me + ST.jump[ST.accept_buff[0].wordnum];
3127                 if (!has_cutgroup) {
3128                     FREETMPS;
3129                     LEAVE;
3130                 } else {
3131                     ST.accepted--;
3132                     PUSH_YES_STATE_GOTO(TRIE_next, scan);
3133                 }
3134
3135                 continue; /* execute rest of RE */
3136             }
3137
3138             if ( !ST.accepted-- ) {
3139                 DEBUG_EXECUTE_r({
3140                     PerlIO_printf( Perl_debug_log,
3141                         "%*s  %sTRIE failed...%s\n",
3142                         REPORT_CODE_OFF+depth*2, "",
3143                         PL_colors[4],
3144                         PL_colors[5] );
3145                 });
3146                 FREETMPS;
3147                 LEAVE;
3148                 sayNO_SILENT;
3149                 /*NOTREACHED*/
3150             }
3151
3152             /*
3153                There are at least two accepting states left.  Presumably
3154                the number of accepting states is going to be low,
3155                typically two. So we simply scan through to find the one
3156                with lowest wordnum.  Once we find it, we swap the last
3157                state into its place and decrement the size. We then try to
3158                match the rest of the pattern at the point where the word
3159                ends. If we succeed, control just continues along the
3160                regex; if we fail we return here to try the next accepting
3161                state
3162              */
3163
3164             {
3165                 U32 best = 0;
3166                 U32 cur;
3167                 for( cur = 1 ; cur <= ST.accepted ; cur++ ) {
3168                     DEBUG_TRIE_EXECUTE_r(
3169                         PerlIO_printf( Perl_debug_log,
3170                             "%*s  %sgot %"IVdf" (%d) as best, looking at %"IVdf" (%d)%s\n",
3171                             REPORT_CODE_OFF + depth * 2, "", PL_colors[4],
3172                             (IV)best, ST.accept_buff[ best ].wordnum, (IV)cur,
3173                             ST.accept_buff[ cur ].wordnum, PL_colors[5] );
3174                     );
3175
3176                     if (ST.accept_buff[cur].wordnum <
3177                             ST.accept_buff[best].wordnum)
3178                         best = cur;
3179                 }
3180
3181                 DEBUG_EXECUTE_r({
3182                     AV *const trie_words
3183                         = (AV *) rexi->data->data[ARG(ST.me)+TRIE_WORDS_OFFSET];
3184                     SV ** const tmp = av_fetch( trie_words,
3185                         ST.accept_buff[ best ].wordnum - 1, 0 );
3186                     regnode *nextop=(!ST.jump || !ST.jump[ST.accept_buff[best].wordnum]) ?
3187                                     ST.B :
3188                                     ST.me + ST.jump[ST.accept_buff[best].wordnum];
3189                     SV *sv= tmp ? sv_newmortal() : NULL;
3190
3191                     PerlIO_printf( Perl_debug_log,
3192                         "%*s  %strying alternation #%d <%s> at node #%d %s\n",
3193                         REPORT_CODE_OFF+depth*2, "", PL_colors[4],
3194                         ST.accept_buff[best].wordnum,
3195                         tmp ? pv_pretty(sv, SvPV_nolen_const(*tmp), SvCUR(*tmp), 0,
3196                                 PL_colors[0], PL_colors[1],
3197                                 (SvUTF8(*tmp) ? PERL_PV_ESCAPE_UNI : 0)
3198                             ) : "not compiled under -Dr",
3199                             REG_NODE_NUM(nextop),
3200                         PL_colors[5] );
3201                 });
3202
3203                 if ( best<ST.accepted ) {
3204                     reg_trie_accepted tmp = ST.accept_buff[ best ];
3205                     ST.accept_buff[ best ] = ST.accept_buff[ ST.accepted ];
3206                     ST.accept_buff[ ST.accepted ] = tmp;
3207                     best = ST.accepted;
3208                 }
3209                 PL_reginput = (char *)ST.accept_buff[ best ].endpos;
3210                 if ( !ST.jump || !ST.jump[ST.accept_buff[best].wordnum]) {
3211                     scan = ST.B;
3212                 } else {
3213                     scan = ST.me + ST.jump[ST.accept_buff[best].wordnum];
3214                 }
3215                 PUSH_YES_STATE_GOTO(TRIE_next, scan);
3216                 /* NOTREACHED */
3217             }
3218             /* NOTREACHED */
3219         case TRIE_next:
3220             FREETMPS;
3221             LEAVE;
3222             sayYES;
3223 #undef  ST
3224
3225         case EXACT: {
3226             char *s = STRING(scan);
3227             ln = STR_LEN(scan);
3228             if (do_utf8 != UTF) {
3229                 /* The target and the pattern have differing utf8ness. */
3230                 char *l = locinput;
3231                 const char * const e = s + ln;
3232
3233                 if (do_utf8) {
3234                     /* The target is utf8, the pattern is not utf8. */
3235                     while (s < e) {
3236                         STRLEN ulen;
3237                         if (l >= PL_regeol)
3238                              sayNO;
3239                         if (NATIVE_TO_UNI(*(U8*)s) !=
3240                             utf8n_to_uvuni((U8*)l, UTF8_MAXBYTES, &ulen,
3241                                             uniflags))
3242                              sayNO;
3243                         l += ulen;
3244                         s ++;
3245                     }
3246                 }
3247                 else {
3248                     /* The target is not utf8, the pattern is utf8. */
3249                     while (s < e) {
3250                         STRLEN ulen;
3251                         if (l >= PL_regeol)
3252                             sayNO;
3253                         if (NATIVE_TO_UNI(*((U8*)l)) !=
3254                             utf8n_to_uvuni((U8*)s, UTF8_MAXBYTES, &ulen,
3255                                            uniflags))
3256                             sayNO;
3257                         s += ulen;
3258                         l ++;
3259                     }
3260                 }
3261                 locinput = l;
3262                 nextchr = UCHARAT(locinput);
3263                 break;
3264             }
3265             /* The target and the pattern have the same utf8ness. */
3266             /* Inline the first character, for speed. */
3267             if (UCHARAT(s) != nextchr)
3268                 sayNO;
3269             if (PL_regeol - locinput < ln)
3270                 sayNO;
3271             if (ln > 1 && memNE(s, locinput, ln))
3272                 sayNO;
3273             locinput += ln;
3274             nextchr = UCHARAT(locinput);
3275             break;
3276             }
3277         case EXACTFL:
3278             PL_reg_flags |= RF_tainted;
3279             /* FALL THROUGH */
3280         case EXACTF: {
3281             char * const s = STRING(scan);
3282             ln = STR_LEN(scan);
3283
3284             if (do_utf8 || UTF) {
3285               /* Either target or the pattern are utf8. */
3286                 const char * const l = locinput;
3287                 char *e = PL_regeol;
3288
3289                 if (ibcmp_utf8(s, 0,  ln, (bool)UTF,
3290                                l, &e, 0,  do_utf8)) {
3291                      /* One more case for the sharp s:
3292                       * pack("U0U*", 0xDF) =~ /ss/i,
3293                       * the 0xC3 0x9F are the UTF-8
3294                       * byte sequence for the U+00DF. */
3295
3296                      if (!(do_utf8 &&
3297                            toLOWER(s[0]) == 's' &&
3298                            ln >= 2 &&
3299                            toLOWER(s[1]) == 's' &&
3300                            (U8)l[0] == 0xC3 &&
3301                            e - l >= 2 &&
3302                            (U8)l[1] == 0x9F))
3303                           sayNO;
3304                 }
3305                 locinput = e;
3306                 nextchr = UCHARAT(locinput);
3307                 break;
3308             }
3309
3310             /* Neither the target and the pattern are utf8. */
3311
3312             /* Inline the first character, for speed. */
3313             if (UCHARAT(s) != nextchr &&
3314                 UCHARAT(s) != ((OP(scan) == EXACTF)
3315                                ? PL_fold : PL_fold_locale)[nextchr])
3316                 sayNO;
3317             if (PL_regeol - locinput < ln)
3318                 sayNO;
3319             if (ln > 1 && (OP(scan) == EXACTF
3320                            ? ibcmp(s, locinput, ln)
3321                            : ibcmp_locale(s, locinput, ln)))
3322                 sayNO;
3323             locinput += ln;
3324             nextchr = UCHARAT(locinput);
3325             break;
3326             }
3327         case ANYOF:
3328             if (do_utf8) {
3329                 STRLEN inclasslen = PL_regeol - locinput;
3330
3331                 if (!reginclass(rex, scan, (U8*)locinput, &inclasslen, do_utf8))
3332                     goto anyof_fail;
3333                 if (locinput >= PL_regeol)
3334                     sayNO;
3335                 locinput += inclasslen ? inclasslen : UTF8SKIP(locinput);
3336                 nextchr = UCHARAT(locinput);
3337                 break;
3338             }
3339             else {
3340                 if (nextchr < 0)
3341                     nextchr = UCHARAT(locinput);
3342                 if (!REGINCLASS(rex, scan, (U8*)locinput))
3343                     goto anyof_fail;
3344                 if (!nextchr && locinput >= PL_regeol)
3345                     sayNO;
3346                 nextchr = UCHARAT(++locinput);
3347                 break;
3348             }
3349         anyof_fail:
3350             /* If we might have the case of the German sharp s
3351              * in a casefolding Unicode character class. */
3352
3353             if (ANYOF_FOLD_SHARP_S(scan, locinput, PL_regeol)) {
3354                  locinput += SHARP_S_SKIP;
3355                  nextchr = UCHARAT(locinput);
3356             }
3357             else
3358                  sayNO;
3359             break;
3360         case ALNUML:
3361             PL_reg_flags |= RF_tainted;
3362             /* FALL THROUGH */
3363         case ALNUM:
3364             if (!nextchr)
3365                 sayNO;
3366             if (do_utf8) {
3367                 LOAD_UTF8_CHARCLASS_ALNUM();
3368                 if (!(OP(scan) == ALNUM
3369                       ? (bool)swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
3370                       : isALNUM_LC_utf8((U8*)locinput)))
3371                 {
3372                     sayNO;
3373                 }
3374                 locinput += PL_utf8skip[nextchr];
3375                 nextchr = UCHARAT(locinput);
3376                 break;
3377             }
3378             if (!(OP(scan) == ALNUM
3379                   ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
3380                 sayNO;
3381             nextchr = UCHARAT(++locinput);
3382             break;
3383         case NALNUML:
3384             PL_reg_flags |= RF_tainted;
3385             /* FALL THROUGH */
3386         case NALNUM:
3387             if (!nextchr && locinput >= PL_regeol)
3388                 sayNO;
3389             if (do_utf8) {
3390                 LOAD_UTF8_CHARCLASS_ALNUM();
3391                 if (OP(scan) == NALNUM
3392                     ? (bool)swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
3393                     : isALNUM_LC_utf8((U8*)locinput))
3394                 {
3395                     sayNO;
3396                 }
3397                 locinput += PL_utf8skip[nextchr];
3398                 nextchr = UCHARAT(locinput);
3399                 break;
3400             }
3401             if (OP(scan) == NALNUM
3402                 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
3403                 sayNO;
3404             nextchr = UCHARAT(++locinput);
3405             break;
3406         case BOUNDL:
3407         case NBOUNDL:
3408             PL_reg_flags |= RF_tainted;
3409             /* FALL THROUGH */
3410         case BOUND:
3411         case NBOUND:
3412             /* was last char in word? */
3413             if (do_utf8) {
3414                 if (locinput == PL_bostr)
3415                     ln = '\n';
3416                 else {
3417                     const U8 * const r = reghop3((U8*)locinput, -1, (U8*)PL_bostr);
3418
3419                     ln = utf8n_to_uvchr(r, UTF8SKIP(r), 0, uniflags);
3420                 }
3421                 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
3422                     ln = isALNUM_uni(ln);
3423                     LOAD_UTF8_CHARCLASS_ALNUM();
3424                     n = swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8);
3425                 }
3426                 else {
3427                     ln = isALNUM_LC_uvchr(UNI_TO_NATIVE(ln));
3428                     n = isALNUM_LC_utf8((U8*)locinput);
3429                 }
3430             }
3431             else {
3432                 ln = (locinput != PL_bostr) ?
3433                     UCHARAT(locinput - 1) : '\n';
3434                 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
3435                     ln = isALNUM(ln);
3436                     n = isALNUM(nextchr);
3437                 }
3438                 else {
3439                     ln = isALNUM_LC(ln);
3440                     n = isALNUM_LC(nextchr);
3441                 }
3442             }
3443             if (((!ln) == (!n)) == (OP(scan) == BOUND ||
3444                                     OP(scan) == BOUNDL))
3445                     sayNO;
3446             break;
3447         case SPACEL:
3448             PL_reg_flags |= RF_tainted;
3449             /* FALL THROUGH */
3450         case SPACE:
3451             if (!nextchr)
3452                 sayNO;
3453             if (do_utf8) {
3454                 if (UTF8_IS_CONTINUED(nextchr)) {
3455                     LOAD_UTF8_CHARCLASS_SPACE();
3456                     if (!(OP(scan) == SPACE
3457                           ? (bool)swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
3458                           : isSPACE_LC_utf8((U8*)locinput)))
3459                     {
3460                         sayNO;
3461                     }
3462                     locinput += PL_utf8skip[nextchr];
3463                     nextchr = UCHARAT(locinput);
3464                     break;
3465                 }
3466                 if (!(OP(scan) == SPACE
3467                       ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
3468                     sayNO;
3469                 nextchr = UCHARAT(++locinput);
3470             }
3471             else {
3472                 if (!(OP(scan) == SPACE
3473                       ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
3474                     sayNO;
3475                 nextchr = UCHARAT(++locinput);
3476             }
3477             break;
3478         case NSPACEL:
3479             PL_reg_flags |= RF_tainted;
3480             /* FALL THROUGH */
3481         case NSPACE:
3482             if (!nextchr && locinput >= PL_regeol)
3483                 sayNO;
3484             if (do_utf8) {
3485                 LOAD_UTF8_CHARCLASS_SPACE();
3486                 if (OP(scan) == NSPACE
3487                     ? (bool)swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
3488                     : isSPACE_LC_utf8((U8*)locinput))
3489                 {
3490                     sayNO;
3491                 }
3492                 locinput += PL_utf8skip[nextchr];
3493                 nextchr = UCHARAT(locinput);
3494                 break;
3495             }
3496             if (OP(scan) == NSPACE
3497                 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
3498                 sayNO;
3499             nextchr = UCHARAT(++locinput);
3500             break;
3501         case DIGITL:
3502             PL_reg_flags |= RF_tainted;
3503             /* FALL THROUGH */
3504         case DIGIT:
3505             if (!nextchr)
3506                 sayNO;
3507             if (do_utf8) {
3508                 LOAD_UTF8_CHARCLASS_DIGIT();
3509                 if (!(OP(scan) == DIGIT
3510                       ? (bool)swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
3511                       : isDIGIT_LC_utf8((U8*)locinput)))
3512                 {
3513                     sayNO;
3514                 }
3515                 locinput += PL_utf8skip[nextchr];
3516                 nextchr = UCHARAT(locinput);
3517                 break;
3518             }
3519             if (!(OP(scan) == DIGIT
3520                   ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
3521                 sayNO;
3522             nextchr = UCHARAT(++locinput);
3523             break;
3524         case NDIGITL:
3525             PL_reg_flags |= RF_tainted;
3526             /* FALL THROUGH */
3527         case NDIGIT:
3528             if (!nextchr && locinput >= PL_regeol)
3529                 sayNO;
3530             if (do_utf8) {
3531                 LOAD_UTF8_CHARCLASS_DIGIT();
3532                 if (OP(scan) == NDIGIT
3533                     ? (bool)swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
3534                     : isDIGIT_LC_utf8((U8*)locinput))
3535                 {
3536                     sayNO;
3537                 }
3538                 locinput += PL_utf8skip[nextchr];
3539                 nextchr = UCHARAT(locinput);
3540                 break;
3541             }
3542             if (OP(scan) == NDIGIT
3543                 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
3544                 sayNO;
3545             nextchr = UCHARAT(++locinput);
3546             break;
3547         case CLUMP:
3548             if (locinput >= PL_regeol)
3549                 sayNO;
3550             if  (do_utf8) {
3551                 LOAD_UTF8_CHARCLASS_MARK();
3552                 if (swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
3553                     sayNO;
3554                 locinput += PL_utf8skip[nextchr];
3555                 while (locinput < PL_regeol &&
3556                        swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
3557                     locinput += UTF8SKIP(locinput);
3558                 if (locinput > PL_regeol)
3559                     sayNO;
3560             }
3561             else
3562                locinput++;
3563             nextchr = UCHARAT(locinput);
3564             break;
3565
3566         case NREFFL:
3567         {
3568             char *s;
3569             char type;
3570             PL_reg_flags |= RF_tainted;
3571             /* FALL THROUGH */
3572         case NREF:
3573         case NREFF:
3574             type = OP(scan);
3575             n = reg_check_named_buff_matched(rex,scan);
3576
3577             if ( n ) {
3578                 type = REF + ( type - NREF );
3579                 goto do_ref;
3580             } else {
3581                 sayNO;
3582             }
3583             /* unreached */
3584         case REFFL:
3585             PL_reg_flags |= RF_tainted;
3586             /* FALL THROUGH */
3587         case REF:
3588         case REFF:
3589             n = ARG(scan);  /* which paren pair */
3590             type = OP(scan);
3591           do_ref:
3592             ln = PL_regoffs[n].start;
3593             PL_reg_leftiter = PL_reg_maxiter;           /* Void cache */
3594             if (*PL_reglastparen < n || ln == -1)
3595                 sayNO;                  /* Do not match unless seen CLOSEn. */
3596             if (ln == PL_regoffs[n].end)
3597                 break;
3598
3599             s = PL_bostr + ln;
3600             if (do_utf8 && type != REF) {       /* REF can do byte comparison */
3601                 char *l = locinput;
3602                 const char *e = PL_bostr + PL_regoffs[n].end;
3603                 /*
3604                  * Note that we can't do the "other character" lookup trick as
3605                  * in the 8-bit case (no pun intended) because in Unicode we
3606                  * have to map both upper and title case to lower case.
3607                  */
3608                 if (type == REFF) {
3609                     while (s < e) {
3610                         STRLEN ulen1, ulen2;
3611                         U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
3612                         U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
3613
3614                         if (l >= PL_regeol)
3615                             sayNO;
3616                         toLOWER_utf8((U8*)s, tmpbuf1, &ulen1);
3617                         toLOWER_utf8((U8*)l, tmpbuf2, &ulen2);
3618                         if (ulen1 != ulen2 || memNE((char *)tmpbuf1, (char *)tmpbuf2, ulen1))
3619                             sayNO;
3620                         s += ulen1;
3621                         l += ulen2;
3622                     }
3623                 }
3624                 locinput = l;
3625                 nextchr = UCHARAT(locinput);
3626                 break;
3627             }
3628
3629             /* Inline the first character, for speed. */
3630             if (UCHARAT(s) != nextchr &&
3631                 (type == REF ||
3632                  (UCHARAT(s) != (type == REFF
3633                                   ? PL_fold : PL_fold_locale)[nextchr])))
3634                 sayNO;
3635             ln = PL_regoffs[n].end - ln;
3636             if (locinput + ln > PL_regeol)
3637                 sayNO;
3638             if (ln > 1 && (type == REF
3639                            ? memNE(s, locinput, ln)
3640                            : (type == REFF
3641                               ? ibcmp(s, locinput, ln)
3642                               : ibcmp_locale(s, locinput, ln))))
3643                 sayNO;
3644             locinput += ln;
3645             nextchr = UCHARAT(locinput);
3646             break;
3647         }
3648         case NOTHING:
3649         case TAIL:
3650             break;
3651         case BACK:
3652             break;
3653
3654 #undef  ST
3655 #define ST st->u.eval
3656         {
3657             SV *ret;
3658             REGEXP *re_sv;
3659             regexp *re;
3660             regexp_internal *rei;
3661             regnode *startpoint;
3662
3663         case GOSTART:
3664         case GOSUB: /*    /(...(?1))/   /(...(?&foo))/   */
3665             if (cur_eval && cur_eval->locinput==locinput) {
3666                 if (cur_eval->u.eval.close_paren == (U32)ARG(scan))
3667                     Perl_croak(aTHX_ "Infinite recursion in regex");
3668                 if ( ++nochange_depth > max_nochange_depth )
3669                     Perl_croak(aTHX_
3670                         "Pattern subroutine nesting without pos change"
3671                         " exceeded limit in regex");
3672             } else {
3673                 nochange_depth = 0;
3674             }
3675             re_sv = rex_sv;
3676             re = rex;
3677             rei = rexi;
3678             (void)ReREFCNT_inc(rex_sv);
3679             if (OP(scan)==GOSUB) {
3680                 startpoint = scan + ARG2L(scan);
3681                 ST.close_paren = ARG(scan);
3682             } else {
3683                 startpoint = rei->program+1;
3684                 ST.close_paren = 0;
3685             }
3686             goto eval_recurse_doit;
3687             /* NOTREACHED */
3688         case EVAL:  /*   /(?{A})B/   /(??{A})B/  and /(?(?{A})X|Y)B/   */
3689             if (cur_eval && cur_eval->locinput==locinput) {
3690                 if ( ++nochange_depth > max_nochange_depth )
3691                     Perl_croak(aTHX_ "EVAL without pos change exceeded limit in regex");
3692             } else {
3693                 nochange_depth = 0;
3694             }
3695             {
3696                 /* execute the code in the {...} */
3697                 dSP;
3698                 SV ** const before = SP;
3699                 OP_4tree * const oop = PL_op;
3700                 COP * const ocurcop = PL_curcop;
3701                 PAD *old_comppad;
3702
3703                 n = ARG(scan);
3704                 PL_op = (OP_4tree*)rexi->data->data[n];
3705                 DEBUG_STATE_r( PerlIO_printf(Perl_debug_log,
3706                     "  re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
3707                 PAD_SAVE_LOCAL(old_comppad, (PAD*)rexi->data->data[n + 2]);
3708                 PL_regoffs[0].end = PL_reg_magic->mg_len = locinput - PL_bostr;
3709
3710                 if (sv_yes_mark) {
3711                     SV *sv_mrk = get_sv("REGMARK", 1);
3712                     sv_setsv(sv_mrk, sv_yes_mark);
3713                 }
3714
3715                 CALLRUNOPS(aTHX);                       /* Scalar context. */
3716                 SPAGAIN;
3717                 if (SP == before)
3718                     ret = &PL_sv_undef;   /* protect against empty (?{}) blocks. */
3719                 else {
3720                     ret = POPs;
3721                     PUTBACK;
3722                 }
3723
3724                 PL_op = oop;
3725                 PAD_RESTORE_LOCAL(old_comppad);
3726                 PL_curcop = ocurcop;
3727                 if (!logical) {
3728                     /* /(?{...})/ */
3729                     sv_setsv(save_scalar(PL_replgv), ret);
3730                     break;
3731                 }
3732             }
3733             if (logical == 2) { /* Postponed subexpression: /(??{...})/ */
3734                 logical = 0;
3735                 {
3736                     /* extract RE object from returned value; compiling if
3737                      * necessary */
3738                     MAGIC *mg = NULL;
3739                     REGEXP *rx = NULL;
3740
3741                     if (SvROK(ret)) {
3742                         SV *const sv = SvRV(ret);
3743
3744                         if (SvTYPE(sv) == SVt_REGEXP) {
3745                             rx = (REGEXP*) sv;
3746                         } else if (SvSMAGICAL(sv)) {
3747                             mg = mg_find(sv, PERL_MAGIC_qr);
3748                             assert(mg);
3749                         }
3750                     } else if (SvTYPE(ret) == SVt_REGEXP) {
3751                         rx = (REGEXP*) ret;
3752                     } else if (SvSMAGICAL(ret)) {
3753                         if (SvGMAGICAL(ret)) {
3754                             /* I don't believe that there is ever qr magic
3755                                here.  */
3756                             assert(!mg_find(ret, PERL_MAGIC_qr));
3757                             sv_unmagic(ret, PERL_MAGIC_qr);
3758                         }
3759                         else {
3760                             mg = mg_find(ret, PERL_MAGIC_qr);
3761                             /* testing suggests mg only ends up non-NULL for
3762                                scalars who were upgraded and compiled in the
3763                                else block below. In turn, this is only
3764                                triggered in the "postponed utf8 string" tests
3765                                in t/op/pat.t  */
3766                         }
3767                     }
3768
3769                     if (mg) {
3770                         rx = (REGEXP *) mg->mg_obj; /*XXX:dmq*/
3771                         assert(rx);
3772                     }
3773                     if (rx) {
3774                         rx = reg_temp_copy(rx);
3775                     }
3776                     else {
3777                         U32 pm_flags = 0;
3778                         const I32 osize = PL_regsize;
3779
3780                         if (DO_UTF8(ret)) {
3781                             assert (SvUTF8(ret));
3782                         } else if (SvUTF8(ret)) {
3783                             /* Not doing UTF-8, despite what the SV says. Is
3784                                this only if we're trapped in use 'bytes'?  */
3785                             /* Make a copy of the octet sequence, but without
3786                                the flag on, as the compiler now honours the
3787                                SvUTF8 flag on ret.  */
3788                             STRLEN len;
3789                             const char *const p = SvPV(ret, len);
3790                             ret = newSVpvn_flags(p, len, SVs_TEMP);
3791                         }
3792                         rx = CALLREGCOMP(ret, pm_flags);
3793                         if (!(SvFLAGS(ret)
3794                               & (SVs_TEMP | SVs_PADTMP | SVf_READONLY
3795                                  | SVs_GMG))) {
3796                             /* This isn't a first class regexp. Instead, it's
3797                                caching a regexp onto an existing, Perl visible
3798                                scalar.  */
3799                             sv_magic(ret, (SV*) rx, PERL_MAGIC_qr, 0, 0);
3800                         }
3801                         PL_regsize = osize;
3802                     }
3803                     re_sv = rx;
3804                     re = (struct regexp *)SvANY(rx);
3805                 }
3806                 RXp_MATCH_COPIED_off(re);
3807                 re->subbeg = rex->subbeg;
3808                 re->sublen = rex->sublen;
3809                 rei = RXi_GET(re);
3810                 DEBUG_EXECUTE_r(
3811                     debug_start_match(re_sv, do_utf8, locinput, PL_regeol,
3812                         "Matching embedded");
3813                 );
3814                 startpoint = rei->program + 1;
3815                 ST.close_paren = 0; /* only used for GOSUB */
3816                 /* borrowed from regtry */
3817                 if (PL_reg_start_tmpl <= re->nparens) {
3818                     PL_reg_start_tmpl = re->nparens*3/2 + 3;
3819                     if(PL_reg_start_tmp)
3820                         Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
3821                     else
3822                         Newx(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
3823                 }
3824
3825         eval_recurse_doit: /* Share code with GOSUB below this line */
3826                 /* run the pattern returned from (??{...}) */
3827                 ST.cp = regcppush(0);   /* Save *all* the positions. */
3828                 REGCP_SET(ST.lastcp);
3829
3830                 PL_regoffs = re->offs; /* essentially NOOP on GOSUB */
3831
3832                 /* see regtry, specifically PL_reglast(?:close)?paren is a pointer! (i dont know why) :dmq */
3833                 PL_reglastparen = &re->lastparen;
3834                 PL_reglastcloseparen = &re->lastcloseparen;
3835                 re->lastparen = 0;
3836                 re->lastcloseparen = 0;
3837
3838                 PL_reginput = locinput;
3839                 PL_regsize = 0;
3840
3841                 /* XXXX This is too dramatic a measure... */
3842                 PL_reg_maxiter = 0;
3843
3844                 ST.toggle_reg_flags = PL_reg_flags;
3845                 if (RX_UTF8(re_sv))
3846                     PL_reg_flags |= RF_utf8;
3847                 else
3848                     PL_reg_flags &= ~RF_utf8;
3849                 ST.toggle_reg_flags ^= PL_reg_flags; /* diff of old and new */
3850
3851                 ST.prev_rex = rex_sv;
3852                 ST.prev_curlyx = cur_curlyx;
3853                 SETREX(rex_sv,re_sv);
3854                 rex = re;
3855                 rexi = rei;
3856                 cur_curlyx = NULL;
3857                 ST.B = next;
3858                 ST.prev_eval = cur_eval;
3859                 cur_eval = st;
3860                 /* now continue from first node in postoned RE */
3861                 PUSH_YES_STATE_GOTO(EVAL_AB, startpoint);
3862                 /* NOTREACHED */
3863             }
3864             /* logical is 1,   /(?(?{...})X|Y)/ */
3865             sw = (bool)SvTRUE(ret);
3866             logical = 0;
3867             break;
3868         }
3869
3870         case EVAL_AB: /* cleanup after a successful (??{A})B */
3871             /* note: this is called twice; first after popping B, then A */
3872             PL_reg_flags ^= ST.toggle_reg_flags;
3873             ReREFCNT_dec(rex_sv);
3874             SETREX(rex_sv,ST.prev_rex);
3875             rex = (struct regexp *)SvANY(rex_sv);
3876             rexi = RXi_GET(rex);
3877             regcpblow(ST.cp);
3878             cur_eval = ST.prev_eval;
3879             cur_curlyx = ST.prev_curlyx;
3880
3881             PL_reglastparen = &rex->lastparen;
3882             PL_reglastcloseparen = &rex->lastcloseparen;
3883
3884             /* XXXX This is too dramatic a measure... */
3885             PL_reg_maxiter = 0;
3886             if ( nochange_depth )
3887                 nochange_depth--;
3888             sayYES;
3889
3890
3891         case EVAL_AB_fail: /* unsuccessfully ran A or B in (??{A})B */
3892             /* note: this is called twice; first after popping B, then A */
3893             PL_reg_flags ^= ST.toggle_reg_flags;
3894             ReREFCNT_dec(rex_sv);
3895             SETREX(rex_sv,ST.prev_rex);
3896             rex = (struct regexp *)SvANY(rex_sv);
3897             rexi = RXi_GET(rex);
3898             PL_reglastparen = &rex->lastparen;
3899             PL_reglastcloseparen = &rex->lastcloseparen;
3900
3901             PL_reginput = locinput;
3902             REGCP_UNWIND(ST.lastcp);
3903             regcppop(rex);
3904             cur_eval = ST.prev_eval;
3905             cur_curlyx = ST.prev_curlyx;
3906             /* XXXX This is too dramatic a measure... */
3907             PL_reg_maxiter = 0;
3908             if ( nochange_depth )
3909                 nochange_depth--;
3910             sayNO_SILENT;
3911 #undef ST
3912
3913         case OPEN:
3914             n = ARG(scan);  /* which paren pair */
3915             PL_reg_start_tmp[n] = locinput;
3916             if (n > PL_regsize)
3917                 PL_regsize = n;
3918             lastopen = n;
3919             break;
3920         case CLOSE:
3921             n = ARG(scan);  /* which paren pair */
3922             PL_regoffs[n].start = PL_reg_start_tmp[n] - PL_bostr;
3923             PL_regoffs[n].end = locinput - PL_bostr;
3924             /*if (n > PL_regsize)
3925                 PL_regsize = n;*/
3926             if (n > *PL_reglastparen)
3927                 *PL_reglastparen = n;
3928             *PL_reglastcloseparen = n;
3929             if (cur_eval && cur_eval->u.eval.close_paren == n) {
3930                 goto fake_end;
3931             }
3932             break;
3933         case ACCEPT:
3934             if (ARG(scan)){
3935                 regnode *cursor;
3936                 for (cursor=scan;
3937                      cursor && OP(cursor)!=END;
3938                      cursor=regnext(cursor))
3939                 {
3940                     if ( OP(cursor)==CLOSE ){
3941                         n = ARG(cursor);
3942                         if ( n <= lastopen ) {
3943                             PL_regoffs[n].start
3944                                 = PL_reg_start_tmp[n] - PL_bostr;
3945                             PL_regoffs[n].end = locinput - PL_bostr;
3946                             /*if (n > PL_regsize)
3947                             PL_regsize = n;*/
3948                             if (n > *PL_reglastparen)
3949                                 *PL_reglastparen = n;
3950                             *PL_reglastcloseparen = n;
3951                             if ( n == ARG(scan) || (cur_eval &&
3952                                 cur_eval->u.eval.close_paren == n))
3953                                 break;
3954                         }
3955                     }
3956                 }
3957             }
3958             goto fake_end;
3959             /*NOTREACHED*/
3960         case GROUPP:
3961             n = ARG(scan);  /* which paren pair */
3962             sw = (bool)(*PL_reglastparen >= n && PL_regoffs[n].end != -1);
3963             break;
3964         case NGROUPP:
3965             /* reg_check_named_buff_matched returns 0 for no match */
3966             sw = (bool)(0 < reg_check_named_buff_matched(rex,scan));
3967             break;
3968         case INSUBP:
3969             n = ARG(scan);
3970             sw = (cur_eval && (!n || cur_eval->u.eval.close_paren == n));
3971             break;
3972         case DEFINEP:
3973             sw = 0;
3974             break;
3975         case IFTHEN:
3976             PL_reg_leftiter = PL_reg_maxiter;           /* Void cache */
3977             if (sw)
3978                 next = NEXTOPER(NEXTOPER(scan));
3979             else {
3980                 next = scan + ARG(scan);
3981                 if (OP(next) == IFTHEN) /* Fake one. */
3982                     next = NEXTOPER(NEXTOPER(next));
3983             }
3984             break;
3985         case LOGICAL:
3986             logical = scan->flags;
3987             break;
3988
3989 /*******************************************************************
3990
3991 The CURLYX/WHILEM pair of ops handle the most generic case of the /A*B/
3992 pattern, where A and B are subpatterns. (For simple A, CURLYM or
3993 STAR/PLUS/CURLY/CURLYN are used instead.)
3994
3995 A*B is compiled as <CURLYX><A><WHILEM><B>
3996
3997 On entry to the subpattern, CURLYX is called. This pushes a CURLYX
3998 state, which contains the current count, initialised to -1. It also sets
3999 cur_curlyx to point to this state, with any previous value saved in the
4000 state block.
4001
4002 CURLYX then jumps straight to the WHILEM op, rather than executing A,
4003 since the pattern may possibly match zero times (i.e. it's a while {} loop
4004 rather than a do {} while loop).
4005
4006 Each entry to WHILEM represents a successful match of A. The count in the
4007 CURLYX block is incremented, another WHILEM state is pushed, and execution
4008 passes to A or B depending on greediness and the current count.
4009
4010 For example, if matching against the string a1a2a3b (where the aN are
4011 substrings that match /A/), then the match progresses as follows: (the
4012 pushed states are interspersed with the bits of strings matched so far):
4013
4014     <CURLYX cnt=-1>
4015     <CURLYX cnt=0><WHILEM>
4016     <CURLYX cnt=1><WHILEM> a1 <WHILEM>
4017     <CURLYX cnt=2><WHILEM> a1 <WHILEM> a2 <WHILEM>
4018     <CURLYX cnt=3><WHILEM> a1 <WHILEM> a2 <WHILEM> a3 <WHILEM>
4019     <CURLYX cnt=3><WHILEM> a1 <WHILEM> a2 <WHILEM> a3 <WHILEM> b
4020
4021 (Contrast this with something like CURLYM, which maintains only a single
4022 backtrack state:
4023
4024     <CURLYM cnt=0> a1
4025     a1 <CURLYM cnt=1> a2
4026     a1 a2 <CURLYM cnt=2> a3
4027     a1 a2 a3 <CURLYM cnt=3> b
4028 )
4029
4030 Each WHILEM state block marks a point to backtrack to upon partial failure
4031 of A or B, and also contains some minor state data related to that
4032 iteration.  The CURLYX block, pointed to by cur_curlyx, contains the
4033 overall state, such as the count, and pointers to the A and B ops.
4034
4035 This is complicated slightly by nested CURLYX/WHILEM's. Since cur_curlyx
4036 must always point to the *current* CURLYX block, the rules are:
4037
4038 When executing CURLYX, save the old cur_curlyx in the CURLYX state block,
4039 and set cur_curlyx to point the new block.
4040
4041 When popping the CURLYX block after a successful or unsuccessful match,
4042 restore the previous cur_curlyx.
4043
4044 When WHILEM is about to execute B, save the current cur_curlyx, and set it
4045 to the outer one saved in the CURLYX block.
4046
4047 When popping the WHILEM block after a successful or unsuccessful B match,
4048 restore the previous cur_curlyx.
4049
4050 Here's an example for the pattern (AI* BI)*BO
4051 I and O refer to inner and outer, C and W refer to CURLYX and WHILEM:
4052
4053 cur_
4054 curlyx backtrack stack
4055 ------ ---------------
4056 NULL
4057 CO     <CO prev=NULL> <WO>
4058 CI     <CO prev=NULL> <WO> <CI prev=CO> <WI> ai
4059 CO     <CO prev=NULL> <WO> <CI prev=CO> <WI> ai <WI prev=CI> bi
4060 NULL   <CO prev=NULL> <WO> <CI prev=CO> <WI> ai <WI prev=CI> bi <WO prev=CO> bo
4061
4062 At this point the pattern succeeds, and we work back down the stack to
4063 clean up, restoring as we go:
4064
4065 CO     <CO prev=NULL> <WO> <CI prev=CO> <WI> ai <WI prev=CI> bi
4066 CI     <CO prev=NULL> <WO> <CI prev=CO> <WI> ai
4067 CO     <CO prev=NULL> <WO>
4068 NULL
4069
4070 *******************************************************************/
4071
4072 #define ST st->u.curlyx
4073
4074         case CURLYX:    /* start of /A*B/  (for complex A) */
4075         {
4076             /* No need to save/restore up to this paren */
4077             I32 parenfloor = scan->flags;
4078
4079             assert(next); /* keep Coverity happy */
4080             if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
4081                 next += ARG(next);
4082
4083             /* XXXX Probably it is better to teach regpush to support
4084                parenfloor > PL_regsize... */
4085             if (parenfloor > (I32)*PL_reglastparen)
4086                 parenfloor = *PL_reglastparen; /* Pessimization... */
4087
4088             ST.prev_curlyx= cur_curlyx;
4089             cur_curlyx = st;
4090             ST.cp = PL_savestack_ix;
4091
4092             /* these fields contain the state of the current curly.
4093              * they are accessed by subsequent WHILEMs */
4094             ST.parenfloor = parenfloor;
4095             ST.min = ARG1(scan);
4096             ST.max = ARG2(scan);
4097             ST.A = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
4098             ST.B = next;
4099             ST.minmod = minmod;
4100             minmod = 0;
4101             ST.count = -1;      /* this will be updated by WHILEM */
4102             ST.lastloc = NULL;  /* this will be updated by WHILEM */
4103
4104             PL_reginput = locinput;
4105             PUSH_YES_STATE_GOTO(CURLYX_end, PREVOPER(next));
4106             /* NOTREACHED */
4107         }
4108
4109         case CURLYX_end: /* just finished matching all of A*B */
4110             cur_curlyx = ST.prev_curlyx;
4111             sayYES;
4112             /* NOTREACHED */
4113
4114         case CURLYX_end_fail: /* just failed to match all of A*B */
4115             regcpblow(ST.cp);
4116             cur_curlyx = ST.prev_curlyx;
4117             sayNO;
4118             /* NOTREACHED */
4119
4120
4121 #undef ST
4122 #define ST st->u.whilem
4123
4124         case WHILEM:     /* just matched an A in /A*B/  (for complex A) */
4125         {
4126             /* see the discussion above about CURLYX/WHILEM */
4127             I32 n;
4128             assert(cur_curlyx); /* keep Coverity happy */
4129             n = ++cur_curlyx->u.curlyx.count; /* how many A's matched */
4130             ST.save_lastloc = cur_curlyx->u.curlyx.lastloc;
4131             ST.cache_offset = 0;
4132             ST.cache_mask = 0;
4133
4134             PL_reginput = locinput;
4135
4136             DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
4137                   "%*s  whilem: matched %ld out of %ld..%ld\n",
4138                   REPORT_CODE_OFF+depth*2, "", (long)n,
4139                   (long)cur_curlyx->u.curlyx.min,
4140                   (long)cur_curlyx->u.curlyx.max)
4141             );
4142
4143             /* First just match a string of min A's. */
4144
4145             if (n < cur_curlyx->u.curlyx.min) {
4146                 cur_curlyx->u.curlyx.lastloc = locinput;
4147                 PUSH_STATE_GOTO(WHILEM_A_pre, cur_curlyx->u.curlyx.A);
4148                 /* NOTREACHED */
4149             }
4150
4151             /* If degenerate A matches "", assume A done. */
4152
4153             if (locinput == cur_curlyx->u.curlyx.lastloc) {
4154                 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
4155                    "%*s  whilem: empty match detected, trying continuation...\n",
4156                    REPORT_CODE_OFF+depth*2, "")
4157                 );
4158                 goto do_whilem_B_max;
4159             }
4160
4161             /* super-linear cache processing */
4162
4163             if (scan->flags) {
4164
4165                 if (!PL_reg_maxiter) {
4166                     /* start the countdown: Postpone detection until we
4167                      * know the match is not *that* much linear. */
4168                     PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
4169                     /* possible overflow for long strings and many CURLYX's */
4170                     if (PL_reg_maxiter < 0)
4171                         PL_reg_maxiter = I32_MAX;
4172                     PL_reg_leftiter = PL_reg_maxiter;
4173                 }
4174
4175                 if (PL_reg_leftiter-- == 0) {
4176                     /* initialise cache */
4177                     const I32 size = (PL_reg_maxiter + 7)/8;
4178                     if (PL_reg_poscache) {
4179                         if ((I32)PL_reg_poscache_size < size) {
4180                             Renew(PL_reg_poscache, size, char);
4181                             PL_reg_poscache_size = size;
4182                         }
4183                         Zero(PL_reg_poscache, size, char);
4184                     }
4185                     else {
4186                         PL_reg_poscache_size = size;
4187                         Newxz(PL_reg_poscache, size, char);
4188                     }
4189                     DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
4190       "%swhilem: Detected a super-linear match, switching on caching%s...\n",
4191                               PL_colors[4], PL_colors[5])
4192                     );
4193                 }
4194
4195                 if (PL_reg_leftiter < 0) {
4196                     /* have we already failed at this position? */
4197                     I32 offset, mask;
4198                     offset  = (scan->flags & 0xf) - 1
4199                                 + (locinput - PL_bostr)  * (scan->flags>>4);
4200                     mask    = 1 << (offset % 8);
4201                     offset /= 8;
4202                     if (PL_reg_poscache[offset] & mask) {
4203                         DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
4204                             "%*s  whilem: (cache) already tried at this position...\n",
4205                             REPORT_CODE_OFF+depth*2, "")
4206                         );
4207                         sayNO; /* cache records failure */
4208                     }
4209                     ST.cache_offset = offset;
4210                     ST.cache_mask   = mask;
4211                 }
4212             }
4213
4214             /* Prefer B over A for minimal matching. */
4215
4216             if (cur_curlyx->u.curlyx.minmod) {
4217                 ST.save_curlyx = cur_curlyx;
4218                 cur_curlyx = cur_curlyx->u.curlyx.prev_curlyx;
4219                 ST.cp = regcppush(ST.save_curlyx->u.curlyx.parenfloor);
4220                 REGCP_SET(ST.lastcp);
4221                 PUSH_YES_STATE_GOTO(WHILEM_B_min, ST.save_curlyx->u.curlyx.B);
4222                 /* NOTREACHED */
4223             }
4224
4225             /* Prefer A over B for maximal matching. */
4226
4227             if (n < cur_curlyx->u.curlyx.max) { /* More greed allowed? */
4228                 ST.cp = regcppush(cur_curlyx->u.curlyx.parenfloor);
4229                 cur_curlyx->u.curlyx.lastloc = locinput;
4230                 REGCP_SET(ST.lastcp);
4231                 PUSH_STATE_GOTO(WHILEM_A_max, cur_curlyx->u.curlyx.A);
4232                 /* NOTREACHED */
4233             }
4234             goto do_whilem_B_max;
4235         }
4236         /* NOTREACHED */
4237
4238         case WHILEM_B_min: /* just matched B in a minimal match */
4239         case WHILEM_B_max: /* just matched B in a maximal match */
4240             cur_curlyx = ST.save_curlyx;
4241             sayYES;
4242             /* NOTREACHED */
4243
4244         case WHILEM_B_max_fail: /* just failed to match B in a maximal match */
4245             cur_curlyx = ST.save_curlyx;
4246             cur_curlyx->u.curlyx.lastloc = ST.save_lastloc;
4247             cur_curlyx->u.curlyx.count--;
4248             CACHEsayNO;
4249             /* NOTREACHED */
4250
4251         case WHILEM_A_min_fail: /* just failed to match A in a minimal match */
4252             REGCP_UNWIND(ST.lastcp);
4253             regcppop(rex);
4254             /* FALL THROUGH */
4255         case WHILEM_A_pre_fail: /* just failed to match even minimal A */
4256             cur_curlyx->u.curlyx.lastloc = ST.save_lastloc;
4257             cur_curlyx->u.curlyx.count--;
4258             CACHEsayNO;
4259             /* NOTREACHED */
4260
4261         case WHILEM_A_max_fail: /* just failed to match A in a maximal match */
4262             REGCP_UNWIND(ST.lastcp);
4263             regcppop(rex);      /* Restore some previous $<digit>s? */
4264             PL_reginput = locinput;
4265             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
4266                 "%*s  whilem: failed, trying continuation...\n",
4267                 REPORT_CODE_OFF+depth*2, "")
4268             );
4269           do_whilem_B_max:
4270             if (cur_curlyx->u.curlyx.count >= REG_INFTY
4271                 && ckWARN(WARN_REGEXP)
4272                 && !(PL_reg_flags & RF_warned))
4273             {
4274                 PL_reg_flags |= RF_warned;
4275                 Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s limit (%d) exceeded",
4276                      "Complex regular subexpression recursion",
4277                      REG_INFTY - 1);
4278             }
4279
4280             /* now try B */
4281             ST.save_curlyx = cur_curlyx;
4282             cur_curlyx = cur_curlyx->u.curlyx.prev_curlyx;
4283             PUSH_YES_STATE_GOTO(WHILEM_B_max, ST.save_curlyx->u.curlyx.B);
4284             /* NOTREACHED */
4285
4286         case WHILEM_B_min_fail: /* just failed to match B in a minimal match */
4287             cur_curlyx = ST.save_curlyx;
4288             REGCP_UNWIND(ST.lastcp);
4289             regcppop(rex);
4290
4291             if (cur_curlyx->u.curlyx.count >= cur_curlyx->u.curlyx.max) {
4292                 /* Maximum greed exceeded */
4293                 if (cur_curlyx->u.curlyx.count >= REG_INFTY
4294                     && ckWARN(WARN_REGEXP)
4295                     && !(PL_reg_flags & RF_warned))
4296                 {
4297                     PL_reg_flags |= RF_warned;
4298                     Perl_warner(aTHX_ packWARN(WARN_REGEXP),
4299                         "%s limit (%d) exceeded",
4300                         "Complex regular subexpression recursion",
4301                         REG_INFTY - 1);
4302                 }
4303                 cur_curlyx->u.curlyx.count--;
4304                 CACHEsayNO;
4305             }
4306
4307             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
4308                 "%*s  trying longer...\n", REPORT_CODE_OFF+depth*2, "")
4309             );
4310             /* Try grabbing another A and see if it helps. */
4311             PL_reginput = locinput;
4312             cur_curlyx->u.curlyx.lastloc = locinput;
4313             ST.cp = regcppush(cur_curlyx->u.curlyx.parenfloor);
4314             REGCP_SET(ST.lastcp);
4315             PUSH_STATE_GOTO(WHILEM_A_min, ST.save_curlyx->u.curlyx.A);
4316             /* NOTREACHED */
4317
4318 #undef  ST
4319 #define ST st->u.branch
4320
4321         case BRANCHJ:       /*  /(...|A|...)/ with long next pointer */
4322             next = scan + ARG(scan);
4323             if (next == scan)
4324                 next = NULL;
4325             scan = NEXTOPER(scan);
4326             /* FALL THROUGH */
4327
4328         case BRANCH:        /*  /(...|A|...)/ */
4329             scan = NEXTOPER(scan); /* scan now points to inner node */
4330             ST.lastparen = *PL_reglastparen;
4331             ST.next_branch = next;
4332             REGCP_SET(ST.cp);
4333             PL_reginput = locinput;
4334
4335             /* Now go into the branch */
4336             if (has_cutgroup) {
4337                 PUSH_YES_STATE_GOTO(BRANCH_next, scan);
4338             } else {
4339                 PUSH_STATE_GOTO(BRANCH_next, scan);
4340             }
4341             /* NOTREACHED */
4342         case CUTGROUP:
4343             PL_reginput = locinput;
4344             sv_yes_mark = st->u.mark.mark_name = scan->flags ? NULL :
4345                 (SV*)rexi->data->data[ ARG( scan ) ];
4346             PUSH_STATE_GOTO(CUTGROUP_next,next);
4347             /* NOTREACHED */
4348         case CUTGROUP_next_fail:
4349             do_cutgroup = 1;
4350             no_final = 1;
4351             if (st->u.mark.mark_name)
4352                 sv_commit = st->u.mark.mark_name;
4353             sayNO;
4354             /* NOTREACHED */
4355         case BRANCH_next:
4356             sayYES;
4357             /* NOTREACHED */
4358         case BRANCH_next_fail: /* that branch failed; try the next, if any */
4359             if (do_cutgroup) {
4360                 do_cutgroup = 0;
4361                 no_final = 0;
4362             }
4363             REGCP_UNWIND(ST.cp);
4364             for (n = *PL_reglastparen; n > ST.lastparen; n--)
4365                 PL_regoffs[n].end = -1;
4366             *PL_reglastparen = n;
4367             /*dmq: *PL_reglastcloseparen = n; */
4368             scan = ST.next_branch;
4369             /* no more branches? */
4370             if (!scan || (OP(scan) != BRANCH && OP(scan) != BRANCHJ)) {
4371                 DEBUG_EXECUTE_r({
4372                     PerlIO_printf( Perl_debug_log,
4373                         "%*s  %sBRANCH failed...%s\n",
4374                         REPORT_CODE_OFF+depth*2, "",
4375                         PL_colors[4],
4376                         PL_colors[5] );
4377                 });
4378                 sayNO_SILENT;
4379             }
4380             continue; /* execute next BRANCH[J] op */
4381             /* NOTREACHED */
4382
4383         case MINMOD:
4384             minmod = 1;
4385             break;
4386
4387 #undef  ST
4388 #define ST st->u.curlym
4389
4390         case CURLYM:    /* /A{m,n}B/ where A is fixed-length */
4391
4392             /* This is an optimisation of CURLYX that enables us to push
4393              * only a single backtracking state, no matter now many matches
4394              * there are in {m,n}. It relies on the pattern being constant
4395              * length, with no parens to influence future backrefs
4396              */
4397
4398             ST.me = scan;
4399             scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
4400
4401             /* if paren positive, emulate an OPEN/CLOSE around A */
4402             if (ST.me->flags) {
4403                 U32 paren = ST.me->flags;
4404                 if (paren > PL_regsize)
4405                     PL_regsize = paren;
4406                 if (paren > *PL_reglastparen)
4407                     *PL_reglastparen = paren;
4408                 scan += NEXT_OFF(scan); /* Skip former OPEN. */
4409             }
4410             ST.A = scan;
4411             ST.B = next;
4412             ST.alen = 0;
4413             ST.count = 0;
4414             ST.minmod = minmod;
4415             minmod = 0;
4416             ST.c1 = CHRTEST_UNINIT;
4417             REGCP_SET(ST.cp);
4418
4419             if (!(ST.minmod ? ARG1(ST.me) : ARG2(ST.me))) /* min/max */
4420                 goto curlym_do_B;
4421
4422           curlym_do_A: /* execute the A in /A{m,n}B/  */
4423             PL_reginput = locinput;
4424             PUSH_YES_STATE_GOTO(CURLYM_A, ST.A); /* match A */
4425             /* NOTREACHED */
4426
4427         case CURLYM_A: /* we've just matched an A */
4428             locinput = st->locinput;
4429             nextchr = UCHARAT(locinput);
4430
4431             ST.count++;
4432             /* after first match, determine A's length: u.curlym.alen */
4433             if (ST.count == 1) {
4434                 if (PL_reg_match_utf8) {
4435                     char *s = locinput;
4436                     while (s < PL_reginput) {
4437                         ST.alen++;
4438                         s += UTF8SKIP(s);
4439                     }
4440                 }
4441                 else {
4442                     ST.alen = PL_reginput - locinput;
4443                 }
4444                 if (ST.alen == 0)
4445                     ST.count = ST.minmod ? ARG1(ST.me) : ARG2(ST.me);
4446             }
4447             DEBUG_EXECUTE_r(
4448                 PerlIO_printf(Perl_debug_log,
4449                           "%*s  CURLYM now matched %"IVdf" times, len=%"IVdf"...\n",
4450                           (int)(REPORT_CODE_OFF+(depth*2)), "",
4451                           (IV) ST.count, (IV)ST.alen)
4452             );
4453
4454             locinput = PL_reginput;
4455
4456             if (cur_eval && cur_eval->u.eval.close_paren &&
4457                 cur_eval->u.eval.close_paren == (U32)ST.me->flags)
4458                 goto fake_end;
4459
4460             if ( ST.count < (ST.minmod ? ARG1(ST.me) : ARG2(ST.me)) )
4461                 goto curlym_do_A; /* try to match another A */
4462             goto curlym_do_B; /* try to match B */
4463
4464         case CURLYM_A_fail: /* just failed to match an A */
4465             REGCP_UNWIND(ST.cp);
4466
4467             if (ST.minmod || ST.count < ARG1(ST.me) /* min*/
4468                 || (cur_eval && cur_eval->u.eval.close_paren &&
4469                     cur_eval->u.eval.close_paren == (U32)ST.me->flags))
4470                 sayNO;
4471
4472           curlym_do_B: /* execute the B in /A{m,n}B/  */
4473             PL_reginput = locinput;
4474             if (ST.c1 == CHRTEST_UNINIT) {
4475                 /* calculate c1 and c2 for possible match of 1st char
4476                  * following curly */
4477                 ST.c1 = ST.c2 = CHRTEST_VOID;
4478                 if (HAS_TEXT(ST.B) || JUMPABLE(ST.B)) {
4479                     regnode *text_node = ST.B;
4480                     if (! HAS_TEXT(text_node))
4481                         FIND_NEXT_IMPT(text_node);
4482                     /* this used to be
4483
4484                         (HAS_TEXT(text_node) && PL_regkind[OP(text_node)] == EXACT)
4485
4486                         But the former is redundant in light of the latter.
4487
4488                         if this changes back then the macro for
4489                         IS_TEXT and friends need to change.
4490                      */
4491                     if (PL_regkind[OP(text_node)] == EXACT)
4492                     {
4493
4494                         ST.c1 = (U8)*STRING(text_node);
4495                         ST.c2 =
4496                             (IS_TEXTF(text_node))
4497                             ? PL_fold[ST.c1]
4498                             : (IS_TEXTFL(text_node))
4499                                 ? PL_fold_locale[ST.c1]
4500                                 : ST.c1;
4501                     }
4502                 }
4503             }
4504
4505             DEBUG_EXECUTE_r(
4506                 PerlIO_printf(Perl_debug_log,
4507                     "%*s  CURLYM trying tail with matches=%"IVdf"...\n",
4508                     (int)(REPORT_CODE_OFF+(depth*2)),
4509                     "", (IV)ST.count)
4510                 );
4511             if (ST.c1 != CHRTEST_VOID
4512                     && UCHARAT(PL_reginput) != ST.c1
4513                     && UCHARAT(PL_reginput) != ST.c2)
4514             {
4515                 /* simulate B failing */
4516                 DEBUG_OPTIMISE_r(
4517                     PerlIO_printf(Perl_debug_log,
4518                         "%*s  CURLYM Fast bail c1=%"IVdf" c2=%"IVdf"\n",
4519                         (int)(REPORT_CODE_OFF+(depth*2)),"",
4520                         (IV)ST.c1,(IV)ST.c2
4521                 ));
4522                 state_num = CURLYM_B_fail;
4523                 goto reenter_switch;
4524             }
4525
4526             if (ST.me->flags) {
4527                 /* mark current A as captured */
4528                 I32 paren = ST.me->flags;
4529                 if (ST.count) {
4530                     PL_regoffs[paren].start
4531                         = HOPc(PL_reginput, -ST.alen) - PL_bostr;
4532                     PL_regoffs[paren].end = PL_reginput - PL_bostr;
4533                     /*dmq: *PL_reglastcloseparen = paren; */
4534                 }
4535                 else
4536                     PL_regoffs[paren].end = -1;
4537                 if (cur_eval && cur_eval->u.eval.close_paren &&
4538                     cur_eval->u.eval.close_paren == (U32)ST.me->flags)
4539                 {
4540                     if (ST.count)
4541                         goto fake_end;
4542                     else
4543                         sayNO;
4544                 }
4545             }
4546
4547             PUSH_STATE_GOTO(CURLYM_B, ST.B); /* match B */
4548             /* NOTREACHED */
4549
4550         case CURLYM_B_fail: /* just failed to match a B */
4551             REGCP_UNWIND(ST.cp);
4552             if (ST.minmod) {
4553                 if (ST.count == ARG2(ST.me) /* max */)
4554                     sayNO;
4555                 goto curlym_do_A; /* try to match a further A */
4556             }
4557             /* backtrack one A */
4558             if (ST.count == ARG1(ST.me) /* min */)
4559                 sayNO;
4560             ST.count--;
4561             locinput = HOPc(locinput, -ST.alen);
4562             goto curlym_do_B; /* try to match B */
4563
4564 #undef ST
4565 #define ST st->u.curly
4566
4567 #define CURLY_SETPAREN(paren, success) \
4568     if (paren) { \
4569         if (success) { \
4570             PL_regoffs[paren].start = HOPc(locinput, -1) - PL_bostr; \
4571             PL_regoffs[paren].end = locinput - PL_bostr; \
4572             *PL_reglastcloseparen = paren; \
4573         } \
4574         else \
4575             PL_regoffs[paren].end = -1; \
4576     }
4577
4578         case STAR:              /*  /A*B/ where A is width 1 */
4579             ST.paren = 0;
4580             ST.min = 0;
4581             ST.max = REG_INFTY;
4582             scan = NEXTOPER(scan);
4583             goto repeat;
4584         case PLUS:              /*  /A+B/ where A is width 1 */
4585             ST.paren = 0;
4586             ST.min = 1;
4587             ST.max = REG_INFTY;
4588             scan = NEXTOPER(scan);
4589             goto repeat;
4590         case CURLYN:            /*  /(A){m,n}B/ where A is width 1 */
4591             ST.paren = scan->flags;     /* Which paren to set */
4592             if (ST.paren > PL_regsize)
4593                 PL_regsize = ST.paren;
4594             if (ST.paren > *PL_reglastparen)
4595                 *PL_reglastparen = ST.paren;
4596             ST.min = ARG1(scan);  /* min to match */
4597             ST.max = ARG2(scan);  /* max to match */
4598             if (cur_eval && cur_eval->u.eval.close_paren &&
4599                 cur_eval->u.eval.close_paren == (U32)ST.paren) {
4600                 ST.min=1;
4601                 ST.max=1;
4602             }
4603             scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
4604             goto repeat;
4605         case CURLY:             /*  /A{m,n}B/ where A is width 1 */
4606             ST.paren = 0;
4607             ST.min = ARG1(scan);  /* min to match */
4608             ST.max = ARG2(scan);  /* max to match */
4609             scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
4610           repeat:
4611             /*
4612             * Lookahead to avoid useless match attempts
4613             * when we know what character comes next.
4614             *
4615             * Used to only do .*x and .*?x, but now it allows
4616             * for )'s, ('s and (?{ ... })'s to be in the way
4617             * of the quantifier and the EXACT-like node.  -- japhy
4618             */
4619
4620             if (ST.min > ST.max) /* XXX make this a compile-time check? */
4621                 sayNO;
4622             if (HAS_TEXT(next) || JUMPABLE(next)) {
4623                 U8 *s;
4624                 regnode *text_node = next;
4625
4626                 if (! HAS_TEXT(text_node))
4627                     FIND_NEXT_IMPT(text_node);
4628
4629                 if (! HAS_TEXT(text_node))
4630                     ST.c1 = ST.c2 = CHRTEST_VOID;
4631                 else {
4632                     if ( PL_regkind[OP(text_node)] != EXACT ) {
4633                         ST.c1 = ST.c2 = CHRTEST_VOID;
4634                         goto assume_ok_easy;
4635                     }
4636                     else
4637                         s = (U8*)STRING(text_node);
4638
4639                     /*  Currently we only get here when
4640
4641                         PL_rekind[OP(text_node)] == EXACT
4642
4643                         if this changes back then the macro for IS_TEXT and
4644                         friends need to change. */
4645                     if (!UTF) {
4646                         ST.c2 = ST.c1 = *s;
4647                         if (IS_TEXTF(text_node))
4648                             ST.c2 = PL_fold[ST.c1];
4649                         else if (IS_TEXTFL(text_node))
4650                             ST.c2 = PL_fold_locale[ST.c1];
4651                     }
4652                     else { /* UTF */
4653                         if (IS_TEXTF(text_node)) {
4654                              STRLEN ulen1, ulen2;
4655                              U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
4656                              U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
4657
4658                              to_utf8_lower((U8*)s, tmpbuf1, &ulen1);
4659                              to_utf8_upper((U8*)s, tmpbuf2, &ulen2);
4660 #ifdef EBCDIC
4661                              ST.c1 = utf8n_to_uvchr(tmpbuf1, UTF8_MAXLEN, 0,
4662                                                     ckWARN(WARN_UTF8) ?
4663                                                     0 : UTF8_ALLOW_ANY);
4664                              ST.c2 = utf8n_to_uvchr(tmpbuf2, UTF8_MAXLEN, 0,
4665                                                     ckWARN(WARN_UTF8) ?
4666                                                     0 : UTF8_ALLOW_ANY);
4667 #else
4668                              ST.c1 = utf8n_to_uvuni(tmpbuf1, UTF8_MAXBYTES, 0,
4669                                                     uniflags);
4670                              ST.c2 = utf8n_to_uvuni(tmpbuf2, UTF8_MAXBYTES, 0,
4671                                                     uniflags);
4672 #endif
4673                         }
4674                         else {
4675                             ST.c2 = ST.c1 = utf8n_to_uvchr(s, UTF8_MAXBYTES, 0,
4676                                                      uniflags);
4677                         }
4678                     }
4679                 }
4680             }
4681             else
4682                 ST.c1 = ST.c2 = CHRTEST_VOID;
4683         assume_ok_easy:
4684
4685             ST.A = scan;
4686             ST.B = next;
4687             PL_reginput = locinput;
4688             if (minmod) {
4689                 minmod = 0;
4690                 if (ST.min && regrepeat(rex, ST.A, ST.min, depth) < ST.min)
4691                     sayNO;
4692                 ST.count = ST.min;
4693                 locinput = PL_reginput;
4694                 REGCP_SET(ST.cp);
4695                 if (ST.c1 == CHRTEST_VOID)
4696                     goto curly_try_B_min;
4697
4698                 ST.oldloc = locinput;
4699
4700                 /* set ST.maxpos to the furthest point along the
4701                  * string that could possibly match */
4702                 if  (ST.max == REG_INFTY) {
4703                     ST.maxpos = PL_regeol - 1;
4704                     if (do_utf8)
4705                         while (UTF8_IS_CONTINUATION(*(U8*)ST.maxpos))
4706                             ST.maxpos--;
4707                 }
4708                 else if (do_utf8) {
4709                     int m = ST.max - ST.min;
4710                     for (ST.maxpos = locinput;
4711                          m >0 && ST.maxpos + UTF8SKIP(ST.maxpos) <= PL_regeol; m--)
4712                         ST.maxpos += UTF8SKIP(ST.maxpos);
4713                 }
4714                 else {
4715                     ST.maxpos = locinput + ST.max - ST.min;
4716                     if (ST.maxpos >= PL_regeol)
4717                         ST.maxpos = PL_regeol - 1;
4718                 }
4719                 goto curly_try_B_min_known;
4720
4721             }
4722             else {
4723                 ST.count = regrepeat(rex, ST.A, ST.max, depth);
4724                 locinput = PL_reginput;
4725                 if (ST.count < ST.min)
4726                     sayNO;
4727                 if ((ST.count > ST.min)
4728                     && (PL_regkind[OP(ST.B)] == EOL) && (OP(ST.B) != MEOL))
4729                 {
4730                     /* A{m,n} must come at the end of the string, there's
4731                      * no point in backing off ... */
4732                     ST.min = ST.count;
4733                     /* ...except that $ and \Z can match before *and* after
4734                        newline at the end.  Consider "\n\n" =~ /\n+\Z\n/.
4735                        We may back off by one in this case. */
4736                     if (UCHARAT(PL_reginput - 1) == '\n' && OP(ST.B) != EOS)
4737                         ST.min--;
4738                 }
4739                 REGCP_SET(ST.cp);
4740                 goto curly_try_B_max;
4741             }
4742             /* NOTREACHED */
4743
4744
4745         case CURLY_B_min_known_fail:
4746             /* failed to find B in a non-greedy match where c1,c2 valid */
4747             if (ST.paren && ST.count)
4748                 PL_regoffs[ST.paren].end = -1;
4749
4750             PL_reginput = locinput;     /* Could be reset... */
4751             REGCP_UNWIND(ST.cp);
4752             /* Couldn't or didn't -- move forward. */
4753             ST.oldloc = locinput;
4754             if (do_utf8)
4755                 locinput += UTF8SKIP(locinput);
4756             else
4757                 locinput++;
4758             ST.count++;
4759           curly_try_B_min_known:
4760              /* find the next place where 'B' could work, then call B */
4761             {
4762                 int n;
4763                 if (do_utf8) {
4764                     n = (ST.oldloc == locinput) ? 0 : 1;
4765                     if (ST.c1 == ST.c2) {
4766                         STRLEN len;
4767                         /* set n to utf8_distance(oldloc, locinput) */
4768                         while (locinput <= ST.maxpos &&
4769                                utf8n_to_uvchr((U8*)locinput,
4770                                               UTF8_MAXBYTES, &len,
4771                                               uniflags) != (UV)ST.c1) {
4772                             locinput += len;
4773                             n++;
4774                         }
4775                     }
4776                     else {
4777                         /* set n to utf8_distance(oldloc, locinput) */
4778                         while (locinput <= ST.maxpos) {
4779                             STRLEN len;
4780                             const UV c = utf8n_to_uvchr((U8*)locinput,
4781                                                   UTF8_MAXBYTES, &len,
4782                                                   uniflags);
4783                             if (c == (UV)ST.c1 || c == (UV)ST.c2)
4784                                 break;
4785                             locinput += len;
4786                             n++;
4787                         }
4788                     }
4789                 }
4790                 else {
4791                     if (ST.c1 == ST.c2) {
4792                         while (locinput <= ST.maxpos &&
4793                                UCHARAT(locinput) != ST.c1)
4794                             locinput++;
4795                     }
4796                     else {
4797                         while (locinput <= ST.maxpos
4798                                && UCHARAT(locinput) != ST.c1
4799                                && UCHARAT(locinput) != ST.c2)
4800                             locinput++;
4801                     }
4802                     n = locinput - ST.oldloc;
4803                 }
4804                 if (locinput > ST.maxpos)
4805                     sayNO;
4806                 /* PL_reginput == oldloc now */
4807                 if (n) {
4808                     ST.count += n;
4809                     if (regrepeat(rex, ST.A, n, depth) < n)
4810                         sayNO;
4811                 }
4812                 PL_reginput = locinput;
4813                 CURLY_SETPAREN(ST.paren, ST.count);
4814                 if (cur_eval && cur_eval->u.eval.close_paren &&
4815                     cur_eval->u.eval.close_paren == (U32)ST.paren) {
4816                     goto fake_end;
4817                 }
4818                 PUSH_STATE_GOTO(CURLY_B_min_known, ST.B);
4819             }
4820             /* NOTREACHED */
4821
4822
4823         case CURLY_B_min_fail:
4824             /* failed to find B in a non-greedy match where c1,c2 invalid */
4825             if (ST.paren && ST.count)
4826                 PL_regoffs[ST.paren].end = -1;
4827
4828             REGCP_UNWIND(ST.cp);
4829             /* failed -- move forward one */
4830             PL_reginput = locinput;
4831             if (regrepeat(rex, ST.A, 1, depth)) {
4832                 ST.count++;
4833                 locinput = PL_reginput;
4834                 if (ST.count <= ST.max || (ST.max == REG_INFTY &&
4835                         ST.count > 0)) /* count overflow ? */
4836                 {
4837                   curly_try_B_min:
4838                     CURLY_SETPAREN(ST.paren, ST.count);
4839                     if (cur_eval && cur_eval->u.eval.close_paren &&
4840                         cur_eval->u.eval.close_paren == (U32)ST.paren) {
4841                         goto fake_end;
4842                     }
4843                     PUSH_STATE_GOTO(CURLY_B_min, ST.B);
4844                 }
4845             }
4846             sayNO;
4847             /* NOTREACHED */
4848
4849
4850         curly_try_B_max:
4851             /* a successful greedy match: now try to match B */
4852             if (cur_eval && cur_eval->u.eval.close_paren &&
4853                 cur_eval->u.eval.close_paren == (U32)ST.paren) {
4854                 goto fake_end;
4855             }
4856             {
4857                 UV c = 0;
4858                 if (ST.c1 != CHRTEST_VOID)
4859                     c = do_utf8 ? utf8n_to_uvchr((U8*)PL_reginput,
4860                                            UTF8_MAXBYTES, 0, uniflags)
4861                                 : (UV) UCHARAT(PL_reginput);
4862                 /* If it could work, try it. */
4863                 if (ST.c1 == CHRTEST_VOID || c == (UV)ST.c1 || c == (UV)ST.c2) {
4864                     CURLY_SETPAREN(ST.paren, ST.count);
4865                     PUSH_STATE_GOTO(CURLY_B_max, ST.B);
4866                     /* NOTREACHED */
4867                 }
4868             }
4869             /* FALL THROUGH */
4870         case CURLY_B_max_fail:
4871             /* failed to find B in a greedy match */
4872             if (ST.paren && ST.count)
4873                 PL_regoffs[ST.paren].end = -1;
4874
4875             REGCP_UNWIND(ST.cp);
4876             /*  back up. */
4877             if (--ST.count < ST.min)
4878                 sayNO;
4879             PL_reginput = locinput = HOPc(locinput, -1);
4880             goto curly_try_B_max;
4881
4882 #undef ST
4883
4884         case END:
4885             fake_end:
4886             if (cur_eval) {
4887                 /* we've just finished A in /(??{A})B/; now continue with B */
4888                 I32 tmpix;
4889                 st->u.eval.toggle_reg_flags
4890                             = cur_eval->u.eval.toggle_reg_flags;
4891                 PL_reg_flags ^= st->u.eval.toggle_reg_flags;
4892
4893                 st->u.eval.prev_rex = rex_sv;           /* inner */
4894                 SETREX(rex_sv,cur_eval->u.eval.prev_rex);
4895                 rex = (struct regexp *)SvANY(rex_sv);
4896                 rexi = RXi_GET(rex);
4897                 cur_curlyx = cur_eval->u.eval.prev_curlyx;
4898                 ReREFCNT_inc(rex_sv);
4899                 st->u.eval.cp = regcppush(0);   /* Save *all* the positions. */
4900                 REGCP_SET(st->u.eval.lastcp);
4901                 PL_reginput = locinput;
4902
4903                 /* Restore parens of the outer rex without popping the
4904                  * savestack */
4905                 tmpix = PL_savestack_ix;
4906                 PL_savestack_ix = cur_eval->u.eval.lastcp;
4907                 regcppop(rex);
4908                 PL_savestack_ix = tmpix;
4909
4910                 st->u.eval.prev_eval = cur_eval;
4911                 cur_eval = cur_eval->u.eval.prev_eval;
4912                 DEBUG_EXECUTE_r(
4913                     PerlIO_printf(Perl_debug_log, "%*s  EVAL trying tail ... %"UVxf"\n",
4914                                       REPORT_CODE_OFF+depth*2, "",PTR2UV(cur_eval)););
4915                 if ( nochange_depth )
4916                     nochange_depth--;
4917
4918                 PUSH_YES_STATE_GOTO(EVAL_AB,
4919                         st->u.eval.prev_eval->u.eval.B); /* match B */
4920             }
4921
4922             if (locinput < reginfo->till) {
4923                 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
4924                                       "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
4925                                       PL_colors[4],
4926                                       (long)(locinput - PL_reg_starttry),
4927                                       (long)(reginfo->till - PL_reg_starttry),
4928                                       PL_colors[5]));
4929
4930                 sayNO_SILENT;           /* Cannot match: too short. */
4931             }
4932             PL_reginput = locinput;     /* put where regtry can find it */
4933             sayYES;                     /* Success! */
4934
4935         case SUCCEED: /* successful SUSPEND/UNLESSM/IFMATCH/CURLYM */
4936             DEBUG_EXECUTE_r(
4937             PerlIO_printf(Perl_debug_log,
4938                 "%*s  %ssubpattern success...%s\n",
4939                 REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5]));
4940             PL_reginput = locinput;     /* put where regtry can find it */
4941             sayYES;                     /* Success! */
4942
4943 #undef  ST
4944 #define ST st->u.ifmatch
4945
4946         case SUSPEND:   /* (?>A) */
4947             ST.wanted = 1;
4948             PL_reginput = locinput;
4949             goto do_ifmatch;
4950
4951         case UNLESSM:   /* -ve lookaround: (?!A), or with flags, (?<!A) */
4952             ST.wanted = 0;
4953             goto ifmatch_trivial_fail_test;
4954
4955         case IFMATCH:   /* +ve lookaround: (?=A), or with flags, (?<=A) */
4956             ST.wanted = 1;
4957           ifmatch_trivial_fail_test:
4958             if (scan->flags) {
4959                 char * const s = HOPBACKc(locinput, scan->flags);
4960                 if (!s) {
4961                     /* trivial fail */
4962                     if (logical) {
4963                         logical = 0;
4964                         sw = 1 - (bool)ST.wanted;
4965                     }
4966                     else if (ST.wanted)
4967                         sayNO;
4968                     next = scan + ARG(scan);
4969                     if (next == scan)
4970                         next = NULL;
4971                     break;
4972                 }
4973                 PL_reginput = s;
4974             }
4975             else
4976                 PL_reginput = locinput;
4977
4978           do_ifmatch:
4979             ST.me = scan;
4980             ST.logical = logical;
4981             /* execute body of (?...A) */
4982             PUSH_YES_STATE_GOTO(IFMATCH_A, NEXTOPER(NEXTOPER(scan)));
4983             /* NOTREACHED */
4984
4985         case IFMATCH_A_fail: /* body of (?...A) failed */
4986             ST.wanted = !ST.wanted;
4987             /* FALL THROUGH */
4988
4989         case IFMATCH_A: /* body of (?...A) succeeded */
4990             if (ST.logical) {
4991                 sw = (bool)ST.wanted;
4992             }
4993             else if (!ST.wanted)
4994                 sayNO;
4995
4996             if (OP(ST.me) == SUSPEND)
4997                 locinput = PL_reginput;
4998             else {
4999                 locinput = PL_reginput = st->locinput;
5000                 nextchr = UCHARAT(locinput);
5001             }
5002             scan = ST.me + ARG(ST.me);
5003             if (scan == ST.me)
5004                 scan = NULL;
5005             continue; /* execute B */
5006
5007 #undef ST
5008
5009         case LONGJMP:
5010             next = scan + ARG(scan);
5011             if (next == scan)
5012                 next = NULL;
5013             break;
5014         case COMMIT:
5015             reginfo->cutpoint = PL_regeol;
5016             /* FALLTHROUGH */
5017         case PRUNE:
5018             PL_reginput = locinput;
5019             if (!scan->flags)
5020                 sv_yes_mark = sv_commit = (SV*)rexi->data->data[ ARG( scan ) ];
5021             PUSH_STATE_GOTO(COMMIT_next,next);
5022             /* NOTREACHED */
5023         case COMMIT_next_fail:
5024             no_final = 1;
5025             /* FALLTHROUGH */
5026         case OPFAIL:
5027             sayNO;
5028             /* NOTREACHED */
5029
5030 #define ST st->u.mark
5031         case MARKPOINT:
5032             ST.prev_mark = mark_state;
5033             ST.mark_name = sv_commit = sv_yes_mark
5034                 = (SV*)rexi->data->data[ ARG( scan ) ];
5035             mark_state = st;
5036             ST.mark_loc = PL_reginput = locinput;
5037             PUSH_YES_STATE_GOTO(MARKPOINT_next,next);
5038             /* NOTREACHED */
5039         case MARKPOINT_next:
5040             mark_state = ST.prev_mark;
5041             sayYES;
5042             /* NOTREACHED */
5043         case MARKPOINT_next_fail:
5044             if (popmark && sv_eq(ST.mark_name,popmark))
5045             {
5046                 if (ST.mark_loc > startpoint)
5047                     reginfo->cutpoint = HOPBACKc(ST.mark_loc, 1);
5048                 popmark = NULL; /* we found our mark */
5049                 sv_commit = ST.mark_name;
5050
5051                 DEBUG_EXECUTE_r({
5052                         PerlIO_printf(Perl_debug_log,
5053                             "%*s  %ssetting cutpoint to mark:%"SVf"...%s\n",
5054                             REPORT_CODE_OFF+depth*2, "",
5055                             PL_colors[4], SVfARG(sv_commit), PL_colors[5]);
5056                 });
5057             }
5058             mark_state = ST.prev_mark;
5059             sv_yes_mark = mark_state ?
5060                 mark_state->u.mark.mark_name : NULL;
5061             sayNO;
5062             /* NOTREACHED */
5063         case SKIP:
5064             PL_reginput = locinput;
5065             if (scan->flags) {
5066                 /* (*SKIP) : if we fail we cut here*/
5067                 ST.mark_name = NULL;
5068                 ST.mark_loc = locinput;
5069                 PUSH_STATE_GOTO(SKIP_next,next);
5070             } else {
5071                 /* (*SKIP:NAME) : if there is a (*MARK:NAME) fail where it was,
5072                    otherwise do nothing.  Meaning we need to scan
5073                  */
5074                 regmatch_state *cur = mark_state;
5075                 SV *find = (SV*)rexi->data->data[ ARG( scan ) ];
5076
5077                 while (cur) {
5078                     if ( sv_eq( cur->u.mark.mark_name,
5079                                 find ) )
5080                     {
5081                         ST.mark_name = find;
5082                         PUSH_STATE_GOTO( SKIP_next, next );
5083                     }
5084                     cur = cur->u.mark.prev_mark;
5085                 }
5086             }
5087             /* Didn't find our (*MARK:NAME) so ignore this (*SKIP:NAME) */
5088             break;
5089         case SKIP_next_fail:
5090             if (ST.mark_name) {
5091                 /* (*CUT:NAME) - Set up to search for the name as we
5092                    collapse the stack*/
5093                 popmark = ST.mark_name;
5094             } else {
5095                 /* (*CUT) - No name, we cut here.*/
5096                 if (ST.mark_loc > startpoint)
5097                     reginfo->cutpoint = HOPBACKc(ST.mark_loc, 1);
5098                 /* but we set sv_commit to latest mark_name if there
5099                    is one so they can test to see how things lead to this
5100                    cut */
5101                 if (mark_state)
5102                     sv_commit=mark_state->u.mark.mark_name;
5103             }
5104             no_final = 1;
5105             sayNO;
5106             /* NOTREACHED */
5107 #undef ST
5108         case FOLDCHAR:
5109             n = ARG(scan);
5110             if ( n == (U32)what_len_TRICKYFOLD(locinput,do_utf8,ln) ) {
5111                 locinput += ln;
5112             } else if ( 0xDF == n && !do_utf8 && !UTF ) {
5113                 sayNO;
5114             } else  {
5115                 U8 folded[UTF8_MAXBYTES_CASE+1];
5116                 STRLEN foldlen;
5117                 const char * const l = locinput;
5118                 char *e = PL_regeol;
5119                 to_uni_fold(n, folded, &foldlen);
5120
5121                 if (ibcmp_utf8((const char*) folded, 0,  foldlen, 1,
5122                                l, &e, 0,  do_utf8)) {
5123                         sayNO;
5124                 }
5125                 locinput = e;
5126             }
5127             nextchr = UCHARAT(locinput);
5128             break;
5129         case LNBREAK:
5130             if ((n=is_LNBREAK(locinput,do_utf8))) {
5131                 locinput += n;
5132                 nextchr = UCHARAT(locinput);
5133             } else
5134                 sayNO;
5135             break;
5136
5137 #define CASE_CLASS(nAmE)                              \
5138         case nAmE:                                    \
5139             if ((n=is_##nAmE(locinput,do_utf8))) {    \
5140                 locinput += n;                        \
5141                 nextchr = UCHARAT(locinput);          \
5142             } else                                    \
5143                 sayNO;                                \
5144             break;                                    \
5145         case N##nAmE:                                 \
5146             if ((n=is_##nAmE(locinput,do_utf8))) {    \
5147                 sayNO;                                \
5148             } else {                                  \
5149                 locinput += UTF8SKIP(locinput);       \
5150                 nextchr = UCHARAT(locinput);          \
5151             }                                         \
5152             break
5153
5154         CASE_CLASS(VERTWS);
5155         CASE_CLASS(HORIZWS);
5156 #undef CASE_CLASS
5157
5158         default:
5159             PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
5160                           PTR2UV(scan), OP(scan));
5161             Perl_croak(aTHX_ "regexp memory corruption");
5162
5163         } /* end switch */
5164
5165         /* switch break jumps here */
5166         scan = next; /* prepare to execute the next op and ... */
5167         continue;    /* ... jump back to the top, reusing st */
5168         /* NOTREACHED */
5169
5170       push_yes_state:
5171         /* push a state that backtracks on success */
5172         st->u.yes.prev_yes_state = yes_state;
5173         yes_state = st;
5174         /* FALL THROUGH */
5175       push_state:
5176         /* push a new regex state, then continue at scan  */
5177         {
5178             regmatch_state *newst;
5179
5180             DEBUG_STACK_r({
5181                 regmatch_state *cur = st;
5182                 regmatch_state *curyes = yes_state;
5183                 int curd = depth;
5184                 regmatch_slab *slab = PL_regmatch_slab;
5185                 for (;curd > -1;cur--,curd--) {
5186                     if (cur < SLAB_FIRST(slab)) {
5187                         slab = slab->prev;
5188                         cur = SLAB_LAST(slab);
5189                     }
5190                     PerlIO_printf(Perl_error_log, "%*s#%-3d %-10s %s\n",
5191                         REPORT_CODE_OFF + 2 + depth * 2,"",
5192                         curd, PL_reg_name[cur->resume_state],
5193                         (curyes == cur) ? "yes" : ""
5194                     );
5195                     if (curyes == cur)
5196                         curyes = cur->u.yes.prev_yes_state;
5197                 }
5198             } else
5199                 DEBUG_STATE_pp("push")
5200             );
5201             depth++;
5202             st->locinput = locinput;
5203             newst = st+1;
5204             if (newst >  SLAB_LAST(PL_regmatch_slab))
5205                 newst = S_push_slab(aTHX);
5206             PL_regmatch_state = newst;
5207
5208             locinput = PL_reginput;
5209             nextchr = UCHARAT(locinput);
5210             st = newst;
5211             continue;
5212             /* NOTREACHED */
5213         }
5214     }
5215
5216     /*
5217     * We get here only if there's trouble -- normally "case END" is
5218     * the terminating point.
5219     */
5220     Perl_croak(aTHX_ "corrupted regexp pointers");
5221     /*NOTREACHED*/
5222     sayNO;
5223
5224 yes:
5225     if (yes_state) {
5226         /* we have successfully completed a subexpression, but we must now
5227          * pop to the state marked by yes_state and continue from there */
5228         assert(st != yes_state);
5229 #ifdef DEBUGGING
5230         while (st != yes_state) {
5231             st--;
5232             if (st < SLAB_FIRST(PL_regmatch_slab)) {
5233                 PL_regmatch_slab = PL_regmatch_slab->prev;
5234                 st = SLAB_LAST(PL_regmatch_slab);
5235             }
5236             DEBUG_STATE_r({
5237                 if (no_final) {
5238                     DEBUG_STATE_pp("pop (no final)");
5239                 } else {
5240                     DEBUG_STATE_pp("pop (yes)");
5241                 }
5242             });
5243             depth--;
5244         }
5245 #else
5246         while (yes_state < SLAB_FIRST(PL_regmatch_slab)
5247             || yes_state > SLAB_LAST(PL_regmatch_slab))
5248         {
5249             /* not in this slab, pop slab */
5250             depth -= (st - SLAB_FIRST(PL_regmatch_slab) + 1);
5251             PL_regmatch_slab = PL_regmatch_slab->prev;
5252             st = SLAB_LAST(PL_regmatch_slab);
5253         }
5254         depth -= (st - yes_state);
5255 #endif
5256         st = yes_state;
5257         yes_state = st->u.yes.prev_yes_state;
5258         PL_regmatch_state = st;
5259
5260         if (no_final) {
5261             locinput= st->locinput;
5262             nextchr = UCHARAT(locinput);
5263         }
5264         state_num = st->resume_state + no_final;
5265         goto reenter_switch;
5266     }
5267
5268     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
5269                           PL_colors[4], PL_colors[5]));
5270
5271     if (PL_reg_eval_set) {
5272         /* each successfully executed (?{...}) block does the equivalent of
5273          *   local $^R = do {...}
5274          * When popping the save stack, all these locals would be undone;
5275          * bypass this by setting the outermost saved $^R to the latest
5276          * value */
5277         if (oreplsv != GvSV(PL_replgv))
5278             sv_setsv(oreplsv, GvSV(PL_replgv));
5279     }
5280     result = 1;
5281     goto final_exit;
5282
5283 no:
5284     DEBUG_EXECUTE_r(
5285         PerlIO_printf(Perl_debug_log,
5286             "%*s  %sfailed...%s\n",
5287             REPORT_CODE_OFF+depth*2, "",
5288             PL_colors[4], PL_colors[5])
5289         );
5290
5291 no_silent:
5292     if (no_final) {
5293         if (yes_state) {
5294             goto yes;
5295         } else {
5296             goto final_exit;
5297         }
5298     }
5299     if (depth) {
5300         /* there's a previous state to backtrack to */
5301         st--;
5302         if (st < SLAB_FIRST(PL_regmatch_slab)) {
5303             PL_regmatch_slab = PL_regmatch_slab->prev;
5304             st = SLAB_LAST(PL_regmatch_slab);
5305         }
5306         PL_regmatch_state = st;
5307         locinput= st->locinput;
5308         nextchr = UCHARAT(locinput);
5309
5310         DEBUG_STATE_pp("pop");
5311         depth--;
5312         if (yes_state == st)
5313             yes_state = st->u.yes.prev_yes_state;
5314
5315         state_num = st->resume_state + 1; /* failure = success + 1 */
5316         goto reenter_switch;
5317     }
5318     result = 0;
5319
5320   final_exit:
5321     if (rex->intflags & PREGf_VERBARG_SEEN) {
5322         SV *sv_err = get_sv("REGERROR", 1);
5323         SV *sv_mrk = get_sv("REGMARK", 1);
5324         if (result) {
5325             sv_commit = &PL_sv_no;
5326             if (!sv_yes_mark)
5327                 sv_yes_mark = &PL_sv_yes;
5328         } else {
5329             if (!sv_commit)
5330                 sv_commit = &PL_sv_yes;
5331             sv_yes_mark = &PL_sv_no;
5332         }
5333         sv_setsv(sv_err, sv_commit);
5334         sv_setsv(sv_mrk, sv_yes_mark);
5335     }
5336
5337     /* clean up; in particular, free all slabs above current one */
5338     LEAVE_SCOPE(oldsave);
5339
5340     return result;
5341 }
5342
5343 /*
5344  - regrepeat - repeatedly match something simple, report how many
5345  */
5346 /*
5347  * [This routine now assumes that it will only match on things of length 1.
5348  * That was true before, but now we assume scan - reginput is the count,
5349  * rather than incrementing count on every character.  [Er, except utf8.]]
5350  */
5351 STATIC I32
5352 S_regrepeat(pTHX_ const regexp *prog, const regnode *p, I32 max, int depth)
5353 {
5354     dVAR;
5355     register char *scan;
5356     register I32 c;
5357     register char *loceol = PL_regeol;
5358     register I32 hardcount = 0;
5359     register bool do_utf8 = PL_reg_match_utf8;
5360 #ifndef DEBUGGING
5361     PERL_UNUSED_ARG(depth);
5362 #endif
5363
5364     PERL_ARGS_ASSERT_REGREPEAT;
5365
5366     scan = PL_reginput;
5367     if (max == REG_INFTY)
5368         max = I32_MAX;
5369     else if (max < loceol - scan)
5370         loceol = scan + max;
5371     switch (OP(p)) {
5372     case REG_ANY:
5373         if (do_utf8) {
5374             loceol = PL_regeol;
5375             while (scan < loceol && hardcount < max && *scan != '\n') {
5376                 scan += UTF8SKIP(scan);
5377                 hardcount++;
5378             }
5379         } else {
5380             while (scan < loceol && *scan != '\n')
5381                 scan++;
5382         }
5383         break;
5384     case SANY:
5385         if (do_utf8) {
5386             loceol = PL_regeol;
5387             while (scan < loceol && hardcount < max) {
5388                 scan += UTF8SKIP(scan);
5389                 hardcount++;
5390             }
5391         }
5392         else
5393             scan = loceol;
5394         break;
5395     case CANY:
5396         scan = loceol;
5397         break;
5398     case EXACT:         /* length of string is 1 */
5399         c = (U8)*STRING(p);
5400         while (scan < loceol && UCHARAT(scan) == c)
5401             scan++;
5402         break;
5403     case EXACTF:        /* length of string is 1 */
5404         c = (U8)*STRING(p);
5405         while (scan < loceol &&
5406                (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
5407             scan++;
5408         break;
5409     case EXACTFL:       /* length of string is 1 */
5410         PL_reg_flags |= RF_tainted;
5411         c = (U8)*STRING(p);
5412         while (scan < loceol &&
5413                (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
5414             scan++;
5415         break;
5416     case ANYOF:
5417         if (do_utf8) {
5418             loceol = PL_regeol;
5419             while (hardcount < max && scan < loceol &&
5420                    reginclass(prog, p, (U8*)scan, 0, do_utf8)) {
5421                 scan += UTF8SKIP(scan);
5422                 hardcount++;
5423             }
5424         } else {
5425             while (scan < loceol && REGINCLASS(prog, p, (U8*)scan))
5426                 scan++;
5427         }
5428         break;
5429     case ALNUM:
5430         if (do_utf8) {
5431             loceol = PL_regeol;
5432             LOAD_UTF8_CHARCLASS_ALNUM();
5433             while (hardcount < max && scan < loceol &&
5434                    swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
5435                 scan += UTF8SKIP(scan);
5436                 hardcount++;
5437             }
5438         } else {
5439             while (scan < loceol && isALNUM(*scan))
5440                 scan++;
5441         }
5442         break;
5443     case ALNUML:
5444         PL_reg_flags |= RF_tainted;
5445         if (do_utf8) {
5446             loceol = PL_regeol;
5447             while (hardcount < max && scan < loceol &&
5448                    isALNUM_LC_utf8((U8*)scan)) {
5449                 scan += UTF8SKIP(scan);
5450                 hardcount++;
5451             }
5452         } else {
5453             while (scan < loceol && isALNUM_LC(*scan))
5454                 scan++;
5455         }
5456         break;
5457     case NALNUM:
5458         if (do_utf8) {
5459             loceol = PL_regeol;
5460             LOAD_UTF8_CHARCLASS_ALNUM();
5461             while (hardcount < max && scan < loceol &&
5462                    !swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
5463                 scan += UTF8SKIP(scan);
5464                 hardcount++;
5465             }
5466         } else {
5467             while (scan < loceol && !isALNUM(*scan))
5468                 scan++;
5469         }
5470         break;
5471     case NALNUML:
5472         PL_reg_flags |= RF_tainted;
5473         if (do_utf8) {
5474             loceol = PL_regeol;
5475             while (hardcount < max && scan < loceol &&
5476                    !isALNUM_LC_utf8((U8*)scan)) {
5477                 scan += UTF8SKIP(scan);
5478                 hardcount++;
5479             }
5480         } else {
5481             while (scan < loceol && !isALNUM_LC(*scan))
5482                 scan++;
5483         }
5484         break;
5485     case SPACE:
5486         if (do_utf8) {
5487             loceol = PL_regeol;
5488             LOAD_UTF8_CHARCLASS_SPACE();
5489             while (hardcount < max && scan < loceol &&
5490                    (*scan == ' ' ||
5491                     swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
5492                 scan += UTF8SKIP(scan);
5493                 hardcount++;
5494             }
5495         } else {
5496             while (scan < loceol && isSPACE(*scan))
5497                 scan++;
5498         }
5499         break;
5500     case SPACEL:
5501         PL_reg_flags |= RF_tainted;
5502         if (do_utf8) {
5503             loceol = PL_regeol;
5504             while (hardcount < max && scan < loceol &&
5505                    (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
5506                 scan += UTF8SKIP(scan);
5507                 hardcount++;
5508             }
5509         } else {
5510             while (scan < loceol && isSPACE_LC(*scan))
5511                 scan++;
5512         }
5513         break;
5514     case NSPACE:
5515         if (do_utf8) {
5516             loceol = PL_regeol;
5517             LOAD_UTF8_CHARCLASS_SPACE();
5518             while (hardcount < max && scan < loceol &&
5519                    !(*scan == ' ' ||
5520                      swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
5521                 scan += UTF8SKIP(scan);
5522                 hardcount++;
5523             }
5524         } else {
5525             while (scan < loceol && !isSPACE(*scan))
5526                 scan++;
5527         }
5528         break;
5529     case NSPACEL:
5530         PL_reg_flags |= RF_tainted;
5531         if (do_utf8) {
5532             loceol = PL_regeol;
5533             while (hardcount < max && scan < loceol &&
5534                    !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
5535                 scan += UTF8SKIP(scan);
5536                 hardcount++;
5537             }
5538         } else {
5539             while (scan < loceol && !isSPACE_LC(*scan))
5540                 scan++;
5541         }
5542         break;
5543     case DIGIT:
5544         if (do_utf8) {
5545             loceol = PL_regeol;
5546             LOAD_UTF8_CHARCLASS_DIGIT();
5547             while (hardcount < max && scan < loceol &&
5548                    swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
5549                 scan += UTF8SKIP(scan);
5550                 hardcount++;
5551             }
5552         } else {
5553             while (scan < loceol && isDIGIT(*scan))
5554                 scan++;
5555         }
5556         break;
5557     case NDIGIT:
5558         if (do_utf8) {
5559             loceol = PL_regeol;
5560             LOAD_UTF8_CHARCLASS_DIGIT();
5561             while (hardcount < max && scan < loceol &&
5562                    !swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
5563                 scan += UTF8SKIP(scan);
5564                 hardcount++;
5565             }
5566         } else {
5567             while (scan < loceol && !isDIGIT(*scan))
5568                 scan++;
5569         }
5570     case LNBREAK:
5571         if (do_utf8) {
5572             loceol = PL_regeol;
5573             while (hardcount < max && scan < loceol && (c=is_LNBREAK_utf8(scan))) {
5574                 scan += c;
5575                 hardcount++;
5576             }
5577         } else {
5578             /*
5579               LNBREAK can match two latin chars, which is ok,
5580               because we have a null terminated string, but we
5581               have to use hardcount in this situation
5582             */
5583             while (scan < loceol && (c=is_LNBREAK_latin1(scan)))  {
5584                 scan+=c;
5585                 hardcount++;
5586             }
5587         }
5588         break;
5589     case HORIZWS:
5590         if (do_utf8) {
5591             loceol = PL_regeol;
5592             while (hardcount < max && scan < loceol && (c=is_HORIZWS_utf8(scan))) {
5593                 scan += c;
5594                 hardcount++;
5595             }
5596         } else {
5597             while (scan < loceol && is_HORIZWS_latin1(scan))
5598                 scan++;
5599         }
5600         break;
5601     case NHORIZWS:
5602         if (do_utf8) {
5603             loceol = PL_regeol;
5604             while (hardcount < max && scan < loceol && !is_HORIZWS_utf8(scan)) {
5605                 scan += UTF8SKIP(scan);
5606                 hardcount++;
5607             }
5608         } else {
5609             while (scan < loceol && !is_HORIZWS_latin1(scan))
5610                 scan++;
5611
5612         }
5613         break;
5614     case VERTWS:
5615         if (do_utf8) {
5616             loceol = PL_regeol;
5617             while (hardcount < max && scan < loceol && (c=is_VERTWS_utf8(scan))) {
5618                 scan += c;
5619                 hardcount++;
5620             }
5621         } else {
5622             while (scan < loceol && is_VERTWS_latin1(scan))
5623                 scan++;
5624
5625         }
5626         break;
5627     case NVERTWS:
5628         if (do_utf8) {
5629             loceol = PL_regeol;
5630             while (hardcount < max && scan < loceol && !is_VERTWS_utf8(scan)) {
5631                 scan += UTF8SKIP(scan);
5632                 hardcount++;
5633             }
5634         } else {
5635             while (scan < loceol && !is_VERTWS_latin1(scan))
5636                 scan++;
5637
5638         }
5639         break;
5640
5641     default:            /* Called on something of 0 width. */
5642         break;          /* So match right here or not at all. */
5643     }
5644
5645     if (hardcount)
5646         c = hardcount;
5647     else
5648         c = scan - PL_reginput;
5649     PL_reginput = scan;
5650
5651     DEBUG_r({
5652         GET_RE_DEBUG_FLAGS_DECL;
5653         DEBUG_EXECUTE_r({
5654             SV * const prop = sv_newmortal();
5655             regprop(prog, prop, p);
5656             PerlIO_printf(Perl_debug_log,
5657                         "%*s  %s can match %"IVdf" times out of %"IVdf"...\n",
5658                         REPORT_CODE_OFF + depth*2, "", SvPVX_const(prop),(IV)c,(IV)max);
5659         });
5660     });
5661
5662     return(c);
5663 }
5664
5665
5666 #if !defined(PERL_IN_XSUB_RE) || defined(PLUGGABLE_RE_EXTENSION)
5667 /*
5668 - regclass_swash - prepare the utf8 swash
5669 */
5670
5671 SV *
5672 Perl_regclass_swash(pTHX_ const regexp *prog, register const regnode* node, bool doinit, SV** listsvp, SV **altsvp)
5673 {
5674     dVAR;
5675     SV *sw  = NULL;
5676     SV *si  = NULL;
5677     SV *alt = NULL;
5678     RXi_GET_DECL(prog,progi);
5679     const struct reg_data * const data = prog ? progi->data : NULL;
5680
5681     PERL_ARGS_ASSERT_REGCLASS_SWASH;
5682
5683     if (data && data->count) {
5684         const U32 n = ARG(node);
5685
5686         if (data->what[n] == 's') {
5687             SV * const rv = (SV*)data->data[n];
5688             AV * const av = (AV*)SvRV((SV*)rv);
5689             SV **const ary = AvARRAY(av);
5690             SV **a, **b;
5691
5692             /* See the end of regcomp.c:S_regclass() for
5693              * documentation of these array elements. */
5694
5695             si = *ary;
5696             a  = SvROK(ary[1]) ? &ary[1] : NULL;
5697             b  = SvTYPE(ary[2]) == SVt_PVAV ? &ary[2] : NULL;
5698
5699             if (a)
5700                 sw = *a;
5701             else if (si && doinit) {
5702                 sw = swash_init("utf8", "", si, 1, 0);
5703                 (void)av_store(av, 1, sw);
5704             }
5705             if (b)
5706                 alt = *b;
5707         }
5708     }
5709
5710     if (listsvp)
5711         *listsvp = si;
5712     if (altsvp)
5713         *altsvp  = alt;
5714
5715     return sw;
5716 }
5717 #endif
5718
5719 /*
5720  - reginclass - determine if a character falls into a character class
5721
5722   The n is the ANYOF regnode, the p is the target string, lenp
5723   is pointer to the maximum length of how far to go in the p
5724   (if the lenp is zero, UTF8SKIP(p) is used),
5725   do_utf8 tells whether the target string is in UTF-8.
5726
5727  */
5728
5729 STATIC bool
5730 S_reginclass(pTHX_ const regexp *prog, register const regnode *n, register const U8* p, STRLEN* lenp, register bool do_utf8)
5731 {
5732     dVAR;
5733     const char flags = ANYOF_FLAGS(n);
5734     bool match = FALSE;
5735     UV c = *p;
5736     STRLEN len = 0;
5737     STRLEN plen;
5738
5739     PERL_ARGS_ASSERT_REGINCLASS;
5740
5741     if (do_utf8 && !UTF8_IS_INVARIANT(c)) {
5742         c = utf8n_to_uvchr(p, UTF8_MAXBYTES, &len,
5743                 (UTF8_ALLOW_DEFAULT & UTF8_ALLOW_ANYUV) | UTF8_CHECK_ONLY);
5744                 /* see [perl #37836] for UTF8_ALLOW_ANYUV */
5745         if (len == (STRLEN)-1)
5746             Perl_croak(aTHX_ "Malformed UTF-8 character (fatal)");
5747     }
5748
5749     plen = lenp ? *lenp : UNISKIP(NATIVE_TO_UNI(c));
5750     if (do_utf8 || (flags & ANYOF_UNICODE)) {
5751         if (lenp)
5752             *lenp = 0;
5753         if (do_utf8 && !ANYOF_RUNTIME(n)) {
5754             if (len != (STRLEN)-1 && c < 256 && ANYOF_BITMAP_TEST(n, c))
5755                 match = TRUE;
5756         }
5757         if (!match && do_utf8 && (flags & ANYOF_UNICODE_ALL) && c >= 256)
5758             match = TRUE;
5759         if (!match) {
5760             AV *av;
5761             SV * const sw = regclass_swash(prog, n, TRUE, 0, (SV**)&av);
5762
5763             if (sw) {
5764                 if (swash_fetch(sw, p, do_utf8))
5765                     match = TRUE;
5766                 else if (flags & ANYOF_FOLD) {
5767                     if (!match && lenp && av) {
5768                         I32 i;
5769                         for (i = 0; i <= av_len(av); i++) {
5770                             SV* const sv = *av_fetch(av, i, FALSE);
5771                             STRLEN len;
5772                             const char * const s = SvPV_const(sv, len);
5773
5774                             if (len <= plen && memEQ(s, (char*)p, len)) {
5775                                 *lenp = len;
5776                                 match = TRUE;
5777                                 break;
5778                             }
5779                         }
5780                     }
5781                     if (!match) {
5782                         U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
5783                         STRLEN tmplen;
5784
5785                         to_utf8_fold(p, tmpbuf, &tmplen);
5786                         if (swash_fetch(sw, tmpbuf, do_utf8))
5787                             match = TRUE;
5788                     }
5789                 }
5790             }
5791         }
5792         if (match && lenp && *lenp == 0)
5793             *lenp = UNISKIP(NATIVE_TO_UNI(c));
5794     }
5795     if (!match && c < 256) {
5796         if (ANYOF_BITMAP_TEST(n, c))
5797             match = TRUE;
5798         else if (flags & ANYOF_FOLD) {
5799             U8 f;
5800
5801             if (flags & ANYOF_LOCALE) {
5802                 PL_reg_flags |= RF_tainted;
5803                 f = PL_fold_locale[c];
5804             }
5805             else
5806                 f = PL_fold[c];
5807             if (f != c && ANYOF_BITMAP_TEST(n, f))
5808                 match = TRUE;
5809         }
5810
5811         if (!match && (flags & ANYOF_CLASS)) {
5812             PL_reg_flags |= RF_tainted;
5813             if (
5814                 (ANYOF_CLASS_TEST(n, ANYOF_ALNUM)   &&  isALNUM_LC(c))  ||
5815                 (ANYOF_CLASS_TEST(n, ANYOF_NALNUM)  && !isALNUM_LC(c))  ||
5816                 (ANYOF_CLASS_TEST(n, ANYOF_SPACE)   &&  isSPACE_LC(c))  ||
5817                 (ANYOF_CLASS_TEST(n, ANYOF_NSPACE)  && !isSPACE_LC(c))  ||
5818                 (ANYOF_CLASS_TEST(n, ANYOF_DIGIT)   &&  isDIGIT_LC(c))  ||
5819                 (ANYOF_CLASS_TEST(n, ANYOF_NDIGIT)  && !isDIGIT_LC(c))  ||
5820                 (ANYOF_CLASS_TEST(n, ANYOF_ALNUMC)  &&  isALNUMC_LC(c)) ||
5821                 (ANYOF_CLASS_TEST(n, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
5822                 (ANYOF_CLASS_TEST(n, ANYOF_ALPHA)   &&  isALPHA_LC(c))  ||
5823                 (ANYOF_CLASS_TEST(n, ANYOF_NALPHA)  && !isALPHA_LC(c))  ||
5824                 (ANYOF_CLASS_TEST(n, ANYOF_ASCII)   &&  isASCII(c))     ||
5825                 (ANYOF_CLASS_TEST(n, ANYOF_NASCII)  && !isASCII(c))     ||
5826                 (ANYOF_CLASS_TEST(n, ANYOF_CNTRL)   &&  isCNTRL_LC(c))  ||
5827                 (ANYOF_CLASS_TEST(n, ANYOF_NCNTRL)  && !isCNTRL_LC(c))  ||
5828                 (ANYOF_CLASS_TEST(n, ANYOF_GRAPH)   &&  isGRAPH_LC(c))  ||
5829                 (ANYOF_CLASS_TEST(n, ANYOF_NGRAPH)  && !isGRAPH_LC(c))  ||
5830                 (ANYOF_CLASS_TEST(n, ANYOF_LOWER)   &&  isLOWER_LC(c))  ||
5831                 (ANYOF_CLASS_TEST(n, ANYOF_NLOWER)  && !isLOWER_LC(c))  ||
5832                 (ANYOF_CLASS_TEST(n, ANYOF_PRINT)   &&  isPRINT_LC(c))  ||
5833                 (ANYOF_CLASS_TEST(n, ANYOF_NPRINT)  && !isPRINT_LC(c))  ||
5834                 (ANYOF_CLASS_TEST(n, ANYOF_PUNCT)   &&  isPUNCT_LC(c))  ||
5835                 (ANYOF_CLASS_TEST(n, ANYOF_NPUNCT)  && !isPUNCT_LC(c))  ||
5836                 (ANYOF_CLASS_TEST(n, ANYOF_UPPER)   &&  isUPPER_LC(c))  ||
5837                 (ANYOF_CLASS_TEST(n, ANYOF_NUPPER)  && !isUPPER_LC(c))  ||
5838                 (ANYOF_CLASS_TEST(n, ANYOF_XDIGIT)  &&  isXDIGIT(c))    ||
5839                 (ANYOF_CLASS_TEST(n, ANYOF_NXDIGIT) && !isXDIGIT(c))    ||
5840                 (ANYOF_CLASS_TEST(n, ANYOF_PSXSPC)  &&  isPSXSPC(c))    ||
5841                 (ANYOF_CLASS_TEST(n, ANYOF_NPSXSPC) && !isPSXSPC(c))    ||
5842                 (ANYOF_CLASS_TEST(n, ANYOF_BLANK)   &&  isBLANK(c))     ||
5843                 (ANYOF_CLASS_TEST(n, ANYOF_NBLANK)  && !isBLANK(c))
5844                 ) /* How's that for a conditional? */
5845             {
5846                 match = TRUE;
5847             }
5848         }
5849     }
5850
5851     return (flags & ANYOF_INVERT) ? !match : match;
5852 }
5853
5854 STATIC U8 *
5855 S_reghop3(U8 *s, I32 off, const U8* lim)
5856 {
5857     dVAR;
5858
5859     PERL_ARGS_ASSERT_REGHOP3;
5860
5861     if (off >= 0) {
5862         while (off-- && s < lim) {
5863             /* XXX could check well-formedness here */
5864             s += UTF8SKIP(s);
5865         }
5866     }
5867     else {
5868         while (off++ && s > lim) {
5869             s--;
5870             if (UTF8_IS_CONTINUED(*s)) {
5871                 while (s > lim && UTF8_IS_CONTINUATION(*s))
5872                     s--;
5873             }
5874             /* XXX could check well-formedness here */
5875         }
5876     }
5877     return s;
5878 }
5879
5880 #ifdef XXX_dmq
5881 /* there are a bunch of places where we use two reghop3's that should
5882    be replaced with this routine. but since thats not done yet
5883    we ifdef it out - dmq
5884 */
5885 STATIC U8 *
5886 S_reghop4(U8 *s, I32 off, const U8* llim, const U8* rlim)
5887 {
5888     dVAR;
5889
5890     PERL_ARGS_ASSERT_REGHOP4;
5891
5892     if (off >= 0) {
5893         while (off-- && s < rlim) {
5894             /* XXX could check well-formedness here */
5895             s += UTF8SKIP(s);
5896         }
5897     }
5898     else {
5899         while (off++ && s > llim) {
5900             s--;
5901             if (UTF8_IS_CONTINUED(*s)) {
5902                 while (s > llim && UTF8_IS_CONTINUATION(*s))
5903                     s--;
5904             }
5905             /* XXX could check well-formedness here */
5906         }
5907     }
5908     return s;
5909 }
5910 #endif
5911
5912 STATIC U8 *
5913 S_reghopmaybe3(U8* s, I32 off, const U8* lim)
5914 {
5915     dVAR;
5916
5917     PERL_ARGS_ASSERT_REGHOPMAYBE3;
5918
5919     if (off >= 0) {
5920         while (off-- && s < lim) {
5921             /* XXX could check well-formedness here */
5922             s += UTF8SKIP(s);
5923         }
5924         if (off >= 0)
5925             return NULL;
5926     }
5927     else {
5928         while (off++ && s > lim) {
5929             s--;
5930             if (UTF8_IS_CONTINUED(*s)) {
5931                 while (s > lim && UTF8_IS_CONTINUATION(*s))
5932                     s--;
5933             }
5934             /* XXX could check well-formedness here */
5935         }
5936         if (off <= 0)
5937             return NULL;
5938     }
5939     return s;
5940 }
5941
5942 static void
5943 restore_pos(pTHX_ void *arg)
5944 {
5945     dVAR;
5946     regexp * const rex = (regexp *)arg;
5947     if (PL_reg_eval_set) {
5948         if (PL_reg_oldsaved) {
5949             rex->subbeg = PL_reg_oldsaved;
5950             rex->sublen = PL_reg_oldsavedlen;
5951 #ifdef PERL_OLD_COPY_ON_WRITE
5952             rex->saved_copy = PL_nrs;
5953 #endif
5954             RXp_MATCH_COPIED_on(rex);
5955         }
5956         PL_reg_magic->mg_len = PL_reg_oldpos;
5957         PL_reg_eval_set = 0;
5958         PL_curpm = PL_reg_oldcurpm;
5959     }
5960 }
5961
5962 STATIC void
5963 S_to_utf8_substr(pTHX_ register regexp *prog)
5964 {
5965     int i = 1;
5966
5967     PERL_ARGS_ASSERT_TO_UTF8_SUBSTR;
5968
5969     do {
5970         if (prog->substrs->data[i].substr
5971             && !prog->substrs->data[i].utf8_substr) {
5972             SV* const sv = newSVsv(prog->substrs->data[i].substr);
5973             prog->substrs->data[i].utf8_substr = sv;
5974             sv_utf8_upgrade(sv);
5975             if (SvVALID(prog->substrs->data[i].substr)) {
5976                 const U8 flags = BmFLAGS(prog->substrs->data[i].substr);
5977                 if (flags & FBMcf_TAIL) {
5978                     /* Trim the trailing \n that fbm_compile added last
5979                        time.  */
5980                     SvCUR_set(sv, SvCUR(sv) - 1);
5981                     /* Whilst this makes the SV technically "invalid" (as its
5982                        buffer is no longer followed by "\0") when fbm_compile()
5983                        adds the "\n" back, a "\0" is restored.  */
5984                 }
5985                 fbm_compile(sv, flags);
5986             }
5987             if (prog->substrs->data[i].substr == prog->check_substr)
5988                 prog->check_utf8 = sv;
5989         }
5990     } while (i--);
5991 }
5992
5993 STATIC void
5994 S_to_byte_substr(pTHX_ register regexp *prog)
5995 {
5996     dVAR;
5997     int i = 1;
5998
5999     PERL_ARGS_ASSERT_TO_BYTE_SUBSTR;
6000
6001     do {
6002         if (prog->substrs->data[i].utf8_substr
6003             && !prog->substrs->data[i].substr) {
6004             SV* sv = newSVsv(prog->substrs->data[i].utf8_substr);
6005             if (sv_utf8_downgrade(sv, TRUE)) {
6006                 if (SvVALID(prog->substrs->data[i].utf8_substr)) {
6007                     const U8 flags
6008                         = BmFLAGS(prog->substrs->data[i].utf8_substr);
6009                     if (flags & FBMcf_TAIL) {
6010                         /* Trim the trailing \n that fbm_compile added last
6011                            time.  */
6012                         SvCUR_set(sv, SvCUR(sv) - 1);
6013                     }
6014                     fbm_compile(sv, flags);
6015                 }
6016             } else {
6017                 SvREFCNT_dec(sv);
6018                 sv = &PL_sv_undef;
6019             }
6020             prog->substrs->data[i].substr = sv;
6021             if (prog->substrs->data[i].utf8_substr == prog->check_utf8)
6022                 prog->check_substr = sv;
6023         }
6024     } while (i--);
6025 }
6026
6027 /*
6028  * Local variables:
6029  * c-indentation-style: bsd
6030  * c-basic-offset: 4
6031  * indent-tabs-mode: t
6032  * End:
6033  *
6034  * ex: set ts=8 sts=4 sw=4 noet:
6035  */