regexec.c

   1 /*    regexec.c
   2  */
   3
   4 /*
   5  * "One Ring to rule them all, One Ring to find them..."
   6  */
   7
   8 /* This file contains functions for executing a regular expression.  See
   9  * also regcomp.c which funnily enough, contains functions for compiling
  10  * a regular expression.
  11  *
  12  * This file is also copied at build time to ext/re/re_exec.c, where
  13  * it's built with -DPERL_EXT_RE_BUILD -DPERL_EXT_RE_DEBUG -DPERL_EXT.
  14  * This causes the main functions to be compiled under new names and with
  15  * debugging support added, which makes "use re 'debug'" work.
  16  */
  17
  18 /* NOTE: this is derived from Henry Spencer's regexp code, and should not
  19  * confused with the original package (see point 3 below).  Thanks, Henry!
  20  */
  21
  22 /* Additional note: this code is very heavily munged from Henry's version
  23  * in places.  In some spots I've traded clarity for efficiency, so don't
  24  * blame Henry for some of the lack of readability.
  25  */
  26
  27 /* The names of the functions have been changed from regcomp and
  28  * regexec to  pregcomp and pregexec in order to avoid conflicts
  29  * with the POSIX routines of the same names.
  30 */
  31
  32 #ifdef PERL_EXT_RE_BUILD
  33 #include "re_top.h"
  34 #endif
  35
  36 /*
  37  * pregcomp and pregexec -- regsub and regerror are not used in perl
  38  *
  39  *      Copyright (c) 1986 by University of Toronto.
  40  *      Written by Henry Spencer.  Not derived from licensed software.
  41  *
  42  *      Permission is granted to anyone to use this software for any
  43  *      purpose on any computer system, and to redistribute it freely,
  44  *      subject to the following restrictions:
  45  *
  46  *      1. The author is not responsible for the consequences of use of
  47  *              this software, no matter how awful, even if they arise
  48  *              from defects in it.
  49  *
  50  *      2. The origin of this software must not be misrepresented, either
  51  *              by explicit claim or by omission.
  52  *
  53  *      3. Altered versions must be plainly marked as such, and must not
  54  *              be misrepresented as being the original software.
  55  *
  56  ****    Alterations to Henry's code are...
  57  ****
  58  ****    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
  59  ****    2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008
  60  ****    by Larry Wall and others
  61  ****
  62  ****    You may distribute under the terms of either the GNU General Public
  63  ****    License or the Artistic License, as specified in the README file.
  64  *
  65  * Beware that some of this code is subtly aware of the way operator
  66  * precedence is structured in regular expressions.  Serious changes in
  67  * regular-expression syntax might require a total rethink.
  68  */
  69 #include "EXTERN.h"
  70 #define PERL_IN_REGEXEC_C
  71 #include "perl.h"
  72
  73 #ifdef PERL_IN_XSUB_RE
  74 #  include "re_comp.h"
  75 #else
  76 #  include "regcomp.h"
  77 #endif
  78
  79 #define RF_tainted      1               /* tainted information used? */
  80 #define RF_warned       2               /* warned about big count? */
  81
  82 #define RF_utf8         8               /* Pattern contains multibyte chars? */
  83
  84 #define UTF ((PL_reg_flags & RF_utf8) != 0)
  85
  86 #define RS_init         1               /* eval environment created */
  87 #define RS_set          2               /* replsv value is set */
  88
  89 #ifndef STATIC
  90 #define STATIC  static
  91 #endif
  92
  93 #define REGINCLASS(prog,p,c)  (ANYOF_FLAGS(p) ? reginclass(prog,p,c,0,0) : ANYOF_BITMAP_TEST(p,*(c)))
  94
  95 /*
  96  * Forwards.
  97  */
  98
  99 #define CHR_SVLEN(sv) (do_utf8 ? sv_len_utf8(sv) : SvCUR(sv))
 100 #define CHR_DIST(a,b) (PL_reg_match_utf8 ? utf8_distance(a,b) : a - b)
 101
 102 #define HOPc(pos,off) \
 103         (char *)(PL_reg_match_utf8 \
 104             ? reghop3((U8*)pos, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr)) \
 105             : (U8*)(pos + off))
 106 #define HOPBACKc(pos, off) \
 107         (char*)(PL_reg_match_utf8\
 108             ? reghopmaybe3((U8*)pos, -off, (U8*)PL_bostr) \
 109             : (pos - off >= PL_bostr)           \
 110                 ? (U8*)pos - off                \
 111                 : NULL)
 112
 113 #define HOP3(pos,off,lim) (PL_reg_match_utf8 ? reghop3((U8*)(pos), off, (U8*)(lim)) : (U8*)(pos + off))
 114 #define HOP3c(pos,off,lim) ((char*)HOP3(pos,off,lim))
 115
 116 #define LOAD_UTF8_CHARCLASS(class,str) STMT_START { \
 117     if (!CAT2(PL_utf8_,class)) { bool ok; ENTER; save_re_context(); ok=CAT2(is_utf8_,class)((const U8*)str); assert(ok); LEAVE; } } STMT_END
 118 #define LOAD_UTF8_CHARCLASS_ALNUM() LOAD_UTF8_CHARCLASS(alnum,"a")
 119 #define LOAD_UTF8_CHARCLASS_DIGIT() LOAD_UTF8_CHARCLASS(digit,"0")
 120 #define LOAD_UTF8_CHARCLASS_SPACE() LOAD_UTF8_CHARCLASS(space," ")
 121 #define LOAD_UTF8_CHARCLASS_MARK()  LOAD_UTF8_CHARCLASS(mark, "\xcd\x86")
 122
 123 /* TODO: Combine JUMPABLE and HAS_TEXT to cache OP(rn) */
 124
 125 /* for use after a quantifier and before an EXACT-like node -- japhy */
 126 /* it would be nice to rework regcomp.sym to generate this stuff. sigh */
 127 #define JUMPABLE(rn) (      \
 128     OP(rn) == OPEN ||       \
 129     (OP(rn) == CLOSE && (!cur_eval || cur_eval->u.eval.close_paren != ARG(rn))) || \
 130     OP(rn) == EVAL ||   \
 131     OP(rn) == SUSPEND || OP(rn) == IFMATCH || \
 132     OP(rn) == PLUS || OP(rn) == MINMOD || \
 133     OP(rn) == KEEPS || (PL_regkind[OP(rn)] == VERB) || \
 134     (PL_regkind[OP(rn)] == CURLY && ARG1(rn) > 0) \
 135 )
 136 #define IS_EXACT(rn) (PL_regkind[OP(rn)] == EXACT)
 137
 138 #define HAS_TEXT(rn) ( IS_EXACT(rn) || PL_regkind[OP(rn)] == REF )
 139
 140 #if 0
 141 /* Currently these are only used when PL_regkind[OP(rn)] == EXACT so
 142    we don't need this definition. */
 143 #define IS_TEXT(rn)   ( OP(rn)==EXACT   || OP(rn)==REF   || OP(rn)==NREF   )
 144 #define IS_TEXTF(rn)  ( OP(rn)==EXACTF  || OP(rn)==REFF  || OP(rn)==NREFF  )
 145 #define IS_TEXTFL(rn) ( OP(rn)==EXACTFL || OP(rn)==REFFL || OP(rn)==NREFFL )
 146
 147 #else
 148 /* ... so we use this as its faster. */
 149 #define IS_TEXT(rn)   ( OP(rn)==EXACT   )
 150 #define IS_TEXTF(rn)  ( OP(rn)==EXACTF  )
 151 #define IS_TEXTFL(rn) ( OP(rn)==EXACTFL )
 152
 153 #endif
 154
 155 /*
 156   Search for mandatory following text node; for lookahead, the text must
 157   follow but for lookbehind (rn->flags != 0) we skip to the next step.
 158 */
 159 #define FIND_NEXT_IMPT(rn) STMT_START { \
 160     while (JUMPABLE(rn)) { \
 161         const OPCODE type = OP(rn); \
 162         if (type == SUSPEND || PL_regkind[type] == CURLY) \
 163             rn = NEXTOPER(NEXTOPER(rn)); \
 164         else if (type == PLUS) \
 165             rn = NEXTOPER(rn); \
 166         else if (type == IFMATCH) \
 167             rn = (rn->flags == 0) ? NEXTOPER(NEXTOPER(rn)) : rn + ARG(rn); \
 168         else rn += NEXT_OFF(rn); \
 169     } \
 170 } STMT_END
 171
 172
 173 static void restore_pos(pTHX_ void *arg);
 174
 175 STATIC CHECKPOINT
 176 S_regcppush(pTHX_ I32 parenfloor)
 177 {
 178     dVAR;
 179     const int retval = PL_savestack_ix;
 180 #define REGCP_PAREN_ELEMS 4
 181     const int paren_elems_to_push = (PL_regsize - parenfloor) * REGCP_PAREN_ELEMS;
 182     int p;
 183     GET_RE_DEBUG_FLAGS_DECL;
 184
 185     if (paren_elems_to_push < 0)
 186         Perl_croak(aTHX_ "panic: paren_elems_to_push < 0");
 187
 188 #define REGCP_OTHER_ELEMS 7
 189     SSGROW(paren_elems_to_push + REGCP_OTHER_ELEMS);
 190
 191     for (p = PL_regsize; p > parenfloor; p--) {
 192 /* REGCP_PARENS_ELEMS are pushed per pairs of parentheses. */
 193         SSPUSHINT(PL_regoffs[p].end);
 194         SSPUSHINT(PL_regoffs[p].start);
 195         SSPUSHPTR(PL_reg_start_tmp[p]);
 196         SSPUSHINT(p);
 197         DEBUG_BUFFERS_r(PerlIO_printf(Perl_debug_log,
 198           "     saving \\%"UVuf" %"IVdf"(%"IVdf")..%"IVdf"\n",
 199                       (UV)p, (IV)PL_regoffs[p].start,
 200                       (IV)(PL_reg_start_tmp[p] - PL_bostr),
 201                       (IV)PL_regoffs[p].end
 202         ));
 203     }
 204 /* REGCP_OTHER_ELEMS are pushed in any case, parentheses or no. */
 205     SSPUSHPTR(PL_regoffs);
 206     SSPUSHINT(PL_regsize);
 207     SSPUSHINT(*PL_reglastparen);
 208     SSPUSHINT(*PL_reglastcloseparen);
 209     SSPUSHPTR(PL_reginput);
 210 #define REGCP_FRAME_ELEMS 2
 211 /* REGCP_FRAME_ELEMS are part of the REGCP_OTHER_ELEMS and
 212  * are needed for the regexp context stack bookkeeping. */
 213     SSPUSHINT(paren_elems_to_push + REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
 214     SSPUSHINT(SAVEt_REGCONTEXT); /* Magic cookie. */
 215
 216     return retval;
 217 }
 218
 219 /* These are needed since we do not localize EVAL nodes: */
 220 #define REGCP_SET(cp)                                           \
 221     DEBUG_STATE_r(                                              \
 222             PerlIO_printf(Perl_debug_log,                       \
 223                 "  Setting an EVAL scope, savestack=%"IVdf"\n", \
 224                 (IV)PL_savestack_ix));                          \
 225     cp = PL_savestack_ix
 226
 227 #define REGCP_UNWIND(cp)                                        \
 228     DEBUG_STATE_r(                                              \
 229         if (cp != PL_savestack_ix)                              \
 230             PerlIO_printf(Perl_debug_log,                       \
 231                 "  Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
 232                 (IV)(cp), (IV)PL_savestack_ix));                \
 233     regcpblow(cp)
 234
 235 STATIC char *
 236 S_regcppop(pTHX_ const regexp *rex)
 237 {
 238     dVAR;
 239     U32 i;
 240     char *input;
 241     GET_RE_DEBUG_FLAGS_DECL;
 242
 243     PERL_ARGS_ASSERT_REGCPPOP;
 244
 245     /* Pop REGCP_OTHER_ELEMS before the parentheses loop starts. */
 246     i = SSPOPINT;
 247     assert(i == SAVEt_REGCONTEXT); /* Check that the magic cookie is there. */
 248     i = SSPOPINT; /* Parentheses elements to pop. */
 249     input = (char *) SSPOPPTR;
 250     *PL_reglastcloseparen = SSPOPINT;
 251     *PL_reglastparen = SSPOPINT;
 252     PL_regsize = SSPOPINT;
 253     PL_regoffs=(regexp_paren_pair *) SSPOPPTR;
 254
 255
 256     /* Now restore the parentheses context. */
 257     for (i -= (REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
 258          i > 0; i -= REGCP_PAREN_ELEMS) {
 259         I32 tmps;
 260         U32 paren = (U32)SSPOPINT;
 261         PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
 262         PL_regoffs[paren].start = SSPOPINT;
 263         tmps = SSPOPINT;
 264         if (paren <= *PL_reglastparen)
 265             PL_regoffs[paren].end = tmps;
 266         DEBUG_BUFFERS_r(
 267             PerlIO_printf(Perl_debug_log,
 268                           "     restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
 269                           (UV)paren, (IV)PL_regoffs[paren].start,
 270                           (IV)(PL_reg_start_tmp[paren] - PL_bostr),
 271                           (IV)PL_regoffs[paren].end,
 272                           (paren > *PL_reglastparen ? "(no)" : ""));
 273         );
 274     }
 275     DEBUG_BUFFERS_r(
 276         if (*PL_reglastparen + 1 <= rex->nparens) {
 277             PerlIO_printf(Perl_debug_log,
 278                           "     restoring \\%"IVdf"..\\%"IVdf" to undef\n",
 279                           (IV)(*PL_reglastparen + 1), (IV)rex->nparens);
 280         }
 281     );
 282 #if 1
 283     /* It would seem that the similar code in regtry()
 284      * already takes care of this, and in fact it is in
 285      * a better location to since this code can #if 0-ed out
 286      * but the code in regtry() is needed or otherwise tests
 287      * requiring null fields (pat.t#187 and split.t#{13,14}
 288      * (as of patchlevel 7877)  will fail.  Then again,
 289      * this code seems to be necessary or otherwise
 290      * this erroneously leaves $1 defined: "1" =~ /^(?:(\d)x)?\d$/
 291      * --jhi updated by dapm */
 292     for (i = *PL_reglastparen + 1; i <= rex->nparens; i++) {
 293         if (i > PL_regsize)
 294             PL_regoffs[i].start = -1;
 295         PL_regoffs[i].end = -1;
 296     }
 297 #endif
 298     return input;
 299 }
 300
 301 #define regcpblow(cp) LEAVE_SCOPE(cp)   /* Ignores regcppush()ed data. */
 302
 303 /*
 304  * pregexec and friends
 305  */
 306
 307 #ifndef PERL_IN_XSUB_RE
 308 /*
 309  - pregexec - match a regexp against a string
 310  */
 311 I32
 312 Perl_pregexec(pTHX_ REGEXP * const prog, char* stringarg, register char *strend,
 313          char *strbeg, I32 minend, SV *screamer, U32 nosave)
 314 /* strend: pointer to null at end of string */
 315 /* strbeg: real beginning of string */
 316 /* minend: end of match must be >=minend after stringarg. */
 317 /* nosave: For optimizations. */
 318 {
 319     PERL_ARGS_ASSERT_PREGEXEC;
 320
 321     return
 322         regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
 323                       nosave ? 0 : REXEC_COPY_STR);
 324 }
 325 #endif
 326
 327 /*
 328  * Need to implement the following flags for reg_anch:
 329  *
 330  * USE_INTUIT_NOML              - Useful to call re_intuit_start() first
 331  * USE_INTUIT_ML
 332  * INTUIT_AUTORITATIVE_NOML     - Can trust a positive answer
 333  * INTUIT_AUTORITATIVE_ML
 334  * INTUIT_ONCE_NOML             - Intuit can match in one location only.
 335  * INTUIT_ONCE_ML
 336  *
 337  * Another flag for this function: SECOND_TIME (so that float substrs
 338  * with giant delta may be not rechecked).
 339  */
 340
 341 /* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
 342
 343 /* If SCREAM, then SvPVX_const(sv) should be compatible with strpos and strend.
 344    Otherwise, only SvCUR(sv) is used to get strbeg. */
 345
 346 /* XXXX We assume that strpos is strbeg unless sv. */
 347
 348 /* XXXX Some places assume that there is a fixed substring.
 349         An update may be needed if optimizer marks as "INTUITable"
 350         RExen without fixed substrings.  Similarly, it is assumed that
 351         lengths of all the strings are no more than minlen, thus they
 352         cannot come from lookahead.
 353         (Or minlen should take into account lookahead.)
 354   NOTE: Some of this comment is not correct. minlen does now take account
 355   of lookahead/behind. Further research is required. -- demerphq
 356
 357 */
 358
 359 /* A failure to find a constant substring means that there is no need to make
 360    an expensive call to REx engine, thus we celebrate a failure.  Similarly,
 361    finding a substring too deep into the string means that less calls to
 362    regtry() should be needed.
 363
 364    REx compiler's optimizer found 4 possible hints:
 365         a) Anchored substring;
 366         b) Fixed substring;
 367         c) Whether we are anchored (beginning-of-line or \G);
 368         d) First node (of those at offset 0) which may distingush positions;
 369    We use a)b)d) and multiline-part of c), and try to find a position in the
 370    string which does not contradict any of them.
 371  */
 372
 373 /* Most of decisions we do here should have been done at compile time.
 374    The nodes of the REx which we used for the search should have been
 375    deleted from the finite automaton. */
 376
 377 char *
 378 Perl_re_intuit_start(pTHX_ REGEXP * const rx, SV *sv, char *strpos,
 379                      char *strend, const U32 flags, re_scream_pos_data *data)
 380 {
 381     dVAR;
 382     struct regexp *const prog = (struct regexp *)SvANY(rx);
 383     register I32 start_shift = 0;
 384     /* Should be nonnegative! */
 385     register I32 end_shift   = 0;
 386     register char *s;
 387     register SV *check;
 388     char *strbeg;
 389     char *t;
 390     const bool do_utf8 = (sv && SvUTF8(sv)) ? 1 : 0; /* if no sv we have to assume bytes */
 391     I32 ml_anch;
 392     register char *other_last = NULL;   /* other substr checked before this */
 393     char *check_at = NULL;              /* check substr found at this pos */
 394     const I32 multiline = prog->extflags & RXf_PMf_MULTILINE;
 395     RXi_GET_DECL(prog,progi);
 396 #ifdef DEBUGGING
 397     const char * const i_strpos = strpos;
 398 #endif
 399     GET_RE_DEBUG_FLAGS_DECL;
 400
 401     PERL_ARGS_ASSERT_RE_INTUIT_START;
 402
 403     RX_MATCH_UTF8_set(rx,do_utf8);
 404
 405     if (RX_UTF8(rx)) {
 406         PL_reg_flags |= RF_utf8;
 407     }
 408     DEBUG_EXECUTE_r(
 409         debug_start_match(rx, do_utf8, strpos, strend,
 410             sv ? "Guessing start of match in sv for"
 411                : "Guessing start of match in string for");
 412               );
 413
 414     /* CHR_DIST() would be more correct here but it makes things slow. */
 415     if (prog->minlen > strend - strpos) {
 416         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 417                               "String too short... [re_intuit_start]\n"));
 418         goto fail;
 419     }
 420
 421     strbeg = (sv && SvPOK(sv)) ? strend - SvCUR(sv) : strpos;
 422     PL_regeol = strend;
 423     if (do_utf8) {
 424         if (!prog->check_utf8 && prog->check_substr)
 425             to_utf8_substr(prog);
 426         check = prog->check_utf8;
 427     } else {
 428         if (!prog->check_substr && prog->check_utf8)
 429             to_byte_substr(prog);
 430         check = prog->check_substr;
 431     }
 432     if (check == &PL_sv_undef) {
 433         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 434                 "Non-utf8 string cannot match utf8 check string\n"));
 435         goto fail;
 436     }
 437     if (prog->extflags & RXf_ANCH) {    /* Match at beg-of-str or after \n */
 438         ml_anch = !( (prog->extflags & RXf_ANCH_SINGLE)
 439                      || ( (prog->extflags & RXf_ANCH_BOL)
 440                           && !multiline ) );    /* Check after \n? */
 441
 442         if (!ml_anch) {
 443           if ( !(prog->extflags & RXf_ANCH_GPOS) /* Checked by the caller */
 444                 && !(prog->intflags & PREGf_IMPLICIT) /* not a real BOL */
 445                /* SvCUR is not set on references: SvRV and SvPVX_const overlap */
 446                && sv && !SvROK(sv)
 447                && (strpos != strbeg)) {
 448               DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
 449               goto fail;
 450           }
 451           if (prog->check_offset_min == prog->check_offset_max &&
 452               !(prog->extflags & RXf_CANY_SEEN)) {
 453             /* Substring at constant offset from beg-of-str... */
 454             I32 slen;
 455
 456             s = HOP3c(strpos, prog->check_offset_min, strend);
 457
 458             if (SvTAIL(check)) {
 459                 slen = SvCUR(check);    /* >= 1 */
 460
 461                 if ( strend - s > slen || strend - s < slen - 1
 462                      || (strend - s == slen && strend[-1] != '\n')) {
 463                     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
 464                     goto fail_finish;
 465                 }
 466                 /* Now should match s[0..slen-2] */
 467                 slen--;
 468                 if (slen && (*SvPVX_const(check) != *s
 469                              || (slen > 1
 470                                  && memNE(SvPVX_const(check), s, slen)))) {
 471                   report_neq:
 472                     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
 473                     goto fail_finish;
 474                 }
 475             }
 476             else if (*SvPVX_const(check) != *s
 477                      || ((slen = SvCUR(check)) > 1
 478                          && memNE(SvPVX_const(check), s, slen)))
 479                 goto report_neq;
 480             check_at = s;
 481             goto success_at_start;
 482           }
 483         }
 484         /* Match is anchored, but substr is not anchored wrt beg-of-str. */
 485         s = strpos;
 486         start_shift = prog->check_offset_min; /* okay to underestimate on CC */
 487         end_shift = prog->check_end_shift;
 488
 489         if (!ml_anch) {
 490             const I32 end = prog->check_offset_max + CHR_SVLEN(check)
 491                                          - (SvTAIL(check) != 0);
 492             const I32 eshift = CHR_DIST((U8*)strend, (U8*)s) - end;
 493
 494             if (end_shift < eshift)
 495                 end_shift = eshift;
 496         }
 497     }
 498     else {                              /* Can match at random position */
 499         ml_anch = 0;
 500         s = strpos;
 501         start_shift = prog->check_offset_min;  /* okay to underestimate on CC */
 502         end_shift = prog->check_end_shift;
 503
 504         /* end shift should be non negative here */
 505     }
 506
 507 #ifdef QDEBUGGING       /* 7/99: reports of failure (with the older version) */
 508     if (end_shift < 0)
 509         Perl_croak(aTHX_ "panic: end_shift: %"IVdf" pattern:\n%s\n ",
 510                    (IV)end_shift, RX_PRECOMP(prog));
 511 #endif
 512
 513   restart:
 514     /* Find a possible match in the region s..strend by looking for
 515        the "check" substring in the region corrected by start/end_shift. */
 516
 517     {
 518         I32 srch_start_shift = start_shift;
 519         I32 srch_end_shift = end_shift;
 520         if (srch_start_shift < 0 && strbeg - s > srch_start_shift) {
 521             srch_end_shift -= ((strbeg - s) - srch_start_shift);
 522             srch_start_shift = strbeg - s;
 523         }
 524     DEBUG_OPTIMISE_MORE_r({
 525         PerlIO_printf(Perl_debug_log, "Check offset min: %"IVdf" Start shift: %"IVdf" End shift %"IVdf" Real End Shift: %"IVdf"\n",
 526             (IV)prog->check_offset_min,
 527             (IV)srch_start_shift,
 528             (IV)srch_end_shift,
 529             (IV)prog->check_end_shift);
 530     });
 531
 532     if (flags & REXEC_SCREAM) {
 533         I32 p = -1;                     /* Internal iterator of scream. */
 534         I32 * const pp = data ? data->scream_pos : &p;
 535
 536         if (PL_screamfirst[BmRARE(check)] >= 0
 537             || ( BmRARE(check) == '\n'
 538                  && (BmPREVIOUS(check) == SvCUR(check) - 1)
 539                  && SvTAIL(check) ))
 540             s = screaminstr(sv, check,
 541                             srch_start_shift + (s - strbeg), srch_end_shift, pp, 0);
 542         else
 543             goto fail_finish;
 544         /* we may be pointing at the wrong string */
 545         if (s && RXp_MATCH_COPIED(prog))
 546             s = strbeg + (s - SvPVX_const(sv));
 547         if (data)
 548             *data->scream_olds = s;
 549     }
 550     else {
 551         U8* start_point;
 552         U8* end_point;
 553         if (prog->extflags & RXf_CANY_SEEN) {
 554             start_point= (U8*)(s + srch_start_shift);
 555             end_point= (U8*)(strend - srch_end_shift);
 556         } else {
 557             start_point= HOP3(s, srch_start_shift, srch_start_shift < 0 ? strbeg : strend);
 558             end_point= HOP3(strend, -srch_end_shift, strbeg);
 559         }
 560         DEBUG_OPTIMISE_MORE_r({
 561             PerlIO_printf(Perl_debug_log, "fbm_instr len=%d str=<%.*s>\n",
 562                 (int)(end_point - start_point),
 563                 (int)(end_point - start_point) > 20 ? 20 : (int)(end_point - start_point),
 564                 start_point);
 565         });
 566
 567         s = fbm_instr( start_point, end_point,
 568                       check, multiline ? FBMrf_MULTILINE : 0);
 569     }
 570     }
 571     /* Update the count-of-usability, remove useless subpatterns,
 572         unshift s.  */
 573
 574     DEBUG_EXECUTE_r({
 575         RE_PV_QUOTED_DECL(quoted, do_utf8, PERL_DEBUG_PAD_ZERO(0),
 576             SvPVX_const(check), RE_SV_DUMPLEN(check), 30);
 577         PerlIO_printf(Perl_debug_log, "%s %s substr %s%s%s",
 578                           (s ? "Found" : "Did not find"),
 579             (check == (do_utf8 ? prog->anchored_utf8 : prog->anchored_substr)
 580                 ? "anchored" : "floating"),
 581             quoted,
 582             RE_SV_TAIL(check),
 583             (s ? " at offset " : "...\n") );
 584     });
 585
 586     if (!s)
 587         goto fail_finish;
 588     /* Finish the diagnostic message */
 589     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
 590
 591     /* XXX dmq: first branch is for positive lookbehind...
 592        Our check string is offset from the beginning of the pattern.
 593        So we need to do any stclass tests offset forward from that
 594        point. I think. :-(
 595      */
 596
 597
 598
 599     check_at=s;
 600
 601
 602     /* Got a candidate.  Check MBOL anchoring, and the *other* substr.
 603        Start with the other substr.
 604        XXXX no SCREAM optimization yet - and a very coarse implementation
 605        XXXX /ttx+/ results in anchored="ttx", floating="x".  floating will
 606                 *always* match.  Probably should be marked during compile...
 607        Probably it is right to do no SCREAM here...
 608      */
 609
 610     if (do_utf8 ? (prog->float_utf8 && prog->anchored_utf8)
 611                 : (prog->float_substr && prog->anchored_substr))
 612     {
 613         /* Take into account the "other" substring. */
 614         /* XXXX May be hopelessly wrong for UTF... */
 615         if (!other_last)
 616             other_last = strpos;
 617         if (check == (do_utf8 ? prog->float_utf8 : prog->float_substr)) {
 618           do_other_anchored:
 619             {
 620                 char * const last = HOP3c(s, -start_shift, strbeg);
 621                 char *last1, *last2;
 622                 char * const saved_s = s;
 623                 SV* must;
 624
 625                 t = s - prog->check_offset_max;
 626                 if (s - strpos > prog->check_offset_max  /* signed-corrected t > strpos */
 627                     && (!do_utf8
 628                         || ((t = (char*)reghopmaybe3((U8*)s, -(prog->check_offset_max), (U8*)strpos))
 629                             && t > strpos)))
 630                     NOOP;
 631                 else
 632                     t = strpos;
 633                 t = HOP3c(t, prog->anchored_offset, strend);
 634                 if (t < other_last)     /* These positions already checked */
 635                     t = other_last;
 636                 last2 = last1 = HOP3c(strend, -prog->minlen, strbeg);
 637                 if (last < last1)
 638                     last1 = last;
 639                 /* XXXX It is not documented what units *_offsets are in.
 640                    We assume bytes, but this is clearly wrong.
 641                    Meaning this code needs to be carefully reviewed for errors.
 642                    dmq.
 643                   */
 644
 645                 /* On end-of-str: see comment below. */
 646                 must = do_utf8 ? prog->anchored_utf8 : prog->anchored_substr;
 647                 if (must == &PL_sv_undef) {
 648                     s = (char*)NULL;
 649                     DEBUG_r(must = prog->anchored_utf8);        /* for debug */
 650                 }
 651                 else
 652                     s = fbm_instr(
 653                         (unsigned char*)t,
 654                         HOP3(HOP3(last1, prog->anchored_offset, strend)
 655                                 + SvCUR(must), -(SvTAIL(must)!=0), strbeg),
 656                         must,
 657                         multiline ? FBMrf_MULTILINE : 0
 658                     );
 659                 DEBUG_EXECUTE_r({
 660                     RE_PV_QUOTED_DECL(quoted, do_utf8, PERL_DEBUG_PAD_ZERO(0),
 661                         SvPVX_const(must), RE_SV_DUMPLEN(must), 30);
 662                     PerlIO_printf(Perl_debug_log, "%s anchored substr %s%s",
 663                         (s ? "Found" : "Contradicts"),
 664                         quoted, RE_SV_TAIL(must));
 665                 });
 666
 667
 668                 if (!s) {
 669                     if (last1 >= last2) {
 670                         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 671                                                 ", giving up...\n"));
 672                         goto fail_finish;
 673                     }
 674                     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 675                         ", trying floating at offset %ld...\n",
 676                         (long)(HOP3c(saved_s, 1, strend) - i_strpos)));
 677                     other_last = HOP3c(last1, prog->anchored_offset+1, strend);
 678                     s = HOP3c(last, 1, strend);
 679                     goto restart;
 680                 }
 681                 else {
 682                     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
 683                           (long)(s - i_strpos)));
 684                     t = HOP3c(s, -prog->anchored_offset, strbeg);
 685                     other_last = HOP3c(s, 1, strend);
 686                     s = saved_s;
 687                     if (t == strpos)
 688                         goto try_at_start;
 689                     goto try_at_offset;
 690                 }
 691             }
 692         }
 693         else {          /* Take into account the floating substring. */
 694             char *last, *last1;
 695             char * const saved_s = s;
 696             SV* must;
 697
 698             t = HOP3c(s, -start_shift, strbeg);
 699             last1 = last =
 700                 HOP3c(strend, -prog->minlen + prog->float_min_offset, strbeg);
 701             if (CHR_DIST((U8*)last, (U8*)t) > prog->float_max_offset)
 702                 last = HOP3c(t, prog->float_max_offset, strend);
 703             s = HOP3c(t, prog->float_min_offset, strend);
 704             if (s < other_last)
 705                 s = other_last;
 706  /* XXXX It is not documented what units *_offsets are in.  Assume bytes.  */
 707             must = do_utf8 ? prog->float_utf8 : prog->float_substr;
 708             /* fbm_instr() takes into account exact value of end-of-str
 709                if the check is SvTAIL(ed).  Since false positives are OK,
 710                and end-of-str is not later than strend we are OK. */
 711             if (must == &PL_sv_undef) {
 712                 s = (char*)NULL;
 713                 DEBUG_r(must = prog->float_utf8);       /* for debug message */
 714             }
 715             else
 716                 s = fbm_instr((unsigned char*)s,
 717                               (unsigned char*)last + SvCUR(must)
 718                                   - (SvTAIL(must)!=0),
 719                               must, multiline ? FBMrf_MULTILINE : 0);
 720             DEBUG_EXECUTE_r({
 721                 RE_PV_QUOTED_DECL(quoted, do_utf8, PERL_DEBUG_PAD_ZERO(0),
 722                     SvPVX_const(must), RE_SV_DUMPLEN(must), 30);
 723                 PerlIO_printf(Perl_debug_log, "%s floating substr %s%s",
 724                     (s ? "Found" : "Contradicts"),
 725                     quoted, RE_SV_TAIL(must));
 726             });
 727             if (!s) {
 728                 if (last1 == last) {
 729                     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 730                                             ", giving up...\n"));
 731                     goto fail_finish;
 732                 }
 733                 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 734                     ", trying anchored starting at offset %ld...\n",
 735                     (long)(saved_s + 1 - i_strpos)));
 736                 other_last = last;
 737                 s = HOP3c(t, 1, strend);
 738                 goto restart;
 739             }
 740             else {
 741                 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
 742                       (long)(s - i_strpos)));
 743                 other_last = s; /* Fix this later. --Hugo */
 744                 s = saved_s;
 745                 if (t == strpos)
 746                     goto try_at_start;
 747                 goto try_at_offset;
 748             }
 749         }
 750     }
 751
 752
 753     t= (char*)HOP3( s, -prog->check_offset_max, (prog->check_offset_max<0) ? strend : strpos);
 754
 755     DEBUG_OPTIMISE_MORE_r(
 756         PerlIO_printf(Perl_debug_log,
 757             "Check offset min:%"IVdf" max:%"IVdf" S:%"IVdf" t:%"IVdf" D:%"IVdf" end:%"IVdf"\n",
 758             (IV)prog->check_offset_min,
 759             (IV)prog->check_offset_max,
 760             (IV)(s-strpos),
 761             (IV)(t-strpos),
 762             (IV)(t-s),
 763             (IV)(strend-strpos)
 764         )
 765     );
 766
 767     if (s - strpos > prog->check_offset_max  /* signed-corrected t > strpos */
 768         && (!do_utf8
 769             || ((t = (char*)reghopmaybe3((U8*)s, -prog->check_offset_max, (U8*) ((prog->check_offset_max<0) ? strend : strpos)))
 770                  && t > strpos)))
 771     {
 772         /* Fixed substring is found far enough so that the match
 773            cannot start at strpos. */
 774       try_at_offset:
 775         if (ml_anch && t[-1] != '\n') {
 776             /* Eventually fbm_*() should handle this, but often
 777                anchored_offset is not 0, so this check will not be wasted. */
 778             /* XXXX In the code below we prefer to look for "^" even in
 779                presence of anchored substrings.  And we search even
 780                beyond the found float position.  These pessimizations
 781                are historical artefacts only.  */
 782           find_anchor:
 783             while (t < strend - prog->minlen) {
 784                 if (*t == '\n') {
 785                     if (t < check_at - prog->check_offset_min) {
 786                         if (do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) {
 787                             /* Since we moved from the found position,
 788                                we definitely contradict the found anchored
 789                                substr.  Due to the above check we do not
 790                                contradict "check" substr.
 791                                Thus we can arrive here only if check substr
 792                                is float.  Redo checking for "other"=="fixed".
 793                              */
 794                             strpos = t + 1;
 795                             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
 796                                 PL_colors[0], PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
 797                             goto do_other_anchored;
 798                         }
 799                         /* We don't contradict the found floating substring. */
 800                         /* XXXX Why not check for STCLASS? */
 801                         s = t + 1;
 802                         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
 803                             PL_colors[0], PL_colors[1], (long)(s - i_strpos)));
 804                         goto set_useful;
 805                     }
 806                     /* Position contradicts check-string */
 807                     /* XXXX probably better to look for check-string
 808                        than for "\n", so one should lower the limit for t? */
 809                     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n",
 810                         PL_colors[0], PL_colors[1], (long)(t + 1 - i_strpos)));
 811                     other_last = strpos = s = t + 1;
 812                     goto restart;
 813                 }
 814                 t++;
 815             }
 816             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
 817                         PL_colors[0], PL_colors[1]));
 818             goto fail_finish;
 819         }
 820         else {
 821             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Starting position does not contradict /%s^%s/m...\n",
 822                         PL_colors[0], PL_colors[1]));
 823         }
 824         s = t;
 825       set_useful:
 826         ++BmUSEFUL(do_utf8 ? prog->check_utf8 : prog->check_substr);    /* hooray/5 */
 827     }
 828     else {
 829         /* The found string does not prohibit matching at strpos,
 830            - no optimization of calling REx engine can be performed,
 831            unless it was an MBOL and we are not after MBOL,
 832            or a future STCLASS check will fail this. */
 833       try_at_start:
 834         /* Even in this situation we may use MBOL flag if strpos is offset
 835            wrt the start of the string. */
 836         if (ml_anch && sv && !SvROK(sv) /* See prev comment on SvROK */
 837             && (strpos != strbeg) && strpos[-1] != '\n'
 838             /* May be due to an implicit anchor of m{.*foo}  */
 839             && !(prog->intflags & PREGf_IMPLICIT))
 840         {
 841             t = strpos;
 842             goto find_anchor;
 843         }
 844         DEBUG_EXECUTE_r( if (ml_anch)
 845             PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n",
 846                           (long)(strpos - i_strpos), PL_colors[0], PL_colors[1]);
 847         );
 848       success_at_start:
 849         if (!(prog->intflags & PREGf_NAUGHTY)   /* XXXX If strpos moved? */
 850             && (do_utf8 ? (
 851                 prog->check_utf8                /* Could be deleted already */
 852                 && --BmUSEFUL(prog->check_utf8) < 0
 853                 && (prog->check_utf8 == prog->float_utf8)
 854             ) : (
 855                 prog->check_substr              /* Could be deleted already */
 856                 && --BmUSEFUL(prog->check_substr) < 0
 857                 && (prog->check_substr == prog->float_substr)
 858             )))
 859         {
 860             /* If flags & SOMETHING - do not do it many times on the same match */
 861             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "... Disabling check substring...\n"));
 862             SvREFCNT_dec(do_utf8 ? prog->check_utf8 : prog->check_substr);
 863             if (do_utf8 ? prog->check_substr : prog->check_utf8)
 864                 SvREFCNT_dec(do_utf8 ? prog->check_substr : prog->check_utf8);
 865             prog->check_substr = prog->check_utf8 = NULL;       /* disable */
 866             prog->float_substr = prog->float_utf8 = NULL;       /* clear */
 867             check = NULL;                       /* abort */
 868             s = strpos;
 869             /* XXXX This is a remnant of the old implementation.  It
 870                     looks wasteful, since now INTUIT can use many
 871                     other heuristics. */
 872             prog->extflags &= ~RXf_USE_INTUIT;
 873         }
 874         else
 875             s = strpos;
 876     }
 877
 878     /* Last resort... */
 879     /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
 880     /* trie stclasses are too expensive to use here, we are better off to
 881        leave it to regmatch itself */
 882     if (progi->regstclass && PL_regkind[OP(progi->regstclass)]!=TRIE) {
 883         /* minlen == 0 is possible if regstclass is \b or \B,
 884            and the fixed substr is ''$.
 885            Since minlen is already taken into account, s+1 is before strend;
 886            accidentally, minlen >= 1 guaranties no false positives at s + 1
 887            even for \b or \B.  But (minlen? 1 : 0) below assumes that
 888            regstclass does not come from lookahead...  */
 889         /* If regstclass takes bytelength more than 1: If charlength==1, OK.
 890            This leaves EXACTF only, which is dealt with in find_byclass().  */
 891         const U8* const str = (U8*)STRING(progi->regstclass);
 892         const int cl_l = (PL_regkind[OP(progi->regstclass)] == EXACT
 893                     ? CHR_DIST(str+STR_LEN(progi->regstclass), str)
 894                     : 1);
 895         char * endpos;
 896         if (prog->anchored_substr || prog->anchored_utf8 || ml_anch)
 897             endpos= HOP3c(s, (prog->minlen ? cl_l : 0), strend);
 898         else if (prog->float_substr || prog->float_utf8)
 899             endpos= HOP3c(HOP3c(check_at, -start_shift, strbeg), cl_l, strend);
 900         else
 901             endpos= strend;
 902
 903         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "start_shift: %"IVdf" check_at: %"IVdf" s: %"IVdf" endpos: %"IVdf"\n",
 904                                       (IV)start_shift, (IV)(check_at - strbeg), (IV)(s - strbeg), (IV)(endpos - strbeg)));
 905
 906         t = s;
 907         s = find_byclass(prog, progi->regstclass, s, endpos, NULL);
 908         if (!s) {
 909 #ifdef DEBUGGING
 910             const char *what = NULL;
 911 #endif
 912             if (endpos == strend) {
 913                 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
 914                                 "Could not match STCLASS...\n") );
 915                 goto fail;
 916             }
 917             DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
 918                                    "This position contradicts STCLASS...\n") );
 919             if ((prog->extflags & RXf_ANCH) && !ml_anch)
 920                 goto fail;
 921             /* Contradict one of substrings */
 922             if (prog->anchored_substr || prog->anchored_utf8) {
 923                 if ((do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) == check) {
 924                     DEBUG_EXECUTE_r( what = "anchored" );
 925                   hop_and_restart:
 926                     s = HOP3c(t, 1, strend);
 927                     if (s + start_shift + end_shift > strend) {
 928                         /* XXXX Should be taken into account earlier? */
 929                         DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
 930                                                "Could not match STCLASS...\n") );
 931                         goto fail;
 932                     }
 933                     if (!check)
 934                         goto giveup;
 935                     DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
 936                                 "Looking for %s substr starting at offset %ld...\n",
 937                                  what, (long)(s + start_shift - i_strpos)) );
 938                     goto restart;
 939                 }
 940                 /* Have both, check_string is floating */
 941                 if (t + start_shift >= check_at) /* Contradicts floating=check */
 942                     goto retry_floating_check;
 943                 /* Recheck anchored substring, but not floating... */
 944                 s = check_at;
 945                 if (!check)
 946                     goto giveup;
 947                 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
 948                           "Looking for anchored substr starting at offset %ld...\n",
 949                           (long)(other_last - i_strpos)) );
 950                 goto do_other_anchored;
 951             }
 952             /* Another way we could have checked stclass at the
 953                current position only: */
 954             if (ml_anch) {
 955                 s = t = t + 1;
 956                 if (!check)
 957                     goto giveup;
 958                 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
 959                           "Looking for /%s^%s/m starting at offset %ld...\n",
 960                           PL_colors[0], PL_colors[1], (long)(t - i_strpos)) );
 961                 goto try_at_offset;
 962             }
 963             if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))     /* Could have been deleted */
 964                 goto fail;
 965             /* Check is floating subtring. */
 966           retry_floating_check:
 967             t = check_at - start_shift;
 968             DEBUG_EXECUTE_r( what = "floating" );
 969             goto hop_and_restart;
 970         }
 971         if (t != s) {
 972             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 973                         "By STCLASS: moving %ld --> %ld\n",
 974                                   (long)(t - i_strpos), (long)(s - i_strpos))
 975                    );
 976         }
 977         else {
 978             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 979                                   "Does not contradict STCLASS...\n");
 980                    );
 981         }
 982     }
 983   giveup:
 984     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%s%s:%s match at offset %ld\n",
 985                           PL_colors[4], (check ? "Guessed" : "Giving up"),
 986                           PL_colors[5], (long)(s - i_strpos)) );
 987     return s;
 988
 989   fail_finish:                          /* Substring not found */
 990     if (prog->check_substr || prog->check_utf8)         /* could be removed already */
 991         BmUSEFUL(do_utf8 ? prog->check_utf8 : prog->check_substr) += 5; /* hooray */
 992   fail:
 993     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
 994                           PL_colors[4], PL_colors[5]));
 995     return NULL;
 996 }
 997
 998 #define DECL_TRIE_TYPE(scan) \
 999     const enum { trie_plain, trie_utf8, trie_utf8_fold, trie_latin_utf8_fold } \
1000                     trie_type = (scan->flags != EXACT) \
1001                               ? (do_utf8 ? trie_utf8_fold : (UTF ? trie_latin_utf8_fold : trie_plain)) \
1002                               : (do_utf8 ? trie_utf8 : trie_plain)
1003
1004 #define REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc, uscan, len,  \
1005 uvc, charid, foldlen, foldbuf, uniflags) STMT_START {                       \
1006     switch (trie_type) {                                                    \
1007     case trie_utf8_fold:                                                    \
1008         if ( foldlen>0 ) {                                                  \
1009             uvc = utf8n_to_uvuni( uscan, UTF8_MAXLEN, &len, uniflags );     \
1010             foldlen -= len;                                                 \
1011             uscan += len;                                                   \
1012             len=0;                                                          \
1013         } else {                                                            \
1014             uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN, &len, uniflags );   \
1015             uvc = to_uni_fold( uvc, foldbuf, &foldlen );                    \
1016             foldlen -= UNISKIP( uvc );                                      \
1017             uscan = foldbuf + UNISKIP( uvc );                               \
1018         }                                                                   \
1019         break;                                                              \
1020     case trie_latin_utf8_fold:                                              \
1021         if ( foldlen>0 ) {                                                  \
1022             uvc = utf8n_to_uvuni( uscan, UTF8_MAXLEN, &len, uniflags );     \
1023             foldlen -= len;                                                 \
1024             uscan += len;                                                   \
1025             len=0;                                                          \
1026         } else {                                                            \
1027             len = 1;                                                        \
1028             uvc = to_uni_fold( *(U8*)uc, foldbuf, &foldlen );               \
1029             foldlen -= UNISKIP( uvc );                                      \
1030             uscan = foldbuf + UNISKIP( uvc );                               \
1031         }                                                                   \
1032         break;                                                              \
1033     case trie_utf8:                                                         \
1034         uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN, &len, uniflags );       \
1035         break;                                                              \
1036     case trie_plain:                                                        \
1037         uvc = (UV)*uc;                                                      \
1038         len = 1;                                                            \
1039     }                                                                       \
1040                                                                             \
1041     if (uvc < 256) {                                                        \
1042         charid = trie->charmap[ uvc ];                                      \
1043     }                                                                       \
1044     else {                                                                  \
1045         charid = 0;                                                         \
1046         if (widecharmap) {                                                  \
1047             SV** const svpp = hv_fetch(widecharmap,                         \
1048                         (char*)&uvc, sizeof(UV), 0);                        \
1049             if (svpp)                                                       \
1050                 charid = (U16)SvIV(*svpp);                                  \
1051         }                                                                   \
1052     }                                                                       \
1053 } STMT_END
1054
1055 #define REXEC_FBC_EXACTISH_CHECK(CoNd)                 \
1056 {                                                      \
1057     char *my_strend= (char *)strend;                   \
1058     if ( (CoNd)                                        \
1059          && (ln == len ||                              \
1060              !ibcmp_utf8(s, &my_strend, 0,  do_utf8,   \
1061                         m, NULL, ln, (bool)UTF))       \
1062          && (!reginfo || regtry(reginfo, &s)) )        \
1063         goto got_it;                                   \
1064     else {                                             \
1065          U8 foldbuf[UTF8_MAXBYTES_CASE+1];             \
1066          uvchr_to_utf8(tmpbuf, c);                     \
1067          f = to_utf8_fold(tmpbuf, foldbuf, &foldlen);  \
1068          if ( f != c                                   \
1069               && (f == c1 || f == c2)                  \
1070               && (ln == len ||                         \
1071                 !ibcmp_utf8(s, &my_strend, 0,  do_utf8,\
1072                               m, NULL, ln, (bool)UTF)) \
1073               && (!reginfo || regtry(reginfo, &s)) )   \
1074               goto got_it;                             \
1075     }                                                  \
1076 }                                                      \
1077 s += len
1078
1079 #define REXEC_FBC_EXACTISH_SCAN(CoNd)                     \
1080 STMT_START {                                              \
1081     while (s <= e) {                                      \
1082         if ( (CoNd)                                       \
1083              && (ln == 1 || !(OP(c) == EXACTF             \
1084                               ? ibcmp(s, m, ln)           \
1085                               : ibcmp_locale(s, m, ln)))  \
1086              && (!reginfo || regtry(reginfo, &s)) )        \
1087             goto got_it;                                  \
1088         s++;                                              \
1089     }                                                     \
1090 } STMT_END
1091
1092 #define REXEC_FBC_UTF8_SCAN(CoDe)                     \
1093 STMT_START {                                          \
1094     while (s + (uskip = UTF8SKIP(s)) <= strend) {     \
1095         CoDe                                          \
1096         s += uskip;                                   \
1097     }                                                 \
1098 } STMT_END
1099
1100 #define REXEC_FBC_SCAN(CoDe)                          \
1101 STMT_START {                                          \
1102     while (s < strend) {                              \
1103         CoDe                                          \
1104         s++;                                          \
1105     }                                                 \
1106 } STMT_END
1107
1108 #define REXEC_FBC_UTF8_CLASS_SCAN(CoNd)               \
1109 REXEC_FBC_UTF8_SCAN(                                  \
1110     if (CoNd) {                                       \
1111         if (tmp && (!reginfo || regtry(reginfo, &s)))  \
1112             goto got_it;                              \
1113         else                                          \
1114             tmp = doevery;                            \
1115     }                                                 \
1116     else                                              \
1117         tmp = 1;                                      \
1118 )
1119
1120 #define REXEC_FBC_CLASS_SCAN(CoNd)                    \
1121 REXEC_FBC_SCAN(                                       \
1122     if (CoNd) {                                       \
1123         if (tmp && (!reginfo || regtry(reginfo, &s)))  \
1124             goto got_it;                              \
1125         else                                          \
1126             tmp = doevery;                            \
1127     }                                                 \
1128     else                                              \
1129         tmp = 1;                                      \
1130 )
1131
1132 #define REXEC_FBC_TRYIT               \
1133 if ((!reginfo || regtry(reginfo, &s))) \
1134     goto got_it
1135
1136 #define REXEC_FBC_CSCAN(CoNdUtF8,CoNd)                         \
1137     if (do_utf8) {                                             \
1138         REXEC_FBC_UTF8_CLASS_SCAN(CoNdUtF8);                   \
1139     }                                                          \
1140     else {                                                     \
1141         REXEC_FBC_CLASS_SCAN(CoNd);                            \
1142     }                                                          \
1143     break
1144
1145 #define REXEC_FBC_CSCAN_PRELOAD(UtFpReLoAd,CoNdUtF8,CoNd)      \
1146     if (do_utf8) {                                             \
1147         UtFpReLoAd;                                            \
1148         REXEC_FBC_UTF8_CLASS_SCAN(CoNdUtF8);                   \
1149     }                                                          \
1150     else {                                                     \
1151         REXEC_FBC_CLASS_SCAN(CoNd);                            \
1152     }                                                          \
1153     break
1154
1155 #define REXEC_FBC_CSCAN_TAINT(CoNdUtF8,CoNd)                   \
1156     PL_reg_flags |= RF_tainted;                                \
1157     if (do_utf8) {                                             \
1158         REXEC_FBC_UTF8_CLASS_SCAN(CoNdUtF8);                   \
1159     }                                                          \
1160     else {                                                     \
1161         REXEC_FBC_CLASS_SCAN(CoNd);                            \
1162     }                                                          \
1163     break
1164
1165 #define DUMP_EXEC_POS(li,s,doutf8) \
1166     dump_exec_pos(li,s,(PL_regeol),(PL_bostr),(PL_reg_starttry),doutf8)
1167
1168 /* We know what class REx starts with.  Try to find this position... */
1169 /* if reginfo is NULL, its a dryrun */
1170 /* annoyingly all the vars in this routine have different names from their counterparts
1171    in regmatch. /grrr */
1172
1173 STATIC char *
1174 S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
1175     const char *strend, regmatch_info *reginfo)
1176 {
1177         dVAR;
1178         const I32 doevery = (prog->intflags & PREGf_SKIP) == 0;
1179         char *m;
1180         STRLEN ln;
1181         STRLEN lnc;
1182         register STRLEN uskip;
1183         unsigned int c1;
1184         unsigned int c2;
1185         char *e;
1186         register I32 tmp = 1;   /* Scratch variable? */
1187         register const bool do_utf8 = PL_reg_match_utf8;
1188         RXi_GET_DECL(prog,progi);
1189
1190         PERL_ARGS_ASSERT_FIND_BYCLASS;
1191
1192         /* We know what class it must start with. */
1193         switch (OP(c)) {
1194         case ANYOF:
1195             if (do_utf8) {
1196                  REXEC_FBC_UTF8_CLASS_SCAN((ANYOF_FLAGS(c) & ANYOF_UNICODE) ||
1197                           !UTF8_IS_INVARIANT((U8)s[0]) ?
1198                           reginclass(prog, c, (U8*)s, 0, do_utf8) :
1199                           REGINCLASS(prog, c, (U8*)s));
1200             }
1201             else {
1202                  while (s < strend) {
1203                       STRLEN skip = 1;
1204
1205                       if (REGINCLASS(prog, c, (U8*)s) ||
1206                           (ANYOF_FOLD_SHARP_S(c, s, strend) &&
1207                            /* The assignment of 2 is intentional:
1208                             * for the folded sharp s, the skip is 2. */
1209                            (skip = SHARP_S_SKIP))) {
1210                            if (tmp && (!reginfo || regtry(reginfo, &s)))
1211                                 goto got_it;
1212                            else
1213                                 tmp = doevery;
1214                       }
1215                       else
1216                            tmp = 1;
1217                       s += skip;
1218                  }
1219             }
1220             break;
1221         case CANY:
1222             REXEC_FBC_SCAN(
1223                 if (tmp && (!reginfo || regtry(reginfo, &s)))
1224                     goto got_it;
1225                 else
1226                     tmp = doevery;
1227             );
1228             break;
1229         case EXACTF:
1230             m   = STRING(c);
1231             ln  = STR_LEN(c);   /* length to match in octets/bytes */
1232             lnc = (I32) ln;     /* length to match in characters */
1233             if (UTF) {
1234                 STRLEN ulen1, ulen2;
1235                 U8 *sm = (U8 *) m;
1236                 U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
1237                 U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
1238                 /* used by commented-out code below */
1239                 /*const U32 uniflags = UTF8_ALLOW_DEFAULT;*/
1240
1241                 /* XXX: Since the node will be case folded at compile
1242                    time this logic is a little odd, although im not
1243                    sure that its actually wrong. --dmq */
1244
1245                 c1 = to_utf8_lower((U8*)m, tmpbuf1, &ulen1);
1246                 c2 = to_utf8_upper((U8*)m, tmpbuf2, &ulen2);
1247
1248                 /* XXX: This is kinda strange. to_utf8_XYZ returns the
1249                    codepoint of the first character in the converted
1250                    form, yet originally we did the extra step.
1251                    No tests fail by commenting this code out however
1252                    so Ive left it out. -- dmq.
1253
1254                 c1 = utf8n_to_uvchr(tmpbuf1, UTF8_MAXBYTES_CASE,
1255                                     0, uniflags);
1256                 c2 = utf8n_to_uvchr(tmpbuf2, UTF8_MAXBYTES_CASE,
1257                                     0, uniflags);
1258                 */
1259
1260                 lnc = 0;
1261                 while (sm < ((U8 *) m + ln)) {
1262                     lnc++;
1263                     sm += UTF8SKIP(sm);
1264                 }
1265             }
1266             else {
1267                 c1 = *(U8*)m;
1268                 c2 = PL_fold[c1];
1269             }
1270             goto do_exactf;
1271         case EXACTFL:
1272             m   = STRING(c);
1273             ln  = STR_LEN(c);
1274             lnc = (I32) ln;
1275             c1 = *(U8*)m;
1276             c2 = PL_fold_locale[c1];
1277           do_exactf:
1278             e = HOP3c(strend, -((I32)lnc), s);
1279
1280             if (!reginfo && e < s)
1281                 e = s;                  /* Due to minlen logic of intuit() */
1282
1283             /* The idea in the EXACTF* cases is to first find the
1284              * first character of the EXACTF* node and then, if
1285              * necessary, case-insensitively compare the full
1286              * text of the node.  The c1 and c2 are the first
1287              * characters (though in Unicode it gets a bit
1288              * more complicated because there are more cases
1289              * than just upper and lower: one needs to use
1290              * the so-called folding case for case-insensitive
1291              * matching (called "loose matching" in Unicode).
1292              * ibcmp_utf8() will do just that. */
1293
1294             if (do_utf8 || UTF) {
1295                 UV c, f;
1296                 U8 tmpbuf [UTF8_MAXBYTES+1];
1297                 STRLEN len = 1;
1298                 STRLEN foldlen;
1299                 const U32 uniflags = UTF8_ALLOW_DEFAULT;
1300                 if (c1 == c2) {
1301                     /* Upper and lower of 1st char are equal -
1302                      * probably not a "letter". */
1303                     while (s <= e) {
1304                         if (do_utf8) {
1305                             c = utf8n_to_uvchr((U8*)s, UTF8_MAXBYTES, &len,
1306                                            uniflags);
1307                         } else {
1308                             c = *((U8*)s);
1309                         }
1310                         REXEC_FBC_EXACTISH_CHECK(c == c1);
1311                     }
1312                 }
1313                 else {
1314                     while (s <= e) {
1315                         if (do_utf8) {
1316                             c = utf8n_to_uvchr((U8*)s, UTF8_MAXBYTES, &len,
1317                                            uniflags);
1318                         } else {
1319                             c = *((U8*)s);
1320                         }
1321
1322                         /* Handle some of the three Greek sigmas cases.
1323                          * Note that not all the possible combinations
1324                          * are handled here: some of them are handled
1325                          * by the standard folding rules, and some of
1326                          * them (the character class or ANYOF cases)
1327                          * are handled during compiletime in
1328                          * regexec.c:S_regclass(). */
1329                         if (c == (UV)UNICODE_GREEK_CAPITAL_LETTER_SIGMA ||
1330                             c == (UV)UNICODE_GREEK_SMALL_LETTER_FINAL_SIGMA)
1331                             c = (UV)UNICODE_GREEK_SMALL_LETTER_SIGMA;
1332
1333                         REXEC_FBC_EXACTISH_CHECK(c == c1 || c == c2);
1334                     }
1335                 }
1336             }
1337             else {
1338                 /* Neither pattern nor string are UTF8 */
1339                 if (c1 == c2)
1340                     REXEC_FBC_EXACTISH_SCAN(*(U8*)s == c1);
1341                 else
1342                     REXEC_FBC_EXACTISH_SCAN(*(U8*)s == c1 || *(U8*)s == c2);
1343             }
1344             break;
1345         case BOUNDL:
1346             PL_reg_flags |= RF_tainted;
1347             /* FALL THROUGH */
1348         case BOUND:
1349             if (do_utf8) {
1350                 if (s == PL_bostr)
1351                     tmp = '\n';
1352                 else {
1353                     U8 * const r = reghop3((U8*)s, -1, (U8*)PL_bostr);
1354                     tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, UTF8_ALLOW_DEFAULT);
1355                 }
1356                 tmp = ((OP(c) == BOUND ?
1357                         isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
1358                 LOAD_UTF8_CHARCLASS_ALNUM();
1359                 REXEC_FBC_UTF8_SCAN(
1360                     if (tmp == !(OP(c) == BOUND ?
1361                                  (bool)swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
1362                                  isALNUM_LC_utf8((U8*)s)))
1363                     {
1364                         tmp = !tmp;
1365                         REXEC_FBC_TRYIT;
1366                 }
1367                 );
1368             }
1369             else {
1370                 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
1371                 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1372                 REXEC_FBC_SCAN(
1373                     if (tmp ==
1374                         !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
1375                         tmp = !tmp;
1376                         REXEC_FBC_TRYIT;
1377                 }
1378                 );
1379             }
1380             if ((!prog->minlen && tmp) && (!reginfo || regtry(reginfo, &s)))
1381                 goto got_it;
1382             break;
1383         case NBOUNDL:
1384             PL_reg_flags |= RF_tainted;
1385             /* FALL THROUGH */
1386         case NBOUND:
1387             if (do_utf8) {
1388                 if (s == PL_bostr)
1389                     tmp = '\n';
1390                 else {
1391                     U8 * const r = reghop3((U8*)s, -1, (U8*)PL_bostr);
1392                     tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, UTF8_ALLOW_DEFAULT);
1393                 }
1394                 tmp = ((OP(c) == NBOUND ?
1395                         isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
1396                 LOAD_UTF8_CHARCLASS_ALNUM();
1397                 REXEC_FBC_UTF8_SCAN(
1398                     if (tmp == !(OP(c) == NBOUND ?
1399                                  (bool)swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
1400                                  isALNUM_LC_utf8((U8*)s)))
1401                         tmp = !tmp;
1402                     else REXEC_FBC_TRYIT;
1403                 );
1404             }
1405             else {
1406                 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
1407                 tmp = ((OP(c) == NBOUND ?
1408                         isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1409                 REXEC_FBC_SCAN(
1410                     if (tmp ==
1411                         !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
1412                         tmp = !tmp;
1413                     else REXEC_FBC_TRYIT;
1414                 );
1415             }
1416             if ((!prog->minlen && !tmp) && (!reginfo || regtry(reginfo, &s)))
1417                 goto got_it;
1418             break;
1419         case ALNUM:
1420             REXEC_FBC_CSCAN_PRELOAD(
1421                 LOAD_UTF8_CHARCLASS_ALNUM(),
1422                 swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8),
1423                 isALNUM(*s)
1424             );
1425         case ALNUML:
1426             REXEC_FBC_CSCAN_TAINT(
1427                 isALNUM_LC_utf8((U8*)s),
1428                 isALNUM_LC(*s)
1429             );
1430         case NALNUM:
1431             REXEC_FBC_CSCAN_PRELOAD(
1432                 LOAD_UTF8_CHARCLASS_ALNUM(),
1433                 !swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8),
1434                 !isALNUM(*s)
1435             );
1436         case NALNUML:
1437             REXEC_FBC_CSCAN_TAINT(
1438                 !isALNUM_LC_utf8((U8*)s),
1439                 !isALNUM_LC(*s)
1440             );
1441         case SPACE:
1442             REXEC_FBC_CSCAN_PRELOAD(
1443                 LOAD_UTF8_CHARCLASS_SPACE(),
1444                 *s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8),
1445                 isSPACE(*s)
1446             );
1447         case SPACEL:
1448             REXEC_FBC_CSCAN_TAINT(
1449                 *s == ' ' || isSPACE_LC_utf8((U8*)s),
1450                 isSPACE_LC(*s)
1451             );
1452         case NSPACE:
1453             REXEC_FBC_CSCAN_PRELOAD(
1454                 LOAD_UTF8_CHARCLASS_SPACE(),
1455                 !(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8)),
1456                 !isSPACE(*s)
1457             );
1458         case NSPACEL:
1459             REXEC_FBC_CSCAN_TAINT(
1460                 !(*s == ' ' || isSPACE_LC_utf8((U8*)s)),
1461                 !isSPACE_LC(*s)
1462             );
1463         case DIGIT:
1464             REXEC_FBC_CSCAN_PRELOAD(
1465                 LOAD_UTF8_CHARCLASS_DIGIT(),
1466                 swash_fetch(PL_utf8_digit,(U8*)s, do_utf8),
1467                 isDIGIT(*s)
1468             );
1469         case DIGITL:
1470             REXEC_FBC_CSCAN_TAINT(
1471                 isDIGIT_LC_utf8((U8*)s),
1472                 isDIGIT_LC(*s)
1473             );
1474         case NDIGIT:
1475             REXEC_FBC_CSCAN_PRELOAD(
1476                 LOAD_UTF8_CHARCLASS_DIGIT(),
1477                 !swash_fetch(PL_utf8_digit,(U8*)s, do_utf8),
1478                 !isDIGIT(*s)
1479             );
1480         case NDIGITL:
1481             REXEC_FBC_CSCAN_TAINT(
1482                 !isDIGIT_LC_utf8((U8*)s),
1483                 !isDIGIT_LC(*s)
1484             );
1485         case LNBREAK:
1486             REXEC_FBC_CSCAN(
1487                 is_LNBREAK_utf8(s),
1488                 is_LNBREAK_latin1(s)
1489             );
1490         case VERTWS:
1491             REXEC_FBC_CSCAN(
1492                 is_VERTWS_utf8(s),
1493                 is_VERTWS_latin1(s)
1494             );
1495         case NVERTWS:
1496             REXEC_FBC_CSCAN(
1497                 !is_VERTWS_utf8(s),
1498                 !is_VERTWS_latin1(s)
1499             );
1500         case HORIZWS:
1501             REXEC_FBC_CSCAN(
1502                 is_HORIZWS_utf8(s),
1503                 is_HORIZWS_latin1(s)
1504             );
1505         case NHORIZWS:
1506             REXEC_FBC_CSCAN(
1507                 !is_HORIZWS_utf8(s),
1508                 !is_HORIZWS_latin1(s)
1509             );
1510         case AHOCORASICKC:
1511         case AHOCORASICK:
1512             {
1513                 DECL_TRIE_TYPE(c);
1514                 /* what trie are we using right now */
1515                 reg_ac_data *aho
1516                     = (reg_ac_data*)progi->data->data[ ARG( c ) ];
1517                 reg_trie_data *trie
1518                     = (reg_trie_data*)progi->data->data[ aho->trie ];
1519                 HV *widecharmap = MUTABLE_HV(progi->data->data[ aho->trie + 1 ]);
1520
1521                 const char *last_start = strend - trie->minlen;
1522 #ifdef DEBUGGING
1523                 const char *real_start = s;
1524 #endif
1525                 STRLEN maxlen = trie->maxlen;
1526                 SV *sv_points;
1527                 U8 **points; /* map of where we were in the input string
1528                                 when reading a given char. For ASCII this
1529                                 is unnecessary overhead as the relationship
1530                                 is always 1:1, but for Unicode, especially
1531                                 case folded Unicode this is not true. */
1532                 U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ];
1533                 U8 *bitmap=NULL;
1534
1535
1536                 GET_RE_DEBUG_FLAGS_DECL;
1537
1538                 /* We can't just allocate points here. We need to wrap it in
1539                  * an SV so it gets freed properly if there is a croak while
1540                  * running the match */
1541                 ENTER;
1542                 SAVETMPS;
1543                 sv_points=newSV(maxlen * sizeof(U8 *));
1544                 SvCUR_set(sv_points,
1545                     maxlen * sizeof(U8 *));
1546                 SvPOK_on(sv_points);
1547                 sv_2mortal(sv_points);
1548                 points=(U8**)SvPV_nolen(sv_points );
1549                 if ( trie_type != trie_utf8_fold
1550                      && (trie->bitmap || OP(c)==AHOCORASICKC) )
1551                 {
1552                     if (trie->bitmap)
1553                         bitmap=(U8*)trie->bitmap;
1554                     else
1555                         bitmap=(U8*)ANYOF_BITMAP(c);
1556                 }
1557                 /* this is the Aho-Corasick algorithm modified a touch
1558                    to include special handling for long "unknown char"
1559                    sequences. The basic idea being that we use AC as long
1560                    as we are dealing with a possible matching char, when
1561                    we encounter an unknown char (and we have not encountered
1562                    an accepting state) we scan forward until we find a legal
1563                    starting char.
1564                    AC matching is basically that of trie matching, except
1565                    that when we encounter a failing transition, we fall back
1566                    to the current states "fail state", and try the current char
1567                    again, a process we repeat until we reach the root state,
1568                    state 1, or a legal transition. If we fail on the root state
1569                    then we can either terminate if we have reached an accepting
1570                    state previously, or restart the entire process from the beginning
1571                    if we have not.
1572
1573                  */
1574                 while (s <= last_start) {
1575                     const U32 uniflags = UTF8_ALLOW_DEFAULT;
1576                     U8 *uc = (U8*)s;
1577                     U16 charid = 0;
1578                     U32 base = 1;
1579                     U32 state = 1;
1580                     UV uvc = 0;
1581                     STRLEN len = 0;
1582                     STRLEN foldlen = 0;
1583                     U8 *uscan = (U8*)NULL;
1584                     U8 *leftmost = NULL;
1585 #ifdef DEBUGGING
1586                     U32 accepted_word= 0;
1587 #endif
1588                     U32 pointpos = 0;
1589
1590                     while ( state && uc <= (U8*)strend ) {
1591                         int failed=0;
1592                         U32 word = aho->states[ state ].wordnum;
1593
1594                         if( state==1 ) {
1595                             if ( bitmap ) {
1596                                 DEBUG_TRIE_EXECUTE_r(
1597                                     if ( uc <= (U8*)last_start && !BITMAP_TEST(bitmap,*uc) ) {
1598                                         dump_exec_pos( (char *)uc, c, strend, real_start,
1599                                             (char *)uc, do_utf8 );
1600                                         PerlIO_printf( Perl_debug_log,
1601                                             " Scanning for legal start char...\n");
1602                                     }
1603                                 );
1604                                 while ( uc <= (U8*)last_start  && !BITMAP_TEST(bitmap,*uc) ) {
1605                                     uc++;
1606                                 }
1607                                 s= (char *)uc;
1608                             }
1609                             if (uc >(U8*)last_start) break;
1610                         }
1611
1612                         if ( word ) {
1613                             U8 *lpos= points[ (pointpos - trie->wordlen[word-1] ) % maxlen ];
1614                             if (!leftmost || lpos < leftmost) {
1615                                 DEBUG_r(accepted_word=word);
1616                                 leftmost= lpos;
1617                             }
1618                             if (base==0) break;
1619
1620                         }
1621                         points[pointpos++ % maxlen]= uc;
1622                         REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc,
1623                                              uscan, len, uvc, charid, foldlen,
1624                                              foldbuf, uniflags);
1625                         DEBUG_TRIE_EXECUTE_r({
1626                             dump_exec_pos( (char *)uc, c, strend, real_start,
1627                                 s,   do_utf8 );
1628                             PerlIO_printf(Perl_debug_log,
1629                                 " Charid:%3u CP:%4"UVxf" ",
1630                                  charid, uvc);
1631                         });
1632
1633                         do {
1634 #ifdef DEBUGGING
1635                             word = aho->states[ state ].wordnum;
1636 #endif
1637                             base = aho->states[ state ].trans.base;
1638
1639                             DEBUG_TRIE_EXECUTE_r({
1640                                 if (failed)
1641                                     dump_exec_pos( (char *)uc, c, strend, real_start,
1642                                         s,   do_utf8 );
1643                                 PerlIO_printf( Perl_debug_log,
1644                                     "%sState: %4"UVxf", word=%"UVxf,
1645                                     failed ? " Fail transition to " : "",
1646                                     (UV)state, (UV)word);
1647                             });
1648                             if ( base ) {
1649                                 U32 tmp;
1650                                 if (charid &&
1651                                      (base + charid > trie->uniquecharcount )
1652                                      && (base + charid - 1 - trie->uniquecharcount
1653                                             < trie->lasttrans)
1654                                      && trie->trans[base + charid - 1 -
1655                                             trie->uniquecharcount].check == state
1656                                      && (tmp=trie->trans[base + charid - 1 -
1657                                         trie->uniquecharcount ].next))
1658                                 {
1659                                     DEBUG_TRIE_EXECUTE_r(
1660                                         PerlIO_printf( Perl_debug_log," - legal\n"));
1661                                     state = tmp;
1662                                     break;
1663                                 }
1664                                 else {
1665                                     DEBUG_TRIE_EXECUTE_r(
1666                                         PerlIO_printf( Perl_debug_log," - fail\n"));
1667                                     failed = 1;
1668                                     state = aho->fail[state];
1669                                 }
1670                             }
1671                             else {
1672                                 /* we must be accepting here */
1673                                 DEBUG_TRIE_EXECUTE_r(
1674                                         PerlIO_printf( Perl_debug_log," - accepting\n"));
1675                                 failed = 1;
1676                                 break;
1677                             }
1678                         } while(state);
1679                         uc += len;
1680                         if (failed) {
1681                             if (leftmost)
1682                                 break;
1683                             if (!state) state = 1;
1684                         }
1685                     }
1686                     if ( aho->states[ state ].wordnum ) {
1687                         U8 *lpos = points[ (pointpos - trie->wordlen[aho->states[ state ].wordnum-1]) % maxlen ];
1688                         if (!leftmost || lpos < leftmost) {
1689                             DEBUG_r(accepted_word=aho->states[ state ].wordnum);
1690                             leftmost = lpos;
1691                         }
1692                     }
1693                     if (leftmost) {
1694                         s = (char*)leftmost;
1695                         DEBUG_TRIE_EXECUTE_r({
1696                             PerlIO_printf(
1697                                 Perl_debug_log,"Matches word #%"UVxf" at position %"IVdf". Trying full pattern...\n",
1698                                 (UV)accepted_word, (IV)(s - real_start)
1699                             );
1700                         });
1701                         if (!reginfo || regtry(reginfo, &s)) {
1702                             FREETMPS;
1703                             LEAVE;
1704                             goto got_it;
1705                         }
1706                         s = HOPc(s,1);
1707                         DEBUG_TRIE_EXECUTE_r({
1708                             PerlIO_printf( Perl_debug_log,"Pattern failed. Looking for new start point...\n");
1709                         });
1710                     } else {
1711                         DEBUG_TRIE_EXECUTE_r(
1712                             PerlIO_printf( Perl_debug_log,"No match.\n"));
1713                         break;
1714                     }
1715                 }
1716                 FREETMPS;
1717                 LEAVE;
1718             }
1719             break;
1720         default:
1721             Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
1722             break;
1723         }
1724         return 0;
1725       got_it:
1726         return s;
1727 }
1728
1729 static void
1730 S_swap_match_buff (pTHX_ regexp *prog)
1731 {
1732     regexp_paren_pair *t;
1733
1734     PERL_ARGS_ASSERT_SWAP_MATCH_BUFF;
1735
1736     if (!prog->swap) {
1737     /* We have to be careful. If the previous successful match
1738        was from this regex we don't want a subsequent paritally
1739        successful match to clobber the old results.
1740        So when we detect this possibility we add a swap buffer
1741        to the re, and switch the buffer each match. If we fail
1742        we switch it back, otherwise we leave it swapped.
1743     */
1744         Newxz(prog->swap, (prog->nparens + 1), regexp_paren_pair);
1745     }
1746     t = prog->swap;
1747     prog->swap = prog->offs;
1748     prog->offs = t;
1749 }
1750
1751
1752 /*
1753  - regexec_flags - match a regexp against a string
1754  */
1755 I32
1756 Perl_regexec_flags(pTHX_ REGEXP * const rx, char *stringarg, register char *strend,
1757               char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
1758 /* strend: pointer to null at end of string */
1759 /* strbeg: real beginning of string */
1760 /* minend: end of match must be >=minend after stringarg. */
1761 /* data: May be used for some additional optimizations.
1762          Currently its only used, with a U32 cast, for transmitting
1763          the ganch offset when doing a /g match. This will change */
1764 /* nosave: For optimizations. */
1765 {
1766     dVAR;
1767     struct regexp *const prog = (struct regexp *)SvANY(rx);
1768     /*register*/ char *s;
1769     register regnode *c;
1770     /*register*/ char *startpos = stringarg;
1771     I32 minlen;         /* must match at least this many chars */
1772     I32 dontbother = 0; /* how many characters not to try at end */
1773     I32 end_shift = 0;                  /* Same for the end. */         /* CC */
1774     I32 scream_pos = -1;                /* Internal iterator of scream. */
1775     char *scream_olds = NULL;
1776     const bool do_utf8 = (bool)DO_UTF8(sv);
1777     I32 multiline;
1778     RXi_GET_DECL(prog,progi);
1779     regmatch_info reginfo;  /* create some info to pass to regtry etc */
1780     bool swap_on_fail = 0;
1781     GET_RE_DEBUG_FLAGS_DECL;
1782
1783     PERL_ARGS_ASSERT_REGEXEC_FLAGS;
1784     PERL_UNUSED_ARG(data);
1785
1786     /* Be paranoid... */
1787     if (prog == NULL || startpos == NULL) {
1788         Perl_croak(aTHX_ "NULL regexp parameter");
1789         return 0;
1790     }
1791
1792     multiline = prog->extflags & RXf_PMf_MULTILINE;
1793     reginfo.prog = rx;   /* Yes, sorry that this is confusing.  */
1794
1795     RX_MATCH_UTF8_set(rx, do_utf8);
1796     DEBUG_EXECUTE_r(
1797         debug_start_match(rx, do_utf8, startpos, strend,
1798         "Matching");
1799     );
1800
1801     minlen = prog->minlen;
1802
1803     if (strend - startpos < (minlen+(prog->check_offset_min<0?prog->check_offset_min:0))) {
1804         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
1805                               "String too short [regexec_flags]...\n"));
1806         goto phooey;
1807     }
1808
1809
1810     /* Check validity of program. */
1811     if (UCHARAT(progi->program) != REG_MAGIC) {
1812         Perl_croak(aTHX_ "corrupted regexp program");
1813     }
1814
1815     PL_reg_flags = 0;
1816     PL_reg_eval_set = 0;
1817     PL_reg_maxiter = 0;
1818
1819     if (RX_UTF8(rx))
1820         PL_reg_flags |= RF_utf8;
1821
1822     /* Mark beginning of line for ^ and lookbehind. */
1823     reginfo.bol = startpos; /* XXX not used ??? */
1824     PL_bostr  = strbeg;
1825     reginfo.sv = sv;
1826
1827     /* Mark end of line for $ (and such) */
1828     PL_regeol = strend;
1829
1830     /* see how far we have to get to not match where we matched before */
1831     reginfo.till = startpos+minend;
1832
1833     /* If there is a "must appear" string, look for it. */
1834     s = startpos;
1835
1836     if (prog->extflags & RXf_GPOS_SEEN) { /* Need to set reginfo->ganch */
1837         MAGIC *mg;
1838
1839         if (flags & REXEC_IGNOREPOS)    /* Means: check only at start */
1840             reginfo.ganch = startpos + prog->gofs;
1841         else if (sv && SvTYPE(sv) >= SVt_PVMG
1842                   && SvMAGIC(sv)
1843                   && (mg = mg_find(sv, PERL_MAGIC_regex_global))
1844                   && mg->mg_len >= 0) {
1845             reginfo.ganch = strbeg + mg->mg_len;        /* Defined pos() */
1846             if (prog->extflags & RXf_ANCH_GPOS) {
1847                 if (s > reginfo.ganch)
1848                     goto phooey;
1849                 s = reginfo.ganch - prog->gofs;
1850             }
1851         }
1852         else if (data) {
1853             reginfo.ganch = strbeg + PTR2UV(data);
1854         } else                          /* pos() not defined */
1855             reginfo.ganch = strbeg;
1856     }
1857     if (PL_curpm && (PM_GETRE(PL_curpm) == rx)) {
1858         swap_on_fail = 1;
1859         swap_match_buff(prog); /* do we need a save destructor here for
1860                                   eval dies? */
1861     }
1862     if (!(flags & REXEC_CHECKED) && (prog->check_substr != NULL || prog->check_utf8 != NULL)) {
1863         re_scream_pos_data d;
1864
1865         d.scream_olds = &scream_olds;
1866         d.scream_pos = &scream_pos;
1867         s = re_intuit_start(rx, sv, s, strend, flags, &d);
1868         if (!s) {
1869             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Not present...\n"));
1870             goto phooey;        /* not present */
1871         }
1872     }
1873
1874
1875
1876     /* Simplest case:  anchored match need be tried only once. */
1877     /*  [unless only anchor is BOL and multiline is set] */
1878     if (prog->extflags & (RXf_ANCH & ~RXf_ANCH_GPOS)) {
1879         if (s == startpos && regtry(&reginfo, &startpos))
1880             goto got_it;
1881         else if (multiline || (prog->intflags & PREGf_IMPLICIT)
1882                  || (prog->extflags & RXf_ANCH_MBOL)) /* XXXX SBOL? */
1883         {
1884             char *end;
1885
1886             if (minlen)
1887                 dontbother = minlen - 1;
1888             end = HOP3c(strend, -dontbother, strbeg) - 1;
1889             /* for multiline we only have to try after newlines */
1890             if (prog->check_substr || prog->check_utf8) {
1891                 if (s == startpos)
1892                     goto after_try;
1893                 while (1) {
1894                     if (regtry(&reginfo, &s))
1895                         goto got_it;
1896                   after_try:
1897                     if (s > end)
1898                         goto phooey;
1899                     if (prog->extflags & RXf_USE_INTUIT) {
1900                         s = re_intuit_start(rx, sv, s + 1, strend, flags, NULL);
1901                         if (!s)
1902                             goto phooey;
1903                     }
1904                     else
1905                         s++;
1906                 }
1907             } else {
1908                 if (s > startpos)
1909                     s--;
1910                 while (s < end) {
1911                     if (*s++ == '\n') { /* don't need PL_utf8skip here */
1912                         if (regtry(&reginfo, &s))
1913                             goto got_it;
1914                     }
1915                 }
1916             }
1917         }
1918         goto phooey;
1919     } else if (RXf_GPOS_CHECK == (prog->extflags & RXf_GPOS_CHECK))
1920     {
1921         /* the warning about reginfo.ganch being used without intialization
1922            is bogus -- we set it above, when prog->extflags & RXf_GPOS_SEEN
1923            and we only enter this block when the same bit is set. */
1924         char *tmp_s = reginfo.ganch - prog->gofs;
1925         if (regtry(&reginfo, &tmp_s))
1926             goto got_it;
1927         goto phooey;
1928     }
1929
1930     /* Messy cases:  unanchored match. */
1931     if ((prog->anchored_substr || prog->anchored_utf8) && prog->intflags & PREGf_SKIP) {
1932         /* we have /x+whatever/ */
1933         /* it must be a one character string (XXXX Except UTF?) */
1934         char ch;
1935 #ifdef DEBUGGING
1936         int did_match = 0;
1937 #endif
1938         if (!(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr))
1939             do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1940         ch = SvPVX_const(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr)[0];
1941
1942         if (do_utf8) {
1943             REXEC_FBC_SCAN(
1944                 if (*s == ch) {
1945                     DEBUG_EXECUTE_r( did_match = 1 );
1946                     if (regtry(&reginfo, &s)) goto got_it;
1947                     s += UTF8SKIP(s);
1948                     while (s < strend && *s == ch)
1949                         s += UTF8SKIP(s);
1950                 }
1951             );
1952         }
1953         else {
1954             REXEC_FBC_SCAN(
1955                 if (*s == ch) {
1956                     DEBUG_EXECUTE_r( did_match = 1 );
1957                     if (regtry(&reginfo, &s)) goto got_it;
1958                     s++;
1959                     while (s < strend && *s == ch)
1960                         s++;
1961                 }
1962             );
1963         }
1964         DEBUG_EXECUTE_r(if (!did_match)
1965                 PerlIO_printf(Perl_debug_log,
1966                                   "Did not find anchored character...\n")
1967                );
1968     }
1969     else if (prog->anchored_substr != NULL
1970               || prog->anchored_utf8 != NULL
1971               || ((prog->float_substr != NULL || prog->float_utf8 != NULL)
1972                   && prog->float_max_offset < strend - s)) {
1973         SV *must;
1974         I32 back_max;
1975         I32 back_min;
1976         char *last;
1977         char *last1;            /* Last position checked before */
1978 #ifdef DEBUGGING
1979         int did_match = 0;
1980 #endif
1981         if (prog->anchored_substr || prog->anchored_utf8) {
1982             if (!(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr))
1983                 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1984             must = do_utf8 ? prog->anchored_utf8 : prog->anchored_substr;
1985             back_max = back_min = prog->anchored_offset;
1986         } else {
1987             if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))
1988                 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1989             must = do_utf8 ? prog->float_utf8 : prog->float_substr;
1990             back_max = prog->float_max_offset;
1991             back_min = prog->float_min_offset;
1992         }
1993
1994
1995         if (must == &PL_sv_undef)
1996             /* could not downgrade utf8 check substring, so must fail */
1997             goto phooey;
1998
1999         if (back_min<0) {
2000             last = strend;
2001         } else {
2002             last = HOP3c(strend,        /* Cannot start after this */
2003                   -(I32)(CHR_SVLEN(must)
2004                          - (SvTAIL(must) != 0) + back_min), strbeg);
2005         }
2006         if (s > PL_bostr)
2007             last1 = HOPc(s, -1);
2008         else
2009             last1 = s - 1;      /* bogus */
2010
2011         /* XXXX check_substr already used to find "s", can optimize if
2012            check_substr==must. */
2013         scream_pos = -1;
2014         dontbother = end_shift;
2015         strend = HOPc(strend, -dontbother);
2016         while ( (s <= last) &&
2017                 ((flags & REXEC_SCREAM)
2018                  ? (s = screaminstr(sv, must, HOP3c(s, back_min, (back_min<0 ? strbeg : strend)) - strbeg,
2019                                     end_shift, &scream_pos, 0))
2020                  : (s = fbm_instr((unsigned char*)HOP3(s, back_min, (back_min<0 ? strbeg : strend)),
2021                                   (unsigned char*)strend, must,
2022                                   multiline ? FBMrf_MULTILINE : 0))) ) {
2023             /* we may be pointing at the wrong string */
2024             if ((flags & REXEC_SCREAM) && RXp_MATCH_COPIED(prog))
2025                 s = strbeg + (s - SvPVX_const(sv));
2026             DEBUG_EXECUTE_r( did_match = 1 );
2027             if (HOPc(s, -back_max) > last1) {
2028                 last1 = HOPc(s, -back_min);
2029                 s = HOPc(s, -back_max);
2030             }
2031             else {
2032                 char * const t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
2033
2034                 last1 = HOPc(s, -back_min);
2035                 s = t;
2036             }
2037             if (do_utf8) {
2038                 while (s <= last1) {
2039                     if (regtry(&reginfo, &s))
2040                         goto got_it;
2041                     s += UTF8SKIP(s);
2042                 }
2043             }
2044             else {
2045                 while (s <= last1) {
2046                     if (regtry(&reginfo, &s))
2047                         goto got_it;
2048                     s++;
2049                 }
2050             }
2051         }
2052         DEBUG_EXECUTE_r(if (!did_match) {
2053             RE_PV_QUOTED_DECL(quoted, do_utf8, PERL_DEBUG_PAD_ZERO(0),
2054                 SvPVX_const(must), RE_SV_DUMPLEN(must), 30);
2055             PerlIO_printf(Perl_debug_log, "Did not find %s substr %s%s...\n",
2056                               ((must == prog->anchored_substr || must == prog->anchored_utf8)
2057                                ? "anchored" : "floating"),
2058                 quoted, RE_SV_TAIL(must));
2059         });
2060         goto phooey;
2061     }
2062     else if ( (c = progi->regstclass) ) {
2063         if (minlen) {
2064             const OPCODE op = OP(progi->regstclass);
2065             /* don't bother with what can't match */
2066             if (PL_regkind[op] != EXACT && op != CANY && PL_regkind[op] != TRIE)
2067                 strend = HOPc(strend, -(minlen - 1));
2068         }
2069         DEBUG_EXECUTE_r({
2070             SV * const prop = sv_newmortal();
2071             regprop(prog, prop, c);
2072             {
2073                 RE_PV_QUOTED_DECL(quoted,do_utf8,PERL_DEBUG_PAD_ZERO(1),
2074                     s,strend-s,60);
2075                 PerlIO_printf(Perl_debug_log,
2076                     "Matching stclass %.*s against %s (%d chars)\n",
2077                     (int)SvCUR(prop), SvPVX_const(prop),
2078                      quoted, (int)(strend - s));
2079             }
2080         });
2081         if (find_byclass(prog, c, s, strend, &reginfo))
2082             goto got_it;
2083         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass... [regexec_flags]\n"));
2084     }
2085     else {
2086         dontbother = 0;
2087         if (prog->float_substr != NULL || prog->float_utf8 != NULL) {
2088             /* Trim the end. */
2089             char *last;
2090             SV* float_real;
2091
2092             if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))
2093                 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
2094             float_real = do_utf8 ? prog->float_utf8 : prog->float_substr;
2095
2096             if (flags & REXEC_SCREAM) {
2097                 last = screaminstr(sv, float_real, s - strbeg,
2098                                    end_shift, &scream_pos, 1); /* last one */
2099                 if (!last)
2100                     last = scream_olds; /* Only one occurrence. */
2101                 /* we may be pointing at the wrong string */
2102                 else if (RXp_MATCH_COPIED(prog))
2103                     s = strbeg + (s - SvPVX_const(sv));
2104             }
2105             else {
2106                 STRLEN len;
2107                 const char * const little = SvPV_const(float_real, len);
2108
2109                 if (SvTAIL(float_real)) {
2110                     if (memEQ(strend - len + 1, little, len - 1))
2111                         last = strend - len + 1;
2112                     else if (!multiline)
2113                         last = memEQ(strend - len, little, len)
2114                             ? strend - len : NULL;
2115                     else
2116                         goto find_last;
2117                 } else {
2118                   find_last:
2119                     if (len)
2120                         last = rninstr(s, strend, little, little + len);
2121                     else
2122                         last = strend;  /* matching "$" */
2123                 }
2124             }
2125             if (last == NULL) {
2126                 DEBUG_EXECUTE_r(
2127                     PerlIO_printf(Perl_debug_log,
2128                         "%sCan't trim the tail, match fails (should not happen)%s\n",
2129                         PL_colors[4], PL_colors[5]));
2130                 goto phooey; /* Should not happen! */
2131             }
2132             dontbother = strend - last + prog->float_min_offset;
2133         }
2134         if (minlen && (dontbother < minlen))
2135             dontbother = minlen - 1;
2136         strend -= dontbother;              /* this one's always in bytes! */
2137         /* We don't know much -- general case. */
2138         if (do_utf8) {
2139             for (;;) {
2140                 if (regtry(&reginfo, &s))
2141                     goto got_it;
2142                 if (s >= strend)
2143                     break;
2144                 s += UTF8SKIP(s);
2145             };
2146         }
2147         else {
2148             do {
2149                 if (regtry(&reginfo, &s))
2150                     goto got_it;
2151             } while (s++ < strend);
2152         }
2153     }
2154
2155     /* Failure. */
2156     goto phooey;
2157
2158 got_it:
2159     RX_MATCH_TAINTED_set(rx, PL_reg_flags & RF_tainted);
2160
2161     if (PL_reg_eval_set)
2162         restore_pos(aTHX_ prog);
2163     if (RXp_PAREN_NAMES(prog))
2164         (void)hv_iterinit(RXp_PAREN_NAMES(prog));
2165
2166     /* make sure $`, $&, $', and $digit will work later */
2167     if ( !(flags & REXEC_NOT_FIRST) ) {
2168         RX_MATCH_COPY_FREE(rx);
2169         if (flags & REXEC_COPY_STR) {
2170             const I32 i = PL_regeol - startpos + (stringarg - strbeg);
2171 #ifdef PERL_OLD_COPY_ON_WRITE
2172             if ((SvIsCOW(sv)
2173                  || (SvFLAGS(sv) & CAN_COW_MASK) == CAN_COW_FLAGS)) {
2174                 if (DEBUG_C_TEST) {
2175                     PerlIO_printf(Perl_debug_log,
2176                                   "Copy on write: regexp capture, type %d\n",
2177                                   (int) SvTYPE(sv));
2178                 }
2179                 prog->saved_copy = sv_setsv_cow(prog->saved_copy, sv);
2180                 prog->subbeg = (char *)SvPVX_const(prog->saved_copy);
2181                 assert (SvPOKp(prog->saved_copy));
2182             } else
2183 #endif
2184             {
2185                 RX_MATCH_COPIED_on(rx);
2186                 s = savepvn(strbeg, i);
2187                 prog->subbeg = s;
2188             }
2189             prog->sublen = i;
2190         }
2191         else {
2192             prog->subbeg = strbeg;
2193             prog->sublen = PL_regeol - strbeg;  /* strend may have been modified */
2194         }
2195     }
2196
2197     return 1;
2198
2199 phooey:
2200     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n",
2201                           PL_colors[4], PL_colors[5]));
2202     if (PL_reg_eval_set)
2203         restore_pos(aTHX_ prog);
2204     if (swap_on_fail)
2205         /* we failed :-( roll it back */
2206         swap_match_buff(prog);
2207
2208     return 0;
2209 }
2210
2211
2212 /*
2213  - regtry - try match at specific point
2214  */
2215 STATIC I32                      /* 0 failure, 1 success */
2216 S_regtry(pTHX_ regmatch_info *reginfo, char **startpos)
2217 {
2218     dVAR;
2219     CHECKPOINT lastcp;
2220     REGEXP *const rx = reginfo->prog;
2221     regexp *const prog = (struct regexp *)SvANY(rx);
2222     RXi_GET_DECL(prog,progi);
2223     GET_RE_DEBUG_FLAGS_DECL;
2224
2225     PERL_ARGS_ASSERT_REGTRY;
2226
2227     reginfo->cutpoint=NULL;
2228
2229     if ((prog->extflags & RXf_EVAL_SEEN) && !PL_reg_eval_set) {
2230         MAGIC *mg;
2231
2232         PL_reg_eval_set = RS_init;
2233         DEBUG_EXECUTE_r(DEBUG_s(
2234             PerlIO_printf(Perl_debug_log, "  setting stack tmpbase at %"IVdf"\n",
2235                           (IV)(PL_stack_sp - PL_stack_base));
2236             ));
2237         SAVESTACK_CXPOS();
2238         cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
2239         /* Otherwise OP_NEXTSTATE will free whatever on stack now.  */
2240         SAVETMPS;
2241         /* Apparently this is not needed, judging by wantarray. */
2242         /* SAVEI8(cxstack[cxstack_ix].blk_gimme);
2243            cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
2244
2245         if (reginfo->sv) {
2246             /* Make $_ available to executed code. */
2247             if (reginfo->sv != DEFSV) {
2248                 SAVE_DEFSV;
2249                 DEFSV = reginfo->sv;
2250             }
2251
2252             if (!(SvTYPE(reginfo->sv) >= SVt_PVMG && SvMAGIC(reginfo->sv)
2253                   && (mg = mg_find(reginfo->sv, PERL_MAGIC_regex_global)))) {
2254                 /* prepare for quick setting of pos */
2255 #ifdef PERL_OLD_COPY_ON_WRITE
2256                 if (SvIsCOW(reginfo->sv))
2257                     sv_force_normal_flags(reginfo->sv, 0);
2258 #endif
2259                 mg = sv_magicext(reginfo->sv, NULL, PERL_MAGIC_regex_global,
2260                                  &PL_vtbl_mglob, NULL, 0);
2261                 mg->mg_len = -1;
2262             }
2263             PL_reg_magic    = mg;
2264             PL_reg_oldpos   = mg->mg_len;
2265             SAVEDESTRUCTOR_X(restore_pos, prog);
2266         }
2267         if (!PL_reg_curpm) {
2268             Newxz(PL_reg_curpm, 1, PMOP);
2269 #ifdef USE_ITHREADS
2270             {
2271                 SV* const repointer = &PL_sv_undef;
2272                 /* this regexp is also owned by the new PL_reg_curpm, which
2273                    will try to free it.  */
2274                 av_push(PL_regex_padav, repointer);
2275                 PL_reg_curpm->op_pmoffset = av_len(PL_regex_padav);
2276                 PL_regex_pad = AvARRAY(PL_regex_padav);
2277             }
2278 #endif
2279         }
2280 #ifdef USE_ITHREADS
2281         /* It seems that non-ithreads works both with and without this code.
2282            So for efficiency reasons it seems best not to have the code
2283            compiled when it is not needed.  */
2284         /* This is safe against NULLs: */
2285         ReREFCNT_dec(PM_GETRE(PL_reg_curpm));
2286         /* PM_reg_curpm owns a reference to this regexp.  */
2287         ReREFCNT_inc(rx);
2288 #endif
2289         PM_SETRE(PL_reg_curpm, rx);
2290         PL_reg_oldcurpm = PL_curpm;
2291         PL_curpm = PL_reg_curpm;
2292         if (RXp_MATCH_COPIED(prog)) {
2293             /*  Here is a serious problem: we cannot rewrite subbeg,
2294                 since it may be needed if this match fails.  Thus
2295                 $` inside (?{}) could fail... */
2296             PL_reg_oldsaved = prog->subbeg;
2297             PL_reg_oldsavedlen = prog->sublen;
2298 #ifdef PERL_OLD_COPY_ON_WRITE
2299             PL_nrs = prog->saved_copy;
2300 #endif
2301             RXp_MATCH_COPIED_off(prog);
2302         }
2303         else
2304             PL_reg_oldsaved = NULL;
2305         prog->subbeg = PL_bostr;
2306         prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
2307     }
2308     DEBUG_EXECUTE_r(PL_reg_starttry = *startpos);
2309     prog->offs[0].start = *startpos - PL_bostr;
2310     PL_reginput = *startpos;
2311     PL_reglastparen = &prog->lastparen;
2312     PL_reglastcloseparen = &prog->lastcloseparen;
2313     prog->lastparen = 0;
2314     prog->lastcloseparen = 0;
2315     PL_regsize = 0;
2316     PL_regoffs = prog->offs;
2317     if (PL_reg_start_tmpl <= prog->nparens) {
2318         PL_reg_start_tmpl = prog->nparens*3/2 + 3;
2319         if(PL_reg_start_tmp)
2320             Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
2321         else
2322             Newx(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
2323     }
2324
2325     /* XXXX What this code is doing here?!!!  There should be no need
2326        to do this again and again, PL_reglastparen should take care of
2327        this!  --ilya*/
2328
2329     /* Tests pat.t#187 and split.t#{13,14} seem to depend on this code.
2330      * Actually, the code in regcppop() (which Ilya may be meaning by
2331      * PL_reglastparen), is not needed at all by the test suite
2332      * (op/regexp, op/pat, op/split), but that code is needed otherwise
2333      * this erroneously leaves $1 defined: "1" =~ /^(?:(\d)x)?\d$/
2334      * Meanwhile, this code *is* needed for the
2335      * above-mentioned test suite tests to succeed.  The common theme
2336      * on those tests seems to be returning null fields from matches.
2337      * --jhi updated by dapm */
2338 #if 1
2339     if (prog->nparens) {
2340         regexp_paren_pair *pp = PL_regoffs;
2341         register I32 i;
2342         for (i = prog->nparens; i > (I32)*PL_reglastparen; i--) {
2343             ++pp;
2344             pp->start = -1;
2345             pp->end = -1;
2346         }
2347     }
2348 #endif
2349     REGCP_SET(lastcp);
2350     if (regmatch(reginfo, progi->program + 1)) {
2351         PL_regoffs[0].end = PL_reginput - PL_bostr;
2352         return 1;
2353     }
2354     if (reginfo->cutpoint)
2355         *startpos= reginfo->cutpoint;
2356     REGCP_UNWIND(lastcp);
2357     return 0;
2358 }
2359
2360
2361 #define sayYES goto yes
2362 #define sayNO goto no
2363 #define sayNO_SILENT goto no_silent
2364
2365 /* we dont use STMT_START/END here because it leads to
2366    "unreachable code" warnings, which are bogus, but distracting. */
2367 #define CACHEsayNO \
2368     if (ST.cache_mask) \
2369        PL_reg_poscache[ST.cache_offset] |= ST.cache_mask; \
2370     sayNO
2371
2372 /* this is used to determine how far from the left messages like
2373    'failed...' are printed. It should be set such that messages
2374    are inline with the regop output that created them.
2375 */
2376 #define REPORT_CODE_OFF 32
2377
2378
2379 /* Make sure there is a test for this +1 options in re_tests */
2380 #define TRIE_INITAL_ACCEPT_BUFFLEN 4;
2381
2382 #define CHRTEST_UNINIT -1001 /* c1/c2 haven't been calculated yet */
2383 #define CHRTEST_VOID   -1000 /* the c1/c2 "next char" test should be skipped */
2384
2385 #define SLAB_FIRST(s) (&(s)->states[0])
2386 #define SLAB_LAST(s)  (&(s)->states[PERL_REGMATCH_SLAB_SLOTS-1])
2387
2388 /* grab a new slab and return the first slot in it */
2389
2390 STATIC regmatch_state *
2391 S_push_slab(pTHX)
2392 {
2393 #if PERL_VERSION < 9 && !defined(PERL_CORE)
2394     dMY_CXT;
2395 #endif
2396     regmatch_slab *s = PL_regmatch_slab->next;
2397     if (!s) {
2398         Newx(s, 1, regmatch_slab);
2399         s->prev = PL_regmatch_slab;
2400         s->next = NULL;
2401         PL_regmatch_slab->next = s;
2402     }
2403     PL_regmatch_slab = s;
2404     return SLAB_FIRST(s);
2405 }
2406
2407
2408 /* push a new state then goto it */
2409
2410 #define PUSH_STATE_GOTO(state, node) \
2411     scan = node; \
2412     st->resume_state = state; \
2413     goto push_state;
2414
2415 /* push a new state with success backtracking, then goto it */
2416
2417 #define PUSH_YES_STATE_GOTO(state, node) \
2418     scan = node; \
2419     st->resume_state = state; \
2420     goto push_yes_state;
2421
2422
2423
2424 /*
2425
2426 regmatch() - main matching routine
2427
2428 This is basically one big switch statement in a loop. We execute an op,
2429 set 'next' to point the next op, and continue. If we come to a point which
2430 we may need to backtrack to on failure such as (A|B|C), we push a
2431 backtrack state onto the backtrack stack. On failure, we pop the top
2432 state, and re-enter the loop at the state indicated. If there are no more
2433 states to pop, we return failure.
2434
2435 Sometimes we also need to backtrack on success; for example /A+/, where
2436 after successfully matching one A, we need to go back and try to
2437 match another one; similarly for lookahead assertions: if the assertion
2438 completes successfully, we backtrack to the state just before the assertion
2439 and then carry on.  In these cases, the pushed state is marked as
2440 'backtrack on success too'. This marking is in fact done by a chain of
2441 pointers, each pointing to the previous 'yes' state. On success, we pop to
2442 the nearest yes state, discarding any intermediate failure-only states.
2443 Sometimes a yes state is pushed just to force some cleanup code to be
2444 called at the end of a successful match or submatch; e.g. (??{$re}) uses
2445 it to free the inner regex.
2446
2447 Note that failure backtracking rewinds the cursor position, while
2448 success backtracking leaves it alone.
2449
2450 A pattern is complete when the END op is executed, while a subpattern
2451 such as (?=foo) is complete when the SUCCESS op is executed. Both of these
2452 ops trigger the "pop to last yes state if any, otherwise return true"
2453 behaviour.
2454
2455 A common convention in this function is to use A and B to refer to the two
2456 subpatterns (or to the first nodes thereof) in patterns like /A*B/: so A is
2457 the subpattern to be matched possibly multiple times, while B is the entire
2458 rest of the pattern. Variable and state names reflect this convention.
2459
2460 The states in the main switch are the union of ops and failure/success of
2461 substates associated with with that op.  For example, IFMATCH is the op
2462 that does lookahead assertions /(?=A)B/ and so the IFMATCH state means
2463 'execute IFMATCH'; while IFMATCH_A is a state saying that we have just
2464 successfully matched A and IFMATCH_A_fail is a state saying that we have
2465 just failed to match A. Resume states always come in pairs. The backtrack
2466 state we push is marked as 'IFMATCH_A', but when that is popped, we resume
2467 at IFMATCH_A or IFMATCH_A_fail, depending on whether we are backtracking
2468 on success or failure.
2469
2470 The struct that holds a backtracking state is actually a big union, with
2471 one variant for each major type of op. The variable st points to the
2472 top-most backtrack struct. To make the code clearer, within each
2473 block of code we #define ST to alias the relevant union.
2474
2475 Here's a concrete example of a (vastly oversimplified) IFMATCH
2476 implementation:
2477
2478     switch (state) {
2479     ....
2480
2481 #define ST st->u.ifmatch
2482
2483     case IFMATCH: // we are executing the IFMATCH op, (?=A)B
2484         ST.foo = ...; // some state we wish to save
2485         ...
2486         // push a yes backtrack state with a resume value of
2487         // IFMATCH_A/IFMATCH_A_fail, then continue execution at the
2488         // first node of A:
2489         PUSH_YES_STATE_GOTO(IFMATCH_A, A);
2490         // NOTREACHED
2491
2492     case IFMATCH_A: // we have successfully executed A; now continue with B
2493         next = B;
2494         bar = ST.foo; // do something with the preserved value
2495         break;
2496
2497     case IFMATCH_A_fail: // A failed, so the assertion failed
2498         ...;   // do some housekeeping, then ...
2499         sayNO; // propagate the failure
2500
2501 #undef ST
2502
2503     ...
2504     }
2505
2506 For any old-timers reading this who are familiar with the old recursive
2507 approach, the code above is equivalent to:
2508
2509     case IFMATCH: // we are executing the IFMATCH op, (?=A)B
2510     {
2511         int foo = ...
2512         ...
2513         if (regmatch(A)) {
2514             next = B;
2515             bar = foo;
2516             break;
2517         }
2518         ...;   // do some housekeeping, then ...
2519         sayNO; // propagate the failure
2520     }
2521
2522 The topmost backtrack state, pointed to by st, is usually free. If you
2523 want to claim it, populate any ST.foo fields in it with values you wish to
2524 save, then do one of
2525
2526         PUSH_STATE_GOTO(resume_state, node);
2527         PUSH_YES_STATE_GOTO(resume_state, node);
2528
2529 which sets that backtrack state's resume value to 'resume_state', pushes a
2530 new free entry to the top of the backtrack stack, then goes to 'node'.
2531 On backtracking, the free slot is popped, and the saved state becomes the
2532 new free state. An ST.foo field in this new top state can be temporarily
2533 accessed to retrieve values, but once the main loop is re-entered, it
2534 becomes available for reuse.
2535
2536 Note that the depth of the backtrack stack constantly increases during the
2537 left-to-right execution of the pattern, rather than going up and down with
2538 the pattern nesting. For example the stack is at its maximum at Z at the
2539 end of the pattern, rather than at X in the following:
2540
2541     /(((X)+)+)+....(Y)+....Z/
2542
2543 The only exceptions to this are lookahead/behind assertions and the cut,
2544 (?>A), which pop all the backtrack states associated with A before
2545 continuing.
2546
2547 Bascktrack state structs are allocated in slabs of about 4K in size.
2548 PL_regmatch_state and st always point to the currently active state,
2549 and PL_regmatch_slab points to the slab currently containing
2550 PL_regmatch_state.  The first time regmatch() is called, the first slab is
2551 allocated, and is never freed until interpreter destruction. When the slab
2552 is full, a new one is allocated and chained to the end. At exit from
2553 regmatch(), slabs allocated since entry are freed.
2554
2555 */
2556
2557
2558 #define DEBUG_STATE_pp(pp)                                  \
2559     DEBUG_STATE_r({                                         \
2560         DUMP_EXEC_POS(locinput, scan, do_utf8);             \
2561         PerlIO_printf(Perl_debug_log,                       \
2562             "    %*s"pp" %s%s%s%s%s\n",                     \
2563             depth*2, "",                                    \
2564             PL_reg_name[st->resume_state],                     \
2565             ((st==yes_state||st==mark_state) ? "[" : ""),   \
2566             ((st==yes_state) ? "Y" : ""),                   \
2567             ((st==mark_state) ? "M" : ""),                  \
2568             ((st==yes_state||st==mark_state) ? "]" : "")    \
2569         );                                                  \
2570     });
2571
2572
2573 #define REG_NODE_NUM(x) ((x) ? (int)((x)-prog) : -1)
2574
2575 #ifdef DEBUGGING
2576
2577 STATIC void
2578 S_debug_start_match(pTHX_ const REGEXP *prog, const bool do_utf8,
2579     const char *start, const char *end, const char *blurb)
2580 {
2581     const bool utf8_pat = RX_UTF8(prog) ? 1 : 0;
2582
2583     PERL_ARGS_ASSERT_DEBUG_START_MATCH;
2584
2585     if (!PL_colorset)
2586             reginitcolors();
2587     {
2588         RE_PV_QUOTED_DECL(s0, utf8_pat, PERL_DEBUG_PAD_ZERO(0),
2589             RX_PRECOMP(prog), RX_PRELEN(prog), 60);
2590
2591         RE_PV_QUOTED_DECL(s1, do_utf8, PERL_DEBUG_PAD_ZERO(1),
2592             start, end - start, 60);
2593
2594         PerlIO_printf(Perl_debug_log,
2595             "%s%s REx%s %s against %s\n",
2596                        PL_colors[4], blurb, PL_colors[5], s0, s1);
2597
2598         if (do_utf8||utf8_pat)
2599             PerlIO_printf(Perl_debug_log, "UTF-8 %s%s%s...\n",
2600                 utf8_pat ? "pattern" : "",
2601                 utf8_pat && do_utf8 ? " and " : "",
2602                 do_utf8 ? "string" : ""
2603             );
2604     }
2605 }
2606
2607 STATIC void
2608 S_dump_exec_pos(pTHX_ const char *locinput,
2609                       const regnode *scan,
2610                       const char *loc_regeol,
2611                       const char *loc_bostr,
2612                       const char *loc_reg_starttry,
2613                       const bool do_utf8)
2614 {
2615     const int docolor = *PL_colors[0] || *PL_colors[2] || *PL_colors[4];
2616     const int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
2617     int l = (loc_regeol - locinput) > taill ? taill : (loc_regeol - locinput);
2618     /* The part of the string before starttry has one color
2619        (pref0_len chars), between starttry and current
2620        position another one (pref_len - pref0_len chars),
2621        after the current position the third one.
2622        We assume that pref0_len <= pref_len, otherwise we
2623        decrease pref0_len.  */
2624     int pref_len = (locinput - loc_bostr) > (5 + taill) - l
2625         ? (5 + taill) - l : locinput - loc_bostr;
2626     int pref0_len;
2627
2628     PERL_ARGS_ASSERT_DUMP_EXEC_POS;
2629
2630     while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput - pref_len)))
2631         pref_len++;
2632     pref0_len = pref_len  - (locinput - loc_reg_starttry);
2633     if (l + pref_len < (5 + taill) && l < loc_regeol - locinput)
2634         l = ( loc_regeol - locinput > (5 + taill) - pref_len
2635               ? (5 + taill) - pref_len : loc_regeol - locinput);
2636     while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput + l)))
2637         l--;
2638     if (pref0_len < 0)
2639         pref0_len = 0;
2640     if (pref0_len > pref_len)
2641         pref0_len = pref_len;
2642     {
2643         const int is_uni = (do_utf8 && OP(scan) != CANY) ? 1 : 0;
2644
2645         RE_PV_COLOR_DECL(s0,len0,is_uni,PERL_DEBUG_PAD(0),
2646             (locinput - pref_len),pref0_len, 60, 4, 5);
2647
2648         RE_PV_COLOR_DECL(s1,len1,is_uni,PERL_DEBUG_PAD(1),
2649                     (locinput - pref_len + pref0_len),
2650                     pref_len - pref0_len, 60, 2, 3);
2651
2652         RE_PV_COLOR_DECL(s2,len2,is_uni,PERL_DEBUG_PAD(2),
2653                     locinput, loc_regeol - locinput, 10, 0, 1);
2654
2655         const STRLEN tlen=len0+len1+len2;
2656         PerlIO_printf(Perl_debug_log,
2657                     "%4"IVdf" <%.*s%.*s%s%.*s>%*s|",
2658                     (IV)(locinput - loc_bostr),
2659                     len0, s0,
2660                     len1, s1,
2661                     (docolor ? "" : "> <"),
2662                     len2, s2,
2663                     (int)(tlen > 19 ? 0 :  19 - tlen),
2664                     "");
2665     }
2666 }
2667
2668 #endif
2669
2670 /* reg_check_named_buff_matched()
2671  * Checks to see if a named buffer has matched. The data array of
2672  * buffer numbers corresponding to the buffer is expected to reside
2673  * in the regexp->data->data array in the slot stored in the ARG() of
2674  * node involved. Note that this routine doesn't actually care about the
2675  * name, that information is not preserved from compilation to execution.
2676  * Returns the index of the leftmost defined buffer with the given name
2677  * or 0 if non of the buffers matched.
2678  */
2679 STATIC I32
2680 S_reg_check_named_buff_matched(pTHX_ const regexp *rex, const regnode *scan)
2681 {
2682     I32 n;
2683     RXi_GET_DECL(rex,rexi);
2684     SV *sv_dat=(SV*)rexi->data->data[ ARG( scan ) ];
2685     I32 *nums=(I32*)SvPVX(sv_dat);
2686
2687     PERL_ARGS_ASSERT_REG_CHECK_NAMED_BUFF_MATCHED;
2688
2689     for ( n=0; n<SvIVX(sv_dat); n++ ) {
2690         if ((I32)*PL_reglastparen >= nums[n] &&
2691             PL_regoffs[nums[n]].end != -1)
2692         {
2693             return nums[n];
2694         }
2695     }
2696     return 0;
2697 }
2698
2699
2700 /* free all slabs above current one  - called during LEAVE_SCOPE */
2701
2702 STATIC void
2703 S_clear_backtrack_stack(pTHX_ void *p)
2704 {
2705     regmatch_slab *s = PL_regmatch_slab->next;
2706     PERL_UNUSED_ARG(p);
2707
2708     if (!s)
2709         return;
2710     PL_regmatch_slab->next = NULL;
2711     while (s) {
2712         regmatch_slab * const osl = s;
2713         s = s->next;
2714         Safefree(osl);
2715     }
2716 }
2717
2718
2719 #define SETREX(Re1,Re2) \
2720     if (PL_reg_eval_set) PM_SETRE((PL_reg_curpm), (Re2)); \
2721     Re1 = (Re2)
2722
2723 STATIC I32                      /* 0 failure, 1 success */
2724 S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
2725 {
2726 #if PERL_VERSION < 9 && !defined(PERL_CORE)
2727     dMY_CXT;
2728 #endif
2729     dVAR;
2730     register const bool do_utf8 = PL_reg_match_utf8;
2731     const U32 uniflags = UTF8_ALLOW_DEFAULT;
2732     REGEXP *rex_sv = reginfo->prog;
2733     regexp *rex = (struct regexp *)SvANY(rex_sv);
2734     RXi_GET_DECL(rex,rexi);
2735     I32 oldsave;
2736     /* the current state. This is a cached copy of PL_regmatch_state */
2737     register regmatch_state *st;
2738     /* cache heavy used fields of st in registers */
2739     register regnode *scan;
2740     register regnode *next;
2741     register U32 n = 0; /* general value; init to avoid compiler warning */
2742     register I32 ln = 0; /* len or last;  init to avoid compiler warning */
2743     register char *locinput = PL_reginput;
2744     register I32 nextchr;   /* is always set to UCHARAT(locinput) */
2745
2746     bool result = 0;        /* return value of S_regmatch */
2747     int depth = 0;          /* depth of backtrack stack */
2748     U32 nochange_depth = 0; /* depth of GOSUB recursion with nochange */
2749     const U32 max_nochange_depth =
2750         (3 * rex->nparens > MAX_RECURSE_EVAL_NOCHANGE_DEPTH) ?
2751         3 * rex->nparens : MAX_RECURSE_EVAL_NOCHANGE_DEPTH;
2752     regmatch_state *yes_state = NULL; /* state to pop to on success of
2753                                                             subpattern */
2754     /* mark_state piggy backs on the yes_state logic so that when we unwind
2755        the stack on success we can update the mark_state as we go */
2756     regmatch_state *mark_state = NULL; /* last mark state we have seen */
2757     regmatch_state *cur_eval = NULL; /* most recent EVAL_AB state */
2758     struct regmatch_state  *cur_curlyx = NULL; /* most recent curlyx */
2759     U32 state_num;
2760     bool no_final = 0;      /* prevent failure from backtracking? */
2761     bool do_cutgroup = 0;   /* no_final only until next branch/trie entry */
2762     char *startpoint = PL_reginput;
2763     SV *popmark = NULL;     /* are we looking for a mark? */
2764     SV *sv_commit = NULL;   /* last mark name seen in failure */
2765     SV *sv_yes_mark = NULL; /* last mark name we have seen
2766                                during a successfull match */
2767     U32 lastopen = 0;       /* last open we saw */
2768     bool has_cutgroup = RX_HAS_CUTGROUP(rex) ? 1 : 0;
2769     SV* const oreplsv = GvSV(PL_replgv);
2770     /* these three flags are set by various ops to signal information to
2771      * the very next op. They have a useful lifetime of exactly one loop
2772      * iteration, and are not preserved or restored by state pushes/pops
2773      */
2774     bool sw = 0;            /* the condition value in (?(cond)a|b) */
2775     bool minmod = 0;        /* the next "{n,m}" is a "{n,m}?" */
2776     int logical = 0;        /* the following EVAL is:
2777                                 0: (?{...})
2778                                 1: (?(?{...})X|Y)
2779                                 2: (??{...})
2780                                or the following IFMATCH/UNLESSM is:
2781                                 false: plain (?=foo)
2782                                 true:  used as a condition: (?(?=foo))
2783                             */
2784 #ifdef DEBUGGING
2785     GET_RE_DEBUG_FLAGS_DECL;
2786 #endif
2787
2788     PERL_ARGS_ASSERT_REGMATCH;
2789
2790     DEBUG_OPTIMISE_r( DEBUG_EXECUTE_r({
2791             PerlIO_printf(Perl_debug_log,"regmatch start\n");
2792     }));
2793     /* on first ever call to regmatch, allocate first slab */
2794     if (!PL_regmatch_slab) {
2795         Newx(PL_regmatch_slab, 1, regmatch_slab);
2796         PL_regmatch_slab->prev = NULL;
2797         PL_regmatch_slab->next = NULL;
2798         PL_regmatch_state = SLAB_FIRST(PL_regmatch_slab);
2799     }
2800
2801     oldsave = PL_savestack_ix;
2802     SAVEDESTRUCTOR_X(S_clear_backtrack_stack, NULL);
2803     SAVEVPTR(PL_regmatch_slab);
2804     SAVEVPTR(PL_regmatch_state);
2805
2806     /* grab next free state slot */
2807     st = ++PL_regmatch_state;
2808     if (st >  SLAB_LAST(PL_regmatch_slab))
2809         st = PL_regmatch_state = S_push_slab(aTHX);
2810
2811     /* Note that nextchr is a byte even in UTF */
2812     nextchr = UCHARAT(locinput);
2813     scan = prog;
2814     while (scan != NULL) {
2815
2816         DEBUG_EXECUTE_r( {
2817             SV * const prop = sv_newmortal();
2818             regnode *rnext=regnext(scan);
2819             DUMP_EXEC_POS( locinput, scan, do_utf8 );
2820             regprop(rex, prop, scan);
2821
2822             PerlIO_printf(Perl_debug_log,
2823                     "%3"IVdf":%*s%s(%"IVdf")\n",
2824                     (IV)(scan - rexi->program), depth*2, "",
2825                     SvPVX_const(prop),
2826                     (PL_regkind[OP(scan)] == END || !rnext) ?
2827                         0 : (IV)(rnext - rexi->program));
2828         });
2829
2830         next = scan + NEXT_OFF(scan);
2831         if (next == scan)
2832             next = NULL;
2833         state_num = OP(scan);
2834
2835       reenter_switch:
2836         switch (state_num) {
2837         case BOL:
2838             if (locinput == PL_bostr)
2839             {
2840                 /* reginfo->till = reginfo->bol; */
2841                 break;
2842             }
2843             sayNO;
2844         case MBOL:
2845             if (locinput == PL_bostr ||
2846                 ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n'))
2847             {
2848                 break;
2849             }
2850             sayNO;
2851         case SBOL:
2852             if (locinput == PL_bostr)
2853                 break;
2854             sayNO;
2855         case GPOS:
2856             if (locinput == reginfo->ganch)
2857                 break;
2858             sayNO;
2859
2860         case KEEPS:
2861             /* update the startpoint */
2862             st->u.keeper.val = PL_regoffs[0].start;
2863             PL_reginput = locinput;
2864             PL_regoffs[0].start = locinput - PL_bostr;
2865             PUSH_STATE_GOTO(KEEPS_next, next);
2866             /*NOT-REACHED*/
2867         case KEEPS_next_fail:
2868             /* rollback the start point change */
2869             PL_regoffs[0].start = st->u.keeper.val;
2870             sayNO_SILENT;
2871             /*NOT-REACHED*/
2872         case EOL:
2873                 goto seol;
2874         case MEOL:
2875             if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
2876                 sayNO;
2877             break;
2878         case SEOL:
2879           seol:
2880             if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
2881                 sayNO;
2882             if (PL_regeol - locinput > 1)
2883                 sayNO;
2884             break;
2885         case EOS:
2886             if (PL_regeol != locinput)
2887                 sayNO;
2888             break;
2889         case SANY:
2890             if (!nextchr && locinput >= PL_regeol)
2891                 sayNO;
2892             if (do_utf8) {
2893                 locinput += PL_utf8skip[nextchr];
2894                 if (locinput > PL_regeol)
2895                     sayNO;
2896                 nextchr = UCHARAT(locinput);
2897             }
2898             else
2899                 nextchr = UCHARAT(++locinput);
2900             break;
2901         case CANY:
2902             if (!nextchr && locinput >= PL_regeol)
2903                 sayNO;
2904             nextchr = UCHARAT(++locinput);
2905             break;
2906         case REG_ANY:
2907             if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
2908                 sayNO;
2909             if (do_utf8) {
2910                 locinput += PL_utf8skip[nextchr];
2911                 if (locinput > PL_regeol)
2912                     sayNO;
2913                 nextchr = UCHARAT(locinput);
2914             }
2915             else
2916                 nextchr = UCHARAT(++locinput);
2917             break;
2918
2919 #undef  ST
2920 #define ST st->u.trie
2921         case TRIEC:
2922             /* In this case the charclass data is available inline so
2923                we can fail fast without a lot of extra overhead.
2924              */
2925             if (scan->flags == EXACT || !do_utf8) {
2926                 if(!ANYOF_BITMAP_TEST(scan, *locinput)) {
2927                     DEBUG_EXECUTE_r(
2928                         PerlIO_printf(Perl_debug_log,
2929                                   "%*s  %sfailed to match trie start class...%s\n",
2930                                   REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5])
2931                     );
2932                     sayNO_SILENT;
2933                     /* NOTREACHED */
2934                 }
2935             }
2936             /* FALL THROUGH */
2937         case TRIE:
2938             {
2939                 /* what type of TRIE am I? (utf8 makes this contextual) */
2940                 DECL_TRIE_TYPE(scan);
2941
2942                 /* what trie are we using right now */
2943                 reg_trie_data * const trie
2944                     = (reg_trie_data*)rexi->data->data[ ARG( scan ) ];
2945                 HV * widecharmap = MUTABLE_HV(rexi->data->data[ ARG( scan ) + 1 ]);
2946                 U32 state = trie->startstate;
2947
2948                 if (trie->bitmap && trie_type != trie_utf8_fold &&
2949                     !TRIE_BITMAP_TEST(trie,*locinput)
2950                 ) {
2951                     if (trie->states[ state ].wordnum) {
2952                          DEBUG_EXECUTE_r(
2953                             PerlIO_printf(Perl_debug_log,
2954                                           "%*s  %smatched empty string...%s\n",
2955                                           REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5])
2956                         );
2957                         break;
2958                     } else {
2959                         DEBUG_EXECUTE_r(
2960                             PerlIO_printf(Perl_debug_log,
2961                                           "%*s  %sfailed to match trie start class...%s\n",
2962                                           REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5])
2963                         );
2964                         sayNO_SILENT;
2965                    }
2966                 }
2967
2968             {
2969                 U8 *uc = ( U8* )locinput;
2970
2971                 STRLEN len = 0;
2972                 STRLEN foldlen = 0;
2973                 U8 *uscan = (U8*)NULL;
2974                 STRLEN bufflen=0;
2975                 SV *sv_accept_buff = NULL;
2976                 U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ];
2977
2978                 ST.accepted = 0; /* how many accepting states we have seen */
2979                 ST.B = next;
2980                 ST.jump = trie->jump;
2981                 ST.me = scan;
2982                 /*
2983                    traverse the TRIE keeping track of all accepting states
2984                    we transition through until we get to a failing node.
2985                 */
2986
2987                 while ( state && uc <= (U8*)PL_regeol ) {
2988                     U32 base = trie->states[ state ].trans.base;
2989                     UV uvc = 0;
2990                     U16 charid;
2991                     /* We use charid to hold the wordnum as we don't use it
2992                        for charid until after we have done the wordnum logic.
2993                        We define an alias just so that the wordnum logic reads
2994                        more naturally. */
2995
2996 #define got_wordnum charid
2997                     got_wordnum = trie->states[ state ].wordnum;
2998
2999                     if ( got_wordnum ) {
3000                         if ( ! ST.accepted ) {
3001                             ENTER;
3002                             /* SAVETMPS; */ /* XXX is this necessary? dmq */
3003                             bufflen = TRIE_INITAL_ACCEPT_BUFFLEN;
3004                             sv_accept_buff=newSV(bufflen *
3005                                             sizeof(reg_trie_accepted) - 1);
3006                             SvCUR_set(sv_accept_buff, 0);
3007                             SvPOK_on(sv_accept_buff);
3008                             sv_2mortal(sv_accept_buff);
3009                             SAVETMPS;
3010                             ST.accept_buff =
3011                                 (reg_trie_accepted*)SvPV_nolen(sv_accept_buff );
3012                         }
3013                         do {
3014                             if (ST.accepted >= bufflen) {
3015                                 bufflen *= 2;
3016                                 ST.accept_buff =(reg_trie_accepted*)
3017                                     SvGROW(sv_accept_buff,
3018                                         bufflen * sizeof(reg_trie_accepted));
3019                             }
3020                             SvCUR_set(sv_accept_buff,SvCUR(sv_accept_buff)
3021                                 + sizeof(reg_trie_accepted));
3022
3023
3024                             ST.accept_buff[ST.accepted].wordnum = got_wordnum;
3025                             ST.accept_buff[ST.accepted].endpos = uc;
3026                             ++ST.accepted;
3027                         } while (trie->nextword && (got_wordnum= trie->nextword[got_wordnum]));
3028                     }
3029 #undef got_wordnum
3030
3031                     DEBUG_TRIE_EXECUTE_r({
3032                                 DUMP_EXEC_POS( (char *)uc, scan, do_utf8 );
3033                                 PerlIO_printf( Perl_debug_log,
3034                                     "%*s  %sState: %4"UVxf" Accepted: %4"UVxf" ",
3035                                     2+depth * 2, "", PL_colors[4],
3036                                     (UV)state, (UV)ST.accepted );
3037                     });
3038
3039                     if ( base ) {
3040                         REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc,
3041                                              uscan, len, uvc, charid, foldlen,
3042                                              foldbuf, uniflags);
3043
3044                         if (charid &&
3045                              (base + charid > trie->uniquecharcount )
3046                              && (base + charid - 1 - trie->uniquecharcount
3047                                     < trie->lasttrans)
3048                              && trie->trans[base + charid - 1 -
3049                                     trie->uniquecharcount].check == state)
3050                         {
3051                             state = trie->trans[base + charid - 1 -
3052                                 trie->uniquecharcount ].next;
3053                         }
3054                         else {
3055                             state = 0;
3056                         }
3057                         uc += len;
3058
3059                     }
3060                     else {
3061                         state = 0;
3062                     }
3063                     DEBUG_TRIE_EXECUTE_r(
3064                         PerlIO_printf( Perl_debug_log,
3065                             "Charid:%3x CP:%4"UVxf" After State: %4"UVxf"%s\n",
3066                             charid, uvc, (UV)state, PL_colors[5] );
3067                     );
3068                 }
3069                 if (!ST.accepted )
3070                    sayNO;
3071
3072                 DEBUG_EXECUTE_r(
3073                     PerlIO_printf( Perl_debug_log,
3074                         "%*s  %sgot %"IVdf" possible matches%s\n",
3075                         REPORT_CODE_OFF + depth * 2, "",
3076                         PL_colors[4], (IV)ST.accepted, PL_colors[5] );
3077                 );
3078             }}
3079             goto trie_first_try; /* jump into the fail handler */
3080             /* NOTREACHED */
3081         case TRIE_next_fail: /* we failed - try next alterative */
3082             if ( ST.jump) {
3083                 REGCP_UNWIND(ST.cp);
3084                 for (n = *PL_reglastparen; n > ST.lastparen; n--)
3085                     PL_regoffs[n].end = -1;
3086                 *PL_reglastparen = n;
3087             }
3088           trie_first_try:
3089             if (do_cutgroup) {
3090                 do_cutgroup = 0;
3091                 no_final = 0;
3092             }
3093
3094             if ( ST.jump) {
3095                 ST.lastparen = *PL_reglastparen;
3096                 REGCP_SET(ST.cp);
3097             }
3098             if ( ST.accepted == 1 ) {
3099                 /* only one choice left - just continue */
3100                 DEBUG_EXECUTE_r({
3101                     AV *const trie_words
3102                         = MUTABLE_AV(rexi->data->data[ARG(ST.me)+TRIE_WORDS_OFFSET]);
3103                     SV ** const tmp = av_fetch( trie_words,
3104                         ST.accept_buff[ 0 ].wordnum-1, 0 );
3105                     SV *sv= tmp ? sv_newmortal() : NULL;
3106
3107                     PerlIO_printf( Perl_debug_log,
3108                         "%*s  %sonly one match left: #%d <%s>%s\n",
3109                         REPORT_CODE_OFF+depth*2, "", PL_colors[4],
3110                         ST.accept_buff[ 0 ].wordnum,
3111                         tmp ? pv_pretty(sv, SvPV_nolen_const(*tmp), SvCUR(*tmp), 0,
3112                                 PL_colors[0], PL_colors[1],
3113                                 (SvUTF8(*tmp) ? PERL_PV_ESCAPE_UNI : 0)
3114                             )
3115                         : "not compiled under -Dr",
3116                         PL_colors[5] );
3117                 });
3118                 PL_reginput = (char *)ST.accept_buff[ 0 ].endpos;
3119                 /* in this case we free tmps/leave before we call regmatch
3120                    as we wont be using accept_buff again. */
3121
3122                 locinput = PL_reginput;
3123                 nextchr = UCHARAT(locinput);
3124                 if ( !ST.jump || !ST.jump[ST.accept_buff[0].wordnum])
3125                     scan = ST.B;
3126                 else
3127                     scan = ST.me + ST.jump[ST.accept_buff[0].wordnum];
3128                 if (!has_cutgroup) {
3129                     FREETMPS;
3130                     LEAVE;
3131                 } else {
3132                     ST.accepted--;
3133                     PUSH_YES_STATE_GOTO(TRIE_next, scan);
3134                 }
3135
3136                 continue; /* execute rest of RE */
3137             }
3138
3139             if ( !ST.accepted-- ) {
3140                 DEBUG_EXECUTE_r({
3141                     PerlIO_printf( Perl_debug_log,
3142                         "%*s  %sTRIE failed...%s\n",
3143                         REPORT_CODE_OFF+depth*2, "",
3144                         PL_colors[4],
3145                         PL_colors[5] );
3146                 });
3147                 FREETMPS;
3148                 LEAVE;
3149                 sayNO_SILENT;
3150                 /*NOTREACHED*/
3151             }
3152
3153             /*
3154                There are at least two accepting states left.  Presumably
3155                the number of accepting states is going to be low,
3156                typically two. So we simply scan through to find the one
3157                with lowest wordnum.  Once we find it, we swap the last
3158                state into its place and decrement the size. We then try to
3159                match the rest of the pattern at the point where the word
3160                ends. If we succeed, control just continues along the
3161                regex; if we fail we return here to try the next accepting
3162                state
3163              */
3164
3165             {
3166                 U32 best = 0;
3167                 U32 cur;
3168                 for( cur = 1 ; cur <= ST.accepted ; cur++ ) {
3169                     DEBUG_TRIE_EXECUTE_r(
3170                         PerlIO_printf( Perl_debug_log,
3171                             "%*s  %sgot %"IVdf" (%d) as best, looking at %"IVdf" (%d)%s\n",
3172                             REPORT_CODE_OFF + depth * 2, "", PL_colors[4],
3173                             (IV)best, ST.accept_buff[ best ].wordnum, (IV)cur,
3174                             ST.accept_buff[ cur ].wordnum, PL_colors[5] );
3175                     );
3176
3177                     if (ST.accept_buff[cur].wordnum <
3178                             ST.accept_buff[best].wordnum)
3179                         best = cur;
3180                 }
3181
3182                 DEBUG_EXECUTE_r({
3183                     AV *const trie_words
3184                         = MUTABLE_AV(rexi->data->data[ARG(ST.me)+TRIE_WORDS_OFFSET]);
3185                     SV ** const tmp = av_fetch( trie_words,
3186                         ST.accept_buff[ best ].wordnum - 1, 0 );
3187                     regnode *nextop=(!ST.jump || !ST.jump[ST.accept_buff[best].wordnum]) ?
3188                                     ST.B :
3189                                     ST.me + ST.jump[ST.accept_buff[best].wordnum];
3190                     SV *sv= tmp ? sv_newmortal() : NULL;
3191
3192                     PerlIO_printf( Perl_debug_log,
3193                         "%*s  %strying alternation #%d <%s> at node #%d %s\n",
3194                         REPORT_CODE_OFF+depth*2, "", PL_colors[4],
3195                         ST.accept_buff[best].wordnum,
3196                         tmp ? pv_pretty(sv, SvPV_nolen_const(*tmp), SvCUR(*tmp), 0,
3197                                 PL_colors[0], PL_colors[1],
3198                                 (SvUTF8(*tmp) ? PERL_PV_ESCAPE_UNI : 0)
3199                             ) : "not compiled under -Dr",
3200                             REG_NODE_NUM(nextop),
3201                         PL_colors[5] );
3202                 });
3203
3204                 if ( best<ST.accepted ) {
3205                     reg_trie_accepted tmp = ST.accept_buff[ best ];
3206                     ST.accept_buff[ best ] = ST.accept_buff[ ST.accepted ];
3207                     ST.accept_buff[ ST.accepted ] = tmp;
3208                     best = ST.accepted;
3209                 }
3210                 PL_reginput = (char *)ST.accept_buff[ best ].endpos;
3211                 if ( !ST.jump || !ST.jump[ST.accept_buff[best].wordnum]) {
3212                     scan = ST.B;
3213                 } else {
3214                     scan = ST.me + ST.jump[ST.accept_buff[best].wordnum];
3215                 }
3216                 PUSH_YES_STATE_GOTO(TRIE_next, scan);
3217                 /* NOTREACHED */
3218             }
3219             /* NOTREACHED */
3220         case TRIE_next:
3221             FREETMPS;
3222             LEAVE;
3223             sayYES;
3224 #undef  ST
3225
3226         case EXACT: {
3227             char *s = STRING(scan);
3228             ln = STR_LEN(scan);
3229             if (do_utf8 != UTF) {
3230                 /* The target and the pattern have differing utf8ness. */
3231                 char *l = locinput;
3232                 const char * const e = s + ln;
3233
3234                 if (do_utf8) {
3235                     /* The target is utf8, the pattern is not utf8. */
3236                     while (s < e) {
3237                         STRLEN ulen;
3238                         if (l >= PL_regeol)
3239                              sayNO;
3240                         if (NATIVE_TO_UNI(*(U8*)s) !=
3241                             utf8n_to_uvuni((U8*)l, UTF8_MAXBYTES, &ulen,
3242                                             uniflags))
3243                              sayNO;
3244                         l += ulen;
3245                         s ++;
3246                     }
3247                 }
3248                 else {
3249                     /* The target is not utf8, the pattern is utf8. */
3250                     while (s < e) {
3251                         STRLEN ulen;
3252                         if (l >= PL_regeol)
3253                             sayNO;
3254                         if (NATIVE_TO_UNI(*((U8*)l)) !=
3255                             utf8n_to_uvuni((U8*)s, UTF8_MAXBYTES, &ulen,
3256                                            uniflags))
3257                             sayNO;
3258                         s += ulen;
3259                         l ++;
3260                     }
3261                 }
3262                 locinput = l;
3263                 nextchr = UCHARAT(locinput);
3264                 break;
3265             }
3266             /* The target and the pattern have the same utf8ness. */
3267             /* Inline the first character, for speed. */
3268             if (UCHARAT(s) != nextchr)
3269                 sayNO;
3270             if (PL_regeol - locinput < ln)
3271                 sayNO;
3272             if (ln > 1 && memNE(s, locinput, ln))
3273                 sayNO;
3274             locinput += ln;
3275             nextchr = UCHARAT(locinput);
3276             break;
3277             }
3278         case EXACTFL:
3279             PL_reg_flags |= RF_tainted;
3280             /* FALL THROUGH */
3281         case EXACTF: {
3282             char * const s = STRING(scan);
3283             ln = STR_LEN(scan);
3284
3285             if (do_utf8 || UTF) {
3286               /* Either target or the pattern are utf8. */
3287                 const char * const l = locinput;
3288                 char *e = PL_regeol;
3289
3290                 if (ibcmp_utf8(s, 0,  ln, (bool)UTF,
3291                                l, &e, 0,  do_utf8)) {
3292                      /* One more case for the sharp s:
3293                       * pack("U0U*", 0xDF) =~ /ss/i,
3294                       * the 0xC3 0x9F are the UTF-8
3295                       * byte sequence for the U+00DF. */
3296
3297                      if (!(do_utf8 &&
3298                            toLOWER(s[0]) == 's' &&
3299                            ln >= 2 &&
3300                            toLOWER(s[1]) == 's' &&
3301                            (U8)l[0] == 0xC3 &&
3302                            e - l >= 2 &&
3303                            (U8)l[1] == 0x9F))
3304                           sayNO;
3305                 }
3306                 locinput = e;
3307                 nextchr = UCHARAT(locinput);
3308                 break;
3309             }
3310
3311             /* Neither the target and the pattern are utf8. */
3312
3313             /* Inline the first character, for speed. */
3314             if (UCHARAT(s) != nextchr &&
3315                 UCHARAT(s) != ((OP(scan) == EXACTF)
3316                                ? PL_fold : PL_fold_locale)[nextchr])
3317                 sayNO;
3318             if (PL_regeol - locinput < ln)
3319                 sayNO;
3320             if (ln > 1 && (OP(scan) == EXACTF
3321                            ? ibcmp(s, locinput, ln)
3322                            : ibcmp_locale(s, locinput, ln)))
3323                 sayNO;
3324             locinput += ln;
3325             nextchr = UCHARAT(locinput);
3326             break;
3327             }
3328         case ANYOF:
3329             if (do_utf8) {
3330                 STRLEN inclasslen = PL_regeol - locinput;
3331
3332                 if (!reginclass(rex, scan, (U8*)locinput, &inclasslen, do_utf8))
3333                     goto anyof_fail;
3334                 if (locinput >= PL_regeol)
3335                     sayNO;
3336                 locinput += inclasslen ? inclasslen : UTF8SKIP(locinput);
3337                 nextchr = UCHARAT(locinput);
3338                 break;
3339             }
3340             else {
3341                 if (nextchr < 0)
3342                     nextchr = UCHARAT(locinput);
3343                 if (!REGINCLASS(rex, scan, (U8*)locinput))
3344                     goto anyof_fail;
3345                 if (!nextchr && locinput >= PL_regeol)
3346                     sayNO;
3347                 nextchr = UCHARAT(++locinput);
3348                 break;
3349             }
3350         anyof_fail:
3351             /* If we might have the case of the German sharp s
3352              * in a casefolding Unicode character class. */
3353
3354             if (ANYOF_FOLD_SHARP_S(scan, locinput, PL_regeol)) {
3355                  locinput += SHARP_S_SKIP;
3356                  nextchr = UCHARAT(locinput);
3357             }
3358             else
3359                  sayNO;
3360             break;
3361         case ALNUML:
3362             PL_reg_flags |= RF_tainted;
3363             /* FALL THROUGH */
3364         case ALNUM:
3365             if (!nextchr)
3366                 sayNO;
3367             if (do_utf8) {
3368                 LOAD_UTF8_CHARCLASS_ALNUM();
3369                 if (!(OP(scan) == ALNUM
3370                       ? (bool)swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
3371                       : isALNUM_LC_utf8((U8*)locinput)))
3372                 {
3373                     sayNO;
3374                 }
3375                 locinput += PL_utf8skip[nextchr];
3376                 nextchr = UCHARAT(locinput);
3377                 break;
3378             }
3379             if (!(OP(scan) == ALNUM
3380                   ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
3381                 sayNO;
3382             nextchr = UCHARAT(++locinput);
3383             break;
3384         case NALNUML:
3385             PL_reg_flags |= RF_tainted;
3386             /* FALL THROUGH */
3387         case NALNUM:
3388             if (!nextchr && locinput >= PL_regeol)
3389                 sayNO;
3390             if (do_utf8) {
3391                 LOAD_UTF8_CHARCLASS_ALNUM();
3392                 if (OP(scan) == NALNUM
3393                     ? (bool)swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
3394                     : isALNUM_LC_utf8((U8*)locinput))
3395                 {
3396                     sayNO;
3397                 }
3398                 locinput += PL_utf8skip[nextchr];
3399                 nextchr = UCHARAT(locinput);
3400                 break;
3401             }
3402             if (OP(scan) == NALNUM
3403                 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
3404                 sayNO;
3405             nextchr = UCHARAT(++locinput);
3406             break;
3407         case BOUNDL:
3408         case NBOUNDL:
3409             PL_reg_flags |= RF_tainted;
3410             /* FALL THROUGH */
3411         case BOUND:
3412         case NBOUND:
3413             /* was last char in word? */
3414             if (do_utf8) {
3415                 if (locinput == PL_bostr)
3416                     ln = '\n';
3417                 else {
3418                     const U8 * const r = reghop3((U8*)locinput, -1, (U8*)PL_bostr);
3419
3420                     ln = utf8n_to_uvchr(r, UTF8SKIP(r), 0, uniflags);
3421                 }
3422                 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
3423                     ln = isALNUM_uni(ln);
3424                     LOAD_UTF8_CHARCLASS_ALNUM();
3425                     n = swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8);
3426                 }
3427                 else {
3428                     ln = isALNUM_LC_uvchr(UNI_TO_NATIVE(ln));
3429                     n = isALNUM_LC_utf8((U8*)locinput);
3430                 }
3431             }
3432             else {
3433                 ln = (locinput != PL_bostr) ?
3434                     UCHARAT(locinput - 1) : '\n';
3435                 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
3436                     ln = isALNUM(ln);
3437                     n = isALNUM(nextchr);
3438                 }
3439                 else {
3440                     ln = isALNUM_LC(ln);
3441                     n = isALNUM_LC(nextchr);
3442                 }
3443             }
3444             if (((!ln) == (!n)) == (OP(scan) == BOUND ||
3445                                     OP(scan) == BOUNDL))
3446                     sayNO;
3447             break;
3448         case SPACEL:
3449             PL_reg_flags |= RF_tainted;
3450             /* FALL THROUGH */
3451         case SPACE:
3452             if (!nextchr)
3453                 sayNO;
3454             if (do_utf8) {
3455                 if (UTF8_IS_CONTINUED(nextchr)) {
3456                     LOAD_UTF8_CHARCLASS_SPACE();
3457                     if (!(OP(scan) == SPACE
3458                           ? (bool)swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
3459                           : isSPACE_LC_utf8((U8*)locinput)))
3460                     {
3461                         sayNO;
3462                     }
3463                     locinput += PL_utf8skip[nextchr];
3464                     nextchr = UCHARAT(locinput);
3465                     break;
3466                 }
3467                 if (!(OP(scan) == SPACE
3468                       ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
3469                     sayNO;
3470                 nextchr = UCHARAT(++locinput);
3471             }
3472             else {
3473                 if (!(OP(scan) == SPACE
3474                       ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
3475                     sayNO;
3476                 nextchr = UCHARAT(++locinput);
3477             }
3478             break;
3479         case NSPACEL:
3480             PL_reg_flags |= RF_tainted;
3481             /* FALL THROUGH */
3482         case NSPACE:
3483             if (!nextchr && locinput >= PL_regeol)
3484                 sayNO;
3485             if (do_utf8) {
3486                 LOAD_UTF8_CHARCLASS_SPACE();
3487                 if (OP(scan) == NSPACE
3488                     ? (bool)swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
3489                     : isSPACE_LC_utf8((U8*)locinput))
3490                 {
3491                     sayNO;
3492                 }
3493                 locinput += PL_utf8skip[nextchr];
3494                 nextchr = UCHARAT(locinput);
3495                 break;
3496             }
3497             if (OP(scan) == NSPACE
3498                 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
3499                 sayNO;
3500             nextchr = UCHARAT(++locinput);
3501             break;
3502         case DIGITL:
3503             PL_reg_flags |= RF_tainted;
3504             /* FALL THROUGH */
3505         case DIGIT:
3506             if (!nextchr)
3507                 sayNO;
3508             if (do_utf8) {
3509                 LOAD_UTF8_CHARCLASS_DIGIT();
3510                 if (!(OP(scan) == DIGIT
3511                       ? (bool)swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
3512                       : isDIGIT_LC_utf8((U8*)locinput)))
3513                 {
3514                     sayNO;
3515                 }
3516                 locinput += PL_utf8skip[nextchr];
3517                 nextchr = UCHARAT(locinput);
3518                 break;
3519             }
3520             if (!(OP(scan) == DIGIT
3521                   ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
3522                 sayNO;
3523             nextchr = UCHARAT(++locinput);
3524             break;
3525         case NDIGITL:
3526             PL_reg_flags |= RF_tainted;
3527             /* FALL THROUGH */
3528         case NDIGIT:
3529             if (!nextchr && locinput >= PL_regeol)
3530                 sayNO;
3531             if (do_utf8) {
3532                 LOAD_UTF8_CHARCLASS_DIGIT();
3533                 if (OP(scan) == NDIGIT
3534                     ? (bool)swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
3535                     : isDIGIT_LC_utf8((U8*)locinput))
3536                 {
3537                     sayNO;
3538                 }
3539                 locinput += PL_utf8skip[nextchr];
3540                 nextchr = UCHARAT(locinput);
3541                 break;
3542             }
3543             if (OP(scan) == NDIGIT
3544                 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
3545                 sayNO;
3546             nextchr = UCHARAT(++locinput);
3547             break;
3548         case CLUMP:
3549             if (locinput >= PL_regeol)
3550                 sayNO;
3551             if  (do_utf8) {
3552                 LOAD_UTF8_CHARCLASS_MARK();
3553                 if (swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
3554                     sayNO;
3555                 locinput += PL_utf8skip[nextchr];
3556                 while (locinput < PL_regeol &&
3557                        swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
3558                     locinput += UTF8SKIP(locinput);
3559                 if (locinput > PL_regeol)
3560                     sayNO;
3561             }
3562             else
3563                locinput++;
3564             nextchr = UCHARAT(locinput);
3565             break;
3566
3567         case NREFFL:
3568         {
3569             char *s;
3570             char type;
3571             PL_reg_flags |= RF_tainted;
3572             /* FALL THROUGH */
3573         case NREF:
3574         case NREFF:
3575             type = OP(scan);
3576             n = reg_check_named_buff_matched(rex,scan);
3577
3578             if ( n ) {
3579                 type = REF + ( type - NREF );
3580                 goto do_ref;
3581             } else {
3582                 sayNO;
3583             }
3584             /* unreached */
3585         case REFFL:
3586             PL_reg_flags |= RF_tainted;
3587             /* FALL THROUGH */
3588         case REF:
3589         case REFF:
3590             n = ARG(scan);  /* which paren pair */
3591             type = OP(scan);
3592           do_ref:
3593             ln = PL_regoffs[n].start;
3594             PL_reg_leftiter = PL_reg_maxiter;           /* Void cache */
3595             if (*PL_reglastparen < n || ln == -1)
3596                 sayNO;                  /* Do not match unless seen CLOSEn. */
3597             if (ln == PL_regoffs[n].end)
3598                 break;
3599
3600             s = PL_bostr + ln;
3601             if (do_utf8 && type != REF) {       /* REF can do byte comparison */
3602                 char *l = locinput;
3603                 const char *e = PL_bostr + PL_regoffs[n].end;
3604                 /*
3605                  * Note that we can't do the "other character" lookup trick as
3606                  * in the 8-bit case (no pun intended) because in Unicode we
3607                  * have to map both upper and title case to lower case.
3608                  */
3609                 if (type == REFF) {
3610                     while (s < e) {
3611                         STRLEN ulen1, ulen2;
3612                         U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
3613                         U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
3614
3615                         if (l >= PL_regeol)
3616                             sayNO;
3617                         toLOWER_utf8((U8*)s, tmpbuf1, &ulen1);
3618                         toLOWER_utf8((U8*)l, tmpbuf2, &ulen2);
3619                         if (ulen1 != ulen2 || memNE((char *)tmpbuf1, (char *)tmpbuf2, ulen1))
3620                             sayNO;
3621                         s += ulen1;
3622                         l += ulen2;
3623                     }
3624                 }
3625                 locinput = l;
3626                 nextchr = UCHARAT(locinput);
3627                 break;
3628             }
3629
3630             /* Inline the first character, for speed. */
3631             if (UCHARAT(s) != nextchr &&
3632                 (type == REF ||
3633                  (UCHARAT(s) != (type == REFF
3634                                   ? PL_fold : PL_fold_locale)[nextchr])))
3635                 sayNO;
3636             ln = PL_regoffs[n].end - ln;
3637             if (locinput + ln > PL_regeol)
3638                 sayNO;
3639             if (ln > 1 && (type == REF
3640                            ? memNE(s, locinput, ln)
3641                            : (type == REFF
3642                               ? ibcmp(s, locinput, ln)
3643                               : ibcmp_locale(s, locinput, ln))))
3644                 sayNO;
3645             locinput += ln;
3646             nextchr = UCHARAT(locinput);
3647             break;
3648         }
3649         case NOTHING:
3650         case TAIL:
3651             break;
3652         case BACK:
3653             break;
3654
3655 #undef  ST
3656 #define ST st->u.eval
3657         {
3658             SV *ret;
3659             REGEXP *re_sv;
3660             regexp *re;
3661             regexp_internal *rei;
3662             regnode *startpoint;
3663
3664         case GOSTART:
3665         case GOSUB: /*    /(...(?1))/   /(...(?&foo))/   */
3666             if (cur_eval && cur_eval->locinput==locinput) {
3667                 if (cur_eval->u.eval.close_paren == (U32)ARG(scan))
3668                     Perl_croak(aTHX_ "Infinite recursion in regex");
3669                 if ( ++nochange_depth > max_nochange_depth )
3670                     Perl_croak(aTHX_
3671                         "Pattern subroutine nesting without pos change"
3672                         " exceeded limit in regex");
3673             } else {
3674                 nochange_depth = 0;
3675             }
3676             re_sv = rex_sv;
3677             re = rex;
3678             rei = rexi;
3679             (void)ReREFCNT_inc(rex_sv);
3680             if (OP(scan)==GOSUB) {
3681                 startpoint = scan + ARG2L(scan);
3682                 ST.close_paren = ARG(scan);
3683             } else {
3684                 startpoint = rei->program+1;
3685                 ST.close_paren = 0;
3686             }
3687             goto eval_recurse_doit;
3688             /* NOTREACHED */
3689         case EVAL:  /*   /(?{A})B/   /(??{A})B/  and /(?(?{A})X|Y)B/   */
3690             if (cur_eval && cur_eval->locinput==locinput) {
3691                 if ( ++nochange_depth > max_nochange_depth )
3692                     Perl_croak(aTHX_ "EVAL without pos change exceeded limit in regex");
3693             } else {
3694                 nochange_depth = 0;
3695             }
3696             {
3697                 /* execute the code in the {...} */
3698                 dSP;
3699                 SV ** const before = SP;
3700                 OP_4tree * const oop = PL_op;
3701                 COP * const ocurcop = PL_curcop;
3702                 PAD *old_comppad;
3703
3704                 n = ARG(scan);
3705                 PL_op = (OP_4tree*)rexi->data->data[n];
3706                 DEBUG_STATE_r( PerlIO_printf(Perl_debug_log,
3707                     "  re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
3708                 PAD_SAVE_LOCAL(old_comppad, (PAD*)rexi->data->data[n + 2]);
3709                 PL_regoffs[0].end = PL_reg_magic->mg_len = locinput - PL_bostr;
3710
3711                 if (sv_yes_mark) {
3712                     SV *sv_mrk = get_sv("REGMARK", 1);
3713                     sv_setsv(sv_mrk, sv_yes_mark);
3714                 }
3715
3716                 CALLRUNOPS(aTHX);                       /* Scalar context. */
3717                 SPAGAIN;
3718                 if (SP == before)
3719                     ret = &PL_sv_undef;   /* protect against empty (?{}) blocks. */
3720                 else {
3721                     ret = POPs;
3722                     PUTBACK;
3723                 }
3724
3725                 PL_op = oop;
3726                 PAD_RESTORE_LOCAL(old_comppad);
3727                 PL_curcop = ocurcop;
3728                 if (!logical) {
3729                     /* /(?{...})/ */
3730                     sv_setsv(save_scalar(PL_replgv), ret);
3731                     break;
3732                 }
3733             }
3734             if (logical == 2) { /* Postponed subexpression: /(??{...})/ */
3735                 logical = 0;
3736                 {
3737                     /* extract RE object from returned value; compiling if
3738                      * necessary */
3739                     MAGIC *mg = NULL;
3740                     REGEXP *rx = NULL;
3741
3742                     if (SvROK(ret)) {
3743                         SV *const sv = SvRV(ret);
3744
3745                         if (SvTYPE(sv) == SVt_REGEXP) {
3746                             rx = (REGEXP*) sv;
3747                         } else if (SvSMAGICAL(sv)) {
3748                             mg = mg_find(sv, PERL_MAGIC_qr);
3749                             assert(mg);
3750                         }
3751                     } else if (SvTYPE(ret) == SVt_REGEXP) {
3752                         rx = (REGEXP*) ret;
3753                     } else if (SvSMAGICAL(ret)) {
3754                         if (SvGMAGICAL(ret)) {
3755                             /* I don't believe that there is ever qr magic
3756                                here.  */
3757                             assert(!mg_find(ret, PERL_MAGIC_qr));
3758                             sv_unmagic(ret, PERL_MAGIC_qr);
3759                         }
3760                         else {
3761                             mg = mg_find(ret, PERL_MAGIC_qr);
3762                             /* testing suggests mg only ends up non-NULL for
3763                                scalars who were upgraded and compiled in the
3764                                else block below. In turn, this is only
3765                                triggered in the "postponed utf8 string" tests
3766                                in t/op/pat.t  */
3767                         }
3768                     }
3769
3770                     if (mg) {
3771                         rx = (REGEXP *) mg->mg_obj; /*XXX:dmq*/
3772                         assert(rx);
3773                     }
3774                     if (rx) {
3775                         rx = reg_temp_copy(rx);
3776                     }
3777                     else {
3778                         U32 pm_flags = 0;
3779                         const I32 osize = PL_regsize;
3780
3781                         if (DO_UTF8(ret)) {
3782                             assert (SvUTF8(ret));
3783                         } else if (SvUTF8(ret)) {
3784                             /* Not doing UTF-8, despite what the SV says. Is
3785                                this only if we're trapped in use 'bytes'?  */
3786                             /* Make a copy of the octet sequence, but without
3787                                the flag on, as the compiler now honours the
3788                                SvUTF8 flag on ret.  */
3789                             STRLEN len;
3790                             const char *const p = SvPV(ret, len);
3791                             ret = newSVpvn_flags(p, len, SVs_TEMP);
3792                         }
3793                         rx = CALLREGCOMP(ret, pm_flags);
3794                         if (!(SvFLAGS(ret)
3795                               & (SVs_TEMP | SVs_PADTMP | SVf_READONLY
3796                                  | SVs_GMG))) {
3797                             /* This isn't a first class regexp. Instead, it's
3798                                caching a regexp onto an existing, Perl visible
3799                                scalar.  */
3800                             sv_magic(ret, (SV*) rx, PERL_MAGIC_qr, 0, 0);
3801                         }
3802                         PL_regsize = osize;
3803                     }
3804                     re_sv = rx;
3805                     re = (struct regexp *)SvANY(rx);
3806                 }
3807                 RXp_MATCH_COPIED_off(re);
3808                 re->subbeg = rex->subbeg;
3809                 re->sublen = rex->sublen;
3810                 rei = RXi_GET(re);
3811                 DEBUG_EXECUTE_r(
3812                     debug_start_match(re_sv, do_utf8, locinput, PL_regeol,
3813                         "Matching embedded");
3814                 );
3815                 startpoint = rei->program + 1;
3816                 ST.close_paren = 0; /* only used for GOSUB */
3817                 /* borrowed from regtry */
3818                 if (PL_reg_start_tmpl <= re->nparens) {
3819                     PL_reg_start_tmpl = re->nparens*3/2 + 3;
3820                     if(PL_reg_start_tmp)
3821                         Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
3822                     else
3823                         Newx(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
3824                 }
3825
3826         eval_recurse_doit: /* Share code with GOSUB below this line */
3827                 /* run the pattern returned from (??{...}) */
3828                 ST.cp = regcppush(0);   /* Save *all* the positions. */
3829                 REGCP_SET(ST.lastcp);
3830
3831                 PL_regoffs = re->offs; /* essentially NOOP on GOSUB */
3832
3833                 /* see regtry, specifically PL_reglast(?:close)?paren is a pointer! (i dont know why) :dmq */
3834                 PL_reglastparen = &re->lastparen;
3835                 PL_reglastcloseparen = &re->lastcloseparen;
3836                 re->lastparen = 0;
3837                 re->lastcloseparen = 0;
3838
3839                 PL_reginput = locinput;
3840                 PL_regsize = 0;
3841
3842                 /* XXXX This is too dramatic a measure... */
3843                 PL_reg_maxiter = 0;
3844
3845                 ST.toggle_reg_flags = PL_reg_flags;
3846                 if (RX_UTF8(re_sv))
3847                     PL_reg_flags |= RF_utf8;
3848                 else
3849                     PL_reg_flags &= ~RF_utf8;
3850                 ST.toggle_reg_flags ^= PL_reg_flags; /* diff of old and new */
3851
3852                 ST.prev_rex = rex_sv;
3853                 ST.prev_curlyx = cur_curlyx;
3854                 SETREX(rex_sv,re_sv);
3855                 rex = re;
3856                 rexi = rei;
3857                 cur_curlyx = NULL;
3858                 ST.B = next;
3859                 ST.prev_eval = cur_eval;
3860                 cur_eval = st;
3861                 /* now continue from first node in postoned RE */
3862                 PUSH_YES_STATE_GOTO(EVAL_AB, startpoint);
3863                 /* NOTREACHED */
3864             }
3865             /* logical is 1,   /(?(?{...})X|Y)/ */
3866             sw = (bool)SvTRUE(ret);
3867             logical = 0;
3868             break;
3869         }
3870
3871         case EVAL_AB: /* cleanup after a successful (??{A})B */
3872             /* note: this is called twice; first after popping B, then A */
3873             PL_reg_flags ^= ST.toggle_reg_flags;
3874             ReREFCNT_dec(rex_sv);
3875             SETREX(rex_sv,ST.prev_rex);
3876             rex = (struct regexp *)SvANY(rex_sv);
3877             rexi = RXi_GET(rex);
3878             regcpblow(ST.cp);
3879             cur_eval = ST.prev_eval;
3880             cur_curlyx = ST.prev_curlyx;
3881
3882             PL_reglastparen = &rex->lastparen;
3883             PL_reglastcloseparen = &rex->lastcloseparen;
3884
3885             /* XXXX This is too dramatic a measure... */
3886             PL_reg_maxiter = 0;
3887             if ( nochange_depth )
3888                 nochange_depth--;
3889             sayYES;
3890
3891
3892         case EVAL_AB_fail: /* unsuccessfully ran A or B in (??{A})B */
3893             /* note: this is called twice; first after popping B, then A */
3894             PL_reg_flags ^= ST.toggle_reg_flags;
3895             ReREFCNT_dec(rex_sv);
3896             SETREX(rex_sv,ST.prev_rex);
3897             rex = (struct regexp *)SvANY(rex_sv);
3898             rexi = RXi_GET(rex);
3899             PL_reglastparen = &rex->lastparen;
3900             PL_reglastcloseparen = &rex->lastcloseparen;
3901
3902             PL_reginput = locinput;
3903             REGCP_UNWIND(ST.lastcp);
3904             regcppop(rex);
3905             cur_eval = ST.prev_eval;
3906             cur_curlyx = ST.prev_curlyx;
3907             /* XXXX This is too dramatic a measure... */
3908             PL_reg_maxiter = 0;
3909             if ( nochange_depth )
3910                 nochange_depth--;
3911             sayNO_SILENT;
3912 #undef ST
3913
3914         case OPEN:
3915             n = ARG(scan);  /* which paren pair */
3916             PL_reg_start_tmp[n] = locinput;
3917             if (n > PL_regsize)
3918                 PL_regsize = n;
3919             lastopen = n;
3920             break;
3921         case CLOSE:
3922             n = ARG(scan);  /* which paren pair */
3923             PL_regoffs[n].start = PL_reg_start_tmp[n] - PL_bostr;
3924             PL_regoffs[n].end = locinput - PL_bostr;
3925             /*if (n > PL_regsize)
3926                 PL_regsize = n;*/
3927             if (n > *PL_reglastparen)
3928                 *PL_reglastparen = n;
3929             *PL_reglastcloseparen = n;
3930             if (cur_eval && cur_eval->u.eval.close_paren == n) {
3931                 goto fake_end;
3932             }
3933             break;
3934         case ACCEPT:
3935             if (ARG(scan)){
3936                 regnode *cursor;
3937                 for (cursor=scan;
3938                      cursor && OP(cursor)!=END;
3939                      cursor=regnext(cursor))
3940                 {
3941                     if ( OP(cursor)==CLOSE ){
3942                         n = ARG(cursor);
3943                         if ( n <= lastopen ) {
3944                             PL_regoffs[n].start
3945                                 = PL_reg_start_tmp[n] - PL_bostr;
3946                             PL_regoffs[n].end = locinput - PL_bostr;
3947                             /*if (n > PL_regsize)
3948                             PL_regsize = n;*/
3949                             if (n > *PL_reglastparen)
3950                                 *PL_reglastparen = n;
3951                             *PL_reglastcloseparen = n;
3952                             if ( n == ARG(scan) || (cur_eval &&
3953                                 cur_eval->u.eval.close_paren == n))
3954                                 break;
3955                         }
3956                     }
3957                 }
3958             }
3959             goto fake_end;
3960             /*NOTREACHED*/
3961         case GROUPP:
3962             n = ARG(scan);  /* which paren pair */
3963             sw = (bool)(*PL_reglastparen >= n && PL_regoffs[n].end != -1);
3964             break;
3965         case NGROUPP:
3966             /* reg_check_named_buff_matched returns 0 for no match */
3967             sw = (bool)(0 < reg_check_named_buff_matched(rex,scan));
3968             break;
3969         case INSUBP:
3970             n = ARG(scan);
3971             sw = (cur_eval && (!n || cur_eval->u.eval.close_paren == n));
3972             break;
3973         case DEFINEP:
3974             sw = 0;
3975             break;
3976         case IFTHEN:
3977             PL_reg_leftiter = PL_reg_maxiter;           /* Void cache */
3978             if (sw)
3979                 next = NEXTOPER(NEXTOPER(scan));
3980             else {
3981                 next = scan + ARG(scan);
3982                 if (OP(next) == IFTHEN) /* Fake one. */
3983                     next = NEXTOPER(NEXTOPER(next));
3984             }
3985             break;
3986         case LOGICAL:
3987             logical = scan->flags;
3988             break;
3989
3990 /*******************************************************************
3991
3992 The CURLYX/WHILEM pair of ops handle the most generic case of the /A*B/
3993 pattern, where A and B are subpatterns. (For simple A, CURLYM or
3994 STAR/PLUS/CURLY/CURLYN are used instead.)
3995
3996 A*B is compiled as <CURLYX><A><WHILEM><B>
3997
3998 On entry to the subpattern, CURLYX is called. This pushes a CURLYX
3999 state, which contains the current count, initialised to -1. It also sets
4000 cur_curlyx to point to this state, with any previous value saved in the
4001 state block.
4002
4003 CURLYX then jumps straight to the WHILEM op, rather than executing A,
4004 since the pattern may possibly match zero times (i.e. it's a while {} loop
4005 rather than a do {} while loop).
4006
4007 Each entry to WHILEM represents a successful match of A. The count in the
4008 CURLYX block is incremented, another WHILEM state is pushed, and execution
4009 passes to A or B depending on greediness and the current count.
4010
4011 For example, if matching against the string a1a2a3b (where the aN are
4012 substrings that match /A/), then the match progresses as follows: (the
4013 pushed states are interspersed with the bits of strings matched so far):
4014
4015     <CURLYX cnt=-1>
4016     <CURLYX cnt=0><WHILEM>
4017     <CURLYX cnt=1><WHILEM> a1 <WHILEM>
4018     <CURLYX cnt=2><WHILEM> a1 <WHILEM> a2 <WHILEM>
4019     <CURLYX cnt=3><WHILEM> a1 <WHILEM> a2 <WHILEM> a3 <WHILEM>
4020     <CURLYX cnt=3><WHILEM> a1 <WHILEM> a2 <WHILEM> a3 <WHILEM> b
4021
4022 (Contrast this with something like CURLYM, which maintains only a single
4023 backtrack state:
4024
4025     <CURLYM cnt=0> a1
4026     a1 <CURLYM cnt=1> a2
4027     a1 a2 <CURLYM cnt=2> a3
4028     a1 a2 a3 <CURLYM cnt=3> b
4029 )
4030
4031 Each WHILEM state block marks a point to backtrack to upon partial failure
4032 of A or B, and also contains some minor state data related to that
4033 iteration.  The CURLYX block, pointed to by cur_curlyx, contains the
4034 overall state, such as the count, and pointers to the A and B ops.
4035
4036 This is complicated slightly by nested CURLYX/WHILEM's. Since cur_curlyx
4037 must always point to the *current* CURLYX block, the rules are:
4038
4039 When executing CURLYX, save the old cur_curlyx in the CURLYX state block,
4040 and set cur_curlyx to point the new block.
4041
4042 When popping the CURLYX block after a successful or unsuccessful match,
4043 restore the previous cur_curlyx.
4044
4045 When WHILEM is about to execute B, save the current cur_curlyx, and set it
4046 to the outer one saved in the CURLYX block.
4047
4048 When popping the WHILEM block after a successful or unsuccessful B match,
4049 restore the previous cur_curlyx.
4050
4051 Here's an example for the pattern (AI* BI)*BO
4052 I and O refer to inner and outer, C and W refer to CURLYX and WHILEM:
4053
4054 cur_
4055 curlyx backtrack stack
4056 ------ ---------------
4057 NULL
4058 CO     <CO prev=NULL> <WO>
4059 CI     <CO prev=NULL> <WO> <CI prev=CO> <WI> ai
4060 CO     <CO prev=NULL> <WO> <CI prev=CO> <WI> ai <WI prev=CI> bi
4061 NULL   <CO prev=NULL> <WO> <CI prev=CO> <WI> ai <WI prev=CI> bi <WO prev=CO> bo
4062
4063 At this point the pattern succeeds, and we work back down the stack to
4064 clean up, restoring as we go:
4065
4066 CO     <CO prev=NULL> <WO> <CI prev=CO> <WI> ai <WI prev=CI> bi
4067 CI     <CO prev=NULL> <WO> <CI prev=CO> <WI> ai
4068 CO     <CO prev=NULL> <WO>
4069 NULL
4070
4071 *******************************************************************/
4072
4073 #define ST st->u.curlyx
4074
4075         case CURLYX:    /* start of /A*B/  (for complex A) */
4076         {
4077             /* No need to save/restore up to this paren */
4078             I32 parenfloor = scan->flags;
4079
4080             assert(next); /* keep Coverity happy */
4081             if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
4082                 next += ARG(next);
4083
4084             /* XXXX Probably it is better to teach regpush to support
4085                parenfloor > PL_regsize... */
4086             if (parenfloor > (I32)*PL_reglastparen)
4087                 parenfloor = *PL_reglastparen; /* Pessimization... */
4088
4089             ST.prev_curlyx= cur_curlyx;
4090             cur_curlyx = st;
4091             ST.cp = PL_savestack_ix;
4092
4093             /* these fields contain the state of the current curly.
4094              * they are accessed by subsequent WHILEMs */
4095             ST.parenfloor = parenfloor;
4096             ST.min = ARG1(scan);
4097             ST.max = ARG2(scan);
4098             ST.A = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
4099             ST.B = next;
4100             ST.minmod = minmod;
4101             minmod = 0;
4102             ST.count = -1;      /* this will be updated by WHILEM */
4103             ST.lastloc = NULL;  /* this will be updated by WHILEM */
4104
4105             PL_reginput = locinput;
4106             PUSH_YES_STATE_GOTO(CURLYX_end, PREVOPER(next));
4107             /* NOTREACHED */
4108         }
4109
4110         case CURLYX_end: /* just finished matching all of A*B */
4111             cur_curlyx = ST.prev_curlyx;
4112             sayYES;
4113             /* NOTREACHED */
4114
4115         case CURLYX_end_fail: /* just failed to match all of A*B */
4116             regcpblow(ST.cp);
4117             cur_curlyx = ST.prev_curlyx;
4118             sayNO;
4119             /* NOTREACHED */
4120
4121
4122 #undef ST
4123 #define ST st->u.whilem
4124
4125         case WHILEM:     /* just matched an A in /A*B/  (for complex A) */
4126         {
4127             /* see the discussion above about CURLYX/WHILEM */
4128             I32 n;
4129             assert(cur_curlyx); /* keep Coverity happy */
4130             n = ++cur_curlyx->u.curlyx.count; /* how many A's matched */
4131             ST.save_lastloc = cur_curlyx->u.curlyx.lastloc;
4132             ST.cache_offset = 0;
4133             ST.cache_mask = 0;
4134
4135             PL_reginput = locinput;
4136
4137             DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
4138                   "%*s  whilem: matched %ld out of %ld..%ld\n",
4139                   REPORT_CODE_OFF+depth*2, "", (long)n,
4140                   (long)cur_curlyx->u.curlyx.min,
4141                   (long)cur_curlyx->u.curlyx.max)
4142             );
4143
4144             /* First just match a string of min A's. */
4145
4146             if (n < cur_curlyx->u.curlyx.min) {
4147                 cur_curlyx->u.curlyx.lastloc = locinput;
4148                 PUSH_STATE_GOTO(WHILEM_A_pre, cur_curlyx->u.curlyx.A);
4149                 /* NOTREACHED */
4150             }
4151
4152             /* If degenerate A matches "", assume A done. */
4153
4154             if (locinput == cur_curlyx->u.curlyx.lastloc) {
4155                 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
4156                    "%*s  whilem: empty match detected, trying continuation...\n",
4157                    REPORT_CODE_OFF+depth*2, "")
4158                 );
4159                 goto do_whilem_B_max;
4160             }
4161
4162             /* super-linear cache processing */
4163
4164             if (scan->flags) {
4165
4166                 if (!PL_reg_maxiter) {
4167                     /* start the countdown: Postpone detection until we
4168                      * know the match is not *that* much linear. */
4169                     PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
4170                     /* possible overflow for long strings and many CURLYX's */
4171                     if (PL_reg_maxiter < 0)
4172                         PL_reg_maxiter = I32_MAX;
4173                     PL_reg_leftiter = PL_reg_maxiter;
4174                 }
4175
4176                 if (PL_reg_leftiter-- == 0) {
4177                     /* initialise cache */
4178                     const I32 size = (PL_reg_maxiter + 7)/8;
4179                     if (PL_reg_poscache) {
4180                         if ((I32)PL_reg_poscache_size < size) {
4181                             Renew(PL_reg_poscache, size, char);
4182                             PL_reg_poscache_size = size;
4183                         }
4184                         Zero(PL_reg_poscache, size, char);
4185                     }
4186                     else {
4187                         PL_reg_poscache_size = size;
4188                         Newxz(PL_reg_poscache, size, char);
4189                     }
4190                     DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
4191       "%swhilem: Detected a super-linear match, switching on caching%s...\n",
4192                               PL_colors[4], PL_colors[5])
4193                     );
4194                 }
4195
4196                 if (PL_reg_leftiter < 0) {
4197                     /* have we already failed at this position? */
4198                     I32 offset, mask;
4199                     offset  = (scan->flags & 0xf) - 1
4200                                 + (locinput - PL_bostr)  * (scan->flags>>4);
4201                     mask    = 1 << (offset % 8);
4202                     offset /= 8;
4203                     if (PL_reg_poscache[offset] & mask) {
4204                         DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
4205                             "%*s  whilem: (cache) already tried at this position...\n",
4206                             REPORT_CODE_OFF+depth*2, "")
4207                         );
4208                         sayNO; /* cache records failure */
4209                     }
4210                     ST.cache_offset = offset;
4211                     ST.cache_mask   = mask;
4212                 }
4213             }
4214
4215             /* Prefer B over A for minimal matching. */
4216
4217             if (cur_curlyx->u.curlyx.minmod) {
4218                 ST.save_curlyx = cur_curlyx;
4219                 cur_curlyx = cur_curlyx->u.curlyx.prev_curlyx;
4220                 ST.cp = regcppush(ST.save_curlyx->u.curlyx.parenfloor);
4221                 REGCP_SET(ST.lastcp);
4222                 PUSH_YES_STATE_GOTO(WHILEM_B_min, ST.save_curlyx->u.curlyx.B);
4223                 /* NOTREACHED */
4224             }
4225
4226             /* Prefer A over B for maximal matching. */
4227
4228             if (n < cur_curlyx->u.curlyx.max) { /* More greed allowed? */
4229                 ST.cp = regcppush(cur_curlyx->u.curlyx.parenfloor);
4230                 cur_curlyx->u.curlyx.lastloc = locinput;
4231                 REGCP_SET(ST.lastcp);
4232                 PUSH_STATE_GOTO(WHILEM_A_max, cur_curlyx->u.curlyx.A);
4233                 /* NOTREACHED */
4234             }
4235             goto do_whilem_B_max;
4236         }
4237         /* NOTREACHED */
4238
4239         case WHILEM_B_min: /* just matched B in a minimal match */
4240         case WHILEM_B_max: /* just matched B in a maximal match */
4241             cur_curlyx = ST.save_curlyx;
4242             sayYES;
4243             /* NOTREACHED */
4244
4245         case WHILEM_B_max_fail: /* just failed to match B in a maximal match */
4246             cur_curlyx = ST.save_curlyx;
4247             cur_curlyx->u.curlyx.lastloc = ST.save_lastloc;
4248             cur_curlyx->u.curlyx.count--;
4249             CACHEsayNO;
4250             /* NOTREACHED */
4251
4252         case WHILEM_A_min_fail: /* just failed to match A in a minimal match */
4253             REGCP_UNWIND(ST.lastcp);
4254             regcppop(rex);
4255             /* FALL THROUGH */
4256         case WHILEM_A_pre_fail: /* just failed to match even minimal A */
4257             cur_curlyx->u.curlyx.lastloc = ST.save_lastloc;
4258             cur_curlyx->u.curlyx.count--;
4259             CACHEsayNO;
4260             /* NOTREACHED */
4261
4262         case WHILEM_A_max_fail: /* just failed to match A in a maximal match */
4263             REGCP_UNWIND(ST.lastcp);
4264             regcppop(rex);      /* Restore some previous $<digit>s? */
4265             PL_reginput = locinput;
4266             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
4267                 "%*s  whilem: failed, trying continuation...\n",
4268                 REPORT_CODE_OFF+depth*2, "")
4269             );
4270           do_whilem_B_max:
4271             if (cur_curlyx->u.curlyx.count >= REG_INFTY
4272                 && ckWARN(WARN_REGEXP)
4273                 && !(PL_reg_flags & RF_warned))
4274             {
4275                 PL_reg_flags |= RF_warned;
4276                 Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s limit (%d) exceeded",
4277                      "Complex regular subexpression recursion",
4278                      REG_INFTY - 1);
4279             }
4280
4281             /* now try B */
4282             ST.save_curlyx = cur_curlyx;
4283             cur_curlyx = cur_curlyx->u.curlyx.prev_curlyx;
4284             PUSH_YES_STATE_GOTO(WHILEM_B_max, ST.save_curlyx->u.curlyx.B);
4285             /* NOTREACHED */
4286
4287         case WHILEM_B_min_fail: /* just failed to match B in a minimal match */
4288             cur_curlyx = ST.save_curlyx;
4289             REGCP_UNWIND(ST.lastcp);
4290             regcppop(rex);
4291
4292             if (cur_curlyx->u.curlyx.count >= cur_curlyx->u.curlyx.max) {
4293                 /* Maximum greed exceeded */
4294                 if (cur_curlyx->u.curlyx.count >= REG_INFTY
4295                     && ckWARN(WARN_REGEXP)
4296                     && !(PL_reg_flags & RF_warned))
4297                 {
4298                     PL_reg_flags |= RF_warned;
4299                     Perl_warner(aTHX_ packWARN(WARN_REGEXP),
4300                         "%s limit (%d) exceeded",
4301                         "Complex regular subexpression recursion",
4302                         REG_INFTY - 1);
4303                 }
4304                 cur_curlyx->u.curlyx.count--;
4305                 CACHEsayNO;
4306             }
4307
4308             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
4309                 "%*s  trying longer...\n", REPORT_CODE_OFF+depth*2, "")
4310             );
4311             /* Try grabbing another A and see if it helps. */
4312             PL_reginput = locinput;
4313             cur_curlyx->u.curlyx.lastloc = locinput;
4314             ST.cp = regcppush(cur_curlyx->u.curlyx.parenfloor);
4315             REGCP_SET(ST.lastcp);
4316             PUSH_STATE_GOTO(WHILEM_A_min, ST.save_curlyx->u.curlyx.A);
4317             /* NOTREACHED */
4318
4319 #undef  ST
4320 #define ST st->u.branch
4321
4322         case BRANCHJ:       /*  /(...|A|...)/ with long next pointer */
4323             next = scan + ARG(scan);
4324             if (next == scan)
4325                 next = NULL;
4326             scan = NEXTOPER(scan);
4327             /* FALL THROUGH */
4328
4329         case BRANCH:        /*  /(...|A|...)/ */
4330             scan = NEXTOPER(scan); /* scan now points to inner node */
4331             ST.lastparen = *PL_reglastparen;
4332             ST.next_branch = next;
4333             REGCP_SET(ST.cp);
4334             PL_reginput = locinput;
4335
4336             /* Now go into the branch */
4337             if (has_cutgroup) {
4338                 PUSH_YES_STATE_GOTO(BRANCH_next, scan);
4339             } else {
4340                 PUSH_STATE_GOTO(BRANCH_next, scan);
4341             }
4342             /* NOTREACHED */
4343         case CUTGROUP:
4344             PL_reginput = locinput;
4345             sv_yes_mark = st->u.mark.mark_name = scan->flags ? NULL :
4346                 (SV*)rexi->data->data[ ARG( scan ) ];
4347             PUSH_STATE_GOTO(CUTGROUP_next,next);
4348             /* NOTREACHED */
4349         case CUTGROUP_next_fail:
4350             do_cutgroup = 1;
4351             no_final = 1;
4352             if (st->u.mark.mark_name)
4353                 sv_commit = st->u.mark.mark_name;
4354             sayNO;
4355             /* NOTREACHED */
4356         case BRANCH_next:
4357             sayYES;
4358             /* NOTREACHED */
4359         case BRANCH_next_fail: /* that branch failed; try the next, if any */
4360             if (do_cutgroup) {
4361                 do_cutgroup = 0;
4362                 no_final = 0;
4363             }
4364             REGCP_UNWIND(ST.cp);
4365             for (n = *PL_reglastparen; n > ST.lastparen; n--)
4366                 PL_regoffs[n].end = -1;
4367             *PL_reglastparen = n;
4368             /*dmq: *PL_reglastcloseparen = n; */
4369             scan = ST.next_branch;
4370             /* no more branches? */
4371             if (!scan || (OP(scan) != BRANCH && OP(scan) != BRANCHJ)) {
4372                 DEBUG_EXECUTE_r({
4373                     PerlIO_printf( Perl_debug_log,
4374                         "%*s  %sBRANCH failed...%s\n",
4375                         REPORT_CODE_OFF+depth*2, "",
4376                         PL_colors[4],
4377                         PL_colors[5] );
4378                 });
4379                 sayNO_SILENT;
4380             }
4381             continue; /* execute next BRANCH[J] op */
4382             /* NOTREACHED */
4383
4384         case MINMOD:
4385             minmod = 1;
4386             break;
4387
4388 #undef  ST
4389 #define ST st->u.curlym
4390
4391         case CURLYM:    /* /A{m,n}B/ where A is fixed-length */
4392
4393             /* This is an optimisation of CURLYX that enables us to push
4394              * only a single backtracking state, no matter now many matches
4395              * there are in {m,n}. It relies on the pattern being constant
4396              * length, with no parens to influence future backrefs
4397              */
4398
4399             ST.me = scan;
4400             scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
4401
4402             /* if paren positive, emulate an OPEN/CLOSE around A */
4403             if (ST.me->flags) {
4404                 U32 paren = ST.me->flags;
4405                 if (paren > PL_regsize)
4406                     PL_regsize = paren;
4407                 if (paren > *PL_reglastparen)
4408                     *PL_reglastparen = paren;
4409                 scan += NEXT_OFF(scan); /* Skip former OPEN. */
4410             }
4411             ST.A = scan;
4412             ST.B = next;
4413             ST.alen = 0;
4414             ST.count = 0;
4415             ST.minmod = minmod;
4416             minmod = 0;
4417             ST.c1 = CHRTEST_UNINIT;
4418             REGCP_SET(ST.cp);
4419
4420             if (!(ST.minmod ? ARG1(ST.me) : ARG2(ST.me))) /* min/max */
4421                 goto curlym_do_B;
4422
4423           curlym_do_A: /* execute the A in /A{m,n}B/  */
4424             PL_reginput = locinput;
4425             PUSH_YES_STATE_GOTO(CURLYM_A, ST.A); /* match A */
4426             /* NOTREACHED */
4427
4428         case CURLYM_A: /* we've just matched an A */
4429             locinput = st->locinput;
4430             nextchr = UCHARAT(locinput);
4431
4432             ST.count++;
4433             /* after first match, determine A's length: u.curlym.alen */
4434             if (ST.count == 1) {
4435                 if (PL_reg_match_utf8) {
4436                     char *s = locinput;
4437                     while (s < PL_reginput) {
4438                         ST.alen++;
4439                         s += UTF8SKIP(s);
4440                     }
4441                 }
4442                 else {
4443                     ST.alen = PL_reginput - locinput;
4444                 }
4445                 if (ST.alen == 0)
4446                     ST.count = ST.minmod ? ARG1(ST.me) : ARG2(ST.me);
4447             }
4448             DEBUG_EXECUTE_r(
4449                 PerlIO_printf(Perl_debug_log,
4450                           "%*s  CURLYM now matched %"IVdf" times, len=%"IVdf"...\n",
4451                           (int)(REPORT_CODE_OFF+(depth*2)), "",
4452                           (IV) ST.count, (IV)ST.alen)
4453             );
4454
4455             locinput = PL_reginput;
4456
4457             if (cur_eval && cur_eval->u.eval.close_paren &&
4458                 cur_eval->u.eval.close_paren == (U32)ST.me->flags)
4459                 goto fake_end;
4460
4461             if ( ST.count < (ST.minmod ? ARG1(ST.me) : ARG2(ST.me)) )
4462                 goto curlym_do_A; /* try to match another A */
4463             goto curlym_do_B; /* try to match B */
4464
4465         case CURLYM_A_fail: /* just failed to match an A */
4466             REGCP_UNWIND(ST.cp);
4467
4468             if (ST.minmod || ST.count < ARG1(ST.me) /* min*/
4469                 || (cur_eval && cur_eval->u.eval.close_paren &&
4470                     cur_eval->u.eval.close_paren == (U32)ST.me->flags))
4471                 sayNO;
4472
4473           curlym_do_B: /* execute the B in /A{m,n}B/  */
4474             PL_reginput = locinput;
4475             if (ST.c1 == CHRTEST_UNINIT) {
4476                 /* calculate c1 and c2 for possible match of 1st char
4477                  * following curly */
4478                 ST.c1 = ST.c2 = CHRTEST_VOID;
4479                 if (HAS_TEXT(ST.B) || JUMPABLE(ST.B)) {
4480                     regnode *text_node = ST.B;
4481                     if (! HAS_TEXT(text_node))
4482                         FIND_NEXT_IMPT(text_node);
4483                     /* this used to be
4484
4485                         (HAS_TEXT(text_node) && PL_regkind[OP(text_node)] == EXACT)
4486
4487                         But the former is redundant in light of the latter.
4488
4489                         if this changes back then the macro for
4490                         IS_TEXT and friends need to change.
4491                      */
4492                     if (PL_regkind[OP(text_node)] == EXACT)
4493                     {
4494
4495                         ST.c1 = (U8)*STRING(text_node);
4496                         ST.c2 =
4497                             (IS_TEXTF(text_node))
4498                             ? PL_fold[ST.c1]
4499                             : (IS_TEXTFL(text_node))
4500                                 ? PL_fold_locale[ST.c1]
4501                                 : ST.c1;
4502                     }
4503                 }
4504             }
4505
4506             DEBUG_EXECUTE_r(
4507                 PerlIO_printf(Perl_debug_log,
4508                     "%*s  CURLYM trying tail with matches=%"IVdf"...\n",
4509                     (int)(REPORT_CODE_OFF+(depth*2)),
4510                     "", (IV)ST.count)
4511                 );
4512             if (ST.c1 != CHRTEST_VOID
4513                     && UCHARAT(PL_reginput) != ST.c1
4514                     && UCHARAT(PL_reginput) != ST.c2)
4515             {
4516                 /* simulate B failing */
4517                 DEBUG_OPTIMISE_r(
4518                     PerlIO_printf(Perl_debug_log,
4519                         "%*s  CURLYM Fast bail c1=%"IVdf" c2=%"IVdf"\n",
4520                         (int)(REPORT_CODE_OFF+(depth*2)),"",
4521                         (IV)ST.c1,(IV)ST.c2
4522                 ));
4523                 state_num = CURLYM_B_fail;
4524                 goto reenter_switch;
4525             }
4526
4527             if (ST.me->flags) {
4528                 /* mark current A as captured */
4529                 I32 paren = ST.me->flags;
4530                 if (ST.count) {
4531                     PL_regoffs[paren].start
4532                         = HOPc(PL_reginput, -ST.alen) - PL_bostr;
4533                     PL_regoffs[paren].end = PL_reginput - PL_bostr;
4534                     /*dmq: *PL_reglastcloseparen = paren; */
4535                 }
4536                 else
4537                     PL_regoffs[paren].end = -1;
4538                 if (cur_eval && cur_eval->u.eval.close_paren &&
4539                     cur_eval->u.eval.close_paren == (U32)ST.me->flags)
4540                 {
4541                     if (ST.count)
4542                         goto fake_end;
4543                     else
4544                         sayNO;
4545                 }
4546             }
4547
4548             PUSH_STATE_GOTO(CURLYM_B, ST.B); /* match B */
4549             /* NOTREACHED */
4550
4551         case CURLYM_B_fail: /* just failed to match a B */
4552             REGCP_UNWIND(ST.cp);
4553             if (ST.minmod) {
4554                 if (ST.count == ARG2(ST.me) /* max */)
4555                     sayNO;
4556                 goto curlym_do_A; /* try to match a further A */
4557             }
4558             /* backtrack one A */
4559             if (ST.count == ARG1(ST.me) /* min */)
4560                 sayNO;
4561             ST.count--;
4562             locinput = HOPc(locinput, -ST.alen);
4563             goto curlym_do_B; /* try to match B */
4564
4565 #undef ST
4566 #define ST st->u.curly
4567
4568 #define CURLY_SETPAREN(paren, success) \
4569     if (paren) { \
4570         if (success) { \
4571             PL_regoffs[paren].start = HOPc(locinput, -1) - PL_bostr; \
4572             PL_regoffs[paren].end = locinput - PL_bostr; \
4573             *PL_reglastcloseparen = paren; \
4574         } \
4575         else \
4576             PL_regoffs[paren].end = -1; \
4577     }
4578
4579         case STAR:              /*  /A*B/ where A is width 1 */
4580             ST.paren = 0;
4581             ST.min = 0;
4582             ST.max = REG_INFTY;
4583             scan = NEXTOPER(scan);
4584             goto repeat;
4585         case PLUS:              /*  /A+B/ where A is width 1 */
4586             ST.paren = 0;
4587             ST.min = 1;
4588             ST.max = REG_INFTY;
4589             scan = NEXTOPER(scan);
4590             goto repeat;
4591         case CURLYN:            /*  /(A){m,n}B/ where A is width 1 */
4592             ST.paren = scan->flags;     /* Which paren to set */
4593             if (ST.paren > PL_regsize)
4594                 PL_regsize = ST.paren;
4595             if (ST.paren > *PL_reglastparen)
4596                 *PL_reglastparen = ST.paren;
4597             ST.min = ARG1(scan);  /* min to match */
4598             ST.max = ARG2(scan);  /* max to match */
4599             if (cur_eval && cur_eval->u.eval.close_paren &&
4600                 cur_eval->u.eval.close_paren == (U32)ST.paren) {
4601                 ST.min=1;
4602                 ST.max=1;
4603             }
4604             scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
4605             goto repeat;
4606         case CURLY:             /*  /A{m,n}B/ where A is width 1 */
4607             ST.paren = 0;
4608             ST.min = ARG1(scan);  /* min to match */
4609             ST.max = ARG2(scan);  /* max to match */
4610             scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
4611           repeat:
4612             /*
4613             * Lookahead to avoid useless match attempts
4614             * when we know what character comes next.
4615             *
4616             * Used to only do .*x and .*?x, but now it allows
4617             * for )'s, ('s and (?{ ... })'s to be in the way
4618             * of the quantifier and the EXACT-like node.  -- japhy
4619             */
4620
4621             if (ST.min > ST.max) /* XXX make this a compile-time check? */
4622                 sayNO;
4623             if (HAS_TEXT(next) || JUMPABLE(next)) {
4624                 U8 *s;
4625                 regnode *text_node = next;
4626
4627                 if (! HAS_TEXT(text_node))
4628                     FIND_NEXT_IMPT(text_node);
4629
4630                 if (! HAS_TEXT(text_node))
4631                     ST.c1 = ST.c2 = CHRTEST_VOID;
4632                 else {
4633                     if ( PL_regkind[OP(text_node)] != EXACT ) {
4634                         ST.c1 = ST.c2 = CHRTEST_VOID;
4635                         goto assume_ok_easy;
4636                     }
4637                     else
4638                         s = (U8*)STRING(text_node);
4639
4640                     /*  Currently we only get here when
4641
4642                         PL_rekind[OP(text_node)] == EXACT
4643
4644                         if this changes back then the macro for IS_TEXT and
4645                         friends need to change. */
4646                     if (!UTF) {
4647                         ST.c2 = ST.c1 = *s;
4648                         if (IS_TEXTF(text_node))
4649                             ST.c2 = PL_fold[ST.c1];
4650                         else if (IS_TEXTFL(text_node))
4651                             ST.c2 = PL_fold_locale[ST.c1];
4652                     }
4653                     else { /* UTF */
4654                         if (IS_TEXTF(text_node)) {
4655                              STRLEN ulen1, ulen2;
4656                              U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
4657                              U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
4658
4659                              to_utf8_lower((U8*)s, tmpbuf1, &ulen1);
4660                              to_utf8_upper((U8*)s, tmpbuf2, &ulen2);
4661 #ifdef EBCDIC
4662                              ST.c1 = utf8n_to_uvchr(tmpbuf1, UTF8_MAXLEN, 0,
4663                                                     ckWARN(WARN_UTF8) ?
4664                                                     0 : UTF8_ALLOW_ANY);
4665                              ST.c2 = utf8n_to_uvchr(tmpbuf2, UTF8_MAXLEN, 0,
4666                                                     ckWARN(WARN_UTF8) ?
4667                                                     0 : UTF8_ALLOW_ANY);
4668 #else
4669                              ST.c1 = utf8n_to_uvuni(tmpbuf1, UTF8_MAXBYTES, 0,
4670                                                     uniflags);
4671                              ST.c2 = utf8n_to_uvuni(tmpbuf2, UTF8_MAXBYTES, 0,
4672                                                     uniflags);
4673 #endif
4674                         }
4675                         else {
4676                             ST.c2 = ST.c1 = utf8n_to_uvchr(s, UTF8_MAXBYTES, 0,
4677                                                      uniflags);
4678                         }
4679                     }
4680                 }
4681             }
4682             else
4683                 ST.c1 = ST.c2 = CHRTEST_VOID;
4684         assume_ok_easy:
4685
4686             ST.A = scan;
4687             ST.B = next;
4688             PL_reginput = locinput;
4689             if (minmod) {
4690                 minmod = 0;
4691                 if (ST.min && regrepeat(rex, ST.A, ST.min, depth) < ST.min)
4692                     sayNO;
4693                 ST.count = ST.min;
4694                 locinput = PL_reginput;
4695                 REGCP_SET(ST.cp);
4696                 if (ST.c1 == CHRTEST_VOID)
4697                     goto curly_try_B_min;
4698
4699                 ST.oldloc = locinput;
4700
4701                 /* set ST.maxpos to the furthest point along the
4702                  * string that could possibly match */
4703                 if  (ST.max == REG_INFTY) {
4704                     ST.maxpos = PL_regeol - 1;
4705                     if (do_utf8)
4706                         while (UTF8_IS_CONTINUATION(*(U8*)ST.maxpos))
4707                             ST.maxpos--;
4708                 }
4709                 else if (do_utf8) {
4710                     int m = ST.max - ST.min;
4711                     for (ST.maxpos = locinput;
4712                          m >0 && ST.maxpos + UTF8SKIP(ST.maxpos) <= PL_regeol; m--)
4713                         ST.maxpos += UTF8SKIP(ST.maxpos);
4714                 }
4715                 else {
4716                     ST.maxpos = locinput + ST.max - ST.min;
4717                     if (ST.maxpos >= PL_regeol)
4718                         ST.maxpos = PL_regeol - 1;
4719                 }
4720                 goto curly_try_B_min_known;
4721
4722             }
4723             else {
4724                 ST.count = regrepeat(rex, ST.A, ST.max, depth);
4725                 locinput = PL_reginput;
4726                 if (ST.count < ST.min)
4727                     sayNO;
4728                 if ((ST.count > ST.min)
4729                     && (PL_regkind[OP(ST.B)] == EOL) && (OP(ST.B) != MEOL))
4730                 {
4731                     /* A{m,n} must come at the end of the string, there's
4732                      * no point in backing off ... */
4733                     ST.min = ST.count;
4734                     /* ...except that $ and \Z can match before *and* after
4735                        newline at the end.  Consider "\n\n" =~ /\n+\Z\n/.
4736                        We may back off by one in this case. */
4737                     if (UCHARAT(PL_reginput - 1) == '\n' && OP(ST.B) != EOS)
4738                         ST.min--;
4739                 }
4740                 REGCP_SET(ST.cp);
4741                 goto curly_try_B_max;
4742             }
4743             /* NOTREACHED */
4744
4745
4746         case CURLY_B_min_known_fail:
4747             /* failed to find B in a non-greedy match where c1,c2 valid */
4748             if (ST.paren && ST.count)
4749                 PL_regoffs[ST.paren].end = -1;
4750
4751             PL_reginput = locinput;     /* Could be reset... */
4752             REGCP_UNWIND(ST.cp);
4753             /* Couldn't or didn't -- move forward. */
4754             ST.oldloc = locinput;
4755             if (do_utf8)
4756                 locinput += UTF8SKIP(locinput);
4757             else
4758                 locinput++;
4759             ST.count++;
4760           curly_try_B_min_known:
4761              /* find the next place where 'B' could work, then call B */
4762             {
4763                 int n;
4764                 if (do_utf8) {
4765                     n = (ST.oldloc == locinput) ? 0 : 1;
4766                     if (ST.c1 == ST.c2) {
4767                         STRLEN len;
4768                         /* set n to utf8_distance(oldloc, locinput) */
4769                         while (locinput <= ST.maxpos &&
4770                                utf8n_to_uvchr((U8*)locinput,
4771                                               UTF8_MAXBYTES, &len,
4772                                               uniflags) != (UV)ST.c1) {
4773                             locinput += len;
4774                             n++;
4775                         }
4776                     }
4777                     else {
4778                         /* set n to utf8_distance(oldloc, locinput) */
4779                         while (locinput <= ST.maxpos) {
4780                             STRLEN len;
4781                             const UV c = utf8n_to_uvchr((U8*)locinput,
4782                                                   UTF8_MAXBYTES, &len,
4783                                                   uniflags);
4784                             if (c == (UV)ST.c1 || c == (UV)ST.c2)
4785                                 break;
4786                             locinput += len;
4787                             n++;
4788                         }
4789                     }
4790                 }
4791                 else {
4792                     if (ST.c1 == ST.c2) {
4793                         while (locinput <= ST.maxpos &&
4794                                UCHARAT(locinput) != ST.c1)
4795                             locinput++;
4796                     }
4797                     else {
4798                         while (locinput <= ST.maxpos
4799                                && UCHARAT(locinput) != ST.c1
4800                                && UCHARAT(locinput) != ST.c2)
4801                             locinput++;
4802                     }
4803                     n = locinput - ST.oldloc;
4804                 }
4805                 if (locinput > ST.maxpos)
4806                     sayNO;
4807                 /* PL_reginput == oldloc now */
4808                 if (n) {
4809                     ST.count += n;
4810                     if (regrepeat(rex, ST.A, n, depth) < n)
4811                         sayNO;
4812                 }
4813                 PL_reginput = locinput;
4814                 CURLY_SETPAREN(ST.paren, ST.count);
4815                 if (cur_eval && cur_eval->u.eval.close_paren &&
4816                     cur_eval->u.eval.close_paren == (U32)ST.paren) {
4817                     goto fake_end;
4818                 }
4819                 PUSH_STATE_GOTO(CURLY_B_min_known, ST.B);
4820             }
4821             /* NOTREACHED */
4822
4823
4824         case CURLY_B_min_fail:
4825             /* failed to find B in a non-greedy match where c1,c2 invalid */
4826             if (ST.paren && ST.count)
4827                 PL_regoffs[ST.paren].end = -1;
4828
4829             REGCP_UNWIND(ST.cp);
4830             /* failed -- move forward one */
4831             PL_reginput = locinput;
4832             if (regrepeat(rex, ST.A, 1, depth)) {
4833                 ST.count++;
4834                 locinput = PL_reginput;
4835                 if (ST.count <= ST.max || (ST.max == REG_INFTY &&
4836                         ST.count > 0)) /* count overflow ? */
4837                 {
4838                   curly_try_B_min:
4839                     CURLY_SETPAREN(ST.paren, ST.count);
4840                     if (cur_eval && cur_eval->u.eval.close_paren &&
4841                         cur_eval->u.eval.close_paren == (U32)ST.paren) {
4842                         goto fake_end;
4843                     }
4844                     PUSH_STATE_GOTO(CURLY_B_min, ST.B);
4845                 }
4846             }
4847             sayNO;
4848             /* NOTREACHED */
4849
4850
4851         curly_try_B_max:
4852             /* a successful greedy match: now try to match B */
4853             if (cur_eval && cur_eval->u.eval.close_paren &&
4854                 cur_eval->u.eval.close_paren == (U32)ST.paren) {
4855                 goto fake_end;
4856             }
4857             {
4858                 UV c = 0;
4859                 if (ST.c1 != CHRTEST_VOID)
4860                     c = do_utf8 ? utf8n_to_uvchr((U8*)PL_reginput,
4861                                            UTF8_MAXBYTES, 0, uniflags)
4862                                 : (UV) UCHARAT(PL_reginput);
4863                 /* If it could work, try it. */
4864                 if (ST.c1 == CHRTEST_VOID || c == (UV)ST.c1 || c == (UV)ST.c2) {
4865                     CURLY_SETPAREN(ST.paren, ST.count);
4866                     PUSH_STATE_GOTO(CURLY_B_max, ST.B);
4867                     /* NOTREACHED */
4868                 }
4869             }
4870             /* FALL THROUGH */
4871         case CURLY_B_max_fail:
4872             /* failed to find B in a greedy match */
4873             if (ST.paren && ST.count)
4874                 PL_regoffs[ST.paren].end = -1;
4875
4876             REGCP_UNWIND(ST.cp);
4877             /*  back up. */
4878             if (--ST.count < ST.min)
4879                 sayNO;
4880             PL_reginput = locinput = HOPc(locinput, -1);
4881             goto curly_try_B_max;
4882
4883 #undef ST
4884
4885         case END:
4886             fake_end:
4887             if (cur_eval) {
4888                 /* we've just finished A in /(??{A})B/; now continue with B */
4889                 I32 tmpix;
4890                 st->u.eval.toggle_reg_flags
4891                             = cur_eval->u.eval.toggle_reg_flags;
4892                 PL_reg_flags ^= st->u.eval.toggle_reg_flags;
4893
4894                 st->u.eval.prev_rex = rex_sv;           /* inner */
4895                 SETREX(rex_sv,cur_eval->u.eval.prev_rex);
4896                 rex = (struct regexp *)SvANY(rex_sv);
4897                 rexi = RXi_GET(rex);
4898                 cur_curlyx = cur_eval->u.eval.prev_curlyx;
4899                 ReREFCNT_inc(rex_sv);
4900                 st->u.eval.cp = regcppush(0);   /* Save *all* the positions. */
4901                 REGCP_SET(st->u.eval.lastcp);
4902                 PL_reginput = locinput;
4903
4904                 /* Restore parens of the outer rex without popping the
4905                  * savestack */
4906                 tmpix = PL_savestack_ix;
4907                 PL_savestack_ix = cur_eval->u.eval.lastcp;
4908                 regcppop(rex);
4909                 PL_savestack_ix = tmpix;
4910
4911                 st->u.eval.prev_eval = cur_eval;
4912                 cur_eval = cur_eval->u.eval.prev_eval;
4913                 DEBUG_EXECUTE_r(
4914                     PerlIO_printf(Perl_debug_log, "%*s  EVAL trying tail ... %"UVxf"\n",
4915                                       REPORT_CODE_OFF+depth*2, "",PTR2UV(cur_eval)););
4916                 if ( nochange_depth )
4917                     nochange_depth--;
4918
4919                 PUSH_YES_STATE_GOTO(EVAL_AB,
4920                         st->u.eval.prev_eval->u.eval.B); /* match B */
4921             }
4922
4923             if (locinput < reginfo->till) {
4924                 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
4925                                       "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
4926                                       PL_colors[4],
4927                                       (long)(locinput - PL_reg_starttry),
4928                                       (long)(reginfo->till - PL_reg_starttry),
4929                                       PL_colors[5]));
4930
4931                 sayNO_SILENT;           /* Cannot match: too short. */
4932             }
4933             PL_reginput = locinput;     /* put where regtry can find it */
4934             sayYES;                     /* Success! */
4935
4936         case SUCCEED: /* successful SUSPEND/UNLESSM/IFMATCH/CURLYM */
4937             DEBUG_EXECUTE_r(
4938             PerlIO_printf(Perl_debug_log,
4939                 "%*s  %ssubpattern success...%s\n",
4940                 REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5]));
4941             PL_reginput = locinput;     /* put where regtry can find it */
4942             sayYES;                     /* Success! */
4943
4944 #undef  ST
4945 #define ST st->u.ifmatch
4946
4947         case SUSPEND:   /* (?>A) */
4948             ST.wanted = 1;
4949             PL_reginput = locinput;
4950             goto do_ifmatch;
4951
4952         case UNLESSM:   /* -ve lookaround: (?!A), or with flags, (?<!A) */
4953             ST.wanted = 0;
4954             goto ifmatch_trivial_fail_test;
4955
4956         case IFMATCH:   /* +ve lookaround: (?=A), or with flags, (?<=A) */
4957             ST.wanted = 1;
4958           ifmatch_trivial_fail_test:
4959             if (scan->flags) {
4960                 char * const s = HOPBACKc(locinput, scan->flags);
4961                 if (!s) {
4962                     /* trivial fail */
4963                     if (logical) {
4964                         logical = 0;
4965                         sw = 1 - (bool)ST.wanted;
4966                     }
4967                     else if (ST.wanted)
4968                         sayNO;
4969                     next = scan + ARG(scan);
4970                     if (next == scan)
4971                         next = NULL;
4972                     break;
4973                 }
4974                 PL_reginput = s;
4975             }
4976             else
4977                 PL_reginput = locinput;
4978
4979           do_ifmatch:
4980             ST.me = scan;
4981             ST.logical = logical;
4982             /* execute body of (?...A) */
4983             PUSH_YES_STATE_GOTO(IFMATCH_A, NEXTOPER(NEXTOPER(scan)));
4984             /* NOTREACHED */
4985
4986         case IFMATCH_A_fail: /* body of (?...A) failed */
4987             ST.wanted = !ST.wanted;
4988             /* FALL THROUGH */
4989
4990         case IFMATCH_A: /* body of (?...A) succeeded */
4991             if (ST.logical) {
4992                 sw = (bool)ST.wanted;
4993             }
4994             else if (!ST.wanted)
4995                 sayNO;
4996
4997             if (OP(ST.me) == SUSPEND)
4998                 locinput = PL_reginput;
4999             else {
5000                 locinput = PL_reginput = st->locinput;
5001                 nextchr = UCHARAT(locinput);
5002             }
5003             scan = ST.me + ARG(ST.me);
5004             if (scan == ST.me)
5005                 scan = NULL;
5006             continue; /* execute B */
5007
5008 #undef ST
5009
5010         case LONGJMP:
5011             next = scan + ARG(scan);
5012             if (next == scan)
5013                 next = NULL;
5014             break;
5015         case COMMIT:
5016             reginfo->cutpoint = PL_regeol;
5017             /* FALLTHROUGH */
5018         case PRUNE:
5019             PL_reginput = locinput;
5020             if (!scan->flags)
5021                 sv_yes_mark = sv_commit = (SV*)rexi->data->data[ ARG( scan ) ];
5022             PUSH_STATE_GOTO(COMMIT_next,next);
5023             /* NOTREACHED */
5024         case COMMIT_next_fail:
5025             no_final = 1;
5026             /* FALLTHROUGH */
5027         case OPFAIL:
5028             sayNO;
5029             /* NOTREACHED */
5030
5031 #define ST st->u.mark
5032         case MARKPOINT:
5033             ST.prev_mark = mark_state;
5034             ST.mark_name = sv_commit = sv_yes_mark
5035                 = (SV*)rexi->data->data[ ARG( scan ) ];
5036             mark_state = st;
5037             ST.mark_loc = PL_reginput = locinput;
5038             PUSH_YES_STATE_GOTO(MARKPOINT_next,next);
5039             /* NOTREACHED */
5040         case MARKPOINT_next:
5041             mark_state = ST.prev_mark;
5042             sayYES;
5043             /* NOTREACHED */
5044         case MARKPOINT_next_fail:
5045             if (popmark && sv_eq(ST.mark_name,popmark))
5046             {
5047                 if (ST.mark_loc > startpoint)
5048                     reginfo->cutpoint = HOPBACKc(ST.mark_loc, 1);
5049                 popmark = NULL; /* we found our mark */
5050                 sv_commit = ST.mark_name;
5051
5052                 DEBUG_EXECUTE_r({
5053                         PerlIO_printf(Perl_debug_log,
5054                             "%*s  %ssetting cutpoint to mark:%"SVf"...%s\n",
5055                             REPORT_CODE_OFF+depth*2, "",
5056                             PL_colors[4], SVfARG(sv_commit), PL_colors[5]);
5057                 });
5058             }
5059             mark_state = ST.prev_mark;
5060             sv_yes_mark = mark_state ?
5061                 mark_state->u.mark.mark_name : NULL;
5062             sayNO;
5063             /* NOTREACHED */
5064         case SKIP:
5065             PL_reginput = locinput;
5066             if (scan->flags) {
5067                 /* (*SKIP) : if we fail we cut here*/
5068                 ST.mark_name = NULL;
5069                 ST.mark_loc = locinput;
5070                 PUSH_STATE_GOTO(SKIP_next,next);
5071             } else {
5072                 /* (*SKIP:NAME) : if there is a (*MARK:NAME) fail where it was,
5073                    otherwise do nothing.  Meaning we need to scan
5074                  */
5075                 regmatch_state *cur = mark_state;
5076                 SV *find = (SV*)rexi->data->data[ ARG( scan ) ];
5077
5078                 while (cur) {
5079                     if ( sv_eq( cur->u.mark.mark_name,
5080                                 find ) )
5081                     {
5082                         ST.mark_name = find;
5083                         PUSH_STATE_GOTO( SKIP_next, next );
5084                     }
5085                     cur = cur->u.mark.prev_mark;
5086                 }
5087             }
5088             /* Didn't find our (*MARK:NAME) so ignore this (*SKIP:NAME) */
5089             break;
5090         case SKIP_next_fail:
5091             if (ST.mark_name) {
5092                 /* (*CUT:NAME) - Set up to search for the name as we
5093                    collapse the stack*/
5094                 popmark = ST.mark_name;
5095             } else {
5096                 /* (*CUT) - No name, we cut here.*/
5097                 if (ST.mark_loc > startpoint)
5098                     reginfo->cutpoint = HOPBACKc(ST.mark_loc, 1);
5099                 /* but we set sv_commit to latest mark_name if there
5100                    is one so they can test to see how things lead to this
5101                    cut */
5102                 if (mark_state)
5103                     sv_commit=mark_state->u.mark.mark_name;
5104             }
5105             no_final = 1;
5106             sayNO;
5107             /* NOTREACHED */
5108 #undef ST
5109         case FOLDCHAR:
5110             n = ARG(scan);
5111             if ( n == (U32)what_len_TRICKYFOLD(locinput,do_utf8,ln) ) {
5112                 locinput += ln;
5113             } else if ( 0xDF == n && !do_utf8 && !UTF ) {
5114                 sayNO;
5115             } else  {
5116                 U8 folded[UTF8_MAXBYTES_CASE+1];
5117                 STRLEN foldlen;
5118                 const char * const l = locinput;
5119                 char *e = PL_regeol;
5120                 to_uni_fold(n, folded, &foldlen);
5121
5122                 if (ibcmp_utf8((const char*) folded, 0,  foldlen, 1,
5123                                l, &e, 0,  do_utf8)) {
5124                         sayNO;
5125                 }
5126                 locinput = e;
5127             }
5128             nextchr = UCHARAT(locinput);
5129             break;
5130         case LNBREAK:
5131             if ((n=is_LNBREAK(locinput,do_utf8))) {
5132                 locinput += n;
5133                 nextchr = UCHARAT(locinput);
5134             } else
5135                 sayNO;
5136             break;
5137
5138 #define CASE_CLASS(nAmE)                              \
5139         case nAmE:                                    \
5140             if ((n=is_##nAmE(locinput,do_utf8))) {    \
5141                 locinput += n;                        \
5142                 nextchr = UCHARAT(locinput);          \
5143             } else                                    \
5144                 sayNO;                                \
5145             break;                                    \
5146         case N##nAmE:                                 \
5147             if ((n=is_##nAmE(locinput,do_utf8))) {    \
5148                 sayNO;                                \
5149             } else {                                  \
5150                 locinput += UTF8SKIP(locinput);       \
5151                 nextchr = UCHARAT(locinput);          \
5152             }                                         \
5153             break
5154
5155         CASE_CLASS(VERTWS);
5156         CASE_CLASS(HORIZWS);
5157 #undef CASE_CLASS
5158
5159         default:
5160             PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
5161                           PTR2UV(scan), OP(scan));
5162             Perl_croak(aTHX_ "regexp memory corruption");
5163
5164         } /* end switch */
5165
5166         /* switch break jumps here */
5167         scan = next; /* prepare to execute the next op and ... */
5168         continue;    /* ... jump back to the top, reusing st */
5169         /* NOTREACHED */
5170
5171       push_yes_state:
5172         /* push a state that backtracks on success */
5173         st->u.yes.prev_yes_state = yes_state;
5174         yes_state = st;
5175         /* FALL THROUGH */
5176       push_state:
5177         /* push a new regex state, then continue at scan  */
5178         {
5179             regmatch_state *newst;
5180
5181             DEBUG_STACK_r({
5182                 regmatch_state *cur = st;
5183                 regmatch_state *curyes = yes_state;
5184                 int curd = depth;
5185                 regmatch_slab *slab = PL_regmatch_slab;
5186                 for (;curd > -1;cur--,curd--) {
5187                     if (cur < SLAB_FIRST(slab)) {
5188                         slab = slab->prev;
5189                         cur = SLAB_LAST(slab);
5190                     }
5191                     PerlIO_printf(Perl_error_log, "%*s#%-3d %-10s %s\n",
5192                         REPORT_CODE_OFF + 2 + depth * 2,"",
5193                         curd, PL_reg_name[cur->resume_state],
5194                         (curyes == cur) ? "yes" : ""
5195                     );
5196                     if (curyes == cur)
5197                         curyes = cur->u.yes.prev_yes_state;
5198                 }
5199             } else
5200                 DEBUG_STATE_pp("push")
5201             );
5202             depth++;
5203             st->locinput = locinput;
5204             newst = st+1;
5205             if (newst >  SLAB_LAST(PL_regmatch_slab))
5206                 newst = S_push_slab(aTHX);
5207             PL_regmatch_state = newst;
5208
5209             locinput = PL_reginput;
5210             nextchr = UCHARAT(locinput);
5211             st = newst;
5212             continue;
5213             /* NOTREACHED */
5214         }
5215     }
5216
5217     /*
5218     * We get here only if there's trouble -- normally "case END" is
5219     * the terminating point.
5220     */
5221     Perl_croak(aTHX_ "corrupted regexp pointers");
5222     /*NOTREACHED*/
5223     sayNO;
5224
5225 yes:
5226     if (yes_state) {
5227         /* we have successfully completed a subexpression, but we must now
5228          * pop to the state marked by yes_state and continue from there */
5229         assert(st != yes_state);
5230 #ifdef DEBUGGING
5231         while (st != yes_state) {
5232             st--;
5233             if (st < SLAB_FIRST(PL_regmatch_slab)) {
5234                 PL_regmatch_slab = PL_regmatch_slab->prev;
5235                 st = SLAB_LAST(PL_regmatch_slab);
5236             }
5237             DEBUG_STATE_r({
5238                 if (no_final) {
5239                     DEBUG_STATE_pp("pop (no final)");
5240                 } else {
5241                     DEBUG_STATE_pp("pop (yes)");
5242                 }
5243             });
5244             depth--;
5245         }
5246 #else
5247         while (yes_state < SLAB_FIRST(PL_regmatch_slab)
5248             || yes_state > SLAB_LAST(PL_regmatch_slab))
5249         {
5250             /* not in this slab, pop slab */
5251             depth -= (st - SLAB_FIRST(PL_regmatch_slab) + 1);
5252             PL_regmatch_slab = PL_regmatch_slab->prev;
5253             st = SLAB_LAST(PL_regmatch_slab);
5254         }
5255         depth -= (st - yes_state);
5256 #endif
5257         st = yes_state;
5258         yes_state = st->u.yes.prev_yes_state;
5259         PL_regmatch_state = st;
5260
5261         if (no_final) {
5262             locinput= st->locinput;
5263             nextchr = UCHARAT(locinput);
5264         }
5265         state_num = st->resume_state + no_final;
5266         goto reenter_switch;
5267     }
5268
5269     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
5270                           PL_colors[4], PL_colors[5]));
5271
5272     if (PL_reg_eval_set) {
5273         /* each successfully executed (?{...}) block does the equivalent of
5274          *   local $^R = do {...}
5275          * When popping the save stack, all these locals would be undone;
5276          * bypass this by setting the outermost saved $^R to the latest
5277          * value */
5278         if (oreplsv != GvSV(PL_replgv))
5279             sv_setsv(oreplsv, GvSV(PL_replgv));
5280     }
5281     result = 1;
5282     goto final_exit;
5283
5284 no:
5285     DEBUG_EXECUTE_r(
5286         PerlIO_printf(Perl_debug_log,
5287             "%*s  %sfailed...%s\n",
5288             REPORT_CODE_OFF+depth*2, "",
5289             PL_colors[4], PL_colors[5])
5290         );
5291
5292 no_silent:
5293     if (no_final) {
5294         if (yes_state) {
5295             goto yes;
5296         } else {
5297             goto final_exit;
5298         }
5299     }
5300     if (depth) {
5301         /* there's a previous state to backtrack to */
5302         st--;
5303         if (st < SLAB_FIRST(PL_regmatch_slab)) {
5304             PL_regmatch_slab = PL_regmatch_slab->prev;
5305             st = SLAB_LAST(PL_regmatch_slab);
5306         }
5307         PL_regmatch_state = st;
5308         locinput= st->locinput;
5309         nextchr = UCHARAT(locinput);
5310
5311         DEBUG_STATE_pp("pop");
5312         depth--;
5313         if (yes_state == st)
5314             yes_state = st->u.yes.prev_yes_state;
5315
5316         state_num = st->resume_state + 1; /* failure = success + 1 */
5317         goto reenter_switch;
5318     }
5319     result = 0;
5320
5321   final_exit:
5322     if (rex->intflags & PREGf_VERBARG_SEEN) {
5323         SV *sv_err = get_sv("REGERROR", 1);
5324         SV *sv_mrk = get_sv("REGMARK", 1);
5325         if (result) {
5326             sv_commit = &PL_sv_no;
5327             if (!sv_yes_mark)
5328                 sv_yes_mark = &PL_sv_yes;
5329         } else {
5330             if (!sv_commit)
5331                 sv_commit = &PL_sv_yes;
5332             sv_yes_mark = &PL_sv_no;
5333         }
5334         sv_setsv(sv_err, sv_commit);
5335         sv_setsv(sv_mrk, sv_yes_mark);
5336     }
5337
5338     /* clean up; in particular, free all slabs above current one */
5339     LEAVE_SCOPE(oldsave);
5340
5341     return result;
5342 }
5343
5344 /*
5345  - regrepeat - repeatedly match something simple, report how many
5346  */
5347 /*
5348  * [This routine now assumes that it will only match on things of length 1.
5349  * That was true before, but now we assume scan - reginput is the count,
5350  * rather than incrementing count on every character.  [Er, except utf8.]]
5351  */
5352 STATIC I32
5353 S_regrepeat(pTHX_ const regexp *prog, const regnode *p, I32 max, int depth)
5354 {
5355     dVAR;
5356     register char *scan;
5357     register I32 c;
5358     register char *loceol = PL_regeol;
5359     register I32 hardcount = 0;
5360     register bool do_utf8 = PL_reg_match_utf8;
5361 #ifndef DEBUGGING
5362     PERL_UNUSED_ARG(depth);
5363 #endif
5364
5365     PERL_ARGS_ASSERT_REGREPEAT;
5366
5367     scan = PL_reginput;
5368     if (max == REG_INFTY)
5369         max = I32_MAX;
5370     else if (max < loceol - scan)
5371         loceol = scan + max;
5372     switch (OP(p)) {
5373     case REG_ANY:
5374         if (do_utf8) {
5375             loceol = PL_regeol;
5376             while (scan < loceol && hardcount < max && *scan != '\n') {
5377                 scan += UTF8SKIP(scan);
5378                 hardcount++;
5379             }
5380         } else {
5381             while (scan < loceol && *scan != '\n')
5382                 scan++;
5383         }
5384         break;
5385     case SANY:
5386         if (do_utf8) {
5387             loceol = PL_regeol;
5388             while (scan < loceol && hardcount < max) {
5389                 scan += UTF8SKIP(scan);
5390                 hardcount++;
5391             }
5392         }
5393         else
5394             scan = loceol;
5395         break;
5396     case CANY:
5397         scan = loceol;
5398         break;
5399     case EXACT:         /* length of string is 1 */
5400         c = (U8)*STRING(p);
5401         while (scan < loceol && UCHARAT(scan) == c)
5402             scan++;
5403         break;
5404     case EXACTF:        /* length of string is 1 */
5405         c = (U8)*STRING(p);
5406         while (scan < loceol &&
5407                (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
5408             scan++;
5409         break;
5410     case EXACTFL:       /* length of string is 1 */
5411         PL_reg_flags |= RF_tainted;
5412         c = (U8)*STRING(p);
5413         while (scan < loceol &&
5414                (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
5415             scan++;
5416         break;
5417     case ANYOF:
5418         if (do_utf8) {
5419             loceol = PL_regeol;
5420             while (hardcount < max && scan < loceol &&
5421                    reginclass(prog, p, (U8*)scan, 0, do_utf8)) {
5422                 scan += UTF8SKIP(scan);
5423                 hardcount++;
5424             }
5425         } else {
5426             while (scan < loceol && REGINCLASS(prog, p, (U8*)scan))
5427                 scan++;
5428         }
5429         break;
5430     case ALNUM:
5431         if (do_utf8) {
5432             loceol = PL_regeol;
5433             LOAD_UTF8_CHARCLASS_ALNUM();
5434             while (hardcount < max && scan < loceol &&
5435                    swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
5436                 scan += UTF8SKIP(scan);
5437                 hardcount++;
5438             }
5439         } else {
5440             while (scan < loceol && isALNUM(*scan))
5441                 scan++;
5442         }
5443         break;
5444     case ALNUML:
5445         PL_reg_flags |= RF_tainted;
5446         if (do_utf8) {
5447             loceol = PL_regeol;
5448             while (hardcount < max && scan < loceol &&
5449                    isALNUM_LC_utf8((U8*)scan)) {
5450                 scan += UTF8SKIP(scan);
5451                 hardcount++;
5452             }
5453         } else {
5454             while (scan < loceol && isALNUM_LC(*scan))
5455                 scan++;
5456         }
5457         break;
5458     case NALNUM:
5459         if (do_utf8) {
5460             loceol = PL_regeol;
5461             LOAD_UTF8_CHARCLASS_ALNUM();
5462             while (hardcount < max && scan < loceol &&
5463                    !swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
5464                 scan += UTF8SKIP(scan);
5465                 hardcount++;
5466             }
5467         } else {
5468             while (scan < loceol && !isALNUM(*scan))
5469                 scan++;
5470         }
5471         break;
5472     case NALNUML:
5473         PL_reg_flags |= RF_tainted;
5474         if (do_utf8) {
5475             loceol = PL_regeol;
5476             while (hardcount < max && scan < loceol &&
5477                    !isALNUM_LC_utf8((U8*)scan)) {
5478                 scan += UTF8SKIP(scan);
5479                 hardcount++;
5480             }
5481         } else {
5482             while (scan < loceol && !isALNUM_LC(*scan))
5483                 scan++;
5484         }
5485         break;
5486     case SPACE:
5487         if (do_utf8) {
5488             loceol = PL_regeol;
5489             LOAD_UTF8_CHARCLASS_SPACE();
5490             while (hardcount < max && scan < loceol &&
5491                    (*scan == ' ' ||
5492                     swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
5493                 scan += UTF8SKIP(scan);
5494                 hardcount++;
5495             }
5496         } else {
5497             while (scan < loceol && isSPACE(*scan))
5498                 scan++;
5499         }
5500         break;
5501     case SPACEL:
5502         PL_reg_flags |= RF_tainted;
5503         if (do_utf8) {
5504             loceol = PL_regeol;
5505             while (hardcount < max && scan < loceol &&
5506                    (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
5507                 scan += UTF8SKIP(scan);
5508                 hardcount++;
5509             }
5510         } else {
5511             while (scan < loceol && isSPACE_LC(*scan))
5512                 scan++;
5513         }
5514         break;
5515     case NSPACE:
5516         if (do_utf8) {
5517             loceol = PL_regeol;
5518             LOAD_UTF8_CHARCLASS_SPACE();
5519             while (hardcount < max && scan < loceol &&
5520                    !(*scan == ' ' ||
5521                      swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
5522                 scan += UTF8SKIP(scan);
5523                 hardcount++;
5524             }
5525         } else {
5526             while (scan < loceol && !isSPACE(*scan))
5527                 scan++;
5528         }
5529         break;
5530     case NSPACEL:
5531         PL_reg_flags |= RF_tainted;
5532         if (do_utf8) {
5533             loceol = PL_regeol;
5534             while (hardcount < max && scan < loceol &&
5535                    !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
5536                 scan += UTF8SKIP(scan);
5537                 hardcount++;
5538             }
5539         } else {
5540             while (scan < loceol && !isSPACE_LC(*scan))
5541                 scan++;
5542         }
5543         break;
5544     case DIGIT:
5545         if (do_utf8) {
5546             loceol = PL_regeol;
5547             LOAD_UTF8_CHARCLASS_DIGIT();
5548             while (hardcount < max && scan < loceol &&
5549                    swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
5550                 scan += UTF8SKIP(scan);
5551                 hardcount++;
5552             }
5553         } else {
5554             while (scan < loceol && isDIGIT(*scan))
5555                 scan++;
5556         }
5557         break;
5558     case NDIGIT:
5559         if (do_utf8) {
5560             loceol = PL_regeol;
5561             LOAD_UTF8_CHARCLASS_DIGIT();
5562             while (hardcount < max && scan < loceol &&
5563                    !swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
5564                 scan += UTF8SKIP(scan);
5565                 hardcount++;
5566             }
5567         } else {
5568             while (scan < loceol && !isDIGIT(*scan))
5569                 scan++;
5570         }
5571     case LNBREAK:
5572         if (do_utf8) {
5573             loceol = PL_regeol;
5574             while (hardcount < max && scan < loceol && (c=is_LNBREAK_utf8(scan))) {
5575                 scan += c;
5576                 hardcount++;
5577             }
5578         } else {
5579             /*
5580               LNBREAK can match two latin chars, which is ok,
5581               because we have a null terminated string, but we
5582               have to use hardcount in this situation
5583             */
5584             while (scan < loceol && (c=is_LNBREAK_latin1(scan)))  {
5585                 scan+=c;
5586                 hardcount++;
5587             }
5588         }
5589         break;
5590     case HORIZWS:
5591         if (do_utf8) {
5592             loceol = PL_regeol;
5593             while (hardcount < max && scan < loceol && (c=is_HORIZWS_utf8(scan))) {
5594                 scan += c;
5595                 hardcount++;
5596             }
5597         } else {
5598             while (scan < loceol && is_HORIZWS_latin1(scan))
5599                 scan++;
5600         }
5601         break;
5602     case NHORIZWS:
5603         if (do_utf8) {
5604             loceol = PL_regeol;
5605             while (hardcount < max && scan < loceol && !is_HORIZWS_utf8(scan)) {
5606                 scan += UTF8SKIP(scan);
5607                 hardcount++;
5608             }
5609         } else {
5610             while (scan < loceol && !is_HORIZWS_latin1(scan))
5611                 scan++;
5612
5613         }
5614         break;
5615     case VERTWS:
5616         if (do_utf8) {
5617             loceol = PL_regeol;
5618             while (hardcount < max && scan < loceol && (c=is_VERTWS_utf8(scan))) {
5619                 scan += c;
5620                 hardcount++;
5621             }
5622         } else {
5623             while (scan < loceol && is_VERTWS_latin1(scan))
5624                 scan++;
5625
5626         }
5627         break;
5628     case NVERTWS:
5629         if (do_utf8) {
5630             loceol = PL_regeol;
5631             while (hardcount < max && scan < loceol && !is_VERTWS_utf8(scan)) {
5632                 scan += UTF8SKIP(scan);
5633                 hardcount++;
5634             }
5635         } else {
5636             while (scan < loceol && !is_VERTWS_latin1(scan))
5637                 scan++;
5638
5639         }
5640         break;
5641
5642     default:            /* Called on something of 0 width. */
5643         break;          /* So match right here or not at all. */
5644     }
5645
5646     if (hardcount)
5647         c = hardcount;
5648     else
5649         c = scan - PL_reginput;
5650     PL_reginput = scan;
5651
5652     DEBUG_r({
5653         GET_RE_DEBUG_FLAGS_DECL;
5654         DEBUG_EXECUTE_r({
5655             SV * const prop = sv_newmortal();
5656             regprop(prog, prop, p);
5657             PerlIO_printf(Perl_debug_log,
5658                         "%*s  %s can match %"IVdf" times out of %"IVdf"...\n",
5659                         REPORT_CODE_OFF + depth*2, "", SvPVX_const(prop),(IV)c,(IV)max);
5660         });
5661     });
5662
5663     return(c);
5664 }
5665
5666
5667 #if !defined(PERL_IN_XSUB_RE) || defined(PLUGGABLE_RE_EXTENSION)
5668 /*
5669 - regclass_swash - prepare the utf8 swash
5670 */
5671
5672 SV *
5673 Perl_regclass_swash(pTHX_ const regexp *prog, register const regnode* node, bool doinit, SV** listsvp, SV **altsvp)
5674 {
5675     dVAR;
5676     SV *sw  = NULL;
5677     SV *si  = NULL;
5678     SV *alt = NULL;
5679     RXi_GET_DECL(prog,progi);
5680     const struct reg_data * const data = prog ? progi->data : NULL;
5681
5682     PERL_ARGS_ASSERT_REGCLASS_SWASH;
5683
5684     if (data && data->count) {
5685         const U32 n = ARG(node);
5686
5687         if (data->what[n] == 's') {
5688             SV * const rv = (SV*)data->data[n];
5689             AV * const av = MUTABLE_AV(SvRV((SV*)rv));
5690             SV **const ary = AvARRAY(av);
5691             SV **a, **b;
5692
5693             /* See the end of regcomp.c:S_regclass() for
5694              * documentation of these array elements. */
5695
5696             si = *ary;
5697             a  = SvROK(ary[1]) ? &ary[1] : NULL;
5698             b  = SvTYPE(ary[2]) == SVt_PVAV ? &ary[2] : NULL;
5699
5700             if (a)
5701                 sw = *a;
5702             else if (si && doinit) {
5703                 sw = swash_init("utf8", "", si, 1, 0);
5704                 (void)av_store(av, 1, sw);
5705             }
5706             if (b)
5707                 alt = *b;
5708         }
5709     }
5710
5711     if (listsvp)
5712         *listsvp = si;
5713     if (altsvp)
5714         *altsvp  = alt;
5715
5716     return sw;
5717 }
5718 #endif
5719
5720 /*
5721  - reginclass - determine if a character falls into a character class
5722
5723   The n is the ANYOF regnode, the p is the target string, lenp
5724   is pointer to the maximum length of how far to go in the p
5725   (if the lenp is zero, UTF8SKIP(p) is used),
5726   do_utf8 tells whether the target string is in UTF-8.
5727
5728  */
5729
5730 STATIC bool
5731 S_reginclass(pTHX_ const regexp *prog, register const regnode *n, register const U8* p, STRLEN* lenp, register bool do_utf8)
5732 {
5733     dVAR;
5734     const char flags = ANYOF_FLAGS(n);
5735     bool match = FALSE;
5736     UV c = *p;
5737     STRLEN len = 0;
5738     STRLEN plen;
5739
5740     PERL_ARGS_ASSERT_REGINCLASS;
5741
5742     if (do_utf8 && !UTF8_IS_INVARIANT(c)) {
5743         c = utf8n_to_uvchr(p, UTF8_MAXBYTES, &len,
5744                 (UTF8_ALLOW_DEFAULT & UTF8_ALLOW_ANYUV) | UTF8_CHECK_ONLY);
5745                 /* see [perl #37836] for UTF8_ALLOW_ANYUV */
5746         if (len == (STRLEN)-1)
5747             Perl_croak(aTHX_ "Malformed UTF-8 character (fatal)");
5748     }
5749
5750     plen = lenp ? *lenp : UNISKIP(NATIVE_TO_UNI(c));
5751     if (do_utf8 || (flags & ANYOF_UNICODE)) {
5752         if (lenp)
5753             *lenp = 0;
5754         if (do_utf8 && !ANYOF_RUNTIME(n)) {
5755             if (len != (STRLEN)-1 && c < 256 && ANYOF_BITMAP_TEST(n, c))
5756                 match = TRUE;
5757         }
5758         if (!match && do_utf8 && (flags & ANYOF_UNICODE_ALL) && c >= 256)
5759             match = TRUE;
5760         if (!match) {
5761             AV *av;
5762             SV * const sw = regclass_swash(prog, n, TRUE, 0, (SV**)&av);
5763
5764             if (sw) {
5765                 if (swash_fetch(sw, p, do_utf8))
5766                     match = TRUE;
5767                 else if (flags & ANYOF_FOLD) {
5768                     if (!match && lenp && av) {
5769                         I32 i;
5770                         for (i = 0; i <= av_len(av); i++) {
5771                             SV* const sv = *av_fetch(av, i, FALSE);
5772                             STRLEN len;
5773                             const char * const s = SvPV_const(sv, len);
5774
5775                             if (len <= plen && memEQ(s, (char*)p, len)) {
5776                                 *lenp = len;
5777                                 match = TRUE;
5778                                 break;
5779                             }
5780                         }
5781                     }
5782                     if (!match) {
5783                         U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
5784                         STRLEN tmplen;
5785
5786                         to_utf8_fold(p, tmpbuf, &tmplen);
5787                         if (swash_fetch(sw, tmpbuf, do_utf8))
5788                             match = TRUE;
5789                     }
5790                 }
5791             }
5792         }
5793         if (match && lenp && *lenp == 0)
5794             *lenp = UNISKIP(NATIVE_TO_UNI(c));
5795     }
5796     if (!match && c < 256) {
5797         if (ANYOF_BITMAP_TEST(n, c))
5798             match = TRUE;
5799         else if (flags & ANYOF_FOLD) {
5800             U8 f;
5801
5802             if (flags & ANYOF_LOCALE) {
5803                 PL_reg_flags |= RF_tainted;
5804                 f = PL_fold_locale[c];
5805             }
5806             else
5807                 f = PL_fold[c];
5808             if (f != c && ANYOF_BITMAP_TEST(n, f))
5809                 match = TRUE;
5810         }
5811
5812         if (!match && (flags & ANYOF_CLASS)) {
5813             PL_reg_flags |= RF_tainted;
5814             if (
5815                 (ANYOF_CLASS_TEST(n, ANYOF_ALNUM)   &&  isALNUM_LC(c))  ||
5816                 (ANYOF_CLASS_TEST(n, ANYOF_NALNUM)  && !isALNUM_LC(c))  ||
5817                 (ANYOF_CLASS_TEST(n, ANYOF_SPACE)   &&  isSPACE_LC(c))  ||
5818                 (ANYOF_CLASS_TEST(n, ANYOF_NSPACE)  && !isSPACE_LC(c))  ||
5819                 (ANYOF_CLASS_TEST(n, ANYOF_DIGIT)   &&  isDIGIT_LC(c))  ||
5820                 (ANYOF_CLASS_TEST(n, ANYOF_NDIGIT)  && !isDIGIT_LC(c))  ||
5821                 (ANYOF_CLASS_TEST(n, ANYOF_ALNUMC)  &&  isALNUMC_LC(c)) ||
5822                 (ANYOF_CLASS_TEST(n, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
5823                 (ANYOF_CLASS_TEST(n, ANYOF_ALPHA)   &&  isALPHA_LC(c))  ||
5824                 (ANYOF_CLASS_TEST(n, ANYOF_NALPHA)  && !isALPHA_LC(c))  ||
5825                 (ANYOF_CLASS_TEST(n, ANYOF_ASCII)   &&  isASCII(c))     ||
5826                 (ANYOF_CLASS_TEST(n, ANYOF_NASCII)  && !isASCII(c))     ||
5827                 (ANYOF_CLASS_TEST(n, ANYOF_CNTRL)   &&  isCNTRL_LC(c))  ||
5828                 (ANYOF_CLASS_TEST(n, ANYOF_NCNTRL)  && !isCNTRL_LC(c))  ||
5829                 (ANYOF_CLASS_TEST(n, ANYOF_GRAPH)   &&  isGRAPH_LC(c))  ||
5830                 (ANYOF_CLASS_TEST(n, ANYOF_NGRAPH)  && !isGRAPH_LC(c))  ||
5831                 (ANYOF_CLASS_TEST(n, ANYOF_LOWER)   &&  isLOWER_LC(c))  ||
5832                 (ANYOF_CLASS_TEST(n, ANYOF_NLOWER)  && !isLOWER_LC(c))  ||
5833                 (ANYOF_CLASS_TEST(n, ANYOF_PRINT)   &&  isPRINT_LC(c))  ||
5834                 (ANYOF_CLASS_TEST(n, ANYOF_NPRINT)  && !isPRINT_LC(c))  ||
5835                 (ANYOF_CLASS_TEST(n, ANYOF_PUNCT)   &&  isPUNCT_LC(c))  ||
5836                 (ANYOF_CLASS_TEST(n, ANYOF_NPUNCT)  && !isPUNCT_LC(c))  ||
5837                 (ANYOF_CLASS_TEST(n, ANYOF_UPPER)   &&  isUPPER_LC(c))  ||
5838                 (ANYOF_CLASS_TEST(n, ANYOF_NUPPER)  && !isUPPER_LC(c))  ||
5839                 (ANYOF_CLASS_TEST(n, ANYOF_XDIGIT)  &&  isXDIGIT(c))    ||
5840                 (ANYOF_CLASS_TEST(n, ANYOF_NXDIGIT) && !isXDIGIT(c))    ||
5841                 (ANYOF_CLASS_TEST(n, ANYOF_PSXSPC)  &&  isPSXSPC(c))    ||
5842                 (ANYOF_CLASS_TEST(n, ANYOF_NPSXSPC) && !isPSXSPC(c))    ||
5843                 (ANYOF_CLASS_TEST(n, ANYOF_BLANK)   &&  isBLANK(c))     ||
5844                 (ANYOF_CLASS_TEST(n, ANYOF_NBLANK)  && !isBLANK(c))
5845                 ) /* How's that for a conditional? */
5846             {
5847                 match = TRUE;
5848             }
5849         }
5850     }
5851
5852     return (flags & ANYOF_INVERT) ? !match : match;
5853 }
5854
5855 STATIC U8 *
5856 S_reghop3(U8 *s, I32 off, const U8* lim)
5857 {
5858     dVAR;
5859
5860     PERL_ARGS_ASSERT_REGHOP3;
5861
5862     if (off >= 0) {
5863         while (off-- && s < lim) {
5864             /* XXX could check well-formedness here */
5865             s += UTF8SKIP(s);
5866         }
5867     }
5868     else {
5869         while (off++ && s > lim) {
5870             s--;
5871             if (UTF8_IS_CONTINUED(*s)) {
5872                 while (s > lim && UTF8_IS_CONTINUATION(*s))
5873                     s--;
5874             }
5875             /* XXX could check well-formedness here */
5876         }
5877     }
5878     return s;
5879 }
5880
5881 #ifdef XXX_dmq
5882 /* there are a bunch of places where we use two reghop3's that should
5883    be replaced with this routine. but since thats not done yet
5884    we ifdef it out - dmq
5885 */
5886 STATIC U8 *
5887 S_reghop4(U8 *s, I32 off, const U8* llim, const U8* rlim)
5888 {
5889     dVAR;
5890
5891     PERL_ARGS_ASSERT_REGHOP4;
5892
5893     if (off >= 0) {
5894         while (off-- && s < rlim) {
5895             /* XXX could check well-formedness here */
5896             s += UTF8SKIP(s);
5897         }
5898     }
5899     else {
5900         while (off++ && s > llim) {
5901             s--;
5902             if (UTF8_IS_CONTINUED(*s)) {
5903                 while (s > llim && UTF8_IS_CONTINUATION(*s))
5904                     s--;
5905             }
5906             /* XXX could check well-formedness here */
5907         }
5908     }
5909     return s;
5910 }
5911 #endif
5912
5913 STATIC U8 *
5914 S_reghopmaybe3(U8* s, I32 off, const U8* lim)
5915 {
5916     dVAR;
5917
5918     PERL_ARGS_ASSERT_REGHOPMAYBE3;
5919
5920     if (off >= 0) {
5921         while (off-- && s < lim) {
5922             /* XXX could check well-formedness here */
5923             s += UTF8SKIP(s);
5924         }
5925         if (off >= 0)
5926             return NULL;
5927     }
5928     else {
5929         while (off++ && s > lim) {
5930             s--;
5931             if (UTF8_IS_CONTINUED(*s)) {
5932                 while (s > lim && UTF8_IS_CONTINUATION(*s))
5933                     s--;
5934             }
5935             /* XXX could check well-formedness here */
5936         }
5937         if (off <= 0)
5938             return NULL;
5939     }
5940     return s;
5941 }
5942
5943 static void
5944 restore_pos(pTHX_ void *arg)
5945 {
5946     dVAR;
5947     regexp * const rex = (regexp *)arg;
5948     if (PL_reg_eval_set) {
5949         if (PL_reg_oldsaved) {
5950             rex->subbeg = PL_reg_oldsaved;
5951             rex->sublen = PL_reg_oldsavedlen;
5952 #ifdef PERL_OLD_COPY_ON_WRITE
5953             rex->saved_copy = PL_nrs;
5954 #endif
5955             RXp_MATCH_COPIED_on(rex);
5956         }
5957         PL_reg_magic->mg_len = PL_reg_oldpos;
5958         PL_reg_eval_set = 0;
5959         PL_curpm = PL_reg_oldcurpm;
5960     }
5961 }
5962
5963 STATIC void
5964 S_to_utf8_substr(pTHX_ register regexp *prog)
5965 {
5966     int i = 1;
5967
5968     PERL_ARGS_ASSERT_TO_UTF8_SUBSTR;
5969
5970     do {
5971         if (prog->substrs->data[i].substr
5972             && !prog->substrs->data[i].utf8_substr) {
5973             SV* const sv = newSVsv(prog->substrs->data[i].substr);
5974             prog->substrs->data[i].utf8_substr = sv;
5975             sv_utf8_upgrade(sv);
5976             if (SvVALID(prog->substrs->data[i].substr)) {
5977                 const U8 flags = BmFLAGS(prog->substrs->data[i].substr);
5978                 if (flags & FBMcf_TAIL) {
5979                     /* Trim the trailing \n that fbm_compile added last
5980                        time.  */
5981                     SvCUR_set(sv, SvCUR(sv) - 1);
5982                     /* Whilst this makes the SV technically "invalid" (as its
5983                        buffer is no longer followed by "\0") when fbm_compile()
5984                        adds the "\n" back, a "\0" is restored.  */
5985                 }
5986                 fbm_compile(sv, flags);
5987             }
5988             if (prog->substrs->data[i].substr == prog->check_substr)
5989                 prog->check_utf8 = sv;
5990         }
5991     } while (i--);
5992 }
5993
5994 STATIC void
5995 S_to_byte_substr(pTHX_ register regexp *prog)
5996 {
5997     dVAR;
5998     int i = 1;
5999
6000     PERL_ARGS_ASSERT_TO_BYTE_SUBSTR;
6001
6002     do {
6003         if (prog->substrs->data[i].utf8_substr
6004             && !prog->substrs->data[i].substr) {
6005             SV* sv = newSVsv(prog->substrs->data[i].utf8_substr);
6006             if (sv_utf8_downgrade(sv, TRUE)) {
6007                 if (SvVALID(prog->substrs->data[i].utf8_substr)) {
6008                     const U8 flags
6009                         = BmFLAGS(prog->substrs->data[i].utf8_substr);
6010                     if (flags & FBMcf_TAIL) {
6011                         /* Trim the trailing \n that fbm_compile added last
6012                            time.  */
6013                         SvCUR_set(sv, SvCUR(sv) - 1);
6014                     }
6015                     fbm_compile(sv, flags);
6016                 }
6017             } else {
6018                 SvREFCNT_dec(sv);
6019                 sv = &PL_sv_undef;
6020             }
6021             prog->substrs->data[i].substr = sv;
6022             if (prog->substrs->data[i].utf8_substr == prog->check_utf8)
6023                 prog->check_substr = sv;
6024         }
6025     } while (i--);
6026 }
6027
6028 /*
6029  * Local variables:
6030  * c-indentation-style: bsd
6031  * c-basic-offset: 4
6032  * indent-tabs-mode: t
6033  * End:
6034  *
6035  * ex: set ts=8 sts=4 sw=4 noet:
6036  */