regexec.c

   1 /*    regexec.c
   2  */
   3
   4 /*
   5  * "One Ring to rule them all, One Ring to find them..."
   6  */
   7
   8 /* This file contains functions for executing a regular expression.  See
   9  * also regcomp.c which funnily enough, contains functions for compiling
  10  * a regular expression.
  11  *
  12  * This file is also copied at build time to ext/re/re_exec.c, where
  13  * it's built with -DPERL_EXT_RE_BUILD -DPERL_EXT_RE_DEBUG -DPERL_EXT.
  14  * This causes the main functions to be compiled under new names and with
  15  * debugging support added, which makes "use re 'debug'" work.
  16  */
  17
  18 /* NOTE: this is derived from Henry Spencer's regexp code, and should not
  19  * confused with the original package (see point 3 below).  Thanks, Henry!
  20  */
  21
  22 /* Additional note: this code is very heavily munged from Henry's version
  23  * in places.  In some spots I've traded clarity for efficiency, so don't
  24  * blame Henry for some of the lack of readability.
  25  */
  26
  27 /* The names of the functions have been changed from regcomp and
  28  * regexec to  pregcomp and pregexec in order to avoid conflicts
  29  * with the POSIX routines of the same names.
  30 */
  31
  32 #ifdef PERL_EXT_RE_BUILD
  33 #include "re_top.h"
  34 #endif
  35
  36 /*
  37  * pregcomp and pregexec -- regsub and regerror are not used in perl
  38  *
  39  *      Copyright (c) 1986 by University of Toronto.
  40  *      Written by Henry Spencer.  Not derived from licensed software.
  41  *
  42  *      Permission is granted to anyone to use this software for any
  43  *      purpose on any computer system, and to redistribute it freely,
  44  *      subject to the following restrictions:
  45  *
  46  *      1. The author is not responsible for the consequences of use of
  47  *              this software, no matter how awful, even if they arise
  48  *              from defects in it.
  49  *
  50  *      2. The origin of this software must not be misrepresented, either
  51  *              by explicit claim or by omission.
  52  *
  53  *      3. Altered versions must be plainly marked as such, and must not
  54  *              be misrepresented as being the original software.
  55  *
  56  ****    Alterations to Henry's code are...
  57  ****
  58  ****    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
  59  ****    2000, 2001, 2002, 2003, 2004, 2005, 2006, by Larry Wall and others
  60  ****
  61  ****    You may distribute under the terms of either the GNU General Public
  62  ****    License or the Artistic License, as specified in the README file.
  63  *
  64  * Beware that some of this code is subtly aware of the way operator
  65  * precedence is structured in regular expressions.  Serious changes in
  66  * regular-expression syntax might require a total rethink.
  67  */
  68 #include "EXTERN.h"
  69 #define PERL_IN_REGEXEC_C
  70 #include "perl.h"
  71
  72 #ifdef PERL_IN_XSUB_RE
  73 #  include "re_comp.h"
  74 #else
  75 #  include "regcomp.h"
  76 #endif
  77
  78 #define RF_tainted      1               /* tainted information used? */
  79 #define RF_warned       2               /* warned about big count? */
  80
  81 #define RF_utf8         8               /* Pattern contains multibyte chars? */
  82
  83 #define UTF ((PL_reg_flags & RF_utf8) != 0)
  84
  85 #define RS_init         1               /* eval environment created */
  86 #define RS_set          2               /* replsv value is set */
  87
  88 #ifndef STATIC
  89 #define STATIC  static
  90 #endif
  91
  92 #define REGINCLASS(prog,p,c)  (ANYOF_FLAGS(p) ? reginclass(prog,p,c,0,0) : ANYOF_BITMAP_TEST(p,*(c)))
  93
  94 /*
  95  * Forwards.
  96  */
  97
  98 #define CHR_SVLEN(sv) (do_utf8 ? sv_len_utf8(sv) : SvCUR(sv))
  99 #define CHR_DIST(a,b) (PL_reg_match_utf8 ? utf8_distance(a,b) : a - b)
 100
 101 #define HOPc(pos,off) \
 102         (char *)(PL_reg_match_utf8 \
 103             ? reghop3((U8*)pos, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr)) \
 104             : (U8*)(pos + off))
 105 #define HOPBACKc(pos, off) \
 106         (char*)(PL_reg_match_utf8\
 107             ? reghopmaybe3((U8*)pos, -off, (U8*)PL_bostr) \
 108             : (pos - off >= PL_bostr)           \
 109                 ? (U8*)pos - off                \
 110                 : NULL)
 111
 112 #define HOP3(pos,off,lim) (PL_reg_match_utf8 ? reghop3((U8*)(pos), off, (U8*)(lim)) : (U8*)(pos + off))
 113 #define HOP3c(pos,off,lim) ((char*)HOP3(pos,off,lim))
 114
 115 #define LOAD_UTF8_CHARCLASS(class,str) STMT_START { \
 116     if (!CAT2(PL_utf8_,class)) { bool ok; ENTER; save_re_context(); ok=CAT2(is_utf8_,class)((const U8*)str); assert(ok); LEAVE; } } STMT_END
 117 #define LOAD_UTF8_CHARCLASS_ALNUM() LOAD_UTF8_CHARCLASS(alnum,"a")
 118 #define LOAD_UTF8_CHARCLASS_DIGIT() LOAD_UTF8_CHARCLASS(digit,"0")
 119 #define LOAD_UTF8_CHARCLASS_SPACE() LOAD_UTF8_CHARCLASS(space," ")
 120 #define LOAD_UTF8_CHARCLASS_MARK()  LOAD_UTF8_CHARCLASS(mark, "\xcd\x86")
 121
 122 /* TODO: Combine JUMPABLE and HAS_TEXT to cache OP(rn) */
 123
 124 /* for use after a quantifier and before an EXACT-like node -- japhy */
 125 #define JUMPABLE(rn) ( \
 126     OP(rn) == OPEN || OP(rn) == CLOSE || OP(rn) == EVAL || \
 127     OP(rn) == SUSPEND || OP(rn) == IFMATCH || \
 128     OP(rn) == PLUS || OP(rn) == MINMOD || \
 129     (PL_regkind[OP(rn)] == CURLY && ARG1(rn) > 0) \
 130 )
 131
 132 #define HAS_TEXT(rn) ( \
 133     PL_regkind[OP(rn)] == EXACT || PL_regkind[OP(rn)] == REF \
 134 )
 135
 136 /*
 137   Search for mandatory following text node; for lookahead, the text must
 138   follow but for lookbehind (rn->flags != 0) we skip to the next step.
 139 */
 140 #define FIND_NEXT_IMPT(rn) STMT_START { \
 141     while (JUMPABLE(rn)) { \
 142         const OPCODE type = OP(rn); \
 143         if (type == SUSPEND || PL_regkind[type] == CURLY) \
 144             rn = NEXTOPER(NEXTOPER(rn)); \
 145         else if (type == PLUS) \
 146             rn = NEXTOPER(rn); \
 147         else if (type == IFMATCH) \
 148             rn = (rn->flags == 0) ? NEXTOPER(NEXTOPER(rn)) : rn + ARG(rn); \
 149         else rn += NEXT_OFF(rn); \
 150     } \
 151 } STMT_END
 152
 153
 154 static void restore_pos(pTHX_ void *arg);
 155
 156 STATIC CHECKPOINT
 157 S_regcppush(pTHX_ I32 parenfloor)
 158 {
 159     dVAR;
 160     const int retval = PL_savestack_ix;
 161 #define REGCP_PAREN_ELEMS 4
 162     const int paren_elems_to_push = (PL_regsize - parenfloor) * REGCP_PAREN_ELEMS;
 163     int p;
 164     GET_RE_DEBUG_FLAGS_DECL;
 165
 166     if (paren_elems_to_push < 0)
 167         Perl_croak(aTHX_ "panic: paren_elems_to_push < 0");
 168
 169 #define REGCP_OTHER_ELEMS 8
 170     SSGROW(paren_elems_to_push + REGCP_OTHER_ELEMS);
 171
 172     for (p = PL_regsize; p > parenfloor; p--) {
 173 /* REGCP_PARENS_ELEMS are pushed per pairs of parentheses. */
 174         SSPUSHINT(PL_regendp[p]);
 175         SSPUSHINT(PL_regstartp[p]);
 176         SSPUSHPTR(PL_reg_start_tmp[p]);
 177         SSPUSHINT(p);
 178         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 179           "     saving \\%"UVuf" %"IVdf"(%"IVdf")..%"IVdf"\n",
 180                       (UV)p, (IV)PL_regstartp[p],
 181                       (IV)(PL_reg_start_tmp[p] - PL_bostr),
 182                       (IV)PL_regendp[p]
 183         ));
 184     }
 185 /* REGCP_OTHER_ELEMS are pushed in any case, parentheses or no. */
 186     SSPUSHPTR(PL_regstartp);
 187     SSPUSHPTR(PL_regendp);
 188     SSPUSHINT(PL_regsize);
 189     SSPUSHINT(*PL_reglastparen);
 190     SSPUSHINT(*PL_reglastcloseparen);
 191     SSPUSHPTR(PL_reginput);
 192 #define REGCP_FRAME_ELEMS 2
 193 /* REGCP_FRAME_ELEMS are part of the REGCP_OTHER_ELEMS and
 194  * are needed for the regexp context stack bookkeeping. */
 195     SSPUSHINT(paren_elems_to_push + REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
 196     SSPUSHINT(SAVEt_REGCONTEXT); /* Magic cookie. */
 197
 198     return retval;
 199 }
 200
 201 /* These are needed since we do not localize EVAL nodes: */
 202 #define REGCP_SET(cp)                                           \
 203     DEBUG_STATE_r(                                              \
 204             PerlIO_printf(Perl_debug_log,                       \
 205                 "  Setting an EVAL scope, savestack=%"IVdf"\n", \
 206                 (IV)PL_savestack_ix));                          \
 207     cp = PL_savestack_ix
 208
 209 #define REGCP_UNWIND(cp)                                        \
 210     DEBUG_STATE_r(                                              \
 211         if (cp != PL_savestack_ix)                              \
 212             PerlIO_printf(Perl_debug_log,                       \
 213                 "  Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
 214                 (IV)(cp), (IV)PL_savestack_ix));                \
 215     regcpblow(cp)
 216
 217 STATIC char *
 218 S_regcppop(pTHX_ const regexp *rex)
 219 {
 220     dVAR;
 221     U32 i;
 222     char *input;
 223
 224     GET_RE_DEBUG_FLAGS_DECL;
 225
 226     /* Pop REGCP_OTHER_ELEMS before the parentheses loop starts. */
 227     i = SSPOPINT;
 228     assert(i == SAVEt_REGCONTEXT); /* Check that the magic cookie is there. */
 229     i = SSPOPINT; /* Parentheses elements to pop. */
 230     input = (char *) SSPOPPTR;
 231     *PL_reglastcloseparen = SSPOPINT;
 232     *PL_reglastparen = SSPOPINT;
 233     PL_regsize = SSPOPINT;
 234     PL_regendp=(I32 *) SSPOPPTR;
 235     PL_regstartp=(I32 *) SSPOPPTR;
 236
 237
 238     /* Now restore the parentheses context. */
 239     for (i -= (REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
 240          i > 0; i -= REGCP_PAREN_ELEMS) {
 241         I32 tmps;
 242         U32 paren = (U32)SSPOPINT;
 243         PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
 244         PL_regstartp[paren] = SSPOPINT;
 245         tmps = SSPOPINT;
 246         if (paren <= *PL_reglastparen)
 247             PL_regendp[paren] = tmps;
 248         DEBUG_EXECUTE_r(
 249             PerlIO_printf(Perl_debug_log,
 250                           "     restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
 251                           (UV)paren, (IV)PL_regstartp[paren],
 252                           (IV)(PL_reg_start_tmp[paren] - PL_bostr),
 253                           (IV)PL_regendp[paren],
 254                           (paren > *PL_reglastparen ? "(no)" : ""));
 255         );
 256     }
 257     DEBUG_EXECUTE_r(
 258         if (*PL_reglastparen + 1 <= rex->nparens) {
 259             PerlIO_printf(Perl_debug_log,
 260                           "     restoring \\%"IVdf"..\\%"IVdf" to undef\n",
 261                           (IV)(*PL_reglastparen + 1), (IV)rex->nparens);
 262         }
 263     );
 264 #if 1
 265     /* It would seem that the similar code in regtry()
 266      * already takes care of this, and in fact it is in
 267      * a better location to since this code can #if 0-ed out
 268      * but the code in regtry() is needed or otherwise tests
 269      * requiring null fields (pat.t#187 and split.t#{13,14}
 270      * (as of patchlevel 7877)  will fail.  Then again,
 271      * this code seems to be necessary or otherwise
 272      * building DynaLoader will fail:
 273      * "Error: '*' not in typemap in DynaLoader.xs, line 164"
 274      * --jhi */
 275     for (i = *PL_reglastparen + 1; i <= rex->nparens; i++) {
 276         if (i > PL_regsize)
 277             PL_regstartp[i] = -1;
 278         PL_regendp[i] = -1;
 279     }
 280 #endif
 281     return input;
 282 }
 283
 284 #define regcpblow(cp) LEAVE_SCOPE(cp)   /* Ignores regcppush()ed data. */
 285
 286 /*
 287  * pregexec and friends
 288  */
 289
 290 #ifndef PERL_IN_XSUB_RE
 291 /*
 292  - pregexec - match a regexp against a string
 293  */
 294 I32
 295 Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend,
 296          char *strbeg, I32 minend, SV *screamer, U32 nosave)
 297 /* strend: pointer to null at end of string */
 298 /* strbeg: real beginning of string */
 299 /* minend: end of match must be >=minend after stringarg. */
 300 /* nosave: For optimizations. */
 301 {
 302     return
 303         regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
 304                       nosave ? 0 : REXEC_COPY_STR);
 305 }
 306 #endif
 307
 308 /*
 309  * Need to implement the following flags for reg_anch:
 310  *
 311  * USE_INTUIT_NOML              - Useful to call re_intuit_start() first
 312  * USE_INTUIT_ML
 313  * INTUIT_AUTORITATIVE_NOML     - Can trust a positive answer
 314  * INTUIT_AUTORITATIVE_ML
 315  * INTUIT_ONCE_NOML             - Intuit can match in one location only.
 316  * INTUIT_ONCE_ML
 317  *
 318  * Another flag for this function: SECOND_TIME (so that float substrs
 319  * with giant delta may be not rechecked).
 320  */
 321
 322 /* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
 323
 324 /* If SCREAM, then SvPVX_const(sv) should be compatible with strpos and strend.
 325    Otherwise, only SvCUR(sv) is used to get strbeg. */
 326
 327 /* XXXX We assume that strpos is strbeg unless sv. */
 328
 329 /* XXXX Some places assume that there is a fixed substring.
 330         An update may be needed if optimizer marks as "INTUITable"
 331         RExen without fixed substrings.  Similarly, it is assumed that
 332         lengths of all the strings are no more than minlen, thus they
 333         cannot come from lookahead.
 334         (Or minlen should take into account lookahead.)
 335   NOTE: Some of this comment is not correct. minlen does now take account
 336   of lookahead/behind. Further research is required. -- demerphq
 337
 338 */
 339
 340 /* A failure to find a constant substring means that there is no need to make
 341    an expensive call to REx engine, thus we celebrate a failure.  Similarly,
 342    finding a substring too deep into the string means that less calls to
 343    regtry() should be needed.
 344
 345    REx compiler's optimizer found 4 possible hints:
 346         a) Anchored substring;
 347         b) Fixed substring;
 348         c) Whether we are anchored (beginning-of-line or \G);
 349         d) First node (of those at offset 0) which may distingush positions;
 350    We use a)b)d) and multiline-part of c), and try to find a position in the
 351    string which does not contradict any of them.
 352  */
 353
 354 /* Most of decisions we do here should have been done at compile time.
 355    The nodes of the REx which we used for the search should have been
 356    deleted from the finite automaton. */
 357
 358 char *
 359 Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
 360                      char *strend, U32 flags, re_scream_pos_data *data)
 361 {
 362     dVAR;
 363     register I32 start_shift = 0;
 364     /* Should be nonnegative! */
 365     register I32 end_shift   = 0;
 366     register char *s;
 367     register SV *check;
 368     char *strbeg;
 369     char *t;
 370     const bool do_utf8 = (sv && SvUTF8(sv)) ? 1 : 0; /* if no sv we have to assume bytes */
 371     I32 ml_anch;
 372     register char *other_last = NULL;   /* other substr checked before this */
 373     char *check_at = NULL;              /* check substr found at this pos */
 374     const I32 multiline = prog->extflags & RXf_PMf_MULTILINE;
 375     RXi_GET_DECL(prog,progi);
 376 #ifdef DEBUGGING
 377     const char * const i_strpos = strpos;
 378 #endif
 379
 380     GET_RE_DEBUG_FLAGS_DECL;
 381
 382     RX_MATCH_UTF8_set(prog,do_utf8);
 383
 384     if (prog->extflags & RXf_UTF8) {
 385         PL_reg_flags |= RF_utf8;
 386     }
 387     DEBUG_EXECUTE_r(
 388         debug_start_match(prog, do_utf8, strpos, strend,
 389             sv ? "Guessing start of match in sv for"
 390                : "Guessing start of match in string for");
 391               );
 392
 393     /* CHR_DIST() would be more correct here but it makes things slow. */
 394     if (prog->minlen > strend - strpos) {
 395         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 396                               "String too short... [re_intuit_start]\n"));
 397         goto fail;
 398     }
 399
 400     strbeg = (sv && SvPOK(sv)) ? strend - SvCUR(sv) : strpos;
 401     PL_regeol = strend;
 402     if (do_utf8) {
 403         if (!prog->check_utf8 && prog->check_substr)
 404             to_utf8_substr(prog);
 405         check = prog->check_utf8;
 406     } else {
 407         if (!prog->check_substr && prog->check_utf8)
 408             to_byte_substr(prog);
 409         check = prog->check_substr;
 410     }
 411     if (check == &PL_sv_undef) {
 412         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 413                 "Non-utf8 string cannot match utf8 check string\n"));
 414         goto fail;
 415     }
 416     if (prog->extflags & RXf_ANCH) {    /* Match at beg-of-str or after \n */
 417         ml_anch = !( (prog->extflags & RXf_ANCH_SINGLE)
 418                      || ( (prog->extflags & RXf_ANCH_BOL)
 419                           && !multiline ) );    /* Check after \n? */
 420
 421         if (!ml_anch) {
 422           if ( !(prog->extflags & RXf_ANCH_GPOS) /* Checked by the caller */
 423                 && !(prog->intflags & PREGf_IMPLICIT) /* not a real BOL */
 424                /* SvCUR is not set on references: SvRV and SvPVX_const overlap */
 425                && sv && !SvROK(sv)
 426                && (strpos != strbeg)) {
 427               DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
 428               goto fail;
 429           }
 430           if (prog->check_offset_min == prog->check_offset_max &&
 431               !(prog->extflags & RXf_CANY_SEEN)) {
 432             /* Substring at constant offset from beg-of-str... */
 433             I32 slen;
 434
 435             s = HOP3c(strpos, prog->check_offset_min, strend);
 436
 437             if (SvTAIL(check)) {
 438                 slen = SvCUR(check);    /* >= 1 */
 439
 440                 if ( strend - s > slen || strend - s < slen - 1
 441                      || (strend - s == slen && strend[-1] != '\n')) {
 442                     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
 443                     goto fail_finish;
 444                 }
 445                 /* Now should match s[0..slen-2] */
 446                 slen--;
 447                 if (slen && (*SvPVX_const(check) != *s
 448                              || (slen > 1
 449                                  && memNE(SvPVX_const(check), s, slen)))) {
 450                   report_neq:
 451                     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
 452                     goto fail_finish;
 453                 }
 454             }
 455             else if (*SvPVX_const(check) != *s
 456                      || ((slen = SvCUR(check)) > 1
 457                          && memNE(SvPVX_const(check), s, slen)))
 458                 goto report_neq;
 459             check_at = s;
 460             goto success_at_start;
 461           }
 462         }
 463         /* Match is anchored, but substr is not anchored wrt beg-of-str. */
 464         s = strpos;
 465         start_shift = prog->check_offset_min; /* okay to underestimate on CC */
 466         end_shift = prog->check_end_shift;
 467
 468         if (!ml_anch) {
 469             const I32 end = prog->check_offset_max + CHR_SVLEN(check)
 470                                          - (SvTAIL(check) != 0);
 471             const I32 eshift = CHR_DIST((U8*)strend, (U8*)s) - end;
 472
 473             if (end_shift < eshift)
 474                 end_shift = eshift;
 475         }
 476     }
 477     else {                              /* Can match at random position */
 478         ml_anch = 0;
 479         s = strpos;
 480         start_shift = prog->check_offset_min;  /* okay to underestimate on CC */
 481         end_shift = prog->check_end_shift;
 482
 483         /* end shift should be non negative here */
 484     }
 485
 486 #ifdef DEBUGGING        /* 7/99: reports of failure (with the older version) */
 487     if (end_shift < 0)
 488         Perl_croak(aTHX_ "panic: end_shift: %"IVdf" pattern:\n%s\n ",
 489                    (IV)end_shift, prog->precomp);
 490 #endif
 491
 492   restart:
 493     /* Find a possible match in the region s..strend by looking for
 494        the "check" substring in the region corrected by start/end_shift. */
 495
 496     {
 497         I32 srch_start_shift = start_shift;
 498         I32 srch_end_shift = end_shift;
 499         if (srch_start_shift < 0 && strbeg - s > srch_start_shift) {
 500             srch_end_shift -= ((strbeg - s) - srch_start_shift);
 501             srch_start_shift = strbeg - s;
 502         }
 503     DEBUG_OPTIMISE_MORE_r({
 504         PerlIO_printf(Perl_debug_log, "Check offset min: %"IVdf" Start shift: %"IVdf" End shift %"IVdf" Real End Shift: %"IVdf"\n",
 505             (IV)prog->check_offset_min,
 506             (IV)srch_start_shift,
 507             (IV)srch_end_shift,
 508             (IV)prog->check_end_shift);
 509     });
 510
 511     if (flags & REXEC_SCREAM) {
 512         I32 p = -1;                     /* Internal iterator of scream. */
 513         I32 * const pp = data ? data->scream_pos : &p;
 514
 515         if (PL_screamfirst[BmRARE(check)] >= 0
 516             || ( BmRARE(check) == '\n'
 517                  && (BmPREVIOUS(check) == SvCUR(check) - 1)
 518                  && SvTAIL(check) ))
 519             s = screaminstr(sv, check,
 520                             srch_start_shift + (s - strbeg), srch_end_shift, pp, 0);
 521         else
 522             goto fail_finish;
 523         /* we may be pointing at the wrong string */
 524         if (s && RX_MATCH_COPIED(prog))
 525             s = strbeg + (s - SvPVX_const(sv));
 526         if (data)
 527             *data->scream_olds = s;
 528     }
 529     else {
 530         U8* start_point;
 531         U8* end_point;
 532         if (prog->extflags & RXf_CANY_SEEN) {
 533             start_point= (U8*)(s + srch_start_shift);
 534             end_point= (U8*)(strend - srch_end_shift);
 535         } else {
 536             start_point= HOP3(s, srch_start_shift, srch_start_shift < 0 ? strbeg : strend);
 537             end_point= HOP3(strend, -srch_end_shift, strbeg);
 538         }
 539         DEBUG_OPTIMISE_MORE_r({
 540             PerlIO_printf(Perl_debug_log, "fbm_instr len=%d str=<%.*s>\n",
 541                 (int)(end_point - start_point),
 542                 (int)(end_point - start_point) > 20 ? 20 : (int)(end_point - start_point),
 543                 start_point);
 544         });
 545
 546         s = fbm_instr( start_point, end_point,
 547                       check, multiline ? FBMrf_MULTILINE : 0);
 548     }
 549     }
 550     /* Update the count-of-usability, remove useless subpatterns,
 551         unshift s.  */
 552
 553     DEBUG_EXECUTE_r({
 554         RE_PV_QUOTED_DECL(quoted, do_utf8, PERL_DEBUG_PAD_ZERO(0),
 555             SvPVX_const(check), RE_SV_DUMPLEN(check), 30);
 556         PerlIO_printf(Perl_debug_log, "%s %s substr %s%s%s",
 557                           (s ? "Found" : "Did not find"),
 558             (check == (do_utf8 ? prog->anchored_utf8 : prog->anchored_substr)
 559                 ? "anchored" : "floating"),
 560             quoted,
 561             RE_SV_TAIL(check),
 562             (s ? " at offset " : "...\n") );
 563     });
 564
 565     if (!s)
 566         goto fail_finish;
 567     /* Finish the diagnostic message */
 568     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
 569
 570     /* XXX dmq: first branch is for positive lookbehind...
 571        Our check string is offset from the beginning of the pattern.
 572        So we need to do any stclass tests offset forward from that
 573        point. I think. :-(
 574      */
 575
 576
 577
 578     check_at=s;
 579
 580
 581     /* Got a candidate.  Check MBOL anchoring, and the *other* substr.
 582        Start with the other substr.
 583        XXXX no SCREAM optimization yet - and a very coarse implementation
 584        XXXX /ttx+/ results in anchored="ttx", floating="x".  floating will
 585                 *always* match.  Probably should be marked during compile...
 586        Probably it is right to do no SCREAM here...
 587      */
 588
 589     if (do_utf8 ? (prog->float_utf8 && prog->anchored_utf8)
 590                 : (prog->float_substr && prog->anchored_substr))
 591     {
 592         /* Take into account the "other" substring. */
 593         /* XXXX May be hopelessly wrong for UTF... */
 594         if (!other_last)
 595             other_last = strpos;
 596         if (check == (do_utf8 ? prog->float_utf8 : prog->float_substr)) {
 597           do_other_anchored:
 598             {
 599                 char * const last = HOP3c(s, -start_shift, strbeg);
 600                 char *last1, *last2;
 601                 char * const saved_s = s;
 602                 SV* must;
 603
 604                 t = s - prog->check_offset_max;
 605                 if (s - strpos > prog->check_offset_max  /* signed-corrected t > strpos */
 606                     && (!do_utf8
 607                         || ((t = (char*)reghopmaybe3((U8*)s, -(prog->check_offset_max), (U8*)strpos))
 608                             && t > strpos)))
 609                     NOOP;
 610                 else
 611                     t = strpos;
 612                 t = HOP3c(t, prog->anchored_offset, strend);
 613                 if (t < other_last)     /* These positions already checked */
 614                     t = other_last;
 615                 last2 = last1 = HOP3c(strend, -prog->minlen, strbeg);
 616                 if (last < last1)
 617                     last1 = last;
 618                 /* XXXX It is not documented what units *_offsets are in.
 619                    We assume bytes, but this is clearly wrong.
 620                    Meaning this code needs to be carefully reviewed for errors.
 621                    dmq.
 622                   */
 623
 624                 /* On end-of-str: see comment below. */
 625                 must = do_utf8 ? prog->anchored_utf8 : prog->anchored_substr;
 626                 if (must == &PL_sv_undef) {
 627                     s = (char*)NULL;
 628                     DEBUG_r(must = prog->anchored_utf8);        /* for debug */
 629                 }
 630                 else
 631                     s = fbm_instr(
 632                         (unsigned char*)t,
 633                         HOP3(HOP3(last1, prog->anchored_offset, strend)
 634                                 + SvCUR(must), -(SvTAIL(must)!=0), strbeg),
 635                         must,
 636                         multiline ? FBMrf_MULTILINE : 0
 637                     );
 638                 DEBUG_EXECUTE_r({
 639                     RE_PV_QUOTED_DECL(quoted, do_utf8, PERL_DEBUG_PAD_ZERO(0),
 640                         SvPVX_const(must), RE_SV_DUMPLEN(must), 30);
 641                     PerlIO_printf(Perl_debug_log, "%s anchored substr %s%s",
 642                         (s ? "Found" : "Contradicts"),
 643                         quoted, RE_SV_TAIL(must));
 644                 });
 645
 646
 647                 if (!s) {
 648                     if (last1 >= last2) {
 649                         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 650                                                 ", giving up...\n"));
 651                         goto fail_finish;
 652                     }
 653                     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 654                         ", trying floating at offset %ld...\n",
 655                         (long)(HOP3c(saved_s, 1, strend) - i_strpos)));
 656                     other_last = HOP3c(last1, prog->anchored_offset+1, strend);
 657                     s = HOP3c(last, 1, strend);
 658                     goto restart;
 659                 }
 660                 else {
 661                     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
 662                           (long)(s - i_strpos)));
 663                     t = HOP3c(s, -prog->anchored_offset, strbeg);
 664                     other_last = HOP3c(s, 1, strend);
 665                     s = saved_s;
 666                     if (t == strpos)
 667                         goto try_at_start;
 668                     goto try_at_offset;
 669                 }
 670             }
 671         }
 672         else {          /* Take into account the floating substring. */
 673             char *last, *last1;
 674             char * const saved_s = s;
 675             SV* must;
 676
 677             t = HOP3c(s, -start_shift, strbeg);
 678             last1 = last =
 679                 HOP3c(strend, -prog->minlen + prog->float_min_offset, strbeg);
 680             if (CHR_DIST((U8*)last, (U8*)t) > prog->float_max_offset)
 681                 last = HOP3c(t, prog->float_max_offset, strend);
 682             s = HOP3c(t, prog->float_min_offset, strend);
 683             if (s < other_last)
 684                 s = other_last;
 685  /* XXXX It is not documented what units *_offsets are in.  Assume bytes.  */
 686             must = do_utf8 ? prog->float_utf8 : prog->float_substr;
 687             /* fbm_instr() takes into account exact value of end-of-str
 688                if the check is SvTAIL(ed).  Since false positives are OK,
 689                and end-of-str is not later than strend we are OK. */
 690             if (must == &PL_sv_undef) {
 691                 s = (char*)NULL;
 692                 DEBUG_r(must = prog->float_utf8);       /* for debug message */
 693             }
 694             else
 695                 s = fbm_instr((unsigned char*)s,
 696                               (unsigned char*)last + SvCUR(must)
 697                                   - (SvTAIL(must)!=0),
 698                               must, multiline ? FBMrf_MULTILINE : 0);
 699             DEBUG_EXECUTE_r({
 700                 RE_PV_QUOTED_DECL(quoted, do_utf8, PERL_DEBUG_PAD_ZERO(0),
 701                     SvPVX_const(must), RE_SV_DUMPLEN(must), 30);
 702                 PerlIO_printf(Perl_debug_log, "%s floating substr %s%s",
 703                     (s ? "Found" : "Contradicts"),
 704                     quoted, RE_SV_TAIL(must));
 705             });
 706             if (!s) {
 707                 if (last1 == last) {
 708                     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 709                                             ", giving up...\n"));
 710                     goto fail_finish;
 711                 }
 712                 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 713                     ", trying anchored starting at offset %ld...\n",
 714                     (long)(saved_s + 1 - i_strpos)));
 715                 other_last = last;
 716                 s = HOP3c(t, 1, strend);
 717                 goto restart;
 718             }
 719             else {
 720                 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
 721                       (long)(s - i_strpos)));
 722                 other_last = s; /* Fix this later. --Hugo */
 723                 s = saved_s;
 724                 if (t == strpos)
 725                     goto try_at_start;
 726                 goto try_at_offset;
 727             }
 728         }
 729     }
 730
 731
 732     t= (char*)HOP3( s, -prog->check_offset_max, (prog->check_offset_max<0) ? strend : strpos);
 733
 734     DEBUG_OPTIMISE_MORE_r(
 735         PerlIO_printf(Perl_debug_log,
 736             "Check offset min:%"IVdf" max:%"IVdf" S:%"IVdf" t:%"IVdf" D:%"IVdf" end:%"IVdf"\n",
 737             (IV)prog->check_offset_min,
 738             (IV)prog->check_offset_max,
 739             (IV)(s-strpos),
 740             (IV)(t-strpos),
 741             (IV)(t-s),
 742             (IV)(strend-strpos)
 743         )
 744     );
 745
 746     if (s - strpos > prog->check_offset_max  /* signed-corrected t > strpos */
 747         && (!do_utf8
 748             || ((t = (char*)reghopmaybe3((U8*)s, -prog->check_offset_max, (U8*) ((prog->check_offset_max<0) ? strend : strpos)))
 749                  && t > strpos)))
 750     {
 751         /* Fixed substring is found far enough so that the match
 752            cannot start at strpos. */
 753       try_at_offset:
 754         if (ml_anch && t[-1] != '\n') {
 755             /* Eventually fbm_*() should handle this, but often
 756                anchored_offset is not 0, so this check will not be wasted. */
 757             /* XXXX In the code below we prefer to look for "^" even in
 758                presence of anchored substrings.  And we search even
 759                beyond the found float position.  These pessimizations
 760                are historical artefacts only.  */
 761           find_anchor:
 762             while (t < strend - prog->minlen) {
 763                 if (*t == '\n') {
 764                     if (t < check_at - prog->check_offset_min) {
 765                         if (do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) {
 766                             /* Since we moved from the found position,
 767                                we definitely contradict the found anchored
 768                                substr.  Due to the above check we do not
 769                                contradict "check" substr.
 770                                Thus we can arrive here only if check substr
 771                                is float.  Redo checking for "other"=="fixed".
 772                              */
 773                             strpos = t + 1;
 774                             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
 775                                 PL_colors[0], PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
 776                             goto do_other_anchored;
 777                         }
 778                         /* We don't contradict the found floating substring. */
 779                         /* XXXX Why not check for STCLASS? */
 780                         s = t + 1;
 781                         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
 782                             PL_colors[0], PL_colors[1], (long)(s - i_strpos)));
 783                         goto set_useful;
 784                     }
 785                     /* Position contradicts check-string */
 786                     /* XXXX probably better to look for check-string
 787                        than for "\n", so one should lower the limit for t? */
 788                     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n",
 789                         PL_colors[0], PL_colors[1], (long)(t + 1 - i_strpos)));
 790                     other_last = strpos = s = t + 1;
 791                     goto restart;
 792                 }
 793                 t++;
 794             }
 795             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
 796                         PL_colors[0], PL_colors[1]));
 797             goto fail_finish;
 798         }
 799         else {
 800             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Starting position does not contradict /%s^%s/m...\n",
 801                         PL_colors[0], PL_colors[1]));
 802         }
 803         s = t;
 804       set_useful:
 805         ++BmUSEFUL(do_utf8 ? prog->check_utf8 : prog->check_substr);    /* hooray/5 */
 806     }
 807     else {
 808         /* The found string does not prohibit matching at strpos,
 809            - no optimization of calling REx engine can be performed,
 810            unless it was an MBOL and we are not after MBOL,
 811            or a future STCLASS check will fail this. */
 812       try_at_start:
 813         /* Even in this situation we may use MBOL flag if strpos is offset
 814            wrt the start of the string. */
 815         if (ml_anch && sv && !SvROK(sv) /* See prev comment on SvROK */
 816             && (strpos != strbeg) && strpos[-1] != '\n'
 817             /* May be due to an implicit anchor of m{.*foo}  */
 818             && !(prog->intflags & PREGf_IMPLICIT))
 819         {
 820             t = strpos;
 821             goto find_anchor;
 822         }
 823         DEBUG_EXECUTE_r( if (ml_anch)
 824             PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n",
 825                           (long)(strpos - i_strpos), PL_colors[0], PL_colors[1]);
 826         );
 827       success_at_start:
 828         if (!(prog->intflags & PREGf_NAUGHTY)   /* XXXX If strpos moved? */
 829             && (do_utf8 ? (
 830                 prog->check_utf8                /* Could be deleted already */
 831                 && --BmUSEFUL(prog->check_utf8) < 0
 832                 && (prog->check_utf8 == prog->float_utf8)
 833             ) : (
 834                 prog->check_substr              /* Could be deleted already */
 835                 && --BmUSEFUL(prog->check_substr) < 0
 836                 && (prog->check_substr == prog->float_substr)
 837             )))
 838         {
 839             /* If flags & SOMETHING - do not do it many times on the same match */
 840             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "... Disabling check substring...\n"));
 841             SvREFCNT_dec(do_utf8 ? prog->check_utf8 : prog->check_substr);
 842             if (do_utf8 ? prog->check_substr : prog->check_utf8)
 843                 SvREFCNT_dec(do_utf8 ? prog->check_substr : prog->check_utf8);
 844             prog->check_substr = prog->check_utf8 = NULL;       /* disable */
 845             prog->float_substr = prog->float_utf8 = NULL;       /* clear */
 846             check = NULL;                       /* abort */
 847             s = strpos;
 848             /* XXXX This is a remnant of the old implementation.  It
 849                     looks wasteful, since now INTUIT can use many
 850                     other heuristics. */
 851             prog->extflags &= ~RXf_USE_INTUIT;
 852         }
 853         else
 854             s = strpos;
 855     }
 856
 857     /* Last resort... */
 858     /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
 859     /* trie stclasses are too expensive to use here, we are better off to
 860        leave it to regmatch itself */
 861     if (progi->regstclass && PL_regkind[OP(progi->regstclass)]!=TRIE) {
 862         /* minlen == 0 is possible if regstclass is \b or \B,
 863            and the fixed substr is ''$.
 864            Since minlen is already taken into account, s+1 is before strend;
 865            accidentally, minlen >= 1 guaranties no false positives at s + 1
 866            even for \b or \B.  But (minlen? 1 : 0) below assumes that
 867            regstclass does not come from lookahead...  */
 868         /* If regstclass takes bytelength more than 1: If charlength==1, OK.
 869            This leaves EXACTF only, which is dealt with in find_byclass().  */
 870         const U8* const str = (U8*)STRING(progi->regstclass);
 871         const int cl_l = (PL_regkind[OP(progi->regstclass)] == EXACT
 872                     ? CHR_DIST(str+STR_LEN(progi->regstclass), str)
 873                     : 1);
 874         char * endpos;
 875         if (prog->anchored_substr || prog->anchored_utf8 || ml_anch)
 876             endpos= HOP3c(s, (prog->minlen ? cl_l : 0), strend);
 877         else if (prog->float_substr || prog->float_utf8)
 878             endpos= HOP3c(HOP3c(check_at, -start_shift, strbeg), cl_l, strend);
 879         else
 880             endpos= strend;
 881
 882         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "start_shift: %"IVdf" check_at: %"IVdf" s: %"IVdf" endpos: %"IVdf"\n",
 883                                       (IV)start_shift, (IV)(check_at - strbeg), (IV)(s - strbeg), (IV)(endpos - strbeg)));
 884
 885         t = s;
 886         s = find_byclass(prog, progi->regstclass, s, endpos, NULL);
 887         if (!s) {
 888 #ifdef DEBUGGING
 889             const char *what = NULL;
 890 #endif
 891             if (endpos == strend) {
 892                 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
 893                                 "Could not match STCLASS...\n") );
 894                 goto fail;
 895             }
 896             DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
 897                                    "This position contradicts STCLASS...\n") );
 898             if ((prog->extflags & RXf_ANCH) && !ml_anch)
 899                 goto fail;
 900             /* Contradict one of substrings */
 901             if (prog->anchored_substr || prog->anchored_utf8) {
 902                 if ((do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) == check) {
 903                     DEBUG_EXECUTE_r( what = "anchored" );
 904                   hop_and_restart:
 905                     s = HOP3c(t, 1, strend);
 906                     if (s + start_shift + end_shift > strend) {
 907                         /* XXXX Should be taken into account earlier? */
 908                         DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
 909                                                "Could not match STCLASS...\n") );
 910                         goto fail;
 911                     }
 912                     if (!check)
 913                         goto giveup;
 914                     DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
 915                                 "Looking for %s substr starting at offset %ld...\n",
 916                                  what, (long)(s + start_shift - i_strpos)) );
 917                     goto restart;
 918                 }
 919                 /* Have both, check_string is floating */
 920                 if (t + start_shift >= check_at) /* Contradicts floating=check */
 921                     goto retry_floating_check;
 922                 /* Recheck anchored substring, but not floating... */
 923                 s = check_at;
 924                 if (!check)
 925                     goto giveup;
 926                 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
 927                           "Looking for anchored substr starting at offset %ld...\n",
 928                           (long)(other_last - i_strpos)) );
 929                 goto do_other_anchored;
 930             }
 931             /* Another way we could have checked stclass at the
 932                current position only: */
 933             if (ml_anch) {
 934                 s = t = t + 1;
 935                 if (!check)
 936                     goto giveup;
 937                 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
 938                           "Looking for /%s^%s/m starting at offset %ld...\n",
 939                           PL_colors[0], PL_colors[1], (long)(t - i_strpos)) );
 940                 goto try_at_offset;
 941             }
 942             if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))     /* Could have been deleted */
 943                 goto fail;
 944             /* Check is floating subtring. */
 945           retry_floating_check:
 946             t = check_at - start_shift;
 947             DEBUG_EXECUTE_r( what = "floating" );
 948             goto hop_and_restart;
 949         }
 950         if (t != s) {
 951             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 952                         "By STCLASS: moving %ld --> %ld\n",
 953                                   (long)(t - i_strpos), (long)(s - i_strpos))
 954                    );
 955         }
 956         else {
 957             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 958                                   "Does not contradict STCLASS...\n");
 959                    );
 960         }
 961     }
 962   giveup:
 963     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%s%s:%s match at offset %ld\n",
 964                           PL_colors[4], (check ? "Guessed" : "Giving up"),
 965                           PL_colors[5], (long)(s - i_strpos)) );
 966     return s;
 967
 968   fail_finish:                          /* Substring not found */
 969     if (prog->check_substr || prog->check_utf8)         /* could be removed already */
 970         BmUSEFUL(do_utf8 ? prog->check_utf8 : prog->check_substr) += 5; /* hooray */
 971   fail:
 972     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
 973                           PL_colors[4], PL_colors[5]));
 974     return NULL;
 975 }
 976
 977
 978
 979 #define REXEC_TRIE_READ_CHAR(trie_type, trie, uc, uscan, len, uvc, charid,  \
 980 foldlen, foldbuf, uniflags) STMT_START {                                    \
 981     switch (trie_type) {                                                    \
 982     case trie_utf8_fold:                                                    \
 983         if ( foldlen>0 ) {                                                  \
 984             uvc = utf8n_to_uvuni( uscan, UTF8_MAXLEN, &len, uniflags );     \
 985             foldlen -= len;                                                 \
 986             uscan += len;                                                   \
 987             len=0;                                                          \
 988         } else {                                                            \
 989             uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN, &len, uniflags );   \
 990             uvc = to_uni_fold( uvc, foldbuf, &foldlen );                    \
 991             foldlen -= UNISKIP( uvc );                                      \
 992             uscan = foldbuf + UNISKIP( uvc );                               \
 993         }                                                                   \
 994         break;                                                              \
 995     case trie_utf8:                                                         \
 996         uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN, &len, uniflags );       \
 997         break;                                                              \
 998     case trie_plain:                                                        \
 999         uvc = (UV)*uc;                                                      \
1000         len = 1;                                                            \
1001     }                                                                       \
1002                                                                             \
1003     if (uvc < 256) {                                                        \
1004         charid = trie->charmap[ uvc ];                                      \
1005     }                                                                       \
1006     else {                                                                  \
1007         charid = 0;                                                         \
1008         if (trie->widecharmap) {                                            \
1009             SV** const svpp = hv_fetch(trie->widecharmap,                   \
1010                         (char*)&uvc, sizeof(UV), 0);                        \
1011             if (svpp)                                                       \
1012                 charid = (U16)SvIV(*svpp);                                  \
1013         }                                                                   \
1014     }                                                                       \
1015 } STMT_END
1016
1017 #define REXEC_FBC_EXACTISH_CHECK(CoNd)                  \
1018     if ( (CoNd)                                        \
1019          && (ln == len ||                              \
1020              ibcmp_utf8(s, NULL, 0,  do_utf8,          \
1021                         m, NULL, ln, (bool)UTF))       \
1022          && (!reginfo || regtry(reginfo, &s)) )         \
1023         goto got_it;                                   \
1024     else {                                             \
1025          U8 foldbuf[UTF8_MAXBYTES_CASE+1];             \
1026          uvchr_to_utf8(tmpbuf, c);                     \
1027          f = to_utf8_fold(tmpbuf, foldbuf, &foldlen);  \
1028          if ( f != c                                   \
1029               && (f == c1 || f == c2)                  \
1030               && (ln == foldlen ||                     \
1031                   !ibcmp_utf8((char *) foldbuf,        \
1032                               NULL, foldlen, do_utf8,  \
1033                               m,                       \
1034                               NULL, ln, (bool)UTF))    \
1035               && (!reginfo || regtry(reginfo, &s)) )    \
1036               goto got_it;                             \
1037     }                                                  \
1038     s += len
1039
1040 #define REXEC_FBC_EXACTISH_SCAN(CoNd)                     \
1041 STMT_START {                                              \
1042     while (s <= e) {                                      \
1043         if ( (CoNd)                                       \
1044              && (ln == 1 || !(OP(c) == EXACTF             \
1045                               ? ibcmp(s, m, ln)           \
1046                               : ibcmp_locale(s, m, ln)))  \
1047              && (!reginfo || regtry(reginfo, &s)) )        \
1048             goto got_it;                                  \
1049         s++;                                              \
1050     }                                                     \
1051 } STMT_END
1052
1053 #define REXEC_FBC_UTF8_SCAN(CoDe)                     \
1054 STMT_START {                                          \
1055     while (s + (uskip = UTF8SKIP(s)) <= strend) {     \
1056         CoDe                                          \
1057         s += uskip;                                   \
1058     }                                                 \
1059 } STMT_END
1060
1061 #define REXEC_FBC_SCAN(CoDe)                          \
1062 STMT_START {                                          \
1063     while (s < strend) {                              \
1064         CoDe                                          \
1065         s++;                                          \
1066     }                                                 \
1067 } STMT_END
1068
1069 #define REXEC_FBC_UTF8_CLASS_SCAN(CoNd)               \
1070 REXEC_FBC_UTF8_SCAN(                                  \
1071     if (CoNd) {                                       \
1072         if (tmp && (!reginfo || regtry(reginfo, &s)))  \
1073             goto got_it;                              \
1074         else                                          \
1075             tmp = doevery;                            \
1076     }                                                 \
1077     else                                              \
1078         tmp = 1;                                      \
1079 )
1080
1081 #define REXEC_FBC_CLASS_SCAN(CoNd)                    \
1082 REXEC_FBC_SCAN(                                       \
1083     if (CoNd) {                                       \
1084         if (tmp && (!reginfo || regtry(reginfo, &s)))  \
1085             goto got_it;                              \
1086         else                                          \
1087             tmp = doevery;                            \
1088     }                                                 \
1089     else                                              \
1090         tmp = 1;                                      \
1091 )
1092
1093 #define REXEC_FBC_TRYIT               \
1094 if ((!reginfo || regtry(reginfo, &s))) \
1095     goto got_it
1096
1097 #define REXEC_FBC_CSCAN_PRELOAD(UtFpReLoAd,CoNdUtF8,CoNd)      \
1098     if (do_utf8) {                                             \
1099         UtFpReLoAd;                                            \
1100         REXEC_FBC_UTF8_CLASS_SCAN(CoNdUtF8);                   \
1101     }                                                          \
1102     else {                                                     \
1103         REXEC_FBC_CLASS_SCAN(CoNd);                            \
1104     }                                                          \
1105     break
1106
1107 #define REXEC_FBC_CSCAN_TAINT(CoNdUtF8,CoNd)                   \
1108     PL_reg_flags |= RF_tainted;                                \
1109     if (do_utf8) {                                             \
1110         REXEC_FBC_UTF8_CLASS_SCAN(CoNdUtF8);                   \
1111     }                                                          \
1112     else {                                                     \
1113         REXEC_FBC_CLASS_SCAN(CoNd);                            \
1114     }                                                          \
1115     break
1116
1117 #define DUMP_EXEC_POS(li,s,doutf8) \
1118     dump_exec_pos(li,s,(PL_regeol),(PL_bostr),(PL_reg_starttry),doutf8)
1119
1120 /* We know what class REx starts with.  Try to find this position... */
1121 /* if reginfo is NULL, its a dryrun */
1122 /* annoyingly all the vars in this routine have different names from their counterparts
1123    in regmatch. /grrr */
1124
1125 STATIC char *
1126 S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
1127     const char *strend, regmatch_info *reginfo)
1128 {
1129         dVAR;
1130         const I32 doevery = (prog->intflags & PREGf_SKIP) == 0;
1131         char *m;
1132         STRLEN ln;
1133         STRLEN lnc;
1134         register STRLEN uskip;
1135         unsigned int c1;
1136         unsigned int c2;
1137         char *e;
1138         register I32 tmp = 1;   /* Scratch variable? */
1139         register const bool do_utf8 = PL_reg_match_utf8;
1140         RXi_GET_DECL(prog,progi);
1141
1142         /* We know what class it must start with. */
1143         switch (OP(c)) {
1144         case ANYOF:
1145             if (do_utf8) {
1146                  REXEC_FBC_UTF8_CLASS_SCAN((ANYOF_FLAGS(c) & ANYOF_UNICODE) ||
1147                           !UTF8_IS_INVARIANT((U8)s[0]) ?
1148                           reginclass(prog, c, (U8*)s, 0, do_utf8) :
1149                           REGINCLASS(prog, c, (U8*)s));
1150             }
1151             else {
1152                  while (s < strend) {
1153                       STRLEN skip = 1;
1154
1155                       if (REGINCLASS(prog, c, (U8*)s) ||
1156                           (ANYOF_FOLD_SHARP_S(c, s, strend) &&
1157                            /* The assignment of 2 is intentional:
1158                             * for the folded sharp s, the skip is 2. */
1159                            (skip = SHARP_S_SKIP))) {
1160                            if (tmp && (!reginfo || regtry(reginfo, &s)))
1161                                 goto got_it;
1162                            else
1163                                 tmp = doevery;
1164                       }
1165                       else
1166                            tmp = 1;
1167                       s += skip;
1168                  }
1169             }
1170             break;
1171         case CANY:
1172             REXEC_FBC_SCAN(
1173                 if (tmp && (!reginfo || regtry(reginfo, &s)))
1174                     goto got_it;
1175                 else
1176                     tmp = doevery;
1177             );
1178             break;
1179         case EXACTF:
1180             m   = STRING(c);
1181             ln  = STR_LEN(c);   /* length to match in octets/bytes */
1182             lnc = (I32) ln;     /* length to match in characters */
1183             if (UTF) {
1184                 STRLEN ulen1, ulen2;
1185                 U8 *sm = (U8 *) m;
1186                 U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
1187                 U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
1188                 const U32 uniflags = UTF8_ALLOW_DEFAULT;
1189
1190                 to_utf8_lower((U8*)m, tmpbuf1, &ulen1);
1191                 to_utf8_upper((U8*)m, tmpbuf2, &ulen2);
1192
1193                 c1 = utf8n_to_uvchr(tmpbuf1, UTF8_MAXBYTES_CASE,
1194                                     0, uniflags);
1195                 c2 = utf8n_to_uvchr(tmpbuf2, UTF8_MAXBYTES_CASE,
1196                                     0, uniflags);
1197                 lnc = 0;
1198                 while (sm < ((U8 *) m + ln)) {
1199                     lnc++;
1200                     sm += UTF8SKIP(sm);
1201                 }
1202             }
1203             else {
1204                 c1 = *(U8*)m;
1205                 c2 = PL_fold[c1];
1206             }
1207             goto do_exactf;
1208         case EXACTFL:
1209             m   = STRING(c);
1210             ln  = STR_LEN(c);
1211             lnc = (I32) ln;
1212             c1 = *(U8*)m;
1213             c2 = PL_fold_locale[c1];
1214           do_exactf:
1215             e = HOP3c(strend, -((I32)lnc), s);
1216
1217             if (!reginfo && e < s)
1218                 e = s;                  /* Due to minlen logic of intuit() */
1219
1220             /* The idea in the EXACTF* cases is to first find the
1221              * first character of the EXACTF* node and then, if
1222              * necessary, case-insensitively compare the full
1223              * text of the node.  The c1 and c2 are the first
1224              * characters (though in Unicode it gets a bit
1225              * more complicated because there are more cases
1226              * than just upper and lower: one needs to use
1227              * the so-called folding case for case-insensitive
1228              * matching (called "loose matching" in Unicode).
1229              * ibcmp_utf8() will do just that. */
1230
1231             if (do_utf8) {
1232                 UV c, f;
1233                 U8 tmpbuf [UTF8_MAXBYTES+1];
1234                 STRLEN len, foldlen;
1235                 const U32 uniflags = UTF8_ALLOW_DEFAULT;
1236                 if (c1 == c2) {
1237                     /* Upper and lower of 1st char are equal -
1238                      * probably not a "letter". */
1239                     while (s <= e) {
1240                         c = utf8n_to_uvchr((U8*)s, UTF8_MAXBYTES, &len,
1241                                            uniflags);
1242                         REXEC_FBC_EXACTISH_CHECK(c == c1);
1243                     }
1244                 }
1245                 else {
1246                     while (s <= e) {
1247                       c = utf8n_to_uvchr((U8*)s, UTF8_MAXBYTES, &len,
1248                                            uniflags);
1249
1250                         /* Handle some of the three Greek sigmas cases.
1251                          * Note that not all the possible combinations
1252                          * are handled here: some of them are handled
1253                          * by the standard folding rules, and some of
1254                          * them (the character class or ANYOF cases)
1255                          * are handled during compiletime in
1256                          * regexec.c:S_regclass(). */
1257                         if (c == (UV)UNICODE_GREEK_CAPITAL_LETTER_SIGMA ||
1258                             c == (UV)UNICODE_GREEK_SMALL_LETTER_FINAL_SIGMA)
1259                             c = (UV)UNICODE_GREEK_SMALL_LETTER_SIGMA;
1260
1261                         REXEC_FBC_EXACTISH_CHECK(c == c1 || c == c2);
1262                     }
1263                 }
1264             }
1265             else {
1266                 if (c1 == c2)
1267                     REXEC_FBC_EXACTISH_SCAN(*(U8*)s == c1);
1268                 else
1269                     REXEC_FBC_EXACTISH_SCAN(*(U8*)s == c1 || *(U8*)s == c2);
1270             }
1271             break;
1272         case BOUNDL:
1273             PL_reg_flags |= RF_tainted;
1274             /* FALL THROUGH */
1275         case BOUND:
1276             if (do_utf8) {
1277                 if (s == PL_bostr)
1278                     tmp = '\n';
1279                 else {
1280                     U8 * const r = reghop3((U8*)s, -1, (U8*)PL_bostr);
1281                     tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, UTF8_ALLOW_DEFAULT);
1282                 }
1283                 tmp = ((OP(c) == BOUND ?
1284                         isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
1285                 LOAD_UTF8_CHARCLASS_ALNUM();
1286                 REXEC_FBC_UTF8_SCAN(
1287                     if (tmp == !(OP(c) == BOUND ?
1288                                  (bool)swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
1289                                  isALNUM_LC_utf8((U8*)s)))
1290                     {
1291                         tmp = !tmp;
1292                         REXEC_FBC_TRYIT;
1293                 }
1294                 );
1295             }
1296             else {
1297                 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
1298                 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1299                 REXEC_FBC_SCAN(
1300                     if (tmp ==
1301                         !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
1302                         tmp = !tmp;
1303                         REXEC_FBC_TRYIT;
1304                 }
1305                 );
1306             }
1307             if ((!prog->minlen && tmp) && (!reginfo || regtry(reginfo, &s)))
1308                 goto got_it;
1309             break;
1310         case NBOUNDL:
1311             PL_reg_flags |= RF_tainted;
1312             /* FALL THROUGH */
1313         case NBOUND:
1314             if (do_utf8) {
1315                 if (s == PL_bostr)
1316                     tmp = '\n';
1317                 else {
1318                     U8 * const r = reghop3((U8*)s, -1, (U8*)PL_bostr);
1319                     tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, UTF8_ALLOW_DEFAULT);
1320                 }
1321                 tmp = ((OP(c) == NBOUND ?
1322                         isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
1323                 LOAD_UTF8_CHARCLASS_ALNUM();
1324                 REXEC_FBC_UTF8_SCAN(
1325                     if (tmp == !(OP(c) == NBOUND ?
1326                                  (bool)swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
1327                                  isALNUM_LC_utf8((U8*)s)))
1328                         tmp = !tmp;
1329                     else REXEC_FBC_TRYIT;
1330                 );
1331             }
1332             else {
1333                 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
1334                 tmp = ((OP(c) == NBOUND ?
1335                         isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1336                 REXEC_FBC_SCAN(
1337                     if (tmp ==
1338                         !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
1339                         tmp = !tmp;
1340                     else REXEC_FBC_TRYIT;
1341                 );
1342             }
1343             if ((!prog->minlen && !tmp) && (!reginfo || regtry(reginfo, &s)))
1344                 goto got_it;
1345             break;
1346         case ALNUM:
1347             REXEC_FBC_CSCAN_PRELOAD(
1348                 LOAD_UTF8_CHARCLASS_ALNUM(),
1349                 swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8),
1350                 isALNUM(*s)
1351             );
1352         case ALNUML:
1353             REXEC_FBC_CSCAN_TAINT(
1354                 isALNUM_LC_utf8((U8*)s),
1355                 isALNUM_LC(*s)
1356             );
1357         case NALNUM:
1358             REXEC_FBC_CSCAN_PRELOAD(
1359                 LOAD_UTF8_CHARCLASS_ALNUM(),
1360                 !swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8),
1361                 !isALNUM(*s)
1362             );
1363         case NALNUML:
1364             REXEC_FBC_CSCAN_TAINT(
1365                 !isALNUM_LC_utf8((U8*)s),
1366                 !isALNUM_LC(*s)
1367             );
1368         case SPACE:
1369             REXEC_FBC_CSCAN_PRELOAD(
1370                 LOAD_UTF8_CHARCLASS_SPACE(),
1371                 *s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8),
1372                 isSPACE(*s)
1373             );
1374         case SPACEL:
1375             REXEC_FBC_CSCAN_TAINT(
1376                 *s == ' ' || isSPACE_LC_utf8((U8*)s),
1377                 isSPACE_LC(*s)
1378             );
1379         case NSPACE:
1380             REXEC_FBC_CSCAN_PRELOAD(
1381                 LOAD_UTF8_CHARCLASS_SPACE(),
1382                 !(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8)),
1383                 !isSPACE(*s)
1384             );
1385         case NSPACEL:
1386             REXEC_FBC_CSCAN_TAINT(
1387                 !(*s == ' ' || isSPACE_LC_utf8((U8*)s)),
1388                 !isSPACE_LC(*s)
1389             );
1390         case DIGIT:
1391             REXEC_FBC_CSCAN_PRELOAD(
1392                 LOAD_UTF8_CHARCLASS_DIGIT(),
1393                 swash_fetch(PL_utf8_digit,(U8*)s, do_utf8),
1394                 isDIGIT(*s)
1395             );
1396         case DIGITL:
1397             REXEC_FBC_CSCAN_TAINT(
1398                 isDIGIT_LC_utf8((U8*)s),
1399                 isDIGIT_LC(*s)
1400             );
1401         case NDIGIT:
1402             REXEC_FBC_CSCAN_PRELOAD(
1403                 LOAD_UTF8_CHARCLASS_DIGIT(),
1404                 !swash_fetch(PL_utf8_digit,(U8*)s, do_utf8),
1405                 !isDIGIT(*s)
1406             );
1407         case NDIGITL:
1408             REXEC_FBC_CSCAN_TAINT(
1409                 !isDIGIT_LC_utf8((U8*)s),
1410                 !isDIGIT_LC(*s)
1411             );
1412         case AHOCORASICKC:
1413         case AHOCORASICK:
1414             {
1415                 const enum { trie_plain, trie_utf8, trie_utf8_fold }
1416                     trie_type = do_utf8 ?
1417                           (c->flags == EXACT ? trie_utf8 : trie_utf8_fold)
1418                         : trie_plain;
1419                 /* what trie are we using right now */
1420                 reg_ac_data *aho
1421                     = (reg_ac_data*)progi->data->data[ ARG( c ) ];
1422                 reg_trie_data *trie=aho->trie;
1423
1424                 const char *last_start = strend - trie->minlen;
1425 #ifdef DEBUGGING
1426                 const char *real_start = s;
1427 #endif
1428                 STRLEN maxlen = trie->maxlen;
1429                 SV *sv_points;
1430                 U8 **points; /* map of where we were in the input string
1431                                 when reading a given char. For ASCII this
1432                                 is unnecessary overhead as the relationship
1433                                 is always 1:1, but for unicode, especially
1434                                 case folded unicode this is not true. */
1435                 U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ];
1436                 U8 *bitmap=NULL;
1437
1438
1439                 GET_RE_DEBUG_FLAGS_DECL;
1440
1441                 /* We can't just allocate points here. We need to wrap it in
1442                  * an SV so it gets freed properly if there is a croak while
1443                  * running the match */
1444                 ENTER;
1445                 SAVETMPS;
1446                 sv_points=newSV(maxlen * sizeof(U8 *));
1447                 SvCUR_set(sv_points,
1448                     maxlen * sizeof(U8 *));
1449                 SvPOK_on(sv_points);
1450                 sv_2mortal(sv_points);
1451                 points=(U8**)SvPV_nolen(sv_points );
1452                 if ( trie_type != trie_utf8_fold
1453                      && (trie->bitmap || OP(c)==AHOCORASICKC) )
1454                 {
1455                     if (trie->bitmap)
1456                         bitmap=(U8*)trie->bitmap;
1457                     else
1458                         bitmap=(U8*)ANYOF_BITMAP(c);
1459                 }
1460                 /* this is the Aho-Corasick algorithm modified a touch
1461                    to include special handling for long "unknown char"
1462                    sequences. The basic idea being that we use AC as long
1463                    as we are dealing with a possible matching char, when
1464                    we encounter an unknown char (and we have not encountered
1465                    an accepting state) we scan forward until we find a legal
1466                    starting char.
1467                    AC matching is basically that of trie matching, except
1468                    that when we encounter a failing transition, we fall back
1469                    to the current states "fail state", and try the current char
1470                    again, a process we repeat until we reach the root state,
1471                    state 1, or a legal transition. If we fail on the root state
1472                    then we can either terminate if we have reached an accepting
1473                    state previously, or restart the entire process from the beginning
1474                    if we have not.
1475
1476                  */
1477                 while (s <= last_start) {
1478                     const U32 uniflags = UTF8_ALLOW_DEFAULT;
1479                     U8 *uc = (U8*)s;
1480                     U16 charid = 0;
1481                     U32 base = 1;
1482                     U32 state = 1;
1483                     UV uvc = 0;
1484                     STRLEN len = 0;
1485                     STRLEN foldlen = 0;
1486                     U8 *uscan = (U8*)NULL;
1487                     U8 *leftmost = NULL;
1488 #ifdef DEBUGGING
1489                     U32 accepted_word= 0;
1490 #endif
1491                     U32 pointpos = 0;
1492
1493                     while ( state && uc <= (U8*)strend ) {
1494                         int failed=0;
1495                         U32 word = aho->states[ state ].wordnum;
1496
1497                         if( state==1 ) {
1498                             if ( bitmap ) {
1499                                 DEBUG_TRIE_EXECUTE_r(
1500                                     if ( uc <= (U8*)last_start && !BITMAP_TEST(bitmap,*uc) ) {
1501                                         dump_exec_pos( (char *)uc, c, strend, real_start,
1502                                             (char *)uc, do_utf8 );
1503                                         PerlIO_printf( Perl_debug_log,
1504                                             " Scanning for legal start char...\n");
1505                                     }
1506                                 );
1507                                 while ( uc <= (U8*)last_start  && !BITMAP_TEST(bitmap,*uc) ) {
1508                                     uc++;
1509                                 }
1510                                 s= (char *)uc;
1511                             }
1512                             if (uc >(U8*)last_start) break;
1513                         }
1514
1515                         if ( word ) {
1516                             U8 *lpos= points[ (pointpos - trie->wordlen[word-1] ) % maxlen ];
1517                             if (!leftmost || lpos < leftmost) {
1518                                 DEBUG_r(accepted_word=word);
1519                                 leftmost= lpos;
1520                             }
1521                             if (base==0) break;
1522
1523                         }
1524                         points[pointpos++ % maxlen]= uc;
1525                         REXEC_TRIE_READ_CHAR(trie_type, trie, uc, uscan, len,
1526                             uvc, charid, foldlen, foldbuf, uniflags);
1527                         DEBUG_TRIE_EXECUTE_r({
1528                             dump_exec_pos( (char *)uc, c, strend, real_start,
1529                                 s,   do_utf8 );
1530                             PerlIO_printf(Perl_debug_log,
1531                                 " Charid:%3u CP:%4"UVxf" ",
1532                                  charid, uvc);
1533                         });
1534
1535                         do {
1536 #ifdef DEBUGGING
1537                             word = aho->states[ state ].wordnum;
1538 #endif
1539                             base = aho->states[ state ].trans.base;
1540
1541                             DEBUG_TRIE_EXECUTE_r({
1542                                 if (failed)
1543                                     dump_exec_pos( (char *)uc, c, strend, real_start,
1544                                         s,   do_utf8 );
1545                                 PerlIO_printf( Perl_debug_log,
1546                                     "%sState: %4"UVxf", word=%"UVxf,
1547                                     failed ? " Fail transition to " : "",
1548                                     (UV)state, (UV)word);
1549                             });
1550                             if ( base ) {
1551                                 U32 tmp;
1552                                 if (charid &&
1553                                      (base + charid > trie->uniquecharcount )
1554                                      && (base + charid - 1 - trie->uniquecharcount
1555                                             < trie->lasttrans)
1556                                      && trie->trans[base + charid - 1 -
1557                                             trie->uniquecharcount].check == state
1558                                      && (tmp=trie->trans[base + charid - 1 -
1559                                         trie->uniquecharcount ].next))
1560                                 {
1561                                     DEBUG_TRIE_EXECUTE_r(
1562                                         PerlIO_printf( Perl_debug_log," - legal\n"));
1563                                     state = tmp;
1564                                     break;
1565                                 }
1566                                 else {
1567                                     DEBUG_TRIE_EXECUTE_r(
1568                                         PerlIO_printf( Perl_debug_log," - fail\n"));
1569                                     failed = 1;
1570                                     state = aho->fail[state];
1571                                 }
1572                             }
1573                             else {
1574                                 /* we must be accepting here */
1575                                 DEBUG_TRIE_EXECUTE_r(
1576                                         PerlIO_printf( Perl_debug_log," - accepting\n"));
1577                                 failed = 1;
1578                                 break;
1579                             }
1580                         } while(state);
1581                         uc += len;
1582                         if (failed) {
1583                             if (leftmost)
1584                                 break;
1585                             if (!state) state = 1;
1586                         }
1587                     }
1588                     if ( aho->states[ state ].wordnum ) {
1589                         U8 *lpos = points[ (pointpos - trie->wordlen[aho->states[ state ].wordnum-1]) % maxlen ];
1590                         if (!leftmost || lpos < leftmost) {
1591                             DEBUG_r(accepted_word=aho->states[ state ].wordnum);
1592                             leftmost = lpos;
1593                         }
1594                     }
1595                     if (leftmost) {
1596                         s = (char*)leftmost;
1597                         DEBUG_TRIE_EXECUTE_r({
1598                             PerlIO_printf(
1599                                 Perl_debug_log,"Matches word #%"UVxf" at position %"IVdf". Trying full pattern...\n",
1600                                 (UV)accepted_word, (IV)(s - real_start)
1601                             );
1602                         });
1603                         if (!reginfo || regtry(reginfo, &s)) {
1604                             FREETMPS;
1605                             LEAVE;
1606                             goto got_it;
1607                         }
1608                         s = HOPc(s,1);
1609                         DEBUG_TRIE_EXECUTE_r({
1610                             PerlIO_printf( Perl_debug_log,"Pattern failed. Looking for new start point...\n");
1611                         });
1612                     } else {
1613                         DEBUG_TRIE_EXECUTE_r(
1614                             PerlIO_printf( Perl_debug_log,"No match.\n"));
1615                         break;
1616                     }
1617                 }
1618                 FREETMPS;
1619                 LEAVE;
1620             }
1621             break;
1622         default:
1623             Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
1624             break;
1625         }
1626         return 0;
1627       got_it:
1628         return s;
1629 }
1630
1631 /*
1632  - regexec_flags - match a regexp against a string
1633  */
1634 I32
1635 Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend,
1636               char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
1637 /* strend: pointer to null at end of string */
1638 /* strbeg: real beginning of string */
1639 /* minend: end of match must be >=minend after stringarg. */
1640 /* data: May be used for some additional optimizations.
1641          Currently its only used, with a U32 cast, for transmitting
1642          the ganch offset when doing a /g match. This will change */
1643 /* nosave: For optimizations. */
1644 {
1645     dVAR;
1646     /*register*/ char *s;
1647     register regnode *c;
1648     /*register*/ char *startpos = stringarg;
1649     I32 minlen;         /* must match at least this many chars */
1650     I32 dontbother = 0; /* how many characters not to try at end */
1651     I32 end_shift = 0;                  /* Same for the end. */         /* CC */
1652     I32 scream_pos = -1;                /* Internal iterator of scream. */
1653     char *scream_olds = NULL;
1654     SV* const oreplsv = GvSV(PL_replgv);
1655     const bool do_utf8 = (bool)DO_UTF8(sv);
1656     I32 multiline;
1657     RXi_GET_DECL(prog,progi);
1658     regmatch_info reginfo;  /* create some info to pass to regtry etc */
1659
1660     GET_RE_DEBUG_FLAGS_DECL;
1661
1662     PERL_UNUSED_ARG(data);
1663
1664     /* Be paranoid... */
1665     if (prog == NULL || startpos == NULL) {
1666         Perl_croak(aTHX_ "NULL regexp parameter");
1667         return 0;
1668     }
1669
1670     multiline = prog->extflags & RXf_PMf_MULTILINE;
1671     reginfo.prog = prog;
1672
1673     RX_MATCH_UTF8_set(prog, do_utf8);
1674     DEBUG_EXECUTE_r(
1675         debug_start_match(prog, do_utf8, startpos, strend,
1676         "Matching");
1677     );
1678
1679     minlen = prog->minlen;
1680
1681     if (strend - startpos < (minlen+(prog->check_offset_min<0?prog->check_offset_min:0))) {
1682         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
1683                               "String too short [regexec_flags]...\n"));
1684         goto phooey;
1685     }
1686
1687
1688     /* Check validity of program. */
1689     if (UCHARAT(progi->program) != REG_MAGIC) {
1690         Perl_croak(aTHX_ "corrupted regexp program");
1691     }
1692
1693     PL_reg_flags = 0;
1694     PL_reg_eval_set = 0;
1695     PL_reg_maxiter = 0;
1696
1697     if (prog->extflags & RXf_UTF8)
1698         PL_reg_flags |= RF_utf8;
1699
1700     /* Mark beginning of line for ^ and lookbehind. */
1701     reginfo.bol = startpos; /* XXX not used ??? */
1702     PL_bostr  = strbeg;
1703     reginfo.sv = sv;
1704
1705     /* Mark end of line for $ (and such) */
1706     PL_regeol = strend;
1707
1708     /* see how far we have to get to not match where we matched before */
1709     reginfo.till = startpos+minend;
1710
1711     /* If there is a "must appear" string, look for it. */
1712     s = startpos;
1713
1714     if (prog->extflags & RXf_GPOS_SEEN) { /* Need to set reginfo->ganch */
1715         MAGIC *mg;
1716
1717         if (flags & REXEC_IGNOREPOS)    /* Means: check only at start */
1718             reginfo.ganch = startpos + prog->gofs;
1719         else if (sv && SvTYPE(sv) >= SVt_PVMG
1720                   && SvMAGIC(sv)
1721                   && (mg = mg_find(sv, PERL_MAGIC_regex_global))
1722                   && mg->mg_len >= 0) {
1723             reginfo.ganch = strbeg + mg->mg_len;        /* Defined pos() */
1724             if (prog->extflags & RXf_ANCH_GPOS) {
1725                 if (s > reginfo.ganch)
1726                     goto phooey;
1727                 s = reginfo.ganch - prog->gofs;
1728             }
1729         }
1730         else if (data) {
1731             reginfo.ganch = strbeg + PTR2UV(data);
1732         } else                          /* pos() not defined */
1733             reginfo.ganch = strbeg;
1734     }
1735     if (PL_curpm && (PM_GETRE(PL_curpm) == prog)) {
1736         I32 *t;
1737         if (!progi->swap) {
1738         /* We have to be careful. If the previous successful match
1739            was from this regex we don't want a subsequent paritally
1740            successful match to clobber the old results.
1741            So when we detect this possibility we add a swap buffer
1742            to the re, and switch the buffer each match. If we fail
1743            we switch it back, otherwise we leave it swapped.
1744         */
1745             Newxz(progi->swap, 1, regexp_paren_ofs);
1746             /* no need to copy these */
1747             Newxz(progi->swap->startp, prog->nparens + 1, I32);
1748             Newxz(progi->swap->endp, prog->nparens + 1, I32);
1749         }
1750         t = progi->swap->startp;
1751         progi->swap->startp = prog->startp;
1752         prog->startp = t;
1753         t = progi->swap->endp;
1754         progi->swap->endp = prog->endp;
1755         prog->endp = t;
1756     }
1757     if (!(flags & REXEC_CHECKED) && (prog->check_substr != NULL || prog->check_utf8 != NULL)) {
1758         re_scream_pos_data d;
1759
1760         d.scream_olds = &scream_olds;
1761         d.scream_pos = &scream_pos;
1762         s = re_intuit_start(prog, sv, s, strend, flags, &d);
1763         if (!s) {
1764             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Not present...\n"));
1765             goto phooey;        /* not present */
1766         }
1767     }
1768
1769
1770
1771     /* Simplest case:  anchored match need be tried only once. */
1772     /*  [unless only anchor is BOL and multiline is set] */
1773     if (prog->extflags & (RXf_ANCH & ~RXf_ANCH_GPOS)) {
1774         if (s == startpos && regtry(&reginfo, &startpos))
1775             goto got_it;
1776         else if (multiline || (prog->intflags & PREGf_IMPLICIT)
1777                  || (prog->extflags & RXf_ANCH_MBOL)) /* XXXX SBOL? */
1778         {
1779             char *end;
1780
1781             if (minlen)
1782                 dontbother = minlen - 1;
1783             end = HOP3c(strend, -dontbother, strbeg) - 1;
1784             /* for multiline we only have to try after newlines */
1785             if (prog->check_substr || prog->check_utf8) {
1786                 if (s == startpos)
1787                     goto after_try;
1788                 while (1) {
1789                     if (regtry(&reginfo, &s))
1790                         goto got_it;
1791                   after_try:
1792                     if (s >= end)
1793                         goto phooey;
1794                     if (prog->extflags & RXf_USE_INTUIT) {
1795                         s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
1796                         if (!s)
1797                             goto phooey;
1798                     }
1799                     else
1800                         s++;
1801                 }
1802             } else {
1803                 if (s > startpos)
1804                     s--;
1805                 while (s < end) {
1806                     if (*s++ == '\n') { /* don't need PL_utf8skip here */
1807                         if (regtry(&reginfo, &s))
1808                             goto got_it;
1809                     }
1810                 }
1811             }
1812         }
1813         goto phooey;
1814     } else if (RXf_GPOS_CHECK == (prog->extflags & RXf_GPOS_CHECK))
1815     {
1816         /* the warning about reginfo.ganch being used without intialization
1817            is bogus -- we set it above, when prog->extflags & RXf_GPOS_SEEN
1818            and we only enter this block when the same bit is set. */
1819         char *tmp_s = reginfo.ganch - prog->gofs;
1820         if (regtry(&reginfo, &tmp_s))
1821             goto got_it;
1822         goto phooey;
1823     }
1824
1825     /* Messy cases:  unanchored match. */
1826     if ((prog->anchored_substr || prog->anchored_utf8) && prog->intflags & PREGf_SKIP) {
1827         /* we have /x+whatever/ */
1828         /* it must be a one character string (XXXX Except UTF?) */
1829         char ch;
1830 #ifdef DEBUGGING
1831         int did_match = 0;
1832 #endif
1833         if (!(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr))
1834             do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1835         ch = SvPVX_const(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr)[0];
1836
1837         if (do_utf8) {
1838             REXEC_FBC_SCAN(
1839                 if (*s == ch) {
1840                     DEBUG_EXECUTE_r( did_match = 1 );
1841                     if (regtry(&reginfo, &s)) goto got_it;
1842                     s += UTF8SKIP(s);
1843                     while (s < strend && *s == ch)
1844                         s += UTF8SKIP(s);
1845                 }
1846             );
1847         }
1848         else {
1849             REXEC_FBC_SCAN(
1850                 if (*s == ch) {
1851                     DEBUG_EXECUTE_r( did_match = 1 );
1852                     if (regtry(&reginfo, &s)) goto got_it;
1853                     s++;
1854                     while (s < strend && *s == ch)
1855                         s++;
1856                 }
1857             );
1858         }
1859         DEBUG_EXECUTE_r(if (!did_match)
1860                 PerlIO_printf(Perl_debug_log,
1861                                   "Did not find anchored character...\n")
1862                );
1863     }
1864     else if (prog->anchored_substr != NULL
1865               || prog->anchored_utf8 != NULL
1866               || ((prog->float_substr != NULL || prog->float_utf8 != NULL)
1867                   && prog->float_max_offset < strend - s)) {
1868         SV *must;
1869         I32 back_max;
1870         I32 back_min;
1871         char *last;
1872         char *last1;            /* Last position checked before */
1873 #ifdef DEBUGGING
1874         int did_match = 0;
1875 #endif
1876         if (prog->anchored_substr || prog->anchored_utf8) {
1877             if (!(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr))
1878                 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1879             must = do_utf8 ? prog->anchored_utf8 : prog->anchored_substr;
1880             back_max = back_min = prog->anchored_offset;
1881         } else {
1882             if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))
1883                 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1884             must = do_utf8 ? prog->float_utf8 : prog->float_substr;
1885             back_max = prog->float_max_offset;
1886             back_min = prog->float_min_offset;
1887         }
1888
1889
1890         if (must == &PL_sv_undef)
1891             /* could not downgrade utf8 check substring, so must fail */
1892             goto phooey;
1893
1894         if (back_min<0) {
1895             last = strend;
1896         } else {
1897             last = HOP3c(strend,        /* Cannot start after this */
1898                   -(I32)(CHR_SVLEN(must)
1899                          - (SvTAIL(must) != 0) + back_min), strbeg);
1900         }
1901         if (s > PL_bostr)
1902             last1 = HOPc(s, -1);
1903         else
1904             last1 = s - 1;      /* bogus */
1905
1906         /* XXXX check_substr already used to find "s", can optimize if
1907            check_substr==must. */
1908         scream_pos = -1;
1909         dontbother = end_shift;
1910         strend = HOPc(strend, -dontbother);
1911         while ( (s <= last) &&
1912                 ((flags & REXEC_SCREAM)
1913                  ? (s = screaminstr(sv, must, HOP3c(s, back_min, (back_min<0 ? strbeg : strend)) - strbeg,
1914                                     end_shift, &scream_pos, 0))
1915                  : (s = fbm_instr((unsigned char*)HOP3(s, back_min, (back_min<0 ? strbeg : strend)),
1916                                   (unsigned char*)strend, must,
1917                                   multiline ? FBMrf_MULTILINE : 0))) ) {
1918             /* we may be pointing at the wrong string */
1919             if ((flags & REXEC_SCREAM) && RX_MATCH_COPIED(prog))
1920                 s = strbeg + (s - SvPVX_const(sv));
1921             DEBUG_EXECUTE_r( did_match = 1 );
1922             if (HOPc(s, -back_max) > last1) {
1923                 last1 = HOPc(s, -back_min);
1924                 s = HOPc(s, -back_max);
1925             }
1926             else {
1927                 char * const t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
1928
1929                 last1 = HOPc(s, -back_min);
1930                 s = t;
1931             }
1932             if (do_utf8) {
1933                 while (s <= last1) {
1934                     if (regtry(&reginfo, &s))
1935                         goto got_it;
1936                     s += UTF8SKIP(s);
1937                 }
1938             }
1939             else {
1940                 while (s <= last1) {
1941                     if (regtry(&reginfo, &s))
1942                         goto got_it;
1943                     s++;
1944                 }
1945             }
1946         }
1947         DEBUG_EXECUTE_r(if (!did_match) {
1948             RE_PV_QUOTED_DECL(quoted, do_utf8, PERL_DEBUG_PAD_ZERO(0),
1949                 SvPVX_const(must), RE_SV_DUMPLEN(must), 30);
1950             PerlIO_printf(Perl_debug_log, "Did not find %s substr %s%s...\n",
1951                               ((must == prog->anchored_substr || must == prog->anchored_utf8)
1952                                ? "anchored" : "floating"),
1953                 quoted, RE_SV_TAIL(must));
1954         });
1955         goto phooey;
1956     }
1957     else if ( (c = progi->regstclass) ) {
1958         if (minlen) {
1959             const OPCODE op = OP(progi->regstclass);
1960             /* don't bother with what can't match */
1961             if (PL_regkind[op] != EXACT && op != CANY && PL_regkind[op] != TRIE)
1962                 strend = HOPc(strend, -(minlen - 1));
1963         }
1964         DEBUG_EXECUTE_r({
1965             SV * const prop = sv_newmortal();
1966             regprop(prog, prop, c);
1967             {
1968                 RE_PV_QUOTED_DECL(quoted,UTF,PERL_DEBUG_PAD_ZERO(1),
1969                     s,strend-s,60);
1970                 PerlIO_printf(Perl_debug_log,
1971                     "Matching stclass %.*s against %s (%d chars)\n",
1972                     (int)SvCUR(prop), SvPVX_const(prop),
1973                      quoted, (int)(strend - s));
1974             }
1975         });
1976         if (find_byclass(prog, c, s, strend, &reginfo))
1977             goto got_it;
1978         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass... [regexec_flags]\n"));
1979     }
1980     else {
1981         dontbother = 0;
1982         if (prog->float_substr != NULL || prog->float_utf8 != NULL) {
1983             /* Trim the end. */
1984             char *last;
1985             SV* float_real;
1986
1987             if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))
1988                 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1989             float_real = do_utf8 ? prog->float_utf8 : prog->float_substr;
1990
1991             if (flags & REXEC_SCREAM) {
1992                 last = screaminstr(sv, float_real, s - strbeg,
1993                                    end_shift, &scream_pos, 1); /* last one */
1994                 if (!last)
1995                     last = scream_olds; /* Only one occurrence. */
1996                 /* we may be pointing at the wrong string */
1997                 else if (RX_MATCH_COPIED(prog))
1998                     s = strbeg + (s - SvPVX_const(sv));
1999             }
2000             else {
2001                 STRLEN len;
2002                 const char * const little = SvPV_const(float_real, len);
2003
2004                 if (SvTAIL(float_real)) {
2005                     if (memEQ(strend - len + 1, little, len - 1))
2006                         last = strend - len + 1;
2007                     else if (!multiline)
2008                         last = memEQ(strend - len, little, len)
2009                             ? strend - len : NULL;
2010                     else
2011                         goto find_last;
2012                 } else {
2013                   find_last:
2014                     if (len)
2015                         last = rninstr(s, strend, little, little + len);
2016                     else
2017                         last = strend;  /* matching "$" */
2018                 }
2019             }
2020             if (last == NULL) {
2021                 DEBUG_EXECUTE_r(
2022                     PerlIO_printf(Perl_debug_log,
2023                         "%sCan't trim the tail, match fails (should not happen)%s\n",
2024                         PL_colors[4], PL_colors[5]));
2025                 goto phooey; /* Should not happen! */
2026             }
2027             dontbother = strend - last + prog->float_min_offset;
2028         }
2029         if (minlen && (dontbother < minlen))
2030             dontbother = minlen - 1;
2031         strend -= dontbother;              /* this one's always in bytes! */
2032         /* We don't know much -- general case. */
2033         if (do_utf8) {
2034             for (;;) {
2035                 if (regtry(&reginfo, &s))
2036                     goto got_it;
2037                 if (s >= strend)
2038                     break;
2039                 s += UTF8SKIP(s);
2040             };
2041         }
2042         else {
2043             do {
2044                 if (regtry(&reginfo, &s))
2045                     goto got_it;
2046             } while (s++ < strend);
2047         }
2048     }
2049
2050     /* Failure. */
2051     goto phooey;
2052
2053 got_it:
2054     RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
2055
2056     if (PL_reg_eval_set) {
2057         /* Preserve the current value of $^R */
2058         if (oreplsv != GvSV(PL_replgv))
2059             sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
2060                                                   restored, the value remains
2061                                                   the same. */
2062         restore_pos(aTHX_ prog);
2063     }
2064     if (prog->paren_names)
2065         (void)hv_iterinit(prog->paren_names);
2066
2067     /* make sure $`, $&, $', and $digit will work later */
2068     if ( !(flags & REXEC_NOT_FIRST) ) {
2069         RX_MATCH_COPY_FREE(prog);
2070         if (flags & REXEC_COPY_STR) {
2071             const I32 i = PL_regeol - startpos + (stringarg - strbeg);
2072 #ifdef PERL_OLD_COPY_ON_WRITE
2073             if ((SvIsCOW(sv)
2074                  || (SvFLAGS(sv) & CAN_COW_MASK) == CAN_COW_FLAGS)) {
2075                 if (DEBUG_C_TEST) {
2076                     PerlIO_printf(Perl_debug_log,
2077                                   "Copy on write: regexp capture, type %d\n",
2078                                   (int) SvTYPE(sv));
2079                 }
2080                 prog->saved_copy = sv_setsv_cow(prog->saved_copy, sv);
2081                 prog->subbeg = (char *)SvPVX_const(prog->saved_copy);
2082                 assert (SvPOKp(prog->saved_copy));
2083             } else
2084 #endif
2085             {
2086                 RX_MATCH_COPIED_on(prog);
2087                 s = savepvn(strbeg, i);
2088                 prog->subbeg = s;
2089             }
2090             prog->sublen = i;
2091         }
2092         else {
2093             prog->subbeg = strbeg;
2094             prog->sublen = PL_regeol - strbeg;  /* strend may have been modified */
2095         }
2096     }
2097
2098     return 1;
2099
2100 phooey:
2101     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n",
2102                           PL_colors[4], PL_colors[5]));
2103     if (PL_reg_eval_set)
2104         restore_pos(aTHX_ prog);
2105     if (progi->swap) {
2106         /* we failed :-( roll it back */
2107         I32 *t;
2108         t = progi->swap->startp;
2109         progi->swap->startp = prog->startp;
2110         prog->startp = t;
2111         t = progi->swap->endp;
2112         progi->swap->endp = prog->endp;
2113         prog->endp = t;
2114     }
2115     return 0;
2116 }
2117
2118
2119 /*
2120  - regtry - try match at specific point
2121  */
2122 STATIC I32                      /* 0 failure, 1 success */
2123 S_regtry(pTHX_ regmatch_info *reginfo, char **startpos)
2124 {
2125     dVAR;
2126     register I32 *sp;
2127     register I32 *ep;
2128     CHECKPOINT lastcp;
2129     regexp *prog = reginfo->prog;
2130     RXi_GET_DECL(prog,progi);
2131     GET_RE_DEBUG_FLAGS_DECL;
2132     reginfo->cutpoint=NULL;
2133
2134     if ((prog->extflags & RXf_EVAL_SEEN) && !PL_reg_eval_set) {
2135         MAGIC *mg;
2136
2137         PL_reg_eval_set = RS_init;
2138         DEBUG_EXECUTE_r(DEBUG_s(
2139             PerlIO_printf(Perl_debug_log, "  setting stack tmpbase at %"IVdf"\n",
2140                           (IV)(PL_stack_sp - PL_stack_base));
2141             ));
2142         SAVESTACK_CXPOS();
2143         cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
2144         /* Otherwise OP_NEXTSTATE will free whatever on stack now.  */
2145         SAVETMPS;
2146         /* Apparently this is not needed, judging by wantarray. */
2147         /* SAVEI8(cxstack[cxstack_ix].blk_gimme);
2148            cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
2149
2150         if (reginfo->sv) {
2151             /* Make $_ available to executed code. */
2152             if (reginfo->sv != DEFSV) {
2153                 SAVE_DEFSV;
2154                 DEFSV = reginfo->sv;
2155             }
2156
2157             if (!(SvTYPE(reginfo->sv) >= SVt_PVMG && SvMAGIC(reginfo->sv)
2158                   && (mg = mg_find(reginfo->sv, PERL_MAGIC_regex_global)))) {
2159                 /* prepare for quick setting of pos */
2160 #ifdef PERL_OLD_COPY_ON_WRITE
2161                 if (SvIsCOW(reginfo->sv))
2162                     sv_force_normal_flags(reginfo->sv, 0);
2163 #endif
2164                 mg = sv_magicext(reginfo->sv, NULL, PERL_MAGIC_regex_global,
2165                                  &PL_vtbl_mglob, NULL, 0);
2166                 mg->mg_len = -1;
2167             }
2168             PL_reg_magic    = mg;
2169             PL_reg_oldpos   = mg->mg_len;
2170             SAVEDESTRUCTOR_X(restore_pos, prog);
2171         }
2172         if (!PL_reg_curpm) {
2173             Newxz(PL_reg_curpm, 1, PMOP);
2174 #ifdef USE_ITHREADS
2175             {
2176                 SV* const repointer = newSViv(0);
2177                 /* so we know which PL_regex_padav element is PL_reg_curpm */
2178                 SvFLAGS(repointer) |= SVf_BREAK;
2179                 av_push(PL_regex_padav,repointer);
2180                 PL_reg_curpm->op_pmoffset = av_len(PL_regex_padav);
2181                 PL_regex_pad = AvARRAY(PL_regex_padav);
2182             }
2183 #endif
2184         }
2185         PM_SETRE(PL_reg_curpm, prog);
2186         PL_reg_oldcurpm = PL_curpm;
2187         PL_curpm = PL_reg_curpm;
2188         if (RX_MATCH_COPIED(prog)) {
2189             /*  Here is a serious problem: we cannot rewrite subbeg,
2190                 since it may be needed if this match fails.  Thus
2191                 $` inside (?{}) could fail... */
2192             PL_reg_oldsaved = prog->subbeg;
2193             PL_reg_oldsavedlen = prog->sublen;
2194 #ifdef PERL_OLD_COPY_ON_WRITE
2195             PL_nrs = prog->saved_copy;
2196 #endif
2197             RX_MATCH_COPIED_off(prog);
2198         }
2199         else
2200             PL_reg_oldsaved = NULL;
2201         prog->subbeg = PL_bostr;
2202         prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
2203     }
2204     DEBUG_EXECUTE_r(PL_reg_starttry = *startpos);
2205     prog->startp[0] = *startpos - PL_bostr;
2206     PL_reginput = *startpos;
2207     PL_reglastparen = &prog->lastparen;
2208     PL_reglastcloseparen = &prog->lastcloseparen;
2209     prog->lastparen = 0;
2210     prog->lastcloseparen = 0;
2211     PL_regsize = 0;
2212     PL_regstartp = prog->startp;
2213     PL_regendp = prog->endp;
2214     if (PL_reg_start_tmpl <= prog->nparens) {
2215         PL_reg_start_tmpl = prog->nparens*3/2 + 3;
2216         if(PL_reg_start_tmp)
2217             Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
2218         else
2219             Newx(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
2220     }
2221
2222     /* XXXX What this code is doing here?!!!  There should be no need
2223        to do this again and again, PL_reglastparen should take care of
2224        this!  --ilya*/
2225
2226     /* Tests pat.t#187 and split.t#{13,14} seem to depend on this code.
2227      * Actually, the code in regcppop() (which Ilya may be meaning by
2228      * PL_reglastparen), is not needed at all by the test suite
2229      * (op/regexp, op/pat, op/split), but that code is needed, oddly
2230      * enough, for building DynaLoader, or otherwise this
2231      * "Error: '*' not in typemap in DynaLoader.xs, line 164"
2232      * will happen.  Meanwhile, this code *is* needed for the
2233      * above-mentioned test suite tests to succeed.  The common theme
2234      * on those tests seems to be returning null fields from matches.
2235      * --jhi */
2236 #if 1
2237     sp = PL_regstartp;
2238     ep = PL_regendp;
2239     if (prog->nparens) {
2240         register I32 i;
2241         for (i = prog->nparens; i > (I32)*PL_reglastparen; i--) {
2242             *++sp = -1;
2243             *++ep = -1;
2244         }
2245     }
2246 #endif
2247     REGCP_SET(lastcp);
2248     if (regmatch(reginfo, progi->program + 1)) {
2249         PL_regendp[0] = PL_reginput - PL_bostr;
2250         return 1;
2251     }
2252     if (reginfo->cutpoint)
2253         *startpos= reginfo->cutpoint;
2254     REGCP_UNWIND(lastcp);
2255     return 0;
2256 }
2257
2258
2259 #define sayYES goto yes
2260 #define sayNO goto no
2261 #define sayNO_SILENT goto no_silent
2262
2263 /* we dont use STMT_START/END here because it leads to
2264    "unreachable code" warnings, which are bogus, but distracting. */
2265 #define CACHEsayNO \
2266     if (ST.cache_mask) \
2267        PL_reg_poscache[ST.cache_offset] |= ST.cache_mask; \
2268     sayNO
2269
2270 /* this is used to determine how far from the left messages like
2271    'failed...' are printed. It should be set such that messages
2272    are inline with the regop output that created them.
2273 */
2274 #define REPORT_CODE_OFF 32
2275
2276
2277 /* Make sure there is a test for this +1 options in re_tests */
2278 #define TRIE_INITAL_ACCEPT_BUFFLEN 4;
2279
2280 #define CHRTEST_UNINIT -1001 /* c1/c2 haven't been calculated yet */
2281 #define CHRTEST_VOID   -1000 /* the c1/c2 "next char" test should be skipped */
2282
2283 #define SLAB_FIRST(s) (&(s)->states[0])
2284 #define SLAB_LAST(s)  (&(s)->states[PERL_REGMATCH_SLAB_SLOTS-1])
2285
2286 /* grab a new slab and return the first slot in it */
2287
2288 STATIC regmatch_state *
2289 S_push_slab(pTHX)
2290 {
2291 #if PERL_VERSION < 9
2292     dMY_CXT;
2293 #endif
2294     regmatch_slab *s = PL_regmatch_slab->next;
2295     if (!s) {
2296         Newx(s, 1, regmatch_slab);
2297         s->prev = PL_regmatch_slab;
2298         s->next = NULL;
2299         PL_regmatch_slab->next = s;
2300     }
2301     PL_regmatch_slab = s;
2302     return SLAB_FIRST(s);
2303 }
2304
2305
2306 /* push a new state then goto it */
2307
2308 #define PUSH_STATE_GOTO(state, node) \
2309     scan = node; \
2310     st->resume_state = state; \
2311     goto push_state;
2312
2313 /* push a new state with success backtracking, then goto it */
2314
2315 #define PUSH_YES_STATE_GOTO(state, node) \
2316     scan = node; \
2317     st->resume_state = state; \
2318     goto push_yes_state;
2319
2320
2321
2322 /*
2323
2324 regmatch() - main matching routine
2325
2326 This is basically one big switch statement in a loop. We execute an op,
2327 set 'next' to point the next op, and continue. If we come to a point which
2328 we may need to backtrack to on failure such as (A|B|C), we push a
2329 backtrack state onto the backtrack stack. On failure, we pop the top
2330 state, and re-enter the loop at the state indicated. If there are no more
2331 states to pop, we return failure.
2332
2333 Sometimes we also need to backtrack on success; for example /A+/, where
2334 after successfully matching one A, we need to go back and try to
2335 match another one; similarly for lookahead assertions: if the assertion
2336 completes successfully, we backtrack to the state just before the assertion
2337 and then carry on.  In these cases, the pushed state is marked as
2338 'backtrack on success too'. This marking is in fact done by a chain of
2339 pointers, each pointing to the previous 'yes' state. On success, we pop to
2340 the nearest yes state, discarding any intermediate failure-only states.
2341 Sometimes a yes state is pushed just to force some cleanup code to be
2342 called at the end of a successful match or submatch; e.g. (??{$re}) uses
2343 it to free the inner regex.
2344
2345 Note that failure backtracking rewinds the cursor position, while
2346 success backtracking leaves it alone.
2347
2348 A pattern is complete when the END op is executed, while a subpattern
2349 such as (?=foo) is complete when the SUCCESS op is executed. Both of these
2350 ops trigger the "pop to last yes state if any, otherwise return true"
2351 behaviour.
2352
2353 A common convention in this function is to use A and B to refer to the two
2354 subpatterns (or to the first nodes thereof) in patterns like /A*B/: so A is
2355 the subpattern to be matched possibly multiple times, while B is the entire
2356 rest of the pattern. Variable and state names reflect this convention.
2357
2358 The states in the main switch are the union of ops and failure/success of
2359 substates associated with with that op.  For example, IFMATCH is the op
2360 that does lookahead assertions /(?=A)B/ and so the IFMATCH state means
2361 'execute IFMATCH'; while IFMATCH_A is a state saying that we have just
2362 successfully matched A and IFMATCH_A_fail is a state saying that we have
2363 just failed to match A. Resume states always come in pairs. The backtrack
2364 state we push is marked as 'IFMATCH_A', but when that is popped, we resume
2365 at IFMATCH_A or IFMATCH_A_fail, depending on whether we are backtracking
2366 on success or failure.
2367
2368 The struct that holds a backtracking state is actually a big union, with
2369 one variant for each major type of op. The variable st points to the
2370 top-most backtrack struct. To make the code clearer, within each
2371 block of code we #define ST to alias the relevant union.
2372
2373 Here's a concrete example of a (vastly oversimplified) IFMATCH
2374 implementation:
2375
2376     switch (state) {
2377     ....
2378
2379 #define ST st->u.ifmatch
2380
2381     case IFMATCH: // we are executing the IFMATCH op, (?=A)B
2382         ST.foo = ...; // some state we wish to save
2383         ...
2384         // push a yes backtrack state with a resume value of
2385         // IFMATCH_A/IFMATCH_A_fail, then continue execution at the
2386         // first node of A:
2387         PUSH_YES_STATE_GOTO(IFMATCH_A, A);
2388         // NOTREACHED
2389
2390     case IFMATCH_A: // we have successfully executed A; now continue with B
2391         next = B;
2392         bar = ST.foo; // do something with the preserved value
2393         break;
2394
2395     case IFMATCH_A_fail: // A failed, so the assertion failed
2396         ...;   // do some housekeeping, then ...
2397         sayNO; // propagate the failure
2398
2399 #undef ST
2400
2401     ...
2402     }
2403
2404 For any old-timers reading this who are familiar with the old recursive
2405 approach, the code above is equivalent to:
2406
2407     case IFMATCH: // we are executing the IFMATCH op, (?=A)B
2408     {
2409         int foo = ...
2410         ...
2411         if (regmatch(A)) {
2412             next = B;
2413             bar = foo;
2414             break;
2415         }
2416         ...;   // do some housekeeping, then ...
2417         sayNO; // propagate the failure
2418     }
2419
2420 The topmost backtrack state, pointed to by st, is usually free. If you
2421 want to claim it, populate any ST.foo fields in it with values you wish to
2422 save, then do one of
2423
2424         PUSH_STATE_GOTO(resume_state, node);
2425         PUSH_YES_STATE_GOTO(resume_state, node);
2426
2427 which sets that backtrack state's resume value to 'resume_state', pushes a
2428 new free entry to the top of the backtrack stack, then goes to 'node'.
2429 On backtracking, the free slot is popped, and the saved state becomes the
2430 new free state. An ST.foo field in this new top state can be temporarily
2431 accessed to retrieve values, but once the main loop is re-entered, it
2432 becomes available for reuse.
2433
2434 Note that the depth of the backtrack stack constantly increases during the
2435 left-to-right execution of the pattern, rather than going up and down with
2436 the pattern nesting. For example the stack is at its maximum at Z at the
2437 end of the pattern, rather than at X in the following:
2438
2439     /(((X)+)+)+....(Y)+....Z/
2440
2441 The only exceptions to this are lookahead/behind assertions and the cut,
2442 (?>A), which pop all the backtrack states associated with A before
2443 continuing.
2444
2445 Bascktrack state structs are allocated in slabs of about 4K in size.
2446 PL_regmatch_state and st always point to the currently active state,
2447 and PL_regmatch_slab points to the slab currently containing
2448 PL_regmatch_state.  The first time regmatch() is called, the first slab is
2449 allocated, and is never freed until interpreter destruction. When the slab
2450 is full, a new one is allocated and chained to the end. At exit from
2451 regmatch(), slabs allocated since entry are freed.
2452
2453 */
2454
2455
2456 #define DEBUG_STATE_pp(pp)                                  \
2457     DEBUG_STATE_r({                                         \
2458         DUMP_EXEC_POS(locinput, scan, do_utf8);             \
2459         PerlIO_printf(Perl_debug_log,                       \
2460             "    %*s"pp" %s%s%s%s%s\n",                     \
2461             depth*2, "",                                    \
2462             reg_name[st->resume_state],                     \
2463             ((st==yes_state||st==mark_state) ? "[" : ""),   \
2464             ((st==yes_state) ? "Y" : ""),                   \
2465             ((st==mark_state) ? "M" : ""),                  \
2466             ((st==yes_state||st==mark_state) ? "]" : "")    \
2467         );                                                  \
2468     });
2469
2470
2471 #define REG_NODE_NUM(x) ((x) ? (int)((x)-prog) : -1)
2472
2473 #ifdef DEBUGGING
2474
2475 STATIC void
2476 S_debug_start_match(pTHX_ const regexp *prog, const bool do_utf8,
2477     const char *start, const char *end, const char *blurb)
2478 {
2479     const bool utf8_pat= prog->extflags & RXf_UTF8 ? 1 : 0;
2480     if (!PL_colorset)
2481             reginitcolors();
2482     {
2483         RE_PV_QUOTED_DECL(s0, utf8_pat, PERL_DEBUG_PAD_ZERO(0),
2484             prog->precomp, prog->prelen, 60);
2485
2486         RE_PV_QUOTED_DECL(s1, do_utf8, PERL_DEBUG_PAD_ZERO(1),
2487             start, end - start, 60);
2488
2489         PerlIO_printf(Perl_debug_log,
2490             "%s%s REx%s %s against %s\n",
2491                        PL_colors[4], blurb, PL_colors[5], s0, s1);
2492
2493         if (do_utf8||utf8_pat)
2494             PerlIO_printf(Perl_debug_log, "UTF-8 %s%s%s...\n",
2495                 utf8_pat ? "pattern" : "",
2496                 utf8_pat && do_utf8 ? " and " : "",
2497                 do_utf8 ? "string" : ""
2498             );
2499     }
2500 }
2501
2502 STATIC void
2503 S_dump_exec_pos(pTHX_ const char *locinput,
2504                       const regnode *scan,
2505                       const char *loc_regeol,
2506                       const char *loc_bostr,
2507                       const char *loc_reg_starttry,
2508                       const bool do_utf8)
2509 {
2510     const int docolor = *PL_colors[0] || *PL_colors[2] || *PL_colors[4];
2511     const int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
2512     int l = (loc_regeol - locinput) > taill ? taill : (loc_regeol - locinput);
2513     /* The part of the string before starttry has one color
2514        (pref0_len chars), between starttry and current
2515        position another one (pref_len - pref0_len chars),
2516        after the current position the third one.
2517        We assume that pref0_len <= pref_len, otherwise we
2518        decrease pref0_len.  */
2519     int pref_len = (locinput - loc_bostr) > (5 + taill) - l
2520         ? (5 + taill) - l : locinput - loc_bostr;
2521     int pref0_len;
2522
2523     while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput - pref_len)))
2524         pref_len++;
2525     pref0_len = pref_len  - (locinput - loc_reg_starttry);
2526     if (l + pref_len < (5 + taill) && l < loc_regeol - locinput)
2527         l = ( loc_regeol - locinput > (5 + taill) - pref_len
2528               ? (5 + taill) - pref_len : loc_regeol - locinput);
2529     while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput + l)))
2530         l--;
2531     if (pref0_len < 0)
2532         pref0_len = 0;
2533     if (pref0_len > pref_len)
2534         pref0_len = pref_len;
2535     {
2536         const int is_uni = (do_utf8 && OP(scan) != CANY) ? 1 : 0;
2537
2538         RE_PV_COLOR_DECL(s0,len0,is_uni,PERL_DEBUG_PAD(0),
2539             (locinput - pref_len),pref0_len, 60, 4, 5);
2540
2541         RE_PV_COLOR_DECL(s1,len1,is_uni,PERL_DEBUG_PAD(1),
2542                     (locinput - pref_len + pref0_len),
2543                     pref_len - pref0_len, 60, 2, 3);
2544
2545         RE_PV_COLOR_DECL(s2,len2,is_uni,PERL_DEBUG_PAD(2),
2546                     locinput, loc_regeol - locinput, 10, 0, 1);
2547
2548         const STRLEN tlen=len0+len1+len2;
2549         PerlIO_printf(Perl_debug_log,
2550                     "%4"IVdf" <%.*s%.*s%s%.*s>%*s|",
2551                     (IV)(locinput - loc_bostr),
2552                     len0, s0,
2553                     len1, s1,
2554                     (docolor ? "" : "> <"),
2555                     len2, s2,
2556                     (int)(tlen > 19 ? 0 :  19 - tlen),
2557                     "");
2558     }
2559 }
2560
2561 #endif
2562
2563 /* reg_check_named_buff_matched()
2564  * Checks to see if a named buffer has matched. The data array of
2565  * buffer numbers corresponding to the buffer is expected to reside
2566  * in the regexp->data->data array in the slot stored in the ARG() of
2567  * node involved. Note that this routine doesn't actually care about the
2568  * name, that information is not preserved from compilation to execution.
2569  * Returns the index of the leftmost defined buffer with the given name
2570  * or 0 if non of the buffers matched.
2571  */
2572 STATIC I32
2573 S_reg_check_named_buff_matched(pTHX_ const regexp *rex, const regnode *scan) {
2574     I32 n;
2575     RXi_GET_DECL(rex,rexi);
2576     SV *sv_dat=(SV*)rexi->data->data[ ARG( scan ) ];
2577     I32 *nums=(I32*)SvPVX(sv_dat);
2578     for ( n=0; n<SvIVX(sv_dat); n++ ) {
2579         if ((I32)*PL_reglastparen >= nums[n] &&
2580             PL_regendp[nums[n]] != -1)
2581         {
2582             return nums[n];
2583         }
2584     }
2585     return 0;
2586 }
2587
2588 STATIC I32                      /* 0 failure, 1 success */
2589 S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
2590 {
2591 #if PERL_VERSION < 9
2592     dMY_CXT;
2593 #endif
2594     dVAR;
2595     register const bool do_utf8 = PL_reg_match_utf8;
2596     const U32 uniflags = UTF8_ALLOW_DEFAULT;
2597
2598     regexp *rex = reginfo->prog;
2599     RXi_GET_DECL(rex,rexi);
2600
2601     regmatch_slab  *orig_slab;
2602     regmatch_state *orig_state;
2603
2604     /* the current state. This is a cached copy of PL_regmatch_state */
2605     register regmatch_state *st;
2606
2607     /* cache heavy used fields of st in registers */
2608     register regnode *scan;
2609     register regnode *next;
2610     register U32 n = 0; /* general value; init to avoid compiler warning */
2611     register I32 ln = 0; /* len or last;  init to avoid compiler warning */
2612     register char *locinput = PL_reginput;
2613     register I32 nextchr;   /* is always set to UCHARAT(locinput) */
2614
2615     bool result = 0;        /* return value of S_regmatch */
2616     int depth = 0;          /* depth of backtrack stack */
2617     int nochange_depth = 0; /* depth of GOSUB recursion with nochange*/
2618     regmatch_state *yes_state = NULL; /* state to pop to on success of
2619                                                             subpattern */
2620     /* mark_state piggy backs on the yes_state logic so that when we unwind
2621        the stack on success we can update the mark_state as we go */
2622     regmatch_state *mark_state = NULL; /* last mark state we have seen */
2623
2624     regmatch_state *cur_eval = NULL; /* most recent EVAL_AB state */
2625     struct regmatch_state  *cur_curlyx = NULL; /* most recent curlyx */
2626     U32 state_num;
2627     bool no_final = 0;      /* prevent failure from backtracking? */
2628     bool do_cutgroup = 0;   /* no_final only until next branch/trie entry */
2629     char *startpoint = PL_reginput;
2630     SV *popmark = NULL;     /* are we looking for a mark? */
2631     SV *sv_commit = NULL;   /* last mark name seen in failure */
2632     SV *sv_yes_mark = NULL; /* last mark name we have seen
2633                                during a successfull match */
2634     U32 lastopen = 0;       /* last open we saw */
2635     bool has_cutgroup = RX_HAS_CUTGROUP(rex) ? 1 : 0;
2636
2637
2638     /* these three flags are set by various ops to signal information to
2639      * the very next op. They have a useful lifetime of exactly one loop
2640      * iteration, and are not preserved or restored by state pushes/pops
2641      */
2642     bool sw = 0;            /* the condition value in (?(cond)a|b) */
2643     bool minmod = 0;        /* the next "{n,m}" is a "{n,m}?" */
2644     int logical = 0;        /* the following EVAL is:
2645                                 0: (?{...})
2646                                 1: (?(?{...})X|Y)
2647                                 2: (??{...})
2648                                or the following IFMATCH/UNLESSM is:
2649                                 false: plain (?=foo)
2650                                 true:  used as a condition: (?(?=foo))
2651                             */
2652
2653 #ifdef DEBUGGING
2654     GET_RE_DEBUG_FLAGS_DECL;
2655 #endif
2656
2657     DEBUG_OPTIMISE_r( {
2658             PerlIO_printf(Perl_debug_log,"regmatch start\n");
2659     });
2660     /* on first ever call to regmatch, allocate first slab */
2661     if (!PL_regmatch_slab) {
2662         Newx(PL_regmatch_slab, 1, regmatch_slab);
2663         PL_regmatch_slab->prev = NULL;
2664         PL_regmatch_slab->next = NULL;
2665         PL_regmatch_state = SLAB_FIRST(PL_regmatch_slab);
2666     }
2667
2668     /* remember current high-water mark for exit */
2669     /* XXX this should be done with SAVE* instead */
2670     orig_slab  = PL_regmatch_slab;
2671     orig_state = PL_regmatch_state;
2672
2673     /* grab next free state slot */
2674     st = ++PL_regmatch_state;
2675     if (st >  SLAB_LAST(PL_regmatch_slab))
2676         st = PL_regmatch_state = S_push_slab(aTHX);
2677
2678     /* Note that nextchr is a byte even in UTF */
2679     nextchr = UCHARAT(locinput);
2680     scan = prog;
2681     while (scan != NULL) {
2682
2683         DEBUG_EXECUTE_r( {
2684             SV * const prop = sv_newmortal();
2685             regnode *rnext=regnext(scan);
2686             DUMP_EXEC_POS( locinput, scan, do_utf8 );
2687             regprop(rex, prop, scan);
2688
2689             PerlIO_printf(Perl_debug_log,
2690                     "%3"IVdf":%*s%s(%"IVdf")\n",
2691                     (IV)(scan - rexi->program), depth*2, "",
2692                     SvPVX_const(prop),
2693                     (PL_regkind[OP(scan)] == END || !rnext) ?
2694                         0 : (IV)(rnext - rexi->program));
2695         });
2696
2697         next = scan + NEXT_OFF(scan);
2698         if (next == scan)
2699             next = NULL;
2700         state_num = OP(scan);
2701
2702       reenter_switch:
2703         switch (state_num) {
2704         case BOL:
2705             if (locinput == PL_bostr)
2706             {
2707                 /* reginfo->till = reginfo->bol; */
2708                 break;
2709             }
2710             sayNO;
2711         case MBOL:
2712             if (locinput == PL_bostr ||
2713                 ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n'))
2714             {
2715                 break;
2716             }
2717             sayNO;
2718         case SBOL:
2719             if (locinput == PL_bostr)
2720                 break;
2721             sayNO;
2722         case GPOS:
2723             if (locinput == reginfo->ganch)
2724                 break;
2725             sayNO;
2726         case EOL:
2727                 goto seol;
2728         case MEOL:
2729             if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
2730                 sayNO;
2731             break;
2732         case SEOL:
2733           seol:
2734             if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
2735                 sayNO;
2736             if (PL_regeol - locinput > 1)
2737                 sayNO;
2738             break;
2739         case EOS:
2740             if (PL_regeol != locinput)
2741                 sayNO;
2742             break;
2743         case SANY:
2744             if (!nextchr && locinput >= PL_regeol)
2745                 sayNO;
2746             if (do_utf8) {
2747                 locinput += PL_utf8skip[nextchr];
2748                 if (locinput > PL_regeol)
2749                     sayNO;
2750                 nextchr = UCHARAT(locinput);
2751             }
2752             else
2753                 nextchr = UCHARAT(++locinput);
2754             break;
2755         case CANY:
2756             if (!nextchr && locinput >= PL_regeol)
2757                 sayNO;
2758             nextchr = UCHARAT(++locinput);
2759             break;
2760         case REG_ANY:
2761             if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
2762                 sayNO;
2763             if (do_utf8) {
2764                 locinput += PL_utf8skip[nextchr];
2765                 if (locinput > PL_regeol)
2766                     sayNO;
2767                 nextchr = UCHARAT(locinput);
2768             }
2769             else
2770                 nextchr = UCHARAT(++locinput);
2771             break;
2772
2773 #undef  ST
2774 #define ST st->u.trie
2775         case TRIEC:
2776             /* In this case the charclass data is available inline so
2777                we can fail fast without a lot of extra overhead.
2778              */
2779             if (scan->flags == EXACT || !do_utf8) {
2780                 if(!ANYOF_BITMAP_TEST(scan, *locinput)) {
2781                     DEBUG_EXECUTE_r(
2782                         PerlIO_printf(Perl_debug_log,
2783                                   "%*s  %sfailed to match trie start class...%s\n",
2784                                   REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5])
2785                     );
2786                     sayNO_SILENT;
2787                     /* NOTREACHED */
2788                 }
2789             }
2790             /* FALL THROUGH */
2791         case TRIE:
2792             {
2793                 /* what type of TRIE am I? (utf8 makes this contextual) */
2794                 const enum { trie_plain, trie_utf8, trie_utf8_fold }
2795                     trie_type = do_utf8 ?
2796                           (scan->flags == EXACT ? trie_utf8 : trie_utf8_fold)
2797                         : trie_plain;
2798
2799                 /* what trie are we using right now */
2800                 reg_trie_data * const trie
2801                     = (reg_trie_data*)rexi->data->data[ ARG( scan ) ];
2802                 U32 state = trie->startstate;
2803
2804                 if (trie->bitmap && trie_type != trie_utf8_fold &&
2805                     !TRIE_BITMAP_TEST(trie,*locinput)
2806                 ) {
2807                     if (trie->states[ state ].wordnum) {
2808                          DEBUG_EXECUTE_r(
2809                             PerlIO_printf(Perl_debug_log,
2810                                           "%*s  %smatched empty string...%s\n",
2811                                           REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5])
2812                         );
2813                         break;
2814                     } else {
2815                         DEBUG_EXECUTE_r(
2816                             PerlIO_printf(Perl_debug_log,
2817                                           "%*s  %sfailed to match trie start class...%s\n",
2818                                           REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5])
2819                         );
2820                         sayNO_SILENT;
2821                    }
2822                 }
2823
2824             {
2825                 U8 *uc = ( U8* )locinput;
2826
2827                 STRLEN len = 0;
2828                 STRLEN foldlen = 0;
2829                 U8 *uscan = (U8*)NULL;
2830                 STRLEN bufflen=0;
2831                 SV *sv_accept_buff = NULL;
2832                 U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ];
2833
2834                 ST.accepted = 0; /* how many accepting states we have seen */
2835                 ST.B = next;
2836                 ST.jump = trie->jump;
2837                 ST.me = scan;
2838
2839                 /*
2840                    traverse the TRIE keeping track of all accepting states
2841                    we transition through until we get to a failing node.
2842                 */
2843
2844                 while ( state && uc <= (U8*)PL_regeol ) {
2845                     U32 base = trie->states[ state ].trans.base;
2846                     UV uvc = 0;
2847                     U16 charid;
2848                     /* We use charid to hold the wordnum as we don't use it
2849                        for charid until after we have done the wordnum logic.
2850                        We define an alias just so that the wordnum logic reads
2851                        more naturally. */
2852
2853 #define got_wordnum charid
2854                     got_wordnum = trie->states[ state ].wordnum;
2855
2856                     if ( got_wordnum ) {
2857                         if ( ! ST.accepted ) {
2858                             ENTER;
2859                             SAVETMPS;
2860                             bufflen = TRIE_INITAL_ACCEPT_BUFFLEN;
2861                             sv_accept_buff=newSV(bufflen *
2862                                             sizeof(reg_trie_accepted) - 1);
2863                             SvCUR_set(sv_accept_buff, 0);
2864                             SvPOK_on(sv_accept_buff);
2865                             sv_2mortal(sv_accept_buff);
2866                             SAVETMPS;
2867                             ST.accept_buff =
2868                                 (reg_trie_accepted*)SvPV_nolen(sv_accept_buff );
2869                         }
2870                         do {
2871                             if (ST.accepted >= bufflen) {
2872                                 bufflen *= 2;
2873                                 ST.accept_buff =(reg_trie_accepted*)
2874                                     SvGROW(sv_accept_buff,
2875                                         bufflen * sizeof(reg_trie_accepted));
2876                             }
2877                             SvCUR_set(sv_accept_buff,SvCUR(sv_accept_buff)
2878                                 + sizeof(reg_trie_accepted));
2879
2880
2881                             ST.accept_buff[ST.accepted].wordnum = got_wordnum;
2882                             ST.accept_buff[ST.accepted].endpos = uc;
2883                             ++ST.accepted;
2884                         } while (trie->nextword && (got_wordnum= trie->nextword[got_wordnum]));
2885                     }
2886 #undef got_wordnum
2887
2888                     DEBUG_TRIE_EXECUTE_r({
2889                                 DUMP_EXEC_POS( (char *)uc, scan, do_utf8 );
2890                                 PerlIO_printf( Perl_debug_log,
2891                                     "%*s  %sState: %4"UVxf" Accepted: %4"UVxf" ",
2892                                     2+depth * 2, "", PL_colors[4],
2893                                     (UV)state, (UV)ST.accepted );
2894                     });
2895
2896                     if ( base ) {
2897                         REXEC_TRIE_READ_CHAR(trie_type, trie, uc, uscan, len,
2898                             uvc, charid, foldlen, foldbuf, uniflags);
2899
2900                         if (charid &&
2901                              (base + charid > trie->uniquecharcount )
2902                              && (base + charid - 1 - trie->uniquecharcount
2903                                     < trie->lasttrans)
2904                              && trie->trans[base + charid - 1 -
2905                                     trie->uniquecharcount].check == state)
2906                         {
2907                             state = trie->trans[base + charid - 1 -
2908                                 trie->uniquecharcount ].next;
2909                         }
2910                         else {
2911                             state = 0;
2912                         }
2913                         uc += len;
2914
2915                     }
2916                     else {
2917                         state = 0;
2918                     }
2919                     DEBUG_TRIE_EXECUTE_r(
2920                         PerlIO_printf( Perl_debug_log,
2921                             "Charid:%3x CP:%4"UVxf" After State: %4"UVxf"%s\n",
2922                             charid, uvc, (UV)state, PL_colors[5] );
2923                     );
2924                 }
2925                 if (!ST.accepted )
2926                    sayNO;
2927
2928                 DEBUG_EXECUTE_r(
2929                     PerlIO_printf( Perl_debug_log,
2930                         "%*s  %sgot %"IVdf" possible matches%s\n",
2931                         REPORT_CODE_OFF + depth * 2, "",
2932                         PL_colors[4], (IV)ST.accepted, PL_colors[5] );
2933                 );
2934             }}
2935
2936             /* FALL THROUGH */
2937         case TRIE_next_fail: /* we failed - try next alterative */
2938             if (do_cutgroup) {
2939                 do_cutgroup = 0;
2940                 no_final = 0;
2941             }
2942             if ( ST.accepted == 1 ) {
2943                 /* only one choice left - just continue */
2944                 DEBUG_EXECUTE_r({
2945                     reg_trie_data * const trie
2946                         = (reg_trie_data*)rexi->data->data[ ARG(ST.me) ];
2947                     SV ** const tmp = av_fetch( trie->words,
2948                         ST.accept_buff[ 0 ].wordnum-1, 0 );
2949                     SV *sv= tmp ? sv_newmortal() : NULL;
2950
2951                     PerlIO_printf( Perl_debug_log,
2952                         "%*s  %sonly one match left: #%d <%s>%s\n",
2953                         REPORT_CODE_OFF+depth*2, "", PL_colors[4],
2954                         ST.accept_buff[ 0 ].wordnum,
2955                         tmp ? pv_pretty(sv, SvPV_nolen_const(*tmp), SvCUR(*tmp), 0,
2956                                 PL_colors[0], PL_colors[1],
2957                                 (SvUTF8(*tmp) ? PERL_PV_ESCAPE_UNI : 0)
2958                             )
2959                         : "not compiled under -Dr",
2960                         PL_colors[5] );
2961                 });
2962                 PL_reginput = (char *)ST.accept_buff[ 0 ].endpos;
2963                 /* in this case we free tmps/leave before we call regmatch
2964                    as we wont be using accept_buff again. */
2965
2966                 locinput = PL_reginput;
2967                 nextchr = UCHARAT(locinput);
2968                 if ( !ST.jump || !ST.jump[ST.accept_buff[0].wordnum])
2969                     scan = ST.B;
2970                 else
2971                     scan = ST.me + ST.jump[ST.accept_buff[0].wordnum];
2972                 if (!has_cutgroup) {
2973                     FREETMPS;
2974                     LEAVE;
2975                 } else {
2976                     ST.accepted--;
2977                     PUSH_YES_STATE_GOTO(TRIE_next, scan);
2978                 }
2979
2980                 continue; /* execute rest of RE */
2981             }
2982
2983             if (!ST.accepted-- ) {
2984                 DEBUG_EXECUTE_r({
2985                     PerlIO_printf( Perl_debug_log,
2986                         "%*s  %sTRIE failed...%s\n",
2987                         REPORT_CODE_OFF+depth*2, "",
2988                         PL_colors[4],
2989                         PL_colors[5] );
2990                 });
2991                 FREETMPS;
2992                 LEAVE;
2993                 sayNO_SILENT;
2994             }
2995
2996             /*
2997                There are at least two accepting states left.  Presumably
2998                the number of accepting states is going to be low,
2999                typically two. So we simply scan through to find the one
3000                with lowest wordnum.  Once we find it, we swap the last
3001                state into its place and decrement the size. We then try to
3002                match the rest of the pattern at the point where the word
3003                ends. If we succeed, control just continues along the
3004                regex; if we fail we return here to try the next accepting
3005                state
3006              */
3007
3008             {
3009                 U32 best = 0;
3010                 U32 cur;
3011                 for( cur = 1 ; cur <= ST.accepted ; cur++ ) {
3012                     DEBUG_TRIE_EXECUTE_r(
3013                         PerlIO_printf( Perl_debug_log,
3014                             "%*s  %sgot %"IVdf" (%d) as best, looking at %"IVdf" (%d)%s\n",
3015                             REPORT_CODE_OFF + depth * 2, "", PL_colors[4],
3016                             (IV)best, ST.accept_buff[ best ].wordnum, (IV)cur,
3017                             ST.accept_buff[ cur ].wordnum, PL_colors[5] );
3018                     );
3019
3020                     if (ST.accept_buff[cur].wordnum <
3021                             ST.accept_buff[best].wordnum)
3022                         best = cur;
3023                 }
3024
3025                 DEBUG_EXECUTE_r({
3026                     reg_trie_data * const trie
3027                         = (reg_trie_data*)rexi->data->data[ ARG(ST.me) ];
3028                     SV ** const tmp = av_fetch( trie->words,
3029                         ST.accept_buff[ best ].wordnum - 1, 0 );
3030                     regnode *nextop=(!ST.jump || !ST.jump[ST.accept_buff[best].wordnum]) ?
3031                                     ST.B :
3032                                     ST.me + ST.jump[ST.accept_buff[best].wordnum];
3033                     SV *sv= tmp ? sv_newmortal() : NULL;
3034
3035                     PerlIO_printf( Perl_debug_log,
3036                         "%*s  %strying alternation #%d <%s> at node #%d %s\n",
3037                         REPORT_CODE_OFF+depth*2, "", PL_colors[4],
3038                         ST.accept_buff[best].wordnum,
3039                         tmp ? pv_pretty(sv, SvPV_nolen_const(*tmp), SvCUR(*tmp), 0,
3040                                 PL_colors[0], PL_colors[1],
3041                                 (SvUTF8(*tmp) ? PERL_PV_ESCAPE_UNI : 0)
3042                             ) : "not compiled under -Dr",
3043                             REG_NODE_NUM(nextop),
3044                         PL_colors[5] );
3045                 });
3046
3047                 if ( best<ST.accepted ) {
3048                     reg_trie_accepted tmp = ST.accept_buff[ best ];
3049                     ST.accept_buff[ best ] = ST.accept_buff[ ST.accepted ];
3050                     ST.accept_buff[ ST.accepted ] = tmp;
3051                     best = ST.accepted;
3052                 }
3053                 PL_reginput = (char *)ST.accept_buff[ best ].endpos;
3054                 if ( !ST.jump || !ST.jump[ST.accept_buff[best].wordnum]) {
3055                     scan = ST.B;
3056                     /* NOTREACHED */
3057                 } else {
3058                     scan = ST.me + ST.jump[ST.accept_buff[best].wordnum];
3059                     /* NOTREACHED */
3060                 }
3061                 if (has_cutgroup) {
3062                     PUSH_YES_STATE_GOTO(TRIE_next, scan);
3063                     /* NOTREACHED */
3064                 } else {
3065                     PUSH_STATE_GOTO(TRIE_next, scan);
3066                     /* NOTREACHED */
3067                 }
3068                 /* NOTREACHED */
3069             }
3070             /* NOTREACHED */
3071         case TRIE_next:
3072             FREETMPS;
3073             LEAVE;
3074             sayYES;
3075 #undef  ST
3076
3077         case EXACT: {
3078             char *s = STRING(scan);
3079             ln = STR_LEN(scan);
3080             if (do_utf8 != UTF) {
3081                 /* The target and the pattern have differing utf8ness. */
3082                 char *l = locinput;
3083                 const char * const e = s + ln;
3084
3085                 if (do_utf8) {
3086                     /* The target is utf8, the pattern is not utf8. */
3087                     while (s < e) {
3088                         STRLEN ulen;
3089                         if (l >= PL_regeol)
3090                              sayNO;
3091                         if (NATIVE_TO_UNI(*(U8*)s) !=
3092                             utf8n_to_uvuni((U8*)l, UTF8_MAXBYTES, &ulen,
3093                                             uniflags))
3094                              sayNO;
3095                         l += ulen;
3096                         s ++;
3097                     }
3098                 }
3099                 else {
3100                     /* The target is not utf8, the pattern is utf8. */
3101                     while (s < e) {
3102                         STRLEN ulen;
3103                         if (l >= PL_regeol)
3104                             sayNO;
3105                         if (NATIVE_TO_UNI(*((U8*)l)) !=
3106                             utf8n_to_uvuni((U8*)s, UTF8_MAXBYTES, &ulen,
3107                                            uniflags))
3108                             sayNO;
3109                         s += ulen;
3110                         l ++;
3111                     }
3112                 }
3113                 locinput = l;
3114                 nextchr = UCHARAT(locinput);
3115                 break;
3116             }
3117             /* The target and the pattern have the same utf8ness. */
3118             /* Inline the first character, for speed. */
3119             if (UCHARAT(s) != nextchr)
3120                 sayNO;
3121             if (PL_regeol - locinput < ln)
3122                 sayNO;
3123             if (ln > 1 && memNE(s, locinput, ln))
3124                 sayNO;
3125             locinput += ln;
3126             nextchr = UCHARAT(locinput);
3127             break;
3128             }
3129         case EXACTFL:
3130             PL_reg_flags |= RF_tainted;
3131             /* FALL THROUGH */
3132         case EXACTF: {
3133             char * const s = STRING(scan);
3134             ln = STR_LEN(scan);
3135
3136             if (do_utf8 || UTF) {
3137               /* Either target or the pattern are utf8. */
3138                 const char * const l = locinput;
3139                 char *e = PL_regeol;
3140
3141                 if (ibcmp_utf8(s, 0,  ln, (bool)UTF,
3142                                l, &e, 0,  do_utf8)) {
3143                      /* One more case for the sharp s:
3144                       * pack("U0U*", 0xDF) =~ /ss/i,
3145                       * the 0xC3 0x9F are the UTF-8
3146                       * byte sequence for the U+00DF. */
3147                      if (!(do_utf8 &&
3148                            toLOWER(s[0]) == 's' &&
3149                            ln >= 2 &&
3150                            toLOWER(s[1]) == 's' &&
3151                            (U8)l[0] == 0xC3 &&
3152                            e - l >= 2 &&
3153                            (U8)l[1] == 0x9F))
3154                           sayNO;
3155                 }
3156                 locinput = e;
3157                 nextchr = UCHARAT(locinput);
3158                 break;
3159             }
3160
3161             /* Neither the target and the pattern are utf8. */
3162
3163             /* Inline the first character, for speed. */
3164             if (UCHARAT(s) != nextchr &&
3165                 UCHARAT(s) != ((OP(scan) == EXACTF)
3166                                ? PL_fold : PL_fold_locale)[nextchr])
3167                 sayNO;
3168             if (PL_regeol - locinput < ln)
3169                 sayNO;
3170             if (ln > 1 && (OP(scan) == EXACTF
3171                            ? ibcmp(s, locinput, ln)
3172                            : ibcmp_locale(s, locinput, ln)))
3173                 sayNO;
3174             locinput += ln;
3175             nextchr = UCHARAT(locinput);
3176             break;
3177             }
3178         case ANYOF:
3179             if (do_utf8) {
3180                 STRLEN inclasslen = PL_regeol - locinput;
3181
3182                 if (!reginclass(rex, scan, (U8*)locinput, &inclasslen, do_utf8))
3183                     goto anyof_fail;
3184                 if (locinput >= PL_regeol)
3185                     sayNO;
3186                 locinput += inclasslen ? inclasslen : UTF8SKIP(locinput);
3187                 nextchr = UCHARAT(locinput);
3188                 break;
3189             }
3190             else {
3191                 if (nextchr < 0)
3192                     nextchr = UCHARAT(locinput);
3193                 if (!REGINCLASS(rex, scan, (U8*)locinput))
3194                     goto anyof_fail;
3195                 if (!nextchr && locinput >= PL_regeol)
3196                     sayNO;
3197                 nextchr = UCHARAT(++locinput);
3198                 break;
3199             }
3200         anyof_fail:
3201             /* If we might have the case of the German sharp s
3202              * in a casefolding Unicode character class. */
3203
3204             if (ANYOF_FOLD_SHARP_S(scan, locinput, PL_regeol)) {
3205                  locinput += SHARP_S_SKIP;
3206                  nextchr = UCHARAT(locinput);
3207             }
3208             else
3209                  sayNO;
3210             break;
3211         case ALNUML:
3212             PL_reg_flags |= RF_tainted;
3213             /* FALL THROUGH */
3214         case ALNUM:
3215             if (!nextchr)
3216                 sayNO;
3217             if (do_utf8) {
3218                 LOAD_UTF8_CHARCLASS_ALNUM();
3219                 if (!(OP(scan) == ALNUM
3220                       ? (bool)swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
3221                       : isALNUM_LC_utf8((U8*)locinput)))
3222                 {
3223                     sayNO;
3224                 }
3225                 locinput += PL_utf8skip[nextchr];
3226                 nextchr = UCHARAT(locinput);
3227                 break;
3228             }
3229             if (!(OP(scan) == ALNUM
3230                   ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
3231                 sayNO;
3232             nextchr = UCHARAT(++locinput);
3233             break;
3234         case NALNUML:
3235             PL_reg_flags |= RF_tainted;
3236             /* FALL THROUGH */
3237         case NALNUM:
3238             if (!nextchr && locinput >= PL_regeol)
3239                 sayNO;
3240             if (do_utf8) {
3241                 LOAD_UTF8_CHARCLASS_ALNUM();
3242                 if (OP(scan) == NALNUM
3243                     ? (bool)swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
3244                     : isALNUM_LC_utf8((U8*)locinput))
3245                 {
3246                     sayNO;
3247                 }
3248                 locinput += PL_utf8skip[nextchr];
3249                 nextchr = UCHARAT(locinput);
3250                 break;
3251             }
3252             if (OP(scan) == NALNUM
3253                 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
3254                 sayNO;
3255             nextchr = UCHARAT(++locinput);
3256             break;
3257         case BOUNDL:
3258         case NBOUNDL:
3259             PL_reg_flags |= RF_tainted;
3260             /* FALL THROUGH */
3261         case BOUND:
3262         case NBOUND:
3263             /* was last char in word? */
3264             if (do_utf8) {
3265                 if (locinput == PL_bostr)
3266                     ln = '\n';
3267                 else {
3268                     const U8 * const r = reghop3((U8*)locinput, -1, (U8*)PL_bostr);
3269
3270                     ln = utf8n_to_uvchr(r, UTF8SKIP(r), 0, uniflags);
3271                 }
3272                 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
3273                     ln = isALNUM_uni(ln);
3274                     LOAD_UTF8_CHARCLASS_ALNUM();
3275                     n = swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8);
3276                 }
3277                 else {
3278                     ln = isALNUM_LC_uvchr(UNI_TO_NATIVE(ln));
3279                     n = isALNUM_LC_utf8((U8*)locinput);
3280                 }
3281             }
3282             else {
3283                 ln = (locinput != PL_bostr) ?
3284                     UCHARAT(locinput - 1) : '\n';
3285                 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
3286                     ln = isALNUM(ln);
3287                     n = isALNUM(nextchr);
3288                 }
3289                 else {
3290                     ln = isALNUM_LC(ln);
3291                     n = isALNUM_LC(nextchr);
3292                 }
3293             }
3294             if (((!ln) == (!n)) == (OP(scan) == BOUND ||
3295                                     OP(scan) == BOUNDL))
3296                     sayNO;
3297             break;
3298         case SPACEL:
3299             PL_reg_flags |= RF_tainted;
3300             /* FALL THROUGH */
3301         case SPACE:
3302             if (!nextchr)
3303                 sayNO;
3304             if (do_utf8) {
3305                 if (UTF8_IS_CONTINUED(nextchr)) {
3306                     LOAD_UTF8_CHARCLASS_SPACE();
3307                     if (!(OP(scan) == SPACE
3308                           ? (bool)swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
3309                           : isSPACE_LC_utf8((U8*)locinput)))
3310                     {
3311                         sayNO;
3312                     }
3313                     locinput += PL_utf8skip[nextchr];
3314                     nextchr = UCHARAT(locinput);
3315                     break;
3316                 }
3317                 if (!(OP(scan) == SPACE
3318                       ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
3319                     sayNO;
3320                 nextchr = UCHARAT(++locinput);
3321             }
3322             else {
3323                 if (!(OP(scan) == SPACE
3324                       ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
3325                     sayNO;
3326                 nextchr = UCHARAT(++locinput);
3327             }
3328             break;
3329         case NSPACEL:
3330             PL_reg_flags |= RF_tainted;
3331             /* FALL THROUGH */
3332         case NSPACE:
3333             if (!nextchr && locinput >= PL_regeol)
3334                 sayNO;
3335             if (do_utf8) {
3336                 LOAD_UTF8_CHARCLASS_SPACE();
3337                 if (OP(scan) == NSPACE
3338                     ? (bool)swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
3339                     : isSPACE_LC_utf8((U8*)locinput))
3340                 {
3341                     sayNO;
3342                 }
3343                 locinput += PL_utf8skip[nextchr];
3344                 nextchr = UCHARAT(locinput);
3345                 break;
3346             }
3347             if (OP(scan) == NSPACE
3348                 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
3349                 sayNO;
3350             nextchr = UCHARAT(++locinput);
3351             break;
3352         case DIGITL:
3353             PL_reg_flags |= RF_tainted;
3354             /* FALL THROUGH */
3355         case DIGIT:
3356             if (!nextchr)
3357                 sayNO;
3358             if (do_utf8) {
3359                 LOAD_UTF8_CHARCLASS_DIGIT();
3360                 if (!(OP(scan) == DIGIT
3361                       ? (bool)swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
3362                       : isDIGIT_LC_utf8((U8*)locinput)))
3363                 {
3364                     sayNO;
3365                 }
3366                 locinput += PL_utf8skip[nextchr];
3367                 nextchr = UCHARAT(locinput);
3368                 break;
3369             }
3370             if (!(OP(scan) == DIGIT
3371                   ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
3372                 sayNO;
3373             nextchr = UCHARAT(++locinput);
3374             break;
3375         case NDIGITL:
3376             PL_reg_flags |= RF_tainted;
3377             /* FALL THROUGH */
3378         case NDIGIT:
3379             if (!nextchr && locinput >= PL_regeol)
3380                 sayNO;
3381             if (do_utf8) {
3382                 LOAD_UTF8_CHARCLASS_DIGIT();
3383                 if (OP(scan) == NDIGIT
3384                     ? (bool)swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
3385                     : isDIGIT_LC_utf8((U8*)locinput))
3386                 {
3387                     sayNO;
3388                 }
3389                 locinput += PL_utf8skip[nextchr];
3390                 nextchr = UCHARAT(locinput);
3391                 break;
3392             }
3393             if (OP(scan) == NDIGIT
3394                 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
3395                 sayNO;
3396             nextchr = UCHARAT(++locinput);
3397             break;
3398         case CLUMP:
3399             if (locinput >= PL_regeol)
3400                 sayNO;
3401             if  (do_utf8) {
3402                 LOAD_UTF8_CHARCLASS_MARK();
3403                 if (swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
3404                     sayNO;
3405                 locinput += PL_utf8skip[nextchr];
3406                 while (locinput < PL_regeol &&
3407                        swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
3408                     locinput += UTF8SKIP(locinput);
3409                 if (locinput > PL_regeol)
3410                     sayNO;
3411             }
3412             else
3413                locinput++;
3414             nextchr = UCHARAT(locinput);
3415             break;
3416
3417         case NREFFL:
3418         {
3419             char *s;
3420             char type;
3421             PL_reg_flags |= RF_tainted;
3422             /* FALL THROUGH */
3423         case NREF:
3424         case NREFF:
3425             type = OP(scan);
3426             n = reg_check_named_buff_matched(rex,scan);
3427
3428             if ( n ) {
3429                 type = REF + ( type - NREF );
3430                 goto do_ref;
3431             } else {
3432                 sayNO;
3433             }
3434             /* unreached */
3435         case REFFL:
3436             PL_reg_flags |= RF_tainted;
3437             /* FALL THROUGH */
3438         case REF:
3439         case REFF:
3440             n = ARG(scan);  /* which paren pair */
3441             type = OP(scan);
3442           do_ref:
3443             ln = PL_regstartp[n];
3444             PL_reg_leftiter = PL_reg_maxiter;           /* Void cache */
3445             if (*PL_reglastparen < n || ln == -1)
3446                 sayNO;                  /* Do not match unless seen CLOSEn. */
3447             if (ln == PL_regendp[n])
3448                 break;
3449
3450             s = PL_bostr + ln;
3451             if (do_utf8 && type != REF) {       /* REF can do byte comparison */
3452                 char *l = locinput;
3453                 const char *e = PL_bostr + PL_regendp[n];
3454                 /*
3455                  * Note that we can't do the "other character" lookup trick as
3456                  * in the 8-bit case (no pun intended) because in Unicode we
3457                  * have to map both upper and title case to lower case.
3458                  */
3459                 if (type == REFF) {
3460                     while (s < e) {
3461                         STRLEN ulen1, ulen2;
3462                         U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
3463                         U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
3464
3465                         if (l >= PL_regeol)
3466                             sayNO;
3467                         toLOWER_utf8((U8*)s, tmpbuf1, &ulen1);
3468                         toLOWER_utf8((U8*)l, tmpbuf2, &ulen2);
3469                         if (ulen1 != ulen2 || memNE((char *)tmpbuf1, (char *)tmpbuf2, ulen1))
3470                             sayNO;
3471                         s += ulen1;
3472                         l += ulen2;
3473                     }
3474                 }
3475                 locinput = l;
3476                 nextchr = UCHARAT(locinput);
3477                 break;
3478             }
3479
3480             /* Inline the first character, for speed. */
3481             if (UCHARAT(s) != nextchr &&
3482                 (type == REF ||
3483                  (UCHARAT(s) != (type == REFF
3484                                   ? PL_fold : PL_fold_locale)[nextchr])))
3485                 sayNO;
3486             ln = PL_regendp[n] - ln;
3487             if (locinput + ln > PL_regeol)
3488                 sayNO;
3489             if (ln > 1 && (type == REF
3490                            ? memNE(s, locinput, ln)
3491                            : (type == REFF
3492                               ? ibcmp(s, locinput, ln)
3493                               : ibcmp_locale(s, locinput, ln))))
3494                 sayNO;
3495             locinput += ln;
3496             nextchr = UCHARAT(locinput);
3497             break;
3498         }
3499         case NOTHING:
3500         case TAIL:
3501             break;
3502         case BACK:
3503             break;
3504
3505 #undef  ST
3506 #define ST st->u.eval
3507         {
3508             SV *ret;
3509             regexp *re;
3510             regexp_internal *rei;
3511             regnode *startpoint;
3512
3513         case GOSTART:
3514         case GOSUB: /*    /(...(?1))/      */
3515             if (cur_eval && cur_eval->locinput==locinput) {
3516                 if (cur_eval->u.eval.close_paren == (U32)ARG(scan))
3517                     Perl_croak(aTHX_ "Infinite recursion in regex");
3518                 if ( ++nochange_depth > MAX_RECURSE_EVAL_NOCHANGE_DEPTH )
3519                     Perl_croak(aTHX_
3520                         "Pattern subroutine nesting without pos change"
3521                         " exceeded limit in regex");
3522             } else {
3523                 nochange_depth = 0;
3524             }
3525             re = rex;
3526             rei = rexi;
3527             (void)ReREFCNT_inc(rex);
3528             if (OP(scan)==GOSUB) {
3529                 startpoint = scan + ARG2L(scan);
3530                 ST.close_paren = ARG(scan);
3531             } else {
3532                 startpoint = rei->program+1;
3533                 ST.close_paren = 0;
3534             }
3535             goto eval_recurse_doit;
3536             /* NOTREACHED */
3537         case EVAL:  /*   /(?{A})B/   /(??{A})B/  and /(?(?{A})X|Y)B/   */
3538             if (cur_eval && cur_eval->locinput==locinput) {
3539                 if ( ++nochange_depth > MAX_RECURSE_EVAL_NOCHANGE_DEPTH )
3540                     Perl_croak(aTHX_ "EVAL without pos change exceeded limit in regex");
3541             } else {
3542                 nochange_depth = 0;
3543             }
3544             {
3545                 /* execute the code in the {...} */
3546                 dSP;
3547                 SV ** const before = SP;
3548                 OP_4tree * const oop = PL_op;
3549                 COP * const ocurcop = PL_curcop;
3550                 PAD *old_comppad;
3551
3552                 n = ARG(scan);
3553                 PL_op = (OP_4tree*)rexi->data->data[n];
3554                 DEBUG_STATE_r( PerlIO_printf(Perl_debug_log,
3555                     "  re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
3556                 PAD_SAVE_LOCAL(old_comppad, (PAD*)rexi->data->data[n + 2]);
3557                 PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
3558
3559                 CALLRUNOPS(aTHX);                       /* Scalar context. */
3560                 SPAGAIN;
3561                 if (SP == before)
3562                     ret = &PL_sv_undef;   /* protect against empty (?{}) blocks. */
3563                 else {
3564                     ret = POPs;
3565                     PUTBACK;
3566                 }
3567
3568                 PL_op = oop;
3569                 PAD_RESTORE_LOCAL(old_comppad);
3570                 PL_curcop = ocurcop;
3571                 if (!logical) {
3572                     /* /(?{...})/ */
3573                     sv_setsv(save_scalar(PL_replgv), ret);
3574                     break;
3575                 }
3576             }
3577             if (logical == 2) { /* Postponed subexpression: /(??{...})/ */
3578                 logical = 0;
3579                 {
3580                     /* extract RE object from returned value; compiling if
3581                      * necessary */
3582
3583                     MAGIC *mg = NULL;
3584                     const SV *sv;
3585                     if(SvROK(ret) && SvSMAGICAL(sv = SvRV(ret)))
3586                         mg = mg_find(sv, PERL_MAGIC_qr);
3587                     else if (SvSMAGICAL(ret)) {
3588                         if (SvGMAGICAL(ret))
3589                             sv_unmagic(ret, PERL_MAGIC_qr);
3590                         else
3591                             mg = mg_find(ret, PERL_MAGIC_qr);
3592                     }
3593
3594                     if (mg) {
3595                         re = (regexp *)mg->mg_obj;
3596                         (void)ReREFCNT_inc(re);
3597                     }
3598                     else {
3599                         STRLEN len;
3600                         const char * const t = SvPV_const(ret, len);
3601                         PMOP pm;
3602                         const I32 osize = PL_regsize;
3603
3604                         Zero(&pm, 1, PMOP);
3605                         if (DO_UTF8(ret)) pm.op_pmdynflags |= PMdf_DYN_UTF8;
3606                         re = CALLREGCOMP((char*)t, (char*)t + len, &pm);
3607                         if (!(SvFLAGS(ret)
3608                               & (SVs_TEMP | SVs_PADTMP | SVf_READONLY
3609                                 | SVs_GMG)))
3610                             sv_magic(ret,(SV*)ReREFCNT_inc(re),
3611                                         PERL_MAGIC_qr,0,0);
3612                         PL_regsize = osize;
3613                     }
3614                 }
3615                 rei = RXi_GET(re);
3616                 DEBUG_EXECUTE_r(
3617                     debug_start_match(re, do_utf8, locinput, PL_regeol,
3618                         "Matching embedded");
3619                 );
3620                 startpoint = rei->program + 1;
3621                 ST.close_paren = 0; /* only used for GOSUB */
3622                 /* borrowed from regtry */
3623                 if (PL_reg_start_tmpl <= re->nparens) {
3624                     PL_reg_start_tmpl = re->nparens*3/2 + 3;
3625                     if(PL_reg_start_tmp)
3626                         Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
3627                     else
3628                         Newx(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
3629                 }
3630
3631         eval_recurse_doit: /* Share code with GOSUB below this line */
3632                 /* run the pattern returned from (??{...}) */
3633                 ST.cp = regcppush(0);   /* Save *all* the positions. */
3634                 REGCP_SET(ST.lastcp);
3635
3636                 PL_regstartp = re->startp; /* essentially NOOP on GOSUB */
3637                 PL_regendp = re->endp;     /* essentially NOOP on GOSUB */
3638
3639                 *PL_reglastparen = 0;
3640                 *PL_reglastcloseparen = 0;
3641                 PL_reginput = locinput;
3642                 PL_regsize = 0;
3643
3644                 /* XXXX This is too dramatic a measure... */
3645                 PL_reg_maxiter = 0;
3646
3647                 ST.toggle_reg_flags = PL_reg_flags;
3648                 if (re->extflags & RXf_UTF8)
3649                     PL_reg_flags |= RF_utf8;
3650                 else
3651                     PL_reg_flags &= ~RF_utf8;
3652                 ST.toggle_reg_flags ^= PL_reg_flags; /* diff of old and new */
3653
3654                 ST.prev_rex = rex;
3655                 ST.prev_curlyx = cur_curlyx;
3656                 rex = re;
3657                 rexi = rei;
3658                 cur_curlyx = NULL;
3659                 ST.B = next;
3660                 ST.prev_eval = cur_eval;
3661                 cur_eval = st;
3662                 /* now continue from first node in postoned RE */
3663                 PUSH_YES_STATE_GOTO(EVAL_AB, startpoint);
3664                 /* NOTREACHED */
3665             }
3666             /* logical is 1,   /(?(?{...})X|Y)/ */
3667             sw = (bool)SvTRUE(ret);
3668             logical = 0;
3669             break;
3670         }
3671
3672         case EVAL_AB: /* cleanup after a successful (??{A})B */
3673             /* note: this is called twice; first after popping B, then A */
3674             PL_reg_flags ^= ST.toggle_reg_flags;
3675             ReREFCNT_dec(rex);
3676             rex = ST.prev_rex;
3677             rexi = RXi_GET(rex);
3678             regcpblow(ST.cp);
3679             cur_eval = ST.prev_eval;
3680             cur_curlyx = ST.prev_curlyx;
3681             /* XXXX This is too dramatic a measure... */
3682             PL_reg_maxiter = 0;
3683             sayYES;
3684
3685
3686         case EVAL_AB_fail: /* unsuccessfully ran A or B in (??{A})B */
3687             /* note: this is called twice; first after popping B, then A */
3688             PL_reg_flags ^= ST.toggle_reg_flags;
3689             ReREFCNT_dec(rex);
3690             rex = ST.prev_rex;
3691             rexi = RXi_GET(rex);
3692             PL_reginput = locinput;
3693             REGCP_UNWIND(ST.lastcp);
3694             regcppop(rex);
3695             cur_eval = ST.prev_eval;
3696             cur_curlyx = ST.prev_curlyx;
3697             /* XXXX This is too dramatic a measure... */
3698             PL_reg_maxiter = 0;
3699             sayNO_SILENT;
3700 #undef ST
3701
3702         case OPEN:
3703             n = ARG(scan);  /* which paren pair */
3704             PL_reg_start_tmp[n] = locinput;
3705             if (n > PL_regsize)
3706                 PL_regsize = n;
3707             lastopen = n;
3708             break;
3709         case CLOSE:
3710             n = ARG(scan);  /* which paren pair */
3711             PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
3712             PL_regendp[n] = locinput - PL_bostr;
3713             /*if (n > PL_regsize)
3714                 PL_regsize = n;*/
3715             if (n > *PL_reglastparen)
3716                 *PL_reglastparen = n;
3717             *PL_reglastcloseparen = n;
3718             if (cur_eval && cur_eval->u.eval.close_paren == n) {
3719                 goto fake_end;
3720             }
3721             break;
3722         case ACCEPT:
3723             if (ARG(scan)){
3724                 regnode *cursor;
3725                 for (cursor=scan;
3726                      cursor && OP(cursor)!=END;
3727                      cursor=regnext(cursor))
3728                 {
3729                     if ( OP(cursor)==CLOSE ){
3730                         n = ARG(cursor);
3731                         if ( n <= lastopen ) {
3732                             PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
3733                             PL_regendp[n] = locinput - PL_bostr;
3734                             /*if (n > PL_regsize)
3735                             PL_regsize = n;*/
3736                             if (n > *PL_reglastparen)
3737                                 *PL_reglastparen = n;
3738                             *PL_reglastcloseparen = n;
3739                             if ( n == ARG(scan) || (cur_eval &&
3740                                 cur_eval->u.eval.close_paren == n))
3741                                 break;
3742                         }
3743                     }
3744                 }
3745             }
3746             goto fake_end;
3747             /*NOTREACHED*/
3748         case GROUPP:
3749             n = ARG(scan);  /* which paren pair */
3750             sw = (bool)(*PL_reglastparen >= n && PL_regendp[n] != -1);
3751             break;
3752         case NGROUPP:
3753             /* reg_check_named_buff_matched returns 0 for no match */
3754             sw = (bool)(0 < reg_check_named_buff_matched(rex,scan));
3755             break;
3756         case INSUBP:
3757             n = ARG(scan);
3758             sw = (cur_eval && (!n || cur_eval->u.eval.close_paren == n));
3759             break;
3760         case DEFINEP:
3761             sw = 0;
3762             break;
3763         case IFTHEN:
3764             PL_reg_leftiter = PL_reg_maxiter;           /* Void cache */
3765             if (sw)
3766                 next = NEXTOPER(NEXTOPER(scan));
3767             else {
3768                 next = scan + ARG(scan);
3769                 if (OP(next) == IFTHEN) /* Fake one. */
3770                     next = NEXTOPER(NEXTOPER(next));
3771             }
3772             break;
3773         case LOGICAL:
3774             logical = scan->flags;
3775             break;
3776
3777 /*******************************************************************
3778
3779 The CURLYX/WHILEM pair of ops handle the most generic case of the /A*B/
3780 pattern, where A and B are subpatterns. (For simple A, CURLYM or
3781 STAR/PLUS/CURLY/CURLYN are used instead.)
3782
3783 A*B is compiled as <CURLYX><A><WHILEM><B>
3784
3785 On entry to the subpattern, CURLYX is called. This pushes a CURLYX
3786 state, which contains the current count, initialised to -1. It also sets
3787 cur_curlyx to point to this state, with any previous value saved in the
3788 state block.
3789
3790 CURLYX then jumps straight to the WHILEM op, rather than executing A,
3791 since the pattern may possibly match zero times (i.e. it's a while {} loop
3792 rather than a do {} while loop).
3793
3794 Each entry to WHILEM represents a successful match of A. The count in the
3795 CURLYX block is incremented, another WHILEM state is pushed, and execution
3796 passes to A or B depending on greediness and the current count.
3797
3798 For example, if matching against the string a1a2a3b (where the aN are
3799 substrings that match /A/), then the match progresses as follows: (the
3800 pushed states are interspersed with the bits of strings matched so far):
3801
3802     <CURLYX cnt=-1>
3803     <CURLYX cnt=0><WHILEM>
3804     <CURLYX cnt=1><WHILEM> a1 <WHILEM>
3805     <CURLYX cnt=2><WHILEM> a1 <WHILEM> a2 <WHILEM>
3806     <CURLYX cnt=3><WHILEM> a1 <WHILEM> a2 <WHILEM> a3 <WHILEM>
3807     <CURLYX cnt=3><WHILEM> a1 <WHILEM> a2 <WHILEM> a3 <WHILEM> b
3808
3809 (Contrast this with something like CURLYM, which maintains only a single
3810 backtrack state:
3811
3812     <CURLYM cnt=0> a1
3813     a1 <CURLYM cnt=1> a2
3814     a1 a2 <CURLYM cnt=2> a3
3815     a1 a2 a3 <CURLYM cnt=3> b
3816 )
3817
3818 Each WHILEM state block marks a point to backtrack to upon partial failure
3819 of A or B, and also contains some minor state data related to that
3820 iteration.  The CURLYX block, pointed to by cur_curlyx, contains the
3821 overall state, such as the count, and pointers to the A and B ops.
3822
3823 This is complicated slightly by nested CURLYX/WHILEM's. Since cur_curlyx
3824 must always point to the *current* CURLYX block, the rules are:
3825
3826 When executing CURLYX, save the old cur_curlyx in the CURLYX state block,
3827 and set cur_curlyx to point the new block.
3828
3829 When popping the CURLYX block after a successful or unsuccessful match,
3830 restore the previous cur_curlyx.
3831
3832 When WHILEM is about to execute B, save the current cur_curlyx, and set it
3833 to the outer one saved in the CURLYX block.
3834
3835 When popping the WHILEM block after a successful or unsuccessful B match,
3836 restore the previous cur_curlyx.
3837
3838 Here's an example for the pattern (AI* BI)*BO
3839 I and O refer to inner and outer, C and W refer to CURLYX and WHILEM:
3840
3841 cur_
3842 curlyx backtrack stack
3843 ------ ---------------
3844 NULL
3845 CO     <CO prev=NULL> <WO>
3846 CI     <CO prev=NULL> <WO> <CI prev=CO> <WI> ai
3847 CO     <CO prev=NULL> <WO> <CI prev=CO> <WI> ai <WI prev=CI> bi
3848 NULL   <CO prev=NULL> <WO> <CI prev=CO> <WI> ai <WI prev=CI> bi <WO prev=CO> bo
3849
3850 At this point the pattern succeeds, and we work back down the stack to
3851 clean up, restoring as we go:
3852
3853 CO     <CO prev=NULL> <WO> <CI prev=CO> <WI> ai <WI prev=CI> bi
3854 CI     <CO prev=NULL> <WO> <CI prev=CO> <WI> ai
3855 CO     <CO prev=NULL> <WO>
3856 NULL
3857
3858 *******************************************************************/
3859
3860 #define ST st->u.curlyx
3861
3862         case CURLYX:    /* start of /A*B/  (for complex A) */
3863         {
3864             /* No need to save/restore up to this paren */
3865             I32 parenfloor = scan->flags;
3866
3867             assert(next); /* keep Coverity happy */
3868             if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
3869                 next += ARG(next);
3870
3871             /* XXXX Probably it is better to teach regpush to support
3872                parenfloor > PL_regsize... */
3873             if (parenfloor > (I32)*PL_reglastparen)
3874                 parenfloor = *PL_reglastparen; /* Pessimization... */
3875
3876             ST.prev_curlyx= cur_curlyx;
3877             cur_curlyx = st;
3878             ST.cp = PL_savestack_ix;
3879
3880             /* these fields contain the state of the current curly.
3881              * they are accessed by subsequent WHILEMs */
3882             ST.parenfloor = parenfloor;
3883             ST.min = ARG1(scan);
3884             ST.max = ARG2(scan);
3885             ST.A = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
3886             ST.B = next;
3887             ST.minmod = minmod;
3888             minmod = 0;
3889             ST.count = -1;      /* this will be updated by WHILEM */
3890             ST.lastloc = NULL;  /* this will be updated by WHILEM */
3891
3892             PL_reginput = locinput;
3893             PUSH_YES_STATE_GOTO(CURLYX_end, PREVOPER(next));
3894             /* NOTREACHED */
3895         }
3896
3897         case CURLYX_end: /* just finished matching all of A*B */
3898             if (PL_reg_eval_set){
3899                 SV *pres= GvSV(PL_replgv);
3900                 SvREFCNT_inc(pres);
3901                 regcpblow(ST.cp);
3902                 sv_setsv(GvSV(PL_replgv), pres);
3903                 SvREFCNT_dec(pres);
3904             } else {
3905                 regcpblow(ST.cp);
3906             }
3907             cur_curlyx = ST.prev_curlyx;
3908             sayYES;
3909             /* NOTREACHED */
3910
3911         case CURLYX_end_fail: /* just failed to match all of A*B */
3912             regcpblow(ST.cp);
3913             cur_curlyx = ST.prev_curlyx;
3914             sayNO;
3915             /* NOTREACHED */
3916
3917
3918 #undef ST
3919 #define ST st->u.whilem
3920
3921         case WHILEM:     /* just matched an A in /A*B/  (for complex A) */
3922         {
3923             /* see the discussion above about CURLYX/WHILEM */
3924             I32 n;
3925             assert(cur_curlyx); /* keep Coverity happy */
3926             n = ++cur_curlyx->u.curlyx.count; /* how many A's matched */
3927             ST.save_lastloc = cur_curlyx->u.curlyx.lastloc;
3928             ST.cache_offset = 0;
3929             ST.cache_mask = 0;
3930
3931             PL_reginput = locinput;
3932
3933             DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
3934                   "%*s  whilem: matched %ld out of %ld..%ld\n",
3935                   REPORT_CODE_OFF+depth*2, "", (long)n,
3936                   (long)cur_curlyx->u.curlyx.min,
3937                   (long)cur_curlyx->u.curlyx.max)
3938             );
3939
3940             /* First just match a string of min A's. */
3941
3942             if (n < cur_curlyx->u.curlyx.min) {
3943                 cur_curlyx->u.curlyx.lastloc = locinput;
3944                 PUSH_STATE_GOTO(WHILEM_A_pre, cur_curlyx->u.curlyx.A);
3945                 /* NOTREACHED */
3946             }
3947
3948             /* If degenerate A matches "", assume A done. */
3949
3950             if (locinput == cur_curlyx->u.curlyx.lastloc) {
3951                 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
3952                    "%*s  whilem: empty match detected, trying continuation...\n",
3953                    REPORT_CODE_OFF+depth*2, "")
3954                 );
3955                 goto do_whilem_B_max;
3956             }
3957
3958             /* super-linear cache processing */
3959
3960             if (scan->flags) {
3961
3962                 if (!PL_reg_maxiter) {
3963                     /* start the countdown: Postpone detection until we
3964                      * know the match is not *that* much linear. */
3965                     PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
3966                     /* possible overflow for long strings and many CURLYX's */
3967                     if (PL_reg_maxiter < 0)
3968                         PL_reg_maxiter = I32_MAX;
3969                     PL_reg_leftiter = PL_reg_maxiter;
3970                 }
3971
3972                 if (PL_reg_leftiter-- == 0) {
3973                     /* initialise cache */
3974                     const I32 size = (PL_reg_maxiter + 7)/8;
3975                     if (PL_reg_poscache) {
3976                         if ((I32)PL_reg_poscache_size < size) {
3977                             Renew(PL_reg_poscache, size, char);
3978                             PL_reg_poscache_size = size;
3979                         }
3980                         Zero(PL_reg_poscache, size, char);
3981                     }
3982                     else {
3983                         PL_reg_poscache_size = size;
3984                         Newxz(PL_reg_poscache, size, char);
3985                     }
3986                     DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
3987       "%swhilem: Detected a super-linear match, switching on caching%s...\n",
3988                               PL_colors[4], PL_colors[5])
3989                     );
3990                 }
3991
3992                 if (PL_reg_leftiter < 0) {
3993                     /* have we already failed at this position? */
3994                     I32 offset, mask;
3995                     offset  = (scan->flags & 0xf) - 1
3996                                 + (locinput - PL_bostr)  * (scan->flags>>4);
3997                     mask    = 1 << (offset % 8);
3998                     offset /= 8;
3999                     if (PL_reg_poscache[offset] & mask) {
4000                         DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
4001                             "%*s  whilem: (cache) already tried at this position...\n",
4002                             REPORT_CODE_OFF+depth*2, "")
4003                         );
4004                         sayNO; /* cache records failure */
4005                     }
4006                     ST.cache_offset = offset;
4007                     ST.cache_mask   = mask;
4008                 }
4009             }
4010
4011             /* Prefer B over A for minimal matching. */
4012
4013             if (cur_curlyx->u.curlyx.minmod) {
4014                 ST.save_curlyx = cur_curlyx;
4015                 cur_curlyx = cur_curlyx->u.curlyx.prev_curlyx;
4016                 ST.cp = regcppush(ST.save_curlyx->u.curlyx.parenfloor);
4017                 REGCP_SET(ST.lastcp);
4018                 PUSH_YES_STATE_GOTO(WHILEM_B_min, ST.save_curlyx->u.curlyx.B);
4019                 /* NOTREACHED */
4020             }
4021
4022             /* Prefer A over B for maximal matching. */
4023
4024             if (n < cur_curlyx->u.curlyx.max) { /* More greed allowed? */
4025                 ST.cp = regcppush(cur_curlyx->u.curlyx.parenfloor);
4026                 cur_curlyx->u.curlyx.lastloc = locinput;
4027                 REGCP_SET(ST.lastcp);
4028                 PUSH_STATE_GOTO(WHILEM_A_max, cur_curlyx->u.curlyx.A);
4029                 /* NOTREACHED */
4030             }
4031             goto do_whilem_B_max;
4032         }
4033         /* NOTREACHED */
4034
4035         case WHILEM_B_min: /* just matched B in a minimal match */
4036         case WHILEM_B_max: /* just matched B in a maximal match */
4037             cur_curlyx = ST.save_curlyx;
4038             sayYES;
4039             /* NOTREACHED */
4040
4041         case WHILEM_B_max_fail: /* just failed to match B in a maximal match */
4042             cur_curlyx = ST.save_curlyx;
4043             cur_curlyx->u.curlyx.lastloc = ST.save_lastloc;
4044             cur_curlyx->u.curlyx.count--;
4045             CACHEsayNO;
4046             /* NOTREACHED */
4047
4048         case WHILEM_A_min_fail: /* just failed to match A in a minimal match */
4049             REGCP_UNWIND(ST.lastcp);
4050             regcppop(rex);
4051             /* FALL THROUGH */
4052         case WHILEM_A_pre_fail: /* just failed to match even minimal A */
4053             cur_curlyx->u.curlyx.lastloc = ST.save_lastloc;
4054             cur_curlyx->u.curlyx.count--;
4055             CACHEsayNO;
4056             /* NOTREACHED */
4057
4058         case WHILEM_A_max_fail: /* just failed to match A in a maximal match */
4059             REGCP_UNWIND(ST.lastcp);
4060             regcppop(rex);      /* Restore some previous $<digit>s? */
4061             PL_reginput = locinput;
4062             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
4063                 "%*s  whilem: failed, trying continuation...\n",
4064                 REPORT_CODE_OFF+depth*2, "")
4065             );
4066           do_whilem_B_max:
4067             if (cur_curlyx->u.curlyx.count >= REG_INFTY
4068                 && ckWARN(WARN_REGEXP)
4069                 && !(PL_reg_flags & RF_warned))
4070             {
4071                 PL_reg_flags |= RF_warned;
4072                 Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s limit (%d) exceeded",
4073                      "Complex regular subexpression recursion",
4074                      REG_INFTY - 1);
4075             }
4076
4077             /* now try B */
4078             ST.save_curlyx = cur_curlyx;
4079             cur_curlyx = cur_curlyx->u.curlyx.prev_curlyx;
4080             PUSH_YES_STATE_GOTO(WHILEM_B_max, ST.save_curlyx->u.curlyx.B);
4081             /* NOTREACHED */
4082
4083         case WHILEM_B_min_fail: /* just failed to match B in a minimal match */
4084             cur_curlyx = ST.save_curlyx;
4085             REGCP_UNWIND(ST.lastcp);
4086             regcppop(rex);
4087
4088             if (cur_curlyx->u.curlyx.count >= cur_curlyx->u.curlyx.max) {
4089                 /* Maximum greed exceeded */
4090                 if (cur_curlyx->u.curlyx.count >= REG_INFTY
4091                     && ckWARN(WARN_REGEXP)
4092                     && !(PL_reg_flags & RF_warned))
4093                 {
4094                     PL_reg_flags |= RF_warned;
4095                     Perl_warner(aTHX_ packWARN(WARN_REGEXP),
4096                         "%s limit (%d) exceeded",
4097                         "Complex regular subexpression recursion",
4098                         REG_INFTY - 1);
4099                 }
4100                 cur_curlyx->u.curlyx.count--;
4101                 CACHEsayNO;
4102             }
4103
4104             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
4105                 "%*s  trying longer...\n", REPORT_CODE_OFF+depth*2, "")
4106             );
4107             /* Try grabbing another A and see if it helps. */
4108             PL_reginput = locinput;
4109             cur_curlyx->u.curlyx.lastloc = locinput;
4110             ST.cp = regcppush(cur_curlyx->u.curlyx.parenfloor);
4111             REGCP_SET(ST.lastcp);
4112             PUSH_STATE_GOTO(WHILEM_A_min, ST.save_curlyx->u.curlyx.A);
4113             /* NOTREACHED */
4114
4115 #undef  ST
4116 #define ST st->u.branch
4117
4118         case BRANCHJ:       /*  /(...|A|...)/ with long next pointer */
4119             next = scan + ARG(scan);
4120             if (next == scan)
4121                 next = NULL;
4122             scan = NEXTOPER(scan);
4123             /* FALL THROUGH */
4124
4125         case BRANCH:        /*  /(...|A|...)/ */
4126             scan = NEXTOPER(scan); /* scan now points to inner node */
4127             if ((!next || (OP(next) != BRANCH && OP(next) != BRANCHJ))
4128                 && !has_cutgroup)
4129             {
4130                 /* last branch; skip state push and jump direct to node */
4131                 continue;
4132             }
4133             ST.lastparen = *PL_reglastparen;
4134             ST.next_branch = next;
4135             REGCP_SET(ST.cp);
4136             PL_reginput = locinput;
4137
4138             /* Now go into the branch */
4139             if (has_cutgroup) {
4140                 PUSH_YES_STATE_GOTO(BRANCH_next, scan);
4141             } else {
4142                 PUSH_STATE_GOTO(BRANCH_next, scan);
4143             }
4144             /* NOTREACHED */
4145         case CUTGROUP:
4146             PL_reginput = locinput;
4147             sv_yes_mark = st->u.mark.mark_name = scan->flags ? NULL :
4148                 (SV*)rexi->data->data[ ARG( scan ) ];
4149             PUSH_STATE_GOTO(CUTGROUP_next,next);
4150             /* NOTREACHED */
4151         case CUTGROUP_next_fail:
4152             do_cutgroup = 1;
4153             no_final = 1;
4154             if (st->u.mark.mark_name)
4155                 sv_commit = st->u.mark.mark_name;
4156             sayNO;
4157             /* NOTREACHED */
4158         case BRANCH_next:
4159             sayYES;
4160             /* NOTREACHED */
4161         case BRANCH_next_fail: /* that branch failed; try the next, if any */
4162             if (do_cutgroup) {
4163                 do_cutgroup = 0;
4164                 no_final = 0;
4165             }
4166             REGCP_UNWIND(ST.cp);
4167             for (n = *PL_reglastparen; n > ST.lastparen; n--)
4168                 PL_regendp[n] = -1;
4169             *PL_reglastparen = n;
4170             /*dmq: *PL_reglastcloseparen = n; */
4171             scan = ST.next_branch;
4172             /* no more branches? */
4173             if (!scan || (OP(scan) != BRANCH && OP(scan) != BRANCHJ)) {
4174                 DEBUG_EXECUTE_r({
4175                     PerlIO_printf( Perl_debug_log,
4176                         "%*s  %sBRANCH failed...%s\n",
4177                         REPORT_CODE_OFF+depth*2, "",
4178                         PL_colors[4],
4179                         PL_colors[5] );
4180                 });
4181                 sayNO_SILENT;
4182             }
4183             continue; /* execute next BRANCH[J] op */
4184             /* NOTREACHED */
4185
4186         case MINMOD:
4187             minmod = 1;
4188             break;
4189
4190 #undef  ST
4191 #define ST st->u.curlym
4192
4193         case CURLYM:    /* /A{m,n}B/ where A is fixed-length */
4194
4195             /* This is an optimisation of CURLYX that enables us to push
4196              * only a single backtracking state, no matter now many matches
4197              * there are in {m,n}. It relies on the pattern being constant
4198              * length, with no parens to influence future backrefs
4199              */
4200
4201             ST.me = scan;
4202             scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
4203
4204             /* if paren positive, emulate an OPEN/CLOSE around A */
4205             if (ST.me->flags) {
4206                 U32 paren = ST.me->flags;
4207                 if (paren > PL_regsize)
4208                     PL_regsize = paren;
4209                 if (paren > *PL_reglastparen)
4210                     *PL_reglastparen = paren;
4211                 scan += NEXT_OFF(scan); /* Skip former OPEN. */
4212             }
4213             ST.A = scan;
4214             ST.B = next;
4215             ST.alen = 0;
4216             ST.count = 0;
4217             ST.minmod = minmod;
4218             minmod = 0;
4219             ST.c1 = CHRTEST_UNINIT;
4220             REGCP_SET(ST.cp);
4221
4222             if (!(ST.minmod ? ARG1(ST.me) : ARG2(ST.me))) /* min/max */
4223                 goto curlym_do_B;
4224
4225           curlym_do_A: /* execute the A in /A{m,n}B/  */
4226             PL_reginput = locinput;
4227             PUSH_YES_STATE_GOTO(CURLYM_A, ST.A); /* match A */
4228             /* NOTREACHED */
4229
4230         case CURLYM_A: /* we've just matched an A */
4231             locinput = st->locinput;
4232             nextchr = UCHARAT(locinput);
4233
4234             ST.count++;
4235             /* after first match, determine A's length: u.curlym.alen */
4236             if (ST.count == 1) {
4237                 if (PL_reg_match_utf8) {
4238                     char *s = locinput;
4239                     while (s < PL_reginput) {
4240                         ST.alen++;
4241                         s += UTF8SKIP(s);
4242                     }
4243                 }
4244                 else {
4245                     ST.alen = PL_reginput - locinput;
4246                 }
4247                 if (ST.alen == 0)
4248                     ST.count = ST.minmod ? ARG1(ST.me) : ARG2(ST.me);
4249             }
4250             DEBUG_EXECUTE_r(
4251                 PerlIO_printf(Perl_debug_log,
4252                           "%*s  CURLYM now matched %"IVdf" times, len=%"IVdf"...\n",
4253                           (int)(REPORT_CODE_OFF+(depth*2)), "",
4254                           (IV) ST.count, (IV)ST.alen)
4255             );
4256
4257             locinput = PL_reginput;
4258
4259             if (cur_eval && cur_eval->u.eval.close_paren &&
4260                 cur_eval->u.eval.close_paren == (U32)ST.me->flags)
4261                 goto fake_end;
4262
4263             if ( ST.count < (ST.minmod ? ARG1(ST.me) : ARG2(ST.me)) )
4264                 goto curlym_do_A; /* try to match another A */
4265             goto curlym_do_B; /* try to match B */
4266
4267         case CURLYM_A_fail: /* just failed to match an A */
4268             REGCP_UNWIND(ST.cp);
4269
4270             if (ST.minmod || ST.count < ARG1(ST.me) /* min*/
4271                 || (cur_eval && cur_eval->u.eval.close_paren &&
4272                     cur_eval->u.eval.close_paren == (U32)ST.me->flags))
4273                 sayNO;
4274
4275           curlym_do_B: /* execute the B in /A{m,n}B/  */
4276             PL_reginput = locinput;
4277             if (ST.c1 == CHRTEST_UNINIT) {
4278                 /* calculate c1 and c2 for possible match of 1st char
4279                  * following curly */
4280                 ST.c1 = ST.c2 = CHRTEST_VOID;
4281                 if (HAS_TEXT(ST.B) || JUMPABLE(ST.B)) {
4282                     regnode *text_node = ST.B;
4283                     if (! HAS_TEXT(text_node))
4284                         FIND_NEXT_IMPT(text_node);
4285                     if (HAS_TEXT(text_node)
4286                         && PL_regkind[OP(text_node)] != REF)
4287                     {
4288                         ST.c1 = (U8)*STRING(text_node);
4289                         ST.c2 =
4290                             (OP(text_node) == EXACTF || OP(text_node) == REFF)
4291                             ? PL_fold[ST.c1]
4292                             : (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
4293                                 ? PL_fold_locale[ST.c1]
4294                                 : ST.c1;
4295                     }
4296                 }
4297             }
4298
4299             DEBUG_EXECUTE_r(
4300                 PerlIO_printf(Perl_debug_log,
4301                     "%*s  CURLYM trying tail with matches=%"IVdf"...\n",
4302                     (int)(REPORT_CODE_OFF+(depth*2)),
4303                     "", (IV)ST.count)
4304                 );
4305             if (ST.c1 != CHRTEST_VOID
4306                     && UCHARAT(PL_reginput) != ST.c1
4307                     && UCHARAT(PL_reginput) != ST.c2)
4308             {
4309                 /* simulate B failing */
4310                 state_num = CURLYM_B_fail;
4311                 goto reenter_switch;
4312             }
4313
4314             if (ST.me->flags) {
4315                 /* mark current A as captured */
4316                 I32 paren = ST.me->flags;
4317                 if (ST.count) {
4318                     PL_regstartp[paren]
4319                         = HOPc(PL_reginput, -ST.alen) - PL_bostr;
4320                     PL_regendp[paren] = PL_reginput - PL_bostr;
4321                     /*dmq: *PL_reglastcloseparen = paren; */
4322                 }
4323                 else
4324                     PL_regendp[paren] = -1;
4325                 if (cur_eval && cur_eval->u.eval.close_paren &&
4326                     cur_eval->u.eval.close_paren == (U32)ST.me->flags)
4327                 {
4328                     if (ST.count)
4329                         goto fake_end;
4330                     else
4331                         sayNO;
4332                 }
4333             }
4334
4335             PUSH_STATE_GOTO(CURLYM_B, ST.B); /* match B */
4336             /* NOTREACHED */
4337
4338         case CURLYM_B_fail: /* just failed to match a B */
4339             REGCP_UNWIND(ST.cp);
4340             if (ST.minmod) {
4341                 if (ST.count == ARG2(ST.me) /* max */)
4342                     sayNO;
4343                 goto curlym_do_A; /* try to match a further A */
4344             }
4345             /* backtrack one A */
4346             if (ST.count == ARG1(ST.me) /* min */)
4347                 sayNO;
4348             ST.count--;
4349             locinput = HOPc(locinput, -ST.alen);
4350             goto curlym_do_B; /* try to match B */
4351
4352 #undef ST
4353 #define ST st->u.curly
4354
4355 #define CURLY_SETPAREN(paren, success) \
4356     if (paren) { \
4357         if (success) { \
4358             PL_regstartp[paren] = HOPc(locinput, -1) - PL_bostr; \
4359             PL_regendp[paren] = locinput - PL_bostr; \
4360             *PL_reglastcloseparen = paren; \
4361         } \
4362         else \
4363             PL_regendp[paren] = -1; \
4364     }
4365
4366         case STAR:              /*  /A*B/ where A is width 1 */
4367             ST.paren = 0;
4368             ST.min = 0;
4369             ST.max = REG_INFTY;
4370             scan = NEXTOPER(scan);
4371             goto repeat;
4372         case PLUS:              /*  /A+B/ where A is width 1 */
4373             ST.paren = 0;
4374             ST.min = 1;
4375             ST.max = REG_INFTY;
4376             scan = NEXTOPER(scan);
4377             goto repeat;
4378         case CURLYN:            /*  /(A){m,n}B/ where A is width 1 */
4379             ST.paren = scan->flags;     /* Which paren to set */
4380             if (ST.paren > PL_regsize)
4381                 PL_regsize = ST.paren;
4382             if (ST.paren > *PL_reglastparen)
4383                 *PL_reglastparen = ST.paren;
4384             ST.min = ARG1(scan);  /* min to match */
4385             ST.max = ARG2(scan);  /* max to match */
4386             if (cur_eval && cur_eval->u.eval.close_paren &&
4387                 cur_eval->u.eval.close_paren == (U32)ST.paren) {
4388                 ST.min=1;
4389                 ST.max=1;
4390             }
4391             scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
4392             goto repeat;
4393         case CURLY:             /*  /A{m,n}B/ where A is width 1 */
4394             ST.paren = 0;
4395             ST.min = ARG1(scan);  /* min to match */
4396             ST.max = ARG2(scan);  /* max to match */
4397             scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
4398           repeat:
4399             /*
4400             * Lookahead to avoid useless match attempts
4401             * when we know what character comes next.
4402             *
4403             * Used to only do .*x and .*?x, but now it allows
4404             * for )'s, ('s and (?{ ... })'s to be in the way
4405             * of the quantifier and the EXACT-like node.  -- japhy
4406             */
4407
4408             if (ST.min > ST.max) /* XXX make this a compile-time check? */
4409                 sayNO;
4410             if (HAS_TEXT(next) || JUMPABLE(next)) {
4411                 U8 *s;
4412                 regnode *text_node = next;
4413
4414                 if (! HAS_TEXT(text_node))
4415                     FIND_NEXT_IMPT(text_node);
4416
4417                 if (! HAS_TEXT(text_node))
4418                     ST.c1 = ST.c2 = CHRTEST_VOID;
4419                 else {
4420                     if (PL_regkind[OP(text_node)] == REF) {
4421                         ST.c1 = ST.c2 = CHRTEST_VOID;
4422                         goto assume_ok_easy;
4423                     }
4424                     else
4425                         s = (U8*)STRING(text_node);
4426
4427                     if (!UTF) {
4428                         ST.c2 = ST.c1 = *s;
4429                         if (OP(text_node) == EXACTF || OP(text_node) == REFF)
4430                             ST.c2 = PL_fold[ST.c1];
4431                         else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
4432                             ST.c2 = PL_fold_locale[ST.c1];
4433                     }
4434                     else { /* UTF */
4435                         if (OP(text_node) == EXACTF || OP(text_node) == REFF) {
4436                              STRLEN ulen1, ulen2;
4437                              U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
4438                              U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
4439
4440                              to_utf8_lower((U8*)s, tmpbuf1, &ulen1);
4441                              to_utf8_upper((U8*)s, tmpbuf2, &ulen2);
4442 #ifdef EBCDIC
4443                              ST.c1 = utf8n_to_uvchr(tmpbuf1, UTF8_MAXLEN, 0,
4444                                                     ckWARN(WARN_UTF8) ?
4445                                                     0 : UTF8_ALLOW_ANY);
4446                              ST.c2 = utf8n_to_uvchr(tmpbuf2, UTF8_MAXLEN, 0,
4447                                                     ckWARN(WARN_UTF8) ?
4448                                                     0 : UTF8_ALLOW_ANY);
4449 #else
4450                              ST.c1 = utf8n_to_uvuni(tmpbuf1, UTF8_MAXBYTES, 0,
4451                                                     uniflags);
4452                              ST.c2 = utf8n_to_uvuni(tmpbuf2, UTF8_MAXBYTES, 0,
4453                                                     uniflags);
4454 #endif
4455                         }
4456                         else {
4457                             ST.c2 = ST.c1 = utf8n_to_uvchr(s, UTF8_MAXBYTES, 0,
4458                                                      uniflags);
4459                         }
4460                     }
4461                 }
4462             }
4463             else
4464                 ST.c1 = ST.c2 = CHRTEST_VOID;
4465         assume_ok_easy:
4466
4467             ST.A = scan;
4468             ST.B = next;
4469             PL_reginput = locinput;
4470             if (minmod) {
4471                 minmod = 0;
4472                 if (ST.min && regrepeat(rex, ST.A, ST.min, depth) < ST.min)
4473                     sayNO;
4474                 ST.count = ST.min;
4475                 locinput = PL_reginput;
4476                 REGCP_SET(ST.cp);
4477                 if (ST.c1 == CHRTEST_VOID)
4478                     goto curly_try_B_min;
4479
4480                 ST.oldloc = locinput;
4481
4482                 /* set ST.maxpos to the furthest point along the
4483                  * string that could possibly match */
4484                 if  (ST.max == REG_INFTY) {
4485                     ST.maxpos = PL_regeol - 1;
4486                     if (do_utf8)
4487                         while (UTF8_IS_CONTINUATION(*(U8*)ST.maxpos))
4488                             ST.maxpos--;
4489                 }
4490                 else if (do_utf8) {
4491                     int m = ST.max - ST.min;
4492                     for (ST.maxpos = locinput;
4493                          m >0 && ST.maxpos + UTF8SKIP(ST.maxpos) <= PL_regeol; m--)
4494                         ST.maxpos += UTF8SKIP(ST.maxpos);
4495                 }
4496                 else {
4497                     ST.maxpos = locinput + ST.max - ST.min;
4498                     if (ST.maxpos >= PL_regeol)
4499                         ST.maxpos = PL_regeol - 1;
4500                 }
4501                 goto curly_try_B_min_known;
4502
4503             }
4504             else {
4505                 ST.count = regrepeat(rex, ST.A, ST.max, depth);
4506                 locinput = PL_reginput;
4507                 if (ST.count < ST.min)
4508                     sayNO;
4509                 if ((ST.count > ST.min)
4510                     && (PL_regkind[OP(ST.B)] == EOL) && (OP(ST.B) != MEOL))
4511                 {
4512                     /* A{m,n} must come at the end of the string, there's
4513                      * no point in backing off ... */
4514                     ST.min = ST.count;
4515                     /* ...except that $ and \Z can match before *and* after
4516                        newline at the end.  Consider "\n\n" =~ /\n+\Z\n/.
4517                        We may back off by one in this case. */
4518                     if (UCHARAT(PL_reginput - 1) == '\n' && OP(ST.B) != EOS)
4519                         ST.min--;
4520                 }
4521                 REGCP_SET(ST.cp);
4522                 goto curly_try_B_max;
4523             }
4524             /* NOTREACHED */
4525
4526
4527         case CURLY_B_min_known_fail:
4528             /* failed to find B in a non-greedy match where c1,c2 valid */
4529             if (ST.paren && ST.count)
4530                 PL_regendp[ST.paren] = -1;
4531
4532             PL_reginput = locinput;     /* Could be reset... */
4533             REGCP_UNWIND(ST.cp);
4534             /* Couldn't or didn't -- move forward. */
4535             ST.oldloc = locinput;
4536             if (do_utf8)
4537                 locinput += UTF8SKIP(locinput);
4538             else
4539                 locinput++;
4540             ST.count++;
4541           curly_try_B_min_known:
4542              /* find the next place where 'B' could work, then call B */
4543             {
4544                 int n;
4545                 if (do_utf8) {
4546                     n = (ST.oldloc == locinput) ? 0 : 1;
4547                     if (ST.c1 == ST.c2) {
4548                         STRLEN len;
4549                         /* set n to utf8_distance(oldloc, locinput) */
4550                         while (locinput <= ST.maxpos &&
4551                                utf8n_to_uvchr((U8*)locinput,
4552                                               UTF8_MAXBYTES, &len,
4553                                               uniflags) != (UV)ST.c1) {
4554                             locinput += len;
4555                             n++;
4556                         }
4557                     }
4558                     else {
4559                         /* set n to utf8_distance(oldloc, locinput) */
4560                         while (locinput <= ST.maxpos) {
4561                             STRLEN len;
4562                             const UV c = utf8n_to_uvchr((U8*)locinput,
4563                                                   UTF8_MAXBYTES, &len,
4564                                                   uniflags);
4565                             if (c == (UV)ST.c1 || c == (UV)ST.c2)
4566                                 break;
4567                             locinput += len;
4568                             n++;
4569                         }
4570                     }
4571                 }
4572                 else {
4573                     if (ST.c1 == ST.c2) {
4574                         while (locinput <= ST.maxpos &&
4575                                UCHARAT(locinput) != ST.c1)
4576                             locinput++;
4577                     }
4578                     else {
4579                         while (locinput <= ST.maxpos
4580                                && UCHARAT(locinput) != ST.c1
4581                                && UCHARAT(locinput) != ST.c2)
4582                             locinput++;
4583                     }
4584                     n = locinput - ST.oldloc;
4585                 }
4586                 if (locinput > ST.maxpos)
4587                     sayNO;
4588                 /* PL_reginput == oldloc now */
4589                 if (n) {
4590                     ST.count += n;
4591                     if (regrepeat(rex, ST.A, n, depth) < n)
4592                         sayNO;
4593                 }
4594                 PL_reginput = locinput;
4595                 CURLY_SETPAREN(ST.paren, ST.count);
4596                 if (cur_eval && cur_eval->u.eval.close_paren &&
4597                     cur_eval->u.eval.close_paren == (U32)ST.paren) {
4598                     goto fake_end;
4599                 }
4600                 PUSH_STATE_GOTO(CURLY_B_min_known, ST.B);
4601             }
4602             /* NOTREACHED */
4603
4604
4605         case CURLY_B_min_fail:
4606             /* failed to find B in a non-greedy match where c1,c2 invalid */
4607             if (ST.paren && ST.count)
4608                 PL_regendp[ST.paren] = -1;
4609
4610             REGCP_UNWIND(ST.cp);
4611             /* failed -- move forward one */
4612             PL_reginput = locinput;
4613             if (regrepeat(rex, ST.A, 1, depth)) {
4614                 ST.count++;
4615                 locinput = PL_reginput;
4616                 if (ST.count <= ST.max || (ST.max == REG_INFTY &&
4617                         ST.count > 0)) /* count overflow ? */
4618                 {
4619                   curly_try_B_min:
4620                     CURLY_SETPAREN(ST.paren, ST.count);
4621                     if (cur_eval && cur_eval->u.eval.close_paren &&
4622                         cur_eval->u.eval.close_paren == (U32)ST.paren) {
4623                         goto fake_end;
4624                     }
4625                     PUSH_STATE_GOTO(CURLY_B_min, ST.B);
4626                 }
4627             }
4628             sayNO;
4629             /* NOTREACHED */
4630
4631
4632         curly_try_B_max:
4633             /* a successful greedy match: now try to match B */
4634             if (cur_eval && cur_eval->u.eval.close_paren &&
4635                 cur_eval->u.eval.close_paren == (U32)ST.paren) {
4636                 goto fake_end;
4637             }
4638             {
4639                 UV c = 0;
4640                 if (ST.c1 != CHRTEST_VOID)
4641                     c = do_utf8 ? utf8n_to_uvchr((U8*)PL_reginput,
4642                                            UTF8_MAXBYTES, 0, uniflags)
4643                                 : (UV) UCHARAT(PL_reginput);
4644                 /* If it could work, try it. */
4645                 if (ST.c1 == CHRTEST_VOID || c == (UV)ST.c1 || c == (UV)ST.c2) {
4646                     CURLY_SETPAREN(ST.paren, ST.count);
4647                     PUSH_STATE_GOTO(CURLY_B_max, ST.B);
4648                     /* NOTREACHED */
4649                 }
4650             }
4651             /* FALL THROUGH */
4652         case CURLY_B_max_fail:
4653             /* failed to find B in a greedy match */
4654             if (ST.paren && ST.count)
4655                 PL_regendp[ST.paren] = -1;
4656
4657             REGCP_UNWIND(ST.cp);
4658             /*  back up. */
4659             if (--ST.count < ST.min)
4660                 sayNO;
4661             PL_reginput = locinput = HOPc(locinput, -1);
4662             goto curly_try_B_max;
4663
4664 #undef ST
4665
4666         case END:
4667             fake_end:
4668             if (cur_eval) {
4669                 /* we've just finished A in /(??{A})B/; now continue with B */
4670                 I32 tmpix;
4671
4672
4673                 st->u.eval.toggle_reg_flags
4674                             = cur_eval->u.eval.toggle_reg_flags;
4675                 PL_reg_flags ^= st->u.eval.toggle_reg_flags;
4676
4677                 st->u.eval.prev_rex = rex;              /* inner */
4678                 rex  = cur_eval->u.eval.prev_rex;       /* outer */
4679                 rexi = RXi_GET(rex);
4680                 cur_curlyx = cur_eval->u.eval.prev_curlyx;
4681                 ReREFCNT_inc(rex);
4682                 st->u.eval.cp = regcppush(0);   /* Save *all* the positions. */
4683                 REGCP_SET(st->u.eval.lastcp);
4684                 PL_reginput = locinput;
4685
4686                 /* Restore parens of the outer rex without popping the
4687                  * savestack */
4688                 tmpix = PL_savestack_ix;
4689                 PL_savestack_ix = cur_eval->u.eval.lastcp;
4690                 regcppop(rex);
4691                 PL_savestack_ix = tmpix;
4692
4693                 st->u.eval.prev_eval = cur_eval;
4694                 cur_eval = cur_eval->u.eval.prev_eval;
4695                 DEBUG_EXECUTE_r(
4696                     PerlIO_printf(Perl_debug_log, "%*s  EVAL trying tail ... %"UVxf"\n",
4697                                       REPORT_CODE_OFF+depth*2, "",PTR2UV(cur_eval)););
4698                 PUSH_YES_STATE_GOTO(EVAL_AB,
4699                         st->u.eval.prev_eval->u.eval.B); /* match B */
4700             }
4701
4702             if (locinput < reginfo->till) {
4703                 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
4704                                       "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
4705                                       PL_colors[4],
4706                                       (long)(locinput - PL_reg_starttry),
4707                                       (long)(reginfo->till - PL_reg_starttry),
4708                                       PL_colors[5]));
4709
4710                 sayNO_SILENT;           /* Cannot match: too short. */
4711             }
4712             PL_reginput = locinput;     /* put where regtry can find it */
4713             sayYES;                     /* Success! */
4714
4715         case SUCCEED: /* successful SUSPEND/UNLESSM/IFMATCH/CURLYM */
4716             DEBUG_EXECUTE_r(
4717             PerlIO_printf(Perl_debug_log,
4718                 "%*s  %ssubpattern success...%s\n",
4719                 REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5]));
4720             PL_reginput = locinput;     /* put where regtry can find it */
4721             sayYES;                     /* Success! */
4722
4723 #undef  ST
4724 #define ST st->u.ifmatch
4725
4726         case SUSPEND:   /* (?>A) */
4727             ST.wanted = 1;
4728             PL_reginput = locinput;
4729             goto do_ifmatch;
4730
4731         case UNLESSM:   /* -ve lookaround: (?!A), or with flags, (?<!A) */
4732             ST.wanted = 0;
4733             goto ifmatch_trivial_fail_test;
4734
4735         case IFMATCH:   /* +ve lookaround: (?=A), or with flags, (?<=A) */
4736             ST.wanted = 1;
4737           ifmatch_trivial_fail_test:
4738             if (scan->flags) {
4739                 char * const s = HOPBACKc(locinput, scan->flags);
4740                 if (!s) {
4741                     /* trivial fail */
4742                     if (logical) {
4743                         logical = 0;
4744                         sw = 1 - (bool)ST.wanted;
4745                     }
4746                     else if (ST.wanted)
4747                         sayNO;
4748                     next = scan + ARG(scan);
4749                     if (next == scan)
4750                         next = NULL;
4751                     break;
4752                 }
4753                 PL_reginput = s;
4754             }
4755             else
4756                 PL_reginput = locinput;
4757
4758           do_ifmatch:
4759             ST.me = scan;
4760             ST.logical = logical;
4761             /* execute body of (?...A) */
4762             PUSH_YES_STATE_GOTO(IFMATCH_A, NEXTOPER(NEXTOPER(scan)));
4763             /* NOTREACHED */
4764
4765         case IFMATCH_A_fail: /* body of (?...A) failed */
4766             ST.wanted = !ST.wanted;
4767             /* FALL THROUGH */
4768
4769         case IFMATCH_A: /* body of (?...A) succeeded */
4770             if (ST.logical) {
4771                 sw = (bool)ST.wanted;
4772             }
4773             else if (!ST.wanted)
4774                 sayNO;
4775
4776             if (OP(ST.me) == SUSPEND)
4777                 locinput = PL_reginput;
4778             else {
4779                 locinput = PL_reginput = st->locinput;
4780                 nextchr = UCHARAT(locinput);
4781             }
4782             scan = ST.me + ARG(ST.me);
4783             if (scan == ST.me)
4784                 scan = NULL;
4785             continue; /* execute B */
4786
4787 #undef ST
4788
4789         case LONGJMP:
4790             next = scan + ARG(scan);
4791             if (next == scan)
4792                 next = NULL;
4793             break;
4794         case COMMIT:
4795             reginfo->cutpoint = PL_regeol;
4796             /* FALLTHROUGH */
4797         case PRUNE:
4798             PL_reginput = locinput;
4799             if (!scan->flags)
4800                 sv_yes_mark = sv_commit = (SV*)rexi->data->data[ ARG( scan ) ];
4801             PUSH_STATE_GOTO(COMMIT_next,next);
4802             /* NOTREACHED */
4803         case COMMIT_next_fail:
4804             no_final = 1;
4805             /* FALLTHROUGH */
4806         case OPFAIL:
4807             sayNO;
4808             /* NOTREACHED */
4809
4810 #define ST st->u.mark
4811         case MARKPOINT:
4812             ST.prev_mark = mark_state;
4813             ST.mark_name = sv_commit = sv_yes_mark
4814                 = (SV*)rexi->data->data[ ARG( scan ) ];
4815             mark_state = st;
4816             ST.mark_loc = PL_reginput = locinput;
4817             PUSH_YES_STATE_GOTO(MARKPOINT_next,next);
4818             /* NOTREACHED */
4819         case MARKPOINT_next:
4820             mark_state = ST.prev_mark;
4821             sayYES;
4822             /* NOTREACHED */
4823         case MARKPOINT_next_fail:
4824             if (popmark && sv_eq(ST.mark_name,popmark))
4825             {
4826                 if (ST.mark_loc > startpoint)
4827                     reginfo->cutpoint = HOPBACKc(ST.mark_loc, 1);
4828                 popmark = NULL; /* we found our mark */
4829                 sv_commit = ST.mark_name;
4830
4831                 DEBUG_EXECUTE_r({
4832                         PerlIO_printf(Perl_debug_log,
4833                             "%*s  %ssetting cutpoint to mark:%"SVf"...%s\n",
4834                             REPORT_CODE_OFF+depth*2, "",
4835                             PL_colors[4], sv_commit, PL_colors[5]);
4836                 });
4837             }
4838             mark_state = ST.prev_mark;
4839             sv_yes_mark = mark_state ?
4840                 mark_state->u.mark.mark_name : NULL;
4841             sayNO;
4842             /* NOTREACHED */
4843         case SKIP:
4844             PL_reginput = locinput;
4845             if (scan->flags) {
4846                 /* (*CUT) : if we fail we cut here*/
4847                 ST.mark_name = NULL;
4848                 ST.mark_loc = locinput;
4849                 PUSH_STATE_GOTO(SKIP_next,next);
4850             } else {
4851                 /* (*CUT:NAME) : if there is a (*MARK:NAME) fail where it was,
4852                    otherwise do nothing.  Meaning we need to scan
4853                  */
4854                 regmatch_state *cur = mark_state;
4855                 SV *find = (SV*)rexi->data->data[ ARG( scan ) ];
4856
4857                 while (cur) {
4858                     if ( sv_eq( cur->u.mark.mark_name,
4859                                 find ) )
4860                     {
4861                         ST.mark_name = find;
4862                         PUSH_STATE_GOTO( SKIP_next, next );
4863                     }
4864                     cur = cur->u.mark.prev_mark;
4865                 }
4866             }
4867             /* Didn't find our (*MARK:NAME) so ignore this (*CUT:NAME) */
4868             break;
4869         case SKIP_next_fail:
4870             if (ST.mark_name) {
4871                 /* (*CUT:NAME) - Set up to search for the name as we
4872                    collapse the stack*/
4873                 popmark = ST.mark_name;
4874             } else {
4875                 /* (*CUT) - No name, we cut here.*/
4876                 if (ST.mark_loc > startpoint)
4877                     reginfo->cutpoint = HOPBACKc(ST.mark_loc, 1);
4878                 /* but we set sv_commit to latest mark_name if there
4879                    is one so they can test to see how things lead to this
4880                    cut */
4881                 if (mark_state)
4882                     sv_commit=mark_state->u.mark.mark_name;
4883             }
4884             no_final = 1;
4885             sayNO;
4886             /* NOTREACHED */
4887 #undef ST
4888
4889         default:
4890             PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
4891                           PTR2UV(scan), OP(scan));
4892             Perl_croak(aTHX_ "regexp memory corruption");
4893
4894         } /* end switch */
4895
4896         /* switch break jumps here */
4897         scan = next; /* prepare to execute the next op and ... */
4898         continue;    /* ... jump back to the top, reusing st */
4899         /* NOTREACHED */
4900
4901       push_yes_state:
4902         /* push a state that backtracks on success */
4903         st->u.yes.prev_yes_state = yes_state;
4904         yes_state = st;
4905         /* FALL THROUGH */
4906       push_state:
4907         /* push a new regex state, then continue at scan  */
4908         {
4909             regmatch_state *newst;
4910
4911             DEBUG_STACK_r({
4912                 regmatch_state *cur = st;
4913                 regmatch_state *curyes = yes_state;
4914                 int curd = depth;
4915                 regmatch_slab *slab = PL_regmatch_slab;
4916                 for (;curd > -1;cur--,curd--) {
4917                     if (cur < SLAB_FIRST(slab)) {
4918                         slab = slab->prev;
4919                         cur = SLAB_LAST(slab);
4920                     }
4921                     PerlIO_printf(Perl_error_log, "%*s#%-3d %-10s %s\n",
4922                         REPORT_CODE_OFF + 2 + depth * 2,"",
4923                         curd, reg_name[cur->resume_state],
4924                         (curyes == cur) ? "yes" : ""
4925                     );
4926                     if (curyes == cur)
4927                         curyes = cur->u.yes.prev_yes_state;
4928                 }
4929             } else
4930                 DEBUG_STATE_pp("push")
4931             );
4932             depth++;
4933             st->locinput = locinput;
4934             newst = st+1;
4935             if (newst >  SLAB_LAST(PL_regmatch_slab))
4936                 newst = S_push_slab(aTHX);
4937             PL_regmatch_state = newst;
4938
4939             locinput = PL_reginput;
4940             nextchr = UCHARAT(locinput);
4941             st = newst;
4942             continue;
4943             /* NOTREACHED */
4944         }
4945     }
4946
4947     /*
4948     * We get here only if there's trouble -- normally "case END" is
4949     * the terminating point.
4950     */
4951     Perl_croak(aTHX_ "corrupted regexp pointers");
4952     /*NOTREACHED*/
4953     sayNO;
4954
4955 yes:
4956     if (yes_state) {
4957         /* we have successfully completed a subexpression, but we must now
4958          * pop to the state marked by yes_state and continue from there */
4959         assert(st != yes_state);
4960 #ifdef DEBUGGING
4961         while (st != yes_state) {
4962             st--;
4963             if (st < SLAB_FIRST(PL_regmatch_slab)) {
4964                 PL_regmatch_slab = PL_regmatch_slab->prev;
4965                 st = SLAB_LAST(PL_regmatch_slab);
4966             }
4967             DEBUG_STATE_r({
4968                 if (no_final) {
4969                     DEBUG_STATE_pp("pop (no final)");
4970                 } else {
4971                     DEBUG_STATE_pp("pop (yes)");
4972                 }
4973             });
4974             depth--;
4975         }
4976 #else
4977         while (yes_state < SLAB_FIRST(PL_regmatch_slab)
4978             || yes_state > SLAB_LAST(PL_regmatch_slab))
4979         {
4980             /* not in this slab, pop slab */
4981             depth -= (st - SLAB_FIRST(PL_regmatch_slab) + 1);
4982             PL_regmatch_slab = PL_regmatch_slab->prev;
4983             st = SLAB_LAST(PL_regmatch_slab);
4984         }
4985         depth -= (st - yes_state);
4986 #endif
4987         st = yes_state;
4988         yes_state = st->u.yes.prev_yes_state;
4989         PL_regmatch_state = st;
4990
4991         if (no_final) {
4992             locinput= st->locinput;
4993             nextchr = UCHARAT(locinput);
4994         }
4995         state_num = st->resume_state + no_final;
4996         goto reenter_switch;
4997     }
4998
4999     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
5000                           PL_colors[4], PL_colors[5]));
5001
5002     result = 1;
5003     goto final_exit;
5004
5005 no:
5006     DEBUG_EXECUTE_r(
5007         PerlIO_printf(Perl_debug_log,
5008             "%*s  %sfailed...%s\n",
5009             REPORT_CODE_OFF+depth*2, "",
5010             PL_colors[4], PL_colors[5])
5011         );
5012
5013 no_silent:
5014     if (no_final) {
5015         if (yes_state) {
5016             goto yes;
5017         } else {
5018             goto final_exit;
5019         }
5020     }
5021     if (depth) {
5022         /* there's a previous state to backtrack to */
5023         st--;
5024         if (st < SLAB_FIRST(PL_regmatch_slab)) {
5025             PL_regmatch_slab = PL_regmatch_slab->prev;
5026             st = SLAB_LAST(PL_regmatch_slab);
5027         }
5028         PL_regmatch_state = st;
5029         locinput= st->locinput;
5030         nextchr = UCHARAT(locinput);
5031
5032         DEBUG_STATE_pp("pop");
5033         depth--;
5034         if (yes_state == st)
5035             yes_state = st->u.yes.prev_yes_state;
5036
5037         state_num = st->resume_state + 1; /* failure = success + 1 */
5038         goto reenter_switch;
5039     }
5040     result = 0;
5041
5042   final_exit:
5043     if (rex->intflags & PREGf_VERBARG_SEEN) {
5044         SV *sv_err = get_sv("REGERROR", 1);
5045         SV *sv_mrk = get_sv("REGMARK", 1);
5046         if (result) {
5047             sv_commit = &PL_sv_no;
5048             if (!sv_yes_mark)
5049                 sv_yes_mark = &PL_sv_yes;
5050         } else {
5051             if (!sv_commit)
5052                 sv_commit = &PL_sv_yes;
5053             sv_yes_mark = &PL_sv_no;
5054         }
5055         sv_setsv(sv_err, sv_commit);
5056         sv_setsv(sv_mrk, sv_yes_mark);
5057     }
5058     /* restore original high-water mark */
5059     PL_regmatch_slab  = orig_slab;
5060     PL_regmatch_state = orig_state;
5061
5062     /* free all slabs above current one */
5063     if (orig_slab->next) {
5064         regmatch_slab *sl = orig_slab->next;
5065         orig_slab->next = NULL;
5066         while (sl) {
5067             regmatch_slab * const osl = sl;
5068             sl = sl->next;
5069             Safefree(osl);
5070         }
5071     }
5072
5073     return result;
5074 }
5075
5076 /*
5077  - regrepeat - repeatedly match something simple, report how many
5078  */
5079 /*
5080  * [This routine now assumes that it will only match on things of length 1.
5081  * That was true before, but now we assume scan - reginput is the count,
5082  * rather than incrementing count on every character.  [Er, except utf8.]]
5083  */
5084 STATIC I32
5085 S_regrepeat(pTHX_ const regexp *prog, const regnode *p, I32 max, int depth)
5086 {
5087     dVAR;
5088     register char *scan;
5089     register I32 c;
5090     register char *loceol = PL_regeol;
5091     register I32 hardcount = 0;
5092     register bool do_utf8 = PL_reg_match_utf8;
5093
5094     scan = PL_reginput;
5095     if (max == REG_INFTY)
5096         max = I32_MAX;
5097     else if (max < loceol - scan)
5098         loceol = scan + max;
5099     switch (OP(p)) {
5100     case REG_ANY:
5101         if (do_utf8) {
5102             loceol = PL_regeol;
5103             while (scan < loceol && hardcount < max && *scan != '\n') {
5104                 scan += UTF8SKIP(scan);
5105                 hardcount++;
5106             }
5107         } else {
5108             while (scan < loceol && *scan != '\n')
5109                 scan++;
5110         }
5111         break;
5112     case SANY:
5113         if (do_utf8) {
5114             loceol = PL_regeol;
5115             while (scan < loceol && hardcount < max) {
5116                 scan += UTF8SKIP(scan);
5117                 hardcount++;
5118             }
5119         }
5120         else
5121             scan = loceol;
5122         break;
5123     case CANY:
5124         scan = loceol;
5125         break;
5126     case EXACT:         /* length of string is 1 */
5127         c = (U8)*STRING(p);
5128         while (scan < loceol && UCHARAT(scan) == c)
5129             scan++;
5130         break;
5131     case EXACTF:        /* length of string is 1 */
5132         c = (U8)*STRING(p);
5133         while (scan < loceol &&
5134                (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
5135             scan++;
5136         break;
5137     case EXACTFL:       /* length of string is 1 */
5138         PL_reg_flags |= RF_tainted;
5139         c = (U8)*STRING(p);
5140         while (scan < loceol &&
5141                (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
5142             scan++;
5143         break;
5144     case ANYOF:
5145         if (do_utf8) {
5146             loceol = PL_regeol;
5147             while (hardcount < max && scan < loceol &&
5148                    reginclass(prog, p, (U8*)scan, 0, do_utf8)) {
5149                 scan += UTF8SKIP(scan);
5150                 hardcount++;
5151             }
5152         } else {
5153             while (scan < loceol && REGINCLASS(prog, p, (U8*)scan))
5154                 scan++;
5155         }
5156         break;
5157     case ALNUM:
5158         if (do_utf8) {
5159             loceol = PL_regeol;
5160             LOAD_UTF8_CHARCLASS_ALNUM();
5161             while (hardcount < max && scan < loceol &&
5162                    swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
5163                 scan += UTF8SKIP(scan);
5164                 hardcount++;
5165             }
5166         } else {
5167             while (scan < loceol && isALNUM(*scan))
5168                 scan++;
5169         }
5170         break;
5171     case ALNUML:
5172         PL_reg_flags |= RF_tainted;
5173         if (do_utf8) {
5174             loceol = PL_regeol;
5175             while (hardcount < max && scan < loceol &&
5176                    isALNUM_LC_utf8((U8*)scan)) {
5177                 scan += UTF8SKIP(scan);
5178                 hardcount++;
5179             }
5180         } else {
5181             while (scan < loceol && isALNUM_LC(*scan))
5182                 scan++;
5183         }
5184         break;
5185     case NALNUM:
5186         if (do_utf8) {
5187             loceol = PL_regeol;
5188             LOAD_UTF8_CHARCLASS_ALNUM();
5189             while (hardcount < max && scan < loceol &&
5190                    !swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
5191                 scan += UTF8SKIP(scan);
5192                 hardcount++;
5193             }
5194         } else {
5195             while (scan < loceol && !isALNUM(*scan))
5196                 scan++;
5197         }
5198         break;
5199     case NALNUML:
5200         PL_reg_flags |= RF_tainted;
5201         if (do_utf8) {
5202             loceol = PL_regeol;
5203             while (hardcount < max && scan < loceol &&
5204                    !isALNUM_LC_utf8((U8*)scan)) {
5205                 scan += UTF8SKIP(scan);
5206                 hardcount++;
5207             }
5208         } else {
5209             while (scan < loceol && !isALNUM_LC(*scan))
5210                 scan++;
5211         }
5212         break;
5213     case SPACE:
5214         if (do_utf8) {
5215             loceol = PL_regeol;
5216             LOAD_UTF8_CHARCLASS_SPACE();
5217             while (hardcount < max && scan < loceol &&
5218                    (*scan == ' ' ||
5219                     swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
5220                 scan += UTF8SKIP(scan);
5221                 hardcount++;
5222             }
5223         } else {
5224             while (scan < loceol && isSPACE(*scan))
5225                 scan++;
5226         }
5227         break;
5228     case SPACEL:
5229         PL_reg_flags |= RF_tainted;
5230         if (do_utf8) {
5231             loceol = PL_regeol;
5232             while (hardcount < max && scan < loceol &&
5233                    (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
5234                 scan += UTF8SKIP(scan);
5235                 hardcount++;
5236             }
5237         } else {
5238             while (scan < loceol && isSPACE_LC(*scan))
5239                 scan++;
5240         }
5241         break;
5242     case NSPACE:
5243         if (do_utf8) {
5244             loceol = PL_regeol;
5245             LOAD_UTF8_CHARCLASS_SPACE();
5246             while (hardcount < max && scan < loceol &&
5247                    !(*scan == ' ' ||
5248                      swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
5249                 scan += UTF8SKIP(scan);
5250                 hardcount++;
5251             }
5252         } else {
5253             while (scan < loceol && !isSPACE(*scan))
5254                 scan++;
5255             break;
5256         }
5257     case NSPACEL:
5258         PL_reg_flags |= RF_tainted;
5259         if (do_utf8) {
5260             loceol = PL_regeol;
5261             while (hardcount < max && scan < loceol &&
5262                    !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
5263                 scan += UTF8SKIP(scan);
5264                 hardcount++;
5265             }
5266         } else {
5267             while (scan < loceol && !isSPACE_LC(*scan))
5268                 scan++;
5269         }
5270         break;
5271     case DIGIT:
5272         if (do_utf8) {
5273             loceol = PL_regeol;
5274             LOAD_UTF8_CHARCLASS_DIGIT();
5275             while (hardcount < max && scan < loceol &&
5276                    swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
5277                 scan += UTF8SKIP(scan);
5278                 hardcount++;
5279             }
5280         } else {
5281             while (scan < loceol && isDIGIT(*scan))
5282                 scan++;
5283         }
5284         break;
5285     case NDIGIT:
5286         if (do_utf8) {
5287             loceol = PL_regeol;
5288             LOAD_UTF8_CHARCLASS_DIGIT();
5289             while (hardcount < max && scan < loceol &&
5290                    !swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
5291                 scan += UTF8SKIP(scan);
5292                 hardcount++;
5293             }
5294         } else {
5295             while (scan < loceol && !isDIGIT(*scan))
5296                 scan++;
5297         }
5298         break;
5299     default:            /* Called on something of 0 width. */
5300         break;          /* So match right here or not at all. */
5301     }
5302
5303     if (hardcount)
5304         c = hardcount;
5305     else
5306         c = scan - PL_reginput;
5307     PL_reginput = scan;
5308
5309     DEBUG_r({
5310         GET_RE_DEBUG_FLAGS_DECL;
5311         DEBUG_EXECUTE_r({
5312             SV * const prop = sv_newmortal();
5313             regprop(prog, prop, p);
5314             PerlIO_printf(Perl_debug_log,
5315                         "%*s  %s can match %"IVdf" times out of %"IVdf"...\n",
5316                         REPORT_CODE_OFF + depth*2, "", SvPVX_const(prop),(IV)c,(IV)max);
5317         });
5318     });
5319
5320     return(c);
5321 }
5322
5323
5324 #if !defined(PERL_IN_XSUB_RE) || defined(PLUGGABLE_RE_EXTENSION)
5325 /*
5326 - regclass_swash - prepare the utf8 swash
5327 */
5328
5329 SV *
5330 Perl_regclass_swash(pTHX_ const regexp *prog, register const regnode* node, bool doinit, SV** listsvp, SV **altsvp)
5331 {
5332     dVAR;
5333     SV *sw  = NULL;
5334     SV *si  = NULL;
5335     SV *alt = NULL;
5336     RXi_GET_DECL(prog,progi);
5337     const struct reg_data * const data = prog ? progi->data : NULL;
5338
5339     if (data && data->count) {
5340         const U32 n = ARG(node);
5341
5342         if (data->what[n] == 's') {
5343             SV * const rv = (SV*)data->data[n];
5344             AV * const av = (AV*)SvRV((SV*)rv);
5345             SV **const ary = AvARRAY(av);
5346             SV **a, **b;
5347
5348             /* See the end of regcomp.c:S_regclass() for
5349              * documentation of these array elements. */
5350
5351             si = *ary;
5352             a  = SvROK(ary[1]) ? &ary[1] : 0;
5353             b  = SvTYPE(ary[2]) == SVt_PVAV ? &ary[2] : 0;
5354
5355             if (a)
5356                 sw = *a;
5357             else if (si && doinit) {
5358                 sw = swash_init("utf8", "", si, 1, 0);
5359                 (void)av_store(av, 1, sw);
5360             }
5361             if (b)
5362                 alt = *b;
5363         }
5364     }
5365
5366     if (listsvp)
5367         *listsvp = si;
5368     if (altsvp)
5369         *altsvp  = alt;
5370
5371     return sw;
5372 }
5373 #endif
5374
5375 /*
5376  - reginclass - determine if a character falls into a character class
5377
5378   The n is the ANYOF regnode, the p is the target string, lenp
5379   is pointer to the maximum length of how far to go in the p
5380   (if the lenp is zero, UTF8SKIP(p) is used),
5381   do_utf8 tells whether the target string is in UTF-8.
5382
5383  */
5384
5385 STATIC bool
5386 S_reginclass(pTHX_ const regexp *prog, register const regnode *n, register const U8* p, STRLEN* lenp, register bool do_utf8)
5387 {
5388     dVAR;
5389     const char flags = ANYOF_FLAGS(n);
5390     bool match = FALSE;
5391     UV c = *p;
5392     STRLEN len = 0;
5393     STRLEN plen;
5394
5395     if (do_utf8 && !UTF8_IS_INVARIANT(c)) {
5396         c = utf8n_to_uvchr(p, UTF8_MAXBYTES, &len,
5397                 (UTF8_ALLOW_DEFAULT & UTF8_ALLOW_ANYUV) | UTF8_CHECK_ONLY);
5398                 /* see [perl #37836] for UTF8_ALLOW_ANYUV */
5399         if (len == (STRLEN)-1)
5400             Perl_croak(aTHX_ "Malformed UTF-8 character (fatal)");
5401     }
5402
5403     plen = lenp ? *lenp : UNISKIP(NATIVE_TO_UNI(c));
5404     if (do_utf8 || (flags & ANYOF_UNICODE)) {
5405         if (lenp)
5406             *lenp = 0;
5407         if (do_utf8 && !ANYOF_RUNTIME(n)) {
5408             if (len != (STRLEN)-1 && c < 256 && ANYOF_BITMAP_TEST(n, c))
5409                 match = TRUE;
5410         }
5411         if (!match && do_utf8 && (flags & ANYOF_UNICODE_ALL) && c >= 256)
5412             match = TRUE;
5413         if (!match) {
5414             AV *av;
5415             SV * const sw = regclass_swash(prog, n, TRUE, 0, (SV**)&av);
5416
5417             if (sw) {
5418                 if (swash_fetch(sw, p, do_utf8))
5419                     match = TRUE;
5420                 else if (flags & ANYOF_FOLD) {
5421                     if (!match && lenp && av) {
5422                         I32 i;
5423                         for (i = 0; i <= av_len(av); i++) {
5424                             SV* const sv = *av_fetch(av, i, FALSE);
5425                             STRLEN len;
5426                             const char * const s = SvPV_const(sv, len);
5427
5428                             if (len <= plen && memEQ(s, (char*)p, len)) {
5429                                 *lenp = len;
5430                                 match = TRUE;
5431                                 break;
5432                             }
5433                         }
5434                     }
5435                     if (!match) {
5436                         U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
5437                         STRLEN tmplen;
5438
5439                         to_utf8_fold(p, tmpbuf, &tmplen);
5440                         if (swash_fetch(sw, tmpbuf, do_utf8))
5441                             match = TRUE;
5442                     }
5443                 }
5444             }
5445         }
5446         if (match && lenp && *lenp == 0)
5447             *lenp = UNISKIP(NATIVE_TO_UNI(c));
5448     }
5449     if (!match && c < 256) {
5450         if (ANYOF_BITMAP_TEST(n, c))
5451             match = TRUE;
5452         else if (flags & ANYOF_FOLD) {
5453             U8 f;
5454
5455             if (flags & ANYOF_LOCALE) {
5456                 PL_reg_flags |= RF_tainted;
5457                 f = PL_fold_locale[c];
5458             }
5459             else
5460                 f = PL_fold[c];
5461             if (f != c && ANYOF_BITMAP_TEST(n, f))
5462                 match = TRUE;
5463         }
5464
5465         if (!match && (flags & ANYOF_CLASS)) {
5466             PL_reg_flags |= RF_tainted;
5467             if (
5468                 (ANYOF_CLASS_TEST(n, ANYOF_ALNUM)   &&  isALNUM_LC(c))  ||
5469                 (ANYOF_CLASS_TEST(n, ANYOF_NALNUM)  && !isALNUM_LC(c))  ||
5470                 (ANYOF_CLASS_TEST(n, ANYOF_SPACE)   &&  isSPACE_LC(c))  ||
5471                 (ANYOF_CLASS_TEST(n, ANYOF_NSPACE)  && !isSPACE_LC(c))  ||
5472                 (ANYOF_CLASS_TEST(n, ANYOF_DIGIT)   &&  isDIGIT_LC(c))  ||
5473                 (ANYOF_CLASS_TEST(n, ANYOF_NDIGIT)  && !isDIGIT_LC(c))  ||
5474                 (ANYOF_CLASS_TEST(n, ANYOF_ALNUMC)  &&  isALNUMC_LC(c)) ||
5475                 (ANYOF_CLASS_TEST(n, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
5476                 (ANYOF_CLASS_TEST(n, ANYOF_ALPHA)   &&  isALPHA_LC(c))  ||
5477                 (ANYOF_CLASS_TEST(n, ANYOF_NALPHA)  && !isALPHA_LC(c))  ||
5478                 (ANYOF_CLASS_TEST(n, ANYOF_ASCII)   &&  isASCII(c))     ||
5479                 (ANYOF_CLASS_TEST(n, ANYOF_NASCII)  && !isASCII(c))     ||
5480                 (ANYOF_CLASS_TEST(n, ANYOF_CNTRL)   &&  isCNTRL_LC(c))  ||
5481                 (ANYOF_CLASS_TEST(n, ANYOF_NCNTRL)  && !isCNTRL_LC(c))  ||
5482                 (ANYOF_CLASS_TEST(n, ANYOF_GRAPH)   &&  isGRAPH_LC(c))  ||
5483                 (ANYOF_CLASS_TEST(n, ANYOF_NGRAPH)  && !isGRAPH_LC(c))  ||
5484                 (ANYOF_CLASS_TEST(n, ANYOF_LOWER)   &&  isLOWER_LC(c))  ||
5485                 (ANYOF_CLASS_TEST(n, ANYOF_NLOWER)  && !isLOWER_LC(c))  ||
5486                 (ANYOF_CLASS_TEST(n, ANYOF_PRINT)   &&  isPRINT_LC(c))  ||
5487                 (ANYOF_CLASS_TEST(n, ANYOF_NPRINT)  && !isPRINT_LC(c))  ||
5488                 (ANYOF_CLASS_TEST(n, ANYOF_PUNCT)   &&  isPUNCT_LC(c))  ||
5489                 (ANYOF_CLASS_TEST(n, ANYOF_NPUNCT)  && !isPUNCT_LC(c))  ||
5490                 (ANYOF_CLASS_TEST(n, ANYOF_UPPER)   &&  isUPPER_LC(c))  ||
5491                 (ANYOF_CLASS_TEST(n, ANYOF_NUPPER)  && !isUPPER_LC(c))  ||
5492                 (ANYOF_CLASS_TEST(n, ANYOF_XDIGIT)  &&  isXDIGIT(c))    ||
5493                 (ANYOF_CLASS_TEST(n, ANYOF_NXDIGIT) && !isXDIGIT(c))    ||
5494                 (ANYOF_CLASS_TEST(n, ANYOF_PSXSPC)  &&  isPSXSPC(c))    ||
5495                 (ANYOF_CLASS_TEST(n, ANYOF_NPSXSPC) && !isPSXSPC(c))    ||
5496                 (ANYOF_CLASS_TEST(n, ANYOF_BLANK)   &&  isBLANK(c))     ||
5497                 (ANYOF_CLASS_TEST(n, ANYOF_NBLANK)  && !isBLANK(c))
5498                 ) /* How's that for a conditional? */
5499             {
5500                 match = TRUE;
5501             }
5502         }
5503     }
5504
5505     return (flags & ANYOF_INVERT) ? !match : match;
5506 }
5507
5508 STATIC U8 *
5509 S_reghop3(U8 *s, I32 off, const U8* lim)
5510 {
5511     dVAR;
5512     if (off >= 0) {
5513         while (off-- && s < lim) {
5514             /* XXX could check well-formedness here */
5515             s += UTF8SKIP(s);
5516         }
5517     }
5518     else {
5519         while (off++ && s > lim) {
5520             s--;
5521             if (UTF8_IS_CONTINUED(*s)) {
5522                 while (s > lim && UTF8_IS_CONTINUATION(*s))
5523                     s--;
5524             }
5525             /* XXX could check well-formedness here */
5526         }
5527     }
5528     return s;
5529 }
5530
5531 #ifdef XXX_dmq
5532 /* there are a bunch of places where we use two reghop3's that should
5533    be replaced with this routine. but since thats not done yet
5534    we ifdef it out - dmq
5535 */
5536 STATIC U8 *
5537 S_reghop4(U8 *s, I32 off, const U8* llim, const U8* rlim)
5538 {
5539     dVAR;
5540     if (off >= 0) {
5541         while (off-- && s < rlim) {
5542             /* XXX could check well-formedness here */
5543             s += UTF8SKIP(s);
5544         }
5545     }
5546     else {
5547         while (off++ && s > llim) {
5548             s--;
5549             if (UTF8_IS_CONTINUED(*s)) {
5550                 while (s > llim && UTF8_IS_CONTINUATION(*s))
5551                     s--;
5552             }
5553             /* XXX could check well-formedness here */
5554         }
5555     }
5556     return s;
5557 }
5558 #endif
5559
5560 STATIC U8 *
5561 S_reghopmaybe3(U8* s, I32 off, const U8* lim)
5562 {
5563     dVAR;
5564     if (off >= 0) {
5565         while (off-- && s < lim) {
5566             /* XXX could check well-formedness here */
5567             s += UTF8SKIP(s);
5568         }
5569         if (off >= 0)
5570             return NULL;
5571     }
5572     else {
5573         while (off++ && s > lim) {
5574             s--;
5575             if (UTF8_IS_CONTINUED(*s)) {
5576                 while (s > lim && UTF8_IS_CONTINUATION(*s))
5577                     s--;
5578             }
5579             /* XXX could check well-formedness here */
5580         }
5581         if (off <= 0)
5582             return NULL;
5583     }
5584     return s;
5585 }
5586
5587 static void
5588 restore_pos(pTHX_ void *arg)
5589 {
5590     dVAR;
5591     regexp * const rex = (regexp *)arg;
5592     if (PL_reg_eval_set) {
5593         if (PL_reg_oldsaved) {
5594             rex->subbeg = PL_reg_oldsaved;
5595             rex->sublen = PL_reg_oldsavedlen;
5596 #ifdef PERL_OLD_COPY_ON_WRITE
5597             rex->saved_copy = PL_nrs;
5598 #endif
5599             RX_MATCH_COPIED_on(rex);
5600         }
5601         PL_reg_magic->mg_len = PL_reg_oldpos;
5602         PL_reg_eval_set = 0;
5603         PL_curpm = PL_reg_oldcurpm;
5604     }
5605 }
5606
5607 STATIC void
5608 S_to_utf8_substr(pTHX_ register regexp *prog)
5609 {
5610     if (prog->float_substr && !prog->float_utf8) {
5611         SV* const sv = newSVsv(prog->float_substr);
5612         prog->float_utf8 = sv;
5613         sv_utf8_upgrade(sv);
5614         if (SvTAIL(prog->float_substr))
5615             SvTAIL_on(sv);
5616         if (prog->float_substr == prog->check_substr)
5617             prog->check_utf8 = sv;
5618     }
5619     if (prog->anchored_substr && !prog->anchored_utf8) {
5620         SV* const sv = newSVsv(prog->anchored_substr);
5621         prog->anchored_utf8 = sv;
5622         sv_utf8_upgrade(sv);
5623         if (SvTAIL(prog->anchored_substr))
5624             SvTAIL_on(sv);
5625         if (prog->anchored_substr == prog->check_substr)
5626             prog->check_utf8 = sv;
5627     }
5628 }
5629
5630 STATIC void
5631 S_to_byte_substr(pTHX_ register regexp *prog)
5632 {
5633     dVAR;
5634     if (prog->float_utf8 && !prog->float_substr) {
5635         SV* sv = newSVsv(prog->float_utf8);
5636         prog->float_substr = sv;
5637         if (sv_utf8_downgrade(sv, TRUE)) {
5638             if (SvTAIL(prog->float_utf8))
5639                 SvTAIL_on(sv);
5640         } else {
5641             SvREFCNT_dec(sv);
5642             prog->float_substr = sv = &PL_sv_undef;
5643         }
5644         if (prog->float_utf8 == prog->check_utf8)
5645             prog->check_substr = sv;
5646     }
5647     if (prog->anchored_utf8 && !prog->anchored_substr) {
5648         SV* sv = newSVsv(prog->anchored_utf8);
5649         prog->anchored_substr = sv;
5650         if (sv_utf8_downgrade(sv, TRUE)) {
5651             if (SvTAIL(prog->anchored_utf8))
5652                 SvTAIL_on(sv);
5653         } else {
5654             SvREFCNT_dec(sv);
5655             prog->anchored_substr = sv = &PL_sv_undef;
5656         }
5657         if (prog->anchored_utf8 == prog->check_utf8)
5658             prog->check_substr = sv;
5659     }
5660 }
5661
5662 /*
5663  * Local variables:
5664  * c-indentation-style: bsd
5665  * c-basic-offset: 4
5666  * indent-tabs-mode: t
5667  * End:
5668  *
5669  * ex: set ts=8 sts=4 sw=4 noet:
5670  */