regexec.c

   1 /*    regexec.c
   2  */
   3
   4 /*
   5  * "One Ring to rule them all, One Ring to find them..."
   6  */
   7
   8 /* This file contains functions for executing a regular expression.  See
   9  * also regcomp.c which funnily enough, contains functions for compiling
  10  * a regular expression.
  11  *
  12  * This file is also copied at build time to ext/re/re_exec.c, where
  13  * it's built with -DPERL_EXT_RE_BUILD -DPERL_EXT_RE_DEBUG -DPERL_EXT.
  14  * This causes the main functions to be compiled under new names and with
  15  * debugging support added, which makes "use re 'debug'" work.
  16  */
  17
  18 /* NOTE: this is derived from Henry Spencer's regexp code, and should not
  19  * confused with the original package (see point 3 below).  Thanks, Henry!
  20  */
  21
  22 /* Additional note: this code is very heavily munged from Henry's version
  23  * in places.  In some spots I've traded clarity for efficiency, so don't
  24  * blame Henry for some of the lack of readability.
  25  */
  26
  27 /* The names of the functions have been changed from regcomp and
  28  * regexec to  pregcomp and pregexec in order to avoid conflicts
  29  * with the POSIX routines of the same names.
  30 */
  31
  32 #ifdef PERL_EXT_RE_BUILD
  33 #include "re_top.h"
  34 #endif
  35
  36 /*
  37  * pregcomp and pregexec -- regsub and regerror are not used in perl
  38  *
  39  *      Copyright (c) 1986 by University of Toronto.
  40  *      Written by Henry Spencer.  Not derived from licensed software.
  41  *
  42  *      Permission is granted to anyone to use this software for any
  43  *      purpose on any computer system, and to redistribute it freely,
  44  *      subject to the following restrictions:
  45  *
  46  *      1. The author is not responsible for the consequences of use of
  47  *              this software, no matter how awful, even if they arise
  48  *              from defects in it.
  49  *
  50  *      2. The origin of this software must not be misrepresented, either
  51  *              by explicit claim or by omission.
  52  *
  53  *      3. Altered versions must be plainly marked as such, and must not
  54  *              be misrepresented as being the original software.
  55  *
  56  ****    Alterations to Henry's code are...
  57  ****
  58  ****    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
  59  ****    2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007 by Larry Wall and others
  60  ****
  61  ****    You may distribute under the terms of either the GNU General Public
  62  ****    License or the Artistic License, as specified in the README file.
  63  *
  64  * Beware that some of this code is subtly aware of the way operator
  65  * precedence is structured in regular expressions.  Serious changes in
  66  * regular-expression syntax might require a total rethink.
  67  */
  68 #include "EXTERN.h"
  69 #define PERL_IN_REGEXEC_C
  70 #include "perl.h"
  71
  72 #ifdef PERL_IN_XSUB_RE
  73 #  include "re_comp.h"
  74 #else
  75 #  include "regcomp.h"
  76 #endif
  77
  78 #define RF_tainted      1               /* tainted information used? */
  79 #define RF_warned       2               /* warned about big count? */
  80
  81 #define RF_utf8         8               /* Pattern contains multibyte chars? */
  82
  83 #define UTF ((PL_reg_flags & RF_utf8) != 0)
  84
  85 #define RS_init         1               /* eval environment created */
  86 #define RS_set          2               /* replsv value is set */
  87
  88 #ifndef STATIC
  89 #define STATIC  static
  90 #endif
  91
  92 #define REGINCLASS(prog,p,c)  (ANYOF_FLAGS(p) ? reginclass(prog,p,c,0,0) : ANYOF_BITMAP_TEST(p,*(c)))
  93
  94 /*
  95  * Forwards.
  96  */
  97
  98 #define CHR_SVLEN(sv) (do_utf8 ? sv_len_utf8(sv) : SvCUR(sv))
  99 #define CHR_DIST(a,b) (PL_reg_match_utf8 ? utf8_distance(a,b) : a - b)
 100
 101 #define HOPc(pos,off) \
 102         (char *)(PL_reg_match_utf8 \
 103             ? reghop3((U8*)pos, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr)) \
 104             : (U8*)(pos + off))
 105 #define HOPBACKc(pos, off) \
 106         (char*)(PL_reg_match_utf8\
 107             ? reghopmaybe3((U8*)pos, -off, (U8*)PL_bostr) \
 108             : (pos - off >= PL_bostr)           \
 109                 ? (U8*)pos - off                \
 110                 : NULL)
 111
 112 #define HOP3(pos,off,lim) (PL_reg_match_utf8 ? reghop3((U8*)(pos), off, (U8*)(lim)) : (U8*)(pos + off))
 113 #define HOP3c(pos,off,lim) ((char*)HOP3(pos,off,lim))
 114
 115 #define LOAD_UTF8_CHARCLASS(class,str) STMT_START { \
 116     if (!CAT2(PL_utf8_,class)) { bool ok; ENTER; save_re_context(); ok=CAT2(is_utf8_,class)((const U8*)str); assert(ok); LEAVE; } } STMT_END
 117 #define LOAD_UTF8_CHARCLASS_ALNUM() LOAD_UTF8_CHARCLASS(alnum,"a")
 118 #define LOAD_UTF8_CHARCLASS_DIGIT() LOAD_UTF8_CHARCLASS(digit,"0")
 119 #define LOAD_UTF8_CHARCLASS_SPACE() LOAD_UTF8_CHARCLASS(space," ")
 120 #define LOAD_UTF8_CHARCLASS_MARK()  LOAD_UTF8_CHARCLASS(mark, "\xcd\x86")
 121
 122 /* TODO: Combine JUMPABLE and HAS_TEXT to cache OP(rn) */
 123
 124 /* for use after a quantifier and before an EXACT-like node -- japhy */
 125 /* it would be nice to rework regcomp.sym to generate this stuff. sigh */
 126 #define JUMPABLE(rn) (      \
 127     OP(rn) == OPEN ||       \
 128     (OP(rn) == CLOSE && (!cur_eval || cur_eval->u.eval.close_paren != ARG(rn))) || \
 129     OP(rn) == EVAL ||   \
 130     OP(rn) == SUSPEND || OP(rn) == IFMATCH || \
 131     OP(rn) == PLUS || OP(rn) == MINMOD || \
 132     OP(rn) == KEEPS || (PL_regkind[OP(rn)] == VERB) || \
 133     (PL_regkind[OP(rn)] == CURLY && ARG1(rn) > 0) \
 134 )
 135 #define IS_EXACT(rn) (PL_regkind[OP(rn)] == EXACT)
 136
 137 #define HAS_TEXT(rn) ( IS_EXACT(rn) || PL_regkind[OP(rn)] == REF )
 138
 139 #if 0
 140 /* Currently these are only used when PL_regkind[OP(rn)] == EXACT so
 141    we don't need this definition. */
 142 #define IS_TEXT(rn)   ( OP(rn)==EXACT   || OP(rn)==REF   || OP(rn)==NREF   )
 143 #define IS_TEXTF(rn)  ( OP(rn)==EXACTF  || OP(rn)==REFF  || OP(rn)==NREFF  )
 144 #define IS_TEXTFL(rn) ( OP(rn)==EXACTFL || OP(rn)==REFFL || OP(rn)==NREFFL )
 145
 146 #else
 147 /* ... so we use this as its faster. */
 148 #define IS_TEXT(rn)   ( OP(rn)==EXACT   )
 149 #define IS_TEXTF(rn)  ( OP(rn)==EXACTF  )
 150 #define IS_TEXTFL(rn) ( OP(rn)==EXACTFL )
 151
 152 #endif
 153
 154 /*
 155   Search for mandatory following text node; for lookahead, the text must
 156   follow but for lookbehind (rn->flags != 0) we skip to the next step.
 157 */
 158 #define FIND_NEXT_IMPT(rn) STMT_START { \
 159     while (JUMPABLE(rn)) { \
 160         const OPCODE type = OP(rn); \
 161         if (type == SUSPEND || PL_regkind[type] == CURLY) \
 162             rn = NEXTOPER(NEXTOPER(rn)); \
 163         else if (type == PLUS) \
 164             rn = NEXTOPER(rn); \
 165         else if (type == IFMATCH) \
 166             rn = (rn->flags == 0) ? NEXTOPER(NEXTOPER(rn)) : rn + ARG(rn); \
 167         else rn += NEXT_OFF(rn); \
 168     } \
 169 } STMT_END
 170
 171
 172 static void restore_pos(pTHX_ void *arg);
 173
 174 STATIC CHECKPOINT
 175 S_regcppush(pTHX_ I32 parenfloor)
 176 {
 177     dVAR;
 178     const int retval = PL_savestack_ix;
 179 #define REGCP_PAREN_ELEMS 4
 180     const int paren_elems_to_push = (PL_regsize - parenfloor) * REGCP_PAREN_ELEMS;
 181     int p;
 182     GET_RE_DEBUG_FLAGS_DECL;
 183
 184     if (paren_elems_to_push < 0)
 185         Perl_croak(aTHX_ "panic: paren_elems_to_push < 0");
 186
 187 #define REGCP_OTHER_ELEMS 7
 188     SSGROW(paren_elems_to_push + REGCP_OTHER_ELEMS);
 189
 190     for (p = PL_regsize; p > parenfloor; p--) {
 191 /* REGCP_PARENS_ELEMS are pushed per pairs of parentheses. */
 192         SSPUSHINT(PL_regoffs[p].end);
 193         SSPUSHINT(PL_regoffs[p].start);
 194         SSPUSHPTR(PL_reg_start_tmp[p]);
 195         SSPUSHINT(p);
 196         DEBUG_BUFFERS_r(PerlIO_printf(Perl_debug_log,
 197           "     saving \\%"UVuf" %"IVdf"(%"IVdf")..%"IVdf"\n",
 198                       (UV)p, (IV)PL_regoffs[p].start,
 199                       (IV)(PL_reg_start_tmp[p] - PL_bostr),
 200                       (IV)PL_regoffs[p].end
 201         ));
 202     }
 203 /* REGCP_OTHER_ELEMS are pushed in any case, parentheses or no. */
 204     SSPUSHPTR(PL_regoffs);
 205     SSPUSHINT(PL_regsize);
 206     SSPUSHINT(*PL_reglastparen);
 207     SSPUSHINT(*PL_reglastcloseparen);
 208     SSPUSHPTR(PL_reginput);
 209 #define REGCP_FRAME_ELEMS 2
 210 /* REGCP_FRAME_ELEMS are part of the REGCP_OTHER_ELEMS and
 211  * are needed for the regexp context stack bookkeeping. */
 212     SSPUSHINT(paren_elems_to_push + REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
 213     SSPUSHINT(SAVEt_REGCONTEXT); /* Magic cookie. */
 214
 215     return retval;
 216 }
 217
 218 /* These are needed since we do not localize EVAL nodes: */
 219 #define REGCP_SET(cp)                                           \
 220     DEBUG_STATE_r(                                              \
 221             PerlIO_printf(Perl_debug_log,                       \
 222                 "  Setting an EVAL scope, savestack=%"IVdf"\n", \
 223                 (IV)PL_savestack_ix));                          \
 224     cp = PL_savestack_ix
 225
 226 #define REGCP_UNWIND(cp)                                        \
 227     DEBUG_STATE_r(                                              \
 228         if (cp != PL_savestack_ix)                              \
 229             PerlIO_printf(Perl_debug_log,                       \
 230                 "  Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
 231                 (IV)(cp), (IV)PL_savestack_ix));                \
 232     regcpblow(cp)
 233
 234 STATIC char *
 235 S_regcppop(pTHX_ const regexp *rex)
 236 {
 237     dVAR;
 238     U32 i;
 239     char *input;
 240
 241     GET_RE_DEBUG_FLAGS_DECL;
 242
 243     /* Pop REGCP_OTHER_ELEMS before the parentheses loop starts. */
 244     i = SSPOPINT;
 245     assert(i == SAVEt_REGCONTEXT); /* Check that the magic cookie is there. */
 246     i = SSPOPINT; /* Parentheses elements to pop. */
 247     input = (char *) SSPOPPTR;
 248     *PL_reglastcloseparen = SSPOPINT;
 249     *PL_reglastparen = SSPOPINT;
 250     PL_regsize = SSPOPINT;
 251     PL_regoffs=(regexp_paren_pair *) SSPOPPTR;
 252
 253
 254     /* Now restore the parentheses context. */
 255     for (i -= (REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
 256          i > 0; i -= REGCP_PAREN_ELEMS) {
 257         I32 tmps;
 258         U32 paren = (U32)SSPOPINT;
 259         PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
 260         PL_regoffs[paren].start = SSPOPINT;
 261         tmps = SSPOPINT;
 262         if (paren <= *PL_reglastparen)
 263             PL_regoffs[paren].end = tmps;
 264         DEBUG_BUFFERS_r(
 265             PerlIO_printf(Perl_debug_log,
 266                           "     restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
 267                           (UV)paren, (IV)PL_regoffs[paren].start,
 268                           (IV)(PL_reg_start_tmp[paren] - PL_bostr),
 269                           (IV)PL_regoffs[paren].end,
 270                           (paren > *PL_reglastparen ? "(no)" : ""));
 271         );
 272     }
 273     DEBUG_BUFFERS_r(
 274         if (*PL_reglastparen + 1 <= rex->nparens) {
 275             PerlIO_printf(Perl_debug_log,
 276                           "     restoring \\%"IVdf"..\\%"IVdf" to undef\n",
 277                           (IV)(*PL_reglastparen + 1), (IV)rex->nparens);
 278         }
 279     );
 280 #if 1
 281     /* It would seem that the similar code in regtry()
 282      * already takes care of this, and in fact it is in
 283      * a better location to since this code can #if 0-ed out
 284      * but the code in regtry() is needed or otherwise tests
 285      * requiring null fields (pat.t#187 and split.t#{13,14}
 286      * (as of patchlevel 7877)  will fail.  Then again,
 287      * this code seems to be necessary or otherwise
 288      * this erroneously leaves $1 defined: "1" =~ /^(?:(\d)x)?\d$/
 289      * --jhi updated by dapm */
 290     for (i = *PL_reglastparen + 1; i <= rex->nparens; i++) {
 291         if (i > PL_regsize)
 292             PL_regoffs[i].start = -1;
 293         PL_regoffs[i].end = -1;
 294     }
 295 #endif
 296     return input;
 297 }
 298
 299 #define regcpblow(cp) LEAVE_SCOPE(cp)   /* Ignores regcppush()ed data. */
 300
 301 /*
 302  * pregexec and friends
 303  */
 304
 305 #ifndef PERL_IN_XSUB_RE
 306 /*
 307  - pregexec - match a regexp against a string
 308  */
 309 I32
 310 Perl_pregexec(pTHX_ REGEXP * const prog, char* stringarg, register char *strend,
 311          char *strbeg, I32 minend, SV *screamer, U32 nosave)
 312 /* strend: pointer to null at end of string */
 313 /* strbeg: real beginning of string */
 314 /* minend: end of match must be >=minend after stringarg. */
 315 /* nosave: For optimizations. */
 316 {
 317     return
 318         regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
 319                       nosave ? 0 : REXEC_COPY_STR);
 320 }
 321 #endif
 322
 323 /*
 324  * Need to implement the following flags for reg_anch:
 325  *
 326  * USE_INTUIT_NOML              - Useful to call re_intuit_start() first
 327  * USE_INTUIT_ML
 328  * INTUIT_AUTORITATIVE_NOML     - Can trust a positive answer
 329  * INTUIT_AUTORITATIVE_ML
 330  * INTUIT_ONCE_NOML             - Intuit can match in one location only.
 331  * INTUIT_ONCE_ML
 332  *
 333  * Another flag for this function: SECOND_TIME (so that float substrs
 334  * with giant delta may be not rechecked).
 335  */
 336
 337 /* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
 338
 339 /* If SCREAM, then SvPVX_const(sv) should be compatible with strpos and strend.
 340    Otherwise, only SvCUR(sv) is used to get strbeg. */
 341
 342 /* XXXX We assume that strpos is strbeg unless sv. */
 343
 344 /* XXXX Some places assume that there is a fixed substring.
 345         An update may be needed if optimizer marks as "INTUITable"
 346         RExen without fixed substrings.  Similarly, it is assumed that
 347         lengths of all the strings are no more than minlen, thus they
 348         cannot come from lookahead.
 349         (Or minlen should take into account lookahead.)
 350   NOTE: Some of this comment is not correct. minlen does now take account
 351   of lookahead/behind. Further research is required. -- demerphq
 352
 353 */
 354
 355 /* A failure to find a constant substring means that there is no need to make
 356    an expensive call to REx engine, thus we celebrate a failure.  Similarly,
 357    finding a substring too deep into the string means that less calls to
 358    regtry() should be needed.
 359
 360    REx compiler's optimizer found 4 possible hints:
 361         a) Anchored substring;
 362         b) Fixed substring;
 363         c) Whether we are anchored (beginning-of-line or \G);
 364         d) First node (of those at offset 0) which may distingush positions;
 365    We use a)b)d) and multiline-part of c), and try to find a position in the
 366    string which does not contradict any of them.
 367  */
 368
 369 /* Most of decisions we do here should have been done at compile time.
 370    The nodes of the REx which we used for the search should have been
 371    deleted from the finite automaton. */
 372
 373 char *
 374 Perl_re_intuit_start(pTHX_ REGEXP * const rx, SV *sv, char *strpos,
 375                      char *strend, const U32 flags, re_scream_pos_data *data)
 376 {
 377     dVAR;
 378     struct regexp *const prog = (struct regexp *)SvANY(rx);
 379     register I32 start_shift = 0;
 380     /* Should be nonnegative! */
 381     register I32 end_shift   = 0;
 382     register char *s;
 383     register SV *check;
 384     char *strbeg;
 385     char *t;
 386     const bool do_utf8 = (sv && SvUTF8(sv)) ? 1 : 0; /* if no sv we have to assume bytes */
 387     I32 ml_anch;
 388     register char *other_last = NULL;   /* other substr checked before this */
 389     char *check_at = NULL;              /* check substr found at this pos */
 390     const I32 multiline = prog->extflags & RXf_PMf_MULTILINE;
 391     RXi_GET_DECL(prog,progi);
 392 #ifdef DEBUGGING
 393     const char * const i_strpos = strpos;
 394 #endif
 395
 396     GET_RE_DEBUG_FLAGS_DECL;
 397
 398     RX_MATCH_UTF8_set(rx,do_utf8);
 399
 400     if (RX_UTF8(rx)) {
 401         PL_reg_flags |= RF_utf8;
 402     }
 403     DEBUG_EXECUTE_r(
 404         debug_start_match(rx, do_utf8, strpos, strend,
 405             sv ? "Guessing start of match in sv for"
 406                : "Guessing start of match in string for");
 407               );
 408
 409     /* CHR_DIST() would be more correct here but it makes things slow. */
 410     if (prog->minlen > strend - strpos) {
 411         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 412                               "String too short... [re_intuit_start]\n"));
 413         goto fail;
 414     }
 415
 416     strbeg = (sv && SvPOK(sv)) ? strend - SvCUR(sv) : strpos;
 417     PL_regeol = strend;
 418     if (do_utf8) {
 419         if (!prog->check_utf8 && prog->check_substr)
 420             to_utf8_substr(prog);
 421         check = prog->check_utf8;
 422     } else {
 423         if (!prog->check_substr && prog->check_utf8)
 424             to_byte_substr(prog);
 425         check = prog->check_substr;
 426     }
 427     if (check == &PL_sv_undef) {
 428         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 429                 "Non-utf8 string cannot match utf8 check string\n"));
 430         goto fail;
 431     }
 432     if (prog->extflags & RXf_ANCH) {    /* Match at beg-of-str or after \n */
 433         ml_anch = !( (prog->extflags & RXf_ANCH_SINGLE)
 434                      || ( (prog->extflags & RXf_ANCH_BOL)
 435                           && !multiline ) );    /* Check after \n? */
 436
 437         if (!ml_anch) {
 438           if ( !(prog->extflags & RXf_ANCH_GPOS) /* Checked by the caller */
 439                 && !(prog->intflags & PREGf_IMPLICIT) /* not a real BOL */
 440                /* SvCUR is not set on references: SvRV and SvPVX_const overlap */
 441                && sv && !SvROK(sv)
 442                && (strpos != strbeg)) {
 443               DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
 444               goto fail;
 445           }
 446           if (prog->check_offset_min == prog->check_offset_max &&
 447               !(prog->extflags & RXf_CANY_SEEN)) {
 448             /* Substring at constant offset from beg-of-str... */
 449             I32 slen;
 450
 451             s = HOP3c(strpos, prog->check_offset_min, strend);
 452
 453             if (SvTAIL(check)) {
 454                 slen = SvCUR(check);    /* >= 1 */
 455
 456                 if ( strend - s > slen || strend - s < slen - 1
 457                      || (strend - s == slen && strend[-1] != '\n')) {
 458                     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
 459                     goto fail_finish;
 460                 }
 461                 /* Now should match s[0..slen-2] */
 462                 slen--;
 463                 if (slen && (*SvPVX_const(check) != *s
 464                              || (slen > 1
 465                                  && memNE(SvPVX_const(check), s, slen)))) {
 466                   report_neq:
 467                     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
 468                     goto fail_finish;
 469                 }
 470             }
 471             else if (*SvPVX_const(check) != *s
 472                      || ((slen = SvCUR(check)) > 1
 473                          && memNE(SvPVX_const(check), s, slen)))
 474                 goto report_neq;
 475             check_at = s;
 476             goto success_at_start;
 477           }
 478         }
 479         /* Match is anchored, but substr is not anchored wrt beg-of-str. */
 480         s = strpos;
 481         start_shift = prog->check_offset_min; /* okay to underestimate on CC */
 482         end_shift = prog->check_end_shift;
 483
 484         if (!ml_anch) {
 485             const I32 end = prog->check_offset_max + CHR_SVLEN(check)
 486                                          - (SvTAIL(check) != 0);
 487             const I32 eshift = CHR_DIST((U8*)strend, (U8*)s) - end;
 488
 489             if (end_shift < eshift)
 490                 end_shift = eshift;
 491         }
 492     }
 493     else {                              /* Can match at random position */
 494         ml_anch = 0;
 495         s = strpos;
 496         start_shift = prog->check_offset_min;  /* okay to underestimate on CC */
 497         end_shift = prog->check_end_shift;
 498
 499         /* end shift should be non negative here */
 500     }
 501
 502 #ifdef QDEBUGGING       /* 7/99: reports of failure (with the older version) */
 503     if (end_shift < 0)
 504         Perl_croak(aTHX_ "panic: end_shift: %"IVdf" pattern:\n%s\n ",
 505                    (IV)end_shift, RX_PRECOMP(prog));
 506 #endif
 507
 508   restart:
 509     /* Find a possible match in the region s..strend by looking for
 510        the "check" substring in the region corrected by start/end_shift. */
 511
 512     {
 513         I32 srch_start_shift = start_shift;
 514         I32 srch_end_shift = end_shift;
 515         if (srch_start_shift < 0 && strbeg - s > srch_start_shift) {
 516             srch_end_shift -= ((strbeg - s) - srch_start_shift);
 517             srch_start_shift = strbeg - s;
 518         }
 519     DEBUG_OPTIMISE_MORE_r({
 520         PerlIO_printf(Perl_debug_log, "Check offset min: %"IVdf" Start shift: %"IVdf" End shift %"IVdf" Real End Shift: %"IVdf"\n",
 521             (IV)prog->check_offset_min,
 522             (IV)srch_start_shift,
 523             (IV)srch_end_shift,
 524             (IV)prog->check_end_shift);
 525     });
 526
 527     if (flags & REXEC_SCREAM) {
 528         I32 p = -1;                     /* Internal iterator of scream. */
 529         I32 * const pp = data ? data->scream_pos : &p;
 530
 531         if (PL_screamfirst[BmRARE(check)] >= 0
 532             || ( BmRARE(check) == '\n'
 533                  && (BmPREVIOUS(check) == SvCUR(check) - 1)
 534                  && SvTAIL(check) ))
 535             s = screaminstr(sv, check,
 536                             srch_start_shift + (s - strbeg), srch_end_shift, pp, 0);
 537         else
 538             goto fail_finish;
 539         /* we may be pointing at the wrong string */
 540         if (s && RXp_MATCH_COPIED(prog))
 541             s = strbeg + (s - SvPVX_const(sv));
 542         if (data)
 543             *data->scream_olds = s;
 544     }
 545     else {
 546         U8* start_point;
 547         U8* end_point;
 548         if (prog->extflags & RXf_CANY_SEEN) {
 549             start_point= (U8*)(s + srch_start_shift);
 550             end_point= (U8*)(strend - srch_end_shift);
 551         } else {
 552             start_point= HOP3(s, srch_start_shift, srch_start_shift < 0 ? strbeg : strend);
 553             end_point= HOP3(strend, -srch_end_shift, strbeg);
 554         }
 555         DEBUG_OPTIMISE_MORE_r({
 556             PerlIO_printf(Perl_debug_log, "fbm_instr len=%d str=<%.*s>\n",
 557                 (int)(end_point - start_point),
 558                 (int)(end_point - start_point) > 20 ? 20 : (int)(end_point - start_point),
 559                 start_point);
 560         });
 561
 562         s = fbm_instr( start_point, end_point,
 563                       check, multiline ? FBMrf_MULTILINE : 0);
 564     }
 565     }
 566     /* Update the count-of-usability, remove useless subpatterns,
 567         unshift s.  */
 568
 569     DEBUG_EXECUTE_r({
 570         RE_PV_QUOTED_DECL(quoted, do_utf8, PERL_DEBUG_PAD_ZERO(0),
 571             SvPVX_const(check), RE_SV_DUMPLEN(check), 30);
 572         PerlIO_printf(Perl_debug_log, "%s %s substr %s%s%s",
 573                           (s ? "Found" : "Did not find"),
 574             (check == (do_utf8 ? prog->anchored_utf8 : prog->anchored_substr)
 575                 ? "anchored" : "floating"),
 576             quoted,
 577             RE_SV_TAIL(check),
 578             (s ? " at offset " : "...\n") );
 579     });
 580
 581     if (!s)
 582         goto fail_finish;
 583     /* Finish the diagnostic message */
 584     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
 585
 586     /* XXX dmq: first branch is for positive lookbehind...
 587        Our check string is offset from the beginning of the pattern.
 588        So we need to do any stclass tests offset forward from that
 589        point. I think. :-(
 590      */
 591
 592
 593
 594     check_at=s;
 595
 596
 597     /* Got a candidate.  Check MBOL anchoring, and the *other* substr.
 598        Start with the other substr.
 599        XXXX no SCREAM optimization yet - and a very coarse implementation
 600        XXXX /ttx+/ results in anchored="ttx", floating="x".  floating will
 601                 *always* match.  Probably should be marked during compile...
 602        Probably it is right to do no SCREAM here...
 603      */
 604
 605     if (do_utf8 ? (prog->float_utf8 && prog->anchored_utf8)
 606                 : (prog->float_substr && prog->anchored_substr))
 607     {
 608         /* Take into account the "other" substring. */
 609         /* XXXX May be hopelessly wrong for UTF... */
 610         if (!other_last)
 611             other_last = strpos;
 612         if (check == (do_utf8 ? prog->float_utf8 : prog->float_substr)) {
 613           do_other_anchored:
 614             {
 615                 char * const last = HOP3c(s, -start_shift, strbeg);
 616                 char *last1, *last2;
 617                 char * const saved_s = s;
 618                 SV* must;
 619
 620                 t = s - prog->check_offset_max;
 621                 if (s - strpos > prog->check_offset_max  /* signed-corrected t > strpos */
 622                     && (!do_utf8
 623                         || ((t = (char*)reghopmaybe3((U8*)s, -(prog->check_offset_max), (U8*)strpos))
 624                             && t > strpos)))
 625                     NOOP;
 626                 else
 627                     t = strpos;
 628                 t = HOP3c(t, prog->anchored_offset, strend);
 629                 if (t < other_last)     /* These positions already checked */
 630                     t = other_last;
 631                 last2 = last1 = HOP3c(strend, -prog->minlen, strbeg);
 632                 if (last < last1)
 633                     last1 = last;
 634                 /* XXXX It is not documented what units *_offsets are in.
 635                    We assume bytes, but this is clearly wrong.
 636                    Meaning this code needs to be carefully reviewed for errors.
 637                    dmq.
 638                   */
 639
 640                 /* On end-of-str: see comment below. */
 641                 must = do_utf8 ? prog->anchored_utf8 : prog->anchored_substr;
 642                 if (must == &PL_sv_undef) {
 643                     s = (char*)NULL;
 644                     DEBUG_r(must = prog->anchored_utf8);        /* for debug */
 645                 }
 646                 else
 647                     s = fbm_instr(
 648                         (unsigned char*)t,
 649                         HOP3(HOP3(last1, prog->anchored_offset, strend)
 650                                 + SvCUR(must), -(SvTAIL(must)!=0), strbeg),
 651                         must,
 652                         multiline ? FBMrf_MULTILINE : 0
 653                     );
 654                 DEBUG_EXECUTE_r({
 655                     RE_PV_QUOTED_DECL(quoted, do_utf8, PERL_DEBUG_PAD_ZERO(0),
 656                         SvPVX_const(must), RE_SV_DUMPLEN(must), 30);
 657                     PerlIO_printf(Perl_debug_log, "%s anchored substr %s%s",
 658                         (s ? "Found" : "Contradicts"),
 659                         quoted, RE_SV_TAIL(must));
 660                 });
 661
 662
 663                 if (!s) {
 664                     if (last1 >= last2) {
 665                         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 666                                                 ", giving up...\n"));
 667                         goto fail_finish;
 668                     }
 669                     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 670                         ", trying floating at offset %ld...\n",
 671                         (long)(HOP3c(saved_s, 1, strend) - i_strpos)));
 672                     other_last = HOP3c(last1, prog->anchored_offset+1, strend);
 673                     s = HOP3c(last, 1, strend);
 674                     goto restart;
 675                 }
 676                 else {
 677                     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
 678                           (long)(s - i_strpos)));
 679                     t = HOP3c(s, -prog->anchored_offset, strbeg);
 680                     other_last = HOP3c(s, 1, strend);
 681                     s = saved_s;
 682                     if (t == strpos)
 683                         goto try_at_start;
 684                     goto try_at_offset;
 685                 }
 686             }
 687         }
 688         else {          /* Take into account the floating substring. */
 689             char *last, *last1;
 690             char * const saved_s = s;
 691             SV* must;
 692
 693             t = HOP3c(s, -start_shift, strbeg);
 694             last1 = last =
 695                 HOP3c(strend, -prog->minlen + prog->float_min_offset, strbeg);
 696             if (CHR_DIST((U8*)last, (U8*)t) > prog->float_max_offset)
 697                 last = HOP3c(t, prog->float_max_offset, strend);
 698             s = HOP3c(t, prog->float_min_offset, strend);
 699             if (s < other_last)
 700                 s = other_last;
 701  /* XXXX It is not documented what units *_offsets are in.  Assume bytes.  */
 702             must = do_utf8 ? prog->float_utf8 : prog->float_substr;
 703             /* fbm_instr() takes into account exact value of end-of-str
 704                if the check is SvTAIL(ed).  Since false positives are OK,
 705                and end-of-str is not later than strend we are OK. */
 706             if (must == &PL_sv_undef) {
 707                 s = (char*)NULL;
 708                 DEBUG_r(must = prog->float_utf8);       /* for debug message */
 709             }
 710             else
 711                 s = fbm_instr((unsigned char*)s,
 712                               (unsigned char*)last + SvCUR(must)
 713                                   - (SvTAIL(must)!=0),
 714                               must, multiline ? FBMrf_MULTILINE : 0);
 715             DEBUG_EXECUTE_r({
 716                 RE_PV_QUOTED_DECL(quoted, do_utf8, PERL_DEBUG_PAD_ZERO(0),
 717                     SvPVX_const(must), RE_SV_DUMPLEN(must), 30);
 718                 PerlIO_printf(Perl_debug_log, "%s floating substr %s%s",
 719                     (s ? "Found" : "Contradicts"),
 720                     quoted, RE_SV_TAIL(must));
 721             });
 722             if (!s) {
 723                 if (last1 == last) {
 724                     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 725                                             ", giving up...\n"));
 726                     goto fail_finish;
 727                 }
 728                 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 729                     ", trying anchored starting at offset %ld...\n",
 730                     (long)(saved_s + 1 - i_strpos)));
 731                 other_last = last;
 732                 s = HOP3c(t, 1, strend);
 733                 goto restart;
 734             }
 735             else {
 736                 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
 737                       (long)(s - i_strpos)));
 738                 other_last = s; /* Fix this later. --Hugo */
 739                 s = saved_s;
 740                 if (t == strpos)
 741                     goto try_at_start;
 742                 goto try_at_offset;
 743             }
 744         }
 745     }
 746
 747
 748     t= (char*)HOP3( s, -prog->check_offset_max, (prog->check_offset_max<0) ? strend : strpos);
 749
 750     DEBUG_OPTIMISE_MORE_r(
 751         PerlIO_printf(Perl_debug_log,
 752             "Check offset min:%"IVdf" max:%"IVdf" S:%"IVdf" t:%"IVdf" D:%"IVdf" end:%"IVdf"\n",
 753             (IV)prog->check_offset_min,
 754             (IV)prog->check_offset_max,
 755             (IV)(s-strpos),
 756             (IV)(t-strpos),
 757             (IV)(t-s),
 758             (IV)(strend-strpos)
 759         )
 760     );
 761
 762     if (s - strpos > prog->check_offset_max  /* signed-corrected t > strpos */
 763         && (!do_utf8
 764             || ((t = (char*)reghopmaybe3((U8*)s, -prog->check_offset_max, (U8*) ((prog->check_offset_max<0) ? strend : strpos)))
 765                  && t > strpos)))
 766     {
 767         /* Fixed substring is found far enough so that the match
 768            cannot start at strpos. */
 769       try_at_offset:
 770         if (ml_anch && t[-1] != '\n') {
 771             /* Eventually fbm_*() should handle this, but often
 772                anchored_offset is not 0, so this check will not be wasted. */
 773             /* XXXX In the code below we prefer to look for "^" even in
 774                presence of anchored substrings.  And we search even
 775                beyond the found float position.  These pessimizations
 776                are historical artefacts only.  */
 777           find_anchor:
 778             while (t < strend - prog->minlen) {
 779                 if (*t == '\n') {
 780                     if (t < check_at - prog->check_offset_min) {
 781                         if (do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) {
 782                             /* Since we moved from the found position,
 783                                we definitely contradict the found anchored
 784                                substr.  Due to the above check we do not
 785                                contradict "check" substr.
 786                                Thus we can arrive here only if check substr
 787                                is float.  Redo checking for "other"=="fixed".
 788                              */
 789                             strpos = t + 1;
 790                             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
 791                                 PL_colors[0], PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
 792                             goto do_other_anchored;
 793                         }
 794                         /* We don't contradict the found floating substring. */
 795                         /* XXXX Why not check for STCLASS? */
 796                         s = t + 1;
 797                         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
 798                             PL_colors[0], PL_colors[1], (long)(s - i_strpos)));
 799                         goto set_useful;
 800                     }
 801                     /* Position contradicts check-string */
 802                     /* XXXX probably better to look for check-string
 803                        than for "\n", so one should lower the limit for t? */
 804                     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n",
 805                         PL_colors[0], PL_colors[1], (long)(t + 1 - i_strpos)));
 806                     other_last = strpos = s = t + 1;
 807                     goto restart;
 808                 }
 809                 t++;
 810             }
 811             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
 812                         PL_colors[0], PL_colors[1]));
 813             goto fail_finish;
 814         }
 815         else {
 816             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Starting position does not contradict /%s^%s/m...\n",
 817                         PL_colors[0], PL_colors[1]));
 818         }
 819         s = t;
 820       set_useful:
 821         ++BmUSEFUL(do_utf8 ? prog->check_utf8 : prog->check_substr);    /* hooray/5 */
 822     }
 823     else {
 824         /* The found string does not prohibit matching at strpos,
 825            - no optimization of calling REx engine can be performed,
 826            unless it was an MBOL and we are not after MBOL,
 827            or a future STCLASS check will fail this. */
 828       try_at_start:
 829         /* Even in this situation we may use MBOL flag if strpos is offset
 830            wrt the start of the string. */
 831         if (ml_anch && sv && !SvROK(sv) /* See prev comment on SvROK */
 832             && (strpos != strbeg) && strpos[-1] != '\n'
 833             /* May be due to an implicit anchor of m{.*foo}  */
 834             && !(prog->intflags & PREGf_IMPLICIT))
 835         {
 836             t = strpos;
 837             goto find_anchor;
 838         }
 839         DEBUG_EXECUTE_r( if (ml_anch)
 840             PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n",
 841                           (long)(strpos - i_strpos), PL_colors[0], PL_colors[1]);
 842         );
 843       success_at_start:
 844         if (!(prog->intflags & PREGf_NAUGHTY)   /* XXXX If strpos moved? */
 845             && (do_utf8 ? (
 846                 prog->check_utf8                /* Could be deleted already */
 847                 && --BmUSEFUL(prog->check_utf8) < 0
 848                 && (prog->check_utf8 == prog->float_utf8)
 849             ) : (
 850                 prog->check_substr              /* Could be deleted already */
 851                 && --BmUSEFUL(prog->check_substr) < 0
 852                 && (prog->check_substr == prog->float_substr)
 853             )))
 854         {
 855             /* If flags & SOMETHING - do not do it many times on the same match */
 856             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "... Disabling check substring...\n"));
 857             SvREFCNT_dec(do_utf8 ? prog->check_utf8 : prog->check_substr);
 858             if (do_utf8 ? prog->check_substr : prog->check_utf8)
 859                 SvREFCNT_dec(do_utf8 ? prog->check_substr : prog->check_utf8);
 860             prog->check_substr = prog->check_utf8 = NULL;       /* disable */
 861             prog->float_substr = prog->float_utf8 = NULL;       /* clear */
 862             check = NULL;                       /* abort */
 863             s = strpos;
 864             /* XXXX This is a remnant of the old implementation.  It
 865                     looks wasteful, since now INTUIT can use many
 866                     other heuristics. */
 867             prog->extflags &= ~RXf_USE_INTUIT;
 868         }
 869         else
 870             s = strpos;
 871     }
 872
 873     /* Last resort... */
 874     /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
 875     /* trie stclasses are too expensive to use here, we are better off to
 876        leave it to regmatch itself */
 877     if (progi->regstclass && PL_regkind[OP(progi->regstclass)]!=TRIE) {
 878         /* minlen == 0 is possible if regstclass is \b or \B,
 879            and the fixed substr is ''$.
 880            Since minlen is already taken into account, s+1 is before strend;
 881            accidentally, minlen >= 1 guaranties no false positives at s + 1
 882            even for \b or \B.  But (minlen? 1 : 0) below assumes that
 883            regstclass does not come from lookahead...  */
 884         /* If regstclass takes bytelength more than 1: If charlength==1, OK.
 885            This leaves EXACTF only, which is dealt with in find_byclass().  */
 886         const U8* const str = (U8*)STRING(progi->regstclass);
 887         const int cl_l = (PL_regkind[OP(progi->regstclass)] == EXACT
 888                     ? CHR_DIST(str+STR_LEN(progi->regstclass), str)
 889                     : 1);
 890         char * endpos;
 891         if (prog->anchored_substr || prog->anchored_utf8 || ml_anch)
 892             endpos= HOP3c(s, (prog->minlen ? cl_l : 0), strend);
 893         else if (prog->float_substr || prog->float_utf8)
 894             endpos= HOP3c(HOP3c(check_at, -start_shift, strbeg), cl_l, strend);
 895         else
 896             endpos= strend;
 897
 898         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "start_shift: %"IVdf" check_at: %"IVdf" s: %"IVdf" endpos: %"IVdf"\n",
 899                                       (IV)start_shift, (IV)(check_at - strbeg), (IV)(s - strbeg), (IV)(endpos - strbeg)));
 900
 901         t = s;
 902         s = find_byclass(prog, progi->regstclass, s, endpos, NULL);
 903         if (!s) {
 904 #ifdef DEBUGGING
 905             const char *what = NULL;
 906 #endif
 907             if (endpos == strend) {
 908                 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
 909                                 "Could not match STCLASS...\n") );
 910                 goto fail;
 911             }
 912             DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
 913                                    "This position contradicts STCLASS...\n") );
 914             if ((prog->extflags & RXf_ANCH) && !ml_anch)
 915                 goto fail;
 916             /* Contradict one of substrings */
 917             if (prog->anchored_substr || prog->anchored_utf8) {
 918                 if ((do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) == check) {
 919                     DEBUG_EXECUTE_r( what = "anchored" );
 920                   hop_and_restart:
 921                     s = HOP3c(t, 1, strend);
 922                     if (s + start_shift + end_shift > strend) {
 923                         /* XXXX Should be taken into account earlier? */
 924                         DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
 925                                                "Could not match STCLASS...\n") );
 926                         goto fail;
 927                     }
 928                     if (!check)
 929                         goto giveup;
 930                     DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
 931                                 "Looking for %s substr starting at offset %ld...\n",
 932                                  what, (long)(s + start_shift - i_strpos)) );
 933                     goto restart;
 934                 }
 935                 /* Have both, check_string is floating */
 936                 if (t + start_shift >= check_at) /* Contradicts floating=check */
 937                     goto retry_floating_check;
 938                 /* Recheck anchored substring, but not floating... */
 939                 s = check_at;
 940                 if (!check)
 941                     goto giveup;
 942                 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
 943                           "Looking for anchored substr starting at offset %ld...\n",
 944                           (long)(other_last - i_strpos)) );
 945                 goto do_other_anchored;
 946             }
 947             /* Another way we could have checked stclass at the
 948                current position only: */
 949             if (ml_anch) {
 950                 s = t = t + 1;
 951                 if (!check)
 952                     goto giveup;
 953                 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
 954                           "Looking for /%s^%s/m starting at offset %ld...\n",
 955                           PL_colors[0], PL_colors[1], (long)(t - i_strpos)) );
 956                 goto try_at_offset;
 957             }
 958             if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))     /* Could have been deleted */
 959                 goto fail;
 960             /* Check is floating subtring. */
 961           retry_floating_check:
 962             t = check_at - start_shift;
 963             DEBUG_EXECUTE_r( what = "floating" );
 964             goto hop_and_restart;
 965         }
 966         if (t != s) {
 967             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 968                         "By STCLASS: moving %ld --> %ld\n",
 969                                   (long)(t - i_strpos), (long)(s - i_strpos))
 970                    );
 971         }
 972         else {
 973             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 974                                   "Does not contradict STCLASS...\n");
 975                    );
 976         }
 977     }
 978   giveup:
 979     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%s%s:%s match at offset %ld\n",
 980                           PL_colors[4], (check ? "Guessed" : "Giving up"),
 981                           PL_colors[5], (long)(s - i_strpos)) );
 982     return s;
 983
 984   fail_finish:                          /* Substring not found */
 985     if (prog->check_substr || prog->check_utf8)         /* could be removed already */
 986         BmUSEFUL(do_utf8 ? prog->check_utf8 : prog->check_substr) += 5; /* hooray */
 987   fail:
 988     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
 989                           PL_colors[4], PL_colors[5]));
 990     return NULL;
 991 }
 992
 993 #define DECL_TRIE_TYPE(scan) \
 994     const enum { trie_plain, trie_utf8, trie_utf8_fold, trie_latin_utf8_fold } \
 995                     trie_type = (scan->flags != EXACT) \
 996                               ? (do_utf8 ? trie_utf8_fold : (UTF ? trie_latin_utf8_fold : trie_plain)) \
 997                               : (do_utf8 ? trie_utf8 : trie_plain)
 998
 999 #define REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc, uscan, len,  \
1000 uvc, charid, foldlen, foldbuf, uniflags) STMT_START {                       \
1001     switch (trie_type) {                                                    \
1002     case trie_utf8_fold:                                                    \
1003         if ( foldlen>0 ) {                                                  \
1004             uvc = utf8n_to_uvuni( uscan, UTF8_MAXLEN, &len, uniflags );     \
1005             foldlen -= len;                                                 \
1006             uscan += len;                                                   \
1007             len=0;                                                          \
1008         } else {                                                            \
1009             uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN, &len, uniflags );   \
1010             uvc = to_uni_fold( uvc, foldbuf, &foldlen );                    \
1011             foldlen -= UNISKIP( uvc );                                      \
1012             uscan = foldbuf + UNISKIP( uvc );                               \
1013         }                                                                   \
1014         break;                                                              \
1015     case trie_latin_utf8_fold:                                              \
1016         if ( foldlen>0 ) {                                                  \
1017             uvc = utf8n_to_uvuni( uscan, UTF8_MAXLEN, &len, uniflags );     \
1018             foldlen -= len;                                                 \
1019             uscan += len;                                                   \
1020             len=0;                                                          \
1021         } else {                                                            \
1022             len = 1;                                                        \
1023             uvc = to_uni_fold( *(U8*)uc, foldbuf, &foldlen );               \
1024             foldlen -= UNISKIP( uvc );                                      \
1025             uscan = foldbuf + UNISKIP( uvc );                               \
1026         }                                                                   \
1027         break;                                                              \
1028     case trie_utf8:                                                         \
1029         uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN, &len, uniflags );       \
1030         break;                                                              \
1031     case trie_plain:                                                        \
1032         uvc = (UV)*uc;                                                      \
1033         len = 1;                                                            \
1034     }                                                                       \
1035                                                                             \
1036     if (uvc < 256) {                                                        \
1037         charid = trie->charmap[ uvc ];                                      \
1038     }                                                                       \
1039     else {                                                                  \
1040         charid = 0;                                                         \
1041         if (widecharmap) {                                                  \
1042             SV** const svpp = hv_fetch(widecharmap,                         \
1043                         (char*)&uvc, sizeof(UV), 0);                        \
1044             if (svpp)                                                       \
1045                 charid = (U16)SvIV(*svpp);                                  \
1046         }                                                                   \
1047     }                                                                       \
1048 } STMT_END
1049
1050 #define REXEC_FBC_EXACTISH_CHECK(CoNd)                 \
1051 {                                                      \
1052     char *my_strend= (char *)strend;                   \
1053     if ( (CoNd)                                        \
1054          && (ln == len ||                              \
1055              !ibcmp_utf8(s, &my_strend, 0,  do_utf8,   \
1056                         m, NULL, ln, (bool)UTF))       \
1057          && (!reginfo || regtry(reginfo, &s)) )        \
1058         goto got_it;                                   \
1059     else {                                             \
1060          U8 foldbuf[UTF8_MAXBYTES_CASE+1];             \
1061          uvchr_to_utf8(tmpbuf, c);                     \
1062          f = to_utf8_fold(tmpbuf, foldbuf, &foldlen);  \
1063          if ( f != c                                   \
1064               && (f == c1 || f == c2)                  \
1065               && (ln == len ||                         \
1066                 !ibcmp_utf8(s, &my_strend, 0,  do_utf8,\
1067                               m, NULL, ln, (bool)UTF)) \
1068               && (!reginfo || regtry(reginfo, &s)) )   \
1069               goto got_it;                             \
1070     }                                                  \
1071 }                                                      \
1072 s += len
1073
1074 #define REXEC_FBC_EXACTISH_SCAN(CoNd)                     \
1075 STMT_START {                                              \
1076     while (s <= e) {                                      \
1077         if ( (CoNd)                                       \
1078              && (ln == 1 || !(OP(c) == EXACTF             \
1079                               ? ibcmp(s, m, ln)           \
1080                               : ibcmp_locale(s, m, ln)))  \
1081              && (!reginfo || regtry(reginfo, &s)) )        \
1082             goto got_it;                                  \
1083         s++;                                              \
1084     }                                                     \
1085 } STMT_END
1086
1087 #define REXEC_FBC_UTF8_SCAN(CoDe)                     \
1088 STMT_START {                                          \
1089     while (s + (uskip = UTF8SKIP(s)) <= strend) {     \
1090         CoDe                                          \
1091         s += uskip;                                   \
1092     }                                                 \
1093 } STMT_END
1094
1095 #define REXEC_FBC_SCAN(CoDe)                          \
1096 STMT_START {                                          \
1097     while (s < strend) {                              \
1098         CoDe                                          \
1099         s++;                                          \
1100     }                                                 \
1101 } STMT_END
1102
1103 #define REXEC_FBC_UTF8_CLASS_SCAN(CoNd)               \
1104 REXEC_FBC_UTF8_SCAN(                                  \
1105     if (CoNd) {                                       \
1106         if (tmp && (!reginfo || regtry(reginfo, &s)))  \
1107             goto got_it;                              \
1108         else                                          \
1109             tmp = doevery;                            \
1110     }                                                 \
1111     else                                              \
1112         tmp = 1;                                      \
1113 )
1114
1115 #define REXEC_FBC_CLASS_SCAN(CoNd)                    \
1116 REXEC_FBC_SCAN(                                       \
1117     if (CoNd) {                                       \
1118         if (tmp && (!reginfo || regtry(reginfo, &s)))  \
1119             goto got_it;                              \
1120         else                                          \
1121             tmp = doevery;                            \
1122     }                                                 \
1123     else                                              \
1124         tmp = 1;                                      \
1125 )
1126
1127 #define REXEC_FBC_TRYIT               \
1128 if ((!reginfo || regtry(reginfo, &s))) \
1129     goto got_it
1130
1131 #define REXEC_FBC_CSCAN(CoNdUtF8,CoNd)                         \
1132     if (do_utf8) {                                             \
1133         REXEC_FBC_UTF8_CLASS_SCAN(CoNdUtF8);                   \
1134     }                                                          \
1135     else {                                                     \
1136         REXEC_FBC_CLASS_SCAN(CoNd);                            \
1137     }                                                          \
1138     break
1139
1140 #define REXEC_FBC_CSCAN_PRELOAD(UtFpReLoAd,CoNdUtF8,CoNd)      \
1141     if (do_utf8) {                                             \
1142         UtFpReLoAd;                                            \
1143         REXEC_FBC_UTF8_CLASS_SCAN(CoNdUtF8);                   \
1144     }                                                          \
1145     else {                                                     \
1146         REXEC_FBC_CLASS_SCAN(CoNd);                            \
1147     }                                                          \
1148     break
1149
1150 #define REXEC_FBC_CSCAN_TAINT(CoNdUtF8,CoNd)                   \
1151     PL_reg_flags |= RF_tainted;                                \
1152     if (do_utf8) {                                             \
1153         REXEC_FBC_UTF8_CLASS_SCAN(CoNdUtF8);                   \
1154     }                                                          \
1155     else {                                                     \
1156         REXEC_FBC_CLASS_SCAN(CoNd);                            \
1157     }                                                          \
1158     break
1159
1160 #define DUMP_EXEC_POS(li,s,doutf8) \
1161     dump_exec_pos(li,s,(PL_regeol),(PL_bostr),(PL_reg_starttry),doutf8)
1162
1163 /* We know what class REx starts with.  Try to find this position... */
1164 /* if reginfo is NULL, its a dryrun */
1165 /* annoyingly all the vars in this routine have different names from their counterparts
1166    in regmatch. /grrr */
1167
1168 STATIC char *
1169 S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
1170     const char *strend, regmatch_info *reginfo)
1171 {
1172         dVAR;
1173         const I32 doevery = (prog->intflags & PREGf_SKIP) == 0;
1174         char *m;
1175         STRLEN ln;
1176         STRLEN lnc;
1177         register STRLEN uskip;
1178         unsigned int c1;
1179         unsigned int c2;
1180         char *e;
1181         register I32 tmp = 1;   /* Scratch variable? */
1182         register const bool do_utf8 = PL_reg_match_utf8;
1183         RXi_GET_DECL(prog,progi);
1184
1185         /* We know what class it must start with. */
1186         switch (OP(c)) {
1187         case ANYOF:
1188             if (do_utf8) {
1189                  REXEC_FBC_UTF8_CLASS_SCAN((ANYOF_FLAGS(c) & ANYOF_UNICODE) ||
1190                           !UTF8_IS_INVARIANT((U8)s[0]) ?
1191                           reginclass(prog, c, (U8*)s, 0, do_utf8) :
1192                           REGINCLASS(prog, c, (U8*)s));
1193             }
1194             else {
1195                  while (s < strend) {
1196                       STRLEN skip = 1;
1197
1198                       if (REGINCLASS(prog, c, (U8*)s) ||
1199                           (ANYOF_FOLD_SHARP_S(c, s, strend) &&
1200                            /* The assignment of 2 is intentional:
1201                             * for the folded sharp s, the skip is 2. */
1202                            (skip = SHARP_S_SKIP))) {
1203                            if (tmp && (!reginfo || regtry(reginfo, &s)))
1204                                 goto got_it;
1205                            else
1206                                 tmp = doevery;
1207                       }
1208                       else
1209                            tmp = 1;
1210                       s += skip;
1211                  }
1212             }
1213             break;
1214         case CANY:
1215             REXEC_FBC_SCAN(
1216                 if (tmp && (!reginfo || regtry(reginfo, &s)))
1217                     goto got_it;
1218                 else
1219                     tmp = doevery;
1220             );
1221             break;
1222         case EXACTF:
1223             m   = STRING(c);
1224             ln  = STR_LEN(c);   /* length to match in octets/bytes */
1225             lnc = (I32) ln;     /* length to match in characters */
1226             if (UTF) {
1227                 STRLEN ulen1, ulen2;
1228                 U8 *sm = (U8 *) m;
1229                 U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
1230                 U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
1231                 /* used by commented-out code below */
1232                 /*const U32 uniflags = UTF8_ALLOW_DEFAULT;*/
1233
1234                 /* XXX: Since the node will be case folded at compile
1235                    time this logic is a little odd, although im not
1236                    sure that its actually wrong. --dmq */
1237
1238                 c1 = to_utf8_lower((U8*)m, tmpbuf1, &ulen1);
1239                 c2 = to_utf8_upper((U8*)m, tmpbuf2, &ulen2);
1240
1241                 /* XXX: This is kinda strange. to_utf8_XYZ returns the
1242                    codepoint of the first character in the converted
1243                    form, yet originally we did the extra step.
1244                    No tests fail by commenting this code out however
1245                    so Ive left it out. -- dmq.
1246
1247                 c1 = utf8n_to_uvchr(tmpbuf1, UTF8_MAXBYTES_CASE,
1248                                     0, uniflags);
1249                 c2 = utf8n_to_uvchr(tmpbuf2, UTF8_MAXBYTES_CASE,
1250                                     0, uniflags);
1251                 */
1252
1253                 lnc = 0;
1254                 while (sm < ((U8 *) m + ln)) {
1255                     lnc++;
1256                     sm += UTF8SKIP(sm);
1257                 }
1258             }
1259             else {
1260                 c1 = *(U8*)m;
1261                 c2 = PL_fold[c1];
1262             }
1263             goto do_exactf;
1264         case EXACTFL:
1265             m   = STRING(c);
1266             ln  = STR_LEN(c);
1267             lnc = (I32) ln;
1268             c1 = *(U8*)m;
1269             c2 = PL_fold_locale[c1];
1270           do_exactf:
1271             e = HOP3c(strend, -((I32)lnc), s);
1272
1273             if (!reginfo && e < s)
1274                 e = s;                  /* Due to minlen logic of intuit() */
1275
1276             /* The idea in the EXACTF* cases is to first find the
1277              * first character of the EXACTF* node and then, if
1278              * necessary, case-insensitively compare the full
1279              * text of the node.  The c1 and c2 are the first
1280              * characters (though in Unicode it gets a bit
1281              * more complicated because there are more cases
1282              * than just upper and lower: one needs to use
1283              * the so-called folding case for case-insensitive
1284              * matching (called "loose matching" in Unicode).
1285              * ibcmp_utf8() will do just that. */
1286
1287             if (do_utf8 || UTF) {
1288                 UV c, f;
1289                 U8 tmpbuf [UTF8_MAXBYTES+1];
1290                 STRLEN len = 1;
1291                 STRLEN foldlen;
1292                 const U32 uniflags = UTF8_ALLOW_DEFAULT;
1293                 if (c1 == c2) {
1294                     /* Upper and lower of 1st char are equal -
1295                      * probably not a "letter". */
1296                     while (s <= e) {
1297                         if (do_utf8) {
1298                             c = utf8n_to_uvchr((U8*)s, UTF8_MAXBYTES, &len,
1299                                            uniflags);
1300                         } else {
1301                             c = *((U8*)s);
1302                         }
1303                         REXEC_FBC_EXACTISH_CHECK(c == c1);
1304                     }
1305                 }
1306                 else {
1307                     while (s <= e) {
1308                         if (do_utf8) {
1309                             c = utf8n_to_uvchr((U8*)s, UTF8_MAXBYTES, &len,
1310                                            uniflags);
1311                         } else {
1312                             c = *((U8*)s);
1313                         }
1314
1315                         /* Handle some of the three Greek sigmas cases.
1316                          * Note that not all the possible combinations
1317                          * are handled here: some of them are handled
1318                          * by the standard folding rules, and some of
1319                          * them (the character class or ANYOF cases)
1320                          * are handled during compiletime in
1321                          * regexec.c:S_regclass(). */
1322                         if (c == (UV)UNICODE_GREEK_CAPITAL_LETTER_SIGMA ||
1323                             c == (UV)UNICODE_GREEK_SMALL_LETTER_FINAL_SIGMA)
1324                             c = (UV)UNICODE_GREEK_SMALL_LETTER_SIGMA;
1325
1326                         REXEC_FBC_EXACTISH_CHECK(c == c1 || c == c2);
1327                     }
1328                 }
1329             }
1330             else {
1331                 /* Neither pattern nor string are UTF8 */
1332                 if (c1 == c2)
1333                     REXEC_FBC_EXACTISH_SCAN(*(U8*)s == c1);
1334                 else
1335                     REXEC_FBC_EXACTISH_SCAN(*(U8*)s == c1 || *(U8*)s == c2);
1336             }
1337             break;
1338         case BOUNDL:
1339             PL_reg_flags |= RF_tainted;
1340             /* FALL THROUGH */
1341         case BOUND:
1342             if (do_utf8) {
1343                 if (s == PL_bostr)
1344                     tmp = '\n';
1345                 else {
1346                     U8 * const r = reghop3((U8*)s, -1, (U8*)PL_bostr);
1347                     tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, UTF8_ALLOW_DEFAULT);
1348                 }
1349                 tmp = ((OP(c) == BOUND ?
1350                         isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
1351                 LOAD_UTF8_CHARCLASS_ALNUM();
1352                 REXEC_FBC_UTF8_SCAN(
1353                     if (tmp == !(OP(c) == BOUND ?
1354                                  (bool)swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
1355                                  isALNUM_LC_utf8((U8*)s)))
1356                     {
1357                         tmp = !tmp;
1358                         REXEC_FBC_TRYIT;
1359                 }
1360                 );
1361             }
1362             else {
1363                 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
1364                 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1365                 REXEC_FBC_SCAN(
1366                     if (tmp ==
1367                         !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
1368                         tmp = !tmp;
1369                         REXEC_FBC_TRYIT;
1370                 }
1371                 );
1372             }
1373             if ((!prog->minlen && tmp) && (!reginfo || regtry(reginfo, &s)))
1374                 goto got_it;
1375             break;
1376         case NBOUNDL:
1377             PL_reg_flags |= RF_tainted;
1378             /* FALL THROUGH */
1379         case NBOUND:
1380             if (do_utf8) {
1381                 if (s == PL_bostr)
1382                     tmp = '\n';
1383                 else {
1384                     U8 * const r = reghop3((U8*)s, -1, (U8*)PL_bostr);
1385                     tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, UTF8_ALLOW_DEFAULT);
1386                 }
1387                 tmp = ((OP(c) == NBOUND ?
1388                         isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
1389                 LOAD_UTF8_CHARCLASS_ALNUM();
1390                 REXEC_FBC_UTF8_SCAN(
1391                     if (tmp == !(OP(c) == NBOUND ?
1392                                  (bool)swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
1393                                  isALNUM_LC_utf8((U8*)s)))
1394                         tmp = !tmp;
1395                     else REXEC_FBC_TRYIT;
1396                 );
1397             }
1398             else {
1399                 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
1400                 tmp = ((OP(c) == NBOUND ?
1401                         isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1402                 REXEC_FBC_SCAN(
1403                     if (tmp ==
1404                         !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
1405                         tmp = !tmp;
1406                     else REXEC_FBC_TRYIT;
1407                 );
1408             }
1409             if ((!prog->minlen && !tmp) && (!reginfo || regtry(reginfo, &s)))
1410                 goto got_it;
1411             break;
1412         case ALNUM:
1413             REXEC_FBC_CSCAN_PRELOAD(
1414                 LOAD_UTF8_CHARCLASS_ALNUM(),
1415                 swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8),
1416                 isALNUM(*s)
1417             );
1418         case ALNUML:
1419             REXEC_FBC_CSCAN_TAINT(
1420                 isALNUM_LC_utf8((U8*)s),
1421                 isALNUM_LC(*s)
1422             );
1423         case NALNUM:
1424             REXEC_FBC_CSCAN_PRELOAD(
1425                 LOAD_UTF8_CHARCLASS_ALNUM(),
1426                 !swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8),
1427                 !isALNUM(*s)
1428             );
1429         case NALNUML:
1430             REXEC_FBC_CSCAN_TAINT(
1431                 !isALNUM_LC_utf8((U8*)s),
1432                 !isALNUM_LC(*s)
1433             );
1434         case SPACE:
1435             REXEC_FBC_CSCAN_PRELOAD(
1436                 LOAD_UTF8_CHARCLASS_SPACE(),
1437                 *s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8),
1438                 isSPACE(*s)
1439             );
1440         case SPACEL:
1441             REXEC_FBC_CSCAN_TAINT(
1442                 *s == ' ' || isSPACE_LC_utf8((U8*)s),
1443                 isSPACE_LC(*s)
1444             );
1445         case NSPACE:
1446             REXEC_FBC_CSCAN_PRELOAD(
1447                 LOAD_UTF8_CHARCLASS_SPACE(),
1448                 !(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8)),
1449                 !isSPACE(*s)
1450             );
1451         case NSPACEL:
1452             REXEC_FBC_CSCAN_TAINT(
1453                 !(*s == ' ' || isSPACE_LC_utf8((U8*)s)),
1454                 !isSPACE_LC(*s)
1455             );
1456         case DIGIT:
1457             REXEC_FBC_CSCAN_PRELOAD(
1458                 LOAD_UTF8_CHARCLASS_DIGIT(),
1459                 swash_fetch(PL_utf8_digit,(U8*)s, do_utf8),
1460                 isDIGIT(*s)
1461             );
1462         case DIGITL:
1463             REXEC_FBC_CSCAN_TAINT(
1464                 isDIGIT_LC_utf8((U8*)s),
1465                 isDIGIT_LC(*s)
1466             );
1467         case NDIGIT:
1468             REXEC_FBC_CSCAN_PRELOAD(
1469                 LOAD_UTF8_CHARCLASS_DIGIT(),
1470                 !swash_fetch(PL_utf8_digit,(U8*)s, do_utf8),
1471                 !isDIGIT(*s)
1472             );
1473         case NDIGITL:
1474             REXEC_FBC_CSCAN_TAINT(
1475                 !isDIGIT_LC_utf8((U8*)s),
1476                 !isDIGIT_LC(*s)
1477             );
1478         case LNBREAK:
1479             REXEC_FBC_CSCAN(
1480                 is_LNBREAK_utf8(s),
1481                 is_LNBREAK_latin1(s)
1482             );
1483         case VERTWS:
1484             REXEC_FBC_CSCAN(
1485                 is_VERTWS_utf8(s),
1486                 is_VERTWS_latin1(s)
1487             );
1488         case NVERTWS:
1489             REXEC_FBC_CSCAN(
1490                 !is_VERTWS_utf8(s),
1491                 !is_VERTWS_latin1(s)
1492             );
1493         case HORIZWS:
1494             REXEC_FBC_CSCAN(
1495                 is_HORIZWS_utf8(s),
1496                 is_HORIZWS_latin1(s)
1497             );
1498         case NHORIZWS:
1499             REXEC_FBC_CSCAN(
1500                 !is_HORIZWS_utf8(s),
1501                 !is_HORIZWS_latin1(s)
1502             );
1503         case AHOCORASICKC:
1504         case AHOCORASICK:
1505             {
1506                 DECL_TRIE_TYPE(c);
1507                 /* what trie are we using right now */
1508                 reg_ac_data *aho
1509                     = (reg_ac_data*)progi->data->data[ ARG( c ) ];
1510                 reg_trie_data *trie
1511                     = (reg_trie_data*)progi->data->data[ aho->trie ];
1512                 HV *widecharmap = (HV*) progi->data->data[ aho->trie + 1 ];
1513
1514                 const char *last_start = strend - trie->minlen;
1515 #ifdef DEBUGGING
1516                 const char *real_start = s;
1517 #endif
1518                 STRLEN maxlen = trie->maxlen;
1519                 SV *sv_points;
1520                 U8 **points; /* map of where we were in the input string
1521                                 when reading a given char. For ASCII this
1522                                 is unnecessary overhead as the relationship
1523                                 is always 1:1, but for Unicode, especially
1524                                 case folded Unicode this is not true. */
1525                 U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ];
1526                 U8 *bitmap=NULL;
1527
1528
1529                 GET_RE_DEBUG_FLAGS_DECL;
1530
1531                 /* We can't just allocate points here. We need to wrap it in
1532                  * an SV so it gets freed properly if there is a croak while
1533                  * running the match */
1534                 ENTER;
1535                 SAVETMPS;
1536                 sv_points=newSV(maxlen * sizeof(U8 *));
1537                 SvCUR_set(sv_points,
1538                     maxlen * sizeof(U8 *));
1539                 SvPOK_on(sv_points);
1540                 sv_2mortal(sv_points);
1541                 points=(U8**)SvPV_nolen(sv_points );
1542                 if ( trie_type != trie_utf8_fold
1543                      && (trie->bitmap || OP(c)==AHOCORASICKC) )
1544                 {
1545                     if (trie->bitmap)
1546                         bitmap=(U8*)trie->bitmap;
1547                     else
1548                         bitmap=(U8*)ANYOF_BITMAP(c);
1549                 }
1550                 /* this is the Aho-Corasick algorithm modified a touch
1551                    to include special handling for long "unknown char"
1552                    sequences. The basic idea being that we use AC as long
1553                    as we are dealing with a possible matching char, when
1554                    we encounter an unknown char (and we have not encountered
1555                    an accepting state) we scan forward until we find a legal
1556                    starting char.
1557                    AC matching is basically that of trie matching, except
1558                    that when we encounter a failing transition, we fall back
1559                    to the current states "fail state", and try the current char
1560                    again, a process we repeat until we reach the root state,
1561                    state 1, or a legal transition. If we fail on the root state
1562                    then we can either terminate if we have reached an accepting
1563                    state previously, or restart the entire process from the beginning
1564                    if we have not.
1565
1566                  */
1567                 while (s <= last_start) {
1568                     const U32 uniflags = UTF8_ALLOW_DEFAULT;
1569                     U8 *uc = (U8*)s;
1570                     U16 charid = 0;
1571                     U32 base = 1;
1572                     U32 state = 1;
1573                     UV uvc = 0;
1574                     STRLEN len = 0;
1575                     STRLEN foldlen = 0;
1576                     U8 *uscan = (U8*)NULL;
1577                     U8 *leftmost = NULL;
1578 #ifdef DEBUGGING
1579                     U32 accepted_word= 0;
1580 #endif
1581                     U32 pointpos = 0;
1582
1583                     while ( state && uc <= (U8*)strend ) {
1584                         int failed=0;
1585                         U32 word = aho->states[ state ].wordnum;
1586
1587                         if( state==1 ) {
1588                             if ( bitmap ) {
1589                                 DEBUG_TRIE_EXECUTE_r(
1590                                     if ( uc <= (U8*)last_start && !BITMAP_TEST(bitmap,*uc) ) {
1591                                         dump_exec_pos( (char *)uc, c, strend, real_start,
1592                                             (char *)uc, do_utf8 );
1593                                         PerlIO_printf( Perl_debug_log,
1594                                             " Scanning for legal start char...\n");
1595                                     }
1596                                 );
1597                                 while ( uc <= (U8*)last_start  && !BITMAP_TEST(bitmap,*uc) ) {
1598                                     uc++;
1599                                 }
1600                                 s= (char *)uc;
1601                             }
1602                             if (uc >(U8*)last_start) break;
1603                         }
1604
1605                         if ( word ) {
1606                             U8 *lpos= points[ (pointpos - trie->wordlen[word-1] ) % maxlen ];
1607                             if (!leftmost || lpos < leftmost) {
1608                                 DEBUG_r(accepted_word=word);
1609                                 leftmost= lpos;
1610                             }
1611                             if (base==0) break;
1612
1613                         }
1614                         points[pointpos++ % maxlen]= uc;
1615                         REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc,
1616                                              uscan, len, uvc, charid, foldlen,
1617                                              foldbuf, uniflags);
1618                         DEBUG_TRIE_EXECUTE_r({
1619                             dump_exec_pos( (char *)uc, c, strend, real_start,
1620                                 s,   do_utf8 );
1621                             PerlIO_printf(Perl_debug_log,
1622                                 " Charid:%3u CP:%4"UVxf" ",
1623                                  charid, uvc);
1624                         });
1625
1626                         do {
1627 #ifdef DEBUGGING
1628                             word = aho->states[ state ].wordnum;
1629 #endif
1630                             base = aho->states[ state ].trans.base;
1631
1632                             DEBUG_TRIE_EXECUTE_r({
1633                                 if (failed)
1634                                     dump_exec_pos( (char *)uc, c, strend, real_start,
1635                                         s,   do_utf8 );
1636                                 PerlIO_printf( Perl_debug_log,
1637                                     "%sState: %4"UVxf", word=%"UVxf,
1638                                     failed ? " Fail transition to " : "",
1639                                     (UV)state, (UV)word);
1640                             });
1641                             if ( base ) {
1642                                 U32 tmp;
1643                                 if (charid &&
1644                                      (base + charid > trie->uniquecharcount )
1645                                      && (base + charid - 1 - trie->uniquecharcount
1646                                             < trie->lasttrans)
1647                                      && trie->trans[base + charid - 1 -
1648                                             trie->uniquecharcount].check == state
1649                                      && (tmp=trie->trans[base + charid - 1 -
1650                                         trie->uniquecharcount ].next))
1651                                 {
1652                                     DEBUG_TRIE_EXECUTE_r(
1653                                         PerlIO_printf( Perl_debug_log," - legal\n"));
1654                                     state = tmp;
1655                                     break;
1656                                 }
1657                                 else {
1658                                     DEBUG_TRIE_EXECUTE_r(
1659                                         PerlIO_printf( Perl_debug_log," - fail\n"));
1660                                     failed = 1;
1661                                     state = aho->fail[state];
1662                                 }
1663                             }
1664                             else {
1665                                 /* we must be accepting here */
1666                                 DEBUG_TRIE_EXECUTE_r(
1667                                         PerlIO_printf( Perl_debug_log," - accepting\n"));
1668                                 failed = 1;
1669                                 break;
1670                             }
1671                         } while(state);
1672                         uc += len;
1673                         if (failed) {
1674                             if (leftmost)
1675                                 break;
1676                             if (!state) state = 1;
1677                         }
1678                     }
1679                     if ( aho->states[ state ].wordnum ) {
1680                         U8 *lpos = points[ (pointpos - trie->wordlen[aho->states[ state ].wordnum-1]) % maxlen ];
1681                         if (!leftmost || lpos < leftmost) {
1682                             DEBUG_r(accepted_word=aho->states[ state ].wordnum);
1683                             leftmost = lpos;
1684                         }
1685                     }
1686                     if (leftmost) {
1687                         s = (char*)leftmost;
1688                         DEBUG_TRIE_EXECUTE_r({
1689                             PerlIO_printf(
1690                                 Perl_debug_log,"Matches word #%"UVxf" at position %"IVdf". Trying full pattern...\n",
1691                                 (UV)accepted_word, (IV)(s - real_start)
1692                             );
1693                         });
1694                         if (!reginfo || regtry(reginfo, &s)) {
1695                             FREETMPS;
1696                             LEAVE;
1697                             goto got_it;
1698                         }
1699                         s = HOPc(s,1);
1700                         DEBUG_TRIE_EXECUTE_r({
1701                             PerlIO_printf( Perl_debug_log,"Pattern failed. Looking for new start point...\n");
1702                         });
1703                     } else {
1704                         DEBUG_TRIE_EXECUTE_r(
1705                             PerlIO_printf( Perl_debug_log,"No match.\n"));
1706                         break;
1707                     }
1708                 }
1709                 FREETMPS;
1710                 LEAVE;
1711             }
1712             break;
1713         default:
1714             Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
1715             break;
1716         }
1717         return 0;
1718       got_it:
1719         return s;
1720 }
1721
1722 static void
1723 S_swap_match_buff (pTHX_ regexp *prog) {
1724     regexp_paren_pair *t;
1725
1726     if (!prog->swap) {
1727     /* We have to be careful. If the previous successful match
1728        was from this regex we don't want a subsequent paritally
1729        successful match to clobber the old results.
1730        So when we detect this possibility we add a swap buffer
1731        to the re, and switch the buffer each match. If we fail
1732        we switch it back, otherwise we leave it swapped.
1733     */
1734         Newxz(prog->swap, (prog->nparens + 1), regexp_paren_pair);
1735     }
1736     t = prog->swap;
1737     prog->swap = prog->offs;
1738     prog->offs = t;
1739 }
1740
1741
1742 /*
1743  - regexec_flags - match a regexp against a string
1744  */
1745 I32
1746 Perl_regexec_flags(pTHX_ REGEXP * const rx, char *stringarg, register char *strend,
1747               char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
1748 /* strend: pointer to null at end of string */
1749 /* strbeg: real beginning of string */
1750 /* minend: end of match must be >=minend after stringarg. */
1751 /* data: May be used for some additional optimizations.
1752          Currently its only used, with a U32 cast, for transmitting
1753          the ganch offset when doing a /g match. This will change */
1754 /* nosave: For optimizations. */
1755 {
1756     dVAR;
1757     struct regexp *const prog = (struct regexp *)SvANY(rx);
1758     /*register*/ char *s;
1759     register regnode *c;
1760     /*register*/ char *startpos = stringarg;
1761     I32 minlen;         /* must match at least this many chars */
1762     I32 dontbother = 0; /* how many characters not to try at end */
1763     I32 end_shift = 0;                  /* Same for the end. */         /* CC */
1764     I32 scream_pos = -1;                /* Internal iterator of scream. */
1765     char *scream_olds = NULL;
1766     const bool do_utf8 = (bool)DO_UTF8(sv);
1767     I32 multiline;
1768     RXi_GET_DECL(prog,progi);
1769     regmatch_info reginfo;  /* create some info to pass to regtry etc */
1770     bool swap_on_fail = 0;
1771
1772     GET_RE_DEBUG_FLAGS_DECL;
1773
1774     PERL_UNUSED_ARG(data);
1775
1776     /* Be paranoid... */
1777     if (prog == NULL || startpos == NULL) {
1778         Perl_croak(aTHX_ "NULL regexp parameter");
1779         return 0;
1780     }
1781
1782     multiline = prog->extflags & RXf_PMf_MULTILINE;
1783     reginfo.prog = rx;   /* Yes, sorry that this is confusing.  */
1784
1785     RX_MATCH_UTF8_set(rx, do_utf8);
1786     DEBUG_EXECUTE_r(
1787         debug_start_match(rx, do_utf8, startpos, strend,
1788         "Matching");
1789     );
1790
1791     minlen = prog->minlen;
1792
1793     if (strend - startpos < (minlen+(prog->check_offset_min<0?prog->check_offset_min:0))) {
1794         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
1795                               "String too short [regexec_flags]...\n"));
1796         goto phooey;
1797     }
1798
1799
1800     /* Check validity of program. */
1801     if (UCHARAT(progi->program) != REG_MAGIC) {
1802         Perl_croak(aTHX_ "corrupted regexp program");
1803     }
1804
1805     PL_reg_flags = 0;
1806     PL_reg_eval_set = 0;
1807     PL_reg_maxiter = 0;
1808
1809     if (RX_UTF8(rx))
1810         PL_reg_flags |= RF_utf8;
1811
1812     /* Mark beginning of line for ^ and lookbehind. */
1813     reginfo.bol = startpos; /* XXX not used ??? */
1814     PL_bostr  = strbeg;
1815     reginfo.sv = sv;
1816
1817     /* Mark end of line for $ (and such) */
1818     PL_regeol = strend;
1819
1820     /* see how far we have to get to not match where we matched before */
1821     reginfo.till = startpos+minend;
1822
1823     /* If there is a "must appear" string, look for it. */
1824     s = startpos;
1825
1826     if (prog->extflags & RXf_GPOS_SEEN) { /* Need to set reginfo->ganch */
1827         MAGIC *mg;
1828
1829         if (flags & REXEC_IGNOREPOS)    /* Means: check only at start */
1830             reginfo.ganch = startpos + prog->gofs;
1831         else if (sv && SvTYPE(sv) >= SVt_PVMG
1832                   && SvMAGIC(sv)
1833                   && (mg = mg_find(sv, PERL_MAGIC_regex_global))
1834                   && mg->mg_len >= 0) {
1835             reginfo.ganch = strbeg + mg->mg_len;        /* Defined pos() */
1836             if (prog->extflags & RXf_ANCH_GPOS) {
1837                 if (s > reginfo.ganch)
1838                     goto phooey;
1839                 s = reginfo.ganch - prog->gofs;
1840             }
1841         }
1842         else if (data) {
1843             reginfo.ganch = strbeg + PTR2UV(data);
1844         } else                          /* pos() not defined */
1845             reginfo.ganch = strbeg;
1846     }
1847     if (PL_curpm && (PM_GETRE(PL_curpm) == rx)) {
1848         swap_on_fail = 1;
1849         swap_match_buff(prog); /* do we need a save destructor here for
1850                                   eval dies? */
1851     }
1852     if (!(flags & REXEC_CHECKED) && (prog->check_substr != NULL || prog->check_utf8 != NULL)) {
1853         re_scream_pos_data d;
1854
1855         d.scream_olds = &scream_olds;
1856         d.scream_pos = &scream_pos;
1857         s = re_intuit_start(rx, sv, s, strend, flags, &d);
1858         if (!s) {
1859             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Not present...\n"));
1860             goto phooey;        /* not present */
1861         }
1862     }
1863
1864
1865
1866     /* Simplest case:  anchored match need be tried only once. */
1867     /*  [unless only anchor is BOL and multiline is set] */
1868     if (prog->extflags & (RXf_ANCH & ~RXf_ANCH_GPOS)) {
1869         if (s == startpos && regtry(&reginfo, &startpos))
1870             goto got_it;
1871         else if (multiline || (prog->intflags & PREGf_IMPLICIT)
1872                  || (prog->extflags & RXf_ANCH_MBOL)) /* XXXX SBOL? */
1873         {
1874             char *end;
1875
1876             if (minlen)
1877                 dontbother = minlen - 1;
1878             end = HOP3c(strend, -dontbother, strbeg) - 1;
1879             /* for multiline we only have to try after newlines */
1880             if (prog->check_substr || prog->check_utf8) {
1881                 if (s == startpos)
1882                     goto after_try;
1883                 while (1) {
1884                     if (regtry(&reginfo, &s))
1885                         goto got_it;
1886                   after_try:
1887                     if (s > end)
1888                         goto phooey;
1889                     if (prog->extflags & RXf_USE_INTUIT) {
1890                         s = re_intuit_start(rx, sv, s + 1, strend, flags, NULL);
1891                         if (!s)
1892                             goto phooey;
1893                     }
1894                     else
1895                         s++;
1896                 }
1897             } else {
1898                 if (s > startpos)
1899                     s--;
1900                 while (s < end) {
1901                     if (*s++ == '\n') { /* don't need PL_utf8skip here */
1902                         if (regtry(&reginfo, &s))
1903                             goto got_it;
1904                     }
1905                 }
1906             }
1907         }
1908         goto phooey;
1909     } else if (RXf_GPOS_CHECK == (prog->extflags & RXf_GPOS_CHECK))
1910     {
1911         /* the warning about reginfo.ganch being used without intialization
1912            is bogus -- we set it above, when prog->extflags & RXf_GPOS_SEEN
1913            and we only enter this block when the same bit is set. */
1914         char *tmp_s = reginfo.ganch - prog->gofs;
1915         if (regtry(&reginfo, &tmp_s))
1916             goto got_it;
1917         goto phooey;
1918     }
1919
1920     /* Messy cases:  unanchored match. */
1921     if ((prog->anchored_substr || prog->anchored_utf8) && prog->intflags & PREGf_SKIP) {
1922         /* we have /x+whatever/ */
1923         /* it must be a one character string (XXXX Except UTF?) */
1924         char ch;
1925 #ifdef DEBUGGING
1926         int did_match = 0;
1927 #endif
1928         if (!(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr))
1929             do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1930         ch = SvPVX_const(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr)[0];
1931
1932         if (do_utf8) {
1933             REXEC_FBC_SCAN(
1934                 if (*s == ch) {
1935                     DEBUG_EXECUTE_r( did_match = 1 );
1936                     if (regtry(&reginfo, &s)) goto got_it;
1937                     s += UTF8SKIP(s);
1938                     while (s < strend && *s == ch)
1939                         s += UTF8SKIP(s);
1940                 }
1941             );
1942         }
1943         else {
1944             REXEC_FBC_SCAN(
1945                 if (*s == ch) {
1946                     DEBUG_EXECUTE_r( did_match = 1 );
1947                     if (regtry(&reginfo, &s)) goto got_it;
1948                     s++;
1949                     while (s < strend && *s == ch)
1950                         s++;
1951                 }
1952             );
1953         }
1954         DEBUG_EXECUTE_r(if (!did_match)
1955                 PerlIO_printf(Perl_debug_log,
1956                                   "Did not find anchored character...\n")
1957                );
1958     }
1959     else if (prog->anchored_substr != NULL
1960               || prog->anchored_utf8 != NULL
1961               || ((prog->float_substr != NULL || prog->float_utf8 != NULL)
1962                   && prog->float_max_offset < strend - s)) {
1963         SV *must;
1964         I32 back_max;
1965         I32 back_min;
1966         char *last;
1967         char *last1;            /* Last position checked before */
1968 #ifdef DEBUGGING
1969         int did_match = 0;
1970 #endif
1971         if (prog->anchored_substr || prog->anchored_utf8) {
1972             if (!(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr))
1973                 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1974             must = do_utf8 ? prog->anchored_utf8 : prog->anchored_substr;
1975             back_max = back_min = prog->anchored_offset;
1976         } else {
1977             if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))
1978                 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1979             must = do_utf8 ? prog->float_utf8 : prog->float_substr;
1980             back_max = prog->float_max_offset;
1981             back_min = prog->float_min_offset;
1982         }
1983
1984
1985         if (must == &PL_sv_undef)
1986             /* could not downgrade utf8 check substring, so must fail */
1987             goto phooey;
1988
1989         if (back_min<0) {
1990             last = strend;
1991         } else {
1992             last = HOP3c(strend,        /* Cannot start after this */
1993                   -(I32)(CHR_SVLEN(must)
1994                          - (SvTAIL(must) != 0) + back_min), strbeg);
1995         }
1996         if (s > PL_bostr)
1997             last1 = HOPc(s, -1);
1998         else
1999             last1 = s - 1;      /* bogus */
2000
2001         /* XXXX check_substr already used to find "s", can optimize if
2002            check_substr==must. */
2003         scream_pos = -1;
2004         dontbother = end_shift;
2005         strend = HOPc(strend, -dontbother);
2006         while ( (s <= last) &&
2007                 ((flags & REXEC_SCREAM)
2008                  ? (s = screaminstr(sv, must, HOP3c(s, back_min, (back_min<0 ? strbeg : strend)) - strbeg,
2009                                     end_shift, &scream_pos, 0))
2010                  : (s = fbm_instr((unsigned char*)HOP3(s, back_min, (back_min<0 ? strbeg : strend)),
2011                                   (unsigned char*)strend, must,
2012                                   multiline ? FBMrf_MULTILINE : 0))) ) {
2013             /* we may be pointing at the wrong string */
2014             if ((flags & REXEC_SCREAM) && RXp_MATCH_COPIED(prog))
2015                 s = strbeg + (s - SvPVX_const(sv));
2016             DEBUG_EXECUTE_r( did_match = 1 );
2017             if (HOPc(s, -back_max) > last1) {
2018                 last1 = HOPc(s, -back_min);
2019                 s = HOPc(s, -back_max);
2020             }
2021             else {
2022                 char * const t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
2023
2024                 last1 = HOPc(s, -back_min);
2025                 s = t;
2026             }
2027             if (do_utf8) {
2028                 while (s <= last1) {
2029                     if (regtry(&reginfo, &s))
2030                         goto got_it;
2031                     s += UTF8SKIP(s);
2032                 }
2033             }
2034             else {
2035                 while (s <= last1) {
2036                     if (regtry(&reginfo, &s))
2037                         goto got_it;
2038                     s++;
2039                 }
2040             }
2041         }
2042         DEBUG_EXECUTE_r(if (!did_match) {
2043             RE_PV_QUOTED_DECL(quoted, do_utf8, PERL_DEBUG_PAD_ZERO(0),
2044                 SvPVX_const(must), RE_SV_DUMPLEN(must), 30);
2045             PerlIO_printf(Perl_debug_log, "Did not find %s substr %s%s...\n",
2046                               ((must == prog->anchored_substr || must == prog->anchored_utf8)
2047                                ? "anchored" : "floating"),
2048                 quoted, RE_SV_TAIL(must));
2049         });
2050         goto phooey;
2051     }
2052     else if ( (c = progi->regstclass) ) {
2053         if (minlen) {
2054             const OPCODE op = OP(progi->regstclass);
2055             /* don't bother with what can't match */
2056             if (PL_regkind[op] != EXACT && op != CANY && PL_regkind[op] != TRIE)
2057                 strend = HOPc(strend, -(minlen - 1));
2058         }
2059         DEBUG_EXECUTE_r({
2060             SV * const prop = sv_newmortal();
2061             regprop(prog, prop, c);
2062             {
2063                 RE_PV_QUOTED_DECL(quoted,do_utf8,PERL_DEBUG_PAD_ZERO(1),
2064                     s,strend-s,60);
2065                 PerlIO_printf(Perl_debug_log,
2066                     "Matching stclass %.*s against %s (%d chars)\n",
2067                     (int)SvCUR(prop), SvPVX_const(prop),
2068                      quoted, (int)(strend - s));
2069             }
2070         });
2071         if (find_byclass(prog, c, s, strend, &reginfo))
2072             goto got_it;
2073         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass... [regexec_flags]\n"));
2074     }
2075     else {
2076         dontbother = 0;
2077         if (prog->float_substr != NULL || prog->float_utf8 != NULL) {
2078             /* Trim the end. */
2079             char *last;
2080             SV* float_real;
2081
2082             if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))
2083                 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
2084             float_real = do_utf8 ? prog->float_utf8 : prog->float_substr;
2085
2086             if (flags & REXEC_SCREAM) {
2087                 last = screaminstr(sv, float_real, s - strbeg,
2088                                    end_shift, &scream_pos, 1); /* last one */
2089                 if (!last)
2090                     last = scream_olds; /* Only one occurrence. */
2091                 /* we may be pointing at the wrong string */
2092                 else if (RXp_MATCH_COPIED(prog))
2093                     s = strbeg + (s - SvPVX_const(sv));
2094             }
2095             else {
2096                 STRLEN len;
2097                 const char * const little = SvPV_const(float_real, len);
2098
2099                 if (SvTAIL(float_real)) {
2100                     if (memEQ(strend - len + 1, little, len - 1))
2101                         last = strend - len + 1;
2102                     else if (!multiline)
2103                         last = memEQ(strend - len, little, len)
2104                             ? strend - len : NULL;
2105                     else
2106                         goto find_last;
2107                 } else {
2108                   find_last:
2109                     if (len)
2110                         last = rninstr(s, strend, little, little + len);
2111                     else
2112                         last = strend;  /* matching "$" */
2113                 }
2114             }
2115             if (last == NULL) {
2116                 DEBUG_EXECUTE_r(
2117                     PerlIO_printf(Perl_debug_log,
2118                         "%sCan't trim the tail, match fails (should not happen)%s\n",
2119                         PL_colors[4], PL_colors[5]));
2120                 goto phooey; /* Should not happen! */
2121             }
2122             dontbother = strend - last + prog->float_min_offset;
2123         }
2124         if (minlen && (dontbother < minlen))
2125             dontbother = minlen - 1;
2126         strend -= dontbother;              /* this one's always in bytes! */
2127         /* We don't know much -- general case. */
2128         if (do_utf8) {
2129             for (;;) {
2130                 if (regtry(&reginfo, &s))
2131                     goto got_it;
2132                 if (s >= strend)
2133                     break;
2134                 s += UTF8SKIP(s);
2135             };
2136         }
2137         else {
2138             do {
2139                 if (regtry(&reginfo, &s))
2140                     goto got_it;
2141             } while (s++ < strend);
2142         }
2143     }
2144
2145     /* Failure. */
2146     goto phooey;
2147
2148 got_it:
2149     RX_MATCH_TAINTED_set(rx, PL_reg_flags & RF_tainted);
2150
2151     if (PL_reg_eval_set)
2152         restore_pos(aTHX_ prog);
2153     if (RXp_PAREN_NAMES(prog))
2154         (void)hv_iterinit(RXp_PAREN_NAMES(prog));
2155
2156     /* make sure $`, $&, $', and $digit will work later */
2157     if ( !(flags & REXEC_NOT_FIRST) ) {
2158         RX_MATCH_COPY_FREE(rx);
2159         if (flags & REXEC_COPY_STR) {
2160             const I32 i = PL_regeol - startpos + (stringarg - strbeg);
2161 #ifdef PERL_OLD_COPY_ON_WRITE
2162             if ((SvIsCOW(sv)
2163                  || (SvFLAGS(sv) & CAN_COW_MASK) == CAN_COW_FLAGS)) {
2164                 if (DEBUG_C_TEST) {
2165                     PerlIO_printf(Perl_debug_log,
2166                                   "Copy on write: regexp capture, type %d\n",
2167                                   (int) SvTYPE(sv));
2168                 }
2169                 prog->saved_copy = sv_setsv_cow(prog->saved_copy, sv);
2170                 prog->subbeg = (char *)SvPVX_const(prog->saved_copy);
2171                 assert (SvPOKp(prog->saved_copy));
2172             } else
2173 #endif
2174             {
2175                 RX_MATCH_COPIED_on(rx);
2176                 s = savepvn(strbeg, i);
2177                 prog->subbeg = s;
2178             }
2179             prog->sublen = i;
2180         }
2181         else {
2182             prog->subbeg = strbeg;
2183             prog->sublen = PL_regeol - strbeg;  /* strend may have been modified */
2184         }
2185     }
2186
2187     return 1;
2188
2189 phooey:
2190     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n",
2191                           PL_colors[4], PL_colors[5]));
2192     if (PL_reg_eval_set)
2193         restore_pos(aTHX_ prog);
2194     if (swap_on_fail)
2195         /* we failed :-( roll it back */
2196         swap_match_buff(prog);
2197
2198     return 0;
2199 }
2200
2201
2202 /*
2203  - regtry - try match at specific point
2204  */
2205 STATIC I32                      /* 0 failure, 1 success */
2206 S_regtry(pTHX_ regmatch_info *reginfo, char **startpos)
2207 {
2208     dVAR;
2209     CHECKPOINT lastcp;
2210     REGEXP *const rx = reginfo->prog;
2211     regexp *const prog = (struct regexp *)SvANY(rx);
2212     RXi_GET_DECL(prog,progi);
2213     GET_RE_DEBUG_FLAGS_DECL;
2214     reginfo->cutpoint=NULL;
2215
2216     if ((prog->extflags & RXf_EVAL_SEEN) && !PL_reg_eval_set) {
2217         MAGIC *mg;
2218
2219         PL_reg_eval_set = RS_init;
2220         DEBUG_EXECUTE_r(DEBUG_s(
2221             PerlIO_printf(Perl_debug_log, "  setting stack tmpbase at %"IVdf"\n",
2222                           (IV)(PL_stack_sp - PL_stack_base));
2223             ));
2224         SAVESTACK_CXPOS();
2225         cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
2226         /* Otherwise OP_NEXTSTATE will free whatever on stack now.  */
2227         SAVETMPS;
2228         /* Apparently this is not needed, judging by wantarray. */
2229         /* SAVEI8(cxstack[cxstack_ix].blk_gimme);
2230            cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
2231
2232         if (reginfo->sv) {
2233             /* Make $_ available to executed code. */
2234             if (reginfo->sv != DEFSV) {
2235                 SAVE_DEFSV;
2236                 DEFSV = reginfo->sv;
2237             }
2238
2239             if (!(SvTYPE(reginfo->sv) >= SVt_PVMG && SvMAGIC(reginfo->sv)
2240                   && (mg = mg_find(reginfo->sv, PERL_MAGIC_regex_global)))) {
2241                 /* prepare for quick setting of pos */
2242 #ifdef PERL_OLD_COPY_ON_WRITE
2243                 if (SvIsCOW(reginfo->sv))
2244                     sv_force_normal_flags(reginfo->sv, 0);
2245 #endif
2246                 mg = sv_magicext(reginfo->sv, NULL, PERL_MAGIC_regex_global,
2247                                  &PL_vtbl_mglob, NULL, 0);
2248                 mg->mg_len = -1;
2249             }
2250             PL_reg_magic    = mg;
2251             PL_reg_oldpos   = mg->mg_len;
2252             SAVEDESTRUCTOR_X(restore_pos, prog);
2253         }
2254         if (!PL_reg_curpm) {
2255             Newxz(PL_reg_curpm, 1, PMOP);
2256 #ifdef USE_ITHREADS
2257             {
2258                 SV* const repointer = newSViv(0);
2259                 /* so we know which PL_regex_padav element is PL_reg_curpm */
2260                 SvFLAGS(repointer) |= SVf_BREAK;
2261                 av_push(PL_regex_padav,repointer);
2262                 PL_reg_curpm->op_pmoffset = av_len(PL_regex_padav);
2263                 PL_regex_pad = AvARRAY(PL_regex_padav);
2264             }
2265 #endif
2266         }
2267         PM_SETRE(PL_reg_curpm, rx);
2268         PL_reg_oldcurpm = PL_curpm;
2269         PL_curpm = PL_reg_curpm;
2270         if (RXp_MATCH_COPIED(prog)) {
2271             /*  Here is a serious problem: we cannot rewrite subbeg,
2272                 since it may be needed if this match fails.  Thus
2273                 $` inside (?{}) could fail... */
2274             PL_reg_oldsaved = prog->subbeg;
2275             PL_reg_oldsavedlen = prog->sublen;
2276 #ifdef PERL_OLD_COPY_ON_WRITE
2277             PL_nrs = prog->saved_copy;
2278 #endif
2279             RXp_MATCH_COPIED_off(prog);
2280         }
2281         else
2282             PL_reg_oldsaved = NULL;
2283         prog->subbeg = PL_bostr;
2284         prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
2285     }
2286     DEBUG_EXECUTE_r(PL_reg_starttry = *startpos);
2287     prog->offs[0].start = *startpos - PL_bostr;
2288     PL_reginput = *startpos;
2289     PL_reglastparen = &prog->lastparen;
2290     PL_reglastcloseparen = &prog->lastcloseparen;
2291     prog->lastparen = 0;
2292     prog->lastcloseparen = 0;
2293     PL_regsize = 0;
2294     PL_regoffs = prog->offs;
2295     if (PL_reg_start_tmpl <= prog->nparens) {
2296         PL_reg_start_tmpl = prog->nparens*3/2 + 3;
2297         if(PL_reg_start_tmp)
2298             Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
2299         else
2300             Newx(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
2301     }
2302
2303     /* XXXX What this code is doing here?!!!  There should be no need
2304        to do this again and again, PL_reglastparen should take care of
2305        this!  --ilya*/
2306
2307     /* Tests pat.t#187 and split.t#{13,14} seem to depend on this code.
2308      * Actually, the code in regcppop() (which Ilya may be meaning by
2309      * PL_reglastparen), is not needed at all by the test suite
2310      * (op/regexp, op/pat, op/split), but that code is needed otherwise
2311      * this erroneously leaves $1 defined: "1" =~ /^(?:(\d)x)?\d$/
2312      * Meanwhile, this code *is* needed for the
2313      * above-mentioned test suite tests to succeed.  The common theme
2314      * on those tests seems to be returning null fields from matches.
2315      * --jhi updated by dapm */
2316 #if 1
2317     if (prog->nparens) {
2318         regexp_paren_pair *pp = PL_regoffs;
2319         register I32 i;
2320         for (i = prog->nparens; i > (I32)*PL_reglastparen; i--) {
2321             ++pp;
2322             pp->start = -1;
2323             pp->end = -1;
2324         }
2325     }
2326 #endif
2327     REGCP_SET(lastcp);
2328     if (regmatch(reginfo, progi->program + 1)) {
2329         PL_regoffs[0].end = PL_reginput - PL_bostr;
2330         return 1;
2331     }
2332     if (reginfo->cutpoint)
2333         *startpos= reginfo->cutpoint;
2334     REGCP_UNWIND(lastcp);
2335     return 0;
2336 }
2337
2338
2339 #define sayYES goto yes
2340 #define sayNO goto no
2341 #define sayNO_SILENT goto no_silent
2342
2343 /* we dont use STMT_START/END here because it leads to
2344    "unreachable code" warnings, which are bogus, but distracting. */
2345 #define CACHEsayNO \
2346     if (ST.cache_mask) \
2347        PL_reg_poscache[ST.cache_offset] |= ST.cache_mask; \
2348     sayNO
2349
2350 /* this is used to determine how far from the left messages like
2351    'failed...' are printed. It should be set such that messages
2352    are inline with the regop output that created them.
2353 */
2354 #define REPORT_CODE_OFF 32
2355
2356
2357 /* Make sure there is a test for this +1 options in re_tests */
2358 #define TRIE_INITAL_ACCEPT_BUFFLEN 4;
2359
2360 #define CHRTEST_UNINIT -1001 /* c1/c2 haven't been calculated yet */
2361 #define CHRTEST_VOID   -1000 /* the c1/c2 "next char" test should be skipped */
2362
2363 #define SLAB_FIRST(s) (&(s)->states[0])
2364 #define SLAB_LAST(s)  (&(s)->states[PERL_REGMATCH_SLAB_SLOTS-1])
2365
2366 /* grab a new slab and return the first slot in it */
2367
2368 STATIC regmatch_state *
2369 S_push_slab(pTHX)
2370 {
2371 #if PERL_VERSION < 9 && !defined(PERL_CORE)
2372     dMY_CXT;
2373 #endif
2374     regmatch_slab *s = PL_regmatch_slab->next;
2375     if (!s) {
2376         Newx(s, 1, regmatch_slab);
2377         s->prev = PL_regmatch_slab;
2378         s->next = NULL;
2379         PL_regmatch_slab->next = s;
2380     }
2381     PL_regmatch_slab = s;
2382     return SLAB_FIRST(s);
2383 }
2384
2385
2386 /* push a new state then goto it */
2387
2388 #define PUSH_STATE_GOTO(state, node) \
2389     scan = node; \
2390     st->resume_state = state; \
2391     goto push_state;
2392
2393 /* push a new state with success backtracking, then goto it */
2394
2395 #define PUSH_YES_STATE_GOTO(state, node) \
2396     scan = node; \
2397     st->resume_state = state; \
2398     goto push_yes_state;
2399
2400
2401
2402 /*
2403
2404 regmatch() - main matching routine
2405
2406 This is basically one big switch statement in a loop. We execute an op,
2407 set 'next' to point the next op, and continue. If we come to a point which
2408 we may need to backtrack to on failure such as (A|B|C), we push a
2409 backtrack state onto the backtrack stack. On failure, we pop the top
2410 state, and re-enter the loop at the state indicated. If there are no more
2411 states to pop, we return failure.
2412
2413 Sometimes we also need to backtrack on success; for example /A+/, where
2414 after successfully matching one A, we need to go back and try to
2415 match another one; similarly for lookahead assertions: if the assertion
2416 completes successfully, we backtrack to the state just before the assertion
2417 and then carry on.  In these cases, the pushed state is marked as
2418 'backtrack on success too'. This marking is in fact done by a chain of
2419 pointers, each pointing to the previous 'yes' state. On success, we pop to
2420 the nearest yes state, discarding any intermediate failure-only states.
2421 Sometimes a yes state is pushed just to force some cleanup code to be
2422 called at the end of a successful match or submatch; e.g. (??{$re}) uses
2423 it to free the inner regex.
2424
2425 Note that failure backtracking rewinds the cursor position, while
2426 success backtracking leaves it alone.
2427
2428 A pattern is complete when the END op is executed, while a subpattern
2429 such as (?=foo) is complete when the SUCCESS op is executed. Both of these
2430 ops trigger the "pop to last yes state if any, otherwise return true"
2431 behaviour.
2432
2433 A common convention in this function is to use A and B to refer to the two
2434 subpatterns (or to the first nodes thereof) in patterns like /A*B/: so A is
2435 the subpattern to be matched possibly multiple times, while B is the entire
2436 rest of the pattern. Variable and state names reflect this convention.
2437
2438 The states in the main switch are the union of ops and failure/success of
2439 substates associated with with that op.  For example, IFMATCH is the op
2440 that does lookahead assertions /(?=A)B/ and so the IFMATCH state means
2441 'execute IFMATCH'; while IFMATCH_A is a state saying that we have just
2442 successfully matched A and IFMATCH_A_fail is a state saying that we have
2443 just failed to match A. Resume states always come in pairs. The backtrack
2444 state we push is marked as 'IFMATCH_A', but when that is popped, we resume
2445 at IFMATCH_A or IFMATCH_A_fail, depending on whether we are backtracking
2446 on success or failure.
2447
2448 The struct that holds a backtracking state is actually a big union, with
2449 one variant for each major type of op. The variable st points to the
2450 top-most backtrack struct. To make the code clearer, within each
2451 block of code we #define ST to alias the relevant union.
2452
2453 Here's a concrete example of a (vastly oversimplified) IFMATCH
2454 implementation:
2455
2456     switch (state) {
2457     ....
2458
2459 #define ST st->u.ifmatch
2460
2461     case IFMATCH: // we are executing the IFMATCH op, (?=A)B
2462         ST.foo = ...; // some state we wish to save
2463         ...
2464         // push a yes backtrack state with a resume value of
2465         // IFMATCH_A/IFMATCH_A_fail, then continue execution at the
2466         // first node of A:
2467         PUSH_YES_STATE_GOTO(IFMATCH_A, A);
2468         // NOTREACHED
2469
2470     case IFMATCH_A: // we have successfully executed A; now continue with B
2471         next = B;
2472         bar = ST.foo; // do something with the preserved value
2473         break;
2474
2475     case IFMATCH_A_fail: // A failed, so the assertion failed
2476         ...;   // do some housekeeping, then ...
2477         sayNO; // propagate the failure
2478
2479 #undef ST
2480
2481     ...
2482     }
2483
2484 For any old-timers reading this who are familiar with the old recursive
2485 approach, the code above is equivalent to:
2486
2487     case IFMATCH: // we are executing the IFMATCH op, (?=A)B
2488     {
2489         int foo = ...
2490         ...
2491         if (regmatch(A)) {
2492             next = B;
2493             bar = foo;
2494             break;
2495         }
2496         ...;   // do some housekeeping, then ...
2497         sayNO; // propagate the failure
2498     }
2499
2500 The topmost backtrack state, pointed to by st, is usually free. If you
2501 want to claim it, populate any ST.foo fields in it with values you wish to
2502 save, then do one of
2503
2504         PUSH_STATE_GOTO(resume_state, node);
2505         PUSH_YES_STATE_GOTO(resume_state, node);
2506
2507 which sets that backtrack state's resume value to 'resume_state', pushes a
2508 new free entry to the top of the backtrack stack, then goes to 'node'.
2509 On backtracking, the free slot is popped, and the saved state becomes the
2510 new free state. An ST.foo field in this new top state can be temporarily
2511 accessed to retrieve values, but once the main loop is re-entered, it
2512 becomes available for reuse.
2513
2514 Note that the depth of the backtrack stack constantly increases during the
2515 left-to-right execution of the pattern, rather than going up and down with
2516 the pattern nesting. For example the stack is at its maximum at Z at the
2517 end of the pattern, rather than at X in the following:
2518
2519     /(((X)+)+)+....(Y)+....Z/
2520
2521 The only exceptions to this are lookahead/behind assertions and the cut,
2522 (?>A), which pop all the backtrack states associated with A before
2523 continuing.
2524
2525 Bascktrack state structs are allocated in slabs of about 4K in size.
2526 PL_regmatch_state and st always point to the currently active state,
2527 and PL_regmatch_slab points to the slab currently containing
2528 PL_regmatch_state.  The first time regmatch() is called, the first slab is
2529 allocated, and is never freed until interpreter destruction. When the slab
2530 is full, a new one is allocated and chained to the end. At exit from
2531 regmatch(), slabs allocated since entry are freed.
2532
2533 */
2534
2535
2536 #define DEBUG_STATE_pp(pp)                                  \
2537     DEBUG_STATE_r({                                         \
2538         DUMP_EXEC_POS(locinput, scan, do_utf8);             \
2539         PerlIO_printf(Perl_debug_log,                       \
2540             "    %*s"pp" %s%s%s%s%s\n",                     \
2541             depth*2, "",                                    \
2542             PL_reg_name[st->resume_state],                     \
2543             ((st==yes_state||st==mark_state) ? "[" : ""),   \
2544             ((st==yes_state) ? "Y" : ""),                   \
2545             ((st==mark_state) ? "M" : ""),                  \
2546             ((st==yes_state||st==mark_state) ? "]" : "")    \
2547         );                                                  \
2548     });
2549
2550
2551 #define REG_NODE_NUM(x) ((x) ? (int)((x)-prog) : -1)
2552
2553 #ifdef DEBUGGING
2554
2555 STATIC void
2556 S_debug_start_match(pTHX_ const REGEXP *prog, const bool do_utf8,
2557     const char *start, const char *end, const char *blurb)
2558 {
2559     const bool utf8_pat = RX_UTF8(prog) ? 1 : 0;
2560     if (!PL_colorset)
2561             reginitcolors();
2562     {
2563         RE_PV_QUOTED_DECL(s0, utf8_pat, PERL_DEBUG_PAD_ZERO(0),
2564             RX_PRECOMP(prog), RX_PRELEN(prog), 60);
2565
2566         RE_PV_QUOTED_DECL(s1, do_utf8, PERL_DEBUG_PAD_ZERO(1),
2567             start, end - start, 60);
2568
2569         PerlIO_printf(Perl_debug_log,
2570             "%s%s REx%s %s against %s\n",
2571                        PL_colors[4], blurb, PL_colors[5], s0, s1);
2572
2573         if (do_utf8||utf8_pat)
2574             PerlIO_printf(Perl_debug_log, "UTF-8 %s%s%s...\n",
2575                 utf8_pat ? "pattern" : "",
2576                 utf8_pat && do_utf8 ? " and " : "",
2577                 do_utf8 ? "string" : ""
2578             );
2579     }
2580 }
2581
2582 STATIC void
2583 S_dump_exec_pos(pTHX_ const char *locinput,
2584                       const regnode *scan,
2585                       const char *loc_regeol,
2586                       const char *loc_bostr,
2587                       const char *loc_reg_starttry,
2588                       const bool do_utf8)
2589 {
2590     const int docolor = *PL_colors[0] || *PL_colors[2] || *PL_colors[4];
2591     const int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
2592     int l = (loc_regeol - locinput) > taill ? taill : (loc_regeol - locinput);
2593     /* The part of the string before starttry has one color
2594        (pref0_len chars), between starttry and current
2595        position another one (pref_len - pref0_len chars),
2596        after the current position the third one.
2597        We assume that pref0_len <= pref_len, otherwise we
2598        decrease pref0_len.  */
2599     int pref_len = (locinput - loc_bostr) > (5 + taill) - l
2600         ? (5 + taill) - l : locinput - loc_bostr;
2601     int pref0_len;
2602
2603     while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput - pref_len)))
2604         pref_len++;
2605     pref0_len = pref_len  - (locinput - loc_reg_starttry);
2606     if (l + pref_len < (5 + taill) && l < loc_regeol - locinput)
2607         l = ( loc_regeol - locinput > (5 + taill) - pref_len
2608               ? (5 + taill) - pref_len : loc_regeol - locinput);
2609     while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput + l)))
2610         l--;
2611     if (pref0_len < 0)
2612         pref0_len = 0;
2613     if (pref0_len > pref_len)
2614         pref0_len = pref_len;
2615     {
2616         const int is_uni = (do_utf8 && OP(scan) != CANY) ? 1 : 0;
2617
2618         RE_PV_COLOR_DECL(s0,len0,is_uni,PERL_DEBUG_PAD(0),
2619             (locinput - pref_len),pref0_len, 60, 4, 5);
2620
2621         RE_PV_COLOR_DECL(s1,len1,is_uni,PERL_DEBUG_PAD(1),
2622                     (locinput - pref_len + pref0_len),
2623                     pref_len - pref0_len, 60, 2, 3);
2624
2625         RE_PV_COLOR_DECL(s2,len2,is_uni,PERL_DEBUG_PAD(2),
2626                     locinput, loc_regeol - locinput, 10, 0, 1);
2627
2628         const STRLEN tlen=len0+len1+len2;
2629         PerlIO_printf(Perl_debug_log,
2630                     "%4"IVdf" <%.*s%.*s%s%.*s>%*s|",
2631                     (IV)(locinput - loc_bostr),
2632                     len0, s0,
2633                     len1, s1,
2634                     (docolor ? "" : "> <"),
2635                     len2, s2,
2636                     (int)(tlen > 19 ? 0 :  19 - tlen),
2637                     "");
2638     }
2639 }
2640
2641 #endif
2642
2643 /* reg_check_named_buff_matched()
2644  * Checks to see if a named buffer has matched. The data array of
2645  * buffer numbers corresponding to the buffer is expected to reside
2646  * in the regexp->data->data array in the slot stored in the ARG() of
2647  * node involved. Note that this routine doesn't actually care about the
2648  * name, that information is not preserved from compilation to execution.
2649  * Returns the index of the leftmost defined buffer with the given name
2650  * or 0 if non of the buffers matched.
2651  */
2652 STATIC I32
2653 S_reg_check_named_buff_matched(pTHX_ const regexp *rex, const regnode *scan) {
2654     I32 n;
2655     RXi_GET_DECL(rex,rexi);
2656     SV *sv_dat=(SV*)rexi->data->data[ ARG( scan ) ];
2657     I32 *nums=(I32*)SvPVX(sv_dat);
2658     for ( n=0; n<SvIVX(sv_dat); n++ ) {
2659         if ((I32)*PL_reglastparen >= nums[n] &&
2660             PL_regoffs[nums[n]].end != -1)
2661         {
2662             return nums[n];
2663         }
2664     }
2665     return 0;
2666 }
2667
2668
2669 /* free all slabs above current one  - called during LEAVE_SCOPE */
2670
2671 STATIC void
2672 S_clear_backtrack_stack(pTHX_ void *p)
2673 {
2674     regmatch_slab *s = PL_regmatch_slab->next;
2675     PERL_UNUSED_ARG(p);
2676
2677     if (!s)
2678         return;
2679     PL_regmatch_slab->next = NULL;
2680     while (s) {
2681         regmatch_slab * const osl = s;
2682         s = s->next;
2683         Safefree(osl);
2684     }
2685 }
2686
2687
2688 #define SETREX(Re1,Re2) \
2689     if (PL_reg_eval_set) PM_SETRE((PL_reg_curpm), (Re2)); \
2690     Re1 = (Re2)
2691
2692 STATIC I32                      /* 0 failure, 1 success */
2693 S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
2694 {
2695 #if PERL_VERSION < 9 && !defined(PERL_CORE)
2696     dMY_CXT;
2697 #endif
2698     dVAR;
2699     register const bool do_utf8 = PL_reg_match_utf8;
2700     const U32 uniflags = UTF8_ALLOW_DEFAULT;
2701
2702     REGEXP *rex_sv = reginfo->prog;
2703     regexp *rex = (struct regexp *)SvANY(rex_sv);
2704     RXi_GET_DECL(rex,rexi);
2705
2706     I32 oldsave;
2707
2708     /* the current state. This is a cached copy of PL_regmatch_state */
2709     register regmatch_state *st;
2710
2711     /* cache heavy used fields of st in registers */
2712     register regnode *scan;
2713     register regnode *next;
2714     register U32 n = 0; /* general value; init to avoid compiler warning */
2715     register I32 ln = 0; /* len or last;  init to avoid compiler warning */
2716     register char *locinput = PL_reginput;
2717     register I32 nextchr;   /* is always set to UCHARAT(locinput) */
2718
2719     bool result = 0;        /* return value of S_regmatch */
2720     int depth = 0;          /* depth of backtrack stack */
2721     U32 nochange_depth = 0; /* depth of GOSUB recursion with nochange */
2722     const U32 max_nochange_depth =
2723         (3 * rex->nparens > MAX_RECURSE_EVAL_NOCHANGE_DEPTH) ?
2724         3 * rex->nparens : MAX_RECURSE_EVAL_NOCHANGE_DEPTH;
2725
2726     regmatch_state *yes_state = NULL; /* state to pop to on success of
2727                                                             subpattern */
2728     /* mark_state piggy backs on the yes_state logic so that when we unwind
2729        the stack on success we can update the mark_state as we go */
2730     regmatch_state *mark_state = NULL; /* last mark state we have seen */
2731
2732     regmatch_state *cur_eval = NULL; /* most recent EVAL_AB state */
2733     struct regmatch_state  *cur_curlyx = NULL; /* most recent curlyx */
2734     U32 state_num;
2735     bool no_final = 0;      /* prevent failure from backtracking? */
2736     bool do_cutgroup = 0;   /* no_final only until next branch/trie entry */
2737     char *startpoint = PL_reginput;
2738     SV *popmark = NULL;     /* are we looking for a mark? */
2739     SV *sv_commit = NULL;   /* last mark name seen in failure */
2740     SV *sv_yes_mark = NULL; /* last mark name we have seen
2741                                during a successfull match */
2742     U32 lastopen = 0;       /* last open we saw */
2743     bool has_cutgroup = RX_HAS_CUTGROUP(rex) ? 1 : 0;
2744
2745     SV* const oreplsv = GvSV(PL_replgv);
2746
2747
2748     /* these three flags are set by various ops to signal information to
2749      * the very next op. They have a useful lifetime of exactly one loop
2750      * iteration, and are not preserved or restored by state pushes/pops
2751      */
2752     bool sw = 0;            /* the condition value in (?(cond)a|b) */
2753     bool minmod = 0;        /* the next "{n,m}" is a "{n,m}?" */
2754     int logical = 0;        /* the following EVAL is:
2755                                 0: (?{...})
2756                                 1: (?(?{...})X|Y)
2757                                 2: (??{...})
2758                                or the following IFMATCH/UNLESSM is:
2759                                 false: plain (?=foo)
2760                                 true:  used as a condition: (?(?=foo))
2761                             */
2762
2763 #ifdef DEBUGGING
2764     GET_RE_DEBUG_FLAGS_DECL;
2765 #endif
2766
2767     DEBUG_OPTIMISE_r( DEBUG_EXECUTE_r({
2768             PerlIO_printf(Perl_debug_log,"regmatch start\n");
2769     }));
2770     /* on first ever call to regmatch, allocate first slab */
2771     if (!PL_regmatch_slab) {
2772         Newx(PL_regmatch_slab, 1, regmatch_slab);
2773         PL_regmatch_slab->prev = NULL;
2774         PL_regmatch_slab->next = NULL;
2775         PL_regmatch_state = SLAB_FIRST(PL_regmatch_slab);
2776     }
2777
2778     oldsave = PL_savestack_ix;
2779     SAVEDESTRUCTOR_X(S_clear_backtrack_stack, NULL);
2780     SAVEVPTR(PL_regmatch_slab);
2781     SAVEVPTR(PL_regmatch_state);
2782
2783     /* grab next free state slot */
2784     st = ++PL_regmatch_state;
2785     if (st >  SLAB_LAST(PL_regmatch_slab))
2786         st = PL_regmatch_state = S_push_slab(aTHX);
2787
2788     /* Note that nextchr is a byte even in UTF */
2789     nextchr = UCHARAT(locinput);
2790     scan = prog;
2791     while (scan != NULL) {
2792
2793         DEBUG_EXECUTE_r( {
2794             SV * const prop = sv_newmortal();
2795             regnode *rnext=regnext(scan);
2796             DUMP_EXEC_POS( locinput, scan, do_utf8 );
2797             regprop(rex, prop, scan);
2798
2799             PerlIO_printf(Perl_debug_log,
2800                     "%3"IVdf":%*s%s(%"IVdf")\n",
2801                     (IV)(scan - rexi->program), depth*2, "",
2802                     SvPVX_const(prop),
2803                     (PL_regkind[OP(scan)] == END || !rnext) ?
2804                         0 : (IV)(rnext - rexi->program));
2805         });
2806
2807         next = scan + NEXT_OFF(scan);
2808         if (next == scan)
2809             next = NULL;
2810         state_num = OP(scan);
2811
2812       reenter_switch:
2813         switch (state_num) {
2814         case BOL:
2815             if (locinput == PL_bostr)
2816             {
2817                 /* reginfo->till = reginfo->bol; */
2818                 break;
2819             }
2820             sayNO;
2821         case MBOL:
2822             if (locinput == PL_bostr ||
2823                 ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n'))
2824             {
2825                 break;
2826             }
2827             sayNO;
2828         case SBOL:
2829             if (locinput == PL_bostr)
2830                 break;
2831             sayNO;
2832         case GPOS:
2833             if (locinput == reginfo->ganch)
2834                 break;
2835             sayNO;
2836
2837         case KEEPS:
2838             /* update the startpoint */
2839             st->u.keeper.val = PL_regoffs[0].start;
2840             PL_reginput = locinput;
2841             PL_regoffs[0].start = locinput - PL_bostr;
2842             PUSH_STATE_GOTO(KEEPS_next, next);
2843             /*NOT-REACHED*/
2844         case KEEPS_next_fail:
2845             /* rollback the start point change */
2846             PL_regoffs[0].start = st->u.keeper.val;
2847             sayNO_SILENT;
2848             /*NOT-REACHED*/
2849         case EOL:
2850                 goto seol;
2851         case MEOL:
2852             if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
2853                 sayNO;
2854             break;
2855         case SEOL:
2856           seol:
2857             if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
2858                 sayNO;
2859             if (PL_regeol - locinput > 1)
2860                 sayNO;
2861             break;
2862         case EOS:
2863             if (PL_regeol != locinput)
2864                 sayNO;
2865             break;
2866         case SANY:
2867             if (!nextchr && locinput >= PL_regeol)
2868                 sayNO;
2869             if (do_utf8) {
2870                 locinput += PL_utf8skip[nextchr];
2871                 if (locinput > PL_regeol)
2872                     sayNO;
2873                 nextchr = UCHARAT(locinput);
2874             }
2875             else
2876                 nextchr = UCHARAT(++locinput);
2877             break;
2878         case CANY:
2879             if (!nextchr && locinput >= PL_regeol)
2880                 sayNO;
2881             nextchr = UCHARAT(++locinput);
2882             break;
2883         case REG_ANY:
2884             if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
2885                 sayNO;
2886             if (do_utf8) {
2887                 locinput += PL_utf8skip[nextchr];
2888                 if (locinput > PL_regeol)
2889                     sayNO;
2890                 nextchr = UCHARAT(locinput);
2891             }
2892             else
2893                 nextchr = UCHARAT(++locinput);
2894             break;
2895
2896 #undef  ST
2897 #define ST st->u.trie
2898         case TRIEC:
2899             /* In this case the charclass data is available inline so
2900                we can fail fast without a lot of extra overhead.
2901              */
2902             if (scan->flags == EXACT || !do_utf8) {
2903                 if(!ANYOF_BITMAP_TEST(scan, *locinput)) {
2904                     DEBUG_EXECUTE_r(
2905                         PerlIO_printf(Perl_debug_log,
2906                                   "%*s  %sfailed to match trie start class...%s\n",
2907                                   REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5])
2908                     );
2909                     sayNO_SILENT;
2910                     /* NOTREACHED */
2911                 }
2912             }
2913             /* FALL THROUGH */
2914         case TRIE:
2915             {
2916                 /* what type of TRIE am I? (utf8 makes this contextual) */
2917                 DECL_TRIE_TYPE(scan);
2918
2919                 /* what trie are we using right now */
2920                 reg_trie_data * const trie
2921                     = (reg_trie_data*)rexi->data->data[ ARG( scan ) ];
2922                 HV * widecharmap = (HV *)rexi->data->data[ ARG( scan ) + 1 ];
2923                 U32 state = trie->startstate;
2924
2925                 if (trie->bitmap && trie_type != trie_utf8_fold &&
2926                     !TRIE_BITMAP_TEST(trie,*locinput)
2927                 ) {
2928                     if (trie->states[ state ].wordnum) {
2929                          DEBUG_EXECUTE_r(
2930                             PerlIO_printf(Perl_debug_log,
2931                                           "%*s  %smatched empty string...%s\n",
2932                                           REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5])
2933                         );
2934                         break;
2935                     } else {
2936                         DEBUG_EXECUTE_r(
2937                             PerlIO_printf(Perl_debug_log,
2938                                           "%*s  %sfailed to match trie start class...%s\n",
2939                                           REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5])
2940                         );
2941                         sayNO_SILENT;
2942                    }
2943                 }
2944
2945             {
2946                 U8 *uc = ( U8* )locinput;
2947
2948                 STRLEN len = 0;
2949                 STRLEN foldlen = 0;
2950                 U8 *uscan = (U8*)NULL;
2951                 STRLEN bufflen=0;
2952                 SV *sv_accept_buff = NULL;
2953                 U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ];
2954
2955                 ST.accepted = 0; /* how many accepting states we have seen */
2956                 ST.B = next;
2957                 ST.jump = trie->jump;
2958                 ST.me = scan;
2959                 /*
2960                    traverse the TRIE keeping track of all accepting states
2961                    we transition through until we get to a failing node.
2962                 */
2963
2964                 while ( state && uc <= (U8*)PL_regeol ) {
2965                     U32 base = trie->states[ state ].trans.base;
2966                     UV uvc = 0;
2967                     U16 charid;
2968                     /* We use charid to hold the wordnum as we don't use it
2969                        for charid until after we have done the wordnum logic.
2970                        We define an alias just so that the wordnum logic reads
2971                        more naturally. */
2972
2973 #define got_wordnum charid
2974                     got_wordnum = trie->states[ state ].wordnum;
2975
2976                     if ( got_wordnum ) {
2977                         if ( ! ST.accepted ) {
2978                             ENTER;
2979                             /* SAVETMPS; */ /* XXX is this necessary? dmq */
2980                             bufflen = TRIE_INITAL_ACCEPT_BUFFLEN;
2981                             sv_accept_buff=newSV(bufflen *
2982                                             sizeof(reg_trie_accepted) - 1);
2983                             SvCUR_set(sv_accept_buff, 0);
2984                             SvPOK_on(sv_accept_buff);
2985                             sv_2mortal(sv_accept_buff);
2986                             SAVETMPS;
2987                             ST.accept_buff =
2988                                 (reg_trie_accepted*)SvPV_nolen(sv_accept_buff );
2989                         }
2990                         do {
2991                             if (ST.accepted >= bufflen) {
2992                                 bufflen *= 2;
2993                                 ST.accept_buff =(reg_trie_accepted*)
2994                                     SvGROW(sv_accept_buff,
2995                                         bufflen * sizeof(reg_trie_accepted));
2996                             }
2997                             SvCUR_set(sv_accept_buff,SvCUR(sv_accept_buff)
2998                                 + sizeof(reg_trie_accepted));
2999
3000
3001                             ST.accept_buff[ST.accepted].wordnum = got_wordnum;
3002                             ST.accept_buff[ST.accepted].endpos = uc;
3003                             ++ST.accepted;
3004                         } while (trie->nextword && (got_wordnum= trie->nextword[got_wordnum]));
3005                     }
3006 #undef got_wordnum
3007
3008                     DEBUG_TRIE_EXECUTE_r({
3009                                 DUMP_EXEC_POS( (char *)uc, scan, do_utf8 );
3010                                 PerlIO_printf( Perl_debug_log,
3011                                     "%*s  %sState: %4"UVxf" Accepted: %4"UVxf" ",
3012                                     2+depth * 2, "", PL_colors[4],
3013                                     (UV)state, (UV)ST.accepted );
3014                     });
3015
3016                     if ( base ) {
3017                         REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc,
3018                                              uscan, len, uvc, charid, foldlen,
3019                                              foldbuf, uniflags);
3020
3021                         if (charid &&
3022                              (base + charid > trie->uniquecharcount )
3023                              && (base + charid - 1 - trie->uniquecharcount
3024                                     < trie->lasttrans)
3025                              && trie->trans[base + charid - 1 -
3026                                     trie->uniquecharcount].check == state)
3027                         {
3028                             state = trie->trans[base + charid - 1 -
3029                                 trie->uniquecharcount ].next;
3030                         }
3031                         else {
3032                             state = 0;
3033                         }
3034                         uc += len;
3035
3036                     }
3037                     else {
3038                         state = 0;
3039                     }
3040                     DEBUG_TRIE_EXECUTE_r(
3041                         PerlIO_printf( Perl_debug_log,
3042                             "Charid:%3x CP:%4"UVxf" After State: %4"UVxf"%s\n",
3043                             charid, uvc, (UV)state, PL_colors[5] );
3044                     );
3045                 }
3046                 if (!ST.accepted )
3047                    sayNO;
3048
3049                 DEBUG_EXECUTE_r(
3050                     PerlIO_printf( Perl_debug_log,
3051                         "%*s  %sgot %"IVdf" possible matches%s\n",
3052                         REPORT_CODE_OFF + depth * 2, "",
3053                         PL_colors[4], (IV)ST.accepted, PL_colors[5] );
3054                 );
3055             }}
3056             goto trie_first_try; /* jump into the fail handler */
3057             /* NOTREACHED */
3058         case TRIE_next_fail: /* we failed - try next alterative */
3059             if ( ST.jump) {
3060                 REGCP_UNWIND(ST.cp);
3061                 for (n = *PL_reglastparen; n > ST.lastparen; n--)
3062                     PL_regoffs[n].end = -1;
3063                 *PL_reglastparen = n;
3064             }
3065           trie_first_try:
3066             if (do_cutgroup) {
3067                 do_cutgroup = 0;
3068                 no_final = 0;
3069             }
3070
3071             if ( ST.jump) {
3072                 ST.lastparen = *PL_reglastparen;
3073                 REGCP_SET(ST.cp);
3074             }
3075             if ( ST.accepted == 1 ) {
3076                 /* only one choice left - just continue */
3077                 DEBUG_EXECUTE_r({
3078                     AV *const trie_words
3079                         = (AV *) rexi->data->data[ARG(ST.me)+TRIE_WORDS_OFFSET];
3080                     SV ** const tmp = av_fetch( trie_words,
3081                         ST.accept_buff[ 0 ].wordnum-1, 0 );
3082                     SV *sv= tmp ? sv_newmortal() : NULL;
3083
3084                     PerlIO_printf( Perl_debug_log,
3085                         "%*s  %sonly one match left: #%d <%s>%s\n",
3086                         REPORT_CODE_OFF+depth*2, "", PL_colors[4],
3087                         ST.accept_buff[ 0 ].wordnum,
3088                         tmp ? pv_pretty(sv, SvPV_nolen_const(*tmp), SvCUR(*tmp), 0,
3089                                 PL_colors[0], PL_colors[1],
3090                                 (SvUTF8(*tmp) ? PERL_PV_ESCAPE_UNI : 0)
3091                             )
3092                         : "not compiled under -Dr",
3093                         PL_colors[5] );
3094                 });
3095                 PL_reginput = (char *)ST.accept_buff[ 0 ].endpos;
3096                 /* in this case we free tmps/leave before we call regmatch
3097                    as we wont be using accept_buff again. */
3098
3099                 locinput = PL_reginput;
3100                 nextchr = UCHARAT(locinput);
3101                 if ( !ST.jump || !ST.jump[ST.accept_buff[0].wordnum])
3102                     scan = ST.B;
3103                 else
3104                     scan = ST.me + ST.jump[ST.accept_buff[0].wordnum];
3105                 if (!has_cutgroup) {
3106                     FREETMPS;
3107                     LEAVE;
3108                 } else {
3109                     ST.accepted--;
3110                     PUSH_YES_STATE_GOTO(TRIE_next, scan);
3111                 }
3112
3113                 continue; /* execute rest of RE */
3114             }
3115
3116             if ( !ST.accepted-- ) {
3117                 DEBUG_EXECUTE_r({
3118                     PerlIO_printf( Perl_debug_log,
3119                         "%*s  %sTRIE failed...%s\n",
3120                         REPORT_CODE_OFF+depth*2, "",
3121                         PL_colors[4],
3122                         PL_colors[5] );
3123                 });
3124                 FREETMPS;
3125                 LEAVE;
3126                 sayNO_SILENT;
3127                 /*NOTREACHED*/
3128             }
3129
3130             /*
3131                There are at least two accepting states left.  Presumably
3132                the number of accepting states is going to be low,
3133                typically two. So we simply scan through to find the one
3134                with lowest wordnum.  Once we find it, we swap the last
3135                state into its place and decrement the size. We then try to
3136                match the rest of the pattern at the point where the word
3137                ends. If we succeed, control just continues along the
3138                regex; if we fail we return here to try the next accepting
3139                state
3140              */
3141
3142             {
3143                 U32 best = 0;
3144                 U32 cur;
3145                 for( cur = 1 ; cur <= ST.accepted ; cur++ ) {
3146                     DEBUG_TRIE_EXECUTE_r(
3147                         PerlIO_printf( Perl_debug_log,
3148                             "%*s  %sgot %"IVdf" (%d) as best, looking at %"IVdf" (%d)%s\n",
3149                             REPORT_CODE_OFF + depth * 2, "", PL_colors[4],
3150                             (IV)best, ST.accept_buff[ best ].wordnum, (IV)cur,
3151                             ST.accept_buff[ cur ].wordnum, PL_colors[5] );
3152                     );
3153
3154                     if (ST.accept_buff[cur].wordnum <
3155                             ST.accept_buff[best].wordnum)
3156                         best = cur;
3157                 }
3158
3159                 DEBUG_EXECUTE_r({
3160                     AV *const trie_words
3161                         = (AV *) rexi->data->data[ARG(ST.me)+TRIE_WORDS_OFFSET];
3162                     SV ** const tmp = av_fetch( trie_words,
3163                         ST.accept_buff[ best ].wordnum - 1, 0 );
3164                     regnode *nextop=(!ST.jump || !ST.jump[ST.accept_buff[best].wordnum]) ?
3165                                     ST.B :
3166                                     ST.me + ST.jump[ST.accept_buff[best].wordnum];
3167                     SV *sv= tmp ? sv_newmortal() : NULL;
3168
3169                     PerlIO_printf( Perl_debug_log,
3170                         "%*s  %strying alternation #%d <%s> at node #%d %s\n",
3171                         REPORT_CODE_OFF+depth*2, "", PL_colors[4],
3172                         ST.accept_buff[best].wordnum,
3173                         tmp ? pv_pretty(sv, SvPV_nolen_const(*tmp), SvCUR(*tmp), 0,
3174                                 PL_colors[0], PL_colors[1],
3175                                 (SvUTF8(*tmp) ? PERL_PV_ESCAPE_UNI : 0)
3176                             ) : "not compiled under -Dr",
3177                             REG_NODE_NUM(nextop),
3178                         PL_colors[5] );
3179                 });
3180
3181                 if ( best<ST.accepted ) {
3182                     reg_trie_accepted tmp = ST.accept_buff[ best ];
3183                     ST.accept_buff[ best ] = ST.accept_buff[ ST.accepted ];
3184                     ST.accept_buff[ ST.accepted ] = tmp;
3185                     best = ST.accepted;
3186                 }
3187                 PL_reginput = (char *)ST.accept_buff[ best ].endpos;
3188                 if ( !ST.jump || !ST.jump[ST.accept_buff[best].wordnum]) {
3189                     scan = ST.B;
3190                 } else {
3191                     scan = ST.me + ST.jump[ST.accept_buff[best].wordnum];
3192                 }
3193                 PUSH_YES_STATE_GOTO(TRIE_next, scan);
3194                 /* NOTREACHED */
3195             }
3196             /* NOTREACHED */
3197         case TRIE_next:
3198             FREETMPS;
3199             LEAVE;
3200             sayYES;
3201 #undef  ST
3202
3203         case EXACT: {
3204             char *s = STRING(scan);
3205             ln = STR_LEN(scan);
3206             if (do_utf8 != UTF) {
3207                 /* The target and the pattern have differing utf8ness. */
3208                 char *l = locinput;
3209                 const char * const e = s + ln;
3210
3211                 if (do_utf8) {
3212                     /* The target is utf8, the pattern is not utf8. */
3213                     while (s < e) {
3214                         STRLEN ulen;
3215                         if (l >= PL_regeol)
3216                              sayNO;
3217                         if (NATIVE_TO_UNI(*(U8*)s) !=
3218                             utf8n_to_uvuni((U8*)l, UTF8_MAXBYTES, &ulen,
3219                                             uniflags))
3220                              sayNO;
3221                         l += ulen;
3222                         s ++;
3223                     }
3224                 }
3225                 else {
3226                     /* The target is not utf8, the pattern is utf8. */
3227                     while (s < e) {
3228                         STRLEN ulen;
3229                         if (l >= PL_regeol)
3230                             sayNO;
3231                         if (NATIVE_TO_UNI(*((U8*)l)) !=
3232                             utf8n_to_uvuni((U8*)s, UTF8_MAXBYTES, &ulen,
3233                                            uniflags))
3234                             sayNO;
3235                         s += ulen;
3236                         l ++;
3237                     }
3238                 }
3239                 locinput = l;
3240                 nextchr = UCHARAT(locinput);
3241                 break;
3242             }
3243             /* The target and the pattern have the same utf8ness. */
3244             /* Inline the first character, for speed. */
3245             if (UCHARAT(s) != nextchr)
3246                 sayNO;
3247             if (PL_regeol - locinput < ln)
3248                 sayNO;
3249             if (ln > 1 && memNE(s, locinput, ln))
3250                 sayNO;
3251             locinput += ln;
3252             nextchr = UCHARAT(locinput);
3253             break;
3254             }
3255         case EXACTFL:
3256             PL_reg_flags |= RF_tainted;
3257             /* FALL THROUGH */
3258         case EXACTF: {
3259             char * const s = STRING(scan);
3260             ln = STR_LEN(scan);
3261
3262             if (do_utf8 || UTF) {
3263               /* Either target or the pattern are utf8. */
3264                 const char * const l = locinput;
3265                 char *e = PL_regeol;
3266
3267                 if (ibcmp_utf8(s, 0,  ln, (bool)UTF,
3268                                l, &e, 0,  do_utf8)) {
3269                      /* One more case for the sharp s:
3270                       * pack("U0U*", 0xDF) =~ /ss/i,
3271                       * the 0xC3 0x9F are the UTF-8
3272                       * byte sequence for the U+00DF. */
3273
3274                      if (!(do_utf8 &&
3275                            toLOWER(s[0]) == 's' &&
3276                            ln >= 2 &&
3277                            toLOWER(s[1]) == 's' &&
3278                            (U8)l[0] == 0xC3 &&
3279                            e - l >= 2 &&
3280                            (U8)l[1] == 0x9F))
3281                           sayNO;
3282                 }
3283                 locinput = e;
3284                 nextchr = UCHARAT(locinput);
3285                 break;
3286             }
3287
3288             /* Neither the target and the pattern are utf8. */
3289
3290             /* Inline the first character, for speed. */
3291             if (UCHARAT(s) != nextchr &&
3292                 UCHARAT(s) != ((OP(scan) == EXACTF)
3293                                ? PL_fold : PL_fold_locale)[nextchr])
3294                 sayNO;
3295             if (PL_regeol - locinput < ln)
3296                 sayNO;
3297             if (ln > 1 && (OP(scan) == EXACTF
3298                            ? ibcmp(s, locinput, ln)
3299                            : ibcmp_locale(s, locinput, ln)))
3300                 sayNO;
3301             locinput += ln;
3302             nextchr = UCHARAT(locinput);
3303             break;
3304             }
3305         case ANYOF:
3306             if (do_utf8) {
3307                 STRLEN inclasslen = PL_regeol - locinput;
3308
3309                 if (!reginclass(rex, scan, (U8*)locinput, &inclasslen, do_utf8))
3310                     goto anyof_fail;
3311                 if (locinput >= PL_regeol)
3312                     sayNO;
3313                 locinput += inclasslen ? inclasslen : UTF8SKIP(locinput);
3314                 nextchr = UCHARAT(locinput);
3315                 break;
3316             }
3317             else {
3318                 if (nextchr < 0)
3319                     nextchr = UCHARAT(locinput);
3320                 if (!REGINCLASS(rex, scan, (U8*)locinput))
3321                     goto anyof_fail;
3322                 if (!nextchr && locinput >= PL_regeol)
3323                     sayNO;
3324                 nextchr = UCHARAT(++locinput);
3325                 break;
3326             }
3327         anyof_fail:
3328             /* If we might have the case of the German sharp s
3329              * in a casefolding Unicode character class. */
3330
3331             if (ANYOF_FOLD_SHARP_S(scan, locinput, PL_regeol)) {
3332                  locinput += SHARP_S_SKIP;
3333                  nextchr = UCHARAT(locinput);
3334             }
3335             else
3336                  sayNO;
3337             break;
3338         case ALNUML:
3339             PL_reg_flags |= RF_tainted;
3340             /* FALL THROUGH */
3341         case ALNUM:
3342             if (!nextchr)
3343                 sayNO;
3344             if (do_utf8) {
3345                 LOAD_UTF8_CHARCLASS_ALNUM();
3346                 if (!(OP(scan) == ALNUM
3347                       ? (bool)swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
3348                       : isALNUM_LC_utf8((U8*)locinput)))
3349                 {
3350                     sayNO;
3351                 }
3352                 locinput += PL_utf8skip[nextchr];
3353                 nextchr = UCHARAT(locinput);
3354                 break;
3355             }
3356             if (!(OP(scan) == ALNUM
3357                   ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
3358                 sayNO;
3359             nextchr = UCHARAT(++locinput);
3360             break;
3361         case NALNUML:
3362             PL_reg_flags |= RF_tainted;
3363             /* FALL THROUGH */
3364         case NALNUM:
3365             if (!nextchr && locinput >= PL_regeol)
3366                 sayNO;
3367             if (do_utf8) {
3368                 LOAD_UTF8_CHARCLASS_ALNUM();
3369                 if (OP(scan) == NALNUM
3370                     ? (bool)swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
3371                     : isALNUM_LC_utf8((U8*)locinput))
3372                 {
3373                     sayNO;
3374                 }
3375                 locinput += PL_utf8skip[nextchr];
3376                 nextchr = UCHARAT(locinput);
3377                 break;
3378             }
3379             if (OP(scan) == NALNUM
3380                 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
3381                 sayNO;
3382             nextchr = UCHARAT(++locinput);
3383             break;
3384         case BOUNDL:
3385         case NBOUNDL:
3386             PL_reg_flags |= RF_tainted;
3387             /* FALL THROUGH */
3388         case BOUND:
3389         case NBOUND:
3390             /* was last char in word? */
3391             if (do_utf8) {
3392                 if (locinput == PL_bostr)
3393                     ln = '\n';
3394                 else {
3395                     const U8 * const r = reghop3((U8*)locinput, -1, (U8*)PL_bostr);
3396
3397                     ln = utf8n_to_uvchr(r, UTF8SKIP(r), 0, uniflags);
3398                 }
3399                 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
3400                     ln = isALNUM_uni(ln);
3401                     LOAD_UTF8_CHARCLASS_ALNUM();
3402                     n = swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8);
3403                 }
3404                 else {
3405                     ln = isALNUM_LC_uvchr(UNI_TO_NATIVE(ln));
3406                     n = isALNUM_LC_utf8((U8*)locinput);
3407                 }
3408             }
3409             else {
3410                 ln = (locinput != PL_bostr) ?
3411                     UCHARAT(locinput - 1) : '\n';
3412                 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
3413                     ln = isALNUM(ln);
3414                     n = isALNUM(nextchr);
3415                 }
3416                 else {
3417                     ln = isALNUM_LC(ln);
3418                     n = isALNUM_LC(nextchr);
3419                 }
3420             }
3421             if (((!ln) == (!n)) == (OP(scan) == BOUND ||
3422                                     OP(scan) == BOUNDL))
3423                     sayNO;
3424             break;
3425         case SPACEL:
3426             PL_reg_flags |= RF_tainted;
3427             /* FALL THROUGH */
3428         case SPACE:
3429             if (!nextchr)
3430                 sayNO;
3431             if (do_utf8) {
3432                 if (UTF8_IS_CONTINUED(nextchr)) {
3433                     LOAD_UTF8_CHARCLASS_SPACE();
3434                     if (!(OP(scan) == SPACE
3435                           ? (bool)swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
3436                           : isSPACE_LC_utf8((U8*)locinput)))
3437                     {
3438                         sayNO;
3439                     }
3440                     locinput += PL_utf8skip[nextchr];
3441                     nextchr = UCHARAT(locinput);
3442                     break;
3443                 }
3444                 if (!(OP(scan) == SPACE
3445                       ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
3446                     sayNO;
3447                 nextchr = UCHARAT(++locinput);
3448             }
3449             else {
3450                 if (!(OP(scan) == SPACE
3451                       ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
3452                     sayNO;
3453                 nextchr = UCHARAT(++locinput);
3454             }
3455             break;
3456         case NSPACEL:
3457             PL_reg_flags |= RF_tainted;
3458             /* FALL THROUGH */
3459         case NSPACE:
3460             if (!nextchr && locinput >= PL_regeol)
3461                 sayNO;
3462             if (do_utf8) {
3463                 LOAD_UTF8_CHARCLASS_SPACE();
3464                 if (OP(scan) == NSPACE
3465                     ? (bool)swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
3466                     : isSPACE_LC_utf8((U8*)locinput))
3467                 {
3468                     sayNO;
3469                 }
3470                 locinput += PL_utf8skip[nextchr];
3471                 nextchr = UCHARAT(locinput);
3472                 break;
3473             }
3474             if (OP(scan) == NSPACE
3475                 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
3476                 sayNO;
3477             nextchr = UCHARAT(++locinput);
3478             break;
3479         case DIGITL:
3480             PL_reg_flags |= RF_tainted;
3481             /* FALL THROUGH */
3482         case DIGIT:
3483             if (!nextchr)
3484                 sayNO;
3485             if (do_utf8) {
3486                 LOAD_UTF8_CHARCLASS_DIGIT();
3487                 if (!(OP(scan) == DIGIT
3488                       ? (bool)swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
3489                       : isDIGIT_LC_utf8((U8*)locinput)))
3490                 {
3491                     sayNO;
3492                 }
3493                 locinput += PL_utf8skip[nextchr];
3494                 nextchr = UCHARAT(locinput);
3495                 break;
3496             }
3497             if (!(OP(scan) == DIGIT
3498                   ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
3499                 sayNO;
3500             nextchr = UCHARAT(++locinput);
3501             break;
3502         case NDIGITL:
3503             PL_reg_flags |= RF_tainted;
3504             /* FALL THROUGH */
3505         case NDIGIT:
3506             if (!nextchr && locinput >= PL_regeol)
3507                 sayNO;
3508             if (do_utf8) {
3509                 LOAD_UTF8_CHARCLASS_DIGIT();
3510                 if (OP(scan) == NDIGIT
3511                     ? (bool)swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
3512                     : isDIGIT_LC_utf8((U8*)locinput))
3513                 {
3514                     sayNO;
3515                 }
3516                 locinput += PL_utf8skip[nextchr];
3517                 nextchr = UCHARAT(locinput);
3518                 break;
3519             }
3520             if (OP(scan) == NDIGIT
3521                 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
3522                 sayNO;
3523             nextchr = UCHARAT(++locinput);
3524             break;
3525         case CLUMP:
3526             if (locinput >= PL_regeol)
3527                 sayNO;
3528             if  (do_utf8) {
3529                 LOAD_UTF8_CHARCLASS_MARK();
3530                 if (swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
3531                     sayNO;
3532                 locinput += PL_utf8skip[nextchr];
3533                 while (locinput < PL_regeol &&
3534                        swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
3535                     locinput += UTF8SKIP(locinput);
3536                 if (locinput > PL_regeol)
3537                     sayNO;
3538             }
3539             else
3540                locinput++;
3541             nextchr = UCHARAT(locinput);
3542             break;
3543
3544         case NREFFL:
3545         {
3546             char *s;
3547             char type;
3548             PL_reg_flags |= RF_tainted;
3549             /* FALL THROUGH */
3550         case NREF:
3551         case NREFF:
3552             type = OP(scan);
3553             n = reg_check_named_buff_matched(rex,scan);
3554
3555             if ( n ) {
3556                 type = REF + ( type - NREF );
3557                 goto do_ref;
3558             } else {
3559                 sayNO;
3560             }
3561             /* unreached */
3562         case REFFL:
3563             PL_reg_flags |= RF_tainted;
3564             /* FALL THROUGH */
3565         case REF:
3566         case REFF:
3567             n = ARG(scan);  /* which paren pair */
3568             type = OP(scan);
3569           do_ref:
3570             ln = PL_regoffs[n].start;
3571             PL_reg_leftiter = PL_reg_maxiter;           /* Void cache */
3572             if (*PL_reglastparen < n || ln == -1)
3573                 sayNO;                  /* Do not match unless seen CLOSEn. */
3574             if (ln == PL_regoffs[n].end)
3575                 break;
3576
3577             s = PL_bostr + ln;
3578             if (do_utf8 && type != REF) {       /* REF can do byte comparison */
3579                 char *l = locinput;
3580                 const char *e = PL_bostr + PL_regoffs[n].end;
3581                 /*
3582                  * Note that we can't do the "other character" lookup trick as
3583                  * in the 8-bit case (no pun intended) because in Unicode we
3584                  * have to map both upper and title case to lower case.
3585                  */
3586                 if (type == REFF) {
3587                     while (s < e) {
3588                         STRLEN ulen1, ulen2;
3589                         U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
3590                         U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
3591
3592                         if (l >= PL_regeol)
3593                             sayNO;
3594                         toLOWER_utf8((U8*)s, tmpbuf1, &ulen1);
3595                         toLOWER_utf8((U8*)l, tmpbuf2, &ulen2);
3596                         if (ulen1 != ulen2 || memNE((char *)tmpbuf1, (char *)tmpbuf2, ulen1))
3597                             sayNO;
3598                         s += ulen1;
3599                         l += ulen2;
3600                     }
3601                 }
3602                 locinput = l;
3603                 nextchr = UCHARAT(locinput);
3604                 break;
3605             }
3606
3607             /* Inline the first character, for speed. */
3608             if (UCHARAT(s) != nextchr &&
3609                 (type == REF ||
3610                  (UCHARAT(s) != (type == REFF
3611                                   ? PL_fold : PL_fold_locale)[nextchr])))
3612                 sayNO;
3613             ln = PL_regoffs[n].end - ln;
3614             if (locinput + ln > PL_regeol)
3615                 sayNO;
3616             if (ln > 1 && (type == REF
3617                            ? memNE(s, locinput, ln)
3618                            : (type == REFF
3619                               ? ibcmp(s, locinput, ln)
3620                               : ibcmp_locale(s, locinput, ln))))
3621                 sayNO;
3622             locinput += ln;
3623             nextchr = UCHARAT(locinput);
3624             break;
3625         }
3626         case NOTHING:
3627         case TAIL:
3628             break;
3629         case BACK:
3630             break;
3631
3632 #undef  ST
3633 #define ST st->u.eval
3634         {
3635             SV *ret;
3636             SV *re_sv;
3637             regexp *re;
3638             regexp_internal *rei;
3639             regnode *startpoint;
3640
3641         case GOSTART:
3642         case GOSUB: /*    /(...(?1))/   /(...(?&foo))/   */
3643             if (cur_eval && cur_eval->locinput==locinput) {
3644                 if (cur_eval->u.eval.close_paren == (U32)ARG(scan))
3645                     Perl_croak(aTHX_ "Infinite recursion in regex");
3646                 if ( ++nochange_depth > max_nochange_depth )
3647                     Perl_croak(aTHX_
3648                         "Pattern subroutine nesting without pos change"
3649                         " exceeded limit in regex");
3650             } else {
3651                 nochange_depth = 0;
3652             }
3653             re_sv = rex_sv;
3654             re = rex;
3655             rei = rexi;
3656             (void)ReREFCNT_inc(rex_sv);
3657             if (OP(scan)==GOSUB) {
3658                 startpoint = scan + ARG2L(scan);
3659                 ST.close_paren = ARG(scan);
3660             } else {
3661                 startpoint = rei->program+1;
3662                 ST.close_paren = 0;
3663             }
3664             goto eval_recurse_doit;
3665             /* NOTREACHED */
3666         case EVAL:  /*   /(?{A})B/   /(??{A})B/  and /(?(?{A})X|Y)B/   */
3667             if (cur_eval && cur_eval->locinput==locinput) {
3668                 if ( ++nochange_depth > max_nochange_depth )
3669                     Perl_croak(aTHX_ "EVAL without pos change exceeded limit in regex");
3670             } else {
3671                 nochange_depth = 0;
3672             }
3673             {
3674                 /* execute the code in the {...} */
3675                 dSP;
3676                 SV ** const before = SP;
3677                 OP_4tree * const oop = PL_op;
3678                 COP * const ocurcop = PL_curcop;
3679                 PAD *old_comppad;
3680
3681                 n = ARG(scan);
3682                 PL_op = (OP_4tree*)rexi->data->data[n];
3683                 DEBUG_STATE_r( PerlIO_printf(Perl_debug_log,
3684                     "  re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
3685                 PAD_SAVE_LOCAL(old_comppad, (PAD*)rexi->data->data[n + 2]);
3686                 PL_regoffs[0].end = PL_reg_magic->mg_len = locinput - PL_bostr;
3687
3688                 if (sv_yes_mark) {
3689                     SV *sv_mrk = get_sv("REGMARK", 1);
3690                     sv_setsv(sv_mrk, sv_yes_mark);
3691                 }
3692
3693                 CALLRUNOPS(aTHX);                       /* Scalar context. */
3694                 SPAGAIN;
3695                 if (SP == before)
3696                     ret = &PL_sv_undef;   /* protect against empty (?{}) blocks. */
3697                 else {
3698                     ret = POPs;
3699                     PUTBACK;
3700                 }
3701
3702                 PL_op = oop;
3703                 PAD_RESTORE_LOCAL(old_comppad);
3704                 PL_curcop = ocurcop;
3705                 if (!logical) {
3706                     /* /(?{...})/ */
3707                     sv_setsv(save_scalar(PL_replgv), ret);
3708                     break;
3709                 }
3710             }
3711             if (logical == 2) { /* Postponed subexpression: /(??{...})/ */
3712                 logical = 0;
3713                 {
3714                     /* extract RE object from returned value; compiling if
3715                      * necessary */
3716                     MAGIC *mg = NULL;
3717                     REGEXP *rx = NULL;
3718
3719                     if (SvROK(ret)) {
3720                         SV *const sv = SvRV(ret);
3721
3722                         if (SvTYPE(sv) == SVt_REGEXP) {
3723                             rx = sv;
3724                         } else if (SvSMAGICAL(sv)) {
3725                             mg = mg_find(sv, PERL_MAGIC_qr);
3726                             assert(mg);
3727                         }
3728                     } else if (SvTYPE(ret) == SVt_REGEXP) {
3729                         rx = ret;
3730                     } else if (SvSMAGICAL(ret)) {
3731                         if (SvGMAGICAL(ret)) {
3732                             /* I don't believe that there is ever qr magic
3733                                here.  */
3734                             assert(!mg_find(ret, PERL_MAGIC_qr));
3735                             sv_unmagic(ret, PERL_MAGIC_qr);
3736                         }
3737                         else {
3738                             mg = mg_find(ret, PERL_MAGIC_qr);
3739                             /* testing suggests mg only ends up non-NULL for
3740                                scalars who were upgraded and compiled in the
3741                                else block below. In turn, this is only
3742                                triggered in the "postponed utf8 string" tests
3743                                in t/op/pat.t  */
3744                         }
3745                     }
3746
3747                     if (mg) {
3748                         rx = mg->mg_obj; /*XXX:dmq*/
3749                         assert(rx);
3750                     }
3751                     if (rx) {
3752                         rx = reg_temp_copy(rx);
3753                     }
3754                     else {
3755                         U32 pm_flags = 0;
3756                         const I32 osize = PL_regsize;
3757
3758                         if (DO_UTF8(ret)) {
3759                             assert (SvUTF8(ret));
3760                         } else if (SvUTF8(ret)) {
3761                             /* Not doing UTF-8, despite what the SV says. Is
3762                                this only if we're trapped in use 'bytes'?  */
3763                             /* Make a copy of the octet sequence, but without
3764                                the flag on, as the compiler now honours the
3765                                SvUTF8 flag on ret.  */
3766                             STRLEN len;
3767                             const char *const p = SvPV(ret, len);
3768                             ret = newSVpvn_flags(p, len, SVs_TEMP);
3769                         }
3770                         rx = CALLREGCOMP(ret, pm_flags);
3771                         if (!(SvFLAGS(ret)
3772                               & (SVs_TEMP | SVs_PADTMP | SVf_READONLY
3773                                  | SVs_GMG))) {
3774                             /* This isn't a first class regexp. Instead, it's
3775                                caching a regexp onto an existing, Perl visible
3776                                scalar.  */
3777                             sv_magic(ret, rx, PERL_MAGIC_qr, 0, 0);
3778                         }
3779                         PL_regsize = osize;
3780                     }
3781                     re_sv = rx;
3782                     re = (struct regexp *)SvANY(rx);
3783                 }
3784                 RXp_MATCH_COPIED_off(re);
3785                 re->subbeg = rex->subbeg;
3786                 re->sublen = rex->sublen;
3787                 rei = RXi_GET(re);
3788                 DEBUG_EXECUTE_r(
3789                     debug_start_match(re_sv, do_utf8, locinput, PL_regeol,
3790                         "Matching embedded");
3791                 );
3792                 startpoint = rei->program + 1;
3793                 ST.close_paren = 0; /* only used for GOSUB */
3794                 /* borrowed from regtry */
3795                 if (PL_reg_start_tmpl <= re->nparens) {
3796                     PL_reg_start_tmpl = re->nparens*3/2 + 3;
3797                     if(PL_reg_start_tmp)
3798                         Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
3799                     else
3800                         Newx(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
3801                 }
3802
3803         eval_recurse_doit: /* Share code with GOSUB below this line */
3804                 /* run the pattern returned from (??{...}) */
3805                 ST.cp = regcppush(0);   /* Save *all* the positions. */
3806                 REGCP_SET(ST.lastcp);
3807
3808                 PL_regoffs = re->offs; /* essentially NOOP on GOSUB */
3809
3810                 /* see regtry, specifically PL_reglast(?:close)?paren is a pointer! (i dont know why) :dmq */
3811                 PL_reglastparen = &re->lastparen;
3812                 PL_reglastcloseparen = &re->lastcloseparen;
3813                 re->lastparen = 0;
3814                 re->lastcloseparen = 0;
3815
3816                 PL_reginput = locinput;
3817                 PL_regsize = 0;
3818
3819                 /* XXXX This is too dramatic a measure... */
3820                 PL_reg_maxiter = 0;
3821
3822                 ST.toggle_reg_flags = PL_reg_flags;
3823                 if (RX_UTF8(re_sv))
3824                     PL_reg_flags |= RF_utf8;
3825                 else
3826                     PL_reg_flags &= ~RF_utf8;
3827                 ST.toggle_reg_flags ^= PL_reg_flags; /* diff of old and new */
3828
3829                 ST.prev_rex = rex_sv;
3830                 ST.prev_curlyx = cur_curlyx;
3831                 SETREX(rex_sv,re_sv);
3832                 rex = re;
3833                 rexi = rei;
3834                 cur_curlyx = NULL;
3835                 ST.B = next;
3836                 ST.prev_eval = cur_eval;
3837                 cur_eval = st;
3838                 /* now continue from first node in postoned RE */
3839                 PUSH_YES_STATE_GOTO(EVAL_AB, startpoint);
3840                 /* NOTREACHED */
3841             }
3842             /* logical is 1,   /(?(?{...})X|Y)/ */
3843             sw = (bool)SvTRUE(ret);
3844             logical = 0;
3845             break;
3846         }
3847
3848         case EVAL_AB: /* cleanup after a successful (??{A})B */
3849             /* note: this is called twice; first after popping B, then A */
3850             PL_reg_flags ^= ST.toggle_reg_flags;
3851             ReREFCNT_dec(rex_sv);
3852             SETREX(rex_sv,ST.prev_rex);
3853             rex = (struct regexp *)SvANY(rex_sv);
3854             rexi = RXi_GET(rex);
3855             regcpblow(ST.cp);
3856             cur_eval = ST.prev_eval;
3857             cur_curlyx = ST.prev_curlyx;
3858
3859             PL_reglastparen = &rex->lastparen;
3860             PL_reglastcloseparen = &rex->lastcloseparen;
3861
3862             /* XXXX This is too dramatic a measure... */
3863             PL_reg_maxiter = 0;
3864             if ( nochange_depth )
3865                 nochange_depth--;
3866             sayYES;
3867
3868
3869         case EVAL_AB_fail: /* unsuccessfully ran A or B in (??{A})B */
3870             /* note: this is called twice; first after popping B, then A */
3871             PL_reg_flags ^= ST.toggle_reg_flags;
3872             ReREFCNT_dec(rex_sv);
3873             SETREX(rex_sv,ST.prev_rex);
3874             rex = (struct regexp *)SvANY(rex_sv);
3875             rexi = RXi_GET(rex);
3876             PL_reglastparen = &rex->lastparen;
3877             PL_reglastcloseparen = &rex->lastcloseparen;
3878
3879             PL_reginput = locinput;
3880             REGCP_UNWIND(ST.lastcp);
3881             regcppop(rex);
3882             cur_eval = ST.prev_eval;
3883             cur_curlyx = ST.prev_curlyx;
3884             /* XXXX This is too dramatic a measure... */
3885             PL_reg_maxiter = 0;
3886             if ( nochange_depth )
3887                 nochange_depth--;
3888             sayNO_SILENT;
3889 #undef ST
3890
3891         case OPEN:
3892             n = ARG(scan);  /* which paren pair */
3893             PL_reg_start_tmp[n] = locinput;
3894             if (n > PL_regsize)
3895                 PL_regsize = n;
3896             lastopen = n;
3897             break;
3898         case CLOSE:
3899             n = ARG(scan);  /* which paren pair */
3900             PL_regoffs[n].start = PL_reg_start_tmp[n] - PL_bostr;
3901             PL_regoffs[n].end = locinput - PL_bostr;
3902             /*if (n > PL_regsize)
3903                 PL_regsize = n;*/
3904             if (n > *PL_reglastparen)
3905                 *PL_reglastparen = n;
3906             *PL_reglastcloseparen = n;
3907             if (cur_eval && cur_eval->u.eval.close_paren == n) {
3908                 goto fake_end;
3909             }
3910             break;
3911         case ACCEPT:
3912             if (ARG(scan)){
3913                 regnode *cursor;
3914                 for (cursor=scan;
3915                      cursor && OP(cursor)!=END;
3916                      cursor=regnext(cursor))
3917                 {
3918                     if ( OP(cursor)==CLOSE ){
3919                         n = ARG(cursor);
3920                         if ( n <= lastopen ) {
3921                             PL_regoffs[n].start
3922                                 = PL_reg_start_tmp[n] - PL_bostr;
3923                             PL_regoffs[n].end = locinput - PL_bostr;
3924                             /*if (n > PL_regsize)
3925                             PL_regsize = n;*/
3926                             if (n > *PL_reglastparen)
3927                                 *PL_reglastparen = n;
3928                             *PL_reglastcloseparen = n;
3929                             if ( n == ARG(scan) || (cur_eval &&
3930                                 cur_eval->u.eval.close_paren == n))
3931                                 break;
3932                         }
3933                     }
3934                 }
3935             }
3936             goto fake_end;
3937             /*NOTREACHED*/
3938         case GROUPP:
3939             n = ARG(scan);  /* which paren pair */
3940             sw = (bool)(*PL_reglastparen >= n && PL_regoffs[n].end != -1);
3941             break;
3942         case NGROUPP:
3943             /* reg_check_named_buff_matched returns 0 for no match */
3944             sw = (bool)(0 < reg_check_named_buff_matched(rex,scan));
3945             break;
3946         case INSUBP:
3947             n = ARG(scan);
3948             sw = (cur_eval && (!n || cur_eval->u.eval.close_paren == n));
3949             break;
3950         case DEFINEP:
3951             sw = 0;
3952             break;
3953         case IFTHEN:
3954             PL_reg_leftiter = PL_reg_maxiter;           /* Void cache */
3955             if (sw)
3956                 next = NEXTOPER(NEXTOPER(scan));
3957             else {
3958                 next = scan + ARG(scan);
3959                 if (OP(next) == IFTHEN) /* Fake one. */
3960                     next = NEXTOPER(NEXTOPER(next));
3961             }
3962             break;
3963         case LOGICAL:
3964             logical = scan->flags;
3965             break;
3966
3967 /*******************************************************************
3968
3969 The CURLYX/WHILEM pair of ops handle the most generic case of the /A*B/
3970 pattern, where A and B are subpatterns. (For simple A, CURLYM or
3971 STAR/PLUS/CURLY/CURLYN are used instead.)
3972
3973 A*B is compiled as <CURLYX><A><WHILEM><B>
3974
3975 On entry to the subpattern, CURLYX is called. This pushes a CURLYX
3976 state, which contains the current count, initialised to -1. It also sets
3977 cur_curlyx to point to this state, with any previous value saved in the
3978 state block.
3979
3980 CURLYX then jumps straight to the WHILEM op, rather than executing A,
3981 since the pattern may possibly match zero times (i.e. it's a while {} loop
3982 rather than a do {} while loop).
3983
3984 Each entry to WHILEM represents a successful match of A. The count in the
3985 CURLYX block is incremented, another WHILEM state is pushed, and execution
3986 passes to A or B depending on greediness and the current count.
3987
3988 For example, if matching against the string a1a2a3b (where the aN are
3989 substrings that match /A/), then the match progresses as follows: (the
3990 pushed states are interspersed with the bits of strings matched so far):
3991
3992     <CURLYX cnt=-1>
3993     <CURLYX cnt=0><WHILEM>
3994     <CURLYX cnt=1><WHILEM> a1 <WHILEM>
3995     <CURLYX cnt=2><WHILEM> a1 <WHILEM> a2 <WHILEM>
3996     <CURLYX cnt=3><WHILEM> a1 <WHILEM> a2 <WHILEM> a3 <WHILEM>
3997     <CURLYX cnt=3><WHILEM> a1 <WHILEM> a2 <WHILEM> a3 <WHILEM> b
3998
3999 (Contrast this with something like CURLYM, which maintains only a single
4000 backtrack state:
4001
4002     <CURLYM cnt=0> a1
4003     a1 <CURLYM cnt=1> a2
4004     a1 a2 <CURLYM cnt=2> a3
4005     a1 a2 a3 <CURLYM cnt=3> b
4006 )
4007
4008 Each WHILEM state block marks a point to backtrack to upon partial failure
4009 of A or B, and also contains some minor state data related to that
4010 iteration.  The CURLYX block, pointed to by cur_curlyx, contains the
4011 overall state, such as the count, and pointers to the A and B ops.
4012
4013 This is complicated slightly by nested CURLYX/WHILEM's. Since cur_curlyx
4014 must always point to the *current* CURLYX block, the rules are:
4015
4016 When executing CURLYX, save the old cur_curlyx in the CURLYX state block,
4017 and set cur_curlyx to point the new block.
4018
4019 When popping the CURLYX block after a successful or unsuccessful match,
4020 restore the previous cur_curlyx.
4021
4022 When WHILEM is about to execute B, save the current cur_curlyx, and set it
4023 to the outer one saved in the CURLYX block.
4024
4025 When popping the WHILEM block after a successful or unsuccessful B match,
4026 restore the previous cur_curlyx.
4027
4028 Here's an example for the pattern (AI* BI)*BO
4029 I and O refer to inner and outer, C and W refer to CURLYX and WHILEM:
4030
4031 cur_
4032 curlyx backtrack stack
4033 ------ ---------------
4034 NULL
4035 CO     <CO prev=NULL> <WO>
4036 CI     <CO prev=NULL> <WO> <CI prev=CO> <WI> ai
4037 CO     <CO prev=NULL> <WO> <CI prev=CO> <WI> ai <WI prev=CI> bi
4038 NULL   <CO prev=NULL> <WO> <CI prev=CO> <WI> ai <WI prev=CI> bi <WO prev=CO> bo
4039
4040 At this point the pattern succeeds, and we work back down the stack to
4041 clean up, restoring as we go:
4042
4043 CO     <CO prev=NULL> <WO> <CI prev=CO> <WI> ai <WI prev=CI> bi
4044 CI     <CO prev=NULL> <WO> <CI prev=CO> <WI> ai
4045 CO     <CO prev=NULL> <WO>
4046 NULL
4047
4048 *******************************************************************/
4049
4050 #define ST st->u.curlyx
4051
4052         case CURLYX:    /* start of /A*B/  (for complex A) */
4053         {
4054             /* No need to save/restore up to this paren */
4055             I32 parenfloor = scan->flags;
4056
4057             assert(next); /* keep Coverity happy */
4058             if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
4059                 next += ARG(next);
4060
4061             /* XXXX Probably it is better to teach regpush to support
4062                parenfloor > PL_regsize... */
4063             if (parenfloor > (I32)*PL_reglastparen)
4064                 parenfloor = *PL_reglastparen; /* Pessimization... */
4065
4066             ST.prev_curlyx= cur_curlyx;
4067             cur_curlyx = st;
4068             ST.cp = PL_savestack_ix;
4069
4070             /* these fields contain the state of the current curly.
4071              * they are accessed by subsequent WHILEMs */
4072             ST.parenfloor = parenfloor;
4073             ST.min = ARG1(scan);
4074             ST.max = ARG2(scan);
4075             ST.A = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
4076             ST.B = next;
4077             ST.minmod = minmod;
4078             minmod = 0;
4079             ST.count = -1;      /* this will be updated by WHILEM */
4080             ST.lastloc = NULL;  /* this will be updated by WHILEM */
4081
4082             PL_reginput = locinput;
4083             PUSH_YES_STATE_GOTO(CURLYX_end, PREVOPER(next));
4084             /* NOTREACHED */
4085         }
4086
4087         case CURLYX_end: /* just finished matching all of A*B */
4088             cur_curlyx = ST.prev_curlyx;
4089             sayYES;
4090             /* NOTREACHED */
4091
4092         case CURLYX_end_fail: /* just failed to match all of A*B */
4093             regcpblow(ST.cp);
4094             cur_curlyx = ST.prev_curlyx;
4095             sayNO;
4096             /* NOTREACHED */
4097
4098
4099 #undef ST
4100 #define ST st->u.whilem
4101
4102         case WHILEM:     /* just matched an A in /A*B/  (for complex A) */
4103         {
4104             /* see the discussion above about CURLYX/WHILEM */
4105             I32 n;
4106             assert(cur_curlyx); /* keep Coverity happy */
4107             n = ++cur_curlyx->u.curlyx.count; /* how many A's matched */
4108             ST.save_lastloc = cur_curlyx->u.curlyx.lastloc;
4109             ST.cache_offset = 0;
4110             ST.cache_mask = 0;
4111
4112             PL_reginput = locinput;
4113
4114             DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
4115                   "%*s  whilem: matched %ld out of %ld..%ld\n",
4116                   REPORT_CODE_OFF+depth*2, "", (long)n,
4117                   (long)cur_curlyx->u.curlyx.min,
4118                   (long)cur_curlyx->u.curlyx.max)
4119             );
4120
4121             /* First just match a string of min A's. */
4122
4123             if (n < cur_curlyx->u.curlyx.min) {
4124                 cur_curlyx->u.curlyx.lastloc = locinput;
4125                 PUSH_STATE_GOTO(WHILEM_A_pre, cur_curlyx->u.curlyx.A);
4126                 /* NOTREACHED */
4127             }
4128
4129             /* If degenerate A matches "", assume A done. */
4130
4131             if (locinput == cur_curlyx->u.curlyx.lastloc) {
4132                 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
4133                    "%*s  whilem: empty match detected, trying continuation...\n",
4134                    REPORT_CODE_OFF+depth*2, "")
4135                 );
4136                 goto do_whilem_B_max;
4137             }
4138
4139             /* super-linear cache processing */
4140
4141             if (scan->flags) {
4142
4143                 if (!PL_reg_maxiter) {
4144                     /* start the countdown: Postpone detection until we
4145                      * know the match is not *that* much linear. */
4146                     PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
4147                     /* possible overflow for long strings and many CURLYX's */
4148                     if (PL_reg_maxiter < 0)
4149                         PL_reg_maxiter = I32_MAX;
4150                     PL_reg_leftiter = PL_reg_maxiter;
4151                 }
4152
4153                 if (PL_reg_leftiter-- == 0) {
4154                     /* initialise cache */
4155                     const I32 size = (PL_reg_maxiter + 7)/8;
4156                     if (PL_reg_poscache) {
4157                         if ((I32)PL_reg_poscache_size < size) {
4158                             Renew(PL_reg_poscache, size, char);
4159                             PL_reg_poscache_size = size;
4160                         }
4161                         Zero(PL_reg_poscache, size, char);
4162                     }
4163                     else {
4164                         PL_reg_poscache_size = size;
4165                         Newxz(PL_reg_poscache, size, char);
4166                     }
4167                     DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
4168       "%swhilem: Detected a super-linear match, switching on caching%s...\n",
4169                               PL_colors[4], PL_colors[5])
4170                     );
4171                 }
4172
4173                 if (PL_reg_leftiter < 0) {
4174                     /* have we already failed at this position? */
4175                     I32 offset, mask;
4176                     offset  = (scan->flags & 0xf) - 1
4177                                 + (locinput - PL_bostr)  * (scan->flags>>4);
4178                     mask    = 1 << (offset % 8);
4179                     offset /= 8;
4180                     if (PL_reg_poscache[offset] & mask) {
4181                         DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
4182                             "%*s  whilem: (cache) already tried at this position...\n",
4183                             REPORT_CODE_OFF+depth*2, "")
4184                         );
4185                         sayNO; /* cache records failure */
4186                     }
4187                     ST.cache_offset = offset;
4188                     ST.cache_mask   = mask;
4189                 }
4190             }
4191
4192             /* Prefer B over A for minimal matching. */
4193
4194             if (cur_curlyx->u.curlyx.minmod) {
4195                 ST.save_curlyx = cur_curlyx;
4196                 cur_curlyx = cur_curlyx->u.curlyx.prev_curlyx;
4197                 ST.cp = regcppush(ST.save_curlyx->u.curlyx.parenfloor);
4198                 REGCP_SET(ST.lastcp);
4199                 PUSH_YES_STATE_GOTO(WHILEM_B_min, ST.save_curlyx->u.curlyx.B);
4200                 /* NOTREACHED */
4201             }
4202
4203             /* Prefer A over B for maximal matching. */
4204
4205             if (n < cur_curlyx->u.curlyx.max) { /* More greed allowed? */
4206                 ST.cp = regcppush(cur_curlyx->u.curlyx.parenfloor);
4207                 cur_curlyx->u.curlyx.lastloc = locinput;
4208                 REGCP_SET(ST.lastcp);
4209                 PUSH_STATE_GOTO(WHILEM_A_max, cur_curlyx->u.curlyx.A);
4210                 /* NOTREACHED */
4211             }
4212             goto do_whilem_B_max;
4213         }
4214         /* NOTREACHED */
4215
4216         case WHILEM_B_min: /* just matched B in a minimal match */
4217         case WHILEM_B_max: /* just matched B in a maximal match */
4218             cur_curlyx = ST.save_curlyx;
4219             sayYES;
4220             /* NOTREACHED */
4221
4222         case WHILEM_B_max_fail: /* just failed to match B in a maximal match */
4223             cur_curlyx = ST.save_curlyx;
4224             cur_curlyx->u.curlyx.lastloc = ST.save_lastloc;
4225             cur_curlyx->u.curlyx.count--;
4226             CACHEsayNO;
4227             /* NOTREACHED */
4228
4229         case WHILEM_A_min_fail: /* just failed to match A in a minimal match */
4230             REGCP_UNWIND(ST.lastcp);
4231             regcppop(rex);
4232             /* FALL THROUGH */
4233         case WHILEM_A_pre_fail: /* just failed to match even minimal A */
4234             cur_curlyx->u.curlyx.lastloc = ST.save_lastloc;
4235             cur_curlyx->u.curlyx.count--;
4236             CACHEsayNO;
4237             /* NOTREACHED */
4238
4239         case WHILEM_A_max_fail: /* just failed to match A in a maximal match */
4240             REGCP_UNWIND(ST.lastcp);
4241             regcppop(rex);      /* Restore some previous $<digit>s? */
4242             PL_reginput = locinput;
4243             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
4244                 "%*s  whilem: failed, trying continuation...\n",
4245                 REPORT_CODE_OFF+depth*2, "")
4246             );
4247           do_whilem_B_max:
4248             if (cur_curlyx->u.curlyx.count >= REG_INFTY
4249                 && ckWARN(WARN_REGEXP)
4250                 && !(PL_reg_flags & RF_warned))
4251             {
4252                 PL_reg_flags |= RF_warned;
4253                 Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s limit (%d) exceeded",
4254                      "Complex regular subexpression recursion",
4255                      REG_INFTY - 1);
4256             }
4257
4258             /* now try B */
4259             ST.save_curlyx = cur_curlyx;
4260             cur_curlyx = cur_curlyx->u.curlyx.prev_curlyx;
4261             PUSH_YES_STATE_GOTO(WHILEM_B_max, ST.save_curlyx->u.curlyx.B);
4262             /* NOTREACHED */
4263
4264         case WHILEM_B_min_fail: /* just failed to match B in a minimal match */
4265             cur_curlyx = ST.save_curlyx;
4266             REGCP_UNWIND(ST.lastcp);
4267             regcppop(rex);
4268
4269             if (cur_curlyx->u.curlyx.count >= cur_curlyx->u.curlyx.max) {
4270                 /* Maximum greed exceeded */
4271                 if (cur_curlyx->u.curlyx.count >= REG_INFTY
4272                     && ckWARN(WARN_REGEXP)
4273                     && !(PL_reg_flags & RF_warned))
4274                 {
4275                     PL_reg_flags |= RF_warned;
4276                     Perl_warner(aTHX_ packWARN(WARN_REGEXP),
4277                         "%s limit (%d) exceeded",
4278                         "Complex regular subexpression recursion",
4279                         REG_INFTY - 1);
4280                 }
4281                 cur_curlyx->u.curlyx.count--;
4282                 CACHEsayNO;
4283             }
4284
4285             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
4286                 "%*s  trying longer...\n", REPORT_CODE_OFF+depth*2, "")
4287             );
4288             /* Try grabbing another A and see if it helps. */
4289             PL_reginput = locinput;
4290             cur_curlyx->u.curlyx.lastloc = locinput;
4291             ST.cp = regcppush(cur_curlyx->u.curlyx.parenfloor);
4292             REGCP_SET(ST.lastcp);
4293             PUSH_STATE_GOTO(WHILEM_A_min, ST.save_curlyx->u.curlyx.A);
4294             /* NOTREACHED */
4295
4296 #undef  ST
4297 #define ST st->u.branch
4298
4299         case BRANCHJ:       /*  /(...|A|...)/ with long next pointer */
4300             next = scan + ARG(scan);
4301             if (next == scan)
4302                 next = NULL;
4303             scan = NEXTOPER(scan);
4304             /* FALL THROUGH */
4305
4306         case BRANCH:        /*  /(...|A|...)/ */
4307             scan = NEXTOPER(scan); /* scan now points to inner node */
4308             ST.lastparen = *PL_reglastparen;
4309             ST.next_branch = next;
4310             REGCP_SET(ST.cp);
4311             PL_reginput = locinput;
4312
4313             /* Now go into the branch */
4314             if (has_cutgroup) {
4315                 PUSH_YES_STATE_GOTO(BRANCH_next, scan);
4316             } else {
4317                 PUSH_STATE_GOTO(BRANCH_next, scan);
4318             }
4319             /* NOTREACHED */
4320         case CUTGROUP:
4321             PL_reginput = locinput;
4322             sv_yes_mark = st->u.mark.mark_name = scan->flags ? NULL :
4323                 (SV*)rexi->data->data[ ARG( scan ) ];
4324             PUSH_STATE_GOTO(CUTGROUP_next,next);
4325             /* NOTREACHED */
4326         case CUTGROUP_next_fail:
4327             do_cutgroup = 1;
4328             no_final = 1;
4329             if (st->u.mark.mark_name)
4330                 sv_commit = st->u.mark.mark_name;
4331             sayNO;
4332             /* NOTREACHED */
4333         case BRANCH_next:
4334             sayYES;
4335             /* NOTREACHED */
4336         case BRANCH_next_fail: /* that branch failed; try the next, if any */
4337             if (do_cutgroup) {
4338                 do_cutgroup = 0;
4339                 no_final = 0;
4340             }
4341             REGCP_UNWIND(ST.cp);
4342             for (n = *PL_reglastparen; n > ST.lastparen; n--)
4343                 PL_regoffs[n].end = -1;
4344             *PL_reglastparen = n;
4345             /*dmq: *PL_reglastcloseparen = n; */
4346             scan = ST.next_branch;
4347             /* no more branches? */
4348             if (!scan || (OP(scan) != BRANCH && OP(scan) != BRANCHJ)) {
4349                 DEBUG_EXECUTE_r({
4350                     PerlIO_printf( Perl_debug_log,
4351                         "%*s  %sBRANCH failed...%s\n",
4352                         REPORT_CODE_OFF+depth*2, "",
4353                         PL_colors[4],
4354                         PL_colors[5] );
4355                 });
4356                 sayNO_SILENT;
4357             }
4358             continue; /* execute next BRANCH[J] op */
4359             /* NOTREACHED */
4360
4361         case MINMOD:
4362             minmod = 1;
4363             break;
4364
4365 #undef  ST
4366 #define ST st->u.curlym
4367
4368         case CURLYM:    /* /A{m,n}B/ where A is fixed-length */
4369
4370             /* This is an optimisation of CURLYX that enables us to push
4371              * only a single backtracking state, no matter now many matches
4372              * there are in {m,n}. It relies on the pattern being constant
4373              * length, with no parens to influence future backrefs
4374              */
4375
4376             ST.me = scan;
4377             scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
4378
4379             /* if paren positive, emulate an OPEN/CLOSE around A */
4380             if (ST.me->flags) {
4381                 U32 paren = ST.me->flags;
4382                 if (paren > PL_regsize)
4383                     PL_regsize = paren;
4384                 if (paren > *PL_reglastparen)
4385                     *PL_reglastparen = paren;
4386                 scan += NEXT_OFF(scan); /* Skip former OPEN. */
4387             }
4388             ST.A = scan;
4389             ST.B = next;
4390             ST.alen = 0;
4391             ST.count = 0;
4392             ST.minmod = minmod;
4393             minmod = 0;
4394             ST.c1 = CHRTEST_UNINIT;
4395             REGCP_SET(ST.cp);
4396
4397             if (!(ST.minmod ? ARG1(ST.me) : ARG2(ST.me))) /* min/max */
4398                 goto curlym_do_B;
4399
4400           curlym_do_A: /* execute the A in /A{m,n}B/  */
4401             PL_reginput = locinput;
4402             PUSH_YES_STATE_GOTO(CURLYM_A, ST.A); /* match A */
4403             /* NOTREACHED */
4404
4405         case CURLYM_A: /* we've just matched an A */
4406             locinput = st->locinput;
4407             nextchr = UCHARAT(locinput);
4408
4409             ST.count++;
4410             /* after first match, determine A's length: u.curlym.alen */
4411             if (ST.count == 1) {
4412                 if (PL_reg_match_utf8) {
4413                     char *s = locinput;
4414                     while (s < PL_reginput) {
4415                         ST.alen++;
4416                         s += UTF8SKIP(s);
4417                     }
4418                 }
4419                 else {
4420                     ST.alen = PL_reginput - locinput;
4421                 }
4422                 if (ST.alen == 0)
4423                     ST.count = ST.minmod ? ARG1(ST.me) : ARG2(ST.me);
4424             }
4425             DEBUG_EXECUTE_r(
4426                 PerlIO_printf(Perl_debug_log,
4427                           "%*s  CURLYM now matched %"IVdf" times, len=%"IVdf"...\n",
4428                           (int)(REPORT_CODE_OFF+(depth*2)), "",
4429                           (IV) ST.count, (IV)ST.alen)
4430             );
4431
4432             locinput = PL_reginput;
4433
4434             if (cur_eval && cur_eval->u.eval.close_paren &&
4435                 cur_eval->u.eval.close_paren == (U32)ST.me->flags)
4436                 goto fake_end;
4437
4438             if ( ST.count < (ST.minmod ? ARG1(ST.me) : ARG2(ST.me)) )
4439                 goto curlym_do_A; /* try to match another A */
4440             goto curlym_do_B; /* try to match B */
4441
4442         case CURLYM_A_fail: /* just failed to match an A */
4443             REGCP_UNWIND(ST.cp);
4444
4445             if (ST.minmod || ST.count < ARG1(ST.me) /* min*/
4446                 || (cur_eval && cur_eval->u.eval.close_paren &&
4447                     cur_eval->u.eval.close_paren == (U32)ST.me->flags))
4448                 sayNO;
4449
4450           curlym_do_B: /* execute the B in /A{m,n}B/  */
4451             PL_reginput = locinput;
4452             if (ST.c1 == CHRTEST_UNINIT) {
4453                 /* calculate c1 and c2 for possible match of 1st char
4454                  * following curly */
4455                 ST.c1 = ST.c2 = CHRTEST_VOID;
4456                 if (HAS_TEXT(ST.B) || JUMPABLE(ST.B)) {
4457                     regnode *text_node = ST.B;
4458                     if (! HAS_TEXT(text_node))
4459                         FIND_NEXT_IMPT(text_node);
4460                     /* this used to be
4461
4462                         (HAS_TEXT(text_node) && PL_regkind[OP(text_node)] == EXACT)
4463
4464                         But the former is redundant in light of the latter.
4465
4466                         if this changes back then the macro for
4467                         IS_TEXT and friends need to change.
4468                      */
4469                     if (PL_regkind[OP(text_node)] == EXACT)
4470                     {
4471
4472                         ST.c1 = (U8)*STRING(text_node);
4473                         ST.c2 =
4474                             (IS_TEXTF(text_node))
4475                             ? PL_fold[ST.c1]
4476                             : (IS_TEXTFL(text_node))
4477                                 ? PL_fold_locale[ST.c1]
4478                                 : ST.c1;
4479                     }
4480                 }
4481             }
4482
4483             DEBUG_EXECUTE_r(
4484                 PerlIO_printf(Perl_debug_log,
4485                     "%*s  CURLYM trying tail with matches=%"IVdf"...\n",
4486                     (int)(REPORT_CODE_OFF+(depth*2)),
4487                     "", (IV)ST.count)
4488                 );
4489             if (ST.c1 != CHRTEST_VOID
4490                     && UCHARAT(PL_reginput) != ST.c1
4491                     && UCHARAT(PL_reginput) != ST.c2)
4492             {
4493                 /* simulate B failing */
4494                 DEBUG_OPTIMISE_r(
4495                     PerlIO_printf(Perl_debug_log,
4496                         "%*s  CURLYM Fast bail c1=%"IVdf" c2=%"IVdf"\n",
4497                         (int)(REPORT_CODE_OFF+(depth*2)),"",
4498                         (IV)ST.c1,(IV)ST.c2
4499                 ));
4500                 state_num = CURLYM_B_fail;
4501                 goto reenter_switch;
4502             }
4503
4504             if (ST.me->flags) {
4505                 /* mark current A as captured */
4506                 I32 paren = ST.me->flags;
4507                 if (ST.count) {
4508                     PL_regoffs[paren].start
4509                         = HOPc(PL_reginput, -ST.alen) - PL_bostr;
4510                     PL_regoffs[paren].end = PL_reginput - PL_bostr;
4511                     /*dmq: *PL_reglastcloseparen = paren; */
4512                 }
4513                 else
4514                     PL_regoffs[paren].end = -1;
4515                 if (cur_eval && cur_eval->u.eval.close_paren &&
4516                     cur_eval->u.eval.close_paren == (U32)ST.me->flags)
4517                 {
4518                     if (ST.count)
4519                         goto fake_end;
4520                     else
4521                         sayNO;
4522                 }
4523             }
4524
4525             PUSH_STATE_GOTO(CURLYM_B, ST.B); /* match B */
4526             /* NOTREACHED */
4527
4528         case CURLYM_B_fail: /* just failed to match a B */
4529             REGCP_UNWIND(ST.cp);
4530             if (ST.minmod) {
4531                 if (ST.count == ARG2(ST.me) /* max */)
4532                     sayNO;
4533                 goto curlym_do_A; /* try to match a further A */
4534             }
4535             /* backtrack one A */
4536             if (ST.count == ARG1(ST.me) /* min */)
4537                 sayNO;
4538             ST.count--;
4539             locinput = HOPc(locinput, -ST.alen);
4540             goto curlym_do_B; /* try to match B */
4541
4542 #undef ST
4543 #define ST st->u.curly
4544
4545 #define CURLY_SETPAREN(paren, success) \
4546     if (paren) { \
4547         if (success) { \
4548             PL_regoffs[paren].start = HOPc(locinput, -1) - PL_bostr; \
4549             PL_regoffs[paren].end = locinput - PL_bostr; \
4550             *PL_reglastcloseparen = paren; \
4551         } \
4552         else \
4553             PL_regoffs[paren].end = -1; \
4554     }
4555
4556         case STAR:              /*  /A*B/ where A is width 1 */
4557             ST.paren = 0;
4558             ST.min = 0;
4559             ST.max = REG_INFTY;
4560             scan = NEXTOPER(scan);
4561             goto repeat;
4562         case PLUS:              /*  /A+B/ where A is width 1 */
4563             ST.paren = 0;
4564             ST.min = 1;
4565             ST.max = REG_INFTY;
4566             scan = NEXTOPER(scan);
4567             goto repeat;
4568         case CURLYN:            /*  /(A){m,n}B/ where A is width 1 */
4569             ST.paren = scan->flags;     /* Which paren to set */
4570             if (ST.paren > PL_regsize)
4571                 PL_regsize = ST.paren;
4572             if (ST.paren > *PL_reglastparen)
4573                 *PL_reglastparen = ST.paren;
4574             ST.min = ARG1(scan);  /* min to match */
4575             ST.max = ARG2(scan);  /* max to match */
4576             if (cur_eval && cur_eval->u.eval.close_paren &&
4577                 cur_eval->u.eval.close_paren == (U32)ST.paren) {
4578                 ST.min=1;
4579                 ST.max=1;
4580             }
4581             scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
4582             goto repeat;
4583         case CURLY:             /*  /A{m,n}B/ where A is width 1 */
4584             ST.paren = 0;
4585             ST.min = ARG1(scan);  /* min to match */
4586             ST.max = ARG2(scan);  /* max to match */
4587             scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
4588           repeat:
4589             /*
4590             * Lookahead to avoid useless match attempts
4591             * when we know what character comes next.
4592             *
4593             * Used to only do .*x and .*?x, but now it allows
4594             * for )'s, ('s and (?{ ... })'s to be in the way
4595             * of the quantifier and the EXACT-like node.  -- japhy
4596             */
4597
4598             if (ST.min > ST.max) /* XXX make this a compile-time check? */
4599                 sayNO;
4600             if (HAS_TEXT(next) || JUMPABLE(next)) {
4601                 U8 *s;
4602                 regnode *text_node = next;
4603
4604                 if (! HAS_TEXT(text_node))
4605                     FIND_NEXT_IMPT(text_node);
4606
4607                 if (! HAS_TEXT(text_node))
4608                     ST.c1 = ST.c2 = CHRTEST_VOID;
4609                 else {
4610                     if ( PL_regkind[OP(text_node)] != EXACT ) {
4611                         ST.c1 = ST.c2 = CHRTEST_VOID;
4612                         goto assume_ok_easy;
4613                     }
4614                     else
4615                         s = (U8*)STRING(text_node);
4616
4617                     /*  Currently we only get here when
4618
4619                         PL_rekind[OP(text_node)] == EXACT
4620
4621                         if this changes back then the macro for IS_TEXT and
4622                         friends need to change. */
4623                     if (!UTF) {
4624                         ST.c2 = ST.c1 = *s;
4625                         if (IS_TEXTF(text_node))
4626                             ST.c2 = PL_fold[ST.c1];
4627                         else if (IS_TEXTFL(text_node))
4628                             ST.c2 = PL_fold_locale[ST.c1];
4629                     }
4630                     else { /* UTF */
4631                         if (IS_TEXTF(text_node)) {
4632                              STRLEN ulen1, ulen2;
4633                              U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
4634                              U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
4635
4636                              to_utf8_lower((U8*)s, tmpbuf1, &ulen1);
4637                              to_utf8_upper((U8*)s, tmpbuf2, &ulen2);
4638 #ifdef EBCDIC
4639                              ST.c1 = utf8n_to_uvchr(tmpbuf1, UTF8_MAXLEN, 0,
4640                                                     ckWARN(WARN_UTF8) ?
4641                                                     0 : UTF8_ALLOW_ANY);
4642                              ST.c2 = utf8n_to_uvchr(tmpbuf2, UTF8_MAXLEN, 0,
4643                                                     ckWARN(WARN_UTF8) ?
4644                                                     0 : UTF8_ALLOW_ANY);
4645 #else
4646                              ST.c1 = utf8n_to_uvuni(tmpbuf1, UTF8_MAXBYTES, 0,
4647                                                     uniflags);
4648                              ST.c2 = utf8n_to_uvuni(tmpbuf2, UTF8_MAXBYTES, 0,
4649                                                     uniflags);
4650 #endif
4651                         }
4652                         else {
4653                             ST.c2 = ST.c1 = utf8n_to_uvchr(s, UTF8_MAXBYTES, 0,
4654                                                      uniflags);
4655                         }
4656                     }
4657                 }
4658             }
4659             else
4660                 ST.c1 = ST.c2 = CHRTEST_VOID;
4661         assume_ok_easy:
4662
4663             ST.A = scan;
4664             ST.B = next;
4665             PL_reginput = locinput;
4666             if (minmod) {
4667                 minmod = 0;
4668                 if (ST.min && regrepeat(rex, ST.A, ST.min, depth) < ST.min)
4669                     sayNO;
4670                 ST.count = ST.min;
4671                 locinput = PL_reginput;
4672                 REGCP_SET(ST.cp);
4673                 if (ST.c1 == CHRTEST_VOID)
4674                     goto curly_try_B_min;
4675
4676                 ST.oldloc = locinput;
4677
4678                 /* set ST.maxpos to the furthest point along the
4679                  * string that could possibly match */
4680                 if  (ST.max == REG_INFTY) {
4681                     ST.maxpos = PL_regeol - 1;
4682                     if (do_utf8)
4683                         while (UTF8_IS_CONTINUATION(*(U8*)ST.maxpos))
4684                             ST.maxpos--;
4685                 }
4686                 else if (do_utf8) {
4687                     int m = ST.max - ST.min;
4688                     for (ST.maxpos = locinput;
4689                          m >0 && ST.maxpos + UTF8SKIP(ST.maxpos) <= PL_regeol; m--)
4690                         ST.maxpos += UTF8SKIP(ST.maxpos);
4691                 }
4692                 else {
4693                     ST.maxpos = locinput + ST.max - ST.min;
4694                     if (ST.maxpos >= PL_regeol)
4695                         ST.maxpos = PL_regeol - 1;
4696                 }
4697                 goto curly_try_B_min_known;
4698
4699             }
4700             else {
4701                 ST.count = regrepeat(rex, ST.A, ST.max, depth);
4702                 locinput = PL_reginput;
4703                 if (ST.count < ST.min)
4704                     sayNO;
4705                 if ((ST.count > ST.min)
4706                     && (PL_regkind[OP(ST.B)] == EOL) && (OP(ST.B) != MEOL))
4707                 {
4708                     /* A{m,n} must come at the end of the string, there's
4709                      * no point in backing off ... */
4710                     ST.min = ST.count;
4711                     /* ...except that $ and \Z can match before *and* after
4712                        newline at the end.  Consider "\n\n" =~ /\n+\Z\n/.
4713                        We may back off by one in this case. */
4714                     if (UCHARAT(PL_reginput - 1) == '\n' && OP(ST.B) != EOS)
4715                         ST.min--;
4716                 }
4717                 REGCP_SET(ST.cp);
4718                 goto curly_try_B_max;
4719             }
4720             /* NOTREACHED */
4721
4722
4723         case CURLY_B_min_known_fail:
4724             /* failed to find B in a non-greedy match where c1,c2 valid */
4725             if (ST.paren && ST.count)
4726                 PL_regoffs[ST.paren].end = -1;
4727
4728             PL_reginput = locinput;     /* Could be reset... */
4729             REGCP_UNWIND(ST.cp);
4730             /* Couldn't or didn't -- move forward. */
4731             ST.oldloc = locinput;
4732             if (do_utf8)
4733                 locinput += UTF8SKIP(locinput);
4734             else
4735                 locinput++;
4736             ST.count++;
4737           curly_try_B_min_known:
4738              /* find the next place where 'B' could work, then call B */
4739             {
4740                 int n;
4741                 if (do_utf8) {
4742                     n = (ST.oldloc == locinput) ? 0 : 1;
4743                     if (ST.c1 == ST.c2) {
4744                         STRLEN len;
4745                         /* set n to utf8_distance(oldloc, locinput) */
4746                         while (locinput <= ST.maxpos &&
4747                                utf8n_to_uvchr((U8*)locinput,
4748                                               UTF8_MAXBYTES, &len,
4749                                               uniflags) != (UV)ST.c1) {
4750                             locinput += len;
4751                             n++;
4752                         }
4753                     }
4754                     else {
4755                         /* set n to utf8_distance(oldloc, locinput) */
4756                         while (locinput <= ST.maxpos) {
4757                             STRLEN len;
4758                             const UV c = utf8n_to_uvchr((U8*)locinput,
4759                                                   UTF8_MAXBYTES, &len,
4760                                                   uniflags);
4761                             if (c == (UV)ST.c1 || c == (UV)ST.c2)
4762                                 break;
4763                             locinput += len;
4764                             n++;
4765                         }
4766                     }
4767                 }
4768                 else {
4769                     if (ST.c1 == ST.c2) {
4770                         while (locinput <= ST.maxpos &&
4771                                UCHARAT(locinput) != ST.c1)
4772                             locinput++;
4773                     }
4774                     else {
4775                         while (locinput <= ST.maxpos
4776                                && UCHARAT(locinput) != ST.c1
4777                                && UCHARAT(locinput) != ST.c2)
4778                             locinput++;
4779                     }
4780                     n = locinput - ST.oldloc;
4781                 }
4782                 if (locinput > ST.maxpos)
4783                     sayNO;
4784                 /* PL_reginput == oldloc now */
4785                 if (n) {
4786                     ST.count += n;
4787                     if (regrepeat(rex, ST.A, n, depth) < n)
4788                         sayNO;
4789                 }
4790                 PL_reginput = locinput;
4791                 CURLY_SETPAREN(ST.paren, ST.count);
4792                 if (cur_eval && cur_eval->u.eval.close_paren &&
4793                     cur_eval->u.eval.close_paren == (U32)ST.paren) {
4794                     goto fake_end;
4795                 }
4796                 PUSH_STATE_GOTO(CURLY_B_min_known, ST.B);
4797             }
4798             /* NOTREACHED */
4799
4800
4801         case CURLY_B_min_fail:
4802             /* failed to find B in a non-greedy match where c1,c2 invalid */
4803             if (ST.paren && ST.count)
4804                 PL_regoffs[ST.paren].end = -1;
4805
4806             REGCP_UNWIND(ST.cp);
4807             /* failed -- move forward one */
4808             PL_reginput = locinput;
4809             if (regrepeat(rex, ST.A, 1, depth)) {
4810                 ST.count++;
4811                 locinput = PL_reginput;
4812                 if (ST.count <= ST.max || (ST.max == REG_INFTY &&
4813                         ST.count > 0)) /* count overflow ? */
4814                 {
4815                   curly_try_B_min:
4816                     CURLY_SETPAREN(ST.paren, ST.count);
4817                     if (cur_eval && cur_eval->u.eval.close_paren &&
4818                         cur_eval->u.eval.close_paren == (U32)ST.paren) {
4819                         goto fake_end;
4820                     }
4821                     PUSH_STATE_GOTO(CURLY_B_min, ST.B);
4822                 }
4823             }
4824             sayNO;
4825             /* NOTREACHED */
4826
4827
4828         curly_try_B_max:
4829             /* a successful greedy match: now try to match B */
4830             if (cur_eval && cur_eval->u.eval.close_paren &&
4831                 cur_eval->u.eval.close_paren == (U32)ST.paren) {
4832                 goto fake_end;
4833             }
4834             {
4835                 UV c = 0;
4836                 if (ST.c1 != CHRTEST_VOID)
4837                     c = do_utf8 ? utf8n_to_uvchr((U8*)PL_reginput,
4838                                            UTF8_MAXBYTES, 0, uniflags)
4839                                 : (UV) UCHARAT(PL_reginput);
4840                 /* If it could work, try it. */
4841                 if (ST.c1 == CHRTEST_VOID || c == (UV)ST.c1 || c == (UV)ST.c2) {
4842                     CURLY_SETPAREN(ST.paren, ST.count);
4843                     PUSH_STATE_GOTO(CURLY_B_max, ST.B);
4844                     /* NOTREACHED */
4845                 }
4846             }
4847             /* FALL THROUGH */
4848         case CURLY_B_max_fail:
4849             /* failed to find B in a greedy match */
4850             if (ST.paren && ST.count)
4851                 PL_regoffs[ST.paren].end = -1;
4852
4853             REGCP_UNWIND(ST.cp);
4854             /*  back up. */
4855             if (--ST.count < ST.min)
4856                 sayNO;
4857             PL_reginput = locinput = HOPc(locinput, -1);
4858             goto curly_try_B_max;
4859
4860 #undef ST
4861
4862         case END:
4863             fake_end:
4864             if (cur_eval) {
4865                 /* we've just finished A in /(??{A})B/; now continue with B */
4866                 I32 tmpix;
4867                 st->u.eval.toggle_reg_flags
4868                             = cur_eval->u.eval.toggle_reg_flags;
4869                 PL_reg_flags ^= st->u.eval.toggle_reg_flags;
4870
4871                 st->u.eval.prev_rex = rex_sv;           /* inner */
4872                 SETREX(rex_sv,cur_eval->u.eval.prev_rex);
4873                 rex = (struct regexp *)SvANY(rex_sv);
4874                 rexi = RXi_GET(rex);
4875                 cur_curlyx = cur_eval->u.eval.prev_curlyx;
4876                 ReREFCNT_inc(rex_sv);
4877                 st->u.eval.cp = regcppush(0);   /* Save *all* the positions. */
4878                 REGCP_SET(st->u.eval.lastcp);
4879                 PL_reginput = locinput;
4880
4881                 /* Restore parens of the outer rex without popping the
4882                  * savestack */
4883                 tmpix = PL_savestack_ix;
4884                 PL_savestack_ix = cur_eval->u.eval.lastcp;
4885                 regcppop(rex);
4886                 PL_savestack_ix = tmpix;
4887
4888                 st->u.eval.prev_eval = cur_eval;
4889                 cur_eval = cur_eval->u.eval.prev_eval;
4890                 DEBUG_EXECUTE_r(
4891                     PerlIO_printf(Perl_debug_log, "%*s  EVAL trying tail ... %"UVxf"\n",
4892                                       REPORT_CODE_OFF+depth*2, "",PTR2UV(cur_eval)););
4893                 if ( nochange_depth )
4894                     nochange_depth--;
4895
4896                 PUSH_YES_STATE_GOTO(EVAL_AB,
4897                         st->u.eval.prev_eval->u.eval.B); /* match B */
4898             }
4899
4900             if (locinput < reginfo->till) {
4901                 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
4902                                       "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
4903                                       PL_colors[4],
4904                                       (long)(locinput - PL_reg_starttry),
4905                                       (long)(reginfo->till - PL_reg_starttry),
4906                                       PL_colors[5]));
4907
4908                 sayNO_SILENT;           /* Cannot match: too short. */
4909             }
4910             PL_reginput = locinput;     /* put where regtry can find it */
4911             sayYES;                     /* Success! */
4912
4913         case SUCCEED: /* successful SUSPEND/UNLESSM/IFMATCH/CURLYM */
4914             DEBUG_EXECUTE_r(
4915             PerlIO_printf(Perl_debug_log,
4916                 "%*s  %ssubpattern success...%s\n",
4917                 REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5]));
4918             PL_reginput = locinput;     /* put where regtry can find it */
4919             sayYES;                     /* Success! */
4920
4921 #undef  ST
4922 #define ST st->u.ifmatch
4923
4924         case SUSPEND:   /* (?>A) */
4925             ST.wanted = 1;
4926             PL_reginput = locinput;
4927             goto do_ifmatch;
4928
4929         case UNLESSM:   /* -ve lookaround: (?!A), or with flags, (?<!A) */
4930             ST.wanted = 0;
4931             goto ifmatch_trivial_fail_test;
4932
4933         case IFMATCH:   /* +ve lookaround: (?=A), or with flags, (?<=A) */
4934             ST.wanted = 1;
4935           ifmatch_trivial_fail_test:
4936             if (scan->flags) {
4937                 char * const s = HOPBACKc(locinput, scan->flags);
4938                 if (!s) {
4939                     /* trivial fail */
4940                     if (logical) {
4941                         logical = 0;
4942                         sw = 1 - (bool)ST.wanted;
4943                     }
4944                     else if (ST.wanted)
4945                         sayNO;
4946                     next = scan + ARG(scan);
4947                     if (next == scan)
4948                         next = NULL;
4949                     break;
4950                 }
4951                 PL_reginput = s;
4952             }
4953             else
4954                 PL_reginput = locinput;
4955
4956           do_ifmatch:
4957             ST.me = scan;
4958             ST.logical = logical;
4959             /* execute body of (?...A) */
4960             PUSH_YES_STATE_GOTO(IFMATCH_A, NEXTOPER(NEXTOPER(scan)));
4961             /* NOTREACHED */
4962
4963         case IFMATCH_A_fail: /* body of (?...A) failed */
4964             ST.wanted = !ST.wanted;
4965             /* FALL THROUGH */
4966
4967         case IFMATCH_A: /* body of (?...A) succeeded */
4968             if (ST.logical) {
4969                 sw = (bool)ST.wanted;
4970             }
4971             else if (!ST.wanted)
4972                 sayNO;
4973
4974             if (OP(ST.me) == SUSPEND)
4975                 locinput = PL_reginput;
4976             else {
4977                 locinput = PL_reginput = st->locinput;
4978                 nextchr = UCHARAT(locinput);
4979             }
4980             scan = ST.me + ARG(ST.me);
4981             if (scan == ST.me)
4982                 scan = NULL;
4983             continue; /* execute B */
4984
4985 #undef ST
4986
4987         case LONGJMP:
4988             next = scan + ARG(scan);
4989             if (next == scan)
4990                 next = NULL;
4991             break;
4992         case COMMIT:
4993             reginfo->cutpoint = PL_regeol;
4994             /* FALLTHROUGH */
4995         case PRUNE:
4996             PL_reginput = locinput;
4997             if (!scan->flags)
4998                 sv_yes_mark = sv_commit = (SV*)rexi->data->data[ ARG( scan ) ];
4999             PUSH_STATE_GOTO(COMMIT_next,next);
5000             /* NOTREACHED */
5001         case COMMIT_next_fail:
5002             no_final = 1;
5003             /* FALLTHROUGH */
5004         case OPFAIL:
5005             sayNO;
5006             /* NOTREACHED */
5007
5008 #define ST st->u.mark
5009         case MARKPOINT:
5010             ST.prev_mark = mark_state;
5011             ST.mark_name = sv_commit = sv_yes_mark
5012                 = (SV*)rexi->data->data[ ARG( scan ) ];
5013             mark_state = st;
5014             ST.mark_loc = PL_reginput = locinput;
5015             PUSH_YES_STATE_GOTO(MARKPOINT_next,next);
5016             /* NOTREACHED */
5017         case MARKPOINT_next:
5018             mark_state = ST.prev_mark;
5019             sayYES;
5020             /* NOTREACHED */
5021         case MARKPOINT_next_fail:
5022             if (popmark && sv_eq(ST.mark_name,popmark))
5023             {
5024                 if (ST.mark_loc > startpoint)
5025                     reginfo->cutpoint = HOPBACKc(ST.mark_loc, 1);
5026                 popmark = NULL; /* we found our mark */
5027                 sv_commit = ST.mark_name;
5028
5029                 DEBUG_EXECUTE_r({
5030                         PerlIO_printf(Perl_debug_log,
5031                             "%*s  %ssetting cutpoint to mark:%"SVf"...%s\n",
5032                             REPORT_CODE_OFF+depth*2, "",
5033                             PL_colors[4], SVfARG(sv_commit), PL_colors[5]);
5034                 });
5035             }
5036             mark_state = ST.prev_mark;
5037             sv_yes_mark = mark_state ?
5038                 mark_state->u.mark.mark_name : NULL;
5039             sayNO;
5040             /* NOTREACHED */
5041         case SKIP:
5042             PL_reginput = locinput;
5043             if (scan->flags) {
5044                 /* (*SKIP) : if we fail we cut here*/
5045                 ST.mark_name = NULL;
5046                 ST.mark_loc = locinput;
5047                 PUSH_STATE_GOTO(SKIP_next,next);
5048             } else {
5049                 /* (*SKIP:NAME) : if there is a (*MARK:NAME) fail where it was,
5050                    otherwise do nothing.  Meaning we need to scan
5051                  */
5052                 regmatch_state *cur = mark_state;
5053                 SV *find = (SV*)rexi->data->data[ ARG( scan ) ];
5054
5055                 while (cur) {
5056                     if ( sv_eq( cur->u.mark.mark_name,
5057                                 find ) )
5058                     {
5059                         ST.mark_name = find;
5060                         PUSH_STATE_GOTO( SKIP_next, next );
5061                     }
5062                     cur = cur->u.mark.prev_mark;
5063                 }
5064             }
5065             /* Didn't find our (*MARK:NAME) so ignore this (*SKIP:NAME) */
5066             break;
5067         case SKIP_next_fail:
5068             if (ST.mark_name) {
5069                 /* (*CUT:NAME) - Set up to search for the name as we
5070                    collapse the stack*/
5071                 popmark = ST.mark_name;
5072             } else {
5073                 /* (*CUT) - No name, we cut here.*/
5074                 if (ST.mark_loc > startpoint)
5075                     reginfo->cutpoint = HOPBACKc(ST.mark_loc, 1);
5076                 /* but we set sv_commit to latest mark_name if there
5077                    is one so they can test to see how things lead to this
5078                    cut */
5079                 if (mark_state)
5080                     sv_commit=mark_state->u.mark.mark_name;
5081             }
5082             no_final = 1;
5083             sayNO;
5084             /* NOTREACHED */
5085 #undef ST
5086         case FOLDCHAR:
5087             n = ARG(scan);
5088             if ( n == (U32)what_len_TRICKYFOLD(locinput,do_utf8,ln) ) {
5089                 locinput += ln;
5090             } else if ( 0xDF == n && !do_utf8 && !UTF ) {
5091                 sayNO;
5092             } else  {
5093                 U8 folded[UTF8_MAXBYTES_CASE+1];
5094                 STRLEN foldlen;
5095                 const char * const l = locinput;
5096                 char *e = PL_regeol;
5097                 to_uni_fold(n, folded, &foldlen);
5098
5099                 if (ibcmp_utf8((const char*) folded, 0,  foldlen, 1,
5100                                l, &e, 0,  do_utf8)) {
5101                         sayNO;
5102                 }
5103                 locinput = e;
5104             }
5105             nextchr = UCHARAT(locinput);
5106             break;
5107         case LNBREAK:
5108             if ((n=is_LNBREAK(locinput,do_utf8))) {
5109                 locinput += n;
5110                 nextchr = UCHARAT(locinput);
5111             } else
5112                 sayNO;
5113             break;
5114
5115 #define CASE_CLASS(nAmE)                              \
5116         case nAmE:                                    \
5117             if ((n=is_##nAmE(locinput,do_utf8))) {    \
5118                 locinput += n;                        \
5119                 nextchr = UCHARAT(locinput);          \
5120             } else                                    \
5121                 sayNO;                                \
5122             break;                                    \
5123         case N##nAmE:                                 \
5124             if ((n=is_##nAmE(locinput,do_utf8))) {    \
5125                 sayNO;                                \
5126             } else {                                  \
5127                 locinput += UTF8SKIP(locinput);       \
5128                 nextchr = UCHARAT(locinput);          \
5129             }                                         \
5130             break
5131
5132         CASE_CLASS(VERTWS);
5133         CASE_CLASS(HORIZWS);
5134 #undef CASE_CLASS
5135
5136         default:
5137             PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
5138                           PTR2UV(scan), OP(scan));
5139             Perl_croak(aTHX_ "regexp memory corruption");
5140
5141         } /* end switch */
5142
5143         /* switch break jumps here */
5144         scan = next; /* prepare to execute the next op and ... */
5145         continue;    /* ... jump back to the top, reusing st */
5146         /* NOTREACHED */
5147
5148       push_yes_state:
5149         /* push a state that backtracks on success */
5150         st->u.yes.prev_yes_state = yes_state;
5151         yes_state = st;
5152         /* FALL THROUGH */
5153       push_state:
5154         /* push a new regex state, then continue at scan  */
5155         {
5156             regmatch_state *newst;
5157
5158             DEBUG_STACK_r({
5159                 regmatch_state *cur = st;
5160                 regmatch_state *curyes = yes_state;
5161                 int curd = depth;
5162                 regmatch_slab *slab = PL_regmatch_slab;
5163                 for (;curd > -1;cur--,curd--) {
5164                     if (cur < SLAB_FIRST(slab)) {
5165                         slab = slab->prev;
5166                         cur = SLAB_LAST(slab);
5167                     }
5168                     PerlIO_printf(Perl_error_log, "%*s#%-3d %-10s %s\n",
5169                         REPORT_CODE_OFF + 2 + depth * 2,"",
5170                         curd, PL_reg_name[cur->resume_state],
5171                         (curyes == cur) ? "yes" : ""
5172                     );
5173                     if (curyes == cur)
5174                         curyes = cur->u.yes.prev_yes_state;
5175                 }
5176             } else
5177                 DEBUG_STATE_pp("push")
5178             );
5179             depth++;
5180             st->locinput = locinput;
5181             newst = st+1;
5182             if (newst >  SLAB_LAST(PL_regmatch_slab))
5183                 newst = S_push_slab(aTHX);
5184             PL_regmatch_state = newst;
5185
5186             locinput = PL_reginput;
5187             nextchr = UCHARAT(locinput);
5188             st = newst;
5189             continue;
5190             /* NOTREACHED */
5191         }
5192     }
5193
5194     /*
5195     * We get here only if there's trouble -- normally "case END" is
5196     * the terminating point.
5197     */
5198     Perl_croak(aTHX_ "corrupted regexp pointers");
5199     /*NOTREACHED*/
5200     sayNO;
5201
5202 yes:
5203     if (yes_state) {
5204         /* we have successfully completed a subexpression, but we must now
5205          * pop to the state marked by yes_state and continue from there */
5206         assert(st != yes_state);
5207 #ifdef DEBUGGING
5208         while (st != yes_state) {
5209             st--;
5210             if (st < SLAB_FIRST(PL_regmatch_slab)) {
5211                 PL_regmatch_slab = PL_regmatch_slab->prev;
5212                 st = SLAB_LAST(PL_regmatch_slab);
5213             }
5214             DEBUG_STATE_r({
5215                 if (no_final) {
5216                     DEBUG_STATE_pp("pop (no final)");
5217                 } else {
5218                     DEBUG_STATE_pp("pop (yes)");
5219                 }
5220             });
5221             depth--;
5222         }
5223 #else
5224         while (yes_state < SLAB_FIRST(PL_regmatch_slab)
5225             || yes_state > SLAB_LAST(PL_regmatch_slab))
5226         {
5227             /* not in this slab, pop slab */
5228             depth -= (st - SLAB_FIRST(PL_regmatch_slab) + 1);
5229             PL_regmatch_slab = PL_regmatch_slab->prev;
5230             st = SLAB_LAST(PL_regmatch_slab);
5231         }
5232         depth -= (st - yes_state);
5233 #endif
5234         st = yes_state;
5235         yes_state = st->u.yes.prev_yes_state;
5236         PL_regmatch_state = st;
5237
5238         if (no_final) {
5239             locinput= st->locinput;
5240             nextchr = UCHARAT(locinput);
5241         }
5242         state_num = st->resume_state + no_final;
5243         goto reenter_switch;
5244     }
5245
5246     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
5247                           PL_colors[4], PL_colors[5]));
5248
5249     if (PL_reg_eval_set) {
5250         /* each successfully executed (?{...}) block does the equivalent of
5251          *   local $^R = do {...}
5252          * When popping the save stack, all these locals would be undone;
5253          * bypass this by setting the outermost saved $^R to the latest
5254          * value */
5255         if (oreplsv != GvSV(PL_replgv))
5256             sv_setsv(oreplsv, GvSV(PL_replgv));
5257     }
5258     result = 1;
5259     goto final_exit;
5260
5261 no:
5262     DEBUG_EXECUTE_r(
5263         PerlIO_printf(Perl_debug_log,
5264             "%*s  %sfailed...%s\n",
5265             REPORT_CODE_OFF+depth*2, "",
5266             PL_colors[4], PL_colors[5])
5267         );
5268
5269 no_silent:
5270     if (no_final) {
5271         if (yes_state) {
5272             goto yes;
5273         } else {
5274             goto final_exit;
5275         }
5276     }
5277     if (depth) {
5278         /* there's a previous state to backtrack to */
5279         st--;
5280         if (st < SLAB_FIRST(PL_regmatch_slab)) {
5281             PL_regmatch_slab = PL_regmatch_slab->prev;
5282             st = SLAB_LAST(PL_regmatch_slab);
5283         }
5284         PL_regmatch_state = st;
5285         locinput= st->locinput;
5286         nextchr = UCHARAT(locinput);
5287
5288         DEBUG_STATE_pp("pop");
5289         depth--;
5290         if (yes_state == st)
5291             yes_state = st->u.yes.prev_yes_state;
5292
5293         state_num = st->resume_state + 1; /* failure = success + 1 */
5294         goto reenter_switch;
5295     }
5296     result = 0;
5297
5298   final_exit:
5299     if (rex->intflags & PREGf_VERBARG_SEEN) {
5300         SV *sv_err = get_sv("REGERROR", 1);
5301         SV *sv_mrk = get_sv("REGMARK", 1);
5302         if (result) {
5303             sv_commit = &PL_sv_no;
5304             if (!sv_yes_mark)
5305                 sv_yes_mark = &PL_sv_yes;
5306         } else {
5307             if (!sv_commit)
5308                 sv_commit = &PL_sv_yes;
5309             sv_yes_mark = &PL_sv_no;
5310         }
5311         sv_setsv(sv_err, sv_commit);
5312         sv_setsv(sv_mrk, sv_yes_mark);
5313     }
5314
5315     /* clean up; in particular, free all slabs above current one */
5316     LEAVE_SCOPE(oldsave);
5317
5318     return result;
5319 }
5320
5321 /*
5322  - regrepeat - repeatedly match something simple, report how many
5323  */
5324 /*
5325  * [This routine now assumes that it will only match on things of length 1.
5326  * That was true before, but now we assume scan - reginput is the count,
5327  * rather than incrementing count on every character.  [Er, except utf8.]]
5328  */
5329 STATIC I32
5330 S_regrepeat(pTHX_ const regexp *prog, const regnode *p, I32 max, int depth)
5331 {
5332     dVAR;
5333     register char *scan;
5334     register I32 c;
5335     register char *loceol = PL_regeol;
5336     register I32 hardcount = 0;
5337     register bool do_utf8 = PL_reg_match_utf8;
5338 #ifndef DEBUGGING
5339     PERL_UNUSED_ARG(depth);
5340 #endif
5341
5342     scan = PL_reginput;
5343     if (max == REG_INFTY)
5344         max = I32_MAX;
5345     else if (max < loceol - scan)
5346         loceol = scan + max;
5347     switch (OP(p)) {
5348     case REG_ANY:
5349         if (do_utf8) {
5350             loceol = PL_regeol;
5351             while (scan < loceol && hardcount < max && *scan != '\n') {
5352                 scan += UTF8SKIP(scan);
5353                 hardcount++;
5354             }
5355         } else {
5356             while (scan < loceol && *scan != '\n')
5357                 scan++;
5358         }
5359         break;
5360     case SANY:
5361         if (do_utf8) {
5362             loceol = PL_regeol;
5363             while (scan < loceol && hardcount < max) {
5364                 scan += UTF8SKIP(scan);
5365                 hardcount++;
5366             }
5367         }
5368         else
5369             scan = loceol;
5370         break;
5371     case CANY:
5372         scan = loceol;
5373         break;
5374     case EXACT:         /* length of string is 1 */
5375         c = (U8)*STRING(p);
5376         while (scan < loceol && UCHARAT(scan) == c)
5377             scan++;
5378         break;
5379     case EXACTF:        /* length of string is 1 */
5380         c = (U8)*STRING(p);
5381         while (scan < loceol &&
5382                (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
5383             scan++;
5384         break;
5385     case EXACTFL:       /* length of string is 1 */
5386         PL_reg_flags |= RF_tainted;
5387         c = (U8)*STRING(p);
5388         while (scan < loceol &&
5389                (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
5390             scan++;
5391         break;
5392     case ANYOF:
5393         if (do_utf8) {
5394             loceol = PL_regeol;
5395             while (hardcount < max && scan < loceol &&
5396                    reginclass(prog, p, (U8*)scan, 0, do_utf8)) {
5397                 scan += UTF8SKIP(scan);
5398                 hardcount++;
5399             }
5400         } else {
5401             while (scan < loceol && REGINCLASS(prog, p, (U8*)scan))
5402                 scan++;
5403         }
5404         break;
5405     case ALNUM:
5406         if (do_utf8) {
5407             loceol = PL_regeol;
5408             LOAD_UTF8_CHARCLASS_ALNUM();
5409             while (hardcount < max && scan < loceol &&
5410                    swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
5411                 scan += UTF8SKIP(scan);
5412                 hardcount++;
5413             }
5414         } else {
5415             while (scan < loceol && isALNUM(*scan))
5416                 scan++;
5417         }
5418         break;
5419     case ALNUML:
5420         PL_reg_flags |= RF_tainted;
5421         if (do_utf8) {
5422             loceol = PL_regeol;
5423             while (hardcount < max && scan < loceol &&
5424                    isALNUM_LC_utf8((U8*)scan)) {
5425                 scan += UTF8SKIP(scan);
5426                 hardcount++;
5427             }
5428         } else {
5429             while (scan < loceol && isALNUM_LC(*scan))
5430                 scan++;
5431         }
5432         break;
5433     case NALNUM:
5434         if (do_utf8) {
5435             loceol = PL_regeol;
5436             LOAD_UTF8_CHARCLASS_ALNUM();
5437             while (hardcount < max && scan < loceol &&
5438                    !swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
5439                 scan += UTF8SKIP(scan);
5440                 hardcount++;
5441             }
5442         } else {
5443             while (scan < loceol && !isALNUM(*scan))
5444                 scan++;
5445         }
5446         break;
5447     case NALNUML:
5448         PL_reg_flags |= RF_tainted;
5449         if (do_utf8) {
5450             loceol = PL_regeol;
5451             while (hardcount < max && scan < loceol &&
5452                    !isALNUM_LC_utf8((U8*)scan)) {
5453                 scan += UTF8SKIP(scan);
5454                 hardcount++;
5455             }
5456         } else {
5457             while (scan < loceol && !isALNUM_LC(*scan))
5458                 scan++;
5459         }
5460         break;
5461     case SPACE:
5462         if (do_utf8) {
5463             loceol = PL_regeol;
5464             LOAD_UTF8_CHARCLASS_SPACE();
5465             while (hardcount < max && scan < loceol &&
5466                    (*scan == ' ' ||
5467                     swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
5468                 scan += UTF8SKIP(scan);
5469                 hardcount++;
5470             }
5471         } else {
5472             while (scan < loceol && isSPACE(*scan))
5473                 scan++;
5474         }
5475         break;
5476     case SPACEL:
5477         PL_reg_flags |= RF_tainted;
5478         if (do_utf8) {
5479             loceol = PL_regeol;
5480             while (hardcount < max && scan < loceol &&
5481                    (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
5482                 scan += UTF8SKIP(scan);
5483                 hardcount++;
5484             }
5485         } else {
5486             while (scan < loceol && isSPACE_LC(*scan))
5487                 scan++;
5488         }
5489         break;
5490     case NSPACE:
5491         if (do_utf8) {
5492             loceol = PL_regeol;
5493             LOAD_UTF8_CHARCLASS_SPACE();
5494             while (hardcount < max && scan < loceol &&
5495                    !(*scan == ' ' ||
5496                      swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
5497                 scan += UTF8SKIP(scan);
5498                 hardcount++;
5499             }
5500         } else {
5501             while (scan < loceol && !isSPACE(*scan))
5502                 scan++;
5503         }
5504         break;
5505     case NSPACEL:
5506         PL_reg_flags |= RF_tainted;
5507         if (do_utf8) {
5508             loceol = PL_regeol;
5509             while (hardcount < max && scan < loceol &&
5510                    !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
5511                 scan += UTF8SKIP(scan);
5512                 hardcount++;
5513             }
5514         } else {
5515             while (scan < loceol && !isSPACE_LC(*scan))
5516                 scan++;
5517         }
5518         break;
5519     case DIGIT:
5520         if (do_utf8) {
5521             loceol = PL_regeol;
5522             LOAD_UTF8_CHARCLASS_DIGIT();
5523             while (hardcount < max && scan < loceol &&
5524                    swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
5525                 scan += UTF8SKIP(scan);
5526                 hardcount++;
5527             }
5528         } else {
5529             while (scan < loceol && isDIGIT(*scan))
5530                 scan++;
5531         }
5532         break;
5533     case NDIGIT:
5534         if (do_utf8) {
5535             loceol = PL_regeol;
5536             LOAD_UTF8_CHARCLASS_DIGIT();
5537             while (hardcount < max && scan < loceol &&
5538                    !swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
5539                 scan += UTF8SKIP(scan);
5540                 hardcount++;
5541             }
5542         } else {
5543             while (scan < loceol && !isDIGIT(*scan))
5544                 scan++;
5545         }
5546     case LNBREAK:
5547         if (do_utf8) {
5548             loceol = PL_regeol;
5549             while (hardcount < max && scan < loceol && (c=is_LNBREAK_utf8(scan))) {
5550                 scan += c;
5551                 hardcount++;
5552             }
5553         } else {
5554             /*
5555               LNBREAK can match two latin chars, which is ok,
5556               because we have a null terminated string, but we
5557               have to use hardcount in this situation
5558             */
5559             while (scan < loceol && (c=is_LNBREAK_latin1(scan)))  {
5560                 scan+=c;
5561                 hardcount++;
5562             }
5563         }
5564         break;
5565     case HORIZWS:
5566         if (do_utf8) {
5567             loceol = PL_regeol;
5568             while (hardcount < max && scan < loceol && (c=is_HORIZWS_utf8(scan))) {
5569                 scan += c;
5570                 hardcount++;
5571             }
5572         } else {
5573             while (scan < loceol && is_HORIZWS_latin1(scan))
5574                 scan++;
5575         }
5576         break;
5577     case NHORIZWS:
5578         if (do_utf8) {
5579             loceol = PL_regeol;
5580             while (hardcount < max && scan < loceol && !is_HORIZWS_utf8(scan)) {
5581                 scan += UTF8SKIP(scan);
5582                 hardcount++;
5583             }
5584         } else {
5585             while (scan < loceol && !is_HORIZWS_latin1(scan))
5586                 scan++;
5587
5588         }
5589         break;
5590     case VERTWS:
5591         if (do_utf8) {
5592             loceol = PL_regeol;
5593             while (hardcount < max && scan < loceol && (c=is_VERTWS_utf8(scan))) {
5594                 scan += c;
5595                 hardcount++;
5596             }
5597         } else {
5598             while (scan < loceol && is_VERTWS_latin1(scan))
5599                 scan++;
5600
5601         }
5602         break;
5603     case NVERTWS:
5604         if (do_utf8) {
5605             loceol = PL_regeol;
5606             while (hardcount < max && scan < loceol && !is_VERTWS_utf8(scan)) {
5607                 scan += UTF8SKIP(scan);
5608                 hardcount++;
5609             }
5610         } else {
5611             while (scan < loceol && !is_VERTWS_latin1(scan))
5612                 scan++;
5613
5614         }
5615         break;
5616
5617     default:            /* Called on something of 0 width. */
5618         break;          /* So match right here or not at all. */
5619     }
5620
5621     if (hardcount)
5622         c = hardcount;
5623     else
5624         c = scan - PL_reginput;
5625     PL_reginput = scan;
5626
5627     DEBUG_r({
5628         GET_RE_DEBUG_FLAGS_DECL;
5629         DEBUG_EXECUTE_r({
5630             SV * const prop = sv_newmortal();
5631             regprop(prog, prop, p);
5632             PerlIO_printf(Perl_debug_log,
5633                         "%*s  %s can match %"IVdf" times out of %"IVdf"...\n",
5634                         REPORT_CODE_OFF + depth*2, "", SvPVX_const(prop),(IV)c,(IV)max);
5635         });
5636     });
5637
5638     return(c);
5639 }
5640
5641
5642 #if !defined(PERL_IN_XSUB_RE) || defined(PLUGGABLE_RE_EXTENSION)
5643 /*
5644 - regclass_swash - prepare the utf8 swash
5645 */
5646
5647 SV *
5648 Perl_regclass_swash(pTHX_ const regexp *prog, register const regnode* node, bool doinit, SV** listsvp, SV **altsvp)
5649 {
5650     dVAR;
5651     SV *sw  = NULL;
5652     SV *si  = NULL;
5653     SV *alt = NULL;
5654     RXi_GET_DECL(prog,progi);
5655     const struct reg_data * const data = prog ? progi->data : NULL;
5656
5657     if (data && data->count) {
5658         const U32 n = ARG(node);
5659
5660         if (data->what[n] == 's') {
5661             SV * const rv = (SV*)data->data[n];
5662             AV * const av = (AV*)SvRV((SV*)rv);
5663             SV **const ary = AvARRAY(av);
5664             SV **a, **b;
5665
5666             /* See the end of regcomp.c:S_regclass() for
5667              * documentation of these array elements. */
5668
5669             si = *ary;
5670             a  = SvROK(ary[1]) ? &ary[1] : NULL;
5671             b  = SvTYPE(ary[2]) == SVt_PVAV ? &ary[2] : NULL;
5672
5673             if (a)
5674                 sw = *a;
5675             else if (si && doinit) {
5676                 sw = swash_init("utf8", "", si, 1, 0);
5677                 (void)av_store(av, 1, sw);
5678             }
5679             if (b)
5680                 alt = *b;
5681         }
5682     }
5683
5684     if (listsvp)
5685         *listsvp = si;
5686     if (altsvp)
5687         *altsvp  = alt;
5688
5689     return sw;
5690 }
5691 #endif
5692
5693 /*
5694  - reginclass - determine if a character falls into a character class
5695
5696   The n is the ANYOF regnode, the p is the target string, lenp
5697   is pointer to the maximum length of how far to go in the p
5698   (if the lenp is zero, UTF8SKIP(p) is used),
5699   do_utf8 tells whether the target string is in UTF-8.
5700
5701  */
5702
5703 STATIC bool
5704 S_reginclass(pTHX_ const regexp *prog, register const regnode *n, register const U8* p, STRLEN* lenp, register bool do_utf8)
5705 {
5706     dVAR;
5707     const char flags = ANYOF_FLAGS(n);
5708     bool match = FALSE;
5709     UV c = *p;
5710     STRLEN len = 0;
5711     STRLEN plen;
5712
5713     if (do_utf8 && !UTF8_IS_INVARIANT(c)) {
5714         c = utf8n_to_uvchr(p, UTF8_MAXBYTES, &len,
5715                 (UTF8_ALLOW_DEFAULT & UTF8_ALLOW_ANYUV) | UTF8_CHECK_ONLY);
5716                 /* see [perl #37836] for UTF8_ALLOW_ANYUV */
5717         if (len == (STRLEN)-1)
5718             Perl_croak(aTHX_ "Malformed UTF-8 character (fatal)");
5719     }
5720
5721     plen = lenp ? *lenp : UNISKIP(NATIVE_TO_UNI(c));
5722     if (do_utf8 || (flags & ANYOF_UNICODE)) {
5723         if (lenp)
5724             *lenp = 0;
5725         if (do_utf8 && !ANYOF_RUNTIME(n)) {
5726             if (len != (STRLEN)-1 && c < 256 && ANYOF_BITMAP_TEST(n, c))
5727                 match = TRUE;
5728         }
5729         if (!match && do_utf8 && (flags & ANYOF_UNICODE_ALL) && c >= 256)
5730             match = TRUE;
5731         if (!match) {
5732             AV *av;
5733             SV * const sw = regclass_swash(prog, n, TRUE, 0, (SV**)&av);
5734
5735             if (sw) {
5736                 if (swash_fetch(sw, p, do_utf8))
5737                     match = TRUE;
5738                 else if (flags & ANYOF_FOLD) {
5739                     if (!match && lenp && av) {
5740                         I32 i;
5741                         for (i = 0; i <= av_len(av); i++) {
5742                             SV* const sv = *av_fetch(av, i, FALSE);
5743                             STRLEN len;
5744                             const char * const s = SvPV_const(sv, len);
5745
5746                             if (len <= plen && memEQ(s, (char*)p, len)) {
5747                                 *lenp = len;
5748                                 match = TRUE;
5749                                 break;
5750                             }
5751                         }
5752                     }
5753                     if (!match) {
5754                         U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
5755                         STRLEN tmplen;
5756
5757                         to_utf8_fold(p, tmpbuf, &tmplen);
5758                         if (swash_fetch(sw, tmpbuf, do_utf8))
5759                             match = TRUE;
5760                     }
5761                 }
5762             }
5763         }
5764         if (match && lenp && *lenp == 0)
5765             *lenp = UNISKIP(NATIVE_TO_UNI(c));
5766     }
5767     if (!match && c < 256) {
5768         if (ANYOF_BITMAP_TEST(n, c))
5769             match = TRUE;
5770         else if (flags & ANYOF_FOLD) {
5771             U8 f;
5772
5773             if (flags & ANYOF_LOCALE) {
5774                 PL_reg_flags |= RF_tainted;
5775                 f = PL_fold_locale[c];
5776             }
5777             else
5778                 f = PL_fold[c];
5779             if (f != c && ANYOF_BITMAP_TEST(n, f))
5780                 match = TRUE;
5781         }
5782
5783         if (!match && (flags & ANYOF_CLASS)) {
5784             PL_reg_flags |= RF_tainted;
5785             if (
5786                 (ANYOF_CLASS_TEST(n, ANYOF_ALNUM)   &&  isALNUM_LC(c))  ||
5787                 (ANYOF_CLASS_TEST(n, ANYOF_NALNUM)  && !isALNUM_LC(c))  ||
5788                 (ANYOF_CLASS_TEST(n, ANYOF_SPACE)   &&  isSPACE_LC(c))  ||
5789                 (ANYOF_CLASS_TEST(n, ANYOF_NSPACE)  && !isSPACE_LC(c))  ||
5790                 (ANYOF_CLASS_TEST(n, ANYOF_DIGIT)   &&  isDIGIT_LC(c))  ||
5791                 (ANYOF_CLASS_TEST(n, ANYOF_NDIGIT)  && !isDIGIT_LC(c))  ||
5792                 (ANYOF_CLASS_TEST(n, ANYOF_ALNUMC)  &&  isALNUMC_LC(c)) ||
5793                 (ANYOF_CLASS_TEST(n, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
5794                 (ANYOF_CLASS_TEST(n, ANYOF_ALPHA)   &&  isALPHA_LC(c))  ||
5795                 (ANYOF_CLASS_TEST(n, ANYOF_NALPHA)  && !isALPHA_LC(c))  ||
5796                 (ANYOF_CLASS_TEST(n, ANYOF_ASCII)   &&  isASCII(c))     ||
5797                 (ANYOF_CLASS_TEST(n, ANYOF_NASCII)  && !isASCII(c))     ||
5798                 (ANYOF_CLASS_TEST(n, ANYOF_CNTRL)   &&  isCNTRL_LC(c))  ||
5799                 (ANYOF_CLASS_TEST(n, ANYOF_NCNTRL)  && !isCNTRL_LC(c))  ||
5800                 (ANYOF_CLASS_TEST(n, ANYOF_GRAPH)   &&  isGRAPH_LC(c))  ||
5801                 (ANYOF_CLASS_TEST(n, ANYOF_NGRAPH)  && !isGRAPH_LC(c))  ||
5802                 (ANYOF_CLASS_TEST(n, ANYOF_LOWER)   &&  isLOWER_LC(c))  ||
5803                 (ANYOF_CLASS_TEST(n, ANYOF_NLOWER)  && !isLOWER_LC(c))  ||
5804                 (ANYOF_CLASS_TEST(n, ANYOF_PRINT)   &&  isPRINT_LC(c))  ||
5805                 (ANYOF_CLASS_TEST(n, ANYOF_NPRINT)  && !isPRINT_LC(c))  ||
5806                 (ANYOF_CLASS_TEST(n, ANYOF_PUNCT)   &&  isPUNCT_LC(c))  ||
5807                 (ANYOF_CLASS_TEST(n, ANYOF_NPUNCT)  && !isPUNCT_LC(c))  ||
5808                 (ANYOF_CLASS_TEST(n, ANYOF_UPPER)   &&  isUPPER_LC(c))  ||
5809                 (ANYOF_CLASS_TEST(n, ANYOF_NUPPER)  && !isUPPER_LC(c))  ||
5810                 (ANYOF_CLASS_TEST(n, ANYOF_XDIGIT)  &&  isXDIGIT(c))    ||
5811                 (ANYOF_CLASS_TEST(n, ANYOF_NXDIGIT) && !isXDIGIT(c))    ||
5812                 (ANYOF_CLASS_TEST(n, ANYOF_PSXSPC)  &&  isPSXSPC(c))    ||
5813                 (ANYOF_CLASS_TEST(n, ANYOF_NPSXSPC) && !isPSXSPC(c))    ||
5814                 (ANYOF_CLASS_TEST(n, ANYOF_BLANK)   &&  isBLANK(c))     ||
5815                 (ANYOF_CLASS_TEST(n, ANYOF_NBLANK)  && !isBLANK(c))
5816                 ) /* How's that for a conditional? */
5817             {
5818                 match = TRUE;
5819             }
5820         }
5821     }
5822
5823     return (flags & ANYOF_INVERT) ? !match : match;
5824 }
5825
5826 STATIC U8 *
5827 S_reghop3(U8 *s, I32 off, const U8* lim)
5828 {
5829     dVAR;
5830     if (off >= 0) {
5831         while (off-- && s < lim) {
5832             /* XXX could check well-formedness here */
5833             s += UTF8SKIP(s);
5834         }
5835     }
5836     else {
5837         while (off++ && s > lim) {
5838             s--;
5839             if (UTF8_IS_CONTINUED(*s)) {
5840                 while (s > lim && UTF8_IS_CONTINUATION(*s))
5841                     s--;
5842             }
5843             /* XXX could check well-formedness here */
5844         }
5845     }
5846     return s;
5847 }
5848
5849 #ifdef XXX_dmq
5850 /* there are a bunch of places where we use two reghop3's that should
5851    be replaced with this routine. but since thats not done yet
5852    we ifdef it out - dmq
5853 */
5854 STATIC U8 *
5855 S_reghop4(U8 *s, I32 off, const U8* llim, const U8* rlim)
5856 {
5857     dVAR;
5858     if (off >= 0) {
5859         while (off-- && s < rlim) {
5860             /* XXX could check well-formedness here */
5861             s += UTF8SKIP(s);
5862         }
5863     }
5864     else {
5865         while (off++ && s > llim) {
5866             s--;
5867             if (UTF8_IS_CONTINUED(*s)) {
5868                 while (s > llim && UTF8_IS_CONTINUATION(*s))
5869                     s--;
5870             }
5871             /* XXX could check well-formedness here */
5872         }
5873     }
5874     return s;
5875 }
5876 #endif
5877
5878 STATIC U8 *
5879 S_reghopmaybe3(U8* s, I32 off, const U8* lim)
5880 {
5881     dVAR;
5882     if (off >= 0) {
5883         while (off-- && s < lim) {
5884             /* XXX could check well-formedness here */
5885             s += UTF8SKIP(s);
5886         }
5887         if (off >= 0)
5888             return NULL;
5889     }
5890     else {
5891         while (off++ && s > lim) {
5892             s--;
5893             if (UTF8_IS_CONTINUED(*s)) {
5894                 while (s > lim && UTF8_IS_CONTINUATION(*s))
5895                     s--;
5896             }
5897             /* XXX could check well-formedness here */
5898         }
5899         if (off <= 0)
5900             return NULL;
5901     }
5902     return s;
5903 }
5904
5905 static void
5906 restore_pos(pTHX_ void *arg)
5907 {
5908     dVAR;
5909     regexp * const rex = (regexp *)arg;
5910     if (PL_reg_eval_set) {
5911         if (PL_reg_oldsaved) {
5912             rex->subbeg = PL_reg_oldsaved;
5913             rex->sublen = PL_reg_oldsavedlen;
5914 #ifdef PERL_OLD_COPY_ON_WRITE
5915             rex->saved_copy = PL_nrs;
5916 #endif
5917             RXp_MATCH_COPIED_on(rex);
5918         }
5919         PL_reg_magic->mg_len = PL_reg_oldpos;
5920         PL_reg_eval_set = 0;
5921         PL_curpm = PL_reg_oldcurpm;
5922     }
5923 }
5924
5925 STATIC void
5926 S_to_utf8_substr(pTHX_ register regexp *prog)
5927 {
5928     int i = 1;
5929     do {
5930         if (prog->substrs->data[i].substr
5931             && !prog->substrs->data[i].utf8_substr) {
5932             SV* const sv = newSVsv(prog->substrs->data[i].substr);
5933             prog->substrs->data[i].utf8_substr = sv;
5934             sv_utf8_upgrade(sv);
5935             if (SvVALID(prog->substrs->data[i].substr)) {
5936                 const U8 flags = BmFLAGS(prog->substrs->data[i].substr);
5937                 if (flags & FBMcf_TAIL) {
5938                     /* Trim the trailing \n that fbm_compile added last
5939                        time.  */
5940                     SvCUR_set(sv, SvCUR(sv) - 1);
5941                     /* Whilst this makes the SV technically "invalid" (as its
5942                        buffer is no longer followed by "\0") when fbm_compile()
5943                        adds the "\n" back, a "\0" is restored.  */
5944                 }
5945                 fbm_compile(sv, flags);
5946             }
5947             if (prog->substrs->data[i].substr == prog->check_substr)
5948                 prog->check_utf8 = sv;
5949         }
5950     } while (i--);
5951 }
5952
5953 STATIC void
5954 S_to_byte_substr(pTHX_ register regexp *prog)
5955 {
5956     dVAR;
5957     int i = 1;
5958     do {
5959         if (prog->substrs->data[i].utf8_substr
5960             && !prog->substrs->data[i].substr) {
5961             SV* sv = newSVsv(prog->substrs->data[i].utf8_substr);
5962             if (sv_utf8_downgrade(sv, TRUE)) {
5963                 if (SvVALID(prog->substrs->data[i].utf8_substr)) {
5964                     const U8 flags
5965                         = BmFLAGS(prog->substrs->data[i].utf8_substr);
5966                     if (flags & FBMcf_TAIL) {
5967                         /* Trim the trailing \n that fbm_compile added last
5968                            time.  */
5969                         SvCUR_set(sv, SvCUR(sv) - 1);
5970                     }
5971                     fbm_compile(sv, flags);
5972                 }
5973             } else {
5974                 SvREFCNT_dec(sv);
5975                 sv = &PL_sv_undef;
5976             }
5977             prog->substrs->data[i].substr = sv;
5978             if (prog->substrs->data[i].utf8_substr == prog->check_utf8)
5979                 prog->check_substr = sv;
5980         }
5981     } while (i--);
5982 }
5983
5984 /*
5985  * Local variables:
5986  * c-indentation-style: bsd
5987  * c-basic-offset: 4
5988  * indent-tabs-mode: t
5989  * End:
5990  *
5991  * ex: set ts=8 sts=4 sw=4 noet:
5992  */