regexec.c

   1 /*    regexec.c
   2  */
   3
   4 /*
   5  * "One Ring to rule them all, One Ring to find them..."
   6  */
   7
   8 /* This file contains functions for executing a regular expression.  See
   9  * also regcomp.c which funnily enough, contains functions for compiling
  10  * a regular expression.
  11  *
  12  * This file is also copied at build time to ext/re/re_exec.c, where
  13  * it's built with -DPERL_EXT_RE_BUILD -DPERL_EXT_RE_DEBUG -DPERL_EXT.
  14  * This causes the main functions to be compiled under new names and with
  15  * debugging support added, which makes "use re 'debug'" work.
  16  */
  17
  18 /* NOTE: this is derived from Henry Spencer's regexp code, and should not
  19  * confused with the original package (see point 3 below).  Thanks, Henry!
  20  */
  21
  22 /* Additional note: this code is very heavily munged from Henry's version
  23  * in places.  In some spots I've traded clarity for efficiency, so don't
  24  * blame Henry for some of the lack of readability.
  25  */
  26
  27 /* The names of the functions have been changed from regcomp and
  28  * regexec to  pregcomp and pregexec in order to avoid conflicts
  29  * with the POSIX routines of the same names.
  30 */
  31
  32 #ifdef PERL_EXT_RE_BUILD
  33 #include "re_top.h"
  34 #endif
  35
  36 /*
  37  * pregcomp and pregexec -- regsub and regerror are not used in perl
  38  *
  39  *      Copyright (c) 1986 by University of Toronto.
  40  *      Written by Henry Spencer.  Not derived from licensed software.
  41  *
  42  *      Permission is granted to anyone to use this software for any
  43  *      purpose on any computer system, and to redistribute it freely,
  44  *      subject to the following restrictions:
  45  *
  46  *      1. The author is not responsible for the consequences of use of
  47  *              this software, no matter how awful, even if they arise
  48  *              from defects in it.
  49  *
  50  *      2. The origin of this software must not be misrepresented, either
  51  *              by explicit claim or by omission.
  52  *
  53  *      3. Altered versions must be plainly marked as such, and must not
  54  *              be misrepresented as being the original software.
  55  *
  56  ****    Alterations to Henry's code are...
  57  ****
  58  ****    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
  59  ****    2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007 by Larry Wall and others
  60  ****
  61  ****    You may distribute under the terms of either the GNU General Public
  62  ****    License or the Artistic License, as specified in the README file.
  63  *
  64  * Beware that some of this code is subtly aware of the way operator
  65  * precedence is structured in regular expressions.  Serious changes in
  66  * regular-expression syntax might require a total rethink.
  67  */
  68 #include "EXTERN.h"
  69 #define PERL_IN_REGEXEC_C
  70 #include "perl.h"
  71
  72 #ifdef PERL_IN_XSUB_RE
  73 #  include "re_comp.h"
  74 #else
  75 #  include "regcomp.h"
  76 #endif
  77
  78 #define RF_tainted      1               /* tainted information used? */
  79 #define RF_warned       2               /* warned about big count? */
  80
  81 #define RF_utf8         8               /* Pattern contains multibyte chars? */
  82
  83 #define UTF ((PL_reg_flags & RF_utf8) != 0)
  84
  85 #define RS_init         1               /* eval environment created */
  86 #define RS_set          2               /* replsv value is set */
  87
  88 #ifndef STATIC
  89 #define STATIC  static
  90 #endif
  91
  92 #define REGINCLASS(prog,p,c)  (ANYOF_FLAGS(p) ? reginclass(prog,p,c,0,0) : ANYOF_BITMAP_TEST(p,*(c)))
  93
  94 /*
  95  * Forwards.
  96  */
  97
  98 #define CHR_SVLEN(sv) (do_utf8 ? sv_len_utf8(sv) : SvCUR(sv))
  99 #define CHR_DIST(a,b) (PL_reg_match_utf8 ? utf8_distance(a,b) : a - b)
 100
 101 #define HOPc(pos,off) \
 102         (char *)(PL_reg_match_utf8 \
 103             ? reghop3((U8*)pos, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr)) \
 104             : (U8*)(pos + off))
 105 #define HOPBACKc(pos, off) \
 106         (char*)(PL_reg_match_utf8\
 107             ? reghopmaybe3((U8*)pos, -off, (U8*)PL_bostr) \
 108             : (pos - off >= PL_bostr)           \
 109                 ? (U8*)pos - off                \
 110                 : NULL)
 111
 112 #define HOP3(pos,off,lim) (PL_reg_match_utf8 ? reghop3((U8*)(pos), off, (U8*)(lim)) : (U8*)(pos + off))
 113 #define HOP3c(pos,off,lim) ((char*)HOP3(pos,off,lim))
 114
 115 #define LOAD_UTF8_CHARCLASS(class,str) STMT_START { \
 116     if (!CAT2(PL_utf8_,class)) { bool ok; ENTER; save_re_context(); ok=CAT2(is_utf8_,class)((const U8*)str); assert(ok); LEAVE; } } STMT_END
 117 #define LOAD_UTF8_CHARCLASS_ALNUM() LOAD_UTF8_CHARCLASS(alnum,"a")
 118 #define LOAD_UTF8_CHARCLASS_DIGIT() LOAD_UTF8_CHARCLASS(digit,"0")
 119 #define LOAD_UTF8_CHARCLASS_SPACE() LOAD_UTF8_CHARCLASS(space," ")
 120 #define LOAD_UTF8_CHARCLASS_MARK()  LOAD_UTF8_CHARCLASS(mark, "\xcd\x86")
 121
 122 /* TODO: Combine JUMPABLE and HAS_TEXT to cache OP(rn) */
 123
 124 /* for use after a quantifier and before an EXACT-like node -- japhy */
 125 /* it would be nice to rework regcomp.sym to generate this stuff. sigh */
 126 #define JUMPABLE(rn) (      \
 127     OP(rn) == OPEN ||       \
 128     (OP(rn) == CLOSE && (!cur_eval || cur_eval->u.eval.close_paren != ARG(rn))) || \
 129     OP(rn) == EVAL ||   \
 130     OP(rn) == SUSPEND || OP(rn) == IFMATCH || \
 131     OP(rn) == PLUS || OP(rn) == MINMOD || \
 132     OP(rn) == KEEPS || (PL_regkind[OP(rn)] == VERB) || \
 133     (PL_regkind[OP(rn)] == CURLY && ARG1(rn) > 0) \
 134 )
 135 #define IS_EXACT(rn) (PL_regkind[OP(rn)] == EXACT)
 136
 137 #define HAS_TEXT(rn) ( IS_EXACT(rn) || PL_regkind[OP(rn)] == REF )
 138
 139 #if 0
 140 /* Currently these are only used when PL_regkind[OP(rn)] == EXACT so
 141    we don't need this definition. */
 142 #define IS_TEXT(rn)   ( OP(rn)==EXACT   || OP(rn)==REF   || OP(rn)==NREF   )
 143 #define IS_TEXTF(rn)  ( OP(rn)==EXACTF  || OP(rn)==REFF  || OP(rn)==NREFF  )
 144 #define IS_TEXTFL(rn) ( OP(rn)==EXACTFL || OP(rn)==REFFL || OP(rn)==NREFFL )
 145
 146 #else
 147 /* ... so we use this as its faster. */
 148 #define IS_TEXT(rn)   ( OP(rn)==EXACT   )
 149 #define IS_TEXTF(rn)  ( OP(rn)==EXACTF  )
 150 #define IS_TEXTFL(rn) ( OP(rn)==EXACTFL )
 151
 152 #endif
 153
 154 /*
 155   Search for mandatory following text node; for lookahead, the text must
 156   follow but for lookbehind (rn->flags != 0) we skip to the next step.
 157 */
 158 #define FIND_NEXT_IMPT(rn) STMT_START { \
 159     while (JUMPABLE(rn)) { \
 160         const OPCODE type = OP(rn); \
 161         if (type == SUSPEND || PL_regkind[type] == CURLY) \
 162             rn = NEXTOPER(NEXTOPER(rn)); \
 163         else if (type == PLUS) \
 164             rn = NEXTOPER(rn); \
 165         else if (type == IFMATCH) \
 166             rn = (rn->flags == 0) ? NEXTOPER(NEXTOPER(rn)) : rn + ARG(rn); \
 167         else rn += NEXT_OFF(rn); \
 168     } \
 169 } STMT_END
 170
 171
 172 static void restore_pos(pTHX_ void *arg);
 173
 174 STATIC CHECKPOINT
 175 S_regcppush(pTHX_ I32 parenfloor)
 176 {
 177     dVAR;
 178     const int retval = PL_savestack_ix;
 179 #define REGCP_PAREN_ELEMS 4
 180     const int paren_elems_to_push = (PL_regsize - parenfloor) * REGCP_PAREN_ELEMS;
 181     int p;
 182     GET_RE_DEBUG_FLAGS_DECL;
 183
 184     if (paren_elems_to_push < 0)
 185         Perl_croak(aTHX_ "panic: paren_elems_to_push < 0");
 186
 187 #define REGCP_OTHER_ELEMS 7
 188     SSGROW(paren_elems_to_push + REGCP_OTHER_ELEMS);
 189
 190     for (p = PL_regsize; p > parenfloor; p--) {
 191 /* REGCP_PARENS_ELEMS are pushed per pairs of parentheses. */
 192         SSPUSHINT(PL_regoffs[p].end);
 193         SSPUSHINT(PL_regoffs[p].start);
 194         SSPUSHPTR(PL_reg_start_tmp[p]);
 195         SSPUSHINT(p);
 196         DEBUG_BUFFERS_r(PerlIO_printf(Perl_debug_log,
 197           "     saving \\%"UVuf" %"IVdf"(%"IVdf")..%"IVdf"\n",
 198                       (UV)p, (IV)PL_regoffs[p].start,
 199                       (IV)(PL_reg_start_tmp[p] - PL_bostr),
 200                       (IV)PL_regoffs[p].end
 201         ));
 202     }
 203 /* REGCP_OTHER_ELEMS are pushed in any case, parentheses or no. */
 204     SSPUSHPTR(PL_regoffs);
 205     SSPUSHINT(PL_regsize);
 206     SSPUSHINT(*PL_reglastparen);
 207     SSPUSHINT(*PL_reglastcloseparen);
 208     SSPUSHPTR(PL_reginput);
 209 #define REGCP_FRAME_ELEMS 2
 210 /* REGCP_FRAME_ELEMS are part of the REGCP_OTHER_ELEMS and
 211  * are needed for the regexp context stack bookkeeping. */
 212     SSPUSHINT(paren_elems_to_push + REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
 213     SSPUSHINT(SAVEt_REGCONTEXT); /* Magic cookie. */
 214
 215     return retval;
 216 }
 217
 218 /* These are needed since we do not localize EVAL nodes: */
 219 #define REGCP_SET(cp)                                           \
 220     DEBUG_STATE_r(                                              \
 221             PerlIO_printf(Perl_debug_log,                       \
 222                 "  Setting an EVAL scope, savestack=%"IVdf"\n", \
 223                 (IV)PL_savestack_ix));                          \
 224     cp = PL_savestack_ix
 225
 226 #define REGCP_UNWIND(cp)                                        \
 227     DEBUG_STATE_r(                                              \
 228         if (cp != PL_savestack_ix)                              \
 229             PerlIO_printf(Perl_debug_log,                       \
 230                 "  Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
 231                 (IV)(cp), (IV)PL_savestack_ix));                \
 232     regcpblow(cp)
 233
 234 STATIC char *
 235 S_regcppop(pTHX_ const regexp *rex)
 236 {
 237     dVAR;
 238     U32 i;
 239     char *input;
 240
 241     GET_RE_DEBUG_FLAGS_DECL;
 242
 243     /* Pop REGCP_OTHER_ELEMS before the parentheses loop starts. */
 244     i = SSPOPINT;
 245     assert(i == SAVEt_REGCONTEXT); /* Check that the magic cookie is there. */
 246     i = SSPOPINT; /* Parentheses elements to pop. */
 247     input = (char *) SSPOPPTR;
 248     *PL_reglastcloseparen = SSPOPINT;
 249     *PL_reglastparen = SSPOPINT;
 250     PL_regsize = SSPOPINT;
 251     PL_regoffs=(regexp_paren_pair *) SSPOPPTR;
 252
 253
 254     /* Now restore the parentheses context. */
 255     for (i -= (REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
 256          i > 0; i -= REGCP_PAREN_ELEMS) {
 257         I32 tmps;
 258         U32 paren = (U32)SSPOPINT;
 259         PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
 260         PL_regoffs[paren].start = SSPOPINT;
 261         tmps = SSPOPINT;
 262         if (paren <= *PL_reglastparen)
 263             PL_regoffs[paren].end = tmps;
 264         DEBUG_BUFFERS_r(
 265             PerlIO_printf(Perl_debug_log,
 266                           "     restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
 267                           (UV)paren, (IV)PL_regoffs[paren].start,
 268                           (IV)(PL_reg_start_tmp[paren] - PL_bostr),
 269                           (IV)PL_regoffs[paren].end,
 270                           (paren > *PL_reglastparen ? "(no)" : ""));
 271         );
 272     }
 273     DEBUG_BUFFERS_r(
 274         if (*PL_reglastparen + 1 <= rex->nparens) {
 275             PerlIO_printf(Perl_debug_log,
 276                           "     restoring \\%"IVdf"..\\%"IVdf" to undef\n",
 277                           (IV)(*PL_reglastparen + 1), (IV)rex->nparens);
 278         }
 279     );
 280 #if 1
 281     /* It would seem that the similar code in regtry()
 282      * already takes care of this, and in fact it is in
 283      * a better location to since this code can #if 0-ed out
 284      * but the code in regtry() is needed or otherwise tests
 285      * requiring null fields (pat.t#187 and split.t#{13,14}
 286      * (as of patchlevel 7877)  will fail.  Then again,
 287      * this code seems to be necessary or otherwise
 288      * this erroneously leaves $1 defined: "1" =~ /^(?:(\d)x)?\d$/
 289      * --jhi updated by dapm */
 290     for (i = *PL_reglastparen + 1; i <= rex->nparens; i++) {
 291         if (i > PL_regsize)
 292             PL_regoffs[i].start = -1;
 293         PL_regoffs[i].end = -1;
 294     }
 295 #endif
 296     return input;
 297 }
 298
 299 #define regcpblow(cp) LEAVE_SCOPE(cp)   /* Ignores regcppush()ed data. */
 300
 301 /*
 302  * pregexec and friends
 303  */
 304
 305 #ifndef PERL_IN_XSUB_RE
 306 /*
 307  - pregexec - match a regexp against a string
 308  */
 309 I32
 310 Perl_pregexec(pTHX_ REGEXP * const prog, char* stringarg, register char *strend,
 311          char *strbeg, I32 minend, SV *screamer, U32 nosave)
 312 /* strend: pointer to null at end of string */
 313 /* strbeg: real beginning of string */
 314 /* minend: end of match must be >=minend after stringarg. */
 315 /* nosave: For optimizations. */
 316 {
 317     return
 318         regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
 319                       nosave ? 0 : REXEC_COPY_STR);
 320 }
 321 #endif
 322
 323 /*
 324  * Need to implement the following flags for reg_anch:
 325  *
 326  * USE_INTUIT_NOML              - Useful to call re_intuit_start() first
 327  * USE_INTUIT_ML
 328  * INTUIT_AUTORITATIVE_NOML     - Can trust a positive answer
 329  * INTUIT_AUTORITATIVE_ML
 330  * INTUIT_ONCE_NOML             - Intuit can match in one location only.
 331  * INTUIT_ONCE_ML
 332  *
 333  * Another flag for this function: SECOND_TIME (so that float substrs
 334  * with giant delta may be not rechecked).
 335  */
 336
 337 /* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
 338
 339 /* If SCREAM, then SvPVX_const(sv) should be compatible with strpos and strend.
 340    Otherwise, only SvCUR(sv) is used to get strbeg. */
 341
 342 /* XXXX We assume that strpos is strbeg unless sv. */
 343
 344 /* XXXX Some places assume that there is a fixed substring.
 345         An update may be needed if optimizer marks as "INTUITable"
 346         RExen without fixed substrings.  Similarly, it is assumed that
 347         lengths of all the strings are no more than minlen, thus they
 348         cannot come from lookahead.
 349         (Or minlen should take into account lookahead.)
 350   NOTE: Some of this comment is not correct. minlen does now take account
 351   of lookahead/behind. Further research is required. -- demerphq
 352
 353 */
 354
 355 /* A failure to find a constant substring means that there is no need to make
 356    an expensive call to REx engine, thus we celebrate a failure.  Similarly,
 357    finding a substring too deep into the string means that less calls to
 358    regtry() should be needed.
 359
 360    REx compiler's optimizer found 4 possible hints:
 361         a) Anchored substring;
 362         b) Fixed substring;
 363         c) Whether we are anchored (beginning-of-line or \G);
 364         d) First node (of those at offset 0) which may distingush positions;
 365    We use a)b)d) and multiline-part of c), and try to find a position in the
 366    string which does not contradict any of them.
 367  */
 368
 369 /* Most of decisions we do here should have been done at compile time.
 370    The nodes of the REx which we used for the search should have been
 371    deleted from the finite automaton. */
 372
 373 char *
 374 Perl_re_intuit_start(pTHX_ REGEXP * const rx, SV *sv, char *strpos,
 375                      char *strend, const U32 flags, re_scream_pos_data *data)
 376 {
 377     dVAR;
 378     struct regexp *const prog = (struct regexp *)SvANY(rx);
 379     register I32 start_shift = 0;
 380     /* Should be nonnegative! */
 381     register I32 end_shift   = 0;
 382     register char *s;
 383     register SV *check;
 384     char *strbeg;
 385     char *t;
 386     const bool do_utf8 = (sv && SvUTF8(sv)) ? 1 : 0; /* if no sv we have to assume bytes */
 387     I32 ml_anch;
 388     register char *other_last = NULL;   /* other substr checked before this */
 389     char *check_at = NULL;              /* check substr found at this pos */
 390     const I32 multiline = prog->extflags & RXf_PMf_MULTILINE;
 391     RXi_GET_DECL(prog,progi);
 392 #ifdef DEBUGGING
 393     const char * const i_strpos = strpos;
 394 #endif
 395
 396     GET_RE_DEBUG_FLAGS_DECL;
 397
 398     RX_MATCH_UTF8_set(rx,do_utf8);
 399
 400     if (RX_UTF8(rx)) {
 401         PL_reg_flags |= RF_utf8;
 402     }
 403     DEBUG_EXECUTE_r(
 404         debug_start_match(rx, do_utf8, strpos, strend,
 405             sv ? "Guessing start of match in sv for"
 406                : "Guessing start of match in string for");
 407               );
 408
 409     /* CHR_DIST() would be more correct here but it makes things slow. */
 410     if (prog->minlen > strend - strpos) {
 411         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 412                               "String too short... [re_intuit_start]\n"));
 413         goto fail;
 414     }
 415
 416     strbeg = (sv && SvPOK(sv)) ? strend - SvCUR(sv) : strpos;
 417     PL_regeol = strend;
 418     if (do_utf8) {
 419         if (!prog->check_utf8 && prog->check_substr)
 420             to_utf8_substr(prog);
 421         check = prog->check_utf8;
 422     } else {
 423         if (!prog->check_substr && prog->check_utf8)
 424             to_byte_substr(prog);
 425         check = prog->check_substr;
 426     }
 427     if (check == &PL_sv_undef) {
 428         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 429                 "Non-utf8 string cannot match utf8 check string\n"));
 430         goto fail;
 431     }
 432     if (prog->extflags & RXf_ANCH) {    /* Match at beg-of-str or after \n */
 433         ml_anch = !( (prog->extflags & RXf_ANCH_SINGLE)
 434                      || ( (prog->extflags & RXf_ANCH_BOL)
 435                           && !multiline ) );    /* Check after \n? */
 436
 437         if (!ml_anch) {
 438           if ( !(prog->extflags & RXf_ANCH_GPOS) /* Checked by the caller */
 439                 && !(prog->intflags & PREGf_IMPLICIT) /* not a real BOL */
 440                /* SvCUR is not set on references: SvRV and SvPVX_const overlap */
 441                && sv && !SvROK(sv)
 442                && (strpos != strbeg)) {
 443               DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
 444               goto fail;
 445           }
 446           if (prog->check_offset_min == prog->check_offset_max &&
 447               !(prog->extflags & RXf_CANY_SEEN)) {
 448             /* Substring at constant offset from beg-of-str... */
 449             I32 slen;
 450
 451             s = HOP3c(strpos, prog->check_offset_min, strend);
 452
 453             if (SvTAIL(check)) {
 454                 slen = SvCUR(check);    /* >= 1 */
 455
 456                 if ( strend - s > slen || strend - s < slen - 1
 457                      || (strend - s == slen && strend[-1] != '\n')) {
 458                     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
 459                     goto fail_finish;
 460                 }
 461                 /* Now should match s[0..slen-2] */
 462                 slen--;
 463                 if (slen && (*SvPVX_const(check) != *s
 464                              || (slen > 1
 465                                  && memNE(SvPVX_const(check), s, slen)))) {
 466                   report_neq:
 467                     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
 468                     goto fail_finish;
 469                 }
 470             }
 471             else if (*SvPVX_const(check) != *s
 472                      || ((slen = SvCUR(check)) > 1
 473                          && memNE(SvPVX_const(check), s, slen)))
 474                 goto report_neq;
 475             check_at = s;
 476             goto success_at_start;
 477           }
 478         }
 479         /* Match is anchored, but substr is not anchored wrt beg-of-str. */
 480         s = strpos;
 481         start_shift = prog->check_offset_min; /* okay to underestimate on CC */
 482         end_shift = prog->check_end_shift;
 483
 484         if (!ml_anch) {
 485             const I32 end = prog->check_offset_max + CHR_SVLEN(check)
 486                                          - (SvTAIL(check) != 0);
 487             const I32 eshift = CHR_DIST((U8*)strend, (U8*)s) - end;
 488
 489             if (end_shift < eshift)
 490                 end_shift = eshift;
 491         }
 492     }
 493     else {                              /* Can match at random position */
 494         ml_anch = 0;
 495         s = strpos;
 496         start_shift = prog->check_offset_min;  /* okay to underestimate on CC */
 497         end_shift = prog->check_end_shift;
 498
 499         /* end shift should be non negative here */
 500     }
 501
 502 #ifdef QDEBUGGING       /* 7/99: reports of failure (with the older version) */
 503     if (end_shift < 0)
 504         Perl_croak(aTHX_ "panic: end_shift: %"IVdf" pattern:\n%s\n ",
 505                    (IV)end_shift, RX_PRECOMP(prog));
 506 #endif
 507
 508   restart:
 509     /* Find a possible match in the region s..strend by looking for
 510        the "check" substring in the region corrected by start/end_shift. */
 511
 512     {
 513         I32 srch_start_shift = start_shift;
 514         I32 srch_end_shift = end_shift;
 515         if (srch_start_shift < 0 && strbeg - s > srch_start_shift) {
 516             srch_end_shift -= ((strbeg - s) - srch_start_shift);
 517             srch_start_shift = strbeg - s;
 518         }
 519     DEBUG_OPTIMISE_MORE_r({
 520         PerlIO_printf(Perl_debug_log, "Check offset min: %"IVdf" Start shift: %"IVdf" End shift %"IVdf" Real End Shift: %"IVdf"\n",
 521             (IV)prog->check_offset_min,
 522             (IV)srch_start_shift,
 523             (IV)srch_end_shift,
 524             (IV)prog->check_end_shift);
 525     });
 526
 527     if (flags & REXEC_SCREAM) {
 528         I32 p = -1;                     /* Internal iterator of scream. */
 529         I32 * const pp = data ? data->scream_pos : &p;
 530
 531         if (PL_screamfirst[BmRARE(check)] >= 0
 532             || ( BmRARE(check) == '\n'
 533                  && (BmPREVIOUS(check) == SvCUR(check) - 1)
 534                  && SvTAIL(check) ))
 535             s = screaminstr(sv, check,
 536                             srch_start_shift + (s - strbeg), srch_end_shift, pp, 0);
 537         else
 538             goto fail_finish;
 539         /* we may be pointing at the wrong string */
 540         if (s && RXp_MATCH_COPIED(prog))
 541             s = strbeg + (s - SvPVX_const(sv));
 542         if (data)
 543             *data->scream_olds = s;
 544     }
 545     else {
 546         U8* start_point;
 547         U8* end_point;
 548         if (prog->extflags & RXf_CANY_SEEN) {
 549             start_point= (U8*)(s + srch_start_shift);
 550             end_point= (U8*)(strend - srch_end_shift);
 551         } else {
 552             start_point= HOP3(s, srch_start_shift, srch_start_shift < 0 ? strbeg : strend);
 553             end_point= HOP3(strend, -srch_end_shift, strbeg);
 554         }
 555         DEBUG_OPTIMISE_MORE_r({
 556             PerlIO_printf(Perl_debug_log, "fbm_instr len=%d str=<%.*s>\n",
 557                 (int)(end_point - start_point),
 558                 (int)(end_point - start_point) > 20 ? 20 : (int)(end_point - start_point),
 559                 start_point);
 560         });
 561
 562         s = fbm_instr( start_point, end_point,
 563                       check, multiline ? FBMrf_MULTILINE : 0);
 564     }
 565     }
 566     /* Update the count-of-usability, remove useless subpatterns,
 567         unshift s.  */
 568
 569     DEBUG_EXECUTE_r({
 570         RE_PV_QUOTED_DECL(quoted, do_utf8, PERL_DEBUG_PAD_ZERO(0),
 571             SvPVX_const(check), RE_SV_DUMPLEN(check), 30);
 572         PerlIO_printf(Perl_debug_log, "%s %s substr %s%s%s",
 573                           (s ? "Found" : "Did not find"),
 574             (check == (do_utf8 ? prog->anchored_utf8 : prog->anchored_substr)
 575                 ? "anchored" : "floating"),
 576             quoted,
 577             RE_SV_TAIL(check),
 578             (s ? " at offset " : "...\n") );
 579     });
 580
 581     if (!s)
 582         goto fail_finish;
 583     /* Finish the diagnostic message */
 584     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
 585
 586     /* XXX dmq: first branch is for positive lookbehind...
 587        Our check string is offset from the beginning of the pattern.
 588        So we need to do any stclass tests offset forward from that
 589        point. I think. :-(
 590      */
 591
 592
 593
 594     check_at=s;
 595
 596
 597     /* Got a candidate.  Check MBOL anchoring, and the *other* substr.
 598        Start with the other substr.
 599        XXXX no SCREAM optimization yet - and a very coarse implementation
 600        XXXX /ttx+/ results in anchored="ttx", floating="x".  floating will
 601                 *always* match.  Probably should be marked during compile...
 602        Probably it is right to do no SCREAM here...
 603      */
 604
 605     if (do_utf8 ? (prog->float_utf8 && prog->anchored_utf8)
 606                 : (prog->float_substr && prog->anchored_substr))
 607     {
 608         /* Take into account the "other" substring. */
 609         /* XXXX May be hopelessly wrong for UTF... */
 610         if (!other_last)
 611             other_last = strpos;
 612         if (check == (do_utf8 ? prog->float_utf8 : prog->float_substr)) {
 613           do_other_anchored:
 614             {
 615                 char * const last = HOP3c(s, -start_shift, strbeg);
 616                 char *last1, *last2;
 617                 char * const saved_s = s;
 618                 SV* must;
 619
 620                 t = s - prog->check_offset_max;
 621                 if (s - strpos > prog->check_offset_max  /* signed-corrected t > strpos */
 622                     && (!do_utf8
 623                         || ((t = (char*)reghopmaybe3((U8*)s, -(prog->check_offset_max), (U8*)strpos))
 624                             && t > strpos)))
 625                     NOOP;
 626                 else
 627                     t = strpos;
 628                 t = HOP3c(t, prog->anchored_offset, strend);
 629                 if (t < other_last)     /* These positions already checked */
 630                     t = other_last;
 631                 last2 = last1 = HOP3c(strend, -prog->minlen, strbeg);
 632                 if (last < last1)
 633                     last1 = last;
 634                 /* XXXX It is not documented what units *_offsets are in.
 635                    We assume bytes, but this is clearly wrong.
 636                    Meaning this code needs to be carefully reviewed for errors.
 637                    dmq.
 638                   */
 639
 640                 /* On end-of-str: see comment below. */
 641                 must = do_utf8 ? prog->anchored_utf8 : prog->anchored_substr;
 642                 if (must == &PL_sv_undef) {
 643                     s = (char*)NULL;
 644                     DEBUG_r(must = prog->anchored_utf8);        /* for debug */
 645                 }
 646                 else
 647                     s = fbm_instr(
 648                         (unsigned char*)t,
 649                         HOP3(HOP3(last1, prog->anchored_offset, strend)
 650                                 + SvCUR(must), -(SvTAIL(must)!=0), strbeg),
 651                         must,
 652                         multiline ? FBMrf_MULTILINE : 0
 653                     );
 654                 DEBUG_EXECUTE_r({
 655                     RE_PV_QUOTED_DECL(quoted, do_utf8, PERL_DEBUG_PAD_ZERO(0),
 656                         SvPVX_const(must), RE_SV_DUMPLEN(must), 30);
 657                     PerlIO_printf(Perl_debug_log, "%s anchored substr %s%s",
 658                         (s ? "Found" : "Contradicts"),
 659                         quoted, RE_SV_TAIL(must));
 660                 });
 661
 662
 663                 if (!s) {
 664                     if (last1 >= last2) {
 665                         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 666                                                 ", giving up...\n"));
 667                         goto fail_finish;
 668                     }
 669                     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 670                         ", trying floating at offset %ld...\n",
 671                         (long)(HOP3c(saved_s, 1, strend) - i_strpos)));
 672                     other_last = HOP3c(last1, prog->anchored_offset+1, strend);
 673                     s = HOP3c(last, 1, strend);
 674                     goto restart;
 675                 }
 676                 else {
 677                     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
 678                           (long)(s - i_strpos)));
 679                     t = HOP3c(s, -prog->anchored_offset, strbeg);
 680                     other_last = HOP3c(s, 1, strend);
 681                     s = saved_s;
 682                     if (t == strpos)
 683                         goto try_at_start;
 684                     goto try_at_offset;
 685                 }
 686             }
 687         }
 688         else {          /* Take into account the floating substring. */
 689             char *last, *last1;
 690             char * const saved_s = s;
 691             SV* must;
 692
 693             t = HOP3c(s, -start_shift, strbeg);
 694             last1 = last =
 695                 HOP3c(strend, -prog->minlen + prog->float_min_offset, strbeg);
 696             if (CHR_DIST((U8*)last, (U8*)t) > prog->float_max_offset)
 697                 last = HOP3c(t, prog->float_max_offset, strend);
 698             s = HOP3c(t, prog->float_min_offset, strend);
 699             if (s < other_last)
 700                 s = other_last;
 701  /* XXXX It is not documented what units *_offsets are in.  Assume bytes.  */
 702             must = do_utf8 ? prog->float_utf8 : prog->float_substr;
 703             /* fbm_instr() takes into account exact value of end-of-str
 704                if the check is SvTAIL(ed).  Since false positives are OK,
 705                and end-of-str is not later than strend we are OK. */
 706             if (must == &PL_sv_undef) {
 707                 s = (char*)NULL;
 708                 DEBUG_r(must = prog->float_utf8);       /* for debug message */
 709             }
 710             else
 711                 s = fbm_instr((unsigned char*)s,
 712                               (unsigned char*)last + SvCUR(must)
 713                                   - (SvTAIL(must)!=0),
 714                               must, multiline ? FBMrf_MULTILINE : 0);
 715             DEBUG_EXECUTE_r({
 716                 RE_PV_QUOTED_DECL(quoted, do_utf8, PERL_DEBUG_PAD_ZERO(0),
 717                     SvPVX_const(must), RE_SV_DUMPLEN(must), 30);
 718                 PerlIO_printf(Perl_debug_log, "%s floating substr %s%s",
 719                     (s ? "Found" : "Contradicts"),
 720                     quoted, RE_SV_TAIL(must));
 721             });
 722             if (!s) {
 723                 if (last1 == last) {
 724                     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 725                                             ", giving up...\n"));
 726                     goto fail_finish;
 727                 }
 728                 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 729                     ", trying anchored starting at offset %ld...\n",
 730                     (long)(saved_s + 1 - i_strpos)));
 731                 other_last = last;
 732                 s = HOP3c(t, 1, strend);
 733                 goto restart;
 734             }
 735             else {
 736                 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
 737                       (long)(s - i_strpos)));
 738                 other_last = s; /* Fix this later. --Hugo */
 739                 s = saved_s;
 740                 if (t == strpos)
 741                     goto try_at_start;
 742                 goto try_at_offset;
 743             }
 744         }
 745     }
 746
 747
 748     t= (char*)HOP3( s, -prog->check_offset_max, (prog->check_offset_max<0) ? strend : strpos);
 749
 750     DEBUG_OPTIMISE_MORE_r(
 751         PerlIO_printf(Perl_debug_log,
 752             "Check offset min:%"IVdf" max:%"IVdf" S:%"IVdf" t:%"IVdf" D:%"IVdf" end:%"IVdf"\n",
 753             (IV)prog->check_offset_min,
 754             (IV)prog->check_offset_max,
 755             (IV)(s-strpos),
 756             (IV)(t-strpos),
 757             (IV)(t-s),
 758             (IV)(strend-strpos)
 759         )
 760     );
 761
 762     if (s - strpos > prog->check_offset_max  /* signed-corrected t > strpos */
 763         && (!do_utf8
 764             || ((t = (char*)reghopmaybe3((U8*)s, -prog->check_offset_max, (U8*) ((prog->check_offset_max<0) ? strend : strpos)))
 765                  && t > strpos)))
 766     {
 767         /* Fixed substring is found far enough so that the match
 768            cannot start at strpos. */
 769       try_at_offset:
 770         if (ml_anch && t[-1] != '\n') {
 771             /* Eventually fbm_*() should handle this, but often
 772                anchored_offset is not 0, so this check will not be wasted. */
 773             /* XXXX In the code below we prefer to look for "^" even in
 774                presence of anchored substrings.  And we search even
 775                beyond the found float position.  These pessimizations
 776                are historical artefacts only.  */
 777           find_anchor:
 778             while (t < strend - prog->minlen) {
 779                 if (*t == '\n') {
 780                     if (t < check_at - prog->check_offset_min) {
 781                         if (do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) {
 782                             /* Since we moved from the found position,
 783                                we definitely contradict the found anchored
 784                                substr.  Due to the above check we do not
 785                                contradict "check" substr.
 786                                Thus we can arrive here only if check substr
 787                                is float.  Redo checking for "other"=="fixed".
 788                              */
 789                             strpos = t + 1;
 790                             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
 791                                 PL_colors[0], PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
 792                             goto do_other_anchored;
 793                         }
 794                         /* We don't contradict the found floating substring. */
 795                         /* XXXX Why not check for STCLASS? */
 796                         s = t + 1;
 797                         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
 798                             PL_colors[0], PL_colors[1], (long)(s - i_strpos)));
 799                         goto set_useful;
 800                     }
 801                     /* Position contradicts check-string */
 802                     /* XXXX probably better to look for check-string
 803                        than for "\n", so one should lower the limit for t? */
 804                     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n",
 805                         PL_colors[0], PL_colors[1], (long)(t + 1 - i_strpos)));
 806                     other_last = strpos = s = t + 1;
 807                     goto restart;
 808                 }
 809                 t++;
 810             }
 811             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
 812                         PL_colors[0], PL_colors[1]));
 813             goto fail_finish;
 814         }
 815         else {
 816             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Starting position does not contradict /%s^%s/m...\n",
 817                         PL_colors[0], PL_colors[1]));
 818         }
 819         s = t;
 820       set_useful:
 821         ++BmUSEFUL(do_utf8 ? prog->check_utf8 : prog->check_substr);    /* hooray/5 */
 822     }
 823     else {
 824         /* The found string does not prohibit matching at strpos,
 825            - no optimization of calling REx engine can be performed,
 826            unless it was an MBOL and we are not after MBOL,
 827            or a future STCLASS check will fail this. */
 828       try_at_start:
 829         /* Even in this situation we may use MBOL flag if strpos is offset
 830            wrt the start of the string. */
 831         if (ml_anch && sv && !SvROK(sv) /* See prev comment on SvROK */
 832             && (strpos != strbeg) && strpos[-1] != '\n'
 833             /* May be due to an implicit anchor of m{.*foo}  */
 834             && !(prog->intflags & PREGf_IMPLICIT))
 835         {
 836             t = strpos;
 837             goto find_anchor;
 838         }
 839         DEBUG_EXECUTE_r( if (ml_anch)
 840             PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n",
 841                           (long)(strpos - i_strpos), PL_colors[0], PL_colors[1]);
 842         );
 843       success_at_start:
 844         if (!(prog->intflags & PREGf_NAUGHTY)   /* XXXX If strpos moved? */
 845             && (do_utf8 ? (
 846                 prog->check_utf8                /* Could be deleted already */
 847                 && --BmUSEFUL(prog->check_utf8) < 0
 848                 && (prog->check_utf8 == prog->float_utf8)
 849             ) : (
 850                 prog->check_substr              /* Could be deleted already */
 851                 && --BmUSEFUL(prog->check_substr) < 0
 852                 && (prog->check_substr == prog->float_substr)
 853             )))
 854         {
 855             /* If flags & SOMETHING - do not do it many times on the same match */
 856             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "... Disabling check substring...\n"));
 857             SvREFCNT_dec(do_utf8 ? prog->check_utf8 : prog->check_substr);
 858             if (do_utf8 ? prog->check_substr : prog->check_utf8)
 859                 SvREFCNT_dec(do_utf8 ? prog->check_substr : prog->check_utf8);
 860             prog->check_substr = prog->check_utf8 = NULL;       /* disable */
 861             prog->float_substr = prog->float_utf8 = NULL;       /* clear */
 862             check = NULL;                       /* abort */
 863             s = strpos;
 864             /* XXXX This is a remnant of the old implementation.  It
 865                     looks wasteful, since now INTUIT can use many
 866                     other heuristics. */
 867             prog->extflags &= ~RXf_USE_INTUIT;
 868         }
 869         else
 870             s = strpos;
 871     }
 872
 873     /* Last resort... */
 874     /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
 875     /* trie stclasses are too expensive to use here, we are better off to
 876        leave it to regmatch itself */
 877     if (progi->regstclass && PL_regkind[OP(progi->regstclass)]!=TRIE) {
 878         /* minlen == 0 is possible if regstclass is \b or \B,
 879            and the fixed substr is ''$.
 880            Since minlen is already taken into account, s+1 is before strend;
 881            accidentally, minlen >= 1 guaranties no false positives at s + 1
 882            even for \b or \B.  But (minlen? 1 : 0) below assumes that
 883            regstclass does not come from lookahead...  */
 884         /* If regstclass takes bytelength more than 1: If charlength==1, OK.
 885            This leaves EXACTF only, which is dealt with in find_byclass().  */
 886         const U8* const str = (U8*)STRING(progi->regstclass);
 887         const int cl_l = (PL_regkind[OP(progi->regstclass)] == EXACT
 888                     ? CHR_DIST(str+STR_LEN(progi->regstclass), str)
 889                     : 1);
 890         char * endpos;
 891         if (prog->anchored_substr || prog->anchored_utf8 || ml_anch)
 892             endpos= HOP3c(s, (prog->minlen ? cl_l : 0), strend);
 893         else if (prog->float_substr || prog->float_utf8)
 894             endpos= HOP3c(HOP3c(check_at, -start_shift, strbeg), cl_l, strend);
 895         else
 896             endpos= strend;
 897
 898         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "start_shift: %"IVdf" check_at: %"IVdf" s: %"IVdf" endpos: %"IVdf"\n",
 899                                       (IV)start_shift, (IV)(check_at - strbeg), (IV)(s - strbeg), (IV)(endpos - strbeg)));
 900
 901         t = s;
 902         s = find_byclass(prog, progi->regstclass, s, endpos, NULL);
 903         if (!s) {
 904 #ifdef DEBUGGING
 905             const char *what = NULL;
 906 #endif
 907             if (endpos == strend) {
 908                 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
 909                                 "Could not match STCLASS...\n") );
 910                 goto fail;
 911             }
 912             DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
 913                                    "This position contradicts STCLASS...\n") );
 914             if ((prog->extflags & RXf_ANCH) && !ml_anch)
 915                 goto fail;
 916             /* Contradict one of substrings */
 917             if (prog->anchored_substr || prog->anchored_utf8) {
 918                 if ((do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) == check) {
 919                     DEBUG_EXECUTE_r( what = "anchored" );
 920                   hop_and_restart:
 921                     s = HOP3c(t, 1, strend);
 922                     if (s + start_shift + end_shift > strend) {
 923                         /* XXXX Should be taken into account earlier? */
 924                         DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
 925                                                "Could not match STCLASS...\n") );
 926                         goto fail;
 927                     }
 928                     if (!check)
 929                         goto giveup;
 930                     DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
 931                                 "Looking for %s substr starting at offset %ld...\n",
 932                                  what, (long)(s + start_shift - i_strpos)) );
 933                     goto restart;
 934                 }
 935                 /* Have both, check_string is floating */
 936                 if (t + start_shift >= check_at) /* Contradicts floating=check */
 937                     goto retry_floating_check;
 938                 /* Recheck anchored substring, but not floating... */
 939                 s = check_at;
 940                 if (!check)
 941                     goto giveup;
 942                 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
 943                           "Looking for anchored substr starting at offset %ld...\n",
 944                           (long)(other_last - i_strpos)) );
 945                 goto do_other_anchored;
 946             }
 947             /* Another way we could have checked stclass at the
 948                current position only: */
 949             if (ml_anch) {
 950                 s = t = t + 1;
 951                 if (!check)
 952                     goto giveup;
 953                 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
 954                           "Looking for /%s^%s/m starting at offset %ld...\n",
 955                           PL_colors[0], PL_colors[1], (long)(t - i_strpos)) );
 956                 goto try_at_offset;
 957             }
 958             if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))     /* Could have been deleted */
 959                 goto fail;
 960             /* Check is floating subtring. */
 961           retry_floating_check:
 962             t = check_at - start_shift;
 963             DEBUG_EXECUTE_r( what = "floating" );
 964             goto hop_and_restart;
 965         }
 966         if (t != s) {
 967             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 968                         "By STCLASS: moving %ld --> %ld\n",
 969                                   (long)(t - i_strpos), (long)(s - i_strpos))
 970                    );
 971         }
 972         else {
 973             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 974                                   "Does not contradict STCLASS...\n");
 975                    );
 976         }
 977     }
 978   giveup:
 979     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%s%s:%s match at offset %ld\n",
 980                           PL_colors[4], (check ? "Guessed" : "Giving up"),
 981                           PL_colors[5], (long)(s - i_strpos)) );
 982     return s;
 983
 984   fail_finish:                          /* Substring not found */
 985     if (prog->check_substr || prog->check_utf8)         /* could be removed already */
 986         BmUSEFUL(do_utf8 ? prog->check_utf8 : prog->check_substr) += 5; /* hooray */
 987   fail:
 988     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
 989                           PL_colors[4], PL_colors[5]));
 990     return NULL;
 991 }
 992
 993 #define DECL_TRIE_TYPE(scan) \
 994     const enum { trie_plain, trie_utf8, trie_utf8_fold, trie_latin_utf8_fold } \
 995                     trie_type = (scan->flags != EXACT) \
 996                               ? (do_utf8 ? trie_utf8_fold : (UTF ? trie_latin_utf8_fold : trie_plain)) \
 997                               : (do_utf8 ? trie_utf8 : trie_plain)
 998
 999 #define REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc, uscan, len,  \
1000 uvc, charid, foldlen, foldbuf, uniflags) STMT_START {                       \
1001     switch (trie_type) {                                                    \
1002     case trie_utf8_fold:                                                    \
1003         if ( foldlen>0 ) {                                                  \
1004             uvc = utf8n_to_uvuni( uscan, UTF8_MAXLEN, &len, uniflags );     \
1005             foldlen -= len;                                                 \
1006             uscan += len;                                                   \
1007             len=0;                                                          \
1008         } else {                                                            \
1009             uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN, &len, uniflags );   \
1010             uvc = to_uni_fold( uvc, foldbuf, &foldlen );                    \
1011             foldlen -= UNISKIP( uvc );                                      \
1012             uscan = foldbuf + UNISKIP( uvc );                               \
1013         }                                                                   \
1014         break;                                                              \
1015     case trie_latin_utf8_fold:                                              \
1016         if ( foldlen>0 ) {                                                  \
1017             uvc = utf8n_to_uvuni( uscan, UTF8_MAXLEN, &len, uniflags );     \
1018             foldlen -= len;                                                 \
1019             uscan += len;                                                   \
1020             len=0;                                                          \
1021         } else {                                                            \
1022             len = 1;                                                        \
1023             uvc = to_uni_fold( *(U8*)uc, foldbuf, &foldlen );               \
1024             foldlen -= UNISKIP( uvc );                                      \
1025             uscan = foldbuf + UNISKIP( uvc );                               \
1026         }                                                                   \
1027         break;                                                              \
1028     case trie_utf8:                                                         \
1029         uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN, &len, uniflags );       \
1030         break;                                                              \
1031     case trie_plain:                                                        \
1032         uvc = (UV)*uc;                                                      \
1033         len = 1;                                                            \
1034     }                                                                       \
1035                                                                             \
1036     if (uvc < 256) {                                                        \
1037         charid = trie->charmap[ uvc ];                                      \
1038     }                                                                       \
1039     else {                                                                  \
1040         charid = 0;                                                         \
1041         if (widecharmap) {                                                  \
1042             SV** const svpp = hv_fetch(widecharmap,                         \
1043                         (char*)&uvc, sizeof(UV), 0);                        \
1044             if (svpp)                                                       \
1045                 charid = (U16)SvIV(*svpp);                                  \
1046         }                                                                   \
1047     }                                                                       \
1048 } STMT_END
1049
1050 #define REXEC_FBC_EXACTISH_CHECK(CoNd)                 \
1051 {                                                      \
1052     char *my_strend= (char *)strend;                   \
1053     if ( (CoNd)                                        \
1054          && (ln == len ||                              \
1055              !ibcmp_utf8(s, &my_strend, 0,  do_utf8,   \
1056                         m, NULL, ln, (bool)UTF))       \
1057          && (!reginfo || regtry(reginfo, &s)) )        \
1058         goto got_it;                                   \
1059     else {                                             \
1060          U8 foldbuf[UTF8_MAXBYTES_CASE+1];             \
1061          uvchr_to_utf8(tmpbuf, c);                     \
1062          f = to_utf8_fold(tmpbuf, foldbuf, &foldlen);  \
1063          if ( f != c                                   \
1064               && (f == c1 || f == c2)                  \
1065               && (ln == len ||                         \
1066                 !ibcmp_utf8(s, &my_strend, 0,  do_utf8,\
1067                               m, NULL, ln, (bool)UTF)) \
1068               && (!reginfo || regtry(reginfo, &s)) )   \
1069               goto got_it;                             \
1070     }                                                  \
1071 }                                                      \
1072 s += len
1073
1074 #define REXEC_FBC_EXACTISH_SCAN(CoNd)                     \
1075 STMT_START {                                              \
1076     while (s <= e) {                                      \
1077         if ( (CoNd)                                       \
1078              && (ln == 1 || !(OP(c) == EXACTF             \
1079                               ? ibcmp(s, m, ln)           \
1080                               : ibcmp_locale(s, m, ln)))  \
1081              && (!reginfo || regtry(reginfo, &s)) )        \
1082             goto got_it;                                  \
1083         s++;                                              \
1084     }                                                     \
1085 } STMT_END
1086
1087 #define REXEC_FBC_UTF8_SCAN(CoDe)                     \
1088 STMT_START {                                          \
1089     while (s + (uskip = UTF8SKIP(s)) <= strend) {     \
1090         CoDe                                          \
1091         s += uskip;                                   \
1092     }                                                 \
1093 } STMT_END
1094
1095 #define REXEC_FBC_SCAN(CoDe)                          \
1096 STMT_START {                                          \
1097     while (s < strend) {                              \
1098         CoDe                                          \
1099         s++;                                          \
1100     }                                                 \
1101 } STMT_END
1102
1103 #define REXEC_FBC_UTF8_CLASS_SCAN(CoNd)               \
1104 REXEC_FBC_UTF8_SCAN(                                  \
1105     if (CoNd) {                                       \
1106         if (tmp && (!reginfo || regtry(reginfo, &s)))  \
1107             goto got_it;                              \
1108         else                                          \
1109             tmp = doevery;                            \
1110     }                                                 \
1111     else                                              \
1112         tmp = 1;                                      \
1113 )
1114
1115 #define REXEC_FBC_CLASS_SCAN(CoNd)                    \
1116 REXEC_FBC_SCAN(                                       \
1117     if (CoNd) {                                       \
1118         if (tmp && (!reginfo || regtry(reginfo, &s)))  \
1119             goto got_it;                              \
1120         else                                          \
1121             tmp = doevery;                            \
1122     }                                                 \
1123     else                                              \
1124         tmp = 1;                                      \
1125 )
1126
1127 #define REXEC_FBC_TRYIT               \
1128 if ((!reginfo || regtry(reginfo, &s))) \
1129     goto got_it
1130
1131 #define REXEC_FBC_CSCAN(CoNdUtF8,CoNd)                         \
1132     if (do_utf8) {                                             \
1133         REXEC_FBC_UTF8_CLASS_SCAN(CoNdUtF8);                   \
1134     }                                                          \
1135     else {                                                     \
1136         REXEC_FBC_CLASS_SCAN(CoNd);                            \
1137     }                                                          \
1138     break
1139
1140 #define REXEC_FBC_CSCAN_PRELOAD(UtFpReLoAd,CoNdUtF8,CoNd)      \
1141     if (do_utf8) {                                             \
1142         UtFpReLoAd;                                            \
1143         REXEC_FBC_UTF8_CLASS_SCAN(CoNdUtF8);                   \
1144     }                                                          \
1145     else {                                                     \
1146         REXEC_FBC_CLASS_SCAN(CoNd);                            \
1147     }                                                          \
1148     break
1149
1150 #define REXEC_FBC_CSCAN_TAINT(CoNdUtF8,CoNd)                   \
1151     PL_reg_flags |= RF_tainted;                                \
1152     if (do_utf8) {                                             \
1153         REXEC_FBC_UTF8_CLASS_SCAN(CoNdUtF8);                   \
1154     }                                                          \
1155     else {                                                     \
1156         REXEC_FBC_CLASS_SCAN(CoNd);                            \
1157     }                                                          \
1158     break
1159
1160 #define DUMP_EXEC_POS(li,s,doutf8) \
1161     dump_exec_pos(li,s,(PL_regeol),(PL_bostr),(PL_reg_starttry),doutf8)
1162
1163 /* We know what class REx starts with.  Try to find this position... */
1164 /* if reginfo is NULL, its a dryrun */
1165 /* annoyingly all the vars in this routine have different names from their counterparts
1166    in regmatch. /grrr */
1167
1168 STATIC char *
1169 S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
1170     const char *strend, regmatch_info *reginfo)
1171 {
1172         dVAR;
1173         const I32 doevery = (prog->intflags & PREGf_SKIP) == 0;
1174         char *m;
1175         STRLEN ln;
1176         STRLEN lnc;
1177         register STRLEN uskip;
1178         unsigned int c1;
1179         unsigned int c2;
1180         char *e;
1181         register I32 tmp = 1;   /* Scratch variable? */
1182         register const bool do_utf8 = PL_reg_match_utf8;
1183         RXi_GET_DECL(prog,progi);
1184
1185         /* We know what class it must start with. */
1186         switch (OP(c)) {
1187         case ANYOF:
1188             if (do_utf8) {
1189                  REXEC_FBC_UTF8_CLASS_SCAN((ANYOF_FLAGS(c) & ANYOF_UNICODE) ||
1190                           !UTF8_IS_INVARIANT((U8)s[0]) ?
1191                           reginclass(prog, c, (U8*)s, 0, do_utf8) :
1192                           REGINCLASS(prog, c, (U8*)s));
1193             }
1194             else {
1195                  while (s < strend) {
1196                       STRLEN skip = 1;
1197
1198                       if (REGINCLASS(prog, c, (U8*)s) ||
1199                           (ANYOF_FOLD_SHARP_S(c, s, strend) &&
1200                            /* The assignment of 2 is intentional:
1201                             * for the folded sharp s, the skip is 2. */
1202                            (skip = SHARP_S_SKIP))) {
1203                            if (tmp && (!reginfo || regtry(reginfo, &s)))
1204                                 goto got_it;
1205                            else
1206                                 tmp = doevery;
1207                       }
1208                       else
1209                            tmp = 1;
1210                       s += skip;
1211                  }
1212             }
1213             break;
1214         case CANY:
1215             REXEC_FBC_SCAN(
1216                 if (tmp && (!reginfo || regtry(reginfo, &s)))
1217                     goto got_it;
1218                 else
1219                     tmp = doevery;
1220             );
1221             break;
1222         case EXACTF:
1223             m   = STRING(c);
1224             ln  = STR_LEN(c);   /* length to match in octets/bytes */
1225             lnc = (I32) ln;     /* length to match in characters */
1226             if (UTF) {
1227                 STRLEN ulen1, ulen2;
1228                 U8 *sm = (U8 *) m;
1229                 U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
1230                 U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
1231                 /* used by commented-out code below */
1232                 /*const U32 uniflags = UTF8_ALLOW_DEFAULT;*/
1233
1234                 /* XXX: Since the node will be case folded at compile
1235                    time this logic is a little odd, although im not
1236                    sure that its actually wrong. --dmq */
1237
1238                 c1 = to_utf8_lower((U8*)m, tmpbuf1, &ulen1);
1239                 c2 = to_utf8_upper((U8*)m, tmpbuf2, &ulen2);
1240
1241                 /* XXX: This is kinda strange. to_utf8_XYZ returns the
1242                    codepoint of the first character in the converted
1243                    form, yet originally we did the extra step.
1244                    No tests fail by commenting this code out however
1245                    so Ive left it out. -- dmq.
1246
1247                 c1 = utf8n_to_uvchr(tmpbuf1, UTF8_MAXBYTES_CASE,
1248                                     0, uniflags);
1249                 c2 = utf8n_to_uvchr(tmpbuf2, UTF8_MAXBYTES_CASE,
1250                                     0, uniflags);
1251                 */
1252
1253                 lnc = 0;
1254                 while (sm < ((U8 *) m + ln)) {
1255                     lnc++;
1256                     sm += UTF8SKIP(sm);
1257                 }
1258             }
1259             else {
1260                 c1 = *(U8*)m;
1261                 c2 = PL_fold[c1];
1262             }
1263             goto do_exactf;
1264         case EXACTFL:
1265             m   = STRING(c);
1266             ln  = STR_LEN(c);
1267             lnc = (I32) ln;
1268             c1 = *(U8*)m;
1269             c2 = PL_fold_locale[c1];
1270           do_exactf:
1271             e = HOP3c(strend, -((I32)lnc), s);
1272
1273             if (!reginfo && e < s)
1274                 e = s;                  /* Due to minlen logic of intuit() */
1275
1276             /* The idea in the EXACTF* cases is to first find the
1277              * first character of the EXACTF* node and then, if
1278              * necessary, case-insensitively compare the full
1279              * text of the node.  The c1 and c2 are the first
1280              * characters (though in Unicode it gets a bit
1281              * more complicated because there are more cases
1282              * than just upper and lower: one needs to use
1283              * the so-called folding case for case-insensitive
1284              * matching (called "loose matching" in Unicode).
1285              * ibcmp_utf8() will do just that. */
1286
1287             if (do_utf8 || UTF) {
1288                 UV c, f;
1289                 U8 tmpbuf [UTF8_MAXBYTES+1];
1290                 STRLEN len = 1;
1291                 STRLEN foldlen;
1292                 const U32 uniflags = UTF8_ALLOW_DEFAULT;
1293                 if (c1 == c2) {
1294                     /* Upper and lower of 1st char are equal -
1295                      * probably not a "letter". */
1296                     while (s <= e) {
1297                         if (do_utf8) {
1298                             c = utf8n_to_uvchr((U8*)s, UTF8_MAXBYTES, &len,
1299                                            uniflags);
1300                         } else {
1301                             c = *((U8*)s);
1302                         }
1303                         REXEC_FBC_EXACTISH_CHECK(c == c1);
1304                     }
1305                 }
1306                 else {
1307                     while (s <= e) {
1308                         if (do_utf8) {
1309                             c = utf8n_to_uvchr((U8*)s, UTF8_MAXBYTES, &len,
1310                                            uniflags);
1311                         } else {
1312                             c = *((U8*)s);
1313                         }
1314
1315                         /* Handle some of the three Greek sigmas cases.
1316                          * Note that not all the possible combinations
1317                          * are handled here: some of them are handled
1318                          * by the standard folding rules, and some of
1319                          * them (the character class or ANYOF cases)
1320                          * are handled during compiletime in
1321                          * regexec.c:S_regclass(). */
1322                         if (c == (UV)UNICODE_GREEK_CAPITAL_LETTER_SIGMA ||
1323                             c == (UV)UNICODE_GREEK_SMALL_LETTER_FINAL_SIGMA)
1324                             c = (UV)UNICODE_GREEK_SMALL_LETTER_SIGMA;
1325
1326                         REXEC_FBC_EXACTISH_CHECK(c == c1 || c == c2);
1327                     }
1328                 }
1329             }
1330             else {
1331                 /* Neither pattern nor string are UTF8 */
1332                 if (c1 == c2)
1333                     REXEC_FBC_EXACTISH_SCAN(*(U8*)s == c1);
1334                 else
1335                     REXEC_FBC_EXACTISH_SCAN(*(U8*)s == c1 || *(U8*)s == c2);
1336             }
1337             break;
1338         case BOUNDL:
1339             PL_reg_flags |= RF_tainted;
1340             /* FALL THROUGH */
1341         case BOUND:
1342             if (do_utf8) {
1343                 if (s == PL_bostr)
1344                     tmp = '\n';
1345                 else {
1346                     U8 * const r = reghop3((U8*)s, -1, (U8*)PL_bostr);
1347                     tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, UTF8_ALLOW_DEFAULT);
1348                 }
1349                 tmp = ((OP(c) == BOUND ?
1350                         isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
1351                 LOAD_UTF8_CHARCLASS_ALNUM();
1352                 REXEC_FBC_UTF8_SCAN(
1353                     if (tmp == !(OP(c) == BOUND ?
1354                                  (bool)swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
1355                                  isALNUM_LC_utf8((U8*)s)))
1356                     {
1357                         tmp = !tmp;
1358                         REXEC_FBC_TRYIT;
1359                 }
1360                 );
1361             }
1362             else {
1363                 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
1364                 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1365                 REXEC_FBC_SCAN(
1366                     if (tmp ==
1367                         !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
1368                         tmp = !tmp;
1369                         REXEC_FBC_TRYIT;
1370                 }
1371                 );
1372             }
1373             if ((!prog->minlen && tmp) && (!reginfo || regtry(reginfo, &s)))
1374                 goto got_it;
1375             break;
1376         case NBOUNDL:
1377             PL_reg_flags |= RF_tainted;
1378             /* FALL THROUGH */
1379         case NBOUND:
1380             if (do_utf8) {
1381                 if (s == PL_bostr)
1382                     tmp = '\n';
1383                 else {
1384                     U8 * const r = reghop3((U8*)s, -1, (U8*)PL_bostr);
1385                     tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, UTF8_ALLOW_DEFAULT);
1386                 }
1387                 tmp = ((OP(c) == NBOUND ?
1388                         isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
1389                 LOAD_UTF8_CHARCLASS_ALNUM();
1390                 REXEC_FBC_UTF8_SCAN(
1391                     if (tmp == !(OP(c) == NBOUND ?
1392                                  (bool)swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
1393                                  isALNUM_LC_utf8((U8*)s)))
1394                         tmp = !tmp;
1395                     else REXEC_FBC_TRYIT;
1396                 );
1397             }
1398             else {
1399                 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
1400                 tmp = ((OP(c) == NBOUND ?
1401                         isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1402                 REXEC_FBC_SCAN(
1403                     if (tmp ==
1404                         !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
1405                         tmp = !tmp;
1406                     else REXEC_FBC_TRYIT;
1407                 );
1408             }
1409             if ((!prog->minlen && !tmp) && (!reginfo || regtry(reginfo, &s)))
1410                 goto got_it;
1411             break;
1412         case ALNUM:
1413             REXEC_FBC_CSCAN_PRELOAD(
1414                 LOAD_UTF8_CHARCLASS_ALNUM(),
1415                 swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8),
1416                 isALNUM(*s)
1417             );
1418         case ALNUML:
1419             REXEC_FBC_CSCAN_TAINT(
1420                 isALNUM_LC_utf8((U8*)s),
1421                 isALNUM_LC(*s)
1422             );
1423         case NALNUM:
1424             REXEC_FBC_CSCAN_PRELOAD(
1425                 LOAD_UTF8_CHARCLASS_ALNUM(),
1426                 !swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8),
1427                 !isALNUM(*s)
1428             );
1429         case NALNUML:
1430             REXEC_FBC_CSCAN_TAINT(
1431                 !isALNUM_LC_utf8((U8*)s),
1432                 !isALNUM_LC(*s)
1433             );
1434         case SPACE:
1435             REXEC_FBC_CSCAN_PRELOAD(
1436                 LOAD_UTF8_CHARCLASS_SPACE(),
1437                 *s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8),
1438                 isSPACE(*s)
1439             );
1440         case SPACEL:
1441             REXEC_FBC_CSCAN_TAINT(
1442                 *s == ' ' || isSPACE_LC_utf8((U8*)s),
1443                 isSPACE_LC(*s)
1444             );
1445         case NSPACE:
1446             REXEC_FBC_CSCAN_PRELOAD(
1447                 LOAD_UTF8_CHARCLASS_SPACE(),
1448                 !(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8)),
1449                 !isSPACE(*s)
1450             );
1451         case NSPACEL:
1452             REXEC_FBC_CSCAN_TAINT(
1453                 !(*s == ' ' || isSPACE_LC_utf8((U8*)s)),
1454                 !isSPACE_LC(*s)
1455             );
1456         case DIGIT:
1457             REXEC_FBC_CSCAN_PRELOAD(
1458                 LOAD_UTF8_CHARCLASS_DIGIT(),
1459                 swash_fetch(PL_utf8_digit,(U8*)s, do_utf8),
1460                 isDIGIT(*s)
1461             );
1462         case DIGITL:
1463             REXEC_FBC_CSCAN_TAINT(
1464                 isDIGIT_LC_utf8((U8*)s),
1465                 isDIGIT_LC(*s)
1466             );
1467         case NDIGIT:
1468             REXEC_FBC_CSCAN_PRELOAD(
1469                 LOAD_UTF8_CHARCLASS_DIGIT(),
1470                 !swash_fetch(PL_utf8_digit,(U8*)s, do_utf8),
1471                 !isDIGIT(*s)
1472             );
1473         case NDIGITL:
1474             REXEC_FBC_CSCAN_TAINT(
1475                 !isDIGIT_LC_utf8((U8*)s),
1476                 !isDIGIT_LC(*s)
1477             );
1478         case LNBREAK:
1479             REXEC_FBC_CSCAN(
1480                 is_LNBREAK_utf8(s),
1481                 is_LNBREAK_latin1(s)
1482             );
1483         case VERTWS:
1484             REXEC_FBC_CSCAN(
1485                 is_VERTWS_utf8(s),
1486                 is_VERTWS_latin1(s)
1487             );
1488         case NVERTWS:
1489             REXEC_FBC_CSCAN(
1490                 !is_VERTWS_utf8(s),
1491                 !is_VERTWS_latin1(s)
1492             );
1493         case HORIZWS:
1494             REXEC_FBC_CSCAN(
1495                 is_HORIZWS_utf8(s),
1496                 is_HORIZWS_latin1(s)
1497             );
1498         case NHORIZWS:
1499             REXEC_FBC_CSCAN(
1500                 !is_HORIZWS_utf8(s),
1501                 !is_HORIZWS_latin1(s)
1502             );
1503         case AHOCORASICKC:
1504         case AHOCORASICK:
1505             {
1506                 DECL_TRIE_TYPE(c);
1507                 /* what trie are we using right now */
1508                 reg_ac_data *aho
1509                     = (reg_ac_data*)progi->data->data[ ARG( c ) ];
1510                 reg_trie_data *trie
1511                     = (reg_trie_data*)progi->data->data[ aho->trie ];
1512                 HV *widecharmap = (HV*) progi->data->data[ aho->trie + 1 ];
1513
1514                 const char *last_start = strend - trie->minlen;
1515 #ifdef DEBUGGING
1516                 const char *real_start = s;
1517 #endif
1518                 STRLEN maxlen = trie->maxlen;
1519                 SV *sv_points;
1520                 U8 **points; /* map of where we were in the input string
1521                                 when reading a given char. For ASCII this
1522                                 is unnecessary overhead as the relationship
1523                                 is always 1:1, but for Unicode, especially
1524                                 case folded Unicode this is not true. */
1525                 U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ];
1526                 U8 *bitmap=NULL;
1527
1528
1529                 GET_RE_DEBUG_FLAGS_DECL;
1530
1531                 /* We can't just allocate points here. We need to wrap it in
1532                  * an SV so it gets freed properly if there is a croak while
1533                  * running the match */
1534                 ENTER;
1535                 SAVETMPS;
1536                 sv_points=newSV(maxlen * sizeof(U8 *));
1537                 SvCUR_set(sv_points,
1538                     maxlen * sizeof(U8 *));
1539                 SvPOK_on(sv_points);
1540                 sv_2mortal(sv_points);
1541                 points=(U8**)SvPV_nolen(sv_points );
1542                 if ( trie_type != trie_utf8_fold
1543                      && (trie->bitmap || OP(c)==AHOCORASICKC) )
1544                 {
1545                     if (trie->bitmap)
1546                         bitmap=(U8*)trie->bitmap;
1547                     else
1548                         bitmap=(U8*)ANYOF_BITMAP(c);
1549                 }
1550                 /* this is the Aho-Corasick algorithm modified a touch
1551                    to include special handling for long "unknown char"
1552                    sequences. The basic idea being that we use AC as long
1553                    as we are dealing with a possible matching char, when
1554                    we encounter an unknown char (and we have not encountered
1555                    an accepting state) we scan forward until we find a legal
1556                    starting char.
1557                    AC matching is basically that of trie matching, except
1558                    that when we encounter a failing transition, we fall back
1559                    to the current states "fail state", and try the current char
1560                    again, a process we repeat until we reach the root state,
1561                    state 1, or a legal transition. If we fail on the root state
1562                    then we can either terminate if we have reached an accepting
1563                    state previously, or restart the entire process from the beginning
1564                    if we have not.
1565
1566                  */
1567                 while (s <= last_start) {
1568                     const U32 uniflags = UTF8_ALLOW_DEFAULT;
1569                     U8 *uc = (U8*)s;
1570                     U16 charid = 0;
1571                     U32 base = 1;
1572                     U32 state = 1;
1573                     UV uvc = 0;
1574                     STRLEN len = 0;
1575                     STRLEN foldlen = 0;
1576                     U8 *uscan = (U8*)NULL;
1577                     U8 *leftmost = NULL;
1578 #ifdef DEBUGGING
1579                     U32 accepted_word= 0;
1580 #endif
1581                     U32 pointpos = 0;
1582
1583                     while ( state && uc <= (U8*)strend ) {
1584                         int failed=0;
1585                         U32 word = aho->states[ state ].wordnum;
1586
1587                         if( state==1 ) {
1588                             if ( bitmap ) {
1589                                 DEBUG_TRIE_EXECUTE_r(
1590                                     if ( uc <= (U8*)last_start && !BITMAP_TEST(bitmap,*uc) ) {
1591                                         dump_exec_pos( (char *)uc, c, strend, real_start,
1592                                             (char *)uc, do_utf8 );
1593                                         PerlIO_printf( Perl_debug_log,
1594                                             " Scanning for legal start char...\n");
1595                                     }
1596                                 );
1597                                 while ( uc <= (U8*)last_start  && !BITMAP_TEST(bitmap,*uc) ) {
1598                                     uc++;
1599                                 }
1600                                 s= (char *)uc;
1601                             }
1602                             if (uc >(U8*)last_start) break;
1603                         }
1604
1605                         if ( word ) {
1606                             U8 *lpos= points[ (pointpos - trie->wordlen[word-1] ) % maxlen ];
1607                             if (!leftmost || lpos < leftmost) {
1608                                 DEBUG_r(accepted_word=word);
1609                                 leftmost= lpos;
1610                             }
1611                             if (base==0) break;
1612
1613                         }
1614                         points[pointpos++ % maxlen]= uc;
1615                         REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc,
1616                                              uscan, len, uvc, charid, foldlen,
1617                                              foldbuf, uniflags);
1618                         DEBUG_TRIE_EXECUTE_r({
1619                             dump_exec_pos( (char *)uc, c, strend, real_start,
1620                                 s,   do_utf8 );
1621                             PerlIO_printf(Perl_debug_log,
1622                                 " Charid:%3u CP:%4"UVxf" ",
1623                                  charid, uvc);
1624                         });
1625
1626                         do {
1627 #ifdef DEBUGGING
1628                             word = aho->states[ state ].wordnum;
1629 #endif
1630                             base = aho->states[ state ].trans.base;
1631
1632                             DEBUG_TRIE_EXECUTE_r({
1633                                 if (failed)
1634                                     dump_exec_pos( (char *)uc, c, strend, real_start,
1635                                         s,   do_utf8 );
1636                                 PerlIO_printf( Perl_debug_log,
1637                                     "%sState: %4"UVxf", word=%"UVxf,
1638                                     failed ? " Fail transition to " : "",
1639                                     (UV)state, (UV)word);
1640                             });
1641                             if ( base ) {
1642                                 U32 tmp;
1643                                 if (charid &&
1644                                      (base + charid > trie->uniquecharcount )
1645                                      && (base + charid - 1 - trie->uniquecharcount
1646                                             < trie->lasttrans)
1647                                      && trie->trans[base + charid - 1 -
1648                                             trie->uniquecharcount].check == state
1649                                      && (tmp=trie->trans[base + charid - 1 -
1650                                         trie->uniquecharcount ].next))
1651                                 {
1652                                     DEBUG_TRIE_EXECUTE_r(
1653                                         PerlIO_printf( Perl_debug_log," - legal\n"));
1654                                     state = tmp;
1655                                     break;
1656                                 }
1657                                 else {
1658                                     DEBUG_TRIE_EXECUTE_r(
1659                                         PerlIO_printf( Perl_debug_log," - fail\n"));
1660                                     failed = 1;
1661                                     state = aho->fail[state];
1662                                 }
1663                             }
1664                             else {
1665                                 /* we must be accepting here */
1666                                 DEBUG_TRIE_EXECUTE_r(
1667                                         PerlIO_printf( Perl_debug_log," - accepting\n"));
1668                                 failed = 1;
1669                                 break;
1670                             }
1671                         } while(state);
1672                         uc += len;
1673                         if (failed) {
1674                             if (leftmost)
1675                                 break;
1676                             if (!state) state = 1;
1677                         }
1678                     }
1679                     if ( aho->states[ state ].wordnum ) {
1680                         U8 *lpos = points[ (pointpos - trie->wordlen[aho->states[ state ].wordnum-1]) % maxlen ];
1681                         if (!leftmost || lpos < leftmost) {
1682                             DEBUG_r(accepted_word=aho->states[ state ].wordnum);
1683                             leftmost = lpos;
1684                         }
1685                     }
1686                     if (leftmost) {
1687                         s = (char*)leftmost;
1688                         DEBUG_TRIE_EXECUTE_r({
1689                             PerlIO_printf(
1690                                 Perl_debug_log,"Matches word #%"UVxf" at position %"IVdf". Trying full pattern...\n",
1691                                 (UV)accepted_word, (IV)(s - real_start)
1692                             );
1693                         });
1694                         if (!reginfo || regtry(reginfo, &s)) {
1695                             FREETMPS;
1696                             LEAVE;
1697                             goto got_it;
1698                         }
1699                         s = HOPc(s,1);
1700                         DEBUG_TRIE_EXECUTE_r({
1701                             PerlIO_printf( Perl_debug_log,"Pattern failed. Looking for new start point...\n");
1702                         });
1703                     } else {
1704                         DEBUG_TRIE_EXECUTE_r(
1705                             PerlIO_printf( Perl_debug_log,"No match.\n"));
1706                         break;
1707                     }
1708                 }
1709                 FREETMPS;
1710                 LEAVE;
1711             }
1712             break;
1713         default:
1714             Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
1715             break;
1716         }
1717         return 0;
1718       got_it:
1719         return s;
1720 }
1721
1722 static void
1723 S_swap_match_buff (pTHX_ regexp *prog) {
1724     regexp_paren_pair *t;
1725
1726     if (!prog->swap) {
1727     /* We have to be careful. If the previous successful match
1728        was from this regex we don't want a subsequent paritally
1729        successful match to clobber the old results.
1730        So when we detect this possibility we add a swap buffer
1731        to the re, and switch the buffer each match. If we fail
1732        we switch it back, otherwise we leave it swapped.
1733     */
1734         Newxz(prog->swap, (prog->nparens + 1), regexp_paren_pair);
1735     }
1736     t = prog->swap;
1737     prog->swap = prog->offs;
1738     prog->offs = t;
1739 }
1740
1741
1742 /*
1743  - regexec_flags - match a regexp against a string
1744  */
1745 I32
1746 Perl_regexec_flags(pTHX_ REGEXP * const rx, char *stringarg, register char *strend,
1747               char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
1748 /* strend: pointer to null at end of string */
1749 /* strbeg: real beginning of string */
1750 /* minend: end of match must be >=minend after stringarg. */
1751 /* data: May be used for some additional optimizations.
1752          Currently its only used, with a U32 cast, for transmitting
1753          the ganch offset when doing a /g match. This will change */
1754 /* nosave: For optimizations. */
1755 {
1756     dVAR;
1757     struct regexp *const prog = (struct regexp *)SvANY(rx);
1758     /*register*/ char *s;
1759     register regnode *c;
1760     /*register*/ char *startpos = stringarg;
1761     I32 minlen;         /* must match at least this many chars */
1762     I32 dontbother = 0; /* how many characters not to try at end */
1763     I32 end_shift = 0;                  /* Same for the end. */         /* CC */
1764     I32 scream_pos = -1;                /* Internal iterator of scream. */
1765     char *scream_olds = NULL;
1766     const bool do_utf8 = (bool)DO_UTF8(sv);
1767     I32 multiline;
1768     RXi_GET_DECL(prog,progi);
1769     regmatch_info reginfo;  /* create some info to pass to regtry etc */
1770     bool swap_on_fail = 0;
1771
1772     GET_RE_DEBUG_FLAGS_DECL;
1773
1774     PERL_UNUSED_ARG(data);
1775
1776     /* Be paranoid... */
1777     if (prog == NULL || startpos == NULL) {
1778         Perl_croak(aTHX_ "NULL regexp parameter");
1779         return 0;
1780     }
1781
1782     multiline = prog->extflags & RXf_PMf_MULTILINE;
1783     reginfo.prog = rx;   /* Yes, sorry that this is confusing.  */
1784
1785     RX_MATCH_UTF8_set(rx, do_utf8);
1786     DEBUG_EXECUTE_r(
1787         debug_start_match(rx, do_utf8, startpos, strend,
1788         "Matching");
1789     );
1790
1791     minlen = prog->minlen;
1792
1793     if (strend - startpos < (minlen+(prog->check_offset_min<0?prog->check_offset_min:0))) {
1794         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
1795                               "String too short [regexec_flags]...\n"));
1796         goto phooey;
1797     }
1798
1799
1800     /* Check validity of program. */
1801     if (UCHARAT(progi->program) != REG_MAGIC) {
1802         Perl_croak(aTHX_ "corrupted regexp program");
1803     }
1804
1805     PL_reg_flags = 0;
1806     PL_reg_eval_set = 0;
1807     PL_reg_maxiter = 0;
1808
1809     if (RX_UTF8(rx))
1810         PL_reg_flags |= RF_utf8;
1811
1812     /* Mark beginning of line for ^ and lookbehind. */
1813     reginfo.bol = startpos; /* XXX not used ??? */
1814     PL_bostr  = strbeg;
1815     reginfo.sv = sv;
1816
1817     /* Mark end of line for $ (and such) */
1818     PL_regeol = strend;
1819
1820     /* see how far we have to get to not match where we matched before */
1821     reginfo.till = startpos+minend;
1822
1823     /* If there is a "must appear" string, look for it. */
1824     s = startpos;
1825
1826     if (prog->extflags & RXf_GPOS_SEEN) { /* Need to set reginfo->ganch */
1827         MAGIC *mg;
1828
1829         if (flags & REXEC_IGNOREPOS)    /* Means: check only at start */
1830             reginfo.ganch = startpos + prog->gofs;
1831         else if (sv && SvTYPE(sv) >= SVt_PVMG
1832                   && SvMAGIC(sv)
1833                   && (mg = mg_find(sv, PERL_MAGIC_regex_global))
1834                   && mg->mg_len >= 0) {
1835             reginfo.ganch = strbeg + mg->mg_len;        /* Defined pos() */
1836             if (prog->extflags & RXf_ANCH_GPOS) {
1837                 if (s > reginfo.ganch)
1838                     goto phooey;
1839                 s = reginfo.ganch - prog->gofs;
1840             }
1841         }
1842         else if (data) {
1843             reginfo.ganch = strbeg + PTR2UV(data);
1844         } else                          /* pos() not defined */
1845             reginfo.ganch = strbeg;
1846     }
1847     if (PL_curpm && (PM_GETRE(PL_curpm) == rx)) {
1848         swap_on_fail = 1;
1849         swap_match_buff(prog); /* do we need a save destructor here for
1850                                   eval dies? */
1851     }
1852     if (!(flags & REXEC_CHECKED) && (prog->check_substr != NULL || prog->check_utf8 != NULL)) {
1853         re_scream_pos_data d;
1854
1855         d.scream_olds = &scream_olds;
1856         d.scream_pos = &scream_pos;
1857         s = re_intuit_start(rx, sv, s, strend, flags, &d);
1858         if (!s) {
1859             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Not present...\n"));
1860             goto phooey;        /* not present */
1861         }
1862     }
1863
1864
1865
1866     /* Simplest case:  anchored match need be tried only once. */
1867     /*  [unless only anchor is BOL and multiline is set] */
1868     if (prog->extflags & (RXf_ANCH & ~RXf_ANCH_GPOS)) {
1869         if (s == startpos && regtry(&reginfo, &startpos))
1870             goto got_it;
1871         else if (multiline || (prog->intflags & PREGf_IMPLICIT)
1872                  || (prog->extflags & RXf_ANCH_MBOL)) /* XXXX SBOL? */
1873         {
1874             char *end;
1875
1876             if (minlen)
1877                 dontbother = minlen - 1;
1878             end = HOP3c(strend, -dontbother, strbeg) - 1;
1879             /* for multiline we only have to try after newlines */
1880             if (prog->check_substr || prog->check_utf8) {
1881                 if (s == startpos)
1882                     goto after_try;
1883                 while (1) {
1884                     if (regtry(&reginfo, &s))
1885                         goto got_it;
1886                   after_try:
1887                     if (s > end)
1888                         goto phooey;
1889                     if (prog->extflags & RXf_USE_INTUIT) {
1890                         s = re_intuit_start(rx, sv, s + 1, strend, flags, NULL);
1891                         if (!s)
1892                             goto phooey;
1893                     }
1894                     else
1895                         s++;
1896                 }
1897             } else {
1898                 if (s > startpos)
1899                     s--;
1900                 while (s < end) {
1901                     if (*s++ == '\n') { /* don't need PL_utf8skip here */
1902                         if (regtry(&reginfo, &s))
1903                             goto got_it;
1904                     }
1905                 }
1906             }
1907         }
1908         goto phooey;
1909     } else if (RXf_GPOS_CHECK == (prog->extflags & RXf_GPOS_CHECK))
1910     {
1911         /* the warning about reginfo.ganch being used without intialization
1912            is bogus -- we set it above, when prog->extflags & RXf_GPOS_SEEN
1913            and we only enter this block when the same bit is set. */
1914         char *tmp_s = reginfo.ganch - prog->gofs;
1915         if (regtry(&reginfo, &tmp_s))
1916             goto got_it;
1917         goto phooey;
1918     }
1919
1920     /* Messy cases:  unanchored match. */
1921     if ((prog->anchored_substr || prog->anchored_utf8) && prog->intflags & PREGf_SKIP) {
1922         /* we have /x+whatever/ */
1923         /* it must be a one character string (XXXX Except UTF?) */
1924         char ch;
1925 #ifdef DEBUGGING
1926         int did_match = 0;
1927 #endif
1928         if (!(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr))
1929             do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1930         ch = SvPVX_const(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr)[0];
1931
1932         if (do_utf8) {
1933             REXEC_FBC_SCAN(
1934                 if (*s == ch) {
1935                     DEBUG_EXECUTE_r( did_match = 1 );
1936                     if (regtry(&reginfo, &s)) goto got_it;
1937                     s += UTF8SKIP(s);
1938                     while (s < strend && *s == ch)
1939                         s += UTF8SKIP(s);
1940                 }
1941             );
1942         }
1943         else {
1944             REXEC_FBC_SCAN(
1945                 if (*s == ch) {
1946                     DEBUG_EXECUTE_r( did_match = 1 );
1947                     if (regtry(&reginfo, &s)) goto got_it;
1948                     s++;
1949                     while (s < strend && *s == ch)
1950                         s++;
1951                 }
1952             );
1953         }
1954         DEBUG_EXECUTE_r(if (!did_match)
1955                 PerlIO_printf(Perl_debug_log,
1956                                   "Did not find anchored character...\n")
1957                );
1958     }
1959     else if (prog->anchored_substr != NULL
1960               || prog->anchored_utf8 != NULL
1961               || ((prog->float_substr != NULL || prog->float_utf8 != NULL)
1962                   && prog->float_max_offset < strend - s)) {
1963         SV *must;
1964         I32 back_max;
1965         I32 back_min;
1966         char *last;
1967         char *last1;            /* Last position checked before */
1968 #ifdef DEBUGGING
1969         int did_match = 0;
1970 #endif
1971         if (prog->anchored_substr || prog->anchored_utf8) {
1972             if (!(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr))
1973                 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1974             must = do_utf8 ? prog->anchored_utf8 : prog->anchored_substr;
1975             back_max = back_min = prog->anchored_offset;
1976         } else {
1977             if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))
1978                 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1979             must = do_utf8 ? prog->float_utf8 : prog->float_substr;
1980             back_max = prog->float_max_offset;
1981             back_min = prog->float_min_offset;
1982         }
1983
1984
1985         if (must == &PL_sv_undef)
1986             /* could not downgrade utf8 check substring, so must fail */
1987             goto phooey;
1988
1989         if (back_min<0) {
1990             last = strend;
1991         } else {
1992             last = HOP3c(strend,        /* Cannot start after this */
1993                   -(I32)(CHR_SVLEN(must)
1994                          - (SvTAIL(must) != 0) + back_min), strbeg);
1995         }
1996         if (s > PL_bostr)
1997             last1 = HOPc(s, -1);
1998         else
1999             last1 = s - 1;      /* bogus */
2000
2001         /* XXXX check_substr already used to find "s", can optimize if
2002            check_substr==must. */
2003         scream_pos = -1;
2004         dontbother = end_shift;
2005         strend = HOPc(strend, -dontbother);
2006         while ( (s <= last) &&
2007                 ((flags & REXEC_SCREAM)
2008                  ? (s = screaminstr(sv, must, HOP3c(s, back_min, (back_min<0 ? strbeg : strend)) - strbeg,
2009                                     end_shift, &scream_pos, 0))
2010                  : (s = fbm_instr((unsigned char*)HOP3(s, back_min, (back_min<0 ? strbeg : strend)),
2011                                   (unsigned char*)strend, must,
2012                                   multiline ? FBMrf_MULTILINE : 0))) ) {
2013             /* we may be pointing at the wrong string */
2014             if ((flags & REXEC_SCREAM) && RXp_MATCH_COPIED(prog))
2015                 s = strbeg + (s - SvPVX_const(sv));
2016             DEBUG_EXECUTE_r( did_match = 1 );
2017             if (HOPc(s, -back_max) > last1) {
2018                 last1 = HOPc(s, -back_min);
2019                 s = HOPc(s, -back_max);
2020             }
2021             else {
2022                 char * const t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
2023
2024                 last1 = HOPc(s, -back_min);
2025                 s = t;
2026             }
2027             if (do_utf8) {
2028                 while (s <= last1) {
2029                     if (regtry(&reginfo, &s))
2030                         goto got_it;
2031                     s += UTF8SKIP(s);
2032                 }
2033             }
2034             else {
2035                 while (s <= last1) {
2036                     if (regtry(&reginfo, &s))
2037                         goto got_it;
2038                     s++;
2039                 }
2040             }
2041         }
2042         DEBUG_EXECUTE_r(if (!did_match) {
2043             RE_PV_QUOTED_DECL(quoted, do_utf8, PERL_DEBUG_PAD_ZERO(0),
2044                 SvPVX_const(must), RE_SV_DUMPLEN(must), 30);
2045             PerlIO_printf(Perl_debug_log, "Did not find %s substr %s%s...\n",
2046                               ((must == prog->anchored_substr || must == prog->anchored_utf8)
2047                                ? "anchored" : "floating"),
2048                 quoted, RE_SV_TAIL(must));
2049         });
2050         goto phooey;
2051     }
2052     else if ( (c = progi->regstclass) ) {
2053         if (minlen) {
2054             const OPCODE op = OP(progi->regstclass);
2055             /* don't bother with what can't match */
2056             if (PL_regkind[op] != EXACT && op != CANY && PL_regkind[op] != TRIE)
2057                 strend = HOPc(strend, -(minlen - 1));
2058         }
2059         DEBUG_EXECUTE_r({
2060             SV * const prop = sv_newmortal();
2061             regprop(prog, prop, c);
2062             {
2063                 RE_PV_QUOTED_DECL(quoted,do_utf8,PERL_DEBUG_PAD_ZERO(1),
2064                     s,strend-s,60);
2065                 PerlIO_printf(Perl_debug_log,
2066                     "Matching stclass %.*s against %s (%d chars)\n",
2067                     (int)SvCUR(prop), SvPVX_const(prop),
2068                      quoted, (int)(strend - s));
2069             }
2070         });
2071         if (find_byclass(prog, c, s, strend, &reginfo))
2072             goto got_it;
2073         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass... [regexec_flags]\n"));
2074     }
2075     else {
2076         dontbother = 0;
2077         if (prog->float_substr != NULL || prog->float_utf8 != NULL) {
2078             /* Trim the end. */
2079             char *last;
2080             SV* float_real;
2081
2082             if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))
2083                 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
2084             float_real = do_utf8 ? prog->float_utf8 : prog->float_substr;
2085
2086             if (flags & REXEC_SCREAM) {
2087                 last = screaminstr(sv, float_real, s - strbeg,
2088                                    end_shift, &scream_pos, 1); /* last one */
2089                 if (!last)
2090                     last = scream_olds; /* Only one occurrence. */
2091                 /* we may be pointing at the wrong string */
2092                 else if (RXp_MATCH_COPIED(prog))
2093                     s = strbeg + (s - SvPVX_const(sv));
2094             }
2095             else {
2096                 STRLEN len;
2097                 const char * const little = SvPV_const(float_real, len);
2098
2099                 if (SvTAIL(float_real)) {
2100                     if (memEQ(strend - len + 1, little, len - 1))
2101                         last = strend - len + 1;
2102                     else if (!multiline)
2103                         last = memEQ(strend - len, little, len)
2104                             ? strend - len : NULL;
2105                     else
2106                         goto find_last;
2107                 } else {
2108                   find_last:
2109                     if (len)
2110                         last = rninstr(s, strend, little, little + len);
2111                     else
2112                         last = strend;  /* matching "$" */
2113                 }
2114             }
2115             if (last == NULL) {
2116                 DEBUG_EXECUTE_r(
2117                     PerlIO_printf(Perl_debug_log,
2118                         "%sCan't trim the tail, match fails (should not happen)%s\n",
2119                         PL_colors[4], PL_colors[5]));
2120                 goto phooey; /* Should not happen! */
2121             }
2122             dontbother = strend - last + prog->float_min_offset;
2123         }
2124         if (minlen && (dontbother < minlen))
2125             dontbother = minlen - 1;
2126         strend -= dontbother;              /* this one's always in bytes! */
2127         /* We don't know much -- general case. */
2128         if (do_utf8) {
2129             for (;;) {
2130                 if (regtry(&reginfo, &s))
2131                     goto got_it;
2132                 if (s >= strend)
2133                     break;
2134                 s += UTF8SKIP(s);
2135             };
2136         }
2137         else {
2138             do {
2139                 if (regtry(&reginfo, &s))
2140                     goto got_it;
2141             } while (s++ < strend);
2142         }
2143     }
2144
2145     /* Failure. */
2146     goto phooey;
2147
2148 got_it:
2149     RX_MATCH_TAINTED_set(rx, PL_reg_flags & RF_tainted);
2150
2151     if (PL_reg_eval_set)
2152         restore_pos(aTHX_ prog);
2153     if (RXp_PAREN_NAMES(prog))
2154         (void)hv_iterinit(RXp_PAREN_NAMES(prog));
2155
2156     /* make sure $`, $&, $', and $digit will work later */
2157     if ( !(flags & REXEC_NOT_FIRST) ) {
2158         RX_MATCH_COPY_FREE(rx);
2159         if (flags & REXEC_COPY_STR) {
2160             const I32 i = PL_regeol - startpos + (stringarg - strbeg);
2161 #ifdef PERL_OLD_COPY_ON_WRITE
2162             if ((SvIsCOW(sv)
2163                  || (SvFLAGS(sv) & CAN_COW_MASK) == CAN_COW_FLAGS)) {
2164                 if (DEBUG_C_TEST) {
2165                     PerlIO_printf(Perl_debug_log,
2166                                   "Copy on write: regexp capture, type %d\n",
2167                                   (int) SvTYPE(sv));
2168                 }
2169                 prog->saved_copy = sv_setsv_cow(prog->saved_copy, sv);
2170                 prog->subbeg = (char *)SvPVX_const(prog->saved_copy);
2171                 assert (SvPOKp(prog->saved_copy));
2172             } else
2173 #endif
2174             {
2175                 RX_MATCH_COPIED_on(rx);
2176                 s = savepvn(strbeg, i);
2177                 prog->subbeg = s;
2178             }
2179             prog->sublen = i;
2180         }
2181         else {
2182             prog->subbeg = strbeg;
2183             prog->sublen = PL_regeol - strbeg;  /* strend may have been modified */
2184         }
2185     }
2186
2187     return 1;
2188
2189 phooey:
2190     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n",
2191                           PL_colors[4], PL_colors[5]));
2192     if (PL_reg_eval_set)
2193         restore_pos(aTHX_ prog);
2194     if (swap_on_fail)
2195         /* we failed :-( roll it back */
2196         swap_match_buff(prog);
2197
2198     return 0;
2199 }
2200
2201
2202 /*
2203  - regtry - try match at specific point
2204  */
2205 STATIC I32                      /* 0 failure, 1 success */
2206 S_regtry(pTHX_ regmatch_info *reginfo, char **startpos)
2207 {
2208     dVAR;
2209     CHECKPOINT lastcp;
2210     REGEXP *const rx = reginfo->prog;
2211     regexp *const prog = (struct regexp *)SvANY(rx);
2212     RXi_GET_DECL(prog,progi);
2213     GET_RE_DEBUG_FLAGS_DECL;
2214     reginfo->cutpoint=NULL;
2215
2216     if ((prog->extflags & RXf_EVAL_SEEN) && !PL_reg_eval_set) {
2217         MAGIC *mg;
2218
2219         PL_reg_eval_set = RS_init;
2220         DEBUG_EXECUTE_r(DEBUG_s(
2221             PerlIO_printf(Perl_debug_log, "  setting stack tmpbase at %"IVdf"\n",
2222                           (IV)(PL_stack_sp - PL_stack_base));
2223             ));
2224         SAVESTACK_CXPOS();
2225         cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
2226         /* Otherwise OP_NEXTSTATE will free whatever on stack now.  */
2227         SAVETMPS;
2228         /* Apparently this is not needed, judging by wantarray. */
2229         /* SAVEI8(cxstack[cxstack_ix].blk_gimme);
2230            cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
2231
2232         if (reginfo->sv) {
2233             /* Make $_ available to executed code. */
2234             if (reginfo->sv != DEFSV) {
2235                 SAVE_DEFSV;
2236                 DEFSV = reginfo->sv;
2237             }
2238
2239             if (!(SvTYPE(reginfo->sv) >= SVt_PVMG && SvMAGIC(reginfo->sv)
2240                   && (mg = mg_find(reginfo->sv, PERL_MAGIC_regex_global)))) {
2241                 /* prepare for quick setting of pos */
2242 #ifdef PERL_OLD_COPY_ON_WRITE
2243                 if (SvIsCOW(reginfo->sv))
2244                     sv_force_normal_flags(reginfo->sv, 0);
2245 #endif
2246                 mg = sv_magicext(reginfo->sv, NULL, PERL_MAGIC_regex_global,
2247                                  &PL_vtbl_mglob, NULL, 0);
2248                 mg->mg_len = -1;
2249             }
2250             PL_reg_magic    = mg;
2251             PL_reg_oldpos   = mg->mg_len;
2252             SAVEDESTRUCTOR_X(restore_pos, prog);
2253         }
2254         if (!PL_reg_curpm) {
2255             Newxz(PL_reg_curpm, 1, PMOP);
2256 #ifdef USE_ITHREADS
2257             {
2258                 SV* const repointer = &PL_sv_undef;
2259                 /* this regexp is also owned by the new PL_reg_curpm, which
2260                    will try to free it.  */
2261                 av_push(PL_regex_padav, repointer);
2262                 PL_reg_curpm->op_pmoffset = av_len(PL_regex_padav);
2263                 PL_regex_pad = AvARRAY(PL_regex_padav);
2264             }
2265 #endif
2266         }
2267 #ifdef USE_ITHREADS
2268         /* It seems that non-ithreads works both with and without this code.
2269            So for efficiency reasons it seems best not to have the code
2270            compiled when it is not needed.  */
2271         /* This is safe against NULLs: */
2272         ReREFCNT_dec(PM_GETRE(PL_reg_curpm));
2273         /* PM_reg_curpm owns a reference to this regexp.  */
2274         ReREFCNT_inc(rx);
2275 #endif
2276         PM_SETRE(PL_reg_curpm, rx);
2277         PL_reg_oldcurpm = PL_curpm;
2278         PL_curpm = PL_reg_curpm;
2279         if (RXp_MATCH_COPIED(prog)) {
2280             /*  Here is a serious problem: we cannot rewrite subbeg,
2281                 since it may be needed if this match fails.  Thus
2282                 $` inside (?{}) could fail... */
2283             PL_reg_oldsaved = prog->subbeg;
2284             PL_reg_oldsavedlen = prog->sublen;
2285 #ifdef PERL_OLD_COPY_ON_WRITE
2286             PL_nrs = prog->saved_copy;
2287 #endif
2288             RXp_MATCH_COPIED_off(prog);
2289         }
2290         else
2291             PL_reg_oldsaved = NULL;
2292         prog->subbeg = PL_bostr;
2293         prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
2294     }
2295     DEBUG_EXECUTE_r(PL_reg_starttry = *startpos);
2296     prog->offs[0].start = *startpos - PL_bostr;
2297     PL_reginput = *startpos;
2298     PL_reglastparen = &prog->lastparen;
2299     PL_reglastcloseparen = &prog->lastcloseparen;
2300     prog->lastparen = 0;
2301     prog->lastcloseparen = 0;
2302     PL_regsize = 0;
2303     PL_regoffs = prog->offs;
2304     if (PL_reg_start_tmpl <= prog->nparens) {
2305         PL_reg_start_tmpl = prog->nparens*3/2 + 3;
2306         if(PL_reg_start_tmp)
2307             Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
2308         else
2309             Newx(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
2310     }
2311
2312     /* XXXX What this code is doing here?!!!  There should be no need
2313        to do this again and again, PL_reglastparen should take care of
2314        this!  --ilya*/
2315
2316     /* Tests pat.t#187 and split.t#{13,14} seem to depend on this code.
2317      * Actually, the code in regcppop() (which Ilya may be meaning by
2318      * PL_reglastparen), is not needed at all by the test suite
2319      * (op/regexp, op/pat, op/split), but that code is needed otherwise
2320      * this erroneously leaves $1 defined: "1" =~ /^(?:(\d)x)?\d$/
2321      * Meanwhile, this code *is* needed for the
2322      * above-mentioned test suite tests to succeed.  The common theme
2323      * on those tests seems to be returning null fields from matches.
2324      * --jhi updated by dapm */
2325 #if 1
2326     if (prog->nparens) {
2327         regexp_paren_pair *pp = PL_regoffs;
2328         register I32 i;
2329         for (i = prog->nparens; i > (I32)*PL_reglastparen; i--) {
2330             ++pp;
2331             pp->start = -1;
2332             pp->end = -1;
2333         }
2334     }
2335 #endif
2336     REGCP_SET(lastcp);
2337     if (regmatch(reginfo, progi->program + 1)) {
2338         PL_regoffs[0].end = PL_reginput - PL_bostr;
2339         return 1;
2340     }
2341     if (reginfo->cutpoint)
2342         *startpos= reginfo->cutpoint;
2343     REGCP_UNWIND(lastcp);
2344     return 0;
2345 }
2346
2347
2348 #define sayYES goto yes
2349 #define sayNO goto no
2350 #define sayNO_SILENT goto no_silent
2351
2352 /* we dont use STMT_START/END here because it leads to
2353    "unreachable code" warnings, which are bogus, but distracting. */
2354 #define CACHEsayNO \
2355     if (ST.cache_mask) \
2356        PL_reg_poscache[ST.cache_offset] |= ST.cache_mask; \
2357     sayNO
2358
2359 /* this is used to determine how far from the left messages like
2360    'failed...' are printed. It should be set such that messages
2361    are inline with the regop output that created them.
2362 */
2363 #define REPORT_CODE_OFF 32
2364
2365
2366 /* Make sure there is a test for this +1 options in re_tests */
2367 #define TRIE_INITAL_ACCEPT_BUFFLEN 4;
2368
2369 #define CHRTEST_UNINIT -1001 /* c1/c2 haven't been calculated yet */
2370 #define CHRTEST_VOID   -1000 /* the c1/c2 "next char" test should be skipped */
2371
2372 #define SLAB_FIRST(s) (&(s)->states[0])
2373 #define SLAB_LAST(s)  (&(s)->states[PERL_REGMATCH_SLAB_SLOTS-1])
2374
2375 /* grab a new slab and return the first slot in it */
2376
2377 STATIC regmatch_state *
2378 S_push_slab(pTHX)
2379 {
2380 #if PERL_VERSION < 9 && !defined(PERL_CORE)
2381     dMY_CXT;
2382 #endif
2383     regmatch_slab *s = PL_regmatch_slab->next;
2384     if (!s) {
2385         Newx(s, 1, regmatch_slab);
2386         s->prev = PL_regmatch_slab;
2387         s->next = NULL;
2388         PL_regmatch_slab->next = s;
2389     }
2390     PL_regmatch_slab = s;
2391     return SLAB_FIRST(s);
2392 }
2393
2394
2395 /* push a new state then goto it */
2396
2397 #define PUSH_STATE_GOTO(state, node) \
2398     scan = node; \
2399     st->resume_state = state; \
2400     goto push_state;
2401
2402 /* push a new state with success backtracking, then goto it */
2403
2404 #define PUSH_YES_STATE_GOTO(state, node) \
2405     scan = node; \
2406     st->resume_state = state; \
2407     goto push_yes_state;
2408
2409
2410
2411 /*
2412
2413 regmatch() - main matching routine
2414
2415 This is basically one big switch statement in a loop. We execute an op,
2416 set 'next' to point the next op, and continue. If we come to a point which
2417 we may need to backtrack to on failure such as (A|B|C), we push a
2418 backtrack state onto the backtrack stack. On failure, we pop the top
2419 state, and re-enter the loop at the state indicated. If there are no more
2420 states to pop, we return failure.
2421
2422 Sometimes we also need to backtrack on success; for example /A+/, where
2423 after successfully matching one A, we need to go back and try to
2424 match another one; similarly for lookahead assertions: if the assertion
2425 completes successfully, we backtrack to the state just before the assertion
2426 and then carry on.  In these cases, the pushed state is marked as
2427 'backtrack on success too'. This marking is in fact done by a chain of
2428 pointers, each pointing to the previous 'yes' state. On success, we pop to
2429 the nearest yes state, discarding any intermediate failure-only states.
2430 Sometimes a yes state is pushed just to force some cleanup code to be
2431 called at the end of a successful match or submatch; e.g. (??{$re}) uses
2432 it to free the inner regex.
2433
2434 Note that failure backtracking rewinds the cursor position, while
2435 success backtracking leaves it alone.
2436
2437 A pattern is complete when the END op is executed, while a subpattern
2438 such as (?=foo) is complete when the SUCCESS op is executed. Both of these
2439 ops trigger the "pop to last yes state if any, otherwise return true"
2440 behaviour.
2441
2442 A common convention in this function is to use A and B to refer to the two
2443 subpatterns (or to the first nodes thereof) in patterns like /A*B/: so A is
2444 the subpattern to be matched possibly multiple times, while B is the entire
2445 rest of the pattern. Variable and state names reflect this convention.
2446
2447 The states in the main switch are the union of ops and failure/success of
2448 substates associated with with that op.  For example, IFMATCH is the op
2449 that does lookahead assertions /(?=A)B/ and so the IFMATCH state means
2450 'execute IFMATCH'; while IFMATCH_A is a state saying that we have just
2451 successfully matched A and IFMATCH_A_fail is a state saying that we have
2452 just failed to match A. Resume states always come in pairs. The backtrack
2453 state we push is marked as 'IFMATCH_A', but when that is popped, we resume
2454 at IFMATCH_A or IFMATCH_A_fail, depending on whether we are backtracking
2455 on success or failure.
2456
2457 The struct that holds a backtracking state is actually a big union, with
2458 one variant for each major type of op. The variable st points to the
2459 top-most backtrack struct. To make the code clearer, within each
2460 block of code we #define ST to alias the relevant union.
2461
2462 Here's a concrete example of a (vastly oversimplified) IFMATCH
2463 implementation:
2464
2465     switch (state) {
2466     ....
2467
2468 #define ST st->u.ifmatch
2469
2470     case IFMATCH: // we are executing the IFMATCH op, (?=A)B
2471         ST.foo = ...; // some state we wish to save
2472         ...
2473         // push a yes backtrack state with a resume value of
2474         // IFMATCH_A/IFMATCH_A_fail, then continue execution at the
2475         // first node of A:
2476         PUSH_YES_STATE_GOTO(IFMATCH_A, A);
2477         // NOTREACHED
2478
2479     case IFMATCH_A: // we have successfully executed A; now continue with B
2480         next = B;
2481         bar = ST.foo; // do something with the preserved value
2482         break;
2483
2484     case IFMATCH_A_fail: // A failed, so the assertion failed
2485         ...;   // do some housekeeping, then ...
2486         sayNO; // propagate the failure
2487
2488 #undef ST
2489
2490     ...
2491     }
2492
2493 For any old-timers reading this who are familiar with the old recursive
2494 approach, the code above is equivalent to:
2495
2496     case IFMATCH: // we are executing the IFMATCH op, (?=A)B
2497     {
2498         int foo = ...
2499         ...
2500         if (regmatch(A)) {
2501             next = B;
2502             bar = foo;
2503             break;
2504         }
2505         ...;   // do some housekeeping, then ...
2506         sayNO; // propagate the failure
2507     }
2508
2509 The topmost backtrack state, pointed to by st, is usually free. If you
2510 want to claim it, populate any ST.foo fields in it with values you wish to
2511 save, then do one of
2512
2513         PUSH_STATE_GOTO(resume_state, node);
2514         PUSH_YES_STATE_GOTO(resume_state, node);
2515
2516 which sets that backtrack state's resume value to 'resume_state', pushes a
2517 new free entry to the top of the backtrack stack, then goes to 'node'.
2518 On backtracking, the free slot is popped, and the saved state becomes the
2519 new free state. An ST.foo field in this new top state can be temporarily
2520 accessed to retrieve values, but once the main loop is re-entered, it
2521 becomes available for reuse.
2522
2523 Note that the depth of the backtrack stack constantly increases during the
2524 left-to-right execution of the pattern, rather than going up and down with
2525 the pattern nesting. For example the stack is at its maximum at Z at the
2526 end of the pattern, rather than at X in the following:
2527
2528     /(((X)+)+)+....(Y)+....Z/
2529
2530 The only exceptions to this are lookahead/behind assertions and the cut,
2531 (?>A), which pop all the backtrack states associated with A before
2532 continuing.
2533
2534 Bascktrack state structs are allocated in slabs of about 4K in size.
2535 PL_regmatch_state and st always point to the currently active state,
2536 and PL_regmatch_slab points to the slab currently containing
2537 PL_regmatch_state.  The first time regmatch() is called, the first slab is
2538 allocated, and is never freed until interpreter destruction. When the slab
2539 is full, a new one is allocated and chained to the end. At exit from
2540 regmatch(), slabs allocated since entry are freed.
2541
2542 */
2543
2544
2545 #define DEBUG_STATE_pp(pp)                                  \
2546     DEBUG_STATE_r({                                         \
2547         DUMP_EXEC_POS(locinput, scan, do_utf8);             \
2548         PerlIO_printf(Perl_debug_log,                       \
2549             "    %*s"pp" %s%s%s%s%s\n",                     \
2550             depth*2, "",                                    \
2551             PL_reg_name[st->resume_state],                     \
2552             ((st==yes_state||st==mark_state) ? "[" : ""),   \
2553             ((st==yes_state) ? "Y" : ""),                   \
2554             ((st==mark_state) ? "M" : ""),                  \
2555             ((st==yes_state||st==mark_state) ? "]" : "")    \
2556         );                                                  \
2557     });
2558
2559
2560 #define REG_NODE_NUM(x) ((x) ? (int)((x)-prog) : -1)
2561
2562 #ifdef DEBUGGING
2563
2564 STATIC void
2565 S_debug_start_match(pTHX_ const REGEXP *prog, const bool do_utf8,
2566     const char *start, const char *end, const char *blurb)
2567 {
2568     const bool utf8_pat = RX_UTF8(prog) ? 1 : 0;
2569     if (!PL_colorset)
2570             reginitcolors();
2571     {
2572         RE_PV_QUOTED_DECL(s0, utf8_pat, PERL_DEBUG_PAD_ZERO(0),
2573             RX_PRECOMP(prog), RX_PRELEN(prog), 60);
2574
2575         RE_PV_QUOTED_DECL(s1, do_utf8, PERL_DEBUG_PAD_ZERO(1),
2576             start, end - start, 60);
2577
2578         PerlIO_printf(Perl_debug_log,
2579             "%s%s REx%s %s against %s\n",
2580                        PL_colors[4], blurb, PL_colors[5], s0, s1);
2581
2582         if (do_utf8||utf8_pat)
2583             PerlIO_printf(Perl_debug_log, "UTF-8 %s%s%s...\n",
2584                 utf8_pat ? "pattern" : "",
2585                 utf8_pat && do_utf8 ? " and " : "",
2586                 do_utf8 ? "string" : ""
2587             );
2588     }
2589 }
2590
2591 STATIC void
2592 S_dump_exec_pos(pTHX_ const char *locinput,
2593                       const regnode *scan,
2594                       const char *loc_regeol,
2595                       const char *loc_bostr,
2596                       const char *loc_reg_starttry,
2597                       const bool do_utf8)
2598 {
2599     const int docolor = *PL_colors[0] || *PL_colors[2] || *PL_colors[4];
2600     const int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
2601     int l = (loc_regeol - locinput) > taill ? taill : (loc_regeol - locinput);
2602     /* The part of the string before starttry has one color
2603        (pref0_len chars), between starttry and current
2604        position another one (pref_len - pref0_len chars),
2605        after the current position the third one.
2606        We assume that pref0_len <= pref_len, otherwise we
2607        decrease pref0_len.  */
2608     int pref_len = (locinput - loc_bostr) > (5 + taill) - l
2609         ? (5 + taill) - l : locinput - loc_bostr;
2610     int pref0_len;
2611
2612     while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput - pref_len)))
2613         pref_len++;
2614     pref0_len = pref_len  - (locinput - loc_reg_starttry);
2615     if (l + pref_len < (5 + taill) && l < loc_regeol - locinput)
2616         l = ( loc_regeol - locinput > (5 + taill) - pref_len
2617               ? (5 + taill) - pref_len : loc_regeol - locinput);
2618     while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput + l)))
2619         l--;
2620     if (pref0_len < 0)
2621         pref0_len = 0;
2622     if (pref0_len > pref_len)
2623         pref0_len = pref_len;
2624     {
2625         const int is_uni = (do_utf8 && OP(scan) != CANY) ? 1 : 0;
2626
2627         RE_PV_COLOR_DECL(s0,len0,is_uni,PERL_DEBUG_PAD(0),
2628             (locinput - pref_len),pref0_len, 60, 4, 5);
2629
2630         RE_PV_COLOR_DECL(s1,len1,is_uni,PERL_DEBUG_PAD(1),
2631                     (locinput - pref_len + pref0_len),
2632                     pref_len - pref0_len, 60, 2, 3);
2633
2634         RE_PV_COLOR_DECL(s2,len2,is_uni,PERL_DEBUG_PAD(2),
2635                     locinput, loc_regeol - locinput, 10, 0, 1);
2636
2637         const STRLEN tlen=len0+len1+len2;
2638         PerlIO_printf(Perl_debug_log,
2639                     "%4"IVdf" <%.*s%.*s%s%.*s>%*s|",
2640                     (IV)(locinput - loc_bostr),
2641                     len0, s0,
2642                     len1, s1,
2643                     (docolor ? "" : "> <"),
2644                     len2, s2,
2645                     (int)(tlen > 19 ? 0 :  19 - tlen),
2646                     "");
2647     }
2648 }
2649
2650 #endif
2651
2652 /* reg_check_named_buff_matched()
2653  * Checks to see if a named buffer has matched. The data array of
2654  * buffer numbers corresponding to the buffer is expected to reside
2655  * in the regexp->data->data array in the slot stored in the ARG() of
2656  * node involved. Note that this routine doesn't actually care about the
2657  * name, that information is not preserved from compilation to execution.
2658  * Returns the index of the leftmost defined buffer with the given name
2659  * or 0 if non of the buffers matched.
2660  */
2661 STATIC I32
2662 S_reg_check_named_buff_matched(pTHX_ const regexp *rex, const regnode *scan) {
2663     I32 n;
2664     RXi_GET_DECL(rex,rexi);
2665     SV *sv_dat=(SV*)rexi->data->data[ ARG( scan ) ];
2666     I32 *nums=(I32*)SvPVX(sv_dat);
2667     for ( n=0; n<SvIVX(sv_dat); n++ ) {
2668         if ((I32)*PL_reglastparen >= nums[n] &&
2669             PL_regoffs[nums[n]].end != -1)
2670         {
2671             return nums[n];
2672         }
2673     }
2674     return 0;
2675 }
2676
2677
2678 /* free all slabs above current one  - called during LEAVE_SCOPE */
2679
2680 STATIC void
2681 S_clear_backtrack_stack(pTHX_ void *p)
2682 {
2683     regmatch_slab *s = PL_regmatch_slab->next;
2684     PERL_UNUSED_ARG(p);
2685
2686     if (!s)
2687         return;
2688     PL_regmatch_slab->next = NULL;
2689     while (s) {
2690         regmatch_slab * const osl = s;
2691         s = s->next;
2692         Safefree(osl);
2693     }
2694 }
2695
2696
2697 #define SETREX(Re1,Re2) \
2698     if (PL_reg_eval_set) PM_SETRE((PL_reg_curpm), (Re2)); \
2699     Re1 = (Re2)
2700
2701 STATIC I32                      /* 0 failure, 1 success */
2702 S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
2703 {
2704 #if PERL_VERSION < 9 && !defined(PERL_CORE)
2705     dMY_CXT;
2706 #endif
2707     dVAR;
2708     register const bool do_utf8 = PL_reg_match_utf8;
2709     const U32 uniflags = UTF8_ALLOW_DEFAULT;
2710
2711     REGEXP *rex_sv = reginfo->prog;
2712     regexp *rex = (struct regexp *)SvANY(rex_sv);
2713     RXi_GET_DECL(rex,rexi);
2714
2715     I32 oldsave;
2716
2717     /* the current state. This is a cached copy of PL_regmatch_state */
2718     register regmatch_state *st;
2719
2720     /* cache heavy used fields of st in registers */
2721     register regnode *scan;
2722     register regnode *next;
2723     register U32 n = 0; /* general value; init to avoid compiler warning */
2724     register I32 ln = 0; /* len or last;  init to avoid compiler warning */
2725     register char *locinput = PL_reginput;
2726     register I32 nextchr;   /* is always set to UCHARAT(locinput) */
2727
2728     bool result = 0;        /* return value of S_regmatch */
2729     int depth = 0;          /* depth of backtrack stack */
2730     U32 nochange_depth = 0; /* depth of GOSUB recursion with nochange */
2731     const U32 max_nochange_depth =
2732         (3 * rex->nparens > MAX_RECURSE_EVAL_NOCHANGE_DEPTH) ?
2733         3 * rex->nparens : MAX_RECURSE_EVAL_NOCHANGE_DEPTH;
2734
2735     regmatch_state *yes_state = NULL; /* state to pop to on success of
2736                                                             subpattern */
2737     /* mark_state piggy backs on the yes_state logic so that when we unwind
2738        the stack on success we can update the mark_state as we go */
2739     regmatch_state *mark_state = NULL; /* last mark state we have seen */
2740
2741     regmatch_state *cur_eval = NULL; /* most recent EVAL_AB state */
2742     struct regmatch_state  *cur_curlyx = NULL; /* most recent curlyx */
2743     U32 state_num;
2744     bool no_final = 0;      /* prevent failure from backtracking? */
2745     bool do_cutgroup = 0;   /* no_final only until next branch/trie entry */
2746     char *startpoint = PL_reginput;
2747     SV *popmark = NULL;     /* are we looking for a mark? */
2748     SV *sv_commit = NULL;   /* last mark name seen in failure */
2749     SV *sv_yes_mark = NULL; /* last mark name we have seen
2750                                during a successfull match */
2751     U32 lastopen = 0;       /* last open we saw */
2752     bool has_cutgroup = RX_HAS_CUTGROUP(rex) ? 1 : 0;
2753
2754     SV* const oreplsv = GvSV(PL_replgv);
2755
2756
2757     /* these three flags are set by various ops to signal information to
2758      * the very next op. They have a useful lifetime of exactly one loop
2759      * iteration, and are not preserved or restored by state pushes/pops
2760      */
2761     bool sw = 0;            /* the condition value in (?(cond)a|b) */
2762     bool minmod = 0;        /* the next "{n,m}" is a "{n,m}?" */
2763     int logical = 0;        /* the following EVAL is:
2764                                 0: (?{...})
2765                                 1: (?(?{...})X|Y)
2766                                 2: (??{...})
2767                                or the following IFMATCH/UNLESSM is:
2768                                 false: plain (?=foo)
2769                                 true:  used as a condition: (?(?=foo))
2770                             */
2771
2772 #ifdef DEBUGGING
2773     GET_RE_DEBUG_FLAGS_DECL;
2774 #endif
2775
2776     DEBUG_OPTIMISE_r( DEBUG_EXECUTE_r({
2777             PerlIO_printf(Perl_debug_log,"regmatch start\n");
2778     }));
2779     /* on first ever call to regmatch, allocate first slab */
2780     if (!PL_regmatch_slab) {
2781         Newx(PL_regmatch_slab, 1, regmatch_slab);
2782         PL_regmatch_slab->prev = NULL;
2783         PL_regmatch_slab->next = NULL;
2784         PL_regmatch_state = SLAB_FIRST(PL_regmatch_slab);
2785     }
2786
2787     oldsave = PL_savestack_ix;
2788     SAVEDESTRUCTOR_X(S_clear_backtrack_stack, NULL);
2789     SAVEVPTR(PL_regmatch_slab);
2790     SAVEVPTR(PL_regmatch_state);
2791
2792     /* grab next free state slot */
2793     st = ++PL_regmatch_state;
2794     if (st >  SLAB_LAST(PL_regmatch_slab))
2795         st = PL_regmatch_state = S_push_slab(aTHX);
2796
2797     /* Note that nextchr is a byte even in UTF */
2798     nextchr = UCHARAT(locinput);
2799     scan = prog;
2800     while (scan != NULL) {
2801
2802         DEBUG_EXECUTE_r( {
2803             SV * const prop = sv_newmortal();
2804             regnode *rnext=regnext(scan);
2805             DUMP_EXEC_POS( locinput, scan, do_utf8 );
2806             regprop(rex, prop, scan);
2807
2808             PerlIO_printf(Perl_debug_log,
2809                     "%3"IVdf":%*s%s(%"IVdf")\n",
2810                     (IV)(scan - rexi->program), depth*2, "",
2811                     SvPVX_const(prop),
2812                     (PL_regkind[OP(scan)] == END || !rnext) ?
2813                         0 : (IV)(rnext - rexi->program));
2814         });
2815
2816         next = scan + NEXT_OFF(scan);
2817         if (next == scan)
2818             next = NULL;
2819         state_num = OP(scan);
2820
2821       reenter_switch:
2822         switch (state_num) {
2823         case BOL:
2824             if (locinput == PL_bostr)
2825             {
2826                 /* reginfo->till = reginfo->bol; */
2827                 break;
2828             }
2829             sayNO;
2830         case MBOL:
2831             if (locinput == PL_bostr ||
2832                 ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n'))
2833             {
2834                 break;
2835             }
2836             sayNO;
2837         case SBOL:
2838             if (locinput == PL_bostr)
2839                 break;
2840             sayNO;
2841         case GPOS:
2842             if (locinput == reginfo->ganch)
2843                 break;
2844             sayNO;
2845
2846         case KEEPS:
2847             /* update the startpoint */
2848             st->u.keeper.val = PL_regoffs[0].start;
2849             PL_reginput = locinput;
2850             PL_regoffs[0].start = locinput - PL_bostr;
2851             PUSH_STATE_GOTO(KEEPS_next, next);
2852             /*NOT-REACHED*/
2853         case KEEPS_next_fail:
2854             /* rollback the start point change */
2855             PL_regoffs[0].start = st->u.keeper.val;
2856             sayNO_SILENT;
2857             /*NOT-REACHED*/
2858         case EOL:
2859                 goto seol;
2860         case MEOL:
2861             if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
2862                 sayNO;
2863             break;
2864         case SEOL:
2865           seol:
2866             if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
2867                 sayNO;
2868             if (PL_regeol - locinput > 1)
2869                 sayNO;
2870             break;
2871         case EOS:
2872             if (PL_regeol != locinput)
2873                 sayNO;
2874             break;
2875         case SANY:
2876             if (!nextchr && locinput >= PL_regeol)
2877                 sayNO;
2878             if (do_utf8) {
2879                 locinput += PL_utf8skip[nextchr];
2880                 if (locinput > PL_regeol)
2881                     sayNO;
2882                 nextchr = UCHARAT(locinput);
2883             }
2884             else
2885                 nextchr = UCHARAT(++locinput);
2886             break;
2887         case CANY:
2888             if (!nextchr && locinput >= PL_regeol)
2889                 sayNO;
2890             nextchr = UCHARAT(++locinput);
2891             break;
2892         case REG_ANY:
2893             if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
2894                 sayNO;
2895             if (do_utf8) {
2896                 locinput += PL_utf8skip[nextchr];
2897                 if (locinput > PL_regeol)
2898                     sayNO;
2899                 nextchr = UCHARAT(locinput);
2900             }
2901             else
2902                 nextchr = UCHARAT(++locinput);
2903             break;
2904
2905 #undef  ST
2906 #define ST st->u.trie
2907         case TRIEC:
2908             /* In this case the charclass data is available inline so
2909                we can fail fast without a lot of extra overhead.
2910              */
2911             if (scan->flags == EXACT || !do_utf8) {
2912                 if(!ANYOF_BITMAP_TEST(scan, *locinput)) {
2913                     DEBUG_EXECUTE_r(
2914                         PerlIO_printf(Perl_debug_log,
2915                                   "%*s  %sfailed to match trie start class...%s\n",
2916                                   REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5])
2917                     );
2918                     sayNO_SILENT;
2919                     /* NOTREACHED */
2920                 }
2921             }
2922             /* FALL THROUGH */
2923         case TRIE:
2924             {
2925                 /* what type of TRIE am I? (utf8 makes this contextual) */
2926                 DECL_TRIE_TYPE(scan);
2927
2928                 /* what trie are we using right now */
2929                 reg_trie_data * const trie
2930                     = (reg_trie_data*)rexi->data->data[ ARG( scan ) ];
2931                 HV * widecharmap = (HV *)rexi->data->data[ ARG( scan ) + 1 ];
2932                 U32 state = trie->startstate;
2933
2934                 if (trie->bitmap && trie_type != trie_utf8_fold &&
2935                     !TRIE_BITMAP_TEST(trie,*locinput)
2936                 ) {
2937                     if (trie->states[ state ].wordnum) {
2938                          DEBUG_EXECUTE_r(
2939                             PerlIO_printf(Perl_debug_log,
2940                                           "%*s  %smatched empty string...%s\n",
2941                                           REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5])
2942                         );
2943                         break;
2944                     } else {
2945                         DEBUG_EXECUTE_r(
2946                             PerlIO_printf(Perl_debug_log,
2947                                           "%*s  %sfailed to match trie start class...%s\n",
2948                                           REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5])
2949                         );
2950                         sayNO_SILENT;
2951                    }
2952                 }
2953
2954             {
2955                 U8 *uc = ( U8* )locinput;
2956
2957                 STRLEN len = 0;
2958                 STRLEN foldlen = 0;
2959                 U8 *uscan = (U8*)NULL;
2960                 STRLEN bufflen=0;
2961                 SV *sv_accept_buff = NULL;
2962                 U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ];
2963
2964                 ST.accepted = 0; /* how many accepting states we have seen */
2965                 ST.B = next;
2966                 ST.jump = trie->jump;
2967                 ST.me = scan;
2968                 /*
2969                    traverse the TRIE keeping track of all accepting states
2970                    we transition through until we get to a failing node.
2971                 */
2972
2973                 while ( state && uc <= (U8*)PL_regeol ) {
2974                     U32 base = trie->states[ state ].trans.base;
2975                     UV uvc = 0;
2976                     U16 charid;
2977                     /* We use charid to hold the wordnum as we don't use it
2978                        for charid until after we have done the wordnum logic.
2979                        We define an alias just so that the wordnum logic reads
2980                        more naturally. */
2981
2982 #define got_wordnum charid
2983                     got_wordnum = trie->states[ state ].wordnum;
2984
2985                     if ( got_wordnum ) {
2986                         if ( ! ST.accepted ) {
2987                             ENTER;
2988                             /* SAVETMPS; */ /* XXX is this necessary? dmq */
2989                             bufflen = TRIE_INITAL_ACCEPT_BUFFLEN;
2990                             sv_accept_buff=newSV(bufflen *
2991                                             sizeof(reg_trie_accepted) - 1);
2992                             SvCUR_set(sv_accept_buff, 0);
2993                             SvPOK_on(sv_accept_buff);
2994                             sv_2mortal(sv_accept_buff);
2995                             SAVETMPS;
2996                             ST.accept_buff =
2997                                 (reg_trie_accepted*)SvPV_nolen(sv_accept_buff );
2998                         }
2999                         do {
3000                             if (ST.accepted >= bufflen) {
3001                                 bufflen *= 2;
3002                                 ST.accept_buff =(reg_trie_accepted*)
3003                                     SvGROW(sv_accept_buff,
3004                                         bufflen * sizeof(reg_trie_accepted));
3005                             }
3006                             SvCUR_set(sv_accept_buff,SvCUR(sv_accept_buff)
3007                                 + sizeof(reg_trie_accepted));
3008
3009
3010                             ST.accept_buff[ST.accepted].wordnum = got_wordnum;
3011                             ST.accept_buff[ST.accepted].endpos = uc;
3012                             ++ST.accepted;
3013                         } while (trie->nextword && (got_wordnum= trie->nextword[got_wordnum]));
3014                     }
3015 #undef got_wordnum
3016
3017                     DEBUG_TRIE_EXECUTE_r({
3018                                 DUMP_EXEC_POS( (char *)uc, scan, do_utf8 );
3019                                 PerlIO_printf( Perl_debug_log,
3020                                     "%*s  %sState: %4"UVxf" Accepted: %4"UVxf" ",
3021                                     2+depth * 2, "", PL_colors[4],
3022                                     (UV)state, (UV)ST.accepted );
3023                     });
3024
3025                     if ( base ) {
3026                         REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc,
3027                                              uscan, len, uvc, charid, foldlen,
3028                                              foldbuf, uniflags);
3029
3030                         if (charid &&
3031                              (base + charid > trie->uniquecharcount )
3032                              && (base + charid - 1 - trie->uniquecharcount
3033                                     < trie->lasttrans)
3034                              && trie->trans[base + charid - 1 -
3035                                     trie->uniquecharcount].check == state)
3036                         {
3037                             state = trie->trans[base + charid - 1 -
3038                                 trie->uniquecharcount ].next;
3039                         }
3040                         else {
3041                             state = 0;
3042                         }
3043                         uc += len;
3044
3045                     }
3046                     else {
3047                         state = 0;
3048                     }
3049                     DEBUG_TRIE_EXECUTE_r(
3050                         PerlIO_printf( Perl_debug_log,
3051                             "Charid:%3x CP:%4"UVxf" After State: %4"UVxf"%s\n",
3052                             charid, uvc, (UV)state, PL_colors[5] );
3053                     );
3054                 }
3055                 if (!ST.accepted )
3056                    sayNO;
3057
3058                 DEBUG_EXECUTE_r(
3059                     PerlIO_printf( Perl_debug_log,
3060                         "%*s  %sgot %"IVdf" possible matches%s\n",
3061                         REPORT_CODE_OFF + depth * 2, "",
3062                         PL_colors[4], (IV)ST.accepted, PL_colors[5] );
3063                 );
3064             }}
3065             goto trie_first_try; /* jump into the fail handler */
3066             /* NOTREACHED */
3067         case TRIE_next_fail: /* we failed - try next alterative */
3068             if ( ST.jump) {
3069                 REGCP_UNWIND(ST.cp);
3070                 for (n = *PL_reglastparen; n > ST.lastparen; n--)
3071                     PL_regoffs[n].end = -1;
3072                 *PL_reglastparen = n;
3073             }
3074           trie_first_try:
3075             if (do_cutgroup) {
3076                 do_cutgroup = 0;
3077                 no_final = 0;
3078             }
3079
3080             if ( ST.jump) {
3081                 ST.lastparen = *PL_reglastparen;
3082                 REGCP_SET(ST.cp);
3083             }
3084             if ( ST.accepted == 1 ) {
3085                 /* only one choice left - just continue */
3086                 DEBUG_EXECUTE_r({
3087                     AV *const trie_words
3088                         = (AV *) rexi->data->data[ARG(ST.me)+TRIE_WORDS_OFFSET];
3089                     SV ** const tmp = av_fetch( trie_words,
3090                         ST.accept_buff[ 0 ].wordnum-1, 0 );
3091                     SV *sv= tmp ? sv_newmortal() : NULL;
3092
3093                     PerlIO_printf( Perl_debug_log,
3094                         "%*s  %sonly one match left: #%d <%s>%s\n",
3095                         REPORT_CODE_OFF+depth*2, "", PL_colors[4],
3096                         ST.accept_buff[ 0 ].wordnum,
3097                         tmp ? pv_pretty(sv, SvPV_nolen_const(*tmp), SvCUR(*tmp), 0,
3098                                 PL_colors[0], PL_colors[1],
3099                                 (SvUTF8(*tmp) ? PERL_PV_ESCAPE_UNI : 0)
3100                             )
3101                         : "not compiled under -Dr",
3102                         PL_colors[5] );
3103                 });
3104                 PL_reginput = (char *)ST.accept_buff[ 0 ].endpos;
3105                 /* in this case we free tmps/leave before we call regmatch
3106                    as we wont be using accept_buff again. */
3107
3108                 locinput = PL_reginput;
3109                 nextchr = UCHARAT(locinput);
3110                 if ( !ST.jump || !ST.jump[ST.accept_buff[0].wordnum])
3111                     scan = ST.B;
3112                 else
3113                     scan = ST.me + ST.jump[ST.accept_buff[0].wordnum];
3114                 if (!has_cutgroup) {
3115                     FREETMPS;
3116                     LEAVE;
3117                 } else {
3118                     ST.accepted--;
3119                     PUSH_YES_STATE_GOTO(TRIE_next, scan);
3120                 }
3121
3122                 continue; /* execute rest of RE */
3123             }
3124
3125             if ( !ST.accepted-- ) {
3126                 DEBUG_EXECUTE_r({
3127                     PerlIO_printf( Perl_debug_log,
3128                         "%*s  %sTRIE failed...%s\n",
3129                         REPORT_CODE_OFF+depth*2, "",
3130                         PL_colors[4],
3131                         PL_colors[5] );
3132                 });
3133                 FREETMPS;
3134                 LEAVE;
3135                 sayNO_SILENT;
3136                 /*NOTREACHED*/
3137             }
3138
3139             /*
3140                There are at least two accepting states left.  Presumably
3141                the number of accepting states is going to be low,
3142                typically two. So we simply scan through to find the one
3143                with lowest wordnum.  Once we find it, we swap the last
3144                state into its place and decrement the size. We then try to
3145                match the rest of the pattern at the point where the word
3146                ends. If we succeed, control just continues along the
3147                regex; if we fail we return here to try the next accepting
3148                state
3149              */
3150
3151             {
3152                 U32 best = 0;
3153                 U32 cur;
3154                 for( cur = 1 ; cur <= ST.accepted ; cur++ ) {
3155                     DEBUG_TRIE_EXECUTE_r(
3156                         PerlIO_printf( Perl_debug_log,
3157                             "%*s  %sgot %"IVdf" (%d) as best, looking at %"IVdf" (%d)%s\n",
3158                             REPORT_CODE_OFF + depth * 2, "", PL_colors[4],
3159                             (IV)best, ST.accept_buff[ best ].wordnum, (IV)cur,
3160                             ST.accept_buff[ cur ].wordnum, PL_colors[5] );
3161                     );
3162
3163                     if (ST.accept_buff[cur].wordnum <
3164                             ST.accept_buff[best].wordnum)
3165                         best = cur;
3166                 }
3167
3168                 DEBUG_EXECUTE_r({
3169                     AV *const trie_words
3170                         = (AV *) rexi->data->data[ARG(ST.me)+TRIE_WORDS_OFFSET];
3171                     SV ** const tmp = av_fetch( trie_words,
3172                         ST.accept_buff[ best ].wordnum - 1, 0 );
3173                     regnode *nextop=(!ST.jump || !ST.jump[ST.accept_buff[best].wordnum]) ?
3174                                     ST.B :
3175                                     ST.me + ST.jump[ST.accept_buff[best].wordnum];
3176                     SV *sv= tmp ? sv_newmortal() : NULL;
3177
3178                     PerlIO_printf( Perl_debug_log,
3179                         "%*s  %strying alternation #%d <%s> at node #%d %s\n",
3180                         REPORT_CODE_OFF+depth*2, "", PL_colors[4],
3181                         ST.accept_buff[best].wordnum,
3182                         tmp ? pv_pretty(sv, SvPV_nolen_const(*tmp), SvCUR(*tmp), 0,
3183                                 PL_colors[0], PL_colors[1],
3184                                 (SvUTF8(*tmp) ? PERL_PV_ESCAPE_UNI : 0)
3185                             ) : "not compiled under -Dr",
3186                             REG_NODE_NUM(nextop),
3187                         PL_colors[5] );
3188                 });
3189
3190                 if ( best<ST.accepted ) {
3191                     reg_trie_accepted tmp = ST.accept_buff[ best ];
3192                     ST.accept_buff[ best ] = ST.accept_buff[ ST.accepted ];
3193                     ST.accept_buff[ ST.accepted ] = tmp;
3194                     best = ST.accepted;
3195                 }
3196                 PL_reginput = (char *)ST.accept_buff[ best ].endpos;
3197                 if ( !ST.jump || !ST.jump[ST.accept_buff[best].wordnum]) {
3198                     scan = ST.B;
3199                 } else {
3200                     scan = ST.me + ST.jump[ST.accept_buff[best].wordnum];
3201                 }
3202                 PUSH_YES_STATE_GOTO(TRIE_next, scan);
3203                 /* NOTREACHED */
3204             }
3205             /* NOTREACHED */
3206         case TRIE_next:
3207             FREETMPS;
3208             LEAVE;
3209             sayYES;
3210 #undef  ST
3211
3212         case EXACT: {
3213             char *s = STRING(scan);
3214             ln = STR_LEN(scan);
3215             if (do_utf8 != UTF) {
3216                 /* The target and the pattern have differing utf8ness. */
3217                 char *l = locinput;
3218                 const char * const e = s + ln;
3219
3220                 if (do_utf8) {
3221                     /* The target is utf8, the pattern is not utf8. */
3222                     while (s < e) {
3223                         STRLEN ulen;
3224                         if (l >= PL_regeol)
3225                              sayNO;
3226                         if (NATIVE_TO_UNI(*(U8*)s) !=
3227                             utf8n_to_uvuni((U8*)l, UTF8_MAXBYTES, &ulen,
3228                                             uniflags))
3229                              sayNO;
3230                         l += ulen;
3231                         s ++;
3232                     }
3233                 }
3234                 else {
3235                     /* The target is not utf8, the pattern is utf8. */
3236                     while (s < e) {
3237                         STRLEN ulen;
3238                         if (l >= PL_regeol)
3239                             sayNO;
3240                         if (NATIVE_TO_UNI(*((U8*)l)) !=
3241                             utf8n_to_uvuni((U8*)s, UTF8_MAXBYTES, &ulen,
3242                                            uniflags))
3243                             sayNO;
3244                         s += ulen;
3245                         l ++;
3246                     }
3247                 }
3248                 locinput = l;
3249                 nextchr = UCHARAT(locinput);
3250                 break;
3251             }
3252             /* The target and the pattern have the same utf8ness. */
3253             /* Inline the first character, for speed. */
3254             if (UCHARAT(s) != nextchr)
3255                 sayNO;
3256             if (PL_regeol - locinput < ln)
3257                 sayNO;
3258             if (ln > 1 && memNE(s, locinput, ln))
3259                 sayNO;
3260             locinput += ln;
3261             nextchr = UCHARAT(locinput);
3262             break;
3263             }
3264         case EXACTFL:
3265             PL_reg_flags |= RF_tainted;
3266             /* FALL THROUGH */
3267         case EXACTF: {
3268             char * const s = STRING(scan);
3269             ln = STR_LEN(scan);
3270
3271             if (do_utf8 || UTF) {
3272               /* Either target or the pattern are utf8. */
3273                 const char * const l = locinput;
3274                 char *e = PL_regeol;
3275
3276                 if (ibcmp_utf8(s, 0,  ln, (bool)UTF,
3277                                l, &e, 0,  do_utf8)) {
3278                      /* One more case for the sharp s:
3279                       * pack("U0U*", 0xDF) =~ /ss/i,
3280                       * the 0xC3 0x9F are the UTF-8
3281                       * byte sequence for the U+00DF. */
3282
3283                      if (!(do_utf8 &&
3284                            toLOWER(s[0]) == 's' &&
3285                            ln >= 2 &&
3286                            toLOWER(s[1]) == 's' &&
3287                            (U8)l[0] == 0xC3 &&
3288                            e - l >= 2 &&
3289                            (U8)l[1] == 0x9F))
3290                           sayNO;
3291                 }
3292                 locinput = e;
3293                 nextchr = UCHARAT(locinput);
3294                 break;
3295             }
3296
3297             /* Neither the target and the pattern are utf8. */
3298
3299             /* Inline the first character, for speed. */
3300             if (UCHARAT(s) != nextchr &&
3301                 UCHARAT(s) != ((OP(scan) == EXACTF)
3302                                ? PL_fold : PL_fold_locale)[nextchr])
3303                 sayNO;
3304             if (PL_regeol - locinput < ln)
3305                 sayNO;
3306             if (ln > 1 && (OP(scan) == EXACTF
3307                            ? ibcmp(s, locinput, ln)
3308                            : ibcmp_locale(s, locinput, ln)))
3309                 sayNO;
3310             locinput += ln;
3311             nextchr = UCHARAT(locinput);
3312             break;
3313             }
3314         case ANYOF:
3315             if (do_utf8) {
3316                 STRLEN inclasslen = PL_regeol - locinput;
3317
3318                 if (!reginclass(rex, scan, (U8*)locinput, &inclasslen, do_utf8))
3319                     goto anyof_fail;
3320                 if (locinput >= PL_regeol)
3321                     sayNO;
3322                 locinput += inclasslen ? inclasslen : UTF8SKIP(locinput);
3323                 nextchr = UCHARAT(locinput);
3324                 break;
3325             }
3326             else {
3327                 if (nextchr < 0)
3328                     nextchr = UCHARAT(locinput);
3329                 if (!REGINCLASS(rex, scan, (U8*)locinput))
3330                     goto anyof_fail;
3331                 if (!nextchr && locinput >= PL_regeol)
3332                     sayNO;
3333                 nextchr = UCHARAT(++locinput);
3334                 break;
3335             }
3336         anyof_fail:
3337             /* If we might have the case of the German sharp s
3338              * in a casefolding Unicode character class. */
3339
3340             if (ANYOF_FOLD_SHARP_S(scan, locinput, PL_regeol)) {
3341                  locinput += SHARP_S_SKIP;
3342                  nextchr = UCHARAT(locinput);
3343             }
3344             else
3345                  sayNO;
3346             break;
3347         case ALNUML:
3348             PL_reg_flags |= RF_tainted;
3349             /* FALL THROUGH */
3350         case ALNUM:
3351             if (!nextchr)
3352                 sayNO;
3353             if (do_utf8) {
3354                 LOAD_UTF8_CHARCLASS_ALNUM();
3355                 if (!(OP(scan) == ALNUM
3356                       ? (bool)swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
3357                       : isALNUM_LC_utf8((U8*)locinput)))
3358                 {
3359                     sayNO;
3360                 }
3361                 locinput += PL_utf8skip[nextchr];
3362                 nextchr = UCHARAT(locinput);
3363                 break;
3364             }
3365             if (!(OP(scan) == ALNUM
3366                   ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
3367                 sayNO;
3368             nextchr = UCHARAT(++locinput);
3369             break;
3370         case NALNUML:
3371             PL_reg_flags |= RF_tainted;
3372             /* FALL THROUGH */
3373         case NALNUM:
3374             if (!nextchr && locinput >= PL_regeol)
3375                 sayNO;
3376             if (do_utf8) {
3377                 LOAD_UTF8_CHARCLASS_ALNUM();
3378                 if (OP(scan) == NALNUM
3379                     ? (bool)swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
3380                     : isALNUM_LC_utf8((U8*)locinput))
3381                 {
3382                     sayNO;
3383                 }
3384                 locinput += PL_utf8skip[nextchr];
3385                 nextchr = UCHARAT(locinput);
3386                 break;
3387             }
3388             if (OP(scan) == NALNUM
3389                 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
3390                 sayNO;
3391             nextchr = UCHARAT(++locinput);
3392             break;
3393         case BOUNDL:
3394         case NBOUNDL:
3395             PL_reg_flags |= RF_tainted;
3396             /* FALL THROUGH */
3397         case BOUND:
3398         case NBOUND:
3399             /* was last char in word? */
3400             if (do_utf8) {
3401                 if (locinput == PL_bostr)
3402                     ln = '\n';
3403                 else {
3404                     const U8 * const r = reghop3((U8*)locinput, -1, (U8*)PL_bostr);
3405
3406                     ln = utf8n_to_uvchr(r, UTF8SKIP(r), 0, uniflags);
3407                 }
3408                 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
3409                     ln = isALNUM_uni(ln);
3410                     LOAD_UTF8_CHARCLASS_ALNUM();
3411                     n = swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8);
3412                 }
3413                 else {
3414                     ln = isALNUM_LC_uvchr(UNI_TO_NATIVE(ln));
3415                     n = isALNUM_LC_utf8((U8*)locinput);
3416                 }
3417             }
3418             else {
3419                 ln = (locinput != PL_bostr) ?
3420                     UCHARAT(locinput - 1) : '\n';
3421                 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
3422                     ln = isALNUM(ln);
3423                     n = isALNUM(nextchr);
3424                 }
3425                 else {
3426                     ln = isALNUM_LC(ln);
3427                     n = isALNUM_LC(nextchr);
3428                 }
3429             }
3430             if (((!ln) == (!n)) == (OP(scan) == BOUND ||
3431                                     OP(scan) == BOUNDL))
3432                     sayNO;
3433             break;
3434         case SPACEL:
3435             PL_reg_flags |= RF_tainted;
3436             /* FALL THROUGH */
3437         case SPACE:
3438             if (!nextchr)
3439                 sayNO;
3440             if (do_utf8) {
3441                 if (UTF8_IS_CONTINUED(nextchr)) {
3442                     LOAD_UTF8_CHARCLASS_SPACE();
3443                     if (!(OP(scan) == SPACE
3444                           ? (bool)swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
3445                           : isSPACE_LC_utf8((U8*)locinput)))
3446                     {
3447                         sayNO;
3448                     }
3449                     locinput += PL_utf8skip[nextchr];
3450                     nextchr = UCHARAT(locinput);
3451                     break;
3452                 }
3453                 if (!(OP(scan) == SPACE
3454                       ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
3455                     sayNO;
3456                 nextchr = UCHARAT(++locinput);
3457             }
3458             else {
3459                 if (!(OP(scan) == SPACE
3460                       ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
3461                     sayNO;
3462                 nextchr = UCHARAT(++locinput);
3463             }
3464             break;
3465         case NSPACEL:
3466             PL_reg_flags |= RF_tainted;
3467             /* FALL THROUGH */
3468         case NSPACE:
3469             if (!nextchr && locinput >= PL_regeol)
3470                 sayNO;
3471             if (do_utf8) {
3472                 LOAD_UTF8_CHARCLASS_SPACE();
3473                 if (OP(scan) == NSPACE
3474                     ? (bool)swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
3475                     : isSPACE_LC_utf8((U8*)locinput))
3476                 {
3477                     sayNO;
3478                 }
3479                 locinput += PL_utf8skip[nextchr];
3480                 nextchr = UCHARAT(locinput);
3481                 break;
3482             }
3483             if (OP(scan) == NSPACE
3484                 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
3485                 sayNO;
3486             nextchr = UCHARAT(++locinput);
3487             break;
3488         case DIGITL:
3489             PL_reg_flags |= RF_tainted;
3490             /* FALL THROUGH */
3491         case DIGIT:
3492             if (!nextchr)
3493                 sayNO;
3494             if (do_utf8) {
3495                 LOAD_UTF8_CHARCLASS_DIGIT();
3496                 if (!(OP(scan) == DIGIT
3497                       ? (bool)swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
3498                       : isDIGIT_LC_utf8((U8*)locinput)))
3499                 {
3500                     sayNO;
3501                 }
3502                 locinput += PL_utf8skip[nextchr];
3503                 nextchr = UCHARAT(locinput);
3504                 break;
3505             }
3506             if (!(OP(scan) == DIGIT
3507                   ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
3508                 sayNO;
3509             nextchr = UCHARAT(++locinput);
3510             break;
3511         case NDIGITL:
3512             PL_reg_flags |= RF_tainted;
3513             /* FALL THROUGH */
3514         case NDIGIT:
3515             if (!nextchr && locinput >= PL_regeol)
3516                 sayNO;
3517             if (do_utf8) {
3518                 LOAD_UTF8_CHARCLASS_DIGIT();
3519                 if (OP(scan) == NDIGIT
3520                     ? (bool)swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
3521                     : isDIGIT_LC_utf8((U8*)locinput))
3522                 {
3523                     sayNO;
3524                 }
3525                 locinput += PL_utf8skip[nextchr];
3526                 nextchr = UCHARAT(locinput);
3527                 break;
3528             }
3529             if (OP(scan) == NDIGIT
3530                 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
3531                 sayNO;
3532             nextchr = UCHARAT(++locinput);
3533             break;
3534         case CLUMP:
3535             if (locinput >= PL_regeol)
3536                 sayNO;
3537             if  (do_utf8) {
3538                 LOAD_UTF8_CHARCLASS_MARK();
3539                 if (swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
3540                     sayNO;
3541                 locinput += PL_utf8skip[nextchr];
3542                 while (locinput < PL_regeol &&
3543                        swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
3544                     locinput += UTF8SKIP(locinput);
3545                 if (locinput > PL_regeol)
3546                     sayNO;
3547             }
3548             else
3549                locinput++;
3550             nextchr = UCHARAT(locinput);
3551             break;
3552
3553         case NREFFL:
3554         {
3555             char *s;
3556             char type;
3557             PL_reg_flags |= RF_tainted;
3558             /* FALL THROUGH */
3559         case NREF:
3560         case NREFF:
3561             type = OP(scan);
3562             n = reg_check_named_buff_matched(rex,scan);
3563
3564             if ( n ) {
3565                 type = REF + ( type - NREF );
3566                 goto do_ref;
3567             } else {
3568                 sayNO;
3569             }
3570             /* unreached */
3571         case REFFL:
3572             PL_reg_flags |= RF_tainted;
3573             /* FALL THROUGH */
3574         case REF:
3575         case REFF:
3576             n = ARG(scan);  /* which paren pair */
3577             type = OP(scan);
3578           do_ref:
3579             ln = PL_regoffs[n].start;
3580             PL_reg_leftiter = PL_reg_maxiter;           /* Void cache */
3581             if (*PL_reglastparen < n || ln == -1)
3582                 sayNO;                  /* Do not match unless seen CLOSEn. */
3583             if (ln == PL_regoffs[n].end)
3584                 break;
3585
3586             s = PL_bostr + ln;
3587             if (do_utf8 && type != REF) {       /* REF can do byte comparison */
3588                 char *l = locinput;
3589                 const char *e = PL_bostr + PL_regoffs[n].end;
3590                 /*
3591                  * Note that we can't do the "other character" lookup trick as
3592                  * in the 8-bit case (no pun intended) because in Unicode we
3593                  * have to map both upper and title case to lower case.
3594                  */
3595                 if (type == REFF) {
3596                     while (s < e) {
3597                         STRLEN ulen1, ulen2;
3598                         U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
3599                         U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
3600
3601                         if (l >= PL_regeol)
3602                             sayNO;
3603                         toLOWER_utf8((U8*)s, tmpbuf1, &ulen1);
3604                         toLOWER_utf8((U8*)l, tmpbuf2, &ulen2);
3605                         if (ulen1 != ulen2 || memNE((char *)tmpbuf1, (char *)tmpbuf2, ulen1))
3606                             sayNO;
3607                         s += ulen1;
3608                         l += ulen2;
3609                     }
3610                 }
3611                 locinput = l;
3612                 nextchr = UCHARAT(locinput);
3613                 break;
3614             }
3615
3616             /* Inline the first character, for speed. */
3617             if (UCHARAT(s) != nextchr &&
3618                 (type == REF ||
3619                  (UCHARAT(s) != (type == REFF
3620                                   ? PL_fold : PL_fold_locale)[nextchr])))
3621                 sayNO;
3622             ln = PL_regoffs[n].end - ln;
3623             if (locinput + ln > PL_regeol)
3624                 sayNO;
3625             if (ln > 1 && (type == REF
3626                            ? memNE(s, locinput, ln)
3627                            : (type == REFF
3628                               ? ibcmp(s, locinput, ln)
3629                               : ibcmp_locale(s, locinput, ln))))
3630                 sayNO;
3631             locinput += ln;
3632             nextchr = UCHARAT(locinput);
3633             break;
3634         }
3635         case NOTHING:
3636         case TAIL:
3637             break;
3638         case BACK:
3639             break;
3640
3641 #undef  ST
3642 #define ST st->u.eval
3643         {
3644             SV *ret;
3645             REGEXP *re_sv;
3646             regexp *re;
3647             regexp_internal *rei;
3648             regnode *startpoint;
3649
3650         case GOSTART:
3651         case GOSUB: /*    /(...(?1))/   /(...(?&foo))/   */
3652             if (cur_eval && cur_eval->locinput==locinput) {
3653                 if (cur_eval->u.eval.close_paren == (U32)ARG(scan))
3654                     Perl_croak(aTHX_ "Infinite recursion in regex");
3655                 if ( ++nochange_depth > max_nochange_depth )
3656                     Perl_croak(aTHX_
3657                         "Pattern subroutine nesting without pos change"
3658                         " exceeded limit in regex");
3659             } else {
3660                 nochange_depth = 0;
3661             }
3662             re_sv = rex_sv;
3663             re = rex;
3664             rei = rexi;
3665             (void)ReREFCNT_inc(rex_sv);
3666             if (OP(scan)==GOSUB) {
3667                 startpoint = scan + ARG2L(scan);
3668                 ST.close_paren = ARG(scan);
3669             } else {
3670                 startpoint = rei->program+1;
3671                 ST.close_paren = 0;
3672             }
3673             goto eval_recurse_doit;
3674             /* NOTREACHED */
3675         case EVAL:  /*   /(?{A})B/   /(??{A})B/  and /(?(?{A})X|Y)B/   */
3676             if (cur_eval && cur_eval->locinput==locinput) {
3677                 if ( ++nochange_depth > max_nochange_depth )
3678                     Perl_croak(aTHX_ "EVAL without pos change exceeded limit in regex");
3679             } else {
3680                 nochange_depth = 0;
3681             }
3682             {
3683                 /* execute the code in the {...} */
3684                 dSP;
3685                 SV ** const before = SP;
3686                 OP_4tree * const oop = PL_op;
3687                 COP * const ocurcop = PL_curcop;
3688                 PAD *old_comppad;
3689
3690                 n = ARG(scan);
3691                 PL_op = (OP_4tree*)rexi->data->data[n];
3692                 DEBUG_STATE_r( PerlIO_printf(Perl_debug_log,
3693                     "  re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
3694                 PAD_SAVE_LOCAL(old_comppad, (PAD*)rexi->data->data[n + 2]);
3695                 PL_regoffs[0].end = PL_reg_magic->mg_len = locinput - PL_bostr;
3696
3697                 if (sv_yes_mark) {
3698                     SV *sv_mrk = get_sv("REGMARK", 1);
3699                     sv_setsv(sv_mrk, sv_yes_mark);
3700                 }
3701
3702                 CALLRUNOPS(aTHX);                       /* Scalar context. */
3703                 SPAGAIN;
3704                 if (SP == before)
3705                     ret = &PL_sv_undef;   /* protect against empty (?{}) blocks. */
3706                 else {
3707                     ret = POPs;
3708                     PUTBACK;
3709                 }
3710
3711                 PL_op = oop;
3712                 PAD_RESTORE_LOCAL(old_comppad);
3713                 PL_curcop = ocurcop;
3714                 if (!logical) {
3715                     /* /(?{...})/ */
3716                     sv_setsv(save_scalar(PL_replgv), ret);
3717                     break;
3718                 }
3719             }
3720             if (logical == 2) { /* Postponed subexpression: /(??{...})/ */
3721                 logical = 0;
3722                 {
3723                     /* extract RE object from returned value; compiling if
3724                      * necessary */
3725                     MAGIC *mg = NULL;
3726                     REGEXP *rx = NULL;
3727
3728                     if (SvROK(ret)) {
3729                         SV *const sv = SvRV(ret);
3730
3731                         if (SvTYPE(sv) == SVt_REGEXP) {
3732                             rx = (REGEXP*) sv;
3733                         } else if (SvSMAGICAL(sv)) {
3734                             mg = mg_find(sv, PERL_MAGIC_qr);
3735                             assert(mg);
3736                         }
3737                     } else if (SvTYPE(ret) == SVt_REGEXP) {
3738                         rx = (REGEXP*) ret;
3739                     } else if (SvSMAGICAL(ret)) {
3740                         if (SvGMAGICAL(ret)) {
3741                             /* I don't believe that there is ever qr magic
3742                                here.  */
3743                             assert(!mg_find(ret, PERL_MAGIC_qr));
3744                             sv_unmagic(ret, PERL_MAGIC_qr);
3745                         }
3746                         else {
3747                             mg = mg_find(ret, PERL_MAGIC_qr);
3748                             /* testing suggests mg only ends up non-NULL for
3749                                scalars who were upgraded and compiled in the
3750                                else block below. In turn, this is only
3751                                triggered in the "postponed utf8 string" tests
3752                                in t/op/pat.t  */
3753                         }
3754                     }
3755
3756                     if (mg) {
3757                         rx = (REGEXP *) mg->mg_obj; /*XXX:dmq*/
3758                         assert(rx);
3759                     }
3760                     if (rx) {
3761                         rx = reg_temp_copy(rx);
3762                     }
3763                     else {
3764                         U32 pm_flags = 0;
3765                         const I32 osize = PL_regsize;
3766
3767                         if (DO_UTF8(ret)) {
3768                             assert (SvUTF8(ret));
3769                         } else if (SvUTF8(ret)) {
3770                             /* Not doing UTF-8, despite what the SV says. Is
3771                                this only if we're trapped in use 'bytes'?  */
3772                             /* Make a copy of the octet sequence, but without
3773                                the flag on, as the compiler now honours the
3774                                SvUTF8 flag on ret.  */
3775                             STRLEN len;
3776                             const char *const p = SvPV(ret, len);
3777                             ret = newSVpvn_flags(p, len, SVs_TEMP);
3778                         }
3779                         rx = CALLREGCOMP(ret, pm_flags);
3780                         if (!(SvFLAGS(ret)
3781                               & (SVs_TEMP | SVs_PADTMP | SVf_READONLY
3782                                  | SVs_GMG))) {
3783                             /* This isn't a first class regexp. Instead, it's
3784                                caching a regexp onto an existing, Perl visible
3785                                scalar.  */
3786                             sv_magic(ret, (SV*) rx, PERL_MAGIC_qr, 0, 0);
3787                         }
3788                         PL_regsize = osize;
3789                     }
3790                     re_sv = rx;
3791                     re = (struct regexp *)SvANY(rx);
3792                 }
3793                 RXp_MATCH_COPIED_off(re);
3794                 re->subbeg = rex->subbeg;
3795                 re->sublen = rex->sublen;
3796                 rei = RXi_GET(re);
3797                 DEBUG_EXECUTE_r(
3798                     debug_start_match(re_sv, do_utf8, locinput, PL_regeol,
3799                         "Matching embedded");
3800                 );
3801                 startpoint = rei->program + 1;
3802                 ST.close_paren = 0; /* only used for GOSUB */
3803                 /* borrowed from regtry */
3804                 if (PL_reg_start_tmpl <= re->nparens) {
3805                     PL_reg_start_tmpl = re->nparens*3/2 + 3;
3806                     if(PL_reg_start_tmp)
3807                         Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
3808                     else
3809                         Newx(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
3810                 }
3811
3812         eval_recurse_doit: /* Share code with GOSUB below this line */
3813                 /* run the pattern returned from (??{...}) */
3814                 ST.cp = regcppush(0);   /* Save *all* the positions. */
3815                 REGCP_SET(ST.lastcp);
3816
3817                 PL_regoffs = re->offs; /* essentially NOOP on GOSUB */
3818
3819                 /* see regtry, specifically PL_reglast(?:close)?paren is a pointer! (i dont know why) :dmq */
3820                 PL_reglastparen = &re->lastparen;
3821                 PL_reglastcloseparen = &re->lastcloseparen;
3822                 re->lastparen = 0;
3823                 re->lastcloseparen = 0;
3824
3825                 PL_reginput = locinput;
3826                 PL_regsize = 0;
3827
3828                 /* XXXX This is too dramatic a measure... */
3829                 PL_reg_maxiter = 0;
3830
3831                 ST.toggle_reg_flags = PL_reg_flags;
3832                 if (RX_UTF8(re_sv))
3833                     PL_reg_flags |= RF_utf8;
3834                 else
3835                     PL_reg_flags &= ~RF_utf8;
3836                 ST.toggle_reg_flags ^= PL_reg_flags; /* diff of old and new */
3837
3838                 ST.prev_rex = rex_sv;
3839                 ST.prev_curlyx = cur_curlyx;
3840                 SETREX(rex_sv,re_sv);
3841                 rex = re;
3842                 rexi = rei;
3843                 cur_curlyx = NULL;
3844                 ST.B = next;
3845                 ST.prev_eval = cur_eval;
3846                 cur_eval = st;
3847                 /* now continue from first node in postoned RE */
3848                 PUSH_YES_STATE_GOTO(EVAL_AB, startpoint);
3849                 /* NOTREACHED */
3850             }
3851             /* logical is 1,   /(?(?{...})X|Y)/ */
3852             sw = (bool)SvTRUE(ret);
3853             logical = 0;
3854             break;
3855         }
3856
3857         case EVAL_AB: /* cleanup after a successful (??{A})B */
3858             /* note: this is called twice; first after popping B, then A */
3859             PL_reg_flags ^= ST.toggle_reg_flags;
3860             ReREFCNT_dec(rex_sv);
3861             SETREX(rex_sv,ST.prev_rex);
3862             rex = (struct regexp *)SvANY(rex_sv);
3863             rexi = RXi_GET(rex);
3864             regcpblow(ST.cp);
3865             cur_eval = ST.prev_eval;
3866             cur_curlyx = ST.prev_curlyx;
3867
3868             PL_reglastparen = &rex->lastparen;
3869             PL_reglastcloseparen = &rex->lastcloseparen;
3870
3871             /* XXXX This is too dramatic a measure... */
3872             PL_reg_maxiter = 0;
3873             if ( nochange_depth )
3874                 nochange_depth--;
3875             sayYES;
3876
3877
3878         case EVAL_AB_fail: /* unsuccessfully ran A or B in (??{A})B */
3879             /* note: this is called twice; first after popping B, then A */
3880             PL_reg_flags ^= ST.toggle_reg_flags;
3881             ReREFCNT_dec(rex_sv);
3882             SETREX(rex_sv,ST.prev_rex);
3883             rex = (struct regexp *)SvANY(rex_sv);
3884             rexi = RXi_GET(rex);
3885             PL_reglastparen = &rex->lastparen;
3886             PL_reglastcloseparen = &rex->lastcloseparen;
3887
3888             PL_reginput = locinput;
3889             REGCP_UNWIND(ST.lastcp);
3890             regcppop(rex);
3891             cur_eval = ST.prev_eval;
3892             cur_curlyx = ST.prev_curlyx;
3893             /* XXXX This is too dramatic a measure... */
3894             PL_reg_maxiter = 0;
3895             if ( nochange_depth )
3896                 nochange_depth--;
3897             sayNO_SILENT;
3898 #undef ST
3899
3900         case OPEN:
3901             n = ARG(scan);  /* which paren pair */
3902             PL_reg_start_tmp[n] = locinput;
3903             if (n > PL_regsize)
3904                 PL_regsize = n;
3905             lastopen = n;
3906             break;
3907         case CLOSE:
3908             n = ARG(scan);  /* which paren pair */
3909             PL_regoffs[n].start = PL_reg_start_tmp[n] - PL_bostr;
3910             PL_regoffs[n].end = locinput - PL_bostr;
3911             /*if (n > PL_regsize)
3912                 PL_regsize = n;*/
3913             if (n > *PL_reglastparen)
3914                 *PL_reglastparen = n;
3915             *PL_reglastcloseparen = n;
3916             if (cur_eval && cur_eval->u.eval.close_paren == n) {
3917                 goto fake_end;
3918             }
3919             break;
3920         case ACCEPT:
3921             if (ARG(scan)){
3922                 regnode *cursor;
3923                 for (cursor=scan;
3924                      cursor && OP(cursor)!=END;
3925                      cursor=regnext(cursor))
3926                 {
3927                     if ( OP(cursor)==CLOSE ){
3928                         n = ARG(cursor);
3929                         if ( n <= lastopen ) {
3930                             PL_regoffs[n].start
3931                                 = PL_reg_start_tmp[n] - PL_bostr;
3932                             PL_regoffs[n].end = locinput - PL_bostr;
3933                             /*if (n > PL_regsize)
3934                             PL_regsize = n;*/
3935                             if (n > *PL_reglastparen)
3936                                 *PL_reglastparen = n;
3937                             *PL_reglastcloseparen = n;
3938                             if ( n == ARG(scan) || (cur_eval &&
3939                                 cur_eval->u.eval.close_paren == n))
3940                                 break;
3941                         }
3942                     }
3943                 }
3944             }
3945             goto fake_end;
3946             /*NOTREACHED*/
3947         case GROUPP:
3948             n = ARG(scan);  /* which paren pair */
3949             sw = (bool)(*PL_reglastparen >= n && PL_regoffs[n].end != -1);
3950             break;
3951         case NGROUPP:
3952             /* reg_check_named_buff_matched returns 0 for no match */
3953             sw = (bool)(0 < reg_check_named_buff_matched(rex,scan));
3954             break;
3955         case INSUBP:
3956             n = ARG(scan);
3957             sw = (cur_eval && (!n || cur_eval->u.eval.close_paren == n));
3958             break;
3959         case DEFINEP:
3960             sw = 0;
3961             break;
3962         case IFTHEN:
3963             PL_reg_leftiter = PL_reg_maxiter;           /* Void cache */
3964             if (sw)
3965                 next = NEXTOPER(NEXTOPER(scan));
3966             else {
3967                 next = scan + ARG(scan);
3968                 if (OP(next) == IFTHEN) /* Fake one. */
3969                     next = NEXTOPER(NEXTOPER(next));
3970             }
3971             break;
3972         case LOGICAL:
3973             logical = scan->flags;
3974             break;
3975
3976 /*******************************************************************
3977
3978 The CURLYX/WHILEM pair of ops handle the most generic case of the /A*B/
3979 pattern, where A and B are subpatterns. (For simple A, CURLYM or
3980 STAR/PLUS/CURLY/CURLYN are used instead.)
3981
3982 A*B is compiled as <CURLYX><A><WHILEM><B>
3983
3984 On entry to the subpattern, CURLYX is called. This pushes a CURLYX
3985 state, which contains the current count, initialised to -1. It also sets
3986 cur_curlyx to point to this state, with any previous value saved in the
3987 state block.
3988
3989 CURLYX then jumps straight to the WHILEM op, rather than executing A,
3990 since the pattern may possibly match zero times (i.e. it's a while {} loop
3991 rather than a do {} while loop).
3992
3993 Each entry to WHILEM represents a successful match of A. The count in the
3994 CURLYX block is incremented, another WHILEM state is pushed, and execution
3995 passes to A or B depending on greediness and the current count.
3996
3997 For example, if matching against the string a1a2a3b (where the aN are
3998 substrings that match /A/), then the match progresses as follows: (the
3999 pushed states are interspersed with the bits of strings matched so far):
4000
4001     <CURLYX cnt=-1>
4002     <CURLYX cnt=0><WHILEM>
4003     <CURLYX cnt=1><WHILEM> a1 <WHILEM>
4004     <CURLYX cnt=2><WHILEM> a1 <WHILEM> a2 <WHILEM>
4005     <CURLYX cnt=3><WHILEM> a1 <WHILEM> a2 <WHILEM> a3 <WHILEM>
4006     <CURLYX cnt=3><WHILEM> a1 <WHILEM> a2 <WHILEM> a3 <WHILEM> b
4007
4008 (Contrast this with something like CURLYM, which maintains only a single
4009 backtrack state:
4010
4011     <CURLYM cnt=0> a1
4012     a1 <CURLYM cnt=1> a2
4013     a1 a2 <CURLYM cnt=2> a3
4014     a1 a2 a3 <CURLYM cnt=3> b
4015 )
4016
4017 Each WHILEM state block marks a point to backtrack to upon partial failure
4018 of A or B, and also contains some minor state data related to that
4019 iteration.  The CURLYX block, pointed to by cur_curlyx, contains the
4020 overall state, such as the count, and pointers to the A and B ops.
4021
4022 This is complicated slightly by nested CURLYX/WHILEM's. Since cur_curlyx
4023 must always point to the *current* CURLYX block, the rules are:
4024
4025 When executing CURLYX, save the old cur_curlyx in the CURLYX state block,
4026 and set cur_curlyx to point the new block.
4027
4028 When popping the CURLYX block after a successful or unsuccessful match,
4029 restore the previous cur_curlyx.
4030
4031 When WHILEM is about to execute B, save the current cur_curlyx, and set it
4032 to the outer one saved in the CURLYX block.
4033
4034 When popping the WHILEM block after a successful or unsuccessful B match,
4035 restore the previous cur_curlyx.
4036
4037 Here's an example for the pattern (AI* BI)*BO
4038 I and O refer to inner and outer, C and W refer to CURLYX and WHILEM:
4039
4040 cur_
4041 curlyx backtrack stack
4042 ------ ---------------
4043 NULL
4044 CO     <CO prev=NULL> <WO>
4045 CI     <CO prev=NULL> <WO> <CI prev=CO> <WI> ai
4046 CO     <CO prev=NULL> <WO> <CI prev=CO> <WI> ai <WI prev=CI> bi
4047 NULL   <CO prev=NULL> <WO> <CI prev=CO> <WI> ai <WI prev=CI> bi <WO prev=CO> bo
4048
4049 At this point the pattern succeeds, and we work back down the stack to
4050 clean up, restoring as we go:
4051
4052 CO     <CO prev=NULL> <WO> <CI prev=CO> <WI> ai <WI prev=CI> bi
4053 CI     <CO prev=NULL> <WO> <CI prev=CO> <WI> ai
4054 CO     <CO prev=NULL> <WO>
4055 NULL
4056
4057 *******************************************************************/
4058
4059 #define ST st->u.curlyx
4060
4061         case CURLYX:    /* start of /A*B/  (for complex A) */
4062         {
4063             /* No need to save/restore up to this paren */
4064             I32 parenfloor = scan->flags;
4065
4066             assert(next); /* keep Coverity happy */
4067             if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
4068                 next += ARG(next);
4069
4070             /* XXXX Probably it is better to teach regpush to support
4071                parenfloor > PL_regsize... */
4072             if (parenfloor > (I32)*PL_reglastparen)
4073                 parenfloor = *PL_reglastparen; /* Pessimization... */
4074
4075             ST.prev_curlyx= cur_curlyx;
4076             cur_curlyx = st;
4077             ST.cp = PL_savestack_ix;
4078
4079             /* these fields contain the state of the current curly.
4080              * they are accessed by subsequent WHILEMs */
4081             ST.parenfloor = parenfloor;
4082             ST.min = ARG1(scan);
4083             ST.max = ARG2(scan);
4084             ST.A = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
4085             ST.B = next;
4086             ST.minmod = minmod;
4087             minmod = 0;
4088             ST.count = -1;      /* this will be updated by WHILEM */
4089             ST.lastloc = NULL;  /* this will be updated by WHILEM */
4090
4091             PL_reginput = locinput;
4092             PUSH_YES_STATE_GOTO(CURLYX_end, PREVOPER(next));
4093             /* NOTREACHED */
4094         }
4095
4096         case CURLYX_end: /* just finished matching all of A*B */
4097             cur_curlyx = ST.prev_curlyx;
4098             sayYES;
4099             /* NOTREACHED */
4100
4101         case CURLYX_end_fail: /* just failed to match all of A*B */
4102             regcpblow(ST.cp);
4103             cur_curlyx = ST.prev_curlyx;
4104             sayNO;
4105             /* NOTREACHED */
4106
4107
4108 #undef ST
4109 #define ST st->u.whilem
4110
4111         case WHILEM:     /* just matched an A in /A*B/  (for complex A) */
4112         {
4113             /* see the discussion above about CURLYX/WHILEM */
4114             I32 n;
4115             assert(cur_curlyx); /* keep Coverity happy */
4116             n = ++cur_curlyx->u.curlyx.count; /* how many A's matched */
4117             ST.save_lastloc = cur_curlyx->u.curlyx.lastloc;
4118             ST.cache_offset = 0;
4119             ST.cache_mask = 0;
4120
4121             PL_reginput = locinput;
4122
4123             DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
4124                   "%*s  whilem: matched %ld out of %ld..%ld\n",
4125                   REPORT_CODE_OFF+depth*2, "", (long)n,
4126                   (long)cur_curlyx->u.curlyx.min,
4127                   (long)cur_curlyx->u.curlyx.max)
4128             );
4129
4130             /* First just match a string of min A's. */
4131
4132             if (n < cur_curlyx->u.curlyx.min) {
4133                 cur_curlyx->u.curlyx.lastloc = locinput;
4134                 PUSH_STATE_GOTO(WHILEM_A_pre, cur_curlyx->u.curlyx.A);
4135                 /* NOTREACHED */
4136             }
4137
4138             /* If degenerate A matches "", assume A done. */
4139
4140             if (locinput == cur_curlyx->u.curlyx.lastloc) {
4141                 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
4142                    "%*s  whilem: empty match detected, trying continuation...\n",
4143                    REPORT_CODE_OFF+depth*2, "")
4144                 );
4145                 goto do_whilem_B_max;
4146             }
4147
4148             /* super-linear cache processing */
4149
4150             if (scan->flags) {
4151
4152                 if (!PL_reg_maxiter) {
4153                     /* start the countdown: Postpone detection until we
4154                      * know the match is not *that* much linear. */
4155                     PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
4156                     /* possible overflow for long strings and many CURLYX's */
4157                     if (PL_reg_maxiter < 0)
4158                         PL_reg_maxiter = I32_MAX;
4159                     PL_reg_leftiter = PL_reg_maxiter;
4160                 }
4161
4162                 if (PL_reg_leftiter-- == 0) {
4163                     /* initialise cache */
4164                     const I32 size = (PL_reg_maxiter + 7)/8;
4165                     if (PL_reg_poscache) {
4166                         if ((I32)PL_reg_poscache_size < size) {
4167                             Renew(PL_reg_poscache, size, char);
4168                             PL_reg_poscache_size = size;
4169                         }
4170                         Zero(PL_reg_poscache, size, char);
4171                     }
4172                     else {
4173                         PL_reg_poscache_size = size;
4174                         Newxz(PL_reg_poscache, size, char);
4175                     }
4176                     DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
4177       "%swhilem: Detected a super-linear match, switching on caching%s...\n",
4178                               PL_colors[4], PL_colors[5])
4179                     );
4180                 }
4181
4182                 if (PL_reg_leftiter < 0) {
4183                     /* have we already failed at this position? */
4184                     I32 offset, mask;
4185                     offset  = (scan->flags & 0xf) - 1
4186                                 + (locinput - PL_bostr)  * (scan->flags>>4);
4187                     mask    = 1 << (offset % 8);
4188                     offset /= 8;
4189                     if (PL_reg_poscache[offset] & mask) {
4190                         DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
4191                             "%*s  whilem: (cache) already tried at this position...\n",
4192                             REPORT_CODE_OFF+depth*2, "")
4193                         );
4194                         sayNO; /* cache records failure */
4195                     }
4196                     ST.cache_offset = offset;
4197                     ST.cache_mask   = mask;
4198                 }
4199             }
4200
4201             /* Prefer B over A for minimal matching. */
4202
4203             if (cur_curlyx->u.curlyx.minmod) {
4204                 ST.save_curlyx = cur_curlyx;
4205                 cur_curlyx = cur_curlyx->u.curlyx.prev_curlyx;
4206                 ST.cp = regcppush(ST.save_curlyx->u.curlyx.parenfloor);
4207                 REGCP_SET(ST.lastcp);
4208                 PUSH_YES_STATE_GOTO(WHILEM_B_min, ST.save_curlyx->u.curlyx.B);
4209                 /* NOTREACHED */
4210             }
4211
4212             /* Prefer A over B for maximal matching. */
4213
4214             if (n < cur_curlyx->u.curlyx.max) { /* More greed allowed? */
4215                 ST.cp = regcppush(cur_curlyx->u.curlyx.parenfloor);
4216                 cur_curlyx->u.curlyx.lastloc = locinput;
4217                 REGCP_SET(ST.lastcp);
4218                 PUSH_STATE_GOTO(WHILEM_A_max, cur_curlyx->u.curlyx.A);
4219                 /* NOTREACHED */
4220             }
4221             goto do_whilem_B_max;
4222         }
4223         /* NOTREACHED */
4224
4225         case WHILEM_B_min: /* just matched B in a minimal match */
4226         case WHILEM_B_max: /* just matched B in a maximal match */
4227             cur_curlyx = ST.save_curlyx;
4228             sayYES;
4229             /* NOTREACHED */
4230
4231         case WHILEM_B_max_fail: /* just failed to match B in a maximal match */
4232             cur_curlyx = ST.save_curlyx;
4233             cur_curlyx->u.curlyx.lastloc = ST.save_lastloc;
4234             cur_curlyx->u.curlyx.count--;
4235             CACHEsayNO;
4236             /* NOTREACHED */
4237
4238         case WHILEM_A_min_fail: /* just failed to match A in a minimal match */
4239             REGCP_UNWIND(ST.lastcp);
4240             regcppop(rex);
4241             /* FALL THROUGH */
4242         case WHILEM_A_pre_fail: /* just failed to match even minimal A */
4243             cur_curlyx->u.curlyx.lastloc = ST.save_lastloc;
4244             cur_curlyx->u.curlyx.count--;
4245             CACHEsayNO;
4246             /* NOTREACHED */
4247
4248         case WHILEM_A_max_fail: /* just failed to match A in a maximal match */
4249             REGCP_UNWIND(ST.lastcp);
4250             regcppop(rex);      /* Restore some previous $<digit>s? */
4251             PL_reginput = locinput;
4252             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
4253                 "%*s  whilem: failed, trying continuation...\n",
4254                 REPORT_CODE_OFF+depth*2, "")
4255             );
4256           do_whilem_B_max:
4257             if (cur_curlyx->u.curlyx.count >= REG_INFTY
4258                 && ckWARN(WARN_REGEXP)
4259                 && !(PL_reg_flags & RF_warned))
4260             {
4261                 PL_reg_flags |= RF_warned;
4262                 Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s limit (%d) exceeded",
4263                      "Complex regular subexpression recursion",
4264                      REG_INFTY - 1);
4265             }
4266
4267             /* now try B */
4268             ST.save_curlyx = cur_curlyx;
4269             cur_curlyx = cur_curlyx->u.curlyx.prev_curlyx;
4270             PUSH_YES_STATE_GOTO(WHILEM_B_max, ST.save_curlyx->u.curlyx.B);
4271             /* NOTREACHED */
4272
4273         case WHILEM_B_min_fail: /* just failed to match B in a minimal match */
4274             cur_curlyx = ST.save_curlyx;
4275             REGCP_UNWIND(ST.lastcp);
4276             regcppop(rex);
4277
4278             if (cur_curlyx->u.curlyx.count >= cur_curlyx->u.curlyx.max) {
4279                 /* Maximum greed exceeded */
4280                 if (cur_curlyx->u.curlyx.count >= REG_INFTY
4281                     && ckWARN(WARN_REGEXP)
4282                     && !(PL_reg_flags & RF_warned))
4283                 {
4284                     PL_reg_flags |= RF_warned;
4285                     Perl_warner(aTHX_ packWARN(WARN_REGEXP),
4286                         "%s limit (%d) exceeded",
4287                         "Complex regular subexpression recursion",
4288                         REG_INFTY - 1);
4289                 }
4290                 cur_curlyx->u.curlyx.count--;
4291                 CACHEsayNO;
4292             }
4293
4294             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
4295                 "%*s  trying longer...\n", REPORT_CODE_OFF+depth*2, "")
4296             );
4297             /* Try grabbing another A and see if it helps. */
4298             PL_reginput = locinput;
4299             cur_curlyx->u.curlyx.lastloc = locinput;
4300             ST.cp = regcppush(cur_curlyx->u.curlyx.parenfloor);
4301             REGCP_SET(ST.lastcp);
4302             PUSH_STATE_GOTO(WHILEM_A_min, ST.save_curlyx->u.curlyx.A);
4303             /* NOTREACHED */
4304
4305 #undef  ST
4306 #define ST st->u.branch
4307
4308         case BRANCHJ:       /*  /(...|A|...)/ with long next pointer */
4309             next = scan + ARG(scan);
4310             if (next == scan)
4311                 next = NULL;
4312             scan = NEXTOPER(scan);
4313             /* FALL THROUGH */
4314
4315         case BRANCH:        /*  /(...|A|...)/ */
4316             scan = NEXTOPER(scan); /* scan now points to inner node */
4317             ST.lastparen = *PL_reglastparen;
4318             ST.next_branch = next;
4319             REGCP_SET(ST.cp);
4320             PL_reginput = locinput;
4321
4322             /* Now go into the branch */
4323             if (has_cutgroup) {
4324                 PUSH_YES_STATE_GOTO(BRANCH_next, scan);
4325             } else {
4326                 PUSH_STATE_GOTO(BRANCH_next, scan);
4327             }
4328             /* NOTREACHED */
4329         case CUTGROUP:
4330             PL_reginput = locinput;
4331             sv_yes_mark = st->u.mark.mark_name = scan->flags ? NULL :
4332                 (SV*)rexi->data->data[ ARG( scan ) ];
4333             PUSH_STATE_GOTO(CUTGROUP_next,next);
4334             /* NOTREACHED */
4335         case CUTGROUP_next_fail:
4336             do_cutgroup = 1;
4337             no_final = 1;
4338             if (st->u.mark.mark_name)
4339                 sv_commit = st->u.mark.mark_name;
4340             sayNO;
4341             /* NOTREACHED */
4342         case BRANCH_next:
4343             sayYES;
4344             /* NOTREACHED */
4345         case BRANCH_next_fail: /* that branch failed; try the next, if any */
4346             if (do_cutgroup) {
4347                 do_cutgroup = 0;
4348                 no_final = 0;
4349             }
4350             REGCP_UNWIND(ST.cp);
4351             for (n = *PL_reglastparen; n > ST.lastparen; n--)
4352                 PL_regoffs[n].end = -1;
4353             *PL_reglastparen = n;
4354             /*dmq: *PL_reglastcloseparen = n; */
4355             scan = ST.next_branch;
4356             /* no more branches? */
4357             if (!scan || (OP(scan) != BRANCH && OP(scan) != BRANCHJ)) {
4358                 DEBUG_EXECUTE_r({
4359                     PerlIO_printf( Perl_debug_log,
4360                         "%*s  %sBRANCH failed...%s\n",
4361                         REPORT_CODE_OFF+depth*2, "",
4362                         PL_colors[4],
4363                         PL_colors[5] );
4364                 });
4365                 sayNO_SILENT;
4366             }
4367             continue; /* execute next BRANCH[J] op */
4368             /* NOTREACHED */
4369
4370         case MINMOD:
4371             minmod = 1;
4372             break;
4373
4374 #undef  ST
4375 #define ST st->u.curlym
4376
4377         case CURLYM:    /* /A{m,n}B/ where A is fixed-length */
4378
4379             /* This is an optimisation of CURLYX that enables us to push
4380              * only a single backtracking state, no matter now many matches
4381              * there are in {m,n}. It relies on the pattern being constant
4382              * length, with no parens to influence future backrefs
4383              */
4384
4385             ST.me = scan;
4386             scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
4387
4388             /* if paren positive, emulate an OPEN/CLOSE around A */
4389             if (ST.me->flags) {
4390                 U32 paren = ST.me->flags;
4391                 if (paren > PL_regsize)
4392                     PL_regsize = paren;
4393                 if (paren > *PL_reglastparen)
4394                     *PL_reglastparen = paren;
4395                 scan += NEXT_OFF(scan); /* Skip former OPEN. */
4396             }
4397             ST.A = scan;
4398             ST.B = next;
4399             ST.alen = 0;
4400             ST.count = 0;
4401             ST.minmod = minmod;
4402             minmod = 0;
4403             ST.c1 = CHRTEST_UNINIT;
4404             REGCP_SET(ST.cp);
4405
4406             if (!(ST.minmod ? ARG1(ST.me) : ARG2(ST.me))) /* min/max */
4407                 goto curlym_do_B;
4408
4409           curlym_do_A: /* execute the A in /A{m,n}B/  */
4410             PL_reginput = locinput;
4411             PUSH_YES_STATE_GOTO(CURLYM_A, ST.A); /* match A */
4412             /* NOTREACHED */
4413
4414         case CURLYM_A: /* we've just matched an A */
4415             locinput = st->locinput;
4416             nextchr = UCHARAT(locinput);
4417
4418             ST.count++;
4419             /* after first match, determine A's length: u.curlym.alen */
4420             if (ST.count == 1) {
4421                 if (PL_reg_match_utf8) {
4422                     char *s = locinput;
4423                     while (s < PL_reginput) {
4424                         ST.alen++;
4425                         s += UTF8SKIP(s);
4426                     }
4427                 }
4428                 else {
4429                     ST.alen = PL_reginput - locinput;
4430                 }
4431                 if (ST.alen == 0)
4432                     ST.count = ST.minmod ? ARG1(ST.me) : ARG2(ST.me);
4433             }
4434             DEBUG_EXECUTE_r(
4435                 PerlIO_printf(Perl_debug_log,
4436                           "%*s  CURLYM now matched %"IVdf" times, len=%"IVdf"...\n",
4437                           (int)(REPORT_CODE_OFF+(depth*2)), "",
4438                           (IV) ST.count, (IV)ST.alen)
4439             );
4440
4441             locinput = PL_reginput;
4442
4443             if (cur_eval && cur_eval->u.eval.close_paren &&
4444                 cur_eval->u.eval.close_paren == (U32)ST.me->flags)
4445                 goto fake_end;
4446
4447             if ( ST.count < (ST.minmod ? ARG1(ST.me) : ARG2(ST.me)) )
4448                 goto curlym_do_A; /* try to match another A */
4449             goto curlym_do_B; /* try to match B */
4450
4451         case CURLYM_A_fail: /* just failed to match an A */
4452             REGCP_UNWIND(ST.cp);
4453
4454             if (ST.minmod || ST.count < ARG1(ST.me) /* min*/
4455                 || (cur_eval && cur_eval->u.eval.close_paren &&
4456                     cur_eval->u.eval.close_paren == (U32)ST.me->flags))
4457                 sayNO;
4458
4459           curlym_do_B: /* execute the B in /A{m,n}B/  */
4460             PL_reginput = locinput;
4461             if (ST.c1 == CHRTEST_UNINIT) {
4462                 /* calculate c1 and c2 for possible match of 1st char
4463                  * following curly */
4464                 ST.c1 = ST.c2 = CHRTEST_VOID;
4465                 if (HAS_TEXT(ST.B) || JUMPABLE(ST.B)) {
4466                     regnode *text_node = ST.B;
4467                     if (! HAS_TEXT(text_node))
4468                         FIND_NEXT_IMPT(text_node);
4469                     /* this used to be
4470
4471                         (HAS_TEXT(text_node) && PL_regkind[OP(text_node)] == EXACT)
4472
4473                         But the former is redundant in light of the latter.
4474
4475                         if this changes back then the macro for
4476                         IS_TEXT and friends need to change.
4477                      */
4478                     if (PL_regkind[OP(text_node)] == EXACT)
4479                     {
4480
4481                         ST.c1 = (U8)*STRING(text_node);
4482                         ST.c2 =
4483                             (IS_TEXTF(text_node))
4484                             ? PL_fold[ST.c1]
4485                             : (IS_TEXTFL(text_node))
4486                                 ? PL_fold_locale[ST.c1]
4487                                 : ST.c1;
4488                     }
4489                 }
4490             }
4491
4492             DEBUG_EXECUTE_r(
4493                 PerlIO_printf(Perl_debug_log,
4494                     "%*s  CURLYM trying tail with matches=%"IVdf"...\n",
4495                     (int)(REPORT_CODE_OFF+(depth*2)),
4496                     "", (IV)ST.count)
4497                 );
4498             if (ST.c1 != CHRTEST_VOID
4499                     && UCHARAT(PL_reginput) != ST.c1
4500                     && UCHARAT(PL_reginput) != ST.c2)
4501             {
4502                 /* simulate B failing */
4503                 DEBUG_OPTIMISE_r(
4504                     PerlIO_printf(Perl_debug_log,
4505                         "%*s  CURLYM Fast bail c1=%"IVdf" c2=%"IVdf"\n",
4506                         (int)(REPORT_CODE_OFF+(depth*2)),"",
4507                         (IV)ST.c1,(IV)ST.c2
4508                 ));
4509                 state_num = CURLYM_B_fail;
4510                 goto reenter_switch;
4511             }
4512
4513             if (ST.me->flags) {
4514                 /* mark current A as captured */
4515                 I32 paren = ST.me->flags;
4516                 if (ST.count) {
4517                     PL_regoffs[paren].start
4518                         = HOPc(PL_reginput, -ST.alen) - PL_bostr;
4519                     PL_regoffs[paren].end = PL_reginput - PL_bostr;
4520                     /*dmq: *PL_reglastcloseparen = paren; */
4521                 }
4522                 else
4523                     PL_regoffs[paren].end = -1;
4524                 if (cur_eval && cur_eval->u.eval.close_paren &&
4525                     cur_eval->u.eval.close_paren == (U32)ST.me->flags)
4526                 {
4527                     if (ST.count)
4528                         goto fake_end;
4529                     else
4530                         sayNO;
4531                 }
4532             }
4533
4534             PUSH_STATE_GOTO(CURLYM_B, ST.B); /* match B */
4535             /* NOTREACHED */
4536
4537         case CURLYM_B_fail: /* just failed to match a B */
4538             REGCP_UNWIND(ST.cp);
4539             if (ST.minmod) {
4540                 if (ST.count == ARG2(ST.me) /* max */)
4541                     sayNO;
4542                 goto curlym_do_A; /* try to match a further A */
4543             }
4544             /* backtrack one A */
4545             if (ST.count == ARG1(ST.me) /* min */)
4546                 sayNO;
4547             ST.count--;
4548             locinput = HOPc(locinput, -ST.alen);
4549             goto curlym_do_B; /* try to match B */
4550
4551 #undef ST
4552 #define ST st->u.curly
4553
4554 #define CURLY_SETPAREN(paren, success) \
4555     if (paren) { \
4556         if (success) { \
4557             PL_regoffs[paren].start = HOPc(locinput, -1) - PL_bostr; \
4558             PL_regoffs[paren].end = locinput - PL_bostr; \
4559             *PL_reglastcloseparen = paren; \
4560         } \
4561         else \
4562             PL_regoffs[paren].end = -1; \
4563     }
4564
4565         case STAR:              /*  /A*B/ where A is width 1 */
4566             ST.paren = 0;
4567             ST.min = 0;
4568             ST.max = REG_INFTY;
4569             scan = NEXTOPER(scan);
4570             goto repeat;
4571         case PLUS:              /*  /A+B/ where A is width 1 */
4572             ST.paren = 0;
4573             ST.min = 1;
4574             ST.max = REG_INFTY;
4575             scan = NEXTOPER(scan);
4576             goto repeat;
4577         case CURLYN:            /*  /(A){m,n}B/ where A is width 1 */
4578             ST.paren = scan->flags;     /* Which paren to set */
4579             if (ST.paren > PL_regsize)
4580                 PL_regsize = ST.paren;
4581             if (ST.paren > *PL_reglastparen)
4582                 *PL_reglastparen = ST.paren;
4583             ST.min = ARG1(scan);  /* min to match */
4584             ST.max = ARG2(scan);  /* max to match */
4585             if (cur_eval && cur_eval->u.eval.close_paren &&
4586                 cur_eval->u.eval.close_paren == (U32)ST.paren) {
4587                 ST.min=1;
4588                 ST.max=1;
4589             }
4590             scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
4591             goto repeat;
4592         case CURLY:             /*  /A{m,n}B/ where A is width 1 */
4593             ST.paren = 0;
4594             ST.min = ARG1(scan);  /* min to match */
4595             ST.max = ARG2(scan);  /* max to match */
4596             scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
4597           repeat:
4598             /*
4599             * Lookahead to avoid useless match attempts
4600             * when we know what character comes next.
4601             *
4602             * Used to only do .*x and .*?x, but now it allows
4603             * for )'s, ('s and (?{ ... })'s to be in the way
4604             * of the quantifier and the EXACT-like node.  -- japhy
4605             */
4606
4607             if (ST.min > ST.max) /* XXX make this a compile-time check? */
4608                 sayNO;
4609             if (HAS_TEXT(next) || JUMPABLE(next)) {
4610                 U8 *s;
4611                 regnode *text_node = next;
4612
4613                 if (! HAS_TEXT(text_node))
4614                     FIND_NEXT_IMPT(text_node);
4615
4616                 if (! HAS_TEXT(text_node))
4617                     ST.c1 = ST.c2 = CHRTEST_VOID;
4618                 else {
4619                     if ( PL_regkind[OP(text_node)] != EXACT ) {
4620                         ST.c1 = ST.c2 = CHRTEST_VOID;
4621                         goto assume_ok_easy;
4622                     }
4623                     else
4624                         s = (U8*)STRING(text_node);
4625
4626                     /*  Currently we only get here when
4627
4628                         PL_rekind[OP(text_node)] == EXACT
4629
4630                         if this changes back then the macro for IS_TEXT and
4631                         friends need to change. */
4632                     if (!UTF) {
4633                         ST.c2 = ST.c1 = *s;
4634                         if (IS_TEXTF(text_node))
4635                             ST.c2 = PL_fold[ST.c1];
4636                         else if (IS_TEXTFL(text_node))
4637                             ST.c2 = PL_fold_locale[ST.c1];
4638                     }
4639                     else { /* UTF */
4640                         if (IS_TEXTF(text_node)) {
4641                              STRLEN ulen1, ulen2;
4642                              U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
4643                              U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
4644
4645                              to_utf8_lower((U8*)s, tmpbuf1, &ulen1);
4646                              to_utf8_upper((U8*)s, tmpbuf2, &ulen2);
4647 #ifdef EBCDIC
4648                              ST.c1 = utf8n_to_uvchr(tmpbuf1, UTF8_MAXLEN, 0,
4649                                                     ckWARN(WARN_UTF8) ?
4650                                                     0 : UTF8_ALLOW_ANY);
4651                              ST.c2 = utf8n_to_uvchr(tmpbuf2, UTF8_MAXLEN, 0,
4652                                                     ckWARN(WARN_UTF8) ?
4653                                                     0 : UTF8_ALLOW_ANY);
4654 #else
4655                              ST.c1 = utf8n_to_uvuni(tmpbuf1, UTF8_MAXBYTES, 0,
4656                                                     uniflags);
4657                              ST.c2 = utf8n_to_uvuni(tmpbuf2, UTF8_MAXBYTES, 0,
4658                                                     uniflags);
4659 #endif
4660                         }
4661                         else {
4662                             ST.c2 = ST.c1 = utf8n_to_uvchr(s, UTF8_MAXBYTES, 0,
4663                                                      uniflags);
4664                         }
4665                     }
4666                 }
4667             }
4668             else
4669                 ST.c1 = ST.c2 = CHRTEST_VOID;
4670         assume_ok_easy:
4671
4672             ST.A = scan;
4673             ST.B = next;
4674             PL_reginput = locinput;
4675             if (minmod) {
4676                 minmod = 0;
4677                 if (ST.min && regrepeat(rex, ST.A, ST.min, depth) < ST.min)
4678                     sayNO;
4679                 ST.count = ST.min;
4680                 locinput = PL_reginput;
4681                 REGCP_SET(ST.cp);
4682                 if (ST.c1 == CHRTEST_VOID)
4683                     goto curly_try_B_min;
4684
4685                 ST.oldloc = locinput;
4686
4687                 /* set ST.maxpos to the furthest point along the
4688                  * string that could possibly match */
4689                 if  (ST.max == REG_INFTY) {
4690                     ST.maxpos = PL_regeol - 1;
4691                     if (do_utf8)
4692                         while (UTF8_IS_CONTINUATION(*(U8*)ST.maxpos))
4693                             ST.maxpos--;
4694                 }
4695                 else if (do_utf8) {
4696                     int m = ST.max - ST.min;
4697                     for (ST.maxpos = locinput;
4698                          m >0 && ST.maxpos + UTF8SKIP(ST.maxpos) <= PL_regeol; m--)
4699                         ST.maxpos += UTF8SKIP(ST.maxpos);
4700                 }
4701                 else {
4702                     ST.maxpos = locinput + ST.max - ST.min;
4703                     if (ST.maxpos >= PL_regeol)
4704                         ST.maxpos = PL_regeol - 1;
4705                 }
4706                 goto curly_try_B_min_known;
4707
4708             }
4709             else {
4710                 ST.count = regrepeat(rex, ST.A, ST.max, depth);
4711                 locinput = PL_reginput;
4712                 if (ST.count < ST.min)
4713                     sayNO;
4714                 if ((ST.count > ST.min)
4715                     && (PL_regkind[OP(ST.B)] == EOL) && (OP(ST.B) != MEOL))
4716                 {
4717                     /* A{m,n} must come at the end of the string, there's
4718                      * no point in backing off ... */
4719                     ST.min = ST.count;
4720                     /* ...except that $ and \Z can match before *and* after
4721                        newline at the end.  Consider "\n\n" =~ /\n+\Z\n/.
4722                        We may back off by one in this case. */
4723                     if (UCHARAT(PL_reginput - 1) == '\n' && OP(ST.B) != EOS)
4724                         ST.min--;
4725                 }
4726                 REGCP_SET(ST.cp);
4727                 goto curly_try_B_max;
4728             }
4729             /* NOTREACHED */
4730
4731
4732         case CURLY_B_min_known_fail:
4733             /* failed to find B in a non-greedy match where c1,c2 valid */
4734             if (ST.paren && ST.count)
4735                 PL_regoffs[ST.paren].end = -1;
4736
4737             PL_reginput = locinput;     /* Could be reset... */
4738             REGCP_UNWIND(ST.cp);
4739             /* Couldn't or didn't -- move forward. */
4740             ST.oldloc = locinput;
4741             if (do_utf8)
4742                 locinput += UTF8SKIP(locinput);
4743             else
4744                 locinput++;
4745             ST.count++;
4746           curly_try_B_min_known:
4747              /* find the next place where 'B' could work, then call B */
4748             {
4749                 int n;
4750                 if (do_utf8) {
4751                     n = (ST.oldloc == locinput) ? 0 : 1;
4752                     if (ST.c1 == ST.c2) {
4753                         STRLEN len;
4754                         /* set n to utf8_distance(oldloc, locinput) */
4755                         while (locinput <= ST.maxpos &&
4756                                utf8n_to_uvchr((U8*)locinput,
4757                                               UTF8_MAXBYTES, &len,
4758                                               uniflags) != (UV)ST.c1) {
4759                             locinput += len;
4760                             n++;
4761                         }
4762                     }
4763                     else {
4764                         /* set n to utf8_distance(oldloc, locinput) */
4765                         while (locinput <= ST.maxpos) {
4766                             STRLEN len;
4767                             const UV c = utf8n_to_uvchr((U8*)locinput,
4768                                                   UTF8_MAXBYTES, &len,
4769                                                   uniflags);
4770                             if (c == (UV)ST.c1 || c == (UV)ST.c2)
4771                                 break;
4772                             locinput += len;
4773                             n++;
4774                         }
4775                     }
4776                 }
4777                 else {
4778                     if (ST.c1 == ST.c2) {
4779                         while (locinput <= ST.maxpos &&
4780                                UCHARAT(locinput) != ST.c1)
4781                             locinput++;
4782                     }
4783                     else {
4784                         while (locinput <= ST.maxpos
4785                                && UCHARAT(locinput) != ST.c1
4786                                && UCHARAT(locinput) != ST.c2)
4787                             locinput++;
4788                     }
4789                     n = locinput - ST.oldloc;
4790                 }
4791                 if (locinput > ST.maxpos)
4792                     sayNO;
4793                 /* PL_reginput == oldloc now */
4794                 if (n) {
4795                     ST.count += n;
4796                     if (regrepeat(rex, ST.A, n, depth) < n)
4797                         sayNO;
4798                 }
4799                 PL_reginput = locinput;
4800                 CURLY_SETPAREN(ST.paren, ST.count);
4801                 if (cur_eval && cur_eval->u.eval.close_paren &&
4802                     cur_eval->u.eval.close_paren == (U32)ST.paren) {
4803                     goto fake_end;
4804                 }
4805                 PUSH_STATE_GOTO(CURLY_B_min_known, ST.B);
4806             }
4807             /* NOTREACHED */
4808
4809
4810         case CURLY_B_min_fail:
4811             /* failed to find B in a non-greedy match where c1,c2 invalid */
4812             if (ST.paren && ST.count)
4813                 PL_regoffs[ST.paren].end = -1;
4814
4815             REGCP_UNWIND(ST.cp);
4816             /* failed -- move forward one */
4817             PL_reginput = locinput;
4818             if (regrepeat(rex, ST.A, 1, depth)) {
4819                 ST.count++;
4820                 locinput = PL_reginput;
4821                 if (ST.count <= ST.max || (ST.max == REG_INFTY &&
4822                         ST.count > 0)) /* count overflow ? */
4823                 {
4824                   curly_try_B_min:
4825                     CURLY_SETPAREN(ST.paren, ST.count);
4826                     if (cur_eval && cur_eval->u.eval.close_paren &&
4827                         cur_eval->u.eval.close_paren == (U32)ST.paren) {
4828                         goto fake_end;
4829                     }
4830                     PUSH_STATE_GOTO(CURLY_B_min, ST.B);
4831                 }
4832             }
4833             sayNO;
4834             /* NOTREACHED */
4835
4836
4837         curly_try_B_max:
4838             /* a successful greedy match: now try to match B */
4839             if (cur_eval && cur_eval->u.eval.close_paren &&
4840                 cur_eval->u.eval.close_paren == (U32)ST.paren) {
4841                 goto fake_end;
4842             }
4843             {
4844                 UV c = 0;
4845                 if (ST.c1 != CHRTEST_VOID)
4846                     c = do_utf8 ? utf8n_to_uvchr((U8*)PL_reginput,
4847                                            UTF8_MAXBYTES, 0, uniflags)
4848                                 : (UV) UCHARAT(PL_reginput);
4849                 /* If it could work, try it. */
4850                 if (ST.c1 == CHRTEST_VOID || c == (UV)ST.c1 || c == (UV)ST.c2) {
4851                     CURLY_SETPAREN(ST.paren, ST.count);
4852                     PUSH_STATE_GOTO(CURLY_B_max, ST.B);
4853                     /* NOTREACHED */
4854                 }
4855             }
4856             /* FALL THROUGH */
4857         case CURLY_B_max_fail:
4858             /* failed to find B in a greedy match */
4859             if (ST.paren && ST.count)
4860                 PL_regoffs[ST.paren].end = -1;
4861
4862             REGCP_UNWIND(ST.cp);
4863             /*  back up. */
4864             if (--ST.count < ST.min)
4865                 sayNO;
4866             PL_reginput = locinput = HOPc(locinput, -1);
4867             goto curly_try_B_max;
4868
4869 #undef ST
4870
4871         case END:
4872             fake_end:
4873             if (cur_eval) {
4874                 /* we've just finished A in /(??{A})B/; now continue with B */
4875                 I32 tmpix;
4876                 st->u.eval.toggle_reg_flags
4877                             = cur_eval->u.eval.toggle_reg_flags;
4878                 PL_reg_flags ^= st->u.eval.toggle_reg_flags;
4879
4880                 st->u.eval.prev_rex = rex_sv;           /* inner */
4881                 SETREX(rex_sv,cur_eval->u.eval.prev_rex);
4882                 rex = (struct regexp *)SvANY(rex_sv);
4883                 rexi = RXi_GET(rex);
4884                 cur_curlyx = cur_eval->u.eval.prev_curlyx;
4885                 ReREFCNT_inc(rex_sv);
4886                 st->u.eval.cp = regcppush(0);   /* Save *all* the positions. */
4887                 REGCP_SET(st->u.eval.lastcp);
4888                 PL_reginput = locinput;
4889
4890                 /* Restore parens of the outer rex without popping the
4891                  * savestack */
4892                 tmpix = PL_savestack_ix;
4893                 PL_savestack_ix = cur_eval->u.eval.lastcp;
4894                 regcppop(rex);
4895                 PL_savestack_ix = tmpix;
4896
4897                 st->u.eval.prev_eval = cur_eval;
4898                 cur_eval = cur_eval->u.eval.prev_eval;
4899                 DEBUG_EXECUTE_r(
4900                     PerlIO_printf(Perl_debug_log, "%*s  EVAL trying tail ... %"UVxf"\n",
4901                                       REPORT_CODE_OFF+depth*2, "",PTR2UV(cur_eval)););
4902                 if ( nochange_depth )
4903                     nochange_depth--;
4904
4905                 PUSH_YES_STATE_GOTO(EVAL_AB,
4906                         st->u.eval.prev_eval->u.eval.B); /* match B */
4907             }
4908
4909             if (locinput < reginfo->till) {
4910                 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
4911                                       "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
4912                                       PL_colors[4],
4913                                       (long)(locinput - PL_reg_starttry),
4914                                       (long)(reginfo->till - PL_reg_starttry),
4915                                       PL_colors[5]));
4916
4917                 sayNO_SILENT;           /* Cannot match: too short. */
4918             }
4919             PL_reginput = locinput;     /* put where regtry can find it */
4920             sayYES;                     /* Success! */
4921
4922         case SUCCEED: /* successful SUSPEND/UNLESSM/IFMATCH/CURLYM */
4923             DEBUG_EXECUTE_r(
4924             PerlIO_printf(Perl_debug_log,
4925                 "%*s  %ssubpattern success...%s\n",
4926                 REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5]));
4927             PL_reginput = locinput;     /* put where regtry can find it */
4928             sayYES;                     /* Success! */
4929
4930 #undef  ST
4931 #define ST st->u.ifmatch
4932
4933         case SUSPEND:   /* (?>A) */
4934             ST.wanted = 1;
4935             PL_reginput = locinput;
4936             goto do_ifmatch;
4937
4938         case UNLESSM:   /* -ve lookaround: (?!A), or with flags, (?<!A) */
4939             ST.wanted = 0;
4940             goto ifmatch_trivial_fail_test;
4941
4942         case IFMATCH:   /* +ve lookaround: (?=A), or with flags, (?<=A) */
4943             ST.wanted = 1;
4944           ifmatch_trivial_fail_test:
4945             if (scan->flags) {
4946                 char * const s = HOPBACKc(locinput, scan->flags);
4947                 if (!s) {
4948                     /* trivial fail */
4949                     if (logical) {
4950                         logical = 0;
4951                         sw = 1 - (bool)ST.wanted;
4952                     }
4953                     else if (ST.wanted)
4954                         sayNO;
4955                     next = scan + ARG(scan);
4956                     if (next == scan)
4957                         next = NULL;
4958                     break;
4959                 }
4960                 PL_reginput = s;
4961             }
4962             else
4963                 PL_reginput = locinput;
4964
4965           do_ifmatch:
4966             ST.me = scan;
4967             ST.logical = logical;
4968             /* execute body of (?...A) */
4969             PUSH_YES_STATE_GOTO(IFMATCH_A, NEXTOPER(NEXTOPER(scan)));
4970             /* NOTREACHED */
4971
4972         case IFMATCH_A_fail: /* body of (?...A) failed */
4973             ST.wanted = !ST.wanted;
4974             /* FALL THROUGH */
4975
4976         case IFMATCH_A: /* body of (?...A) succeeded */
4977             if (ST.logical) {
4978                 sw = (bool)ST.wanted;
4979             }
4980             else if (!ST.wanted)
4981                 sayNO;
4982
4983             if (OP(ST.me) == SUSPEND)
4984                 locinput = PL_reginput;
4985             else {
4986                 locinput = PL_reginput = st->locinput;
4987                 nextchr = UCHARAT(locinput);
4988             }
4989             scan = ST.me + ARG(ST.me);
4990             if (scan == ST.me)
4991                 scan = NULL;
4992             continue; /* execute B */
4993
4994 #undef ST
4995
4996         case LONGJMP:
4997             next = scan + ARG(scan);
4998             if (next == scan)
4999                 next = NULL;
5000             break;
5001         case COMMIT:
5002             reginfo->cutpoint = PL_regeol;
5003             /* FALLTHROUGH */
5004         case PRUNE:
5005             PL_reginput = locinput;
5006             if (!scan->flags)
5007                 sv_yes_mark = sv_commit = (SV*)rexi->data->data[ ARG( scan ) ];
5008             PUSH_STATE_GOTO(COMMIT_next,next);
5009             /* NOTREACHED */
5010         case COMMIT_next_fail:
5011             no_final = 1;
5012             /* FALLTHROUGH */
5013         case OPFAIL:
5014             sayNO;
5015             /* NOTREACHED */
5016
5017 #define ST st->u.mark
5018         case MARKPOINT:
5019             ST.prev_mark = mark_state;
5020             ST.mark_name = sv_commit = sv_yes_mark
5021                 = (SV*)rexi->data->data[ ARG( scan ) ];
5022             mark_state = st;
5023             ST.mark_loc = PL_reginput = locinput;
5024             PUSH_YES_STATE_GOTO(MARKPOINT_next,next);
5025             /* NOTREACHED */
5026         case MARKPOINT_next:
5027             mark_state = ST.prev_mark;
5028             sayYES;
5029             /* NOTREACHED */
5030         case MARKPOINT_next_fail:
5031             if (popmark && sv_eq(ST.mark_name,popmark))
5032             {
5033                 if (ST.mark_loc > startpoint)
5034                     reginfo->cutpoint = HOPBACKc(ST.mark_loc, 1);
5035                 popmark = NULL; /* we found our mark */
5036                 sv_commit = ST.mark_name;
5037
5038                 DEBUG_EXECUTE_r({
5039                         PerlIO_printf(Perl_debug_log,
5040                             "%*s  %ssetting cutpoint to mark:%"SVf"...%s\n",
5041                             REPORT_CODE_OFF+depth*2, "",
5042                             PL_colors[4], SVfARG(sv_commit), PL_colors[5]);
5043                 });
5044             }
5045             mark_state = ST.prev_mark;
5046             sv_yes_mark = mark_state ?
5047                 mark_state->u.mark.mark_name : NULL;
5048             sayNO;
5049             /* NOTREACHED */
5050         case SKIP:
5051             PL_reginput = locinput;
5052             if (scan->flags) {
5053                 /* (*SKIP) : if we fail we cut here*/
5054                 ST.mark_name = NULL;
5055                 ST.mark_loc = locinput;
5056                 PUSH_STATE_GOTO(SKIP_next,next);
5057             } else {
5058                 /* (*SKIP:NAME) : if there is a (*MARK:NAME) fail where it was,
5059                    otherwise do nothing.  Meaning we need to scan
5060                  */
5061                 regmatch_state *cur = mark_state;
5062                 SV *find = (SV*)rexi->data->data[ ARG( scan ) ];
5063
5064                 while (cur) {
5065                     if ( sv_eq( cur->u.mark.mark_name,
5066                                 find ) )
5067                     {
5068                         ST.mark_name = find;
5069                         PUSH_STATE_GOTO( SKIP_next, next );
5070                     }
5071                     cur = cur->u.mark.prev_mark;
5072                 }
5073             }
5074             /* Didn't find our (*MARK:NAME) so ignore this (*SKIP:NAME) */
5075             break;
5076         case SKIP_next_fail:
5077             if (ST.mark_name) {
5078                 /* (*CUT:NAME) - Set up to search for the name as we
5079                    collapse the stack*/
5080                 popmark = ST.mark_name;
5081             } else {
5082                 /* (*CUT) - No name, we cut here.*/
5083                 if (ST.mark_loc > startpoint)
5084                     reginfo->cutpoint = HOPBACKc(ST.mark_loc, 1);
5085                 /* but we set sv_commit to latest mark_name if there
5086                    is one so they can test to see how things lead to this
5087                    cut */
5088                 if (mark_state)
5089                     sv_commit=mark_state->u.mark.mark_name;
5090             }
5091             no_final = 1;
5092             sayNO;
5093             /* NOTREACHED */
5094 #undef ST
5095         case FOLDCHAR:
5096             n = ARG(scan);
5097             if ( n == (U32)what_len_TRICKYFOLD(locinput,do_utf8,ln) ) {
5098                 locinput += ln;
5099             } else if ( 0xDF == n && !do_utf8 && !UTF ) {
5100                 sayNO;
5101             } else  {
5102                 U8 folded[UTF8_MAXBYTES_CASE+1];
5103                 STRLEN foldlen;
5104                 const char * const l = locinput;
5105                 char *e = PL_regeol;
5106                 to_uni_fold(n, folded, &foldlen);
5107
5108                 if (ibcmp_utf8((const char*) folded, 0,  foldlen, 1,
5109                                l, &e, 0,  do_utf8)) {
5110                         sayNO;
5111                 }
5112                 locinput = e;
5113             }
5114             nextchr = UCHARAT(locinput);
5115             break;
5116         case LNBREAK:
5117             if ((n=is_LNBREAK(locinput,do_utf8))) {
5118                 locinput += n;
5119                 nextchr = UCHARAT(locinput);
5120             } else
5121                 sayNO;
5122             break;
5123
5124 #define CASE_CLASS(nAmE)                              \
5125         case nAmE:                                    \
5126             if ((n=is_##nAmE(locinput,do_utf8))) {    \
5127                 locinput += n;                        \
5128                 nextchr = UCHARAT(locinput);          \
5129             } else                                    \
5130                 sayNO;                                \
5131             break;                                    \
5132         case N##nAmE:                                 \
5133             if ((n=is_##nAmE(locinput,do_utf8))) {    \
5134                 sayNO;                                \
5135             } else {                                  \
5136                 locinput += UTF8SKIP(locinput);       \
5137                 nextchr = UCHARAT(locinput);          \
5138             }                                         \
5139             break
5140
5141         CASE_CLASS(VERTWS);
5142         CASE_CLASS(HORIZWS);
5143 #undef CASE_CLASS
5144
5145         default:
5146             PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
5147                           PTR2UV(scan), OP(scan));
5148             Perl_croak(aTHX_ "regexp memory corruption");
5149
5150         } /* end switch */
5151
5152         /* switch break jumps here */
5153         scan = next; /* prepare to execute the next op and ... */
5154         continue;    /* ... jump back to the top, reusing st */
5155         /* NOTREACHED */
5156
5157       push_yes_state:
5158         /* push a state that backtracks on success */
5159         st->u.yes.prev_yes_state = yes_state;
5160         yes_state = st;
5161         /* FALL THROUGH */
5162       push_state:
5163         /* push a new regex state, then continue at scan  */
5164         {
5165             regmatch_state *newst;
5166
5167             DEBUG_STACK_r({
5168                 regmatch_state *cur = st;
5169                 regmatch_state *curyes = yes_state;
5170                 int curd = depth;
5171                 regmatch_slab *slab = PL_regmatch_slab;
5172                 for (;curd > -1;cur--,curd--) {
5173                     if (cur < SLAB_FIRST(slab)) {
5174                         slab = slab->prev;
5175                         cur = SLAB_LAST(slab);
5176                     }
5177                     PerlIO_printf(Perl_error_log, "%*s#%-3d %-10s %s\n",
5178                         REPORT_CODE_OFF + 2 + depth * 2,"",
5179                         curd, PL_reg_name[cur->resume_state],
5180                         (curyes == cur) ? "yes" : ""
5181                     );
5182                     if (curyes == cur)
5183                         curyes = cur->u.yes.prev_yes_state;
5184                 }
5185             } else
5186                 DEBUG_STATE_pp("push")
5187             );
5188             depth++;
5189             st->locinput = locinput;
5190             newst = st+1;
5191             if (newst >  SLAB_LAST(PL_regmatch_slab))
5192                 newst = S_push_slab(aTHX);
5193             PL_regmatch_state = newst;
5194
5195             locinput = PL_reginput;
5196             nextchr = UCHARAT(locinput);
5197             st = newst;
5198             continue;
5199             /* NOTREACHED */
5200         }
5201     }
5202
5203     /*
5204     * We get here only if there's trouble -- normally "case END" is
5205     * the terminating point.
5206     */
5207     Perl_croak(aTHX_ "corrupted regexp pointers");
5208     /*NOTREACHED*/
5209     sayNO;
5210
5211 yes:
5212     if (yes_state) {
5213         /* we have successfully completed a subexpression, but we must now
5214          * pop to the state marked by yes_state and continue from there */
5215         assert(st != yes_state);
5216 #ifdef DEBUGGING
5217         while (st != yes_state) {
5218             st--;
5219             if (st < SLAB_FIRST(PL_regmatch_slab)) {
5220                 PL_regmatch_slab = PL_regmatch_slab->prev;
5221                 st = SLAB_LAST(PL_regmatch_slab);
5222             }
5223             DEBUG_STATE_r({
5224                 if (no_final) {
5225                     DEBUG_STATE_pp("pop (no final)");
5226                 } else {
5227                     DEBUG_STATE_pp("pop (yes)");
5228                 }
5229             });
5230             depth--;
5231         }
5232 #else
5233         while (yes_state < SLAB_FIRST(PL_regmatch_slab)
5234             || yes_state > SLAB_LAST(PL_regmatch_slab))
5235         {
5236             /* not in this slab, pop slab */
5237             depth -= (st - SLAB_FIRST(PL_regmatch_slab) + 1);
5238             PL_regmatch_slab = PL_regmatch_slab->prev;
5239             st = SLAB_LAST(PL_regmatch_slab);
5240         }
5241         depth -= (st - yes_state);
5242 #endif
5243         st = yes_state;
5244         yes_state = st->u.yes.prev_yes_state;
5245         PL_regmatch_state = st;
5246
5247         if (no_final) {
5248             locinput= st->locinput;
5249             nextchr = UCHARAT(locinput);
5250         }
5251         state_num = st->resume_state + no_final;
5252         goto reenter_switch;
5253     }
5254
5255     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
5256                           PL_colors[4], PL_colors[5]));
5257
5258     if (PL_reg_eval_set) {
5259         /* each successfully executed (?{...}) block does the equivalent of
5260          *   local $^R = do {...}
5261          * When popping the save stack, all these locals would be undone;
5262          * bypass this by setting the outermost saved $^R to the latest
5263          * value */
5264         if (oreplsv != GvSV(PL_replgv))
5265             sv_setsv(oreplsv, GvSV(PL_replgv));
5266     }
5267     result = 1;
5268     goto final_exit;
5269
5270 no:
5271     DEBUG_EXECUTE_r(
5272         PerlIO_printf(Perl_debug_log,
5273             "%*s  %sfailed...%s\n",
5274             REPORT_CODE_OFF+depth*2, "",
5275             PL_colors[4], PL_colors[5])
5276         );
5277
5278 no_silent:
5279     if (no_final) {
5280         if (yes_state) {
5281             goto yes;
5282         } else {
5283             goto final_exit;
5284         }
5285     }
5286     if (depth) {
5287         /* there's a previous state to backtrack to */
5288         st--;
5289         if (st < SLAB_FIRST(PL_regmatch_slab)) {
5290             PL_regmatch_slab = PL_regmatch_slab->prev;
5291             st = SLAB_LAST(PL_regmatch_slab);
5292         }
5293         PL_regmatch_state = st;
5294         locinput= st->locinput;
5295         nextchr = UCHARAT(locinput);
5296
5297         DEBUG_STATE_pp("pop");
5298         depth--;
5299         if (yes_state == st)
5300             yes_state = st->u.yes.prev_yes_state;
5301
5302         state_num = st->resume_state + 1; /* failure = success + 1 */
5303         goto reenter_switch;
5304     }
5305     result = 0;
5306
5307   final_exit:
5308     if (rex->intflags & PREGf_VERBARG_SEEN) {
5309         SV *sv_err = get_sv("REGERROR", 1);
5310         SV *sv_mrk = get_sv("REGMARK", 1);
5311         if (result) {
5312             sv_commit = &PL_sv_no;
5313             if (!sv_yes_mark)
5314                 sv_yes_mark = &PL_sv_yes;
5315         } else {
5316             if (!sv_commit)
5317                 sv_commit = &PL_sv_yes;
5318             sv_yes_mark = &PL_sv_no;
5319         }
5320         sv_setsv(sv_err, sv_commit);
5321         sv_setsv(sv_mrk, sv_yes_mark);
5322     }
5323
5324     /* clean up; in particular, free all slabs above current one */
5325     LEAVE_SCOPE(oldsave);
5326
5327     return result;
5328 }
5329
5330 /*
5331  - regrepeat - repeatedly match something simple, report how many
5332  */
5333 /*
5334  * [This routine now assumes that it will only match on things of length 1.
5335  * That was true before, but now we assume scan - reginput is the count,
5336  * rather than incrementing count on every character.  [Er, except utf8.]]
5337  */
5338 STATIC I32
5339 S_regrepeat(pTHX_ const regexp *prog, const regnode *p, I32 max, int depth)
5340 {
5341     dVAR;
5342     register char *scan;
5343     register I32 c;
5344     register char *loceol = PL_regeol;
5345     register I32 hardcount = 0;
5346     register bool do_utf8 = PL_reg_match_utf8;
5347 #ifndef DEBUGGING
5348     PERL_UNUSED_ARG(depth);
5349 #endif
5350
5351     scan = PL_reginput;
5352     if (max == REG_INFTY)
5353         max = I32_MAX;
5354     else if (max < loceol - scan)
5355         loceol = scan + max;
5356     switch (OP(p)) {
5357     case REG_ANY:
5358         if (do_utf8) {
5359             loceol = PL_regeol;
5360             while (scan < loceol && hardcount < max && *scan != '\n') {
5361                 scan += UTF8SKIP(scan);
5362                 hardcount++;
5363             }
5364         } else {
5365             while (scan < loceol && *scan != '\n')
5366                 scan++;
5367         }
5368         break;
5369     case SANY:
5370         if (do_utf8) {
5371             loceol = PL_regeol;
5372             while (scan < loceol && hardcount < max) {
5373                 scan += UTF8SKIP(scan);
5374                 hardcount++;
5375             }
5376         }
5377         else
5378             scan = loceol;
5379         break;
5380     case CANY:
5381         scan = loceol;
5382         break;
5383     case EXACT:         /* length of string is 1 */
5384         c = (U8)*STRING(p);
5385         while (scan < loceol && UCHARAT(scan) == c)
5386             scan++;
5387         break;
5388     case EXACTF:        /* length of string is 1 */
5389         c = (U8)*STRING(p);
5390         while (scan < loceol &&
5391                (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
5392             scan++;
5393         break;
5394     case EXACTFL:       /* length of string is 1 */
5395         PL_reg_flags |= RF_tainted;
5396         c = (U8)*STRING(p);
5397         while (scan < loceol &&
5398                (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
5399             scan++;
5400         break;
5401     case ANYOF:
5402         if (do_utf8) {
5403             loceol = PL_regeol;
5404             while (hardcount < max && scan < loceol &&
5405                    reginclass(prog, p, (U8*)scan, 0, do_utf8)) {
5406                 scan += UTF8SKIP(scan);
5407                 hardcount++;
5408             }
5409         } else {
5410             while (scan < loceol && REGINCLASS(prog, p, (U8*)scan))
5411                 scan++;
5412         }
5413         break;
5414     case ALNUM:
5415         if (do_utf8) {
5416             loceol = PL_regeol;
5417             LOAD_UTF8_CHARCLASS_ALNUM();
5418             while (hardcount < max && scan < loceol &&
5419                    swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
5420                 scan += UTF8SKIP(scan);
5421                 hardcount++;
5422             }
5423         } else {
5424             while (scan < loceol && isALNUM(*scan))
5425                 scan++;
5426         }
5427         break;
5428     case ALNUML:
5429         PL_reg_flags |= RF_tainted;
5430         if (do_utf8) {
5431             loceol = PL_regeol;
5432             while (hardcount < max && scan < loceol &&
5433                    isALNUM_LC_utf8((U8*)scan)) {
5434                 scan += UTF8SKIP(scan);
5435                 hardcount++;
5436             }
5437         } else {
5438             while (scan < loceol && isALNUM_LC(*scan))
5439                 scan++;
5440         }
5441         break;
5442     case NALNUM:
5443         if (do_utf8) {
5444             loceol = PL_regeol;
5445             LOAD_UTF8_CHARCLASS_ALNUM();
5446             while (hardcount < max && scan < loceol &&
5447                    !swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
5448                 scan += UTF8SKIP(scan);
5449                 hardcount++;
5450             }
5451         } else {
5452             while (scan < loceol && !isALNUM(*scan))
5453                 scan++;
5454         }
5455         break;
5456     case NALNUML:
5457         PL_reg_flags |= RF_tainted;
5458         if (do_utf8) {
5459             loceol = PL_regeol;
5460             while (hardcount < max && scan < loceol &&
5461                    !isALNUM_LC_utf8((U8*)scan)) {
5462                 scan += UTF8SKIP(scan);
5463                 hardcount++;
5464             }
5465         } else {
5466             while (scan < loceol && !isALNUM_LC(*scan))
5467                 scan++;
5468         }
5469         break;
5470     case SPACE:
5471         if (do_utf8) {
5472             loceol = PL_regeol;
5473             LOAD_UTF8_CHARCLASS_SPACE();
5474             while (hardcount < max && scan < loceol &&
5475                    (*scan == ' ' ||
5476                     swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
5477                 scan += UTF8SKIP(scan);
5478                 hardcount++;
5479             }
5480         } else {
5481             while (scan < loceol && isSPACE(*scan))
5482                 scan++;
5483         }
5484         break;
5485     case SPACEL:
5486         PL_reg_flags |= RF_tainted;
5487         if (do_utf8) {
5488             loceol = PL_regeol;
5489             while (hardcount < max && scan < loceol &&
5490                    (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
5491                 scan += UTF8SKIP(scan);
5492                 hardcount++;
5493             }
5494         } else {
5495             while (scan < loceol && isSPACE_LC(*scan))
5496                 scan++;
5497         }
5498         break;
5499     case NSPACE:
5500         if (do_utf8) {
5501             loceol = PL_regeol;
5502             LOAD_UTF8_CHARCLASS_SPACE();
5503             while (hardcount < max && scan < loceol &&
5504                    !(*scan == ' ' ||
5505                      swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
5506                 scan += UTF8SKIP(scan);
5507                 hardcount++;
5508             }
5509         } else {
5510             while (scan < loceol && !isSPACE(*scan))
5511                 scan++;
5512         }
5513         break;
5514     case NSPACEL:
5515         PL_reg_flags |= RF_tainted;
5516         if (do_utf8) {
5517             loceol = PL_regeol;
5518             while (hardcount < max && scan < loceol &&
5519                    !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
5520                 scan += UTF8SKIP(scan);
5521                 hardcount++;
5522             }
5523         } else {
5524             while (scan < loceol && !isSPACE_LC(*scan))
5525                 scan++;
5526         }
5527         break;
5528     case DIGIT:
5529         if (do_utf8) {
5530             loceol = PL_regeol;
5531             LOAD_UTF8_CHARCLASS_DIGIT();
5532             while (hardcount < max && scan < loceol &&
5533                    swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
5534                 scan += UTF8SKIP(scan);
5535                 hardcount++;
5536             }
5537         } else {
5538             while (scan < loceol && isDIGIT(*scan))
5539                 scan++;
5540         }
5541         break;
5542     case NDIGIT:
5543         if (do_utf8) {
5544             loceol = PL_regeol;
5545             LOAD_UTF8_CHARCLASS_DIGIT();
5546             while (hardcount < max && scan < loceol &&
5547                    !swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
5548                 scan += UTF8SKIP(scan);
5549                 hardcount++;
5550             }
5551         } else {
5552             while (scan < loceol && !isDIGIT(*scan))
5553                 scan++;
5554         }
5555     case LNBREAK:
5556         if (do_utf8) {
5557             loceol = PL_regeol;
5558             while (hardcount < max && scan < loceol && (c=is_LNBREAK_utf8(scan))) {
5559                 scan += c;
5560                 hardcount++;
5561             }
5562         } else {
5563             /*
5564               LNBREAK can match two latin chars, which is ok,
5565               because we have a null terminated string, but we
5566               have to use hardcount in this situation
5567             */
5568             while (scan < loceol && (c=is_LNBREAK_latin1(scan)))  {
5569                 scan+=c;
5570                 hardcount++;
5571             }
5572         }
5573         break;
5574     case HORIZWS:
5575         if (do_utf8) {
5576             loceol = PL_regeol;
5577             while (hardcount < max && scan < loceol && (c=is_HORIZWS_utf8(scan))) {
5578                 scan += c;
5579                 hardcount++;
5580             }
5581         } else {
5582             while (scan < loceol && is_HORIZWS_latin1(scan))
5583                 scan++;
5584         }
5585         break;
5586     case NHORIZWS:
5587         if (do_utf8) {
5588             loceol = PL_regeol;
5589             while (hardcount < max && scan < loceol && !is_HORIZWS_utf8(scan)) {
5590                 scan += UTF8SKIP(scan);
5591                 hardcount++;
5592             }
5593         } else {
5594             while (scan < loceol && !is_HORIZWS_latin1(scan))
5595                 scan++;
5596
5597         }
5598         break;
5599     case VERTWS:
5600         if (do_utf8) {
5601             loceol = PL_regeol;
5602             while (hardcount < max && scan < loceol && (c=is_VERTWS_utf8(scan))) {
5603                 scan += c;
5604                 hardcount++;
5605             }
5606         } else {
5607             while (scan < loceol && is_VERTWS_latin1(scan))
5608                 scan++;
5609
5610         }
5611         break;
5612     case NVERTWS:
5613         if (do_utf8) {
5614             loceol = PL_regeol;
5615             while (hardcount < max && scan < loceol && !is_VERTWS_utf8(scan)) {
5616                 scan += UTF8SKIP(scan);
5617                 hardcount++;
5618             }
5619         } else {
5620             while (scan < loceol && !is_VERTWS_latin1(scan))
5621                 scan++;
5622
5623         }
5624         break;
5625
5626     default:            /* Called on something of 0 width. */
5627         break;          /* So match right here or not at all. */
5628     }
5629
5630     if (hardcount)
5631         c = hardcount;
5632     else
5633         c = scan - PL_reginput;
5634     PL_reginput = scan;
5635
5636     DEBUG_r({
5637         GET_RE_DEBUG_FLAGS_DECL;
5638         DEBUG_EXECUTE_r({
5639             SV * const prop = sv_newmortal();
5640             regprop(prog, prop, p);
5641             PerlIO_printf(Perl_debug_log,
5642                         "%*s  %s can match %"IVdf" times out of %"IVdf"...\n",
5643                         REPORT_CODE_OFF + depth*2, "", SvPVX_const(prop),(IV)c,(IV)max);
5644         });
5645     });
5646
5647     return(c);
5648 }
5649
5650
5651 #if !defined(PERL_IN_XSUB_RE) || defined(PLUGGABLE_RE_EXTENSION)
5652 /*
5653 - regclass_swash - prepare the utf8 swash
5654 */
5655
5656 SV *
5657 Perl_regclass_swash(pTHX_ const regexp *prog, register const regnode* node, bool doinit, SV** listsvp, SV **altsvp)
5658 {
5659     dVAR;
5660     SV *sw  = NULL;
5661     SV *si  = NULL;
5662     SV *alt = NULL;
5663     RXi_GET_DECL(prog,progi);
5664     const struct reg_data * const data = prog ? progi->data : NULL;
5665
5666     if (data && data->count) {
5667         const U32 n = ARG(node);
5668
5669         if (data->what[n] == 's') {
5670             SV * const rv = (SV*)data->data[n];
5671             AV * const av = (AV*)SvRV((SV*)rv);
5672             SV **const ary = AvARRAY(av);
5673             SV **a, **b;
5674
5675             /* See the end of regcomp.c:S_regclass() for
5676              * documentation of these array elements. */
5677
5678             si = *ary;
5679             a  = SvROK(ary[1]) ? &ary[1] : NULL;
5680             b  = SvTYPE(ary[2]) == SVt_PVAV ? &ary[2] : NULL;
5681
5682             if (a)
5683                 sw = *a;
5684             else if (si && doinit) {
5685                 sw = swash_init("utf8", "", si, 1, 0);
5686                 (void)av_store(av, 1, sw);
5687             }
5688             if (b)
5689                 alt = *b;
5690         }
5691     }
5692
5693     if (listsvp)
5694         *listsvp = si;
5695     if (altsvp)
5696         *altsvp  = alt;
5697
5698     return sw;
5699 }
5700 #endif
5701
5702 /*
5703  - reginclass - determine if a character falls into a character class
5704
5705   The n is the ANYOF regnode, the p is the target string, lenp
5706   is pointer to the maximum length of how far to go in the p
5707   (if the lenp is zero, UTF8SKIP(p) is used),
5708   do_utf8 tells whether the target string is in UTF-8.
5709
5710  */
5711
5712 STATIC bool
5713 S_reginclass(pTHX_ const regexp *prog, register const regnode *n, register const U8* p, STRLEN* lenp, register bool do_utf8)
5714 {
5715     dVAR;
5716     const char flags = ANYOF_FLAGS(n);
5717     bool match = FALSE;
5718     UV c = *p;
5719     STRLEN len = 0;
5720     STRLEN plen;
5721
5722     if (do_utf8 && !UTF8_IS_INVARIANT(c)) {
5723         c = utf8n_to_uvchr(p, UTF8_MAXBYTES, &len,
5724                 (UTF8_ALLOW_DEFAULT & UTF8_ALLOW_ANYUV) | UTF8_CHECK_ONLY);
5725                 /* see [perl #37836] for UTF8_ALLOW_ANYUV */
5726         if (len == (STRLEN)-1)
5727             Perl_croak(aTHX_ "Malformed UTF-8 character (fatal)");
5728     }
5729
5730     plen = lenp ? *lenp : UNISKIP(NATIVE_TO_UNI(c));
5731     if (do_utf8 || (flags & ANYOF_UNICODE)) {
5732         if (lenp)
5733             *lenp = 0;
5734         if (do_utf8 && !ANYOF_RUNTIME(n)) {
5735             if (len != (STRLEN)-1 && c < 256 && ANYOF_BITMAP_TEST(n, c))
5736                 match = TRUE;
5737         }
5738         if (!match && do_utf8 && (flags & ANYOF_UNICODE_ALL) && c >= 256)
5739             match = TRUE;
5740         if (!match) {
5741             AV *av;
5742             SV * const sw = regclass_swash(prog, n, TRUE, 0, (SV**)&av);
5743
5744             if (sw) {
5745                 if (swash_fetch(sw, p, do_utf8))
5746                     match = TRUE;
5747                 else if (flags & ANYOF_FOLD) {
5748                     if (!match && lenp && av) {
5749                         I32 i;
5750                         for (i = 0; i <= av_len(av); i++) {
5751                             SV* const sv = *av_fetch(av, i, FALSE);
5752                             STRLEN len;
5753                             const char * const s = SvPV_const(sv, len);
5754
5755                             if (len <= plen && memEQ(s, (char*)p, len)) {
5756                                 *lenp = len;
5757                                 match = TRUE;
5758                                 break;
5759                             }
5760                         }
5761                     }
5762                     if (!match) {
5763                         U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
5764                         STRLEN tmplen;
5765
5766                         to_utf8_fold(p, tmpbuf, &tmplen);
5767                         if (swash_fetch(sw, tmpbuf, do_utf8))
5768                             match = TRUE;
5769                     }
5770                 }
5771             }
5772         }
5773         if (match && lenp && *lenp == 0)
5774             *lenp = UNISKIP(NATIVE_TO_UNI(c));
5775     }
5776     if (!match && c < 256) {
5777         if (ANYOF_BITMAP_TEST(n, c))
5778             match = TRUE;
5779         else if (flags & ANYOF_FOLD) {
5780             U8 f;
5781
5782             if (flags & ANYOF_LOCALE) {
5783                 PL_reg_flags |= RF_tainted;
5784                 f = PL_fold_locale[c];
5785             }
5786             else
5787                 f = PL_fold[c];
5788             if (f != c && ANYOF_BITMAP_TEST(n, f))
5789                 match = TRUE;
5790         }
5791
5792         if (!match && (flags & ANYOF_CLASS)) {
5793             PL_reg_flags |= RF_tainted;
5794             if (
5795                 (ANYOF_CLASS_TEST(n, ANYOF_ALNUM)   &&  isALNUM_LC(c))  ||
5796                 (ANYOF_CLASS_TEST(n, ANYOF_NALNUM)  && !isALNUM_LC(c))  ||
5797                 (ANYOF_CLASS_TEST(n, ANYOF_SPACE)   &&  isSPACE_LC(c))  ||
5798                 (ANYOF_CLASS_TEST(n, ANYOF_NSPACE)  && !isSPACE_LC(c))  ||
5799                 (ANYOF_CLASS_TEST(n, ANYOF_DIGIT)   &&  isDIGIT_LC(c))  ||
5800                 (ANYOF_CLASS_TEST(n, ANYOF_NDIGIT)  && !isDIGIT_LC(c))  ||
5801                 (ANYOF_CLASS_TEST(n, ANYOF_ALNUMC)  &&  isALNUMC_LC(c)) ||
5802                 (ANYOF_CLASS_TEST(n, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
5803                 (ANYOF_CLASS_TEST(n, ANYOF_ALPHA)   &&  isALPHA_LC(c))  ||
5804                 (ANYOF_CLASS_TEST(n, ANYOF_NALPHA)  && !isALPHA_LC(c))  ||
5805                 (ANYOF_CLASS_TEST(n, ANYOF_ASCII)   &&  isASCII(c))     ||
5806                 (ANYOF_CLASS_TEST(n, ANYOF_NASCII)  && !isASCII(c))     ||
5807                 (ANYOF_CLASS_TEST(n, ANYOF_CNTRL)   &&  isCNTRL_LC(c))  ||
5808                 (ANYOF_CLASS_TEST(n, ANYOF_NCNTRL)  && !isCNTRL_LC(c))  ||
5809                 (ANYOF_CLASS_TEST(n, ANYOF_GRAPH)   &&  isGRAPH_LC(c))  ||
5810                 (ANYOF_CLASS_TEST(n, ANYOF_NGRAPH)  && !isGRAPH_LC(c))  ||
5811                 (ANYOF_CLASS_TEST(n, ANYOF_LOWER)   &&  isLOWER_LC(c))  ||
5812                 (ANYOF_CLASS_TEST(n, ANYOF_NLOWER)  && !isLOWER_LC(c))  ||
5813                 (ANYOF_CLASS_TEST(n, ANYOF_PRINT)   &&  isPRINT_LC(c))  ||
5814                 (ANYOF_CLASS_TEST(n, ANYOF_NPRINT)  && !isPRINT_LC(c))  ||
5815                 (ANYOF_CLASS_TEST(n, ANYOF_PUNCT)   &&  isPUNCT_LC(c))  ||
5816                 (ANYOF_CLASS_TEST(n, ANYOF_NPUNCT)  && !isPUNCT_LC(c))  ||
5817                 (ANYOF_CLASS_TEST(n, ANYOF_UPPER)   &&  isUPPER_LC(c))  ||
5818                 (ANYOF_CLASS_TEST(n, ANYOF_NUPPER)  && !isUPPER_LC(c))  ||
5819                 (ANYOF_CLASS_TEST(n, ANYOF_XDIGIT)  &&  isXDIGIT(c))    ||
5820                 (ANYOF_CLASS_TEST(n, ANYOF_NXDIGIT) && !isXDIGIT(c))    ||
5821                 (ANYOF_CLASS_TEST(n, ANYOF_PSXSPC)  &&  isPSXSPC(c))    ||
5822                 (ANYOF_CLASS_TEST(n, ANYOF_NPSXSPC) && !isPSXSPC(c))    ||
5823                 (ANYOF_CLASS_TEST(n, ANYOF_BLANK)   &&  isBLANK(c))     ||
5824                 (ANYOF_CLASS_TEST(n, ANYOF_NBLANK)  && !isBLANK(c))
5825                 ) /* How's that for a conditional? */
5826             {
5827                 match = TRUE;
5828             }
5829         }
5830     }
5831
5832     return (flags & ANYOF_INVERT) ? !match : match;
5833 }
5834
5835 STATIC U8 *
5836 S_reghop3(U8 *s, I32 off, const U8* lim)
5837 {
5838     dVAR;
5839     if (off >= 0) {
5840         while (off-- && s < lim) {
5841             /* XXX could check well-formedness here */
5842             s += UTF8SKIP(s);
5843         }
5844     }
5845     else {
5846         while (off++ && s > lim) {
5847             s--;
5848             if (UTF8_IS_CONTINUED(*s)) {
5849                 while (s > lim && UTF8_IS_CONTINUATION(*s))
5850                     s--;
5851             }
5852             /* XXX could check well-formedness here */
5853         }
5854     }
5855     return s;
5856 }
5857
5858 #ifdef XXX_dmq
5859 /* there are a bunch of places where we use two reghop3's that should
5860    be replaced with this routine. but since thats not done yet
5861    we ifdef it out - dmq
5862 */
5863 STATIC U8 *
5864 S_reghop4(U8 *s, I32 off, const U8* llim, const U8* rlim)
5865 {
5866     dVAR;
5867     if (off >= 0) {
5868         while (off-- && s < rlim) {
5869             /* XXX could check well-formedness here */
5870             s += UTF8SKIP(s);
5871         }
5872     }
5873     else {
5874         while (off++ && s > llim) {
5875             s--;
5876             if (UTF8_IS_CONTINUED(*s)) {
5877                 while (s > llim && UTF8_IS_CONTINUATION(*s))
5878                     s--;
5879             }
5880             /* XXX could check well-formedness here */
5881         }
5882     }
5883     return s;
5884 }
5885 #endif
5886
5887 STATIC U8 *
5888 S_reghopmaybe3(U8* s, I32 off, const U8* lim)
5889 {
5890     dVAR;
5891     if (off >= 0) {
5892         while (off-- && s < lim) {
5893             /* XXX could check well-formedness here */
5894             s += UTF8SKIP(s);
5895         }
5896         if (off >= 0)
5897             return NULL;
5898     }
5899     else {
5900         while (off++ && s > lim) {
5901             s--;
5902             if (UTF8_IS_CONTINUED(*s)) {
5903                 while (s > lim && UTF8_IS_CONTINUATION(*s))
5904                     s--;
5905             }
5906             /* XXX could check well-formedness here */
5907         }
5908         if (off <= 0)
5909             return NULL;
5910     }
5911     return s;
5912 }
5913
5914 static void
5915 restore_pos(pTHX_ void *arg)
5916 {
5917     dVAR;
5918     regexp * const rex = (regexp *)arg;
5919     if (PL_reg_eval_set) {
5920         if (PL_reg_oldsaved) {
5921             rex->subbeg = PL_reg_oldsaved;
5922             rex->sublen = PL_reg_oldsavedlen;
5923 #ifdef PERL_OLD_COPY_ON_WRITE
5924             rex->saved_copy = PL_nrs;
5925 #endif
5926             RXp_MATCH_COPIED_on(rex);
5927         }
5928         PL_reg_magic->mg_len = PL_reg_oldpos;
5929         PL_reg_eval_set = 0;
5930         PL_curpm = PL_reg_oldcurpm;
5931     }
5932 }
5933
5934 STATIC void
5935 S_to_utf8_substr(pTHX_ register regexp *prog)
5936 {
5937     int i = 1;
5938     do {
5939         if (prog->substrs->data[i].substr
5940             && !prog->substrs->data[i].utf8_substr) {
5941             SV* const sv = newSVsv(prog->substrs->data[i].substr);
5942             prog->substrs->data[i].utf8_substr = sv;
5943             sv_utf8_upgrade(sv);
5944             if (SvVALID(prog->substrs->data[i].substr)) {
5945                 const U8 flags = BmFLAGS(prog->substrs->data[i].substr);
5946                 if (flags & FBMcf_TAIL) {
5947                     /* Trim the trailing \n that fbm_compile added last
5948                        time.  */
5949                     SvCUR_set(sv, SvCUR(sv) - 1);
5950                     /* Whilst this makes the SV technically "invalid" (as its
5951                        buffer is no longer followed by "\0") when fbm_compile()
5952                        adds the "\n" back, a "\0" is restored.  */
5953                 }
5954                 fbm_compile(sv, flags);
5955             }
5956             if (prog->substrs->data[i].substr == prog->check_substr)
5957                 prog->check_utf8 = sv;
5958         }
5959     } while (i--);
5960 }
5961
5962 STATIC void
5963 S_to_byte_substr(pTHX_ register regexp *prog)
5964 {
5965     dVAR;
5966     int i = 1;
5967     do {
5968         if (prog->substrs->data[i].utf8_substr
5969             && !prog->substrs->data[i].substr) {
5970             SV* sv = newSVsv(prog->substrs->data[i].utf8_substr);
5971             if (sv_utf8_downgrade(sv, TRUE)) {
5972                 if (SvVALID(prog->substrs->data[i].utf8_substr)) {
5973                     const U8 flags
5974                         = BmFLAGS(prog->substrs->data[i].utf8_substr);
5975                     if (flags & FBMcf_TAIL) {
5976                         /* Trim the trailing \n that fbm_compile added last
5977                            time.  */
5978                         SvCUR_set(sv, SvCUR(sv) - 1);
5979                     }
5980                     fbm_compile(sv, flags);
5981                 }
5982             } else {
5983                 SvREFCNT_dec(sv);
5984                 sv = &PL_sv_undef;
5985             }
5986             prog->substrs->data[i].substr = sv;
5987             if (prog->substrs->data[i].utf8_substr == prog->check_utf8)
5988                 prog->check_substr = sv;
5989         }
5990     } while (i--);
5991 }
5992
5993 /*
5994  * Local variables:
5995  * c-indentation-style: bsd
5996  * c-basic-offset: 4
5997  * indent-tabs-mode: t
5998  * End:
5999  *
6000  * ex: set ts=8 sts=4 sw=4 noet:
6001  */