regexec.c

   1 /*    regexec.c
   2  */
   3
   4 /*
   5  * "One Ring to rule them all, One Ring to find them..."
   6  */
   7
   8 /* This file contains functions for executing a regular expression.  See
   9  * also regcomp.c which funnily enough, contains functions for compiling
  10  * a regular expression.
  11  *
  12  * This file is also copied at build time to ext/re/re_exec.c, where
  13  * it's built with -DPERL_EXT_RE_BUILD -DPERL_EXT_RE_DEBUG -DPERL_EXT.
  14  * This causes the main functions to be compiled under new names and with
  15  * debugging support added, which makes "use re 'debug'" work.
  16  */
  17
  18 /* NOTE: this is derived from Henry Spencer's regexp code, and should not
  19  * confused with the original package (see point 3 below).  Thanks, Henry!
  20  */
  21
  22 /* Additional note: this code is very heavily munged from Henry's version
  23  * in places.  In some spots I've traded clarity for efficiency, so don't
  24  * blame Henry for some of the lack of readability.
  25  */
  26
  27 /* The names of the functions have been changed from regcomp and
  28  * regexec to  pregcomp and pregexec in order to avoid conflicts
  29  * with the POSIX routines of the same names.
  30 */
  31
  32 #ifdef PERL_EXT_RE_BUILD
  33 #include "re_top.h"
  34 #endif
  35
  36 /*
  37  * pregcomp and pregexec -- regsub and regerror are not used in perl
  38  *
  39  *      Copyright (c) 1986 by University of Toronto.
  40  *      Written by Henry Spencer.  Not derived from licensed software.
  41  *
  42  *      Permission is granted to anyone to use this software for any
  43  *      purpose on any computer system, and to redistribute it freely,
  44  *      subject to the following restrictions:
  45  *
  46  *      1. The author is not responsible for the consequences of use of
  47  *              this software, no matter how awful, even if they arise
  48  *              from defects in it.
  49  *
  50  *      2. The origin of this software must not be misrepresented, either
  51  *              by explicit claim or by omission.
  52  *
  53  *      3. Altered versions must be plainly marked as such, and must not
  54  *              be misrepresented as being the original software.
  55  *
  56  ****    Alterations to Henry's code are...
  57  ****
  58  ****    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
  59  ****    2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007 by Larry Wall and others
  60  ****
  61  ****    You may distribute under the terms of either the GNU General Public
  62  ****    License or the Artistic License, as specified in the README file.
  63  *
  64  * Beware that some of this code is subtly aware of the way operator
  65  * precedence is structured in regular expressions.  Serious changes in
  66  * regular-expression syntax might require a total rethink.
  67  */
  68 #include "EXTERN.h"
  69 #define PERL_IN_REGEXEC_C
  70 #include "perl.h"
  71
  72 #ifdef PERL_IN_XSUB_RE
  73 #  include "re_comp.h"
  74 #else
  75 #  include "regcomp.h"
  76 #endif
  77
  78 #define RF_tainted      1               /* tainted information used? */
  79 #define RF_warned       2               /* warned about big count? */
  80
  81 #define RF_utf8         8               /* Pattern contains multibyte chars? */
  82
  83 #define UTF ((PL_reg_flags & RF_utf8) != 0)
  84
  85 #define RS_init         1               /* eval environment created */
  86 #define RS_set          2               /* replsv value is set */
  87
  88 #ifndef STATIC
  89 #define STATIC  static
  90 #endif
  91
  92 #define REGINCLASS(prog,p,c)  (ANYOF_FLAGS(p) ? reginclass(prog,p,c,0,0) : ANYOF_BITMAP_TEST(p,*(c)))
  93
  94 /*
  95  * Forwards.
  96  */
  97
  98 #define CHR_SVLEN(sv) (do_utf8 ? sv_len_utf8(sv) : SvCUR(sv))
  99 #define CHR_DIST(a,b) (PL_reg_match_utf8 ? utf8_distance(a,b) : a - b)
 100
 101 #define HOPc(pos,off) \
 102         (char *)(PL_reg_match_utf8 \
 103             ? reghop3((U8*)pos, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr)) \
 104             : (U8*)(pos + off))
 105 #define HOPBACKc(pos, off) \
 106         (char*)(PL_reg_match_utf8\
 107             ? reghopmaybe3((U8*)pos, -off, (U8*)PL_bostr) \
 108             : (pos - off >= PL_bostr)           \
 109                 ? (U8*)pos - off                \
 110                 : NULL)
 111
 112 #define HOP3(pos,off,lim) (PL_reg_match_utf8 ? reghop3((U8*)(pos), off, (U8*)(lim)) : (U8*)(pos + off))
 113 #define HOP3c(pos,off,lim) ((char*)HOP3(pos,off,lim))
 114
 115 #define LOAD_UTF8_CHARCLASS(class,str) STMT_START { \
 116     if (!CAT2(PL_utf8_,class)) { bool ok; ENTER; save_re_context(); ok=CAT2(is_utf8_,class)((const U8*)str); assert(ok); LEAVE; } } STMT_END
 117 #define LOAD_UTF8_CHARCLASS_ALNUM() LOAD_UTF8_CHARCLASS(alnum,"a")
 118 #define LOAD_UTF8_CHARCLASS_DIGIT() LOAD_UTF8_CHARCLASS(digit,"0")
 119 #define LOAD_UTF8_CHARCLASS_SPACE() LOAD_UTF8_CHARCLASS(space," ")
 120 #define LOAD_UTF8_CHARCLASS_MARK()  LOAD_UTF8_CHARCLASS(mark, "\xcd\x86")
 121
 122 /* TODO: Combine JUMPABLE and HAS_TEXT to cache OP(rn) */
 123
 124 /* for use after a quantifier and before an EXACT-like node -- japhy */
 125 /* it would be nice to rework regcomp.sym to generate this stuff. sigh */
 126 #define JUMPABLE(rn) (      \
 127     OP(rn) == OPEN ||       \
 128     (OP(rn) == CLOSE && (!cur_eval || cur_eval->u.eval.close_paren != ARG(rn))) || \
 129     OP(rn) == EVAL ||   \
 130     OP(rn) == SUSPEND || OP(rn) == IFMATCH || \
 131     OP(rn) == PLUS || OP(rn) == MINMOD || \
 132     OP(rn) == KEEPS || (PL_regkind[OP(rn)] == VERB) || \
 133     (PL_regkind[OP(rn)] == CURLY && ARG1(rn) > 0) \
 134 )
 135 #define IS_EXACT(rn) (PL_regkind[OP(rn)] == EXACT)
 136
 137 #define HAS_TEXT(rn) ( IS_EXACT(rn) || PL_regkind[OP(rn)] == REF )
 138
 139 #if 0
 140 /* Currently these are only used when PL_regkind[OP(rn)] == EXACT so
 141    we don't need this definition. */
 142 #define IS_TEXT(rn)   ( OP(rn)==EXACT   || OP(rn)==REF   || OP(rn)==NREF   )
 143 #define IS_TEXTF(rn)  ( OP(rn)==EXACTF  || OP(rn)==REFF  || OP(rn)==NREFF  )
 144 #define IS_TEXTFL(rn) ( OP(rn)==EXACTFL || OP(rn)==REFFL || OP(rn)==NREFFL )
 145
 146 #else
 147 /* ... so we use this as its faster. */
 148 #define IS_TEXT(rn)   ( OP(rn)==EXACT   )
 149 #define IS_TEXTF(rn)  ( OP(rn)==EXACTF  )
 150 #define IS_TEXTFL(rn) ( OP(rn)==EXACTFL )
 151
 152 #endif
 153
 154 /*
 155   Search for mandatory following text node; for lookahead, the text must
 156   follow but for lookbehind (rn->flags != 0) we skip to the next step.
 157 */
 158 #define FIND_NEXT_IMPT(rn) STMT_START { \
 159     while (JUMPABLE(rn)) { \
 160         const OPCODE type = OP(rn); \
 161         if (type == SUSPEND || PL_regkind[type] == CURLY) \
 162             rn = NEXTOPER(NEXTOPER(rn)); \
 163         else if (type == PLUS) \
 164             rn = NEXTOPER(rn); \
 165         else if (type == IFMATCH) \
 166             rn = (rn->flags == 0) ? NEXTOPER(NEXTOPER(rn)) : rn + ARG(rn); \
 167         else rn += NEXT_OFF(rn); \
 168     } \
 169 } STMT_END
 170
 171
 172 static void restore_pos(pTHX_ void *arg);
 173
 174 STATIC CHECKPOINT
 175 S_regcppush(pTHX_ I32 parenfloor)
 176 {
 177     dVAR;
 178     const int retval = PL_savestack_ix;
 179 #define REGCP_PAREN_ELEMS 4
 180     const int paren_elems_to_push = (PL_regsize - parenfloor) * REGCP_PAREN_ELEMS;
 181     int p;
 182     GET_RE_DEBUG_FLAGS_DECL;
 183
 184     if (paren_elems_to_push < 0)
 185         Perl_croak(aTHX_ "panic: paren_elems_to_push < 0");
 186
 187 #define REGCP_OTHER_ELEMS 7
 188     SSGROW(paren_elems_to_push + REGCP_OTHER_ELEMS);
 189
 190     for (p = PL_regsize; p > parenfloor; p--) {
 191 /* REGCP_PARENS_ELEMS are pushed per pairs of parentheses. */
 192         SSPUSHINT(PL_regoffs[p].end);
 193         SSPUSHINT(PL_regoffs[p].start);
 194         SSPUSHPTR(PL_reg_start_tmp[p]);
 195         SSPUSHINT(p);
 196         DEBUG_BUFFERS_r(PerlIO_printf(Perl_debug_log,
 197           "     saving \\%"UVuf" %"IVdf"(%"IVdf")..%"IVdf"\n",
 198                       (UV)p, (IV)PL_regoffs[p].start,
 199                       (IV)(PL_reg_start_tmp[p] - PL_bostr),
 200                       (IV)PL_regoffs[p].end
 201         ));
 202     }
 203 /* REGCP_OTHER_ELEMS are pushed in any case, parentheses or no. */
 204     SSPUSHPTR(PL_regoffs);
 205     SSPUSHINT(PL_regsize);
 206     SSPUSHINT(*PL_reglastparen);
 207     SSPUSHINT(*PL_reglastcloseparen);
 208     SSPUSHPTR(PL_reginput);
 209 #define REGCP_FRAME_ELEMS 2
 210 /* REGCP_FRAME_ELEMS are part of the REGCP_OTHER_ELEMS and
 211  * are needed for the regexp context stack bookkeeping. */
 212     SSPUSHINT(paren_elems_to_push + REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
 213     SSPUSHINT(SAVEt_REGCONTEXT); /* Magic cookie. */
 214
 215     return retval;
 216 }
 217
 218 /* These are needed since we do not localize EVAL nodes: */
 219 #define REGCP_SET(cp)                                           \
 220     DEBUG_STATE_r(                                              \
 221             PerlIO_printf(Perl_debug_log,                       \
 222                 "  Setting an EVAL scope, savestack=%"IVdf"\n", \
 223                 (IV)PL_savestack_ix));                          \
 224     cp = PL_savestack_ix
 225
 226 #define REGCP_UNWIND(cp)                                        \
 227     DEBUG_STATE_r(                                              \
 228         if (cp != PL_savestack_ix)                              \
 229             PerlIO_printf(Perl_debug_log,                       \
 230                 "  Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
 231                 (IV)(cp), (IV)PL_savestack_ix));                \
 232     regcpblow(cp)
 233
 234 STATIC char *
 235 S_regcppop(pTHX_ const regexp *rex)
 236 {
 237     dVAR;
 238     U32 i;
 239     char *input;
 240
 241     GET_RE_DEBUG_FLAGS_DECL;
 242
 243     /* Pop REGCP_OTHER_ELEMS before the parentheses loop starts. */
 244     i = SSPOPINT;
 245     assert(i == SAVEt_REGCONTEXT); /* Check that the magic cookie is there. */
 246     i = SSPOPINT; /* Parentheses elements to pop. */
 247     input = (char *) SSPOPPTR;
 248     *PL_reglastcloseparen = SSPOPINT;
 249     *PL_reglastparen = SSPOPINT;
 250     PL_regsize = SSPOPINT;
 251     PL_regoffs=(regexp_paren_pair *) SSPOPPTR;
 252
 253
 254     /* Now restore the parentheses context. */
 255     for (i -= (REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
 256          i > 0; i -= REGCP_PAREN_ELEMS) {
 257         I32 tmps;
 258         U32 paren = (U32)SSPOPINT;
 259         PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
 260         PL_regoffs[paren].start = SSPOPINT;
 261         tmps = SSPOPINT;
 262         if (paren <= *PL_reglastparen)
 263             PL_regoffs[paren].end = tmps;
 264         DEBUG_BUFFERS_r(
 265             PerlIO_printf(Perl_debug_log,
 266                           "     restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
 267                           (UV)paren, (IV)PL_regoffs[paren].start,
 268                           (IV)(PL_reg_start_tmp[paren] - PL_bostr),
 269                           (IV)PL_regoffs[paren].end,
 270                           (paren > *PL_reglastparen ? "(no)" : ""));
 271         );
 272     }
 273     DEBUG_BUFFERS_r(
 274         if (*PL_reglastparen + 1 <= rex->nparens) {
 275             PerlIO_printf(Perl_debug_log,
 276                           "     restoring \\%"IVdf"..\\%"IVdf" to undef\n",
 277                           (IV)(*PL_reglastparen + 1), (IV)rex->nparens);
 278         }
 279     );
 280 #if 1
 281     /* It would seem that the similar code in regtry()
 282      * already takes care of this, and in fact it is in
 283      * a better location to since this code can #if 0-ed out
 284      * but the code in regtry() is needed or otherwise tests
 285      * requiring null fields (pat.t#187 and split.t#{13,14}
 286      * (as of patchlevel 7877)  will fail.  Then again,
 287      * this code seems to be necessary or otherwise
 288      * this erroneously leaves $1 defined: "1" =~ /^(?:(\d)x)?\d$/
 289      * --jhi updated by dapm */
 290     for (i = *PL_reglastparen + 1; i <= rex->nparens; i++) {
 291         if (i > PL_regsize)
 292             PL_regoffs[i].start = -1;
 293         PL_regoffs[i].end = -1;
 294     }
 295 #endif
 296     return input;
 297 }
 298
 299 #define regcpblow(cp) LEAVE_SCOPE(cp)   /* Ignores regcppush()ed data. */
 300
 301 /*
 302  * pregexec and friends
 303  */
 304
 305 #ifndef PERL_IN_XSUB_RE
 306 /*
 307  - pregexec - match a regexp against a string
 308  */
 309 I32
 310 Perl_pregexec(pTHX_ REGEXP * const prog, char* stringarg, register char *strend,
 311          char *strbeg, I32 minend, SV *screamer, U32 nosave)
 312 /* strend: pointer to null at end of string */
 313 /* strbeg: real beginning of string */
 314 /* minend: end of match must be >=minend after stringarg. */
 315 /* nosave: For optimizations. */
 316 {
 317     return
 318         regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
 319                       nosave ? 0 : REXEC_COPY_STR);
 320 }
 321 #endif
 322
 323 /*
 324  * Need to implement the following flags for reg_anch:
 325  *
 326  * USE_INTUIT_NOML              - Useful to call re_intuit_start() first
 327  * USE_INTUIT_ML
 328  * INTUIT_AUTORITATIVE_NOML     - Can trust a positive answer
 329  * INTUIT_AUTORITATIVE_ML
 330  * INTUIT_ONCE_NOML             - Intuit can match in one location only.
 331  * INTUIT_ONCE_ML
 332  *
 333  * Another flag for this function: SECOND_TIME (so that float substrs
 334  * with giant delta may be not rechecked).
 335  */
 336
 337 /* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
 338
 339 /* If SCREAM, then SvPVX_const(sv) should be compatible with strpos and strend.
 340    Otherwise, only SvCUR(sv) is used to get strbeg. */
 341
 342 /* XXXX We assume that strpos is strbeg unless sv. */
 343
 344 /* XXXX Some places assume that there is a fixed substring.
 345         An update may be needed if optimizer marks as "INTUITable"
 346         RExen without fixed substrings.  Similarly, it is assumed that
 347         lengths of all the strings are no more than minlen, thus they
 348         cannot come from lookahead.
 349         (Or minlen should take into account lookahead.)
 350   NOTE: Some of this comment is not correct. minlen does now take account
 351   of lookahead/behind. Further research is required. -- demerphq
 352
 353 */
 354
 355 /* A failure to find a constant substring means that there is no need to make
 356    an expensive call to REx engine, thus we celebrate a failure.  Similarly,
 357    finding a substring too deep into the string means that less calls to
 358    regtry() should be needed.
 359
 360    REx compiler's optimizer found 4 possible hints:
 361         a) Anchored substring;
 362         b) Fixed substring;
 363         c) Whether we are anchored (beginning-of-line or \G);
 364         d) First node (of those at offset 0) which may distingush positions;
 365    We use a)b)d) and multiline-part of c), and try to find a position in the
 366    string which does not contradict any of them.
 367  */
 368
 369 /* Most of decisions we do here should have been done at compile time.
 370    The nodes of the REx which we used for the search should have been
 371    deleted from the finite automaton. */
 372
 373 char *
 374 Perl_re_intuit_start(pTHX_ REGEXP * const rx, SV *sv, char *strpos,
 375                      char *strend, const U32 flags, re_scream_pos_data *data)
 376 {
 377     dVAR;
 378     struct regexp *const prog = (struct regexp *)SvANY(rx);
 379     register I32 start_shift = 0;
 380     /* Should be nonnegative! */
 381     register I32 end_shift   = 0;
 382     register char *s;
 383     register SV *check;
 384     char *strbeg;
 385     char *t;
 386     const bool do_utf8 = (sv && SvUTF8(sv)) ? 1 : 0; /* if no sv we have to assume bytes */
 387     I32 ml_anch;
 388     register char *other_last = NULL;   /* other substr checked before this */
 389     char *check_at = NULL;              /* check substr found at this pos */
 390     const I32 multiline = prog->extflags & RXf_PMf_MULTILINE;
 391     RXi_GET_DECL(prog,progi);
 392 #ifdef DEBUGGING
 393     const char * const i_strpos = strpos;
 394 #endif
 395
 396     GET_RE_DEBUG_FLAGS_DECL;
 397
 398     RX_MATCH_UTF8_set(rx,do_utf8);
 399
 400     if (RX_UTF8(rx)) {
 401         PL_reg_flags |= RF_utf8;
 402     }
 403     DEBUG_EXECUTE_r(
 404         debug_start_match(rx, do_utf8, strpos, strend,
 405             sv ? "Guessing start of match in sv for"
 406                : "Guessing start of match in string for");
 407               );
 408
 409     /* CHR_DIST() would be more correct here but it makes things slow. */
 410     if (prog->minlen > strend - strpos) {
 411         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 412                               "String too short... [re_intuit_start]\n"));
 413         goto fail;
 414     }
 415
 416     strbeg = (sv && SvPOK(sv)) ? strend - SvCUR(sv) : strpos;
 417     PL_regeol = strend;
 418     if (do_utf8) {
 419         if (!prog->check_utf8 && prog->check_substr)
 420             to_utf8_substr(prog);
 421         check = prog->check_utf8;
 422     } else {
 423         if (!prog->check_substr && prog->check_utf8)
 424             to_byte_substr(prog);
 425         check = prog->check_substr;
 426     }
 427     if (check == &PL_sv_undef) {
 428         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 429                 "Non-utf8 string cannot match utf8 check string\n"));
 430         goto fail;
 431     }
 432     if (prog->extflags & RXf_ANCH) {    /* Match at beg-of-str or after \n */
 433         ml_anch = !( (prog->extflags & RXf_ANCH_SINGLE)
 434                      || ( (prog->extflags & RXf_ANCH_BOL)
 435                           && !multiline ) );    /* Check after \n? */
 436
 437         if (!ml_anch) {
 438           if ( !(prog->extflags & RXf_ANCH_GPOS) /* Checked by the caller */
 439                 && !(prog->intflags & PREGf_IMPLICIT) /* not a real BOL */
 440                /* SvCUR is not set on references: SvRV and SvPVX_const overlap */
 441                && sv && !SvROK(sv)
 442                && (strpos != strbeg)) {
 443               DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
 444               goto fail;
 445           }
 446           if (prog->check_offset_min == prog->check_offset_max &&
 447               !(prog->extflags & RXf_CANY_SEEN)) {
 448             /* Substring at constant offset from beg-of-str... */
 449             I32 slen;
 450
 451             s = HOP3c(strpos, prog->check_offset_min, strend);
 452
 453             if (SvTAIL(check)) {
 454                 slen = SvCUR(check);    /* >= 1 */
 455
 456                 if ( strend - s > slen || strend - s < slen - 1
 457                      || (strend - s == slen && strend[-1] != '\n')) {
 458                     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
 459                     goto fail_finish;
 460                 }
 461                 /* Now should match s[0..slen-2] */
 462                 slen--;
 463                 if (slen && (*SvPVX_const(check) != *s
 464                              || (slen > 1
 465                                  && memNE(SvPVX_const(check), s, slen)))) {
 466                   report_neq:
 467                     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
 468                     goto fail_finish;
 469                 }
 470             }
 471             else if (*SvPVX_const(check) != *s
 472                      || ((slen = SvCUR(check)) > 1
 473                          && memNE(SvPVX_const(check), s, slen)))
 474                 goto report_neq;
 475             check_at = s;
 476             goto success_at_start;
 477           }
 478         }
 479         /* Match is anchored, but substr is not anchored wrt beg-of-str. */
 480         s = strpos;
 481         start_shift = prog->check_offset_min; /* okay to underestimate on CC */
 482         end_shift = prog->check_end_shift;
 483
 484         if (!ml_anch) {
 485             const I32 end = prog->check_offset_max + CHR_SVLEN(check)
 486                                          - (SvTAIL(check) != 0);
 487             const I32 eshift = CHR_DIST((U8*)strend, (U8*)s) - end;
 488
 489             if (end_shift < eshift)
 490                 end_shift = eshift;
 491         }
 492     }
 493     else {                              /* Can match at random position */
 494         ml_anch = 0;
 495         s = strpos;
 496         start_shift = prog->check_offset_min;  /* okay to underestimate on CC */
 497         end_shift = prog->check_end_shift;
 498
 499         /* end shift should be non negative here */
 500     }
 501
 502 #ifdef QDEBUGGING       /* 7/99: reports of failure (with the older version) */
 503     if (end_shift < 0)
 504         Perl_croak(aTHX_ "panic: end_shift: %"IVdf" pattern:\n%s\n ",
 505                    (IV)end_shift, RX_PRECOMP(prog));
 506 #endif
 507
 508   restart:
 509     /* Find a possible match in the region s..strend by looking for
 510        the "check" substring in the region corrected by start/end_shift. */
 511
 512     {
 513         I32 srch_start_shift = start_shift;
 514         I32 srch_end_shift = end_shift;
 515         if (srch_start_shift < 0 && strbeg - s > srch_start_shift) {
 516             srch_end_shift -= ((strbeg - s) - srch_start_shift);
 517             srch_start_shift = strbeg - s;
 518         }
 519     DEBUG_OPTIMISE_MORE_r({
 520         PerlIO_printf(Perl_debug_log, "Check offset min: %"IVdf" Start shift: %"IVdf" End shift %"IVdf" Real End Shift: %"IVdf"\n",
 521             (IV)prog->check_offset_min,
 522             (IV)srch_start_shift,
 523             (IV)srch_end_shift,
 524             (IV)prog->check_end_shift);
 525     });
 526
 527     if (flags & REXEC_SCREAM) {
 528         I32 p = -1;                     /* Internal iterator of scream. */
 529         I32 * const pp = data ? data->scream_pos : &p;
 530
 531         if (PL_screamfirst[BmRARE(check)] >= 0
 532             || ( BmRARE(check) == '\n'
 533                  && (BmPREVIOUS(check) == SvCUR(check) - 1)
 534                  && SvTAIL(check) ))
 535             s = screaminstr(sv, check,
 536                             srch_start_shift + (s - strbeg), srch_end_shift, pp, 0);
 537         else
 538             goto fail_finish;
 539         /* we may be pointing at the wrong string */
 540         if (s && RXp_MATCH_COPIED(prog))
 541             s = strbeg + (s - SvPVX_const(sv));
 542         if (data)
 543             *data->scream_olds = s;
 544     }
 545     else {
 546         U8* start_point;
 547         U8* end_point;
 548         if (prog->extflags & RXf_CANY_SEEN) {
 549             start_point= (U8*)(s + srch_start_shift);
 550             end_point= (U8*)(strend - srch_end_shift);
 551         } else {
 552             start_point= HOP3(s, srch_start_shift, srch_start_shift < 0 ? strbeg : strend);
 553             end_point= HOP3(strend, -srch_end_shift, strbeg);
 554         }
 555         DEBUG_OPTIMISE_MORE_r({
 556             PerlIO_printf(Perl_debug_log, "fbm_instr len=%d str=<%.*s>\n",
 557                 (int)(end_point - start_point),
 558                 (int)(end_point - start_point) > 20 ? 20 : (int)(end_point - start_point),
 559                 start_point);
 560         });
 561
 562         s = fbm_instr( start_point, end_point,
 563                       check, multiline ? FBMrf_MULTILINE : 0);
 564     }
 565     }
 566     /* Update the count-of-usability, remove useless subpatterns,
 567         unshift s.  */
 568
 569     DEBUG_EXECUTE_r({
 570         RE_PV_QUOTED_DECL(quoted, do_utf8, PERL_DEBUG_PAD_ZERO(0),
 571             SvPVX_const(check), RE_SV_DUMPLEN(check), 30);
 572         PerlIO_printf(Perl_debug_log, "%s %s substr %s%s%s",
 573                           (s ? "Found" : "Did not find"),
 574             (check == (do_utf8 ? prog->anchored_utf8 : prog->anchored_substr)
 575                 ? "anchored" : "floating"),
 576             quoted,
 577             RE_SV_TAIL(check),
 578             (s ? " at offset " : "...\n") );
 579     });
 580
 581     if (!s)
 582         goto fail_finish;
 583     /* Finish the diagnostic message */
 584     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
 585
 586     /* XXX dmq: first branch is for positive lookbehind...
 587        Our check string is offset from the beginning of the pattern.
 588        So we need to do any stclass tests offset forward from that
 589        point. I think. :-(
 590      */
 591
 592
 593
 594     check_at=s;
 595
 596
 597     /* Got a candidate.  Check MBOL anchoring, and the *other* substr.
 598        Start with the other substr.
 599        XXXX no SCREAM optimization yet - and a very coarse implementation
 600        XXXX /ttx+/ results in anchored="ttx", floating="x".  floating will
 601                 *always* match.  Probably should be marked during compile...
 602        Probably it is right to do no SCREAM here...
 603      */
 604
 605     if (do_utf8 ? (prog->float_utf8 && prog->anchored_utf8)
 606                 : (prog->float_substr && prog->anchored_substr))
 607     {
 608         /* Take into account the "other" substring. */
 609         /* XXXX May be hopelessly wrong for UTF... */
 610         if (!other_last)
 611             other_last = strpos;
 612         if (check == (do_utf8 ? prog->float_utf8 : prog->float_substr)) {
 613           do_other_anchored:
 614             {
 615                 char * const last = HOP3c(s, -start_shift, strbeg);
 616                 char *last1, *last2;
 617                 char * const saved_s = s;
 618                 SV* must;
 619
 620                 t = s - prog->check_offset_max;
 621                 if (s - strpos > prog->check_offset_max  /* signed-corrected t > strpos */
 622                     && (!do_utf8
 623                         || ((t = (char*)reghopmaybe3((U8*)s, -(prog->check_offset_max), (U8*)strpos))
 624                             && t > strpos)))
 625                     NOOP;
 626                 else
 627                     t = strpos;
 628                 t = HOP3c(t, prog->anchored_offset, strend);
 629                 if (t < other_last)     /* These positions already checked */
 630                     t = other_last;
 631                 last2 = last1 = HOP3c(strend, -prog->minlen, strbeg);
 632                 if (last < last1)
 633                     last1 = last;
 634                 /* XXXX It is not documented what units *_offsets are in.
 635                    We assume bytes, but this is clearly wrong.
 636                    Meaning this code needs to be carefully reviewed for errors.
 637                    dmq.
 638                   */
 639
 640                 /* On end-of-str: see comment below. */
 641                 must = do_utf8 ? prog->anchored_utf8 : prog->anchored_substr;
 642                 if (must == &PL_sv_undef) {
 643                     s = (char*)NULL;
 644                     DEBUG_r(must = prog->anchored_utf8);        /* for debug */
 645                 }
 646                 else
 647                     s = fbm_instr(
 648                         (unsigned char*)t,
 649                         HOP3(HOP3(last1, prog->anchored_offset, strend)
 650                                 + SvCUR(must), -(SvTAIL(must)!=0), strbeg),
 651                         must,
 652                         multiline ? FBMrf_MULTILINE : 0
 653                     );
 654                 DEBUG_EXECUTE_r({
 655                     RE_PV_QUOTED_DECL(quoted, do_utf8, PERL_DEBUG_PAD_ZERO(0),
 656                         SvPVX_const(must), RE_SV_DUMPLEN(must), 30);
 657                     PerlIO_printf(Perl_debug_log, "%s anchored substr %s%s",
 658                         (s ? "Found" : "Contradicts"),
 659                         quoted, RE_SV_TAIL(must));
 660                 });
 661
 662
 663                 if (!s) {
 664                     if (last1 >= last2) {
 665                         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 666                                                 ", giving up...\n"));
 667                         goto fail_finish;
 668                     }
 669                     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 670                         ", trying floating at offset %ld...\n",
 671                         (long)(HOP3c(saved_s, 1, strend) - i_strpos)));
 672                     other_last = HOP3c(last1, prog->anchored_offset+1, strend);
 673                     s = HOP3c(last, 1, strend);
 674                     goto restart;
 675                 }
 676                 else {
 677                     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
 678                           (long)(s - i_strpos)));
 679                     t = HOP3c(s, -prog->anchored_offset, strbeg);
 680                     other_last = HOP3c(s, 1, strend);
 681                     s = saved_s;
 682                     if (t == strpos)
 683                         goto try_at_start;
 684                     goto try_at_offset;
 685                 }
 686             }
 687         }
 688         else {          /* Take into account the floating substring. */
 689             char *last, *last1;
 690             char * const saved_s = s;
 691             SV* must;
 692
 693             t = HOP3c(s, -start_shift, strbeg);
 694             last1 = last =
 695                 HOP3c(strend, -prog->minlen + prog->float_min_offset, strbeg);
 696             if (CHR_DIST((U8*)last, (U8*)t) > prog->float_max_offset)
 697                 last = HOP3c(t, prog->float_max_offset, strend);
 698             s = HOP3c(t, prog->float_min_offset, strend);
 699             if (s < other_last)
 700                 s = other_last;
 701  /* XXXX It is not documented what units *_offsets are in.  Assume bytes.  */
 702             must = do_utf8 ? prog->float_utf8 : prog->float_substr;
 703             /* fbm_instr() takes into account exact value of end-of-str
 704                if the check is SvTAIL(ed).  Since false positives are OK,
 705                and end-of-str is not later than strend we are OK. */
 706             if (must == &PL_sv_undef) {
 707                 s = (char*)NULL;
 708                 DEBUG_r(must = prog->float_utf8);       /* for debug message */
 709             }
 710             else
 711                 s = fbm_instr((unsigned char*)s,
 712                               (unsigned char*)last + SvCUR(must)
 713                                   - (SvTAIL(must)!=0),
 714                               must, multiline ? FBMrf_MULTILINE : 0);
 715             DEBUG_EXECUTE_r({
 716                 RE_PV_QUOTED_DECL(quoted, do_utf8, PERL_DEBUG_PAD_ZERO(0),
 717                     SvPVX_const(must), RE_SV_DUMPLEN(must), 30);
 718                 PerlIO_printf(Perl_debug_log, "%s floating substr %s%s",
 719                     (s ? "Found" : "Contradicts"),
 720                     quoted, RE_SV_TAIL(must));
 721             });
 722             if (!s) {
 723                 if (last1 == last) {
 724                     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 725                                             ", giving up...\n"));
 726                     goto fail_finish;
 727                 }
 728                 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 729                     ", trying anchored starting at offset %ld...\n",
 730                     (long)(saved_s + 1 - i_strpos)));
 731                 other_last = last;
 732                 s = HOP3c(t, 1, strend);
 733                 goto restart;
 734             }
 735             else {
 736                 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
 737                       (long)(s - i_strpos)));
 738                 other_last = s; /* Fix this later. --Hugo */
 739                 s = saved_s;
 740                 if (t == strpos)
 741                     goto try_at_start;
 742                 goto try_at_offset;
 743             }
 744         }
 745     }
 746
 747
 748     t= (char*)HOP3( s, -prog->check_offset_max, (prog->check_offset_max<0) ? strend : strpos);
 749
 750     DEBUG_OPTIMISE_MORE_r(
 751         PerlIO_printf(Perl_debug_log,
 752             "Check offset min:%"IVdf" max:%"IVdf" S:%"IVdf" t:%"IVdf" D:%"IVdf" end:%"IVdf"\n",
 753             (IV)prog->check_offset_min,
 754             (IV)prog->check_offset_max,
 755             (IV)(s-strpos),
 756             (IV)(t-strpos),
 757             (IV)(t-s),
 758             (IV)(strend-strpos)
 759         )
 760     );
 761
 762     if (s - strpos > prog->check_offset_max  /* signed-corrected t > strpos */
 763         && (!do_utf8
 764             || ((t = (char*)reghopmaybe3((U8*)s, -prog->check_offset_max, (U8*) ((prog->check_offset_max<0) ? strend : strpos)))
 765                  && t > strpos)))
 766     {
 767         /* Fixed substring is found far enough so that the match
 768            cannot start at strpos. */
 769       try_at_offset:
 770         if (ml_anch && t[-1] != '\n') {
 771             /* Eventually fbm_*() should handle this, but often
 772                anchored_offset is not 0, so this check will not be wasted. */
 773             /* XXXX In the code below we prefer to look for "^" even in
 774                presence of anchored substrings.  And we search even
 775                beyond the found float position.  These pessimizations
 776                are historical artefacts only.  */
 777           find_anchor:
 778             while (t < strend - prog->minlen) {
 779                 if (*t == '\n') {
 780                     if (t < check_at - prog->check_offset_min) {
 781                         if (do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) {
 782                             /* Since we moved from the found position,
 783                                we definitely contradict the found anchored
 784                                substr.  Due to the above check we do not
 785                                contradict "check" substr.
 786                                Thus we can arrive here only if check substr
 787                                is float.  Redo checking for "other"=="fixed".
 788                              */
 789                             strpos = t + 1;
 790                             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
 791                                 PL_colors[0], PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
 792                             goto do_other_anchored;
 793                         }
 794                         /* We don't contradict the found floating substring. */
 795                         /* XXXX Why not check for STCLASS? */
 796                         s = t + 1;
 797                         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
 798                             PL_colors[0], PL_colors[1], (long)(s - i_strpos)));
 799                         goto set_useful;
 800                     }
 801                     /* Position contradicts check-string */
 802                     /* XXXX probably better to look for check-string
 803                        than for "\n", so one should lower the limit for t? */
 804                     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n",
 805                         PL_colors[0], PL_colors[1], (long)(t + 1 - i_strpos)));
 806                     other_last = strpos = s = t + 1;
 807                     goto restart;
 808                 }
 809                 t++;
 810             }
 811             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
 812                         PL_colors[0], PL_colors[1]));
 813             goto fail_finish;
 814         }
 815         else {
 816             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Starting position does not contradict /%s^%s/m...\n",
 817                         PL_colors[0], PL_colors[1]));
 818         }
 819         s = t;
 820       set_useful:
 821         ++BmUSEFUL(do_utf8 ? prog->check_utf8 : prog->check_substr);    /* hooray/5 */
 822     }
 823     else {
 824         /* The found string does not prohibit matching at strpos,
 825            - no optimization of calling REx engine can be performed,
 826            unless it was an MBOL and we are not after MBOL,
 827            or a future STCLASS check will fail this. */
 828       try_at_start:
 829         /* Even in this situation we may use MBOL flag if strpos is offset
 830            wrt the start of the string. */
 831         if (ml_anch && sv && !SvROK(sv) /* See prev comment on SvROK */
 832             && (strpos != strbeg) && strpos[-1] != '\n'
 833             /* May be due to an implicit anchor of m{.*foo}  */
 834             && !(prog->intflags & PREGf_IMPLICIT))
 835         {
 836             t = strpos;
 837             goto find_anchor;
 838         }
 839         DEBUG_EXECUTE_r( if (ml_anch)
 840             PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n",
 841                           (long)(strpos - i_strpos), PL_colors[0], PL_colors[1]);
 842         );
 843       success_at_start:
 844         if (!(prog->intflags & PREGf_NAUGHTY)   /* XXXX If strpos moved? */
 845             && (do_utf8 ? (
 846                 prog->check_utf8                /* Could be deleted already */
 847                 && --BmUSEFUL(prog->check_utf8) < 0
 848                 && (prog->check_utf8 == prog->float_utf8)
 849             ) : (
 850                 prog->check_substr              /* Could be deleted already */
 851                 && --BmUSEFUL(prog->check_substr) < 0
 852                 && (prog->check_substr == prog->float_substr)
 853             )))
 854         {
 855             /* If flags & SOMETHING - do not do it many times on the same match */
 856             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "... Disabling check substring...\n"));
 857             SvREFCNT_dec(do_utf8 ? prog->check_utf8 : prog->check_substr);
 858             if (do_utf8 ? prog->check_substr : prog->check_utf8)
 859                 SvREFCNT_dec(do_utf8 ? prog->check_substr : prog->check_utf8);
 860             prog->check_substr = prog->check_utf8 = NULL;       /* disable */
 861             prog->float_substr = prog->float_utf8 = NULL;       /* clear */
 862             check = NULL;                       /* abort */
 863             s = strpos;
 864             /* XXXX This is a remnant of the old implementation.  It
 865                     looks wasteful, since now INTUIT can use many
 866                     other heuristics. */
 867             prog->extflags &= ~RXf_USE_INTUIT;
 868         }
 869         else
 870             s = strpos;
 871     }
 872
 873     /* Last resort... */
 874     /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
 875     /* trie stclasses are too expensive to use here, we are better off to
 876        leave it to regmatch itself */
 877     if (progi->regstclass && PL_regkind[OP(progi->regstclass)]!=TRIE) {
 878         /* minlen == 0 is possible if regstclass is \b or \B,
 879            and the fixed substr is ''$.
 880            Since minlen is already taken into account, s+1 is before strend;
 881            accidentally, minlen >= 1 guaranties no false positives at s + 1
 882            even for \b or \B.  But (minlen? 1 : 0) below assumes that
 883            regstclass does not come from lookahead...  */
 884         /* If regstclass takes bytelength more than 1: If charlength==1, OK.
 885            This leaves EXACTF only, which is dealt with in find_byclass().  */
 886         const U8* const str = (U8*)STRING(progi->regstclass);
 887         const int cl_l = (PL_regkind[OP(progi->regstclass)] == EXACT
 888                     ? CHR_DIST(str+STR_LEN(progi->regstclass), str)
 889                     : 1);
 890         char * endpos;
 891         if (prog->anchored_substr || prog->anchored_utf8 || ml_anch)
 892             endpos= HOP3c(s, (prog->minlen ? cl_l : 0), strend);
 893         else if (prog->float_substr || prog->float_utf8)
 894             endpos= HOP3c(HOP3c(check_at, -start_shift, strbeg), cl_l, strend);
 895         else
 896             endpos= strend;
 897
 898         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "start_shift: %"IVdf" check_at: %"IVdf" s: %"IVdf" endpos: %"IVdf"\n",
 899                                       (IV)start_shift, (IV)(check_at - strbeg), (IV)(s - strbeg), (IV)(endpos - strbeg)));
 900
 901         t = s;
 902         s = find_byclass(prog, progi->regstclass, s, endpos, NULL);
 903         if (!s) {
 904 #ifdef DEBUGGING
 905             const char *what = NULL;
 906 #endif
 907             if (endpos == strend) {
 908                 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
 909                                 "Could not match STCLASS...\n") );
 910                 goto fail;
 911             }
 912             DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
 913                                    "This position contradicts STCLASS...\n") );
 914             if ((prog->extflags & RXf_ANCH) && !ml_anch)
 915                 goto fail;
 916             /* Contradict one of substrings */
 917             if (prog->anchored_substr || prog->anchored_utf8) {
 918                 if ((do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) == check) {
 919                     DEBUG_EXECUTE_r( what = "anchored" );
 920                   hop_and_restart:
 921                     s = HOP3c(t, 1, strend);
 922                     if (s + start_shift + end_shift > strend) {
 923                         /* XXXX Should be taken into account earlier? */
 924                         DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
 925                                                "Could not match STCLASS...\n") );
 926                         goto fail;
 927                     }
 928                     if (!check)
 929                         goto giveup;
 930                     DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
 931                                 "Looking for %s substr starting at offset %ld...\n",
 932                                  what, (long)(s + start_shift - i_strpos)) );
 933                     goto restart;
 934                 }
 935                 /* Have both, check_string is floating */
 936                 if (t + start_shift >= check_at) /* Contradicts floating=check */
 937                     goto retry_floating_check;
 938                 /* Recheck anchored substring, but not floating... */
 939                 s = check_at;
 940                 if (!check)
 941                     goto giveup;
 942                 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
 943                           "Looking for anchored substr starting at offset %ld...\n",
 944                           (long)(other_last - i_strpos)) );
 945                 goto do_other_anchored;
 946             }
 947             /* Another way we could have checked stclass at the
 948                current position only: */
 949             if (ml_anch) {
 950                 s = t = t + 1;
 951                 if (!check)
 952                     goto giveup;
 953                 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
 954                           "Looking for /%s^%s/m starting at offset %ld...\n",
 955                           PL_colors[0], PL_colors[1], (long)(t - i_strpos)) );
 956                 goto try_at_offset;
 957             }
 958             if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))     /* Could have been deleted */
 959                 goto fail;
 960             /* Check is floating subtring. */
 961           retry_floating_check:
 962             t = check_at - start_shift;
 963             DEBUG_EXECUTE_r( what = "floating" );
 964             goto hop_and_restart;
 965         }
 966         if (t != s) {
 967             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 968                         "By STCLASS: moving %ld --> %ld\n",
 969                                   (long)(t - i_strpos), (long)(s - i_strpos))
 970                    );
 971         }
 972         else {
 973             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 974                                   "Does not contradict STCLASS...\n");
 975                    );
 976         }
 977     }
 978   giveup:
 979     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%s%s:%s match at offset %ld\n",
 980                           PL_colors[4], (check ? "Guessed" : "Giving up"),
 981                           PL_colors[5], (long)(s - i_strpos)) );
 982     return s;
 983
 984   fail_finish:                          /* Substring not found */
 985     if (prog->check_substr || prog->check_utf8)         /* could be removed already */
 986         BmUSEFUL(do_utf8 ? prog->check_utf8 : prog->check_substr) += 5; /* hooray */
 987   fail:
 988     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
 989                           PL_colors[4], PL_colors[5]));
 990     return NULL;
 991 }
 992
 993 #define DECL_TRIE_TYPE(scan) \
 994     const enum { trie_plain, trie_utf8, trie_utf8_fold, trie_latin_utf8_fold } \
 995                     trie_type = (scan->flags != EXACT) \
 996                               ? (do_utf8 ? trie_utf8_fold : (UTF ? trie_latin_utf8_fold : trie_plain)) \
 997                               : (do_utf8 ? trie_utf8 : trie_plain)
 998
 999 #define REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc, uscan, len,  \
1000 uvc, charid, foldlen, foldbuf, uniflags) STMT_START {                       \
1001     switch (trie_type) {                                                    \
1002     case trie_utf8_fold:                                                    \
1003         if ( foldlen>0 ) {                                                  \
1004             uvc = utf8n_to_uvuni( uscan, UTF8_MAXLEN, &len, uniflags );     \
1005             foldlen -= len;                                                 \
1006             uscan += len;                                                   \
1007             len=0;                                                          \
1008         } else {                                                            \
1009             uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN, &len, uniflags );   \
1010             uvc = to_uni_fold( uvc, foldbuf, &foldlen );                    \
1011             foldlen -= UNISKIP( uvc );                                      \
1012             uscan = foldbuf + UNISKIP( uvc );                               \
1013         }                                                                   \
1014         break;                                                              \
1015     case trie_latin_utf8_fold:                                              \
1016         if ( foldlen>0 ) {                                                  \
1017             uvc = utf8n_to_uvuni( uscan, UTF8_MAXLEN, &len, uniflags );     \
1018             foldlen -= len;                                                 \
1019             uscan += len;                                                   \
1020             len=0;                                                          \
1021         } else {                                                            \
1022             len = 1;                                                        \
1023             uvc = to_uni_fold( *(U8*)uc, foldbuf, &foldlen );               \
1024             foldlen -= UNISKIP( uvc );                                      \
1025             uscan = foldbuf + UNISKIP( uvc );                               \
1026         }                                                                   \
1027         break;                                                              \
1028     case trie_utf8:                                                         \
1029         uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN, &len, uniflags );       \
1030         break;                                                              \
1031     case trie_plain:                                                        \
1032         uvc = (UV)*uc;                                                      \
1033         len = 1;                                                            \
1034     }                                                                       \
1035                                                                             \
1036     if (uvc < 256) {                                                        \
1037         charid = trie->charmap[ uvc ];                                      \
1038     }                                                                       \
1039     else {                                                                  \
1040         charid = 0;                                                         \
1041         if (widecharmap) {                                                  \
1042             SV** const svpp = hv_fetch(widecharmap,                         \
1043                         (char*)&uvc, sizeof(UV), 0);                        \
1044             if (svpp)                                                       \
1045                 charid = (U16)SvIV(*svpp);                                  \
1046         }                                                                   \
1047     }                                                                       \
1048 } STMT_END
1049
1050 #define REXEC_FBC_EXACTISH_CHECK(CoNd)                 \
1051 {                                                      \
1052     char *my_strend= (char *)strend;                   \
1053     if ( (CoNd)                                        \
1054          && (ln == len ||                              \
1055              !ibcmp_utf8(s, &my_strend, 0,  do_utf8,   \
1056                         m, NULL, ln, (bool)UTF))       \
1057          && (!reginfo || regtry(reginfo, &s)) )        \
1058         goto got_it;                                   \
1059     else {                                             \
1060          U8 foldbuf[UTF8_MAXBYTES_CASE+1];             \
1061          uvchr_to_utf8(tmpbuf, c);                     \
1062          f = to_utf8_fold(tmpbuf, foldbuf, &foldlen);  \
1063          if ( f != c                                   \
1064               && (f == c1 || f == c2)                  \
1065               && (ln == len ||                         \
1066                 !ibcmp_utf8(s, &my_strend, 0,  do_utf8,\
1067                               m, NULL, ln, (bool)UTF)) \
1068               && (!reginfo || regtry(reginfo, &s)) )   \
1069               goto got_it;                             \
1070     }                                                  \
1071 }                                                      \
1072 s += len
1073
1074 #define REXEC_FBC_EXACTISH_SCAN(CoNd)                     \
1075 STMT_START {                                              \
1076     while (s <= e) {                                      \
1077         if ( (CoNd)                                       \
1078              && (ln == 1 || !(OP(c) == EXACTF             \
1079                               ? ibcmp(s, m, ln)           \
1080                               : ibcmp_locale(s, m, ln)))  \
1081              && (!reginfo || regtry(reginfo, &s)) )        \
1082             goto got_it;                                  \
1083         s++;                                              \
1084     }                                                     \
1085 } STMT_END
1086
1087 #define REXEC_FBC_UTF8_SCAN(CoDe)                     \
1088 STMT_START {                                          \
1089     while (s + (uskip = UTF8SKIP(s)) <= strend) {     \
1090         CoDe                                          \
1091         s += uskip;                                   \
1092     }                                                 \
1093 } STMT_END
1094
1095 #define REXEC_FBC_SCAN(CoDe)                          \
1096 STMT_START {                                          \
1097     while (s < strend) {                              \
1098         CoDe                                          \
1099         s++;                                          \
1100     }                                                 \
1101 } STMT_END
1102
1103 #define REXEC_FBC_UTF8_CLASS_SCAN(CoNd)               \
1104 REXEC_FBC_UTF8_SCAN(                                  \
1105     if (CoNd) {                                       \
1106         if (tmp && (!reginfo || regtry(reginfo, &s)))  \
1107             goto got_it;                              \
1108         else                                          \
1109             tmp = doevery;                            \
1110     }                                                 \
1111     else                                              \
1112         tmp = 1;                                      \
1113 )
1114
1115 #define REXEC_FBC_CLASS_SCAN(CoNd)                    \
1116 REXEC_FBC_SCAN(                                       \
1117     if (CoNd) {                                       \
1118         if (tmp && (!reginfo || regtry(reginfo, &s)))  \
1119             goto got_it;                              \
1120         else                                          \
1121             tmp = doevery;                            \
1122     }                                                 \
1123     else                                              \
1124         tmp = 1;                                      \
1125 )
1126
1127 #define REXEC_FBC_TRYIT               \
1128 if ((!reginfo || regtry(reginfo, &s))) \
1129     goto got_it
1130
1131 #define REXEC_FBC_CSCAN(CoNdUtF8,CoNd)                         \
1132     if (do_utf8) {                                             \
1133         REXEC_FBC_UTF8_CLASS_SCAN(CoNdUtF8);                   \
1134     }                                                          \
1135     else {                                                     \
1136         REXEC_FBC_CLASS_SCAN(CoNd);                            \
1137     }                                                          \
1138     break
1139
1140 #define REXEC_FBC_CSCAN_PRELOAD(UtFpReLoAd,CoNdUtF8,CoNd)      \
1141     if (do_utf8) {                                             \
1142         UtFpReLoAd;                                            \
1143         REXEC_FBC_UTF8_CLASS_SCAN(CoNdUtF8);                   \
1144     }                                                          \
1145     else {                                                     \
1146         REXEC_FBC_CLASS_SCAN(CoNd);                            \
1147     }                                                          \
1148     break
1149
1150 #define REXEC_FBC_CSCAN_TAINT(CoNdUtF8,CoNd)                   \
1151     PL_reg_flags |= RF_tainted;                                \
1152     if (do_utf8) {                                             \
1153         REXEC_FBC_UTF8_CLASS_SCAN(CoNdUtF8);                   \
1154     }                                                          \
1155     else {                                                     \
1156         REXEC_FBC_CLASS_SCAN(CoNd);                            \
1157     }                                                          \
1158     break
1159
1160 #define DUMP_EXEC_POS(li,s,doutf8) \
1161     dump_exec_pos(li,s,(PL_regeol),(PL_bostr),(PL_reg_starttry),doutf8)
1162
1163 /* We know what class REx starts with.  Try to find this position... */
1164 /* if reginfo is NULL, its a dryrun */
1165 /* annoyingly all the vars in this routine have different names from their counterparts
1166    in regmatch. /grrr */
1167
1168 STATIC char *
1169 S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
1170     const char *strend, regmatch_info *reginfo)
1171 {
1172         dVAR;
1173         const I32 doevery = (prog->intflags & PREGf_SKIP) == 0;
1174         char *m;
1175         STRLEN ln;
1176         STRLEN lnc;
1177         register STRLEN uskip;
1178         unsigned int c1;
1179         unsigned int c2;
1180         char *e;
1181         register I32 tmp = 1;   /* Scratch variable? */
1182         register const bool do_utf8 = PL_reg_match_utf8;
1183         RXi_GET_DECL(prog,progi);
1184
1185         /* We know what class it must start with. */
1186         switch (OP(c)) {
1187         case ANYOF:
1188             if (do_utf8) {
1189                  REXEC_FBC_UTF8_CLASS_SCAN((ANYOF_FLAGS(c) & ANYOF_UNICODE) ||
1190                           !UTF8_IS_INVARIANT((U8)s[0]) ?
1191                           reginclass(prog, c, (U8*)s, 0, do_utf8) :
1192                           REGINCLASS(prog, c, (U8*)s));
1193             }
1194             else {
1195                  while (s < strend) {
1196                       STRLEN skip = 1;
1197
1198                       if (REGINCLASS(prog, c, (U8*)s) ||
1199                           (ANYOF_FOLD_SHARP_S(c, s, strend) &&
1200                            /* The assignment of 2 is intentional:
1201                             * for the folded sharp s, the skip is 2. */
1202                            (skip = SHARP_S_SKIP))) {
1203                            if (tmp && (!reginfo || regtry(reginfo, &s)))
1204                                 goto got_it;
1205                            else
1206                                 tmp = doevery;
1207                       }
1208                       else
1209                            tmp = 1;
1210                       s += skip;
1211                  }
1212             }
1213             break;
1214         case CANY:
1215             REXEC_FBC_SCAN(
1216                 if (tmp && (!reginfo || regtry(reginfo, &s)))
1217                     goto got_it;
1218                 else
1219                     tmp = doevery;
1220             );
1221             break;
1222         case EXACTF:
1223             m   = STRING(c);
1224             ln  = STR_LEN(c);   /* length to match in octets/bytes */
1225             lnc = (I32) ln;     /* length to match in characters */
1226             if (UTF) {
1227                 STRLEN ulen1, ulen2;
1228                 U8 *sm = (U8 *) m;
1229                 U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
1230                 U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
1231                 /* used by commented-out code below */
1232                 /*const U32 uniflags = UTF8_ALLOW_DEFAULT;*/
1233
1234                 /* XXX: Since the node will be case folded at compile
1235                    time this logic is a little odd, although im not
1236                    sure that its actually wrong. --dmq */
1237
1238                 c1 = to_utf8_lower((U8*)m, tmpbuf1, &ulen1);
1239                 c2 = to_utf8_upper((U8*)m, tmpbuf2, &ulen2);
1240
1241                 /* XXX: This is kinda strange. to_utf8_XYZ returns the
1242                    codepoint of the first character in the converted
1243                    form, yet originally we did the extra step.
1244                    No tests fail by commenting this code out however
1245                    so Ive left it out. -- dmq.
1246
1247                 c1 = utf8n_to_uvchr(tmpbuf1, UTF8_MAXBYTES_CASE,
1248                                     0, uniflags);
1249                 c2 = utf8n_to_uvchr(tmpbuf2, UTF8_MAXBYTES_CASE,
1250                                     0, uniflags);
1251                 */
1252
1253                 lnc = 0;
1254                 while (sm < ((U8 *) m + ln)) {
1255                     lnc++;
1256                     sm += UTF8SKIP(sm);
1257                 }
1258             }
1259             else {
1260                 c1 = *(U8*)m;
1261                 c2 = PL_fold[c1];
1262             }
1263             goto do_exactf;
1264         case EXACTFL:
1265             m   = STRING(c);
1266             ln  = STR_LEN(c);
1267             lnc = (I32) ln;
1268             c1 = *(U8*)m;
1269             c2 = PL_fold_locale[c1];
1270           do_exactf:
1271             e = HOP3c(strend, -((I32)lnc), s);
1272
1273             if (!reginfo && e < s)
1274                 e = s;                  /* Due to minlen logic of intuit() */
1275
1276             /* The idea in the EXACTF* cases is to first find the
1277              * first character of the EXACTF* node and then, if
1278              * necessary, case-insensitively compare the full
1279              * text of the node.  The c1 and c2 are the first
1280              * characters (though in Unicode it gets a bit
1281              * more complicated because there are more cases
1282              * than just upper and lower: one needs to use
1283              * the so-called folding case for case-insensitive
1284              * matching (called "loose matching" in Unicode).
1285              * ibcmp_utf8() will do just that. */
1286
1287             if (do_utf8 || UTF) {
1288                 UV c, f;
1289                 U8 tmpbuf [UTF8_MAXBYTES+1];
1290                 STRLEN len = 1;
1291                 STRLEN foldlen;
1292                 const U32 uniflags = UTF8_ALLOW_DEFAULT;
1293                 if (c1 == c2) {
1294                     /* Upper and lower of 1st char are equal -
1295                      * probably not a "letter". */
1296                     while (s <= e) {
1297                         if (do_utf8) {
1298                             c = utf8n_to_uvchr((U8*)s, UTF8_MAXBYTES, &len,
1299                                            uniflags);
1300                         } else {
1301                             c = *((U8*)s);
1302                         }
1303                         REXEC_FBC_EXACTISH_CHECK(c == c1);
1304                     }
1305                 }
1306                 else {
1307                     while (s <= e) {
1308                         if (do_utf8) {
1309                             c = utf8n_to_uvchr((U8*)s, UTF8_MAXBYTES, &len,
1310                                            uniflags);
1311                         } else {
1312                             c = *((U8*)s);
1313                         }
1314
1315                         /* Handle some of the three Greek sigmas cases.
1316                          * Note that not all the possible combinations
1317                          * are handled here: some of them are handled
1318                          * by the standard folding rules, and some of
1319                          * them (the character class or ANYOF cases)
1320                          * are handled during compiletime in
1321                          * regexec.c:S_regclass(). */
1322                         if (c == (UV)UNICODE_GREEK_CAPITAL_LETTER_SIGMA ||
1323                             c == (UV)UNICODE_GREEK_SMALL_LETTER_FINAL_SIGMA)
1324                             c = (UV)UNICODE_GREEK_SMALL_LETTER_SIGMA;
1325
1326                         REXEC_FBC_EXACTISH_CHECK(c == c1 || c == c2);
1327                     }
1328                 }
1329             }
1330             else {
1331                 /* Neither pattern nor string are UTF8 */
1332                 if (c1 == c2)
1333                     REXEC_FBC_EXACTISH_SCAN(*(U8*)s == c1);
1334                 else
1335                     REXEC_FBC_EXACTISH_SCAN(*(U8*)s == c1 || *(U8*)s == c2);
1336             }
1337             break;
1338         case BOUNDL:
1339             PL_reg_flags |= RF_tainted;
1340             /* FALL THROUGH */
1341         case BOUND:
1342             if (do_utf8) {
1343                 if (s == PL_bostr)
1344                     tmp = '\n';
1345                 else {
1346                     U8 * const r = reghop3((U8*)s, -1, (U8*)PL_bostr);
1347                     tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, UTF8_ALLOW_DEFAULT);
1348                 }
1349                 tmp = ((OP(c) == BOUND ?
1350                         isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
1351                 LOAD_UTF8_CHARCLASS_ALNUM();
1352                 REXEC_FBC_UTF8_SCAN(
1353                     if (tmp == !(OP(c) == BOUND ?
1354                                  (bool)swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
1355                                  isALNUM_LC_utf8((U8*)s)))
1356                     {
1357                         tmp = !tmp;
1358                         REXEC_FBC_TRYIT;
1359                 }
1360                 );
1361             }
1362             else {
1363                 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
1364                 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1365                 REXEC_FBC_SCAN(
1366                     if (tmp ==
1367                         !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
1368                         tmp = !tmp;
1369                         REXEC_FBC_TRYIT;
1370                 }
1371                 );
1372             }
1373             if ((!prog->minlen && tmp) && (!reginfo || regtry(reginfo, &s)))
1374                 goto got_it;
1375             break;
1376         case NBOUNDL:
1377             PL_reg_flags |= RF_tainted;
1378             /* FALL THROUGH */
1379         case NBOUND:
1380             if (do_utf8) {
1381                 if (s == PL_bostr)
1382                     tmp = '\n';
1383                 else {
1384                     U8 * const r = reghop3((U8*)s, -1, (U8*)PL_bostr);
1385                     tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, UTF8_ALLOW_DEFAULT);
1386                 }
1387                 tmp = ((OP(c) == NBOUND ?
1388                         isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
1389                 LOAD_UTF8_CHARCLASS_ALNUM();
1390                 REXEC_FBC_UTF8_SCAN(
1391                     if (tmp == !(OP(c) == NBOUND ?
1392                                  (bool)swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
1393                                  isALNUM_LC_utf8((U8*)s)))
1394                         tmp = !tmp;
1395                     else REXEC_FBC_TRYIT;
1396                 );
1397             }
1398             else {
1399                 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
1400                 tmp = ((OP(c) == NBOUND ?
1401                         isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1402                 REXEC_FBC_SCAN(
1403                     if (tmp ==
1404                         !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
1405                         tmp = !tmp;
1406                     else REXEC_FBC_TRYIT;
1407                 );
1408             }
1409             if ((!prog->minlen && !tmp) && (!reginfo || regtry(reginfo, &s)))
1410                 goto got_it;
1411             break;
1412         case ALNUM:
1413             REXEC_FBC_CSCAN_PRELOAD(
1414                 LOAD_UTF8_CHARCLASS_ALNUM(),
1415                 swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8),
1416                 isALNUM(*s)
1417             );
1418         case ALNUML:
1419             REXEC_FBC_CSCAN_TAINT(
1420                 isALNUM_LC_utf8((U8*)s),
1421                 isALNUM_LC(*s)
1422             );
1423         case NALNUM:
1424             REXEC_FBC_CSCAN_PRELOAD(
1425                 LOAD_UTF8_CHARCLASS_ALNUM(),
1426                 !swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8),
1427                 !isALNUM(*s)
1428             );
1429         case NALNUML:
1430             REXEC_FBC_CSCAN_TAINT(
1431                 !isALNUM_LC_utf8((U8*)s),
1432                 !isALNUM_LC(*s)
1433             );
1434         case SPACE:
1435             REXEC_FBC_CSCAN_PRELOAD(
1436                 LOAD_UTF8_CHARCLASS_SPACE(),
1437                 *s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8),
1438                 isSPACE(*s)
1439             );
1440         case SPACEL:
1441             REXEC_FBC_CSCAN_TAINT(
1442                 *s == ' ' || isSPACE_LC_utf8((U8*)s),
1443                 isSPACE_LC(*s)
1444             );
1445         case NSPACE:
1446             REXEC_FBC_CSCAN_PRELOAD(
1447                 LOAD_UTF8_CHARCLASS_SPACE(),
1448                 !(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8)),
1449                 !isSPACE(*s)
1450             );
1451         case NSPACEL:
1452             REXEC_FBC_CSCAN_TAINT(
1453                 !(*s == ' ' || isSPACE_LC_utf8((U8*)s)),
1454                 !isSPACE_LC(*s)
1455             );
1456         case DIGIT:
1457             REXEC_FBC_CSCAN_PRELOAD(
1458                 LOAD_UTF8_CHARCLASS_DIGIT(),
1459                 swash_fetch(PL_utf8_digit,(U8*)s, do_utf8),
1460                 isDIGIT(*s)
1461             );
1462         case DIGITL:
1463             REXEC_FBC_CSCAN_TAINT(
1464                 isDIGIT_LC_utf8((U8*)s),
1465                 isDIGIT_LC(*s)
1466             );
1467         case NDIGIT:
1468             REXEC_FBC_CSCAN_PRELOAD(
1469                 LOAD_UTF8_CHARCLASS_DIGIT(),
1470                 !swash_fetch(PL_utf8_digit,(U8*)s, do_utf8),
1471                 !isDIGIT(*s)
1472             );
1473         case NDIGITL:
1474             REXEC_FBC_CSCAN_TAINT(
1475                 !isDIGIT_LC_utf8((U8*)s),
1476                 !isDIGIT_LC(*s)
1477             );
1478         case LNBREAK:
1479             REXEC_FBC_CSCAN(
1480                 is_LNBREAK_utf8(s),
1481                 is_LNBREAK_latin1(s)
1482             );
1483         case VERTWS:
1484             REXEC_FBC_CSCAN(
1485                 is_VERTWS_utf8(s),
1486                 is_VERTWS_latin1(s)
1487             );
1488         case NVERTWS:
1489             REXEC_FBC_CSCAN(
1490                 !is_VERTWS_utf8(s),
1491                 !is_VERTWS_latin1(s)
1492             );
1493         case HORIZWS:
1494             REXEC_FBC_CSCAN(
1495                 is_HORIZWS_utf8(s),
1496                 is_HORIZWS_latin1(s)
1497             );
1498         case NHORIZWS:
1499             REXEC_FBC_CSCAN(
1500                 !is_HORIZWS_utf8(s),
1501                 !is_HORIZWS_latin1(s)
1502             );
1503         case AHOCORASICKC:
1504         case AHOCORASICK:
1505             {
1506                 DECL_TRIE_TYPE(c);
1507                 /* what trie are we using right now */
1508                 reg_ac_data *aho
1509                     = (reg_ac_data*)progi->data->data[ ARG( c ) ];
1510                 reg_trie_data *trie
1511                     = (reg_trie_data*)progi->data->data[ aho->trie ];
1512                 HV *widecharmap = (HV*) progi->data->data[ aho->trie + 1 ];
1513
1514                 const char *last_start = strend - trie->minlen;
1515 #ifdef DEBUGGING
1516                 const char *real_start = s;
1517 #endif
1518                 STRLEN maxlen = trie->maxlen;
1519                 SV *sv_points;
1520                 U8 **points; /* map of where we were in the input string
1521                                 when reading a given char. For ASCII this
1522                                 is unnecessary overhead as the relationship
1523                                 is always 1:1, but for Unicode, especially
1524                                 case folded Unicode this is not true. */
1525                 U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ];
1526                 U8 *bitmap=NULL;
1527
1528
1529                 GET_RE_DEBUG_FLAGS_DECL;
1530
1531                 /* We can't just allocate points here. We need to wrap it in
1532                  * an SV so it gets freed properly if there is a croak while
1533                  * running the match */
1534                 ENTER;
1535                 SAVETMPS;
1536                 sv_points=newSV(maxlen * sizeof(U8 *));
1537                 SvCUR_set(sv_points,
1538                     maxlen * sizeof(U8 *));
1539                 SvPOK_on(sv_points);
1540                 sv_2mortal(sv_points);
1541                 points=(U8**)SvPV_nolen(sv_points );
1542                 if ( trie_type != trie_utf8_fold
1543                      && (trie->bitmap || OP(c)==AHOCORASICKC) )
1544                 {
1545                     if (trie->bitmap)
1546                         bitmap=(U8*)trie->bitmap;
1547                     else
1548                         bitmap=(U8*)ANYOF_BITMAP(c);
1549                 }
1550                 /* this is the Aho-Corasick algorithm modified a touch
1551                    to include special handling for long "unknown char"
1552                    sequences. The basic idea being that we use AC as long
1553                    as we are dealing with a possible matching char, when
1554                    we encounter an unknown char (and we have not encountered
1555                    an accepting state) we scan forward until we find a legal
1556                    starting char.
1557                    AC matching is basically that of trie matching, except
1558                    that when we encounter a failing transition, we fall back
1559                    to the current states "fail state", and try the current char
1560                    again, a process we repeat until we reach the root state,
1561                    state 1, or a legal transition. If we fail on the root state
1562                    then we can either terminate if we have reached an accepting
1563                    state previously, or restart the entire process from the beginning
1564                    if we have not.
1565
1566                  */
1567                 while (s <= last_start) {
1568                     const U32 uniflags = UTF8_ALLOW_DEFAULT;
1569                     U8 *uc = (U8*)s;
1570                     U16 charid = 0;
1571                     U32 base = 1;
1572                     U32 state = 1;
1573                     UV uvc = 0;
1574                     STRLEN len = 0;
1575                     STRLEN foldlen = 0;
1576                     U8 *uscan = (U8*)NULL;
1577                     U8 *leftmost = NULL;
1578 #ifdef DEBUGGING
1579                     U32 accepted_word= 0;
1580 #endif
1581                     U32 pointpos = 0;
1582
1583                     while ( state && uc <= (U8*)strend ) {
1584                         int failed=0;
1585                         U32 word = aho->states[ state ].wordnum;
1586
1587                         if( state==1 ) {
1588                             if ( bitmap ) {
1589                                 DEBUG_TRIE_EXECUTE_r(
1590                                     if ( uc <= (U8*)last_start && !BITMAP_TEST(bitmap,*uc) ) {
1591                                         dump_exec_pos( (char *)uc, c, strend, real_start,
1592                                             (char *)uc, do_utf8 );
1593                                         PerlIO_printf( Perl_debug_log,
1594                                             " Scanning for legal start char...\n");
1595                                     }
1596                                 );
1597                                 while ( uc <= (U8*)last_start  && !BITMAP_TEST(bitmap,*uc) ) {
1598                                     uc++;
1599                                 }
1600                                 s= (char *)uc;
1601                             }
1602                             if (uc >(U8*)last_start) break;
1603                         }
1604
1605                         if ( word ) {
1606                             U8 *lpos= points[ (pointpos - trie->wordlen[word-1] ) % maxlen ];
1607                             if (!leftmost || lpos < leftmost) {
1608                                 DEBUG_r(accepted_word=word);
1609                                 leftmost= lpos;
1610                             }
1611                             if (base==0) break;
1612
1613                         }
1614                         points[pointpos++ % maxlen]= uc;
1615                         REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc,
1616                                              uscan, len, uvc, charid, foldlen,
1617                                              foldbuf, uniflags);
1618                         DEBUG_TRIE_EXECUTE_r({
1619                             dump_exec_pos( (char *)uc, c, strend, real_start,
1620                                 s,   do_utf8 );
1621                             PerlIO_printf(Perl_debug_log,
1622                                 " Charid:%3u CP:%4"UVxf" ",
1623                                  charid, uvc);
1624                         });
1625
1626                         do {
1627 #ifdef DEBUGGING
1628                             word = aho->states[ state ].wordnum;
1629 #endif
1630                             base = aho->states[ state ].trans.base;
1631
1632                             DEBUG_TRIE_EXECUTE_r({
1633                                 if (failed)
1634                                     dump_exec_pos( (char *)uc, c, strend, real_start,
1635                                         s,   do_utf8 );
1636                                 PerlIO_printf( Perl_debug_log,
1637                                     "%sState: %4"UVxf", word=%"UVxf,
1638                                     failed ? " Fail transition to " : "",
1639                                     (UV)state, (UV)word);
1640                             });
1641                             if ( base ) {
1642                                 U32 tmp;
1643                                 if (charid &&
1644                                      (base + charid > trie->uniquecharcount )
1645                                      && (base + charid - 1 - trie->uniquecharcount
1646                                             < trie->lasttrans)
1647                                      && trie->trans[base + charid - 1 -
1648                                             trie->uniquecharcount].check == state
1649                                      && (tmp=trie->trans[base + charid - 1 -
1650                                         trie->uniquecharcount ].next))
1651                                 {
1652                                     DEBUG_TRIE_EXECUTE_r(
1653                                         PerlIO_printf( Perl_debug_log," - legal\n"));
1654                                     state = tmp;
1655                                     break;
1656                                 }
1657                                 else {
1658                                     DEBUG_TRIE_EXECUTE_r(
1659                                         PerlIO_printf( Perl_debug_log," - fail\n"));
1660                                     failed = 1;
1661                                     state = aho->fail[state];
1662                                 }
1663                             }
1664                             else {
1665                                 /* we must be accepting here */
1666                                 DEBUG_TRIE_EXECUTE_r(
1667                                         PerlIO_printf( Perl_debug_log," - accepting\n"));
1668                                 failed = 1;
1669                                 break;
1670                             }
1671                         } while(state);
1672                         uc += len;
1673                         if (failed) {
1674                             if (leftmost)
1675                                 break;
1676                             if (!state) state = 1;
1677                         }
1678                     }
1679                     if ( aho->states[ state ].wordnum ) {
1680                         U8 *lpos = points[ (pointpos - trie->wordlen[aho->states[ state ].wordnum-1]) % maxlen ];
1681                         if (!leftmost || lpos < leftmost) {
1682                             DEBUG_r(accepted_word=aho->states[ state ].wordnum);
1683                             leftmost = lpos;
1684                         }
1685                     }
1686                     if (leftmost) {
1687                         s = (char*)leftmost;
1688                         DEBUG_TRIE_EXECUTE_r({
1689                             PerlIO_printf(
1690                                 Perl_debug_log,"Matches word #%"UVxf" at position %"IVdf". Trying full pattern...\n",
1691                                 (UV)accepted_word, (IV)(s - real_start)
1692                             );
1693                         });
1694                         if (!reginfo || regtry(reginfo, &s)) {
1695                             FREETMPS;
1696                             LEAVE;
1697                             goto got_it;
1698                         }
1699                         s = HOPc(s,1);
1700                         DEBUG_TRIE_EXECUTE_r({
1701                             PerlIO_printf( Perl_debug_log,"Pattern failed. Looking for new start point...\n");
1702                         });
1703                     } else {
1704                         DEBUG_TRIE_EXECUTE_r(
1705                             PerlIO_printf( Perl_debug_log,"No match.\n"));
1706                         break;
1707                     }
1708                 }
1709                 FREETMPS;
1710                 LEAVE;
1711             }
1712             break;
1713         default:
1714             Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
1715             break;
1716         }
1717         return 0;
1718       got_it:
1719         return s;
1720 }
1721
1722 static void
1723 S_swap_match_buff (pTHX_ regexp *prog) {
1724     regexp_paren_pair *t;
1725
1726     if (!prog->swap) {
1727     /* We have to be careful. If the previous successful match
1728        was from this regex we don't want a subsequent paritally
1729        successful match to clobber the old results.
1730        So when we detect this possibility we add a swap buffer
1731        to the re, and switch the buffer each match. If we fail
1732        we switch it back, otherwise we leave it swapped.
1733     */
1734         Newxz(prog->swap, (prog->nparens + 1), regexp_paren_pair);
1735     }
1736     t = prog->swap;
1737     prog->swap = prog->offs;
1738     prog->offs = t;
1739 }
1740
1741
1742 /*
1743  - regexec_flags - match a regexp against a string
1744  */
1745 I32
1746 Perl_regexec_flags(pTHX_ REGEXP * const rx, char *stringarg, register char *strend,
1747               char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
1748 /* strend: pointer to null at end of string */
1749 /* strbeg: real beginning of string */
1750 /* minend: end of match must be >=minend after stringarg. */
1751 /* data: May be used for some additional optimizations.
1752          Currently its only used, with a U32 cast, for transmitting
1753          the ganch offset when doing a /g match. This will change */
1754 /* nosave: For optimizations. */
1755 {
1756     dVAR;
1757     struct regexp *const prog = (struct regexp *)SvANY(rx);
1758     /*register*/ char *s;
1759     register regnode *c;
1760     /*register*/ char *startpos = stringarg;
1761     I32 minlen;         /* must match at least this many chars */
1762     I32 dontbother = 0; /* how many characters not to try at end */
1763     I32 end_shift = 0;                  /* Same for the end. */         /* CC */
1764     I32 scream_pos = -1;                /* Internal iterator of scream. */
1765     char *scream_olds = NULL;
1766     const bool do_utf8 = (bool)DO_UTF8(sv);
1767     I32 multiline;
1768     RXi_GET_DECL(prog,progi);
1769     regmatch_info reginfo;  /* create some info to pass to regtry etc */
1770     bool swap_on_fail = 0;
1771
1772     GET_RE_DEBUG_FLAGS_DECL;
1773
1774     PERL_UNUSED_ARG(data);
1775
1776     /* Be paranoid... */
1777     if (prog == NULL || startpos == NULL) {
1778         Perl_croak(aTHX_ "NULL regexp parameter");
1779         return 0;
1780     }
1781
1782     multiline = prog->extflags & RXf_PMf_MULTILINE;
1783     reginfo.prog = rx;   /* Yes, sorry that this is confusing.  */
1784
1785     RX_MATCH_UTF8_set(rx, do_utf8);
1786     DEBUG_EXECUTE_r(
1787         debug_start_match(rx, do_utf8, startpos, strend,
1788         "Matching");
1789     );
1790
1791     minlen = prog->minlen;
1792
1793     if (strend - startpos < (minlen+(prog->check_offset_min<0?prog->check_offset_min:0))) {
1794         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
1795                               "String too short [regexec_flags]...\n"));
1796         goto phooey;
1797     }
1798
1799
1800     /* Check validity of program. */
1801     if (UCHARAT(progi->program) != REG_MAGIC) {
1802         Perl_croak(aTHX_ "corrupted regexp program");
1803     }
1804
1805     PL_reg_flags = 0;
1806     PL_reg_eval_set = 0;
1807     PL_reg_maxiter = 0;
1808
1809     if (RX_UTF8(rx))
1810         PL_reg_flags |= RF_utf8;
1811
1812     /* Mark beginning of line for ^ and lookbehind. */
1813     reginfo.bol = startpos; /* XXX not used ??? */
1814     PL_bostr  = strbeg;
1815     reginfo.sv = sv;
1816
1817     /* Mark end of line for $ (and such) */
1818     PL_regeol = strend;
1819
1820     /* see how far we have to get to not match where we matched before */
1821     reginfo.till = startpos+minend;
1822
1823     /* If there is a "must appear" string, look for it. */
1824     s = startpos;
1825
1826     if (prog->extflags & RXf_GPOS_SEEN) { /* Need to set reginfo->ganch */
1827         MAGIC *mg;
1828
1829         if (flags & REXEC_IGNOREPOS)    /* Means: check only at start */
1830             reginfo.ganch = startpos + prog->gofs;
1831         else if (sv && SvTYPE(sv) >= SVt_PVMG
1832                   && SvMAGIC(sv)
1833                   && (mg = mg_find(sv, PERL_MAGIC_regex_global))
1834                   && mg->mg_len >= 0) {
1835             reginfo.ganch = strbeg + mg->mg_len;        /* Defined pos() */
1836             if (prog->extflags & RXf_ANCH_GPOS) {
1837                 if (s > reginfo.ganch)
1838                     goto phooey;
1839                 s = reginfo.ganch - prog->gofs;
1840             }
1841         }
1842         else if (data) {
1843             reginfo.ganch = strbeg + PTR2UV(data);
1844         } else                          /* pos() not defined */
1845             reginfo.ganch = strbeg;
1846     }
1847     if (PL_curpm && (PM_GETRE(PL_curpm) == rx)) {
1848         swap_on_fail = 1;
1849         swap_match_buff(prog); /* do we need a save destructor here for
1850                                   eval dies? */
1851     }
1852     if (!(flags & REXEC_CHECKED) && (prog->check_substr != NULL || prog->check_utf8 != NULL)) {
1853         re_scream_pos_data d;
1854
1855         d.scream_olds = &scream_olds;
1856         d.scream_pos = &scream_pos;
1857         s = re_intuit_start(rx, sv, s, strend, flags, &d);
1858         if (!s) {
1859             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Not present...\n"));
1860             goto phooey;        /* not present */
1861         }
1862     }
1863
1864
1865
1866     /* Simplest case:  anchored match need be tried only once. */
1867     /*  [unless only anchor is BOL and multiline is set] */
1868     if (prog->extflags & (RXf_ANCH & ~RXf_ANCH_GPOS)) {
1869         if (s == startpos && regtry(&reginfo, &startpos))
1870             goto got_it;
1871         else if (multiline || (prog->intflags & PREGf_IMPLICIT)
1872                  || (prog->extflags & RXf_ANCH_MBOL)) /* XXXX SBOL? */
1873         {
1874             char *end;
1875
1876             if (minlen)
1877                 dontbother = minlen - 1;
1878             end = HOP3c(strend, -dontbother, strbeg) - 1;
1879             /* for multiline we only have to try after newlines */
1880             if (prog->check_substr || prog->check_utf8) {
1881                 if (s == startpos)
1882                     goto after_try;
1883                 while (1) {
1884                     if (regtry(&reginfo, &s))
1885                         goto got_it;
1886                   after_try:
1887                     if (s > end)
1888                         goto phooey;
1889                     if (prog->extflags & RXf_USE_INTUIT) {
1890                         s = re_intuit_start(rx, sv, s + 1, strend, flags, NULL);
1891                         if (!s)
1892                             goto phooey;
1893                     }
1894                     else
1895                         s++;
1896                 }
1897             } else {
1898                 if (s > startpos)
1899                     s--;
1900                 while (s < end) {
1901                     if (*s++ == '\n') { /* don't need PL_utf8skip here */
1902                         if (regtry(&reginfo, &s))
1903                             goto got_it;
1904                     }
1905                 }
1906             }
1907         }
1908         goto phooey;
1909     } else if (RXf_GPOS_CHECK == (prog->extflags & RXf_GPOS_CHECK))
1910     {
1911         /* the warning about reginfo.ganch being used without intialization
1912            is bogus -- we set it above, when prog->extflags & RXf_GPOS_SEEN
1913            and we only enter this block when the same bit is set. */
1914         char *tmp_s = reginfo.ganch - prog->gofs;
1915         if (regtry(&reginfo, &tmp_s))
1916             goto got_it;
1917         goto phooey;
1918     }
1919
1920     /* Messy cases:  unanchored match. */
1921     if ((prog->anchored_substr || prog->anchored_utf8) && prog->intflags & PREGf_SKIP) {
1922         /* we have /x+whatever/ */
1923         /* it must be a one character string (XXXX Except UTF?) */
1924         char ch;
1925 #ifdef DEBUGGING
1926         int did_match = 0;
1927 #endif
1928         if (!(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr))
1929             do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1930         ch = SvPVX_const(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr)[0];
1931
1932         if (do_utf8) {
1933             REXEC_FBC_SCAN(
1934                 if (*s == ch) {
1935                     DEBUG_EXECUTE_r( did_match = 1 );
1936                     if (regtry(&reginfo, &s)) goto got_it;
1937                     s += UTF8SKIP(s);
1938                     while (s < strend && *s == ch)
1939                         s += UTF8SKIP(s);
1940                 }
1941             );
1942         }
1943         else {
1944             REXEC_FBC_SCAN(
1945                 if (*s == ch) {
1946                     DEBUG_EXECUTE_r( did_match = 1 );
1947                     if (regtry(&reginfo, &s)) goto got_it;
1948                     s++;
1949                     while (s < strend && *s == ch)
1950                         s++;
1951                 }
1952             );
1953         }
1954         DEBUG_EXECUTE_r(if (!did_match)
1955                 PerlIO_printf(Perl_debug_log,
1956                                   "Did not find anchored character...\n")
1957                );
1958     }
1959     else if (prog->anchored_substr != NULL
1960               || prog->anchored_utf8 != NULL
1961               || ((prog->float_substr != NULL || prog->float_utf8 != NULL)
1962                   && prog->float_max_offset < strend - s)) {
1963         SV *must;
1964         I32 back_max;
1965         I32 back_min;
1966         char *last;
1967         char *last1;            /* Last position checked before */
1968 #ifdef DEBUGGING
1969         int did_match = 0;
1970 #endif
1971         if (prog->anchored_substr || prog->anchored_utf8) {
1972             if (!(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr))
1973                 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1974             must = do_utf8 ? prog->anchored_utf8 : prog->anchored_substr;
1975             back_max = back_min = prog->anchored_offset;
1976         } else {
1977             if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))
1978                 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1979             must = do_utf8 ? prog->float_utf8 : prog->float_substr;
1980             back_max = prog->float_max_offset;
1981             back_min = prog->float_min_offset;
1982         }
1983
1984
1985         if (must == &PL_sv_undef)
1986             /* could not downgrade utf8 check substring, so must fail */
1987             goto phooey;
1988
1989         if (back_min<0) {
1990             last = strend;
1991         } else {
1992             last = HOP3c(strend,        /* Cannot start after this */
1993                   -(I32)(CHR_SVLEN(must)
1994                          - (SvTAIL(must) != 0) + back_min), strbeg);
1995         }
1996         if (s > PL_bostr)
1997             last1 = HOPc(s, -1);
1998         else
1999             last1 = s - 1;      /* bogus */
2000
2001         /* XXXX check_substr already used to find "s", can optimize if
2002            check_substr==must. */
2003         scream_pos = -1;
2004         dontbother = end_shift;
2005         strend = HOPc(strend, -dontbother);
2006         while ( (s <= last) &&
2007                 ((flags & REXEC_SCREAM)
2008                  ? (s = screaminstr(sv, must, HOP3c(s, back_min, (back_min<0 ? strbeg : strend)) - strbeg,
2009                                     end_shift, &scream_pos, 0))
2010                  : (s = fbm_instr((unsigned char*)HOP3(s, back_min, (back_min<0 ? strbeg : strend)),
2011                                   (unsigned char*)strend, must,
2012                                   multiline ? FBMrf_MULTILINE : 0))) ) {
2013             /* we may be pointing at the wrong string */
2014             if ((flags & REXEC_SCREAM) && RXp_MATCH_COPIED(prog))
2015                 s = strbeg + (s - SvPVX_const(sv));
2016             DEBUG_EXECUTE_r( did_match = 1 );
2017             if (HOPc(s, -back_max) > last1) {
2018                 last1 = HOPc(s, -back_min);
2019                 s = HOPc(s, -back_max);
2020             }
2021             else {
2022                 char * const t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
2023
2024                 last1 = HOPc(s, -back_min);
2025                 s = t;
2026             }
2027             if (do_utf8) {
2028                 while (s <= last1) {
2029                     if (regtry(&reginfo, &s))
2030                         goto got_it;
2031                     s += UTF8SKIP(s);
2032                 }
2033             }
2034             else {
2035                 while (s <= last1) {
2036                     if (regtry(&reginfo, &s))
2037                         goto got_it;
2038                     s++;
2039                 }
2040             }
2041         }
2042         DEBUG_EXECUTE_r(if (!did_match) {
2043             RE_PV_QUOTED_DECL(quoted, do_utf8, PERL_DEBUG_PAD_ZERO(0),
2044                 SvPVX_const(must), RE_SV_DUMPLEN(must), 30);
2045             PerlIO_printf(Perl_debug_log, "Did not find %s substr %s%s...\n",
2046                               ((must == prog->anchored_substr || must == prog->anchored_utf8)
2047                                ? "anchored" : "floating"),
2048                 quoted, RE_SV_TAIL(must));
2049         });
2050         goto phooey;
2051     }
2052     else if ( (c = progi->regstclass) ) {
2053         if (minlen) {
2054             const OPCODE op = OP(progi->regstclass);
2055             /* don't bother with what can't match */
2056             if (PL_regkind[op] != EXACT && op != CANY && PL_regkind[op] != TRIE)
2057                 strend = HOPc(strend, -(minlen - 1));
2058         }
2059         DEBUG_EXECUTE_r({
2060             SV * const prop = sv_newmortal();
2061             regprop(prog, prop, c);
2062             {
2063                 RE_PV_QUOTED_DECL(quoted,do_utf8,PERL_DEBUG_PAD_ZERO(1),
2064                     s,strend-s,60);
2065                 PerlIO_printf(Perl_debug_log,
2066                     "Matching stclass %.*s against %s (%d chars)\n",
2067                     (int)SvCUR(prop), SvPVX_const(prop),
2068                      quoted, (int)(strend - s));
2069             }
2070         });
2071         if (find_byclass(prog, c, s, strend, &reginfo))
2072             goto got_it;
2073         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass... [regexec_flags]\n"));
2074     }
2075     else {
2076         dontbother = 0;
2077         if (prog->float_substr != NULL || prog->float_utf8 != NULL) {
2078             /* Trim the end. */
2079             char *last;
2080             SV* float_real;
2081
2082             if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))
2083                 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
2084             float_real = do_utf8 ? prog->float_utf8 : prog->float_substr;
2085
2086             if (flags & REXEC_SCREAM) {
2087                 last = screaminstr(sv, float_real, s - strbeg,
2088                                    end_shift, &scream_pos, 1); /* last one */
2089                 if (!last)
2090                     last = scream_olds; /* Only one occurrence. */
2091                 /* we may be pointing at the wrong string */
2092                 else if (RXp_MATCH_COPIED(prog))
2093                     s = strbeg + (s - SvPVX_const(sv));
2094             }
2095             else {
2096                 STRLEN len;
2097                 const char * const little = SvPV_const(float_real, len);
2098
2099                 if (SvTAIL(float_real)) {
2100                     if (memEQ(strend - len + 1, little, len - 1))
2101                         last = strend - len + 1;
2102                     else if (!multiline)
2103                         last = memEQ(strend - len, little, len)
2104                             ? strend - len : NULL;
2105                     else
2106                         goto find_last;
2107                 } else {
2108                   find_last:
2109                     if (len)
2110                         last = rninstr(s, strend, little, little + len);
2111                     else
2112                         last = strend;  /* matching "$" */
2113                 }
2114             }
2115             if (last == NULL) {
2116                 DEBUG_EXECUTE_r(
2117                     PerlIO_printf(Perl_debug_log,
2118                         "%sCan't trim the tail, match fails (should not happen)%s\n",
2119                         PL_colors[4], PL_colors[5]));
2120                 goto phooey; /* Should not happen! */
2121             }
2122             dontbother = strend - last + prog->float_min_offset;
2123         }
2124         if (minlen && (dontbother < minlen))
2125             dontbother = minlen - 1;
2126         strend -= dontbother;              /* this one's always in bytes! */
2127         /* We don't know much -- general case. */
2128         if (do_utf8) {
2129             for (;;) {
2130                 if (regtry(&reginfo, &s))
2131                     goto got_it;
2132                 if (s >= strend)
2133                     break;
2134                 s += UTF8SKIP(s);
2135             };
2136         }
2137         else {
2138             do {
2139                 if (regtry(&reginfo, &s))
2140                     goto got_it;
2141             } while (s++ < strend);
2142         }
2143     }
2144
2145     /* Failure. */
2146     goto phooey;
2147
2148 got_it:
2149     RX_MATCH_TAINTED_set(rx, PL_reg_flags & RF_tainted);
2150
2151     if (PL_reg_eval_set)
2152         restore_pos(aTHX_ prog);
2153     if (RXp_PAREN_NAMES(prog))
2154         (void)hv_iterinit(RXp_PAREN_NAMES(prog));
2155
2156     /* make sure $`, $&, $', and $digit will work later */
2157     if ( !(flags & REXEC_NOT_FIRST) ) {
2158         RX_MATCH_COPY_FREE(rx);
2159         if (flags & REXEC_COPY_STR) {
2160             const I32 i = PL_regeol - startpos + (stringarg - strbeg);
2161 #ifdef PERL_OLD_COPY_ON_WRITE
2162             if ((SvIsCOW(sv)
2163                  || (SvFLAGS(sv) & CAN_COW_MASK) == CAN_COW_FLAGS)) {
2164                 if (DEBUG_C_TEST) {
2165                     PerlIO_printf(Perl_debug_log,
2166                                   "Copy on write: regexp capture, type %d\n",
2167                                   (int) SvTYPE(sv));
2168                 }
2169                 prog->saved_copy = sv_setsv_cow(prog->saved_copy, sv);
2170                 prog->subbeg = (char *)SvPVX_const(prog->saved_copy);
2171                 assert (SvPOKp(prog->saved_copy));
2172             } else
2173 #endif
2174             {
2175                 RX_MATCH_COPIED_on(rx);
2176                 s = savepvn(strbeg, i);
2177                 prog->subbeg = s;
2178             }
2179             prog->sublen = i;
2180         }
2181         else {
2182             prog->subbeg = strbeg;
2183             prog->sublen = PL_regeol - strbeg;  /* strend may have been modified */
2184         }
2185     }
2186
2187     return 1;
2188
2189 phooey:
2190     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n",
2191                           PL_colors[4], PL_colors[5]));
2192     if (PL_reg_eval_set)
2193         restore_pos(aTHX_ prog);
2194     if (swap_on_fail)
2195         /* we failed :-( roll it back */
2196         swap_match_buff(prog);
2197
2198     return 0;
2199 }
2200
2201
2202 /*
2203  - regtry - try match at specific point
2204  */
2205 STATIC I32                      /* 0 failure, 1 success */
2206 S_regtry(pTHX_ regmatch_info *reginfo, char **startpos)
2207 {
2208     dVAR;
2209     CHECKPOINT lastcp;
2210     REGEXP *const rx = reginfo->prog;
2211     regexp *const prog = (struct regexp *)SvANY(rx);
2212     RXi_GET_DECL(prog,progi);
2213     GET_RE_DEBUG_FLAGS_DECL;
2214     reginfo->cutpoint=NULL;
2215
2216     if ((prog->extflags & RXf_EVAL_SEEN) && !PL_reg_eval_set) {
2217         MAGIC *mg;
2218
2219         PL_reg_eval_set = RS_init;
2220         DEBUG_EXECUTE_r(DEBUG_s(
2221             PerlIO_printf(Perl_debug_log, "  setting stack tmpbase at %"IVdf"\n",
2222                           (IV)(PL_stack_sp - PL_stack_base));
2223             ));
2224         SAVESTACK_CXPOS();
2225         cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
2226         /* Otherwise OP_NEXTSTATE will free whatever on stack now.  */
2227         SAVETMPS;
2228         /* Apparently this is not needed, judging by wantarray. */
2229         /* SAVEI8(cxstack[cxstack_ix].blk_gimme);
2230            cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
2231
2232         if (reginfo->sv) {
2233             /* Make $_ available to executed code. */
2234             if (reginfo->sv != DEFSV) {
2235                 SAVE_DEFSV;
2236                 DEFSV = reginfo->sv;
2237             }
2238
2239             if (!(SvTYPE(reginfo->sv) >= SVt_PVMG && SvMAGIC(reginfo->sv)
2240                   && (mg = mg_find(reginfo->sv, PERL_MAGIC_regex_global)))) {
2241                 /* prepare for quick setting of pos */
2242 #ifdef PERL_OLD_COPY_ON_WRITE
2243                 if (SvIsCOW(reginfo->sv))
2244                     sv_force_normal_flags(reginfo->sv, 0);
2245 #endif
2246                 mg = sv_magicext(reginfo->sv, NULL, PERL_MAGIC_regex_global,
2247                                  &PL_vtbl_mglob, NULL, 0);
2248                 mg->mg_len = -1;
2249             }
2250             PL_reg_magic    = mg;
2251             PL_reg_oldpos   = mg->mg_len;
2252             SAVEDESTRUCTOR_X(restore_pos, prog);
2253         }
2254         if (!PL_reg_curpm) {
2255             Newxz(PL_reg_curpm, 1, PMOP);
2256 #ifdef USE_ITHREADS
2257             {
2258                 SV* const repointer = newSViv(0);
2259                 /* so we know which PL_regex_padav element is PL_reg_curpm
2260                    when clearing up in perl_destruct() */
2261                 SvFLAGS(repointer) |= SVf_BREAK;
2262                 av_push(PL_regex_padav,SvREFCNT_inc_simple_NN(repointer));
2263                 PL_reg_curpm->op_pmoffset = av_len(PL_regex_padav);
2264                 PL_regex_pad = AvARRAY(PL_regex_padav);
2265             }
2266 #endif
2267         }
2268         PM_SETRE(PL_reg_curpm, rx);
2269         PL_reg_oldcurpm = PL_curpm;
2270         PL_curpm = PL_reg_curpm;
2271         if (RXp_MATCH_COPIED(prog)) {
2272             /*  Here is a serious problem: we cannot rewrite subbeg,
2273                 since it may be needed if this match fails.  Thus
2274                 $` inside (?{}) could fail... */
2275             PL_reg_oldsaved = prog->subbeg;
2276             PL_reg_oldsavedlen = prog->sublen;
2277 #ifdef PERL_OLD_COPY_ON_WRITE
2278             PL_nrs = prog->saved_copy;
2279 #endif
2280             RXp_MATCH_COPIED_off(prog);
2281         }
2282         else
2283             PL_reg_oldsaved = NULL;
2284         prog->subbeg = PL_bostr;
2285         prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
2286     }
2287     DEBUG_EXECUTE_r(PL_reg_starttry = *startpos);
2288     prog->offs[0].start = *startpos - PL_bostr;
2289     PL_reginput = *startpos;
2290     PL_reglastparen = &prog->lastparen;
2291     PL_reglastcloseparen = &prog->lastcloseparen;
2292     prog->lastparen = 0;
2293     prog->lastcloseparen = 0;
2294     PL_regsize = 0;
2295     PL_regoffs = prog->offs;
2296     if (PL_reg_start_tmpl <= prog->nparens) {
2297         PL_reg_start_tmpl = prog->nparens*3/2 + 3;
2298         if(PL_reg_start_tmp)
2299             Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
2300         else
2301             Newx(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
2302     }
2303
2304     /* XXXX What this code is doing here?!!!  There should be no need
2305        to do this again and again, PL_reglastparen should take care of
2306        this!  --ilya*/
2307
2308     /* Tests pat.t#187 and split.t#{13,14} seem to depend on this code.
2309      * Actually, the code in regcppop() (which Ilya may be meaning by
2310      * PL_reglastparen), is not needed at all by the test suite
2311      * (op/regexp, op/pat, op/split), but that code is needed otherwise
2312      * this erroneously leaves $1 defined: "1" =~ /^(?:(\d)x)?\d$/
2313      * Meanwhile, this code *is* needed for the
2314      * above-mentioned test suite tests to succeed.  The common theme
2315      * on those tests seems to be returning null fields from matches.
2316      * --jhi updated by dapm */
2317 #if 1
2318     if (prog->nparens) {
2319         regexp_paren_pair *pp = PL_regoffs;
2320         register I32 i;
2321         for (i = prog->nparens; i > (I32)*PL_reglastparen; i--) {
2322             ++pp;
2323             pp->start = -1;
2324             pp->end = -1;
2325         }
2326     }
2327 #endif
2328     REGCP_SET(lastcp);
2329     if (regmatch(reginfo, progi->program + 1)) {
2330         PL_regoffs[0].end = PL_reginput - PL_bostr;
2331         return 1;
2332     }
2333     if (reginfo->cutpoint)
2334         *startpos= reginfo->cutpoint;
2335     REGCP_UNWIND(lastcp);
2336     return 0;
2337 }
2338
2339
2340 #define sayYES goto yes
2341 #define sayNO goto no
2342 #define sayNO_SILENT goto no_silent
2343
2344 /* we dont use STMT_START/END here because it leads to
2345    "unreachable code" warnings, which are bogus, but distracting. */
2346 #define CACHEsayNO \
2347     if (ST.cache_mask) \
2348        PL_reg_poscache[ST.cache_offset] |= ST.cache_mask; \
2349     sayNO
2350
2351 /* this is used to determine how far from the left messages like
2352    'failed...' are printed. It should be set such that messages
2353    are inline with the regop output that created them.
2354 */
2355 #define REPORT_CODE_OFF 32
2356
2357
2358 /* Make sure there is a test for this +1 options in re_tests */
2359 #define TRIE_INITAL_ACCEPT_BUFFLEN 4;
2360
2361 #define CHRTEST_UNINIT -1001 /* c1/c2 haven't been calculated yet */
2362 #define CHRTEST_VOID   -1000 /* the c1/c2 "next char" test should be skipped */
2363
2364 #define SLAB_FIRST(s) (&(s)->states[0])
2365 #define SLAB_LAST(s)  (&(s)->states[PERL_REGMATCH_SLAB_SLOTS-1])
2366
2367 /* grab a new slab and return the first slot in it */
2368
2369 STATIC regmatch_state *
2370 S_push_slab(pTHX)
2371 {
2372 #if PERL_VERSION < 9 && !defined(PERL_CORE)
2373     dMY_CXT;
2374 #endif
2375     regmatch_slab *s = PL_regmatch_slab->next;
2376     if (!s) {
2377         Newx(s, 1, regmatch_slab);
2378         s->prev = PL_regmatch_slab;
2379         s->next = NULL;
2380         PL_regmatch_slab->next = s;
2381     }
2382     PL_regmatch_slab = s;
2383     return SLAB_FIRST(s);
2384 }
2385
2386
2387 /* push a new state then goto it */
2388
2389 #define PUSH_STATE_GOTO(state, node) \
2390     scan = node; \
2391     st->resume_state = state; \
2392     goto push_state;
2393
2394 /* push a new state with success backtracking, then goto it */
2395
2396 #define PUSH_YES_STATE_GOTO(state, node) \
2397     scan = node; \
2398     st->resume_state = state; \
2399     goto push_yes_state;
2400
2401
2402
2403 /*
2404
2405 regmatch() - main matching routine
2406
2407 This is basically one big switch statement in a loop. We execute an op,
2408 set 'next' to point the next op, and continue. If we come to a point which
2409 we may need to backtrack to on failure such as (A|B|C), we push a
2410 backtrack state onto the backtrack stack. On failure, we pop the top
2411 state, and re-enter the loop at the state indicated. If there are no more
2412 states to pop, we return failure.
2413
2414 Sometimes we also need to backtrack on success; for example /A+/, where
2415 after successfully matching one A, we need to go back and try to
2416 match another one; similarly for lookahead assertions: if the assertion
2417 completes successfully, we backtrack to the state just before the assertion
2418 and then carry on.  In these cases, the pushed state is marked as
2419 'backtrack on success too'. This marking is in fact done by a chain of
2420 pointers, each pointing to the previous 'yes' state. On success, we pop to
2421 the nearest yes state, discarding any intermediate failure-only states.
2422 Sometimes a yes state is pushed just to force some cleanup code to be
2423 called at the end of a successful match or submatch; e.g. (??{$re}) uses
2424 it to free the inner regex.
2425
2426 Note that failure backtracking rewinds the cursor position, while
2427 success backtracking leaves it alone.
2428
2429 A pattern is complete when the END op is executed, while a subpattern
2430 such as (?=foo) is complete when the SUCCESS op is executed. Both of these
2431 ops trigger the "pop to last yes state if any, otherwise return true"
2432 behaviour.
2433
2434 A common convention in this function is to use A and B to refer to the two
2435 subpatterns (or to the first nodes thereof) in patterns like /A*B/: so A is
2436 the subpattern to be matched possibly multiple times, while B is the entire
2437 rest of the pattern. Variable and state names reflect this convention.
2438
2439 The states in the main switch are the union of ops and failure/success of
2440 substates associated with with that op.  For example, IFMATCH is the op
2441 that does lookahead assertions /(?=A)B/ and so the IFMATCH state means
2442 'execute IFMATCH'; while IFMATCH_A is a state saying that we have just
2443 successfully matched A and IFMATCH_A_fail is a state saying that we have
2444 just failed to match A. Resume states always come in pairs. The backtrack
2445 state we push is marked as 'IFMATCH_A', but when that is popped, we resume
2446 at IFMATCH_A or IFMATCH_A_fail, depending on whether we are backtracking
2447 on success or failure.
2448
2449 The struct that holds a backtracking state is actually a big union, with
2450 one variant for each major type of op. The variable st points to the
2451 top-most backtrack struct. To make the code clearer, within each
2452 block of code we #define ST to alias the relevant union.
2453
2454 Here's a concrete example of a (vastly oversimplified) IFMATCH
2455 implementation:
2456
2457     switch (state) {
2458     ....
2459
2460 #define ST st->u.ifmatch
2461
2462     case IFMATCH: // we are executing the IFMATCH op, (?=A)B
2463         ST.foo = ...; // some state we wish to save
2464         ...
2465         // push a yes backtrack state with a resume value of
2466         // IFMATCH_A/IFMATCH_A_fail, then continue execution at the
2467         // first node of A:
2468         PUSH_YES_STATE_GOTO(IFMATCH_A, A);
2469         // NOTREACHED
2470
2471     case IFMATCH_A: // we have successfully executed A; now continue with B
2472         next = B;
2473         bar = ST.foo; // do something with the preserved value
2474         break;
2475
2476     case IFMATCH_A_fail: // A failed, so the assertion failed
2477         ...;   // do some housekeeping, then ...
2478         sayNO; // propagate the failure
2479
2480 #undef ST
2481
2482     ...
2483     }
2484
2485 For any old-timers reading this who are familiar with the old recursive
2486 approach, the code above is equivalent to:
2487
2488     case IFMATCH: // we are executing the IFMATCH op, (?=A)B
2489     {
2490         int foo = ...
2491         ...
2492         if (regmatch(A)) {
2493             next = B;
2494             bar = foo;
2495             break;
2496         }
2497         ...;   // do some housekeeping, then ...
2498         sayNO; // propagate the failure
2499     }
2500
2501 The topmost backtrack state, pointed to by st, is usually free. If you
2502 want to claim it, populate any ST.foo fields in it with values you wish to
2503 save, then do one of
2504
2505         PUSH_STATE_GOTO(resume_state, node);
2506         PUSH_YES_STATE_GOTO(resume_state, node);
2507
2508 which sets that backtrack state's resume value to 'resume_state', pushes a
2509 new free entry to the top of the backtrack stack, then goes to 'node'.
2510 On backtracking, the free slot is popped, and the saved state becomes the
2511 new free state. An ST.foo field in this new top state can be temporarily
2512 accessed to retrieve values, but once the main loop is re-entered, it
2513 becomes available for reuse.
2514
2515 Note that the depth of the backtrack stack constantly increases during the
2516 left-to-right execution of the pattern, rather than going up and down with
2517 the pattern nesting. For example the stack is at its maximum at Z at the
2518 end of the pattern, rather than at X in the following:
2519
2520     /(((X)+)+)+....(Y)+....Z/
2521
2522 The only exceptions to this are lookahead/behind assertions and the cut,
2523 (?>A), which pop all the backtrack states associated with A before
2524 continuing.
2525
2526 Bascktrack state structs are allocated in slabs of about 4K in size.
2527 PL_regmatch_state and st always point to the currently active state,
2528 and PL_regmatch_slab points to the slab currently containing
2529 PL_regmatch_state.  The first time regmatch() is called, the first slab is
2530 allocated, and is never freed until interpreter destruction. When the slab
2531 is full, a new one is allocated and chained to the end. At exit from
2532 regmatch(), slabs allocated since entry are freed.
2533
2534 */
2535
2536
2537 #define DEBUG_STATE_pp(pp)                                  \
2538     DEBUG_STATE_r({                                         \
2539         DUMP_EXEC_POS(locinput, scan, do_utf8);             \
2540         PerlIO_printf(Perl_debug_log,                       \
2541             "    %*s"pp" %s%s%s%s%s\n",                     \
2542             depth*2, "",                                    \
2543             PL_reg_name[st->resume_state],                     \
2544             ((st==yes_state||st==mark_state) ? "[" : ""),   \
2545             ((st==yes_state) ? "Y" : ""),                   \
2546             ((st==mark_state) ? "M" : ""),                  \
2547             ((st==yes_state||st==mark_state) ? "]" : "")    \
2548         );                                                  \
2549     });
2550
2551
2552 #define REG_NODE_NUM(x) ((x) ? (int)((x)-prog) : -1)
2553
2554 #ifdef DEBUGGING
2555
2556 STATIC void
2557 S_debug_start_match(pTHX_ const REGEXP *prog, const bool do_utf8,
2558     const char *start, const char *end, const char *blurb)
2559 {
2560     const bool utf8_pat = RX_UTF8(prog) ? 1 : 0;
2561     if (!PL_colorset)
2562             reginitcolors();
2563     {
2564         RE_PV_QUOTED_DECL(s0, utf8_pat, PERL_DEBUG_PAD_ZERO(0),
2565             RX_PRECOMP(prog), RX_PRELEN(prog), 60);
2566
2567         RE_PV_QUOTED_DECL(s1, do_utf8, PERL_DEBUG_PAD_ZERO(1),
2568             start, end - start, 60);
2569
2570         PerlIO_printf(Perl_debug_log,
2571             "%s%s REx%s %s against %s\n",
2572                        PL_colors[4], blurb, PL_colors[5], s0, s1);
2573
2574         if (do_utf8||utf8_pat)
2575             PerlIO_printf(Perl_debug_log, "UTF-8 %s%s%s...\n",
2576                 utf8_pat ? "pattern" : "",
2577                 utf8_pat && do_utf8 ? " and " : "",
2578                 do_utf8 ? "string" : ""
2579             );
2580     }
2581 }
2582
2583 STATIC void
2584 S_dump_exec_pos(pTHX_ const char *locinput,
2585                       const regnode *scan,
2586                       const char *loc_regeol,
2587                       const char *loc_bostr,
2588                       const char *loc_reg_starttry,
2589                       const bool do_utf8)
2590 {
2591     const int docolor = *PL_colors[0] || *PL_colors[2] || *PL_colors[4];
2592     const int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
2593     int l = (loc_regeol - locinput) > taill ? taill : (loc_regeol - locinput);
2594     /* The part of the string before starttry has one color
2595        (pref0_len chars), between starttry and current
2596        position another one (pref_len - pref0_len chars),
2597        after the current position the third one.
2598        We assume that pref0_len <= pref_len, otherwise we
2599        decrease pref0_len.  */
2600     int pref_len = (locinput - loc_bostr) > (5 + taill) - l
2601         ? (5 + taill) - l : locinput - loc_bostr;
2602     int pref0_len;
2603
2604     while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput - pref_len)))
2605         pref_len++;
2606     pref0_len = pref_len  - (locinput - loc_reg_starttry);
2607     if (l + pref_len < (5 + taill) && l < loc_regeol - locinput)
2608         l = ( loc_regeol - locinput > (5 + taill) - pref_len
2609               ? (5 + taill) - pref_len : loc_regeol - locinput);
2610     while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput + l)))
2611         l--;
2612     if (pref0_len < 0)
2613         pref0_len = 0;
2614     if (pref0_len > pref_len)
2615         pref0_len = pref_len;
2616     {
2617         const int is_uni = (do_utf8 && OP(scan) != CANY) ? 1 : 0;
2618
2619         RE_PV_COLOR_DECL(s0,len0,is_uni,PERL_DEBUG_PAD(0),
2620             (locinput - pref_len),pref0_len, 60, 4, 5);
2621
2622         RE_PV_COLOR_DECL(s1,len1,is_uni,PERL_DEBUG_PAD(1),
2623                     (locinput - pref_len + pref0_len),
2624                     pref_len - pref0_len, 60, 2, 3);
2625
2626         RE_PV_COLOR_DECL(s2,len2,is_uni,PERL_DEBUG_PAD(2),
2627                     locinput, loc_regeol - locinput, 10, 0, 1);
2628
2629         const STRLEN tlen=len0+len1+len2;
2630         PerlIO_printf(Perl_debug_log,
2631                     "%4"IVdf" <%.*s%.*s%s%.*s>%*s|",
2632                     (IV)(locinput - loc_bostr),
2633                     len0, s0,
2634                     len1, s1,
2635                     (docolor ? "" : "> <"),
2636                     len2, s2,
2637                     (int)(tlen > 19 ? 0 :  19 - tlen),
2638                     "");
2639     }
2640 }
2641
2642 #endif
2643
2644 /* reg_check_named_buff_matched()
2645  * Checks to see if a named buffer has matched. The data array of
2646  * buffer numbers corresponding to the buffer is expected to reside
2647  * in the regexp->data->data array in the slot stored in the ARG() of
2648  * node involved. Note that this routine doesn't actually care about the
2649  * name, that information is not preserved from compilation to execution.
2650  * Returns the index of the leftmost defined buffer with the given name
2651  * or 0 if non of the buffers matched.
2652  */
2653 STATIC I32
2654 S_reg_check_named_buff_matched(pTHX_ const regexp *rex, const regnode *scan) {
2655     I32 n;
2656     RXi_GET_DECL(rex,rexi);
2657     SV *sv_dat=(SV*)rexi->data->data[ ARG( scan ) ];
2658     I32 *nums=(I32*)SvPVX(sv_dat);
2659     for ( n=0; n<SvIVX(sv_dat); n++ ) {
2660         if ((I32)*PL_reglastparen >= nums[n] &&
2661             PL_regoffs[nums[n]].end != -1)
2662         {
2663             return nums[n];
2664         }
2665     }
2666     return 0;
2667 }
2668
2669
2670 /* free all slabs above current one  - called during LEAVE_SCOPE */
2671
2672 STATIC void
2673 S_clear_backtrack_stack(pTHX_ void *p)
2674 {
2675     regmatch_slab *s = PL_regmatch_slab->next;
2676     PERL_UNUSED_ARG(p);
2677
2678     if (!s)
2679         return;
2680     PL_regmatch_slab->next = NULL;
2681     while (s) {
2682         regmatch_slab * const osl = s;
2683         s = s->next;
2684         Safefree(osl);
2685     }
2686 }
2687
2688
2689 #define SETREX(Re1,Re2) \
2690     if (PL_reg_eval_set) PM_SETRE((PL_reg_curpm), (Re2)); \
2691     Re1 = (Re2)
2692
2693 STATIC I32                      /* 0 failure, 1 success */
2694 S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
2695 {
2696 #if PERL_VERSION < 9 && !defined(PERL_CORE)
2697     dMY_CXT;
2698 #endif
2699     dVAR;
2700     register const bool do_utf8 = PL_reg_match_utf8;
2701     const U32 uniflags = UTF8_ALLOW_DEFAULT;
2702
2703     REGEXP *rex_sv = reginfo->prog;
2704     regexp *rex = (struct regexp *)SvANY(rex_sv);
2705     RXi_GET_DECL(rex,rexi);
2706
2707     I32 oldsave;
2708
2709     /* the current state. This is a cached copy of PL_regmatch_state */
2710     register regmatch_state *st;
2711
2712     /* cache heavy used fields of st in registers */
2713     register regnode *scan;
2714     register regnode *next;
2715     register U32 n = 0; /* general value; init to avoid compiler warning */
2716     register I32 ln = 0; /* len or last;  init to avoid compiler warning */
2717     register char *locinput = PL_reginput;
2718     register I32 nextchr;   /* is always set to UCHARAT(locinput) */
2719
2720     bool result = 0;        /* return value of S_regmatch */
2721     int depth = 0;          /* depth of backtrack stack */
2722     U32 nochange_depth = 0; /* depth of GOSUB recursion with nochange */
2723     const U32 max_nochange_depth =
2724         (3 * rex->nparens > MAX_RECURSE_EVAL_NOCHANGE_DEPTH) ?
2725         3 * rex->nparens : MAX_RECURSE_EVAL_NOCHANGE_DEPTH;
2726
2727     regmatch_state *yes_state = NULL; /* state to pop to on success of
2728                                                             subpattern */
2729     /* mark_state piggy backs on the yes_state logic so that when we unwind
2730        the stack on success we can update the mark_state as we go */
2731     regmatch_state *mark_state = NULL; /* last mark state we have seen */
2732
2733     regmatch_state *cur_eval = NULL; /* most recent EVAL_AB state */
2734     struct regmatch_state  *cur_curlyx = NULL; /* most recent curlyx */
2735     U32 state_num;
2736     bool no_final = 0;      /* prevent failure from backtracking? */
2737     bool do_cutgroup = 0;   /* no_final only until next branch/trie entry */
2738     char *startpoint = PL_reginput;
2739     SV *popmark = NULL;     /* are we looking for a mark? */
2740     SV *sv_commit = NULL;   /* last mark name seen in failure */
2741     SV *sv_yes_mark = NULL; /* last mark name we have seen
2742                                during a successfull match */
2743     U32 lastopen = 0;       /* last open we saw */
2744     bool has_cutgroup = RX_HAS_CUTGROUP(rex) ? 1 : 0;
2745
2746     SV* const oreplsv = GvSV(PL_replgv);
2747
2748
2749     /* these three flags are set by various ops to signal information to
2750      * the very next op. They have a useful lifetime of exactly one loop
2751      * iteration, and are not preserved or restored by state pushes/pops
2752      */
2753     bool sw = 0;            /* the condition value in (?(cond)a|b) */
2754     bool minmod = 0;        /* the next "{n,m}" is a "{n,m}?" */
2755     int logical = 0;        /* the following EVAL is:
2756                                 0: (?{...})
2757                                 1: (?(?{...})X|Y)
2758                                 2: (??{...})
2759                                or the following IFMATCH/UNLESSM is:
2760                                 false: plain (?=foo)
2761                                 true:  used as a condition: (?(?=foo))
2762                             */
2763
2764 #ifdef DEBUGGING
2765     GET_RE_DEBUG_FLAGS_DECL;
2766 #endif
2767
2768     DEBUG_OPTIMISE_r( DEBUG_EXECUTE_r({
2769             PerlIO_printf(Perl_debug_log,"regmatch start\n");
2770     }));
2771     /* on first ever call to regmatch, allocate first slab */
2772     if (!PL_regmatch_slab) {
2773         Newx(PL_regmatch_slab, 1, regmatch_slab);
2774         PL_regmatch_slab->prev = NULL;
2775         PL_regmatch_slab->next = NULL;
2776         PL_regmatch_state = SLAB_FIRST(PL_regmatch_slab);
2777     }
2778
2779     oldsave = PL_savestack_ix;
2780     SAVEDESTRUCTOR_X(S_clear_backtrack_stack, NULL);
2781     SAVEVPTR(PL_regmatch_slab);
2782     SAVEVPTR(PL_regmatch_state);
2783
2784     /* grab next free state slot */
2785     st = ++PL_regmatch_state;
2786     if (st >  SLAB_LAST(PL_regmatch_slab))
2787         st = PL_regmatch_state = S_push_slab(aTHX);
2788
2789     /* Note that nextchr is a byte even in UTF */
2790     nextchr = UCHARAT(locinput);
2791     scan = prog;
2792     while (scan != NULL) {
2793
2794         DEBUG_EXECUTE_r( {
2795             SV * const prop = sv_newmortal();
2796             regnode *rnext=regnext(scan);
2797             DUMP_EXEC_POS( locinput, scan, do_utf8 );
2798             regprop(rex, prop, scan);
2799
2800             PerlIO_printf(Perl_debug_log,
2801                     "%3"IVdf":%*s%s(%"IVdf")\n",
2802                     (IV)(scan - rexi->program), depth*2, "",
2803                     SvPVX_const(prop),
2804                     (PL_regkind[OP(scan)] == END || !rnext) ?
2805                         0 : (IV)(rnext - rexi->program));
2806         });
2807
2808         next = scan + NEXT_OFF(scan);
2809         if (next == scan)
2810             next = NULL;
2811         state_num = OP(scan);
2812
2813       reenter_switch:
2814         switch (state_num) {
2815         case BOL:
2816             if (locinput == PL_bostr)
2817             {
2818                 /* reginfo->till = reginfo->bol; */
2819                 break;
2820             }
2821             sayNO;
2822         case MBOL:
2823             if (locinput == PL_bostr ||
2824                 ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n'))
2825             {
2826                 break;
2827             }
2828             sayNO;
2829         case SBOL:
2830             if (locinput == PL_bostr)
2831                 break;
2832             sayNO;
2833         case GPOS:
2834             if (locinput == reginfo->ganch)
2835                 break;
2836             sayNO;
2837
2838         case KEEPS:
2839             /* update the startpoint */
2840             st->u.keeper.val = PL_regoffs[0].start;
2841             PL_reginput = locinput;
2842             PL_regoffs[0].start = locinput - PL_bostr;
2843             PUSH_STATE_GOTO(KEEPS_next, next);
2844             /*NOT-REACHED*/
2845         case KEEPS_next_fail:
2846             /* rollback the start point change */
2847             PL_regoffs[0].start = st->u.keeper.val;
2848             sayNO_SILENT;
2849             /*NOT-REACHED*/
2850         case EOL:
2851                 goto seol;
2852         case MEOL:
2853             if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
2854                 sayNO;
2855             break;
2856         case SEOL:
2857           seol:
2858             if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
2859                 sayNO;
2860             if (PL_regeol - locinput > 1)
2861                 sayNO;
2862             break;
2863         case EOS:
2864             if (PL_regeol != locinput)
2865                 sayNO;
2866             break;
2867         case SANY:
2868             if (!nextchr && locinput >= PL_regeol)
2869                 sayNO;
2870             if (do_utf8) {
2871                 locinput += PL_utf8skip[nextchr];
2872                 if (locinput > PL_regeol)
2873                     sayNO;
2874                 nextchr = UCHARAT(locinput);
2875             }
2876             else
2877                 nextchr = UCHARAT(++locinput);
2878             break;
2879         case CANY:
2880             if (!nextchr && locinput >= PL_regeol)
2881                 sayNO;
2882             nextchr = UCHARAT(++locinput);
2883             break;
2884         case REG_ANY:
2885             if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
2886                 sayNO;
2887             if (do_utf8) {
2888                 locinput += PL_utf8skip[nextchr];
2889                 if (locinput > PL_regeol)
2890                     sayNO;
2891                 nextchr = UCHARAT(locinput);
2892             }
2893             else
2894                 nextchr = UCHARAT(++locinput);
2895             break;
2896
2897 #undef  ST
2898 #define ST st->u.trie
2899         case TRIEC:
2900             /* In this case the charclass data is available inline so
2901                we can fail fast without a lot of extra overhead.
2902              */
2903             if (scan->flags == EXACT || !do_utf8) {
2904                 if(!ANYOF_BITMAP_TEST(scan, *locinput)) {
2905                     DEBUG_EXECUTE_r(
2906                         PerlIO_printf(Perl_debug_log,
2907                                   "%*s  %sfailed to match trie start class...%s\n",
2908                                   REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5])
2909                     );
2910                     sayNO_SILENT;
2911                     /* NOTREACHED */
2912                 }
2913             }
2914             /* FALL THROUGH */
2915         case TRIE:
2916             {
2917                 /* what type of TRIE am I? (utf8 makes this contextual) */
2918                 DECL_TRIE_TYPE(scan);
2919
2920                 /* what trie are we using right now */
2921                 reg_trie_data * const trie
2922                     = (reg_trie_data*)rexi->data->data[ ARG( scan ) ];
2923                 HV * widecharmap = (HV *)rexi->data->data[ ARG( scan ) + 1 ];
2924                 U32 state = trie->startstate;
2925
2926                 if (trie->bitmap && trie_type != trie_utf8_fold &&
2927                     !TRIE_BITMAP_TEST(trie,*locinput)
2928                 ) {
2929                     if (trie->states[ state ].wordnum) {
2930                          DEBUG_EXECUTE_r(
2931                             PerlIO_printf(Perl_debug_log,
2932                                           "%*s  %smatched empty string...%s\n",
2933                                           REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5])
2934                         );
2935                         break;
2936                     } else {
2937                         DEBUG_EXECUTE_r(
2938                             PerlIO_printf(Perl_debug_log,
2939                                           "%*s  %sfailed to match trie start class...%s\n",
2940                                           REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5])
2941                         );
2942                         sayNO_SILENT;
2943                    }
2944                 }
2945
2946             {
2947                 U8 *uc = ( U8* )locinput;
2948
2949                 STRLEN len = 0;
2950                 STRLEN foldlen = 0;
2951                 U8 *uscan = (U8*)NULL;
2952                 STRLEN bufflen=0;
2953                 SV *sv_accept_buff = NULL;
2954                 U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ];
2955
2956                 ST.accepted = 0; /* how many accepting states we have seen */
2957                 ST.B = next;
2958                 ST.jump = trie->jump;
2959                 ST.me = scan;
2960                 /*
2961                    traverse the TRIE keeping track of all accepting states
2962                    we transition through until we get to a failing node.
2963                 */
2964
2965                 while ( state && uc <= (U8*)PL_regeol ) {
2966                     U32 base = trie->states[ state ].trans.base;
2967                     UV uvc = 0;
2968                     U16 charid;
2969                     /* We use charid to hold the wordnum as we don't use it
2970                        for charid until after we have done the wordnum logic.
2971                        We define an alias just so that the wordnum logic reads
2972                        more naturally. */
2973
2974 #define got_wordnum charid
2975                     got_wordnum = trie->states[ state ].wordnum;
2976
2977                     if ( got_wordnum ) {
2978                         if ( ! ST.accepted ) {
2979                             ENTER;
2980                             /* SAVETMPS; */ /* XXX is this necessary? dmq */
2981                             bufflen = TRIE_INITAL_ACCEPT_BUFFLEN;
2982                             sv_accept_buff=newSV(bufflen *
2983                                             sizeof(reg_trie_accepted) - 1);
2984                             SvCUR_set(sv_accept_buff, 0);
2985                             SvPOK_on(sv_accept_buff);
2986                             sv_2mortal(sv_accept_buff);
2987                             SAVETMPS;
2988                             ST.accept_buff =
2989                                 (reg_trie_accepted*)SvPV_nolen(sv_accept_buff );
2990                         }
2991                         do {
2992                             if (ST.accepted >= bufflen) {
2993                                 bufflen *= 2;
2994                                 ST.accept_buff =(reg_trie_accepted*)
2995                                     SvGROW(sv_accept_buff,
2996                                         bufflen * sizeof(reg_trie_accepted));
2997                             }
2998                             SvCUR_set(sv_accept_buff,SvCUR(sv_accept_buff)
2999                                 + sizeof(reg_trie_accepted));
3000
3001
3002                             ST.accept_buff[ST.accepted].wordnum = got_wordnum;
3003                             ST.accept_buff[ST.accepted].endpos = uc;
3004                             ++ST.accepted;
3005                         } while (trie->nextword && (got_wordnum= trie->nextword[got_wordnum]));
3006                     }
3007 #undef got_wordnum
3008
3009                     DEBUG_TRIE_EXECUTE_r({
3010                                 DUMP_EXEC_POS( (char *)uc, scan, do_utf8 );
3011                                 PerlIO_printf( Perl_debug_log,
3012                                     "%*s  %sState: %4"UVxf" Accepted: %4"UVxf" ",
3013                                     2+depth * 2, "", PL_colors[4],
3014                                     (UV)state, (UV)ST.accepted );
3015                     });
3016
3017                     if ( base ) {
3018                         REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc,
3019                                              uscan, len, uvc, charid, foldlen,
3020                                              foldbuf, uniflags);
3021
3022                         if (charid &&
3023                              (base + charid > trie->uniquecharcount )
3024                              && (base + charid - 1 - trie->uniquecharcount
3025                                     < trie->lasttrans)
3026                              && trie->trans[base + charid - 1 -
3027                                     trie->uniquecharcount].check == state)
3028                         {
3029                             state = trie->trans[base + charid - 1 -
3030                                 trie->uniquecharcount ].next;
3031                         }
3032                         else {
3033                             state = 0;
3034                         }
3035                         uc += len;
3036
3037                     }
3038                     else {
3039                         state = 0;
3040                     }
3041                     DEBUG_TRIE_EXECUTE_r(
3042                         PerlIO_printf( Perl_debug_log,
3043                             "Charid:%3x CP:%4"UVxf" After State: %4"UVxf"%s\n",
3044                             charid, uvc, (UV)state, PL_colors[5] );
3045                     );
3046                 }
3047                 if (!ST.accepted )
3048                    sayNO;
3049
3050                 DEBUG_EXECUTE_r(
3051                     PerlIO_printf( Perl_debug_log,
3052                         "%*s  %sgot %"IVdf" possible matches%s\n",
3053                         REPORT_CODE_OFF + depth * 2, "",
3054                         PL_colors[4], (IV)ST.accepted, PL_colors[5] );
3055                 );
3056             }}
3057             goto trie_first_try; /* jump into the fail handler */
3058             /* NOTREACHED */
3059         case TRIE_next_fail: /* we failed - try next alterative */
3060             if ( ST.jump) {
3061                 REGCP_UNWIND(ST.cp);
3062                 for (n = *PL_reglastparen; n > ST.lastparen; n--)
3063                     PL_regoffs[n].end = -1;
3064                 *PL_reglastparen = n;
3065             }
3066           trie_first_try:
3067             if (do_cutgroup) {
3068                 do_cutgroup = 0;
3069                 no_final = 0;
3070             }
3071
3072             if ( ST.jump) {
3073                 ST.lastparen = *PL_reglastparen;
3074                 REGCP_SET(ST.cp);
3075             }
3076             if ( ST.accepted == 1 ) {
3077                 /* only one choice left - just continue */
3078                 DEBUG_EXECUTE_r({
3079                     AV *const trie_words
3080                         = (AV *) rexi->data->data[ARG(ST.me)+TRIE_WORDS_OFFSET];
3081                     SV ** const tmp = av_fetch( trie_words,
3082                         ST.accept_buff[ 0 ].wordnum-1, 0 );
3083                     SV *sv= tmp ? sv_newmortal() : NULL;
3084
3085                     PerlIO_printf( Perl_debug_log,
3086                         "%*s  %sonly one match left: #%d <%s>%s\n",
3087                         REPORT_CODE_OFF+depth*2, "", PL_colors[4],
3088                         ST.accept_buff[ 0 ].wordnum,
3089                         tmp ? pv_pretty(sv, SvPV_nolen_const(*tmp), SvCUR(*tmp), 0,
3090                                 PL_colors[0], PL_colors[1],
3091                                 (SvUTF8(*tmp) ? PERL_PV_ESCAPE_UNI : 0)
3092                             )
3093                         : "not compiled under -Dr",
3094                         PL_colors[5] );
3095                 });
3096                 PL_reginput = (char *)ST.accept_buff[ 0 ].endpos;
3097                 /* in this case we free tmps/leave before we call regmatch
3098                    as we wont be using accept_buff again. */
3099
3100                 locinput = PL_reginput;
3101                 nextchr = UCHARAT(locinput);
3102                 if ( !ST.jump || !ST.jump[ST.accept_buff[0].wordnum])
3103                     scan = ST.B;
3104                 else
3105                     scan = ST.me + ST.jump[ST.accept_buff[0].wordnum];
3106                 if (!has_cutgroup) {
3107                     FREETMPS;
3108                     LEAVE;
3109                 } else {
3110                     ST.accepted--;
3111                     PUSH_YES_STATE_GOTO(TRIE_next, scan);
3112                 }
3113
3114                 continue; /* execute rest of RE */
3115             }
3116
3117             if ( !ST.accepted-- ) {
3118                 DEBUG_EXECUTE_r({
3119                     PerlIO_printf( Perl_debug_log,
3120                         "%*s  %sTRIE failed...%s\n",
3121                         REPORT_CODE_OFF+depth*2, "",
3122                         PL_colors[4],
3123                         PL_colors[5] );
3124                 });
3125                 FREETMPS;
3126                 LEAVE;
3127                 sayNO_SILENT;
3128                 /*NOTREACHED*/
3129             }
3130
3131             /*
3132                There are at least two accepting states left.  Presumably
3133                the number of accepting states is going to be low,
3134                typically two. So we simply scan through to find the one
3135                with lowest wordnum.  Once we find it, we swap the last
3136                state into its place and decrement the size. We then try to
3137                match the rest of the pattern at the point where the word
3138                ends. If we succeed, control just continues along the
3139                regex; if we fail we return here to try the next accepting
3140                state
3141              */
3142
3143             {
3144                 U32 best = 0;
3145                 U32 cur;
3146                 for( cur = 1 ; cur <= ST.accepted ; cur++ ) {
3147                     DEBUG_TRIE_EXECUTE_r(
3148                         PerlIO_printf( Perl_debug_log,
3149                             "%*s  %sgot %"IVdf" (%d) as best, looking at %"IVdf" (%d)%s\n",
3150                             REPORT_CODE_OFF + depth * 2, "", PL_colors[4],
3151                             (IV)best, ST.accept_buff[ best ].wordnum, (IV)cur,
3152                             ST.accept_buff[ cur ].wordnum, PL_colors[5] );
3153                     );
3154
3155                     if (ST.accept_buff[cur].wordnum <
3156                             ST.accept_buff[best].wordnum)
3157                         best = cur;
3158                 }
3159
3160                 DEBUG_EXECUTE_r({
3161                     AV *const trie_words
3162                         = (AV *) rexi->data->data[ARG(ST.me)+TRIE_WORDS_OFFSET];
3163                     SV ** const tmp = av_fetch( trie_words,
3164                         ST.accept_buff[ best ].wordnum - 1, 0 );
3165                     regnode *nextop=(!ST.jump || !ST.jump[ST.accept_buff[best].wordnum]) ?
3166                                     ST.B :
3167                                     ST.me + ST.jump[ST.accept_buff[best].wordnum];
3168                     SV *sv= tmp ? sv_newmortal() : NULL;
3169
3170                     PerlIO_printf( Perl_debug_log,
3171                         "%*s  %strying alternation #%d <%s> at node #%d %s\n",
3172                         REPORT_CODE_OFF+depth*2, "", PL_colors[4],
3173                         ST.accept_buff[best].wordnum,
3174                         tmp ? pv_pretty(sv, SvPV_nolen_const(*tmp), SvCUR(*tmp), 0,
3175                                 PL_colors[0], PL_colors[1],
3176                                 (SvUTF8(*tmp) ? PERL_PV_ESCAPE_UNI : 0)
3177                             ) : "not compiled under -Dr",
3178                             REG_NODE_NUM(nextop),
3179                         PL_colors[5] );
3180                 });
3181
3182                 if ( best<ST.accepted ) {
3183                     reg_trie_accepted tmp = ST.accept_buff[ best ];
3184                     ST.accept_buff[ best ] = ST.accept_buff[ ST.accepted ];
3185                     ST.accept_buff[ ST.accepted ] = tmp;
3186                     best = ST.accepted;
3187                 }
3188                 PL_reginput = (char *)ST.accept_buff[ best ].endpos;
3189                 if ( !ST.jump || !ST.jump[ST.accept_buff[best].wordnum]) {
3190                     scan = ST.B;
3191                 } else {
3192                     scan = ST.me + ST.jump[ST.accept_buff[best].wordnum];
3193                 }
3194                 PUSH_YES_STATE_GOTO(TRIE_next, scan);
3195                 /* NOTREACHED */
3196             }
3197             /* NOTREACHED */
3198         case TRIE_next:
3199             FREETMPS;
3200             LEAVE;
3201             sayYES;
3202 #undef  ST
3203
3204         case EXACT: {
3205             char *s = STRING(scan);
3206             ln = STR_LEN(scan);
3207             if (do_utf8 != UTF) {
3208                 /* The target and the pattern have differing utf8ness. */
3209                 char *l = locinput;
3210                 const char * const e = s + ln;
3211
3212                 if (do_utf8) {
3213                     /* The target is utf8, the pattern is not utf8. */
3214                     while (s < e) {
3215                         STRLEN ulen;
3216                         if (l >= PL_regeol)
3217                              sayNO;
3218                         if (NATIVE_TO_UNI(*(U8*)s) !=
3219                             utf8n_to_uvuni((U8*)l, UTF8_MAXBYTES, &ulen,
3220                                             uniflags))
3221                              sayNO;
3222                         l += ulen;
3223                         s ++;
3224                     }
3225                 }
3226                 else {
3227                     /* The target is not utf8, the pattern is utf8. */
3228                     while (s < e) {
3229                         STRLEN ulen;
3230                         if (l >= PL_regeol)
3231                             sayNO;
3232                         if (NATIVE_TO_UNI(*((U8*)l)) !=
3233                             utf8n_to_uvuni((U8*)s, UTF8_MAXBYTES, &ulen,
3234                                            uniflags))
3235                             sayNO;
3236                         s += ulen;
3237                         l ++;
3238                     }
3239                 }
3240                 locinput = l;
3241                 nextchr = UCHARAT(locinput);
3242                 break;
3243             }
3244             /* The target and the pattern have the same utf8ness. */
3245             /* Inline the first character, for speed. */
3246             if (UCHARAT(s) != nextchr)
3247                 sayNO;
3248             if (PL_regeol - locinput < ln)
3249                 sayNO;
3250             if (ln > 1 && memNE(s, locinput, ln))
3251                 sayNO;
3252             locinput += ln;
3253             nextchr = UCHARAT(locinput);
3254             break;
3255             }
3256         case EXACTFL:
3257             PL_reg_flags |= RF_tainted;
3258             /* FALL THROUGH */
3259         case EXACTF: {
3260             char * const s = STRING(scan);
3261             ln = STR_LEN(scan);
3262
3263             if (do_utf8 || UTF) {
3264               /* Either target or the pattern are utf8. */
3265                 const char * const l = locinput;
3266                 char *e = PL_regeol;
3267
3268                 if (ibcmp_utf8(s, 0,  ln, (bool)UTF,
3269                                l, &e, 0,  do_utf8)) {
3270                      /* One more case for the sharp s:
3271                       * pack("U0U*", 0xDF) =~ /ss/i,
3272                       * the 0xC3 0x9F are the UTF-8
3273                       * byte sequence for the U+00DF. */
3274
3275                      if (!(do_utf8 &&
3276                            toLOWER(s[0]) == 's' &&
3277                            ln >= 2 &&
3278                            toLOWER(s[1]) == 's' &&
3279                            (U8)l[0] == 0xC3 &&
3280                            e - l >= 2 &&
3281                            (U8)l[1] == 0x9F))
3282                           sayNO;
3283                 }
3284                 locinput = e;
3285                 nextchr = UCHARAT(locinput);
3286                 break;
3287             }
3288
3289             /* Neither the target and the pattern are utf8. */
3290
3291             /* Inline the first character, for speed. */
3292             if (UCHARAT(s) != nextchr &&
3293                 UCHARAT(s) != ((OP(scan) == EXACTF)
3294                                ? PL_fold : PL_fold_locale)[nextchr])
3295                 sayNO;
3296             if (PL_regeol - locinput < ln)
3297                 sayNO;
3298             if (ln > 1 && (OP(scan) == EXACTF
3299                            ? ibcmp(s, locinput, ln)
3300                            : ibcmp_locale(s, locinput, ln)))
3301                 sayNO;
3302             locinput += ln;
3303             nextchr = UCHARAT(locinput);
3304             break;
3305             }
3306         case ANYOF:
3307             if (do_utf8) {
3308                 STRLEN inclasslen = PL_regeol - locinput;
3309
3310                 if (!reginclass(rex, scan, (U8*)locinput, &inclasslen, do_utf8))
3311                     goto anyof_fail;
3312                 if (locinput >= PL_regeol)
3313                     sayNO;
3314                 locinput += inclasslen ? inclasslen : UTF8SKIP(locinput);
3315                 nextchr = UCHARAT(locinput);
3316                 break;
3317             }
3318             else {
3319                 if (nextchr < 0)
3320                     nextchr = UCHARAT(locinput);
3321                 if (!REGINCLASS(rex, scan, (U8*)locinput))
3322                     goto anyof_fail;
3323                 if (!nextchr && locinput >= PL_regeol)
3324                     sayNO;
3325                 nextchr = UCHARAT(++locinput);
3326                 break;
3327             }
3328         anyof_fail:
3329             /* If we might have the case of the German sharp s
3330              * in a casefolding Unicode character class. */
3331
3332             if (ANYOF_FOLD_SHARP_S(scan, locinput, PL_regeol)) {
3333                  locinput += SHARP_S_SKIP;
3334                  nextchr = UCHARAT(locinput);
3335             }
3336             else
3337                  sayNO;
3338             break;
3339         case ALNUML:
3340             PL_reg_flags |= RF_tainted;
3341             /* FALL THROUGH */
3342         case ALNUM:
3343             if (!nextchr)
3344                 sayNO;
3345             if (do_utf8) {
3346                 LOAD_UTF8_CHARCLASS_ALNUM();
3347                 if (!(OP(scan) == ALNUM
3348                       ? (bool)swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
3349                       : isALNUM_LC_utf8((U8*)locinput)))
3350                 {
3351                     sayNO;
3352                 }
3353                 locinput += PL_utf8skip[nextchr];
3354                 nextchr = UCHARAT(locinput);
3355                 break;
3356             }
3357             if (!(OP(scan) == ALNUM
3358                   ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
3359                 sayNO;
3360             nextchr = UCHARAT(++locinput);
3361             break;
3362         case NALNUML:
3363             PL_reg_flags |= RF_tainted;
3364             /* FALL THROUGH */
3365         case NALNUM:
3366             if (!nextchr && locinput >= PL_regeol)
3367                 sayNO;
3368             if (do_utf8) {
3369                 LOAD_UTF8_CHARCLASS_ALNUM();
3370                 if (OP(scan) == NALNUM
3371                     ? (bool)swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
3372                     : isALNUM_LC_utf8((U8*)locinput))
3373                 {
3374                     sayNO;
3375                 }
3376                 locinput += PL_utf8skip[nextchr];
3377                 nextchr = UCHARAT(locinput);
3378                 break;
3379             }
3380             if (OP(scan) == NALNUM
3381                 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
3382                 sayNO;
3383             nextchr = UCHARAT(++locinput);
3384             break;
3385         case BOUNDL:
3386         case NBOUNDL:
3387             PL_reg_flags |= RF_tainted;
3388             /* FALL THROUGH */
3389         case BOUND:
3390         case NBOUND:
3391             /* was last char in word? */
3392             if (do_utf8) {
3393                 if (locinput == PL_bostr)
3394                     ln = '\n';
3395                 else {
3396                     const U8 * const r = reghop3((U8*)locinput, -1, (U8*)PL_bostr);
3397
3398                     ln = utf8n_to_uvchr(r, UTF8SKIP(r), 0, uniflags);
3399                 }
3400                 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
3401                     ln = isALNUM_uni(ln);
3402                     LOAD_UTF8_CHARCLASS_ALNUM();
3403                     n = swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8);
3404                 }
3405                 else {
3406                     ln = isALNUM_LC_uvchr(UNI_TO_NATIVE(ln));
3407                     n = isALNUM_LC_utf8((U8*)locinput);
3408                 }
3409             }
3410             else {
3411                 ln = (locinput != PL_bostr) ?
3412                     UCHARAT(locinput - 1) : '\n';
3413                 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
3414                     ln = isALNUM(ln);
3415                     n = isALNUM(nextchr);
3416                 }
3417                 else {
3418                     ln = isALNUM_LC(ln);
3419                     n = isALNUM_LC(nextchr);
3420                 }
3421             }
3422             if (((!ln) == (!n)) == (OP(scan) == BOUND ||
3423                                     OP(scan) == BOUNDL))
3424                     sayNO;
3425             break;
3426         case SPACEL:
3427             PL_reg_flags |= RF_tainted;
3428             /* FALL THROUGH */
3429         case SPACE:
3430             if (!nextchr)
3431                 sayNO;
3432             if (do_utf8) {
3433                 if (UTF8_IS_CONTINUED(nextchr)) {
3434                     LOAD_UTF8_CHARCLASS_SPACE();
3435                     if (!(OP(scan) == SPACE
3436                           ? (bool)swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
3437                           : isSPACE_LC_utf8((U8*)locinput)))
3438                     {
3439                         sayNO;
3440                     }
3441                     locinput += PL_utf8skip[nextchr];
3442                     nextchr = UCHARAT(locinput);
3443                     break;
3444                 }
3445                 if (!(OP(scan) == SPACE
3446                       ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
3447                     sayNO;
3448                 nextchr = UCHARAT(++locinput);
3449             }
3450             else {
3451                 if (!(OP(scan) == SPACE
3452                       ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
3453                     sayNO;
3454                 nextchr = UCHARAT(++locinput);
3455             }
3456             break;
3457         case NSPACEL:
3458             PL_reg_flags |= RF_tainted;
3459             /* FALL THROUGH */
3460         case NSPACE:
3461             if (!nextchr && locinput >= PL_regeol)
3462                 sayNO;
3463             if (do_utf8) {
3464                 LOAD_UTF8_CHARCLASS_SPACE();
3465                 if (OP(scan) == NSPACE
3466                     ? (bool)swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
3467                     : isSPACE_LC_utf8((U8*)locinput))
3468                 {
3469                     sayNO;
3470                 }
3471                 locinput += PL_utf8skip[nextchr];
3472                 nextchr = UCHARAT(locinput);
3473                 break;
3474             }
3475             if (OP(scan) == NSPACE
3476                 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
3477                 sayNO;
3478             nextchr = UCHARAT(++locinput);
3479             break;
3480         case DIGITL:
3481             PL_reg_flags |= RF_tainted;
3482             /* FALL THROUGH */
3483         case DIGIT:
3484             if (!nextchr)
3485                 sayNO;
3486             if (do_utf8) {
3487                 LOAD_UTF8_CHARCLASS_DIGIT();
3488                 if (!(OP(scan) == DIGIT
3489                       ? (bool)swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
3490                       : isDIGIT_LC_utf8((U8*)locinput)))
3491                 {
3492                     sayNO;
3493                 }
3494                 locinput += PL_utf8skip[nextchr];
3495                 nextchr = UCHARAT(locinput);
3496                 break;
3497             }
3498             if (!(OP(scan) == DIGIT
3499                   ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
3500                 sayNO;
3501             nextchr = UCHARAT(++locinput);
3502             break;
3503         case NDIGITL:
3504             PL_reg_flags |= RF_tainted;
3505             /* FALL THROUGH */
3506         case NDIGIT:
3507             if (!nextchr && locinput >= PL_regeol)
3508                 sayNO;
3509             if (do_utf8) {
3510                 LOAD_UTF8_CHARCLASS_DIGIT();
3511                 if (OP(scan) == NDIGIT
3512                     ? (bool)swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
3513                     : isDIGIT_LC_utf8((U8*)locinput))
3514                 {
3515                     sayNO;
3516                 }
3517                 locinput += PL_utf8skip[nextchr];
3518                 nextchr = UCHARAT(locinput);
3519                 break;
3520             }
3521             if (OP(scan) == NDIGIT
3522                 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
3523                 sayNO;
3524             nextchr = UCHARAT(++locinput);
3525             break;
3526         case CLUMP:
3527             if (locinput >= PL_regeol)
3528                 sayNO;
3529             if  (do_utf8) {
3530                 LOAD_UTF8_CHARCLASS_MARK();
3531                 if (swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
3532                     sayNO;
3533                 locinput += PL_utf8skip[nextchr];
3534                 while (locinput < PL_regeol &&
3535                        swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
3536                     locinput += UTF8SKIP(locinput);
3537                 if (locinput > PL_regeol)
3538                     sayNO;
3539             }
3540             else
3541                locinput++;
3542             nextchr = UCHARAT(locinput);
3543             break;
3544
3545         case NREFFL:
3546         {
3547             char *s;
3548             char type;
3549             PL_reg_flags |= RF_tainted;
3550             /* FALL THROUGH */
3551         case NREF:
3552         case NREFF:
3553             type = OP(scan);
3554             n = reg_check_named_buff_matched(rex,scan);
3555
3556             if ( n ) {
3557                 type = REF + ( type - NREF );
3558                 goto do_ref;
3559             } else {
3560                 sayNO;
3561             }
3562             /* unreached */
3563         case REFFL:
3564             PL_reg_flags |= RF_tainted;
3565             /* FALL THROUGH */
3566         case REF:
3567         case REFF:
3568             n = ARG(scan);  /* which paren pair */
3569             type = OP(scan);
3570           do_ref:
3571             ln = PL_regoffs[n].start;
3572             PL_reg_leftiter = PL_reg_maxiter;           /* Void cache */
3573             if (*PL_reglastparen < n || ln == -1)
3574                 sayNO;                  /* Do not match unless seen CLOSEn. */
3575             if (ln == PL_regoffs[n].end)
3576                 break;
3577
3578             s = PL_bostr + ln;
3579             if (do_utf8 && type != REF) {       /* REF can do byte comparison */
3580                 char *l = locinput;
3581                 const char *e = PL_bostr + PL_regoffs[n].end;
3582                 /*
3583                  * Note that we can't do the "other character" lookup trick as
3584                  * in the 8-bit case (no pun intended) because in Unicode we
3585                  * have to map both upper and title case to lower case.
3586                  */
3587                 if (type == REFF) {
3588                     while (s < e) {
3589                         STRLEN ulen1, ulen2;
3590                         U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
3591                         U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
3592
3593                         if (l >= PL_regeol)
3594                             sayNO;
3595                         toLOWER_utf8((U8*)s, tmpbuf1, &ulen1);
3596                         toLOWER_utf8((U8*)l, tmpbuf2, &ulen2);
3597                         if (ulen1 != ulen2 || memNE((char *)tmpbuf1, (char *)tmpbuf2, ulen1))
3598                             sayNO;
3599                         s += ulen1;
3600                         l += ulen2;
3601                     }
3602                 }
3603                 locinput = l;
3604                 nextchr = UCHARAT(locinput);
3605                 break;
3606             }
3607
3608             /* Inline the first character, for speed. */
3609             if (UCHARAT(s) != nextchr &&
3610                 (type == REF ||
3611                  (UCHARAT(s) != (type == REFF
3612                                   ? PL_fold : PL_fold_locale)[nextchr])))
3613                 sayNO;
3614             ln = PL_regoffs[n].end - ln;
3615             if (locinput + ln > PL_regeol)
3616                 sayNO;
3617             if (ln > 1 && (type == REF
3618                            ? memNE(s, locinput, ln)
3619                            : (type == REFF
3620                               ? ibcmp(s, locinput, ln)
3621                               : ibcmp_locale(s, locinput, ln))))
3622                 sayNO;
3623             locinput += ln;
3624             nextchr = UCHARAT(locinput);
3625             break;
3626         }
3627         case NOTHING:
3628         case TAIL:
3629             break;
3630         case BACK:
3631             break;
3632
3633 #undef  ST
3634 #define ST st->u.eval
3635         {
3636             SV *ret;
3637             REGEXP *re_sv;
3638             regexp *re;
3639             regexp_internal *rei;
3640             regnode *startpoint;
3641
3642         case GOSTART:
3643         case GOSUB: /*    /(...(?1))/   /(...(?&foo))/   */
3644             if (cur_eval && cur_eval->locinput==locinput) {
3645                 if (cur_eval->u.eval.close_paren == (U32)ARG(scan))
3646                     Perl_croak(aTHX_ "Infinite recursion in regex");
3647                 if ( ++nochange_depth > max_nochange_depth )
3648                     Perl_croak(aTHX_
3649                         "Pattern subroutine nesting without pos change"
3650                         " exceeded limit in regex");
3651             } else {
3652                 nochange_depth = 0;
3653             }
3654             re_sv = rex_sv;
3655             re = rex;
3656             rei = rexi;
3657             (void)ReREFCNT_inc(rex_sv);
3658             if (OP(scan)==GOSUB) {
3659                 startpoint = scan + ARG2L(scan);
3660                 ST.close_paren = ARG(scan);
3661             } else {
3662                 startpoint = rei->program+1;
3663                 ST.close_paren = 0;
3664             }
3665             goto eval_recurse_doit;
3666             /* NOTREACHED */
3667         case EVAL:  /*   /(?{A})B/   /(??{A})B/  and /(?(?{A})X|Y)B/   */
3668             if (cur_eval && cur_eval->locinput==locinput) {
3669                 if ( ++nochange_depth > max_nochange_depth )
3670                     Perl_croak(aTHX_ "EVAL without pos change exceeded limit in regex");
3671             } else {
3672                 nochange_depth = 0;
3673             }
3674             {
3675                 /* execute the code in the {...} */
3676                 dSP;
3677                 SV ** const before = SP;
3678                 OP_4tree * const oop = PL_op;
3679                 COP * const ocurcop = PL_curcop;
3680                 PAD *old_comppad;
3681
3682                 n = ARG(scan);
3683                 PL_op = (OP_4tree*)rexi->data->data[n];
3684                 DEBUG_STATE_r( PerlIO_printf(Perl_debug_log,
3685                     "  re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
3686                 PAD_SAVE_LOCAL(old_comppad, (PAD*)rexi->data->data[n + 2]);
3687                 PL_regoffs[0].end = PL_reg_magic->mg_len = locinput - PL_bostr;
3688
3689                 if (sv_yes_mark) {
3690                     SV *sv_mrk = get_sv("REGMARK", 1);
3691                     sv_setsv(sv_mrk, sv_yes_mark);
3692                 }
3693
3694                 CALLRUNOPS(aTHX);                       /* Scalar context. */
3695                 SPAGAIN;
3696                 if (SP == before)
3697                     ret = &PL_sv_undef;   /* protect against empty (?{}) blocks. */
3698                 else {
3699                     ret = POPs;
3700                     PUTBACK;
3701                 }
3702
3703                 PL_op = oop;
3704                 PAD_RESTORE_LOCAL(old_comppad);
3705                 PL_curcop = ocurcop;
3706                 if (!logical) {
3707                     /* /(?{...})/ */
3708                     sv_setsv(save_scalar(PL_replgv), ret);
3709                     break;
3710                 }
3711             }
3712             if (logical == 2) { /* Postponed subexpression: /(??{...})/ */
3713                 logical = 0;
3714                 {
3715                     /* extract RE object from returned value; compiling if
3716                      * necessary */
3717                     MAGIC *mg = NULL;
3718                     REGEXP *rx = NULL;
3719
3720                     if (SvROK(ret)) {
3721                         SV *const sv = SvRV(ret);
3722
3723                         if (SvTYPE(sv) == SVt_REGEXP) {
3724                             rx = (REGEXP*) sv;
3725                         } else if (SvSMAGICAL(sv)) {
3726                             mg = mg_find(sv, PERL_MAGIC_qr);
3727                             assert(mg);
3728                         }
3729                     } else if (SvTYPE(ret) == SVt_REGEXP) {
3730                         rx = (REGEXP*) ret;
3731                     } else if (SvSMAGICAL(ret)) {
3732                         if (SvGMAGICAL(ret)) {
3733                             /* I don't believe that there is ever qr magic
3734                                here.  */
3735                             assert(!mg_find(ret, PERL_MAGIC_qr));
3736                             sv_unmagic(ret, PERL_MAGIC_qr);
3737                         }
3738                         else {
3739                             mg = mg_find(ret, PERL_MAGIC_qr);
3740                             /* testing suggests mg only ends up non-NULL for
3741                                scalars who were upgraded and compiled in the
3742                                else block below. In turn, this is only
3743                                triggered in the "postponed utf8 string" tests
3744                                in t/op/pat.t  */
3745                         }
3746                     }
3747
3748                     if (mg) {
3749                         rx = (REGEXP *) mg->mg_obj; /*XXX:dmq*/
3750                         assert(rx);
3751                     }
3752                     if (rx) {
3753                         rx = reg_temp_copy(rx);
3754                     }
3755                     else {
3756                         U32 pm_flags = 0;
3757                         const I32 osize = PL_regsize;
3758
3759                         if (DO_UTF8(ret)) {
3760                             assert (SvUTF8(ret));
3761                         } else if (SvUTF8(ret)) {
3762                             /* Not doing UTF-8, despite what the SV says. Is
3763                                this only if we're trapped in use 'bytes'?  */
3764                             /* Make a copy of the octet sequence, but without
3765                                the flag on, as the compiler now honours the
3766                                SvUTF8 flag on ret.  */
3767                             STRLEN len;
3768                             const char *const p = SvPV(ret, len);
3769                             ret = newSVpvn_flags(p, len, SVs_TEMP);
3770                         }
3771                         rx = CALLREGCOMP(ret, pm_flags);
3772                         if (!(SvFLAGS(ret)
3773                               & (SVs_TEMP | SVs_PADTMP | SVf_READONLY
3774                                  | SVs_GMG))) {
3775                             /* This isn't a first class regexp. Instead, it's
3776                                caching a regexp onto an existing, Perl visible
3777                                scalar.  */
3778                             sv_magic(ret, (SV*) rx, PERL_MAGIC_qr, 0, 0);
3779                         }
3780                         PL_regsize = osize;
3781                     }
3782                     re_sv = rx;
3783                     re = (struct regexp *)SvANY(rx);
3784                 }
3785                 RXp_MATCH_COPIED_off(re);
3786                 re->subbeg = rex->subbeg;
3787                 re->sublen = rex->sublen;
3788                 rei = RXi_GET(re);
3789                 DEBUG_EXECUTE_r(
3790                     debug_start_match(re_sv, do_utf8, locinput, PL_regeol,
3791                         "Matching embedded");
3792                 );
3793                 startpoint = rei->program + 1;
3794                 ST.close_paren = 0; /* only used for GOSUB */
3795                 /* borrowed from regtry */
3796                 if (PL_reg_start_tmpl <= re->nparens) {
3797                     PL_reg_start_tmpl = re->nparens*3/2 + 3;
3798                     if(PL_reg_start_tmp)
3799                         Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
3800                     else
3801                         Newx(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
3802                 }
3803
3804         eval_recurse_doit: /* Share code with GOSUB below this line */
3805                 /* run the pattern returned from (??{...}) */
3806                 ST.cp = regcppush(0);   /* Save *all* the positions. */
3807                 REGCP_SET(ST.lastcp);
3808
3809                 PL_regoffs = re->offs; /* essentially NOOP on GOSUB */
3810
3811                 /* see regtry, specifically PL_reglast(?:close)?paren is a pointer! (i dont know why) :dmq */
3812                 PL_reglastparen = &re->lastparen;
3813                 PL_reglastcloseparen = &re->lastcloseparen;
3814                 re->lastparen = 0;
3815                 re->lastcloseparen = 0;
3816
3817                 PL_reginput = locinput;
3818                 PL_regsize = 0;
3819
3820                 /* XXXX This is too dramatic a measure... */
3821                 PL_reg_maxiter = 0;
3822
3823                 ST.toggle_reg_flags = PL_reg_flags;
3824                 if (RX_UTF8(re_sv))
3825                     PL_reg_flags |= RF_utf8;
3826                 else
3827                     PL_reg_flags &= ~RF_utf8;
3828                 ST.toggle_reg_flags ^= PL_reg_flags; /* diff of old and new */
3829
3830                 ST.prev_rex = rex_sv;
3831                 ST.prev_curlyx = cur_curlyx;
3832                 SETREX(rex_sv,re_sv);
3833                 rex = re;
3834                 rexi = rei;
3835                 cur_curlyx = NULL;
3836                 ST.B = next;
3837                 ST.prev_eval = cur_eval;
3838                 cur_eval = st;
3839                 /* now continue from first node in postoned RE */
3840                 PUSH_YES_STATE_GOTO(EVAL_AB, startpoint);
3841                 /* NOTREACHED */
3842             }
3843             /* logical is 1,   /(?(?{...})X|Y)/ */
3844             sw = (bool)SvTRUE(ret);
3845             logical = 0;
3846             break;
3847         }
3848
3849         case EVAL_AB: /* cleanup after a successful (??{A})B */
3850             /* note: this is called twice; first after popping B, then A */
3851             PL_reg_flags ^= ST.toggle_reg_flags;
3852             ReREFCNT_dec(rex_sv);
3853             SETREX(rex_sv,ST.prev_rex);
3854             rex = (struct regexp *)SvANY(rex_sv);
3855             rexi = RXi_GET(rex);
3856             regcpblow(ST.cp);
3857             cur_eval = ST.prev_eval;
3858             cur_curlyx = ST.prev_curlyx;
3859
3860             PL_reglastparen = &rex->lastparen;
3861             PL_reglastcloseparen = &rex->lastcloseparen;
3862
3863             /* XXXX This is too dramatic a measure... */
3864             PL_reg_maxiter = 0;
3865             if ( nochange_depth )
3866                 nochange_depth--;
3867             sayYES;
3868
3869
3870         case EVAL_AB_fail: /* unsuccessfully ran A or B in (??{A})B */
3871             /* note: this is called twice; first after popping B, then A */
3872             PL_reg_flags ^= ST.toggle_reg_flags;
3873             ReREFCNT_dec(rex_sv);
3874             SETREX(rex_sv,ST.prev_rex);
3875             rex = (struct regexp *)SvANY(rex_sv);
3876             rexi = RXi_GET(rex);
3877             PL_reglastparen = &rex->lastparen;
3878             PL_reglastcloseparen = &rex->lastcloseparen;
3879
3880             PL_reginput = locinput;
3881             REGCP_UNWIND(ST.lastcp);
3882             regcppop(rex);
3883             cur_eval = ST.prev_eval;
3884             cur_curlyx = ST.prev_curlyx;
3885             /* XXXX This is too dramatic a measure... */
3886             PL_reg_maxiter = 0;
3887             if ( nochange_depth )
3888                 nochange_depth--;
3889             sayNO_SILENT;
3890 #undef ST
3891
3892         case OPEN:
3893             n = ARG(scan);  /* which paren pair */
3894             PL_reg_start_tmp[n] = locinput;
3895             if (n > PL_regsize)
3896                 PL_regsize = n;
3897             lastopen = n;
3898             break;
3899         case CLOSE:
3900             n = ARG(scan);  /* which paren pair */
3901             PL_regoffs[n].start = PL_reg_start_tmp[n] - PL_bostr;
3902             PL_regoffs[n].end = locinput - PL_bostr;
3903             /*if (n > PL_regsize)
3904                 PL_regsize = n;*/
3905             if (n > *PL_reglastparen)
3906                 *PL_reglastparen = n;
3907             *PL_reglastcloseparen = n;
3908             if (cur_eval && cur_eval->u.eval.close_paren == n) {
3909                 goto fake_end;
3910             }
3911             break;
3912         case ACCEPT:
3913             if (ARG(scan)){
3914                 regnode *cursor;
3915                 for (cursor=scan;
3916                      cursor && OP(cursor)!=END;
3917                      cursor=regnext(cursor))
3918                 {
3919                     if ( OP(cursor)==CLOSE ){
3920                         n = ARG(cursor);
3921                         if ( n <= lastopen ) {
3922                             PL_regoffs[n].start
3923                                 = PL_reg_start_tmp[n] - PL_bostr;
3924                             PL_regoffs[n].end = locinput - PL_bostr;
3925                             /*if (n > PL_regsize)
3926                             PL_regsize = n;*/
3927                             if (n > *PL_reglastparen)
3928                                 *PL_reglastparen = n;
3929                             *PL_reglastcloseparen = n;
3930                             if ( n == ARG(scan) || (cur_eval &&
3931                                 cur_eval->u.eval.close_paren == n))
3932                                 break;
3933                         }
3934                     }
3935                 }
3936             }
3937             goto fake_end;
3938             /*NOTREACHED*/
3939         case GROUPP:
3940             n = ARG(scan);  /* which paren pair */
3941             sw = (bool)(*PL_reglastparen >= n && PL_regoffs[n].end != -1);
3942             break;
3943         case NGROUPP:
3944             /* reg_check_named_buff_matched returns 0 for no match */
3945             sw = (bool)(0 < reg_check_named_buff_matched(rex,scan));
3946             break;
3947         case INSUBP:
3948             n = ARG(scan);
3949             sw = (cur_eval && (!n || cur_eval->u.eval.close_paren == n));
3950             break;
3951         case DEFINEP:
3952             sw = 0;
3953             break;
3954         case IFTHEN:
3955             PL_reg_leftiter = PL_reg_maxiter;           /* Void cache */
3956             if (sw)
3957                 next = NEXTOPER(NEXTOPER(scan));
3958             else {
3959                 next = scan + ARG(scan);
3960                 if (OP(next) == IFTHEN) /* Fake one. */
3961                     next = NEXTOPER(NEXTOPER(next));
3962             }
3963             break;
3964         case LOGICAL:
3965             logical = scan->flags;
3966             break;
3967
3968 /*******************************************************************
3969
3970 The CURLYX/WHILEM pair of ops handle the most generic case of the /A*B/
3971 pattern, where A and B are subpatterns. (For simple A, CURLYM or
3972 STAR/PLUS/CURLY/CURLYN are used instead.)
3973
3974 A*B is compiled as <CURLYX><A><WHILEM><B>
3975
3976 On entry to the subpattern, CURLYX is called. This pushes a CURLYX
3977 state, which contains the current count, initialised to -1. It also sets
3978 cur_curlyx to point to this state, with any previous value saved in the
3979 state block.
3980
3981 CURLYX then jumps straight to the WHILEM op, rather than executing A,
3982 since the pattern may possibly match zero times (i.e. it's a while {} loop
3983 rather than a do {} while loop).
3984
3985 Each entry to WHILEM represents a successful match of A. The count in the
3986 CURLYX block is incremented, another WHILEM state is pushed, and execution
3987 passes to A or B depending on greediness and the current count.
3988
3989 For example, if matching against the string a1a2a3b (where the aN are
3990 substrings that match /A/), then the match progresses as follows: (the
3991 pushed states are interspersed with the bits of strings matched so far):
3992
3993     <CURLYX cnt=-1>
3994     <CURLYX cnt=0><WHILEM>
3995     <CURLYX cnt=1><WHILEM> a1 <WHILEM>
3996     <CURLYX cnt=2><WHILEM> a1 <WHILEM> a2 <WHILEM>
3997     <CURLYX cnt=3><WHILEM> a1 <WHILEM> a2 <WHILEM> a3 <WHILEM>
3998     <CURLYX cnt=3><WHILEM> a1 <WHILEM> a2 <WHILEM> a3 <WHILEM> b
3999
4000 (Contrast this with something like CURLYM, which maintains only a single
4001 backtrack state:
4002
4003     <CURLYM cnt=0> a1
4004     a1 <CURLYM cnt=1> a2
4005     a1 a2 <CURLYM cnt=2> a3
4006     a1 a2 a3 <CURLYM cnt=3> b
4007 )
4008
4009 Each WHILEM state block marks a point to backtrack to upon partial failure
4010 of A or B, and also contains some minor state data related to that
4011 iteration.  The CURLYX block, pointed to by cur_curlyx, contains the
4012 overall state, such as the count, and pointers to the A and B ops.
4013
4014 This is complicated slightly by nested CURLYX/WHILEM's. Since cur_curlyx
4015 must always point to the *current* CURLYX block, the rules are:
4016
4017 When executing CURLYX, save the old cur_curlyx in the CURLYX state block,
4018 and set cur_curlyx to point the new block.
4019
4020 When popping the CURLYX block after a successful or unsuccessful match,
4021 restore the previous cur_curlyx.
4022
4023 When WHILEM is about to execute B, save the current cur_curlyx, and set it
4024 to the outer one saved in the CURLYX block.
4025
4026 When popping the WHILEM block after a successful or unsuccessful B match,
4027 restore the previous cur_curlyx.
4028
4029 Here's an example for the pattern (AI* BI)*BO
4030 I and O refer to inner and outer, C and W refer to CURLYX and WHILEM:
4031
4032 cur_
4033 curlyx backtrack stack
4034 ------ ---------------
4035 NULL
4036 CO     <CO prev=NULL> <WO>
4037 CI     <CO prev=NULL> <WO> <CI prev=CO> <WI> ai
4038 CO     <CO prev=NULL> <WO> <CI prev=CO> <WI> ai <WI prev=CI> bi
4039 NULL   <CO prev=NULL> <WO> <CI prev=CO> <WI> ai <WI prev=CI> bi <WO prev=CO> bo
4040
4041 At this point the pattern succeeds, and we work back down the stack to
4042 clean up, restoring as we go:
4043
4044 CO     <CO prev=NULL> <WO> <CI prev=CO> <WI> ai <WI prev=CI> bi
4045 CI     <CO prev=NULL> <WO> <CI prev=CO> <WI> ai
4046 CO     <CO prev=NULL> <WO>
4047 NULL
4048
4049 *******************************************************************/
4050
4051 #define ST st->u.curlyx
4052
4053         case CURLYX:    /* start of /A*B/  (for complex A) */
4054         {
4055             /* No need to save/restore up to this paren */
4056             I32 parenfloor = scan->flags;
4057
4058             assert(next); /* keep Coverity happy */
4059             if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
4060                 next += ARG(next);
4061
4062             /* XXXX Probably it is better to teach regpush to support
4063                parenfloor > PL_regsize... */
4064             if (parenfloor > (I32)*PL_reglastparen)
4065                 parenfloor = *PL_reglastparen; /* Pessimization... */
4066
4067             ST.prev_curlyx= cur_curlyx;
4068             cur_curlyx = st;
4069             ST.cp = PL_savestack_ix;
4070
4071             /* these fields contain the state of the current curly.
4072              * they are accessed by subsequent WHILEMs */
4073             ST.parenfloor = parenfloor;
4074             ST.min = ARG1(scan);
4075             ST.max = ARG2(scan);
4076             ST.A = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
4077             ST.B = next;
4078             ST.minmod = minmod;
4079             minmod = 0;
4080             ST.count = -1;      /* this will be updated by WHILEM */
4081             ST.lastloc = NULL;  /* this will be updated by WHILEM */
4082
4083             PL_reginput = locinput;
4084             PUSH_YES_STATE_GOTO(CURLYX_end, PREVOPER(next));
4085             /* NOTREACHED */
4086         }
4087
4088         case CURLYX_end: /* just finished matching all of A*B */
4089             cur_curlyx = ST.prev_curlyx;
4090             sayYES;
4091             /* NOTREACHED */
4092
4093         case CURLYX_end_fail: /* just failed to match all of A*B */
4094             regcpblow(ST.cp);
4095             cur_curlyx = ST.prev_curlyx;
4096             sayNO;
4097             /* NOTREACHED */
4098
4099
4100 #undef ST
4101 #define ST st->u.whilem
4102
4103         case WHILEM:     /* just matched an A in /A*B/  (for complex A) */
4104         {
4105             /* see the discussion above about CURLYX/WHILEM */
4106             I32 n;
4107             assert(cur_curlyx); /* keep Coverity happy */
4108             n = ++cur_curlyx->u.curlyx.count; /* how many A's matched */
4109             ST.save_lastloc = cur_curlyx->u.curlyx.lastloc;
4110             ST.cache_offset = 0;
4111             ST.cache_mask = 0;
4112
4113             PL_reginput = locinput;
4114
4115             DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
4116                   "%*s  whilem: matched %ld out of %ld..%ld\n",
4117                   REPORT_CODE_OFF+depth*2, "", (long)n,
4118                   (long)cur_curlyx->u.curlyx.min,
4119                   (long)cur_curlyx->u.curlyx.max)
4120             );
4121
4122             /* First just match a string of min A's. */
4123
4124             if (n < cur_curlyx->u.curlyx.min) {
4125                 cur_curlyx->u.curlyx.lastloc = locinput;
4126                 PUSH_STATE_GOTO(WHILEM_A_pre, cur_curlyx->u.curlyx.A);
4127                 /* NOTREACHED */
4128             }
4129
4130             /* If degenerate A matches "", assume A done. */
4131
4132             if (locinput == cur_curlyx->u.curlyx.lastloc) {
4133                 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
4134                    "%*s  whilem: empty match detected, trying continuation...\n",
4135                    REPORT_CODE_OFF+depth*2, "")
4136                 );
4137                 goto do_whilem_B_max;
4138             }
4139
4140             /* super-linear cache processing */
4141
4142             if (scan->flags) {
4143
4144                 if (!PL_reg_maxiter) {
4145                     /* start the countdown: Postpone detection until we
4146                      * know the match is not *that* much linear. */
4147                     PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
4148                     /* possible overflow for long strings and many CURLYX's */
4149                     if (PL_reg_maxiter < 0)
4150                         PL_reg_maxiter = I32_MAX;
4151                     PL_reg_leftiter = PL_reg_maxiter;
4152                 }
4153
4154                 if (PL_reg_leftiter-- == 0) {
4155                     /* initialise cache */
4156                     const I32 size = (PL_reg_maxiter + 7)/8;
4157                     if (PL_reg_poscache) {
4158                         if ((I32)PL_reg_poscache_size < size) {
4159                             Renew(PL_reg_poscache, size, char);
4160                             PL_reg_poscache_size = size;
4161                         }
4162                         Zero(PL_reg_poscache, size, char);
4163                     }
4164                     else {
4165                         PL_reg_poscache_size = size;
4166                         Newxz(PL_reg_poscache, size, char);
4167                     }
4168                     DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
4169       "%swhilem: Detected a super-linear match, switching on caching%s...\n",
4170                               PL_colors[4], PL_colors[5])
4171                     );
4172                 }
4173
4174                 if (PL_reg_leftiter < 0) {
4175                     /* have we already failed at this position? */
4176                     I32 offset, mask;
4177                     offset  = (scan->flags & 0xf) - 1
4178                                 + (locinput - PL_bostr)  * (scan->flags>>4);
4179                     mask    = 1 << (offset % 8);
4180                     offset /= 8;
4181                     if (PL_reg_poscache[offset] & mask) {
4182                         DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
4183                             "%*s  whilem: (cache) already tried at this position...\n",
4184                             REPORT_CODE_OFF+depth*2, "")
4185                         );
4186                         sayNO; /* cache records failure */
4187                     }
4188                     ST.cache_offset = offset;
4189                     ST.cache_mask   = mask;
4190                 }
4191             }
4192
4193             /* Prefer B over A for minimal matching. */
4194
4195             if (cur_curlyx->u.curlyx.minmod) {
4196                 ST.save_curlyx = cur_curlyx;
4197                 cur_curlyx = cur_curlyx->u.curlyx.prev_curlyx;
4198                 ST.cp = regcppush(ST.save_curlyx->u.curlyx.parenfloor);
4199                 REGCP_SET(ST.lastcp);
4200                 PUSH_YES_STATE_GOTO(WHILEM_B_min, ST.save_curlyx->u.curlyx.B);
4201                 /* NOTREACHED */
4202             }
4203
4204             /* Prefer A over B for maximal matching. */
4205
4206             if (n < cur_curlyx->u.curlyx.max) { /* More greed allowed? */
4207                 ST.cp = regcppush(cur_curlyx->u.curlyx.parenfloor);
4208                 cur_curlyx->u.curlyx.lastloc = locinput;
4209                 REGCP_SET(ST.lastcp);
4210                 PUSH_STATE_GOTO(WHILEM_A_max, cur_curlyx->u.curlyx.A);
4211                 /* NOTREACHED */
4212             }
4213             goto do_whilem_B_max;
4214         }
4215         /* NOTREACHED */
4216
4217         case WHILEM_B_min: /* just matched B in a minimal match */
4218         case WHILEM_B_max: /* just matched B in a maximal match */
4219             cur_curlyx = ST.save_curlyx;
4220             sayYES;
4221             /* NOTREACHED */
4222
4223         case WHILEM_B_max_fail: /* just failed to match B in a maximal match */
4224             cur_curlyx = ST.save_curlyx;
4225             cur_curlyx->u.curlyx.lastloc = ST.save_lastloc;
4226             cur_curlyx->u.curlyx.count--;
4227             CACHEsayNO;
4228             /* NOTREACHED */
4229
4230         case WHILEM_A_min_fail: /* just failed to match A in a minimal match */
4231             REGCP_UNWIND(ST.lastcp);
4232             regcppop(rex);
4233             /* FALL THROUGH */
4234         case WHILEM_A_pre_fail: /* just failed to match even minimal A */
4235             cur_curlyx->u.curlyx.lastloc = ST.save_lastloc;
4236             cur_curlyx->u.curlyx.count--;
4237             CACHEsayNO;
4238             /* NOTREACHED */
4239
4240         case WHILEM_A_max_fail: /* just failed to match A in a maximal match */
4241             REGCP_UNWIND(ST.lastcp);
4242             regcppop(rex);      /* Restore some previous $<digit>s? */
4243             PL_reginput = locinput;
4244             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
4245                 "%*s  whilem: failed, trying continuation...\n",
4246                 REPORT_CODE_OFF+depth*2, "")
4247             );
4248           do_whilem_B_max:
4249             if (cur_curlyx->u.curlyx.count >= REG_INFTY
4250                 && ckWARN(WARN_REGEXP)
4251                 && !(PL_reg_flags & RF_warned))
4252             {
4253                 PL_reg_flags |= RF_warned;
4254                 Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s limit (%d) exceeded",
4255                      "Complex regular subexpression recursion",
4256                      REG_INFTY - 1);
4257             }
4258
4259             /* now try B */
4260             ST.save_curlyx = cur_curlyx;
4261             cur_curlyx = cur_curlyx->u.curlyx.prev_curlyx;
4262             PUSH_YES_STATE_GOTO(WHILEM_B_max, ST.save_curlyx->u.curlyx.B);
4263             /* NOTREACHED */
4264
4265         case WHILEM_B_min_fail: /* just failed to match B in a minimal match */
4266             cur_curlyx = ST.save_curlyx;
4267             REGCP_UNWIND(ST.lastcp);
4268             regcppop(rex);
4269
4270             if (cur_curlyx->u.curlyx.count >= cur_curlyx->u.curlyx.max) {
4271                 /* Maximum greed exceeded */
4272                 if (cur_curlyx->u.curlyx.count >= REG_INFTY
4273                     && ckWARN(WARN_REGEXP)
4274                     && !(PL_reg_flags & RF_warned))
4275                 {
4276                     PL_reg_flags |= RF_warned;
4277                     Perl_warner(aTHX_ packWARN(WARN_REGEXP),
4278                         "%s limit (%d) exceeded",
4279                         "Complex regular subexpression recursion",
4280                         REG_INFTY - 1);
4281                 }
4282                 cur_curlyx->u.curlyx.count--;
4283                 CACHEsayNO;
4284             }
4285
4286             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
4287                 "%*s  trying longer...\n", REPORT_CODE_OFF+depth*2, "")
4288             );
4289             /* Try grabbing another A and see if it helps. */
4290             PL_reginput = locinput;
4291             cur_curlyx->u.curlyx.lastloc = locinput;
4292             ST.cp = regcppush(cur_curlyx->u.curlyx.parenfloor);
4293             REGCP_SET(ST.lastcp);
4294             PUSH_STATE_GOTO(WHILEM_A_min, ST.save_curlyx->u.curlyx.A);
4295             /* NOTREACHED */
4296
4297 #undef  ST
4298 #define ST st->u.branch
4299
4300         case BRANCHJ:       /*  /(...|A|...)/ with long next pointer */
4301             next = scan + ARG(scan);
4302             if (next == scan)
4303                 next = NULL;
4304             scan = NEXTOPER(scan);
4305             /* FALL THROUGH */
4306
4307         case BRANCH:        /*  /(...|A|...)/ */
4308             scan = NEXTOPER(scan); /* scan now points to inner node */
4309             ST.lastparen = *PL_reglastparen;
4310             ST.next_branch = next;
4311             REGCP_SET(ST.cp);
4312             PL_reginput = locinput;
4313
4314             /* Now go into the branch */
4315             if (has_cutgroup) {
4316                 PUSH_YES_STATE_GOTO(BRANCH_next, scan);
4317             } else {
4318                 PUSH_STATE_GOTO(BRANCH_next, scan);
4319             }
4320             /* NOTREACHED */
4321         case CUTGROUP:
4322             PL_reginput = locinput;
4323             sv_yes_mark = st->u.mark.mark_name = scan->flags ? NULL :
4324                 (SV*)rexi->data->data[ ARG( scan ) ];
4325             PUSH_STATE_GOTO(CUTGROUP_next,next);
4326             /* NOTREACHED */
4327         case CUTGROUP_next_fail:
4328             do_cutgroup = 1;
4329             no_final = 1;
4330             if (st->u.mark.mark_name)
4331                 sv_commit = st->u.mark.mark_name;
4332             sayNO;
4333             /* NOTREACHED */
4334         case BRANCH_next:
4335             sayYES;
4336             /* NOTREACHED */
4337         case BRANCH_next_fail: /* that branch failed; try the next, if any */
4338             if (do_cutgroup) {
4339                 do_cutgroup = 0;
4340                 no_final = 0;
4341             }
4342             REGCP_UNWIND(ST.cp);
4343             for (n = *PL_reglastparen; n > ST.lastparen; n--)
4344                 PL_regoffs[n].end = -1;
4345             *PL_reglastparen = n;
4346             /*dmq: *PL_reglastcloseparen = n; */
4347             scan = ST.next_branch;
4348             /* no more branches? */
4349             if (!scan || (OP(scan) != BRANCH && OP(scan) != BRANCHJ)) {
4350                 DEBUG_EXECUTE_r({
4351                     PerlIO_printf( Perl_debug_log,
4352                         "%*s  %sBRANCH failed...%s\n",
4353                         REPORT_CODE_OFF+depth*2, "",
4354                         PL_colors[4],
4355                         PL_colors[5] );
4356                 });
4357                 sayNO_SILENT;
4358             }
4359             continue; /* execute next BRANCH[J] op */
4360             /* NOTREACHED */
4361
4362         case MINMOD:
4363             minmod = 1;
4364             break;
4365
4366 #undef  ST
4367 #define ST st->u.curlym
4368
4369         case CURLYM:    /* /A{m,n}B/ where A is fixed-length */
4370
4371             /* This is an optimisation of CURLYX that enables us to push
4372              * only a single backtracking state, no matter now many matches
4373              * there are in {m,n}. It relies on the pattern being constant
4374              * length, with no parens to influence future backrefs
4375              */
4376
4377             ST.me = scan;
4378             scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
4379
4380             /* if paren positive, emulate an OPEN/CLOSE around A */
4381             if (ST.me->flags) {
4382                 U32 paren = ST.me->flags;
4383                 if (paren > PL_regsize)
4384                     PL_regsize = paren;
4385                 if (paren > *PL_reglastparen)
4386                     *PL_reglastparen = paren;
4387                 scan += NEXT_OFF(scan); /* Skip former OPEN. */
4388             }
4389             ST.A = scan;
4390             ST.B = next;
4391             ST.alen = 0;
4392             ST.count = 0;
4393             ST.minmod = minmod;
4394             minmod = 0;
4395             ST.c1 = CHRTEST_UNINIT;
4396             REGCP_SET(ST.cp);
4397
4398             if (!(ST.minmod ? ARG1(ST.me) : ARG2(ST.me))) /* min/max */
4399                 goto curlym_do_B;
4400
4401           curlym_do_A: /* execute the A in /A{m,n}B/  */
4402             PL_reginput = locinput;
4403             PUSH_YES_STATE_GOTO(CURLYM_A, ST.A); /* match A */
4404             /* NOTREACHED */
4405
4406         case CURLYM_A: /* we've just matched an A */
4407             locinput = st->locinput;
4408             nextchr = UCHARAT(locinput);
4409
4410             ST.count++;
4411             /* after first match, determine A's length: u.curlym.alen */
4412             if (ST.count == 1) {
4413                 if (PL_reg_match_utf8) {
4414                     char *s = locinput;
4415                     while (s < PL_reginput) {
4416                         ST.alen++;
4417                         s += UTF8SKIP(s);
4418                     }
4419                 }
4420                 else {
4421                     ST.alen = PL_reginput - locinput;
4422                 }
4423                 if (ST.alen == 0)
4424                     ST.count = ST.minmod ? ARG1(ST.me) : ARG2(ST.me);
4425             }
4426             DEBUG_EXECUTE_r(
4427                 PerlIO_printf(Perl_debug_log,
4428                           "%*s  CURLYM now matched %"IVdf" times, len=%"IVdf"...\n",
4429                           (int)(REPORT_CODE_OFF+(depth*2)), "",
4430                           (IV) ST.count, (IV)ST.alen)
4431             );
4432
4433             locinput = PL_reginput;
4434
4435             if (cur_eval && cur_eval->u.eval.close_paren &&
4436                 cur_eval->u.eval.close_paren == (U32)ST.me->flags)
4437                 goto fake_end;
4438
4439             if ( ST.count < (ST.minmod ? ARG1(ST.me) : ARG2(ST.me)) )
4440                 goto curlym_do_A; /* try to match another A */
4441             goto curlym_do_B; /* try to match B */
4442
4443         case CURLYM_A_fail: /* just failed to match an A */
4444             REGCP_UNWIND(ST.cp);
4445
4446             if (ST.minmod || ST.count < ARG1(ST.me) /* min*/
4447                 || (cur_eval && cur_eval->u.eval.close_paren &&
4448                     cur_eval->u.eval.close_paren == (U32)ST.me->flags))
4449                 sayNO;
4450
4451           curlym_do_B: /* execute the B in /A{m,n}B/  */
4452             PL_reginput = locinput;
4453             if (ST.c1 == CHRTEST_UNINIT) {
4454                 /* calculate c1 and c2 for possible match of 1st char
4455                  * following curly */
4456                 ST.c1 = ST.c2 = CHRTEST_VOID;
4457                 if (HAS_TEXT(ST.B) || JUMPABLE(ST.B)) {
4458                     regnode *text_node = ST.B;
4459                     if (! HAS_TEXT(text_node))
4460                         FIND_NEXT_IMPT(text_node);
4461                     /* this used to be
4462
4463                         (HAS_TEXT(text_node) && PL_regkind[OP(text_node)] == EXACT)
4464
4465                         But the former is redundant in light of the latter.
4466
4467                         if this changes back then the macro for
4468                         IS_TEXT and friends need to change.
4469                      */
4470                     if (PL_regkind[OP(text_node)] == EXACT)
4471                     {
4472
4473                         ST.c1 = (U8)*STRING(text_node);
4474                         ST.c2 =
4475                             (IS_TEXTF(text_node))
4476                             ? PL_fold[ST.c1]
4477                             : (IS_TEXTFL(text_node))
4478                                 ? PL_fold_locale[ST.c1]
4479                                 : ST.c1;
4480                     }
4481                 }
4482             }
4483
4484             DEBUG_EXECUTE_r(
4485                 PerlIO_printf(Perl_debug_log,
4486                     "%*s  CURLYM trying tail with matches=%"IVdf"...\n",
4487                     (int)(REPORT_CODE_OFF+(depth*2)),
4488                     "", (IV)ST.count)
4489                 );
4490             if (ST.c1 != CHRTEST_VOID
4491                     && UCHARAT(PL_reginput) != ST.c1
4492                     && UCHARAT(PL_reginput) != ST.c2)
4493             {
4494                 /* simulate B failing */
4495                 DEBUG_OPTIMISE_r(
4496                     PerlIO_printf(Perl_debug_log,
4497                         "%*s  CURLYM Fast bail c1=%"IVdf" c2=%"IVdf"\n",
4498                         (int)(REPORT_CODE_OFF+(depth*2)),"",
4499                         (IV)ST.c1,(IV)ST.c2
4500                 ));
4501                 state_num = CURLYM_B_fail;
4502                 goto reenter_switch;
4503             }
4504
4505             if (ST.me->flags) {
4506                 /* mark current A as captured */
4507                 I32 paren = ST.me->flags;
4508                 if (ST.count) {
4509                     PL_regoffs[paren].start
4510                         = HOPc(PL_reginput, -ST.alen) - PL_bostr;
4511                     PL_regoffs[paren].end = PL_reginput - PL_bostr;
4512                     /*dmq: *PL_reglastcloseparen = paren; */
4513                 }
4514                 else
4515                     PL_regoffs[paren].end = -1;
4516                 if (cur_eval && cur_eval->u.eval.close_paren &&
4517                     cur_eval->u.eval.close_paren == (U32)ST.me->flags)
4518                 {
4519                     if (ST.count)
4520                         goto fake_end;
4521                     else
4522                         sayNO;
4523                 }
4524             }
4525
4526             PUSH_STATE_GOTO(CURLYM_B, ST.B); /* match B */
4527             /* NOTREACHED */
4528
4529         case CURLYM_B_fail: /* just failed to match a B */
4530             REGCP_UNWIND(ST.cp);
4531             if (ST.minmod) {
4532                 if (ST.count == ARG2(ST.me) /* max */)
4533                     sayNO;
4534                 goto curlym_do_A; /* try to match a further A */
4535             }
4536             /* backtrack one A */
4537             if (ST.count == ARG1(ST.me) /* min */)
4538                 sayNO;
4539             ST.count--;
4540             locinput = HOPc(locinput, -ST.alen);
4541             goto curlym_do_B; /* try to match B */
4542
4543 #undef ST
4544 #define ST st->u.curly
4545
4546 #define CURLY_SETPAREN(paren, success) \
4547     if (paren) { \
4548         if (success) { \
4549             PL_regoffs[paren].start = HOPc(locinput, -1) - PL_bostr; \
4550             PL_regoffs[paren].end = locinput - PL_bostr; \
4551             *PL_reglastcloseparen = paren; \
4552         } \
4553         else \
4554             PL_regoffs[paren].end = -1; \
4555     }
4556
4557         case STAR:              /*  /A*B/ where A is width 1 */
4558             ST.paren = 0;
4559             ST.min = 0;
4560             ST.max = REG_INFTY;
4561             scan = NEXTOPER(scan);
4562             goto repeat;
4563         case PLUS:              /*  /A+B/ where A is width 1 */
4564             ST.paren = 0;
4565             ST.min = 1;
4566             ST.max = REG_INFTY;
4567             scan = NEXTOPER(scan);
4568             goto repeat;
4569         case CURLYN:            /*  /(A){m,n}B/ where A is width 1 */
4570             ST.paren = scan->flags;     /* Which paren to set */
4571             if (ST.paren > PL_regsize)
4572                 PL_regsize = ST.paren;
4573             if (ST.paren > *PL_reglastparen)
4574                 *PL_reglastparen = ST.paren;
4575             ST.min = ARG1(scan);  /* min to match */
4576             ST.max = ARG2(scan);  /* max to match */
4577             if (cur_eval && cur_eval->u.eval.close_paren &&
4578                 cur_eval->u.eval.close_paren == (U32)ST.paren) {
4579                 ST.min=1;
4580                 ST.max=1;
4581             }
4582             scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
4583             goto repeat;
4584         case CURLY:             /*  /A{m,n}B/ where A is width 1 */
4585             ST.paren = 0;
4586             ST.min = ARG1(scan);  /* min to match */
4587             ST.max = ARG2(scan);  /* max to match */
4588             scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
4589           repeat:
4590             /*
4591             * Lookahead to avoid useless match attempts
4592             * when we know what character comes next.
4593             *
4594             * Used to only do .*x and .*?x, but now it allows
4595             * for )'s, ('s and (?{ ... })'s to be in the way
4596             * of the quantifier and the EXACT-like node.  -- japhy
4597             */
4598
4599             if (ST.min > ST.max) /* XXX make this a compile-time check? */
4600                 sayNO;
4601             if (HAS_TEXT(next) || JUMPABLE(next)) {
4602                 U8 *s;
4603                 regnode *text_node = next;
4604
4605                 if (! HAS_TEXT(text_node))
4606                     FIND_NEXT_IMPT(text_node);
4607
4608                 if (! HAS_TEXT(text_node))
4609                     ST.c1 = ST.c2 = CHRTEST_VOID;
4610                 else {
4611                     if ( PL_regkind[OP(text_node)] != EXACT ) {
4612                         ST.c1 = ST.c2 = CHRTEST_VOID;
4613                         goto assume_ok_easy;
4614                     }
4615                     else
4616                         s = (U8*)STRING(text_node);
4617
4618                     /*  Currently we only get here when
4619
4620                         PL_rekind[OP(text_node)] == EXACT
4621
4622                         if this changes back then the macro for IS_TEXT and
4623                         friends need to change. */
4624                     if (!UTF) {
4625                         ST.c2 = ST.c1 = *s;
4626                         if (IS_TEXTF(text_node))
4627                             ST.c2 = PL_fold[ST.c1];
4628                         else if (IS_TEXTFL(text_node))
4629                             ST.c2 = PL_fold_locale[ST.c1];
4630                     }
4631                     else { /* UTF */
4632                         if (IS_TEXTF(text_node)) {
4633                              STRLEN ulen1, ulen2;
4634                              U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
4635                              U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
4636
4637                              to_utf8_lower((U8*)s, tmpbuf1, &ulen1);
4638                              to_utf8_upper((U8*)s, tmpbuf2, &ulen2);
4639 #ifdef EBCDIC
4640                              ST.c1 = utf8n_to_uvchr(tmpbuf1, UTF8_MAXLEN, 0,
4641                                                     ckWARN(WARN_UTF8) ?
4642                                                     0 : UTF8_ALLOW_ANY);
4643                              ST.c2 = utf8n_to_uvchr(tmpbuf2, UTF8_MAXLEN, 0,
4644                                                     ckWARN(WARN_UTF8) ?
4645                                                     0 : UTF8_ALLOW_ANY);
4646 #else
4647                              ST.c1 = utf8n_to_uvuni(tmpbuf1, UTF8_MAXBYTES, 0,
4648                                                     uniflags);
4649                              ST.c2 = utf8n_to_uvuni(tmpbuf2, UTF8_MAXBYTES, 0,
4650                                                     uniflags);
4651 #endif
4652                         }
4653                         else {
4654                             ST.c2 = ST.c1 = utf8n_to_uvchr(s, UTF8_MAXBYTES, 0,
4655                                                      uniflags);
4656                         }
4657                     }
4658                 }
4659             }
4660             else
4661                 ST.c1 = ST.c2 = CHRTEST_VOID;
4662         assume_ok_easy:
4663
4664             ST.A = scan;
4665             ST.B = next;
4666             PL_reginput = locinput;
4667             if (minmod) {
4668                 minmod = 0;
4669                 if (ST.min && regrepeat(rex, ST.A, ST.min, depth) < ST.min)
4670                     sayNO;
4671                 ST.count = ST.min;
4672                 locinput = PL_reginput;
4673                 REGCP_SET(ST.cp);
4674                 if (ST.c1 == CHRTEST_VOID)
4675                     goto curly_try_B_min;
4676
4677                 ST.oldloc = locinput;
4678
4679                 /* set ST.maxpos to the furthest point along the
4680                  * string that could possibly match */
4681                 if  (ST.max == REG_INFTY) {
4682                     ST.maxpos = PL_regeol - 1;
4683                     if (do_utf8)
4684                         while (UTF8_IS_CONTINUATION(*(U8*)ST.maxpos))
4685                             ST.maxpos--;
4686                 }
4687                 else if (do_utf8) {
4688                     int m = ST.max - ST.min;
4689                     for (ST.maxpos = locinput;
4690                          m >0 && ST.maxpos + UTF8SKIP(ST.maxpos) <= PL_regeol; m--)
4691                         ST.maxpos += UTF8SKIP(ST.maxpos);
4692                 }
4693                 else {
4694                     ST.maxpos = locinput + ST.max - ST.min;
4695                     if (ST.maxpos >= PL_regeol)
4696                         ST.maxpos = PL_regeol - 1;
4697                 }
4698                 goto curly_try_B_min_known;
4699
4700             }
4701             else {
4702                 ST.count = regrepeat(rex, ST.A, ST.max, depth);
4703                 locinput = PL_reginput;
4704                 if (ST.count < ST.min)
4705                     sayNO;
4706                 if ((ST.count > ST.min)
4707                     && (PL_regkind[OP(ST.B)] == EOL) && (OP(ST.B) != MEOL))
4708                 {
4709                     /* A{m,n} must come at the end of the string, there's
4710                      * no point in backing off ... */
4711                     ST.min = ST.count;
4712                     /* ...except that $ and \Z can match before *and* after
4713                        newline at the end.  Consider "\n\n" =~ /\n+\Z\n/.
4714                        We may back off by one in this case. */
4715                     if (UCHARAT(PL_reginput - 1) == '\n' && OP(ST.B) != EOS)
4716                         ST.min--;
4717                 }
4718                 REGCP_SET(ST.cp);
4719                 goto curly_try_B_max;
4720             }
4721             /* NOTREACHED */
4722
4723
4724         case CURLY_B_min_known_fail:
4725             /* failed to find B in a non-greedy match where c1,c2 valid */
4726             if (ST.paren && ST.count)
4727                 PL_regoffs[ST.paren].end = -1;
4728
4729             PL_reginput = locinput;     /* Could be reset... */
4730             REGCP_UNWIND(ST.cp);
4731             /* Couldn't or didn't -- move forward. */
4732             ST.oldloc = locinput;
4733             if (do_utf8)
4734                 locinput += UTF8SKIP(locinput);
4735             else
4736                 locinput++;
4737             ST.count++;
4738           curly_try_B_min_known:
4739              /* find the next place where 'B' could work, then call B */
4740             {
4741                 int n;
4742                 if (do_utf8) {
4743                     n = (ST.oldloc == locinput) ? 0 : 1;
4744                     if (ST.c1 == ST.c2) {
4745                         STRLEN len;
4746                         /* set n to utf8_distance(oldloc, locinput) */
4747                         while (locinput <= ST.maxpos &&
4748                                utf8n_to_uvchr((U8*)locinput,
4749                                               UTF8_MAXBYTES, &len,
4750                                               uniflags) != (UV)ST.c1) {
4751                             locinput += len;
4752                             n++;
4753                         }
4754                     }
4755                     else {
4756                         /* set n to utf8_distance(oldloc, locinput) */
4757                         while (locinput <= ST.maxpos) {
4758                             STRLEN len;
4759                             const UV c = utf8n_to_uvchr((U8*)locinput,
4760                                                   UTF8_MAXBYTES, &len,
4761                                                   uniflags);
4762                             if (c == (UV)ST.c1 || c == (UV)ST.c2)
4763                                 break;
4764                             locinput += len;
4765                             n++;
4766                         }
4767                     }
4768                 }
4769                 else {
4770                     if (ST.c1 == ST.c2) {
4771                         while (locinput <= ST.maxpos &&
4772                                UCHARAT(locinput) != ST.c1)
4773                             locinput++;
4774                     }
4775                     else {
4776                         while (locinput <= ST.maxpos
4777                                && UCHARAT(locinput) != ST.c1
4778                                && UCHARAT(locinput) != ST.c2)
4779                             locinput++;
4780                     }
4781                     n = locinput - ST.oldloc;
4782                 }
4783                 if (locinput > ST.maxpos)
4784                     sayNO;
4785                 /* PL_reginput == oldloc now */
4786                 if (n) {
4787                     ST.count += n;
4788                     if (regrepeat(rex, ST.A, n, depth) < n)
4789                         sayNO;
4790                 }
4791                 PL_reginput = locinput;
4792                 CURLY_SETPAREN(ST.paren, ST.count);
4793                 if (cur_eval && cur_eval->u.eval.close_paren &&
4794                     cur_eval->u.eval.close_paren == (U32)ST.paren) {
4795                     goto fake_end;
4796                 }
4797                 PUSH_STATE_GOTO(CURLY_B_min_known, ST.B);
4798             }
4799             /* NOTREACHED */
4800
4801
4802         case CURLY_B_min_fail:
4803             /* failed to find B in a non-greedy match where c1,c2 invalid */
4804             if (ST.paren && ST.count)
4805                 PL_regoffs[ST.paren].end = -1;
4806
4807             REGCP_UNWIND(ST.cp);
4808             /* failed -- move forward one */
4809             PL_reginput = locinput;
4810             if (regrepeat(rex, ST.A, 1, depth)) {
4811                 ST.count++;
4812                 locinput = PL_reginput;
4813                 if (ST.count <= ST.max || (ST.max == REG_INFTY &&
4814                         ST.count > 0)) /* count overflow ? */
4815                 {
4816                   curly_try_B_min:
4817                     CURLY_SETPAREN(ST.paren, ST.count);
4818                     if (cur_eval && cur_eval->u.eval.close_paren &&
4819                         cur_eval->u.eval.close_paren == (U32)ST.paren) {
4820                         goto fake_end;
4821                     }
4822                     PUSH_STATE_GOTO(CURLY_B_min, ST.B);
4823                 }
4824             }
4825             sayNO;
4826             /* NOTREACHED */
4827
4828
4829         curly_try_B_max:
4830             /* a successful greedy match: now try to match B */
4831             if (cur_eval && cur_eval->u.eval.close_paren &&
4832                 cur_eval->u.eval.close_paren == (U32)ST.paren) {
4833                 goto fake_end;
4834             }
4835             {
4836                 UV c = 0;
4837                 if (ST.c1 != CHRTEST_VOID)
4838                     c = do_utf8 ? utf8n_to_uvchr((U8*)PL_reginput,
4839                                            UTF8_MAXBYTES, 0, uniflags)
4840                                 : (UV) UCHARAT(PL_reginput);
4841                 /* If it could work, try it. */
4842                 if (ST.c1 == CHRTEST_VOID || c == (UV)ST.c1 || c == (UV)ST.c2) {
4843                     CURLY_SETPAREN(ST.paren, ST.count);
4844                     PUSH_STATE_GOTO(CURLY_B_max, ST.B);
4845                     /* NOTREACHED */
4846                 }
4847             }
4848             /* FALL THROUGH */
4849         case CURLY_B_max_fail:
4850             /* failed to find B in a greedy match */
4851             if (ST.paren && ST.count)
4852                 PL_regoffs[ST.paren].end = -1;
4853
4854             REGCP_UNWIND(ST.cp);
4855             /*  back up. */
4856             if (--ST.count < ST.min)
4857                 sayNO;
4858             PL_reginput = locinput = HOPc(locinput, -1);
4859             goto curly_try_B_max;
4860
4861 #undef ST
4862
4863         case END:
4864             fake_end:
4865             if (cur_eval) {
4866                 /* we've just finished A in /(??{A})B/; now continue with B */
4867                 I32 tmpix;
4868                 st->u.eval.toggle_reg_flags
4869                             = cur_eval->u.eval.toggle_reg_flags;
4870                 PL_reg_flags ^= st->u.eval.toggle_reg_flags;
4871
4872                 st->u.eval.prev_rex = rex_sv;           /* inner */
4873                 SETREX(rex_sv,cur_eval->u.eval.prev_rex);
4874                 rex = (struct regexp *)SvANY(rex_sv);
4875                 rexi = RXi_GET(rex);
4876                 cur_curlyx = cur_eval->u.eval.prev_curlyx;
4877                 ReREFCNT_inc(rex_sv);
4878                 st->u.eval.cp = regcppush(0);   /* Save *all* the positions. */
4879                 REGCP_SET(st->u.eval.lastcp);
4880                 PL_reginput = locinput;
4881
4882                 /* Restore parens of the outer rex without popping the
4883                  * savestack */
4884                 tmpix = PL_savestack_ix;
4885                 PL_savestack_ix = cur_eval->u.eval.lastcp;
4886                 regcppop(rex);
4887                 PL_savestack_ix = tmpix;
4888
4889                 st->u.eval.prev_eval = cur_eval;
4890                 cur_eval = cur_eval->u.eval.prev_eval;
4891                 DEBUG_EXECUTE_r(
4892                     PerlIO_printf(Perl_debug_log, "%*s  EVAL trying tail ... %"UVxf"\n",
4893                                       REPORT_CODE_OFF+depth*2, "",PTR2UV(cur_eval)););
4894                 if ( nochange_depth )
4895                     nochange_depth--;
4896
4897                 PUSH_YES_STATE_GOTO(EVAL_AB,
4898                         st->u.eval.prev_eval->u.eval.B); /* match B */
4899             }
4900
4901             if (locinput < reginfo->till) {
4902                 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
4903                                       "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
4904                                       PL_colors[4],
4905                                       (long)(locinput - PL_reg_starttry),
4906                                       (long)(reginfo->till - PL_reg_starttry),
4907                                       PL_colors[5]));
4908
4909                 sayNO_SILENT;           /* Cannot match: too short. */
4910             }
4911             PL_reginput = locinput;     /* put where regtry can find it */
4912             sayYES;                     /* Success! */
4913
4914         case SUCCEED: /* successful SUSPEND/UNLESSM/IFMATCH/CURLYM */
4915             DEBUG_EXECUTE_r(
4916             PerlIO_printf(Perl_debug_log,
4917                 "%*s  %ssubpattern success...%s\n",
4918                 REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5]));
4919             PL_reginput = locinput;     /* put where regtry can find it */
4920             sayYES;                     /* Success! */
4921
4922 #undef  ST
4923 #define ST st->u.ifmatch
4924
4925         case SUSPEND:   /* (?>A) */
4926             ST.wanted = 1;
4927             PL_reginput = locinput;
4928             goto do_ifmatch;
4929
4930         case UNLESSM:   /* -ve lookaround: (?!A), or with flags, (?<!A) */
4931             ST.wanted = 0;
4932             goto ifmatch_trivial_fail_test;
4933
4934         case IFMATCH:   /* +ve lookaround: (?=A), or with flags, (?<=A) */
4935             ST.wanted = 1;
4936           ifmatch_trivial_fail_test:
4937             if (scan->flags) {
4938                 char * const s = HOPBACKc(locinput, scan->flags);
4939                 if (!s) {
4940                     /* trivial fail */
4941                     if (logical) {
4942                         logical = 0;
4943                         sw = 1 - (bool)ST.wanted;
4944                     }
4945                     else if (ST.wanted)
4946                         sayNO;
4947                     next = scan + ARG(scan);
4948                     if (next == scan)
4949                         next = NULL;
4950                     break;
4951                 }
4952                 PL_reginput = s;
4953             }
4954             else
4955                 PL_reginput = locinput;
4956
4957           do_ifmatch:
4958             ST.me = scan;
4959             ST.logical = logical;
4960             /* execute body of (?...A) */
4961             PUSH_YES_STATE_GOTO(IFMATCH_A, NEXTOPER(NEXTOPER(scan)));
4962             /* NOTREACHED */
4963
4964         case IFMATCH_A_fail: /* body of (?...A) failed */
4965             ST.wanted = !ST.wanted;
4966             /* FALL THROUGH */
4967
4968         case IFMATCH_A: /* body of (?...A) succeeded */
4969             if (ST.logical) {
4970                 sw = (bool)ST.wanted;
4971             }
4972             else if (!ST.wanted)
4973                 sayNO;
4974
4975             if (OP(ST.me) == SUSPEND)
4976                 locinput = PL_reginput;
4977             else {
4978                 locinput = PL_reginput = st->locinput;
4979                 nextchr = UCHARAT(locinput);
4980             }
4981             scan = ST.me + ARG(ST.me);
4982             if (scan == ST.me)
4983                 scan = NULL;
4984             continue; /* execute B */
4985
4986 #undef ST
4987
4988         case LONGJMP:
4989             next = scan + ARG(scan);
4990             if (next == scan)
4991                 next = NULL;
4992             break;
4993         case COMMIT:
4994             reginfo->cutpoint = PL_regeol;
4995             /* FALLTHROUGH */
4996         case PRUNE:
4997             PL_reginput = locinput;
4998             if (!scan->flags)
4999                 sv_yes_mark = sv_commit = (SV*)rexi->data->data[ ARG( scan ) ];
5000             PUSH_STATE_GOTO(COMMIT_next,next);
5001             /* NOTREACHED */
5002         case COMMIT_next_fail:
5003             no_final = 1;
5004             /* FALLTHROUGH */
5005         case OPFAIL:
5006             sayNO;
5007             /* NOTREACHED */
5008
5009 #define ST st->u.mark
5010         case MARKPOINT:
5011             ST.prev_mark = mark_state;
5012             ST.mark_name = sv_commit = sv_yes_mark
5013                 = (SV*)rexi->data->data[ ARG( scan ) ];
5014             mark_state = st;
5015             ST.mark_loc = PL_reginput = locinput;
5016             PUSH_YES_STATE_GOTO(MARKPOINT_next,next);
5017             /* NOTREACHED */
5018         case MARKPOINT_next:
5019             mark_state = ST.prev_mark;
5020             sayYES;
5021             /* NOTREACHED */
5022         case MARKPOINT_next_fail:
5023             if (popmark && sv_eq(ST.mark_name,popmark))
5024             {
5025                 if (ST.mark_loc > startpoint)
5026                     reginfo->cutpoint = HOPBACKc(ST.mark_loc, 1);
5027                 popmark = NULL; /* we found our mark */
5028                 sv_commit = ST.mark_name;
5029
5030                 DEBUG_EXECUTE_r({
5031                         PerlIO_printf(Perl_debug_log,
5032                             "%*s  %ssetting cutpoint to mark:%"SVf"...%s\n",
5033                             REPORT_CODE_OFF+depth*2, "",
5034                             PL_colors[4], SVfARG(sv_commit), PL_colors[5]);
5035                 });
5036             }
5037             mark_state = ST.prev_mark;
5038             sv_yes_mark = mark_state ?
5039                 mark_state->u.mark.mark_name : NULL;
5040             sayNO;
5041             /* NOTREACHED */
5042         case SKIP:
5043             PL_reginput = locinput;
5044             if (scan->flags) {
5045                 /* (*SKIP) : if we fail we cut here*/
5046                 ST.mark_name = NULL;
5047                 ST.mark_loc = locinput;
5048                 PUSH_STATE_GOTO(SKIP_next,next);
5049             } else {
5050                 /* (*SKIP:NAME) : if there is a (*MARK:NAME) fail where it was,
5051                    otherwise do nothing.  Meaning we need to scan
5052                  */
5053                 regmatch_state *cur = mark_state;
5054                 SV *find = (SV*)rexi->data->data[ ARG( scan ) ];
5055
5056                 while (cur) {
5057                     if ( sv_eq( cur->u.mark.mark_name,
5058                                 find ) )
5059                     {
5060                         ST.mark_name = find;
5061                         PUSH_STATE_GOTO( SKIP_next, next );
5062                     }
5063                     cur = cur->u.mark.prev_mark;
5064                 }
5065             }
5066             /* Didn't find our (*MARK:NAME) so ignore this (*SKIP:NAME) */
5067             break;
5068         case SKIP_next_fail:
5069             if (ST.mark_name) {
5070                 /* (*CUT:NAME) - Set up to search for the name as we
5071                    collapse the stack*/
5072                 popmark = ST.mark_name;
5073             } else {
5074                 /* (*CUT) - No name, we cut here.*/
5075                 if (ST.mark_loc > startpoint)
5076                     reginfo->cutpoint = HOPBACKc(ST.mark_loc, 1);
5077                 /* but we set sv_commit to latest mark_name if there
5078                    is one so they can test to see how things lead to this
5079                    cut */
5080                 if (mark_state)
5081                     sv_commit=mark_state->u.mark.mark_name;
5082             }
5083             no_final = 1;
5084             sayNO;
5085             /* NOTREACHED */
5086 #undef ST
5087         case FOLDCHAR:
5088             n = ARG(scan);
5089             if ( n == (U32)what_len_TRICKYFOLD(locinput,do_utf8,ln) ) {
5090                 locinput += ln;
5091             } else if ( 0xDF == n && !do_utf8 && !UTF ) {
5092                 sayNO;
5093             } else  {
5094                 U8 folded[UTF8_MAXBYTES_CASE+1];
5095                 STRLEN foldlen;
5096                 const char * const l = locinput;
5097                 char *e = PL_regeol;
5098                 to_uni_fold(n, folded, &foldlen);
5099
5100                 if (ibcmp_utf8((const char*) folded, 0,  foldlen, 1,
5101                                l, &e, 0,  do_utf8)) {
5102                         sayNO;
5103                 }
5104                 locinput = e;
5105             }
5106             nextchr = UCHARAT(locinput);
5107             break;
5108         case LNBREAK:
5109             if ((n=is_LNBREAK(locinput,do_utf8))) {
5110                 locinput += n;
5111                 nextchr = UCHARAT(locinput);
5112             } else
5113                 sayNO;
5114             break;
5115
5116 #define CASE_CLASS(nAmE)                              \
5117         case nAmE:                                    \
5118             if ((n=is_##nAmE(locinput,do_utf8))) {    \
5119                 locinput += n;                        \
5120                 nextchr = UCHARAT(locinput);          \
5121             } else                                    \
5122                 sayNO;                                \
5123             break;                                    \
5124         case N##nAmE:                                 \
5125             if ((n=is_##nAmE(locinput,do_utf8))) {    \
5126                 sayNO;                                \
5127             } else {                                  \
5128                 locinput += UTF8SKIP(locinput);       \
5129                 nextchr = UCHARAT(locinput);          \
5130             }                                         \
5131             break
5132
5133         CASE_CLASS(VERTWS);
5134         CASE_CLASS(HORIZWS);
5135 #undef CASE_CLASS
5136
5137         default:
5138             PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
5139                           PTR2UV(scan), OP(scan));
5140             Perl_croak(aTHX_ "regexp memory corruption");
5141
5142         } /* end switch */
5143
5144         /* switch break jumps here */
5145         scan = next; /* prepare to execute the next op and ... */
5146         continue;    /* ... jump back to the top, reusing st */
5147         /* NOTREACHED */
5148
5149       push_yes_state:
5150         /* push a state that backtracks on success */
5151         st->u.yes.prev_yes_state = yes_state;
5152         yes_state = st;
5153         /* FALL THROUGH */
5154       push_state:
5155         /* push a new regex state, then continue at scan  */
5156         {
5157             regmatch_state *newst;
5158
5159             DEBUG_STACK_r({
5160                 regmatch_state *cur = st;
5161                 regmatch_state *curyes = yes_state;
5162                 int curd = depth;
5163                 regmatch_slab *slab = PL_regmatch_slab;
5164                 for (;curd > -1;cur--,curd--) {
5165                     if (cur < SLAB_FIRST(slab)) {
5166                         slab = slab->prev;
5167                         cur = SLAB_LAST(slab);
5168                     }
5169                     PerlIO_printf(Perl_error_log, "%*s#%-3d %-10s %s\n",
5170                         REPORT_CODE_OFF + 2 + depth * 2,"",
5171                         curd, PL_reg_name[cur->resume_state],
5172                         (curyes == cur) ? "yes" : ""
5173                     );
5174                     if (curyes == cur)
5175                         curyes = cur->u.yes.prev_yes_state;
5176                 }
5177             } else
5178                 DEBUG_STATE_pp("push")
5179             );
5180             depth++;
5181             st->locinput = locinput;
5182             newst = st+1;
5183             if (newst >  SLAB_LAST(PL_regmatch_slab))
5184                 newst = S_push_slab(aTHX);
5185             PL_regmatch_state = newst;
5186
5187             locinput = PL_reginput;
5188             nextchr = UCHARAT(locinput);
5189             st = newst;
5190             continue;
5191             /* NOTREACHED */
5192         }
5193     }
5194
5195     /*
5196     * We get here only if there's trouble -- normally "case END" is
5197     * the terminating point.
5198     */
5199     Perl_croak(aTHX_ "corrupted regexp pointers");
5200     /*NOTREACHED*/
5201     sayNO;
5202
5203 yes:
5204     if (yes_state) {
5205         /* we have successfully completed a subexpression, but we must now
5206          * pop to the state marked by yes_state and continue from there */
5207         assert(st != yes_state);
5208 #ifdef DEBUGGING
5209         while (st != yes_state) {
5210             st--;
5211             if (st < SLAB_FIRST(PL_regmatch_slab)) {
5212                 PL_regmatch_slab = PL_regmatch_slab->prev;
5213                 st = SLAB_LAST(PL_regmatch_slab);
5214             }
5215             DEBUG_STATE_r({
5216                 if (no_final) {
5217                     DEBUG_STATE_pp("pop (no final)");
5218                 } else {
5219                     DEBUG_STATE_pp("pop (yes)");
5220                 }
5221             });
5222             depth--;
5223         }
5224 #else
5225         while (yes_state < SLAB_FIRST(PL_regmatch_slab)
5226             || yes_state > SLAB_LAST(PL_regmatch_slab))
5227         {
5228             /* not in this slab, pop slab */
5229             depth -= (st - SLAB_FIRST(PL_regmatch_slab) + 1);
5230             PL_regmatch_slab = PL_regmatch_slab->prev;
5231             st = SLAB_LAST(PL_regmatch_slab);
5232         }
5233         depth -= (st - yes_state);
5234 #endif
5235         st = yes_state;
5236         yes_state = st->u.yes.prev_yes_state;
5237         PL_regmatch_state = st;
5238
5239         if (no_final) {
5240             locinput= st->locinput;
5241             nextchr = UCHARAT(locinput);
5242         }
5243         state_num = st->resume_state + no_final;
5244         goto reenter_switch;
5245     }
5246
5247     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
5248                           PL_colors[4], PL_colors[5]));
5249
5250     if (PL_reg_eval_set) {
5251         /* each successfully executed (?{...}) block does the equivalent of
5252          *   local $^R = do {...}
5253          * When popping the save stack, all these locals would be undone;
5254          * bypass this by setting the outermost saved $^R to the latest
5255          * value */
5256         if (oreplsv != GvSV(PL_replgv))
5257             sv_setsv(oreplsv, GvSV(PL_replgv));
5258     }
5259     result = 1;
5260     goto final_exit;
5261
5262 no:
5263     DEBUG_EXECUTE_r(
5264         PerlIO_printf(Perl_debug_log,
5265             "%*s  %sfailed...%s\n",
5266             REPORT_CODE_OFF+depth*2, "",
5267             PL_colors[4], PL_colors[5])
5268         );
5269
5270 no_silent:
5271     if (no_final) {
5272         if (yes_state) {
5273             goto yes;
5274         } else {
5275             goto final_exit;
5276         }
5277     }
5278     if (depth) {
5279         /* there's a previous state to backtrack to */
5280         st--;
5281         if (st < SLAB_FIRST(PL_regmatch_slab)) {
5282             PL_regmatch_slab = PL_regmatch_slab->prev;
5283             st = SLAB_LAST(PL_regmatch_slab);
5284         }
5285         PL_regmatch_state = st;
5286         locinput= st->locinput;
5287         nextchr = UCHARAT(locinput);
5288
5289         DEBUG_STATE_pp("pop");
5290         depth--;
5291         if (yes_state == st)
5292             yes_state = st->u.yes.prev_yes_state;
5293
5294         state_num = st->resume_state + 1; /* failure = success + 1 */
5295         goto reenter_switch;
5296     }
5297     result = 0;
5298
5299   final_exit:
5300     if (rex->intflags & PREGf_VERBARG_SEEN) {
5301         SV *sv_err = get_sv("REGERROR", 1);
5302         SV *sv_mrk = get_sv("REGMARK", 1);
5303         if (result) {
5304             sv_commit = &PL_sv_no;
5305             if (!sv_yes_mark)
5306                 sv_yes_mark = &PL_sv_yes;
5307         } else {
5308             if (!sv_commit)
5309                 sv_commit = &PL_sv_yes;
5310             sv_yes_mark = &PL_sv_no;
5311         }
5312         sv_setsv(sv_err, sv_commit);
5313         sv_setsv(sv_mrk, sv_yes_mark);
5314     }
5315
5316     /* clean up; in particular, free all slabs above current one */
5317     LEAVE_SCOPE(oldsave);
5318
5319     return result;
5320 }
5321
5322 /*
5323  - regrepeat - repeatedly match something simple, report how many
5324  */
5325 /*
5326  * [This routine now assumes that it will only match on things of length 1.
5327  * That was true before, but now we assume scan - reginput is the count,
5328  * rather than incrementing count on every character.  [Er, except utf8.]]
5329  */
5330 STATIC I32
5331 S_regrepeat(pTHX_ const regexp *prog, const regnode *p, I32 max, int depth)
5332 {
5333     dVAR;
5334     register char *scan;
5335     register I32 c;
5336     register char *loceol = PL_regeol;
5337     register I32 hardcount = 0;
5338     register bool do_utf8 = PL_reg_match_utf8;
5339 #ifndef DEBUGGING
5340     PERL_UNUSED_ARG(depth);
5341 #endif
5342
5343     scan = PL_reginput;
5344     if (max == REG_INFTY)
5345         max = I32_MAX;
5346     else if (max < loceol - scan)
5347         loceol = scan + max;
5348     switch (OP(p)) {
5349     case REG_ANY:
5350         if (do_utf8) {
5351             loceol = PL_regeol;
5352             while (scan < loceol && hardcount < max && *scan != '\n') {
5353                 scan += UTF8SKIP(scan);
5354                 hardcount++;
5355             }
5356         } else {
5357             while (scan < loceol && *scan != '\n')
5358                 scan++;
5359         }
5360         break;
5361     case SANY:
5362         if (do_utf8) {
5363             loceol = PL_regeol;
5364             while (scan < loceol && hardcount < max) {
5365                 scan += UTF8SKIP(scan);
5366                 hardcount++;
5367             }
5368         }
5369         else
5370             scan = loceol;
5371         break;
5372     case CANY:
5373         scan = loceol;
5374         break;
5375     case EXACT:         /* length of string is 1 */
5376         c = (U8)*STRING(p);
5377         while (scan < loceol && UCHARAT(scan) == c)
5378             scan++;
5379         break;
5380     case EXACTF:        /* length of string is 1 */
5381         c = (U8)*STRING(p);
5382         while (scan < loceol &&
5383                (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
5384             scan++;
5385         break;
5386     case EXACTFL:       /* length of string is 1 */
5387         PL_reg_flags |= RF_tainted;
5388         c = (U8)*STRING(p);
5389         while (scan < loceol &&
5390                (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
5391             scan++;
5392         break;
5393     case ANYOF:
5394         if (do_utf8) {
5395             loceol = PL_regeol;
5396             while (hardcount < max && scan < loceol &&
5397                    reginclass(prog, p, (U8*)scan, 0, do_utf8)) {
5398                 scan += UTF8SKIP(scan);
5399                 hardcount++;
5400             }
5401         } else {
5402             while (scan < loceol && REGINCLASS(prog, p, (U8*)scan))
5403                 scan++;
5404         }
5405         break;
5406     case ALNUM:
5407         if (do_utf8) {
5408             loceol = PL_regeol;
5409             LOAD_UTF8_CHARCLASS_ALNUM();
5410             while (hardcount < max && scan < loceol &&
5411                    swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
5412                 scan += UTF8SKIP(scan);
5413                 hardcount++;
5414             }
5415         } else {
5416             while (scan < loceol && isALNUM(*scan))
5417                 scan++;
5418         }
5419         break;
5420     case ALNUML:
5421         PL_reg_flags |= RF_tainted;
5422         if (do_utf8) {
5423             loceol = PL_regeol;
5424             while (hardcount < max && scan < loceol &&
5425                    isALNUM_LC_utf8((U8*)scan)) {
5426                 scan += UTF8SKIP(scan);
5427                 hardcount++;
5428             }
5429         } else {
5430             while (scan < loceol && isALNUM_LC(*scan))
5431                 scan++;
5432         }
5433         break;
5434     case NALNUM:
5435         if (do_utf8) {
5436             loceol = PL_regeol;
5437             LOAD_UTF8_CHARCLASS_ALNUM();
5438             while (hardcount < max && scan < loceol &&
5439                    !swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
5440                 scan += UTF8SKIP(scan);
5441                 hardcount++;
5442             }
5443         } else {
5444             while (scan < loceol && !isALNUM(*scan))
5445                 scan++;
5446         }
5447         break;
5448     case NALNUML:
5449         PL_reg_flags |= RF_tainted;
5450         if (do_utf8) {
5451             loceol = PL_regeol;
5452             while (hardcount < max && scan < loceol &&
5453                    !isALNUM_LC_utf8((U8*)scan)) {
5454                 scan += UTF8SKIP(scan);
5455                 hardcount++;
5456             }
5457         } else {
5458             while (scan < loceol && !isALNUM_LC(*scan))
5459                 scan++;
5460         }
5461         break;
5462     case SPACE:
5463         if (do_utf8) {
5464             loceol = PL_regeol;
5465             LOAD_UTF8_CHARCLASS_SPACE();
5466             while (hardcount < max && scan < loceol &&
5467                    (*scan == ' ' ||
5468                     swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
5469                 scan += UTF8SKIP(scan);
5470                 hardcount++;
5471             }
5472         } else {
5473             while (scan < loceol && isSPACE(*scan))
5474                 scan++;
5475         }
5476         break;
5477     case SPACEL:
5478         PL_reg_flags |= RF_tainted;
5479         if (do_utf8) {
5480             loceol = PL_regeol;
5481             while (hardcount < max && scan < loceol &&
5482                    (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
5483                 scan += UTF8SKIP(scan);
5484                 hardcount++;
5485             }
5486         } else {
5487             while (scan < loceol && isSPACE_LC(*scan))
5488                 scan++;
5489         }
5490         break;
5491     case NSPACE:
5492         if (do_utf8) {
5493             loceol = PL_regeol;
5494             LOAD_UTF8_CHARCLASS_SPACE();
5495             while (hardcount < max && scan < loceol &&
5496                    !(*scan == ' ' ||
5497                      swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
5498                 scan += UTF8SKIP(scan);
5499                 hardcount++;
5500             }
5501         } else {
5502             while (scan < loceol && !isSPACE(*scan))
5503                 scan++;
5504         }
5505         break;
5506     case NSPACEL:
5507         PL_reg_flags |= RF_tainted;
5508         if (do_utf8) {
5509             loceol = PL_regeol;
5510             while (hardcount < max && scan < loceol &&
5511                    !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
5512                 scan += UTF8SKIP(scan);
5513                 hardcount++;
5514             }
5515         } else {
5516             while (scan < loceol && !isSPACE_LC(*scan))
5517                 scan++;
5518         }
5519         break;
5520     case DIGIT:
5521         if (do_utf8) {
5522             loceol = PL_regeol;
5523             LOAD_UTF8_CHARCLASS_DIGIT();
5524             while (hardcount < max && scan < loceol &&
5525                    swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
5526                 scan += UTF8SKIP(scan);
5527                 hardcount++;
5528             }
5529         } else {
5530             while (scan < loceol && isDIGIT(*scan))
5531                 scan++;
5532         }
5533         break;
5534     case NDIGIT:
5535         if (do_utf8) {
5536             loceol = PL_regeol;
5537             LOAD_UTF8_CHARCLASS_DIGIT();
5538             while (hardcount < max && scan < loceol &&
5539                    !swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
5540                 scan += UTF8SKIP(scan);
5541                 hardcount++;
5542             }
5543         } else {
5544             while (scan < loceol && !isDIGIT(*scan))
5545                 scan++;
5546         }
5547     case LNBREAK:
5548         if (do_utf8) {
5549             loceol = PL_regeol;
5550             while (hardcount < max && scan < loceol && (c=is_LNBREAK_utf8(scan))) {
5551                 scan += c;
5552                 hardcount++;
5553             }
5554         } else {
5555             /*
5556               LNBREAK can match two latin chars, which is ok,
5557               because we have a null terminated string, but we
5558               have to use hardcount in this situation
5559             */
5560             while (scan < loceol && (c=is_LNBREAK_latin1(scan)))  {
5561                 scan+=c;
5562                 hardcount++;
5563             }
5564         }
5565         break;
5566     case HORIZWS:
5567         if (do_utf8) {
5568             loceol = PL_regeol;
5569             while (hardcount < max && scan < loceol && (c=is_HORIZWS_utf8(scan))) {
5570                 scan += c;
5571                 hardcount++;
5572             }
5573         } else {
5574             while (scan < loceol && is_HORIZWS_latin1(scan))
5575                 scan++;
5576         }
5577         break;
5578     case NHORIZWS:
5579         if (do_utf8) {
5580             loceol = PL_regeol;
5581             while (hardcount < max && scan < loceol && !is_HORIZWS_utf8(scan)) {
5582                 scan += UTF8SKIP(scan);
5583                 hardcount++;
5584             }
5585         } else {
5586             while (scan < loceol && !is_HORIZWS_latin1(scan))
5587                 scan++;
5588
5589         }
5590         break;
5591     case VERTWS:
5592         if (do_utf8) {
5593             loceol = PL_regeol;
5594             while (hardcount < max && scan < loceol && (c=is_VERTWS_utf8(scan))) {
5595                 scan += c;
5596                 hardcount++;
5597             }
5598         } else {
5599             while (scan < loceol && is_VERTWS_latin1(scan))
5600                 scan++;
5601
5602         }
5603         break;
5604     case NVERTWS:
5605         if (do_utf8) {
5606             loceol = PL_regeol;
5607             while (hardcount < max && scan < loceol && !is_VERTWS_utf8(scan)) {
5608                 scan += UTF8SKIP(scan);
5609                 hardcount++;
5610             }
5611         } else {
5612             while (scan < loceol && !is_VERTWS_latin1(scan))
5613                 scan++;
5614
5615         }
5616         break;
5617
5618     default:            /* Called on something of 0 width. */
5619         break;          /* So match right here or not at all. */
5620     }
5621
5622     if (hardcount)
5623         c = hardcount;
5624     else
5625         c = scan - PL_reginput;
5626     PL_reginput = scan;
5627
5628     DEBUG_r({
5629         GET_RE_DEBUG_FLAGS_DECL;
5630         DEBUG_EXECUTE_r({
5631             SV * const prop = sv_newmortal();
5632             regprop(prog, prop, p);
5633             PerlIO_printf(Perl_debug_log,
5634                         "%*s  %s can match %"IVdf" times out of %"IVdf"...\n",
5635                         REPORT_CODE_OFF + depth*2, "", SvPVX_const(prop),(IV)c,(IV)max);
5636         });
5637     });
5638
5639     return(c);
5640 }
5641
5642
5643 #if !defined(PERL_IN_XSUB_RE) || defined(PLUGGABLE_RE_EXTENSION)
5644 /*
5645 - regclass_swash - prepare the utf8 swash
5646 */
5647
5648 SV *
5649 Perl_regclass_swash(pTHX_ const regexp *prog, register const regnode* node, bool doinit, SV** listsvp, SV **altsvp)
5650 {
5651     dVAR;
5652     SV *sw  = NULL;
5653     SV *si  = NULL;
5654     SV *alt = NULL;
5655     RXi_GET_DECL(prog,progi);
5656     const struct reg_data * const data = prog ? progi->data : NULL;
5657
5658     if (data && data->count) {
5659         const U32 n = ARG(node);
5660
5661         if (data->what[n] == 's') {
5662             SV * const rv = (SV*)data->data[n];
5663             AV * const av = (AV*)SvRV((SV*)rv);
5664             SV **const ary = AvARRAY(av);
5665             SV **a, **b;
5666
5667             /* See the end of regcomp.c:S_regclass() for
5668              * documentation of these array elements. */
5669
5670             si = *ary;
5671             a  = SvROK(ary[1]) ? &ary[1] : NULL;
5672             b  = SvTYPE(ary[2]) == SVt_PVAV ? &ary[2] : NULL;
5673
5674             if (a)
5675                 sw = *a;
5676             else if (si && doinit) {
5677                 sw = swash_init("utf8", "", si, 1, 0);
5678                 (void)av_store(av, 1, sw);
5679             }
5680             if (b)
5681                 alt = *b;
5682         }
5683     }
5684
5685     if (listsvp)
5686         *listsvp = si;
5687     if (altsvp)
5688         *altsvp  = alt;
5689
5690     return sw;
5691 }
5692 #endif
5693
5694 /*
5695  - reginclass - determine if a character falls into a character class
5696
5697   The n is the ANYOF regnode, the p is the target string, lenp
5698   is pointer to the maximum length of how far to go in the p
5699   (if the lenp is zero, UTF8SKIP(p) is used),
5700   do_utf8 tells whether the target string is in UTF-8.
5701
5702  */
5703
5704 STATIC bool
5705 S_reginclass(pTHX_ const regexp *prog, register const regnode *n, register const U8* p, STRLEN* lenp, register bool do_utf8)
5706 {
5707     dVAR;
5708     const char flags = ANYOF_FLAGS(n);
5709     bool match = FALSE;
5710     UV c = *p;
5711     STRLEN len = 0;
5712     STRLEN plen;
5713
5714     if (do_utf8 && !UTF8_IS_INVARIANT(c)) {
5715         c = utf8n_to_uvchr(p, UTF8_MAXBYTES, &len,
5716                 (UTF8_ALLOW_DEFAULT & UTF8_ALLOW_ANYUV) | UTF8_CHECK_ONLY);
5717                 /* see [perl #37836] for UTF8_ALLOW_ANYUV */
5718         if (len == (STRLEN)-1)
5719             Perl_croak(aTHX_ "Malformed UTF-8 character (fatal)");
5720     }
5721
5722     plen = lenp ? *lenp : UNISKIP(NATIVE_TO_UNI(c));
5723     if (do_utf8 || (flags & ANYOF_UNICODE)) {
5724         if (lenp)
5725             *lenp = 0;
5726         if (do_utf8 && !ANYOF_RUNTIME(n)) {
5727             if (len != (STRLEN)-1 && c < 256 && ANYOF_BITMAP_TEST(n, c))
5728                 match = TRUE;
5729         }
5730         if (!match && do_utf8 && (flags & ANYOF_UNICODE_ALL) && c >= 256)
5731             match = TRUE;
5732         if (!match) {
5733             AV *av;
5734             SV * const sw = regclass_swash(prog, n, TRUE, 0, (SV**)&av);
5735
5736             if (sw) {
5737                 if (swash_fetch(sw, p, do_utf8))
5738                     match = TRUE;
5739                 else if (flags & ANYOF_FOLD) {
5740                     if (!match && lenp && av) {
5741                         I32 i;
5742                         for (i = 0; i <= av_len(av); i++) {
5743                             SV* const sv = *av_fetch(av, i, FALSE);
5744                             STRLEN len;
5745                             const char * const s = SvPV_const(sv, len);
5746
5747                             if (len <= plen && memEQ(s, (char*)p, len)) {
5748                                 *lenp = len;
5749                                 match = TRUE;
5750                                 break;
5751                             }
5752                         }
5753                     }
5754                     if (!match) {
5755                         U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
5756                         STRLEN tmplen;
5757
5758                         to_utf8_fold(p, tmpbuf, &tmplen);
5759                         if (swash_fetch(sw, tmpbuf, do_utf8))
5760                             match = TRUE;
5761                     }
5762                 }
5763             }
5764         }
5765         if (match && lenp && *lenp == 0)
5766             *lenp = UNISKIP(NATIVE_TO_UNI(c));
5767     }
5768     if (!match && c < 256) {
5769         if (ANYOF_BITMAP_TEST(n, c))
5770             match = TRUE;
5771         else if (flags & ANYOF_FOLD) {
5772             U8 f;
5773
5774             if (flags & ANYOF_LOCALE) {
5775                 PL_reg_flags |= RF_tainted;
5776                 f = PL_fold_locale[c];
5777             }
5778             else
5779                 f = PL_fold[c];
5780             if (f != c && ANYOF_BITMAP_TEST(n, f))
5781                 match = TRUE;
5782         }
5783
5784         if (!match && (flags & ANYOF_CLASS)) {
5785             PL_reg_flags |= RF_tainted;
5786             if (
5787                 (ANYOF_CLASS_TEST(n, ANYOF_ALNUM)   &&  isALNUM_LC(c))  ||
5788                 (ANYOF_CLASS_TEST(n, ANYOF_NALNUM)  && !isALNUM_LC(c))  ||
5789                 (ANYOF_CLASS_TEST(n, ANYOF_SPACE)   &&  isSPACE_LC(c))  ||
5790                 (ANYOF_CLASS_TEST(n, ANYOF_NSPACE)  && !isSPACE_LC(c))  ||
5791                 (ANYOF_CLASS_TEST(n, ANYOF_DIGIT)   &&  isDIGIT_LC(c))  ||
5792                 (ANYOF_CLASS_TEST(n, ANYOF_NDIGIT)  && !isDIGIT_LC(c))  ||
5793                 (ANYOF_CLASS_TEST(n, ANYOF_ALNUMC)  &&  isALNUMC_LC(c)) ||
5794                 (ANYOF_CLASS_TEST(n, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
5795                 (ANYOF_CLASS_TEST(n, ANYOF_ALPHA)   &&  isALPHA_LC(c))  ||
5796                 (ANYOF_CLASS_TEST(n, ANYOF_NALPHA)  && !isALPHA_LC(c))  ||
5797                 (ANYOF_CLASS_TEST(n, ANYOF_ASCII)   &&  isASCII(c))     ||
5798                 (ANYOF_CLASS_TEST(n, ANYOF_NASCII)  && !isASCII(c))     ||
5799                 (ANYOF_CLASS_TEST(n, ANYOF_CNTRL)   &&  isCNTRL_LC(c))  ||
5800                 (ANYOF_CLASS_TEST(n, ANYOF_NCNTRL)  && !isCNTRL_LC(c))  ||
5801                 (ANYOF_CLASS_TEST(n, ANYOF_GRAPH)   &&  isGRAPH_LC(c))  ||
5802                 (ANYOF_CLASS_TEST(n, ANYOF_NGRAPH)  && !isGRAPH_LC(c))  ||
5803                 (ANYOF_CLASS_TEST(n, ANYOF_LOWER)   &&  isLOWER_LC(c))  ||
5804                 (ANYOF_CLASS_TEST(n, ANYOF_NLOWER)  && !isLOWER_LC(c))  ||
5805                 (ANYOF_CLASS_TEST(n, ANYOF_PRINT)   &&  isPRINT_LC(c))  ||
5806                 (ANYOF_CLASS_TEST(n, ANYOF_NPRINT)  && !isPRINT_LC(c))  ||
5807                 (ANYOF_CLASS_TEST(n, ANYOF_PUNCT)   &&  isPUNCT_LC(c))  ||
5808                 (ANYOF_CLASS_TEST(n, ANYOF_NPUNCT)  && !isPUNCT_LC(c))  ||
5809                 (ANYOF_CLASS_TEST(n, ANYOF_UPPER)   &&  isUPPER_LC(c))  ||
5810                 (ANYOF_CLASS_TEST(n, ANYOF_NUPPER)  && !isUPPER_LC(c))  ||
5811                 (ANYOF_CLASS_TEST(n, ANYOF_XDIGIT)  &&  isXDIGIT(c))    ||
5812                 (ANYOF_CLASS_TEST(n, ANYOF_NXDIGIT) && !isXDIGIT(c))    ||
5813                 (ANYOF_CLASS_TEST(n, ANYOF_PSXSPC)  &&  isPSXSPC(c))    ||
5814                 (ANYOF_CLASS_TEST(n, ANYOF_NPSXSPC) && !isPSXSPC(c))    ||
5815                 (ANYOF_CLASS_TEST(n, ANYOF_BLANK)   &&  isBLANK(c))     ||
5816                 (ANYOF_CLASS_TEST(n, ANYOF_NBLANK)  && !isBLANK(c))
5817                 ) /* How's that for a conditional? */
5818             {
5819                 match = TRUE;
5820             }
5821         }
5822     }
5823
5824     return (flags & ANYOF_INVERT) ? !match : match;
5825 }
5826
5827 STATIC U8 *
5828 S_reghop3(U8 *s, I32 off, const U8* lim)
5829 {
5830     dVAR;
5831     if (off >= 0) {
5832         while (off-- && s < lim) {
5833             /* XXX could check well-formedness here */
5834             s += UTF8SKIP(s);
5835         }
5836     }
5837     else {
5838         while (off++ && s > lim) {
5839             s--;
5840             if (UTF8_IS_CONTINUED(*s)) {
5841                 while (s > lim && UTF8_IS_CONTINUATION(*s))
5842                     s--;
5843             }
5844             /* XXX could check well-formedness here */
5845         }
5846     }
5847     return s;
5848 }
5849
5850 #ifdef XXX_dmq
5851 /* there are a bunch of places where we use two reghop3's that should
5852    be replaced with this routine. but since thats not done yet
5853    we ifdef it out - dmq
5854 */
5855 STATIC U8 *
5856 S_reghop4(U8 *s, I32 off, const U8* llim, const U8* rlim)
5857 {
5858     dVAR;
5859     if (off >= 0) {
5860         while (off-- && s < rlim) {
5861             /* XXX could check well-formedness here */
5862             s += UTF8SKIP(s);
5863         }
5864     }
5865     else {
5866         while (off++ && s > llim) {
5867             s--;
5868             if (UTF8_IS_CONTINUED(*s)) {
5869                 while (s > llim && UTF8_IS_CONTINUATION(*s))
5870                     s--;
5871             }
5872             /* XXX could check well-formedness here */
5873         }
5874     }
5875     return s;
5876 }
5877 #endif
5878
5879 STATIC U8 *
5880 S_reghopmaybe3(U8* s, I32 off, const U8* lim)
5881 {
5882     dVAR;
5883     if (off >= 0) {
5884         while (off-- && s < lim) {
5885             /* XXX could check well-formedness here */
5886             s += UTF8SKIP(s);
5887         }
5888         if (off >= 0)
5889             return NULL;
5890     }
5891     else {
5892         while (off++ && s > lim) {
5893             s--;
5894             if (UTF8_IS_CONTINUED(*s)) {
5895                 while (s > lim && UTF8_IS_CONTINUATION(*s))
5896                     s--;
5897             }
5898             /* XXX could check well-formedness here */
5899         }
5900         if (off <= 0)
5901             return NULL;
5902     }
5903     return s;
5904 }
5905
5906 static void
5907 restore_pos(pTHX_ void *arg)
5908 {
5909     dVAR;
5910     regexp * const rex = (regexp *)arg;
5911     if (PL_reg_eval_set) {
5912         if (PL_reg_oldsaved) {
5913             rex->subbeg = PL_reg_oldsaved;
5914             rex->sublen = PL_reg_oldsavedlen;
5915 #ifdef PERL_OLD_COPY_ON_WRITE
5916             rex->saved_copy = PL_nrs;
5917 #endif
5918             RXp_MATCH_COPIED_on(rex);
5919         }
5920         PL_reg_magic->mg_len = PL_reg_oldpos;
5921         PL_reg_eval_set = 0;
5922         PL_curpm = PL_reg_oldcurpm;
5923     }
5924 }
5925
5926 STATIC void
5927 S_to_utf8_substr(pTHX_ register regexp *prog)
5928 {
5929     int i = 1;
5930     do {
5931         if (prog->substrs->data[i].substr
5932             && !prog->substrs->data[i].utf8_substr) {
5933             SV* const sv = newSVsv(prog->substrs->data[i].substr);
5934             prog->substrs->data[i].utf8_substr = sv;
5935             sv_utf8_upgrade(sv);
5936             if (SvVALID(prog->substrs->data[i].substr)) {
5937                 const U8 flags = BmFLAGS(prog->substrs->data[i].substr);
5938                 if (flags & FBMcf_TAIL) {
5939                     /* Trim the trailing \n that fbm_compile added last
5940                        time.  */
5941                     SvCUR_set(sv, SvCUR(sv) - 1);
5942                     /* Whilst this makes the SV technically "invalid" (as its
5943                        buffer is no longer followed by "\0") when fbm_compile()
5944                        adds the "\n" back, a "\0" is restored.  */
5945                 }
5946                 fbm_compile(sv, flags);
5947             }
5948             if (prog->substrs->data[i].substr == prog->check_substr)
5949                 prog->check_utf8 = sv;
5950         }
5951     } while (i--);
5952 }
5953
5954 STATIC void
5955 S_to_byte_substr(pTHX_ register regexp *prog)
5956 {
5957     dVAR;
5958     int i = 1;
5959     do {
5960         if (prog->substrs->data[i].utf8_substr
5961             && !prog->substrs->data[i].substr) {
5962             SV* sv = newSVsv(prog->substrs->data[i].utf8_substr);
5963             if (sv_utf8_downgrade(sv, TRUE)) {
5964                 if (SvVALID(prog->substrs->data[i].utf8_substr)) {
5965                     const U8 flags
5966                         = BmFLAGS(prog->substrs->data[i].utf8_substr);
5967                     if (flags & FBMcf_TAIL) {
5968                         /* Trim the trailing \n that fbm_compile added last
5969                            time.  */
5970                         SvCUR_set(sv, SvCUR(sv) - 1);
5971                     }
5972                     fbm_compile(sv, flags);
5973                 }
5974             } else {
5975                 SvREFCNT_dec(sv);
5976                 sv = &PL_sv_undef;
5977             }
5978             prog->substrs->data[i].substr = sv;
5979             if (prog->substrs->data[i].utf8_substr == prog->check_utf8)
5980                 prog->check_substr = sv;
5981         }
5982     } while (i--);
5983 }
5984
5985 /*
5986  * Local variables:
5987  * c-indentation-style: bsd
5988  * c-basic-offset: 4
5989  * indent-tabs-mode: t
5990  * End:
5991  *
5992  * ex: set ts=8 sts=4 sw=4 noet:
5993  */