Re: [perl #34493] h2ph `extern inline' problems
[p5sagit/p5-mst-13.2.git] / regexec.c
1 /*    regexec.c
2  */
3
4 /*
5  * "One Ring to rule them all, One Ring to find them..."
6  */
7
8 /* This file contains functions for executing a regular expression.  See
9  * also regcomp.c which funnily enough, contains functions for compiling
10  * a regular expression.
11  *
12  * This file is also copied at build time to ext/re/re_exec.c, where
13  * it's built with -DPERL_EXT_RE_BUILD -DPERL_EXT_RE_DEBUG -DPERL_EXT.
14  * This causes the main functions to be compiled under new names and with
15  * debugging support added, which makes "use re 'debug'" work.
16  
17  */
18
19 /* NOTE: this is derived from Henry Spencer's regexp code, and should not
20  * confused with the original package (see point 3 below).  Thanks, Henry!
21  */
22
23 /* Additional note: this code is very heavily munged from Henry's version
24  * in places.  In some spots I've traded clarity for efficiency, so don't
25  * blame Henry for some of the lack of readability.
26  */
27
28 /* The names of the functions have been changed from regcomp and
29  * regexec to  pregcomp and pregexec in order to avoid conflicts
30  * with the POSIX routines of the same names.
31 */
32
33 #ifdef PERL_EXT_RE_BUILD
34 /* need to replace pregcomp et al, so enable that */
35 #  ifndef PERL_IN_XSUB_RE
36 #    define PERL_IN_XSUB_RE
37 #  endif
38 /* need access to debugger hooks */
39 #  if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING)
40 #    define DEBUGGING
41 #  endif
42 #endif
43
44 #ifdef PERL_IN_XSUB_RE
45 /* We *really* need to overwrite these symbols: */
46 #  define Perl_regexec_flags my_regexec
47 #  define Perl_regdump my_regdump
48 #  define Perl_regprop my_regprop
49 #  define Perl_re_intuit_start my_re_intuit_start
50 /* *These* symbols are masked to allow static link. */
51 #  define Perl_pregexec my_pregexec
52 #  define Perl_reginitcolors my_reginitcolors
53 #  define Perl_regclass_swash my_regclass_swash
54
55 #  define PERL_NO_GET_CONTEXT
56 #endif
57
58 /*SUPPRESS 112*/
59 /*
60  * pregcomp and pregexec -- regsub and regerror are not used in perl
61  *
62  *      Copyright (c) 1986 by University of Toronto.
63  *      Written by Henry Spencer.  Not derived from licensed software.
64  *
65  *      Permission is granted to anyone to use this software for any
66  *      purpose on any computer system, and to redistribute it freely,
67  *      subject to the following restrictions:
68  *
69  *      1. The author is not responsible for the consequences of use of
70  *              this software, no matter how awful, even if they arise
71  *              from defects in it.
72  *
73  *      2. The origin of this software must not be misrepresented, either
74  *              by explicit claim or by omission.
75  *
76  *      3. Altered versions must be plainly marked as such, and must not
77  *              be misrepresented as being the original software.
78  *
79  ****    Alterations to Henry's code are...
80  ****
81  ****    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
82  ****    2000, 2001, 2002, 2003, 2004, 2005, by Larry Wall and others
83  ****
84  ****    You may distribute under the terms of either the GNU General Public
85  ****    License or the Artistic License, as specified in the README file.
86  *
87  * Beware that some of this code is subtly aware of the way operator
88  * precedence is structured in regular expressions.  Serious changes in
89  * regular-expression syntax might require a total rethink.
90  */
91 #include "EXTERN.h"
92 #define PERL_IN_REGEXEC_C
93 #include "perl.h"
94
95 #include "regcomp.h"
96
97 #define RF_tainted      1               /* tainted information used? */
98 #define RF_warned       2               /* warned about big count? */
99 #define RF_evaled       4               /* Did an EVAL with setting? */
100 #define RF_utf8         8               /* String contains multibyte chars? */
101 #define RF_false        16              /* odd number of nested negatives */
102
103 #define UTF ((PL_reg_flags & RF_utf8) != 0)
104
105 #define RS_init         1               /* eval environment created */
106 #define RS_set          2               /* replsv value is set */
107
108 #ifndef STATIC
109 #define STATIC  static
110 #endif
111
112 #define REGINCLASS(p,c)  (ANYOF_FLAGS(p) ? reginclass(p,c,0,0) : ANYOF_BITMAP_TEST(p,*(c)))
113
114 /*
115  * Forwards.
116  */
117
118 #define CHR_SVLEN(sv) (do_utf8 ? sv_len_utf8(sv) : SvCUR(sv))
119 #define CHR_DIST(a,b) (PL_reg_match_utf8 ? utf8_distance(a,b) : a - b)
120
121 #define reghop_c(pos,off) ((char*)reghop((U8*)pos, off))
122 #define reghopmaybe_c(pos,off) ((char*)reghopmaybe((U8*)pos, off))
123 #define HOP(pos,off) (PL_reg_match_utf8 ? reghop((U8*)pos, off) : (U8*)(pos + off))
124 #define HOPMAYBE(pos,off) (PL_reg_match_utf8 ? reghopmaybe((U8*)pos, off) : (U8*)(pos + off))
125 #define HOPc(pos,off) ((char*)HOP(pos,off))
126 #define HOPMAYBEc(pos,off) ((char*)HOPMAYBE(pos,off))
127
128 #define HOPBACK(pos, off) (             \
129     (PL_reg_match_utf8)                 \
130         ? reghopmaybe((U8*)pos, -off)   \
131     : (pos - off >= PL_bostr)           \
132         ? (U8*)(pos - off)              \
133     : (U8*)NULL                         \
134 )
135 #define HOPBACKc(pos, off) (char*)HOPBACK(pos, off)
136
137 #define reghop3_c(pos,off,lim) ((char*)reghop3((U8*)pos, off, (U8*)lim))
138 #define reghopmaybe3_c(pos,off,lim) ((char*)reghopmaybe3((U8*)pos, off, (U8*)lim))
139 #define HOP3(pos,off,lim) (PL_reg_match_utf8 ? reghop3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
140 #define HOPMAYBE3(pos,off,lim) (PL_reg_match_utf8 ? reghopmaybe3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
141 #define HOP3c(pos,off,lim) ((char*)HOP3(pos,off,lim))
142 #define HOPMAYBE3c(pos,off,lim) ((char*)HOPMAYBE3(pos,off,lim))
143
144 #define LOAD_UTF8_CHARCLASS(a,b) STMT_START { if (!CAT2(PL_utf8_,a)) { ENTER; save_re_context(); (void)CAT2(is_utf8_, a)((const U8*)b); LEAVE; } } STMT_END
145
146 /* for use after a quantifier and before an EXACT-like node -- japhy */
147 #define JUMPABLE(rn) ( \
148     OP(rn) == OPEN || OP(rn) == CLOSE || OP(rn) == EVAL || \
149     OP(rn) == SUSPEND || OP(rn) == IFMATCH || \
150     OP(rn) == PLUS || OP(rn) == MINMOD || \
151     (PL_regkind[(U8)OP(rn)] == CURLY && ARG1(rn) > 0) \
152 )
153
154 #define HAS_TEXT(rn) ( \
155     PL_regkind[(U8)OP(rn)] == EXACT || PL_regkind[(U8)OP(rn)] == REF \
156 )
157
158 /*
159   Search for mandatory following text node; for lookahead, the text must
160   follow but for lookbehind (rn->flags != 0) we skip to the next step.
161 */
162 #define FIND_NEXT_IMPT(rn) STMT_START { \
163     while (JUMPABLE(rn)) \
164         if (OP(rn) == SUSPEND || PL_regkind[(U8)OP(rn)] == CURLY) \
165             rn = NEXTOPER(NEXTOPER(rn)); \
166         else if (OP(rn) == PLUS) \
167             rn = NEXTOPER(rn); \
168         else if (OP(rn) == IFMATCH) \
169             rn = (rn->flags == 0) ? NEXTOPER(NEXTOPER(rn)) : rn + ARG(rn); \
170         else rn += NEXT_OFF(rn); \
171 } STMT_END 
172
173 static void restore_pos(pTHX_ void *arg);
174
175 STATIC CHECKPOINT
176 S_regcppush(pTHX_ I32 parenfloor)
177 {
178     int retval = PL_savestack_ix;
179 #define REGCP_PAREN_ELEMS 4
180     int paren_elems_to_push = (PL_regsize - parenfloor) * REGCP_PAREN_ELEMS;
181     int p;
182
183     if (paren_elems_to_push < 0)
184         Perl_croak(aTHX_ "panic: paren_elems_to_push < 0");
185
186 #define REGCP_OTHER_ELEMS 6
187     SSGROW(paren_elems_to_push + REGCP_OTHER_ELEMS);
188     for (p = PL_regsize; p > parenfloor; p--) {
189 /* REGCP_PARENS_ELEMS are pushed per pairs of parentheses. */
190         SSPUSHINT(PL_regendp[p]);
191         SSPUSHINT(PL_regstartp[p]);
192         SSPUSHPTR(PL_reg_start_tmp[p]);
193         SSPUSHINT(p);
194     }
195 /* REGCP_OTHER_ELEMS are pushed in any case, parentheses or no. */
196     SSPUSHINT(PL_regsize);
197     SSPUSHINT(*PL_reglastparen);
198     SSPUSHINT(*PL_reglastcloseparen);
199     SSPUSHPTR(PL_reginput);
200 #define REGCP_FRAME_ELEMS 2
201 /* REGCP_FRAME_ELEMS are part of the REGCP_OTHER_ELEMS and
202  * are needed for the regexp context stack bookkeeping. */
203     SSPUSHINT(paren_elems_to_push + REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
204     SSPUSHINT(SAVEt_REGCONTEXT); /* Magic cookie. */
205
206     return retval;
207 }
208
209 /* These are needed since we do not localize EVAL nodes: */
210 #  define REGCP_SET(cp)  DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,          \
211                              "  Setting an EVAL scope, savestack=%"IVdf"\n",    \
212                              (IV)PL_savestack_ix)); cp = PL_savestack_ix
213
214 #  define REGCP_UNWIND(cp)  DEBUG_EXECUTE_r(cp != PL_savestack_ix ?             \
215                                 PerlIO_printf(Perl_debug_log,           \
216                                 "  Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
217                                 (IV)(cp), (IV)PL_savestack_ix) : 0); regcpblow(cp)
218
219 STATIC char *
220 S_regcppop(pTHX)
221 {
222     I32 i;
223     U32 paren = 0;
224     char *input;
225     I32 tmps;
226
227     GET_RE_DEBUG_FLAGS_DECL;
228
229     /* Pop REGCP_OTHER_ELEMS before the parentheses loop starts. */
230     i = SSPOPINT;
231     assert(i == SAVEt_REGCONTEXT); /* Check that the magic cookie is there. */
232     i = SSPOPINT; /* Parentheses elements to pop. */
233     input = (char *) SSPOPPTR;
234     *PL_reglastcloseparen = SSPOPINT;
235     *PL_reglastparen = SSPOPINT;
236     PL_regsize = SSPOPINT;
237
238     /* Now restore the parentheses context. */
239     for (i -= (REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
240          i > 0; i -= REGCP_PAREN_ELEMS) {
241         paren = (U32)SSPOPINT;
242         PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
243         PL_regstartp[paren] = SSPOPINT;
244         tmps = SSPOPINT;
245         if (paren <= *PL_reglastparen)
246             PL_regendp[paren] = tmps;
247         DEBUG_EXECUTE_r(
248             PerlIO_printf(Perl_debug_log,
249                           "     restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
250                           (UV)paren, (IV)PL_regstartp[paren],
251                           (IV)(PL_reg_start_tmp[paren] - PL_bostr),
252                           (IV)PL_regendp[paren],
253                           (paren > *PL_reglastparen ? "(no)" : ""));
254         );
255     }
256     DEBUG_EXECUTE_r(
257         if ((I32)(*PL_reglastparen + 1) <= PL_regnpar) {
258             PerlIO_printf(Perl_debug_log,
259                           "     restoring \\%"IVdf"..\\%"IVdf" to undef\n",
260                           (IV)(*PL_reglastparen + 1), (IV)PL_regnpar);
261         }
262     );
263 #if 1
264     /* It would seem that the similar code in regtry()
265      * already takes care of this, and in fact it is in
266      * a better location to since this code can #if 0-ed out
267      * but the code in regtry() is needed or otherwise tests
268      * requiring null fields (pat.t#187 and split.t#{13,14}
269      * (as of patchlevel 7877)  will fail.  Then again,
270      * this code seems to be necessary or otherwise
271      * building DynaLoader will fail:
272      * "Error: '*' not in typemap in DynaLoader.xs, line 164"
273      * --jhi */
274     for (paren = *PL_reglastparen + 1; (I32)paren <= PL_regnpar; paren++) {
275         if ((I32)paren > PL_regsize)
276             PL_regstartp[paren] = -1;
277         PL_regendp[paren] = -1;
278     }
279 #endif
280     return input;
281 }
282
283 STATIC char *
284 S_regcp_set_to(pTHX_ I32 ss)
285 {
286     I32 tmp = PL_savestack_ix;
287
288     PL_savestack_ix = ss;
289     regcppop();
290     PL_savestack_ix = tmp;
291     return Nullch;
292 }
293
294 typedef struct re_cc_state
295 {
296     I32 ss;
297     regnode *node;
298     struct re_cc_state *prev;
299     CURCUR *cc;
300     regexp *re;
301 } re_cc_state;
302
303 #define regcpblow(cp) LEAVE_SCOPE(cp)   /* Ignores regcppush()ed data. */
304
305 #define TRYPAREN(paren, n, input) {                             \
306     if (paren) {                                                \
307         if (n) {                                                \
308             PL_regstartp[paren] = HOPc(input, -1) - PL_bostr;   \
309             PL_regendp[paren] = input - PL_bostr;               \
310         }                                                       \
311         else                                                    \
312             PL_regendp[paren] = -1;                             \
313     }                                                           \
314     if (regmatch(next))                                         \
315         sayYES;                                                 \
316     if (paren && n)                                             \
317         PL_regendp[paren] = -1;                                 \
318 }
319
320
321 /*
322  * pregexec and friends
323  */
324
325 /*
326  - pregexec - match a regexp against a string
327  */
328 I32
329 Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend,
330          char *strbeg, I32 minend, SV *screamer, U32 nosave)
331 /* strend: pointer to null at end of string */
332 /* strbeg: real beginning of string */
333 /* minend: end of match must be >=minend after stringarg. */
334 /* nosave: For optimizations. */
335 {
336     return
337         regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
338                       nosave ? 0 : REXEC_COPY_STR);
339 }
340
341 STATIC void
342 S_cache_re(pTHX_ regexp *prog)
343 {
344     PL_regprecomp = prog->precomp;              /* Needed for FAIL. */
345 #ifdef DEBUGGING
346     PL_regprogram = prog->program;
347 #endif
348     PL_regnpar = prog->nparens;
349     PL_regdata = prog->data;
350     PL_reg_re = prog;
351 }
352
353 /*
354  * Need to implement the following flags for reg_anch:
355  *
356  * USE_INTUIT_NOML              - Useful to call re_intuit_start() first
357  * USE_INTUIT_ML
358  * INTUIT_AUTORITATIVE_NOML     - Can trust a positive answer
359  * INTUIT_AUTORITATIVE_ML
360  * INTUIT_ONCE_NOML             - Intuit can match in one location only.
361  * INTUIT_ONCE_ML
362  *
363  * Another flag for this function: SECOND_TIME (so that float substrs
364  * with giant delta may be not rechecked).
365  */
366
367 /* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
368
369 /* If SCREAM, then SvPVX(sv) should be compatible with strpos and strend.
370    Otherwise, only SvCUR(sv) is used to get strbeg. */
371
372 /* XXXX We assume that strpos is strbeg unless sv. */
373
374 /* XXXX Some places assume that there is a fixed substring.
375         An update may be needed if optimizer marks as "INTUITable"
376         RExen without fixed substrings.  Similarly, it is assumed that
377         lengths of all the strings are no more than minlen, thus they
378         cannot come from lookahead.
379         (Or minlen should take into account lookahead.) */
380
381 /* A failure to find a constant substring means that there is no need to make
382    an expensive call to REx engine, thus we celebrate a failure.  Similarly,
383    finding a substring too deep into the string means that less calls to
384    regtry() should be needed.
385
386    REx compiler's optimizer found 4 possible hints:
387         a) Anchored substring;
388         b) Fixed substring;
389         c) Whether we are anchored (beginning-of-line or \G);
390         d) First node (of those at offset 0) which may distingush positions;
391    We use a)b)d) and multiline-part of c), and try to find a position in the
392    string which does not contradict any of them.
393  */
394
395 /* Most of decisions we do here should have been done at compile time.
396    The nodes of the REx which we used for the search should have been
397    deleted from the finite automaton. */
398
399 char *
400 Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
401                      char *strend, U32 flags, re_scream_pos_data *data)
402 {
403     register I32 start_shift = 0;
404     /* Should be nonnegative! */
405     register I32 end_shift   = 0;
406     register char *s;
407     register SV *check;
408     char *strbeg;
409     char *t;
410     int do_utf8 = sv ? SvUTF8(sv) : 0;  /* if no sv we have to assume bytes */
411     I32 ml_anch;
412     register char *other_last = Nullch; /* other substr checked before this */
413     char *check_at = Nullch;            /* check substr found at this pos */
414     I32 multiline = prog->reganch & PMf_MULTILINE;
415 #ifdef DEBUGGING
416     char *i_strpos = strpos;
417     SV *dsv = PERL_DEBUG_PAD_ZERO(0);
418 #endif
419
420     GET_RE_DEBUG_FLAGS_DECL;
421
422     RX_MATCH_UTF8_set(prog,do_utf8);
423
424     if (prog->reganch & ROPT_UTF8) {
425         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
426                               "UTF-8 regex...\n"));
427         PL_reg_flags |= RF_utf8;
428     }
429
430     DEBUG_EXECUTE_r({
431          char *s   = PL_reg_match_utf8 ?
432                          sv_uni_display(dsv, sv, 60, UNI_DISPLAY_REGEX) :
433                          strpos;
434          int   len = PL_reg_match_utf8 ?
435                          strlen(s) : strend - strpos;
436          if (!PL_colorset)
437               reginitcolors();
438          if (PL_reg_match_utf8)
439              DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
440                                    "UTF-8 target...\n"));
441          PerlIO_printf(Perl_debug_log,
442                        "%sGuessing start of match, REx%s `%s%.60s%s%s' against `%s%.*s%s%s'...\n",
443                        PL_colors[4],PL_colors[5],PL_colors[0],
444                        prog->precomp,
445                        PL_colors[1],
446                        (strlen(prog->precomp) > 60 ? "..." : ""),
447                        PL_colors[0],
448                        (int)(len > 60 ? 60 : len),
449                        s, PL_colors[1],
450                        (len > 60 ? "..." : "")
451               );
452     });
453
454     /* CHR_DIST() would be more correct here but it makes things slow. */
455     if (prog->minlen > strend - strpos) {
456         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
457                               "String too short... [re_intuit_start]\n"));
458         goto fail;
459     }
460     strbeg = (sv && SvPOK(sv)) ? strend - SvCUR(sv) : strpos;
461     PL_regeol = strend;
462     if (do_utf8) {
463         if (!prog->check_utf8 && prog->check_substr)
464             to_utf8_substr(prog);
465         check = prog->check_utf8;
466     } else {
467         if (!prog->check_substr && prog->check_utf8)
468             to_byte_substr(prog);
469         check = prog->check_substr;
470     }
471    if (check == &PL_sv_undef) {
472         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
473                 "Non-utf string cannot match utf check string\n"));
474         goto fail;
475     }
476     if (prog->reganch & ROPT_ANCH) {    /* Match at beg-of-str or after \n */
477         ml_anch = !( (prog->reganch & ROPT_ANCH_SINGLE)
478                      || ( (prog->reganch & ROPT_ANCH_BOL)
479                           && !multiline ) );    /* Check after \n? */
480
481         if (!ml_anch) {
482           if ( !(prog->reganch & (ROPT_ANCH_GPOS /* Checked by the caller */
483                                   | ROPT_IMPLICIT)) /* not a real BOL */
484                /* SvCUR is not set on references: SvRV and SvPVX overlap */
485                && sv && !SvROK(sv)
486                && (strpos != strbeg)) {
487               DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
488               goto fail;
489           }
490           if (prog->check_offset_min == prog->check_offset_max &&
491               !(prog->reganch & ROPT_CANY_SEEN)) {
492             /* Substring at constant offset from beg-of-str... */
493             I32 slen;
494
495             s = HOP3c(strpos, prog->check_offset_min, strend);
496             if (SvTAIL(check)) {
497                 slen = SvCUR(check);    /* >= 1 */
498
499                 if ( strend - s > slen || strend - s < slen - 1
500                      || (strend - s == slen && strend[-1] != '\n')) {
501                     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
502                     goto fail_finish;
503                 }
504                 /* Now should match s[0..slen-2] */
505                 slen--;
506                 if (slen && (*SvPVX(check) != *s
507                              || (slen > 1
508                                  && memNE(SvPVX(check), s, slen)))) {
509                   report_neq:
510                     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
511                     goto fail_finish;
512                 }
513             }
514             else if (*SvPVX(check) != *s
515                      || ((slen = SvCUR(check)) > 1
516                          && memNE(SvPVX(check), s, slen)))
517                 goto report_neq;
518             goto success_at_start;
519           }
520         }
521         /* Match is anchored, but substr is not anchored wrt beg-of-str. */
522         s = strpos;
523         start_shift = prog->check_offset_min; /* okay to underestimate on CC */
524         end_shift = prog->minlen - start_shift -
525             CHR_SVLEN(check) + (SvTAIL(check) != 0);
526         if (!ml_anch) {
527             I32 end = prog->check_offset_max + CHR_SVLEN(check)
528                                          - (SvTAIL(check) != 0);
529             I32 eshift = CHR_DIST((U8*)strend, (U8*)s) - end;
530
531             if (end_shift < eshift)
532                 end_shift = eshift;
533         }
534     }
535     else {                              /* Can match at random position */
536         ml_anch = 0;
537         s = strpos;
538         start_shift = prog->check_offset_min; /* okay to underestimate on CC */
539         /* Should be nonnegative! */
540         end_shift = prog->minlen - start_shift -
541             CHR_SVLEN(check) + (SvTAIL(check) != 0);
542     }
543
544 #ifdef DEBUGGING        /* 7/99: reports of failure (with the older version) */
545     if (end_shift < 0)
546         Perl_croak(aTHX_ "panic: end_shift");
547 #endif
548
549   restart:
550     /* Find a possible match in the region s..strend by looking for
551        the "check" substring in the region corrected by start/end_shift. */
552     if (flags & REXEC_SCREAM) {
553         I32 p = -1;                     /* Internal iterator of scream. */
554         I32 *pp = data ? data->scream_pos : &p;
555
556         if (PL_screamfirst[BmRARE(check)] >= 0
557             || ( BmRARE(check) == '\n'
558                  && (BmPREVIOUS(check) == SvCUR(check) - 1)
559                  && SvTAIL(check) ))
560             s = screaminstr(sv, check,
561                             start_shift + (s - strbeg), end_shift, pp, 0);
562         else
563             goto fail_finish;
564         /* we may be pointing at the wrong string */
565         if (s && RX_MATCH_COPIED(prog))
566             s = strbeg + (s - SvPVX(sv));
567         if (data)
568             *data->scream_olds = s;
569     }
570     else if (prog->reganch & ROPT_CANY_SEEN)
571         s = fbm_instr((U8*)(s + start_shift),
572                       (U8*)(strend - end_shift),
573                       check, multiline ? FBMrf_MULTILINE : 0);
574     else
575         s = fbm_instr(HOP3(s, start_shift, strend),
576                       HOP3(strend, -end_shift, strbeg),
577                       check, multiline ? FBMrf_MULTILINE : 0);
578
579     /* Update the count-of-usability, remove useless subpatterns,
580         unshift s.  */
581
582     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%s %s substr `%s%.*s%s'%s%s",
583                           (s ? "Found" : "Did not find"),
584                           (check == (do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) ? "anchored" : "floating"),
585                           PL_colors[0],
586                           (int)(SvCUR(check) - (SvTAIL(check)!=0)),
587                           SvPVX(check),
588                           PL_colors[1], (SvTAIL(check) ? "$" : ""),
589                           (s ? " at offset " : "...\n") ) );
590
591     if (!s)
592         goto fail_finish;
593
594     check_at = s;
595
596     /* Finish the diagnostic message */
597     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
598
599     /* Got a candidate.  Check MBOL anchoring, and the *other* substr.
600        Start with the other substr.
601        XXXX no SCREAM optimization yet - and a very coarse implementation
602        XXXX /ttx+/ results in anchored=`ttx', floating=`x'.  floating will
603                 *always* match.  Probably should be marked during compile...
604        Probably it is right to do no SCREAM here...
605      */
606
607     if (do_utf8 ? (prog->float_utf8 && prog->anchored_utf8) : (prog->float_substr && prog->anchored_substr)) {
608         /* Take into account the "other" substring. */
609         /* XXXX May be hopelessly wrong for UTF... */
610         if (!other_last)
611             other_last = strpos;
612         if (check == (do_utf8 ? prog->float_utf8 : prog->float_substr)) {
613           do_other_anchored:
614             {
615                 char *last = HOP3c(s, -start_shift, strbeg), *last1, *last2;
616                 char *s1 = s;
617                 SV* must;
618
619                 t = s - prog->check_offset_max;
620                 if (s - strpos > prog->check_offset_max  /* signed-corrected t > strpos */
621                     && (!do_utf8
622                         || ((t = reghopmaybe3_c(s, -(prog->check_offset_max), strpos))
623                             && t > strpos)))
624                     /* EMPTY */;
625                 else
626                     t = strpos;
627                 t = HOP3c(t, prog->anchored_offset, strend);
628                 if (t < other_last)     /* These positions already checked */
629                     t = other_last;
630                 last2 = last1 = HOP3c(strend, -prog->minlen, strbeg);
631                 if (last < last1)
632                     last1 = last;
633  /* XXXX It is not documented what units *_offsets are in.  Assume bytes.  */
634                 /* On end-of-str: see comment below. */
635                 must = do_utf8 ? prog->anchored_utf8 : prog->anchored_substr;
636                 if (must == &PL_sv_undef) {
637                     s = (char*)NULL;
638                     DEBUG_EXECUTE_r(must = prog->anchored_utf8);        /* for debug */
639                 }
640                 else
641                     s = fbm_instr(
642                         (unsigned char*)t,
643                         HOP3(HOP3(last1, prog->anchored_offset, strend)
644                                 + SvCUR(must), -(SvTAIL(must)!=0), strbeg),
645                         must,
646                         multiline ? FBMrf_MULTILINE : 0
647                     );
648                 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
649                         "%s anchored substr `%s%.*s%s'%s",
650                         (s ? "Found" : "Contradicts"),
651                         PL_colors[0],
652                           (int)(SvCUR(must)
653                           - (SvTAIL(must)!=0)),
654                           SvPVX(must),
655                           PL_colors[1], (SvTAIL(must) ? "$" : "")));
656                 if (!s) {
657                     if (last1 >= last2) {
658                         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
659                                                 ", giving up...\n"));
660                         goto fail_finish;
661                     }
662                     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
663                         ", trying floating at offset %ld...\n",
664                         (long)(HOP3c(s1, 1, strend) - i_strpos)));
665                     other_last = HOP3c(last1, prog->anchored_offset+1, strend);
666                     s = HOP3c(last, 1, strend);
667                     goto restart;
668                 }
669                 else {
670                     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
671                           (long)(s - i_strpos)));
672                     t = HOP3c(s, -prog->anchored_offset, strbeg);
673                     other_last = HOP3c(s, 1, strend);
674                     s = s1;
675                     if (t == strpos)
676                         goto try_at_start;
677                     goto try_at_offset;
678                 }
679             }
680         }
681         else {          /* Take into account the floating substring. */
682             char *last, *last1;
683             char *s1 = s;
684             SV* must;
685
686             t = HOP3c(s, -start_shift, strbeg);
687             last1 = last =
688                 HOP3c(strend, -prog->minlen + prog->float_min_offset, strbeg);
689             if (CHR_DIST((U8*)last, (U8*)t) > prog->float_max_offset)
690                 last = HOP3c(t, prog->float_max_offset, strend);
691             s = HOP3c(t, prog->float_min_offset, strend);
692             if (s < other_last)
693                 s = other_last;
694  /* XXXX It is not documented what units *_offsets are in.  Assume bytes.  */
695             must = do_utf8 ? prog->float_utf8 : prog->float_substr;
696             /* fbm_instr() takes into account exact value of end-of-str
697                if the check is SvTAIL(ed).  Since false positives are OK,
698                and end-of-str is not later than strend we are OK. */
699             if (must == &PL_sv_undef) {
700                 s = (char*)NULL;
701                 DEBUG_EXECUTE_r(must = prog->float_utf8);       /* for debug message */
702             }
703             else
704                 s = fbm_instr((unsigned char*)s,
705                               (unsigned char*)last + SvCUR(must)
706                                   - (SvTAIL(must)!=0),
707                               must, multiline ? FBMrf_MULTILINE : 0);
708             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%s floating substr `%s%.*s%s'%s",
709                     (s ? "Found" : "Contradicts"),
710                     PL_colors[0],
711                       (int)(SvCUR(must) - (SvTAIL(must)!=0)),
712                       SvPVX(must),
713                       PL_colors[1], (SvTAIL(must) ? "$" : "")));
714             if (!s) {
715                 if (last1 == last) {
716                     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
717                                             ", giving up...\n"));
718                     goto fail_finish;
719                 }
720                 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
721                     ", trying anchored starting at offset %ld...\n",
722                     (long)(s1 + 1 - i_strpos)));
723                 other_last = last;
724                 s = HOP3c(t, 1, strend);
725                 goto restart;
726             }
727             else {
728                 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
729                       (long)(s - i_strpos)));
730                 other_last = s; /* Fix this later. --Hugo */
731                 s = s1;
732                 if (t == strpos)
733                     goto try_at_start;
734                 goto try_at_offset;
735             }
736         }
737     }
738
739     t = s - prog->check_offset_max;
740     if (s - strpos > prog->check_offset_max  /* signed-corrected t > strpos */
741         && (!do_utf8
742             || ((t = reghopmaybe3_c(s, -prog->check_offset_max, strpos))
743                  && t > strpos))) {
744         /* Fixed substring is found far enough so that the match
745            cannot start at strpos. */
746       try_at_offset:
747         if (ml_anch && t[-1] != '\n') {
748             /* Eventually fbm_*() should handle this, but often
749                anchored_offset is not 0, so this check will not be wasted. */
750             /* XXXX In the code below we prefer to look for "^" even in
751                presence of anchored substrings.  And we search even
752                beyond the found float position.  These pessimizations
753                are historical artefacts only.  */
754           find_anchor:
755             while (t < strend - prog->minlen) {
756                 if (*t == '\n') {
757                     if (t < check_at - prog->check_offset_min) {
758                         if (do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) {
759                             /* Since we moved from the found position,
760                                we definitely contradict the found anchored
761                                substr.  Due to the above check we do not
762                                contradict "check" substr.
763                                Thus we can arrive here only if check substr
764                                is float.  Redo checking for "other"=="fixed".
765                              */
766                             strpos = t + 1;                     
767                             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
768                                 PL_colors[0],PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
769                             goto do_other_anchored;
770                         }
771                         /* We don't contradict the found floating substring. */
772                         /* XXXX Why not check for STCLASS? */
773                         s = t + 1;
774                         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
775                             PL_colors[0],PL_colors[1], (long)(s - i_strpos)));
776                         goto set_useful;
777                     }
778                     /* Position contradicts check-string */
779                     /* XXXX probably better to look for check-string
780                        than for "\n", so one should lower the limit for t? */
781                     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n",
782                         PL_colors[0],PL_colors[1], (long)(t + 1 - i_strpos)));
783                     other_last = strpos = s = t + 1;
784                     goto restart;
785                 }
786                 t++;
787             }
788             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
789                         PL_colors[0],PL_colors[1]));
790             goto fail_finish;
791         }
792         else {
793             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Starting position does not contradict /%s^%s/m...\n",
794                         PL_colors[0],PL_colors[1]));
795         }
796         s = t;
797       set_useful:
798         ++BmUSEFUL(do_utf8 ? prog->check_utf8 : prog->check_substr);    /* hooray/5 */
799     }
800     else {
801         /* The found string does not prohibit matching at strpos,
802            - no optimization of calling REx engine can be performed,
803            unless it was an MBOL and we are not after MBOL,
804            or a future STCLASS check will fail this. */
805       try_at_start:
806         /* Even in this situation we may use MBOL flag if strpos is offset
807            wrt the start of the string. */
808         if (ml_anch && sv && !SvROK(sv) /* See prev comment on SvROK */
809             && (strpos != strbeg) && strpos[-1] != '\n'
810             /* May be due to an implicit anchor of m{.*foo}  */
811             && !(prog->reganch & ROPT_IMPLICIT))
812         {
813             t = strpos;
814             goto find_anchor;
815         }
816         DEBUG_EXECUTE_r( if (ml_anch)
817             PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n",
818                         (long)(strpos - i_strpos), PL_colors[0],PL_colors[1]);
819         );
820       success_at_start:
821         if (!(prog->reganch & ROPT_NAUGHTY)     /* XXXX If strpos moved? */
822             && (do_utf8 ? (
823                 prog->check_utf8                /* Could be deleted already */
824                 && --BmUSEFUL(prog->check_utf8) < 0
825                 && (prog->check_utf8 == prog->float_utf8)
826             ) : (
827                 prog->check_substr              /* Could be deleted already */
828                 && --BmUSEFUL(prog->check_substr) < 0
829                 && (prog->check_substr == prog->float_substr)
830             )))
831         {
832             /* If flags & SOMETHING - do not do it many times on the same match */
833             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "... Disabling check substring...\n"));
834             SvREFCNT_dec(do_utf8 ? prog->check_utf8 : prog->check_substr);
835             if (do_utf8 ? prog->check_substr : prog->check_utf8)
836                 SvREFCNT_dec(do_utf8 ? prog->check_substr : prog->check_utf8);
837             prog->check_substr = prog->check_utf8 = Nullsv;     /* disable */
838             prog->float_substr = prog->float_utf8 = Nullsv;     /* clear */
839             check = Nullsv;                     /* abort */
840             s = strpos;
841             /* XXXX This is a remnant of the old implementation.  It
842                     looks wasteful, since now INTUIT can use many
843                     other heuristics. */
844             prog->reganch &= ~RE_USE_INTUIT;
845         }
846         else
847             s = strpos;
848     }
849
850     /* Last resort... */
851     /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
852     if (prog->regstclass) {
853         /* minlen == 0 is possible if regstclass is \b or \B,
854            and the fixed substr is ''$.
855            Since minlen is already taken into account, s+1 is before strend;
856            accidentally, minlen >= 1 guaranties no false positives at s + 1
857            even for \b or \B.  But (minlen? 1 : 0) below assumes that
858            regstclass does not come from lookahead...  */
859         /* If regstclass takes bytelength more than 1: If charlength==1, OK.
860            This leaves EXACTF only, which is dealt with in find_byclass().  */
861         U8* str = (U8*)STRING(prog->regstclass);
862         int cl_l = (PL_regkind[(U8)OP(prog->regstclass)] == EXACT
863                     ? CHR_DIST(str+STR_LEN(prog->regstclass), str)
864                     : 1);
865         char *endpos = (prog->anchored_substr || prog->anchored_utf8 || ml_anch)
866                 ? HOP3c(s, (prog->minlen ? cl_l : 0), strend)
867                 : (prog->float_substr || prog->float_utf8
868                    ? HOP3c(HOP3c(check_at, -start_shift, strbeg),
869                            cl_l, strend)
870                    : strend);
871         char *startpos = strbeg;
872
873         t = s;
874         cache_re(prog);
875         s = find_byclass(prog, prog->regstclass, s, endpos, startpos, 1);
876         if (!s) {
877 #ifdef DEBUGGING
878             char *what = 0;
879 #endif
880             if (endpos == strend) {
881                 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
882                                 "Could not match STCLASS...\n") );
883                 goto fail;
884             }
885             DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
886                                    "This position contradicts STCLASS...\n") );
887             if ((prog->reganch & ROPT_ANCH) && !ml_anch)
888                 goto fail;
889             /* Contradict one of substrings */
890             if (prog->anchored_substr || prog->anchored_utf8) {
891                 if ((do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) == check) {
892                     DEBUG_EXECUTE_r( what = "anchored" );
893                   hop_and_restart:
894                     s = HOP3c(t, 1, strend);
895                     if (s + start_shift + end_shift > strend) {
896                         /* XXXX Should be taken into account earlier? */
897                         DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
898                                                "Could not match STCLASS...\n") );
899                         goto fail;
900                     }
901                     if (!check)
902                         goto giveup;
903                     DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
904                                 "Looking for %s substr starting at offset %ld...\n",
905                                  what, (long)(s + start_shift - i_strpos)) );
906                     goto restart;
907                 }
908                 /* Have both, check_string is floating */
909                 if (t + start_shift >= check_at) /* Contradicts floating=check */
910                     goto retry_floating_check;
911                 /* Recheck anchored substring, but not floating... */
912                 s = check_at;
913                 if (!check)
914                     goto giveup;
915                 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
916                           "Looking for anchored substr starting at offset %ld...\n",
917                           (long)(other_last - i_strpos)) );
918                 goto do_other_anchored;
919             }
920             /* Another way we could have checked stclass at the
921                current position only: */
922             if (ml_anch) {
923                 s = t = t + 1;
924                 if (!check)
925                     goto giveup;
926                 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
927                           "Looking for /%s^%s/m starting at offset %ld...\n",
928                           PL_colors[0],PL_colors[1], (long)(t - i_strpos)) );
929                 goto try_at_offset;
930             }
931             if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))     /* Could have been deleted */
932                 goto fail;
933             /* Check is floating subtring. */
934           retry_floating_check:
935             t = check_at - start_shift;
936             DEBUG_EXECUTE_r( what = "floating" );
937             goto hop_and_restart;
938         }
939         if (t != s) {
940             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
941                         "By STCLASS: moving %ld --> %ld\n",
942                                   (long)(t - i_strpos), (long)(s - i_strpos))
943                    );
944         }
945         else {
946             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
947                                   "Does not contradict STCLASS...\n"); 
948                    );
949         }
950     }
951   giveup:
952     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%s%s:%s match at offset %ld\n",
953                           PL_colors[4], (check ? "Guessed" : "Giving up"),
954                           PL_colors[5], (long)(s - i_strpos)) );
955     return s;
956
957   fail_finish:                          /* Substring not found */
958     if (prog->check_substr || prog->check_utf8)         /* could be removed already */
959         BmUSEFUL(do_utf8 ? prog->check_utf8 : prog->check_substr) += 5; /* hooray */
960   fail:
961     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
962                           PL_colors[4],PL_colors[5]));
963     return Nullch;
964 }
965
966 /* We know what class REx starts with.  Try to find this position... */
967 STATIC char *
968 S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *startpos, I32 norun)
969 {
970         I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
971         char *m;
972         STRLEN ln;
973         STRLEN lnc;
974         register STRLEN uskip;
975         unsigned int c1;
976         unsigned int c2;
977         char *e;
978         register I32 tmp = 1;   /* Scratch variable? */
979         register bool do_utf8 = PL_reg_match_utf8;
980
981         /* We know what class it must start with. */
982         switch (OP(c)) {
983         case ANYOF:
984             if (do_utf8) {
985                  while (s + (uskip = UTF8SKIP(s)) <= strend) {
986                       if ((ANYOF_FLAGS(c) & ANYOF_UNICODE) ||
987                           !UTF8_IS_INVARIANT((U8)s[0]) ?
988                           reginclass(c, (U8*)s, 0, do_utf8) :
989                           REGINCLASS(c, (U8*)s)) {
990                            if (tmp && (norun || regtry(prog, s)))
991                                 goto got_it;
992                            else
993                                 tmp = doevery;
994                       }
995                       else 
996                            tmp = 1;
997                       s += uskip;
998                  }
999             }
1000             else {
1001                  while (s < strend) {
1002                       STRLEN skip = 1;
1003
1004                       if (REGINCLASS(c, (U8*)s) ||
1005                           (ANYOF_FOLD_SHARP_S(c, s, strend) &&
1006                            /* The assignment of 2 is intentional:
1007                             * for the folded sharp s, the skip is 2. */
1008                            (skip = SHARP_S_SKIP))) {
1009                            if (tmp && (norun || regtry(prog, s)))
1010                                 goto got_it;
1011                            else
1012                                 tmp = doevery;
1013                       }
1014                       else 
1015                            tmp = 1;
1016                       s += skip;
1017                  }
1018             }
1019             break;
1020         case CANY:
1021             while (s < strend) {
1022                 if (tmp && (norun || regtry(prog, s)))
1023                     goto got_it;
1024                 else
1025                     tmp = doevery;
1026                 s++;
1027             }
1028             break;
1029         case EXACTF:
1030             m   = STRING(c);
1031             ln  = STR_LEN(c);   /* length to match in octets/bytes */
1032             lnc = (I32) ln;     /* length to match in characters */
1033             if (UTF) {
1034                 STRLEN ulen1, ulen2;
1035                 U8 *sm = (U8 *) m;
1036                 U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
1037                 U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
1038
1039                 to_utf8_lower((U8*)m, tmpbuf1, &ulen1);
1040                 to_utf8_upper((U8*)m, tmpbuf2, &ulen2);
1041
1042                 c1 = utf8n_to_uvchr(tmpbuf1, UTF8_MAXBYTES_CASE, 
1043                                     0, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1044                 c2 = utf8n_to_uvchr(tmpbuf2, UTF8_MAXBYTES_CASE,
1045                                     0, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1046                 lnc = 0;
1047                 while (sm < ((U8 *) m + ln)) {
1048                     lnc++;
1049                     sm += UTF8SKIP(sm);
1050                 }
1051             }
1052             else {
1053                 c1 = *(U8*)m;
1054                 c2 = PL_fold[c1];
1055             }
1056             goto do_exactf;
1057         case EXACTFL:
1058             m   = STRING(c);
1059             ln  = STR_LEN(c);
1060             lnc = (I32) ln;
1061             c1 = *(U8*)m;
1062             c2 = PL_fold_locale[c1];
1063           do_exactf:
1064             e = HOP3c(strend, -((I32)lnc), s);
1065
1066             if (norun && e < s)
1067                 e = s;                  /* Due to minlen logic of intuit() */
1068
1069             /* The idea in the EXACTF* cases is to first find the
1070              * first character of the EXACTF* node and then, if
1071              * necessary, case-insensitively compare the full
1072              * text of the node.  The c1 and c2 are the first
1073              * characters (though in Unicode it gets a bit
1074              * more complicated because there are more cases
1075              * than just upper and lower: one needs to use
1076              * the so-called folding case for case-insensitive
1077              * matching (called "loose matching" in Unicode).
1078              * ibcmp_utf8() will do just that. */
1079
1080             if (do_utf8) {
1081                 UV c, f;
1082                 U8 tmpbuf [UTF8_MAXBYTES+1];
1083                 U8 foldbuf[UTF8_MAXBYTES_CASE+1];
1084                 STRLEN len, foldlen;
1085                 
1086                 if (c1 == c2) {
1087                     /* Upper and lower of 1st char are equal -
1088                      * probably not a "letter". */
1089                     while (s <= e) {
1090                         c = utf8n_to_uvchr((U8*)s, UTF8_MAXBYTES, &len,
1091                                            ckWARN(WARN_UTF8) ?
1092                                            0 : UTF8_ALLOW_ANY);
1093                         if ( c == c1
1094                              && (ln == len ||
1095                                  ibcmp_utf8(s, (char **)0, 0,  do_utf8,
1096                                             m, (char **)0, ln, (bool)UTF))
1097                              && (norun || regtry(prog, s)) )
1098                             goto got_it;
1099                         else {
1100                              uvchr_to_utf8(tmpbuf, c);
1101                              f = to_utf8_fold(tmpbuf, foldbuf, &foldlen);
1102                              if ( f != c
1103                                   && (f == c1 || f == c2)
1104                                   && (ln == foldlen ||
1105                                       !ibcmp_utf8((char *) foldbuf,
1106                                                   (char **)0, foldlen, do_utf8,
1107                                                   m,
1108                                                   (char **)0, ln, (bool)UTF))
1109                                   && (norun || regtry(prog, s)) )
1110                                   goto got_it;
1111                         }
1112                         s += len;
1113                     }
1114                 }
1115                 else {
1116                     while (s <= e) {
1117                       c = utf8n_to_uvchr((U8*)s, UTF8_MAXBYTES, &len,
1118                                            ckWARN(WARN_UTF8) ?
1119                                            0 : UTF8_ALLOW_ANY);
1120
1121                         /* Handle some of the three Greek sigmas cases.
1122                          * Note that not all the possible combinations
1123                          * are handled here: some of them are handled
1124                          * by the standard folding rules, and some of
1125                          * them (the character class or ANYOF cases)
1126                          * are handled during compiletime in
1127                          * regexec.c:S_regclass(). */
1128                         if (c == (UV)UNICODE_GREEK_CAPITAL_LETTER_SIGMA ||
1129                             c == (UV)UNICODE_GREEK_SMALL_LETTER_FINAL_SIGMA)
1130                             c = (UV)UNICODE_GREEK_SMALL_LETTER_SIGMA;
1131
1132                         if ( (c == c1 || c == c2)
1133                              && (ln == len ||
1134                                  ibcmp_utf8(s, (char **)0, 0,  do_utf8,
1135                                             m, (char **)0, ln, (bool)UTF))
1136                              && (norun || regtry(prog, s)) )
1137                             goto got_it;
1138                         else {
1139                              uvchr_to_utf8(tmpbuf, c);
1140                              f = to_utf8_fold(tmpbuf, foldbuf, &foldlen);
1141                              if ( f != c
1142                                   && (f == c1 || f == c2)
1143                                   && (ln == foldlen ||
1144                                       !ibcmp_utf8((char *) foldbuf,
1145                                                   (char **)0, foldlen, do_utf8,
1146                                                   m,
1147                                                   (char **)0, ln, (bool)UTF))
1148                                   && (norun || regtry(prog, s)) )
1149                                   goto got_it;
1150                         }
1151                         s += len;
1152                     }
1153                 }
1154             }
1155             else {
1156                 if (c1 == c2)
1157                     while (s <= e) {
1158                         if ( *(U8*)s == c1
1159                              && (ln == 1 || !(OP(c) == EXACTF
1160                                               ? ibcmp(s, m, ln)
1161                                               : ibcmp_locale(s, m, ln)))
1162                              && (norun || regtry(prog, s)) )
1163                             goto got_it;
1164                         s++;
1165                     }
1166                 else
1167                     while (s <= e) {
1168                         if ( (*(U8*)s == c1 || *(U8*)s == c2)
1169                              && (ln == 1 || !(OP(c) == EXACTF
1170                                               ? ibcmp(s, m, ln)
1171                                               : ibcmp_locale(s, m, ln)))
1172                              && (norun || regtry(prog, s)) )
1173                             goto got_it;
1174                         s++;
1175                     }
1176             }
1177             break;
1178         case BOUNDL:
1179             PL_reg_flags |= RF_tainted;
1180             /* FALL THROUGH */
1181         case BOUND:
1182             if (do_utf8) {
1183                 if (s == PL_bostr)
1184                     tmp = '\n';
1185                 else {
1186                     U8 *r = reghop3((U8*)s, -1, (U8*)PL_bostr);
1187                 
1188                     tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, 0);
1189                 }
1190                 tmp = ((OP(c) == BOUND ?
1191                         isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
1192                 LOAD_UTF8_CHARCLASS(alnum,"a");
1193                 while (s + (uskip = UTF8SKIP(s)) <= strend) {
1194                     if (tmp == !(OP(c) == BOUND ?
1195                                  swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
1196                                  isALNUM_LC_utf8((U8*)s)))
1197                     {
1198                         tmp = !tmp;
1199                         if ((norun || regtry(prog, s)))
1200                             goto got_it;
1201                     }
1202                     s += uskip;
1203                 }
1204             }
1205             else {
1206                 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
1207                 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1208                 while (s < strend) {
1209                     if (tmp ==
1210                         !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
1211                         tmp = !tmp;
1212                         if ((norun || regtry(prog, s)))
1213                             goto got_it;
1214                     }
1215                     s++;
1216                 }
1217             }
1218             if ((!prog->minlen && tmp) && (norun || regtry(prog, s)))
1219                 goto got_it;
1220             break;
1221         case NBOUNDL:
1222             PL_reg_flags |= RF_tainted;
1223             /* FALL THROUGH */
1224         case NBOUND:
1225             if (do_utf8) {
1226                 if (s == PL_bostr)
1227                     tmp = '\n';
1228                 else {
1229                     U8 *r = reghop3((U8*)s, -1, (U8*)PL_bostr);
1230                 
1231                     tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, 0);
1232                 }
1233                 tmp = ((OP(c) == NBOUND ?
1234                         isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
1235                 LOAD_UTF8_CHARCLASS(alnum,"a");
1236                 while (s + (uskip = UTF8SKIP(s)) <= strend) {
1237                     if (tmp == !(OP(c) == NBOUND ?
1238                                  swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
1239                                  isALNUM_LC_utf8((U8*)s)))
1240                         tmp = !tmp;
1241                     else if ((norun || regtry(prog, s)))
1242                         goto got_it;
1243                     s += uskip;
1244                 }
1245             }
1246             else {
1247                 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
1248                 tmp = ((OP(c) == NBOUND ?
1249                         isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1250                 while (s < strend) {
1251                     if (tmp ==
1252                         !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
1253                         tmp = !tmp;
1254                     else if ((norun || regtry(prog, s)))
1255                         goto got_it;
1256                     s++;
1257                 }
1258             }
1259             if ((!prog->minlen && !tmp) && (norun || regtry(prog, s)))
1260                 goto got_it;
1261             break;
1262         case ALNUM:
1263             if (do_utf8) {
1264                 LOAD_UTF8_CHARCLASS(alnum,"a");
1265                 while (s + (uskip = UTF8SKIP(s)) <= strend) {
1266                     if (swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
1267                         if (tmp && (norun || regtry(prog, s)))
1268                             goto got_it;
1269                         else
1270                             tmp = doevery;
1271                     }
1272                     else
1273                         tmp = 1;
1274                     s += uskip;
1275                 }
1276             }
1277             else {
1278                 while (s < strend) {
1279                     if (isALNUM(*s)) {
1280                         if (tmp && (norun || regtry(prog, s)))
1281                             goto got_it;
1282                         else
1283                             tmp = doevery;
1284                     }
1285                     else
1286                         tmp = 1;
1287                     s++;
1288                 }
1289             }
1290             break;
1291         case ALNUML:
1292             PL_reg_flags |= RF_tainted;
1293             if (do_utf8) {
1294                 while (s + (uskip = UTF8SKIP(s)) <= strend) {
1295                     if (isALNUM_LC_utf8((U8*)s)) {
1296                         if (tmp && (norun || regtry(prog, s)))
1297                             goto got_it;
1298                         else
1299                             tmp = doevery;
1300                     }
1301                     else
1302                         tmp = 1;
1303                     s += uskip;
1304                 }
1305             }
1306             else {
1307                 while (s < strend) {
1308                     if (isALNUM_LC(*s)) {
1309                         if (tmp && (norun || regtry(prog, s)))
1310                             goto got_it;
1311                         else
1312                             tmp = doevery;
1313                     }
1314                     else
1315                         tmp = 1;
1316                     s++;
1317                 }
1318             }
1319             break;
1320         case NALNUM:
1321             if (do_utf8) {
1322                 LOAD_UTF8_CHARCLASS(alnum,"a");
1323                 while (s + (uskip = UTF8SKIP(s)) <= strend) {
1324                     if (!swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
1325                         if (tmp && (norun || regtry(prog, s)))
1326                             goto got_it;
1327                         else
1328                             tmp = doevery;
1329                     }
1330                     else
1331                         tmp = 1;
1332                     s += uskip;
1333                 }
1334             }
1335             else {
1336                 while (s < strend) {
1337                     if (!isALNUM(*s)) {
1338                         if (tmp && (norun || regtry(prog, s)))
1339                             goto got_it;
1340                         else
1341                             tmp = doevery;
1342                     }
1343                     else
1344                         tmp = 1;
1345                     s++;
1346                 }
1347             }
1348             break;
1349         case NALNUML:
1350             PL_reg_flags |= RF_tainted;
1351             if (do_utf8) {
1352                 while (s + (uskip = UTF8SKIP(s)) <= strend) {
1353                     if (!isALNUM_LC_utf8((U8*)s)) {
1354                         if (tmp && (norun || regtry(prog, s)))
1355                             goto got_it;
1356                         else
1357                             tmp = doevery;
1358                     }
1359                     else
1360                         tmp = 1;
1361                     s += uskip;
1362                 }
1363             }
1364             else {
1365                 while (s < strend) {
1366                     if (!isALNUM_LC(*s)) {
1367                         if (tmp && (norun || regtry(prog, s)))
1368                             goto got_it;
1369                         else
1370                             tmp = doevery;
1371                     }
1372                     else
1373                         tmp = 1;
1374                     s++;
1375                 }
1376             }
1377             break;
1378         case SPACE:
1379             if (do_utf8) {
1380                 LOAD_UTF8_CHARCLASS(space," ");
1381                 while (s + (uskip = UTF8SKIP(s)) <= strend) {
1382                     if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8)) {
1383                         if (tmp && (norun || regtry(prog, s)))
1384                             goto got_it;
1385                         else
1386                             tmp = doevery;
1387                     }
1388                     else
1389                         tmp = 1;
1390                     s += uskip;
1391                 }
1392             }
1393             else {
1394                 while (s < strend) {
1395                     if (isSPACE(*s)) {
1396                         if (tmp && (norun || regtry(prog, s)))
1397                             goto got_it;
1398                         else
1399                             tmp = doevery;
1400                     }
1401                     else
1402                         tmp = 1;
1403                     s++;
1404                 }
1405             }
1406             break;
1407         case SPACEL:
1408             PL_reg_flags |= RF_tainted;
1409             if (do_utf8) {
1410                 while (s + (uskip = UTF8SKIP(s)) <= strend) {
1411                     if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) {
1412                         if (tmp && (norun || regtry(prog, s)))
1413                             goto got_it;
1414                         else
1415                             tmp = doevery;
1416                     }
1417                     else
1418                         tmp = 1;
1419                     s += uskip;
1420                 }
1421             }
1422             else {
1423                 while (s < strend) {
1424                     if (isSPACE_LC(*s)) {
1425                         if (tmp && (norun || regtry(prog, s)))
1426                             goto got_it;
1427                         else
1428                             tmp = doevery;
1429                     }
1430                     else
1431                         tmp = 1;
1432                     s++;
1433                 }
1434             }
1435             break;
1436         case NSPACE:
1437             if (do_utf8) {
1438                 LOAD_UTF8_CHARCLASS(space," ");
1439                 while (s + (uskip = UTF8SKIP(s)) <= strend) {
1440                     if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8))) {
1441                         if (tmp && (norun || regtry(prog, s)))
1442                             goto got_it;
1443                         else
1444                             tmp = doevery;
1445                     }
1446                     else
1447                         tmp = 1;
1448                     s += uskip;
1449                 }
1450             }
1451             else {
1452                 while (s < strend) {
1453                     if (!isSPACE(*s)) {
1454                         if (tmp && (norun || regtry(prog, s)))
1455                             goto got_it;
1456                         else
1457                             tmp = doevery;
1458                     }
1459                     else
1460                         tmp = 1;
1461                     s++;
1462                 }
1463             }
1464             break;
1465         case NSPACEL:
1466             PL_reg_flags |= RF_tainted;
1467             if (do_utf8) {
1468                 while (s + (uskip = UTF8SKIP(s)) <= strend) {
1469                     if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) {
1470                         if (tmp && (norun || regtry(prog, s)))
1471                             goto got_it;
1472                         else
1473                             tmp = doevery;
1474                     }
1475                     else
1476                         tmp = 1;
1477                     s += uskip;
1478                 }
1479             }
1480             else {
1481                 while (s < strend) {
1482                     if (!isSPACE_LC(*s)) {
1483                         if (tmp && (norun || regtry(prog, s)))
1484                             goto got_it;
1485                         else
1486                             tmp = doevery;
1487                     }
1488                     else
1489                         tmp = 1;
1490                     s++;
1491                 }
1492             }
1493             break;
1494         case DIGIT:
1495             if (do_utf8) {
1496                 LOAD_UTF8_CHARCLASS(digit,"0");
1497                 while (s + (uskip = UTF8SKIP(s)) <= strend) {
1498                     if (swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
1499                         if (tmp && (norun || regtry(prog, s)))
1500                             goto got_it;
1501                         else
1502                             tmp = doevery;
1503                     }
1504                     else
1505                         tmp = 1;
1506                     s += uskip;
1507                 }
1508             }
1509             else {
1510                 while (s < strend) {
1511                     if (isDIGIT(*s)) {
1512                         if (tmp && (norun || regtry(prog, s)))
1513                             goto got_it;
1514                         else
1515                             tmp = doevery;
1516                     }
1517                     else
1518                         tmp = 1;
1519                     s++;
1520                 }
1521             }
1522             break;
1523         case DIGITL:
1524             PL_reg_flags |= RF_tainted;
1525             if (do_utf8) {
1526                 while (s + (uskip = UTF8SKIP(s)) <= strend) {
1527                     if (isDIGIT_LC_utf8((U8*)s)) {
1528                         if (tmp && (norun || regtry(prog, s)))
1529                             goto got_it;
1530                         else
1531                             tmp = doevery;
1532                     }
1533                     else
1534                         tmp = 1;
1535                     s += uskip;
1536                 }
1537             }
1538             else {
1539                 while (s < strend) {
1540                     if (isDIGIT_LC(*s)) {
1541                         if (tmp && (norun || regtry(prog, s)))
1542                             goto got_it;
1543                         else
1544                             tmp = doevery;
1545                     }
1546                     else
1547                         tmp = 1;
1548                     s++;
1549                 }
1550             }
1551             break;
1552         case NDIGIT:
1553             if (do_utf8) {
1554                 LOAD_UTF8_CHARCLASS(digit,"0");
1555                 while (s + (uskip = UTF8SKIP(s)) <= strend) {
1556                     if (!swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
1557                         if (tmp && (norun || regtry(prog, s)))
1558                             goto got_it;
1559                         else
1560                             tmp = doevery;
1561                     }
1562                     else
1563                         tmp = 1;
1564                     s += uskip;
1565                 }
1566             }
1567             else {
1568                 while (s < strend) {
1569                     if (!isDIGIT(*s)) {
1570                         if (tmp && (norun || regtry(prog, s)))
1571                             goto got_it;
1572                         else
1573                             tmp = doevery;
1574                     }
1575                     else
1576                         tmp = 1;
1577                     s++;
1578                 }
1579             }
1580             break;
1581         case NDIGITL:
1582             PL_reg_flags |= RF_tainted;
1583             if (do_utf8) {
1584                 while (s + (uskip = UTF8SKIP(s)) <= strend) {
1585                     if (!isDIGIT_LC_utf8((U8*)s)) {
1586                         if (tmp && (norun || regtry(prog, s)))
1587                             goto got_it;
1588                         else
1589                             tmp = doevery;
1590                     }
1591                     else
1592                         tmp = 1;
1593                     s += uskip;
1594                 }
1595             }
1596             else {
1597                 while (s < strend) {
1598                     if (!isDIGIT_LC(*s)) {
1599                         if (tmp && (norun || regtry(prog, s)))
1600                             goto got_it;
1601                         else
1602                             tmp = doevery;
1603                     }
1604                     else
1605                         tmp = 1;
1606                     s++;
1607                 }
1608             }
1609             break;
1610         default:
1611             Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
1612             break;
1613         }
1614         return 0;
1615       got_it:
1616         return s;
1617 }
1618
1619 /*
1620  - regexec_flags - match a regexp against a string
1621  */
1622 I32
1623 Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend,
1624               char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
1625 /* strend: pointer to null at end of string */
1626 /* strbeg: real beginning of string */
1627 /* minend: end of match must be >=minend after stringarg. */
1628 /* data: May be used for some additional optimizations. */
1629 /* nosave: For optimizations. */
1630 {
1631     register char *s;
1632     register regnode *c;
1633     register char *startpos = stringarg;
1634     I32 minlen;         /* must match at least this many chars */
1635     I32 dontbother = 0; /* how many characters not to try at end */
1636     /* I32 start_shift = 0; */          /* Offset of the start to find
1637                                          constant substr. */            /* CC */
1638     I32 end_shift = 0;                  /* Same for the end. */         /* CC */
1639     I32 scream_pos = -1;                /* Internal iterator of scream. */
1640     char *scream_olds;
1641     SV* oreplsv = GvSV(PL_replgv);
1642     bool do_utf8 = DO_UTF8(sv);
1643     I32 multiline = prog->reganch & PMf_MULTILINE;
1644 #ifdef DEBUGGING
1645     SV *dsv0 = PERL_DEBUG_PAD_ZERO(0);
1646     SV *dsv1 = PERL_DEBUG_PAD_ZERO(1);
1647 #endif
1648
1649     GET_RE_DEBUG_FLAGS_DECL;
1650
1651     RX_MATCH_UTF8_set(prog,do_utf8);
1652
1653     PL_regcc = 0;
1654
1655     cache_re(prog);
1656 #ifdef DEBUGGING
1657     PL_regnarrate = DEBUG_r_TEST;
1658 #endif
1659
1660     /* Be paranoid... */
1661     if (prog == NULL || startpos == NULL) {
1662         Perl_croak(aTHX_ "NULL regexp parameter");
1663         return 0;
1664     }
1665
1666     minlen = prog->minlen;
1667     if (strend - startpos < minlen) {
1668         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
1669                               "String too short [regexec_flags]...\n"));
1670         goto phooey;
1671     }
1672
1673     /* Check validity of program. */
1674     if (UCHARAT(prog->program) != REG_MAGIC) {
1675         Perl_croak(aTHX_ "corrupted regexp program");
1676     }
1677
1678     PL_reg_flags = 0;
1679     PL_reg_eval_set = 0;
1680     PL_reg_maxiter = 0;
1681
1682     if (prog->reganch & ROPT_UTF8)
1683         PL_reg_flags |= RF_utf8;
1684
1685     /* Mark beginning of line for ^ and lookbehind. */
1686     PL_regbol = startpos;
1687     PL_bostr  = strbeg;
1688     PL_reg_sv = sv;
1689
1690     /* Mark end of line for $ (and such) */
1691     PL_regeol = strend;
1692
1693     /* see how far we have to get to not match where we matched before */
1694     PL_regtill = startpos+minend;
1695
1696     /* We start without call_cc context.  */
1697     PL_reg_call_cc = 0;
1698
1699     /* If there is a "must appear" string, look for it. */
1700     s = startpos;
1701
1702     if (prog->reganch & ROPT_GPOS_SEEN) { /* Need to have PL_reg_ganch */
1703         MAGIC *mg;
1704
1705         if (flags & REXEC_IGNOREPOS)    /* Means: check only at start */
1706             PL_reg_ganch = startpos;
1707         else if (sv && SvTYPE(sv) >= SVt_PVMG
1708                   && SvMAGIC(sv)
1709                   && (mg = mg_find(sv, PERL_MAGIC_regex_global))
1710                   && mg->mg_len >= 0) {
1711             PL_reg_ganch = strbeg + mg->mg_len; /* Defined pos() */
1712             if (prog->reganch & ROPT_ANCH_GPOS) {
1713                 if (s > PL_reg_ganch)
1714                     goto phooey;
1715                 s = PL_reg_ganch;
1716             }
1717         }
1718         else                            /* pos() not defined */
1719             PL_reg_ganch = strbeg;
1720     }
1721
1722     if (!(flags & REXEC_CHECKED) && (prog->check_substr != Nullsv || prog->check_utf8 != Nullsv)) {
1723         re_scream_pos_data d;
1724
1725         d.scream_olds = &scream_olds;
1726         d.scream_pos = &scream_pos;
1727         s = re_intuit_start(prog, sv, s, strend, flags, &d);
1728         if (!s) {
1729             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Not present...\n"));
1730             goto phooey;        /* not present */
1731         }
1732     }
1733
1734     DEBUG_EXECUTE_r({
1735          char *s0   = UTF ?
1736            pv_uni_display(dsv0, (U8*)prog->precomp, prog->prelen, 60,
1737                           UNI_DISPLAY_REGEX) :
1738            prog->precomp;
1739          int   len0 = UTF ? SvCUR(dsv0) : prog->prelen;
1740          char *s1   = do_utf8 ? sv_uni_display(dsv1, sv, 60,
1741                                                UNI_DISPLAY_REGEX) : startpos;
1742          int   len1 = do_utf8 ? SvCUR(dsv1) : strend - startpos;
1743          if (!PL_colorset)
1744              reginitcolors();
1745          PerlIO_printf(Perl_debug_log,
1746                        "%sMatching REx%s `%s%*.*s%s%s' against `%s%.*s%s%s'\n",
1747                        PL_colors[4],PL_colors[5],PL_colors[0],
1748                        len0, len0, s0,
1749                        PL_colors[1],
1750                        len0 > 60 ? "..." : "",
1751                        PL_colors[0],
1752                        (int)(len1 > 60 ? 60 : len1),
1753                        s1, PL_colors[1],
1754                        (len1 > 60 ? "..." : "")
1755               );
1756     });
1757
1758     /* Simplest case:  anchored match need be tried only once. */
1759     /*  [unless only anchor is BOL and multiline is set] */
1760     if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) {
1761         if (s == startpos && regtry(prog, startpos))
1762             goto got_it;
1763         else if (multiline || (prog->reganch & ROPT_IMPLICIT)
1764                  || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */
1765         {
1766             char *end;
1767
1768             if (minlen)
1769                 dontbother = minlen - 1;
1770             end = HOP3c(strend, -dontbother, strbeg) - 1;
1771             /* for multiline we only have to try after newlines */
1772             if (prog->check_substr || prog->check_utf8) {
1773                 if (s == startpos)
1774                     goto after_try;
1775                 while (1) {
1776                     if (regtry(prog, s))
1777                         goto got_it;
1778                   after_try:
1779                     if (s >= end)
1780                         goto phooey;
1781                     if (prog->reganch & RE_USE_INTUIT) {
1782                         s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
1783                         if (!s)
1784                             goto phooey;
1785                     }
1786                     else
1787                         s++;
1788                 }               
1789             } else {
1790                 if (s > startpos)
1791                     s--;
1792                 while (s < end) {
1793                     if (*s++ == '\n') { /* don't need PL_utf8skip here */
1794                         if (regtry(prog, s))
1795                             goto got_it;
1796                     }
1797                 }               
1798             }
1799         }
1800         goto phooey;
1801     } else if (prog->reganch & ROPT_ANCH_GPOS) {
1802         if (regtry(prog, PL_reg_ganch))
1803             goto got_it;
1804         goto phooey;
1805     }
1806
1807     /* Messy cases:  unanchored match. */
1808     if ((prog->anchored_substr || prog->anchored_utf8) && prog->reganch & ROPT_SKIP) {
1809         /* we have /x+whatever/ */
1810         /* it must be a one character string (XXXX Except UTF?) */
1811         char ch;
1812 #ifdef DEBUGGING
1813         int did_match = 0;
1814 #endif
1815         if (!(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr))
1816             do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1817         ch = SvPVX(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr)[0];
1818
1819         if (do_utf8) {
1820             while (s < strend) {
1821                 if (*s == ch) {
1822                     DEBUG_EXECUTE_r( did_match = 1 );
1823                     if (regtry(prog, s)) goto got_it;
1824                     s += UTF8SKIP(s);
1825                     while (s < strend && *s == ch)
1826                         s += UTF8SKIP(s);
1827                 }
1828                 s += UTF8SKIP(s);
1829             }
1830         }
1831         else {
1832             while (s < strend) {
1833                 if (*s == ch) {
1834                     DEBUG_EXECUTE_r( did_match = 1 );
1835                     if (regtry(prog, s)) goto got_it;
1836                     s++;
1837                     while (s < strend && *s == ch)
1838                         s++;
1839                 }
1840                 s++;
1841             }
1842         }
1843         DEBUG_EXECUTE_r(if (!did_match)
1844                 PerlIO_printf(Perl_debug_log,
1845                                   "Did not find anchored character...\n")
1846                );
1847     }
1848     /*SUPPRESS 560*/
1849     else if (prog->anchored_substr != Nullsv
1850               || prog->anchored_utf8 != Nullsv
1851               || ((prog->float_substr != Nullsv || prog->float_utf8 != Nullsv)
1852                   && prog->float_max_offset < strend - s)) {
1853         SV *must;
1854         I32 back_max;
1855         I32 back_min;
1856         char *last;
1857         char *last1;            /* Last position checked before */
1858 #ifdef DEBUGGING
1859         int did_match = 0;
1860 #endif
1861         if (prog->anchored_substr || prog->anchored_utf8) {
1862             if (!(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr))
1863                 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1864             must = do_utf8 ? prog->anchored_utf8 : prog->anchored_substr;
1865             back_max = back_min = prog->anchored_offset;
1866         } else {
1867             if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))
1868                 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1869             must = do_utf8 ? prog->float_utf8 : prog->float_substr;
1870             back_max = prog->float_max_offset;
1871             back_min = prog->float_min_offset;
1872         }
1873         if (must == &PL_sv_undef)
1874             /* could not downgrade utf8 check substring, so must fail */
1875             goto phooey;
1876
1877         last = HOP3c(strend,    /* Cannot start after this */
1878                           -(I32)(CHR_SVLEN(must)
1879                                  - (SvTAIL(must) != 0) + back_min), strbeg);
1880
1881         if (s > PL_bostr)
1882             last1 = HOPc(s, -1);
1883         else
1884             last1 = s - 1;      /* bogus */
1885
1886         /* XXXX check_substr already used to find `s', can optimize if
1887            check_substr==must. */
1888         scream_pos = -1;
1889         dontbother = end_shift;
1890         strend = HOPc(strend, -dontbother);
1891         while ( (s <= last) &&
1892                 ((flags & REXEC_SCREAM)
1893                  ? (s = screaminstr(sv, must, HOP3c(s, back_min, strend) - strbeg,
1894                                     end_shift, &scream_pos, 0))
1895                  : (s = fbm_instr((unsigned char*)HOP3(s, back_min, strend),
1896                                   (unsigned char*)strend, must,
1897                                   multiline ? FBMrf_MULTILINE : 0))) ) {
1898             /* we may be pointing at the wrong string */
1899             if ((flags & REXEC_SCREAM) && RX_MATCH_COPIED(prog))
1900                 s = strbeg + (s - SvPVX(sv));
1901             DEBUG_EXECUTE_r( did_match = 1 );
1902             if (HOPc(s, -back_max) > last1) {
1903                 last1 = HOPc(s, -back_min);
1904                 s = HOPc(s, -back_max);
1905             }
1906             else {
1907                 char *t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
1908
1909                 last1 = HOPc(s, -back_min);
1910                 s = t;          
1911             }
1912             if (do_utf8) {
1913                 while (s <= last1) {
1914                     if (regtry(prog, s))
1915                         goto got_it;
1916                     s += UTF8SKIP(s);
1917                 }
1918             }
1919             else {
1920                 while (s <= last1) {
1921                     if (regtry(prog, s))
1922                         goto got_it;
1923                     s++;
1924                 }
1925             }
1926         }
1927         DEBUG_EXECUTE_r(if (!did_match)
1928                     PerlIO_printf(Perl_debug_log, 
1929                                   "Did not find %s substr `%s%.*s%s'%s...\n",
1930                               ((must == prog->anchored_substr || must == prog->anchored_utf8)
1931                                ? "anchored" : "floating"),
1932                               PL_colors[0],
1933                               (int)(SvCUR(must) - (SvTAIL(must)!=0)),
1934                               SvPVX(must),
1935                                   PL_colors[1], (SvTAIL(must) ? "$" : ""))
1936                );
1937         goto phooey;
1938     }
1939     else if ((c = prog->regstclass)) {
1940         if (minlen) {
1941             I32 op = (U8)OP(prog->regstclass);
1942             /* don't bother with what can't match */
1943             if (PL_regkind[op] != EXACT && op != CANY)
1944                 strend = HOPc(strend, -(minlen - 1));
1945         }
1946         DEBUG_EXECUTE_r({
1947             SV *prop = sv_newmortal();
1948             char *s0;
1949             char *s1;
1950             int len0;
1951             int len1;
1952
1953             regprop(prop, c);
1954             s0 = UTF ?
1955               pv_uni_display(dsv0, (U8*)SvPVX(prop), SvCUR(prop), 60,
1956                              UNI_DISPLAY_REGEX) :
1957               SvPVX(prop);
1958             len0 = UTF ? SvCUR(dsv0) : SvCUR(prop);
1959             s1 = UTF ?
1960               sv_uni_display(dsv1, sv, 60, UNI_DISPLAY_REGEX) : s;
1961             len1 = UTF ? SvCUR(dsv1) : strend - s;
1962             PerlIO_printf(Perl_debug_log,
1963                           "Matching stclass `%*.*s' against `%*.*s'\n",
1964                           len0, len0, s0,
1965                           len1, len1, s1);
1966         });
1967         if (find_byclass(prog, c, s, strend, startpos, 0))
1968             goto got_it;
1969         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass...\n"));
1970     }
1971     else {
1972         dontbother = 0;
1973         if (prog->float_substr != Nullsv || prog->float_utf8 != Nullsv) {
1974             /* Trim the end. */
1975             char *last;
1976             SV* float_real;
1977
1978             if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))
1979                 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1980             float_real = do_utf8 ? prog->float_utf8 : prog->float_substr;
1981
1982             if (flags & REXEC_SCREAM) {
1983                 last = screaminstr(sv, float_real, s - strbeg,
1984                                    end_shift, &scream_pos, 1); /* last one */
1985                 if (!last)
1986                     last = scream_olds; /* Only one occurrence. */
1987                 /* we may be pointing at the wrong string */
1988                 else if (RX_MATCH_COPIED(prog))
1989                     s = strbeg + (s - SvPVX(sv));
1990             }
1991             else {
1992                 STRLEN len;
1993                 char *little = SvPV(float_real, len);
1994
1995                 if (SvTAIL(float_real)) {
1996                     if (memEQ(strend - len + 1, little, len - 1))
1997                         last = strend - len + 1;
1998                     else if (!multiline)
1999                         last = memEQ(strend - len, little, len)
2000                             ? strend - len : Nullch;
2001                     else
2002                         goto find_last;
2003                 } else {
2004                   find_last:
2005                     if (len)
2006                         last = rninstr(s, strend, little, little + len);
2007                     else
2008                         last = strend;  /* matching `$' */
2009                 }
2010             }
2011             if (last == NULL) {
2012                 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
2013                                       "%sCan't trim the tail, match fails (should not happen)%s\n",
2014                                       PL_colors[4],PL_colors[5]));
2015                 goto phooey; /* Should not happen! */
2016             }
2017             dontbother = strend - last + prog->float_min_offset;
2018         }
2019         if (minlen && (dontbother < minlen))
2020             dontbother = minlen - 1;
2021         strend -= dontbother;              /* this one's always in bytes! */
2022         /* We don't know much -- general case. */
2023         if (do_utf8) {
2024             for (;;) {
2025                 if (regtry(prog, s))
2026                     goto got_it;
2027                 if (s >= strend)
2028                     break;
2029                 s += UTF8SKIP(s);
2030             };
2031         }
2032         else {
2033             do {
2034                 if (regtry(prog, s))
2035                     goto got_it;
2036             } while (s++ < strend);
2037         }
2038     }
2039
2040     /* Failure. */
2041     goto phooey;
2042
2043 got_it:
2044     RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
2045
2046     if (PL_reg_eval_set) {
2047         /* Preserve the current value of $^R */
2048         if (oreplsv != GvSV(PL_replgv))
2049             sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
2050                                                   restored, the value remains
2051                                                   the same. */
2052         restore_pos(aTHX_ 0);
2053     }
2054
2055     /* make sure $`, $&, $', and $digit will work later */
2056     if ( !(flags & REXEC_NOT_FIRST) ) {
2057         RX_MATCH_COPY_FREE(prog);
2058         if (flags & REXEC_COPY_STR) {
2059             I32 i = PL_regeol - startpos + (stringarg - strbeg);
2060 #ifdef PERL_COPY_ON_WRITE
2061             if ((SvIsCOW(sv)
2062                  || (SvFLAGS(sv) & CAN_COW_MASK) == CAN_COW_FLAGS)) {
2063                 if (DEBUG_C_TEST) {
2064                     PerlIO_printf(Perl_debug_log,
2065                                   "Copy on write: regexp capture, type %d\n",
2066                                   (int) SvTYPE(sv));
2067                 }
2068                 prog->saved_copy = sv_setsv_cow(prog->saved_copy, sv);
2069                 prog->subbeg = SvPVX(prog->saved_copy);
2070                 assert (SvPOKp(prog->saved_copy));
2071             } else
2072 #endif
2073             {
2074                 RX_MATCH_COPIED_on(prog);
2075                 s = savepvn(strbeg, i);
2076                 prog->subbeg = s;
2077             }
2078             prog->sublen = i;
2079         }
2080         else {
2081             prog->subbeg = strbeg;
2082             prog->sublen = PL_regeol - strbeg;  /* strend may have been modified */
2083         }
2084     }
2085
2086     return 1;
2087
2088 phooey:
2089     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n",
2090                           PL_colors[4],PL_colors[5]));
2091     if (PL_reg_eval_set)
2092         restore_pos(aTHX_ 0);
2093     return 0;
2094 }
2095
2096 /*
2097  - regtry - try match at specific point
2098  */
2099 STATIC I32                      /* 0 failure, 1 success */
2100 S_regtry(pTHX_ regexp *prog, char *startpos)
2101 {
2102     register I32 i;
2103     register I32 *sp;
2104     register I32 *ep;
2105     CHECKPOINT lastcp;
2106     GET_RE_DEBUG_FLAGS_DECL;
2107
2108 #ifdef DEBUGGING
2109     PL_regindent = 0;   /* XXXX Not good when matches are reenterable... */
2110 #endif
2111     if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
2112         MAGIC *mg;
2113
2114         PL_reg_eval_set = RS_init;
2115         DEBUG_EXECUTE_r(DEBUG_s(
2116             PerlIO_printf(Perl_debug_log, "  setting stack tmpbase at %"IVdf"\n",
2117                           (IV)(PL_stack_sp - PL_stack_base));
2118             ));
2119         SAVEI32(cxstack[cxstack_ix].blk_oldsp);
2120         cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
2121         /* Otherwise OP_NEXTSTATE will free whatever on stack now.  */
2122         SAVETMPS;
2123         /* Apparently this is not needed, judging by wantarray. */
2124         /* SAVEI8(cxstack[cxstack_ix].blk_gimme);
2125            cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
2126
2127         if (PL_reg_sv) {
2128             /* Make $_ available to executed code. */
2129             if (PL_reg_sv != DEFSV) {
2130                 SAVE_DEFSV;
2131                 DEFSV = PL_reg_sv;
2132             }
2133         
2134             if (!(SvTYPE(PL_reg_sv) >= SVt_PVMG && SvMAGIC(PL_reg_sv)
2135                   && (mg = mg_find(PL_reg_sv, PERL_MAGIC_regex_global)))) {
2136                 /* prepare for quick setting of pos */
2137                 sv_magic(PL_reg_sv, (SV*)0,
2138                         PERL_MAGIC_regex_global, Nullch, 0);
2139                 mg = mg_find(PL_reg_sv, PERL_MAGIC_regex_global);
2140                 mg->mg_len = -1;
2141             }
2142             PL_reg_magic    = mg;
2143             PL_reg_oldpos   = mg->mg_len;
2144             SAVEDESTRUCTOR_X(restore_pos, 0);
2145         }
2146         if (!PL_reg_curpm) {
2147             Newz(22,PL_reg_curpm, 1, PMOP);
2148 #ifdef USE_ITHREADS
2149             {
2150                 SV* repointer = newSViv(0);
2151                 /* so we know which PL_regex_padav element is PL_reg_curpm */
2152                 SvFLAGS(repointer) |= SVf_BREAK;
2153                 av_push(PL_regex_padav,repointer);
2154                 PL_reg_curpm->op_pmoffset = av_len(PL_regex_padav);
2155                 PL_regex_pad = AvARRAY(PL_regex_padav);
2156             }
2157 #endif      
2158         }
2159         PM_SETRE(PL_reg_curpm, prog);
2160         PL_reg_oldcurpm = PL_curpm;
2161         PL_curpm = PL_reg_curpm;
2162         if (RX_MATCH_COPIED(prog)) {
2163             /*  Here is a serious problem: we cannot rewrite subbeg,
2164                 since it may be needed if this match fails.  Thus
2165                 $` inside (?{}) could fail... */
2166             PL_reg_oldsaved = prog->subbeg;
2167             PL_reg_oldsavedlen = prog->sublen;
2168 #ifdef PERL_COPY_ON_WRITE
2169             PL_nrs = prog->saved_copy;
2170 #endif
2171             RX_MATCH_COPIED_off(prog);
2172         }
2173         else
2174             PL_reg_oldsaved = Nullch;
2175         prog->subbeg = PL_bostr;
2176         prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
2177     }
2178     prog->startp[0] = startpos - PL_bostr;
2179     PL_reginput = startpos;
2180     PL_regstartp = prog->startp;
2181     PL_regendp = prog->endp;
2182     PL_reglastparen = &prog->lastparen;
2183     PL_reglastcloseparen = &prog->lastcloseparen;
2184     prog->lastparen = 0;
2185     prog->lastcloseparen = 0;
2186     PL_regsize = 0;
2187     DEBUG_EXECUTE_r(PL_reg_starttry = startpos);
2188     if (PL_reg_start_tmpl <= prog->nparens) {
2189         PL_reg_start_tmpl = prog->nparens*3/2 + 3;
2190         if(PL_reg_start_tmp)
2191             Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
2192         else
2193             New(22,PL_reg_start_tmp, PL_reg_start_tmpl, char*);
2194     }
2195
2196     /* XXXX What this code is doing here?!!!  There should be no need
2197        to do this again and again, PL_reglastparen should take care of
2198        this!  --ilya*/
2199
2200     /* Tests pat.t#187 and split.t#{13,14} seem to depend on this code.
2201      * Actually, the code in regcppop() (which Ilya may be meaning by
2202      * PL_reglastparen), is not needed at all by the test suite
2203      * (op/regexp, op/pat, op/split), but that code is needed, oddly
2204      * enough, for building DynaLoader, or otherwise this
2205      * "Error: '*' not in typemap in DynaLoader.xs, line 164"
2206      * will happen.  Meanwhile, this code *is* needed for the
2207      * above-mentioned test suite tests to succeed.  The common theme
2208      * on those tests seems to be returning null fields from matches.
2209      * --jhi */
2210 #if 1
2211     sp = prog->startp;
2212     ep = prog->endp;
2213     if (prog->nparens) {
2214         for (i = prog->nparens; i > (I32)*PL_reglastparen; i--) {
2215             *++sp = -1;
2216             *++ep = -1;
2217         }
2218     }
2219 #endif
2220     REGCP_SET(lastcp);
2221     if (regmatch(prog->program + 1)) {
2222         prog->endp[0] = PL_reginput - PL_bostr;
2223         return 1;
2224     }
2225     REGCP_UNWIND(lastcp);
2226     return 0;
2227 }
2228
2229 #define RE_UNWIND_BRANCH        1
2230 #define RE_UNWIND_BRANCHJ       2
2231
2232 union re_unwind_t;
2233
2234 typedef struct {                /* XX: makes sense to enlarge it... */
2235     I32 type;
2236     I32 prev;
2237     CHECKPOINT lastcp;
2238 } re_unwind_generic_t;
2239
2240 typedef struct {
2241     I32 type;
2242     I32 prev;
2243     CHECKPOINT lastcp;
2244     I32 lastparen;
2245     regnode *next;
2246     char *locinput;
2247     I32 nextchr;
2248 #ifdef DEBUGGING
2249     int regindent;
2250 #endif
2251 } re_unwind_branch_t;
2252
2253 typedef union re_unwind_t {
2254     I32 type;
2255     re_unwind_generic_t generic;
2256     re_unwind_branch_t branch;
2257 } re_unwind_t;
2258
2259 #define sayYES goto yes
2260 #define sayNO goto no
2261 #define sayNO_ANYOF goto no_anyof
2262 #define sayYES_FINAL goto yes_final
2263 #define sayYES_LOUD  goto yes_loud
2264 #define sayNO_FINAL  goto no_final
2265 #define sayNO_SILENT goto do_no
2266 #define saySAME(x) if (x) goto yes; else goto no
2267
2268 /* this is used to determine how far from the left messages like
2269    'failed...' are printed. Currently 29 makes these messages line
2270    up with the opcode they refer to. Earlier perls used 25 which
2271    left these messages outdented making reviewing a debug output
2272    quite difficult.
2273 */
2274 #define REPORT_CODE_OFF 29
2275
2276
2277 /* Make sure there is a test for this +1 options in re_tests */
2278 #define TRIE_INITAL_ACCEPT_BUFFLEN 4;
2279
2280 #define TRIE_CHECK_STATE_IS_ACCEPTING STMT_START {                       \
2281     if ( trie->states[ state ].wordnum ) {                               \
2282         if ( !accepted ) {                                               \
2283             ENTER;                                                       \
2284             SAVETMPS;                                                    \
2285             bufflen = TRIE_INITAL_ACCEPT_BUFFLEN ;                       \
2286             sv_accept_buff=NEWSV( 1234,                                  \
2287               bufflen * sizeof(reg_trie_accepted) - 1 );                 \
2288             SvCUR_set( sv_accept_buff, sizeof(reg_trie_accepted) );      \
2289             SvPOK_on( sv_accept_buff );                                  \
2290             sv_2mortal( sv_accept_buff );                                \
2291             accept_buff = (reg_trie_accepted*)SvPV_nolen( sv_accept_buff );\
2292         } else {                                                         \
2293             if ( accepted >= bufflen ) {                                 \
2294                 bufflen *= 2;                                            \
2295                 accept_buff =(reg_trie_accepted*)SvGROW( sv_accept_buff, \
2296                     bufflen * sizeof(reg_trie_accepted) );               \
2297             }                                                            \
2298             SvCUR_set( sv_accept_buff,SvCUR( sv_accept_buff )            \
2299                 + sizeof( reg_trie_accepted ) );                         \
2300         }                                                                \
2301         accept_buff[ accepted ].wordnum = trie->states[ state ].wordnum; \
2302         accept_buff[ accepted ].endpos = uc;                             \
2303         ++accepted;                                                      \
2304     } } STMT_END
2305
2306 #define TRIE_HANDLE_CHAR STMT_START {                                   \
2307         if ( uvc < 256 ) {                                              \
2308             charid = trie->charmap[ uvc ];                              \
2309         } else {                                                        \
2310             charid = 0;                                                 \
2311             if( trie->widecharmap ) {                                   \
2312             SV** svpp = (SV**)NULL;                                     \
2313             svpp = hv_fetch( trie->widecharmap, (char*)&uvc,            \
2314                           sizeof( UV ), 0 );                            \
2315             if ( svpp ) {                                               \
2316                 charid = (U16)SvIV( *svpp );                            \
2317                 }                                                       \
2318             }                                                           \
2319         }                                                               \
2320         if ( charid &&                                                  \
2321              ( base + charid - 1 - trie->uniquecharcount ) >=0  &&      \
2322              trie->trans[ base + charid - 1 - trie->uniquecharcount ].check == state ) \
2323         {                                                               \
2324             state = trie->trans[ base + charid - 1 - trie->uniquecharcount ].next;     \
2325         } else {                                                        \
2326             state = 0;                                                  \
2327         }                                                               \
2328         uc += len;                                                      \
2329     } STMT_END
2330
2331 /*
2332  - regmatch - main matching routine
2333  *
2334  * Conceptually the strategy is simple:  check to see whether the current
2335  * node matches, call self recursively to see whether the rest matches,
2336  * and then act accordingly.  In practice we make some effort to avoid
2337  * recursion, in particular by going through "ordinary" nodes (that don't
2338  * need to know whether the rest of the match failed) by a loop instead of
2339  * by recursion.
2340  */
2341 /* [lwall] I've hoisted the register declarations to the outer block in order to
2342  * maybe save a little bit of pushing and popping on the stack.  It also takes
2343  * advantage of machines that use a register save mask on subroutine entry.
2344  */
2345 STATIC I32                      /* 0 failure, 1 success */
2346 S_regmatch(pTHX_ regnode *prog)
2347 {
2348     register regnode *scan;     /* Current node. */
2349     regnode *next;              /* Next node. */
2350     regnode *inner;             /* Next node in internal branch. */
2351     register I32 nextchr;       /* renamed nextchr - nextchar colides with
2352                                    function of same name */
2353     register I32 n;             /* no or next */
2354     register I32 ln = 0;        /* len or last */
2355     register char *s = Nullch;  /* operand or save */
2356     register char *locinput = PL_reginput;
2357     register I32 c1 = 0, c2 = 0, paren; /* case fold search, parenth */
2358     int minmod = 0, sw = 0, logical = 0;
2359     I32 unwind = 0;
2360
2361     /* used by the trie code */
2362     SV                 *sv_accept_buff;  /* accepting states we have traversed */
2363     reg_trie_accepted  *accept_buff;     /* "" */
2364     reg_trie_data      *trie;            /* what trie are we using right now */
2365     U32 accepted = 0;                    /* how many accepting states we have seen*/
2366
2367 #if 0
2368     I32 firstcp = PL_savestack_ix;
2369 #endif
2370     register bool do_utf8 = PL_reg_match_utf8;
2371 #ifdef DEBUGGING
2372     SV *dsv0 = PERL_DEBUG_PAD_ZERO(0);
2373     SV *dsv1 = PERL_DEBUG_PAD_ZERO(1);
2374     SV *dsv2 = PERL_DEBUG_PAD_ZERO(2);
2375
2376     SV *re_debug_flags;
2377 #endif
2378
2379     GET_RE_DEBUG_FLAGS;
2380
2381 #ifdef DEBUGGING
2382     PL_regindent++;
2383 #endif
2384
2385
2386     /* Note that nextchr is a byte even in UTF */
2387     nextchr = UCHARAT(locinput);
2388     scan = prog;
2389     while (scan != NULL) {
2390
2391         DEBUG_EXECUTE_r( {
2392             SV *prop = sv_newmortal();
2393             int docolor = *PL_colors[0];
2394             int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
2395             int l = (PL_regeol - locinput) > taill ? taill : (PL_regeol - locinput);
2396             /* The part of the string before starttry has one color
2397                (pref0_len chars), between starttry and current
2398                position another one (pref_len - pref0_len chars),
2399                after the current position the third one.
2400                We assume that pref0_len <= pref_len, otherwise we
2401                decrease pref0_len.  */
2402             int pref_len = (locinput - PL_bostr) > (5 + taill) - l
2403                 ? (5 + taill) - l : locinput - PL_bostr;
2404             int pref0_len;
2405
2406             while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput - pref_len)))
2407                 pref_len++;
2408             pref0_len = pref_len  - (locinput - PL_reg_starttry);
2409             if (l + pref_len < (5 + taill) && l < PL_regeol - locinput)
2410                 l = ( PL_regeol - locinput > (5 + taill) - pref_len
2411                       ? (5 + taill) - pref_len : PL_regeol - locinput);
2412             while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput + l)))
2413                 l--;
2414             if (pref0_len < 0)
2415                 pref0_len = 0;
2416             if (pref0_len > pref_len)
2417                 pref0_len = pref_len;
2418             regprop(prop, scan);
2419             {
2420               char *s0 =
2421                 do_utf8 && OP(scan) != CANY ?
2422                 pv_uni_display(dsv0, (U8*)(locinput - pref_len),
2423                                pref0_len, 60, UNI_DISPLAY_REGEX) :
2424                 locinput - pref_len;
2425               int len0 = do_utf8 ? strlen(s0) : pref0_len;
2426               char *s1 = do_utf8 && OP(scan) != CANY ?
2427                 pv_uni_display(dsv1, (U8*)(locinput - pref_len + pref0_len),
2428                                pref_len - pref0_len, 60, UNI_DISPLAY_REGEX) :
2429                 locinput - pref_len + pref0_len;
2430               int len1 = do_utf8 ? strlen(s1) : pref_len - pref0_len;
2431               char *s2 = do_utf8 && OP(scan) != CANY ?
2432                 pv_uni_display(dsv2, (U8*)locinput,
2433                                PL_regeol - locinput, 60, UNI_DISPLAY_REGEX) :
2434                 locinput;
2435               int len2 = do_utf8 ? strlen(s2) : l;
2436               PerlIO_printf(Perl_debug_log,
2437                             "%4"IVdf" <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|%3"IVdf":%*s%s\n",
2438                             (IV)(locinput - PL_bostr),
2439                             PL_colors[4],
2440                             len0, s0,
2441                             PL_colors[5],
2442                             PL_colors[2],
2443                             len1, s1,
2444                             PL_colors[3],
2445                             (docolor ? "" : "> <"),
2446                             PL_colors[0],
2447                             len2, s2,
2448                             PL_colors[1],
2449                             15 - l - pref_len + 1,
2450                             "",
2451                             (IV)(scan - PL_regprogram), PL_regindent*2, "",
2452                             SvPVX(prop));
2453             }
2454         });
2455
2456         next = scan + NEXT_OFF(scan);
2457         if (next == scan)
2458             next = NULL;
2459
2460         switch (OP(scan)) {
2461         case BOL:
2462             if (locinput == PL_bostr)
2463             {
2464                 /* regtill = regbol; */
2465                 break;
2466             }
2467             sayNO;
2468         case MBOL:
2469             if (locinput == PL_bostr ||
2470                 ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n'))
2471             {
2472                 break;
2473             }
2474             sayNO;
2475         case SBOL:
2476             if (locinput == PL_bostr)
2477                 break;
2478             sayNO;
2479         case GPOS:
2480             if (locinput == PL_reg_ganch)
2481                 break;
2482             sayNO;
2483         case EOL:
2484                 goto seol;
2485         case MEOL:
2486             if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
2487                 sayNO;
2488             break;
2489         case SEOL:
2490           seol:
2491             if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
2492                 sayNO;
2493             if (PL_regeol - locinput > 1)
2494                 sayNO;
2495             break;
2496         case EOS:
2497             if (PL_regeol != locinput)
2498                 sayNO;
2499             break;
2500         case SANY:
2501             if (!nextchr && locinput >= PL_regeol)
2502                 sayNO;
2503             if (do_utf8) {
2504                 locinput += PL_utf8skip[nextchr];
2505                 if (locinput > PL_regeol)
2506                     sayNO;
2507                 nextchr = UCHARAT(locinput);
2508             }
2509             else
2510                 nextchr = UCHARAT(++locinput);
2511             break;
2512         case CANY:
2513             if (!nextchr && locinput >= PL_regeol)
2514                 sayNO;
2515             nextchr = UCHARAT(++locinput);
2516             break;
2517         case REG_ANY:
2518             if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
2519                 sayNO;
2520             if (do_utf8) {
2521                 locinput += PL_utf8skip[nextchr];
2522                 if (locinput > PL_regeol)
2523                     sayNO;
2524                 nextchr = UCHARAT(locinput);
2525             }
2526             else
2527                 nextchr = UCHARAT(++locinput);
2528             break;
2529
2530
2531
2532         /*
2533            traverse the TRIE keeping track of all accepting states
2534            we transition through until we get to a failing node.
2535
2536            we use two slightly different pieces of code to handle
2537            the traversal depending on whether its case sensitive or
2538            not. we reuse the accept code however. (this should probably
2539            be turned into a macro.)
2540
2541         */
2542         case TRIEF:
2543         case TRIEFL:
2544             {
2545
2546                 U32 uniflags = ckWARN( WARN_UTF8 ) ? 0 : UTF8_ALLOW_ANY;
2547                 U8 *uc = ( U8* )locinput;
2548                 U32 state = 1;
2549                 U16 charid = 0;
2550                 U32 base = 0;
2551                 UV uvc = 0;
2552                 STRLEN len = 0;
2553                 STRLEN foldlen = 0;
2554                 U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ];
2555                 U8 *uscan = (U8*)NULL;
2556                 STRLEN bufflen=0;
2557                 accepted = 0;
2558
2559                 trie = (reg_trie_data*)PL_regdata->data[ ARG( scan ) ];
2560
2561                 while ( state && uc <= (U8*)PL_regeol ) {
2562
2563                     TRIE_CHECK_STATE_IS_ACCEPTING;
2564
2565                     base = trie->states[ state ].trans.base;
2566
2567                     DEBUG_TRIE_EXECUTE_r(
2568                                 PerlIO_printf( Perl_debug_log,
2569                                     "%*s  %sState: %4x, Base: %4x Accepted: %4x ",
2570                                     REPORT_CODE_OFF + PL_regindent * 2, "", PL_colors[4],
2571                                     state, base, accepted );
2572                     );
2573
2574                     if ( base ) {
2575
2576                         if ( do_utf8 || UTF ) {
2577                             if ( foldlen>0 ) {
2578                                 uvc = utf8n_to_uvuni( uscan, UTF8_MAXLEN, &len, uniflags );
2579                                 foldlen -= len;
2580                                 uscan += len;
2581                                 len=0;
2582                             } else {
2583                                 uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN, &len, uniflags );
2584                                 uvc = to_uni_fold( uvc, foldbuf, &foldlen );
2585                                 foldlen -= UNISKIP( uvc );
2586                                 uscan = foldbuf + UNISKIP( uvc );
2587                             }
2588                         } else {
2589                             uvc = (U32)*uc;
2590                             len = 1;
2591                         }
2592
2593                         TRIE_HANDLE_CHAR;
2594
2595                     } else {
2596                         state = 0;
2597                     }
2598                     DEBUG_TRIE_EXECUTE_r(
2599                                 PerlIO_printf( Perl_debug_log,
2600                                     "Charid:%3x CV:%4x After State: %4x%s\n",
2601                                     charid, uvc, state, PL_colors[5] );
2602                     );
2603                 }
2604                 if ( !accepted ) {
2605                    sayNO;
2606                 } else {
2607                     goto TrieAccept;
2608                 }
2609             }
2610             /* unreached codepoint: we jump into the middle of the next case
2611                from previous if blocks */
2612         case TRIE:
2613             {
2614                 U32 uniflags = ckWARN( WARN_UTF8 ) ? 0 : UTF8_ALLOW_ANY;
2615                 U8 *uc = (U8*)locinput;
2616                 U32 state = 1;
2617                 U16 charid = 0;
2618                 U32 base = 0;
2619                 UV uvc = 0;
2620                 STRLEN len = 0;
2621                 STRLEN bufflen = 0;
2622                 accepted = 0;
2623
2624                 trie = (reg_trie_data*)PL_regdata->data[ ARG( scan ) ];
2625
2626                 while ( state && uc <= (U8*)PL_regeol ) {
2627
2628                     TRIE_CHECK_STATE_IS_ACCEPTING;
2629
2630                     base = trie->states[ state ].trans.base;
2631
2632                     DEBUG_TRIE_EXECUTE_r(
2633                             PerlIO_printf( Perl_debug_log,
2634                                 "%*s  %sState: %4x, Base: %4x Accepted: %4x ",
2635                                 REPORT_CODE_OFF + PL_regindent * 2, "", PL_colors[4],
2636                                 state, base, accepted );
2637                     );
2638
2639                     if ( base ) {
2640
2641                         if ( do_utf8 || UTF ) {
2642                             uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN, &len, uniflags );
2643                         } else {
2644                             uvc = (U32)*uc;
2645                             len = 1;
2646                         }
2647
2648                         TRIE_HANDLE_CHAR;
2649
2650                     } else {
2651                         state = 0;
2652                     }
2653                     DEBUG_TRIE_EXECUTE_r(
2654                             PerlIO_printf( Perl_debug_log,
2655                                 "Charid:%3x CV:%4x After State: %4x%s\n",
2656                                 charid, uvc, state, PL_colors[5] );
2657                     );
2658                 }
2659                 if ( !accepted ) {
2660                    sayNO;
2661                 }
2662             }
2663
2664
2665             /*
2666                There was at least one accepting state that we
2667                transitioned through. Presumably the number of accepting
2668                states is going to be low, typically one or two. So we
2669                simply scan through to find the one with lowest wordnum.
2670                Once we find it, we swap the last state into its place
2671                and decrement the size. We then try to match the rest of
2672                the pattern at the point where the word ends, if we
2673                succeed then we end the loop, otherwise the loop
2674                eventually terminates once all of the accepting states
2675                have been tried.
2676             */
2677         TrieAccept:
2678             {
2679                 int gotit = 0;
2680
2681                 if ( accepted == 1 ) {
2682                     DEBUG_EXECUTE_r({
2683                         SV **tmp = av_fetch( trie->words, accept_buff[ 0 ].wordnum-1, 0 );
2684                         PerlIO_printf( Perl_debug_log,
2685                             "%*s  %sonly one match : #%d <%s>%s\n",
2686                             REPORT_CODE_OFF+PL_regindent*2, "",PL_colors[4],
2687                             accept_buff[ 0 ].wordnum,
2688                             tmp ? SvPV_nolen( *tmp ) : "not compiled under -Dr",
2689                             PL_colors[5] );
2690                     });
2691                     PL_reginput = accept_buff[ 0 ].endpos;
2692                     /* in this case we free tmps/leave before we call regmatch
2693                        as we wont be using accept_buff again. */
2694                     FREETMPS;
2695                     LEAVE;
2696                     gotit = regmatch( scan + NEXT_OFF( scan ) );
2697                 } else {
2698                     DEBUG_EXECUTE_r(
2699                         PerlIO_printf( Perl_debug_log,"%*s  %sgot %d possible matches%s\n",
2700                             REPORT_CODE_OFF + PL_regindent * 2, "",PL_colors[4], accepted,
2701                             PL_colors[5] );
2702                     );
2703                     while ( !gotit && accepted-- ) {
2704                         U32 best = 0;
2705                         U32 cur;
2706                         for( cur = 1 ; cur <= accepted ; cur++ ) {
2707                              DEBUG_TRIE_EXECUTE_r(
2708                                         PerlIO_printf( Perl_debug_log,
2709                                             "%*s  %sgot %d (%d) as best, looking at %d (%d)%s\n",
2710                                             REPORT_CODE_OFF + PL_regindent * 2, "", PL_colors[4],
2711                                             best, accept_buff[ best ].wordnum, cur,
2712                                             accept_buff[ cur ].wordnum, PL_colors[5] );
2713                              );
2714
2715                             if ( accept_buff[ cur ].wordnum < accept_buff[ best ].wordnum )
2716                                     best = cur;
2717                         }
2718                         DEBUG_EXECUTE_r({
2719                             SV **tmp = av_fetch( trie->words, accept_buff[ best ].wordnum - 1, 0 );
2720                             PerlIO_printf( Perl_debug_log, "%*s  %strying alternation #%d <%s> at 0x%p%s\n",
2721                                 REPORT_CODE_OFF+PL_regindent*2, "",PL_colors[4],
2722                                 accept_buff[best].wordnum,
2723                                 tmp ? SvPV_nolen( *tmp ) : "not compiled under -Dr",scan,
2724                                 PL_colors[5] );
2725                         });
2726                         if ( best<accepted ) {
2727                             reg_trie_accepted tmp = accept_buff[ best ];
2728                             accept_buff[ best ] = accept_buff[ accepted ];
2729                             accept_buff[ accepted ] = tmp;
2730                             best = accepted;
2731                         }
2732                         PL_reginput = accept_buff[ best ].endpos;
2733
2734                         /* 
2735                            as far as I can tell we only need the SAVETMPS/FREETMPS 
2736                            for re's with EVAL in them but I'm leaving them in for 
2737                            all until I can be sure.
2738                          */
2739                         SAVETMPS;
2740                         gotit = regmatch( scan + NEXT_OFF( scan ) ) ;
2741                         FREETMPS;
2742                     }
2743                     FREETMPS;
2744                     LEAVE;
2745                 }
2746                 
2747                 if ( gotit ) {
2748                     sayYES;
2749                 } else {
2750                     sayNO;
2751                 }
2752             }
2753             /* unreached codepoint */
2754         case EXACT:
2755             s = STRING(scan);
2756             ln = STR_LEN(scan);
2757             if (do_utf8 != UTF) {
2758                 /* The target and the pattern have differing utf8ness. */
2759                 char *l = locinput;
2760                 char *e = s + ln;
2761                 STRLEN ulen;
2762
2763                 if (do_utf8) {
2764                     /* The target is utf8, the pattern is not utf8. */
2765                     while (s < e) {
2766                         if (l >= PL_regeol)
2767                              sayNO;
2768                         if (NATIVE_TO_UNI(*(U8*)s) !=
2769                             utf8n_to_uvuni((U8*)l, UTF8_MAXBYTES, &ulen,
2770                                            ckWARN(WARN_UTF8) ?
2771                                            0 : UTF8_ALLOW_ANY))
2772                              sayNO;
2773                         l += ulen;
2774                         s ++;
2775                     }
2776                 }
2777                 else {
2778                     /* The target is not utf8, the pattern is utf8. */
2779                     while (s < e) {
2780                         if (l >= PL_regeol)
2781                             sayNO;
2782                         if (NATIVE_TO_UNI(*((U8*)l)) !=
2783                             utf8n_to_uvuni((U8*)s, UTF8_MAXBYTES, &ulen,
2784                                            ckWARN(WARN_UTF8) ?
2785                                            0 : UTF8_ALLOW_ANY))
2786                             sayNO;
2787                         s += ulen;
2788                         l ++;
2789                     }
2790                 }
2791                 locinput = l;
2792                 nextchr = UCHARAT(locinput);
2793                 break;
2794             }
2795             /* The target and the pattern have the same utf8ness. */
2796             /* Inline the first character, for speed. */
2797             if (UCHARAT(s) != nextchr)
2798                 sayNO;
2799             if (PL_regeol - locinput < ln)
2800                 sayNO;
2801             if (ln > 1 && memNE(s, locinput, ln))
2802                 sayNO;
2803             locinput += ln;
2804             nextchr = UCHARAT(locinput);
2805             break;
2806         case EXACTFL:
2807             PL_reg_flags |= RF_tainted;
2808             /* FALL THROUGH */
2809         case EXACTF:
2810             s = STRING(scan);
2811             ln = STR_LEN(scan);
2812
2813             if (do_utf8 || UTF) {
2814               /* Either target or the pattern are utf8. */
2815                 char *l = locinput;
2816                 char *e = PL_regeol;
2817
2818                 if (ibcmp_utf8(s, 0,  ln, (bool)UTF,
2819                                l, &e, 0,  do_utf8)) {
2820                      /* One more case for the sharp s:
2821                       * pack("U0U*", 0xDF) =~ /ss/i,
2822                       * the 0xC3 0x9F are the UTF-8
2823                       * byte sequence for the U+00DF. */
2824                      if (!(do_utf8 &&
2825                            toLOWER(s[0]) == 's' &&
2826                            ln >= 2 &&
2827                            toLOWER(s[1]) == 's' &&
2828                            (U8)l[0] == 0xC3 &&
2829                            e - l >= 2 &&
2830                            (U8)l[1] == 0x9F))
2831                           sayNO;
2832                 }
2833                 locinput = e;
2834                 nextchr = UCHARAT(locinput);
2835                 break;
2836             }
2837
2838             /* Neither the target and the pattern are utf8. */
2839
2840             /* Inline the first character, for speed. */
2841             if (UCHARAT(s) != nextchr &&
2842                 UCHARAT(s) != ((OP(scan) == EXACTF)
2843                                ? PL_fold : PL_fold_locale)[nextchr])
2844                 sayNO;
2845             if (PL_regeol - locinput < ln)
2846                 sayNO;
2847             if (ln > 1 && (OP(scan) == EXACTF
2848                            ? ibcmp(s, locinput, ln)
2849                            : ibcmp_locale(s, locinput, ln)))
2850                 sayNO;
2851             locinput += ln;
2852             nextchr = UCHARAT(locinput);
2853             break;
2854         case ANYOF:
2855             if (do_utf8) {
2856                 STRLEN inclasslen = PL_regeol - locinput;
2857
2858                 if (!reginclass(scan, (U8*)locinput, &inclasslen, do_utf8))
2859                     sayNO_ANYOF;
2860                 if (locinput >= PL_regeol)
2861                     sayNO;
2862                 locinput += inclasslen ? inclasslen : UTF8SKIP(locinput);
2863                 nextchr = UCHARAT(locinput);
2864                 break;
2865             }
2866             else {
2867                 if (nextchr < 0)
2868                     nextchr = UCHARAT(locinput);
2869                 if (!REGINCLASS(scan, (U8*)locinput))
2870                     sayNO_ANYOF;
2871                 if (!nextchr && locinput >= PL_regeol)
2872                     sayNO;
2873                 nextchr = UCHARAT(++locinput);
2874                 break;
2875             }
2876         no_anyof:
2877             /* If we might have the case of the German sharp s
2878              * in a casefolding Unicode character class. */
2879
2880             if (ANYOF_FOLD_SHARP_S(scan, locinput, PL_regeol)) {
2881                  locinput += SHARP_S_SKIP;
2882                  nextchr = UCHARAT(locinput);
2883             }
2884             else
2885                  sayNO;
2886             break;
2887         case ALNUML:
2888             PL_reg_flags |= RF_tainted;
2889             /* FALL THROUGH */
2890         case ALNUM:
2891             if (!nextchr)
2892                 sayNO;
2893             if (do_utf8) {
2894                 LOAD_UTF8_CHARCLASS(alnum,"a");
2895                 if (!(OP(scan) == ALNUM
2896                       ? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
2897                       : isALNUM_LC_utf8((U8*)locinput)))
2898                 {
2899                     sayNO;
2900                 }
2901                 locinput += PL_utf8skip[nextchr];
2902                 nextchr = UCHARAT(locinput);
2903                 break;
2904             }
2905             if (!(OP(scan) == ALNUM
2906                   ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
2907                 sayNO;
2908             nextchr = UCHARAT(++locinput);
2909             break;
2910         case NALNUML:
2911             PL_reg_flags |= RF_tainted;
2912             /* FALL THROUGH */
2913         case NALNUM:
2914             if (!nextchr && locinput >= PL_regeol)
2915                 sayNO;
2916             if (do_utf8) {
2917                 LOAD_UTF8_CHARCLASS(alnum,"a");
2918                 if (OP(scan) == NALNUM
2919                     ? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
2920                     : isALNUM_LC_utf8((U8*)locinput))
2921                 {
2922                     sayNO;
2923                 }
2924                 locinput += PL_utf8skip[nextchr];
2925                 nextchr = UCHARAT(locinput);
2926                 break;
2927             }
2928             if (OP(scan) == NALNUM
2929                 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
2930                 sayNO;
2931             nextchr = UCHARAT(++locinput);
2932             break;
2933         case BOUNDL:
2934         case NBOUNDL:
2935             PL_reg_flags |= RF_tainted;
2936             /* FALL THROUGH */
2937         case BOUND:
2938         case NBOUND:
2939             /* was last char in word? */
2940             if (do_utf8) {
2941                 if (locinput == PL_bostr)
2942                     ln = '\n';
2943                 else {
2944                     U8 *r = reghop3((U8*)locinput, -1, (U8*)PL_bostr);
2945                 
2946                     ln = utf8n_to_uvchr(r, UTF8SKIP(r), 0, 0);
2947                 }
2948                 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2949                     ln = isALNUM_uni(ln);
2950                     LOAD_UTF8_CHARCLASS(alnum,"a");
2951                     n = swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8);
2952                 }
2953                 else {
2954                     ln = isALNUM_LC_uvchr(UNI_TO_NATIVE(ln));
2955                     n = isALNUM_LC_utf8((U8*)locinput);
2956                 }
2957             }
2958             else {
2959                 ln = (locinput != PL_bostr) ?
2960                     UCHARAT(locinput - 1) : '\n';
2961                 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2962                     ln = isALNUM(ln);
2963                     n = isALNUM(nextchr);
2964                 }
2965                 else {
2966                     ln = isALNUM_LC(ln);
2967                     n = isALNUM_LC(nextchr);
2968                 }
2969             }
2970             if (((!ln) == (!n)) == (OP(scan) == BOUND ||
2971                                     OP(scan) == BOUNDL))
2972                     sayNO;
2973             break;
2974         case SPACEL:
2975             PL_reg_flags |= RF_tainted;
2976             /* FALL THROUGH */
2977         case SPACE:
2978             if (!nextchr)
2979                 sayNO;
2980             if (do_utf8) {
2981                 if (UTF8_IS_CONTINUED(nextchr)) {
2982                     LOAD_UTF8_CHARCLASS(space," ");
2983                     if (!(OP(scan) == SPACE
2984                           ? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
2985                           : isSPACE_LC_utf8((U8*)locinput)))
2986                     {
2987                         sayNO;
2988                     }
2989                     locinput += PL_utf8skip[nextchr];
2990                     nextchr = UCHARAT(locinput);
2991                     break;
2992                 }
2993                 if (!(OP(scan) == SPACE
2994                       ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
2995                     sayNO;
2996                 nextchr = UCHARAT(++locinput);
2997             }
2998             else {
2999                 if (!(OP(scan) == SPACE
3000                       ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
3001                     sayNO;
3002                 nextchr = UCHARAT(++locinput);
3003             }
3004             break;
3005         case NSPACEL:
3006             PL_reg_flags |= RF_tainted;
3007             /* FALL THROUGH */
3008         case NSPACE:
3009             if (!nextchr && locinput >= PL_regeol)
3010                 sayNO;
3011             if (do_utf8) {
3012                 LOAD_UTF8_CHARCLASS(space," ");
3013                 if (OP(scan) == NSPACE
3014                     ? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
3015                     : isSPACE_LC_utf8((U8*)locinput))
3016                 {
3017                     sayNO;
3018                 }
3019                 locinput += PL_utf8skip[nextchr];
3020                 nextchr = UCHARAT(locinput);
3021                 break;
3022             }
3023             if (OP(scan) == NSPACE
3024                 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
3025                 sayNO;
3026             nextchr = UCHARAT(++locinput);
3027             break;
3028         case DIGITL:
3029             PL_reg_flags |= RF_tainted;
3030             /* FALL THROUGH */
3031         case DIGIT:
3032             if (!nextchr)
3033                 sayNO;
3034             if (do_utf8) {
3035                 LOAD_UTF8_CHARCLASS(digit,"0");
3036                 if (!(OP(scan) == DIGIT
3037                       ? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
3038                       : isDIGIT_LC_utf8((U8*)locinput)))
3039                 {
3040                     sayNO;
3041                 }
3042                 locinput += PL_utf8skip[nextchr];
3043                 nextchr = UCHARAT(locinput);
3044                 break;
3045             }
3046             if (!(OP(scan) == DIGIT
3047                   ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
3048                 sayNO;
3049             nextchr = UCHARAT(++locinput);
3050             break;
3051         case NDIGITL:
3052             PL_reg_flags |= RF_tainted;
3053             /* FALL THROUGH */
3054         case NDIGIT:
3055             if (!nextchr && locinput >= PL_regeol)
3056                 sayNO;
3057             if (do_utf8) {
3058                 LOAD_UTF8_CHARCLASS(digit,"0");
3059                 if (OP(scan) == NDIGIT
3060                     ? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
3061                     : isDIGIT_LC_utf8((U8*)locinput))
3062                 {
3063                     sayNO;
3064                 }
3065                 locinput += PL_utf8skip[nextchr];
3066                 nextchr = UCHARAT(locinput);
3067                 break;
3068             }
3069             if (OP(scan) == NDIGIT
3070                 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
3071                 sayNO;
3072             nextchr = UCHARAT(++locinput);
3073             break;
3074         case CLUMP:
3075             if (locinput >= PL_regeol)
3076                 sayNO;
3077             if  (do_utf8) {
3078                 LOAD_UTF8_CHARCLASS(mark,"~");
3079                 if (swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
3080                     sayNO;
3081                 locinput += PL_utf8skip[nextchr];
3082                 while (locinput < PL_regeol &&
3083                        swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
3084                     locinput += UTF8SKIP(locinput);
3085                 if (locinput > PL_regeol)
3086                     sayNO;
3087             } 
3088             else
3089                locinput++;
3090             nextchr = UCHARAT(locinput);
3091             break;
3092         case REFFL:
3093             PL_reg_flags |= RF_tainted;
3094             /* FALL THROUGH */
3095         case REF:
3096         case REFF:
3097             n = ARG(scan);  /* which paren pair */
3098             ln = PL_regstartp[n];
3099             PL_reg_leftiter = PL_reg_maxiter;           /* Void cache */
3100             if ((I32)*PL_reglastparen < n || ln == -1)
3101                 sayNO;                  /* Do not match unless seen CLOSEn. */
3102             if (ln == PL_regendp[n])
3103                 break;
3104
3105             s = PL_bostr + ln;
3106             if (do_utf8 && OP(scan) != REF) {   /* REF can do byte comparison */
3107                 char *l = locinput;
3108                 char *e = PL_bostr + PL_regendp[n];
3109                 /*
3110                  * Note that we can't do the "other character" lookup trick as
3111                  * in the 8-bit case (no pun intended) because in Unicode we
3112                  * have to map both upper and title case to lower case.
3113                  */
3114                 if (OP(scan) == REFF) {
3115                     STRLEN ulen1, ulen2;
3116                     U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
3117                     U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
3118                     while (s < e) {
3119                         if (l >= PL_regeol)
3120                             sayNO;
3121                         toLOWER_utf8((U8*)s, tmpbuf1, &ulen1);
3122                         toLOWER_utf8((U8*)l, tmpbuf2, &ulen2);
3123                         if (ulen1 != ulen2 || memNE((char *)tmpbuf1, (char *)tmpbuf2, ulen1))
3124                             sayNO;
3125                         s += ulen1;
3126                         l += ulen2;
3127                     }
3128                 }
3129                 locinput = l;
3130                 nextchr = UCHARAT(locinput);
3131                 break;
3132             }
3133
3134             /* Inline the first character, for speed. */
3135             if (UCHARAT(s) != nextchr &&
3136                 (OP(scan) == REF ||
3137                  (UCHARAT(s) != ((OP(scan) == REFF
3138                                   ? PL_fold : PL_fold_locale)[nextchr]))))
3139                 sayNO;
3140             ln = PL_regendp[n] - ln;
3141             if (locinput + ln > PL_regeol)
3142                 sayNO;
3143             if (ln > 1 && (OP(scan) == REF
3144                            ? memNE(s, locinput, ln)
3145                            : (OP(scan) == REFF
3146                               ? ibcmp(s, locinput, ln)
3147                               : ibcmp_locale(s, locinput, ln))))
3148                 sayNO;
3149             locinput += ln;
3150             nextchr = UCHARAT(locinput);
3151             break;
3152
3153         case NOTHING:
3154         case TAIL:
3155             break;
3156         case BACK:
3157             break;
3158         case EVAL:
3159         {
3160             dSP;
3161             OP_4tree *oop = PL_op;
3162             COP *ocurcop = PL_curcop;
3163             PAD *old_comppad;
3164             SV *ret;
3165             struct regexp *oreg = PL_reg_re;
3166         
3167             n = ARG(scan);
3168             PL_op = (OP_4tree*)PL_regdata->data[n];
3169             DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log, "  re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
3170             PAD_SAVE_LOCAL(old_comppad, (PAD*)PL_regdata->data[n + 2]);
3171             PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
3172
3173             {
3174                 SV **before = SP;
3175                 CALLRUNOPS(aTHX);                       /* Scalar context. */
3176                 SPAGAIN;
3177                 if (SP == before)
3178                     ret = &PL_sv_undef;   /* protect against empty (?{}) blocks. */
3179                 else {
3180                     ret = POPs;
3181                     PUTBACK;
3182                 }
3183             }
3184
3185             PL_op = oop;
3186             PAD_RESTORE_LOCAL(old_comppad);
3187             PL_curcop = ocurcop;
3188             if (logical) {
3189                 if (logical == 2) {     /* Postponed subexpression. */
3190                     regexp *re;
3191                     MAGIC *mg = Null(MAGIC*);
3192                     re_cc_state state;
3193                     CHECKPOINT cp, lastcp;
3194                     int toggleutf;
3195                     register SV *sv;
3196
3197                     if(SvROK(ret) && SvSMAGICAL(sv = SvRV(ret)))
3198                         mg = mg_find(sv, PERL_MAGIC_qr);
3199                     else if (SvSMAGICAL(ret)) {
3200                         if (SvGMAGICAL(ret))
3201                             sv_unmagic(ret, PERL_MAGIC_qr);
3202                         else
3203                             mg = mg_find(ret, PERL_MAGIC_qr);
3204                     }
3205
3206                     if (mg) {
3207                         re = (regexp *)mg->mg_obj;
3208                         (void)ReREFCNT_inc(re);
3209                     }
3210                     else {
3211                         STRLEN len;
3212                         char *t = SvPV(ret, len);
3213                         PMOP pm;
3214                         char *oprecomp = PL_regprecomp;
3215                         I32 osize = PL_regsize;
3216                         I32 onpar = PL_regnpar;
3217
3218                         Zero(&pm, 1, PMOP);
3219                         if (DO_UTF8(ret)) pm.op_pmdynflags |= PMdf_DYN_UTF8;
3220                         re = CALLREGCOMP(aTHX_ t, t + len, &pm);
3221                         if (!(SvFLAGS(ret)
3222                               & (SVs_TEMP | SVs_PADTMP | SVf_READONLY
3223                                 | SVs_GMG)))
3224                             sv_magic(ret,(SV*)ReREFCNT_inc(re),
3225                                         PERL_MAGIC_qr,0,0);
3226                         PL_regprecomp = oprecomp;
3227                         PL_regsize = osize;
3228                         PL_regnpar = onpar;
3229                     }
3230                     DEBUG_EXECUTE_r(
3231                         PerlIO_printf(Perl_debug_log,
3232                                       "Entering embedded `%s%.60s%s%s'\n",
3233                                       PL_colors[0],
3234                                       re->precomp,
3235                                       PL_colors[1],
3236                                       (strlen(re->precomp) > 60 ? "..." : ""))
3237                         );
3238                     state.node = next;
3239                     state.prev = PL_reg_call_cc;
3240                     state.cc = PL_regcc;
3241                     state.re = PL_reg_re;
3242
3243                     PL_regcc = 0;
3244                 
3245                     cp = regcppush(0);  /* Save *all* the positions. */
3246                     REGCP_SET(lastcp);
3247                     cache_re(re);
3248                     state.ss = PL_savestack_ix;
3249                     *PL_reglastparen = 0;
3250                     *PL_reglastcloseparen = 0;
3251                     PL_reg_call_cc = &state;
3252                     PL_reginput = locinput;
3253                     toggleutf = ((PL_reg_flags & RF_utf8) != 0) ^
3254                                 ((re->reganch & ROPT_UTF8) != 0);
3255                     if (toggleutf) PL_reg_flags ^= RF_utf8;
3256
3257                     /* XXXX This is too dramatic a measure... */
3258                     PL_reg_maxiter = 0;
3259
3260                     if (regmatch(re->program + 1)) {
3261                         /* Even though we succeeded, we need to restore
3262                            global variables, since we may be wrapped inside
3263                            SUSPEND, thus the match may be not finished yet. */
3264
3265                         /* XXXX Do this only if SUSPENDed? */
3266                         PL_reg_call_cc = state.prev;
3267                         PL_regcc = state.cc;
3268                         PL_reg_re = state.re;
3269                         cache_re(PL_reg_re);
3270                         if (toggleutf) PL_reg_flags ^= RF_utf8;
3271
3272                         /* XXXX This is too dramatic a measure... */
3273                         PL_reg_maxiter = 0;
3274
3275                         /* These are needed even if not SUSPEND. */
3276                         ReREFCNT_dec(re);
3277                         regcpblow(cp);
3278                         sayYES;
3279                     }
3280                     ReREFCNT_dec(re);
3281                     REGCP_UNWIND(lastcp);
3282                     regcppop();
3283                     PL_reg_call_cc = state.prev;
3284                     PL_regcc = state.cc;
3285                     PL_reg_re = state.re;
3286                     cache_re(PL_reg_re);
3287                     if (toggleutf) PL_reg_flags ^= RF_utf8;
3288
3289                     /* XXXX This is too dramatic a measure... */
3290                     PL_reg_maxiter = 0;
3291
3292                     logical = 0;
3293                     sayNO;
3294                 }
3295                 sw = SvTRUE(ret);
3296                 logical = 0;
3297             }
3298             else {
3299                 sv_setsv(save_scalar(PL_replgv), ret);
3300                 cache_re(oreg);
3301             }
3302             break;
3303         }
3304         case OPEN:
3305             n = ARG(scan);  /* which paren pair */
3306             PL_reg_start_tmp[n] = locinput;
3307             if (n > PL_regsize)
3308                 PL_regsize = n;
3309             break;
3310         case CLOSE:
3311             n = ARG(scan);  /* which paren pair */
3312             PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
3313             PL_regendp[n] = locinput - PL_bostr;
3314             if (n > (I32)*PL_reglastparen)
3315                 *PL_reglastparen = n;
3316             *PL_reglastcloseparen = n;
3317             break;
3318         case GROUPP:
3319             n = ARG(scan);  /* which paren pair */
3320             sw = ((I32)*PL_reglastparen >= n && PL_regendp[n] != -1);
3321             break;
3322         case IFTHEN:
3323             PL_reg_leftiter = PL_reg_maxiter;           /* Void cache */
3324             if (sw)
3325                 next = NEXTOPER(NEXTOPER(scan));
3326             else {
3327                 next = scan + ARG(scan);
3328                 if (OP(next) == IFTHEN) /* Fake one. */
3329                     next = NEXTOPER(NEXTOPER(next));
3330             }
3331             break;
3332         case LOGICAL:
3333             logical = scan->flags;
3334             break;
3335 /*******************************************************************
3336  PL_regcc contains infoblock about the innermost (...)* loop, and
3337  a pointer to the next outer infoblock.
3338
3339  Here is how Y(A)*Z is processed (if it is compiled into CURLYX/WHILEM):
3340
3341    1) After matching X, regnode for CURLYX is processed;
3342
3343    2) This regnode creates infoblock on the stack, and calls
3344       regmatch() recursively with the starting point at WHILEM node;
3345
3346    3) Each hit of WHILEM node tries to match A and Z (in the order
3347       depending on the current iteration, min/max of {min,max} and
3348       greediness).  The information about where are nodes for "A"
3349       and "Z" is read from the infoblock, as is info on how many times "A"
3350       was already matched, and greediness.
3351
3352    4) After A matches, the same WHILEM node is hit again.
3353
3354    5) Each time WHILEM is hit, PL_regcc is the infoblock created by CURLYX
3355       of the same pair.  Thus when WHILEM tries to match Z, it temporarily
3356       resets PL_regcc, since this Y(A)*Z can be a part of some other loop:
3357       as in (Y(A)*Z)*.  If Z matches, the automaton will hit the WHILEM node
3358       of the external loop.
3359
3360  Currently present infoblocks form a tree with a stem formed by PL_curcc
3361  and whatever it mentions via ->next, and additional attached trees
3362  corresponding to temporarily unset infoblocks as in "5" above.
3363
3364  In the following picture infoblocks for outer loop of
3365  (Y(A)*?Z)*?T are denoted O, for inner I.  NULL starting block
3366  is denoted by x.  The matched string is YAAZYAZT.  Temporarily postponed
3367  infoblocks are drawn below the "reset" infoblock.
3368
3369  In fact in the picture below we do not show failed matches for Z and T
3370  by WHILEM blocks.  [We illustrate minimal matches, since for them it is
3371  more obvious *why* one needs to *temporary* unset infoblocks.]
3372
3373   Matched       REx position    InfoBlocks      Comment
3374                 (Y(A)*?Z)*?T    x
3375                 Y(A)*?Z)*?T     x <- O
3376   Y             (A)*?Z)*?T      x <- O
3377   Y             A)*?Z)*?T       x <- O <- I
3378   YA            )*?Z)*?T        x <- O <- I
3379   YA            A)*?Z)*?T       x <- O <- I
3380   YAA           )*?Z)*?T        x <- O <- I
3381   YAA           Z)*?T           x <- O          # Temporary unset I
3382                                      I
3383
3384   YAAZ          Y(A)*?Z)*?T     x <- O
3385                                      I
3386
3387   YAAZY         (A)*?Z)*?T      x <- O
3388                                      I
3389
3390   YAAZY         A)*?Z)*?T       x <- O <- I
3391                                      I
3392
3393   YAAZYA        )*?Z)*?T        x <- O <- I     
3394                                      I
3395
3396   YAAZYA        Z)*?T           x <- O          # Temporary unset I
3397                                      I,I
3398
3399   YAAZYAZ       )*?T            x <- O
3400                                      I,I
3401
3402   YAAZYAZ       T               x               # Temporary unset O
3403                                 O
3404                                 I,I
3405
3406   YAAZYAZT                      x
3407                                 O
3408                                 I,I
3409  *******************************************************************/
3410         case CURLYX: {
3411                 CURCUR cc;
3412                 CHECKPOINT cp = PL_savestack_ix;
3413                 /* No need to save/restore up to this paren */
3414                 I32 parenfloor = scan->flags;
3415
3416                 if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
3417                     next += ARG(next);
3418                 cc.oldcc = PL_regcc;
3419                 PL_regcc = &cc;
3420                 /* XXXX Probably it is better to teach regpush to support
3421                    parenfloor > PL_regsize... */
3422                 if (parenfloor > (I32)*PL_reglastparen)
3423                     parenfloor = *PL_reglastparen; /* Pessimization... */
3424                 cc.parenfloor = parenfloor;
3425                 cc.cur = -1;
3426                 cc.min = ARG1(scan);
3427                 cc.max  = ARG2(scan);
3428                 cc.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
3429                 cc.next = next;
3430                 cc.minmod = minmod;
3431                 cc.lastloc = 0;
3432                 PL_reginput = locinput;
3433                 n = regmatch(PREVOPER(next));   /* start on the WHILEM */
3434                 regcpblow(cp);
3435                 PL_regcc = cc.oldcc;
3436                 saySAME(n);
3437             }
3438             /* NOT REACHED */
3439         case WHILEM: {
3440                 /*
3441                  * This is really hard to understand, because after we match
3442                  * what we're trying to match, we must make sure the rest of
3443                  * the REx is going to match for sure, and to do that we have
3444                  * to go back UP the parse tree by recursing ever deeper.  And
3445                  * if it fails, we have to reset our parent's current state
3446                  * that we can try again after backing off.
3447                  */
3448
3449                 CHECKPOINT cp, lastcp;
3450                 CURCUR* cc = PL_regcc;
3451                 char *lastloc = cc->lastloc; /* Detection of 0-len. */
3452                 
3453                 n = cc->cur + 1;        /* how many we know we matched */
3454                 PL_reginput = locinput;
3455
3456                 DEBUG_EXECUTE_r(
3457                     PerlIO_printf(Perl_debug_log,
3458                                   "%*s  %ld out of %ld..%ld  cc=%"UVxf"\n",
3459                                   REPORT_CODE_OFF+PL_regindent*2, "",
3460                                   (long)n, (long)cc->min,
3461                                   (long)cc->max, PTR2UV(cc))
3462                     );
3463
3464                 /* If degenerate scan matches "", assume scan done. */
3465
3466                 if (locinput == cc->lastloc && n >= cc->min) {
3467                     PL_regcc = cc->oldcc;
3468                     if (PL_regcc)
3469                         ln = PL_regcc->cur;
3470                     DEBUG_EXECUTE_r(
3471                         PerlIO_printf(Perl_debug_log,
3472                            "%*s  empty match detected, try continuation...\n",
3473                            REPORT_CODE_OFF+PL_regindent*2, "")
3474                         );
3475                     if (regmatch(cc->next))
3476                         sayYES;
3477                     if (PL_regcc)
3478                         PL_regcc->cur = ln;
3479                     PL_regcc = cc;
3480                     sayNO;
3481                 }
3482
3483                 /* First just match a string of min scans. */
3484
3485                 if (n < cc->min) {
3486                     cc->cur = n;
3487                     cc->lastloc = locinput;
3488                     if (regmatch(cc->scan))
3489                         sayYES;
3490                     cc->cur = n - 1;
3491                     cc->lastloc = lastloc;
3492                     sayNO;
3493                 }
3494
3495                 if (scan->flags) {
3496                     /* Check whether we already were at this position.
3497                         Postpone detection until we know the match is not
3498                         *that* much linear. */
3499                 if (!PL_reg_maxiter) {
3500                     PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
3501                     PL_reg_leftiter = PL_reg_maxiter;
3502                 }
3503                 if (PL_reg_leftiter-- == 0) {
3504                     I32 size = (PL_reg_maxiter + 7)/8;
3505                     if (PL_reg_poscache) {
3506                         if ((I32)PL_reg_poscache_size < size) {
3507                             Renew(PL_reg_poscache, size, char);
3508                             PL_reg_poscache_size = size;
3509                         }
3510                         Zero(PL_reg_poscache, size, char);
3511                     }
3512                     else {
3513                         PL_reg_poscache_size = size;
3514                         Newz(29, PL_reg_poscache, size, char);
3515                     }
3516                     DEBUG_EXECUTE_r(
3517                         PerlIO_printf(Perl_debug_log,
3518               "%sDetected a super-linear match, switching on caching%s...\n",
3519                                       PL_colors[4], PL_colors[5])
3520                         );
3521                 }
3522                 if (PL_reg_leftiter < 0) {
3523                     I32 o = locinput - PL_bostr, b;
3524
3525                     o = (scan->flags & 0xf) - 1 + o * (scan->flags>>4);
3526                     b = o % 8;
3527                     o /= 8;
3528                     if (PL_reg_poscache[o] & (1<<b)) {
3529                     DEBUG_EXECUTE_r(
3530                         PerlIO_printf(Perl_debug_log,
3531                                       "%*s  already tried at this position...\n",
3532                                       REPORT_CODE_OFF+PL_regindent*2, "")
3533                         );
3534                         if (PL_reg_flags & RF_false)
3535                             sayYES;
3536                         else
3537                             sayNO_SILENT;
3538                     }
3539                     PL_reg_poscache[o] |= (1<<b);
3540                 }
3541                 }
3542
3543                 /* Prefer next over scan for minimal matching. */
3544
3545                 if (cc->minmod) {
3546                     PL_regcc = cc->oldcc;
3547                     if (PL_regcc)
3548                         ln = PL_regcc->cur;
3549                     cp = regcppush(cc->parenfloor);
3550                     REGCP_SET(lastcp);
3551                     if (regmatch(cc->next)) {
3552                         regcpblow(cp);
3553                         sayYES; /* All done. */
3554                     }
3555                     REGCP_UNWIND(lastcp);
3556                     regcppop();
3557                     if (PL_regcc)
3558                         PL_regcc->cur = ln;
3559                     PL_regcc = cc;
3560
3561                     if (n >= cc->max) { /* Maximum greed exceeded? */
3562                         if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
3563                             && !(PL_reg_flags & RF_warned)) {
3564                             PL_reg_flags |= RF_warned;
3565                             Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s limit (%d) exceeded",
3566                                  "Complex regular subexpression recursion",
3567                                  REG_INFTY - 1);
3568                         }
3569                         sayNO;
3570                     }
3571
3572                     DEBUG_EXECUTE_r(
3573                         PerlIO_printf(Perl_debug_log,
3574                                       "%*s  trying longer...\n",
3575                                       REPORT_CODE_OFF+PL_regindent*2, "")
3576                         );
3577                     /* Try scanning more and see if it helps. */
3578                     PL_reginput = locinput;
3579                     cc->cur = n;
3580                     cc->lastloc = locinput;
3581                     cp = regcppush(cc->parenfloor);
3582                     REGCP_SET(lastcp);
3583                     if (regmatch(cc->scan)) {
3584                         regcpblow(cp);
3585                         sayYES;
3586                     }
3587                     REGCP_UNWIND(lastcp);
3588                     regcppop();
3589                     cc->cur = n - 1;
3590                     cc->lastloc = lastloc;
3591                     sayNO;
3592                 }
3593
3594                 /* Prefer scan over next for maximal matching. */
3595
3596                 if (n < cc->max) {      /* More greed allowed? */
3597                     cp = regcppush(cc->parenfloor);
3598                     cc->cur = n;
3599                     cc->lastloc = locinput;
3600                     REGCP_SET(lastcp);
3601                     if (regmatch(cc->scan)) {
3602                         regcpblow(cp);
3603                         sayYES;
3604                     }
3605                     REGCP_UNWIND(lastcp);
3606                     regcppop();         /* Restore some previous $<digit>s? */
3607                     PL_reginput = locinput;
3608                     DEBUG_EXECUTE_r(
3609                         PerlIO_printf(Perl_debug_log,
3610                                       "%*s  failed, try continuation...\n",
3611                                       REPORT_CODE_OFF+PL_regindent*2, "")
3612                         );
3613                 }
3614                 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
3615                         && !(PL_reg_flags & RF_warned)) {
3616                     PL_reg_flags |= RF_warned;
3617                     Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s limit (%d) exceeded",
3618                          "Complex regular subexpression recursion",
3619                          REG_INFTY - 1);
3620                 }
3621
3622                 /* Failed deeper matches of scan, so see if this one works. */
3623                 PL_regcc = cc->oldcc;
3624                 if (PL_regcc)
3625                     ln = PL_regcc->cur;
3626                 if (regmatch(cc->next))
3627                     sayYES;
3628                 if (PL_regcc)
3629                     PL_regcc->cur = ln;
3630                 PL_regcc = cc;
3631                 cc->cur = n - 1;
3632                 cc->lastloc = lastloc;
3633                 sayNO;
3634             }
3635             /* NOT REACHED */
3636         case BRANCHJ:
3637             next = scan + ARG(scan);
3638             if (next == scan)
3639                 next = NULL;
3640             inner = NEXTOPER(NEXTOPER(scan));
3641             goto do_branch;
3642         case BRANCH:
3643             inner = NEXTOPER(scan);
3644           do_branch:
3645             {
3646                 c1 = OP(scan);
3647                 if (OP(next) != c1)     /* No choice. */
3648                     next = inner;       /* Avoid recursion. */
3649                 else {
3650                     I32 lastparen = *PL_reglastparen;
3651                     I32 unwind1;
3652                     re_unwind_branch_t *uw;
3653
3654                     /* Put unwinding data on stack */
3655                     unwind1 = SSNEWt(1,re_unwind_branch_t);
3656                     uw = SSPTRt(unwind1,re_unwind_branch_t);
3657                     uw->prev = unwind;
3658                     unwind = unwind1;
3659                     uw->type = ((c1 == BRANCH)
3660                                 ? RE_UNWIND_BRANCH
3661                                 : RE_UNWIND_BRANCHJ);
3662                     uw->lastparen = lastparen;
3663                     uw->next = next;
3664                     uw->locinput = locinput;
3665                     uw->nextchr = nextchr;
3666 #ifdef DEBUGGING
3667                     uw->regindent = ++PL_regindent;
3668 #endif
3669
3670                     REGCP_SET(uw->lastcp);
3671
3672                     /* Now go into the first branch */
3673                     next = inner;
3674                 }
3675             }
3676             break;
3677         case MINMOD:
3678             minmod = 1;
3679             break;
3680         case CURLYM:
3681         {
3682             I32 l = 0;
3683             CHECKPOINT lastcp;
3684         
3685             /* We suppose that the next guy does not need
3686                backtracking: in particular, it is of constant non-zero length,
3687                and has no parenths to influence future backrefs. */
3688             ln = ARG1(scan);  /* min to match */
3689             n  = ARG2(scan);  /* max to match */
3690             paren = scan->flags;
3691             if (paren) {
3692                 if (paren > PL_regsize)
3693                     PL_regsize = paren;
3694                 if (paren > (I32)*PL_reglastparen)
3695                     *PL_reglastparen = paren;
3696             }
3697             scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
3698             if (paren)
3699                 scan += NEXT_OFF(scan); /* Skip former OPEN. */
3700             PL_reginput = locinput;
3701             if (minmod) {
3702                 minmod = 0;
3703                 if (ln && regrepeat_hard(scan, ln, &l) < ln)
3704                     sayNO;
3705                 locinput = PL_reginput;
3706                 if (HAS_TEXT(next) || JUMPABLE(next)) {
3707                     regnode *text_node = next;
3708
3709                     if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
3710
3711                     if (! HAS_TEXT(text_node)) c1 = c2 = -1000;
3712                     else {
3713                         if (PL_regkind[(U8)OP(text_node)] == REF) {
3714                             c1 = c2 = -1000;
3715                             goto assume_ok_MM;
3716                         }
3717                         else { c1 = (U8)*STRING(text_node); }
3718                         if (OP(text_node) == EXACTF || OP(text_node) == REFF)
3719                             c2 = PL_fold[c1];
3720                         else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
3721                             c2 = PL_fold_locale[c1];
3722                         else
3723                             c2 = c1;
3724                     }
3725                 }
3726                 else
3727                     c1 = c2 = -1000;
3728             assume_ok_MM:
3729                 REGCP_SET(lastcp);
3730                 while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */
3731                     /* If it could work, try it. */
3732                     if (c1 == -1000 ||
3733                         UCHARAT(PL_reginput) == c1 ||
3734                         UCHARAT(PL_reginput) == c2)
3735                     {
3736                         if (paren) {
3737                             if (ln) {
3738                                 PL_regstartp[paren] =
3739                                     HOPc(PL_reginput, -l) - PL_bostr;
3740                                 PL_regendp[paren] = PL_reginput - PL_bostr;
3741                             }
3742                             else
3743                                 PL_regendp[paren] = -1;
3744                         }
3745                         if (regmatch(next))
3746                             sayYES;
3747                         REGCP_UNWIND(lastcp);
3748                     }
3749                     /* Couldn't or didn't -- move forward. */
3750                     PL_reginput = locinput;
3751                     if (regrepeat_hard(scan, 1, &l)) {
3752                         ln++;
3753                         locinput = PL_reginput;
3754                     }
3755                     else
3756                         sayNO;
3757                 }
3758             }
3759             else {
3760                 n = regrepeat_hard(scan, n, &l);
3761                 locinput = PL_reginput;
3762                 DEBUG_EXECUTE_r(
3763                     PerlIO_printf(Perl_debug_log,
3764                                   "%*s  matched %"IVdf" times, len=%"IVdf"...\n",
3765                                   (int)(REPORT_CODE_OFF+PL_regindent*2), "",
3766                                   (IV) n, (IV)l)
3767                     );
3768                 if (n >= ln) {
3769                     if (HAS_TEXT(next) || JUMPABLE(next)) {
3770                         regnode *text_node = next;
3771
3772                         if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
3773
3774                         if (! HAS_TEXT(text_node)) c1 = c2 = -1000;
3775                         else {
3776                             if (PL_regkind[(U8)OP(text_node)] == REF) {
3777                                 c1 = c2 = -1000;
3778                                 goto assume_ok_REG;
3779                             }
3780                             else { c1 = (U8)*STRING(text_node); }
3781
3782                             if (OP(text_node) == EXACTF || OP(text_node) == REFF)
3783                                 c2 = PL_fold[c1];
3784                             else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
3785                                 c2 = PL_fold_locale[c1];
3786                             else
3787                                 c2 = c1;
3788                         }
3789                     }
3790                     else
3791                         c1 = c2 = -1000;
3792                 }
3793             assume_ok_REG:
3794                 REGCP_SET(lastcp);
3795                 while (n >= ln) {
3796                     /* If it could work, try it. */
3797                     if (c1 == -1000 ||
3798                         UCHARAT(PL_reginput) == c1 ||
3799                         UCHARAT(PL_reginput) == c2)
3800                     {
3801                         DEBUG_EXECUTE_r(
3802                                 PerlIO_printf(Perl_debug_log,
3803                                               "%*s  trying tail with n=%"IVdf"...\n",
3804                                               (int)(REPORT_CODE_OFF+PL_regindent*2), "", (IV)n)
3805                             );
3806                         if (paren) {
3807                             if (n) {
3808                                 PL_regstartp[paren] = HOPc(PL_reginput, -l) - PL_bostr;
3809                                 PL_regendp[paren] = PL_reginput - PL_bostr;
3810                             }
3811                             else
3812                                 PL_regendp[paren] = -1;
3813                         }
3814                         if (regmatch(next))
3815                             sayYES;
3816                         REGCP_UNWIND(lastcp);
3817                     }
3818                     /* Couldn't or didn't -- back up. */
3819                     n--;
3820                     locinput = HOPc(locinput, -l);
3821                     PL_reginput = locinput;
3822                 }
3823             }
3824             sayNO;
3825             break;
3826         }
3827         case CURLYN:
3828             paren = scan->flags;        /* Which paren to set */
3829             if (paren > PL_regsize)
3830                 PL_regsize = paren;
3831             if (paren > (I32)*PL_reglastparen)
3832                 *PL_reglastparen = paren;
3833             ln = ARG1(scan);  /* min to match */
3834             n  = ARG2(scan);  /* max to match */
3835             scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
3836             goto repeat;
3837         case CURLY:
3838             paren = 0;
3839             ln = ARG1(scan);  /* min to match */
3840             n  = ARG2(scan);  /* max to match */
3841             scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
3842             goto repeat;
3843         case STAR:
3844             ln = 0;
3845             n = REG_INFTY;
3846             scan = NEXTOPER(scan);
3847             paren = 0;
3848             goto repeat;
3849         case PLUS:
3850             ln = 1;
3851             n = REG_INFTY;
3852             scan = NEXTOPER(scan);
3853             paren = 0;
3854           repeat:
3855             /*
3856             * Lookahead to avoid useless match attempts
3857             * when we know what character comes next.
3858             */
3859
3860             /*
3861             * Used to only do .*x and .*?x, but now it allows
3862             * for )'s, ('s and (?{ ... })'s to be in the way
3863             * of the quantifier and the EXACT-like node.  -- japhy
3864             */
3865
3866             if (HAS_TEXT(next) || JUMPABLE(next)) {
3867                 U8 *s;
3868                 regnode *text_node = next;
3869
3870                 if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
3871
3872                 if (! HAS_TEXT(text_node)) c1 = c2 = -1000;
3873                 else {
3874                     if (PL_regkind[(U8)OP(text_node)] == REF) {
3875                         c1 = c2 = -1000;
3876                         goto assume_ok_easy;
3877                     }
3878                     else { s = (U8*)STRING(text_node); }
3879
3880                     if (!UTF) {
3881                         c2 = c1 = *s;
3882                         if (OP(text_node) == EXACTF || OP(text_node) == REFF)
3883                             c2 = PL_fold[c1];
3884                         else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
3885                             c2 = PL_fold_locale[c1];
3886                     }
3887                     else { /* UTF */
3888                         if (OP(text_node) == EXACTF || OP(text_node) == REFF) {
3889                              STRLEN ulen1, ulen2;
3890                              U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
3891                              U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
3892
3893                              to_utf8_lower((U8*)s, tmpbuf1, &ulen1);
3894                              to_utf8_upper((U8*)s, tmpbuf2, &ulen2);
3895
3896                              c1 = utf8n_to_uvuni(tmpbuf1, UTF8_MAXBYTES, 0,
3897                                                  ckWARN(WARN_UTF8) ?
3898                                                  0 : UTF8_ALLOW_ANY);
3899                              c2 = utf8n_to_uvuni(tmpbuf2, UTF8_MAXBYTES, 0,
3900                                                  ckWARN(WARN_UTF8) ?
3901                                                  0 : UTF8_ALLOW_ANY);
3902                         }
3903                         else {
3904                             c2 = c1 = utf8n_to_uvchr(s, UTF8_MAXBYTES, 0,
3905                                                      ckWARN(WARN_UTF8) ?
3906                                                      0 : UTF8_ALLOW_ANY);
3907                         }
3908                     }
3909                 }
3910             }
3911             else
3912                 c1 = c2 = -1000;
3913         assume_ok_easy:
3914             PL_reginput = locinput;
3915             if (minmod) {
3916                 CHECKPOINT lastcp;
3917                 minmod = 0;
3918                 if (ln && regrepeat(scan, ln) < ln)
3919                     sayNO;
3920                 locinput = PL_reginput;
3921                 REGCP_SET(lastcp);
3922                 if (c1 != -1000) {
3923                     char *e; /* Should not check after this */
3924                     char *old = locinput;
3925                     int count = 0;
3926
3927                     if  (n == REG_INFTY) {
3928                         e = PL_regeol - 1;
3929                         if (do_utf8)
3930                             while (UTF8_IS_CONTINUATION(*(U8*)e))
3931                                 e--;
3932                     }
3933                     else if (do_utf8) {
3934                         int m = n - ln;
3935                         for (e = locinput;
3936                              m >0 && e + UTF8SKIP(e) <= PL_regeol; m--)
3937                             e += UTF8SKIP(e);
3938                     }
3939                     else {
3940                         e = locinput + n - ln;
3941                         if (e >= PL_regeol)
3942                             e = PL_regeol - 1;
3943                     }
3944                     while (1) {
3945                         /* Find place 'next' could work */
3946                         if (!do_utf8) {
3947                             if (c1 == c2) {
3948                                 while (locinput <= e &&
3949                                        UCHARAT(locinput) != c1)
3950                                     locinput++;
3951                             } else {
3952                                 while (locinput <= e
3953                                        && UCHARAT(locinput) != c1
3954                                        && UCHARAT(locinput) != c2)
3955                                     locinput++;
3956                             }
3957                             count = locinput - old;
3958                         }
3959                         else {
3960                             STRLEN len;
3961                             if (c1 == c2) {
3962                                 /* count initialised to
3963                                  * utf8_distance(old, locinput) */
3964                                 while (locinput <= e &&
3965                                        utf8n_to_uvchr((U8*)locinput,
3966                                                       UTF8_MAXBYTES, &len,
3967                                                       ckWARN(WARN_UTF8) ?
3968                                                       0 : UTF8_ALLOW_ANY) != (UV)c1) {
3969                                     locinput += len;
3970                                     count++;
3971                                 }
3972                             } else {
3973                                 /* count initialised to
3974                                  * utf8_distance(old, locinput) */
3975                                 while (locinput <= e) {
3976                                     UV c = utf8n_to_uvchr((U8*)locinput,
3977                                                           UTF8_MAXBYTES, &len,
3978                                                           ckWARN(WARN_UTF8) ?
3979                                                           0 : UTF8_ALLOW_ANY);
3980                                     if (c == (UV)c1 || c == (UV)c2)
3981                                         break;
3982                                     locinput += len;
3983                                     count++;
3984                                 }
3985                             }
3986                         }
3987                         if (locinput > e)
3988                             sayNO;
3989                         /* PL_reginput == old now */
3990                         if (locinput != old) {
3991                             ln = 1;     /* Did some */
3992                             if (regrepeat(scan, count) < count)
3993                                 sayNO;
3994                         }
3995                         /* PL_reginput == locinput now */
3996                         TRYPAREN(paren, ln, locinput);
3997                         PL_reginput = locinput; /* Could be reset... */
3998                         REGCP_UNWIND(lastcp);
3999                         /* Couldn't or didn't -- move forward. */
4000                         old = locinput;
4001                         if (do_utf8)
4002                             locinput += UTF8SKIP(locinput);
4003                         else
4004                             locinput++;
4005                         count = 1;
4006                     }
4007                 }
4008                 else
4009                 while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */
4010                     UV c;
4011                     if (c1 != -1000) {
4012                         if (do_utf8)
4013                             c = utf8n_to_uvchr((U8*)PL_reginput,
4014                                                UTF8_MAXBYTES, 0,
4015                                                ckWARN(WARN_UTF8) ?
4016                                                0 : UTF8_ALLOW_ANY);
4017                         else
4018                             c = UCHARAT(PL_reginput);
4019                         /* If it could work, try it. */
4020                         if (c == (UV)c1 || c == (UV)c2)
4021                         {
4022                             TRYPAREN(paren, ln, PL_reginput);
4023                             REGCP_UNWIND(lastcp);
4024                         }
4025                     }
4026                     /* If it could work, try it. */
4027                     else if (c1 == -1000)
4028                     {
4029                         TRYPAREN(paren, ln, PL_reginput);
4030                         REGCP_UNWIND(lastcp);
4031                     }
4032                     /* Couldn't or didn't -- move forward. */
4033                     PL_reginput = locinput;
4034                     if (regrepeat(scan, 1)) {
4035                         ln++;
4036                         locinput = PL_reginput;
4037                     }
4038                     else
4039                         sayNO;
4040                 }
4041             }
4042             else {
4043                 CHECKPOINT lastcp;
4044                 n = regrepeat(scan, n);
4045                 locinput = PL_reginput;
4046                 if (ln < n && PL_regkind[(U8)OP(next)] == EOL &&
4047                     (OP(next) != MEOL ||
4048                         OP(next) == SEOL || OP(next) == EOS))
4049                 {
4050                     ln = n;                     /* why back off? */
4051                     /* ...because $ and \Z can match before *and* after
4052                        newline at the end.  Consider "\n\n" =~ /\n+\Z\n/.
4053                        We should back off by one in this case. */
4054                     if (UCHARAT(PL_reginput - 1) == '\n' && OP(next) != EOS)
4055                         ln--;
4056                 }
4057                 REGCP_SET(lastcp);
4058                 if (paren) {
4059                     UV c = 0;
4060                     while (n >= ln) {
4061                         if (c1 != -1000) {
4062                             if (do_utf8)
4063                                 c = utf8n_to_uvchr((U8*)PL_reginput,
4064                                                    UTF8_MAXBYTES, 0,
4065                                                    ckWARN(WARN_UTF8) ?
4066                                                    0 : UTF8_ALLOW_ANY);
4067                             else
4068                                 c = UCHARAT(PL_reginput);
4069                         }
4070                         /* If it could work, try it. */
4071                         if (c1 == -1000 || c == (UV)c1 || c == (UV)c2)
4072                             {
4073                                 TRYPAREN(paren, n, PL_reginput);
4074                                 REGCP_UNWIND(lastcp);
4075                             }
4076                         /* Couldn't or didn't -- back up. */
4077                         n--;
4078                         PL_reginput = locinput = HOPc(locinput, -1);
4079                     }
4080                 }
4081                 else {
4082                     UV c = 0;
4083                     while (n >= ln) {
4084                         if (c1 != -1000) {
4085                             if (do_utf8)
4086                                 c = utf8n_to_uvchr((U8*)PL_reginput,
4087                                                    UTF8_MAXBYTES, 0,
4088                                                    ckWARN(WARN_UTF8) ?
4089                                                    0 : UTF8_ALLOW_ANY);
4090                             else
4091                                 c = UCHARAT(PL_reginput);
4092                         }
4093                         /* If it could work, try it. */
4094                         if (c1 == -1000 || c == (UV)c1 || c == (UV)c2)
4095                             {
4096                                 TRYPAREN(paren, n, PL_reginput);
4097                                 REGCP_UNWIND(lastcp);
4098                             }
4099                         /* Couldn't or didn't -- back up. */
4100                         n--;
4101                         PL_reginput = locinput = HOPc(locinput, -1);
4102                     }
4103                 }
4104             }
4105             sayNO;
4106             break;
4107         case END:
4108             if (PL_reg_call_cc) {
4109                 re_cc_state *cur_call_cc = PL_reg_call_cc;
4110                 CURCUR *cctmp = PL_regcc;
4111                 regexp *re = PL_reg_re;
4112                 CHECKPOINT cp, lastcp;
4113                 
4114                 cp = regcppush(0);      /* Save *all* the positions. */
4115                 REGCP_SET(lastcp);
4116                 regcp_set_to(PL_reg_call_cc->ss); /* Restore parens of
4117                                                     the caller. */
4118                 PL_reginput = locinput; /* Make position available to
4119                                            the callcc. */
4120                 cache_re(PL_reg_call_cc->re);
4121                 PL_regcc = PL_reg_call_cc->cc;
4122                 PL_reg_call_cc = PL_reg_call_cc->prev;
4123                 if (regmatch(cur_call_cc->node)) {
4124                     PL_reg_call_cc = cur_call_cc;
4125                     regcpblow(cp);
4126                     sayYES;
4127                 }
4128                 REGCP_UNWIND(lastcp);
4129                 regcppop();
4130                 PL_reg_call_cc = cur_call_cc;
4131                 PL_regcc = cctmp;
4132                 PL_reg_re = re;
4133                 cache_re(re);
4134
4135                 DEBUG_EXECUTE_r(
4136                     PerlIO_printf(Perl_debug_log,
4137                                   "%*s  continuation failed...\n",
4138                                   REPORT_CODE_OFF+PL_regindent*2, "")
4139                     );
4140                 sayNO_SILENT;
4141             }
4142             if (locinput < PL_regtill) {
4143                 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
4144                                       "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
4145                                       PL_colors[4],
4146                                       (long)(locinput - PL_reg_starttry),
4147                                       (long)(PL_regtill - PL_reg_starttry),
4148                                       PL_colors[5]));
4149                 sayNO_FINAL;            /* Cannot match: too short. */
4150             }
4151             PL_reginput = locinput;     /* put where regtry can find it */
4152             sayYES_FINAL;               /* Success! */
4153         case SUCCEED:
4154             PL_reginput = locinput;     /* put where regtry can find it */
4155             sayYES_LOUD;                /* Success! */
4156         case SUSPEND:
4157             n = 1;
4158             PL_reginput = locinput;
4159             goto do_ifmatch;    
4160         case UNLESSM:
4161             n = 0;
4162             if (scan->flags) {
4163                 s = HOPBACKc(locinput, scan->flags);
4164                 if (!s)
4165                     goto say_yes;
4166                 PL_reginput = s;
4167             }
4168             else
4169                 PL_reginput = locinput;
4170             PL_reg_flags ^= RF_false;
4171             goto do_ifmatch;
4172         case IFMATCH:
4173             n = 1;
4174             if (scan->flags) {
4175                 s = HOPBACKc(locinput, scan->flags);
4176                 if (!s)
4177                     goto say_no;
4178                 PL_reginput = s;
4179             }
4180             else
4181                 PL_reginput = locinput;
4182
4183           do_ifmatch:
4184             inner = NEXTOPER(NEXTOPER(scan));
4185             if (regmatch(inner) != n) {
4186                 if (n == 0)
4187                     PL_reg_flags ^= RF_false;
4188               say_no:
4189                 if (logical) {
4190                     logical = 0;
4191                     sw = 0;
4192                     goto do_longjump;
4193                 }
4194                 else
4195                     sayNO;
4196             }
4197             if (n == 0)
4198                 PL_reg_flags ^= RF_false;
4199           say_yes:
4200             if (logical) {
4201                 logical = 0;
4202                 sw = 1;
4203             }
4204             if (OP(scan) == SUSPEND) {
4205                 locinput = PL_reginput;
4206                 nextchr = UCHARAT(locinput);
4207             }
4208             /* FALL THROUGH. */
4209         case LONGJMP:
4210           do_longjump:
4211             next = scan + ARG(scan);
4212             if (next == scan)
4213                 next = NULL;
4214             break;
4215         default:
4216             PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
4217                           PTR2UV(scan), OP(scan));
4218             Perl_croak(aTHX_ "regexp memory corruption");
4219         }
4220       reenter:
4221         scan = next;
4222     }
4223
4224     /*
4225     * We get here only if there's trouble -- normally "case END" is
4226     * the terminating point.
4227     */
4228     Perl_croak(aTHX_ "corrupted regexp pointers");
4229     /*NOTREACHED*/
4230     sayNO;
4231
4232 yes_loud:
4233     DEBUG_EXECUTE_r(
4234         PerlIO_printf(Perl_debug_log,
4235                       "%*s  %scould match...%s\n",
4236                       REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],PL_colors[5])
4237         );
4238     goto yes;
4239 yes_final:
4240     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
4241                           PL_colors[4],PL_colors[5]));
4242 yes:
4243 #ifdef DEBUGGING
4244     PL_regindent--;
4245 #endif
4246
4247 #if 0                                   /* Breaks $^R */
4248     if (unwind)
4249         regcpblow(firstcp);
4250 #endif
4251     return 1;
4252
4253 no:
4254     DEBUG_EXECUTE_r(
4255         PerlIO_printf(Perl_debug_log,
4256                       "%*s  %sfailed...%s\n",
4257                       REPORT_CODE_OFF+PL_regindent*2, "",PL_colors[4],PL_colors[5])
4258         );
4259     goto do_no;
4260 no_final:
4261 do_no:
4262     if (unwind) {
4263         re_unwind_t *uw = SSPTRt(unwind,re_unwind_t);
4264
4265         switch (uw->type) {
4266         case RE_UNWIND_BRANCH:
4267         case RE_UNWIND_BRANCHJ:
4268         {
4269             re_unwind_branch_t *uwb = &(uw->branch);
4270             I32 lastparen = uwb->lastparen;
4271         
4272             REGCP_UNWIND(uwb->lastcp);
4273             for (n = *PL_reglastparen; n > lastparen; n--)
4274                 PL_regendp[n] = -1;
4275             *PL_reglastparen = n;
4276             scan = next = uwb->next;
4277             if ( !scan ||
4278                  OP(scan) != (uwb->type == RE_UNWIND_BRANCH
4279                               ? BRANCH : BRANCHJ) ) {           /* Failure */
4280                 unwind = uwb->prev;
4281 #ifdef DEBUGGING
4282                 PL_regindent--;
4283 #endif
4284                 goto do_no;
4285             }
4286             /* Have more choice yet.  Reuse the same uwb.  */
4287             /*SUPPRESS 560*/
4288             if ((n = (uwb->type == RE_UNWIND_BRANCH
4289                       ? NEXT_OFF(next) : ARG(next))))
4290                 next += n;
4291             else
4292                 next = NULL;    /* XXXX Needn't unwinding in this case... */
4293             uwb->next = next;
4294             next = NEXTOPER(scan);
4295             if (uwb->type == RE_UNWIND_BRANCHJ)
4296                 next = NEXTOPER(next);
4297             locinput = uwb->locinput;
4298             nextchr = uwb->nextchr;
4299 #ifdef DEBUGGING
4300             PL_regindent = uwb->regindent;
4301 #endif
4302
4303             goto reenter;
4304         }
4305         /* NOT REACHED */
4306         default:
4307             Perl_croak(aTHX_ "regexp unwind memory corruption");
4308         }
4309         /* NOT REACHED */
4310     }
4311 #ifdef DEBUGGING
4312     PL_regindent--;
4313 #endif
4314     return 0;
4315 }
4316
4317 /*
4318  - regrepeat - repeatedly match something simple, report how many
4319  */
4320 /*
4321  * [This routine now assumes that it will only match on things of length 1.
4322  * That was true before, but now we assume scan - reginput is the count,
4323  * rather than incrementing count on every character.  [Er, except utf8.]]
4324  */
4325 STATIC I32
4326 S_regrepeat(pTHX_ regnode *p, I32 max)
4327 {
4328     register char *scan;
4329     register I32 c;
4330     register char *loceol = PL_regeol;
4331     register I32 hardcount = 0;
4332     register bool do_utf8 = PL_reg_match_utf8;
4333
4334     scan = PL_reginput;
4335     if (max == REG_INFTY)
4336         max = I32_MAX;
4337     else if (max < loceol - scan)
4338       loceol = scan + max;
4339     switch (OP(p)) {
4340     case REG_ANY:
4341         if (do_utf8) {
4342             loceol = PL_regeol;
4343             while (scan < loceol && hardcount < max && *scan != '\n') {
4344                 scan += UTF8SKIP(scan);
4345                 hardcount++;
4346             }
4347         } else {
4348             while (scan < loceol && *scan != '\n')
4349                 scan++;
4350         }
4351         break;
4352     case SANY:
4353         if (do_utf8) {
4354             loceol = PL_regeol;
4355             while (scan < loceol && hardcount < max) {
4356                 scan += UTF8SKIP(scan);
4357                 hardcount++;
4358             }
4359         }
4360         else
4361             scan = loceol;
4362         break;
4363     case CANY:
4364         scan = loceol;
4365         break;
4366     case EXACT:         /* length of string is 1 */
4367         c = (U8)*STRING(p);
4368         while (scan < loceol && UCHARAT(scan) == c)
4369             scan++;
4370         break;
4371     case EXACTF:        /* length of string is 1 */
4372         c = (U8)*STRING(p);
4373         while (scan < loceol &&
4374                (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
4375             scan++;
4376         break;
4377     case EXACTFL:       /* length of string is 1 */
4378         PL_reg_flags |= RF_tainted;
4379         c = (U8)*STRING(p);
4380         while (scan < loceol &&
4381                (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
4382             scan++;
4383         break;
4384     case ANYOF:
4385         if (do_utf8) {
4386             loceol = PL_regeol;
4387             while (hardcount < max && scan < loceol &&
4388                    reginclass(p, (U8*)scan, 0, do_utf8)) {
4389                 scan += UTF8SKIP(scan);
4390                 hardcount++;
4391             }
4392         } else {
4393             while (scan < loceol && REGINCLASS(p, (U8*)scan))
4394                 scan++;
4395         }
4396         break;
4397     case ALNUM:
4398         if (do_utf8) {
4399             loceol = PL_regeol;
4400             LOAD_UTF8_CHARCLASS(alnum,"a");
4401             while (hardcount < max && scan < loceol &&
4402                    swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
4403                 scan += UTF8SKIP(scan);
4404                 hardcount++;
4405             }
4406         } else {
4407             while (scan < loceol && isALNUM(*scan))
4408                 scan++;
4409         }
4410         break;
4411     case ALNUML:
4412         PL_reg_flags |= RF_tainted;
4413         if (do_utf8) {
4414             loceol = PL_regeol;
4415             while (hardcount < max && scan < loceol &&
4416                    isALNUM_LC_utf8((U8*)scan)) {
4417                 scan += UTF8SKIP(scan);
4418                 hardcount++;
4419             }
4420         } else {
4421             while (scan < loceol && isALNUM_LC(*scan))
4422                 scan++;
4423         }
4424         break;
4425     case NALNUM:
4426         if (do_utf8) {
4427             loceol = PL_regeol;
4428             LOAD_UTF8_CHARCLASS(alnum,"a");
4429             while (hardcount < max && scan < loceol &&
4430                    !swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
4431                 scan += UTF8SKIP(scan);
4432                 hardcount++;
4433             }
4434         } else {
4435             while (scan < loceol && !isALNUM(*scan))
4436                 scan++;
4437         }
4438         break;
4439     case NALNUML:
4440         PL_reg_flags |= RF_tainted;
4441         if (do_utf8) {
4442             loceol = PL_regeol;
4443             while (hardcount < max && scan < loceol &&
4444                    !isALNUM_LC_utf8((U8*)scan)) {
4445                 scan += UTF8SKIP(scan);
4446                 hardcount++;
4447             }
4448         } else {
4449             while (scan < loceol && !isALNUM_LC(*scan))
4450                 scan++;
4451         }
4452         break;
4453     case SPACE:
4454         if (do_utf8) {
4455             loceol = PL_regeol;
4456             LOAD_UTF8_CHARCLASS(space," ");
4457             while (hardcount < max && scan < loceol &&
4458                    (*scan == ' ' ||
4459                     swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
4460                 scan += UTF8SKIP(scan);
4461                 hardcount++;
4462             }
4463         } else {
4464             while (scan < loceol && isSPACE(*scan))
4465                 scan++;
4466         }
4467         break;
4468     case SPACEL:
4469         PL_reg_flags |= RF_tainted;
4470         if (do_utf8) {
4471             loceol = PL_regeol;
4472             while (hardcount < max && scan < loceol &&
4473                    (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
4474                 scan += UTF8SKIP(scan);
4475                 hardcount++;
4476             }
4477         } else {
4478             while (scan < loceol && isSPACE_LC(*scan))
4479                 scan++;
4480         }
4481         break;
4482     case NSPACE:
4483         if (do_utf8) {
4484             loceol = PL_regeol;
4485             LOAD_UTF8_CHARCLASS(space," ");
4486             while (hardcount < max && scan < loceol &&
4487                    !(*scan == ' ' ||
4488                      swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
4489                 scan += UTF8SKIP(scan);
4490                 hardcount++;
4491             }
4492         } else {
4493             while (scan < loceol && !isSPACE(*scan))
4494                 scan++;
4495             break;
4496         }
4497     case NSPACEL:
4498         PL_reg_flags |= RF_tainted;
4499         if (do_utf8) {
4500             loceol = PL_regeol;
4501             while (hardcount < max && scan < loceol &&
4502                    !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
4503                 scan += UTF8SKIP(scan);
4504                 hardcount++;
4505             }
4506         } else {
4507             while (scan < loceol && !isSPACE_LC(*scan))
4508                 scan++;
4509         }
4510         break;
4511     case DIGIT:
4512         if (do_utf8) {
4513             loceol = PL_regeol;
4514             LOAD_UTF8_CHARCLASS(digit,"0");
4515             while (hardcount < max && scan < loceol &&
4516                    swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
4517                 scan += UTF8SKIP(scan);
4518                 hardcount++;
4519             }
4520         } else {
4521             while (scan < loceol && isDIGIT(*scan))
4522                 scan++;
4523         }
4524         break;
4525     case NDIGIT:
4526         if (do_utf8) {
4527             loceol = PL_regeol;
4528             LOAD_UTF8_CHARCLASS(digit,"0");
4529             while (hardcount < max && scan < loceol &&
4530                    !swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
4531                 scan += UTF8SKIP(scan);
4532                 hardcount++;
4533             }
4534         } else {
4535             while (scan < loceol && !isDIGIT(*scan))
4536                 scan++;
4537         }
4538         break;
4539     default:            /* Called on something of 0 width. */
4540         break;          /* So match right here or not at all. */
4541     }
4542
4543     if (hardcount)
4544         c = hardcount;
4545     else
4546         c = scan - PL_reginput;
4547     PL_reginput = scan;
4548
4549     DEBUG_r({
4550                 SV *re_debug_flags;
4551                 SV *prop = sv_newmortal();
4552                 GET_RE_DEBUG_FLAGS;
4553                 DEBUG_EXECUTE_r({
4554                 regprop(prop, p);
4555                 PerlIO_printf(Perl_debug_log,
4556                               "%*s  %s can match %"IVdf" times out of %"IVdf"...\n",
4557                               REPORT_CODE_OFF+1, "", SvPVX(prop),(IV)c,(IV)max);
4558         });
4559         });
4560
4561     return(c);
4562 }
4563
4564 /*
4565  - regrepeat_hard - repeatedly match something, report total lenth and length
4566  *
4567  * The repeater is supposed to have constant non-zero length.
4568  */
4569
4570 STATIC I32
4571 S_regrepeat_hard(pTHX_ regnode *p, I32 max, I32 *lp)
4572 {
4573     register char *scan = Nullch;
4574     register char *start;
4575     register char *loceol = PL_regeol;
4576     I32 l = 0;
4577     I32 count = 0, res = 1;
4578
4579     if (!max)
4580         return 0;
4581
4582     start = PL_reginput;
4583     if (PL_reg_match_utf8) {
4584         while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
4585             if (!count++) {
4586                 l = 0;
4587                 while (start < PL_reginput) {
4588                     l++;
4589                     start += UTF8SKIP(start);
4590                 }
4591                 *lp = l;
4592                 if (l == 0)
4593                     return max;
4594             }
4595             if (count == max)
4596                 return count;
4597         }
4598     }
4599     else {
4600         while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
4601             if (!count++) {
4602                 *lp = l = PL_reginput - start;
4603                 if (max != REG_INFTY && l*max < loceol - scan)
4604                     loceol = scan + l*max;
4605                 if (l == 0)
4606                     return max;
4607             }
4608         }
4609     }
4610     if (!res)
4611         PL_reginput = scan;
4612
4613     return count;
4614 }
4615
4616 /*
4617 - regclass_swash - prepare the utf8 swash
4618 */
4619
4620 SV *
4621 Perl_regclass_swash(pTHX_ register regnode* node, bool doinit, SV** listsvp, SV **altsvp)
4622 {
4623     SV *sw  = NULL;
4624     SV *si  = NULL;
4625     SV *alt = NULL;
4626
4627     if (PL_regdata && PL_regdata->count) {
4628         U32 n = ARG(node);
4629
4630         if (PL_regdata->what[n] == 's') {
4631             SV *rv = (SV*)PL_regdata->data[n];
4632             AV *av = (AV*)SvRV((SV*)rv);
4633             SV **ary = AvARRAY(av);
4634             SV **a, **b;
4635         
4636             /* See the end of regcomp.c:S_reglass() for
4637              * documentation of these array elements. */
4638
4639             si = *ary;
4640             a  = SvTYPE(ary[1]) == SVt_RV   ? &ary[1] : 0;
4641             b  = SvTYPE(ary[2]) == SVt_PVAV ? &ary[2] : 0;
4642
4643             if (a)
4644                 sw = *a;
4645             else if (si && doinit) {
4646                 sw = swash_init("utf8", "", si, 1, 0);
4647                 (void)av_store(av, 1, sw);
4648             }
4649             if (b)
4650                 alt = *b;
4651         }
4652     }
4653         
4654     if (listsvp)
4655         *listsvp = si;
4656     if (altsvp)
4657         *altsvp  = alt;
4658
4659     return sw;
4660 }
4661
4662 /*
4663  - reginclass - determine if a character falls into a character class
4664  
4665   The n is the ANYOF regnode, the p is the target string, lenp
4666   is pointer to the maximum length of how far to go in the p
4667   (if the lenp is zero, UTF8SKIP(p) is used),
4668   do_utf8 tells whether the target string is in UTF-8.
4669
4670  */
4671
4672 STATIC bool
4673 S_reginclass(pTHX_ register regnode *n, register U8* p, STRLEN* lenp, register bool do_utf8)
4674 {
4675     char flags = ANYOF_FLAGS(n);
4676     bool match = FALSE;
4677     UV c = *p;
4678     STRLEN len = 0;
4679     STRLEN plen;
4680
4681     if (do_utf8 && !UTF8_IS_INVARIANT(c))
4682          c = utf8n_to_uvchr(p, UTF8_MAXBYTES, &len,
4683                             ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
4684
4685     plen = lenp ? *lenp : UNISKIP(NATIVE_TO_UNI(c));
4686     if (do_utf8 || (flags & ANYOF_UNICODE)) {
4687         if (lenp)
4688             *lenp = 0;
4689         if (do_utf8 && !ANYOF_RUNTIME(n)) {
4690             if (len != (STRLEN)-1 && c < 256 && ANYOF_BITMAP_TEST(n, c))
4691                 match = TRUE;
4692         }
4693         if (!match && do_utf8 && (flags & ANYOF_UNICODE_ALL) && c >= 256)
4694             match = TRUE;
4695         if (!match) {
4696             AV *av;
4697             SV *sw = regclass_swash(n, TRUE, 0, (SV**)&av);
4698         
4699             if (sw) {
4700                 if (swash_fetch(sw, p, do_utf8))
4701                     match = TRUE;
4702                 else if (flags & ANYOF_FOLD) {
4703                     if (!match && lenp && av) {
4704                         I32 i;
4705                       
4706                         for (i = 0; i <= av_len(av); i++) {
4707                             SV* sv = *av_fetch(av, i, FALSE);
4708                             STRLEN len;
4709                             char *s = SvPV(sv, len);
4710                         
4711                             if (len <= plen && memEQ(s, (char*)p, len)) {
4712                                 *lenp = len;
4713                                 match = TRUE;
4714                                 break;
4715                             }
4716                         }
4717                     }
4718                     if (!match) {
4719                         U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
4720                         STRLEN tmplen;
4721
4722                         to_utf8_fold(p, tmpbuf, &tmplen);
4723                         if (swash_fetch(sw, tmpbuf, do_utf8))
4724                             match = TRUE;
4725                     }
4726                 }
4727             }
4728         }
4729         if (match && lenp && *lenp == 0)
4730             *lenp = UNISKIP(NATIVE_TO_UNI(c));
4731     }
4732     if (!match && c < 256) {
4733         if (ANYOF_BITMAP_TEST(n, c))
4734             match = TRUE;
4735         else if (flags & ANYOF_FOLD) {
4736             U8 f;
4737
4738             if (flags & ANYOF_LOCALE) {
4739                 PL_reg_flags |= RF_tainted;
4740                 f = PL_fold_locale[c];
4741             }
4742             else
4743                 f = PL_fold[c];
4744             if (f != c && ANYOF_BITMAP_TEST(n, f))
4745                 match = TRUE;
4746         }
4747         
4748         if (!match && (flags & ANYOF_CLASS)) {
4749             PL_reg_flags |= RF_tainted;
4750             if (
4751                 (ANYOF_CLASS_TEST(n, ANYOF_ALNUM)   &&  isALNUM_LC(c))  ||
4752                 (ANYOF_CLASS_TEST(n, ANYOF_NALNUM)  && !isALNUM_LC(c))  ||
4753                 (ANYOF_CLASS_TEST(n, ANYOF_SPACE)   &&  isSPACE_LC(c))  ||
4754                 (ANYOF_CLASS_TEST(n, ANYOF_NSPACE)  && !isSPACE_LC(c))  ||
4755                 (ANYOF_CLASS_TEST(n, ANYOF_DIGIT)   &&  isDIGIT_LC(c))  ||
4756                 (ANYOF_CLASS_TEST(n, ANYOF_NDIGIT)  && !isDIGIT_LC(c))  ||
4757                 (ANYOF_CLASS_TEST(n, ANYOF_ALNUMC)  &&  isALNUMC_LC(c)) ||
4758                 (ANYOF_CLASS_TEST(n, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
4759                 (ANYOF_CLASS_TEST(n, ANYOF_ALPHA)   &&  isALPHA_LC(c))  ||
4760                 (ANYOF_CLASS_TEST(n, ANYOF_NALPHA)  && !isALPHA_LC(c))  ||
4761                 (ANYOF_CLASS_TEST(n, ANYOF_ASCII)   &&  isASCII(c))     ||
4762                 (ANYOF_CLASS_TEST(n, ANYOF_NASCII)  && !isASCII(c))     ||
4763                 (ANYOF_CLASS_TEST(n, ANYOF_CNTRL)   &&  isCNTRL_LC(c))  ||
4764                 (ANYOF_CLASS_TEST(n, ANYOF_NCNTRL)  && !isCNTRL_LC(c))  ||
4765                 (ANYOF_CLASS_TEST(n, ANYOF_GRAPH)   &&  isGRAPH_LC(c))  ||
4766                 (ANYOF_CLASS_TEST(n, ANYOF_NGRAPH)  && !isGRAPH_LC(c))  ||
4767                 (ANYOF_CLASS_TEST(n, ANYOF_LOWER)   &&  isLOWER_LC(c))  ||
4768                 (ANYOF_CLASS_TEST(n, ANYOF_NLOWER)  && !isLOWER_LC(c))  ||
4769                 (ANYOF_CLASS_TEST(n, ANYOF_PRINT)   &&  isPRINT_LC(c))  ||
4770                 (ANYOF_CLASS_TEST(n, ANYOF_NPRINT)  && !isPRINT_LC(c))  ||
4771                 (ANYOF_CLASS_TEST(n, ANYOF_PUNCT)   &&  isPUNCT_LC(c))  ||
4772                 (ANYOF_CLASS_TEST(n, ANYOF_NPUNCT)  && !isPUNCT_LC(c))  ||
4773                 (ANYOF_CLASS_TEST(n, ANYOF_UPPER)   &&  isUPPER_LC(c))  ||
4774                 (ANYOF_CLASS_TEST(n, ANYOF_NUPPER)  && !isUPPER_LC(c))  ||
4775                 (ANYOF_CLASS_TEST(n, ANYOF_XDIGIT)  &&  isXDIGIT(c))    ||
4776                 (ANYOF_CLASS_TEST(n, ANYOF_NXDIGIT) && !isXDIGIT(c))    ||
4777                 (ANYOF_CLASS_TEST(n, ANYOF_PSXSPC)  &&  isPSXSPC(c))    ||
4778                 (ANYOF_CLASS_TEST(n, ANYOF_NPSXSPC) && !isPSXSPC(c))    ||
4779                 (ANYOF_CLASS_TEST(n, ANYOF_BLANK)   &&  isBLANK(c))     ||
4780                 (ANYOF_CLASS_TEST(n, ANYOF_NBLANK)  && !isBLANK(c))
4781                 ) /* How's that for a conditional? */
4782             {
4783                 match = TRUE;
4784             }
4785         }
4786     }
4787
4788     return (flags & ANYOF_INVERT) ? !match : match;
4789 }
4790
4791 STATIC U8 *
4792 S_reghop(pTHX_ U8 *s, I32 off)
4793 {
4794     return S_reghop3(aTHX_ s, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr));
4795 }
4796
4797 STATIC U8 *
4798 S_reghop3(pTHX_ U8 *s, I32 off, U8* lim)
4799 {
4800     if (off >= 0) {
4801         while (off-- && s < lim) {
4802             /* XXX could check well-formedness here */
4803             s += UTF8SKIP(s);
4804         }
4805     }
4806     else {
4807         while (off++) {
4808             if (s > lim) {
4809                 s--;
4810                 if (UTF8_IS_CONTINUED(*s)) {
4811                     while (s > (U8*)lim && UTF8_IS_CONTINUATION(*s))
4812                         s--;
4813                 }
4814                 /* XXX could check well-formedness here */
4815             }
4816         }
4817     }
4818     return s;
4819 }
4820
4821 STATIC U8 *
4822 S_reghopmaybe(pTHX_ U8 *s, I32 off)
4823 {
4824     return S_reghopmaybe3(aTHX_ s, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr));
4825 }
4826
4827 STATIC U8 *
4828 S_reghopmaybe3(pTHX_ U8* s, I32 off, U8* lim)
4829 {
4830     if (off >= 0) {
4831         while (off-- && s < lim) {
4832             /* XXX could check well-formedness here */
4833             s += UTF8SKIP(s);
4834         }
4835         if (off >= 0)
4836             return 0;
4837     }
4838     else {
4839         while (off++) {
4840             if (s > lim) {
4841                 s--;
4842                 if (UTF8_IS_CONTINUED(*s)) {
4843                     while (s > (U8*)lim && UTF8_IS_CONTINUATION(*s))
4844                         s--;
4845                 }
4846                 /* XXX could check well-formedness here */
4847             }
4848             else
4849                 break;
4850         }
4851         if (off <= 0)
4852             return 0;
4853     }
4854     return s;
4855 }
4856
4857 static void
4858 restore_pos(pTHX_ void *arg)
4859 {
4860     if (PL_reg_eval_set) {
4861         if (PL_reg_oldsaved) {
4862             PL_reg_re->subbeg = PL_reg_oldsaved;
4863             PL_reg_re->sublen = PL_reg_oldsavedlen;
4864 #ifdef PERL_COPY_ON_WRITE
4865             PL_reg_re->saved_copy = PL_nrs;
4866 #endif
4867             RX_MATCH_COPIED_on(PL_reg_re);
4868         }
4869         PL_reg_magic->mg_len = PL_reg_oldpos;
4870         PL_reg_eval_set = 0;
4871         PL_curpm = PL_reg_oldcurpm;
4872     }   
4873 }
4874
4875 STATIC void
4876 S_to_utf8_substr(pTHX_ register regexp *prog)
4877 {
4878     SV* sv;
4879     if (prog->float_substr && !prog->float_utf8) {
4880         prog->float_utf8 = sv = newSVsv(prog->float_substr);
4881         sv_utf8_upgrade(sv);
4882         if (SvTAIL(prog->float_substr))
4883             SvTAIL_on(sv);
4884         if (prog->float_substr == prog->check_substr)
4885             prog->check_utf8 = sv;
4886     }
4887     if (prog->anchored_substr && !prog->anchored_utf8) {
4888         prog->anchored_utf8 = sv = newSVsv(prog->anchored_substr);
4889         sv_utf8_upgrade(sv);
4890         if (SvTAIL(prog->anchored_substr))
4891             SvTAIL_on(sv);
4892         if (prog->anchored_substr == prog->check_substr)
4893             prog->check_utf8 = sv;
4894     }
4895 }
4896
4897 STATIC void
4898 S_to_byte_substr(pTHX_ register regexp *prog)
4899 {
4900     SV* sv;
4901     if (prog->float_utf8 && !prog->float_substr) {
4902         prog->float_substr = sv = newSVsv(prog->float_utf8);
4903         if (sv_utf8_downgrade(sv, TRUE)) {
4904             if (SvTAIL(prog->float_utf8))
4905                 SvTAIL_on(sv);
4906         } else {
4907             SvREFCNT_dec(sv);
4908             prog->float_substr = sv = &PL_sv_undef;
4909         }
4910         if (prog->float_utf8 == prog->check_utf8)
4911             prog->check_substr = sv;
4912     }
4913     if (prog->anchored_utf8 && !prog->anchored_substr) {
4914         prog->anchored_substr = sv = newSVsv(prog->anchored_utf8);
4915         if (sv_utf8_downgrade(sv, TRUE)) {
4916             if (SvTAIL(prog->anchored_utf8))
4917                 SvTAIL_on(sv);
4918         } else {
4919             SvREFCNT_dec(sv);
4920             prog->anchored_substr = sv = &PL_sv_undef;
4921         }
4922         if (prog->anchored_utf8 == prog->check_utf8)
4923             prog->check_substr = sv;
4924     }
4925 }