POSIX [[:character class:]] support for standard, locale,
[p5sagit/p5-mst-13.2.git] / regexec.c
1 /*    regexec.c
2  */
3
4 /*
5  * "One Ring to rule them all, One Ring to find them..."
6  */
7
8 /* NOTE: this is derived from Henry Spencer's regexp code, and should not
9  * confused with the original package (see point 3 below).  Thanks, Henry!
10  */
11
12 /* Additional note: this code is very heavily munged from Henry's version
13  * in places.  In some spots I've traded clarity for efficiency, so don't
14  * blame Henry for some of the lack of readability.
15  */
16
17 /* The names of the functions have been changed from regcomp and
18  * regexec to  pregcomp and pregexec in order to avoid conflicts
19  * with the POSIX routines of the same names.
20 */
21
22 #ifdef PERL_EXT_RE_BUILD
23 /* need to replace pregcomp et al, so enable that */
24 #  ifndef PERL_IN_XSUB_RE
25 #    define PERL_IN_XSUB_RE
26 #  endif
27 /* need access to debugger hooks */
28 #  if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING)
29 #    define DEBUGGING
30 #  endif
31 #endif
32
33 #ifdef PERL_IN_XSUB_RE
34 /* We *really* need to overwrite these symbols: */
35 #  define Perl_regexec_flags my_regexec
36 #  define Perl_regdump my_regdump
37 #  define Perl_regprop my_regprop
38 #  define Perl_re_intuit_start my_re_intuit_start
39 /* *These* symbols are masked to allow static link. */
40 #  define Perl_pregexec my_pregexec
41 #  define Perl_reginitcolors my_reginitcolors 
42 #endif 
43
44 /*SUPPRESS 112*/
45 /*
46  * pregcomp and pregexec -- regsub and regerror are not used in perl
47  *
48  *      Copyright (c) 1986 by University of Toronto.
49  *      Written by Henry Spencer.  Not derived from licensed software.
50  *
51  *      Permission is granted to anyone to use this software for any
52  *      purpose on any computer system, and to redistribute it freely,
53  *      subject to the following restrictions:
54  *
55  *      1. The author is not responsible for the consequences of use of
56  *              this software, no matter how awful, even if they arise
57  *              from defects in it.
58  *
59  *      2. The origin of this software must not be misrepresented, either
60  *              by explicit claim or by omission.
61  *
62  *      3. Altered versions must be plainly marked as such, and must not
63  *              be misrepresented as being the original software.
64  *
65  ****    Alterations to Henry's code are...
66  ****
67  ****    Copyright (c) 1991-1999, Larry Wall
68  ****
69  ****    You may distribute under the terms of either the GNU General Public
70  ****    License or the Artistic License, as specified in the README file.
71  *
72  * Beware that some of this code is subtly aware of the way operator
73  * precedence is structured in regular expressions.  Serious changes in
74  * regular-expression syntax might require a total rethink.
75  */
76 #include "EXTERN.h"
77 #define PERL_IN_REGEXEC_C
78 #include "perl.h"
79
80 #include "regcomp.h"
81
82 #define RF_tainted      1               /* tainted information used? */
83 #define RF_warned       2               /* warned about big count? */
84 #define RF_evaled       4               /* Did an EVAL with setting? */
85 #define RF_utf8         8               /* String contains multibyte chars? */
86
87 #define UTF (PL_reg_flags & RF_utf8)
88
89 #define RS_init         1               /* eval environment created */
90 #define RS_set          2               /* replsv value is set */
91
92 #ifndef STATIC
93 #define STATIC  static
94 #endif
95
96 /*
97  * Forwards.
98  */
99
100 #define REGINCLASS(p,c)  (ANYOF_FLAGS(p) ? reginclass(p,c) : ANYOF_BITMAP_TEST(p,c))
101 #define REGINCLASSUTF8(f,p)  (ARG1(f) ? reginclassutf8(f,p) : swash_fetch((SV*)PL_regdata->data[ARG2(f)],p))
102
103 #define CHR_SVLEN(sv) (UTF ? sv_len_utf8(sv) : SvCUR(sv))
104 #define CHR_DIST(a,b) (UTF ? utf8_distance(a,b) : a - b)
105
106 #define reghop_c(pos,off) ((char*)reghop((U8*)pos, off))
107 #define reghopmaybe_c(pos,off) ((char*)reghopmaybe((U8*)pos, off))
108 #define HOP(pos,off) (UTF ? reghop((U8*)pos, off) : (U8*)(pos + off))
109 #define HOPMAYBE(pos,off) (UTF ? reghopmaybe((U8*)pos, off) : (U8*)(pos + off))
110 #define HOPc(pos,off) ((char*)HOP(pos,off))
111 #define HOPMAYBEc(pos,off) ((char*)HOPMAYBE(pos,off))
112
113 STATIC CHECKPOINT
114 S_regcppush(pTHX_ I32 parenfloor)
115 {
116     dTHR;
117     int retval = PL_savestack_ix;
118     int i = (PL_regsize - parenfloor) * 4;
119     int p;
120
121     SSCHECK(i + 5);
122     for (p = PL_regsize; p > parenfloor; p--) {
123         SSPUSHINT(PL_regendp[p]);
124         SSPUSHINT(PL_regstartp[p]);
125         SSPUSHPTR(PL_reg_start_tmp[p]);
126         SSPUSHINT(p);
127     }
128     SSPUSHINT(PL_regsize);
129     SSPUSHINT(*PL_reglastparen);
130     SSPUSHPTR(PL_reginput);
131     SSPUSHINT(i + 3);
132     SSPUSHINT(SAVEt_REGCONTEXT);
133     return retval;
134 }
135
136 /* These are needed since we do not localize EVAL nodes: */
137 #  define REGCP_SET  DEBUG_r(PerlIO_printf(Perl_debug_log,              \
138                              "  Setting an EVAL scope, savestack=%i\n", \
139                              PL_savestack_ix)); lastcp = PL_savestack_ix
140
141 #  define REGCP_UNWIND  DEBUG_r(lastcp != PL_savestack_ix ?             \
142                                 PerlIO_printf(Perl_debug_log,           \
143                                 "  Clearing an EVAL scope, savestack=%i..%i\n", \
144                                 lastcp, PL_savestack_ix) : 0); regcpblow(lastcp)
145
146 STATIC char *
147 S_regcppop(pTHX)
148 {
149     dTHR;
150     I32 i = SSPOPINT;
151     U32 paren = 0;
152     char *input;
153     I32 tmps;
154     assert(i == SAVEt_REGCONTEXT);
155     i = SSPOPINT;
156     input = (char *) SSPOPPTR;
157     *PL_reglastparen = SSPOPINT;
158     PL_regsize = SSPOPINT;
159     for (i -= 3; i > 0; i -= 4) {
160         paren = (U32)SSPOPINT;
161         PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
162         PL_regstartp[paren] = SSPOPINT;
163         tmps = SSPOPINT;
164         if (paren <= *PL_reglastparen)
165             PL_regendp[paren] = tmps;
166         DEBUG_r(
167             PerlIO_printf(Perl_debug_log,
168                           "     restoring \\%d to %d(%d)..%d%s\n",
169                           paren, PL_regstartp[paren], 
170                           PL_reg_start_tmp[paren] - PL_bostr,
171                           PL_regendp[paren], 
172                           (paren > *PL_reglastparen ? "(no)" : ""));
173         );
174     }
175     DEBUG_r(
176         if (*PL_reglastparen + 1 <= PL_regnpar) {
177             PerlIO_printf(Perl_debug_log,
178                           "     restoring \\%d..\\%d to undef\n",
179                           *PL_reglastparen + 1, PL_regnpar);
180         }
181     );
182     for (paren = *PL_reglastparen + 1; paren <= PL_regnpar; paren++) {
183         if (paren > PL_regsize)
184             PL_regstartp[paren] = -1;
185         PL_regendp[paren] = -1;
186     }
187     return input;
188 }
189
190 STATIC char *
191 S_regcp_set_to(pTHX_ I32 ss)
192 {
193     dTHR;
194     I32 tmp = PL_savestack_ix;
195
196     PL_savestack_ix = ss;
197     regcppop();
198     PL_savestack_ix = tmp;
199     return Nullch;
200 }
201
202 typedef struct re_cc_state
203 {
204     I32 ss;
205     regnode *node;
206     struct re_cc_state *prev;
207     CURCUR *cc;
208     regexp *re;
209 } re_cc_state;
210
211 #define regcpblow(cp) LEAVE_SCOPE(cp)
212
213 /*
214  * pregexec and friends
215  */
216
217 /*
218  - pregexec - match a regexp against a string
219  */
220 I32
221 Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend,
222          char *strbeg, I32 minend, SV *screamer, U32 nosave)
223 /* strend: pointer to null at end of string */
224 /* strbeg: real beginning of string */
225 /* minend: end of match must be >=minend after stringarg. */
226 /* nosave: For optimizations. */
227 {
228     return
229         regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL, 
230                       nosave ? 0 : REXEC_COPY_STR);
231 }
232
233 STATIC void
234 S_cache_re(pTHX_ regexp *prog)
235 {
236     dTHR;
237     PL_regprecomp = prog->precomp;              /* Needed for FAIL. */
238 #ifdef DEBUGGING
239     PL_regprogram = prog->program;
240 #endif
241     PL_regnpar = prog->nparens;
242     PL_regdata = prog->data;    
243     PL_reg_re = prog;    
244 }
245
246 STATIC void
247 S_restore_pos(pTHX_ void *arg)
248 {
249     dTHR;
250     if (PL_reg_eval_set) {
251         if (PL_reg_oldsaved) {
252             PL_reg_re->subbeg = PL_reg_oldsaved;
253             PL_reg_re->sublen = PL_reg_oldsavedlen;
254             RX_MATCH_COPIED_on(PL_reg_re);
255         }
256         PL_reg_magic->mg_len = PL_reg_oldpos;
257         PL_reg_eval_set = 0;
258         PL_curpm = PL_reg_oldcurpm;
259     }   
260 }
261
262 /* 
263  * Need to implement the following flags for reg_anch:
264  *
265  * USE_INTUIT_NOML              - Useful to call re_intuit_start() first
266  * USE_INTUIT_ML
267  * INTUIT_AUTORITATIVE_NOML     - Can trust a positive answer
268  * INTUIT_AUTORITATIVE_ML
269  * INTUIT_ONCE_NOML             - Intuit can match in one location only.
270  * INTUIT_ONCE_ML
271  *
272  * Another flag for this function: SECOND_TIME (so that float substrs
273  * with giant delta may be not rechecked).
274  */
275
276 /* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
277
278 /* If SCREAM, then sv should be compatible with strpos and strend.
279    Otherwise, only SvCUR(sv) is used to get strbeg. */
280
281 /* XXXX We assume that strpos is strbeg unless sv. */
282
283 char *
284 Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
285                      char *strend, U32 flags, re_scream_pos_data *data)
286 {
287     I32 start_shift;
288     /* Should be nonnegative! */
289     I32 end_shift;
290     char *s;
291     char *t;
292     I32 ml_anch;
293
294     DEBUG_r( if (!PL_colorset) reginitcolors() );
295     DEBUG_r(PerlIO_printf(Perl_debug_log,
296                       "%sGuessing start of match:%s `%s%.60s%s%s' against `%s%.*s%s%s'\n",
297                       PL_colors[4],PL_colors[5],PL_colors[0],
298                       prog->precomp,
299                       PL_colors[1],
300                       (strlen(prog->precomp) > 60 ? "..." : ""),
301                       PL_colors[0],
302                       (strend - strpos > 60 ? 60 : strend - strpos),
303                       strpos, PL_colors[1],
304                       (strend - strpos > 60 ? "..." : ""))
305         );
306
307     if (prog->minlen > strend - strpos)
308         goto fail;
309
310     /* XXXX Move further down? */
311     start_shift = prog->check_offset_min;       /* okay to underestimate on CC */
312     /* Should be nonnegative! */
313     end_shift = prog->minlen - start_shift -
314         CHR_SVLEN(prog->check_substr) + (SvTAIL(prog->check_substr) != 0);
315
316     if (prog->reganch & ROPT_ANCH) {
317         ml_anch = !( (prog->reganch & ROPT_ANCH_SINGLE)
318                      || ( (prog->reganch & ROPT_ANCH_BOL)
319                           && !PL_multiline ) );
320
321         if ((prog->check_offset_min == prog->check_offset_max) && !ml_anch) {
322             /* Anchored... */
323             I32 slen;
324
325             if ( !(prog->reganch & ROPT_ANCH_GPOS) /* Checked by the caller */
326                  && (sv && (strpos + SvCUR(sv) != strend)) )
327                 goto fail;
328
329             s = (char*)HOP((U8*)strpos, prog->check_offset_min);
330             if (SvTAIL(prog->check_substr)) {
331                 slen = SvCUR(prog->check_substr);       /* >= 1 */
332
333                 if ( strend - s > slen || strend - s < slen - 1 ) {
334                     s = Nullch;
335                     goto finish;
336                 }
337                 if ( strend - s == slen && strend[-1] != '\n') {
338                     s = Nullch;
339                     goto finish;
340                 }
341                 /* Now should match s[0..slen-2] */
342                 slen--;
343                 if (slen && (*SvPVX(prog->check_substr) != *s
344                              || (slen > 1
345                                  && memNE(SvPVX(prog->check_substr), s, slen))))
346                     s = Nullch;
347             }
348             else if (*SvPVX(prog->check_substr) != *s
349                      || ((slen = SvCUR(prog->check_substr)) > 1
350                          && memNE(SvPVX(prog->check_substr), s, slen)))
351                     s = Nullch;
352             else
353                     s = strpos;
354             goto finish;
355         }
356         s = strpos;
357         if (!ml_anch && (s + prog->check_offset_max < strend - prog->minlen))
358             end_shift += strend - s - prog->minlen - prog->check_offset_max;
359     }
360     else {
361         ml_anch = 0;
362         s = strpos;
363     }
364
365   restart:
366     if (flags & REXEC_SCREAM) {
367         SV *c = prog->check_substr;
368         char *strbeg = SvPVX(sv);       /* XXXX Assume PV_force() on SCREAM! */
369         I32 p = -1;                     /* Internal iterator of scream. */
370         I32 *pp = data ? data->scream_pos : &p;
371
372         if (PL_screamfirst[BmRARE(c)] >= 0
373             || ( BmRARE(c) == '\n'
374                  && (BmPREVIOUS(c) == SvCUR(c) - 1)
375                  && SvTAIL(c) ))
376             s = screaminstr(sv, prog->check_substr, 
377                             start_shift + (strpos - strbeg), end_shift, pp, 0);
378         else
379             s = Nullch;
380         if (data)
381             *data->scream_olds = s;
382     }
383     else
384         s = fbm_instr((unsigned char*)s + start_shift,
385                       (unsigned char*)strend - end_shift,
386                       prog->check_substr, PL_multiline ? FBMrf_MULTILINE : 0);
387
388     /* Update the count-of-usability, remove useless subpatterns,
389         unshift s.  */
390   finish:
391     if (!s) {
392         ++BmUSEFUL(prog->check_substr); /* hooray */
393         goto fail;                      /* not present */
394     }
395     else if (s - strpos > prog->check_offset_max &&
396              ((prog->reganch & ROPT_UTF8)
397               ? ((t = reghopmaybe_c(s, -(prog->check_offset_max)))
398                  && t >= strpos)
399               : (t = s - prog->check_offset_max) != 0) ) {
400         if (ml_anch && t[-1] != '\n') {
401           find_anchor:
402             while (t < strend - end_shift - prog->minlen) {
403                 if (*t == '\n') {
404                     if (t < s - prog->check_offset_min) {
405                         s = t + 1;
406                         goto set_useful;
407                     }
408                     s = t + 1;
409                     goto restart;
410                 }
411                 t++;
412             }
413             s = Nullch;
414             goto finish;
415         }
416         s = t;
417       set_useful:
418         ++BmUSEFUL(prog->check_substr); /* hooray/2 */
419     }
420     else {
421         if (ml_anch && sv 
422             && (strpos + SvCUR(sv) != strend) && strpos[-1] != '\n') {
423             t = strpos;
424             goto find_anchor;
425         }
426         if (!(prog->reganch & ROPT_NAUGHTY)
427             && --BmUSEFUL(prog->check_substr) < 0
428             && prog->check_substr == prog->float_substr) { /* boo */
429             /* If flags & SOMETHING - do not do it many times on the same match */
430             SvREFCNT_dec(prog->check_substr);
431             prog->check_substr = Nullsv;        /* disable */
432             prog->float_substr = Nullsv;        /* clear */
433             s = strpos;
434             prog->reganch &= ~RE_USE_INTUIT;
435         }
436         else
437             s = strpos;
438     }
439
440     DEBUG_r(PerlIO_printf(Perl_debug_log, "%sFound%s at offset %ld\n",
441                           PL_colors[4],PL_colors[5], (long)(s - strpos)) );
442     return s;
443   fail:
444     DEBUG_r(PerlIO_printf(Perl_debug_log, "%sNot found...%s\n",
445                           PL_colors[4],PL_colors[5]));
446     return Nullch;
447 }
448
449 /*
450  - regexec_flags - match a regexp against a string
451  */
452 I32
453 Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend,
454               char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
455 /* strend: pointer to null at end of string */
456 /* strbeg: real beginning of string */
457 /* minend: end of match must be >=minend after stringarg. */
458 /* data: May be used for some additional optimizations. */
459 /* nosave: For optimizations. */
460 {
461     dTHR;
462     register char *s;
463     register regnode *c;
464     register char *startpos = stringarg;
465     register I32 tmp;
466     I32 minlen;         /* must match at least this many chars */
467     I32 dontbother = 0; /* how many characters not to try at end */
468     CURCUR cc;
469     I32 start_shift = 0;                /* Offset of the start to find
470                                          constant substr. */            /* CC */
471     I32 end_shift = 0;                  /* Same for the end. */         /* CC */
472     I32 scream_pos = -1;                /* Internal iterator of scream. */
473     char *scream_olds;
474     SV* oreplsv = GvSV(PL_replgv);
475
476     cc.cur = 0;
477     cc.oldcc = 0;
478     PL_regcc = &cc;
479
480     cache_re(prog);
481 #ifdef DEBUGGING
482     PL_regnarrate = PL_debug & 512;
483 #endif
484
485     /* Be paranoid... */
486     if (prog == NULL || startpos == NULL) {
487         Perl_croak(aTHX_ "NULL regexp parameter");
488         return 0;
489     }
490
491     minlen = prog->minlen;
492     if (strend - startpos < minlen) goto phooey;
493
494     if (startpos == strbeg)     /* is ^ valid at stringarg? */
495         PL_regprev = '\n';
496     else {
497         PL_regprev = (U32)stringarg[-1];
498         if (!PL_multiline && PL_regprev == '\n')
499             PL_regprev = '\0';          /* force ^ to NOT match */
500     }
501
502     /* Check validity of program. */
503     if (UCHARAT(prog->program) != REG_MAGIC) {
504         Perl_croak(aTHX_ "corrupted regexp program");
505     }
506
507     PL_reg_flags = 0;
508     PL_reg_eval_set = 0;
509
510     if (prog->reganch & ROPT_UTF8)
511         PL_reg_flags |= RF_utf8;
512
513     /* Mark beginning of line for ^ and lookbehind. */
514     PL_regbol = startpos;
515     PL_bostr  = strbeg;
516     PL_reg_sv = sv;
517
518     /* Mark end of line for $ (and such) */
519     PL_regeol = strend;
520
521     /* see how far we have to get to not match where we matched before */
522     PL_regtill = startpos+minend;
523
524     /* We start without call_cc context.  */
525     PL_reg_call_cc = 0;
526
527     /* If there is a "must appear" string, look for it. */
528     s = startpos;
529
530     if (prog->reganch & ROPT_GPOS_SEEN) {
531         MAGIC *mg;
532
533         if (!(flags & REXEC_IGNOREPOS) && sv && SvTYPE(sv) >= SVt_PVMG
534             && SvMAGIC(sv) && (mg = mg_find(sv, 'g')) && mg->mg_len >= 0)
535             PL_reg_ganch = strbeg + mg->mg_len;
536         else
537             PL_reg_ganch = startpos;
538         if (prog->reganch & ROPT_ANCH_GPOS) {
539             if (s > PL_reg_ganch)
540                 goto phooey;
541             s = PL_reg_ganch;
542         }
543     }
544
545     if (!(flags & REXEC_CHECKED) && prog->check_substr != Nullsv) {
546         re_scream_pos_data d;
547
548         d.scream_olds = &scream_olds;
549         d.scream_pos = &scream_pos;
550         s = re_intuit_start(prog, sv, s, strend, flags, &d);
551         if (!s)
552             goto phooey;        /* not present */
553     }
554
555     DEBUG_r( if (!PL_colorset) reginitcolors() );
556     DEBUG_r(PerlIO_printf(Perl_debug_log,
557                       "%sMatching%s `%s%.60s%s%s' against `%s%.*s%s%s'\n",
558                       PL_colors[4],PL_colors[5],PL_colors[0],
559                       prog->precomp,
560                       PL_colors[1],
561                       (strlen(prog->precomp) > 60 ? "..." : ""),
562                       PL_colors[0],
563                       (strend - startpos > 60 ? 60 : strend - startpos),
564                       startpos, PL_colors[1],
565                       (strend - startpos > 60 ? "..." : ""))
566         );
567
568     /* Simplest case:  anchored match need be tried only once. */
569     /*  [unless only anchor is BOL and multiline is set] */
570     if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) {
571         if (s == startpos && regtry(prog, startpos))
572             goto got_it;
573         else if (PL_multiline || (prog->reganch & ROPT_IMPLICIT)
574                  || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */
575         {
576             char *end;
577
578             if (minlen)
579                 dontbother = minlen - 1;
580             end = HOPc(strend, -dontbother) - 1;
581             /* for multiline we only have to try after newlines */
582             if (prog->check_substr) {
583                 while (1) {
584                     if (regtry(prog, s))
585                         goto got_it;
586                     if (s >= end)
587                         goto phooey;
588                     s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
589                     if (!s)
590                         goto phooey;
591                 }               
592             } else {
593                 if (s > startpos)
594                     s--;
595                 while (s < end) {
596                     if (*s++ == '\n') { /* don't need PL_utf8skip here */
597                         if (regtry(prog, s))
598                             goto got_it;
599                     }
600                 }               
601             }
602         }
603         goto phooey;
604     } else if (prog->reganch & ROPT_ANCH_GPOS) {
605         if (regtry(prog, PL_reg_ganch))
606             goto got_it;
607         goto phooey;
608     }
609
610     /* Messy cases:  unanchored match. */
611     if (prog->anchored_substr && prog->reganch & ROPT_SKIP) { 
612         /* we have /x+whatever/ */
613         /* it must be a one character string (XXXX Except UTF?) */
614         char ch = SvPVX(prog->anchored_substr)[0];
615         if (UTF) {
616             while (s < strend) {
617                 if (*s == ch) {
618                     if (regtry(prog, s)) goto got_it;
619                     s += UTF8SKIP(s);
620                     while (s < strend && *s == ch)
621                         s += UTF8SKIP(s);
622                 }
623                 s += UTF8SKIP(s);
624             }
625         }
626         else {
627             while (s < strend) {
628                 if (*s == ch) {
629                     if (regtry(prog, s)) goto got_it;
630                     s++;
631                     while (s < strend && *s == ch)
632                         s++;
633                 }
634                 s++;
635             }
636         }
637     }
638     /*SUPPRESS 560*/
639     else if (prog->anchored_substr != Nullsv
640              || (prog->float_substr != Nullsv 
641                  && prog->float_max_offset < strend - s)) {
642         SV *must = prog->anchored_substr 
643             ? prog->anchored_substr : prog->float_substr;
644         I32 back_max = 
645             prog->anchored_substr ? prog->anchored_offset : prog->float_max_offset;
646         I32 back_min = 
647             prog->anchored_substr ? prog->anchored_offset : prog->float_min_offset;
648         I32 delta = back_max - back_min;
649         char *last = HOPc(strend,       /* Cannot start after this */
650                           -(I32)(CHR_SVLEN(must)
651                                  - (SvTAIL(must) != 0) + back_min));
652         char *last1;            /* Last position checked before */
653
654         if (s > PL_bostr)
655             last1 = HOPc(s, -1);
656         else
657             last1 = s - 1;      /* bogus */
658
659         /* XXXX check_substr already used to find `s', can optimize if
660            check_substr==must. */
661         scream_pos = -1;
662         dontbother = end_shift;
663         strend = HOPc(strend, -dontbother);
664         while ( (s <= last) &&
665                 ((flags & REXEC_SCREAM) 
666                  ? (s = screaminstr(sv, must, HOPc(s, back_min) - strbeg,
667                                     end_shift, &scream_pos, 0))
668                  : (s = fbm_instr((unsigned char*)HOP(s, back_min),
669                                   (unsigned char*)strend, must, 
670                                   PL_multiline ? FBMrf_MULTILINE : 0))) ) {
671             if (HOPc(s, -back_max) > last1) {
672                 last1 = HOPc(s, -back_min);
673                 s = HOPc(s, -back_max);
674             }
675             else {
676                 char *t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
677
678                 last1 = HOPc(s, -back_min);
679                 s = t;          
680             }
681             if (UTF) {
682                 while (s <= last1) {
683                     if (regtry(prog, s))
684                         goto got_it;
685                     s += UTF8SKIP(s);
686                 }
687             }
688             else {
689                 while (s <= last1) {
690                     if (regtry(prog, s))
691                         goto got_it;
692                     s++;
693                 }
694             }
695         }
696         goto phooey;
697     }
698     else if (c = prog->regstclass) {
699         I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
700         char *cc;
701
702         if (minlen)
703             dontbother = minlen - 1;
704         strend = HOPc(strend, -dontbother);     /* don't bother with what can't match */
705         tmp = 1;
706         /* We know what class it must start with. */
707         switch (OP(c)) {
708         case ANYOFUTF8:
709             cc = (char *) OPERAND(c);
710             while (s < strend) {
711                 if (REGINCLASSUTF8(c, (U8*)s)) {
712                     if (tmp && regtry(prog, s))
713                         goto got_it;
714                     else
715                         tmp = doevery;
716                 }
717                 else
718                     tmp = 1;
719                 s += UTF8SKIP(s);
720             }
721             break;
722         case ANYOF:
723             cc = (char *) OPERAND(c);
724             while (s < strend) {
725                 if (REGINCLASS(cc, *s)) {
726                     if (tmp && regtry(prog, s))
727                         goto got_it;
728                     else
729                         tmp = doevery;
730                 }
731                 else
732                     tmp = 1;
733                 s++;
734             }
735             break;
736         case BOUNDL:
737             PL_reg_flags |= RF_tainted;
738             /* FALL THROUGH */
739         case BOUND:
740             if (minlen) {
741                 dontbother++;
742                 strend -= 1;
743             }
744             tmp = (s != startpos) ? UCHARAT(s - 1) : PL_regprev;
745             tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
746             while (s < strend) {
747                 if (tmp == !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
748                     tmp = !tmp;
749                     if (regtry(prog, s))
750                         goto got_it;
751                 }
752                 s++;
753             }
754             if ((minlen || tmp) && regtry(prog,s))
755                 goto got_it;
756             break;
757         case BOUNDLUTF8:
758             PL_reg_flags |= RF_tainted;
759             /* FALL THROUGH */
760         case BOUNDUTF8:
761             if (minlen) {
762                 dontbother++;
763                 strend = reghop_c(strend, -1);
764             }
765             tmp = (I32)(s != startpos) ? utf8_to_uv(reghop((U8*)s, -1), 0) : PL_regprev;
766             tmp = ((OP(c) == BOUND ? isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
767             while (s < strend) {
768                 if (tmp == !(OP(c) == BOUND ?
769                              swash_fetch(PL_utf8_alnum, (U8*)s) :
770                              isALNUM_LC_utf8((U8*)s)))
771                 {
772                     tmp = !tmp;
773                     if (regtry(prog, s))
774                         goto got_it;
775                 }
776                 s += UTF8SKIP(s);
777             }
778             if ((minlen || tmp) && regtry(prog,s))
779                 goto got_it;
780             break;
781         case NBOUNDL:
782             PL_reg_flags |= RF_tainted;
783             /* FALL THROUGH */
784         case NBOUND:
785             if (minlen) {
786                 dontbother++;
787                 strend -= 1;
788             }
789             tmp = (s != startpos) ? UCHARAT(s - 1) : PL_regprev;
790             tmp = ((OP(c) == NBOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
791             while (s < strend) {
792                 if (tmp == !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
793                     tmp = !tmp;
794                 else if (regtry(prog, s))
795                     goto got_it;
796                 s++;
797             }
798             if ((minlen || !tmp) && regtry(prog,s))
799                 goto got_it;
800             break;
801         case NBOUNDLUTF8:
802             PL_reg_flags |= RF_tainted;
803             /* FALL THROUGH */
804         case NBOUNDUTF8:
805             if (minlen) {
806                 dontbother++;
807                 strend = reghop_c(strend, -1);
808             }
809             tmp = (I32)(s != startpos) ? utf8_to_uv(reghop((U8*)s, -1), 0) : PL_regprev;
810             tmp = ((OP(c) == NBOUND ? isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
811             while (s < strend) {
812                 if (tmp == !(OP(c) == NBOUND ?
813                              swash_fetch(PL_utf8_alnum, (U8*)s) :
814                              isALNUM_LC_utf8((U8*)s)))
815                     tmp = !tmp;
816                 else if (regtry(prog, s))
817                     goto got_it;
818                 s += UTF8SKIP(s);
819             }
820             if ((minlen || !tmp) && regtry(prog,s))
821                 goto got_it;
822             break;
823         case ALNUM:
824             while (s < strend) {
825                 if (isALNUM(*s)) {
826                     if (tmp && regtry(prog, s))
827                         goto got_it;
828                     else
829                         tmp = doevery;
830                 }
831                 else
832                     tmp = 1;
833                 s++;
834             }
835             break;
836         case ALNUMUTF8:
837             while (s < strend) {
838                 if (swash_fetch(PL_utf8_alnum, (U8*)s)) {
839                     if (tmp && regtry(prog, s))
840                         goto got_it;
841                     else
842                         tmp = doevery;
843                 }
844                 else
845                     tmp = 1;
846                 s += UTF8SKIP(s);
847             }
848             break;
849         case ALNUML:
850             PL_reg_flags |= RF_tainted;
851             while (s < strend) {
852                 if (isALNUM_LC(*s)) {
853                     if (tmp && regtry(prog, s))
854                         goto got_it;
855                     else
856                         tmp = doevery;
857                 }
858                 else
859                     tmp = 1;
860                 s++;
861             }
862             break;
863         case ALNUMLUTF8:
864             PL_reg_flags |= RF_tainted;
865             while (s < strend) {
866                 if (isALNUM_LC_utf8((U8*)s)) {
867                     if (tmp && regtry(prog, s))
868                         goto got_it;
869                     else
870                         tmp = doevery;
871                 }
872                 else
873                     tmp = 1;
874                 s += UTF8SKIP(s);
875             }
876             break;
877         case NALNUM:
878             while (s < strend) {
879                 if (!isALNUM(*s)) {
880                     if (tmp && regtry(prog, s))
881                         goto got_it;
882                     else
883                         tmp = doevery;
884                 }
885                 else
886                     tmp = 1;
887                 s++;
888             }
889             break;
890         case NALNUMUTF8:
891             while (s < strend) {
892                 if (!swash_fetch(PL_utf8_alnum, (U8*)s)) {
893                     if (tmp && regtry(prog, s))
894                         goto got_it;
895                     else
896                         tmp = doevery;
897                 }
898                 else
899                     tmp = 1;
900                 s += UTF8SKIP(s);
901             }
902             break;
903         case NALNUML:
904             PL_reg_flags |= RF_tainted;
905             while (s < strend) {
906                 if (!isALNUM_LC(*s)) {
907                     if (tmp && regtry(prog, s))
908                         goto got_it;
909                     else
910                         tmp = doevery;
911                 }
912                 else
913                     tmp = 1;
914                 s++;
915             }
916             break;
917         case NALNUMLUTF8:
918             PL_reg_flags |= RF_tainted;
919             while (s < strend) {
920                 if (!isALNUM_LC_utf8((U8*)s)) {
921                     if (tmp && regtry(prog, s))
922                         goto got_it;
923                     else
924                         tmp = doevery;
925                 }
926                 else
927                     tmp = 1;
928                 s += UTF8SKIP(s);
929             }
930             break;
931         case SPACE:
932             while (s < strend) {
933                 if (isSPACE(*s)) {
934                     if (tmp && regtry(prog, s))
935                         goto got_it;
936                     else
937                         tmp = doevery;
938                 }
939                 else
940                     tmp = 1;
941                 s++;
942             }
943             break;
944         case SPACEUTF8:
945             while (s < strend) {
946                 if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s)) {
947                     if (tmp && regtry(prog, s))
948                         goto got_it;
949                     else
950                         tmp = doevery;
951                 }
952                 else
953                     tmp = 1;
954                 s += UTF8SKIP(s);
955             }
956             break;
957         case SPACEL:
958             PL_reg_flags |= RF_tainted;
959             while (s < strend) {
960                 if (isSPACE_LC(*s)) {
961                     if (tmp && regtry(prog, s))
962                         goto got_it;
963                     else
964                         tmp = doevery;
965                 }
966                 else
967                     tmp = 1;
968                 s++;
969             }
970             break;
971         case SPACELUTF8:
972             PL_reg_flags |= RF_tainted;
973             while (s < strend) {
974                 if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) {
975                     if (tmp && regtry(prog, s))
976                         goto got_it;
977                     else
978                         tmp = doevery;
979                 }
980                 else
981                     tmp = 1;
982                 s += UTF8SKIP(s);
983             }
984             break;
985         case NSPACE:
986             while (s < strend) {
987                 if (!isSPACE(*s)) {
988                     if (tmp && regtry(prog, s))
989                         goto got_it;
990                     else
991                         tmp = doevery;
992                 }
993                 else
994                     tmp = 1;
995                 s++;
996             }
997             break;
998         case NSPACEUTF8:
999             while (s < strend) {
1000                 if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s))) {
1001                     if (tmp && regtry(prog, s))
1002                         goto got_it;
1003                     else
1004                         tmp = doevery;
1005                 }
1006                 else
1007                     tmp = 1;
1008                 s += UTF8SKIP(s);
1009             }
1010             break;
1011         case NSPACEL:
1012             PL_reg_flags |= RF_tainted;
1013             while (s < strend) {
1014                 if (!isSPACE_LC(*s)) {
1015                     if (tmp && regtry(prog, s))
1016                         goto got_it;
1017                     else
1018                         tmp = doevery;
1019                 }
1020                 else
1021                     tmp = 1;
1022                 s++;
1023             }
1024             break;
1025         case NSPACELUTF8:
1026             PL_reg_flags |= RF_tainted;
1027             while (s < strend) {
1028                 if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) {
1029                     if (tmp && regtry(prog, s))
1030                         goto got_it;
1031                     else
1032                         tmp = doevery;
1033                 }
1034                 else
1035                     tmp = 1;
1036                 s += UTF8SKIP(s);
1037             }
1038             break;
1039         case DIGIT:
1040             while (s < strend) {
1041                 if (isDIGIT(*s)) {
1042                     if (tmp && regtry(prog, s))
1043                         goto got_it;
1044                     else
1045                         tmp = doevery;
1046                 }
1047                 else
1048                     tmp = 1;
1049                 s++;
1050             }
1051             break;
1052         case DIGITUTF8:
1053             while (s < strend) {
1054                 if (swash_fetch(PL_utf8_digit,(U8*)s)) {
1055                     if (tmp && regtry(prog, s))
1056                         goto got_it;
1057                     else
1058                         tmp = doevery;
1059                 }
1060                 else
1061                     tmp = 1;
1062                 s += UTF8SKIP(s);
1063             }
1064             break;
1065         case DIGITL:
1066             PL_reg_flags |= RF_tainted;
1067             while (s < strend) {
1068                 if (isDIGIT_LC(*s)) {
1069                     if (tmp && regtry(prog, s))
1070                         goto got_it;
1071                     else
1072                         tmp = doevery;
1073                 }
1074                 else
1075                     tmp = 1;
1076                 s++;
1077             }
1078             break;
1079         case DIGITLUTF8:
1080             PL_reg_flags |= RF_tainted;
1081             while (s < strend) {
1082                 if (isDIGIT_LC_utf8((U8*)s)) {
1083                     if (tmp && regtry(prog, s))
1084                         goto got_it;
1085                     else
1086                         tmp = doevery;
1087                 }
1088                 else
1089                     tmp = 1;
1090                 s += UTF8SKIP(s);
1091             }
1092             break;
1093         case NDIGIT:
1094             while (s < strend) {
1095                 if (!isDIGIT(*s)) {
1096                     if (tmp && regtry(prog, s))
1097                         goto got_it;
1098                     else
1099                         tmp = doevery;
1100                 }
1101                 else
1102                     tmp = 1;
1103                 s++;
1104             }
1105             break;
1106         case NDIGITUTF8:
1107             while (s < strend) {
1108                 if (!swash_fetch(PL_utf8_digit,(U8*)s)) {
1109                     if (tmp && regtry(prog, s))
1110                         goto got_it;
1111                     else
1112                         tmp = doevery;
1113                 }
1114                 else
1115                     tmp = 1;
1116                 s += UTF8SKIP(s);
1117             }
1118             break;
1119         case NDIGITL:
1120             PL_reg_flags |= RF_tainted;
1121             while (s < strend) {
1122                 if (!isDIGIT_LC(*s)) {
1123                     if (tmp && regtry(prog, s))
1124                         goto got_it;
1125                     else
1126                         tmp = doevery;
1127                 }
1128                 else
1129                     tmp = 1;
1130                 s++;
1131             }
1132             break;
1133         case NDIGITLUTF8:
1134             PL_reg_flags |= RF_tainted;
1135             while (s < strend) {
1136                 if (!isDIGIT_LC_utf8((U8*)s)) {
1137                     if (tmp && regtry(prog, s))
1138                         goto got_it;
1139                     else
1140                         tmp = doevery;
1141                 }
1142                 else
1143                     tmp = 1;
1144                 s += UTF8SKIP(s);
1145             }
1146             break;
1147         case ALNUMC:
1148             while (s < strend) {
1149                 if (isALNUMC(*s)) {
1150                     if (tmp && regtry(prog, s))
1151                         goto got_it;
1152                     else
1153                         tmp = doevery;
1154                 }
1155                 else
1156                     tmp = 1;
1157                 s++;
1158             }
1159             break;
1160         case ALNUMCUTF8:
1161             while (s < strend) {
1162                 if (swash_fetch(PL_utf8_alnumc, (U8*)s)) {
1163                     if (tmp && regtry(prog, s))
1164                         goto got_it;
1165                     else
1166                         tmp = doevery;
1167                 }
1168                 else
1169                     tmp = 1;
1170                 s += UTF8SKIP(s);
1171             }
1172             break;
1173         case ALNUMCL:
1174             PL_reg_flags |= RF_tainted;
1175             while (s < strend) {
1176                 if (isALNUMC_LC(*s)) {
1177                     if (tmp && regtry(prog, s))
1178                         goto got_it;
1179                     else
1180                         tmp = doevery;
1181                 }
1182                 else
1183                     tmp = 1;
1184                 s++;
1185             }
1186             break;
1187         case ALNUMCLUTF8:
1188             PL_reg_flags |= RF_tainted;
1189             while (s < strend) {
1190                 if (isALNUMC_LC_utf8((U8*)s)) {
1191                     if (tmp && regtry(prog, s))
1192                         goto got_it;
1193                     else
1194                         tmp = doevery;
1195                 }
1196                 else
1197                     tmp = 1;
1198                 s += UTF8SKIP(s);
1199             }
1200             break;
1201         case NALNUMC:
1202             while (s < strend) {
1203                 if (!isALNUMC(*s)) {
1204                     if (tmp && regtry(prog, s))
1205                         goto got_it;
1206                     else
1207                         tmp = doevery;
1208                 }
1209                 else
1210                     tmp = 1;
1211                 s++;
1212             }
1213             break;
1214         case NALNUMCUTF8:
1215             while (s < strend) {
1216                 if (!swash_fetch(PL_utf8_alnumc, (U8*)s)) {
1217                     if (tmp && regtry(prog, s))
1218                         goto got_it;
1219                     else
1220                         tmp = doevery;
1221                 }
1222                 else
1223                     tmp = 1;
1224                 s += UTF8SKIP(s);
1225             }
1226             break;
1227         case NALNUMCL:
1228             PL_reg_flags |= RF_tainted;
1229             while (s < strend) {
1230                 if (!isALNUMC_LC(*s)) {
1231                     if (tmp && regtry(prog, s))
1232                         goto got_it;
1233                     else
1234                         tmp = doevery;
1235                 }
1236                 else
1237                     tmp = 1;
1238                 s++;
1239             }
1240             break;
1241         case NALNUMCLUTF8:
1242             PL_reg_flags |= RF_tainted;
1243             while (s < strend) {
1244                 if (!isALNUMC_LC_utf8((U8*)s)) {
1245                     if (tmp && regtry(prog, s))
1246                         goto got_it;
1247                     else
1248                         tmp = doevery;
1249                 }
1250                 else
1251                     tmp = 1;
1252                 s += UTF8SKIP(s);
1253             }
1254             break;
1255         case ASCII:
1256             while (s < strend) {
1257                 if (isASCII(*s)) {
1258                     if (tmp && regtry(prog, s))
1259                         goto got_it;
1260                     else
1261                         tmp = doevery;
1262                 }
1263                 else
1264                     tmp = 1;
1265                 s++;
1266             }
1267             break;
1268         case NASCII:
1269             while (s < strend) {
1270                 if (!isASCII(*s)) {
1271                     if (tmp && regtry(prog, s))
1272                         goto got_it;
1273                     else
1274                         tmp = doevery;
1275                 }
1276                 else
1277                     tmp = 1;
1278                 s++;
1279             }
1280             break;
1281         case CNTRL:
1282             while (s < strend) {
1283                 if (isCNTRL(*s)) {
1284                     if (tmp && regtry(prog, s))
1285                         goto got_it;
1286                     else
1287                         tmp = doevery;
1288                 }
1289                 else
1290                     tmp = 1;
1291                 s++;
1292             }
1293             break;
1294         case CNTRLUTF8:
1295             while (s < strend) {
1296                 if (swash_fetch(PL_utf8_cntrl,(U8*)s)) {
1297                     if (tmp && regtry(prog, s))
1298                         goto got_it;
1299                     else
1300                         tmp = doevery;
1301                 }
1302                 else
1303                     tmp = 1;
1304                 s += UTF8SKIP(s);
1305             }
1306             break;
1307         case CNTRLL:
1308             PL_reg_flags |= RF_tainted;
1309             while (s < strend) {
1310                 if (isCNTRL_LC(*s)) {
1311                     if (tmp && regtry(prog, s))
1312                         goto got_it;
1313                     else
1314                         tmp = doevery;
1315                 }
1316                 else
1317                     tmp = 1;
1318                 s++;
1319             }
1320             break;
1321         case CNTRLLUTF8:
1322             PL_reg_flags |= RF_tainted;
1323             while (s < strend) {
1324                 if (*s == ' ' || isCNTRL_LC_utf8((U8*)s)) {
1325                     if (tmp && regtry(prog, s))
1326                         goto got_it;
1327                     else
1328                         tmp = doevery;
1329                 }
1330                 else
1331                     tmp = 1;
1332                 s += UTF8SKIP(s);
1333             }
1334             break;
1335         case NCNTRL:
1336             while (s < strend) {
1337                 if (!isCNTRL(*s)) {
1338                     if (tmp && regtry(prog, s))
1339                         goto got_it;
1340                     else
1341                         tmp = doevery;
1342                 }
1343                 else
1344                     tmp = 1;
1345                 s++;
1346             }
1347             break;
1348         case NCNTRLUTF8:
1349             while (s < strend) {
1350                 if (!swash_fetch(PL_utf8_cntrl,(U8*)s)) {
1351                     if (tmp && regtry(prog, s))
1352                         goto got_it;
1353                     else
1354                         tmp = doevery;
1355                 }
1356                 else
1357                     tmp = 1;
1358                 s += UTF8SKIP(s);
1359             }
1360             break;
1361         case NCNTRLL:
1362             PL_reg_flags |= RF_tainted;
1363             while (s < strend) {
1364                 if (!isCNTRL_LC(*s)) {
1365                     if (tmp && regtry(prog, s))
1366                         goto got_it;
1367                     else
1368                         tmp = doevery;
1369                 }
1370                 else
1371                     tmp = 1;
1372                 s++;
1373             }
1374             break;
1375         case NCNTRLLUTF8:
1376             PL_reg_flags |= RF_tainted;
1377             while (s < strend) {
1378                 if (!isCNTRL_LC_utf8((U8*)s)) {
1379                     if (tmp && regtry(prog, s))
1380                         goto got_it;
1381                     else
1382                         tmp = doevery;
1383                 }
1384                 else
1385                     tmp = 1;
1386                 s += UTF8SKIP(s);
1387             }
1388             break;
1389         case GRAPH:
1390             while (s < strend) {
1391                 if (isGRAPH(*s)) {
1392                     if (tmp && regtry(prog, s))
1393                         goto got_it;
1394                     else
1395                         tmp = doevery;
1396                 }
1397                 else
1398                     tmp = 1;
1399                 s++;
1400             }
1401             break;
1402         case GRAPHUTF8:
1403             while (s < strend) {
1404                 if (swash_fetch(PL_utf8_graph,(U8*)s)) {
1405                     if (tmp && regtry(prog, s))
1406                         goto got_it;
1407                     else
1408                         tmp = doevery;
1409                 }
1410                 else
1411                     tmp = 1;
1412                 s += UTF8SKIP(s);
1413             }
1414             break;
1415         case GRAPHL:
1416             PL_reg_flags |= RF_tainted;
1417             while (s < strend) {
1418                 if (isGRAPH_LC(*s)) {
1419                     if (tmp && regtry(prog, s))
1420                         goto got_it;
1421                     else
1422                         tmp = doevery;
1423                 }
1424                 else
1425                     tmp = 1;
1426                 s++;
1427             }
1428             break;
1429         case GRAPHLUTF8:
1430             PL_reg_flags |= RF_tainted;
1431             while (s < strend) {
1432                 if (*s == ' ' || isGRAPH_LC_utf8((U8*)s)) {
1433                     if (tmp && regtry(prog, s))
1434                         goto got_it;
1435                     else
1436                         tmp = doevery;
1437                 }
1438                 else
1439                     tmp = 1;
1440                 s += UTF8SKIP(s);
1441             }
1442             break;
1443         case NGRAPH:
1444             while (s < strend) {
1445                 if (!isGRAPH(*s)) {
1446                     if (tmp && regtry(prog, s))
1447                         goto got_it;
1448                     else
1449                         tmp = doevery;
1450                 }
1451                 else
1452                     tmp = 1;
1453                 s++;
1454             }
1455             break;
1456         case NGRAPHUTF8:
1457             while (s < strend) {
1458                 if (!swash_fetch(PL_utf8_graph,(U8*)s)) {
1459                     if (tmp && regtry(prog, s))
1460                         goto got_it;
1461                     else
1462                         tmp = doevery;
1463                 }
1464                 else
1465                     tmp = 1;
1466                 s += UTF8SKIP(s);
1467             }
1468             break;
1469         case NGRAPHL:
1470             PL_reg_flags |= RF_tainted;
1471             while (s < strend) {
1472                 if (!isGRAPH_LC(*s)) {
1473                     if (tmp && regtry(prog, s))
1474                         goto got_it;
1475                     else
1476                         tmp = doevery;
1477                 }
1478                 else
1479                     tmp = 1;
1480                 s++;
1481             }
1482             break;
1483         case NGRAPHLUTF8:
1484             PL_reg_flags |= RF_tainted;
1485             while (s < strend) {
1486                 if (!isGRAPH_LC_utf8((U8*)s)) {
1487                     if (tmp && regtry(prog, s))
1488                         goto got_it;
1489                     else
1490                         tmp = doevery;
1491                 }
1492                 else
1493                     tmp = 1;
1494                 s += UTF8SKIP(s);
1495             }
1496             break;
1497         case LOWER:
1498             while (s < strend) {
1499                 if (isLOWER(*s)) {
1500                     if (tmp && regtry(prog, s))
1501                         goto got_it;
1502                     else
1503                         tmp = doevery;
1504                 }
1505                 else
1506                     tmp = 1;
1507                 s++;
1508             }
1509             break;
1510         case LOWERUTF8:
1511             while (s < strend) {
1512                 if (swash_fetch(PL_utf8_lower,(U8*)s)) {
1513                     if (tmp && regtry(prog, s))
1514                         goto got_it;
1515                     else
1516                         tmp = doevery;
1517                 }
1518                 else
1519                     tmp = 1;
1520                 s += UTF8SKIP(s);
1521             }
1522             break;
1523         case LOWERL:
1524             PL_reg_flags |= RF_tainted;
1525             while (s < strend) {
1526                 if (isLOWER_LC(*s)) {
1527                     if (tmp && regtry(prog, s))
1528                         goto got_it;
1529                     else
1530                         tmp = doevery;
1531                 }
1532                 else
1533                     tmp = 1;
1534                 s++;
1535             }
1536             break;
1537         case LOWERLUTF8:
1538             PL_reg_flags |= RF_tainted;
1539             while (s < strend) {
1540                 if (*s == ' ' || isLOWER_LC_utf8((U8*)s)) {
1541                     if (tmp && regtry(prog, s))
1542                         goto got_it;
1543                     else
1544                         tmp = doevery;
1545                 }
1546                 else
1547                     tmp = 1;
1548                 s += UTF8SKIP(s);
1549             }
1550             break;
1551         case NLOWER:
1552             while (s < strend) {
1553                 if (!isLOWER(*s)) {
1554                     if (tmp && regtry(prog, s))
1555                         goto got_it;
1556                     else
1557                         tmp = doevery;
1558                 }
1559                 else
1560                     tmp = 1;
1561                 s++;
1562             }
1563             break;
1564         case NLOWERUTF8:
1565             while (s < strend) {
1566                 if (!swash_fetch(PL_utf8_lower,(U8*)s)) {
1567                     if (tmp && regtry(prog, s))
1568                         goto got_it;
1569                     else
1570                         tmp = doevery;
1571                 }
1572                 else
1573                     tmp = 1;
1574                 s += UTF8SKIP(s);
1575             }
1576             break;
1577         case NLOWERL:
1578             PL_reg_flags |= RF_tainted;
1579             while (s < strend) {
1580                 if (!isLOWER_LC(*s)) {
1581                     if (tmp && regtry(prog, s))
1582                         goto got_it;
1583                     else
1584                         tmp = doevery;
1585                 }
1586                 else
1587                     tmp = 1;
1588                 s++;
1589             }
1590             break;
1591         case NLOWERLUTF8:
1592             PL_reg_flags |= RF_tainted;
1593             while (s < strend) {
1594                 if (!isLOWER_LC_utf8((U8*)s)) {
1595                     if (tmp && regtry(prog, s))
1596                         goto got_it;
1597                     else
1598                         tmp = doevery;
1599                 }
1600                 else
1601                     tmp = 1;
1602                 s += UTF8SKIP(s);
1603             }
1604             break;
1605         case PRINT:
1606             while (s < strend) {
1607                 if (isPRINT(*s)) {
1608                     if (tmp && regtry(prog, s))
1609                         goto got_it;
1610                     else
1611                         tmp = doevery;
1612                 }
1613                 else
1614                     tmp = 1;
1615                 s++;
1616             }
1617             break;
1618         case PRINTUTF8:
1619             while (s < strend) {
1620                 if (swash_fetch(PL_utf8_print,(U8*)s)) {
1621                     if (tmp && regtry(prog, s))
1622                         goto got_it;
1623                     else
1624                         tmp = doevery;
1625                 }
1626                 else
1627                     tmp = 1;
1628                 s += UTF8SKIP(s);
1629             }
1630             break;
1631         case PRINTL:
1632             PL_reg_flags |= RF_tainted;
1633             while (s < strend) {
1634                 if (isPRINT_LC(*s)) {
1635                     if (tmp && regtry(prog, s))
1636                         goto got_it;
1637                     else
1638                         tmp = doevery;
1639                 }
1640                 else
1641                     tmp = 1;
1642                 s++;
1643             }
1644             break;
1645         case PRINTLUTF8:
1646             PL_reg_flags |= RF_tainted;
1647             while (s < strend) {
1648                 if (*s == ' ' || isPRINT_LC_utf8((U8*)s)) {
1649                     if (tmp && regtry(prog, s))
1650                         goto got_it;
1651                     else
1652                         tmp = doevery;
1653                 }
1654                 else
1655                     tmp = 1;
1656                 s += UTF8SKIP(s);
1657             }
1658             break;
1659         case NPRINT:
1660             while (s < strend) {
1661                 if (!isPRINT(*s)) {
1662                     if (tmp && regtry(prog, s))
1663                         goto got_it;
1664                     else
1665                         tmp = doevery;
1666                 }
1667                 else
1668                     tmp = 1;
1669                 s++;
1670             }
1671             break;
1672         case NPRINTUTF8:
1673             while (s < strend) {
1674                 if (!swash_fetch(PL_utf8_print,(U8*)s)) {
1675                     if (tmp && regtry(prog, s))
1676                         goto got_it;
1677                     else
1678                         tmp = doevery;
1679                 }
1680                 else
1681                     tmp = 1;
1682                 s += UTF8SKIP(s);
1683             }
1684             break;
1685         case NPRINTL:
1686             PL_reg_flags |= RF_tainted;
1687             while (s < strend) {
1688                 if (!isPRINT_LC(*s)) {
1689                     if (tmp && regtry(prog, s))
1690                         goto got_it;
1691                     else
1692                         tmp = doevery;
1693                 }
1694                 else
1695                     tmp = 1;
1696                 s++;
1697             }
1698             break;
1699         case NPRINTLUTF8:
1700             PL_reg_flags |= RF_tainted;
1701             while (s < strend) {
1702                 if (!isPRINT_LC_utf8((U8*)s)) {
1703                     if (tmp && regtry(prog, s))
1704                         goto got_it;
1705                     else
1706                         tmp = doevery;
1707                 }
1708                 else
1709                     tmp = 1;
1710                 s += UTF8SKIP(s);
1711             }
1712             break;
1713         case PUNCT:
1714             while (s < strend) {
1715                 if (isPUNCT(*s)) {
1716                     if (tmp && regtry(prog, s))
1717                         goto got_it;
1718                     else
1719                         tmp = doevery;
1720                 }
1721                 else
1722                     tmp = 1;
1723                 s++;
1724             }
1725             break;
1726         case PUNCTUTF8:
1727             while (s < strend) {
1728                 if (swash_fetch(PL_utf8_punct,(U8*)s)) {
1729                     if (tmp && regtry(prog, s))
1730                         goto got_it;
1731                     else
1732                         tmp = doevery;
1733                 }
1734                 else
1735                     tmp = 1;
1736                 s += UTF8SKIP(s);
1737             }
1738             break;
1739         case PUNCTL:
1740             PL_reg_flags |= RF_tainted;
1741             while (s < strend) {
1742                 if (isPUNCT_LC(*s)) {
1743                     if (tmp && regtry(prog, s))
1744                         goto got_it;
1745                     else
1746                         tmp = doevery;
1747                 }
1748                 else
1749                     tmp = 1;
1750                 s++;
1751             }
1752             break;
1753         case PUNCTLUTF8:
1754             PL_reg_flags |= RF_tainted;
1755             while (s < strend) {
1756                 if (*s == ' ' || isPUNCT_LC_utf8((U8*)s)) {
1757                     if (tmp && regtry(prog, s))
1758                         goto got_it;
1759                     else
1760                         tmp = doevery;
1761                 }
1762                 else
1763                     tmp = 1;
1764                 s += UTF8SKIP(s);
1765             }
1766             break;
1767         case NPUNCT:
1768             while (s < strend) {
1769                 if (!isPUNCT(*s)) {
1770                     if (tmp && regtry(prog, s))
1771                         goto got_it;
1772                     else
1773                         tmp = doevery;
1774                 }
1775                 else
1776                     tmp = 1;
1777                 s++;
1778             }
1779             break;
1780         case NPUNCTUTF8:
1781             while (s < strend) {
1782                 if (!swash_fetch(PL_utf8_punct,(U8*)s)) {
1783                     if (tmp && regtry(prog, s))
1784                         goto got_it;
1785                     else
1786                         tmp = doevery;
1787                 }
1788                 else
1789                     tmp = 1;
1790                 s += UTF8SKIP(s);
1791             }
1792             break;
1793         case NPUNCTL:
1794             PL_reg_flags |= RF_tainted;
1795             while (s < strend) {
1796                 if (!isPUNCT_LC(*s)) {
1797                     if (tmp && regtry(prog, s))
1798                         goto got_it;
1799                     else
1800                         tmp = doevery;
1801                 }
1802                 else
1803                     tmp = 1;
1804                 s++;
1805             }
1806             break;
1807         case NPUNCTLUTF8:
1808             PL_reg_flags |= RF_tainted;
1809             while (s < strend) {
1810                 if (!isPUNCT_LC_utf8((U8*)s)) {
1811                     if (tmp && regtry(prog, s))
1812                         goto got_it;
1813                     else
1814                         tmp = doevery;
1815                 }
1816                 else
1817                     tmp = 1;
1818                 s += UTF8SKIP(s);
1819             }
1820             break;
1821         case UPPER:
1822             while (s < strend) {
1823                 if (isUPPER(*s)) {
1824                     if (tmp && regtry(prog, s))
1825                         goto got_it;
1826                     else
1827                         tmp = doevery;
1828                 }
1829                 else
1830                     tmp = 1;
1831                 s++;
1832             }
1833             break;
1834         case UPPERUTF8:
1835             while (s < strend) {
1836                 if (swash_fetch(PL_utf8_upper,(U8*)s)) {
1837                     if (tmp && regtry(prog, s))
1838                         goto got_it;
1839                     else
1840                         tmp = doevery;
1841                 }
1842                 else
1843                     tmp = 1;
1844                 s += UTF8SKIP(s);
1845             }
1846             break;
1847         case UPPERL:
1848             PL_reg_flags |= RF_tainted;
1849             while (s < strend) {
1850                 if (isUPPER_LC(*s)) {
1851                     if (tmp && regtry(prog, s))
1852                         goto got_it;
1853                     else
1854                         tmp = doevery;
1855                 }
1856                 else
1857                     tmp = 1;
1858                 s++;
1859             }
1860             break;
1861         case UPPERLUTF8:
1862             PL_reg_flags |= RF_tainted;
1863             while (s < strend) {
1864                 if (*s == ' ' || isUPPER_LC_utf8((U8*)s)) {
1865                     if (tmp && regtry(prog, s))
1866                         goto got_it;
1867                     else
1868                         tmp = doevery;
1869                 }
1870                 else
1871                     tmp = 1;
1872                 s += UTF8SKIP(s);
1873             }
1874             break;
1875         case NUPPER:
1876             while (s < strend) {
1877                 if (!isUPPER(*s)) {
1878                     if (tmp && regtry(prog, s))
1879                         goto got_it;
1880                     else
1881                         tmp = doevery;
1882                 }
1883                 else
1884                     tmp = 1;
1885                 s++;
1886             }
1887             break;
1888         case NUPPERUTF8:
1889             while (s < strend) {
1890                 if (!swash_fetch(PL_utf8_upper,(U8*)s)) {
1891                     if (tmp && regtry(prog, s))
1892                         goto got_it;
1893                     else
1894                         tmp = doevery;
1895                 }
1896                 else
1897                     tmp = 1;
1898                 s += UTF8SKIP(s);
1899             }
1900             break;
1901         case NUPPERL:
1902             PL_reg_flags |= RF_tainted;
1903             while (s < strend) {
1904                 if (!isUPPER_LC(*s)) {
1905                     if (tmp && regtry(prog, s))
1906                         goto got_it;
1907                     else
1908                         tmp = doevery;
1909                 }
1910                 else
1911                     tmp = 1;
1912                 s++;
1913             }
1914             break;
1915         case NUPPERLUTF8:
1916             PL_reg_flags |= RF_tainted;
1917             while (s < strend) {
1918                 if (!isUPPER_LC_utf8((U8*)s)) {
1919                     if (tmp && regtry(prog, s))
1920                         goto got_it;
1921                     else
1922                         tmp = doevery;
1923                 }
1924                 else
1925                     tmp = 1;
1926                 s += UTF8SKIP(s);
1927             }
1928             break;
1929         case XDIGIT:
1930             while (s < strend) {
1931                 if (isXDIGIT(*s)) {
1932                     if (tmp && regtry(prog, s))
1933                         goto got_it;
1934                     else
1935                         tmp = doevery;
1936                 }
1937                 else
1938                     tmp = 1;
1939                 s++;
1940             }
1941             break;
1942         case NXDIGIT:
1943             while (s < strend) {
1944                 if (!isXDIGIT(*s)) {
1945                     if (tmp && regtry(prog, s))
1946                         goto got_it;
1947                     else
1948                         tmp = doevery;
1949                 }
1950                 else
1951                     tmp = 1;
1952                 s++;
1953             }
1954             break;
1955         }
1956     }
1957     else {
1958         dontbother = 0;
1959         if (prog->float_substr != Nullsv) {     /* Trim the end. */
1960             char *last;
1961             I32 oldpos = scream_pos;
1962
1963             if (flags & REXEC_SCREAM) {
1964                 last = screaminstr(sv, prog->float_substr, s - strbeg,
1965                                    end_shift, &scream_pos, 1); /* last one */
1966                 if (!last)
1967                     last = scream_olds; /* Only one occurence. */
1968             }
1969             else {
1970                 STRLEN len;
1971                 char *little = SvPV(prog->float_substr, len);
1972
1973                 if (SvTAIL(prog->float_substr)) {
1974                     if (memEQ(strend - len + 1, little, len - 1))
1975                         last = strend - len + 1;
1976                     else if (!PL_multiline)
1977                         last = memEQ(strend - len, little, len) 
1978                             ? strend - len : Nullch;
1979                     else
1980                         goto find_last;
1981                 } else {
1982                   find_last:
1983                     if (len) 
1984                         last = rninstr(s, strend, little, little + len);
1985                     else
1986                         last = strend;  /* matching `$' */
1987                 }
1988             }
1989             if (last == NULL) goto phooey; /* Should not happen! */
1990             dontbother = strend - last + prog->float_min_offset;
1991         }
1992         if (minlen && (dontbother < minlen))
1993             dontbother = minlen - 1;
1994         strend -= dontbother;              /* this one's always in bytes! */
1995         /* We don't know much -- general case. */
1996         if (UTF) {
1997             for (;;) {
1998                 if (regtry(prog, s))
1999                     goto got_it;
2000                 if (s >= strend)
2001                     break;
2002                 s += UTF8SKIP(s);
2003             };
2004         }
2005         else {
2006             do {
2007                 if (regtry(prog, s))
2008                     goto got_it;
2009             } while (s++ < strend);
2010         }
2011     }
2012
2013     /* Failure. */
2014     goto phooey;
2015
2016 got_it:
2017     RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
2018
2019     if (PL_reg_eval_set) {
2020         /* Preserve the current value of $^R */
2021         if (oreplsv != GvSV(PL_replgv))
2022             sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
2023                                                   restored, the value remains
2024                                                   the same. */
2025         restore_pos(0);
2026     }
2027
2028     /* make sure $`, $&, $', and $digit will work later */
2029     if ( !(flags & REXEC_NOT_FIRST) ) {
2030         if (RX_MATCH_COPIED(prog)) {
2031             Safefree(prog->subbeg);
2032             RX_MATCH_COPIED_off(prog);
2033         }
2034         if (flags & REXEC_COPY_STR) {
2035             I32 i = PL_regeol - startpos + (stringarg - strbeg);
2036
2037             s = savepvn(strbeg, i);
2038             prog->subbeg = s;
2039             prog->sublen = i;
2040             RX_MATCH_COPIED_on(prog);
2041         }
2042         else {
2043             prog->subbeg = strbeg;
2044             prog->sublen = PL_regeol - strbeg;  /* strend may have been modified */
2045         }
2046     }
2047     
2048     return 1;
2049
2050 phooey:
2051     if (PL_reg_eval_set)
2052         restore_pos(0);
2053     return 0;
2054 }
2055
2056 /*
2057  - regtry - try match at specific point
2058  */
2059 STATIC I32                      /* 0 failure, 1 success */
2060 S_regtry(pTHX_ regexp *prog, char *startpos)
2061 {
2062     dTHR;
2063     register I32 i;
2064     register I32 *sp;
2065     register I32 *ep;
2066     CHECKPOINT lastcp;
2067
2068     if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
2069         MAGIC *mg;
2070
2071         PL_reg_eval_set = RS_init;
2072         DEBUG_r(DEBUG_s(
2073             PerlIO_printf(Perl_debug_log, "  setting stack tmpbase at %i\n",
2074                           PL_stack_sp - PL_stack_base);
2075             ));
2076         SAVEINT(cxstack[cxstack_ix].blk_oldsp);
2077         cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
2078         /* Otherwise OP_NEXTSTATE will free whatever on stack now.  */
2079         SAVETMPS;
2080         /* Apparently this is not needed, judging by wantarray. */
2081         /* SAVEINT(cxstack[cxstack_ix].blk_gimme);
2082            cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
2083
2084         if (PL_reg_sv) {
2085             /* Make $_ available to executed code. */
2086             if (PL_reg_sv != DEFSV) {
2087                 /* SAVE_DEFSV does *not* suffice here for USE_THREADS */
2088                 SAVESPTR(DEFSV);
2089                 DEFSV = PL_reg_sv;
2090             }
2091         
2092             if (!(SvTYPE(PL_reg_sv) >= SVt_PVMG && SvMAGIC(PL_reg_sv) 
2093                   && (mg = mg_find(PL_reg_sv, 'g')))) {
2094                 /* prepare for quick setting of pos */
2095                 sv_magic(PL_reg_sv, (SV*)0, 'g', Nullch, 0);
2096                 mg = mg_find(PL_reg_sv, 'g');
2097                 mg->mg_len = -1;
2098             }
2099             PL_reg_magic    = mg;
2100             PL_reg_oldpos   = mg->mg_len;
2101             SAVEDESTRUCTOR(S_restore_pos, 0);
2102         }
2103         if (!PL_reg_curpm)
2104             New(22,PL_reg_curpm, 1, PMOP);
2105         PL_reg_curpm->op_pmregexp = prog;
2106         PL_reg_oldcurpm = PL_curpm;
2107         PL_curpm = PL_reg_curpm;
2108         if (RX_MATCH_COPIED(prog)) {
2109             /*  Here is a serious problem: we cannot rewrite subbeg,
2110                 since it may be needed if this match fails.  Thus
2111                 $` inside (?{}) could fail... */
2112             PL_reg_oldsaved = prog->subbeg;
2113             PL_reg_oldsavedlen = prog->sublen;
2114             RX_MATCH_COPIED_off(prog);
2115         }
2116         else
2117             PL_reg_oldsaved = Nullch;
2118         prog->subbeg = PL_bostr;
2119         prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
2120     }
2121     prog->startp[0] = startpos - PL_bostr;
2122     PL_reginput = startpos;
2123     PL_regstartp = prog->startp;
2124     PL_regendp = prog->endp;
2125     PL_reglastparen = &prog->lastparen;
2126     prog->lastparen = 0;
2127     PL_regsize = 0;
2128     DEBUG_r(PL_reg_starttry = startpos);
2129     if (PL_reg_start_tmpl <= prog->nparens) {
2130         PL_reg_start_tmpl = prog->nparens*3/2 + 3;
2131         if(PL_reg_start_tmp)
2132             Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
2133         else
2134             New(22,PL_reg_start_tmp, PL_reg_start_tmpl, char*);
2135     }
2136
2137     /* XXXX What this code is doing here?!!!  There should be no need
2138        to do this again and again, PL_reglastparen should take care of
2139        this!  */
2140     sp = prog->startp;
2141     ep = prog->endp;
2142     if (prog->nparens) {
2143         for (i = prog->nparens; i >= 1; i--) {
2144             *++sp = -1;
2145             *++ep = -1;
2146         }
2147     }
2148     REGCP_SET;
2149     if (regmatch(prog->program + 1)) {
2150         prog->endp[0] = PL_reginput - PL_bostr;
2151         return 1;
2152     }
2153     REGCP_UNWIND;
2154     return 0;
2155 }
2156
2157 /*
2158  - regmatch - main matching routine
2159  *
2160  * Conceptually the strategy is simple:  check to see whether the current
2161  * node matches, call self recursively to see whether the rest matches,
2162  * and then act accordingly.  In practice we make some effort to avoid
2163  * recursion, in particular by going through "ordinary" nodes (that don't
2164  * need to know whether the rest of the match failed) by a loop instead of
2165  * by recursion.
2166  */
2167 /* [lwall] I've hoisted the register declarations to the outer block in order to
2168  * maybe save a little bit of pushing and popping on the stack.  It also takes
2169  * advantage of machines that use a register save mask on subroutine entry.
2170  */
2171 STATIC I32                      /* 0 failure, 1 success */
2172 S_regmatch(pTHX_ regnode *prog)
2173 {
2174     dTHR;
2175     register regnode *scan;     /* Current node. */
2176     regnode *next;              /* Next node. */
2177     regnode *inner;             /* Next node in internal branch. */
2178     register I32 nextchr;       /* renamed nextchr - nextchar colides with
2179                                    function of same name */
2180     register I32 n;             /* no or next */
2181     register I32 ln;            /* len or last */
2182     register char *s;           /* operand or save */
2183     register char *locinput = PL_reginput;
2184     register I32 c1, c2, paren; /* case fold search, parenth */
2185     int minmod = 0, sw = 0, logical = 0;
2186 #ifdef DEBUGGING
2187     PL_regindent++;
2188 #endif
2189
2190     /* Note that nextchr is a byte even in UTF */
2191     nextchr = UCHARAT(locinput);
2192     scan = prog;
2193     while (scan != NULL) {
2194 #define sayNO_L (logical ? (logical = 0, sw = 0, goto cont) : sayNO)
2195 #ifdef DEBUGGING
2196 #  define sayYES goto yes
2197 #  define sayNO goto no
2198 #  define saySAME(x) if (x) goto yes; else goto no
2199 #  define REPORT_CODE_OFF 24
2200 #else
2201 #  define sayYES return 1
2202 #  define sayNO return 0
2203 #  define saySAME(x) return x
2204 #endif
2205         DEBUG_r( {
2206             SV *prop = sv_newmortal();
2207             int docolor = *PL_colors[0];
2208             int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
2209             int l = (PL_regeol - locinput > taill ? taill : PL_regeol - locinput);
2210             /* The part of the string before starttry has one color
2211                (pref0_len chars), between starttry and current
2212                position another one (pref_len - pref0_len chars),
2213                after the current position the third one.
2214                We assume that pref0_len <= pref_len, otherwise we
2215                decrease pref0_len.  */
2216             int pref_len = (locinput - PL_bostr > (5 + taill) - l 
2217                             ? (5 + taill) - l : locinput - PL_bostr);
2218             int pref0_len = pref_len  - (locinput - PL_reg_starttry);
2219
2220             if (l + pref_len < (5 + taill) && l < PL_regeol - locinput)
2221                 l = ( PL_regeol - locinput > (5 + taill) - pref_len 
2222                       ? (5 + taill) - pref_len : PL_regeol - locinput);
2223             if (pref0_len < 0)
2224                 pref0_len = 0;
2225             if (pref0_len > pref_len)
2226                 pref0_len = pref_len;
2227             regprop(prop, scan);
2228             PerlIO_printf(Perl_debug_log, 
2229                           "%4i <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|%3d:%*s%s\n",
2230                           locinput - PL_bostr, 
2231                           PL_colors[4], pref0_len, 
2232                           locinput - pref_len, PL_colors[5],
2233                           PL_colors[2], pref_len - pref0_len, 
2234                           locinput - pref_len + pref0_len, PL_colors[3],
2235                           (docolor ? "" : "> <"),
2236                           PL_colors[0], l, locinput, PL_colors[1],
2237                           15 - l - pref_len + 1,
2238                           "",
2239                           scan - PL_regprogram, PL_regindent*2, "",
2240                           SvPVX(prop));
2241         } );
2242
2243         next = scan + NEXT_OFF(scan);
2244         if (next == scan)
2245             next = NULL;
2246
2247         switch (OP(scan)) {
2248         case BOL:
2249             if (locinput == PL_bostr
2250                 ? PL_regprev == '\n'
2251                 : (PL_multiline && 
2252                    (nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
2253             {
2254                 /* regtill = regbol; */
2255                 break;
2256             }
2257             sayNO;
2258         case MBOL:
2259             if (locinput == PL_bostr
2260                 ? PL_regprev == '\n'
2261                 : ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
2262             {
2263                 break;
2264             }
2265             sayNO;
2266         case SBOL:
2267             if (locinput == PL_regbol && PL_regprev == '\n')
2268                 break;
2269             sayNO;
2270         case GPOS:
2271             if (locinput == PL_reg_ganch)
2272                 break;
2273             sayNO;
2274         case EOL:
2275             if (PL_multiline)
2276                 goto meol;
2277             else
2278                 goto seol;
2279         case MEOL:
2280           meol:
2281             if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
2282                 sayNO;
2283             break;
2284         case SEOL:
2285           seol:
2286             if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
2287                 sayNO;
2288             if (PL_regeol - locinput > 1)
2289                 sayNO;
2290             break;
2291         case EOS:
2292             if (PL_regeol != locinput)
2293                 sayNO;
2294             break;
2295         case SANYUTF8:
2296             if (nextchr & 0x80) {
2297                 locinput += PL_utf8skip[nextchr];
2298                 if (locinput > PL_regeol)
2299                     sayNO;
2300                 nextchr = UCHARAT(locinput);
2301                 break;
2302             }
2303             if (!nextchr && locinput >= PL_regeol)
2304                 sayNO;
2305             nextchr = UCHARAT(++locinput);
2306             break;
2307         case SANY:
2308             if (!nextchr && locinput >= PL_regeol)
2309                 sayNO;
2310             nextchr = UCHARAT(++locinput);
2311             break;
2312         case ANYUTF8:
2313             if (nextchr & 0x80) {
2314                 locinput += PL_utf8skip[nextchr];
2315                 if (locinput > PL_regeol)
2316                     sayNO;
2317                 nextchr = UCHARAT(locinput);
2318                 break;
2319             }
2320             if (!nextchr && locinput >= PL_regeol || nextchr == '\n')
2321                 sayNO;
2322             nextchr = UCHARAT(++locinput);
2323             break;
2324         case REG_ANY:
2325             if (!nextchr && locinput >= PL_regeol || nextchr == '\n')
2326                 sayNO;
2327             nextchr = UCHARAT(++locinput);
2328             break;
2329         case EXACT:
2330             s = (char *) OPERAND(scan);
2331             ln = UCHARAT(s++);
2332             /* Inline the first character, for speed. */
2333             if (UCHARAT(s) != nextchr)
2334                 sayNO;
2335             if (PL_regeol - locinput < ln)
2336                 sayNO;
2337             if (ln > 1 && memNE(s, locinput, ln))
2338                 sayNO;
2339             locinput += ln;
2340             nextchr = UCHARAT(locinput);
2341             break;
2342         case EXACTFL:
2343             PL_reg_flags |= RF_tainted;
2344             /* FALL THROUGH */
2345         case EXACTF:
2346             s = (char *) OPERAND(scan);
2347             ln = UCHARAT(s++);
2348
2349             if (UTF) {
2350                 char *l = locinput;
2351                 char *e = s + ln;
2352                 c1 = OP(scan) == EXACTF;
2353                 while (s < e) {
2354                     if (l >= PL_regeol)
2355                         sayNO;
2356                     if (utf8_to_uv((U8*)s, 0) != (c1 ?
2357                                                   toLOWER_utf8((U8*)l) :
2358                                                   toLOWER_LC_utf8((U8*)l)))
2359                     {
2360                         sayNO;
2361                     }
2362                     s += UTF8SKIP(s);
2363                     l += UTF8SKIP(l);
2364                 }
2365                 locinput = l;
2366                 nextchr = UCHARAT(locinput);
2367                 break;
2368             }
2369
2370             /* Inline the first character, for speed. */
2371             if (UCHARAT(s) != nextchr &&
2372                 UCHARAT(s) != ((OP(scan) == EXACTF)
2373                                ? PL_fold : PL_fold_locale)[nextchr])
2374                 sayNO;
2375             if (PL_regeol - locinput < ln)
2376                 sayNO;
2377             if (ln > 1 && (OP(scan) == EXACTF
2378                            ? ibcmp(s, locinput, ln)
2379                            : ibcmp_locale(s, locinput, ln)))
2380                 sayNO;
2381             locinput += ln;
2382             nextchr = UCHARAT(locinput);
2383             break;
2384         case ANYOFUTF8:
2385             s = (char *) OPERAND(scan);
2386             if (!REGINCLASSUTF8(scan, (U8*)locinput))
2387                 sayNO;
2388             if (locinput >= PL_regeol)
2389                 sayNO;
2390             locinput += PL_utf8skip[nextchr];
2391             nextchr = UCHARAT(locinput);
2392             break;
2393         case ANYOF:
2394             s = (char *) OPERAND(scan);
2395             if (nextchr < 0)
2396                 nextchr = UCHARAT(locinput);
2397             if (!REGINCLASS(s, nextchr))
2398                 sayNO;
2399             if (!nextchr && locinput >= PL_regeol)
2400                 sayNO;
2401             nextchr = UCHARAT(++locinput);
2402             break;
2403         case ALNUML:
2404             PL_reg_flags |= RF_tainted;
2405             /* FALL THROUGH */
2406         case ALNUM:
2407             if (!nextchr)
2408                 sayNO;
2409             if (!(OP(scan) == ALNUM
2410                   ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
2411                 sayNO;
2412             nextchr = UCHARAT(++locinput);
2413             break;
2414         case ALNUMLUTF8:
2415             PL_reg_flags |= RF_tainted;
2416             /* FALL THROUGH */
2417         case ALNUMUTF8:
2418             if (!nextchr)
2419                 sayNO;
2420             if (nextchr & 0x80) {
2421                 if (!(OP(scan) == ALNUMUTF8
2422                       ? swash_fetch(PL_utf8_alnum, (U8*)locinput)
2423                       : isALNUM_LC_utf8((U8*)locinput)))
2424                 {
2425                     sayNO;
2426                 }
2427                 locinput += PL_utf8skip[nextchr];
2428                 nextchr = UCHARAT(locinput);
2429                 break;
2430             }
2431             if (!(OP(scan) == ALNUMUTF8
2432                   ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
2433                 sayNO;
2434             nextchr = UCHARAT(++locinput);
2435             break;
2436         case NALNUML:
2437             PL_reg_flags |= RF_tainted;
2438             /* FALL THROUGH */
2439         case NALNUM:
2440             if (!nextchr && locinput >= PL_regeol)
2441                 sayNO;
2442             if (OP(scan) == NALNUM
2443                 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
2444                 sayNO;
2445             nextchr = UCHARAT(++locinput);
2446             break;
2447         case NALNUMLUTF8:
2448             PL_reg_flags |= RF_tainted;
2449             /* FALL THROUGH */
2450         case NALNUMUTF8:
2451             if (!nextchr && locinput >= PL_regeol)
2452                 sayNO;
2453             if (nextchr & 0x80) {
2454                 if (OP(scan) == NALNUMUTF8
2455                     ? swash_fetch(PL_utf8_alnum, (U8*)locinput)
2456                     : isALNUM_LC_utf8((U8*)locinput))
2457                 {
2458                     sayNO;
2459                 }
2460                 locinput += PL_utf8skip[nextchr];
2461                 nextchr = UCHARAT(locinput);
2462                 break;
2463             }
2464             if (OP(scan) == NALNUMUTF8
2465                 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
2466                 sayNO;
2467             nextchr = UCHARAT(++locinput);
2468             break;
2469         case BOUNDL:
2470         case NBOUNDL:
2471             PL_reg_flags |= RF_tainted;
2472             /* FALL THROUGH */
2473         case BOUND:
2474         case NBOUND:
2475             /* was last char in word? */
2476             ln = (locinput != PL_regbol) ? UCHARAT(locinput - 1) : PL_regprev;
2477             if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2478                 ln = isALNUM(ln);
2479                 n = isALNUM(nextchr);
2480             }
2481             else {
2482                 ln = isALNUM_LC(ln);
2483                 n = isALNUM_LC(nextchr);
2484             }
2485             if (((!ln) == (!n)) == (OP(scan) == BOUND || OP(scan) == BOUNDL))
2486                 sayNO;
2487             break;
2488         case BOUNDLUTF8:
2489         case NBOUNDLUTF8:
2490             PL_reg_flags |= RF_tainted;
2491             /* FALL THROUGH */
2492         case BOUNDUTF8:
2493         case NBOUNDUTF8:
2494             /* was last char in word? */
2495             ln = (locinput != PL_regbol)
2496                 ? utf8_to_uv(reghop((U8*)locinput, -1), 0) : PL_regprev;
2497             if (OP(scan) == BOUNDUTF8 || OP(scan) == NBOUNDUTF8) {
2498                 ln = isALNUM_uni(ln);
2499                 n = swash_fetch(PL_utf8_alnum, (U8*)locinput);
2500             }
2501             else {
2502                 ln = isALNUM_LC_uni(ln);
2503                 n = isALNUM_LC_utf8((U8*)locinput);
2504             }
2505             if (((!ln) == (!n)) == (OP(scan) == BOUNDUTF8 || OP(scan) == BOUNDLUTF8))
2506                 sayNO;
2507             break;
2508         case SPACEL:
2509             PL_reg_flags |= RF_tainted;
2510             /* FALL THROUGH */
2511         case SPACE:
2512             if (!nextchr && locinput >= PL_regeol)
2513                 sayNO;
2514             if (!(OP(scan) == SPACE
2515                   ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
2516                 sayNO;
2517             nextchr = UCHARAT(++locinput);
2518             break;
2519         case SPACELUTF8:
2520             PL_reg_flags |= RF_tainted;
2521             /* FALL THROUGH */
2522         case SPACEUTF8:
2523             if (!nextchr && locinput >= PL_regeol)
2524                 sayNO;
2525             if (nextchr & 0x80) {
2526                 if (!(OP(scan) == SPACEUTF8
2527                       ? swash_fetch(PL_utf8_space,(U8*)locinput)
2528                       : isSPACE_LC_utf8((U8*)locinput)))
2529                 {
2530                     sayNO;
2531                 }
2532                 locinput += PL_utf8skip[nextchr];
2533                 nextchr = UCHARAT(locinput);
2534                 break;
2535             }
2536             if (!(OP(scan) == SPACEUTF8
2537                   ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
2538                 sayNO;
2539             nextchr = UCHARAT(++locinput);
2540             break;
2541         case NSPACEL:
2542             PL_reg_flags |= RF_tainted;
2543             /* FALL THROUGH */
2544         case NSPACE:
2545             if (!nextchr)
2546                 sayNO;
2547             if (OP(scan) == SPACE
2548                 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
2549                 sayNO;
2550             nextchr = UCHARAT(++locinput);
2551             break;
2552         case NSPACELUTF8:
2553             PL_reg_flags |= RF_tainted;
2554             /* FALL THROUGH */
2555         case NSPACEUTF8:
2556             if (!nextchr)
2557                 sayNO;
2558             if (nextchr & 0x80) {
2559                 if (OP(scan) == NSPACEUTF8
2560                     ? swash_fetch(PL_utf8_space,(U8*)locinput)
2561                     : isSPACE_LC_utf8((U8*)locinput))
2562                 {
2563                     sayNO;
2564                 }
2565                 locinput += PL_utf8skip[nextchr];
2566                 nextchr = UCHARAT(locinput);
2567                 break;
2568             }
2569             if (OP(scan) == NSPACEUTF8
2570                 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
2571                 sayNO;
2572             nextchr = UCHARAT(++locinput);
2573             break;
2574         case DIGITL:
2575             PL_reg_flags |= RF_tainted;
2576             /* FALL THROUGH */
2577         case DIGIT:
2578             if (!nextchr && locinput >= PL_regeol)
2579                 sayNO;
2580             if (!(OP(scan) == DIGIT
2581                   ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
2582                 sayNO;
2583             nextchr = UCHARAT(++locinput);
2584             break;
2585         case DIGITLUTF8:
2586             PL_reg_flags |= RF_tainted;
2587             /* FALL THROUGH */
2588         case DIGITUTF8:
2589             if (!nextchr)
2590                 sayNO;
2591             if (nextchr & 0x80) {
2592                 if (OP(scan) == NDIGITUTF8
2593                     ? swash_fetch(PL_utf8_digit,(U8*)locinput)
2594                     : isDIGIT_LC_utf8((U8*)locinput))
2595                 {
2596                     sayNO;
2597                 }
2598                 locinput += PL_utf8skip[nextchr];
2599                 nextchr = UCHARAT(locinput);
2600                 break;
2601             }
2602             if (!isDIGIT(nextchr))
2603                 sayNO;
2604             nextchr = UCHARAT(++locinput);
2605             break;
2606         case NDIGITL:
2607             PL_reg_flags |= RF_tainted;
2608             /* FALL THROUGH */
2609         case NDIGIT:
2610             if (!nextchr)
2611                 sayNO;
2612             if (OP(scan) == DIGIT
2613                 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
2614                 sayNO;
2615             nextchr = UCHARAT(++locinput);
2616             break;
2617         case NDIGITLUTF8:
2618             PL_reg_flags |= RF_tainted;
2619             /* FALL THROUGH */
2620         case NDIGITUTF8:
2621             if (!nextchr && locinput >= PL_regeol)
2622                 sayNO;
2623             if (nextchr & 0x80) {
2624                 if (swash_fetch(PL_utf8_digit,(U8*)locinput))
2625                     sayNO;
2626                 locinput += PL_utf8skip[nextchr];
2627                 nextchr = UCHARAT(locinput);
2628                 break;
2629             }
2630             if (isDIGIT(nextchr))
2631                 sayNO;
2632             nextchr = UCHARAT(++locinput);
2633             break;
2634         case ALNUMCL:
2635             PL_reg_flags |= RF_tainted;
2636             /* FALL THROUGH */
2637         case ALNUMC:
2638             if (!nextchr)
2639                 sayNO;
2640             if (!(OP(scan) == ALNUMC
2641                   ? isALNUMC(nextchr) : isALNUMC_LC(nextchr)))
2642                 sayNO;
2643             nextchr = UCHARAT(++locinput);
2644             break;
2645         case ALNUMCLUTF8:
2646             PL_reg_flags |= RF_tainted;
2647             /* FALL THROUGH */
2648         case ALNUMCUTF8:
2649             if (!nextchr)
2650                 sayNO;
2651             if (nextchr & 0x80) {
2652                 if (!(OP(scan) == ALNUMCUTF8
2653                       ? swash_fetch(PL_utf8_alnumc, (U8*)locinput)
2654                       : isALNUMC_LC_utf8((U8*)locinput)))
2655                 {
2656                     sayNO;
2657                 }
2658                 locinput += PL_utf8skip[nextchr];
2659                 nextchr = UCHARAT(locinput);
2660                 break;
2661             }
2662             if (!(OP(scan) == ALNUMCUTF8
2663                   ? isALNUMC(nextchr) : isALNUMC_LC(nextchr)))
2664                 sayNO;
2665             nextchr = UCHARAT(++locinput);
2666             break;
2667         case NALNUMCL:
2668             PL_reg_flags |= RF_tainted;
2669             /* FALL THROUGH */
2670         case NALNUMC:
2671             if (!nextchr)
2672                 sayNO;
2673             if (OP(scan) == ALNUMC
2674                 ? isALNUMC(nextchr) : isALNUMC_LC(nextchr))
2675                 sayNO;
2676             nextchr = UCHARAT(++locinput);
2677             break;
2678         case NALNUMCLUTF8:
2679             PL_reg_flags |= RF_tainted;
2680             /* FALL THROUGH */
2681         case NALNUMCUTF8:
2682             if (!nextchr && locinput >= PL_regeol)
2683                 sayNO;
2684             if (nextchr & 0x80) {
2685                 if (swash_fetch(PL_utf8_alnumc,(U8*)locinput))
2686                     sayNO;
2687                 locinput += PL_utf8skip[nextchr];
2688                 nextchr = UCHARAT(locinput);
2689                 break;
2690             }
2691             if (isALNUMC(nextchr))
2692                 sayNO;
2693             nextchr = UCHARAT(++locinput);
2694             break;
2695         case ALPHAL:
2696             PL_reg_flags |= RF_tainted;
2697             /* FALL THROUGH */
2698         case ALPHA:
2699             if (!nextchr)
2700                 sayNO;
2701             if (!(OP(scan) == ALPHA
2702                   ? isALPHA(nextchr) : isALPHA_LC(nextchr)))
2703                 sayNO;
2704             nextchr = UCHARAT(++locinput);
2705             break;
2706         case ALPHALUTF8:
2707             PL_reg_flags |= RF_tainted;
2708             /* FALL THROUGH */
2709         case ALPHAUTF8:
2710             if (!nextchr)
2711                 sayNO;
2712             if (nextchr & 0x80) {
2713                 if (!(OP(scan) == ALPHAUTF8
2714                       ? swash_fetch(PL_utf8_alpha, (U8*)locinput)
2715                       : isALPHA_LC_utf8((U8*)locinput)))
2716                 {
2717                     sayNO;
2718                 }
2719                 locinput += PL_utf8skip[nextchr];
2720                 nextchr = UCHARAT(locinput);
2721                 break;
2722             }
2723             if (!(OP(scan) == ALPHAUTF8
2724                   ? isALPHA(nextchr) : isALPHA_LC(nextchr)))
2725                 sayNO;
2726             nextchr = UCHARAT(++locinput);
2727             break;
2728         case NALPHAL:
2729             PL_reg_flags |= RF_tainted;
2730             /* FALL THROUGH */
2731         case NALPHA:
2732             if (!nextchr)
2733                 sayNO;
2734             if (OP(scan) == ALPHA
2735                 ? isALPHA(nextchr) : isALPHA_LC(nextchr))
2736                 sayNO;
2737             nextchr = UCHARAT(++locinput);
2738             break;
2739         case NALPHALUTF8:
2740             PL_reg_flags |= RF_tainted;
2741             /* FALL THROUGH */
2742         case NALPHAUTF8:
2743             if (!nextchr && locinput >= PL_regeol)
2744                 sayNO;
2745             if (nextchr & 0x80) {
2746                 if (swash_fetch(PL_utf8_alpha,(U8*)locinput))
2747                     sayNO;
2748                 locinput += PL_utf8skip[nextchr];
2749                 nextchr = UCHARAT(locinput);
2750                 break;
2751             }
2752             if (isALPHA(nextchr))
2753                 sayNO;
2754             nextchr = UCHARAT(++locinput);
2755             break;
2756         case ASCII:
2757             if (!nextchr && locinput >= PL_regeol)
2758                 sayNO;
2759             if (!isASCII(nextchr))
2760                 sayNO;
2761             nextchr = UCHARAT(++locinput);
2762             break;
2763         case NASCII:
2764             if (!nextchr && locinput >= PL_regeol)
2765                 sayNO;
2766             if (isASCII(nextchr))
2767                 sayNO;
2768             nextchr = UCHARAT(++locinput);
2769             break;
2770         case CNTRLL:
2771             PL_reg_flags |= RF_tainted;
2772             /* FALL THROUGH */
2773         case CNTRL:
2774             if (!nextchr)
2775                 sayNO;
2776             if (!(OP(scan) == CNTRL
2777                   ? isCNTRL(nextchr) : isCNTRL_LC(nextchr)))
2778                 sayNO;
2779             nextchr = UCHARAT(++locinput);
2780             break;
2781         case CNTRLLUTF8:
2782             PL_reg_flags |= RF_tainted;
2783             /* FALL THROUGH */
2784         case CNTRLUTF8:
2785             if (!nextchr)
2786                 sayNO;
2787             if (nextchr & 0x80) {
2788                 if (!(OP(scan) == CNTRLUTF8
2789                       ? swash_fetch(PL_utf8_cntrl, (U8*)locinput)
2790                       : isCNTRL_LC_utf8((U8*)locinput)))
2791                 {
2792                     sayNO;
2793                 }
2794                 locinput += PL_utf8skip[nextchr];
2795                 nextchr = UCHARAT(locinput);
2796                 break;
2797             }
2798             if (!(OP(scan) == CNTRLUTF8
2799                   ? isCNTRL(nextchr) : isCNTRL_LC(nextchr)))
2800                 sayNO;
2801             nextchr = UCHARAT(++locinput);
2802             break;
2803         case NCNTRLL:
2804             PL_reg_flags |= RF_tainted;
2805             /* FALL THROUGH */
2806         case NCNTRL:
2807             if (!nextchr)
2808                 sayNO;
2809             if (OP(scan) == CNTRL
2810                 ? isCNTRL(nextchr) : isCNTRL_LC(nextchr))
2811                 sayNO;
2812             nextchr = UCHARAT(++locinput);
2813             break;
2814         case NCNTRLLUTF8:
2815             PL_reg_flags |= RF_tainted;
2816             /* FALL THROUGH */
2817         case NCNTRLUTF8:
2818             if (!nextchr && locinput >= PL_regeol)
2819                 sayNO;
2820             if (nextchr & 0x80) {
2821                 if (swash_fetch(PL_utf8_cntrl,(U8*)locinput))
2822                     sayNO;
2823                 locinput += PL_utf8skip[nextchr];
2824                 nextchr = UCHARAT(locinput);
2825                 break;
2826             }
2827             if (isCNTRL(nextchr))
2828                 sayNO;
2829             nextchr = UCHARAT(++locinput);
2830             break;
2831         case GRAPHL:
2832             PL_reg_flags |= RF_tainted;
2833             /* FALL THROUGH */
2834         case GRAPH:
2835             if (!nextchr)
2836                 sayNO;
2837             if (!(OP(scan) == GRAPH
2838                   ? isGRAPH(nextchr) : isGRAPH_LC(nextchr)))
2839                 sayNO;
2840             nextchr = UCHARAT(++locinput);
2841             break;
2842         case GRAPHLUTF8:
2843             PL_reg_flags |= RF_tainted;
2844             /* FALL THROUGH */
2845         case GRAPHUTF8:
2846             if (!nextchr)
2847                 sayNO;
2848             if (nextchr & 0x80) {
2849                 if (!(OP(scan) == GRAPHUTF8
2850                       ? swash_fetch(PL_utf8_graph, (U8*)locinput)
2851                       : isGRAPH_LC_utf8((U8*)locinput)))
2852                 {
2853                     sayNO;
2854                 }
2855                 locinput += PL_utf8skip[nextchr];
2856                 nextchr = UCHARAT(locinput);
2857                 break;
2858             }
2859             if (!(OP(scan) == GRAPHUTF8
2860                   ? isGRAPH(nextchr) : isGRAPH_LC(nextchr)))
2861                 sayNO;
2862             nextchr = UCHARAT(++locinput);
2863             break;
2864         case NGRAPHL:
2865             PL_reg_flags |= RF_tainted;
2866             /* FALL THROUGH */
2867         case NGRAPH:
2868             if (!nextchr)
2869                 sayNO;
2870             if (OP(scan) == GRAPH
2871                 ? isGRAPH(nextchr) : isGRAPH_LC(nextchr))
2872                 sayNO;
2873             nextchr = UCHARAT(++locinput);
2874             break;
2875         case NGRAPHLUTF8:
2876             PL_reg_flags |= RF_tainted;
2877             /* FALL THROUGH */
2878         case NGRAPHUTF8:
2879             if (!nextchr && locinput >= PL_regeol)
2880                 sayNO;
2881             if (nextchr & 0x80) {
2882                 if (swash_fetch(PL_utf8_graph,(U8*)locinput))
2883                     sayNO;
2884                 locinput += PL_utf8skip[nextchr];
2885                 nextchr = UCHARAT(locinput);
2886                 break;
2887             }
2888             if (isGRAPH(nextchr))
2889                 sayNO;
2890             nextchr = UCHARAT(++locinput);
2891             break;
2892         case LOWERL:
2893             PL_reg_flags |= RF_tainted;
2894             /* FALL THROUGH */
2895         case LOWER:
2896             if (!nextchr)
2897                 sayNO;
2898             if (!(OP(scan) == LOWER
2899                   ? isLOWER(nextchr) : isLOWER_LC(nextchr)))
2900                 sayNO;
2901             nextchr = UCHARAT(++locinput);
2902             break;
2903         case LOWERLUTF8:
2904             PL_reg_flags |= RF_tainted;
2905             /* FALL THROUGH */
2906         case LOWERUTF8:
2907             if (!nextchr)
2908                 sayNO;
2909             if (nextchr & 0x80) {
2910                 if (!(OP(scan) == LOWERUTF8
2911                       ? swash_fetch(PL_utf8_lower, (U8*)locinput)
2912                       : isLOWER_LC_utf8((U8*)locinput)))
2913                 {
2914                     sayNO;
2915                 }
2916                 locinput += PL_utf8skip[nextchr];
2917                 nextchr = UCHARAT(locinput);
2918                 break;
2919             }
2920             if (!(OP(scan) == LOWERUTF8
2921                   ? isLOWER(nextchr) : isLOWER_LC(nextchr)))
2922                 sayNO;
2923             nextchr = UCHARAT(++locinput);
2924             break;
2925         case NLOWERL:
2926             PL_reg_flags |= RF_tainted;
2927             /* FALL THROUGH */
2928         case NLOWER:
2929             if (!nextchr)
2930                 sayNO;
2931             if (OP(scan) == LOWER
2932                 ? isLOWER(nextchr) : isLOWER_LC(nextchr))
2933                 sayNO;
2934             nextchr = UCHARAT(++locinput);
2935             break;
2936         case NLOWERLUTF8:
2937             PL_reg_flags |= RF_tainted;
2938             /* FALL THROUGH */
2939         case NLOWERUTF8:
2940             if (!nextchr && locinput >= PL_regeol)
2941                 sayNO;
2942             if (nextchr & 0x80) {
2943                 if (swash_fetch(PL_utf8_lower,(U8*)locinput))
2944                     sayNO;
2945                 locinput += PL_utf8skip[nextchr];
2946                 nextchr = UCHARAT(locinput);
2947                 break;
2948             }
2949             if (isLOWER(nextchr))
2950                 sayNO;
2951             nextchr = UCHARAT(++locinput);
2952             break;
2953         case PRINTL:
2954             PL_reg_flags |= RF_tainted;
2955             /* FALL THROUGH */
2956         case PRINT:
2957             if (!nextchr)
2958                 sayNO;
2959             if (!(OP(scan) == PRINT
2960                   ? isPRINT(nextchr) : isPRINT_LC(nextchr)))
2961                 sayNO;
2962             nextchr = UCHARAT(++locinput);
2963             break;
2964         case PRINTLUTF8:
2965             PL_reg_flags |= RF_tainted;
2966             /* FALL THROUGH */
2967         case PRINTUTF8:
2968             if (!nextchr)
2969                 sayNO;
2970             if (nextchr & 0x80) {
2971                 if (!(OP(scan) == PRINTUTF8
2972                       ? swash_fetch(PL_utf8_print, (U8*)locinput)
2973                       : isPRINT_LC_utf8((U8*)locinput)))
2974                 {
2975                     sayNO;
2976                 }
2977                 locinput += PL_utf8skip[nextchr];
2978                 nextchr = UCHARAT(locinput);
2979                 break;
2980             }
2981             if (!(OP(scan) == PRINTUTF8
2982                   ? isPRINT(nextchr) : isPRINT_LC(nextchr)))
2983                 sayNO;
2984             nextchr = UCHARAT(++locinput);
2985             break;
2986         case NPRINTL:
2987             PL_reg_flags |= RF_tainted;
2988             /* FALL THROUGH */
2989         case NPRINT:
2990             if (!nextchr)
2991                 sayNO;
2992             if (OP(scan) == PRINT
2993                 ? isPRINT(nextchr) : isPRINT_LC(nextchr))
2994                 sayNO;
2995             nextchr = UCHARAT(++locinput);
2996             break;
2997         case NPRINTLUTF8:
2998             PL_reg_flags |= RF_tainted;
2999             /* FALL THROUGH */
3000         case NPRINTUTF8:
3001             if (!nextchr && locinput >= PL_regeol)
3002                 sayNO;
3003             if (nextchr & 0x80) {
3004                 if (swash_fetch(PL_utf8_print,(U8*)locinput))
3005                     sayNO;
3006                 locinput += PL_utf8skip[nextchr];
3007                 nextchr = UCHARAT(locinput);
3008                 break;
3009             }
3010             if (isPRINT(nextchr))
3011                 sayNO;
3012             nextchr = UCHARAT(++locinput);
3013             break;
3014         case PUNCTL:
3015             PL_reg_flags |= RF_tainted;
3016             /* FALL THROUGH */
3017         case PUNCT:
3018             if (!nextchr)
3019                 sayNO;
3020             if (!(OP(scan) == PUNCT
3021                   ? isPUNCT(nextchr) : isPUNCT_LC(nextchr)))
3022                 sayNO;
3023             nextchr = UCHARAT(++locinput);
3024             break;
3025         case PUNCTLUTF8:
3026             PL_reg_flags |= RF_tainted;
3027             /* FALL THROUGH */
3028         case PUNCTUTF8:
3029             if (!nextchr)
3030                 sayNO;
3031             if (nextchr & 0x80) {
3032                 if (!(OP(scan) == PUNCTUTF8
3033                       ? swash_fetch(PL_utf8_punct, (U8*)locinput)
3034                       : isPUNCT_LC_utf8((U8*)locinput)))
3035                 {
3036                     sayNO;
3037                 }
3038                 locinput += PL_utf8skip[nextchr];
3039                 nextchr = UCHARAT(locinput);
3040                 break;
3041             }
3042             if (!(OP(scan) == PUNCTUTF8
3043                   ? isPUNCT(nextchr) : isPUNCT_LC(nextchr)))
3044                 sayNO;
3045             nextchr = UCHARAT(++locinput);
3046             break;
3047         case NPUNCTL:
3048             PL_reg_flags |= RF_tainted;
3049             /* FALL THROUGH */
3050         case NPUNCT:
3051             if (!nextchr)
3052                 sayNO;
3053             if (OP(scan) == PUNCT
3054                 ? isPUNCT(nextchr) : isPUNCT_LC(nextchr))
3055                 sayNO;
3056             nextchr = UCHARAT(++locinput);
3057             break;
3058         case NPUNCTLUTF8:
3059             PL_reg_flags |= RF_tainted;
3060             /* FALL THROUGH */
3061         case NPUNCTUTF8:
3062             if (!nextchr && locinput >= PL_regeol)
3063                 sayNO;
3064             if (nextchr & 0x80) {
3065                 if (swash_fetch(PL_utf8_punct,(U8*)locinput))
3066                     sayNO;
3067                 locinput += PL_utf8skip[nextchr];
3068                 nextchr = UCHARAT(locinput);
3069                 break;
3070             }
3071             if (isPUNCT(nextchr))
3072                 sayNO;
3073             nextchr = UCHARAT(++locinput);
3074             break;
3075         case UPPERL:
3076             PL_reg_flags |= RF_tainted;
3077             /* FALL THROUGH */
3078         case UPPER:
3079             if (!nextchr)
3080                 sayNO;
3081             if (!(OP(scan) == UPPER
3082                   ? isUPPER(nextchr) : isUPPER_LC(nextchr)))
3083                 sayNO;
3084             nextchr = UCHARAT(++locinput);
3085             break;
3086         case UPPERLUTF8:
3087             PL_reg_flags |= RF_tainted;
3088             /* FALL THROUGH */
3089         case UPPERUTF8:
3090             if (!nextchr)
3091                 sayNO;
3092             if (nextchr & 0x80) {
3093                 if (!(OP(scan) == UPPERUTF8
3094                       ? swash_fetch(PL_utf8_upper, (U8*)locinput)
3095                       : isUPPER_LC_utf8((U8*)locinput)))
3096                 {
3097                     sayNO;
3098                 }
3099                 locinput += PL_utf8skip[nextchr];
3100                 nextchr = UCHARAT(locinput);
3101                 break;
3102             }
3103             if (!(OP(scan) == UPPERUTF8
3104                   ? isUPPER(nextchr) : isUPPER_LC(nextchr)))
3105                 sayNO;
3106             nextchr = UCHARAT(++locinput);
3107             break;
3108         case NUPPERL:
3109             PL_reg_flags |= RF_tainted;
3110             /* FALL THROUGH */
3111         case NUPPER:
3112             if (!nextchr)
3113                 sayNO;
3114             if (OP(scan) == UPPER
3115                 ? isUPPER(nextchr) : isUPPER_LC(nextchr))
3116                 sayNO;
3117             nextchr = UCHARAT(++locinput);
3118             break;
3119         case NUPPERLUTF8:
3120             PL_reg_flags |= RF_tainted;
3121             /* FALL THROUGH */
3122         case NUPPERUTF8:
3123             if (!nextchr && locinput >= PL_regeol)
3124                 sayNO;
3125             if (nextchr & 0x80) {
3126                 if (swash_fetch(PL_utf8_upper,(U8*)locinput))
3127                     sayNO;
3128                 locinput += PL_utf8skip[nextchr];
3129                 nextchr = UCHARAT(locinput);
3130                 break;
3131             }
3132             if (isUPPER(nextchr))
3133                 sayNO;
3134             nextchr = UCHARAT(++locinput);
3135             break;
3136         case XDIGIT:
3137             if (!nextchr && locinput >= PL_regeol)
3138                 sayNO;
3139             if (!isXDIGIT(nextchr))
3140                 sayNO;
3141             nextchr = UCHARAT(++locinput);
3142             break;
3143         case NXDIGIT:
3144             if (!nextchr && locinput >= PL_regeol)
3145                 sayNO;
3146             if (isXDIGIT(nextchr))
3147                 sayNO;
3148             nextchr = UCHARAT(++locinput);
3149             break;
3150         case CLUMP:
3151             if (locinput >= PL_regeol || swash_fetch(PL_utf8_mark,(U8*)locinput))
3152                 sayNO;
3153             locinput += PL_utf8skip[nextchr];
3154             while (locinput < PL_regeol && swash_fetch(PL_utf8_mark,(U8*)locinput))
3155                 locinput += UTF8SKIP(locinput);
3156             if (locinput > PL_regeol)
3157                 sayNO;
3158             nextchr = UCHARAT(locinput);
3159             break;
3160         case REFFL:
3161             PL_reg_flags |= RF_tainted;
3162             /* FALL THROUGH */
3163         case REF:
3164         case REFF:
3165             n = ARG(scan);  /* which paren pair */
3166             ln = PL_regstartp[n];
3167             if (*PL_reglastparen < n || ln == -1)
3168                 sayNO;                  /* Do not match unless seen CLOSEn. */
3169             if (ln == PL_regendp[n])
3170                 break;
3171
3172             s = PL_bostr + ln;
3173             if (UTF && OP(scan) != REF) {       /* REF can do byte comparison */
3174                 char *l = locinput;
3175                 char *e = PL_bostr + PL_regendp[n];
3176                 /*
3177                  * Note that we can't do the "other character" lookup trick as
3178                  * in the 8-bit case (no pun intended) because in Unicode we
3179                  * have to map both upper and title case to lower case.
3180                  */
3181                 if (OP(scan) == REFF) {
3182                     while (s < e) {
3183                         if (l >= PL_regeol)
3184                             sayNO;
3185                         if (toLOWER_utf8((U8*)s) != toLOWER_utf8((U8*)l))
3186                             sayNO;
3187                         s += UTF8SKIP(s);
3188                         l += UTF8SKIP(l);
3189                     }
3190                 }
3191                 else {
3192                     while (s < e) {
3193                         if (l >= PL_regeol)
3194                             sayNO;
3195                         if (toLOWER_LC_utf8((U8*)s) != toLOWER_LC_utf8((U8*)l))
3196                             sayNO;
3197                         s += UTF8SKIP(s);
3198                         l += UTF8SKIP(l);
3199                     }
3200                 }
3201                 locinput = l;
3202                 nextchr = UCHARAT(locinput);
3203                 break;
3204             }
3205
3206             /* Inline the first character, for speed. */
3207             if (UCHARAT(s) != nextchr &&
3208                 (OP(scan) == REF ||
3209                  (UCHARAT(s) != ((OP(scan) == REFF
3210                                   ? PL_fold : PL_fold_locale)[nextchr]))))
3211                 sayNO;
3212             ln = PL_regendp[n] - ln;
3213             if (locinput + ln > PL_regeol)
3214                 sayNO;
3215             if (ln > 1 && (OP(scan) == REF
3216                            ? memNE(s, locinput, ln)
3217                            : (OP(scan) == REFF
3218                               ? ibcmp(s, locinput, ln)
3219                               : ibcmp_locale(s, locinput, ln))))
3220                 sayNO;
3221             locinput += ln;
3222             nextchr = UCHARAT(locinput);
3223             break;
3224
3225         case NOTHING:
3226         case TAIL:
3227             break;
3228         case BACK:
3229             break;
3230         case EVAL:
3231         {
3232             dSP;
3233             OP_4tree *oop = PL_op;
3234             COP *ocurcop = PL_curcop;
3235             SV **ocurpad = PL_curpad;
3236             SV *ret;
3237             
3238             n = ARG(scan);
3239             PL_op = (OP_4tree*)PL_regdata->data[n];
3240             DEBUG_r( PerlIO_printf(Perl_debug_log, "  re_eval 0x%x\n", PL_op) );
3241             PL_curpad = AvARRAY((AV*)PL_regdata->data[n + 2]);
3242             PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
3243
3244             CALLRUNOPS(aTHX);                   /* Scalar context. */
3245             SPAGAIN;
3246             ret = POPs;
3247             PUTBACK;
3248             
3249             PL_op = oop;
3250             PL_curpad = ocurpad;
3251             PL_curcop = ocurcop;
3252             if (logical) {
3253                 if (logical == 2) {     /* Postponed subexpression. */
3254                     regexp *re;
3255                     MAGIC *mg = Null(MAGIC*);
3256                     re_cc_state state;
3257                     CURCUR cctmp;
3258                     CHECKPOINT cp, lastcp;
3259
3260                     if(SvROK(ret) || SvRMAGICAL(ret)) {
3261                         SV *sv = SvROK(ret) ? SvRV(ret) : ret;
3262
3263                         if(SvMAGICAL(sv))
3264                             mg = mg_find(sv, 'r');
3265                     }
3266                     if (mg) {
3267                         re = (regexp *)mg->mg_obj;
3268                         (void)ReREFCNT_inc(re);
3269                     }
3270                     else {
3271                         STRLEN len;
3272                         char *t = SvPV(ret, len);
3273                         PMOP pm;
3274                         char *oprecomp = PL_regprecomp;
3275                         I32 osize = PL_regsize;
3276                         I32 onpar = PL_regnpar;
3277
3278                         pm.op_pmflags = 0;
3279                         re = CALLREGCOMP(aTHX_ t, t + len, &pm);
3280                         if (!(SvFLAGS(ret) 
3281                               & (SVs_TEMP | SVs_PADTMP | SVf_READONLY)))
3282                             sv_magic(ret,(SV*)ReREFCNT_inc(re),'r',0,0);
3283                         PL_regprecomp = oprecomp;
3284                         PL_regsize = osize;
3285                         PL_regnpar = onpar;
3286                     }
3287                     DEBUG_r(
3288                         PerlIO_printf(Perl_debug_log, 
3289                                       "Entering embedded `%s%.60s%s%s'\n",
3290                                       PL_colors[0],
3291                                       re->precomp,
3292                                       PL_colors[1],
3293                                       (strlen(re->precomp) > 60 ? "..." : ""))
3294                         );
3295                     state.node = next;
3296                     state.prev = PL_reg_call_cc;
3297                     state.cc = PL_regcc;
3298                     state.re = PL_reg_re;
3299
3300                     cctmp.cur = 0;
3301                     cctmp.oldcc = 0;
3302                     PL_regcc = &cctmp;
3303                     
3304                     cp = regcppush(0);  /* Save *all* the positions. */
3305                     REGCP_SET;
3306                     cache_re(re);
3307                     state.ss = PL_savestack_ix;
3308                     *PL_reglastparen = 0;
3309                     PL_reg_call_cc = &state;
3310                     PL_reginput = locinput;
3311                     if (regmatch(re->program + 1)) {
3312                         ReREFCNT_dec(re);
3313                         regcpblow(cp);
3314                         sayYES;
3315                     }
3316                     DEBUG_r(
3317                         PerlIO_printf(Perl_debug_log,
3318                                       "%*s  failed...\n",
3319                                       REPORT_CODE_OFF+PL_regindent*2, "")
3320                         );
3321                     ReREFCNT_dec(re);
3322                     REGCP_UNWIND;
3323                     regcppop();
3324                     PL_reg_call_cc = state.prev;
3325                     PL_regcc = state.cc;
3326                     PL_reg_re = state.re;
3327                     cache_re(PL_reg_re);
3328                     sayNO;
3329                 }
3330                 sw = SvTRUE(ret);
3331                 logical = 0;
3332             }
3333             else
3334                 sv_setsv(save_scalar(PL_replgv), ret);
3335             break;
3336         }
3337         case OPEN:
3338             n = ARG(scan);  /* which paren pair */
3339             PL_reg_start_tmp[n] = locinput;
3340             if (n > PL_regsize)
3341                 PL_regsize = n;
3342             break;
3343         case CLOSE:
3344             n = ARG(scan);  /* which paren pair */
3345             PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
3346             PL_regendp[n] = locinput - PL_bostr;
3347             if (n > *PL_reglastparen)
3348                 *PL_reglastparen = n;
3349             break;
3350         case GROUPP:
3351             n = ARG(scan);  /* which paren pair */
3352             sw = (*PL_reglastparen >= n && PL_regendp[n] != -1);
3353             break;
3354         case IFTHEN:
3355             if (sw)
3356                 next = NEXTOPER(NEXTOPER(scan));
3357             else {
3358                 next = scan + ARG(scan);
3359                 if (OP(next) == IFTHEN) /* Fake one. */
3360                     next = NEXTOPER(NEXTOPER(next));
3361             }
3362             break;
3363         case LOGICAL:
3364             logical = scan->flags;
3365             break;
3366         case CURLYX: {
3367                 CURCUR cc;
3368                 CHECKPOINT cp = PL_savestack_ix;
3369
3370                 if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
3371                     next += ARG(next);
3372                 cc.oldcc = PL_regcc;
3373                 PL_regcc = &cc;
3374                 cc.parenfloor = *PL_reglastparen;
3375                 cc.cur = -1;
3376                 cc.min = ARG1(scan);
3377                 cc.max  = ARG2(scan);
3378                 cc.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
3379                 cc.next = next;
3380                 cc.minmod = minmod;
3381                 cc.lastloc = 0;
3382                 PL_reginput = locinput;
3383                 n = regmatch(PREVOPER(next));   /* start on the WHILEM */
3384                 regcpblow(cp);
3385                 PL_regcc = cc.oldcc;
3386                 saySAME(n);
3387             }
3388             /* NOT REACHED */
3389         case WHILEM: {
3390                 /*
3391                  * This is really hard to understand, because after we match
3392                  * what we're trying to match, we must make sure the rest of
3393                  * the RE is going to match for sure, and to do that we have
3394                  * to go back UP the parse tree by recursing ever deeper.  And
3395                  * if it fails, we have to reset our parent's current state
3396                  * that we can try again after backing off.
3397                  */
3398
3399                 CHECKPOINT cp, lastcp;
3400                 CURCUR* cc = PL_regcc;
3401                 char *lastloc = cc->lastloc; /* Detection of 0-len. */
3402                 
3403                 n = cc->cur + 1;        /* how many we know we matched */
3404                 PL_reginput = locinput;
3405
3406                 DEBUG_r(
3407                     PerlIO_printf(Perl_debug_log, 
3408                                   "%*s  %ld out of %ld..%ld  cc=%lx\n", 
3409                                   REPORT_CODE_OFF+PL_regindent*2, "",
3410                                   (long)n, (long)cc->min, 
3411                                   (long)cc->max, (long)cc)
3412                     );
3413
3414                 /* If degenerate scan matches "", assume scan done. */
3415
3416                 if (locinput == cc->lastloc && n >= cc->min) {
3417                     PL_regcc = cc->oldcc;
3418                     ln = PL_regcc->cur;
3419                     DEBUG_r(
3420                         PerlIO_printf(Perl_debug_log,
3421                            "%*s  empty match detected, try continuation...\n",
3422                            REPORT_CODE_OFF+PL_regindent*2, "")
3423                         );
3424                     if (regmatch(cc->next))
3425                         sayYES;
3426                     DEBUG_r(
3427                         PerlIO_printf(Perl_debug_log,
3428                                       "%*s  failed...\n",
3429                                       REPORT_CODE_OFF+PL_regindent*2, "")
3430                         );
3431                     PL_regcc->cur = ln;
3432                     PL_regcc = cc;
3433                     sayNO;
3434                 }
3435
3436                 /* First just match a string of min scans. */
3437
3438                 if (n < cc->min) {
3439                     cc->cur = n;
3440                     cc->lastloc = locinput;
3441                     if (regmatch(cc->scan))
3442                         sayYES;
3443                     cc->cur = n - 1;
3444                     cc->lastloc = lastloc;
3445                     DEBUG_r(
3446                         PerlIO_printf(Perl_debug_log,
3447                                       "%*s  failed...\n",
3448                                       REPORT_CODE_OFF+PL_regindent*2, "")
3449                         );
3450                     sayNO;
3451                 }
3452
3453                 /* Prefer next over scan for minimal matching. */
3454
3455                 if (cc->minmod) {
3456                     PL_regcc = cc->oldcc;
3457                     ln = PL_regcc->cur;
3458                     cp = regcppush(cc->parenfloor);
3459                     REGCP_SET;
3460                     if (regmatch(cc->next)) {
3461                         regcpblow(cp);
3462                         sayYES; /* All done. */
3463                     }
3464                     REGCP_UNWIND;
3465                     regcppop();
3466                     PL_regcc->cur = ln;
3467                     PL_regcc = cc;
3468
3469                     if (n >= cc->max) { /* Maximum greed exceeded? */
3470                         if (ckWARN(WARN_UNSAFE) && n >= REG_INFTY 
3471                             && !(PL_reg_flags & RF_warned)) {
3472                             PL_reg_flags |= RF_warned;
3473                             Perl_warner(aTHX_ WARN_UNSAFE, "%s limit (%d) exceeded",
3474                                  "Complex regular subexpression recursion",
3475                                  REG_INFTY - 1);
3476                         }
3477                         sayNO;
3478                     }
3479
3480                     DEBUG_r(
3481                         PerlIO_printf(Perl_debug_log,
3482                                       "%*s  trying longer...\n",
3483                                       REPORT_CODE_OFF+PL_regindent*2, "")
3484                         );
3485                     /* Try scanning more and see if it helps. */
3486                     PL_reginput = locinput;
3487                     cc->cur = n;
3488                     cc->lastloc = locinput;
3489                     cp = regcppush(cc->parenfloor);
3490                     REGCP_SET;
3491                     if (regmatch(cc->scan)) {
3492                         regcpblow(cp);
3493                         sayYES;
3494                     }
3495                     DEBUG_r(
3496                         PerlIO_printf(Perl_debug_log,
3497                                       "%*s  failed...\n",
3498                                       REPORT_CODE_OFF+PL_regindent*2, "")
3499                         );
3500                     REGCP_UNWIND;
3501                     regcppop();
3502                     cc->cur = n - 1;
3503                     cc->lastloc = lastloc;
3504                     sayNO;
3505                 }
3506
3507                 /* Prefer scan over next for maximal matching. */
3508
3509                 if (n < cc->max) {      /* More greed allowed? */
3510                     cp = regcppush(cc->parenfloor);
3511                     cc->cur = n;
3512                     cc->lastloc = locinput;
3513                     REGCP_SET;
3514                     if (regmatch(cc->scan)) {
3515                         regcpblow(cp);
3516                         sayYES;
3517                     }
3518                     REGCP_UNWIND;
3519                     regcppop();         /* Restore some previous $<digit>s? */
3520                     PL_reginput = locinput;
3521                     DEBUG_r(
3522                         PerlIO_printf(Perl_debug_log,
3523                                       "%*s  failed, try continuation...\n",
3524                                       REPORT_CODE_OFF+PL_regindent*2, "")
3525                         );
3526                 }
3527                 if (ckWARN(WARN_UNSAFE) && n >= REG_INFTY 
3528                         && !(PL_reg_flags & RF_warned)) {
3529                     PL_reg_flags |= RF_warned;
3530                     Perl_warner(aTHX_ WARN_UNSAFE, "%s limit (%d) exceeded",
3531                          "Complex regular subexpression recursion",
3532                          REG_INFTY - 1);
3533                 }
3534
3535                 /* Failed deeper matches of scan, so see if this one works. */
3536                 PL_regcc = cc->oldcc;
3537                 ln = PL_regcc->cur;
3538                 if (regmatch(cc->next))
3539                     sayYES;
3540                 DEBUG_r(
3541                     PerlIO_printf(Perl_debug_log, "%*s  failed...\n",
3542                                   REPORT_CODE_OFF+PL_regindent*2, "")
3543                     );
3544                 PL_regcc->cur = ln;
3545                 PL_regcc = cc;
3546                 cc->cur = n - 1;
3547                 cc->lastloc = lastloc;
3548                 sayNO;
3549             }
3550             /* NOT REACHED */
3551         case BRANCHJ: 
3552             next = scan + ARG(scan);
3553             if (next == scan)
3554                 next = NULL;
3555             inner = NEXTOPER(NEXTOPER(scan));
3556             goto do_branch;
3557         case BRANCH: 
3558             inner = NEXTOPER(scan);
3559           do_branch:
3560             {
3561                 CHECKPOINT lastcp;
3562                 c1 = OP(scan);
3563                 if (OP(next) != c1)     /* No choice. */
3564                     next = inner;       /* Avoid recursion. */
3565                 else {
3566                     int lastparen = *PL_reglastparen;
3567
3568                     REGCP_SET;
3569                     do {
3570                         PL_reginput = locinput;
3571                         if (regmatch(inner))
3572                             sayYES;
3573                         REGCP_UNWIND;
3574                         for (n = *PL_reglastparen; n > lastparen; n--)
3575                             PL_regendp[n] = -1;
3576                         *PL_reglastparen = n;
3577                         scan = next;
3578                         /*SUPPRESS 560*/
3579                         if (n = (c1 == BRANCH ? NEXT_OFF(next) : ARG(next)))
3580                             next += n;
3581                         else
3582                             next = NULL;
3583                         inner = NEXTOPER(scan);
3584                         if (c1 == BRANCHJ) {
3585                             inner = NEXTOPER(inner);
3586                         }
3587                     } while (scan != NULL && OP(scan) == c1);
3588                     sayNO;
3589                     /* NOTREACHED */
3590                 }
3591             }
3592             break;
3593         case MINMOD:
3594             minmod = 1;
3595             break;
3596         case CURLYM:
3597         {
3598             I32 l = 0;
3599             CHECKPOINT lastcp;
3600             
3601             /* We suppose that the next guy does not need
3602                backtracking: in particular, it is of constant length,
3603                and has no parenths to influence future backrefs. */
3604             ln = ARG1(scan);  /* min to match */
3605             n  = ARG2(scan);  /* max to match */
3606             paren = scan->flags;
3607             if (paren) {
3608                 if (paren > PL_regsize)
3609                     PL_regsize = paren;
3610                 if (paren > *PL_reglastparen)
3611                     *PL_reglastparen = paren;
3612             }
3613             scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
3614             if (paren)
3615                 scan += NEXT_OFF(scan); /* Skip former OPEN. */
3616             PL_reginput = locinput;
3617             if (minmod) {
3618                 minmod = 0;
3619                 if (ln && regrepeat_hard(scan, ln, &l) < ln)
3620                     sayNO;
3621                 if (ln && l == 0 && n >= ln
3622                     /* In fact, this is tricky.  If paren, then the
3623                        fact that we did/didnot match may influence
3624                        future execution. */
3625                     && !(paren && ln == 0))
3626                     ln = n;
3627                 locinput = PL_reginput;
3628                 if (PL_regkind[(U8)OP(next)] == EXACT) {
3629                     c1 = UCHARAT(OPERAND(next) + 1);
3630                     if (OP(next) == EXACTF)
3631                         c2 = PL_fold[c1];
3632                     else if (OP(next) == EXACTFL)
3633                         c2 = PL_fold_locale[c1];
3634                     else
3635                         c2 = c1;
3636                 }
3637                 else
3638                     c1 = c2 = -1000;
3639                 REGCP_SET;
3640                 /* This may be improved if l == 0.  */
3641                 while (n >= ln || (n == REG_INFTY && ln > 0 && l)) { /* ln overflow ? */
3642                     /* If it could work, try it. */
3643                     if (c1 == -1000 ||
3644                         UCHARAT(PL_reginput) == c1 ||
3645                         UCHARAT(PL_reginput) == c2)
3646                     {
3647                         if (paren) {
3648                             if (n) {
3649                                 PL_regstartp[paren] =
3650                                     HOPc(PL_reginput, -l) - PL_bostr;
3651                                 PL_regendp[paren] = PL_reginput - PL_bostr;
3652                             }
3653                             else
3654                                 PL_regendp[paren] = -1;
3655                         }
3656                         if (regmatch(next))
3657                             sayYES;
3658                         REGCP_UNWIND;
3659                     }
3660                     /* Couldn't or didn't -- move forward. */
3661                     PL_reginput = locinput;
3662                     if (regrepeat_hard(scan, 1, &l)) {
3663                         ln++;
3664                         locinput = PL_reginput;
3665                     }
3666                     else
3667                         sayNO;
3668                 }
3669             }
3670             else {
3671                 n = regrepeat_hard(scan, n, &l);
3672                 if (n != 0 && l == 0
3673                     /* In fact, this is tricky.  If paren, then the
3674                        fact that we did/didnot match may influence
3675                        future execution. */
3676                     && !(paren && ln == 0))
3677                     ln = n;
3678                 locinput = PL_reginput;
3679                 DEBUG_r(
3680                     PerlIO_printf(Perl_debug_log,
3681                                   "%*s  matched %ld times, len=%ld...\n",
3682                                   REPORT_CODE_OFF+PL_regindent*2, "", n, l)
3683                     );
3684                 if (n >= ln) {
3685                     if (PL_regkind[(U8)OP(next)] == EXACT) {
3686                         c1 = UCHARAT(OPERAND(next) + 1);
3687                         if (OP(next) == EXACTF)
3688                             c2 = PL_fold[c1];
3689                         else if (OP(next) == EXACTFL)
3690                             c2 = PL_fold_locale[c1];
3691                         else
3692                             c2 = c1;
3693                     }
3694                     else
3695                         c1 = c2 = -1000;
3696                 }
3697                 REGCP_SET;
3698                 while (n >= ln) {
3699                     /* If it could work, try it. */
3700                     if (c1 == -1000 ||
3701                         UCHARAT(PL_reginput) == c1 ||
3702                         UCHARAT(PL_reginput) == c2)
3703                     {
3704                         DEBUG_r(
3705                                 PerlIO_printf(Perl_debug_log,
3706                                               "%*s  trying tail with n=%ld...\n",
3707                                               REPORT_CODE_OFF+PL_regindent*2, "", n)
3708                             );
3709                         if (paren) {
3710                             if (n) {
3711                                 PL_regstartp[paren] = HOPc(PL_reginput, -l) - PL_bostr;
3712                                 PL_regendp[paren] = PL_reginput - PL_bostr;
3713                             }
3714                             else
3715                                 PL_regendp[paren] = -1;
3716                         }
3717                         if (regmatch(next))
3718                             sayYES;
3719                         REGCP_UNWIND;
3720                     }
3721                     /* Couldn't or didn't -- back up. */
3722                     n--;
3723                     locinput = HOPc(locinput, -l);
3724                     PL_reginput = locinput;
3725                 }
3726             }
3727             sayNO;
3728             break;
3729         }
3730         case CURLYN:
3731             paren = scan->flags;        /* Which paren to set */
3732             if (paren > PL_regsize)
3733                 PL_regsize = paren;
3734             if (paren > *PL_reglastparen)
3735                 *PL_reglastparen = paren;
3736             ln = ARG1(scan);  /* min to match */
3737             n  = ARG2(scan);  /* max to match */
3738             scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
3739             goto repeat;
3740         case CURLY:
3741             paren = 0;
3742             ln = ARG1(scan);  /* min to match */
3743             n  = ARG2(scan);  /* max to match */
3744             scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
3745             goto repeat;
3746         case STAR:
3747             ln = 0;
3748             n = REG_INFTY;
3749             scan = NEXTOPER(scan);
3750             paren = 0;
3751             goto repeat;
3752         case PLUS:
3753             ln = 1;
3754             n = REG_INFTY;
3755             scan = NEXTOPER(scan);
3756             paren = 0;
3757           repeat:
3758             /*
3759             * Lookahead to avoid useless match attempts
3760             * when we know what character comes next.
3761             */
3762             if (PL_regkind[(U8)OP(next)] == EXACT) {
3763                 c1 = UCHARAT(OPERAND(next) + 1);
3764                 if (OP(next) == EXACTF)
3765                     c2 = PL_fold[c1];
3766                 else if (OP(next) == EXACTFL)
3767                     c2 = PL_fold_locale[c1];
3768                 else
3769                     c2 = c1;
3770             }
3771             else
3772                 c1 = c2 = -1000;
3773             PL_reginput = locinput;
3774             if (minmod) {
3775                 CHECKPOINT lastcp;
3776                 minmod = 0;
3777                 if (ln && regrepeat(scan, ln) < ln)
3778                     sayNO;
3779                 locinput = PL_reginput;
3780                 REGCP_SET;
3781                 if (c1 != -1000) {
3782                     char *e = locinput + n - ln; /* Should not check after this */
3783                     char *old = locinput;
3784
3785                     if (e >= PL_regeol || (n == REG_INFTY))
3786                         e = PL_regeol - 1;
3787                     while (1) {
3788                         /* Find place 'next' could work */
3789                         if (c1 == c2) {
3790                             while (locinput <= e && *locinput != c1)
3791                                 locinput++;
3792                         } else {
3793                             while (locinput <= e 
3794                                    && *locinput != c1
3795                                    && *locinput != c2)
3796                                 locinput++;                         
3797                         }
3798                         if (locinput > e) 
3799                             sayNO;
3800                         /* PL_reginput == old now */
3801                         if (locinput != old) {
3802                             ln = 1;     /* Did some */
3803                             if (regrepeat(scan, locinput - old) <
3804                                  locinput - old)
3805                                 sayNO;
3806                         }
3807                         /* PL_reginput == locinput now */
3808                         if (paren) {
3809                             if (ln) {
3810                                 PL_regstartp[paren] = HOPc(locinput, -1) - PL_bostr;
3811                                 PL_regendp[paren] = locinput - PL_bostr;
3812                             }
3813                             else
3814                                 PL_regendp[paren] = -1;
3815                         }
3816                         if (regmatch(next))
3817                             sayYES;
3818                         PL_reginput = locinput; /* Could be reset... */
3819                         REGCP_UNWIND;
3820                         /* Couldn't or didn't -- move forward. */
3821                         old = locinput++;
3822                     }
3823                 }
3824                 else
3825                 while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */
3826                     /* If it could work, try it. */
3827                     if (c1 == -1000 ||
3828                         UCHARAT(PL_reginput) == c1 ||
3829                         UCHARAT(PL_reginput) == c2)
3830                     {
3831                         if (paren) {
3832                             if (n) {
3833                                 PL_regstartp[paren] = HOPc(PL_reginput, -1) - PL_bostr;
3834                                 PL_regendp[paren] = PL_reginput - PL_bostr;
3835                             }
3836                             else
3837                                 PL_regendp[paren] = -1;
3838                         }
3839                         if (regmatch(next))
3840                             sayYES;
3841                         REGCP_UNWIND;
3842                     }
3843                     /* Couldn't or didn't -- move forward. */
3844                     PL_reginput = locinput;
3845                     if (regrepeat(scan, 1)) {
3846                         ln++;
3847                         locinput = PL_reginput;
3848                     }
3849                     else
3850                         sayNO;
3851                 }
3852             }
3853             else {
3854                 CHECKPOINT lastcp;
3855                 n = regrepeat(scan, n);
3856                 locinput = PL_reginput;
3857                 if (ln < n && PL_regkind[(U8)OP(next)] == EOL &&
3858                     (!PL_multiline  || OP(next) == SEOL))
3859                     ln = n;                     /* why back off? */
3860                 REGCP_SET;
3861                 if (paren) {
3862                     while (n >= ln) {
3863                         /* If it could work, try it. */
3864                         if (c1 == -1000 ||
3865                             UCHARAT(PL_reginput) == c1 ||
3866                             UCHARAT(PL_reginput) == c2)
3867                             {
3868                                 if (paren && n) {
3869                                     if (n) {
3870                                         PL_regstartp[paren] = HOPc(PL_reginput, -1) - PL_bostr;
3871                                         PL_regendp[paren] = PL_reginput - PL_bostr;
3872                                     }
3873                                     else
3874                                         PL_regendp[paren] = -1;
3875                                 }
3876                                 if (regmatch(next))
3877                                     sayYES;
3878                                 REGCP_UNWIND;
3879                             }
3880                         /* Couldn't or didn't -- back up. */
3881                         n--;
3882                         PL_reginput = locinput = HOPc(locinput, -1);
3883                     }
3884                 }
3885                 else {
3886                     while (n >= ln) {
3887                         /* If it could work, try it. */
3888                         if (c1 == -1000 ||
3889                             UCHARAT(PL_reginput) == c1 ||
3890                             UCHARAT(PL_reginput) == c2)
3891                             {
3892                                 if (regmatch(next))
3893                                     sayYES;
3894                                 REGCP_UNWIND;
3895                             }
3896                         /* Couldn't or didn't -- back up. */
3897                         n--;
3898                         PL_reginput = locinput = HOPc(locinput, -1);
3899                     }
3900                 }
3901             }
3902             sayNO;
3903             break;
3904         case END:
3905             if (PL_reg_call_cc) {
3906                 re_cc_state *cur_call_cc = PL_reg_call_cc;
3907                 CURCUR *cctmp = PL_regcc;
3908                 regexp *re = PL_reg_re;
3909                 CHECKPOINT cp, lastcp;
3910                 
3911                 cp = regcppush(0);      /* Save *all* the positions. */
3912                 REGCP_SET;
3913                 regcp_set_to(PL_reg_call_cc->ss); /* Restore parens of
3914                                                     the caller. */
3915                 PL_reginput = locinput; /* Make position available to
3916                                            the callcc. */
3917                 cache_re(PL_reg_call_cc->re);
3918                 PL_regcc = PL_reg_call_cc->cc;
3919                 PL_reg_call_cc = PL_reg_call_cc->prev;
3920                 if (regmatch(cur_call_cc->node)) {
3921                     PL_reg_call_cc = cur_call_cc;
3922                     regcpblow(cp);
3923                     sayYES;
3924                 }
3925                 REGCP_UNWIND;
3926                 regcppop();
3927                 PL_reg_call_cc = cur_call_cc;
3928                 PL_regcc = cctmp;
3929                 PL_reg_re = re;
3930                 cache_re(re);
3931
3932                 DEBUG_r(
3933                     PerlIO_printf(Perl_debug_log,
3934                                   "%*s  continuation failed...\n",
3935                                   REPORT_CODE_OFF+PL_regindent*2, "")
3936                     );
3937                 sayNO;
3938             }
3939             if (locinput < PL_regtill)
3940                 sayNO;                  /* Cannot match: too short. */
3941             /* Fall through */
3942         case SUCCEED:
3943             PL_reginput = locinput;     /* put where regtry can find it */
3944             sayYES;                     /* Success! */
3945         case SUSPEND:
3946             n = 1;
3947             PL_reginput = locinput;
3948             goto do_ifmatch;        
3949         case UNLESSM:
3950             n = 0;
3951             if (scan->flags) {
3952                 if (UTF) {              /* XXXX This is absolutely
3953                                            broken, we read before
3954                                            start of string. */
3955                     s = HOPMAYBEc(locinput, -scan->flags);
3956                     if (!s)
3957                         goto say_yes;
3958                     PL_reginput = s;
3959                 }
3960                 else {
3961                     if (locinput < PL_bostr + scan->flags) 
3962                         goto say_yes;
3963                     PL_reginput = locinput - scan->flags;
3964                     goto do_ifmatch;
3965                 }
3966             }
3967             else
3968                 PL_reginput = locinput;
3969             goto do_ifmatch;
3970         case IFMATCH:
3971             n = 1;
3972             if (scan->flags) {
3973                 if (UTF) {              /* XXXX This is absolutely
3974                                            broken, we read before
3975                                            start of string. */
3976                     s = HOPMAYBEc(locinput, -scan->flags);
3977                     if (!s || s < PL_bostr)
3978                         goto say_no;
3979                     PL_reginput = s;
3980                 }
3981                 else {
3982                     if (locinput < PL_bostr + scan->flags) 
3983                         goto say_no;
3984                     PL_reginput = locinput - scan->flags;
3985                     goto do_ifmatch;
3986                 }
3987             }
3988             else
3989                 PL_reginput = locinput;
3990
3991           do_ifmatch:
3992             inner = NEXTOPER(NEXTOPER(scan));
3993             if (regmatch(inner) != n) {
3994               say_no:
3995                 if (logical) {
3996                     logical = 0;
3997                     sw = 0;
3998                     goto do_longjump;
3999                 }
4000                 else
4001                     sayNO;
4002             }
4003           say_yes:
4004             if (logical) {
4005                 logical = 0;
4006                 sw = 1;
4007             }
4008             if (OP(scan) == SUSPEND) {
4009                 locinput = PL_reginput;
4010                 nextchr = UCHARAT(locinput);
4011             }
4012             /* FALL THROUGH. */
4013         case LONGJMP:
4014           do_longjump:
4015             next = scan + ARG(scan);
4016             if (next == scan)
4017                 next = NULL;
4018             break;
4019         default:
4020             PerlIO_printf(PerlIO_stderr(), "%lx %d\n",
4021                           (unsigned long)scan, OP(scan));
4022             Perl_croak(aTHX_ "regexp memory corruption");
4023         }
4024         scan = next;
4025     }
4026
4027     /*
4028     * We get here only if there's trouble -- normally "case END" is
4029     * the terminating point.
4030     */
4031     Perl_croak(aTHX_ "corrupted regexp pointers");
4032     /*NOTREACHED*/
4033     sayNO;
4034
4035 yes:
4036 #ifdef DEBUGGING
4037     PL_regindent--;
4038 #endif
4039     return 1;
4040
4041 no:
4042 #ifdef DEBUGGING
4043     PL_regindent--;
4044 #endif
4045     return 0;
4046 }
4047
4048 /*
4049  - regrepeat - repeatedly match something simple, report how many
4050  */
4051 /*
4052  * [This routine now assumes that it will only match on things of length 1.
4053  * That was true before, but now we assume scan - reginput is the count,
4054  * rather than incrementing count on every character.  [Er, except utf8.]]
4055  */
4056 STATIC I32
4057 S_regrepeat(pTHX_ regnode *p, I32 max)
4058 {
4059     dTHR;
4060     register char *scan;
4061     register char *opnd;
4062     register I32 c;
4063     register char *loceol = PL_regeol;
4064     register I32 hardcount = 0;
4065
4066     scan = PL_reginput;
4067     if (max != REG_INFTY && max < loceol - scan)
4068       loceol = scan + max;
4069     opnd = (char *) OPERAND(p);
4070     switch (OP(p)) {
4071     case REG_ANY:
4072         while (scan < loceol && *scan != '\n')
4073             scan++;
4074         break;
4075     case SANY:
4076         scan = loceol;
4077         break;
4078     case ANYUTF8:
4079         loceol = PL_regeol;
4080         while (scan < loceol && *scan != '\n') {
4081             scan += UTF8SKIP(scan);
4082             hardcount++;
4083         }
4084         break;
4085     case SANYUTF8:
4086         loceol = PL_regeol;
4087         while (scan < loceol) {
4088             scan += UTF8SKIP(scan);
4089             hardcount++;
4090         }
4091         break;
4092     case EXACT:         /* length of string is 1 */
4093         c = UCHARAT(++opnd);
4094         while (scan < loceol && UCHARAT(scan) == c)
4095             scan++;
4096         break;
4097     case EXACTF:        /* length of string is 1 */
4098         c = UCHARAT(++opnd);
4099         while (scan < loceol &&
4100                (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
4101             scan++;
4102         break;
4103     case EXACTFL:       /* length of string is 1 */
4104         PL_reg_flags |= RF_tainted;
4105         c = UCHARAT(++opnd);
4106         while (scan < loceol &&
4107                (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
4108             scan++;
4109         break;
4110     case ANYOFUTF8:
4111         loceol = PL_regeol;
4112         while (scan < loceol && REGINCLASSUTF8(p, (U8*)scan)) {
4113             scan += UTF8SKIP(scan);
4114             hardcount++;
4115         }
4116         break;
4117     case ANYOF:
4118         while (scan < loceol && REGINCLASS(opnd, *scan))
4119             scan++;
4120         break;
4121     case ALNUM:
4122         while (scan < loceol && isALNUM(*scan))
4123             scan++;
4124         break;
4125     case ALNUMUTF8:
4126         loceol = PL_regeol;
4127         while (scan < loceol && swash_fetch(PL_utf8_alnum, (U8*)scan)) {
4128             scan += UTF8SKIP(scan);
4129             hardcount++;
4130         }
4131         break;
4132     case ALNUML:
4133         PL_reg_flags |= RF_tainted;
4134         while (scan < loceol && isALNUM_LC(*scan))
4135             scan++;
4136         break;
4137     case ALNUMLUTF8:
4138         PL_reg_flags |= RF_tainted;
4139         loceol = PL_regeol;
4140         while (scan < loceol && isALNUM_LC_utf8((U8*)scan)) {
4141             scan += UTF8SKIP(scan);
4142             hardcount++;
4143         }
4144         break;
4145         break;
4146     case NALNUM:
4147         while (scan < loceol && !isALNUM(*scan))
4148             scan++;
4149         break;
4150     case NALNUMUTF8:
4151         loceol = PL_regeol;
4152         while (scan < loceol && !swash_fetch(PL_utf8_alnum, (U8*)scan)) {
4153             scan += UTF8SKIP(scan);
4154             hardcount++;
4155         }
4156         break;
4157     case NALNUML:
4158         PL_reg_flags |= RF_tainted;
4159         while (scan < loceol && !isALNUM_LC(*scan))
4160             scan++;
4161         break;
4162     case NALNUMLUTF8:
4163         PL_reg_flags |= RF_tainted;
4164         loceol = PL_regeol;
4165         while (scan < loceol && !isALNUM_LC_utf8((U8*)scan)) {
4166             scan += UTF8SKIP(scan);
4167             hardcount++;
4168         }
4169         break;
4170     case SPACE:
4171         while (scan < loceol && isSPACE(*scan))
4172             scan++;
4173         break;
4174     case SPACEUTF8:
4175         loceol = PL_regeol;
4176         while (scan < loceol && (*scan == ' ' || swash_fetch(PL_utf8_space,(U8*)scan))) {
4177             scan += UTF8SKIP(scan);
4178             hardcount++;
4179         }
4180         break;
4181     case SPACEL:
4182         PL_reg_flags |= RF_tainted;
4183         while (scan < loceol && isSPACE_LC(*scan))
4184             scan++;
4185         break;
4186     case SPACELUTF8:
4187         PL_reg_flags |= RF_tainted;
4188         loceol = PL_regeol;
4189         while (scan < loceol && (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
4190             scan += UTF8SKIP(scan);
4191             hardcount++;
4192         }
4193         break;
4194     case NSPACE:
4195         while (scan < loceol && !isSPACE(*scan))
4196             scan++;
4197         break;
4198     case NSPACEUTF8:
4199         loceol = PL_regeol;
4200         while (scan < loceol && !(*scan == ' ' || swash_fetch(PL_utf8_space,(U8*)scan))) {
4201             scan += UTF8SKIP(scan);
4202             hardcount++;
4203         }
4204         break;
4205     case NSPACEL:
4206         PL_reg_flags |= RF_tainted;
4207         while (scan < loceol && !isSPACE_LC(*scan))
4208             scan++;
4209         break;
4210     case NSPACELUTF8:
4211         PL_reg_flags |= RF_tainted;
4212         loceol = PL_regeol;
4213         while (scan < loceol && !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
4214             scan += UTF8SKIP(scan);
4215             hardcount++;
4216         }
4217         break;
4218     case DIGIT:
4219         while (scan < loceol && isDIGIT(*scan))
4220             scan++;
4221         break;
4222     case DIGITUTF8:
4223         loceol = PL_regeol;
4224         while (scan < loceol && swash_fetch(PL_utf8_digit,(U8*)scan)) {
4225             scan += UTF8SKIP(scan);
4226             hardcount++;
4227         }
4228         break;
4229         break;
4230     case NDIGIT:
4231         while (scan < loceol && !isDIGIT(*scan))
4232             scan++;
4233         break;
4234     case NDIGITUTF8:
4235         loceol = PL_regeol;
4236         while (scan < loceol && !swash_fetch(PL_utf8_digit,(U8*)scan)) {
4237             scan += UTF8SKIP(scan);
4238             hardcount++;
4239         }
4240         break;
4241     default:            /* Called on something of 0 width. */
4242         break;          /* So match right here or not at all. */
4243     }
4244
4245     if (hardcount)
4246         c = hardcount;
4247     else
4248         c = scan - PL_reginput;
4249     PL_reginput = scan;
4250
4251     DEBUG_r( 
4252         {
4253                 SV *prop = sv_newmortal();
4254
4255                 regprop(prop, p);
4256                 PerlIO_printf(Perl_debug_log, 
4257                               "%*s  %s can match %ld times out of %ld...\n", 
4258                               REPORT_CODE_OFF+1, "", SvPVX(prop),c,max);
4259         });
4260     
4261     return(c);
4262 }
4263
4264 /*
4265  - regrepeat_hard - repeatedly match something, report total lenth and length
4266  * 
4267  * The repeater is supposed to have constant length.
4268  */
4269
4270 STATIC I32
4271 S_regrepeat_hard(pTHX_ regnode *p, I32 max, I32 *lp)
4272 {
4273     dTHR;
4274     register char *scan;
4275     register char *start;
4276     register char *loceol = PL_regeol;
4277     I32 l = 0;
4278     I32 count = 0, res = 1;
4279
4280     if (!max)
4281         return 0;
4282
4283     start = PL_reginput;
4284     if (UTF) {
4285         while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
4286             if (!count++) {
4287                 l = 0;
4288                 while (start < PL_reginput) {
4289                     l++;
4290                     start += UTF8SKIP(start);
4291                 }
4292                 *lp = l;
4293                 if (l == 0)
4294                     return max;
4295             }
4296             if (count == max)
4297                 return count;
4298         }
4299     }
4300     else {
4301         while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
4302             if (!count++) {
4303                 *lp = l = PL_reginput - start;
4304                 if (max != REG_INFTY && l*max < loceol - scan)
4305                     loceol = scan + l*max;
4306                 if (l == 0)
4307                     return max;
4308             }
4309         }
4310     }
4311     if (!res)
4312         PL_reginput = scan;
4313     
4314     return count;
4315 }
4316
4317 /*
4318  - reginclass - determine if a character falls into a character class
4319  */
4320
4321 STATIC bool
4322 S_reginclass(pTHX_ register char *p, register I32 c)
4323 {
4324     dTHR;
4325     char flags = ANYOF_FLAGS(p);
4326     bool match = FALSE;
4327
4328     c &= 0xFF;
4329     if (ANYOF_BITMAP_TEST(p, c))
4330         match = TRUE;
4331     else if (flags & ANYOF_FOLD) {
4332         I32 cf;
4333         if (flags & ANYOF_LOCALE) {
4334             PL_reg_flags |= RF_tainted;
4335             cf = PL_fold_locale[c];
4336         }
4337         else
4338             cf = PL_fold[c];
4339         if (ANYOF_BITMAP_TEST(p, cf))
4340             match = TRUE;
4341     }
4342
4343     if (!match && (flags & ANYOF_CLASS)) {
4344         PL_reg_flags |= RF_tainted;
4345         if (
4346             (ANYOF_CLASS_TEST(p, ANYOF_ALNUM)   &&  isALNUM_LC(c))  ||
4347             (ANYOF_CLASS_TEST(p, ANYOF_NALNUM)  && !isALNUM_LC(c))  ||
4348             (ANYOF_CLASS_TEST(p, ANYOF_SPACE)   &&  isSPACE_LC(c))  ||
4349             (ANYOF_CLASS_TEST(p, ANYOF_NSPACE)  && !isSPACE_LC(c))  ||
4350             (ANYOF_CLASS_TEST(p, ANYOF_DIGIT)   &&  isDIGIT_LC(c))  ||
4351             (ANYOF_CLASS_TEST(p, ANYOF_NDIGIT)  && !isDIGIT_LC(c))  ||
4352             (ANYOF_CLASS_TEST(p, ANYOF_ALNUMC)  &&  isALNUMC_LC(c)) ||
4353             (ANYOF_CLASS_TEST(p, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
4354             (ANYOF_CLASS_TEST(p, ANYOF_ALPHA)   &&  isALPHA_LC(c))  ||
4355             (ANYOF_CLASS_TEST(p, ANYOF_NALPHA)  && !isALPHA_LC(c))  ||
4356             (ANYOF_CLASS_TEST(p, ANYOF_ASCII)   &&  isASCII(c))     ||
4357             (ANYOF_CLASS_TEST(p, ANYOF_NASCII)  && !isASCII(c))     ||
4358             (ANYOF_CLASS_TEST(p, ANYOF_CNTRL)   &&  isCNTRL_LC(c))  ||
4359             (ANYOF_CLASS_TEST(p, ANYOF_NCNTRL)  && !isCNTRL_LC(c))  ||
4360             (ANYOF_CLASS_TEST(p, ANYOF_GRAPH)   &&  isGRAPH_LC(c))  ||
4361             (ANYOF_CLASS_TEST(p, ANYOF_NGRAPH)  && !isGRAPH_LC(c))  ||
4362             (ANYOF_CLASS_TEST(p, ANYOF_LOWER)   &&  isLOWER_LC(c))  ||
4363             (ANYOF_CLASS_TEST(p, ANYOF_NLOWER)  && !isLOWER_LC(c))  ||
4364             (ANYOF_CLASS_TEST(p, ANYOF_PRINT)   &&  isPRINT_LC(c))  ||
4365             (ANYOF_CLASS_TEST(p, ANYOF_NPRINT)  && !isPRINT_LC(c))  ||
4366             (ANYOF_CLASS_TEST(p, ANYOF_PUNCT)   &&  isPUNCT_LC(c))  ||
4367             (ANYOF_CLASS_TEST(p, ANYOF_NPUNCT)  && !isPUNCT_LC(c))  ||
4368             (ANYOF_CLASS_TEST(p, ANYOF_UPPER)   &&  isUPPER_LC(c))  ||
4369             (ANYOF_CLASS_TEST(p, ANYOF_NUPPER)  && !isUPPER_LC(c))  ||
4370             (ANYOF_CLASS_TEST(p, ANYOF_XDIGIT)  &&  isXDIGIT(c))    ||
4371             (ANYOF_CLASS_TEST(p, ANYOF_NXDIGIT) && !isXDIGIT(c))
4372             ) /* How's that for a conditional? */
4373         {
4374             match = TRUE;
4375         }
4376     }
4377
4378     return (flags & ANYOF_INVERT) ? !match : match;
4379 }
4380
4381 STATIC bool
4382 S_reginclassutf8(pTHX_ regnode *f, U8 *p)
4383 {                                           
4384     dTHR;
4385     char flags = ARG1(f);
4386     bool match = FALSE;
4387     SV *sv = (SV*)PL_regdata->data[ARG2(f)];
4388
4389     if (swash_fetch(sv, p))
4390         match = TRUE;
4391     else if (flags & ANYOF_FOLD) {
4392         I32 cf;
4393         U8 tmpbuf[10];
4394         if (flags & ANYOF_LOCALE) {
4395             PL_reg_flags |= RF_tainted;
4396             uv_to_utf8(tmpbuf, toLOWER_LC_utf8(p));
4397         }
4398         else
4399             uv_to_utf8(tmpbuf, toLOWER_utf8(p));
4400         if (swash_fetch(sv, tmpbuf))
4401             match = TRUE;
4402     }
4403
4404     /* UTF8 combined with ANYOF_CLASS is ill-defined. */
4405
4406     return (flags & ANYOF_INVERT) ? !match : match;
4407 }
4408
4409 STATIC U8 *
4410 S_reghop(pTHX_ U8 *s, I32 off)
4411 {                               
4412     dTHR;
4413     if (off >= 0) {
4414         while (off-- && s < (U8*)PL_regeol)
4415             s += UTF8SKIP(s);
4416     }
4417     else {
4418         while (off++) {
4419             if (s > (U8*)PL_bostr) {
4420                 s--;
4421                 if (*s & 0x80) {
4422                     while (s > (U8*)PL_bostr && (*s & 0xc0) == 0x80)
4423                         s--;
4424                 }               /* XXX could check well-formedness here */
4425             }
4426         }
4427     }
4428     return s;
4429 }
4430
4431 STATIC U8 *
4432 S_reghopmaybe(pTHX_ U8* s, I32 off)
4433 {
4434     dTHR;
4435     if (off >= 0) {
4436         while (off-- && s < (U8*)PL_regeol)
4437             s += UTF8SKIP(s);
4438         if (off >= 0)
4439             return 0;
4440     }
4441     else {
4442         while (off++) {
4443             if (s > (U8*)PL_bostr) {
4444                 s--;
4445                 if (*s & 0x80) {
4446                     while (s > (U8*)PL_bostr && (*s & 0xc0) == 0x80)
4447                         s--;
4448                 }               /* XXX could check well-formedness here */
4449             }
4450             else
4451                 break;
4452         }
4453         if (off <= 0)
4454             return 0;
4455     }
4456     return s;
4457 }