4df60acaa9e69503dfd55aa9430ac9358e94a1ef
[p5sagit/p5-mst-13.2.git] / regexec.c
1 /*    regexec.c
2  */
3
4 /*
5  * "One Ring to rule them all, One Ring to find them..."
6  */
7
8 /* NOTE: this is derived from Henry Spencer's regexp code, and should not
9  * be confused with the original package (see point 3 below).  Thanks, Henry!
10  */
11
12 /* Additional note: this code is very heavily munged from Henry's version
13  * in places.  In some spots I've traded clarity for efficiency, so don't
14  * blame Henry for some of the lack of readability.
15  */
16
17 /* The names of the functions have been changed from regcomp and
18  * regexec to  pregcomp and pregexec in order to avoid conflicts
19  * with the POSIX routines of the same names.
20 */
21
22 #ifdef PERL_EXT_RE_BUILD
23 /* need to replace pregcomp et al, so force PERL_IN_XSUB_RE on (it enables the renames below) */
24 #  ifndef PERL_IN_XSUB_RE
25 #    define PERL_IN_XSUB_RE
26 #  endif
27 /* the PERL_EXT_RE_DEBUG flavour needs access to debugger hooks, so force DEBUGGING on */
28 #  if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING)
29 #    define DEBUGGING
30 #  endif
31 #endif /* PERL_EXT_RE_BUILD */
32
33 #ifdef PERL_IN_XSUB_RE
34 /* From here on each Perl_* name below is spelled my_* instead.  We *really* need to overwrite these symbols: */
35 #  define Perl_regexec_flags my_regexec
36 #  define Perl_regdump my_regdump
37 #  define Perl_regprop my_regprop
38 #  define Perl_re_intuit_start my_re_intuit_start
39 /* *These* symbols are masked to allow static link. */
40 #  define Perl_pregexec my_pregexec
41 #  define Perl_reginitcolors my_reginitcolors 
42
43 #  define PERL_NO_GET_CONTEXT
44 #endif /* PERL_IN_XSUB_RE */
45
46 /*SUPPRESS 112*/
47 /*
48  * pregcomp and pregexec -- regsub and regerror are not used in perl
49  *
50  *      Copyright (c) 1986 by University of Toronto.
51  *      Written by Henry Spencer.  Not derived from licensed software.
52  *
53  *      Permission is granted to anyone to use this software for any
54  *      purpose on any computer system, and to redistribute it freely,
55  *      subject to the following restrictions:
56  *
57  *      1. The author is not responsible for the consequences of use of
58  *              this software, no matter how awful, even if they arise
59  *              from defects in it.
60  *
61  *      2. The origin of this software must not be misrepresented, either
62  *              by explicit claim or by omission.
63  *
64  *      3. Altered versions must be plainly marked as such, and must not
65  *              be misrepresented as being the original software.
66  *
67  ****    Alterations to Henry's code are...
68  ****
69  ****    Copyright (c) 1991-1999, Larry Wall
70  ****
71  ****    You may distribute under the terms of either the GNU General Public
72  ****    License or the Artistic License, as specified in the README file.
73  *
74  * Beware that some of this code is subtly aware of the way operator
75  * precedence is structured in regular expressions.  Serious changes in
76  * regular-expression syntax might require a total rethink.
77  */
78 #include "EXTERN.h"
79 #define PERL_IN_REGEXEC_C
80 #include "perl.h"
81
82 #ifdef PERL_IN_XSUB_RE
83 #  if defined(PERL_CAPI) || defined(PERL_OBJECT)
84 #    include "XSUB.h"
85 #  endif
86 #endif
87
88 #include "regcomp.h"
89
/* Bit values OR-ed into PL_reg_flags while a match is running. */
#define RF_tainted      1               /* tainted information used? */
#define RF_warned       2               /* warned about big count? */
#define RF_evaled       4               /* Did an EVAL with setting? */
#define RF_utf8         8               /* String contains multibyte chars? */

/* Non-zero when the RF_utf8 bit is set in PL_reg_flags. */
#define UTF (PL_reg_flags & RF_utf8)

#define RS_init         1               /* eval environment created */
#define RS_set          2               /* replsv value is set */

#ifndef STATIC
#define STATIC  static
#endif

/*
 * Forwards.
 */

/* Class membership: take the slow function path only when the node asks
 * for it (ANYOF_FLAGS / ARG1 non-zero), otherwise do the direct lookup. */
#define REGINCLASS(p,c)  (ANYOF_FLAGS(p) ? reginclass(p,c) : ANYOF_BITMAP_TEST(p,c))
#define REGINCLASSUTF8(f,p)  (ARG1(f) ? reginclassutf8(f,p) : swash_fetch((SV*)PL_regdata->data[ARG2(f)],p))

/* Length of sv and distance a-b, counted in characters under UTF and in
 * bytes otherwise. */
#define CHR_SVLEN(sv) (UTF ? sv_len_utf8(sv) : SvCUR(sv))
#define CHR_DIST(a,b) (UTF ? utf8_distance(a,b) : ((a) - (b)))

/*
 * Position stepping.  HOP/HOPMAYBE move `pos' by `off' through
 * reghop()/reghopmaybe() under UTF and by plain pointer arithmetic
 * otherwise; the *_c / *c variants hand back a char* instead of a U8*.
 *
 * Every use of `pos' and `off' is parenthesized so that compound arguments
 * (a conditional expression, for instance) keep their intended grouping
 * inside the expansion.
 */
#define reghop_c(pos,off) ((char*)reghop((U8*)(pos), off))
#define reghopmaybe_c(pos,off) ((char*)reghopmaybe((U8*)(pos), off))
#define HOP(pos,off) (UTF ? reghop((U8*)(pos), off) : (U8*)((pos) + (off)))
#define HOPMAYBE(pos,off) (UTF ? reghopmaybe((U8*)(pos), off) : (U8*)((pos) + (off)))
#define HOPc(pos,off) ((char*)HOP(pos,off))
#define HOPMAYBEc(pos,off) ((char*)HOPMAYBE(pos,off))
120
121 static void restore_pos(pTHXo_ void *arg);
122
123
124 STATIC CHECKPOINT
125 S_regcppush(pTHX_ I32 parenfloor)
126 {
127     dTHR;
128     int retval = PL_savestack_ix;
129     int i = (PL_regsize - parenfloor) * 4;
130     int p;
131
132     SSCHECK(i + 5);
133     for (p = PL_regsize; p > parenfloor; p--) {
134         SSPUSHINT(PL_regendp[p]);
135         SSPUSHINT(PL_regstartp[p]);
136         SSPUSHPTR(PL_reg_start_tmp[p]);
137         SSPUSHINT(p);
138     }
139     SSPUSHINT(PL_regsize);
140     SSPUSHINT(*PL_reglastparen);
141     SSPUSHPTR(PL_reginput);
142     SSPUSHINT(i + 3);
143     SSPUSHINT(SAVEt_REGCONTEXT);
144     return retval;
145 }
146
147 /* These are needed since we do not localize EVAL nodes: */
/*
 * REGCP_SET: optionally traces (under DEBUG_r), then stores the current
 * PL_savestack_ix in `lastcp'.  `lastcp' is not declared by the macro; it
 * has to be a variable in scope wherever the macro is used.
 * NOTE: the expansion is two statements and is not wrapped in a block, so
 * use it only as a complete statement of its own, never as the unbraced
 * body of an if/else or loop.
 */
148 #  define REGCP_SET  DEBUG_r(PerlIO_printf(Perl_debug_log,              \
149                              "  Setting an EVAL scope, savestack=%i\n", \
150                              PL_savestack_ix)); lastcp = PL_savestack_ix
151
/*
 * REGCP_UNWIND: optionally traces (only when `lastcp' differs from
 * PL_savestack_ix), then calls regcpblow(lastcp).  The same caveats as for
 * REGCP_SET apply: it needs `lastcp' in scope and expands to two
 * statements.
 */
152 #  define REGCP_UNWIND  DEBUG_r(lastcp != PL_savestack_ix ?             \
153                                 PerlIO_printf(Perl_debug_log,           \
154                                 "  Clearing an EVAL scope, savestack=%i..%i\n", \
155                                 lastcp, PL_savestack_ix) : 0); regcpblow(lastcp)
156
157 STATIC char *
158 S_regcppop(pTHX)
159 {
160     dTHR;
161     I32 i = SSPOPINT;
162     U32 paren = 0;
163     char *input;
164     I32 tmps;
165     assert(i == SAVEt_REGCONTEXT);
166     i = SSPOPINT;
167     input = (char *) SSPOPPTR;
168     *PL_reglastparen = SSPOPINT;
169     PL_regsize = SSPOPINT;
170     for (i -= 3; i > 0; i -= 4) {
171         paren = (U32)SSPOPINT;
172         PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
173         PL_regstartp[paren] = SSPOPINT;
174         tmps = SSPOPINT;
175         if (paren <= *PL_reglastparen)
176             PL_regendp[paren] = tmps;
177         DEBUG_r(
178             PerlIO_printf(Perl_debug_log,
179                           "     restoring \\%d to %d(%d)..%d%s\n",
180                           paren, PL_regstartp[paren], 
181                           PL_reg_start_tmp[paren] - PL_bostr,
182                           PL_regendp[paren], 
183                           (paren > *PL_reglastparen ? "(no)" : ""));
184         );
185     }
186     DEBUG_r(
187         if (*PL_reglastparen + 1 <= PL_regnpar) {
188             PerlIO_printf(Perl_debug_log,
189                           "     restoring \\%d..\\%d to undef\n",
190                           *PL_reglastparen + 1, PL_regnpar);
191         }
192     );
193     for (paren = *PL_reglastparen + 1; paren <= PL_regnpar; paren++) {
194         if (paren > PL_regsize)
195             PL_regstartp[paren] = -1;
196         PL_regendp[paren] = -1;
197     }
198     return input;
199 }
200
201 STATIC char *
202 S_regcp_set_to(pTHX_ I32 ss)
203 {
204     dTHR;
205     I32 tmp = PL_savestack_ix;
206
207     PL_savestack_ix = ss;
208     regcppop();
209     PL_savestack_ix = tmp;
210     return Nullch;
211 }
212
213 typedef struct re_cc_state
214 {
215     I32 ss;
216     regnode *node;
217     struct re_cc_state *prev;
218     CURCUR *cc;
219     regexp *re;
220 } re_cc_state;
221
222 #define regcpblow(cp) LEAVE_SCOPE(cp)
223
224 /*
225  * pregexec and friends
226  */
227
228 /*
229  - pregexec - match a regexp against a string
230  */
231 I32
232 Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend,
233          char *strbeg, I32 minend, SV *screamer, U32 nosave)
234 /* strend: pointer to null at end of string */
235 /* strbeg: real beginning of string */
236 /* minend: end of match must be >=minend after stringarg. */
237 /* nosave: For optimizations. */
238 {
239     return
240         regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL, 
241                       nosave ? 0 : REXEC_COPY_STR);
242 }
243
244 STATIC void
245 S_cache_re(pTHX_ regexp *prog)
246 {
247     dTHR;
248     PL_regprecomp = prog->precomp;              /* Needed for FAIL. */
249 #ifdef DEBUGGING
250     PL_regprogram = prog->program;
251 #endif
252     PL_regnpar = prog->nparens;
253     PL_regdata = prog->data;    
254     PL_reg_re = prog;    
255 }
256
257 /* 
258  * Need to implement the following flags for reg_anch:
259  *
260  * USE_INTUIT_NOML              - Useful to call re_intuit_start() first
261  * USE_INTUIT_ML
262  * INTUIT_AUTORITATIVE_NOML     - Can trust a positive answer
263  * INTUIT_AUTORITATIVE_ML
264  * INTUIT_ONCE_NOML             - Intuit can match in one location only.
265  * INTUIT_ONCE_ML
266  *
267  * Another flag for this function: SECOND_TIME (so that float substrs
268  * with giant delta may be not rechecked).
269  */
270
271 /* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
272
273 /* If SCREAM, then sv should be compatible with strpos and strend.
274    Otherwise, only SvCUR(sv) is used to get strbeg. */
275
276 /* XXXX We assume that strpos is strbeg unless sv. */
277
278 char *
279 Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
280                      char *strend, U32 flags, re_scream_pos_data *data)
281 {
282     I32 start_shift;
283     /* Should be nonnegative! */
284     I32 end_shift;
285     char *s;
286     char *t;
287     I32 ml_anch;
288
289     DEBUG_r( if (!PL_colorset) reginitcolors() );
290     DEBUG_r(PerlIO_printf(Perl_debug_log,
291                       "%sGuessing start of match:%s `%s%.60s%s%s' against `%s%.*s%s%s'\n",
292                       PL_colors[4],PL_colors[5],PL_colors[0],
293                       prog->precomp,
294                       PL_colors[1],
295                       (strlen(prog->precomp) > 60 ? "..." : ""),
296                       PL_colors[0],
297                       (strend - strpos > 60 ? 60 : strend - strpos),
298                       strpos, PL_colors[1],
299                       (strend - strpos > 60 ? "..." : ""))
300         );
301
302     if (prog->minlen > strend - strpos)
303         goto fail;
304
305     /* XXXX Move further down? */
306     start_shift = prog->check_offset_min;       /* okay to underestimate on CC */
307     /* Should be nonnegative! */
308     end_shift = prog->minlen - start_shift -
309         CHR_SVLEN(prog->check_substr) + (SvTAIL(prog->check_substr) != 0);
310
311     if (prog->reganch & ROPT_ANCH) {
312         ml_anch = !( (prog->reganch & ROPT_ANCH_SINGLE)
313                      || ( (prog->reganch & ROPT_ANCH_BOL)
314                           && !PL_multiline ) );
315
316         if ((prog->check_offset_min == prog->check_offset_max) && !ml_anch) {
317             /* Anchored... */
318             I32 slen;
319
320             if ( !(prog->reganch & ROPT_ANCH_GPOS) /* Checked by the caller */
321                  && (sv && (strpos + SvCUR(sv) != strend)) )
322                 goto fail;
323
324             s = (char*)HOP((U8*)strpos, prog->check_offset_min);
325             if (SvTAIL(prog->check_substr)) {
326                 slen = SvCUR(prog->check_substr);       /* >= 1 */
327
328                 if ( strend - s > slen || strend - s < slen - 1 ) {
329                     s = Nullch;
330                     goto finish;
331                 }
332                 if ( strend - s == slen && strend[-1] != '\n') {
333                     s = Nullch;
334                     goto finish;
335                 }
336                 /* Now should match s[0..slen-2] */
337                 slen--;
338                 if (slen && (*SvPVX(prog->check_substr) != *s
339                              || (slen > 1
340                                  && memNE(SvPVX(prog->check_substr), s, slen))))
341                     s = Nullch;
342             }
343             else if (*SvPVX(prog->check_substr) != *s
344                      || ((slen = SvCUR(prog->check_substr)) > 1
345                          && memNE(SvPVX(prog->check_substr), s, slen)))
346                     s = Nullch;
347             else
348                     s = strpos;
349             goto finish;
350         }
351         s = strpos;
352         if (!ml_anch && (s + prog->check_offset_max < strend - prog->minlen))
353             end_shift += strend - s - prog->minlen - prog->check_offset_max;
354     }
355     else {
356         ml_anch = 0;
357         s = strpos;
358     }
359
360   restart:
361     if (flags & REXEC_SCREAM) {
362         SV *c = prog->check_substr;
363         char *strbeg = SvPVX(sv);       /* XXXX Assume PV_force() on SCREAM! */
364         I32 p = -1;                     /* Internal iterator of scream. */
365         I32 *pp = data ? data->scream_pos : &p;
366
367         if (PL_screamfirst[BmRARE(c)] >= 0
368             || ( BmRARE(c) == '\n'
369                  && (BmPREVIOUS(c) == SvCUR(c) - 1)
370                  && SvTAIL(c) ))
371             s = screaminstr(sv, prog->check_substr, 
372                             start_shift + (strpos - strbeg), end_shift, pp, 0);
373         else
374             s = Nullch;
375         if (data)
376             *data->scream_olds = s;
377     }
378     else
379         s = fbm_instr((unsigned char*)s + start_shift,
380                       (unsigned char*)strend - end_shift,
381                       prog->check_substr, PL_multiline ? FBMrf_MULTILINE : 0);
382
383     /* Update the count-of-usability, remove useless subpatterns,
384         unshift s.  */
385   finish:
386     if (!s) {
387         ++BmUSEFUL(prog->check_substr); /* hooray */
388         goto fail;                      /* not present */
389     }
390     else if (s - strpos > prog->check_offset_max &&
391              ((prog->reganch & ROPT_UTF8)
392               ? ((t = reghopmaybe_c(s, -(prog->check_offset_max)))
393                  && t >= strpos)
394               : (t = s - prog->check_offset_max) != 0) ) {
395         if (ml_anch && t[-1] != '\n') {
396           find_anchor:
397             while (t < strend - end_shift - prog->minlen) {
398                 if (*t == '\n') {
399                     if (t < s - prog->check_offset_min) {
400                         s = t + 1;
401                         goto set_useful;
402                     }
403                     s = t + 1;
404                     goto restart;
405                 }
406                 t++;
407             }
408             s = Nullch;
409             goto finish;
410         }
411         s = t;
412       set_useful:
413         ++BmUSEFUL(prog->check_substr); /* hooray/2 */
414     }
415     else {
416         if (ml_anch && sv 
417             && (strpos + SvCUR(sv) != strend) && strpos[-1] != '\n') {
418             t = strpos;
419             goto find_anchor;
420         }
421         if (!(prog->reganch & ROPT_NAUGHTY)
422             && --BmUSEFUL(prog->check_substr) < 0
423             && prog->check_substr == prog->float_substr) { /* boo */
424             /* If flags & SOMETHING - do not do it many times on the same match */
425             SvREFCNT_dec(prog->check_substr);
426             prog->check_substr = Nullsv;        /* disable */
427             prog->float_substr = Nullsv;        /* clear */
428             s = strpos;
429             prog->reganch &= ~RE_USE_INTUIT;
430         }
431         else
432             s = strpos;
433     }
434
435     DEBUG_r(PerlIO_printf(Perl_debug_log, "%sFound%s at offset %ld\n",
436                           PL_colors[4],PL_colors[5], (long)(s - strpos)) );
437     return s;
438   fail:
439     DEBUG_r(PerlIO_printf(Perl_debug_log, "%sNot found...%s\n",
440                           PL_colors[4],PL_colors[5]));
441     return Nullch;
442 }
443
444 /*
445  - regexec_flags - match a regexp against a string
446  */
447 I32
448 Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend,
449               char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
450 /* strend: pointer to null at end of string */
451 /* strbeg: real beginning of string */
452 /* minend: end of match must be >=minend after stringarg. */
453 /* data: May be used for some additional optimizations. */
454 /* nosave: For optimizations. */
455 {
456     dTHR;
457     register char *s;
458     register regnode *c;
459     register char *startpos = stringarg;
460     register I32 tmp;
461     I32 minlen;         /* must match at least this many chars */
462     I32 dontbother = 0; /* how many characters not to try at end */
463     CURCUR cc;
464     I32 start_shift = 0;                /* Offset of the start to find
465                                          constant substr. */            /* CC */
466     I32 end_shift = 0;                  /* Same for the end. */         /* CC */
467     I32 scream_pos = -1;                /* Internal iterator of scream. */
468     char *scream_olds;
469     SV* oreplsv = GvSV(PL_replgv);
470
471     cc.cur = 0;
472     cc.oldcc = 0;
473     PL_regcc = &cc;
474
475     cache_re(prog);
476 #ifdef DEBUGGING
477     PL_regnarrate = PL_debug & 512;
478 #endif
479
480     /* Be paranoid... */
481     if (prog == NULL || startpos == NULL) {
482         Perl_croak(aTHX_ "NULL regexp parameter");
483         return 0;
484     }
485
486     minlen = prog->minlen;
487     if (strend - startpos < minlen) goto phooey;
488
489     if (startpos == strbeg)     /* is ^ valid at stringarg? */
490         PL_regprev = '\n';
491     else {
492         PL_regprev = (U32)stringarg[-1];
493         if (!PL_multiline && PL_regprev == '\n')
494             PL_regprev = '\0';          /* force ^ to NOT match */
495     }
496
497     /* Check validity of program. */
498     if (UCHARAT(prog->program) != REG_MAGIC) {
499         Perl_croak(aTHX_ "corrupted regexp program");
500     }
501
502     PL_reg_flags = 0;
503     PL_reg_eval_set = 0;
504
505     if (prog->reganch & ROPT_UTF8)
506         PL_reg_flags |= RF_utf8;
507
508     /* Mark beginning of line for ^ and lookbehind. */
509     PL_regbol = startpos;
510     PL_bostr  = strbeg;
511     PL_reg_sv = sv;
512
513     /* Mark end of line for $ (and such) */
514     PL_regeol = strend;
515
516     /* see how far we have to get to not match where we matched before */
517     PL_regtill = startpos+minend;
518
519     /* We start without call_cc context.  */
520     PL_reg_call_cc = 0;
521
522     /* If there is a "must appear" string, look for it. */
523     s = startpos;
524
525     if (prog->reganch & ROPT_GPOS_SEEN) {
526         MAGIC *mg;
527
528         if (!(flags & REXEC_IGNOREPOS) && sv && SvTYPE(sv) >= SVt_PVMG
529             && SvMAGIC(sv) && (mg = mg_find(sv, 'g')) && mg->mg_len >= 0)
530             PL_reg_ganch = strbeg + mg->mg_len;
531         else
532             PL_reg_ganch = startpos;
533         if (prog->reganch & ROPT_ANCH_GPOS) {
534             if (s > PL_reg_ganch)
535                 goto phooey;
536             s = PL_reg_ganch;
537         }
538     }
539
540     if (!(flags & REXEC_CHECKED) && prog->check_substr != Nullsv) {
541         re_scream_pos_data d;
542
543         d.scream_olds = &scream_olds;
544         d.scream_pos = &scream_pos;
545         s = re_intuit_start(prog, sv, s, strend, flags, &d);
546         if (!s)
547             goto phooey;        /* not present */
548     }
549
550     DEBUG_r( if (!PL_colorset) reginitcolors() );
551     DEBUG_r(PerlIO_printf(Perl_debug_log,
552                       "%sMatching%s `%s%.60s%s%s' against `%s%.*s%s%s'\n",
553                       PL_colors[4],PL_colors[5],PL_colors[0],
554                       prog->precomp,
555                       PL_colors[1],
556                       (strlen(prog->precomp) > 60 ? "..." : ""),
557                       PL_colors[0],
558                       (strend - startpos > 60 ? 60 : strend - startpos),
559                       startpos, PL_colors[1],
560                       (strend - startpos > 60 ? "..." : ""))
561         );
562
563     /* Simplest case:  anchored match need be tried only once. */
564     /*  [unless only anchor is BOL and multiline is set] */
565     if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) {
566         if (s == startpos && regtry(prog, startpos))
567             goto got_it;
568         else if (PL_multiline || (prog->reganch & ROPT_IMPLICIT)
569                  || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */
570         {
571             char *end;
572
573             if (minlen)
574                 dontbother = minlen - 1;
575             end = HOPc(strend, -dontbother) - 1;
576             /* for multiline we only have to try after newlines */
577             if (prog->check_substr) {
578                 while (1) {
579                     if (regtry(prog, s))
580                         goto got_it;
581                     if (s >= end)
582                         goto phooey;
583                     s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
584                     if (!s)
585                         goto phooey;
586                 }               
587             } else {
588                 if (s > startpos)
589                     s--;
590                 while (s < end) {
591                     if (*s++ == '\n') { /* don't need PL_utf8skip here */
592                         if (regtry(prog, s))
593                             goto got_it;
594                     }
595                 }               
596             }
597         }
598         goto phooey;
599     } else if (prog->reganch & ROPT_ANCH_GPOS) {
600         if (regtry(prog, PL_reg_ganch))
601             goto got_it;
602         goto phooey;
603     }
604
605     /* Messy cases:  unanchored match. */
606     if (prog->anchored_substr && prog->reganch & ROPT_SKIP) { 
607         /* we have /x+whatever/ */
608         /* it must be a one character string (XXXX Except UTF?) */
609         char ch = SvPVX(prog->anchored_substr)[0];
610         if (UTF) {
611             while (s < strend) {
612                 if (*s == ch) {
613                     if (regtry(prog, s)) goto got_it;
614                     s += UTF8SKIP(s);
615                     while (s < strend && *s == ch)
616                         s += UTF8SKIP(s);
617                 }
618                 s += UTF8SKIP(s);
619             }
620         }
621         else {
622             while (s < strend) {
623                 if (*s == ch) {
624                     if (regtry(prog, s)) goto got_it;
625                     s++;
626                     while (s < strend && *s == ch)
627                         s++;
628                 }
629                 s++;
630             }
631         }
632     }
633     /*SUPPRESS 560*/
634     else if (prog->anchored_substr != Nullsv
635              || (prog->float_substr != Nullsv 
636                  && prog->float_max_offset < strend - s)) {
637         SV *must = prog->anchored_substr 
638             ? prog->anchored_substr : prog->float_substr;
639         I32 back_max = 
640             prog->anchored_substr ? prog->anchored_offset : prog->float_max_offset;
641         I32 back_min = 
642             prog->anchored_substr ? prog->anchored_offset : prog->float_min_offset;
643         I32 delta = back_max - back_min;
644         char *last = HOPc(strend,       /* Cannot start after this */
645                           -(I32)(CHR_SVLEN(must)
646                                  - (SvTAIL(must) != 0) + back_min));
647         char *last1;            /* Last position checked before */
648
649         if (s > PL_bostr)
650             last1 = HOPc(s, -1);
651         else
652             last1 = s - 1;      /* bogus */
653
654         /* XXXX check_substr already used to find `s', can optimize if
655            check_substr==must. */
656         scream_pos = -1;
657         dontbother = end_shift;
658         strend = HOPc(strend, -dontbother);
659         while ( (s <= last) &&
660                 ((flags & REXEC_SCREAM) 
661                  ? (s = screaminstr(sv, must, HOPc(s, back_min) - strbeg,
662                                     end_shift, &scream_pos, 0))
663                  : (s = fbm_instr((unsigned char*)HOP(s, back_min),
664                                   (unsigned char*)strend, must, 
665                                   PL_multiline ? FBMrf_MULTILINE : 0))) ) {
666             if (HOPc(s, -back_max) > last1) {
667                 last1 = HOPc(s, -back_min);
668                 s = HOPc(s, -back_max);
669             }
670             else {
671                 char *t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
672
673                 last1 = HOPc(s, -back_min);
674                 s = t;          
675             }
676             if (UTF) {
677                 while (s <= last1) {
678                     if (regtry(prog, s))
679                         goto got_it;
680                     s += UTF8SKIP(s);
681                 }
682             }
683             else {
684                 while (s <= last1) {
685                     if (regtry(prog, s))
686                         goto got_it;
687                     s++;
688                 }
689             }
690         }
691         goto phooey;
692     }
693     else if (c = prog->regstclass) {
694         I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
695         char *cc;
696
697         if (minlen)
698             dontbother = minlen - 1;
699         strend = HOPc(strend, -dontbother);     /* don't bother with what can't match */
700         tmp = 1;
701         /* We know what class it must start with. */
702         switch (OP(c)) {
703         case ANYOFUTF8:
704             cc = (char *) OPERAND(c);
705             while (s < strend) {
706                 if (REGINCLASSUTF8(c, (U8*)s)) {
707                     if (tmp && regtry(prog, s))
708                         goto got_it;
709                     else
710                         tmp = doevery;
711                 }
712                 else
713                     tmp = 1;
714                 s += UTF8SKIP(s);
715             }
716             break;
717         case ANYOF:
718             cc = (char *) OPERAND(c);
719             while (s < strend) {
720                 if (REGINCLASS(cc, *s)) {
721                     if (tmp && regtry(prog, s))
722                         goto got_it;
723                     else
724                         tmp = doevery;
725                 }
726                 else
727                     tmp = 1;
728                 s++;
729             }
730             break;
731         case BOUNDL:
732             PL_reg_flags |= RF_tainted;
733             /* FALL THROUGH */
734         case BOUND:
735             if (minlen) {
736                 dontbother++;
737                 strend -= 1;
738             }
739             tmp = (s != startpos) ? UCHARAT(s - 1) : PL_regprev;
740             tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
741             while (s < strend) {
742                 if (tmp == !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
743                     tmp = !tmp;
744                     if (regtry(prog, s))
745                         goto got_it;
746                 }
747                 s++;
748             }
749             if ((minlen || tmp) && regtry(prog,s))
750                 goto got_it;
751             break;
752         case BOUNDLUTF8:
753             PL_reg_flags |= RF_tainted;
754             /* FALL THROUGH */
755         case BOUNDUTF8:
756             if (minlen) {
757                 dontbother++;
758                 strend = reghop_c(strend, -1);
759             }
760             tmp = (I32)(s != startpos) ? utf8_to_uv(reghop((U8*)s, -1), 0) : PL_regprev;
761             tmp = ((OP(c) == BOUND ? isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
762             while (s < strend) {
763                 if (tmp == !(OP(c) == BOUND ?
764                              swash_fetch(PL_utf8_alnum, (U8*)s) :
765                              isALNUM_LC_utf8((U8*)s)))
766                 {
767                     tmp = !tmp;
768                     if (regtry(prog, s))
769                         goto got_it;
770                 }
771                 s += UTF8SKIP(s);
772             }
773             if ((minlen || tmp) && regtry(prog,s))
774                 goto got_it;
775             break;
776         case NBOUNDL:
777             PL_reg_flags |= RF_tainted;
778             /* FALL THROUGH */
779         case NBOUND:
780             if (minlen) {
781                 dontbother++;
782                 strend -= 1;
783             }
784             tmp = (s != startpos) ? UCHARAT(s - 1) : PL_regprev;
785             tmp = ((OP(c) == NBOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
786             while (s < strend) {
787                 if (tmp == !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
788                     tmp = !tmp;
789                 else if (regtry(prog, s))
790                     goto got_it;
791                 s++;
792             }
793             if ((minlen || !tmp) && regtry(prog,s))
794                 goto got_it;
795             break;
796         case NBOUNDLUTF8:
797             PL_reg_flags |= RF_tainted;
798             /* FALL THROUGH */
799         case NBOUNDUTF8:
800             if (minlen) {
801                 dontbother++;
802                 strend = reghop_c(strend, -1);
803             }
804             tmp = (I32)(s != startpos) ? utf8_to_uv(reghop((U8*)s, -1), 0) : PL_regprev;
805             tmp = ((OP(c) == NBOUND ? isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
806             while (s < strend) {
807                 if (tmp == !(OP(c) == NBOUND ?
808                              swash_fetch(PL_utf8_alnum, (U8*)s) :
809                              isALNUM_LC_utf8((U8*)s)))
810                     tmp = !tmp;
811                 else if (regtry(prog, s))
812                     goto got_it;
813                 s += UTF8SKIP(s);
814             }
815             if ((minlen || !tmp) && regtry(prog,s))
816                 goto got_it;
817             break;
818         case ALNUM:
819             while (s < strend) {
820                 if (isALNUM(*s)) {
821                     if (tmp && regtry(prog, s))
822                         goto got_it;
823                     else
824                         tmp = doevery;
825                 }
826                 else
827                     tmp = 1;
828                 s++;
829             }
830             break;
831         case ALNUMUTF8:
832             while (s < strend) {
833                 if (swash_fetch(PL_utf8_alnum, (U8*)s)) {
834                     if (tmp && regtry(prog, s))
835                         goto got_it;
836                     else
837                         tmp = doevery;
838                 }
839                 else
840                     tmp = 1;
841                 s += UTF8SKIP(s);
842             }
843             break;
844         case ALNUML:
845             PL_reg_flags |= RF_tainted;
846             while (s < strend) {
847                 if (isALNUM_LC(*s)) {
848                     if (tmp && regtry(prog, s))
849                         goto got_it;
850                     else
851                         tmp = doevery;
852                 }
853                 else
854                     tmp = 1;
855                 s++;
856             }
857             break;
858         case ALNUMLUTF8:
859             PL_reg_flags |= RF_tainted;
860             while (s < strend) {
861                 if (isALNUM_LC_utf8((U8*)s)) {
862                     if (tmp && regtry(prog, s))
863                         goto got_it;
864                     else
865                         tmp = doevery;
866                 }
867                 else
868                     tmp = 1;
869                 s += UTF8SKIP(s);
870             }
871             break;
872         case NALNUM:
873             while (s < strend) {
874                 if (!isALNUM(*s)) {
875                     if (tmp && regtry(prog, s))
876                         goto got_it;
877                     else
878                         tmp = doevery;
879                 }
880                 else
881                     tmp = 1;
882                 s++;
883             }
884             break;
885         case NALNUMUTF8:
886             while (s < strend) {
887                 if (!swash_fetch(PL_utf8_alnum, (U8*)s)) {
888                     if (tmp && regtry(prog, s))
889                         goto got_it;
890                     else
891                         tmp = doevery;
892                 }
893                 else
894                     tmp = 1;
895                 s += UTF8SKIP(s);
896             }
897             break;
898         case NALNUML:
899             PL_reg_flags |= RF_tainted;
900             while (s < strend) {
901                 if (!isALNUM_LC(*s)) {
902                     if (tmp && regtry(prog, s))
903                         goto got_it;
904                     else
905                         tmp = doevery;
906                 }
907                 else
908                     tmp = 1;
909                 s++;
910             }
911             break;
912         case NALNUMLUTF8:
913             PL_reg_flags |= RF_tainted;
914             while (s < strend) {
915                 if (!isALNUM_LC_utf8((U8*)s)) {
916                     if (tmp && regtry(prog, s))
917                         goto got_it;
918                     else
919                         tmp = doevery;
920                 }
921                 else
922                     tmp = 1;
923                 s += UTF8SKIP(s);
924             }
925             break;
926         case SPACE:
927             while (s < strend) {
928                 if (isSPACE(*s)) {
929                     if (tmp && regtry(prog, s))
930                         goto got_it;
931                     else
932                         tmp = doevery;
933                 }
934                 else
935                     tmp = 1;
936                 s++;
937             }
938             break;
939         case SPACEUTF8:
940             while (s < strend) {
941                 if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s)) {
942                     if (tmp && regtry(prog, s))
943                         goto got_it;
944                     else
945                         tmp = doevery;
946                 }
947                 else
948                     tmp = 1;
949                 s += UTF8SKIP(s);
950             }
951             break;
952         case SPACEL:
953             PL_reg_flags |= RF_tainted;
954             while (s < strend) {
955                 if (isSPACE_LC(*s)) {
956                     if (tmp && regtry(prog, s))
957                         goto got_it;
958                     else
959                         tmp = doevery;
960                 }
961                 else
962                     tmp = 1;
963                 s++;
964             }
965             break;
966         case SPACELUTF8:
967             PL_reg_flags |= RF_tainted;
968             while (s < strend) {
969                 if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) {
970                     if (tmp && regtry(prog, s))
971                         goto got_it;
972                     else
973                         tmp = doevery;
974                 }
975                 else
976                     tmp = 1;
977                 s += UTF8SKIP(s);
978             }
979             break;
980         case NSPACE:
981             while (s < strend) {
982                 if (!isSPACE(*s)) {
983                     if (tmp && regtry(prog, s))
984                         goto got_it;
985                     else
986                         tmp = doevery;
987                 }
988                 else
989                     tmp = 1;
990                 s++;
991             }
992             break;
993         case NSPACEUTF8:
994             while (s < strend) {
995                 if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s))) {
996                     if (tmp && regtry(prog, s))
997                         goto got_it;
998                     else
999                         tmp = doevery;
1000                 }
1001                 else
1002                     tmp = 1;
1003                 s += UTF8SKIP(s);
1004             }
1005             break;
1006         case NSPACEL:
1007             PL_reg_flags |= RF_tainted;
1008             while (s < strend) {
1009                 if (!isSPACE_LC(*s)) {
1010                     if (tmp && regtry(prog, s))
1011                         goto got_it;
1012                     else
1013                         tmp = doevery;
1014                 }
1015                 else
1016                     tmp = 1;
1017                 s++;
1018             }
1019             break;
1020         case NSPACELUTF8:
1021             PL_reg_flags |= RF_tainted;
1022             while (s < strend) {
1023                 if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) {
1024                     if (tmp && regtry(prog, s))
1025                         goto got_it;
1026                     else
1027                         tmp = doevery;
1028                 }
1029                 else
1030                     tmp = 1;
1031                 s += UTF8SKIP(s);
1032             }
1033             break;
1034         case DIGIT:
1035             while (s < strend) {
1036                 if (isDIGIT(*s)) {
1037                     if (tmp && regtry(prog, s))
1038                         goto got_it;
1039                     else
1040                         tmp = doevery;
1041                 }
1042                 else
1043                     tmp = 1;
1044                 s++;
1045             }
1046             break;
1047         case DIGITUTF8:
1048             while (s < strend) {
1049                 if (swash_fetch(PL_utf8_digit,(U8*)s)) {
1050                     if (tmp && regtry(prog, s))
1051                         goto got_it;
1052                     else
1053                         tmp = doevery;
1054                 }
1055                 else
1056                     tmp = 1;
1057                 s += UTF8SKIP(s);
1058             }
1059             break;
1060         case DIGITL:
1061             PL_reg_flags |= RF_tainted;
1062             while (s < strend) {
1063                 if (isDIGIT_LC(*s)) {
1064                     if (tmp && regtry(prog, s))
1065                         goto got_it;
1066                     else
1067                         tmp = doevery;
1068                 }
1069                 else
1070                     tmp = 1;
1071                 s++;
1072             }
1073             break;
1074         case DIGITLUTF8:
1075             PL_reg_flags |= RF_tainted;
1076             while (s < strend) {
1077                 if (isDIGIT_LC_utf8((U8*)s)) {
1078                     if (tmp && regtry(prog, s))
1079                         goto got_it;
1080                     else
1081                         tmp = doevery;
1082                 }
1083                 else
1084                     tmp = 1;
1085                 s += UTF8SKIP(s);
1086             }
1087             break;
1088         case NDIGIT:
1089             while (s < strend) {
1090                 if (!isDIGIT(*s)) {
1091                     if (tmp && regtry(prog, s))
1092                         goto got_it;
1093                     else
1094                         tmp = doevery;
1095                 }
1096                 else
1097                     tmp = 1;
1098                 s++;
1099             }
1100             break;
1101         case NDIGITUTF8:
1102             while (s < strend) {
1103                 if (!swash_fetch(PL_utf8_digit,(U8*)s)) {
1104                     if (tmp && regtry(prog, s))
1105                         goto got_it;
1106                     else
1107                         tmp = doevery;
1108                 }
1109                 else
1110                     tmp = 1;
1111                 s += UTF8SKIP(s);
1112             }
1113             break;
1114         case NDIGITL:
1115             PL_reg_flags |= RF_tainted;
1116             while (s < strend) {
1117                 if (!isDIGIT_LC(*s)) {
1118                     if (tmp && regtry(prog, s))
1119                         goto got_it;
1120                     else
1121                         tmp = doevery;
1122                 }
1123                 else
1124                     tmp = 1;
1125                 s++;
1126             }
1127             break;
1128         case NDIGITLUTF8:
1129             PL_reg_flags |= RF_tainted;
1130             while (s < strend) {
1131                 if (!isDIGIT_LC_utf8((U8*)s)) {
1132                     if (tmp && regtry(prog, s))
1133                         goto got_it;
1134                     else
1135                         tmp = doevery;
1136                 }
1137                 else
1138                     tmp = 1;
1139                 s += UTF8SKIP(s);
1140             }
1141             break;
1142         case ALNUMC:
1143             while (s < strend) {
1144                 if (isALNUMC(*s)) {
1145                     if (tmp && regtry(prog, s))
1146                         goto got_it;
1147                     else
1148                         tmp = doevery;
1149                 }
1150                 else
1151                     tmp = 1;
1152                 s++;
1153             }
1154             break;
1155         case ALNUMCUTF8:
1156             while (s < strend) {
1157                 if (swash_fetch(PL_utf8_alnumc, (U8*)s)) {
1158                     if (tmp && regtry(prog, s))
1159                         goto got_it;
1160                     else
1161                         tmp = doevery;
1162                 }
1163                 else
1164                     tmp = 1;
1165                 s += UTF8SKIP(s);
1166             }
1167             break;
1168         case ALNUMCL:
1169             PL_reg_flags |= RF_tainted;
1170             while (s < strend) {
1171                 if (isALNUMC_LC(*s)) {
1172                     if (tmp && regtry(prog, s))
1173                         goto got_it;
1174                     else
1175                         tmp = doevery;
1176                 }
1177                 else
1178                     tmp = 1;
1179                 s++;
1180             }
1181             break;
1182         case ALNUMCLUTF8:
1183             PL_reg_flags |= RF_tainted;
1184             while (s < strend) {
1185                 if (isALNUMC_LC_utf8((U8*)s)) {
1186                     if (tmp && regtry(prog, s))
1187                         goto got_it;
1188                     else
1189                         tmp = doevery;
1190                 }
1191                 else
1192                     tmp = 1;
1193                 s += UTF8SKIP(s);
1194             }
1195             break;
1196         case NALNUMC:
1197             while (s < strend) {
1198                 if (!isALNUMC(*s)) {
1199                     if (tmp && regtry(prog, s))
1200                         goto got_it;
1201                     else
1202                         tmp = doevery;
1203                 }
1204                 else
1205                     tmp = 1;
1206                 s++;
1207             }
1208             break;
1209         case NALNUMCUTF8:
1210             while (s < strend) {
1211                 if (!swash_fetch(PL_utf8_alnumc, (U8*)s)) {
1212                     if (tmp && regtry(prog, s))
1213                         goto got_it;
1214                     else
1215                         tmp = doevery;
1216                 }
1217                 else
1218                     tmp = 1;
1219                 s += UTF8SKIP(s);
1220             }
1221             break;
1222         case NALNUMCL:
1223             PL_reg_flags |= RF_tainted;
1224             while (s < strend) {
1225                 if (!isALNUMC_LC(*s)) {
1226                     if (tmp && regtry(prog, s))
1227                         goto got_it;
1228                     else
1229                         tmp = doevery;
1230                 }
1231                 else
1232                     tmp = 1;
1233                 s++;
1234             }
1235             break;
1236         case NALNUMCLUTF8:
1237             PL_reg_flags |= RF_tainted;
1238             while (s < strend) {
1239                 if (!isALNUMC_LC_utf8((U8*)s)) {
1240                     if (tmp && regtry(prog, s))
1241                         goto got_it;
1242                     else
1243                         tmp = doevery;
1244                 }
1245                 else
1246                     tmp = 1;
1247                 s += UTF8SKIP(s);
1248             }
1249             break;
1250         case ASCII:
1251             while (s < strend) {
1252                 if (isASCII(*(U8*)s)) {
1253                     if (tmp && regtry(prog, s))
1254                         goto got_it;
1255                     else
1256                         tmp = doevery;
1257                 }
1258                 else
1259                     tmp = 1;
1260                 s++;
1261             }
1262             break;
1263         case NASCII:
1264             while (s < strend) {
1265                 if (!isASCII(*(U8*)s)) {
1266                     if (tmp && regtry(prog, s))
1267                         goto got_it;
1268                     else
1269                         tmp = doevery;
1270                 }
1271                 else
1272                     tmp = 1;
1273                 s++;
1274             }
1275             break;
1276         case CNTRL:
1277             while (s < strend) {
1278                 if (isCNTRL(*s)) {
1279                     if (tmp && regtry(prog, s))
1280                         goto got_it;
1281                     else
1282                         tmp = doevery;
1283                 }
1284                 else
1285                     tmp = 1;
1286                 s++;
1287             }
1288             break;
1289         case CNTRLUTF8:
1290             while (s < strend) {
1291                 if (swash_fetch(PL_utf8_cntrl,(U8*)s)) {
1292                     if (tmp && regtry(prog, s))
1293                         goto got_it;
1294                     else
1295                         tmp = doevery;
1296                 }
1297                 else
1298                     tmp = 1;
1299                 s += UTF8SKIP(s);
1300             }
1301             break;
1302         case CNTRLL:
1303             PL_reg_flags |= RF_tainted;
1304             while (s < strend) {
1305                 if (isCNTRL_LC(*s)) {
1306                     if (tmp && regtry(prog, s))
1307                         goto got_it;
1308                     else
1309                         tmp = doevery;
1310                 }
1311                 else
1312                     tmp = 1;
1313                 s++;
1314             }
1315             break;
1316         case CNTRLLUTF8:  /* look for a start position whose character passes isCNTRL_LC_utf8() */
1317             PL_reg_flags |= RF_tainted;  /* set the RF_tainted bit, as the other ...L/...LUTF8 arms do */
1318             while (s < strend) {
1319                 if (isCNTRL_LC_utf8((U8*)s)) {  /* no ' ' shortcut: a space is not a control character, and NCNTRLLUTF8 must be the exact complement */
1320                     if (tmp && regtry(prog, s))
1321                         goto got_it;
1322                     else
1323                         tmp = doevery;  /* after a skipped or failed try, later members of this run are tried only if doevery is set */
1324                 }
1325                 else
1326                     tmp = 1;  /* non-member seen: the next member begins a new run and is tried */
1327                 s += UTF8SKIP(s);  /* advance by UTF8SKIP(s), not by a single byte */
1328             }
1329             break;
1330         case NCNTRL:
1331             while (s < strend) {
1332                 if (!isCNTRL(*s)) {
1333                     if (tmp && regtry(prog, s))
1334                         goto got_it;
1335                     else
1336                         tmp = doevery;
1337                 }
1338                 else
1339                     tmp = 1;
1340                 s++;
1341             }
1342             break;
1343         case NCNTRLUTF8:
1344             while (s < strend) {
1345                 if (!swash_fetch(PL_utf8_cntrl,(U8*)s)) {
1346                     if (tmp && regtry(prog, s))
1347                         goto got_it;
1348                     else
1349                         tmp = doevery;
1350                 }
1351                 else
1352                     tmp = 1;
1353                 s += UTF8SKIP(s);
1354             }
1355             break;
1356         case NCNTRLL:
1357             PL_reg_flags |= RF_tainted;
1358             while (s < strend) {
1359                 if (!isCNTRL_LC(*s)) {
1360                     if (tmp && regtry(prog, s))
1361                         goto got_it;
1362                     else
1363                         tmp = doevery;
1364                 }
1365                 else
1366                     tmp = 1;
1367                 s++;
1368             }
1369             break;
1370         case NCNTRLLUTF8:
1371             PL_reg_flags |= RF_tainted;
1372             while (s < strend) {
1373                 if (!isCNTRL_LC_utf8((U8*)s)) {
1374                     if (tmp && regtry(prog, s))
1375                         goto got_it;
1376                     else
1377                         tmp = doevery;
1378                 }
1379                 else
1380                     tmp = 1;
1381                 s += UTF8SKIP(s);
1382             }
1383             break;
1384         case GRAPH:
1385             while (s < strend) {
1386                 if (isGRAPH(*s)) {
1387                     if (tmp && regtry(prog, s))
1388                         goto got_it;
1389                     else
1390                         tmp = doevery;
1391                 }
1392                 else
1393                     tmp = 1;
1394                 s++;
1395             }
1396             break;
1397         case GRAPHUTF8:
1398             while (s < strend) {
1399                 if (swash_fetch(PL_utf8_graph,(U8*)s)) {
1400                     if (tmp && regtry(prog, s))
1401                         goto got_it;
1402                     else
1403                         tmp = doevery;
1404                 }
1405                 else
1406                     tmp = 1;
1407                 s += UTF8SKIP(s);
1408             }
1409             break;
1410         case GRAPHL:
1411             PL_reg_flags |= RF_tainted;
1412             while (s < strend) {
1413                 if (isGRAPH_LC(*s)) {
1414                     if (tmp && regtry(prog, s))
1415                         goto got_it;
1416                     else
1417                         tmp = doevery;
1418                 }
1419                 else
1420                     tmp = 1;
1421                 s++;
1422             }
1423             break;
1424         case GRAPHLUTF8:  /* look for a start position whose character passes isGRAPH_LC_utf8() */
1425             PL_reg_flags |= RF_tainted;  /* set the RF_tainted bit, as the other ...L/...LUTF8 arms do */
1426             while (s < strend) {
1427                 if (isGRAPH_LC_utf8((U8*)s)) {  /* no ' ' shortcut: a space is not a graphic character, and NGRAPHLUTF8 must be the exact complement */
1428                     if (tmp && regtry(prog, s))
1429                         goto got_it;
1430                     else
1431                         tmp = doevery;  /* after a skipped or failed try, later members of this run are tried only if doevery is set */
1432                 }
1433                 else
1434                     tmp = 1;  /* non-member seen: the next member begins a new run and is tried */
1435                 s += UTF8SKIP(s);  /* advance by UTF8SKIP(s), not by a single byte */
1436             }
1437             break;
1438         case NGRAPH:
1439             while (s < strend) {
1440                 if (!isGRAPH(*s)) {
1441                     if (tmp && regtry(prog, s))
1442                         goto got_it;
1443                     else
1444                         tmp = doevery;
1445                 }
1446                 else
1447                     tmp = 1;
1448                 s++;
1449             }
1450             break;
1451         case NGRAPHUTF8:
1452             while (s < strend) {
1453                 if (!swash_fetch(PL_utf8_graph,(U8*)s)) {
1454                     if (tmp && regtry(prog, s))
1455                         goto got_it;
1456                     else
1457                         tmp = doevery;
1458                 }
1459                 else
1460                     tmp = 1;
1461                 s += UTF8SKIP(s);
1462             }
1463             break;
1464         case NGRAPHL:
1465             PL_reg_flags |= RF_tainted;
1466             while (s < strend) {
1467                 if (!isGRAPH_LC(*s)) {
1468                     if (tmp && regtry(prog, s))
1469                         goto got_it;
1470                     else
1471                         tmp = doevery;
1472                 }
1473                 else
1474                     tmp = 1;
1475                 s++;
1476             }
1477             break;
1478         case NGRAPHLUTF8:
1479             PL_reg_flags |= RF_tainted;
1480             while (s < strend) {
1481                 if (!isGRAPH_LC_utf8((U8*)s)) {
1482                     if (tmp && regtry(prog, s))
1483                         goto got_it;
1484                     else
1485                         tmp = doevery;
1486                 }
1487                 else
1488                     tmp = 1;
1489                 s += UTF8SKIP(s);
1490             }
1491             break;
1492         case LOWER:
1493             while (s < strend) {
1494                 if (isLOWER(*s)) {
1495                     if (tmp && regtry(prog, s))
1496                         goto got_it;
1497                     else
1498                         tmp = doevery;
1499                 }
1500                 else
1501                     tmp = 1;
1502                 s++;
1503             }
1504             break;
1505         case LOWERUTF8:
1506             while (s < strend) {
1507                 if (swash_fetch(PL_utf8_lower,(U8*)s)) {
1508                     if (tmp && regtry(prog, s))
1509                         goto got_it;
1510                     else
1511                         tmp = doevery;
1512                 }
1513                 else
1514                     tmp = 1;
1515                 s += UTF8SKIP(s);
1516             }
1517             break;
1518         case LOWERL:
1519             PL_reg_flags |= RF_tainted;
1520             while (s < strend) {
1521                 if (isLOWER_LC(*s)) {
1522                     if (tmp && regtry(prog, s))
1523                         goto got_it;
1524                     else
1525                         tmp = doevery;
1526                 }
1527                 else
1528                     tmp = 1;
1529                 s++;
1530             }
1531             break;
1532         case LOWERLUTF8:  /* look for a start position whose character passes isLOWER_LC_utf8() */
1533             PL_reg_flags |= RF_tainted;  /* set the RF_tainted bit, as the other ...L/...LUTF8 arms do */
1534             while (s < strend) {
1535                 if (isLOWER_LC_utf8((U8*)s)) {  /* no ' ' shortcut: a space is not a lowercase letter, and NLOWERLUTF8 must be the exact complement */
1536                     if (tmp && regtry(prog, s))
1537                         goto got_it;
1538                     else
1539                         tmp = doevery;  /* after a skipped or failed try, later members of this run are tried only if doevery is set */
1540                 }
1541                 else
1542                     tmp = 1;  /* non-member seen: the next member begins a new run and is tried */
1543                 s += UTF8SKIP(s);  /* advance by UTF8SKIP(s), not by a single byte */
1544             }
1545             break;
1546         case NLOWER:
1547             while (s < strend) {
1548                 if (!isLOWER(*s)) {
1549                     if (tmp && regtry(prog, s))
1550                         goto got_it;
1551                     else
1552                         tmp = doevery;
1553                 }
1554                 else
1555                     tmp = 1;
1556                 s++;
1557             }
1558             break;
1559         case NLOWERUTF8:
1560             while (s < strend) {
1561                 if (!swash_fetch(PL_utf8_lower,(U8*)s)) {
1562                     if (tmp && regtry(prog, s))
1563                         goto got_it;
1564                     else
1565                         tmp = doevery;
1566                 }
1567                 else
1568                     tmp = 1;
1569                 s += UTF8SKIP(s);
1570             }
1571             break;
1572         case NLOWERL:
1573             PL_reg_flags |= RF_tainted;
1574             while (s < strend) {
1575                 if (!isLOWER_LC(*s)) {
1576                     if (tmp && regtry(prog, s))
1577                         goto got_it;
1578                     else
1579                         tmp = doevery;
1580                 }
1581                 else
1582                     tmp = 1;
1583                 s++;
1584             }
1585             break;
1586         case NLOWERLUTF8:
1587             PL_reg_flags |= RF_tainted;
1588             while (s < strend) {
1589                 if (!isLOWER_LC_utf8((U8*)s)) {
1590                     if (tmp && regtry(prog, s))
1591                         goto got_it;
1592                     else
1593                         tmp = doevery;
1594                 }
1595                 else
1596                     tmp = 1;
1597                 s += UTF8SKIP(s);
1598             }
1599             break;
1600         case PRINT:
1601             while (s < strend) {
1602                 if (isPRINT(*s)) {
1603                     if (tmp && regtry(prog, s))
1604                         goto got_it;
1605                     else
1606                         tmp = doevery;
1607                 }
1608                 else
1609                     tmp = 1;
1610                 s++;
1611             }
1612             break;
1613         case PRINTUTF8:
1614             while (s < strend) {
1615                 if (swash_fetch(PL_utf8_print,(U8*)s)) {
1616                     if (tmp && regtry(prog, s))
1617                         goto got_it;
1618                     else
1619                         tmp = doevery;
1620                 }
1621                 else
1622                     tmp = 1;
1623                 s += UTF8SKIP(s);
1624             }
1625             break;
1626         case PRINTL:
1627             PL_reg_flags |= RF_tainted;
1628             while (s < strend) {
1629                 if (isPRINT_LC(*s)) {
1630                     if (tmp && regtry(prog, s))
1631                         goto got_it;
1632                     else
1633                         tmp = doevery;
1634                 }
1635                 else
1636                     tmp = 1;
1637                 s++;
1638             }
1639             break;
1640         case PRINTLUTF8:  /* look for a start position whose character passes isPRINT_LC_utf8() */
1641             PL_reg_flags |= RF_tainted;  /* set the RF_tainted bit, as the other ...L/...LUTF8 arms do */
1642             while (s < strend) {
1643                 if (isPRINT_LC_utf8((U8*)s)) {  /* ' ' shortcut dropped so this arm is the exact complement of NPRINTLUTF8, which consults only isPRINT_LC_utf8() */
1644                     if (tmp && regtry(prog, s))
1645                         goto got_it;
1646                     else
1647                         tmp = doevery;  /* after a skipped or failed try, later members of this run are tried only if doevery is set */
1648                 }
1649                 else
1650                     tmp = 1;  /* non-member seen: the next member begins a new run and is tried */
1651                 s += UTF8SKIP(s);  /* advance by UTF8SKIP(s), not by a single byte */
1652             }
1653             break;
1654         case NPRINT:
1655             while (s < strend) {
1656                 if (!isPRINT(*s)) {
1657                     if (tmp && regtry(prog, s))
1658                         goto got_it;
1659                     else
1660                         tmp = doevery;
1661                 }
1662                 else
1663                     tmp = 1;
1664                 s++;
1665             }
1666             break;
1667         case NPRINTUTF8:
1668             while (s < strend) {
1669                 if (!swash_fetch(PL_utf8_print,(U8*)s)) {
1670                     if (tmp && regtry(prog, s))
1671                         goto got_it;
1672                     else
1673                         tmp = doevery;
1674                 }
1675                 else
1676                     tmp = 1;
1677                 s += UTF8SKIP(s);
1678             }
1679             break;
1680         case NPRINTL:
1681             PL_reg_flags |= RF_tainted;
1682             while (s < strend) {
1683                 if (!isPRINT_LC(*s)) {
1684                     if (tmp && regtry(prog, s))
1685                         goto got_it;
1686                     else
1687                         tmp = doevery;
1688                 }
1689                 else
1690                     tmp = 1;
1691                 s++;
1692             }
1693             break;
1694         case NPRINTLUTF8:
1695             PL_reg_flags |= RF_tainted;
1696             while (s < strend) {
1697                 if (!isPRINT_LC_utf8((U8*)s)) {
1698                     if (tmp && regtry(prog, s))
1699                         goto got_it;
1700                     else
1701                         tmp = doevery;
1702                 }
1703                 else
1704                     tmp = 1;
1705                 s += UTF8SKIP(s);
1706             }
1707             break;
1708         case PUNCT:
1709             while (s < strend) {
1710                 if (isPUNCT(*s)) {
1711                     if (tmp && regtry(prog, s))
1712                         goto got_it;
1713                     else
1714                         tmp = doevery;
1715                 }
1716                 else
1717                     tmp = 1;
1718                 s++;
1719             }
1720             break;
1721         case PUNCTUTF8:
1722             while (s < strend) {
1723                 if (swash_fetch(PL_utf8_punct,(U8*)s)) {
1724                     if (tmp && regtry(prog, s))
1725                         goto got_it;
1726                     else
1727                         tmp = doevery;
1728                 }
1729                 else
1730                     tmp = 1;
1731                 s += UTF8SKIP(s);
1732             }
1733             break;
1734         case PUNCTL:
1735             PL_reg_flags |= RF_tainted;
1736             while (s < strend) {
1737                 if (isPUNCT_LC(*s)) {
1738                     if (tmp && regtry(prog, s))
1739                         goto got_it;
1740                     else
1741                         tmp = doevery;
1742                 }
1743                 else
1744                     tmp = 1;
1745                 s++;
1746             }
1747             break;
1748         case PUNCTLUTF8:  /* look for a start position whose character passes isPUNCT_LC_utf8() */
1749             PL_reg_flags |= RF_tainted;  /* set the RF_tainted bit, as the other ...L/...LUTF8 arms do */
1750             while (s < strend) {
1751                 if (isPUNCT_LC_utf8((U8*)s)) {  /* no ' ' shortcut: a space is not punctuation, and NPUNCTLUTF8 must be the exact complement */
1752                     if (tmp && regtry(prog, s))
1753                         goto got_it;
1754                     else
1755                         tmp = doevery;  /* after a skipped or failed try, later members of this run are tried only if doevery is set */
1756                 }
1757                 else
1758                     tmp = 1;  /* non-member seen: the next member begins a new run and is tried */
1759                 s += UTF8SKIP(s);  /* advance by UTF8SKIP(s), not by a single byte */
1760             }
1761             break;
1762         case NPUNCT:
1763             while (s < strend) {
1764                 if (!isPUNCT(*s)) {
1765                     if (tmp && regtry(prog, s))
1766                         goto got_it;
1767                     else
1768                         tmp = doevery;
1769                 }
1770                 else
1771                     tmp = 1;
1772                 s++;
1773             }
1774             break;
1775         case NPUNCTUTF8:
1776             while (s < strend) {
1777                 if (!swash_fetch(PL_utf8_punct,(U8*)s)) {
1778                     if (tmp && regtry(prog, s))
1779                         goto got_it;
1780                     else
1781                         tmp = doevery;
1782                 }
1783                 else
1784                     tmp = 1;
1785                 s += UTF8SKIP(s);
1786             }
1787             break;
1788         case NPUNCTL:
1789             PL_reg_flags |= RF_tainted;
1790             while (s < strend) {
1791                 if (!isPUNCT_LC(*s)) {
1792                     if (tmp && regtry(prog, s))
1793                         goto got_it;
1794                     else
1795                         tmp = doevery;
1796                 }
1797                 else
1798                     tmp = 1;
1799                 s++;
1800             }
1801             break;
1802         case NPUNCTLUTF8:
1803             PL_reg_flags |= RF_tainted;
1804             while (s < strend) {
1805                 if (!isPUNCT_LC_utf8((U8*)s)) {
1806                     if (tmp && regtry(prog, s))
1807                         goto got_it;
1808                     else
1809                         tmp = doevery;
1810                 }
1811                 else
1812                     tmp = 1;
1813                 s += UTF8SKIP(s);
1814             }
1815             break;
1816         case UPPER:
1817             while (s < strend) {
1818                 if (isUPPER(*s)) {
1819                     if (tmp && regtry(prog, s))
1820                         goto got_it;
1821                     else
1822                         tmp = doevery;
1823                 }
1824                 else
1825                     tmp = 1;
1826                 s++;
1827             }
1828             break;
1829         case UPPERUTF8:
1830             while (s < strend) {
1831                 if (swash_fetch(PL_utf8_upper,(U8*)s)) {
1832                     if (tmp && regtry(prog, s))
1833                         goto got_it;
1834                     else
1835                         tmp = doevery;
1836                 }
1837                 else
1838                     tmp = 1;
1839                 s += UTF8SKIP(s);
1840             }
1841             break;
1842         case UPPERL:
1843             PL_reg_flags |= RF_tainted;
1844             while (s < strend) {
1845                 if (isUPPER_LC(*s)) {
1846                     if (tmp && regtry(prog, s))
1847                         goto got_it;
1848                     else
1849                         tmp = doevery;
1850                 }
1851                 else
1852                     tmp = 1;
1853                 s++;
1854             }
1855             break;
1856         case UPPERLUTF8:
1857             PL_reg_flags |= RF_tainted;
1858             while (s < strend) {
1859                 if (*s == ' ' || isUPPER_LC_utf8((U8*)s)) {
1860                     if (tmp && regtry(prog, s))
1861                         goto got_it;
1862                     else
1863                         tmp = doevery;
1864                 }
1865                 else
1866                     tmp = 1;
1867                 s += UTF8SKIP(s);
1868             }
1869             break;
1870         case NUPPER:
1871             while (s < strend) {
1872                 if (!isUPPER(*s)) {
1873                     if (tmp && regtry(prog, s))
1874                         goto got_it;
1875                     else
1876                         tmp = doevery;
1877                 }
1878                 else
1879                     tmp = 1;
1880                 s++;
1881             }
1882             break;
1883         case NUPPERUTF8:
1884             while (s < strend) {
1885                 if (!swash_fetch(PL_utf8_upper,(U8*)s)) {
1886                     if (tmp && regtry(prog, s))
1887                         goto got_it;
1888                     else
1889                         tmp = doevery;
1890                 }
1891                 else
1892                     tmp = 1;
1893                 s += UTF8SKIP(s);
1894             }
1895             break;
1896         case NUPPERL:
1897             PL_reg_flags |= RF_tainted;
1898             while (s < strend) {
1899                 if (!isUPPER_LC(*s)) {
1900                     if (tmp && regtry(prog, s))
1901                         goto got_it;
1902                     else
1903                         tmp = doevery;
1904                 }
1905                 else
1906                     tmp = 1;
1907                 s++;
1908             }
1909             break;
1910         case NUPPERLUTF8:
1911             PL_reg_flags |= RF_tainted;
1912             while (s < strend) {
1913                 if (!isUPPER_LC_utf8((U8*)s)) {
1914                     if (tmp && regtry(prog, s))
1915                         goto got_it;
1916                     else
1917                         tmp = doevery;
1918                 }
1919                 else
1920                     tmp = 1;
1921                 s += UTF8SKIP(s);
1922             }
1923             break;
1924         case XDIGIT:
1925             while (s < strend) {
1926                 if (isXDIGIT(*s)) {
1927                     if (tmp && regtry(prog, s))
1928                         goto got_it;
1929                     else
1930                         tmp = doevery;
1931                 }
1932                 else
1933                     tmp = 1;
1934                 s++;
1935             }
1936             break;
1937         case NXDIGIT:
1938             while (s < strend) {
1939                 if (!isXDIGIT(*s)) {
1940                     if (tmp && regtry(prog, s))
1941                         goto got_it;
1942                     else
1943                         tmp = doevery;
1944                 }
1945                 else
1946                     tmp = 1;
1947                 s++;
1948             }
1949             break;
1950         }
1951     }
1952     else {
1953         dontbother = 0;
1954         if (prog->float_substr != Nullsv) {     /* Trim the end. */
1955             char *last;
1956             I32 oldpos = scream_pos;
1957
1958             if (flags & REXEC_SCREAM) {
1959                 last = screaminstr(sv, prog->float_substr, s - strbeg,
1960                                    end_shift, &scream_pos, 1); /* last one */
1961                 if (!last)
1962                     last = scream_olds; /* Only one occurence. */
1963             }
1964             else {
1965                 STRLEN len;
1966                 char *little = SvPV(prog->float_substr, len);
1967
1968                 if (SvTAIL(prog->float_substr)) {
1969                     if (memEQ(strend - len + 1, little, len - 1))
1970                         last = strend - len + 1;
1971                     else if (!PL_multiline)
1972                         last = memEQ(strend - len, little, len) 
1973                             ? strend - len : Nullch;
1974                     else
1975                         goto find_last;
1976                 } else {
1977                   find_last:
1978                     if (len) 
1979                         last = rninstr(s, strend, little, little + len);
1980                     else
1981                         last = strend;  /* matching `$' */
1982                 }
1983             }
1984             if (last == NULL) goto phooey; /* Should not happen! */
1985             dontbother = strend - last + prog->float_min_offset;
1986         }
1987         if (minlen && (dontbother < minlen))
1988             dontbother = minlen - 1;
1989         strend -= dontbother;              /* this one's always in bytes! */
1990         /* We don't know much -- general case. */
1991         if (UTF) {
1992             for (;;) {
1993                 if (regtry(prog, s))
1994                     goto got_it;
1995                 if (s >= strend)
1996                     break;
1997                 s += UTF8SKIP(s);
1998             };
1999         }
2000         else {
2001             do {
2002                 if (regtry(prog, s))
2003                     goto got_it;
2004             } while (s++ < strend);
2005         }
2006     }
2007
2008     /* Failure. */
2009     goto phooey;
2010
2011 got_it:
2012     RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
2013
2014     if (PL_reg_eval_set) {
2015         /* Preserve the current value of $^R */
2016         if (oreplsv != GvSV(PL_replgv))
2017             sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
2018                                                   restored, the value remains
2019                                                   the same. */
2020         restore_pos(aTHXo_ 0);
2021     }
2022
2023     /* make sure $`, $&, $', and $digit will work later */
2024     if ( !(flags & REXEC_NOT_FIRST) ) {
2025         if (RX_MATCH_COPIED(prog)) {
2026             Safefree(prog->subbeg);
2027             RX_MATCH_COPIED_off(prog);
2028         }
2029         if (flags & REXEC_COPY_STR) {
2030             I32 i = PL_regeol - startpos + (stringarg - strbeg);
2031
2032             s = savepvn(strbeg, i);
2033             prog->subbeg = s;
2034             prog->sublen = i;
2035             RX_MATCH_COPIED_on(prog);
2036         }
2037         else {
2038             prog->subbeg = strbeg;
2039             prog->sublen = PL_regeol - strbeg;  /* strend may have been modified */
2040         }
2041     }
2042     
2043     return 1;
2044
2045 phooey:
2046     if (PL_reg_eval_set)
2047         restore_pos(aTHXo_ 0);
2048     return 0;
2049 }
2050
2051 /*
2052  - regtry - try match at specific point
2053  */
/*
 * S_regtry: attempt a match of prog with the match start pinned at
 * startpos.
 *
 *   prog      compiled regexp.  Its startp[], endp[] and lastparen fields
 *             are overwritten on every call; subbeg, sublen and the COPIED
 *             flag are additionally touched by the one-time setup below.
 *   startpos  candidate start of the match; stored in startp[0] as an
 *             offset from PL_bostr.
 *
 * Returns 1 if regmatch() succeeds from this position (endp[0] is then set
 * from PL_reginput), 0 otherwise.
 */
2054 STATIC I32                      /* 0 failure, 1 success */
2055 S_regtry(pTHX_ regexp *prog, char *startpos)
2056 {
2057     dTHR;
2058     register I32 i;
2059     register I32 *sp;
2060     register I32 *ep;
2061     CHECKPOINT lastcp;
2062
    /* One-time setup: this branch is skipped once PL_reg_eval_set is
       non-zero.  Judging by the comments further down, ROPT_EVAL_SEEN marks
       a pattern that contains (?{}) code -- NOTE(review): confirm against
       the definition of the flag. */
2063     if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
2064         MAGIC *mg;
2065
2066         PL_reg_eval_set = RS_init;
2067         DEBUG_r(DEBUG_s(
2068             PerlIO_printf(Perl_debug_log, "  setting stack tmpbase at %i\n",
2069                           PL_stack_sp - PL_stack_base);
2070             ));
        /* Save blk_oldsp of the cxstack[cxstack_ix] entry, then overwrite it
           with the present stack height. */
2071         SAVEINT(cxstack[cxstack_ix].blk_oldsp);
2072         cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
2073         /* Otherwise OP_NEXTSTATE will free whatever on stack now.  */
2074         SAVETMPS;
2075         /* Apparently this is not needed, judging by wantarray. */
2076         /* SAVEINT(cxstack[cxstack_ix].blk_gimme);
2077            cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
2078
2079         if (PL_reg_sv) {
2080             /* Make $_ available to executed code. */
2081             if (PL_reg_sv != DEFSV) {
2082                 /* SAVE_DEFSV does *not* suffice here for USE_THREADS */
2083                 SAVESPTR(DEFSV);
2084                 DEFSV = PL_reg_sv;
2085             }
2086         
            /* Reuse the 'g' magic already attached to PL_reg_sv when
               mg_find() returns one; otherwise attach it here and start it
               with mg_len = -1. */
2087             if (!(SvTYPE(PL_reg_sv) >= SVt_PVMG && SvMAGIC(PL_reg_sv) 
2088                   && (mg = mg_find(PL_reg_sv, 'g')))) {
2089                 /* prepare for quick setting of pos */
2090                 sv_magic(PL_reg_sv, (SV*)0, 'g', Nullch, 0);
2091                 mg = mg_find(PL_reg_sv, 'g');
2092                 mg->mg_len = -1;
2093             }
            /* Remember the magic and the mg_len it holds right now.
               restore_pos is handed to SAVEDESTRUCTOR -- presumably so the
               recorded state is put back when the save stack unwinds;
               NOTE(review): confirm against restore_pos. */
2094             PL_reg_magic    = mg;
2095             PL_reg_oldpos   = mg->mg_len;
2096             SAVEDESTRUCTOR(restore_pos, 0);
2097         }
        /* Allocate PL_reg_curpm on first use, bind it to this regexp and
           install it as PL_curpm; the previous PL_curpm is kept in
           PL_reg_oldcurpm. */
2098         if (!PL_reg_curpm)
2099             New(22,PL_reg_curpm, 1, PMOP);
2100         PL_reg_curpm->op_pmregexp = prog;
2101         PL_reg_oldcurpm = PL_curpm;
2102         PL_curpm = PL_reg_curpm;
2103         if (RX_MATCH_COPIED(prog)) {
2104             /*  Here is a serious problem: we cannot rewrite subbeg,
2105                 since it may be needed if this match fails.  Thus
2106                 $` inside (?{}) could fail... */
            /* Stash the copied buffer and its length, then clear the COPIED
               flag on prog. */
2107             PL_reg_oldsaved = prog->subbeg;
2108             PL_reg_oldsavedlen = prog->sublen;
2109             RX_MATCH_COPIED_off(prog);
2110         }
2111         else
2112             PL_reg_oldsaved = Nullch;
        /* In both cases subbeg/sublen now describe PL_bostr .. PL_regeol. */
2113         prog->subbeg = PL_bostr;
2114         prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
2115     }
    /* Per-attempt state: record the start offset and point the PL_reg*
       globals at the capture arrays and lastparen of this regexp. */
2116     prog->startp[0] = startpos - PL_bostr;
2117     PL_reginput = startpos;
2118     PL_regstartp = prog->startp;
2119     PL_regendp = prog->endp;
2120     PL_reglastparen = &prog->lastparen;
2121     prog->lastparen = 0;
2122     PL_regsize = 0;
2123     DEBUG_r(PL_reg_starttry = startpos);
    /* Grow PL_reg_start_tmp whenever its recorded length is not greater
       than nparens; the new length is nparens*3/2 + 3 entries. */
2124     if (PL_reg_start_tmpl <= prog->nparens) {
2125         PL_reg_start_tmpl = prog->nparens*3/2 + 3;
2126         if(PL_reg_start_tmp)
2127             Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
2128         else
2129             New(22,PL_reg_start_tmp, PL_reg_start_tmpl, char*);
2130     }
2131
2132     /* XXXX What this code is doing here?!!!  There should be no need
2133        to do this again and again, PL_reglastparen should take care of
2134        this!  */
    /* Set startp[1..nparens] and endp[1..nparens] to -1.  The pointers are
       pre-incremented, so slot 0 of each array is not touched here. */
2135     sp = prog->startp;
2136     ep = prog->endp;
2137     if (prog->nparens) {
2138         for (i = prog->nparens; i >= 1; i--) {
2139             *++sp = -1;
2140             *++ep = -1;
2141         }
2142     }
    /* lastcp is never named again in this function; presumably REGCP_SET
       and REGCP_UNWIND use it to mark and roll back a checkpoint around
       the attempt -- NOTE(review): confirm in the macro definitions. */
2143     REGCP_SET;
    /* The matcher is started one node past prog->program. */
2144     if (regmatch(prog->program + 1)) {
        /* Success: the end of the match is wherever PL_reginput was left. */
2145         prog->endp[0] = PL_reginput - PL_bostr;
2146         return 1;
2147     }
2148     REGCP_UNWIND;
2149     return 0;
2150 }
2151
2152 /*
2153  - regmatch - main matching routine
2154  *
2155  * Conceptually the strategy is simple:  check to see whether the current
2156  * node matches, call self recursively to see whether the rest matches,
2157  * and then act accordingly.  In practice we make some effort to avoid
2158  * recursion, in particular by going through "ordinary" nodes (that don't
2159  * need to know whether the rest of the match failed) by a loop instead of
2160  * by recursion.
2161  */
2162 /* [lwall] I've hoisted the register declarations to the outer block in order to
2163  * maybe save a little bit of pushing and popping on the stack.  It also takes
2164  * advantage of machines that use a register save mask on subroutine entry.
2165  */
2166 STATIC I32                      /* 0 failure, 1 success */
2167 S_regmatch(pTHX_ regnode *prog)
2168 {
2169     dTHR;
2170     register regnode *scan;     /* Current node. */
2171     regnode *next;              /* Next node. */
2172     regnode *inner;             /* Next node in internal branch. */
2173     register I32 nextchr;       /* renamed nextchr - nextchar colides with
2174                                    function of same name */
2175     register I32 n;             /* no or next */
2176     register I32 ln;            /* len or last */
2177     register char *s;           /* operand or save */
2178     register char *locinput = PL_reginput;
2179     register I32 c1, c2, paren; /* case fold search, parenth */
2180     int minmod = 0, sw = 0, logical = 0;
2181 #ifdef DEBUGGING
2182     PL_regindent++;
2183 #endif
2184
2185     /* Note that nextchr is a byte even in UTF */
2186     nextchr = UCHARAT(locinput);
2187     scan = prog;
2188     while (scan != NULL) {
2189 #define sayNO_L (logical ? (logical = 0, sw = 0, goto cont) : sayNO)
2190 #ifdef DEBUGGING
2191 #  define sayYES goto yes
2192 #  define sayNO goto no
2193 #  define saySAME(x) if (x) goto yes; else goto no
2194 #  define REPORT_CODE_OFF 24
2195 #else
2196 #  define sayYES return 1
2197 #  define sayNO return 0
2198 #  define saySAME(x) return x
2199 #endif
2200         DEBUG_r( {
2201             SV *prop = sv_newmortal();
2202             int docolor = *PL_colors[0];
2203             int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
2204             int l = (PL_regeol - locinput > taill ? taill : PL_regeol - locinput);
2205             /* The part of the string before starttry has one color
2206                (pref0_len chars), between starttry and current
2207                position another one (pref_len - pref0_len chars),
2208                after the current position the third one.
2209                We assume that pref0_len <= pref_len, otherwise we
2210                decrease pref0_len.  */
2211             int pref_len = (locinput - PL_bostr > (5 + taill) - l 
2212                             ? (5 + taill) - l : locinput - PL_bostr);
2213             int pref0_len = pref_len  - (locinput - PL_reg_starttry);
2214
2215             if (l + pref_len < (5 + taill) && l < PL_regeol - locinput)
2216                 l = ( PL_regeol - locinput > (5 + taill) - pref_len 
2217                       ? (5 + taill) - pref_len : PL_regeol - locinput);
2218             if (pref0_len < 0)
2219                 pref0_len = 0;
2220             if (pref0_len > pref_len)
2221                 pref0_len = pref_len;
2222             regprop(prop, scan);
2223             PerlIO_printf(Perl_debug_log, 
2224                           "%4i <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|%3d:%*s%s\n",
2225                           locinput - PL_bostr, 
2226                           PL_colors[4], pref0_len, 
2227                           locinput - pref_len, PL_colors[5],
2228                           PL_colors[2], pref_len - pref0_len, 
2229                           locinput - pref_len + pref0_len, PL_colors[3],
2230                           (docolor ? "" : "> <"),
2231                           PL_colors[0], l, locinput, PL_colors[1],
2232                           15 - l - pref_len + 1,
2233                           "",
2234                           scan - PL_regprogram, PL_regindent*2, "",
2235                           SvPVX(prop));
2236         } );
2237
2238         next = scan + NEXT_OFF(scan);
2239         if (next == scan)
2240             next = NULL;
2241
2242         switch (OP(scan)) {
2243         case BOL:
2244             if (locinput == PL_bostr
2245                 ? PL_regprev == '\n'
2246                 : (PL_multiline && 
2247                    (nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
2248             {
2249                 /* regtill = regbol; */
2250                 break;
2251             }
2252             sayNO;
2253         case MBOL:
2254             if (locinput == PL_bostr
2255                 ? PL_regprev == '\n'
2256                 : ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
2257             {
2258                 break;
2259             }
2260             sayNO;
2261         case SBOL:
2262             if (locinput == PL_regbol && PL_regprev == '\n')
2263                 break;
2264             sayNO;
2265         case GPOS:
2266             if (locinput == PL_reg_ganch)
2267                 break;
2268             sayNO;
2269         case EOL:
2270             if (PL_multiline)
2271                 goto meol;
2272             else
2273                 goto seol;
2274         case MEOL:
2275           meol:
2276             if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
2277                 sayNO;
2278             break;
2279         case SEOL:
2280           seol:
2281             if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
2282                 sayNO;
2283             if (PL_regeol - locinput > 1)
2284                 sayNO;
2285             break;
2286         case EOS:
2287             if (PL_regeol != locinput)
2288                 sayNO;
2289             break;
2290         case SANYUTF8:
2291             if (nextchr & 0x80) {
2292                 locinput += PL_utf8skip[nextchr];
2293                 if (locinput > PL_regeol)
2294                     sayNO;
2295                 nextchr = UCHARAT(locinput);
2296                 break;
2297             }
2298             if (!nextchr && locinput >= PL_regeol)
2299                 sayNO;
2300             nextchr = UCHARAT(++locinput);
2301             break;
2302         case SANY:
2303             if (!nextchr && locinput >= PL_regeol)
2304                 sayNO;
2305             nextchr = UCHARAT(++locinput);
2306             break;
2307         case ANYUTF8:
2308             if (nextchr & 0x80) {
2309                 locinput += PL_utf8skip[nextchr];
2310                 if (locinput > PL_regeol)
2311                     sayNO;
2312                 nextchr = UCHARAT(locinput);
2313                 break;
2314             }
2315             if (!nextchr && locinput >= PL_regeol || nextchr == '\n')
2316                 sayNO;
2317             nextchr = UCHARAT(++locinput);
2318             break;
2319         case REG_ANY:
2320             if (!nextchr && locinput >= PL_regeol || nextchr == '\n')
2321                 sayNO;
2322             nextchr = UCHARAT(++locinput);
2323             break;
2324         case EXACT:
2325             s = (char *) OPERAND(scan);
2326             ln = UCHARAT(s++);
2327             /* Inline the first character, for speed. */
2328             if (UCHARAT(s) != nextchr)
2329                 sayNO;
2330             if (PL_regeol - locinput < ln)
2331                 sayNO;
2332             if (ln > 1 && memNE(s, locinput, ln))
2333                 sayNO;
2334             locinput += ln;
2335             nextchr = UCHARAT(locinput);
2336             break;
2337         case EXACTFL:
2338             PL_reg_flags |= RF_tainted;
2339             /* FALL THROUGH */
2340         case EXACTF:
2341             s = (char *) OPERAND(scan);
2342             ln = UCHARAT(s++);
2343
2344             if (UTF) {
2345                 char *l = locinput;
2346                 char *e = s + ln;
2347                 c1 = OP(scan) == EXACTF;
2348                 while (s < e) {
2349                     if (l >= PL_regeol)
2350                         sayNO;
2351                     if (utf8_to_uv((U8*)s, 0) != (c1 ?
2352                                                   toLOWER_utf8((U8*)l) :
2353                                                   toLOWER_LC_utf8((U8*)l)))
2354                     {
2355                         sayNO;
2356                     }
2357                     s += UTF8SKIP(s);
2358                     l += UTF8SKIP(l);
2359                 }
2360                 locinput = l;
2361                 nextchr = UCHARAT(locinput);
2362                 break;
2363             }
2364
2365             /* Inline the first character, for speed. */
2366             if (UCHARAT(s) != nextchr &&
2367                 UCHARAT(s) != ((OP(scan) == EXACTF)
2368                                ? PL_fold : PL_fold_locale)[nextchr])
2369                 sayNO;
2370             if (PL_regeol - locinput < ln)
2371                 sayNO;
2372             if (ln > 1 && (OP(scan) == EXACTF
2373                            ? ibcmp(s, locinput, ln)
2374                            : ibcmp_locale(s, locinput, ln)))
2375                 sayNO;
2376             locinput += ln;
2377             nextchr = UCHARAT(locinput);
2378             break;
2379         case ANYOFUTF8:
2380             s = (char *) OPERAND(scan);
2381             if (!REGINCLASSUTF8(scan, (U8*)locinput))
2382                 sayNO;
2383             if (locinput >= PL_regeol)
2384                 sayNO;
2385             locinput += PL_utf8skip[nextchr];
2386             nextchr = UCHARAT(locinput);
2387             break;
2388         case ANYOF:
2389             s = (char *) OPERAND(scan);
2390             if (nextchr < 0)
2391                 nextchr = UCHARAT(locinput);
2392             if (!REGINCLASS(s, nextchr))
2393                 sayNO;
2394             if (!nextchr && locinput >= PL_regeol)
2395                 sayNO;
2396             nextchr = UCHARAT(++locinput);
2397             break;
2398         case ALNUML:
2399             PL_reg_flags |= RF_tainted;
2400             /* FALL THROUGH */
2401         case ALNUM:
2402             if (!nextchr)
2403                 sayNO;
2404             if (!(OP(scan) == ALNUM
2405                   ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
2406                 sayNO;
2407             nextchr = UCHARAT(++locinput);
2408             break;
2409         case ALNUMLUTF8:
2410             PL_reg_flags |= RF_tainted;
2411             /* FALL THROUGH */
2412         case ALNUMUTF8:
2413             if (!nextchr)
2414                 sayNO;
2415             if (nextchr & 0x80) {
2416                 if (!(OP(scan) == ALNUMUTF8
2417                       ? swash_fetch(PL_utf8_alnum, (U8*)locinput)
2418                       : isALNUM_LC_utf8((U8*)locinput)))
2419                 {
2420                     sayNO;
2421                 }
2422                 locinput += PL_utf8skip[nextchr];
2423                 nextchr = UCHARAT(locinput);
2424                 break;
2425             }
2426             if (!(OP(scan) == ALNUMUTF8
2427                   ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
2428                 sayNO;
2429             nextchr = UCHARAT(++locinput);
2430             break;
2431         case NALNUML:
2432             PL_reg_flags |= RF_tainted;
2433             /* FALL THROUGH */
2434         case NALNUM:
2435             if (!nextchr && locinput >= PL_regeol)
2436                 sayNO;
2437             if (OP(scan) == NALNUM
2438                 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
2439                 sayNO;
2440             nextchr = UCHARAT(++locinput);
2441             break;
2442         case NALNUMLUTF8:
2443             PL_reg_flags |= RF_tainted;
2444             /* FALL THROUGH */
2445         case NALNUMUTF8:
2446             if (!nextchr && locinput >= PL_regeol)
2447                 sayNO;
2448             if (nextchr & 0x80) {
2449                 if (OP(scan) == NALNUMUTF8
2450                     ? swash_fetch(PL_utf8_alnum, (U8*)locinput)
2451                     : isALNUM_LC_utf8((U8*)locinput))
2452                 {
2453                     sayNO;
2454                 }
2455                 locinput += PL_utf8skip[nextchr];
2456                 nextchr = UCHARAT(locinput);
2457                 break;
2458             }
2459             if (OP(scan) == NALNUMUTF8
2460                 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
2461                 sayNO;
2462             nextchr = UCHARAT(++locinput);
2463             break;
2464         case BOUNDL:
2465         case NBOUNDL:
2466             PL_reg_flags |= RF_tainted;
2467             /* FALL THROUGH */
2468         case BOUND:
2469         case NBOUND:
2470             /* was last char in word? */
2471             ln = (locinput != PL_regbol) ? UCHARAT(locinput - 1) : PL_regprev;
2472             if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2473                 ln = isALNUM(ln);
2474                 n = isALNUM(nextchr);
2475             }
2476             else {
2477                 ln = isALNUM_LC(ln);
2478                 n = isALNUM_LC(nextchr);
2479             }
2480             if (((!ln) == (!n)) == (OP(scan) == BOUND || OP(scan) == BOUNDL))
2481                 sayNO;
2482             break;
2483         case BOUNDLUTF8:
2484         case NBOUNDLUTF8:
2485             PL_reg_flags |= RF_tainted;
2486             /* FALL THROUGH */
2487         case BOUNDUTF8:
2488         case NBOUNDUTF8:
2489             /* was last char in word? */
2490             ln = (locinput != PL_regbol)
2491                 ? utf8_to_uv(reghop((U8*)locinput, -1), 0) : PL_regprev;
2492             if (OP(scan) == BOUNDUTF8 || OP(scan) == NBOUNDUTF8) {
2493                 ln = isALNUM_uni(ln);
2494                 n = swash_fetch(PL_utf8_alnum, (U8*)locinput);
2495             }
2496             else {
2497                 ln = isALNUM_LC_uni(ln);
2498                 n = isALNUM_LC_utf8((U8*)locinput);
2499             }
2500             if (((!ln) == (!n)) == (OP(scan) == BOUNDUTF8 || OP(scan) == BOUNDLUTF8))
2501                 sayNO;
2502             break;
2503         case SPACEL:
2504             PL_reg_flags |= RF_tainted;
2505             /* FALL THROUGH */
2506         case SPACE:
2507             if (!nextchr && locinput >= PL_regeol)
2508                 sayNO;
2509             if (!(OP(scan) == SPACE
2510                   ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
2511                 sayNO;
2512             nextchr = UCHARAT(++locinput);
2513             break;
2514         case SPACELUTF8:
2515             PL_reg_flags |= RF_tainted;
2516             /* FALL THROUGH */
2517         case SPACEUTF8:
2518             if (!nextchr && locinput >= PL_regeol)
2519                 sayNO;
2520             if (nextchr & 0x80) {
2521                 if (!(OP(scan) == SPACEUTF8
2522                       ? swash_fetch(PL_utf8_space,(U8*)locinput)
2523                       : isSPACE_LC_utf8((U8*)locinput)))
2524                 {
2525                     sayNO;
2526                 }
2527                 locinput += PL_utf8skip[nextchr];
2528                 nextchr = UCHARAT(locinput);
2529                 break;
2530             }
2531             if (!(OP(scan) == SPACEUTF8
2532                   ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
2533                 sayNO;
2534             nextchr = UCHARAT(++locinput);
2535             break;
2536         case NSPACEL:
2537             PL_reg_flags |= RF_tainted;
2538             /* FALL THROUGH */
2539         case NSPACE:
2540             if (!nextchr)
2541                 sayNO;
2542             if (OP(scan) == SPACE
2543                 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
2544                 sayNO;
2545             nextchr = UCHARAT(++locinput);
2546             break;
2547         case NSPACELUTF8:
2548             PL_reg_flags |= RF_tainted;
2549             /* FALL THROUGH */
2550         case NSPACEUTF8:
2551             if (!nextchr)
2552                 sayNO;
2553             if (nextchr & 0x80) {
2554                 if (OP(scan) == NSPACEUTF8
2555                     ? swash_fetch(PL_utf8_space,(U8*)locinput)
2556                     : isSPACE_LC_utf8((U8*)locinput))
2557                 {
2558                     sayNO;
2559                 }
2560                 locinput += PL_utf8skip[nextchr];
2561                 nextchr = UCHARAT(locinput);
2562                 break;
2563             }
2564             if (OP(scan) == NSPACEUTF8
2565                 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
2566                 sayNO;
2567             nextchr = UCHARAT(++locinput);
2568             break;
2569         case DIGITL:
2570             PL_reg_flags |= RF_tainted;
2571             /* FALL THROUGH */
2572         case DIGIT:
2573             if (!nextchr && locinput >= PL_regeol)
2574                 sayNO;
2575             if (!(OP(scan) == DIGIT
2576                   ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
2577                 sayNO;
2578             nextchr = UCHARAT(++locinput);
2579             break;
2580         case DIGITLUTF8:
2581             PL_reg_flags |= RF_tainted;
2582             /* FALL THROUGH */
2583         case DIGITUTF8:
2584             if (!nextchr)
2585                 sayNO;
2586             if (nextchr & 0x80) {
2587                 if (OP(scan) == NDIGITUTF8
2588                     ? swash_fetch(PL_utf8_digit,(U8*)locinput)
2589                     : isDIGIT_LC_utf8((U8*)locinput))
2590                 {
2591                     sayNO;
2592                 }
2593                 locinput += PL_utf8skip[nextchr];
2594                 nextchr = UCHARAT(locinput);
2595                 break;
2596             }
2597             if (!isDIGIT(nextchr))
2598                 sayNO;
2599             nextchr = UCHARAT(++locinput);
2600             break;
2601         case NDIGITL:
2602             PL_reg_flags |= RF_tainted;
2603             /* FALL THROUGH */
2604         case NDIGIT:
2605             if (!nextchr)
2606                 sayNO;
2607             if (OP(scan) == DIGIT
2608                 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
2609                 sayNO;
2610             nextchr = UCHARAT(++locinput);
2611             break;
2612         case NDIGITLUTF8:
2613             PL_reg_flags |= RF_tainted;
2614             /* FALL THROUGH */
2615         case NDIGITUTF8:
2616             if (!nextchr && locinput >= PL_regeol)
2617                 sayNO;
2618             if (nextchr & 0x80) {
2619                 if (swash_fetch(PL_utf8_digit,(U8*)locinput))
2620                     sayNO;
2621                 locinput += PL_utf8skip[nextchr];
2622                 nextchr = UCHARAT(locinput);
2623                 break;
2624             }
2625             if (isDIGIT(nextchr))
2626                 sayNO;
2627             nextchr = UCHARAT(++locinput);
2628             break;
2629         case ALNUMCL:
2630             PL_reg_flags |= RF_tainted;
2631             /* FALL THROUGH */
2632         case ALNUMC:
2633             if (!nextchr)
2634                 sayNO;
2635             if (!(OP(scan) == ALNUMC
2636                   ? isALNUMC(nextchr) : isALNUMC_LC(nextchr)))
2637                 sayNO;
2638             nextchr = UCHARAT(++locinput);
2639             break;
2640         case ALNUMCLUTF8:
2641             PL_reg_flags |= RF_tainted;
2642             /* FALL THROUGH */
2643         case ALNUMCUTF8:
2644             if (!nextchr)
2645                 sayNO;
2646             if (nextchr & 0x80) {
2647                 if (!(OP(scan) == ALNUMCUTF8
2648                       ? swash_fetch(PL_utf8_alnumc, (U8*)locinput)
2649                       : isALNUMC_LC_utf8((U8*)locinput)))
2650                 {
2651                     sayNO;
2652                 }
2653                 locinput += PL_utf8skip[nextchr];
2654                 nextchr = UCHARAT(locinput);
2655                 break;
2656             }
2657             if (!(OP(scan) == ALNUMCUTF8
2658                   ? isALNUMC(nextchr) : isALNUMC_LC(nextchr)))
2659                 sayNO;
2660             nextchr = UCHARAT(++locinput);
2661             break;
2662         case NALNUMCL:
2663             PL_reg_flags |= RF_tainted;
2664             /* FALL THROUGH */
2665         case NALNUMC:
2666             if (!nextchr)
2667                 sayNO;
2668             if (OP(scan) == ALNUMC
2669                 ? isALNUMC(nextchr) : isALNUMC_LC(nextchr))
2670                 sayNO;
2671             nextchr = UCHARAT(++locinput);
2672             break;
2673         case NALNUMCLUTF8:
2674             PL_reg_flags |= RF_tainted;
2675             /* FALL THROUGH */
2676         case NALNUMCUTF8:
2677             if (!nextchr && locinput >= PL_regeol)
2678                 sayNO;
2679             if (nextchr & 0x80) {
2680                 if (swash_fetch(PL_utf8_alnumc,(U8*)locinput))
2681                     sayNO;
2682                 locinput += PL_utf8skip[nextchr];
2683                 nextchr = UCHARAT(locinput);
2684                 break;
2685             }
2686             if (isALNUMC(nextchr))
2687                 sayNO;
2688             nextchr = UCHARAT(++locinput);
2689             break;
2690         case ALPHAL:
2691             PL_reg_flags |= RF_tainted;
2692             /* FALL THROUGH */
2693         case ALPHA:
2694             if (!nextchr)
2695                 sayNO;
2696             if (!(OP(scan) == ALPHA
2697                   ? isALPHA(nextchr) : isALPHA_LC(nextchr)))
2698                 sayNO;
2699             nextchr = UCHARAT(++locinput);
2700             break;
2701         case ALPHALUTF8:
2702             PL_reg_flags |= RF_tainted;
2703             /* FALL THROUGH */
2704         case ALPHAUTF8:
2705             if (!nextchr)
2706                 sayNO;
2707             if (nextchr & 0x80) {
2708                 if (!(OP(scan) == ALPHAUTF8
2709                       ? swash_fetch(PL_utf8_alpha, (U8*)locinput)
2710                       : isALPHA_LC_utf8((U8*)locinput)))
2711                 {
2712                     sayNO;
2713                 }
2714                 locinput += PL_utf8skip[nextchr];
2715                 nextchr = UCHARAT(locinput);
2716                 break;
2717             }
2718             if (!(OP(scan) == ALPHAUTF8
2719                   ? isALPHA(nextchr) : isALPHA_LC(nextchr)))
2720                 sayNO;
2721             nextchr = UCHARAT(++locinput);
2722             break;
2723         case NALPHAL:
2724             PL_reg_flags |= RF_tainted;
2725             /* FALL THROUGH */
2726         case NALPHA:
2727             if (!nextchr)
2728                 sayNO;
2729             if (OP(scan) == ALPHA
2730                 ? isALPHA(nextchr) : isALPHA_LC(nextchr))
2731                 sayNO;
2732             nextchr = UCHARAT(++locinput);
2733             break;
2734         case NALPHALUTF8:
2735             PL_reg_flags |= RF_tainted;
2736             /* FALL THROUGH */
2737         case NALPHAUTF8:
2738             if (!nextchr && locinput >= PL_regeol)
2739                 sayNO;
2740             if (nextchr & 0x80) {
2741                 if (swash_fetch(PL_utf8_alpha,(U8*)locinput))
2742                     sayNO;
2743                 locinput += PL_utf8skip[nextchr];
2744                 nextchr = UCHARAT(locinput);
2745                 break;
2746             }
2747             if (isALPHA(nextchr))
2748                 sayNO;
2749             nextchr = UCHARAT(++locinput);
2750             break;
2751         case ASCII:
2752             if (!nextchr && locinput >= PL_regeol)
2753                 sayNO;
2754             if (!isASCII(nextchr))
2755                 sayNO;
2756             nextchr = UCHARAT(++locinput);
2757             break;
2758         case NASCII:
2759             if (!nextchr && locinput >= PL_regeol)
2760                 sayNO;
2761             if (isASCII(nextchr))
2762                 sayNO;
2763             nextchr = UCHARAT(++locinput);
2764             break;
2765         case CNTRLL:
2766             PL_reg_flags |= RF_tainted;
2767             /* FALL THROUGH */
2768         case CNTRL:
2769             if (!nextchr)
2770                 sayNO;
2771             if (!(OP(scan) == CNTRL
2772                   ? isCNTRL(nextchr) : isCNTRL_LC(nextchr)))
2773                 sayNO;
2774             nextchr = UCHARAT(++locinput);
2775             break;
2776         case CNTRLLUTF8:
2777             PL_reg_flags |= RF_tainted;
2778             /* FALL THROUGH */
2779         case CNTRLUTF8:
2780             if (!nextchr)
2781                 sayNO;
2782             if (nextchr & 0x80) {
2783                 if (!(OP(scan) == CNTRLUTF8
2784                       ? swash_fetch(PL_utf8_cntrl, (U8*)locinput)
2785                       : isCNTRL_LC_utf8((U8*)locinput)))
2786                 {
2787                     sayNO;
2788                 }
2789                 locinput += PL_utf8skip[nextchr];
2790                 nextchr = UCHARAT(locinput);
2791                 break;
2792             }
2793             if (!(OP(scan) == CNTRLUTF8
2794                   ? isCNTRL(nextchr) : isCNTRL_LC(nextchr)))
2795                 sayNO;
2796             nextchr = UCHARAT(++locinput);
2797             break;
2798         case NCNTRLL:
2799             PL_reg_flags |= RF_tainted;
2800             /* FALL THROUGH */
2801         case NCNTRL:
2802             if (!nextchr)
2803                 sayNO;
2804             if (OP(scan) == CNTRL
2805                 ? isCNTRL(nextchr) : isCNTRL_LC(nextchr))
2806                 sayNO;
2807             nextchr = UCHARAT(++locinput);
2808             break;
2809         case NCNTRLLUTF8:
2810             PL_reg_flags |= RF_tainted;
2811             /* FALL THROUGH */
2812         case NCNTRLUTF8:
2813             if (!nextchr && locinput >= PL_regeol)
2814                 sayNO;
2815             if (nextchr & 0x80) {
2816                 if (swash_fetch(PL_utf8_cntrl,(U8*)locinput))
2817                     sayNO;
2818                 locinput += PL_utf8skip[nextchr];
2819                 nextchr = UCHARAT(locinput);
2820                 break;
2821             }
2822             if (isCNTRL(nextchr))
2823                 sayNO;
2824             nextchr = UCHARAT(++locinput);
2825             break;
2826         case GRAPHL:
2827             PL_reg_flags |= RF_tainted;
2828             /* FALL THROUGH */
2829         case GRAPH:
2830             if (!nextchr)
2831                 sayNO;
2832             if (!(OP(scan) == GRAPH
2833                   ? isGRAPH(nextchr) : isGRAPH_LC(nextchr)))
2834                 sayNO;
2835             nextchr = UCHARAT(++locinput);
2836             break;
2837         case GRAPHLUTF8:
2838             PL_reg_flags |= RF_tainted;
2839             /* FALL THROUGH */
2840         case GRAPHUTF8:
2841             if (!nextchr)
2842                 sayNO;
2843             if (nextchr & 0x80) {
2844                 if (!(OP(scan) == GRAPHUTF8
2845                       ? swash_fetch(PL_utf8_graph, (U8*)locinput)
2846                       : isGRAPH_LC_utf8((U8*)locinput)))
2847                 {
2848                     sayNO;
2849                 }
2850                 locinput += PL_utf8skip[nextchr];
2851                 nextchr = UCHARAT(locinput);
2852                 break;
2853             }
2854             if (!(OP(scan) == GRAPHUTF8
2855                   ? isGRAPH(nextchr) : isGRAPH_LC(nextchr)))
2856                 sayNO;
2857             nextchr = UCHARAT(++locinput);
2858             break;
2859         case NGRAPHL:
2860             PL_reg_flags |= RF_tainted;
2861             /* FALL THROUGH */
2862         case NGRAPH:
2863             if (!nextchr)
2864                 sayNO;
2865             if (OP(scan) == GRAPH
2866                 ? isGRAPH(nextchr) : isGRAPH_LC(nextchr))
2867                 sayNO;
2868             nextchr = UCHARAT(++locinput);
2869             break;
2870         case NGRAPHLUTF8:
2871             PL_reg_flags |= RF_tainted;
2872             /* FALL THROUGH */
2873         case NGRAPHUTF8:
2874             if (!nextchr && locinput >= PL_regeol)
2875                 sayNO;
2876             if (nextchr & 0x80) {
2877                 if (swash_fetch(PL_utf8_graph,(U8*)locinput))
2878                     sayNO;
2879                 locinput += PL_utf8skip[nextchr];
2880                 nextchr = UCHARAT(locinput);
2881                 break;
2882             }
2883             if (isGRAPH(nextchr))
2884                 sayNO;
2885             nextchr = UCHARAT(++locinput);
2886             break;
2887         case LOWERL:
2888             PL_reg_flags |= RF_tainted;
2889             /* FALL THROUGH */
2890         case LOWER:
2891             if (!nextchr)
2892                 sayNO;
2893             if (!(OP(scan) == LOWER
2894                   ? isLOWER(nextchr) : isLOWER_LC(nextchr)))
2895                 sayNO;
2896             nextchr = UCHARAT(++locinput);
2897             break;
2898         case LOWERLUTF8:
2899             PL_reg_flags |= RF_tainted;
2900             /* FALL THROUGH */
2901         case LOWERUTF8:
2902             if (!nextchr)
2903                 sayNO;
2904             if (nextchr & 0x80) {
2905                 if (!(OP(scan) == LOWERUTF8
2906                       ? swash_fetch(PL_utf8_lower, (U8*)locinput)
2907                       : isLOWER_LC_utf8((U8*)locinput)))
2908                 {
2909                     sayNO;
2910                 }
2911                 locinput += PL_utf8skip[nextchr];
2912                 nextchr = UCHARAT(locinput);
2913                 break;
2914             }
2915             if (!(OP(scan) == LOWERUTF8
2916                   ? isLOWER(nextchr) : isLOWER_LC(nextchr)))
2917                 sayNO;
2918             nextchr = UCHARAT(++locinput);
2919             break;
2920         case NLOWERL:
2921             PL_reg_flags |= RF_tainted;
2922             /* FALL THROUGH */
2923         case NLOWER:
2924             if (!nextchr)
2925                 sayNO;
2926             if (OP(scan) == LOWER
2927                 ? isLOWER(nextchr) : isLOWER_LC(nextchr))
2928                 sayNO;
2929             nextchr = UCHARAT(++locinput);
2930             break;
2931         case NLOWERLUTF8:
2932             PL_reg_flags |= RF_tainted;
2933             /* FALL THROUGH */
2934         case NLOWERUTF8:
2935             if (!nextchr && locinput >= PL_regeol)
2936                 sayNO;
2937             if (nextchr & 0x80) {
2938                 if (swash_fetch(PL_utf8_lower,(U8*)locinput))
2939                     sayNO;
2940                 locinput += PL_utf8skip[nextchr];
2941                 nextchr = UCHARAT(locinput);
2942                 break;
2943             }
2944             if (isLOWER(nextchr))
2945                 sayNO;
2946             nextchr = UCHARAT(++locinput);
2947             break;
2948         case PRINTL:
2949             PL_reg_flags |= RF_tainted;
2950             /* FALL THROUGH */
2951         case PRINT:
2952             if (!nextchr)
2953                 sayNO;
2954             if (!(OP(scan) == PRINT
2955                   ? isPRINT(nextchr) : isPRINT_LC(nextchr)))
2956                 sayNO;
2957             nextchr = UCHARAT(++locinput);
2958             break;
2959         case PRINTLUTF8:
2960             PL_reg_flags |= RF_tainted;
2961             /* FALL THROUGH */
2962         case PRINTUTF8:
2963             if (!nextchr)
2964                 sayNO;
2965             if (nextchr & 0x80) {
2966                 if (!(OP(scan) == PRINTUTF8
2967                       ? swash_fetch(PL_utf8_print, (U8*)locinput)
2968                       : isPRINT_LC_utf8((U8*)locinput)))
2969                 {
2970                     sayNO;
2971                 }
2972                 locinput += PL_utf8skip[nextchr];
2973                 nextchr = UCHARAT(locinput);
2974                 break;
2975             }
2976             if (!(OP(scan) == PRINTUTF8
2977                   ? isPRINT(nextchr) : isPRINT_LC(nextchr)))
2978                 sayNO;
2979             nextchr = UCHARAT(++locinput);
2980             break;
2981         case NPRINTL:
2982             PL_reg_flags |= RF_tainted;
2983             /* FALL THROUGH */
2984         case NPRINT:
2985             if (!nextchr)
2986                 sayNO;
2987             if (OP(scan) == PRINT
2988                 ? isPRINT(nextchr) : isPRINT_LC(nextchr))
2989                 sayNO;
2990             nextchr = UCHARAT(++locinput);
2991             break;
2992         case NPRINTLUTF8:
2993             PL_reg_flags |= RF_tainted;
2994             /* FALL THROUGH */
2995         case NPRINTUTF8:
2996             if (!nextchr && locinput >= PL_regeol)
2997                 sayNO;
2998             if (nextchr & 0x80) {
2999                 if (swash_fetch(PL_utf8_print,(U8*)locinput))
3000                     sayNO;
3001                 locinput += PL_utf8skip[nextchr];
3002                 nextchr = UCHARAT(locinput);
3003                 break;
3004             }
3005             if (isPRINT(nextchr))
3006                 sayNO;
3007             nextchr = UCHARAT(++locinput);
3008             break;
3009         case PUNCTL:
3010             PL_reg_flags |= RF_tainted;
3011             /* FALL THROUGH */
3012         case PUNCT:
3013             if (!nextchr)
3014                 sayNO;
3015             if (!(OP(scan) == PUNCT
3016                   ? isPUNCT(nextchr) : isPUNCT_LC(nextchr)))
3017                 sayNO;
3018             nextchr = UCHARAT(++locinput);
3019             break;
3020         case PUNCTLUTF8:
3021             PL_reg_flags |= RF_tainted;
3022             /* FALL THROUGH */
3023         case PUNCTUTF8:
3024             if (!nextchr)
3025                 sayNO;
3026             if (nextchr & 0x80) {
3027                 if (!(OP(scan) == PUNCTUTF8
3028                       ? swash_fetch(PL_utf8_punct, (U8*)locinput)
3029                       : isPUNCT_LC_utf8((U8*)locinput)))
3030                 {
3031                     sayNO;
3032                 }
3033                 locinput += PL_utf8skip[nextchr];
3034                 nextchr = UCHARAT(locinput);
3035                 break;
3036             }
3037             if (!(OP(scan) == PUNCTUTF8
3038                   ? isPUNCT(nextchr) : isPUNCT_LC(nextchr)))
3039                 sayNO;
3040             nextchr = UCHARAT(++locinput);
3041             break;
3042         case NPUNCTL:
3043             PL_reg_flags |= RF_tainted;
3044             /* FALL THROUGH */
3045         case NPUNCT:
3046             if (!nextchr)
3047                 sayNO;
3048             if (OP(scan) == PUNCT
3049                 ? isPUNCT(nextchr) : isPUNCT_LC(nextchr))
3050                 sayNO;
3051             nextchr = UCHARAT(++locinput);
3052             break;
3053         case NPUNCTLUTF8:
3054             PL_reg_flags |= RF_tainted;
3055             /* FALL THROUGH */
3056         case NPUNCTUTF8:
3057             if (!nextchr && locinput >= PL_regeol)
3058                 sayNO;
3059             if (nextchr & 0x80) {
3060                 if (swash_fetch(PL_utf8_punct,(U8*)locinput))
3061                     sayNO;
3062                 locinput += PL_utf8skip[nextchr];
3063                 nextchr = UCHARAT(locinput);
3064                 break;
3065             }
3066             if (isPUNCT(nextchr))
3067                 sayNO;
3068             nextchr = UCHARAT(++locinput);
3069             break;
3070         case UPPERL:
3071             PL_reg_flags |= RF_tainted;
3072             /* FALL THROUGH */
3073         case UPPER:
3074             if (!nextchr)
3075                 sayNO;
3076             if (!(OP(scan) == UPPER
3077                   ? isUPPER(nextchr) : isUPPER_LC(nextchr)))
3078                 sayNO;
3079             nextchr = UCHARAT(++locinput);
3080             break;
3081         case UPPERLUTF8:
3082             PL_reg_flags |= RF_tainted;
3083             /* FALL THROUGH */
3084         case UPPERUTF8:
3085             if (!nextchr)
3086                 sayNO;
3087             if (nextchr & 0x80) {
3088                 if (!(OP(scan) == UPPERUTF8
3089                       ? swash_fetch(PL_utf8_upper, (U8*)locinput)
3090                       : isUPPER_LC_utf8((U8*)locinput)))
3091                 {
3092                     sayNO;
3093                 }
3094                 locinput += PL_utf8skip[nextchr];
3095                 nextchr = UCHARAT(locinput);
3096                 break;
3097             }
3098             if (!(OP(scan) == UPPERUTF8
3099                   ? isUPPER(nextchr) : isUPPER_LC(nextchr)))
3100                 sayNO;
3101             nextchr = UCHARAT(++locinput);
3102             break;
3103         case NUPPERL:
3104             PL_reg_flags |= RF_tainted;
3105             /* FALL THROUGH */
3106         case NUPPER:
3107             if (!nextchr)
3108                 sayNO;
3109             if (OP(scan) == UPPER
3110                 ? isUPPER(nextchr) : isUPPER_LC(nextchr))
3111                 sayNO;
3112             nextchr = UCHARAT(++locinput);
3113             break;
3114         case NUPPERLUTF8:
3115             PL_reg_flags |= RF_tainted;
3116             /* FALL THROUGH */
3117         case NUPPERUTF8:
3118             if (!nextchr && locinput >= PL_regeol)
3119                 sayNO;
3120             if (nextchr & 0x80) {
3121                 if (swash_fetch(PL_utf8_upper,(U8*)locinput))
3122                     sayNO;
3123                 locinput += PL_utf8skip[nextchr];
3124                 nextchr = UCHARAT(locinput);
3125                 break;
3126             }
3127             if (isUPPER(nextchr))
3128                 sayNO;
3129             nextchr = UCHARAT(++locinput);
3130             break;
3131         case XDIGIT:
3132             if (!nextchr && locinput >= PL_regeol)
3133                 sayNO;
3134             if (!isXDIGIT(nextchr))
3135                 sayNO;
3136             nextchr = UCHARAT(++locinput);
3137             break;
3138         case NXDIGIT:
3139             if (!nextchr && locinput >= PL_regeol)
3140                 sayNO;
3141             if (isXDIGIT(nextchr))
3142                 sayNO;
3143             nextchr = UCHARAT(++locinput);
3144             break;
3145         case CLUMP:
3146             if (locinput >= PL_regeol || swash_fetch(PL_utf8_mark,(U8*)locinput))
3147                 sayNO;
3148             locinput += PL_utf8skip[nextchr];
3149             while (locinput < PL_regeol && swash_fetch(PL_utf8_mark,(U8*)locinput))
3150                 locinput += UTF8SKIP(locinput);
3151             if (locinput > PL_regeol)
3152                 sayNO;
3153             nextchr = UCHARAT(locinput);
3154             break;
3155         case REFFL:
3156             PL_reg_flags |= RF_tainted;
3157             /* FALL THROUGH */
3158         case REF:
3159         case REFF:
3160             n = ARG(scan);  /* which paren pair */
3161             ln = PL_regstartp[n];
3162             if (*PL_reglastparen < n || ln == -1)
3163                 sayNO;                  /* Do not match unless seen CLOSEn. */
3164             if (ln == PL_regendp[n])
3165                 break;
3166
3167             s = PL_bostr + ln;
3168             if (UTF && OP(scan) != REF) {       /* REF can do byte comparison */
3169                 char *l = locinput;
3170                 char *e = PL_bostr + PL_regendp[n];
3171                 /*
3172                  * Note that we can't do the "other character" lookup trick as
3173                  * in the 8-bit case (no pun intended) because in Unicode we
3174                  * have to map both upper and title case to lower case.
3175                  */
3176                 if (OP(scan) == REFF) {
3177                     while (s < e) {
3178                         if (l >= PL_regeol)
3179                             sayNO;
3180                         if (toLOWER_utf8((U8*)s) != toLOWER_utf8((U8*)l))
3181                             sayNO;
3182                         s += UTF8SKIP(s);
3183                         l += UTF8SKIP(l);
3184                     }
3185                 }
3186                 else {
3187                     while (s < e) {
3188                         if (l >= PL_regeol)
3189                             sayNO;
3190                         if (toLOWER_LC_utf8((U8*)s) != toLOWER_LC_utf8((U8*)l))
3191                             sayNO;
3192                         s += UTF8SKIP(s);
3193                         l += UTF8SKIP(l);
3194                     }
3195                 }
3196                 locinput = l;
3197                 nextchr = UCHARAT(locinput);
3198                 break;
3199             }
3200
3201             /* Inline the first character, for speed. */
3202             if (UCHARAT(s) != nextchr &&
3203                 (OP(scan) == REF ||
3204                  (UCHARAT(s) != ((OP(scan) == REFF
3205                                   ? PL_fold : PL_fold_locale)[nextchr]))))
3206                 sayNO;
3207             ln = PL_regendp[n] - ln;
3208             if (locinput + ln > PL_regeol)
3209                 sayNO;
3210             if (ln > 1 && (OP(scan) == REF
3211                            ? memNE(s, locinput, ln)
3212                            : (OP(scan) == REFF
3213                               ? ibcmp(s, locinput, ln)
3214                               : ibcmp_locale(s, locinput, ln))))
3215                 sayNO;
3216             locinput += ln;
3217             nextchr = UCHARAT(locinput);
3218             break;
3219
3220         case NOTHING:
3221         case TAIL:
3222             break;
3223         case BACK:
3224             break;
3225         case EVAL:
3226         {
3227             dSP;
3228             OP_4tree *oop = PL_op;
3229             COP *ocurcop = PL_curcop;
3230             SV **ocurpad = PL_curpad;
3231             SV *ret;
3232             
3233             n = ARG(scan);
3234             PL_op = (OP_4tree*)PL_regdata->data[n];
3235             DEBUG_r( PerlIO_printf(Perl_debug_log, "  re_eval 0x%x\n", PL_op) );
3236             PL_curpad = AvARRAY((AV*)PL_regdata->data[n + 2]);
3237             PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
3238
3239             CALLRUNOPS(aTHX);                   /* Scalar context. */
3240             SPAGAIN;
3241             ret = POPs;
3242             PUTBACK;
3243             
3244             PL_op = oop;
3245             PL_curpad = ocurpad;
3246             PL_curcop = ocurcop;
3247             if (logical) {
3248                 if (logical == 2) {     /* Postponed subexpression. */
3249                     regexp *re;
3250                     MAGIC *mg = Null(MAGIC*);
3251                     re_cc_state state;
3252                     CURCUR cctmp;
3253                     CHECKPOINT cp, lastcp;
3254
3255                     if(SvROK(ret) || SvRMAGICAL(ret)) {
3256                         SV *sv = SvROK(ret) ? SvRV(ret) : ret;
3257
3258                         if(SvMAGICAL(sv))
3259                             mg = mg_find(sv, 'r');
3260                     }
3261                     if (mg) {
3262                         re = (regexp *)mg->mg_obj;
3263                         (void)ReREFCNT_inc(re);
3264                     }
3265                     else {
3266                         STRLEN len;
3267                         char *t = SvPV(ret, len);
3268                         PMOP pm;
3269                         char *oprecomp = PL_regprecomp;
3270                         I32 osize = PL_regsize;
3271                         I32 onpar = PL_regnpar;
3272
3273                         pm.op_pmflags = 0;
3274                         re = CALLREGCOMP(aTHX_ t, t + len, &pm);
3275                         if (!(SvFLAGS(ret) 
3276                               & (SVs_TEMP | SVs_PADTMP | SVf_READONLY)))
3277                             sv_magic(ret,(SV*)ReREFCNT_inc(re),'r',0,0);
3278                         PL_regprecomp = oprecomp;
3279                         PL_regsize = osize;
3280                         PL_regnpar = onpar;
3281                     }
3282                     DEBUG_r(
3283                         PerlIO_printf(Perl_debug_log, 
3284                                       "Entering embedded `%s%.60s%s%s'\n",
3285                                       PL_colors[0],
3286                                       re->precomp,
3287                                       PL_colors[1],
3288                                       (strlen(re->precomp) > 60 ? "..." : ""))
3289                         );
3290                     state.node = next;
3291                     state.prev = PL_reg_call_cc;
3292                     state.cc = PL_regcc;
3293                     state.re = PL_reg_re;
3294
3295                     cctmp.cur = 0;
3296                     cctmp.oldcc = 0;
3297                     PL_regcc = &cctmp;
3298                     
3299                     cp = regcppush(0);  /* Save *all* the positions. */
3300                     REGCP_SET;
3301                     cache_re(re);
3302                     state.ss = PL_savestack_ix;
3303                     *PL_reglastparen = 0;
3304                     PL_reg_call_cc = &state;
3305                     PL_reginput = locinput;
3306                     if (regmatch(re->program + 1)) {
3307                         ReREFCNT_dec(re);
3308                         regcpblow(cp);
3309                         sayYES;
3310                     }
3311                     DEBUG_r(
3312                         PerlIO_printf(Perl_debug_log,
3313                                       "%*s  failed...\n",
3314                                       REPORT_CODE_OFF+PL_regindent*2, "")
3315                         );
3316                     ReREFCNT_dec(re);
3317                     REGCP_UNWIND;
3318                     regcppop();
3319                     PL_reg_call_cc = state.prev;
3320                     PL_regcc = state.cc;
3321                     PL_reg_re = state.re;
3322                     cache_re(PL_reg_re);
3323                     sayNO;
3324                 }
3325                 sw = SvTRUE(ret);
3326                 logical = 0;
3327             }
3328             else
3329                 sv_setsv(save_scalar(PL_replgv), ret);
3330             break;
3331         }
3332         case OPEN:
3333             n = ARG(scan);  /* which paren pair */
3334             PL_reg_start_tmp[n] = locinput;
3335             if (n > PL_regsize)
3336                 PL_regsize = n;
3337             break;
3338         case CLOSE:
3339             n = ARG(scan);  /* which paren pair */
3340             PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
3341             PL_regendp[n] = locinput - PL_bostr;
3342             if (n > *PL_reglastparen)
3343                 *PL_reglastparen = n;
3344             break;
3345         case GROUPP:
3346             n = ARG(scan);  /* which paren pair */
3347             sw = (*PL_reglastparen >= n && PL_regendp[n] != -1);
3348             break;
3349         case IFTHEN:
3350             if (sw)
3351                 next = NEXTOPER(NEXTOPER(scan));
3352             else {
3353                 next = scan + ARG(scan);
3354                 if (OP(next) == IFTHEN) /* Fake one. */
3355                     next = NEXTOPER(NEXTOPER(next));
3356             }
3357             break;
3358         case LOGICAL:
3359             logical = scan->flags;
3360             break;
3361         case CURLYX: {
3362                 CURCUR cc;
3363                 CHECKPOINT cp = PL_savestack_ix;
3364
3365                 if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
3366                     next += ARG(next);
3367                 cc.oldcc = PL_regcc;
3368                 PL_regcc = &cc;
3369                 cc.parenfloor = *PL_reglastparen;
3370                 cc.cur = -1;
3371                 cc.min = ARG1(scan);
3372                 cc.max  = ARG2(scan);
3373                 cc.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
3374                 cc.next = next;
3375                 cc.minmod = minmod;
3376                 cc.lastloc = 0;
3377                 PL_reginput = locinput;
3378                 n = regmatch(PREVOPER(next));   /* start on the WHILEM */
3379                 regcpblow(cp);
3380                 PL_regcc = cc.oldcc;
3381                 saySAME(n);
3382             }
3383             /* NOT REACHED */
3384         case WHILEM: {
3385                 /*
3386                  * This is really hard to understand, because after we match
3387                  * what we're trying to match, we must make sure the rest of
3388                  * the RE is going to match for sure, and to do that we have
3389                  * to go back UP the parse tree by recursing ever deeper.  And
3390                  * if it fails, we have to reset our parent's current state
3391                  * so that we can try again after backing off.
3392                  */
3393
3394                 CHECKPOINT cp, lastcp;
3395                 CURCUR* cc = PL_regcc;
3396                 char *lastloc = cc->lastloc; /* Detection of 0-len. */
3397                 
3398                 n = cc->cur + 1;        /* how many we know we matched */
3399                 PL_reginput = locinput;
3400
3401                 DEBUG_r(
3402                     PerlIO_printf(Perl_debug_log, 
3403                                   "%*s  %ld out of %ld..%ld  cc=%lx\n", 
3404                                   REPORT_CODE_OFF+PL_regindent*2, "",
3405                                   (long)n, (long)cc->min, 
3406                                   (long)cc->max, (long)cc)
3407                     );
3408
3409                 /* If degenerate scan matches "", assume scan done. */
3410
3411                 if (locinput == cc->lastloc && n >= cc->min) {
3412                     PL_regcc = cc->oldcc;
3413                     ln = PL_regcc->cur;
3414                     DEBUG_r(
3415                         PerlIO_printf(Perl_debug_log,
3416                            "%*s  empty match detected, try continuation...\n",
3417                            REPORT_CODE_OFF+PL_regindent*2, "")
3418                         );
3419                     if (regmatch(cc->next))
3420                         sayYES;
3421                     DEBUG_r(
3422                         PerlIO_printf(Perl_debug_log,
3423                                       "%*s  failed...\n",
3424                                       REPORT_CODE_OFF+PL_regindent*2, "")
3425                         );
3426                     PL_regcc->cur = ln;
3427                     PL_regcc = cc;
3428                     sayNO;
3429                 }
3430
3431                 /* First just match a string of min scans. */
3432
3433                 if (n < cc->min) {
3434                     cc->cur = n;
3435                     cc->lastloc = locinput;
3436                     if (regmatch(cc->scan))
3437                         sayYES;
3438                     cc->cur = n - 1;
3439                     cc->lastloc = lastloc;
3440                     DEBUG_r(
3441                         PerlIO_printf(Perl_debug_log,
3442                                       "%*s  failed...\n",
3443                                       REPORT_CODE_OFF+PL_regindent*2, "")
3444                         );
3445                     sayNO;
3446                 }
3447
3448                 /* Prefer next over scan for minimal matching. */
3449
3450                 if (cc->minmod) {
3451                     PL_regcc = cc->oldcc;
3452                     ln = PL_regcc->cur;
3453                     cp = regcppush(cc->parenfloor);
3454                     REGCP_SET;
3455                     if (regmatch(cc->next)) {
3456                         regcpblow(cp);
3457                         sayYES; /* All done. */
3458                     }
3459                     REGCP_UNWIND;
3460                     regcppop();
3461                     PL_regcc->cur = ln;
3462                     PL_regcc = cc;
3463
3464                     if (n >= cc->max) { /* Maximum greed exceeded? */
3465                         if (ckWARN(WARN_UNSAFE) && n >= REG_INFTY 
3466                             && !(PL_reg_flags & RF_warned)) {
3467                             PL_reg_flags |= RF_warned;
3468                             Perl_warner(aTHX_ WARN_UNSAFE, "%s limit (%d) exceeded",
3469                                  "Complex regular subexpression recursion",
3470                                  REG_INFTY - 1);
3471                         }
3472                         sayNO;
3473                     }
3474
3475                     DEBUG_r(
3476                         PerlIO_printf(Perl_debug_log,
3477                                       "%*s  trying longer...\n",
3478                                       REPORT_CODE_OFF+PL_regindent*2, "")
3479                         );
3480                     /* Try scanning more and see if it helps. */
3481                     PL_reginput = locinput;
3482                     cc->cur = n;
3483                     cc->lastloc = locinput;
3484                     cp = regcppush(cc->parenfloor);
3485                     REGCP_SET;
3486                     if (regmatch(cc->scan)) {
3487                         regcpblow(cp);
3488                         sayYES;
3489                     }
3490                     DEBUG_r(
3491                         PerlIO_printf(Perl_debug_log,
3492                                       "%*s  failed...\n",
3493                                       REPORT_CODE_OFF+PL_regindent*2, "")
3494                         );
3495                     REGCP_UNWIND;
3496                     regcppop();
3497                     cc->cur = n - 1;
3498                     cc->lastloc = lastloc;
3499                     sayNO;
3500                 }
3501
3502                 /* Prefer scan over next for maximal matching. */
3503
3504                 if (n < cc->max) {      /* More greed allowed? */
3505                     cp = regcppush(cc->parenfloor);
3506                     cc->cur = n;
3507                     cc->lastloc = locinput;
3508                     REGCP_SET;
3509                     if (regmatch(cc->scan)) {
3510                         regcpblow(cp);
3511                         sayYES;
3512                     }
3513                     REGCP_UNWIND;
3514                     regcppop();         /* Restore some previous $<digit>s? */
3515                     PL_reginput = locinput;
3516                     DEBUG_r(
3517                         PerlIO_printf(Perl_debug_log,
3518                                       "%*s  failed, try continuation...\n",
3519                                       REPORT_CODE_OFF+PL_regindent*2, "")
3520                         );
3521                 }
3522                 if (ckWARN(WARN_UNSAFE) && n >= REG_INFTY 
3523                         && !(PL_reg_flags & RF_warned)) {
3524                     PL_reg_flags |= RF_warned;
3525                     Perl_warner(aTHX_ WARN_UNSAFE, "%s limit (%d) exceeded",
3526                          "Complex regular subexpression recursion",
3527                          REG_INFTY - 1);
3528                 }
3529
3530                 /* Failed deeper matches of scan, so see if this one works. */
3531                 PL_regcc = cc->oldcc;
3532                 ln = PL_regcc->cur;
3533                 if (regmatch(cc->next))
3534                     sayYES;
3535                 DEBUG_r(
3536                     PerlIO_printf(Perl_debug_log, "%*s  failed...\n",
3537                                   REPORT_CODE_OFF+PL_regindent*2, "")
3538                     );
3539                 PL_regcc->cur = ln;
3540                 PL_regcc = cc;
3541                 cc->cur = n - 1;
3542                 cc->lastloc = lastloc;
3543                 sayNO;
3544             }
3545             /* NOT REACHED */
3546         case BRANCHJ: 
3547             next = scan + ARG(scan);
3548             if (next == scan)
3549                 next = NULL;
3550             inner = NEXTOPER(NEXTOPER(scan));
3551             goto do_branch;
3552         case BRANCH: 
3553             inner = NEXTOPER(scan);
3554           do_branch:
3555             {
3556                 CHECKPOINT lastcp;
3557                 c1 = OP(scan);
3558                 if (OP(next) != c1)     /* No choice. */
3559                     next = inner;       /* Avoid recursion. */
3560                 else {
3561                     int lastparen = *PL_reglastparen;
3562
3563                     REGCP_SET;
3564                     do {
3565                         PL_reginput = locinput;
3566                         if (regmatch(inner))
3567                             sayYES;
3568                         REGCP_UNWIND;
3569                         for (n = *PL_reglastparen; n > lastparen; n--)
3570                             PL_regendp[n] = -1;
3571                         *PL_reglastparen = n;
3572                         scan = next;
3573                         /*SUPPRESS 560*/
3574                         if (n = (c1 == BRANCH ? NEXT_OFF(next) : ARG(next)))
3575                             next += n;
3576                         else
3577                             next = NULL;
3578                         inner = NEXTOPER(scan);
3579                         if (c1 == BRANCHJ) {
3580                             inner = NEXTOPER(inner);
3581                         }
3582                     } while (scan != NULL && OP(scan) == c1);
3583                     sayNO;
3584                     /* NOTREACHED */
3585                 }
3586             }
3587             break;
3588         case MINMOD:
3589             minmod = 1;
3590             break;
3591         case CURLYM:
3592         {
3593             I32 l = 0;
3594             CHECKPOINT lastcp;
3595             
3596             /* We suppose that the next guy does not need
3597                backtracking: in particular, it is of constant length,
3598                and has no parenths to influence future backrefs. */
3599             ln = ARG1(scan);  /* min to match */
3600             n  = ARG2(scan);  /* max to match */
3601             paren = scan->flags;
3602             if (paren) {
3603                 if (paren > PL_regsize)
3604                     PL_regsize = paren;
3605                 if (paren > *PL_reglastparen)
3606                     *PL_reglastparen = paren;
3607             }
3608             scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
3609             if (paren)
3610                 scan += NEXT_OFF(scan); /* Skip former OPEN. */
3611             PL_reginput = locinput;
3612             if (minmod) {
3613                 minmod = 0;
3614                 if (ln && regrepeat_hard(scan, ln, &l) < ln)
3615                     sayNO;
3616                 if (ln && l == 0 && n >= ln
3617                     /* In fact, this is tricky.  If paren, then the
3618                        fact that we did/didnot match may influence
3619                        future execution. */
3620                     && !(paren && ln == 0))
3621                     ln = n;
3622                 locinput = PL_reginput;
3623                 if (PL_regkind[(U8)OP(next)] == EXACT) {
3624                     c1 = UCHARAT(OPERAND(next) + 1);
3625                     if (OP(next) == EXACTF)
3626                         c2 = PL_fold[c1];
3627                     else if (OP(next) == EXACTFL)
3628                         c2 = PL_fold_locale[c1];
3629                     else
3630                         c2 = c1;
3631                 }
3632                 else
3633                     c1 = c2 = -1000;
3634                 REGCP_SET;
3635                 /* This may be improved if l == 0.  */
3636                 while (n >= ln || (n == REG_INFTY && ln > 0 && l)) { /* ln overflow ? */
3637                     /* If it could work, try it. */
3638                     if (c1 == -1000 ||
3639                         UCHARAT(PL_reginput) == c1 ||
3640                         UCHARAT(PL_reginput) == c2)
3641                     {
3642                         if (paren) {
3643                             if (n) {
3644                                 PL_regstartp[paren] =
3645                                     HOPc(PL_reginput, -l) - PL_bostr;
3646                                 PL_regendp[paren] = PL_reginput - PL_bostr;
3647                             }
3648                             else
3649                                 PL_regendp[paren] = -1;
3650                         }
3651                         if (regmatch(next))
3652                             sayYES;
3653                         REGCP_UNWIND;
3654                     }
3655                     /* Couldn't or didn't -- move forward. */
3656                     PL_reginput = locinput;
3657                     if (regrepeat_hard(scan, 1, &l)) {
3658                         ln++;
3659                         locinput = PL_reginput;
3660                     }
3661                     else
3662                         sayNO;
3663                 }
3664             }
3665             else {
3666                 n = regrepeat_hard(scan, n, &l);
3667                 if (n != 0 && l == 0
3668                     /* In fact, this is tricky.  If paren, then the
3669                        fact that we did/didnot match may influence
3670                        future execution. */
3671                     && !(paren && ln == 0))
3672                     ln = n;
3673                 locinput = PL_reginput;
3674                 DEBUG_r(
3675                     PerlIO_printf(Perl_debug_log,
3676                                   "%*s  matched %ld times, len=%ld...\n",
3677                                   REPORT_CODE_OFF+PL_regindent*2, "", n, l)
3678                     );
3679                 if (n >= ln) {
3680                     if (PL_regkind[(U8)OP(next)] == EXACT) {
3681                         c1 = UCHARAT(OPERAND(next) + 1);
3682                         if (OP(next) == EXACTF)
3683                             c2 = PL_fold[c1];
3684                         else if (OP(next) == EXACTFL)
3685                             c2 = PL_fold_locale[c1];
3686                         else
3687                             c2 = c1;
3688                     }
3689                     else
3690                         c1 = c2 = -1000;
3691                 }
3692                 REGCP_SET;
3693                 while (n >= ln) {
3694                     /* If it could work, try it. */
3695                     if (c1 == -1000 ||
3696                         UCHARAT(PL_reginput) == c1 ||
3697                         UCHARAT(PL_reginput) == c2)
3698                     {
3699                         DEBUG_r(
3700                                 PerlIO_printf(Perl_debug_log,
3701                                               "%*s  trying tail with n=%ld...\n",
3702                                               REPORT_CODE_OFF+PL_regindent*2, "", n)
3703                             );
3704                         if (paren) {
3705                             if (n) {
3706                                 PL_regstartp[paren] = HOPc(PL_reginput, -l) - PL_bostr;
3707                                 PL_regendp[paren] = PL_reginput - PL_bostr;
3708                             }
3709                             else
3710                                 PL_regendp[paren] = -1;
3711                         }
3712                         if (regmatch(next))
3713                             sayYES;
3714                         REGCP_UNWIND;
3715                     }
3716                     /* Couldn't or didn't -- back up. */
3717                     n--;
3718                     locinput = HOPc(locinput, -l);
3719                     PL_reginput = locinput;
3720                 }
3721             }
3722             sayNO;
3723             break;
3724         }
3725         case CURLYN:
3726             paren = scan->flags;        /* Which paren to set */
3727             if (paren > PL_regsize)
3728                 PL_regsize = paren;
3729             if (paren > *PL_reglastparen)
3730                 *PL_reglastparen = paren;
3731             ln = ARG1(scan);  /* min to match */
3732             n  = ARG2(scan);  /* max to match */
3733             scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
3734             goto repeat;
3735         case CURLY:
3736             paren = 0;
3737             ln = ARG1(scan);  /* min to match */
3738             n  = ARG2(scan);  /* max to match */
3739             scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
3740             goto repeat;
3741         case STAR:
3742             ln = 0;
3743             n = REG_INFTY;
3744             scan = NEXTOPER(scan);
3745             paren = 0;
3746             goto repeat;
3747         case PLUS:
3748             ln = 1;
3749             n = REG_INFTY;
3750             scan = NEXTOPER(scan);
3751             paren = 0;
3752           repeat:
3753             /*
3754             * Lookahead to avoid useless match attempts
3755             * when we know what character comes next.
3756             */
3757             if (PL_regkind[(U8)OP(next)] == EXACT) {
3758                 c1 = UCHARAT(OPERAND(next) + 1);
3759                 if (OP(next) == EXACTF)
3760                     c2 = PL_fold[c1];
3761                 else if (OP(next) == EXACTFL)
3762                     c2 = PL_fold_locale[c1];
3763                 else
3764                     c2 = c1;
3765             }
3766             else
3767                 c1 = c2 = -1000;
3768             PL_reginput = locinput;
3769             if (minmod) {
3770                 CHECKPOINT lastcp;
3771                 minmod = 0;
3772                 if (ln && regrepeat(scan, ln) < ln)
3773                     sayNO;
3774                 locinput = PL_reginput;
3775                 REGCP_SET;
3776                 if (c1 != -1000) {
3777                     char *e = locinput + n - ln; /* Should not check after this */
3778                     char *old = locinput;
3779
3780                     if (e >= PL_regeol || (n == REG_INFTY))
3781                         e = PL_regeol - 1;
3782                     while (1) {
3783                         /* Find place 'next' could work */
3784                         if (c1 == c2) {
3785                             while (locinput <= e && *locinput != c1)
3786                                 locinput++;
3787                         } else {
3788                             while (locinput <= e 
3789                                    && *locinput != c1
3790                                    && *locinput != c2)
3791                                 locinput++;                         
3792                         }
3793                         if (locinput > e) 
3794                             sayNO;
3795                         /* PL_reginput == old now */
3796                         if (locinput != old) {
3797                             ln = 1;     /* Did some */
3798                             if (regrepeat(scan, locinput - old) <
3799                                  locinput - old)
3800                                 sayNO;
3801                         }
3802                         /* PL_reginput == locinput now */
3803                         if (paren) {
3804                             if (ln) {
3805                                 PL_regstartp[paren] = HOPc(locinput, -1) - PL_bostr;
3806                                 PL_regendp[paren] = locinput - PL_bostr;
3807                             }
3808                             else
3809                                 PL_regendp[paren] = -1;
3810                         }
3811                         if (regmatch(next))
3812                             sayYES;
3813                         PL_reginput = locinput; /* Could be reset... */
3814                         REGCP_UNWIND;
3815                         /* Couldn't or didn't -- move forward. */
3816                         old = locinput++;
3817                     }
3818                 }
3819                 else
3820                 while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */
3821                     /* If it could work, try it. */
3822                     if (c1 == -1000 ||
3823                         UCHARAT(PL_reginput) == c1 ||
3824                         UCHARAT(PL_reginput) == c2)
3825                     {
3826                         if (paren) {
3827                             if (n) {
3828                                 PL_regstartp[paren] = HOPc(PL_reginput, -1) - PL_bostr;
3829                                 PL_regendp[paren] = PL_reginput - PL_bostr;
3830                             }
3831                             else
3832                                 PL_regendp[paren] = -1;
3833                         }
3834                         if (regmatch(next))
3835                             sayYES;
3836                         REGCP_UNWIND;
3837                     }
3838                     /* Couldn't or didn't -- move forward. */
3839                     PL_reginput = locinput;
3840                     if (regrepeat(scan, 1)) {
3841                         ln++;
3842                         locinput = PL_reginput;
3843                     }
3844                     else
3845                         sayNO;
3846                 }
3847             }
3848             else {
3849                 CHECKPOINT lastcp;
3850                 n = regrepeat(scan, n);
3851                 locinput = PL_reginput;
3852                 if (ln < n && PL_regkind[(U8)OP(next)] == EOL &&
3853                     (!PL_multiline  || OP(next) == SEOL))
3854                     ln = n;                     /* why back off? */
3855                 REGCP_SET;
3856                 if (paren) {
3857                     while (n >= ln) {
3858                         /* If it could work, try it. */
3859                         if (c1 == -1000 ||
3860                             UCHARAT(PL_reginput) == c1 ||
3861                             UCHARAT(PL_reginput) == c2)
3862                             {
3863                                 if (paren && n) {
3864                                     if (n) {
3865                                         PL_regstartp[paren] = HOPc(PL_reginput, -1) - PL_bostr;
3866                                         PL_regendp[paren] = PL_reginput - PL_bostr;
3867                                     }
3868                                     else
3869                                         PL_regendp[paren] = -1;
3870                                 }
3871                                 if (regmatch(next))
3872                                     sayYES;
3873                                 REGCP_UNWIND;
3874                             }
3875                         /* Couldn't or didn't -- back up. */
3876                         n--;
3877                         PL_reginput = locinput = HOPc(locinput, -1);
3878                     }
3879                 }
3880                 else {
3881                     while (n >= ln) {
3882                         /* If it could work, try it. */
3883                         if (c1 == -1000 ||
3884                             UCHARAT(PL_reginput) == c1 ||
3885                             UCHARAT(PL_reginput) == c2)
3886                             {
3887                                 if (regmatch(next))
3888                                     sayYES;
3889                                 REGCP_UNWIND;
3890                             }
3891                         /* Couldn't or didn't -- back up. */
3892                         n--;
3893                         PL_reginput = locinput = HOPc(locinput, -1);
3894                     }
3895                 }
3896             }
3897             sayNO;
3898             break;
3899         case END:
3900             if (PL_reg_call_cc) {
3901                 re_cc_state *cur_call_cc = PL_reg_call_cc;
3902                 CURCUR *cctmp = PL_regcc;
3903                 regexp *re = PL_reg_re;
3904                 CHECKPOINT cp, lastcp;
3905                 
3906                 cp = regcppush(0);      /* Save *all* the positions. */
3907                 REGCP_SET;
3908                 regcp_set_to(PL_reg_call_cc->ss); /* Restore parens of
3909                                                     the caller. */
3910                 PL_reginput = locinput; /* Make position available to
3911                                            the callcc. */
3912                 cache_re(PL_reg_call_cc->re);
3913                 PL_regcc = PL_reg_call_cc->cc;
3914                 PL_reg_call_cc = PL_reg_call_cc->prev;
3915                 if (regmatch(cur_call_cc->node)) {
3916                     PL_reg_call_cc = cur_call_cc;
3917                     regcpblow(cp);
3918                     sayYES;
3919                 }
3920                 REGCP_UNWIND;
3921                 regcppop();
3922                 PL_reg_call_cc = cur_call_cc;
3923                 PL_regcc = cctmp;
3924                 PL_reg_re = re;
3925                 cache_re(re);
3926
3927                 DEBUG_r(
3928                     PerlIO_printf(Perl_debug_log,
3929                                   "%*s  continuation failed...\n",
3930                                   REPORT_CODE_OFF+PL_regindent*2, "")
3931                     );
3932                 sayNO;
3933             }
3934             if (locinput < PL_regtill)
3935                 sayNO;                  /* Cannot match: too short. */
3936             /* Fall through */
3937         case SUCCEED:
3938             PL_reginput = locinput;     /* put where regtry can find it */
3939             sayYES;                     /* Success! */
3940         case SUSPEND:
3941             n = 1;
3942             PL_reginput = locinput;
3943             goto do_ifmatch;        
3944         case UNLESSM:
3945             n = 0;
3946             if (scan->flags) {
3947                 if (UTF) {              /* XXXX This is absolutely
3948                                            broken, we read before
3949                                            start of string. */
3950                     s = HOPMAYBEc(locinput, -scan->flags);
3951                     if (!s)
3952                         goto say_yes;
3953                     PL_reginput = s;
3954                 }
3955                 else {
3956                     if (locinput < PL_bostr + scan->flags) 
3957                         goto say_yes;
3958                     PL_reginput = locinput - scan->flags;
3959                     goto do_ifmatch;
3960                 }
3961             }
3962             else
3963                 PL_reginput = locinput;
3964             goto do_ifmatch;
3965         case IFMATCH:
3966             n = 1;
3967             if (scan->flags) {
3968                 if (UTF) {              /* XXXX This is absolutely
3969                                            broken, we read before
3970                                            start of string. */
3971                     s = HOPMAYBEc(locinput, -scan->flags);
3972                     if (!s || s < PL_bostr)
3973                         goto say_no;
3974                     PL_reginput = s;
3975                 }
3976                 else {
3977                     if (locinput < PL_bostr + scan->flags) 
3978                         goto say_no;
3979                     PL_reginput = locinput - scan->flags;
3980                     goto do_ifmatch;
3981                 }
3982             }
3983             else
3984                 PL_reginput = locinput;
3985
3986           do_ifmatch:
3987             inner = NEXTOPER(NEXTOPER(scan));
3988             if (regmatch(inner) != n) {
3989               say_no:
3990                 if (logical) {
3991                     logical = 0;
3992                     sw = 0;
3993                     goto do_longjump;
3994                 }
3995                 else
3996                     sayNO;
3997             }
3998           say_yes:
3999             if (logical) {
4000                 logical = 0;
4001                 sw = 1;
4002             }
4003             if (OP(scan) == SUSPEND) {
4004                 locinput = PL_reginput;
4005                 nextchr = UCHARAT(locinput);
4006             }
4007             /* FALL THROUGH. */
4008         case LONGJMP:
4009           do_longjump:
4010             next = scan + ARG(scan);
4011             if (next == scan)
4012                 next = NULL;
4013             break;
4014         default:
4015             PerlIO_printf(PerlIO_stderr(), "%lx %d\n",
4016                           (unsigned long)scan, OP(scan));
4017             Perl_croak(aTHX_ "regexp memory corruption");
4018         }
4019         scan = next;
4020     }
4021
4022     /*
4023     * We get here only if there's trouble -- normally "case END" is
4024     * the terminating point.
4025     */
4026     Perl_croak(aTHX_ "corrupted regexp pointers");
4027     /*NOTREACHED*/
4028     sayNO;
4029
4030 yes:
4031 #ifdef DEBUGGING
4032     PL_regindent--;
4033 #endif
4034     return 1;
4035
4036 no:
4037 #ifdef DEBUGGING
4038     PL_regindent--;
4039 #endif
4040     return 0;
4041 }
4042
4043 /*
4044  - regrepeat - repeatedly match something simple, report how many
4045  */
4046 /*
4047  * [This routine now assumes that it will only match on things of length 1.
4048  * That was true before, but now we assume scan - reginput is the count,
4049  * rather than incrementing count on every character.  [Er, except utf8.]]
4050  */
4051 STATIC I32
4052 S_regrepeat(pTHX_ regnode *p, I32 max)
4053 {
4054     dTHR;
4055     register char *scan;
4056     register char *opnd;
4057     register I32 c;
4058     register char *loceol = PL_regeol;
4059     register I32 hardcount = 0;
4060
4061     scan = PL_reginput;
4062     if (max != REG_INFTY && max < loceol - scan)
4063       loceol = scan + max;
4064     opnd = (char *) OPERAND(p);
4065     switch (OP(p)) {
4066     case REG_ANY:
4067         while (scan < loceol && *scan != '\n')
4068             scan++;
4069         break;
4070     case SANY:
4071         scan = loceol;
4072         break;
4073     case ANYUTF8:
4074         loceol = PL_regeol;
4075         while (scan < loceol && *scan != '\n') {
4076             scan += UTF8SKIP(scan);
4077             hardcount++;
4078         }
4079         break;
4080     case SANYUTF8:
4081         loceol = PL_regeol;
4082         while (scan < loceol) {
4083             scan += UTF8SKIP(scan);
4084             hardcount++;
4085         }
4086         break;
4087     case EXACT:         /* length of string is 1 */
4088         c = UCHARAT(++opnd);
4089         while (scan < loceol && UCHARAT(scan) == c)
4090             scan++;
4091         break;
4092     case EXACTF:        /* length of string is 1 */
4093         c = UCHARAT(++opnd);
4094         while (scan < loceol &&
4095                (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
4096             scan++;
4097         break;
4098     case EXACTFL:       /* length of string is 1 */
4099         PL_reg_flags |= RF_tainted;
4100         c = UCHARAT(++opnd);
4101         while (scan < loceol &&
4102                (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
4103             scan++;
4104         break;
4105     case ANYOFUTF8:
4106         loceol = PL_regeol;
4107         while (scan < loceol && REGINCLASSUTF8(p, (U8*)scan)) {
4108             scan += UTF8SKIP(scan);
4109             hardcount++;
4110         }
4111         break;
4112     case ANYOF:
4113         while (scan < loceol && REGINCLASS(opnd, *scan))
4114             scan++;
4115         break;
4116     case ALNUM:
4117         while (scan < loceol && isALNUM(*scan))
4118             scan++;
4119         break;
4120     case ALNUMUTF8:
4121         loceol = PL_regeol;
4122         while (scan < loceol && swash_fetch(PL_utf8_alnum, (U8*)scan)) {
4123             scan += UTF8SKIP(scan);
4124             hardcount++;
4125         }
4126         break;
4127     case ALNUML:
4128         PL_reg_flags |= RF_tainted;
4129         while (scan < loceol && isALNUM_LC(*scan))
4130             scan++;
4131         break;
4132     case ALNUMLUTF8:
4133         PL_reg_flags |= RF_tainted;
4134         loceol = PL_regeol;
4135         while (scan < loceol && isALNUM_LC_utf8((U8*)scan)) {
4136             scan += UTF8SKIP(scan);
4137             hardcount++;
4138         }
4139         break;
4140         break;
4141     case NALNUM:
4142         while (scan < loceol && !isALNUM(*scan))
4143             scan++;
4144         break;
4145     case NALNUMUTF8:
4146         loceol = PL_regeol;
4147         while (scan < loceol && !swash_fetch(PL_utf8_alnum, (U8*)scan)) {
4148             scan += UTF8SKIP(scan);
4149             hardcount++;
4150         }
4151         break;
4152     case NALNUML:
4153         PL_reg_flags |= RF_tainted;
4154         while (scan < loceol && !isALNUM_LC(*scan))
4155             scan++;
4156         break;
4157     case NALNUMLUTF8:
4158         PL_reg_flags |= RF_tainted;
4159         loceol = PL_regeol;
4160         while (scan < loceol && !isALNUM_LC_utf8((U8*)scan)) {
4161             scan += UTF8SKIP(scan);
4162             hardcount++;
4163         }
4164         break;
4165     case SPACE:
4166         while (scan < loceol && isSPACE(*scan))
4167             scan++;
4168         break;
4169     case SPACEUTF8:
4170         loceol = PL_regeol;
4171         while (scan < loceol && (*scan == ' ' || swash_fetch(PL_utf8_space,(U8*)scan))) {
4172             scan += UTF8SKIP(scan);
4173             hardcount++;
4174         }
4175         break;
4176     case SPACEL:
4177         PL_reg_flags |= RF_tainted;
4178         while (scan < loceol && isSPACE_LC(*scan))
4179             scan++;
4180         break;
4181     case SPACELUTF8:
4182         PL_reg_flags |= RF_tainted;
4183         loceol = PL_regeol;
4184         while (scan < loceol && (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
4185             scan += UTF8SKIP(scan);
4186             hardcount++;
4187         }
4188         break;
4189     case NSPACE:
4190         while (scan < loceol && !isSPACE(*scan))
4191             scan++;
4192         break;
4193     case NSPACEUTF8:
4194         loceol = PL_regeol;
4195         while (scan < loceol && !(*scan == ' ' || swash_fetch(PL_utf8_space,(U8*)scan))) {
4196             scan += UTF8SKIP(scan);
4197             hardcount++;
4198         }
4199         break;
4200     case NSPACEL:
4201         PL_reg_flags |= RF_tainted;
4202         while (scan < loceol && !isSPACE_LC(*scan))
4203             scan++;
4204         break;
4205     case NSPACELUTF8:
4206         PL_reg_flags |= RF_tainted;
4207         loceol = PL_regeol;
4208         while (scan < loceol && !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
4209             scan += UTF8SKIP(scan);
4210             hardcount++;
4211         }
4212         break;
4213     case DIGIT:
4214         while (scan < loceol && isDIGIT(*scan))
4215             scan++;
4216         break;
4217     case DIGITUTF8:
4218         loceol = PL_regeol;
4219         while (scan < loceol && swash_fetch(PL_utf8_digit,(U8*)scan)) {
4220             scan += UTF8SKIP(scan);
4221             hardcount++;
4222         }
4223         break;
4224         break;
4225     case NDIGIT:
4226         while (scan < loceol && !isDIGIT(*scan))
4227             scan++;
4228         break;
4229     case NDIGITUTF8:
4230         loceol = PL_regeol;
4231         while (scan < loceol && !swash_fetch(PL_utf8_digit,(U8*)scan)) {
4232             scan += UTF8SKIP(scan);
4233             hardcount++;
4234         }
4235         break;
4236     default:            /* Called on something of 0 width. */
4237         break;          /* So match right here or not at all. */
4238     }
4239
4240     if (hardcount)
4241         c = hardcount;
4242     else
4243         c = scan - PL_reginput;
4244     PL_reginput = scan;
4245
4246     DEBUG_r( 
4247         {
4248                 SV *prop = sv_newmortal();
4249
4250                 regprop(prop, p);
4251                 PerlIO_printf(Perl_debug_log, 
4252                               "%*s  %s can match %ld times out of %ld...\n", 
4253                               REPORT_CODE_OFF+1, "", SvPVX(prop),c,max);
4254         });
4255     
4256     return(c);
4257 }
4258
4259 /*
4260  - regrepeat_hard - repeatedly match something, report total count and the length of one match
4261  * 
4262  * The repeater is supposed to have constant length.
4263  */
4264
4265 STATIC I32
4266 S_regrepeat_hard(pTHX_ regnode *p, I32 max, I32 *lp)
4267 {
4268     dTHR;
4269     register char *scan;
4270     register char *start;
4271     register char *loceol = PL_regeol;
4272     I32 l = 0;
4273     I32 count = 0, res = 1;
4274
4275     if (!max)
4276         return 0;
4277
4278     start = PL_reginput;
4279     if (UTF) {
4280         while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
4281             if (!count++) {
4282                 l = 0;
4283                 while (start < PL_reginput) {
4284                     l++;
4285                     start += UTF8SKIP(start);
4286                 }
4287                 *lp = l;
4288                 if (l == 0)
4289                     return max;
4290             }
4291             if (count == max)
4292                 return count;
4293         }
4294     }
4295     else {
4296         while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
4297             if (!count++) {
4298                 *lp = l = PL_reginput - start;
4299                 if (max != REG_INFTY && l*max < loceol - scan)
4300                     loceol = scan + l*max;
4301                 if (l == 0)
4302                     return max;
4303             }
4304         }
4305     }
4306     if (!res)
4307         PL_reginput = scan;
4308     
4309     return count;
4310 }
4311
4312 /*
4313  - reginclass - determine if a character falls into a character class
4314  */
4315
4316 STATIC bool
4317 S_reginclass(pTHX_ register char *p, register I32 c)
4318 {
4319     dTHR;
4320     char flags = ANYOF_FLAGS(p);
4321     bool match = FALSE;
4322
4323     c &= 0xFF;
4324     if (ANYOF_BITMAP_TEST(p, c))
4325         match = TRUE;
4326     else if (flags & ANYOF_FOLD) {
4327         I32 cf;
4328         if (flags & ANYOF_LOCALE) {
4329             PL_reg_flags |= RF_tainted;
4330             cf = PL_fold_locale[c];
4331         }
4332         else
4333             cf = PL_fold[c];
4334         if (ANYOF_BITMAP_TEST(p, cf))
4335             match = TRUE;
4336     }
4337
4338     if (!match && (flags & ANYOF_CLASS)) {
4339         PL_reg_flags |= RF_tainted;
4340         if (
4341             (ANYOF_CLASS_TEST(p, ANYOF_ALNUM)   &&  isALNUM_LC(c))  ||
4342             (ANYOF_CLASS_TEST(p, ANYOF_NALNUM)  && !isALNUM_LC(c))  ||
4343             (ANYOF_CLASS_TEST(p, ANYOF_SPACE)   &&  isSPACE_LC(c))  ||
4344             (ANYOF_CLASS_TEST(p, ANYOF_NSPACE)  && !isSPACE_LC(c))  ||
4345             (ANYOF_CLASS_TEST(p, ANYOF_DIGIT)   &&  isDIGIT_LC(c))  ||
4346             (ANYOF_CLASS_TEST(p, ANYOF_NDIGIT)  && !isDIGIT_LC(c))  ||
4347             (ANYOF_CLASS_TEST(p, ANYOF_ALNUMC)  &&  isALNUMC_LC(c)) ||
4348             (ANYOF_CLASS_TEST(p, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
4349             (ANYOF_CLASS_TEST(p, ANYOF_ALPHA)   &&  isALPHA_LC(c))  ||
4350             (ANYOF_CLASS_TEST(p, ANYOF_NALPHA)  && !isALPHA_LC(c))  ||
4351             (ANYOF_CLASS_TEST(p, ANYOF_ASCII)   &&  isASCII(c))     ||
4352             (ANYOF_CLASS_TEST(p, ANYOF_NASCII)  && !isASCII(c))     ||
4353             (ANYOF_CLASS_TEST(p, ANYOF_CNTRL)   &&  isCNTRL_LC(c))  ||
4354             (ANYOF_CLASS_TEST(p, ANYOF_NCNTRL)  && !isCNTRL_LC(c))  ||
4355             (ANYOF_CLASS_TEST(p, ANYOF_GRAPH)   &&  isGRAPH_LC(c))  ||
4356             (ANYOF_CLASS_TEST(p, ANYOF_NGRAPH)  && !isGRAPH_LC(c))  ||
4357             (ANYOF_CLASS_TEST(p, ANYOF_LOWER)   &&  isLOWER_LC(c))  ||
4358             (ANYOF_CLASS_TEST(p, ANYOF_NLOWER)  && !isLOWER_LC(c))  ||
4359             (ANYOF_CLASS_TEST(p, ANYOF_PRINT)   &&  isPRINT_LC(c))  ||
4360             (ANYOF_CLASS_TEST(p, ANYOF_NPRINT)  && !isPRINT_LC(c))  ||
4361             (ANYOF_CLASS_TEST(p, ANYOF_PUNCT)   &&  isPUNCT_LC(c))  ||
4362             (ANYOF_CLASS_TEST(p, ANYOF_NPUNCT)  && !isPUNCT_LC(c))  ||
4363             (ANYOF_CLASS_TEST(p, ANYOF_UPPER)   &&  isUPPER_LC(c))  ||
4364             (ANYOF_CLASS_TEST(p, ANYOF_NUPPER)  && !isUPPER_LC(c))  ||
4365             (ANYOF_CLASS_TEST(p, ANYOF_XDIGIT)  &&  isXDIGIT(c))    ||
4366             (ANYOF_CLASS_TEST(p, ANYOF_NXDIGIT) && !isXDIGIT(c))
4367             ) /* How's that for a conditional? */
4368         {
4369             match = TRUE;
4370         }
4371     }
4372
4373     return (flags & ANYOF_INVERT) ? !match : match;
4374 }
4375
4376 STATIC bool
4377 S_reginclassutf8(pTHX_ regnode *f, U8 *p)
4378 {                                           
4379     dTHR;
4380     char flags = ARG1(f);
4381     bool match = FALSE;
4382     SV *sv = (SV*)PL_regdata->data[ARG2(f)];
4383
4384     if (swash_fetch(sv, p))
4385         match = TRUE;
4386     else if (flags & ANYOF_FOLD) {
4387         I32 cf;
4388         U8 tmpbuf[10];
4389         if (flags & ANYOF_LOCALE) {
4390             PL_reg_flags |= RF_tainted;
4391             uv_to_utf8(tmpbuf, toLOWER_LC_utf8(p));
4392         }
4393         else
4394             uv_to_utf8(tmpbuf, toLOWER_utf8(p));
4395         if (swash_fetch(sv, tmpbuf))
4396             match = TRUE;
4397     }
4398
4399     /* UTF8 combined with ANYOF_CLASS is ill-defined. */
4400
4401     return (flags & ANYOF_INVERT) ? !match : match;
4402 }
4403
4404 STATIC U8 *
4405 S_reghop(pTHX_ U8 *s, I32 off)
4406 {                               
4407     dTHR;
4408     if (off >= 0) {
4409         while (off-- && s < (U8*)PL_regeol)
4410             s += UTF8SKIP(s);
4411     }
4412     else {
4413         while (off++) {
4414             if (s > (U8*)PL_bostr) {
4415                 s--;
4416                 if (*s & 0x80) {
4417                     while (s > (U8*)PL_bostr && (*s & 0xc0) == 0x80)
4418                         s--;
4419                 }               /* XXX could check well-formedness here */
4420             }
4421         }
4422     }
4423     return s;
4424 }
4425
4426 STATIC U8 *
4427 S_reghopmaybe(pTHX_ U8* s, I32 off)
4428 {
4429     dTHR;
4430     if (off >= 0) {
4431         while (off-- && s < (U8*)PL_regeol)
4432             s += UTF8SKIP(s);
4433         if (off >= 0)
4434             return 0;
4435     }
4436     else {
4437         while (off++) {
4438             if (s > (U8*)PL_bostr) {
4439                 s--;
4440                 if (*s & 0x80) {
4441                     while (s > (U8*)PL_bostr && (*s & 0xc0) == 0x80)
4442                         s--;
4443                 }               /* XXX could check well-formedness here */
4444             }
4445             else
4446                 break;
4447         }
4448         if (off <= 0)
4449             return 0;
4450     }
4451     return s;
4452 }
4453
4454 #ifdef PERL_OBJECT
4455 #define NO_XSLOCKS
4456 #include "XSUB.h"
4457 #endif
4458
4459 static void
4460 restore_pos(pTHXo_ void *arg)
4461 {
4462     dTHR;
4463     if (PL_reg_eval_set) {
4464         if (PL_reg_oldsaved) {
4465             PL_reg_re->subbeg = PL_reg_oldsaved;
4466             PL_reg_re->sublen = PL_reg_oldsavedlen;
4467             RX_MATCH_COPIED_on(PL_reg_re);
4468         }
4469         PL_reg_magic->mg_len = PL_reg_oldpos;
4470         PL_reg_eval_set = 0;
4471         PL_curpm = PL_reg_oldcurpm;
4472     }   
4473 }
4474