5 * "One Ring to rule them all, One Ring to find them..."
8 /* NOTE: this is derived from Henry Spencer's regexp code, and should not
9 * confused with the original package (see point 3 below). Thanks, Henry!
12 /* Additional note: this code is very heavily munged from Henry's version
13 * in places. In some spots I've traded clarity for efficiency, so don't
14 * blame Henry for some of the lack of readability.
17 /* The names of the functions have been changed from regcomp and
18 * regexec to pregcomp and pregexec in order to avoid conflicts
19 * with the POSIX routines of the same names.
22 #ifdef PERL_EXT_RE_BUILD
23 /* need to replace pregcomp et al, so enable that */
24 # ifndef PERL_IN_XSUB_RE
25 # define PERL_IN_XSUB_RE
27 /* need access to debugger hooks */
28 # if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING)
33 #ifdef PERL_IN_XSUB_RE
34 /* We *really* need to overwrite these symbols: */
35 # define Perl_regexec_flags my_regexec
36 # define Perl_regdump my_regdump
37 # define Perl_regprop my_regprop
38 # define Perl_re_intuit_start my_re_intuit_start
39 /* *These* symbols are masked to allow static link. */
40 # define Perl_pregexec my_pregexec
41 # define Perl_reginitcolors my_reginitcolors
46 * pregcomp and pregexec -- regsub and regerror are not used in perl
48 * Copyright (c) 1986 by University of Toronto.
49 * Written by Henry Spencer. Not derived from licensed software.
51 * Permission is granted to anyone to use this software for any
52 * purpose on any computer system, and to redistribute it freely,
53 * subject to the following restrictions:
55 * 1. The author is not responsible for the consequences of use of
56 * this software, no matter how awful, even if they arise
59 * 2. The origin of this software must not be misrepresented, either
60 * by explicit claim or by omission.
62 * 3. Altered versions must be plainly marked as such, and must not
63 * be misrepresented as being the original software.
65 **** Alterations to Henry's code are...
67 **** Copyright (c) 1991-1999, Larry Wall
69 **** You may distribute under the terms of either the GNU General Public
70 **** License or the Artistic License, as specified in the README file.
72 * Beware that some of this code is subtly aware of the way operator
73 * precedence is structured in regular expressions. Serious changes in
74 * regular-expression syntax might require a total rethink.
77 #define PERL_IN_REGEXEC_C
82 #define RF_tainted 1 /* tainted information used? */
83 #define RF_warned 2 /* warned about big count? */
84 #define RF_evaled 4 /* Did an EVAL with setting? */
85 #define RF_utf8 8 /* String contains multibyte chars? */
87 #define UTF (PL_reg_flags & RF_utf8)
89 #define RS_init 1 /* eval environment created */
90 #define RS_set 2 /* replsv value is set */
100 #define REGINCLASS(p,c) (ANYOF_FLAGS(p) ? reginclass(p,c) : ANYOF_BITMAP_TEST(p,c))
101 #define REGINCLASSUTF8(f,p) (ARG1(f) ? reginclassutf8(f,p) : swash_fetch((SV*)PL_regdata->data[ARG2(f)],p))
103 #define CHR_SVLEN(sv) (UTF ? sv_len_utf8(sv) : SvCUR(sv))
104 #define CHR_DIST(a,b) (UTF ? utf8_distance(a,b) : a - b)
106 #define reghop_c(pos,off) ((char*)reghop((U8*)pos, off))
107 #define reghopmaybe_c(pos,off) ((char*)reghopmaybe((U8*)pos, off))
108 #define HOP(pos,off) (UTF ? reghop((U8*)pos, off) : (U8*)(pos + off))
109 #define HOPMAYBE(pos,off) (UTF ? reghopmaybe((U8*)pos, off) : (U8*)(pos + off))
110 #define HOPc(pos,off) ((char*)HOP(pos,off))
111 #define HOPMAYBEc(pos,off) ((char*)HOPMAYBE(pos,off))
113 static void restore_pos(pTHXo_ void *arg);
117 S_regcppush(pTHX_ I32 parenfloor)
120 int retval = PL_savestack_ix;
121 int i = (PL_regsize - parenfloor) * 4;
125 for (p = PL_regsize; p > parenfloor; p--) {
126 SSPUSHINT(PL_regendp[p]);
127 SSPUSHINT(PL_regstartp[p]);
128 SSPUSHPTR(PL_reg_start_tmp[p]);
131 SSPUSHINT(PL_regsize);
132 SSPUSHINT(*PL_reglastparen);
133 SSPUSHPTR(PL_reginput);
135 SSPUSHINT(SAVEt_REGCONTEXT);
139 /* These are needed since we do not localize EVAL nodes: */
140 # define REGCP_SET DEBUG_r(PerlIO_printf(Perl_debug_log, \
141 " Setting an EVAL scope, savestack=%i\n", \
142 PL_savestack_ix)); lastcp = PL_savestack_ix
144 # define REGCP_UNWIND DEBUG_r(lastcp != PL_savestack_ix ? \
145 PerlIO_printf(Perl_debug_log, \
146 " Clearing an EVAL scope, savestack=%i..%i\n", \
147 lastcp, PL_savestack_ix) : 0); regcpblow(lastcp)
157 assert(i == SAVEt_REGCONTEXT);
159 input = (char *) SSPOPPTR;
160 *PL_reglastparen = SSPOPINT;
161 PL_regsize = SSPOPINT;
162 for (i -= 3; i > 0; i -= 4) {
163 paren = (U32)SSPOPINT;
164 PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
165 PL_regstartp[paren] = SSPOPINT;
167 if (paren <= *PL_reglastparen)
168 PL_regendp[paren] = tmps;
170 PerlIO_printf(Perl_debug_log,
171 " restoring \\%d to %d(%d)..%d%s\n",
172 paren, PL_regstartp[paren],
173 PL_reg_start_tmp[paren] - PL_bostr,
175 (paren > *PL_reglastparen ? "(no)" : ""));
179 if (*PL_reglastparen + 1 <= PL_regnpar) {
180 PerlIO_printf(Perl_debug_log,
181 " restoring \\%d..\\%d to undef\n",
182 *PL_reglastparen + 1, PL_regnpar);
185 for (paren = *PL_reglastparen + 1; paren <= PL_regnpar; paren++) {
186 if (paren > PL_regsize)
187 PL_regstartp[paren] = -1;
188 PL_regendp[paren] = -1;
194 S_regcp_set_to(pTHX_ I32 ss)
197 I32 tmp = PL_savestack_ix;
199 PL_savestack_ix = ss;
201 PL_savestack_ix = tmp;
205 typedef struct re_cc_state
209 struct re_cc_state *prev;
214 #define regcpblow(cp) LEAVE_SCOPE(cp)
217 * pregexec and friends
221 - pregexec - match a regexp against a string
224 Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend,
225 char *strbeg, I32 minend, SV *screamer, U32 nosave)
226 /* strend: pointer to null at end of string */
227 /* strbeg: real beginning of string */
228 /* minend: end of match must be >=minend after stringarg. */
229 /* nosave: For optimizations. */
232 regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
233 nosave ? 0 : REXEC_COPY_STR);
237 S_cache_re(pTHX_ regexp *prog)
240 PL_regprecomp = prog->precomp; /* Needed for FAIL. */
242 PL_regprogram = prog->program;
244 PL_regnpar = prog->nparens;
245 PL_regdata = prog->data;
250 * Need to implement the following flags for reg_anch:
252 * USE_INTUIT_NOML - Useful to call re_intuit_start() first
254 * INTUIT_AUTORITATIVE_NOML - Can trust a positive answer
255 * INTUIT_AUTORITATIVE_ML
256 * INTUIT_ONCE_NOML - Intuit can match in one location only.
259 * Another flag for this function: SECOND_TIME (so that float substrs
260 * with giant delta may be not rechecked).
263 /* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
265 /* If SCREAM, then sv should be compatible with strpos and strend.
266 Otherwise, only SvCUR(sv) is used to get strbeg. */
268 /* XXXX We assume that strpos is strbeg unless sv. */
271 Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
272 char *strend, U32 flags, re_scream_pos_data *data)
275 /* Should be nonnegative! */
281 DEBUG_r( if (!PL_colorset) reginitcolors() );
282 DEBUG_r(PerlIO_printf(Perl_debug_log,
283 "%sGuessing start of match:%s `%s%.60s%s%s' against `%s%.*s%s%s'\n",
284 PL_colors[4],PL_colors[5],PL_colors[0],
287 (strlen(prog->precomp) > 60 ? "..." : ""),
289 (strend - strpos > 60 ? 60 : strend - strpos),
290 strpos, PL_colors[1],
291 (strend - strpos > 60 ? "..." : ""))
294 if (prog->minlen > strend - strpos)
297 /* XXXX Move further down? */
298 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
299 /* Should be nonnegative! */
300 end_shift = prog->minlen - start_shift -
301 CHR_SVLEN(prog->check_substr) + (SvTAIL(prog->check_substr) != 0);
303 if (prog->reganch & ROPT_ANCH) {
304 ml_anch = !( (prog->reganch & ROPT_ANCH_SINGLE)
305 || ( (prog->reganch & ROPT_ANCH_BOL)
306 && !PL_multiline ) );
308 if ((prog->check_offset_min == prog->check_offset_max) && !ml_anch) {
312 if ( !(prog->reganch & ROPT_ANCH_GPOS) /* Checked by the caller */
313 && (sv && (strpos + SvCUR(sv) != strend)) )
316 s = (char*)HOP((U8*)strpos, prog->check_offset_min);
317 if (SvTAIL(prog->check_substr)) {
318 slen = SvCUR(prog->check_substr); /* >= 1 */
320 if ( strend - s > slen || strend - s < slen - 1 ) {
324 if ( strend - s == slen && strend[-1] != '\n') {
328 /* Now should match s[0..slen-2] */
330 if (slen && (*SvPVX(prog->check_substr) != *s
332 && memNE(SvPVX(prog->check_substr), s, slen))))
335 else if (*SvPVX(prog->check_substr) != *s
336 || ((slen = SvCUR(prog->check_substr)) > 1
337 && memNE(SvPVX(prog->check_substr), s, slen)))
344 if (!ml_anch && (s + prog->check_offset_max < strend - prog->minlen))
345 end_shift += strend - s - prog->minlen - prog->check_offset_max;
353 if (flags & REXEC_SCREAM) {
354 SV *c = prog->check_substr;
355 char *strbeg = SvPVX(sv); /* XXXX Assume PV_force() on SCREAM! */
356 I32 p = -1; /* Internal iterator of scream. */
357 I32 *pp = data ? data->scream_pos : &p;
359 if (PL_screamfirst[BmRARE(c)] >= 0
360 || ( BmRARE(c) == '\n'
361 && (BmPREVIOUS(c) == SvCUR(c) - 1)
363 s = screaminstr(sv, prog->check_substr,
364 start_shift + (strpos - strbeg), end_shift, pp, 0);
368 *data->scream_olds = s;
371 s = fbm_instr((unsigned char*)s + start_shift,
372 (unsigned char*)strend - end_shift,
373 prog->check_substr, PL_multiline ? FBMrf_MULTILINE : 0);
375 /* Update the count-of-usability, remove useless subpatterns,
379 ++BmUSEFUL(prog->check_substr); /* hooray */
380 goto fail; /* not present */
382 else if (s - strpos > prog->check_offset_max &&
383 ((prog->reganch & ROPT_UTF8)
384 ? ((t = reghopmaybe_c(s, -(prog->check_offset_max)))
386 : (t = s - prog->check_offset_max) != 0) ) {
387 if (ml_anch && t[-1] != '\n') {
389 while (t < strend - end_shift - prog->minlen) {
391 if (t < s - prog->check_offset_min) {
405 ++BmUSEFUL(prog->check_substr); /* hooray/2 */
409 && (strpos + SvCUR(sv) != strend) && strpos[-1] != '\n') {
413 if (!(prog->reganch & ROPT_NAUGHTY)
414 && --BmUSEFUL(prog->check_substr) < 0
415 && prog->check_substr == prog->float_substr) { /* boo */
416 /* If flags & SOMETHING - do not do it many times on the same match */
417 SvREFCNT_dec(prog->check_substr);
418 prog->check_substr = Nullsv; /* disable */
419 prog->float_substr = Nullsv; /* clear */
421 prog->reganch &= ~RE_USE_INTUIT;
427 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sFound%s at offset %ld\n",
428 PL_colors[4],PL_colors[5], (long)(s - strpos)) );
431 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sNot found...%s\n",
432 PL_colors[4],PL_colors[5]));
437 - regexec_flags - match a regexp against a string
440 Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend,
441 char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
442 /* strend: pointer to null at end of string */
443 /* strbeg: real beginning of string */
444 /* minend: end of match must be >=minend after stringarg. */
445 /* data: May be used for some additional optimizations. */
446 /* nosave: For optimizations. */
451 register char *startpos = stringarg;
453 I32 minlen; /* must match at least this many chars */
454 I32 dontbother = 0; /* how many characters not to try at end */
456 I32 start_shift = 0; /* Offset of the start to find
457 constant substr. */ /* CC */
458 I32 end_shift = 0; /* Same for the end. */ /* CC */
459 I32 scream_pos = -1; /* Internal iterator of scream. */
461 SV* oreplsv = GvSV(PL_replgv);
469 PL_regnarrate = PL_debug & 512;
473 if (prog == NULL || startpos == NULL) {
474 Perl_croak(aTHX_ "NULL regexp parameter");
478 minlen = prog->minlen;
479 if (strend - startpos < minlen) goto phooey;
481 if (startpos == strbeg) /* is ^ valid at stringarg? */
484 PL_regprev = (U32)stringarg[-1];
485 if (!PL_multiline && PL_regprev == '\n')
486 PL_regprev = '\0'; /* force ^ to NOT match */
489 /* Check validity of program. */
490 if (UCHARAT(prog->program) != REG_MAGIC) {
491 Perl_croak(aTHX_ "corrupted regexp program");
497 if (prog->reganch & ROPT_UTF8)
498 PL_reg_flags |= RF_utf8;
500 /* Mark beginning of line for ^ and lookbehind. */
501 PL_regbol = startpos;
505 /* Mark end of line for $ (and such) */
508 /* see how far we have to get to not match where we matched before */
509 PL_regtill = startpos+minend;
511 /* We start without call_cc context. */
514 /* If there is a "must appear" string, look for it. */
517 if (prog->reganch & ROPT_GPOS_SEEN) {
520 if (!(flags & REXEC_IGNOREPOS) && sv && SvTYPE(sv) >= SVt_PVMG
521 && SvMAGIC(sv) && (mg = mg_find(sv, 'g')) && mg->mg_len >= 0)
522 PL_reg_ganch = strbeg + mg->mg_len;
524 PL_reg_ganch = startpos;
525 if (prog->reganch & ROPT_ANCH_GPOS) {
526 if (s > PL_reg_ganch)
532 if (!(flags & REXEC_CHECKED) && prog->check_substr != Nullsv) {
533 re_scream_pos_data d;
535 d.scream_olds = &scream_olds;
536 d.scream_pos = &scream_pos;
537 s = re_intuit_start(prog, sv, s, strend, flags, &d);
539 goto phooey; /* not present */
542 DEBUG_r( if (!PL_colorset) reginitcolors() );
543 DEBUG_r(PerlIO_printf(Perl_debug_log,
544 "%sMatching%s `%s%.60s%s%s' against `%s%.*s%s%s'\n",
545 PL_colors[4],PL_colors[5],PL_colors[0],
548 (strlen(prog->precomp) > 60 ? "..." : ""),
550 (strend - startpos > 60 ? 60 : strend - startpos),
551 startpos, PL_colors[1],
552 (strend - startpos > 60 ? "..." : ""))
555 /* Simplest case: anchored match need be tried only once. */
556 /* [unless only anchor is BOL and multiline is set] */
557 if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) {
558 if (s == startpos && regtry(prog, startpos))
560 else if (PL_multiline || (prog->reganch & ROPT_IMPLICIT)
561 || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */
566 dontbother = minlen - 1;
567 end = HOPc(strend, -dontbother) - 1;
568 /* for multiline we only have to try after newlines */
569 if (prog->check_substr) {
575 s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
583 if (*s++ == '\n') { /* don't need PL_utf8skip here */
591 } else if (prog->reganch & ROPT_ANCH_GPOS) {
592 if (regtry(prog, PL_reg_ganch))
597 /* Messy cases: unanchored match. */
598 if (prog->anchored_substr && prog->reganch & ROPT_SKIP) {
599 /* we have /x+whatever/ */
600 /* it must be a one character string (XXXX Except UTF?) */
601 char ch = SvPVX(prog->anchored_substr)[0];
605 if (regtry(prog, s)) goto got_it;
607 while (s < strend && *s == ch)
616 if (regtry(prog, s)) goto got_it;
618 while (s < strend && *s == ch)
626 else if (prog->anchored_substr != Nullsv
627 || (prog->float_substr != Nullsv
628 && prog->float_max_offset < strend - s)) {
629 SV *must = prog->anchored_substr
630 ? prog->anchored_substr : prog->float_substr;
632 prog->anchored_substr ? prog->anchored_offset : prog->float_max_offset;
634 prog->anchored_substr ? prog->anchored_offset : prog->float_min_offset;
635 I32 delta = back_max - back_min;
636 char *last = HOPc(strend, /* Cannot start after this */
637 -(I32)(CHR_SVLEN(must)
638 - (SvTAIL(must) != 0) + back_min));
639 char *last1; /* Last position checked before */
644 last1 = s - 1; /* bogus */
646 /* XXXX check_substr already used to find `s', can optimize if
647 check_substr==must. */
649 dontbother = end_shift;
650 strend = HOPc(strend, -dontbother);
651 while ( (s <= last) &&
652 ((flags & REXEC_SCREAM)
653 ? (s = screaminstr(sv, must, HOPc(s, back_min) - strbeg,
654 end_shift, &scream_pos, 0))
655 : (s = fbm_instr((unsigned char*)HOP(s, back_min),
656 (unsigned char*)strend, must,
657 PL_multiline ? FBMrf_MULTILINE : 0))) ) {
658 if (HOPc(s, -back_max) > last1) {
659 last1 = HOPc(s, -back_min);
660 s = HOPc(s, -back_max);
663 char *t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
665 last1 = HOPc(s, -back_min);
685 else if (c = prog->regstclass) {
686 I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
690 dontbother = minlen - 1;
691 strend = HOPc(strend, -dontbother); /* don't bother with what can't match */
693 /* We know what class it must start with. */
696 cc = (char *) OPERAND(c);
698 if (REGINCLASSUTF8(c, (U8*)s)) {
699 if (tmp && regtry(prog, s))
710 cc = (char *) OPERAND(c);
712 if (REGINCLASS(cc, *s)) {
713 if (tmp && regtry(prog, s))
724 PL_reg_flags |= RF_tainted;
731 tmp = (s != startpos) ? UCHARAT(s - 1) : PL_regprev;
732 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
734 if (tmp == !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
741 if ((minlen || tmp) && regtry(prog,s))
745 PL_reg_flags |= RF_tainted;
750 strend = reghop_c(strend, -1);
752 tmp = (I32)(s != startpos) ? utf8_to_uv(reghop((U8*)s, -1), 0) : PL_regprev;
753 tmp = ((OP(c) == BOUND ? isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
755 if (tmp == !(OP(c) == BOUND ?
756 swash_fetch(PL_utf8_alnum, (U8*)s) :
757 isALNUM_LC_utf8((U8*)s)))
765 if ((minlen || tmp) && regtry(prog,s))
769 PL_reg_flags |= RF_tainted;
776 tmp = (s != startpos) ? UCHARAT(s - 1) : PL_regprev;
777 tmp = ((OP(c) == NBOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
779 if (tmp == !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
781 else if (regtry(prog, s))
785 if ((minlen || !tmp) && regtry(prog,s))
789 PL_reg_flags |= RF_tainted;
794 strend = reghop_c(strend, -1);
796 tmp = (I32)(s != startpos) ? utf8_to_uv(reghop((U8*)s, -1), 0) : PL_regprev;
797 tmp = ((OP(c) == NBOUND ? isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
799 if (tmp == !(OP(c) == NBOUND ?
800 swash_fetch(PL_utf8_alnum, (U8*)s) :
801 isALNUM_LC_utf8((U8*)s)))
803 else if (regtry(prog, s))
807 if ((minlen || !tmp) && regtry(prog,s))
813 if (tmp && regtry(prog, s))
825 if (swash_fetch(PL_utf8_alnum, (U8*)s)) {
826 if (tmp && regtry(prog, s))
837 PL_reg_flags |= RF_tainted;
839 if (isALNUM_LC(*s)) {
840 if (tmp && regtry(prog, s))
851 PL_reg_flags |= RF_tainted;
853 if (isALNUM_LC_utf8((U8*)s)) {
854 if (tmp && regtry(prog, s))
867 if (tmp && regtry(prog, s))
879 if (!swash_fetch(PL_utf8_alnum, (U8*)s)) {
880 if (tmp && regtry(prog, s))
891 PL_reg_flags |= RF_tainted;
893 if (!isALNUM_LC(*s)) {
894 if (tmp && regtry(prog, s))
905 PL_reg_flags |= RF_tainted;
907 if (!isALNUM_LC_utf8((U8*)s)) {
908 if (tmp && regtry(prog, s))
921 if (tmp && regtry(prog, s))
933 if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s)) {
934 if (tmp && regtry(prog, s))
945 PL_reg_flags |= RF_tainted;
947 if (isSPACE_LC(*s)) {
948 if (tmp && regtry(prog, s))
959 PL_reg_flags |= RF_tainted;
961 if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) {
962 if (tmp && regtry(prog, s))
975 if (tmp && regtry(prog, s))
987 if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s))) {
988 if (tmp && regtry(prog, s))
999 PL_reg_flags |= RF_tainted;
1000 while (s < strend) {
1001 if (!isSPACE_LC(*s)) {
1002 if (tmp && regtry(prog, s))
1013 PL_reg_flags |= RF_tainted;
1014 while (s < strend) {
1015 if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) {
1016 if (tmp && regtry(prog, s))
1027 while (s < strend) {
1029 if (tmp && regtry(prog, s))
1040 while (s < strend) {
1041 if (swash_fetch(PL_utf8_digit,(U8*)s)) {
1042 if (tmp && regtry(prog, s))
1053 PL_reg_flags |= RF_tainted;
1054 while (s < strend) {
1055 if (isDIGIT_LC(*s)) {
1056 if (tmp && regtry(prog, s))
1067 PL_reg_flags |= RF_tainted;
1068 while (s < strend) {
1069 if (isDIGIT_LC_utf8((U8*)s)) {
1070 if (tmp && regtry(prog, s))
1081 while (s < strend) {
1083 if (tmp && regtry(prog, s))
1094 while (s < strend) {
1095 if (!swash_fetch(PL_utf8_digit,(U8*)s)) {
1096 if (tmp && regtry(prog, s))
1107 PL_reg_flags |= RF_tainted;
1108 while (s < strend) {
1109 if (!isDIGIT_LC(*s)) {
1110 if (tmp && regtry(prog, s))
1121 PL_reg_flags |= RF_tainted;
1122 while (s < strend) {
1123 if (!isDIGIT_LC_utf8((U8*)s)) {
1124 if (tmp && regtry(prog, s))
1135 while (s < strend) {
1137 if (tmp && regtry(prog, s))
1148 while (s < strend) {
1149 if (swash_fetch(PL_utf8_alnumc, (U8*)s)) {
1150 if (tmp && regtry(prog, s))
1161 PL_reg_flags |= RF_tainted;
1162 while (s < strend) {
1163 if (isALNUMC_LC(*s)) {
1164 if (tmp && regtry(prog, s))
1175 PL_reg_flags |= RF_tainted;
1176 while (s < strend) {
1177 if (isALNUMC_LC_utf8((U8*)s)) {
1178 if (tmp && regtry(prog, s))
1189 while (s < strend) {
1190 if (!isALNUMC(*s)) {
1191 if (tmp && regtry(prog, s))
1202 while (s < strend) {
1203 if (!swash_fetch(PL_utf8_alnumc, (U8*)s)) {
1204 if (tmp && regtry(prog, s))
1215 PL_reg_flags |= RF_tainted;
1216 while (s < strend) {
1217 if (!isALNUMC_LC(*s)) {
1218 if (tmp && regtry(prog, s))
1229 PL_reg_flags |= RF_tainted;
1230 while (s < strend) {
1231 if (!isALNUMC_LC_utf8((U8*)s)) {
1232 if (tmp && regtry(prog, s))
1243 while (s < strend) {
1244 if (isASCII(*(U8*)s)) {
1245 if (tmp && regtry(prog, s))
1256 while (s < strend) {
1257 if (!isASCII(*(U8*)s)) {
1258 if (tmp && regtry(prog, s))
1269 while (s < strend) {
1271 if (tmp && regtry(prog, s))
1282 while (s < strend) {
1283 if (swash_fetch(PL_utf8_cntrl,(U8*)s)) {
1284 if (tmp && regtry(prog, s))
1295 PL_reg_flags |= RF_tainted;
1296 while (s < strend) {
1297 if (isCNTRL_LC(*s)) {
1298 if (tmp && regtry(prog, s))
1309 PL_reg_flags |= RF_tainted;
1310 while (s < strend) {
1311 if (*s == ' ' || isCNTRL_LC_utf8((U8*)s)) {
1312 if (tmp && regtry(prog, s))
1323 while (s < strend) {
1325 if (tmp && regtry(prog, s))
1336 while (s < strend) {
1337 if (!swash_fetch(PL_utf8_cntrl,(U8*)s)) {
1338 if (tmp && regtry(prog, s))
1349 PL_reg_flags |= RF_tainted;
1350 while (s < strend) {
1351 if (!isCNTRL_LC(*s)) {
1352 if (tmp && regtry(prog, s))
1363 PL_reg_flags |= RF_tainted;
1364 while (s < strend) {
1365 if (!isCNTRL_LC_utf8((U8*)s)) {
1366 if (tmp && regtry(prog, s))
1377 while (s < strend) {
1379 if (tmp && regtry(prog, s))
1390 while (s < strend) {
1391 if (swash_fetch(PL_utf8_graph,(U8*)s)) {
1392 if (tmp && regtry(prog, s))
1403 PL_reg_flags |= RF_tainted;
1404 while (s < strend) {
1405 if (isGRAPH_LC(*s)) {
1406 if (tmp && regtry(prog, s))
1417 PL_reg_flags |= RF_tainted;
1418 while (s < strend) {
1419 if (*s == ' ' || isGRAPH_LC_utf8((U8*)s)) {
1420 if (tmp && regtry(prog, s))
1431 while (s < strend) {
1433 if (tmp && regtry(prog, s))
1444 while (s < strend) {
1445 if (!swash_fetch(PL_utf8_graph,(U8*)s)) {
1446 if (tmp && regtry(prog, s))
1457 PL_reg_flags |= RF_tainted;
1458 while (s < strend) {
1459 if (!isGRAPH_LC(*s)) {
1460 if (tmp && regtry(prog, s))
1471 PL_reg_flags |= RF_tainted;
1472 while (s < strend) {
1473 if (!isGRAPH_LC_utf8((U8*)s)) {
1474 if (tmp && regtry(prog, s))
1485 while (s < strend) {
1487 if (tmp && regtry(prog, s))
1498 while (s < strend) {
1499 if (swash_fetch(PL_utf8_lower,(U8*)s)) {
1500 if (tmp && regtry(prog, s))
1511 PL_reg_flags |= RF_tainted;
1512 while (s < strend) {
1513 if (isLOWER_LC(*s)) {
1514 if (tmp && regtry(prog, s))
1525 PL_reg_flags |= RF_tainted;
1526 while (s < strend) {
1527 if (*s == ' ' || isLOWER_LC_utf8((U8*)s)) {
1528 if (tmp && regtry(prog, s))
1539 while (s < strend) {
1541 if (tmp && regtry(prog, s))
1552 while (s < strend) {
1553 if (!swash_fetch(PL_utf8_lower,(U8*)s)) {
1554 if (tmp && regtry(prog, s))
1565 PL_reg_flags |= RF_tainted;
1566 while (s < strend) {
1567 if (!isLOWER_LC(*s)) {
1568 if (tmp && regtry(prog, s))
1579 PL_reg_flags |= RF_tainted;
1580 while (s < strend) {
1581 if (!isLOWER_LC_utf8((U8*)s)) {
1582 if (tmp && regtry(prog, s))
1593 while (s < strend) {
1595 if (tmp && regtry(prog, s))
1606 while (s < strend) {
1607 if (swash_fetch(PL_utf8_print,(U8*)s)) {
1608 if (tmp && regtry(prog, s))
1619 PL_reg_flags |= RF_tainted;
1620 while (s < strend) {
1621 if (isPRINT_LC(*s)) {
1622 if (tmp && regtry(prog, s))
1633 PL_reg_flags |= RF_tainted;
1634 while (s < strend) {
1635 if (*s == ' ' || isPRINT_LC_utf8((U8*)s)) {
1636 if (tmp && regtry(prog, s))
1647 while (s < strend) {
1649 if (tmp && regtry(prog, s))
1660 while (s < strend) {
1661 if (!swash_fetch(PL_utf8_print,(U8*)s)) {
1662 if (tmp && regtry(prog, s))
1673 PL_reg_flags |= RF_tainted;
1674 while (s < strend) {
1675 if (!isPRINT_LC(*s)) {
1676 if (tmp && regtry(prog, s))
1687 PL_reg_flags |= RF_tainted;
1688 while (s < strend) {
1689 if (!isPRINT_LC_utf8((U8*)s)) {
1690 if (tmp && regtry(prog, s))
1701 while (s < strend) {
1703 if (tmp && regtry(prog, s))
1714 while (s < strend) {
1715 if (swash_fetch(PL_utf8_punct,(U8*)s)) {
1716 if (tmp && regtry(prog, s))
1727 PL_reg_flags |= RF_tainted;
1728 while (s < strend) {
1729 if (isPUNCT_LC(*s)) {
1730 if (tmp && regtry(prog, s))
1741 PL_reg_flags |= RF_tainted;
1742 while (s < strend) {
1743 if (*s == ' ' || isPUNCT_LC_utf8((U8*)s)) {
1744 if (tmp && regtry(prog, s))
1755 while (s < strend) {
1757 if (tmp && regtry(prog, s))
1768 while (s < strend) {
1769 if (!swash_fetch(PL_utf8_punct,(U8*)s)) {
1770 if (tmp && regtry(prog, s))
1781 PL_reg_flags |= RF_tainted;
1782 while (s < strend) {
1783 if (!isPUNCT_LC(*s)) {
1784 if (tmp && regtry(prog, s))
1795 PL_reg_flags |= RF_tainted;
1796 while (s < strend) {
1797 if (!isPUNCT_LC_utf8((U8*)s)) {
1798 if (tmp && regtry(prog, s))
1809 while (s < strend) {
1811 if (tmp && regtry(prog, s))
1822 while (s < strend) {
1823 if (swash_fetch(PL_utf8_upper,(U8*)s)) {
1824 if (tmp && regtry(prog, s))
1835 PL_reg_flags |= RF_tainted;
1836 while (s < strend) {
1837 if (isUPPER_LC(*s)) {
1838 if (tmp && regtry(prog, s))
1849 PL_reg_flags |= RF_tainted;
1850 while (s < strend) {
1851 if (*s == ' ' || isUPPER_LC_utf8((U8*)s)) {
1852 if (tmp && regtry(prog, s))
1863 while (s < strend) {
1865 if (tmp && regtry(prog, s))
1876 while (s < strend) {
1877 if (!swash_fetch(PL_utf8_upper,(U8*)s)) {
1878 if (tmp && regtry(prog, s))
1889 PL_reg_flags |= RF_tainted;
1890 while (s < strend) {
1891 if (!isUPPER_LC(*s)) {
1892 if (tmp && regtry(prog, s))
1903 PL_reg_flags |= RF_tainted;
1904 while (s < strend) {
1905 if (!isUPPER_LC_utf8((U8*)s)) {
1906 if (tmp && regtry(prog, s))
1917 while (s < strend) {
1919 if (tmp && regtry(prog, s))
1930 while (s < strend) {
1931 if (!isXDIGIT(*s)) {
1932 if (tmp && regtry(prog, s))
1946 if (prog->float_substr != Nullsv) { /* Trim the end. */
1948 I32 oldpos = scream_pos;
1950 if (flags & REXEC_SCREAM) {
1951 last = screaminstr(sv, prog->float_substr, s - strbeg,
1952 end_shift, &scream_pos, 1); /* last one */
1954 last = scream_olds; /* Only one occurence. */
1958 char *little = SvPV(prog->float_substr, len);
1960 if (SvTAIL(prog->float_substr)) {
1961 if (memEQ(strend - len + 1, little, len - 1))
1962 last = strend - len + 1;
1963 else if (!PL_multiline)
1964 last = memEQ(strend - len, little, len)
1965 ? strend - len : Nullch;
1971 last = rninstr(s, strend, little, little + len);
1973 last = strend; /* matching `$' */
1976 if (last == NULL) goto phooey; /* Should not happen! */
1977 dontbother = strend - last + prog->float_min_offset;
1979 if (minlen && (dontbother < minlen))
1980 dontbother = minlen - 1;
1981 strend -= dontbother; /* this one's always in bytes! */
1982 /* We don't know much -- general case. */
1985 if (regtry(prog, s))
1994 if (regtry(prog, s))
1996 } while (s++ < strend);
2004 RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
2006 if (PL_reg_eval_set) {
2007 /* Preserve the current value of $^R */
2008 if (oreplsv != GvSV(PL_replgv))
2009 sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
2010 restored, the value remains
2012 restore_pos(aTHXo_ 0);
2015 /* make sure $`, $&, $', and $digit will work later */
2016 if ( !(flags & REXEC_NOT_FIRST) ) {
2017 if (RX_MATCH_COPIED(prog)) {
2018 Safefree(prog->subbeg);
2019 RX_MATCH_COPIED_off(prog);
2021 if (flags & REXEC_COPY_STR) {
2022 I32 i = PL_regeol - startpos + (stringarg - strbeg);
2024 s = savepvn(strbeg, i);
2027 RX_MATCH_COPIED_on(prog);
2030 prog->subbeg = strbeg;
2031 prog->sublen = PL_regeol - strbeg; /* strend may have been modified */
2038 if (PL_reg_eval_set)
2039 restore_pos(aTHXo_ 0);
2044 - regtry - try match at specific point
2046 STATIC I32 /* 0 failure, 1 success */
2047 S_regtry(pTHX_ regexp *prog, char *startpos)
2055 if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
2058 PL_reg_eval_set = RS_init;
2060 PerlIO_printf(Perl_debug_log, " setting stack tmpbase at %i\n",
2061 PL_stack_sp - PL_stack_base);
2063 SAVEINT(cxstack[cxstack_ix].blk_oldsp);
2064 cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
2065 /* Otherwise OP_NEXTSTATE will free whatever on stack now. */
2067 /* Apparently this is not needed, judging by wantarray. */
2068 /* SAVEINT(cxstack[cxstack_ix].blk_gimme);
2069 cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
2072 /* Make $_ available to executed code. */
2073 if (PL_reg_sv != DEFSV) {
2074 /* SAVE_DEFSV does *not* suffice here for USE_THREADS */
2079 if (!(SvTYPE(PL_reg_sv) >= SVt_PVMG && SvMAGIC(PL_reg_sv)
2080 && (mg = mg_find(PL_reg_sv, 'g')))) {
2081 /* prepare for quick setting of pos */
2082 sv_magic(PL_reg_sv, (SV*)0, 'g', Nullch, 0);
2083 mg = mg_find(PL_reg_sv, 'g');
2087 PL_reg_oldpos = mg->mg_len;
2088 SAVEDESTRUCTOR(restore_pos, 0);
2091 New(22,PL_reg_curpm, 1, PMOP);
2092 PL_reg_curpm->op_pmregexp = prog;
2093 PL_reg_oldcurpm = PL_curpm;
2094 PL_curpm = PL_reg_curpm;
2095 if (RX_MATCH_COPIED(prog)) {
2096 /* Here is a serious problem: we cannot rewrite subbeg,
2097 since it may be needed if this match fails. Thus
2098 $` inside (?{}) could fail... */
2099 PL_reg_oldsaved = prog->subbeg;
2100 PL_reg_oldsavedlen = prog->sublen;
2101 RX_MATCH_COPIED_off(prog);
2104 PL_reg_oldsaved = Nullch;
2105 prog->subbeg = PL_bostr;
2106 prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
2108 prog->startp[0] = startpos - PL_bostr;
2109 PL_reginput = startpos;
2110 PL_regstartp = prog->startp;
2111 PL_regendp = prog->endp;
2112 PL_reglastparen = &prog->lastparen;
2113 prog->lastparen = 0;
2115 DEBUG_r(PL_reg_starttry = startpos);
2116 if (PL_reg_start_tmpl <= prog->nparens) {
2117 PL_reg_start_tmpl = prog->nparens*3/2 + 3;
2118 if(PL_reg_start_tmp)
2119 Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
2121 New(22,PL_reg_start_tmp, PL_reg_start_tmpl, char*);
2124 /* XXXX What this code is doing here?!!! There should be no need
2125 to do this again and again, PL_reglastparen should take care of
2129 if (prog->nparens) {
2130 for (i = prog->nparens; i >= 1; i--) {
2136 if (regmatch(prog->program + 1)) {
2137 prog->endp[0] = PL_reginput - PL_bostr;
2145 - regmatch - main matching routine
2147 * Conceptually the strategy is simple: check to see whether the current
2148 * node matches, call self recursively to see whether the rest matches,
2149 * and then act accordingly. In practice we make some effort to avoid
2150 * recursion, in particular by going through "ordinary" nodes (that don't
2151 * need to know whether the rest of the match failed) by a loop instead of
2154 /* [lwall] I've hoisted the register declarations to the outer block in order to
2155 * maybe save a little bit of pushing and popping on the stack. It also takes
2156 * advantage of machines that use a register save mask on subroutine entry.
2158 STATIC I32 /* 0 failure, 1 success */
2159 S_regmatch(pTHX_ regnode *prog)
2162 register regnode *scan; /* Current node. */
2163 regnode *next; /* Next node. */
2164 regnode *inner; /* Next node in internal branch. */
2165 register I32 nextchr; /* renamed nextchr - nextchar colides with
2166 function of same name */
2167 register I32 n; /* no or next */
2168 register I32 ln; /* len or last */
2169 register char *s; /* operand or save */
2170 register char *locinput = PL_reginput;
2171 register I32 c1, c2, paren; /* case fold search, parenth */
2172 int minmod = 0, sw = 0, logical = 0;
2177 /* Note that nextchr is a byte even in UTF */
2178 nextchr = UCHARAT(locinput);
2180 while (scan != NULL) {
2181 #define sayNO_L (logical ? (logical = 0, sw = 0, goto cont) : sayNO)
2183 # define sayYES goto yes
2184 # define sayNO goto no
2185 # define saySAME(x) if (x) goto yes; else goto no
2186 # define REPORT_CODE_OFF 24
2188 # define sayYES return 1
2189 # define sayNO return 0
2190 # define saySAME(x) return x
2193 SV *prop = sv_newmortal();
2194 int docolor = *PL_colors[0];
2195 int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
2196 int l = (PL_regeol - locinput > taill ? taill : PL_regeol - locinput);
2197 /* The part of the string before starttry has one color
2198 (pref0_len chars), between starttry and current
2199 position another one (pref_len - pref0_len chars),
2200 after the current position the third one.
2201 We assume that pref0_len <= pref_len, otherwise we
2202 decrease pref0_len. */
2203 int pref_len = (locinput - PL_bostr > (5 + taill) - l
2204 ? (5 + taill) - l : locinput - PL_bostr);
2205 int pref0_len = pref_len - (locinput - PL_reg_starttry);
2207 if (l + pref_len < (5 + taill) && l < PL_regeol - locinput)
2208 l = ( PL_regeol - locinput > (5 + taill) - pref_len
2209 ? (5 + taill) - pref_len : PL_regeol - locinput);
2212 if (pref0_len > pref_len)
2213 pref0_len = pref_len;
2214 regprop(prop, scan);
2215 PerlIO_printf(Perl_debug_log,
2216 "%4i <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|%3d:%*s%s\n",
2217 locinput - PL_bostr,
2218 PL_colors[4], pref0_len,
2219 locinput - pref_len, PL_colors[5],
2220 PL_colors[2], pref_len - pref0_len,
2221 locinput - pref_len + pref0_len, PL_colors[3],
2222 (docolor ? "" : "> <"),
2223 PL_colors[0], l, locinput, PL_colors[1],
2224 15 - l - pref_len + 1,
2226 scan - PL_regprogram, PL_regindent*2, "",
2230 next = scan + NEXT_OFF(scan);
2236 if (locinput == PL_bostr
2237 ? PL_regprev == '\n'
2239 (nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
2241 /* regtill = regbol; */
2246 if (locinput == PL_bostr
2247 ? PL_regprev == '\n'
2248 : ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
2254 if (locinput == PL_regbol && PL_regprev == '\n')
2258 if (locinput == PL_reg_ganch)
2268 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
2273 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
2275 if (PL_regeol - locinput > 1)
2279 if (PL_regeol != locinput)
2283 if (nextchr & 0x80) {
2284 locinput += PL_utf8skip[nextchr];
2285 if (locinput > PL_regeol)
2287 nextchr = UCHARAT(locinput);
2290 if (!nextchr && locinput >= PL_regeol)
2292 nextchr = UCHARAT(++locinput);
2295 if (!nextchr && locinput >= PL_regeol)
2297 nextchr = UCHARAT(++locinput);
2300 if (nextchr & 0x80) {
2301 locinput += PL_utf8skip[nextchr];
2302 if (locinput > PL_regeol)
2304 nextchr = UCHARAT(locinput);
2307 if (!nextchr && locinput >= PL_regeol || nextchr == '\n')
2309 nextchr = UCHARAT(++locinput);
2312 if (!nextchr && locinput >= PL_regeol || nextchr == '\n')
2314 nextchr = UCHARAT(++locinput);
2317 s = (char *) OPERAND(scan);
2319 /* Inline the first character, for speed. */
2320 if (UCHARAT(s) != nextchr)
2322 if (PL_regeol - locinput < ln)
2324 if (ln > 1 && memNE(s, locinput, ln))
2327 nextchr = UCHARAT(locinput);
2330 PL_reg_flags |= RF_tainted;
2333 s = (char *) OPERAND(scan);
2339 c1 = OP(scan) == EXACTF;
2343 if (utf8_to_uv((U8*)s, 0) != (c1 ?
2344 toLOWER_utf8((U8*)l) :
2345 toLOWER_LC_utf8((U8*)l)))
2353 nextchr = UCHARAT(locinput);
2357 /* Inline the first character, for speed. */
2358 if (UCHARAT(s) != nextchr &&
2359 UCHARAT(s) != ((OP(scan) == EXACTF)
2360 ? PL_fold : PL_fold_locale)[nextchr])
2362 if (PL_regeol - locinput < ln)
2364 if (ln > 1 && (OP(scan) == EXACTF
2365 ? ibcmp(s, locinput, ln)
2366 : ibcmp_locale(s, locinput, ln)))
2369 nextchr = UCHARAT(locinput);
2372 s = (char *) OPERAND(scan);
2373 if (!REGINCLASSUTF8(scan, (U8*)locinput))
2375 if (locinput >= PL_regeol)
2377 locinput += PL_utf8skip[nextchr];
2378 nextchr = UCHARAT(locinput);
2381 s = (char *) OPERAND(scan);
2383 nextchr = UCHARAT(locinput);
2384 if (!REGINCLASS(s, nextchr))
2386 if (!nextchr && locinput >= PL_regeol)
2388 nextchr = UCHARAT(++locinput);
2391 PL_reg_flags |= RF_tainted;
2396 if (!(OP(scan) == ALNUM
2397 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
2399 nextchr = UCHARAT(++locinput);
2402 PL_reg_flags |= RF_tainted;
2407 if (nextchr & 0x80) {
2408 if (!(OP(scan) == ALNUMUTF8
2409 ? swash_fetch(PL_utf8_alnum, (U8*)locinput)
2410 : isALNUM_LC_utf8((U8*)locinput)))
2414 locinput += PL_utf8skip[nextchr];
2415 nextchr = UCHARAT(locinput);
2418 if (!(OP(scan) == ALNUMUTF8
2419 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
2421 nextchr = UCHARAT(++locinput);
2424 PL_reg_flags |= RF_tainted;
2427 if (!nextchr && locinput >= PL_regeol)
2429 if (OP(scan) == NALNUM
2430 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
2432 nextchr = UCHARAT(++locinput);
2435 PL_reg_flags |= RF_tainted;
2438 if (!nextchr && locinput >= PL_regeol)
2440 if (nextchr & 0x80) {
2441 if (OP(scan) == NALNUMUTF8
2442 ? swash_fetch(PL_utf8_alnum, (U8*)locinput)
2443 : isALNUM_LC_utf8((U8*)locinput))
2447 locinput += PL_utf8skip[nextchr];
2448 nextchr = UCHARAT(locinput);
2451 if (OP(scan) == NALNUMUTF8
2452 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
2454 nextchr = UCHARAT(++locinput);
2458 PL_reg_flags |= RF_tainted;
2462 /* was last char in word? */
2463 ln = (locinput != PL_regbol) ? UCHARAT(locinput - 1) : PL_regprev;
2464 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2466 n = isALNUM(nextchr);
2469 ln = isALNUM_LC(ln);
2470 n = isALNUM_LC(nextchr);
2472 if (((!ln) == (!n)) == (OP(scan) == BOUND || OP(scan) == BOUNDL))
2477 PL_reg_flags |= RF_tainted;
2481 /* was last char in word? */
2482 ln = (locinput != PL_regbol)
2483 ? utf8_to_uv(reghop((U8*)locinput, -1), 0) : PL_regprev;
2484 if (OP(scan) == BOUNDUTF8 || OP(scan) == NBOUNDUTF8) {
2485 ln = isALNUM_uni(ln);
2486 n = swash_fetch(PL_utf8_alnum, (U8*)locinput);
2489 ln = isALNUM_LC_uni(ln);
2490 n = isALNUM_LC_utf8((U8*)locinput);
2492 if (((!ln) == (!n)) == (OP(scan) == BOUNDUTF8 || OP(scan) == BOUNDLUTF8))
2496 PL_reg_flags |= RF_tainted;
2499 if (!nextchr && locinput >= PL_regeol)
2501 if (!(OP(scan) == SPACE
2502 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
2504 nextchr = UCHARAT(++locinput);
2507 PL_reg_flags |= RF_tainted;
2510 if (!nextchr && locinput >= PL_regeol)
2512 if (nextchr & 0x80) {
2513 if (!(OP(scan) == SPACEUTF8
2514 ? swash_fetch(PL_utf8_space,(U8*)locinput)
2515 : isSPACE_LC_utf8((U8*)locinput)))
2519 locinput += PL_utf8skip[nextchr];
2520 nextchr = UCHARAT(locinput);
2523 if (!(OP(scan) == SPACEUTF8
2524 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
2526 nextchr = UCHARAT(++locinput);
2529 PL_reg_flags |= RF_tainted;
2534 if (OP(scan) == SPACE
2535 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
2537 nextchr = UCHARAT(++locinput);
2540 PL_reg_flags |= RF_tainted;
2545 if (nextchr & 0x80) {
2546 if (OP(scan) == NSPACEUTF8
2547 ? swash_fetch(PL_utf8_space,(U8*)locinput)
2548 : isSPACE_LC_utf8((U8*)locinput))
2552 locinput += PL_utf8skip[nextchr];
2553 nextchr = UCHARAT(locinput);
2556 if (OP(scan) == NSPACEUTF8
2557 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
2559 nextchr = UCHARAT(++locinput);
2562 PL_reg_flags |= RF_tainted;
2565 if (!nextchr && locinput >= PL_regeol)
2567 if (!(OP(scan) == DIGIT
2568 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
2570 nextchr = UCHARAT(++locinput);
2573 PL_reg_flags |= RF_tainted;
2578 if (nextchr & 0x80) {
2579 if (OP(scan) == NDIGITUTF8
2580 ? swash_fetch(PL_utf8_digit,(U8*)locinput)
2581 : isDIGIT_LC_utf8((U8*)locinput))
2585 locinput += PL_utf8skip[nextchr];
2586 nextchr = UCHARAT(locinput);
2589 if (!isDIGIT(nextchr))
2591 nextchr = UCHARAT(++locinput);
2594 PL_reg_flags |= RF_tainted;
2599 if (OP(scan) == DIGIT
2600 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
2602 nextchr = UCHARAT(++locinput);
2605 PL_reg_flags |= RF_tainted;
2608 if (!nextchr && locinput >= PL_regeol)
2610 if (nextchr & 0x80) {
2611 if (swash_fetch(PL_utf8_digit,(U8*)locinput))
2613 locinput += PL_utf8skip[nextchr];
2614 nextchr = UCHARAT(locinput);
2617 if (isDIGIT(nextchr))
2619 nextchr = UCHARAT(++locinput);
2622 PL_reg_flags |= RF_tainted;
2627 if (!(OP(scan) == ALNUMC
2628 ? isALNUMC(nextchr) : isALNUMC_LC(nextchr)))
2630 nextchr = UCHARAT(++locinput);
2633 PL_reg_flags |= RF_tainted;
2638 if (nextchr & 0x80) {
2639 if (!(OP(scan) == ALNUMCUTF8
2640 ? swash_fetch(PL_utf8_alnumc, (U8*)locinput)
2641 : isALNUMC_LC_utf8((U8*)locinput)))
2645 locinput += PL_utf8skip[nextchr];
2646 nextchr = UCHARAT(locinput);
2649 if (!(OP(scan) == ALNUMCUTF8
2650 ? isALNUMC(nextchr) : isALNUMC_LC(nextchr)))
2652 nextchr = UCHARAT(++locinput);
2655 PL_reg_flags |= RF_tainted;
2660 if (OP(scan) == ALNUMC
2661 ? isALNUMC(nextchr) : isALNUMC_LC(nextchr))
2663 nextchr = UCHARAT(++locinput);
2666 PL_reg_flags |= RF_tainted;
2669 if (!nextchr && locinput >= PL_regeol)
2671 if (nextchr & 0x80) {
2672 if (swash_fetch(PL_utf8_alnumc,(U8*)locinput))
2674 locinput += PL_utf8skip[nextchr];
2675 nextchr = UCHARAT(locinput);
2678 if (isALNUMC(nextchr))
2680 nextchr = UCHARAT(++locinput);
2683 PL_reg_flags |= RF_tainted;
2688 if (!(OP(scan) == ALPHA
2689 ? isALPHA(nextchr) : isALPHA_LC(nextchr)))
2691 nextchr = UCHARAT(++locinput);
2694 PL_reg_flags |= RF_tainted;
2699 if (nextchr & 0x80) {
2700 if (!(OP(scan) == ALPHAUTF8
2701 ? swash_fetch(PL_utf8_alpha, (U8*)locinput)
2702 : isALPHA_LC_utf8((U8*)locinput)))
2706 locinput += PL_utf8skip[nextchr];
2707 nextchr = UCHARAT(locinput);
2710 if (!(OP(scan) == ALPHAUTF8
2711 ? isALPHA(nextchr) : isALPHA_LC(nextchr)))
2713 nextchr = UCHARAT(++locinput);
2716 PL_reg_flags |= RF_tainted;
2721 if (OP(scan) == ALPHA
2722 ? isALPHA(nextchr) : isALPHA_LC(nextchr))
2724 nextchr = UCHARAT(++locinput);
2727 PL_reg_flags |= RF_tainted;
2730 if (!nextchr && locinput >= PL_regeol)
2732 if (nextchr & 0x80) {
2733 if (swash_fetch(PL_utf8_alpha,(U8*)locinput))
2735 locinput += PL_utf8skip[nextchr];
2736 nextchr = UCHARAT(locinput);
2739 if (isALPHA(nextchr))
2741 nextchr = UCHARAT(++locinput);
2744 if (!nextchr && locinput >= PL_regeol)
2746 if (!isASCII(nextchr))
2748 nextchr = UCHARAT(++locinput);
2751 if (!nextchr && locinput >= PL_regeol)
2753 if (isASCII(nextchr))
2755 nextchr = UCHARAT(++locinput);
2758 PL_reg_flags |= RF_tainted;
2763 if (!(OP(scan) == CNTRL
2764 ? isCNTRL(nextchr) : isCNTRL_LC(nextchr)))
2766 nextchr = UCHARAT(++locinput);
2769 PL_reg_flags |= RF_tainted;
2774 if (nextchr & 0x80) {
2775 if (!(OP(scan) == CNTRLUTF8
2776 ? swash_fetch(PL_utf8_cntrl, (U8*)locinput)
2777 : isCNTRL_LC_utf8((U8*)locinput)))
2781 locinput += PL_utf8skip[nextchr];
2782 nextchr = UCHARAT(locinput);
2785 if (!(OP(scan) == CNTRLUTF8
2786 ? isCNTRL(nextchr) : isCNTRL_LC(nextchr)))
2788 nextchr = UCHARAT(++locinput);
2791 PL_reg_flags |= RF_tainted;
2796 if (OP(scan) == CNTRL
2797 ? isCNTRL(nextchr) : isCNTRL_LC(nextchr))
2799 nextchr = UCHARAT(++locinput);
2802 PL_reg_flags |= RF_tainted;
2805 if (!nextchr && locinput >= PL_regeol)
2807 if (nextchr & 0x80) {
2808 if (swash_fetch(PL_utf8_cntrl,(U8*)locinput))
2810 locinput += PL_utf8skip[nextchr];
2811 nextchr = UCHARAT(locinput);
2814 if (isCNTRL(nextchr))
2816 nextchr = UCHARAT(++locinput);
2819 PL_reg_flags |= RF_tainted;
2824 if (!(OP(scan) == GRAPH
2825 ? isGRAPH(nextchr) : isGRAPH_LC(nextchr)))
2827 nextchr = UCHARAT(++locinput);
2830 PL_reg_flags |= RF_tainted;
2835 if (nextchr & 0x80) {
2836 if (!(OP(scan) == GRAPHUTF8
2837 ? swash_fetch(PL_utf8_graph, (U8*)locinput)
2838 : isGRAPH_LC_utf8((U8*)locinput)))
2842 locinput += PL_utf8skip[nextchr];
2843 nextchr = UCHARAT(locinput);
2846 if (!(OP(scan) == GRAPHUTF8
2847 ? isGRAPH(nextchr) : isGRAPH_LC(nextchr)))
2849 nextchr = UCHARAT(++locinput);
2852 PL_reg_flags |= RF_tainted;
2857 if (OP(scan) == GRAPH
2858 ? isGRAPH(nextchr) : isGRAPH_LC(nextchr))
2860 nextchr = UCHARAT(++locinput);
2863 PL_reg_flags |= RF_tainted;
2866 if (!nextchr && locinput >= PL_regeol)
2868 if (nextchr & 0x80) {
2869 if (swash_fetch(PL_utf8_graph,(U8*)locinput))
2871 locinput += PL_utf8skip[nextchr];
2872 nextchr = UCHARAT(locinput);
2875 if (isGRAPH(nextchr))
2877 nextchr = UCHARAT(++locinput);
2880 PL_reg_flags |= RF_tainted;
2885 if (!(OP(scan) == LOWER
2886 ? isLOWER(nextchr) : isLOWER_LC(nextchr)))
2888 nextchr = UCHARAT(++locinput);
2891 PL_reg_flags |= RF_tainted;
2896 if (nextchr & 0x80) {
2897 if (!(OP(scan) == LOWERUTF8
2898 ? swash_fetch(PL_utf8_lower, (U8*)locinput)
2899 : isLOWER_LC_utf8((U8*)locinput)))
2903 locinput += PL_utf8skip[nextchr];
2904 nextchr = UCHARAT(locinput);
2907 if (!(OP(scan) == LOWERUTF8
2908 ? isLOWER(nextchr) : isLOWER_LC(nextchr)))
2910 nextchr = UCHARAT(++locinput);
2913 PL_reg_flags |= RF_tainted;
2918 if (OP(scan) == LOWER
2919 ? isLOWER(nextchr) : isLOWER_LC(nextchr))
2921 nextchr = UCHARAT(++locinput);
2924 PL_reg_flags |= RF_tainted;
2927 if (!nextchr && locinput >= PL_regeol)
2929 if (nextchr & 0x80) {
2930 if (swash_fetch(PL_utf8_lower,(U8*)locinput))
2932 locinput += PL_utf8skip[nextchr];
2933 nextchr = UCHARAT(locinput);
2936 if (isLOWER(nextchr))
2938 nextchr = UCHARAT(++locinput);
2941 PL_reg_flags |= RF_tainted;
2946 if (!(OP(scan) == PRINT
2947 ? isPRINT(nextchr) : isPRINT_LC(nextchr)))
2949 nextchr = UCHARAT(++locinput);
2952 PL_reg_flags |= RF_tainted;
2957 if (nextchr & 0x80) {
2958 if (!(OP(scan) == PRINTUTF8
2959 ? swash_fetch(PL_utf8_print, (U8*)locinput)
2960 : isPRINT_LC_utf8((U8*)locinput)))
2964 locinput += PL_utf8skip[nextchr];
2965 nextchr = UCHARAT(locinput);
2968 if (!(OP(scan) == PRINTUTF8
2969 ? isPRINT(nextchr) : isPRINT_LC(nextchr)))
2971 nextchr = UCHARAT(++locinput);
2974 PL_reg_flags |= RF_tainted;
2979 if (OP(scan) == PRINT
2980 ? isPRINT(nextchr) : isPRINT_LC(nextchr))
2982 nextchr = UCHARAT(++locinput);
2985 PL_reg_flags |= RF_tainted;
2988 if (!nextchr && locinput >= PL_regeol)
2990 if (nextchr & 0x80) {
2991 if (swash_fetch(PL_utf8_print,(U8*)locinput))
2993 locinput += PL_utf8skip[nextchr];
2994 nextchr = UCHARAT(locinput);
2997 if (isPRINT(nextchr))
2999 nextchr = UCHARAT(++locinput);
3002 PL_reg_flags |= RF_tainted;
3007 if (!(OP(scan) == PUNCT
3008 ? isPUNCT(nextchr) : isPUNCT_LC(nextchr)))
3010 nextchr = UCHARAT(++locinput);
3013 PL_reg_flags |= RF_tainted;
3018 if (nextchr & 0x80) {
3019 if (!(OP(scan) == PUNCTUTF8
3020 ? swash_fetch(PL_utf8_punct, (U8*)locinput)
3021 : isPUNCT_LC_utf8((U8*)locinput)))
3025 locinput += PL_utf8skip[nextchr];
3026 nextchr = UCHARAT(locinput);
3029 if (!(OP(scan) == PUNCTUTF8
3030 ? isPUNCT(nextchr) : isPUNCT_LC(nextchr)))
3032 nextchr = UCHARAT(++locinput);
3035 PL_reg_flags |= RF_tainted;
3040 if (OP(scan) == PUNCT
3041 ? isPUNCT(nextchr) : isPUNCT_LC(nextchr))
3043 nextchr = UCHARAT(++locinput);
3046 PL_reg_flags |= RF_tainted;
3049 if (!nextchr && locinput >= PL_regeol)
3051 if (nextchr & 0x80) {
3052 if (swash_fetch(PL_utf8_punct,(U8*)locinput))
3054 locinput += PL_utf8skip[nextchr];
3055 nextchr = UCHARAT(locinput);
3058 if (isPUNCT(nextchr))
3060 nextchr = UCHARAT(++locinput);
3063 PL_reg_flags |= RF_tainted;
3068 if (!(OP(scan) == UPPER
3069 ? isUPPER(nextchr) : isUPPER_LC(nextchr)))
3071 nextchr = UCHARAT(++locinput);
3074 PL_reg_flags |= RF_tainted;
3079 if (nextchr & 0x80) {
3080 if (!(OP(scan) == UPPERUTF8
3081 ? swash_fetch(PL_utf8_upper, (U8*)locinput)
3082 : isUPPER_LC_utf8((U8*)locinput)))
3086 locinput += PL_utf8skip[nextchr];
3087 nextchr = UCHARAT(locinput);
3090 if (!(OP(scan) == UPPERUTF8
3091 ? isUPPER(nextchr) : isUPPER_LC(nextchr)))
3093 nextchr = UCHARAT(++locinput);
3096 PL_reg_flags |= RF_tainted;
3101 if (OP(scan) == UPPER
3102 ? isUPPER(nextchr) : isUPPER_LC(nextchr))
3104 nextchr = UCHARAT(++locinput);
3107 PL_reg_flags |= RF_tainted;
3110 if (!nextchr && locinput >= PL_regeol)
3112 if (nextchr & 0x80) {
3113 if (swash_fetch(PL_utf8_upper,(U8*)locinput))
3115 locinput += PL_utf8skip[nextchr];
3116 nextchr = UCHARAT(locinput);
3119 if (isUPPER(nextchr))
3121 nextchr = UCHARAT(++locinput);
3124 if (!nextchr && locinput >= PL_regeol)
3126 if (!isXDIGIT(nextchr))
3128 nextchr = UCHARAT(++locinput);
3131 if (!nextchr && locinput >= PL_regeol)
3133 if (isXDIGIT(nextchr))
3135 nextchr = UCHARAT(++locinput);
3138 if (locinput >= PL_regeol || swash_fetch(PL_utf8_mark,(U8*)locinput))
3140 locinput += PL_utf8skip[nextchr];
3141 while (locinput < PL_regeol && swash_fetch(PL_utf8_mark,(U8*)locinput))
3142 locinput += UTF8SKIP(locinput);
3143 if (locinput > PL_regeol)
3145 nextchr = UCHARAT(locinput);
3148 PL_reg_flags |= RF_tainted;
3152 n = ARG(scan); /* which paren pair */
3153 ln = PL_regstartp[n];
3154 if (*PL_reglastparen < n || ln == -1)
3155 sayNO; /* Do not match unless seen CLOSEn. */
3156 if (ln == PL_regendp[n])
3160 if (UTF && OP(scan) != REF) { /* REF can do byte comparison */
3162 char *e = PL_bostr + PL_regendp[n];
3164 * Note that we can't do the "other character" lookup trick as
3165 * in the 8-bit case (no pun intended) because in Unicode we
3166 * have to map both upper and title case to lower case.
3168 if (OP(scan) == REFF) {
3172 if (toLOWER_utf8((U8*)s) != toLOWER_utf8((U8*)l))
3182 if (toLOWER_LC_utf8((U8*)s) != toLOWER_LC_utf8((U8*)l))
3189 nextchr = UCHARAT(locinput);
3193 /* Inline the first character, for speed. */
3194 if (UCHARAT(s) != nextchr &&
3196 (UCHARAT(s) != ((OP(scan) == REFF
3197 ? PL_fold : PL_fold_locale)[nextchr]))))
3199 ln = PL_regendp[n] - ln;
3200 if (locinput + ln > PL_regeol)
3202 if (ln > 1 && (OP(scan) == REF
3203 ? memNE(s, locinput, ln)
3205 ? ibcmp(s, locinput, ln)
3206 : ibcmp_locale(s, locinput, ln))))
3209 nextchr = UCHARAT(locinput);
3220 OP_4tree *oop = PL_op;
3221 COP *ocurcop = PL_curcop;
3222 SV **ocurpad = PL_curpad;
3226 PL_op = (OP_4tree*)PL_regdata->data[n];
3227 DEBUG_r( PerlIO_printf(Perl_debug_log, " re_eval 0x%x\n", PL_op) );
3228 PL_curpad = AvARRAY((AV*)PL_regdata->data[n + 2]);
3229 PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
3231 CALLRUNOPS(aTHX); /* Scalar context. */
3237 PL_curpad = ocurpad;
3238 PL_curcop = ocurcop;
3240 if (logical == 2) { /* Postponed subexpression. */
3242 MAGIC *mg = Null(MAGIC*);
3245 CHECKPOINT cp, lastcp;
3247 if(SvROK(ret) || SvRMAGICAL(ret)) {
3248 SV *sv = SvROK(ret) ? SvRV(ret) : ret;
3251 mg = mg_find(sv, 'r');
3254 re = (regexp *)mg->mg_obj;
3255 (void)ReREFCNT_inc(re);
3259 char *t = SvPV(ret, len);
3261 char *oprecomp = PL_regprecomp;
3262 I32 osize = PL_regsize;
3263 I32 onpar = PL_regnpar;
3266 re = CALLREGCOMP(aTHX_ t, t + len, &pm);
3268 & (SVs_TEMP | SVs_PADTMP | SVf_READONLY)))
3269 sv_magic(ret,(SV*)ReREFCNT_inc(re),'r',0,0);
3270 PL_regprecomp = oprecomp;
3275 PerlIO_printf(Perl_debug_log,
3276 "Entering embedded `%s%.60s%s%s'\n",
3280 (strlen(re->precomp) > 60 ? "..." : ""))
3283 state.prev = PL_reg_call_cc;
3284 state.cc = PL_regcc;
3285 state.re = PL_reg_re;
3291 cp = regcppush(0); /* Save *all* the positions. */
3294 state.ss = PL_savestack_ix;
3295 *PL_reglastparen = 0;
3296 PL_reg_call_cc = &state;
3297 PL_reginput = locinput;
3298 if (regmatch(re->program + 1)) {
3304 PerlIO_printf(Perl_debug_log,
3306 REPORT_CODE_OFF+PL_regindent*2, "")
3311 PL_reg_call_cc = state.prev;
3312 PL_regcc = state.cc;
3313 PL_reg_re = state.re;
3314 cache_re(PL_reg_re);
3321 sv_setsv(save_scalar(PL_replgv), ret);
3325 n = ARG(scan); /* which paren pair */
3326 PL_reg_start_tmp[n] = locinput;
3331 n = ARG(scan); /* which paren pair */
3332 PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
3333 PL_regendp[n] = locinput - PL_bostr;
3334 if (n > *PL_reglastparen)
3335 *PL_reglastparen = n;
3338 n = ARG(scan); /* which paren pair */
3339 sw = (*PL_reglastparen >= n && PL_regendp[n] != -1);
3343 next = NEXTOPER(NEXTOPER(scan));
3345 next = scan + ARG(scan);
3346 if (OP(next) == IFTHEN) /* Fake one. */
3347 next = NEXTOPER(NEXTOPER(next));
3351 logical = scan->flags;
3355 CHECKPOINT cp = PL_savestack_ix;
3357 if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
3359 cc.oldcc = PL_regcc;
3361 cc.parenfloor = *PL_reglastparen;
3363 cc.min = ARG1(scan);
3364 cc.max = ARG2(scan);
3365 cc.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
3369 PL_reginput = locinput;
3370 n = regmatch(PREVOPER(next)); /* start on the WHILEM */
3372 PL_regcc = cc.oldcc;
3378 * This is really hard to understand, because after we match
3379 * what we're trying to match, we must make sure the rest of
3380 * the RE is going to match for sure, and to do that we have
3381 * to go back UP the parse tree by recursing ever deeper. And
3382 * if it fails, we have to reset our parent's current state
3383 * that we can try again after backing off.
3386 CHECKPOINT cp, lastcp;
3387 CURCUR* cc = PL_regcc;
3388 char *lastloc = cc->lastloc; /* Detection of 0-len. */
3390 n = cc->cur + 1; /* how many we know we matched */
3391 PL_reginput = locinput;
3394 PerlIO_printf(Perl_debug_log,
3395 "%*s %ld out of %ld..%ld cc=%lx\n",
3396 REPORT_CODE_OFF+PL_regindent*2, "",
3397 (long)n, (long)cc->min,
3398 (long)cc->max, (long)cc)
3401 /* If degenerate scan matches "", assume scan done. */
3403 if (locinput == cc->lastloc && n >= cc->min) {
3404 PL_regcc = cc->oldcc;
3407 PerlIO_printf(Perl_debug_log,
3408 "%*s empty match detected, try continuation...\n",
3409 REPORT_CODE_OFF+PL_regindent*2, "")
3411 if (regmatch(cc->next))
3414 PerlIO_printf(Perl_debug_log,
3416 REPORT_CODE_OFF+PL_regindent*2, "")
3423 /* First just match a string of min scans. */
3427 cc->lastloc = locinput;
3428 if (regmatch(cc->scan))
3431 cc->lastloc = lastloc;
3433 PerlIO_printf(Perl_debug_log,
3435 REPORT_CODE_OFF+PL_regindent*2, "")
3440 /* Prefer next over scan for minimal matching. */
3443 PL_regcc = cc->oldcc;
3445 cp = regcppush(cc->parenfloor);
3447 if (regmatch(cc->next)) {
3449 sayYES; /* All done. */
3456 if (n >= cc->max) { /* Maximum greed exceeded? */
3457 if (ckWARN(WARN_UNSAFE) && n >= REG_INFTY
3458 && !(PL_reg_flags & RF_warned)) {
3459 PL_reg_flags |= RF_warned;
3460 Perl_warner(aTHX_ WARN_UNSAFE, "%s limit (%d) exceeded",
3461 "Complex regular subexpression recursion",
3468 PerlIO_printf(Perl_debug_log,
3469 "%*s trying longer...\n",
3470 REPORT_CODE_OFF+PL_regindent*2, "")
3472 /* Try scanning more and see if it helps. */
3473 PL_reginput = locinput;
3475 cc->lastloc = locinput;
3476 cp = regcppush(cc->parenfloor);
3478 if (regmatch(cc->scan)) {
3483 PerlIO_printf(Perl_debug_log,
3485 REPORT_CODE_OFF+PL_regindent*2, "")
3490 cc->lastloc = lastloc;
3494 /* Prefer scan over next for maximal matching. */
3496 if (n < cc->max) { /* More greed allowed? */
3497 cp = regcppush(cc->parenfloor);
3499 cc->lastloc = locinput;
3501 if (regmatch(cc->scan)) {
3506 regcppop(); /* Restore some previous $<digit>s? */
3507 PL_reginput = locinput;
3509 PerlIO_printf(Perl_debug_log,
3510 "%*s failed, try continuation...\n",
3511 REPORT_CODE_OFF+PL_regindent*2, "")
3514 if (ckWARN(WARN_UNSAFE) && n >= REG_INFTY
3515 && !(PL_reg_flags & RF_warned)) {
3516 PL_reg_flags |= RF_warned;
3517 Perl_warner(aTHX_ WARN_UNSAFE, "%s limit (%d) exceeded",
3518 "Complex regular subexpression recursion",
3522 /* Failed deeper matches of scan, so see if this one works. */
3523 PL_regcc = cc->oldcc;
3525 if (regmatch(cc->next))
3528 PerlIO_printf(Perl_debug_log, "%*s failed...\n",
3529 REPORT_CODE_OFF+PL_regindent*2, "")
3534 cc->lastloc = lastloc;
3539 next = scan + ARG(scan);
3542 inner = NEXTOPER(NEXTOPER(scan));
3545 inner = NEXTOPER(scan);
3550 if (OP(next) != c1) /* No choice. */
3551 next = inner; /* Avoid recursion. */
3553 int lastparen = *PL_reglastparen;
3557 PL_reginput = locinput;
3558 if (regmatch(inner))
3561 for (n = *PL_reglastparen; n > lastparen; n--)
3563 *PL_reglastparen = n;
3566 if (n = (c1 == BRANCH ? NEXT_OFF(next) : ARG(next)))
3570 inner = NEXTOPER(scan);
3571 if (c1 == BRANCHJ) {
3572 inner = NEXTOPER(inner);
3574 } while (scan != NULL && OP(scan) == c1);
3588 /* We suppose that the next guy does not need
3589 backtracking: in particular, it is of constant length,
3590 and has no parenths to influence future backrefs. */
3591 ln = ARG1(scan); /* min to match */
3592 n = ARG2(scan); /* max to match */
3593 paren = scan->flags;
3595 if (paren > PL_regsize)
3597 if (paren > *PL_reglastparen)
3598 *PL_reglastparen = paren;
3600 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
3602 scan += NEXT_OFF(scan); /* Skip former OPEN. */
3603 PL_reginput = locinput;
3606 if (ln && regrepeat_hard(scan, ln, &l) < ln)
3608 if (ln && l == 0 && n >= ln
3609 /* In fact, this is tricky. If paren, then the
3610 fact that we did/didnot match may influence
3611 future execution. */
3612 && !(paren && ln == 0))
3614 locinput = PL_reginput;
3615 if (PL_regkind[(U8)OP(next)] == EXACT) {
3616 c1 = UCHARAT(OPERAND(next) + 1);
3617 if (OP(next) == EXACTF)
3619 else if (OP(next) == EXACTFL)
3620 c2 = PL_fold_locale[c1];
3627 /* This may be improved if l == 0. */
3628 while (n >= ln || (n == REG_INFTY && ln > 0 && l)) { /* ln overflow ? */
3629 /* If it could work, try it. */
3631 UCHARAT(PL_reginput) == c1 ||
3632 UCHARAT(PL_reginput) == c2)
3636 PL_regstartp[paren] =
3637 HOPc(PL_reginput, -l) - PL_bostr;
3638 PL_regendp[paren] = PL_reginput - PL_bostr;
3641 PL_regendp[paren] = -1;
3647 /* Couldn't or didn't -- move forward. */
3648 PL_reginput = locinput;
3649 if (regrepeat_hard(scan, 1, &l)) {
3651 locinput = PL_reginput;
3658 n = regrepeat_hard(scan, n, &l);
3659 if (n != 0 && l == 0
3660 /* In fact, this is tricky. If paren, then the
3661 fact that we did/didnot match may influence
3662 future execution. */
3663 && !(paren && ln == 0))
3665 locinput = PL_reginput;
3667 PerlIO_printf(Perl_debug_log,
3668 "%*s matched %ld times, len=%ld...\n",
3669 REPORT_CODE_OFF+PL_regindent*2, "", n, l)
3672 if (PL_regkind[(U8)OP(next)] == EXACT) {
3673 c1 = UCHARAT(OPERAND(next) + 1);
3674 if (OP(next) == EXACTF)
3676 else if (OP(next) == EXACTFL)
3677 c2 = PL_fold_locale[c1];
3686 /* If it could work, try it. */
3688 UCHARAT(PL_reginput) == c1 ||
3689 UCHARAT(PL_reginput) == c2)
3692 PerlIO_printf(Perl_debug_log,
3693 "%*s trying tail with n=%ld...\n",
3694 REPORT_CODE_OFF+PL_regindent*2, "", n)
3698 PL_regstartp[paren] = HOPc(PL_reginput, -l) - PL_bostr;
3699 PL_regendp[paren] = PL_reginput - PL_bostr;
3702 PL_regendp[paren] = -1;
3708 /* Couldn't or didn't -- back up. */
3710 locinput = HOPc(locinput, -l);
3711 PL_reginput = locinput;
3718 paren = scan->flags; /* Which paren to set */
3719 if (paren > PL_regsize)
3721 if (paren > *PL_reglastparen)
3722 *PL_reglastparen = paren;
3723 ln = ARG1(scan); /* min to match */
3724 n = ARG2(scan); /* max to match */
3725 scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
3729 ln = ARG1(scan); /* min to match */
3730 n = ARG2(scan); /* max to match */
3731 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
3736 scan = NEXTOPER(scan);
3742 scan = NEXTOPER(scan);
3746 * Lookahead to avoid useless match attempts
3747 * when we know what character comes next.
3749 if (PL_regkind[(U8)OP(next)] == EXACT) {
3750 c1 = UCHARAT(OPERAND(next) + 1);
3751 if (OP(next) == EXACTF)
3753 else if (OP(next) == EXACTFL)
3754 c2 = PL_fold_locale[c1];
3760 PL_reginput = locinput;
3764 if (ln && regrepeat(scan, ln) < ln)
3766 locinput = PL_reginput;
3769 char *e = locinput + n - ln; /* Should not check after this */
3770 char *old = locinput;
3772 if (e >= PL_regeol || (n == REG_INFTY))
3775 /* Find place 'next' could work */
3777 while (locinput <= e && *locinput != c1)
3780 while (locinput <= e
3787 /* PL_reginput == old now */
3788 if (locinput != old) {
3789 ln = 1; /* Did some */
3790 if (regrepeat(scan, locinput - old) <
3794 /* PL_reginput == locinput now */
3797 PL_regstartp[paren] = HOPc(locinput, -1) - PL_bostr;
3798 PL_regendp[paren] = locinput - PL_bostr;
3801 PL_regendp[paren] = -1;
3805 PL_reginput = locinput; /* Could be reset... */
3807 /* Couldn't or didn't -- move forward. */
3812 while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */
3813 /* If it could work, try it. */
3815 UCHARAT(PL_reginput) == c1 ||
3816 UCHARAT(PL_reginput) == c2)
3820 PL_regstartp[paren] = HOPc(PL_reginput, -1) - PL_bostr;
3821 PL_regendp[paren] = PL_reginput - PL_bostr;
3824 PL_regendp[paren] = -1;
3830 /* Couldn't or didn't -- move forward. */
3831 PL_reginput = locinput;
3832 if (regrepeat(scan, 1)) {
3834 locinput = PL_reginput;
3842 n = regrepeat(scan, n);
3843 locinput = PL_reginput;
3844 if (ln < n && PL_regkind[(U8)OP(next)] == EOL &&
3845 (!PL_multiline || OP(next) == SEOL))
3846 ln = n; /* why back off? */
3850 /* If it could work, try it. */
3852 UCHARAT(PL_reginput) == c1 ||
3853 UCHARAT(PL_reginput) == c2)
3857 PL_regstartp[paren] = HOPc(PL_reginput, -1) - PL_bostr;
3858 PL_regendp[paren] = PL_reginput - PL_bostr;
3861 PL_regendp[paren] = -1;
3867 /* Couldn't or didn't -- back up. */
3869 PL_reginput = locinput = HOPc(locinput, -1);
3874 /* If it could work, try it. */
3876 UCHARAT(PL_reginput) == c1 ||
3877 UCHARAT(PL_reginput) == c2)
3883 /* Couldn't or didn't -- back up. */
3885 PL_reginput = locinput = HOPc(locinput, -1);
3892 if (PL_reg_call_cc) {
3893 re_cc_state *cur_call_cc = PL_reg_call_cc;
3894 CURCUR *cctmp = PL_regcc;
3895 regexp *re = PL_reg_re;
3896 CHECKPOINT cp, lastcp;
3898 cp = regcppush(0); /* Save *all* the positions. */
3900 regcp_set_to(PL_reg_call_cc->ss); /* Restore parens of
3902 PL_reginput = locinput; /* Make position available to
3904 cache_re(PL_reg_call_cc->re);
3905 PL_regcc = PL_reg_call_cc->cc;
3906 PL_reg_call_cc = PL_reg_call_cc->prev;
3907 if (regmatch(cur_call_cc->node)) {
3908 PL_reg_call_cc = cur_call_cc;
3914 PL_reg_call_cc = cur_call_cc;
3920 PerlIO_printf(Perl_debug_log,
3921 "%*s continuation failed...\n",
3922 REPORT_CODE_OFF+PL_regindent*2, "")
3926 if (locinput < PL_regtill)
3927 sayNO; /* Cannot match: too short. */
3930 PL_reginput = locinput; /* put where regtry can find it */
3931 sayYES; /* Success! */
3934 PL_reginput = locinput;
3939 if (UTF) { /* XXXX This is absolutely
3940 broken, we read before
3942 s = HOPMAYBEc(locinput, -scan->flags);
3948 if (locinput < PL_bostr + scan->flags)
3950 PL_reginput = locinput - scan->flags;
3955 PL_reginput = locinput;
3960 if (UTF) { /* XXXX This is absolutely
3961 broken, we read before
3963 s = HOPMAYBEc(locinput, -scan->flags);
3964 if (!s || s < PL_bostr)
3969 if (locinput < PL_bostr + scan->flags)
3971 PL_reginput = locinput - scan->flags;
3976 PL_reginput = locinput;
3979 inner = NEXTOPER(NEXTOPER(scan));
3980 if (regmatch(inner) != n) {
3995 if (OP(scan) == SUSPEND) {
3996 locinput = PL_reginput;
3997 nextchr = UCHARAT(locinput);
4002 next = scan + ARG(scan);
4007 PerlIO_printf(PerlIO_stderr(), "%lx %d\n",
4008 (unsigned long)scan, OP(scan));
4009 Perl_croak(aTHX_ "regexp memory corruption");
4015 * We get here only if there's trouble -- normally "case END" is
4016 * the terminating point.
4018 Perl_croak(aTHX_ "corrupted regexp pointers");
4036 - regrepeat - repeatedly match something simple, report how many
4039 * [This routine now assumes that it will only match on things of length 1.
4040 * That was true before, but now we assume scan - reginput is the count,
4041 * rather than incrementing count on every character. [Er, except utf8.]]
4044 S_regrepeat(pTHX_ regnode *p, I32 max)
4047 register char *scan;
4048 register char *opnd;
4050 register char *loceol = PL_regeol;
4051 register I32 hardcount = 0;
4054 if (max != REG_INFTY && max < loceol - scan)
4055 loceol = scan + max;
4056 opnd = (char *) OPERAND(p);
4059 while (scan < loceol && *scan != '\n')
4067 while (scan < loceol && *scan != '\n') {
4068 scan += UTF8SKIP(scan);
4074 while (scan < loceol) {
4075 scan += UTF8SKIP(scan);
4079 case EXACT: /* length of string is 1 */
4080 c = UCHARAT(++opnd);
4081 while (scan < loceol && UCHARAT(scan) == c)
4084 case EXACTF: /* length of string is 1 */
4085 c = UCHARAT(++opnd);
4086 while (scan < loceol &&
4087 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
4090 case EXACTFL: /* length of string is 1 */
4091 PL_reg_flags |= RF_tainted;
4092 c = UCHARAT(++opnd);
4093 while (scan < loceol &&
4094 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
4099 while (scan < loceol && REGINCLASSUTF8(p, (U8*)scan)) {
4100 scan += UTF8SKIP(scan);
4105 while (scan < loceol && REGINCLASS(opnd, *scan))
4109 while (scan < loceol && isALNUM(*scan))
4114 while (scan < loceol && swash_fetch(PL_utf8_alnum, (U8*)scan)) {
4115 scan += UTF8SKIP(scan);
4120 PL_reg_flags |= RF_tainted;
4121 while (scan < loceol && isALNUM_LC(*scan))
4125 PL_reg_flags |= RF_tainted;
4127 while (scan < loceol && isALNUM_LC_utf8((U8*)scan)) {
4128 scan += UTF8SKIP(scan);
4134 while (scan < loceol && !isALNUM(*scan))
4139 while (scan < loceol && !swash_fetch(PL_utf8_alnum, (U8*)scan)) {
4140 scan += UTF8SKIP(scan);
4145 PL_reg_flags |= RF_tainted;
4146 while (scan < loceol && !isALNUM_LC(*scan))
4150 PL_reg_flags |= RF_tainted;
4152 while (scan < loceol && !isALNUM_LC_utf8((U8*)scan)) {
4153 scan += UTF8SKIP(scan);
4158 while (scan < loceol && isSPACE(*scan))
4163 while (scan < loceol && (*scan == ' ' || swash_fetch(PL_utf8_space,(U8*)scan))) {
4164 scan += UTF8SKIP(scan);
4169 PL_reg_flags |= RF_tainted;
4170 while (scan < loceol && isSPACE_LC(*scan))
4174 PL_reg_flags |= RF_tainted;
4176 while (scan < loceol && (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
4177 scan += UTF8SKIP(scan);
4182 while (scan < loceol && !isSPACE(*scan))
4187 while (scan < loceol && !(*scan == ' ' || swash_fetch(PL_utf8_space,(U8*)scan))) {
4188 scan += UTF8SKIP(scan);
4193 PL_reg_flags |= RF_tainted;
4194 while (scan < loceol && !isSPACE_LC(*scan))
4198 PL_reg_flags |= RF_tainted;
4200 while (scan < loceol && !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
4201 scan += UTF8SKIP(scan);
4206 while (scan < loceol && isDIGIT(*scan))
4211 while (scan < loceol && swash_fetch(PL_utf8_digit,(U8*)scan)) {
4212 scan += UTF8SKIP(scan);
4218 while (scan < loceol && !isDIGIT(*scan))
4223 while (scan < loceol && !swash_fetch(PL_utf8_digit,(U8*)scan)) {
4224 scan += UTF8SKIP(scan);
4228 default: /* Called on something of 0 width. */
4229 break; /* So match right here or not at all. */
4235 c = scan - PL_reginput;
4240 SV *prop = sv_newmortal();
4243 PerlIO_printf(Perl_debug_log,
4244 "%*s %s can match %ld times out of %ld...\n",
4245 REPORT_CODE_OFF+1, "", SvPVX(prop),c,max);
4252 - regrepeat_hard - repeatedly match something, report total lenth and length
4254 * The repeater is supposed to have constant length.
4258 S_regrepeat_hard(pTHX_ regnode *p, I32 max, I32 *lp)
4261 register char *scan;
4262 register char *start;
4263 register char *loceol = PL_regeol;
4265 I32 count = 0, res = 1;
4270 start = PL_reginput;
4272 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
4275 while (start < PL_reginput) {
4277 start += UTF8SKIP(start);
4288 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
4290 *lp = l = PL_reginput - start;
4291 if (max != REG_INFTY && l*max < loceol - scan)
4292 loceol = scan + l*max;
4305 - reginclass - determine if a character falls into a character class
4309 S_reginclass(pTHX_ register char *p, register I32 c)
4312 char flags = ANYOF_FLAGS(p);
4316 if (ANYOF_BITMAP_TEST(p, c))
4318 else if (flags & ANYOF_FOLD) {
4320 if (flags & ANYOF_LOCALE) {
4321 PL_reg_flags |= RF_tainted;
4322 cf = PL_fold_locale[c];
4326 if (ANYOF_BITMAP_TEST(p, cf))
4330 if (!match && (flags & ANYOF_CLASS)) {
4331 PL_reg_flags |= RF_tainted;
4333 (ANYOF_CLASS_TEST(p, ANYOF_ALNUM) && isALNUM_LC(c)) ||
4334 (ANYOF_CLASS_TEST(p, ANYOF_NALNUM) && !isALNUM_LC(c)) ||
4335 (ANYOF_CLASS_TEST(p, ANYOF_SPACE) && isSPACE_LC(c)) ||
4336 (ANYOF_CLASS_TEST(p, ANYOF_NSPACE) && !isSPACE_LC(c)) ||
4337 (ANYOF_CLASS_TEST(p, ANYOF_DIGIT) && isDIGIT_LC(c)) ||
4338 (ANYOF_CLASS_TEST(p, ANYOF_NDIGIT) && !isDIGIT_LC(c)) ||
4339 (ANYOF_CLASS_TEST(p, ANYOF_ALNUMC) && isALNUMC_LC(c)) ||
4340 (ANYOF_CLASS_TEST(p, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
4341 (ANYOF_CLASS_TEST(p, ANYOF_ALPHA) && isALPHA_LC(c)) ||
4342 (ANYOF_CLASS_TEST(p, ANYOF_NALPHA) && !isALPHA_LC(c)) ||
4343 (ANYOF_CLASS_TEST(p, ANYOF_ASCII) && isASCII(c)) ||
4344 (ANYOF_CLASS_TEST(p, ANYOF_NASCII) && !isASCII(c)) ||
4345 (ANYOF_CLASS_TEST(p, ANYOF_CNTRL) && isCNTRL_LC(c)) ||
4346 (ANYOF_CLASS_TEST(p, ANYOF_NCNTRL) && !isCNTRL_LC(c)) ||
4347 (ANYOF_CLASS_TEST(p, ANYOF_GRAPH) && isGRAPH_LC(c)) ||
4348 (ANYOF_CLASS_TEST(p, ANYOF_NGRAPH) && !isGRAPH_LC(c)) ||
4349 (ANYOF_CLASS_TEST(p, ANYOF_LOWER) && isLOWER_LC(c)) ||
4350 (ANYOF_CLASS_TEST(p, ANYOF_NLOWER) && !isLOWER_LC(c)) ||
4351 (ANYOF_CLASS_TEST(p, ANYOF_PRINT) && isPRINT_LC(c)) ||
4352 (ANYOF_CLASS_TEST(p, ANYOF_NPRINT) && !isPRINT_LC(c)) ||
4353 (ANYOF_CLASS_TEST(p, ANYOF_PUNCT) && isPUNCT_LC(c)) ||
4354 (ANYOF_CLASS_TEST(p, ANYOF_NPUNCT) && !isPUNCT_LC(c)) ||
4355 (ANYOF_CLASS_TEST(p, ANYOF_UPPER) && isUPPER_LC(c)) ||
4356 (ANYOF_CLASS_TEST(p, ANYOF_NUPPER) && !isUPPER_LC(c)) ||
4357 (ANYOF_CLASS_TEST(p, ANYOF_XDIGIT) && isXDIGIT(c)) ||
4358 (ANYOF_CLASS_TEST(p, ANYOF_NXDIGIT) && !isXDIGIT(c))
4359 ) /* How's that for a conditional? */
4365 return (flags & ANYOF_INVERT) ? !match : match;
4369 S_reginclassutf8(pTHX_ regnode *f, U8 *p)
4372 char flags = ARG1(f);
4374 SV *sv = (SV*)PL_regdata->data[ARG2(f)];
4376 if (swash_fetch(sv, p))
4378 else if (flags & ANYOF_FOLD) {
4381 if (flags & ANYOF_LOCALE) {
4382 PL_reg_flags |= RF_tainted;
4383 uv_to_utf8(tmpbuf, toLOWER_LC_utf8(p));
4386 uv_to_utf8(tmpbuf, toLOWER_utf8(p));
4387 if (swash_fetch(sv, tmpbuf))
4391 /* UTF8 combined with ANYOF_CLASS is ill-defined. */
4393 return (flags & ANYOF_INVERT) ? !match : match;
4397 S_reghop(pTHX_ U8 *s, I32 off)
4401 while (off-- && s < (U8*)PL_regeol)
4406 if (s > (U8*)PL_bostr) {
4409 while (s > (U8*)PL_bostr && (*s & 0xc0) == 0x80)
4411 } /* XXX could check well-formedness here */
4419 S_reghopmaybe(pTHX_ U8* s, I32 off)
4423 while (off-- && s < (U8*)PL_regeol)
4430 if (s > (U8*)PL_bostr) {
4433 while (s > (U8*)PL_bostr && (*s & 0xc0) == 0x80)
4435 } /* XXX could check well-formedness here */
4452 restore_pos(pTHXo_ void *arg)
4455 if (PL_reg_eval_set) {
4456 if (PL_reg_oldsaved) {
4457 PL_reg_re->subbeg = PL_reg_oldsaved;
4458 PL_reg_re->sublen = PL_reg_oldsavedlen;
4459 RX_MATCH_COPIED_on(PL_reg_re);
4461 PL_reg_magic->mg_len = PL_reg_oldpos;
4462 PL_reg_eval_set = 0;
4463 PL_curpm = PL_reg_oldcurpm;