5 * "One Ring to rule them all, One Ring to find them..."
8 /* NOTE: this is derived from Henry Spencer's regexp code, and should not
9 * confused with the original package (see point 3 below). Thanks, Henry!
12 /* Additional note: this code is very heavily munged from Henry's version
13 * in places. In some spots I've traded clarity for efficiency, so don't
14 * blame Henry for some of the lack of readability.
17 /* The names of the functions have been changed from regcomp and
18 * regexec to pregcomp and pregexec in order to avoid conflicts
19 * with the POSIX routines of the same names.
22 #ifdef PERL_EXT_RE_BUILD
23 /* need to replace pregcomp et al, so enable that */
24 # ifndef PERL_IN_XSUB_RE
25 # define PERL_IN_XSUB_RE
27 /* need access to debugger hooks */
28 # if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING)
33 #ifdef PERL_IN_XSUB_RE
34 /* We *really* need to overwrite these symbols: */
35 # define Perl_regexec_flags my_regexec
36 # define Perl_regdump my_regdump
37 # define Perl_regprop my_regprop
38 # define Perl_re_intuit_start my_re_intuit_start
39 /* *These* symbols are masked to allow static link. */
40 # define Perl_pregexec my_pregexec
41 # define Perl_reginitcolors my_reginitcolors
43 # define PERL_NO_GET_CONTEXT
48 * pregcomp and pregexec -- regsub and regerror are not used in perl
50 * Copyright (c) 1986 by University of Toronto.
51 * Written by Henry Spencer. Not derived from licensed software.
53 * Permission is granted to anyone to use this software for any
54 * purpose on any computer system, and to redistribute it freely,
55 * subject to the following restrictions:
57 * 1. The author is not responsible for the consequences of use of
58 * this software, no matter how awful, even if they arise
61 * 2. The origin of this software must not be misrepresented, either
62 * by explicit claim or by omission.
64 * 3. Altered versions must be plainly marked as such, and must not
65 * be misrepresented as being the original software.
67 **** Alterations to Henry's code are...
69 **** Copyright (c) 1991-1999, Larry Wall
71 **** You may distribute under the terms of either the GNU General Public
72 **** License or the Artistic License, as specified in the README file.
74 * Beware that some of this code is subtly aware of the way operator
75 * precedence is structured in regular expressions. Serious changes in
76 * regular-expression syntax might require a total rethink.
79 #define PERL_IN_REGEXEC_C
82 #ifdef PERL_IN_XSUB_RE
83 # if defined(PERL_CAPI) || defined(PERL_OBJECT)
90 #define RF_tainted 1 /* tainted information used? */
91 #define RF_warned 2 /* warned about big count? */
92 #define RF_evaled 4 /* Did an EVAL with setting? */
93 #define RF_utf8 8 /* String contains multibyte chars? */
95 #define UTF (PL_reg_flags & RF_utf8)
97 #define RS_init 1 /* eval environment created */
98 #define RS_set 2 /* replsv value is set */
101 #define STATIC static
108 #define REGINCLASS(p,c) (ANYOF_FLAGS(p) ? reginclass(p,c) : ANYOF_BITMAP_TEST(p,c))
109 #define REGINCLASSUTF8(f,p) (ARG1(f) ? reginclassutf8(f,p) : swash_fetch((SV*)PL_regdata->data[ARG2(f)],p))
111 #define CHR_SVLEN(sv) (UTF ? sv_len_utf8(sv) : SvCUR(sv))
112 #define CHR_DIST(a,b) (UTF ? utf8_distance(a,b) : a - b)
114 #define reghop_c(pos,off) ((char*)reghop((U8*)pos, off))
115 #define reghopmaybe_c(pos,off) ((char*)reghopmaybe((U8*)pos, off))
116 #define HOP(pos,off) (UTF ? reghop((U8*)pos, off) : (U8*)(pos + off))
117 #define HOPMAYBE(pos,off) (UTF ? reghopmaybe((U8*)pos, off) : (U8*)(pos + off))
118 #define HOPc(pos,off) ((char*)HOP(pos,off))
119 #define HOPMAYBEc(pos,off) ((char*)HOPMAYBE(pos,off))
121 static void restore_pos(pTHXo_ void *arg);
125 S_regcppush(pTHX_ I32 parenfloor)
128 int retval = PL_savestack_ix;
129 int i = (PL_regsize - parenfloor) * 4;
133 for (p = PL_regsize; p > parenfloor; p--) {
134 SSPUSHINT(PL_regendp[p]);
135 SSPUSHINT(PL_regstartp[p]);
136 SSPUSHPTR(PL_reg_start_tmp[p]);
139 SSPUSHINT(PL_regsize);
140 SSPUSHINT(*PL_reglastparen);
141 SSPUSHPTR(PL_reginput);
143 SSPUSHINT(SAVEt_REGCONTEXT);
147 /* These are needed since we do not localize EVAL nodes: */
148 # define REGCP_SET DEBUG_r(PerlIO_printf(Perl_debug_log, \
149 " Setting an EVAL scope, savestack=%i\n", \
150 PL_savestack_ix)); lastcp = PL_savestack_ix
152 # define REGCP_UNWIND DEBUG_r(lastcp != PL_savestack_ix ? \
153 PerlIO_printf(Perl_debug_log, \
154 " Clearing an EVAL scope, savestack=%i..%i\n", \
155 lastcp, PL_savestack_ix) : 0); regcpblow(lastcp)
165 assert(i == SAVEt_REGCONTEXT);
167 input = (char *) SSPOPPTR;
168 *PL_reglastparen = SSPOPINT;
169 PL_regsize = SSPOPINT;
170 for (i -= 3; i > 0; i -= 4) {
171 paren = (U32)SSPOPINT;
172 PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
173 PL_regstartp[paren] = SSPOPINT;
175 if (paren <= *PL_reglastparen)
176 PL_regendp[paren] = tmps;
178 PerlIO_printf(Perl_debug_log,
179 " restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
180 (UV)paren, (IV)PL_regstartp[paren],
181 (IV)(PL_reg_start_tmp[paren] - PL_bostr),
182 (IV)PL_regendp[paren],
183 (paren > *PL_reglastparen ? "(no)" : ""));
187 if (*PL_reglastparen + 1 <= PL_regnpar) {
188 PerlIO_printf(Perl_debug_log,
189 " restoring \\%d..\\%d to undef\n",
190 *PL_reglastparen + 1, PL_regnpar);
193 for (paren = *PL_reglastparen + 1; paren <= PL_regnpar; paren++) {
194 if (paren > PL_regsize)
195 PL_regstartp[paren] = -1;
196 PL_regendp[paren] = -1;
202 S_regcp_set_to(pTHX_ I32 ss)
205 I32 tmp = PL_savestack_ix;
207 PL_savestack_ix = ss;
209 PL_savestack_ix = tmp;
213 typedef struct re_cc_state
217 struct re_cc_state *prev;
222 #define regcpblow(cp) LEAVE_SCOPE(cp)
225 * pregexec and friends
229 - pregexec - match a regexp against a string
232 Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend,
233 char *strbeg, I32 minend, SV *screamer, U32 nosave)
234 /* strend: pointer to null at end of string */
235 /* strbeg: real beginning of string */
236 /* minend: end of match must be >=minend after stringarg. */
237 /* nosave: For optimizations. */
240 regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
241 nosave ? 0 : REXEC_COPY_STR);
245 S_cache_re(pTHX_ regexp *prog)
248 PL_regprecomp = prog->precomp; /* Needed for FAIL. */
250 PL_regprogram = prog->program;
252 PL_regnpar = prog->nparens;
253 PL_regdata = prog->data;
258 * Need to implement the following flags for reg_anch:
260 * USE_INTUIT_NOML - Useful to call re_intuit_start() first
262 * INTUIT_AUTORITATIVE_NOML - Can trust a positive answer
263 * INTUIT_AUTORITATIVE_ML
264 * INTUIT_ONCE_NOML - Intuit can match in one location only.
267 * Another flag for this function: SECOND_TIME (so that float substrs
268 * with giant delta may be not rechecked).
271 /* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
273 /* If SCREAM, then SvPVX(sv) should be compatible with strpos and strend.
274 Otherwise, only SvCUR(sv) is used to get strbeg. */
276 /* XXXX We assume that strpos is strbeg unless sv. */
278 /* A failure to find a constant substring means that there is no need to make
279 an expensive call to REx engine, thus we celebrate a failure. Similarly,
280 finding a substring too deep into the string means that less calls to
281 regtry() should be needed.
283 REx compiler's optimizer found 4 possible hints:
284 a) Anchored substring;
286 c) Whether we are anchored (beginning-of-line or \G);
287 d) First node (of those at offset 0) which may distingush positions;
288 We use 'a', 'b', multiline-part of 'c', and try to find a position in the
289 string which does not contradict any of them.
293 Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
294 char *strend, U32 flags, re_scream_pos_data *data)
296 register I32 start_shift;
297 /* Should be nonnegative! */
298 register I32 end_shift;
304 register char *other_last = Nullch;
306 char *i_strpos = strpos;
309 DEBUG_r( if (!PL_colorset) reginitcolors() );
310 DEBUG_r(PerlIO_printf(Perl_debug_log,
311 "%sGuessing start of match, REx%s `%s%.60s%s%s' against `%s%.*s%s%s'...\n",
312 PL_colors[4],PL_colors[5],PL_colors[0],
315 (strlen(prog->precomp) > 60 ? "..." : ""),
317 (int)(strend - strpos > 60 ? 60 : strend - strpos),
318 strpos, PL_colors[1],
319 (strend - strpos > 60 ? "..." : ""))
322 if (prog->minlen > strend - strpos) {
323 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too short...\n"));
326 if (prog->reganch & ROPT_ANCH) { /* Match at beg-of-str or after \n */
327 ml_anch = !( (prog->reganch & ROPT_ANCH_SINGLE)
328 || ( (prog->reganch & ROPT_ANCH_BOL)
329 && !PL_multiline ) ); /* Check after \n? */
331 if ((prog->check_offset_min == prog->check_offset_max) && !ml_anch) {
332 /* Substring at constant offset from beg-of-str... */
335 if ( !(prog->reganch & ROPT_ANCH_GPOS) /* Checked by the caller */
336 && (sv && (strpos + SvCUR(sv) != strend)) ) {
337 DEBUG_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
340 PL_regeol = strend; /* Used in HOP() */
341 s = HOPc(strpos, prog->check_offset_min);
342 if (SvTAIL(prog->check_substr)) {
343 slen = SvCUR(prog->check_substr); /* >= 1 */
345 if ( strend - s > slen || strend - s < slen - 1
346 || (strend - s == slen && strend[-1] != '\n')) {
347 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
350 /* Now should match s[0..slen-2] */
352 if (slen && (*SvPVX(prog->check_substr) != *s
354 && memNE(SvPVX(prog->check_substr), s, slen)))) {
356 DEBUG_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
360 else if (*SvPVX(prog->check_substr) != *s
361 || ((slen = SvCUR(prog->check_substr)) > 1
362 && memNE(SvPVX(prog->check_substr), s, slen)))
364 goto success_at_start;
366 /* Match is anchored, but substr is not anchored wrt beg-of-str. */
368 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
369 /* Should be nonnegative! */
370 end_shift = prog->minlen - start_shift -
371 CHR_SVLEN(prog->check_substr) + (SvTAIL(prog->check_substr) != 0);
373 I32 end = prog->check_offset_max + CHR_SVLEN(prog->check_substr)
374 - (SvTAIL(prog->check_substr) != 0);
375 I32 eshift = strend - s - end;
377 if (end_shift < eshift)
381 else { /* Can match at random position */
384 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
385 /* Should be nonnegative! */
386 end_shift = prog->minlen - start_shift -
387 CHR_SVLEN(prog->check_substr) + (SvTAIL(prog->check_substr) != 0);
390 #ifdef DEBUGGING /* 7/99: reports of failure (with the older version) */
392 Perl_croak(aTHX_ "panic: end_shift");
395 check = prog->check_substr;
397 /* Find a possible match in the region s..strend by looking for
398 the "check" substring in the region corrected by start/end_shift. */
399 if (flags & REXEC_SCREAM) {
400 char *strbeg = SvPVX(sv); /* XXXX Assume PV_force() on SCREAM! */
401 I32 p = -1; /* Internal iterator of scream. */
402 I32 *pp = data ? data->scream_pos : &p;
404 if (PL_screamfirst[BmRARE(check)] >= 0
405 || ( BmRARE(check) == '\n'
406 && (BmPREVIOUS(check) == SvCUR(check) - 1)
408 s = screaminstr(sv, check,
409 start_shift + (s - strbeg), end_shift, pp, 0);
413 *data->scream_olds = s;
416 s = fbm_instr((unsigned char*)s + start_shift,
417 (unsigned char*)strend - end_shift,
418 check, PL_multiline ? FBMrf_MULTILINE : 0);
420 /* Update the count-of-usability, remove useless subpatterns,
423 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s %s substr `%s%.*s%s'%s%s",
424 (s ? "Found" : "Did not find"),
425 ((check == prog->anchored_substr) ? "anchored" : "floating"),
427 (int)(SvCUR(check) - (SvTAIL(check)!=0)),
429 PL_colors[1], (SvTAIL(check) ? "$" : ""),
430 (s ? " at offset " : "...\n") ) );
435 /* Finish the diagnostic message */
436 DEBUG_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
438 /* Got a candidate. Check MBOL anchoring, and the *other* substr.
439 Start with the other substr.
440 XXXX no SCREAM optimization yet - and a very coarse implementation
441 XXXX /ttx+/ results in anchored=`ttx', floating=`x'. floating will
442 *always* match. Probably should be marked during compile...
443 Probably it is right to do no SCREAM here...
446 if (prog->float_substr && prog->anchored_substr) {
447 /* Take into account the "other" substring. */
448 /* XXXX May be hopelessly wrong for UTF... */
450 other_last = strpos - 1;
451 if (check == prog->float_substr) {
454 char *last = s - start_shift, *last1, *last2;
458 t = s - prog->check_offset_max;
459 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
460 && (!(prog->reganch & ROPT_UTF8)
461 || (PL_bostr = strpos, /* Used in regcopmaybe() */
462 (t = reghopmaybe_c(s, -(prog->check_offset_max)))
467 t += prog->anchored_offset;
471 last2 = last1 = strend - prog->minlen;
474 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
475 /* On end-of-str: see comment below. */
476 s = fbm_instr((unsigned char*)t,
477 (unsigned char*)last1 + prog->anchored_offset
478 + SvCUR(prog->anchored_substr)
479 - (SvTAIL(prog->anchored_substr)!=0),
480 prog->anchored_substr, PL_multiline ? FBMrf_MULTILINE : 0);
481 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s anchored substr `%s%.*s%s'%s",
482 (s ? "Found" : "Contradicts"),
484 (int)(SvCUR(prog->anchored_substr)
485 - (SvTAIL(prog->anchored_substr)!=0)),
486 SvPVX(prog->anchored_substr),
487 PL_colors[1], (SvTAIL(prog->anchored_substr) ? "$" : "")));
489 if (last1 >= last2) {
490 DEBUG_r(PerlIO_printf(Perl_debug_log,
491 ", giving up...\n"));
494 DEBUG_r(PerlIO_printf(Perl_debug_log,
495 ", trying floating at offset %ld...\n",
496 (long)(s1 + 1 - i_strpos)));
497 PL_regeol = strend; /* Used in HOP() */
498 other_last = last1 + prog->anchored_offset;
503 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
504 (long)(s - i_strpos)));
505 t = s - prog->anchored_offset;
514 else { /* Take into account the floating substring. */
519 last1 = last = strend - prog->minlen + prog->float_min_offset;
520 if (last - t > prog->float_max_offset)
521 last = t + prog->float_max_offset;
522 s = t + prog->float_min_offset;
525 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
526 /* fbm_instr() takes into account exact value of end-of-str
527 if the check is SvTAIL(ed). Since false positives are OK,
528 and end-of-str is not later than strend we are OK. */
529 s = fbm_instr((unsigned char*)s,
530 (unsigned char*)last + SvCUR(prog->float_substr)
531 - (SvTAIL(prog->float_substr)!=0),
532 prog->float_substr, PL_multiline ? FBMrf_MULTILINE : 0);
533 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s floating substr `%s%.*s%s'%s",
534 (s ? "Found" : "Contradicts"),
536 (int)(SvCUR(prog->float_substr)
537 - (SvTAIL(prog->float_substr)!=0)),
538 SvPVX(prog->float_substr),
539 PL_colors[1], (SvTAIL(prog->float_substr) ? "$" : "")));
542 DEBUG_r(PerlIO_printf(Perl_debug_log,
543 ", giving up...\n"));
546 DEBUG_r(PerlIO_printf(Perl_debug_log,
547 ", trying anchored starting at offset %ld...\n",
548 (long)(s1 + 1 - i_strpos)));
550 PL_regeol = strend; /* Used in HOP() */
555 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
556 (long)(s - i_strpos)));
566 t = s - prog->check_offset_max;
568 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
569 && (!(prog->reganch & ROPT_UTF8)
570 || (PL_bostr = strpos, /* Used in regcopmaybe() */
571 ((t = reghopmaybe_c(s, -(prog->check_offset_max)))
574 /* Fixed substring is found far enough so that the match
575 cannot start at strpos. */
577 if (ml_anch && t[-1] != '\n') {
578 /* Eventually fbm_*() should handle this, but often
579 anchored_offset is not 0, so this check will not be wasted. */
580 /* XXXX In the code below we prefer to look for "^" even in
581 presence of anchored substrings. And we search even
582 beyond the found float position. These pessimizations
583 are historical artefacts only. */
585 while (t < strend - prog->minlen) {
587 if (t < s - prog->check_offset_min) {
588 if (prog->anchored_substr) {
589 /* We definitely contradict the found anchored
590 substr. Due to the above check we do not
591 contradict "check" substr.
592 Thus we can arrive here only if check substr
593 is float. Redo checking for "other"=="fixed".
596 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
597 PL_colors[0],PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
598 goto do_other_anchored;
601 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
602 PL_colors[0],PL_colors[1], (long)(s - i_strpos)));
605 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting at offset %ld...\n",
606 PL_colors[0],PL_colors[1], (long)(t + 1 - i_strpos)));
612 DEBUG_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
613 PL_colors[0],PL_colors[1]));
618 ++BmUSEFUL(prog->check_substr); /* hooray/5 */
622 /* The found string does not prohibit matching at beg-of-str
623 - no optimization of calling REx engine can be performed,
624 unless it was an MBOL and we are not after MBOL. */
626 /* Even in this situation we may use MBOL flag if strpos is offset
627 wrt the start of the string. */
629 && (strpos + SvCUR(sv) != strend) && strpos[-1] != '\n') {
633 DEBUG_r( if (ml_anch)
634 PerlIO_printf(Perl_debug_log, "Does not contradict /%s^%s/m...\n",
635 PL_colors[0],PL_colors[1]);
638 if (!(prog->reganch & ROPT_NAUGHTY) /* XXXX If strpos moved? */
639 && --BmUSEFUL(prog->check_substr) < 0
640 && prog->check_substr == prog->float_substr) { /* boo */
641 /* If flags & SOMETHING - do not do it many times on the same match */
642 SvREFCNT_dec(prog->check_substr);
643 prog->check_substr = Nullsv; /* disable */
644 prog->float_substr = Nullsv; /* clear */
646 prog->reganch &= ~RE_USE_INTUIT;
652 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sGuessed:%s match at offset %ld\n",
653 PL_colors[4], PL_colors[5], (long)(s - i_strpos)) );
656 fail_finish: /* Substring not found */
657 BmUSEFUL(prog->check_substr) += 5; /* hooray */
659 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
660 PL_colors[4],PL_colors[5]));
665 - regexec_flags - match a regexp against a string
668 Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend,
669 char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
670 /* strend: pointer to null at end of string */
671 /* strbeg: real beginning of string */
672 /* minend: end of match must be >=minend after stringarg. */
673 /* data: May be used for some additional optimizations. */
674 /* nosave: For optimizations. */
679 register char *startpos = stringarg;
681 I32 minlen; /* must match at least this many chars */
682 I32 dontbother = 0; /* how many characters not to try at end */
683 I32 start_shift = 0; /* Offset of the start to find
684 constant substr. */ /* CC */
685 I32 end_shift = 0; /* Same for the end. */ /* CC */
686 I32 scream_pos = -1; /* Internal iterator of scream. */
688 SV* oreplsv = GvSV(PL_replgv);
694 PL_regnarrate = PL_debug & 512;
698 if (prog == NULL || startpos == NULL) {
699 Perl_croak(aTHX_ "NULL regexp parameter");
703 minlen = prog->minlen;
704 if (strend - startpos < minlen) goto phooey;
706 if (startpos == strbeg) /* is ^ valid at stringarg? */
709 PL_regprev = (U32)stringarg[-1];
710 if (!PL_multiline && PL_regprev == '\n')
711 PL_regprev = '\0'; /* force ^ to NOT match */
714 /* Check validity of program. */
715 if (UCHARAT(prog->program) != REG_MAGIC) {
716 Perl_croak(aTHX_ "corrupted regexp program");
723 if (prog->reganch & ROPT_UTF8)
724 PL_reg_flags |= RF_utf8;
726 /* Mark beginning of line for ^ and lookbehind. */
727 PL_regbol = startpos;
731 /* Mark end of line for $ (and such) */
734 /* see how far we have to get to not match where we matched before */
735 PL_regtill = startpos+minend;
737 /* We start without call_cc context. */
740 /* If there is a "must appear" string, look for it. */
743 if (prog->reganch & ROPT_GPOS_SEEN) { /* Need to have PL_reg_ganch */
746 if (flags & REXEC_IGNOREPOS) /* Means: check only at start */
747 PL_reg_ganch = startpos;
748 else if (sv && SvTYPE(sv) >= SVt_PVMG
750 && (mg = mg_find(sv, 'g')) && mg->mg_len >= 0) {
751 PL_reg_ganch = strbeg + mg->mg_len; /* Defined pos() */
752 if (prog->reganch & ROPT_ANCH_GPOS) {
753 if (s > PL_reg_ganch)
758 else /* pos() not defined */
759 PL_reg_ganch = strbeg;
762 if (!(flags & REXEC_CHECKED) && prog->check_substr != Nullsv) {
763 re_scream_pos_data d;
765 d.scream_olds = &scream_olds;
766 d.scream_pos = &scream_pos;
767 s = re_intuit_start(prog, sv, s, strend, flags, &d);
769 goto phooey; /* not present */
772 DEBUG_r( if (!PL_colorset) reginitcolors() );
773 DEBUG_r(PerlIO_printf(Perl_debug_log,
774 "%sMatching REx%s `%s%.60s%s%s' against `%s%.*s%s%s'\n",
775 PL_colors[4],PL_colors[5],PL_colors[0],
778 (strlen(prog->precomp) > 60 ? "..." : ""),
780 (int)(strend - startpos > 60 ? 60 : strend - startpos),
781 startpos, PL_colors[1],
782 (strend - startpos > 60 ? "..." : ""))
785 /* Simplest case: anchored match need be tried only once. */
786 /* [unless only anchor is BOL and multiline is set] */
787 if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) {
788 if (s == startpos && regtry(prog, startpos))
790 else if (PL_multiline || (prog->reganch & ROPT_IMPLICIT)
791 || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */
796 dontbother = minlen - 1;
797 end = HOPc(strend, -dontbother) - 1;
798 /* for multiline we only have to try after newlines */
799 if (prog->check_substr) {
808 s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
816 if (*s++ == '\n') { /* don't need PL_utf8skip here */
824 } else if (prog->reganch & ROPT_ANCH_GPOS) {
825 if (regtry(prog, PL_reg_ganch))
830 /* Messy cases: unanchored match. */
831 if (prog->anchored_substr && prog->reganch & ROPT_SKIP) {
832 /* we have /x+whatever/ */
833 /* it must be a one character string (XXXX Except UTF?) */
834 char ch = SvPVX(prog->anchored_substr)[0];
838 if (regtry(prog, s)) goto got_it;
840 while (s < strend && *s == ch)
849 if (regtry(prog, s)) goto got_it;
851 while (s < strend && *s == ch)
859 else if (prog->anchored_substr != Nullsv
860 || (prog->float_substr != Nullsv
861 && prog->float_max_offset < strend - s)) {
862 SV *must = prog->anchored_substr
863 ? prog->anchored_substr : prog->float_substr;
865 prog->anchored_substr ? prog->anchored_offset : prog->float_max_offset;
867 prog->anchored_substr ? prog->anchored_offset : prog->float_min_offset;
868 I32 delta = back_max - back_min;
869 char *last = HOPc(strend, /* Cannot start after this */
870 -(I32)(CHR_SVLEN(must)
871 - (SvTAIL(must) != 0) + back_min));
872 char *last1; /* Last position checked before */
877 last1 = s - 1; /* bogus */
879 /* XXXX check_substr already used to find `s', can optimize if
880 check_substr==must. */
882 dontbother = end_shift;
883 strend = HOPc(strend, -dontbother);
884 while ( (s <= last) &&
885 ((flags & REXEC_SCREAM)
886 ? (s = screaminstr(sv, must, HOPc(s, back_min) - strbeg,
887 end_shift, &scream_pos, 0))
888 : (s = fbm_instr((unsigned char*)HOP(s, back_min),
889 (unsigned char*)strend, must,
890 PL_multiline ? FBMrf_MULTILINE : 0))) ) {
891 if (HOPc(s, -back_max) > last1) {
892 last1 = HOPc(s, -back_min);
893 s = HOPc(s, -back_max);
896 char *t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
898 last1 = HOPc(s, -back_min);
918 else if (c = prog->regstclass) {
919 I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
923 dontbother = minlen - 1;
924 strend = HOPc(strend, -dontbother); /* don't bother with what can't match */
926 /* We know what class it must start with. */
931 if (REGINCLASSUTF8(c, (U8*)s)) {
932 if (tmp && regtry(prog, s))
945 if (REGINCLASS(cc, *s)) {
946 if (tmp && regtry(prog, s))
957 PL_reg_flags |= RF_tainted;
964 tmp = (s != startpos) ? UCHARAT(s - 1) : PL_regprev;
965 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
967 if (tmp == !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
974 if ((minlen || tmp) && regtry(prog,s))
978 PL_reg_flags |= RF_tainted;
983 strend = reghop_c(strend, -1);
985 tmp = (I32)(s != startpos) ? utf8_to_uv(reghop((U8*)s, -1), 0) : PL_regprev;
986 tmp = ((OP(c) == BOUND ? isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
988 if (tmp == !(OP(c) == BOUND ?
989 swash_fetch(PL_utf8_alnum, (U8*)s) :
990 isALNUM_LC_utf8((U8*)s)))
998 if ((minlen || tmp) && regtry(prog,s))
1002 PL_reg_flags |= RF_tainted;
1009 tmp = (s != startpos) ? UCHARAT(s - 1) : PL_regprev;
1010 tmp = ((OP(c) == NBOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1011 while (s < strend) {
1012 if (tmp == !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
1014 else if (regtry(prog, s))
1018 if ((minlen || !tmp) && regtry(prog,s))
1022 PL_reg_flags |= RF_tainted;
1027 strend = reghop_c(strend, -1);
1029 tmp = (I32)(s != startpos) ? utf8_to_uv(reghop((U8*)s, -1), 0) : PL_regprev;
1030 tmp = ((OP(c) == NBOUND ? isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
1031 while (s < strend) {
1032 if (tmp == !(OP(c) == NBOUND ?
1033 swash_fetch(PL_utf8_alnum, (U8*)s) :
1034 isALNUM_LC_utf8((U8*)s)))
1036 else if (regtry(prog, s))
1040 if ((minlen || !tmp) && regtry(prog,s))
1044 while (s < strend) {
1046 if (tmp && regtry(prog, s))
1057 while (s < strend) {
1058 if (swash_fetch(PL_utf8_alnum, (U8*)s)) {
1059 if (tmp && regtry(prog, s))
1070 PL_reg_flags |= RF_tainted;
1071 while (s < strend) {
1072 if (isALNUM_LC(*s)) {
1073 if (tmp && regtry(prog, s))
1084 PL_reg_flags |= RF_tainted;
1085 while (s < strend) {
1086 if (isALNUM_LC_utf8((U8*)s)) {
1087 if (tmp && regtry(prog, s))
1098 while (s < strend) {
1100 if (tmp && regtry(prog, s))
1111 while (s < strend) {
1112 if (!swash_fetch(PL_utf8_alnum, (U8*)s)) {
1113 if (tmp && regtry(prog, s))
1124 PL_reg_flags |= RF_tainted;
1125 while (s < strend) {
1126 if (!isALNUM_LC(*s)) {
1127 if (tmp && regtry(prog, s))
1138 PL_reg_flags |= RF_tainted;
1139 while (s < strend) {
1140 if (!isALNUM_LC_utf8((U8*)s)) {
1141 if (tmp && regtry(prog, s))
1152 while (s < strend) {
1154 if (tmp && regtry(prog, s))
1165 while (s < strend) {
1166 if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s)) {
1167 if (tmp && regtry(prog, s))
1178 PL_reg_flags |= RF_tainted;
1179 while (s < strend) {
1180 if (isSPACE_LC(*s)) {
1181 if (tmp && regtry(prog, s))
1192 PL_reg_flags |= RF_tainted;
1193 while (s < strend) {
1194 if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) {
1195 if (tmp && regtry(prog, s))
1206 while (s < strend) {
1208 if (tmp && regtry(prog, s))
1219 while (s < strend) {
1220 if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s))) {
1221 if (tmp && regtry(prog, s))
1232 PL_reg_flags |= RF_tainted;
1233 while (s < strend) {
1234 if (!isSPACE_LC(*s)) {
1235 if (tmp && regtry(prog, s))
1246 PL_reg_flags |= RF_tainted;
1247 while (s < strend) {
1248 if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) {
1249 if (tmp && regtry(prog, s))
1260 while (s < strend) {
1262 if (tmp && regtry(prog, s))
1273 while (s < strend) {
1274 if (swash_fetch(PL_utf8_digit,(U8*)s)) {
1275 if (tmp && regtry(prog, s))
1286 PL_reg_flags |= RF_tainted;
1287 while (s < strend) {
1288 if (isDIGIT_LC(*s)) {
1289 if (tmp && regtry(prog, s))
1300 PL_reg_flags |= RF_tainted;
1301 while (s < strend) {
1302 if (isDIGIT_LC_utf8((U8*)s)) {
1303 if (tmp && regtry(prog, s))
1314 while (s < strend) {
1316 if (tmp && regtry(prog, s))
1327 while (s < strend) {
1328 if (!swash_fetch(PL_utf8_digit,(U8*)s)) {
1329 if (tmp && regtry(prog, s))
1340 PL_reg_flags |= RF_tainted;
1341 while (s < strend) {
1342 if (!isDIGIT_LC(*s)) {
1343 if (tmp && regtry(prog, s))
1354 PL_reg_flags |= RF_tainted;
1355 while (s < strend) {
1356 if (!isDIGIT_LC_utf8((U8*)s)) {
1357 if (tmp && regtry(prog, s))
1371 if (prog->float_substr != Nullsv) { /* Trim the end. */
1373 I32 oldpos = scream_pos;
1375 if (flags & REXEC_SCREAM) {
1376 last = screaminstr(sv, prog->float_substr, s - strbeg,
1377 end_shift, &scream_pos, 1); /* last one */
1379 last = scream_olds; /* Only one occurence. */
1383 char *little = SvPV(prog->float_substr, len);
1385 if (SvTAIL(prog->float_substr)) {
1386 if (memEQ(strend - len + 1, little, len - 1))
1387 last = strend - len + 1;
1388 else if (!PL_multiline)
1389 last = memEQ(strend - len, little, len)
1390 ? strend - len : Nullch;
1396 last = rninstr(s, strend, little, little + len);
1398 last = strend; /* matching `$' */
1401 if (last == NULL) goto phooey; /* Should not happen! */
1402 dontbother = strend - last + prog->float_min_offset;
1404 if (minlen && (dontbother < minlen))
1405 dontbother = minlen - 1;
1406 strend -= dontbother; /* this one's always in bytes! */
1407 /* We don't know much -- general case. */
1410 if (regtry(prog, s))
1419 if (regtry(prog, s))
1421 } while (s++ < strend);
1429 RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
1431 if (PL_reg_eval_set) {
1432 /* Preserve the current value of $^R */
1433 if (oreplsv != GvSV(PL_replgv))
1434 sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
1435 restored, the value remains
1437 restore_pos(aTHXo_ 0);
1440 /* make sure $`, $&, $', and $digit will work later */
1441 if ( !(flags & REXEC_NOT_FIRST) ) {
1442 if (RX_MATCH_COPIED(prog)) {
1443 Safefree(prog->subbeg);
1444 RX_MATCH_COPIED_off(prog);
1446 if (flags & REXEC_COPY_STR) {
1447 I32 i = PL_regeol - startpos + (stringarg - strbeg);
1449 s = savepvn(strbeg, i);
1452 RX_MATCH_COPIED_on(prog);
1455 prog->subbeg = strbeg;
1456 prog->sublen = PL_regeol - strbeg; /* strend may have been modified */
1463 if (PL_reg_eval_set)
1464 restore_pos(aTHXo_ 0);
1469 - regtry - try match at specific point
1471 STATIC I32 /* 0 failure, 1 success */
1472 S_regtry(pTHX_ regexp *prog, char *startpos)
1480 if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
1483 PL_reg_eval_set = RS_init;
1485 PerlIO_printf(Perl_debug_log, " setting stack tmpbase at %"IVdf"\n",
1486 (IV)(PL_stack_sp - PL_stack_base));
1488 SAVEINT(cxstack[cxstack_ix].blk_oldsp);
1489 cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
1490 /* Otherwise OP_NEXTSTATE will free whatever on stack now. */
1492 /* Apparently this is not needed, judging by wantarray. */
1493 /* SAVEINT(cxstack[cxstack_ix].blk_gimme);
1494 cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
1497 /* Make $_ available to executed code. */
1498 if (PL_reg_sv != DEFSV) {
1499 /* SAVE_DEFSV does *not* suffice here for USE_THREADS */
1504 if (!(SvTYPE(PL_reg_sv) >= SVt_PVMG && SvMAGIC(PL_reg_sv)
1505 && (mg = mg_find(PL_reg_sv, 'g')))) {
1506 /* prepare for quick setting of pos */
1507 sv_magic(PL_reg_sv, (SV*)0, 'g', Nullch, 0);
1508 mg = mg_find(PL_reg_sv, 'g');
1512 PL_reg_oldpos = mg->mg_len;
1513 SAVEDESTRUCTOR_X(restore_pos, 0);
1516 New(22,PL_reg_curpm, 1, PMOP);
1517 PL_reg_curpm->op_pmregexp = prog;
1518 PL_reg_oldcurpm = PL_curpm;
1519 PL_curpm = PL_reg_curpm;
1520 if (RX_MATCH_COPIED(prog)) {
1521 /* Here is a serious problem: we cannot rewrite subbeg,
1522 since it may be needed if this match fails. Thus
1523 $` inside (?{}) could fail... */
1524 PL_reg_oldsaved = prog->subbeg;
1525 PL_reg_oldsavedlen = prog->sublen;
1526 RX_MATCH_COPIED_off(prog);
1529 PL_reg_oldsaved = Nullch;
1530 prog->subbeg = PL_bostr;
1531 prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
1533 prog->startp[0] = startpos - PL_bostr;
1534 PL_reginput = startpos;
1535 PL_regstartp = prog->startp;
1536 PL_regendp = prog->endp;
1537 PL_reglastparen = &prog->lastparen;
1538 prog->lastparen = 0;
1540 DEBUG_r(PL_reg_starttry = startpos);
1541 if (PL_reg_start_tmpl <= prog->nparens) {
1542 PL_reg_start_tmpl = prog->nparens*3/2 + 3;
1543 if(PL_reg_start_tmp)
1544 Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1546 New(22,PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1549 /* XXXX What this code is doing here?!!! There should be no need
1550 to do this again and again, PL_reglastparen should take care of
1554 if (prog->nparens) {
1555 for (i = prog->nparens; i >= 1; i--) {
1561 if (regmatch(prog->program + 1)) {
1562 prog->endp[0] = PL_reginput - PL_bostr;
1570 - regmatch - main matching routine
1572 * Conceptually the strategy is simple: check to see whether the current
1573 * node matches, call self recursively to see whether the rest matches,
1574 * and then act accordingly. In practice we make some effort to avoid
1575 * recursion, in particular by going through "ordinary" nodes (that don't
1576 * need to know whether the rest of the match failed) by a loop instead of
1579 /* [lwall] I've hoisted the register declarations to the outer block in order to
1580 * maybe save a little bit of pushing and popping on the stack. It also takes
1581 * advantage of machines that use a register save mask on subroutine entry.
1583 STATIC I32 /* 0 failure, 1 success */
1584 S_regmatch(pTHX_ regnode *prog)
1587 register regnode *scan; /* Current node. */
1588 regnode *next; /* Next node. */
1589 regnode *inner; /* Next node in internal branch. */
1590 register I32 nextchr; /* renamed nextchr - nextchar colides with
1591 function of same name */
1592 register I32 n; /* no or next */
1593 register I32 ln; /* len or last */
1594 register char *s; /* operand or save */
1595 register char *locinput = PL_reginput;
1596 register I32 c1, c2, paren; /* case fold search, parenth */
1597 int minmod = 0, sw = 0, logical = 0;
1602 /* Note that nextchr is a byte even in UTF */
1603 nextchr = UCHARAT(locinput);
1605 while (scan != NULL) {
1606 #define sayNO_L (logical ? (logical = 0, sw = 0, goto cont) : sayNO)
1608 # define sayYES goto yes
1609 # define sayNO goto no
1610 # define sayYES_FINAL goto yes_final
1611 # define sayYES_LOUD goto yes_loud
1612 # define sayNO_FINAL goto no_final
1613 # define sayNO_SILENT goto do_no
1614 # define saySAME(x) if (x) goto yes; else goto no
1615 # define REPORT_CODE_OFF 24
1617 # define sayYES return 1
1618 # define sayNO return 0
1619 # define sayYES_FINAL return 1
1620 # define sayYES_LOUD return 1
1621 # define sayNO_FINAL return 0
1622 # define sayNO_SILENT return 0
1623 # define saySAME(x) return x
1626 SV *prop = sv_newmortal();
1627 int docolor = *PL_colors[0];
1628 int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
1629 int l = (PL_regeol - locinput > taill ? taill : PL_regeol - locinput);
1630 /* The part of the string before starttry has one color
1631 (pref0_len chars), between starttry and current
1632 position another one (pref_len - pref0_len chars),
1633 after the current position the third one.
1634 We assume that pref0_len <= pref_len, otherwise we
1635 decrease pref0_len. */
1636 int pref_len = (locinput - PL_bostr > (5 + taill) - l
1637 ? (5 + taill) - l : locinput - PL_bostr);
1638 int pref0_len = pref_len - (locinput - PL_reg_starttry);
1640 if (l + pref_len < (5 + taill) && l < PL_regeol - locinput)
1641 l = ( PL_regeol - locinput > (5 + taill) - pref_len
1642 ? (5 + taill) - pref_len : PL_regeol - locinput);
1645 if (pref0_len > pref_len)
1646 pref0_len = pref_len;
1647 regprop(prop, scan);
1648 PerlIO_printf(Perl_debug_log,
1649 "%4"IVdf" <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|%3"IVdf":%*s%s\n",
1650 (IV)(locinput - PL_bostr),
1651 PL_colors[4], pref0_len,
1652 locinput - pref_len, PL_colors[5],
1653 PL_colors[2], pref_len - pref0_len,
1654 locinput - pref_len + pref0_len, PL_colors[3],
1655 (docolor ? "" : "> <"),
1656 PL_colors[0], l, locinput, PL_colors[1],
1657 15 - l - pref_len + 1,
1659 (IV)(scan - PL_regprogram), PL_regindent*2, "",
1663 next = scan + NEXT_OFF(scan);
1669 if (locinput == PL_bostr
1670 ? PL_regprev == '\n'
1672 (nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
1674 /* regtill = regbol; */
1679 if (locinput == PL_bostr
1680 ? PL_regprev == '\n'
1681 : ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
1687 if (locinput == PL_regbol && PL_regprev == '\n')
1691 if (locinput == PL_reg_ganch)
1701 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
1706 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
1708 if (PL_regeol - locinput > 1)
1712 if (PL_regeol != locinput)
1716 if (nextchr & 0x80) {
1717 locinput += PL_utf8skip[nextchr];
1718 if (locinput > PL_regeol)
1720 nextchr = UCHARAT(locinput);
1723 if (!nextchr && locinput >= PL_regeol)
1725 nextchr = UCHARAT(++locinput);
1728 if (!nextchr && locinput >= PL_regeol)
1730 nextchr = UCHARAT(++locinput);
1733 if (nextchr & 0x80) {
1734 locinput += PL_utf8skip[nextchr];
1735 if (locinput > PL_regeol)
1737 nextchr = UCHARAT(locinput);
1740 if (!nextchr && locinput >= PL_regeol || nextchr == '\n')
1742 nextchr = UCHARAT(++locinput);
1745 if (!nextchr && locinput >= PL_regeol || nextchr == '\n')
1747 nextchr = UCHARAT(++locinput);
1752 /* Inline the first character, for speed. */
1753 if (UCHARAT(s) != nextchr)
1755 if (PL_regeol - locinput < ln)
1757 if (ln > 1 && memNE(s, locinput, ln))
1760 nextchr = UCHARAT(locinput);
1763 PL_reg_flags |= RF_tainted;
1772 c1 = OP(scan) == EXACTF;
1776 if (utf8_to_uv((U8*)s, 0) != (c1 ?
1777 toLOWER_utf8((U8*)l) :
1778 toLOWER_LC_utf8((U8*)l)))
1786 nextchr = UCHARAT(locinput);
1790 /* Inline the first character, for speed. */
1791 if (UCHARAT(s) != nextchr &&
1792 UCHARAT(s) != ((OP(scan) == EXACTF)
1793 ? PL_fold : PL_fold_locale)[nextchr])
1795 if (PL_regeol - locinput < ln)
1797 if (ln > 1 && (OP(scan) == EXACTF
1798 ? ibcmp(s, locinput, ln)
1799 : ibcmp_locale(s, locinput, ln)))
1802 nextchr = UCHARAT(locinput);
1806 if (!REGINCLASSUTF8(scan, (U8*)locinput))
1808 if (locinput >= PL_regeol)
1810 locinput += PL_utf8skip[nextchr];
1811 nextchr = UCHARAT(locinput);
1816 nextchr = UCHARAT(locinput);
1817 if (!REGINCLASS(s, nextchr))
1819 if (!nextchr && locinput >= PL_regeol)
1821 nextchr = UCHARAT(++locinput);
1824 PL_reg_flags |= RF_tainted;
1829 if (!(OP(scan) == ALNUM
1830 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
1832 nextchr = UCHARAT(++locinput);
1835 PL_reg_flags |= RF_tainted;
1840 if (nextchr & 0x80) {
1841 if (!(OP(scan) == ALNUMUTF8
1842 ? swash_fetch(PL_utf8_alnum, (U8*)locinput)
1843 : isALNUM_LC_utf8((U8*)locinput)))
1847 locinput += PL_utf8skip[nextchr];
1848 nextchr = UCHARAT(locinput);
1851 if (!(OP(scan) == ALNUMUTF8
1852 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
1854 nextchr = UCHARAT(++locinput);
1857 PL_reg_flags |= RF_tainted;
1860 if (!nextchr && locinput >= PL_regeol)
1862 if (OP(scan) == NALNUM
1863 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
1865 nextchr = UCHARAT(++locinput);
1868 PL_reg_flags |= RF_tainted;
1871 if (!nextchr && locinput >= PL_regeol)
1873 if (nextchr & 0x80) {
1874 if (OP(scan) == NALNUMUTF8
1875 ? swash_fetch(PL_utf8_alnum, (U8*)locinput)
1876 : isALNUM_LC_utf8((U8*)locinput))
1880 locinput += PL_utf8skip[nextchr];
1881 nextchr = UCHARAT(locinput);
1884 if (OP(scan) == NALNUMUTF8
1885 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
1887 nextchr = UCHARAT(++locinput);
1891 PL_reg_flags |= RF_tainted;
1895 /* was last char in word? */
1896 ln = (locinput != PL_regbol) ? UCHARAT(locinput - 1) : PL_regprev;
1897 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
1899 n = isALNUM(nextchr);
1902 ln = isALNUM_LC(ln);
1903 n = isALNUM_LC(nextchr);
1905 if (((!ln) == (!n)) == (OP(scan) == BOUND || OP(scan) == BOUNDL))
1910 PL_reg_flags |= RF_tainted;
1914 /* was last char in word? */
1915 ln = (locinput != PL_regbol)
1916 ? utf8_to_uv(reghop((U8*)locinput, -1), 0) : PL_regprev;
1917 if (OP(scan) == BOUNDUTF8 || OP(scan) == NBOUNDUTF8) {
1918 ln = isALNUM_uni(ln);
1919 n = swash_fetch(PL_utf8_alnum, (U8*)locinput);
1922 ln = isALNUM_LC_uni(ln);
1923 n = isALNUM_LC_utf8((U8*)locinput);
1925 if (((!ln) == (!n)) == (OP(scan) == BOUNDUTF8 || OP(scan) == BOUNDLUTF8))
1929 PL_reg_flags |= RF_tainted;
1932 if (!nextchr && locinput >= PL_regeol)
1934 if (!(OP(scan) == SPACE
1935 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
1937 nextchr = UCHARAT(++locinput);
1940 PL_reg_flags |= RF_tainted;
1943 if (!nextchr && locinput >= PL_regeol)
1945 if (nextchr & 0x80) {
1946 if (!(OP(scan) == SPACEUTF8
1947 ? swash_fetch(PL_utf8_space,(U8*)locinput)
1948 : isSPACE_LC_utf8((U8*)locinput)))
1952 locinput += PL_utf8skip[nextchr];
1953 nextchr = UCHARAT(locinput);
1956 if (!(OP(scan) == SPACEUTF8
1957 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
1959 nextchr = UCHARAT(++locinput);
1962 PL_reg_flags |= RF_tainted;
1967 if (OP(scan) == SPACE
1968 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
1970 nextchr = UCHARAT(++locinput);
1973 PL_reg_flags |= RF_tainted;
1978 if (nextchr & 0x80) {
1979 if (OP(scan) == NSPACEUTF8
1980 ? swash_fetch(PL_utf8_space,(U8*)locinput)
1981 : isSPACE_LC_utf8((U8*)locinput))
1985 locinput += PL_utf8skip[nextchr];
1986 nextchr = UCHARAT(locinput);
1989 if (OP(scan) == NSPACEUTF8
1990 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
1992 nextchr = UCHARAT(++locinput);
1995 PL_reg_flags |= RF_tainted;
1998 if (!nextchr && locinput >= PL_regeol)
2000 if (!(OP(scan) == DIGIT
2001 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
2003 nextchr = UCHARAT(++locinput);
2006 PL_reg_flags |= RF_tainted;
2011 if (nextchr & 0x80) {
2012 if (OP(scan) == NDIGITUTF8
2013 ? swash_fetch(PL_utf8_digit,(U8*)locinput)
2014 : isDIGIT_LC_utf8((U8*)locinput))
2018 locinput += PL_utf8skip[nextchr];
2019 nextchr = UCHARAT(locinput);
2022 if (!isDIGIT(nextchr))
2024 nextchr = UCHARAT(++locinput);
2027 PL_reg_flags |= RF_tainted;
2032 if (OP(scan) == DIGIT
2033 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
2035 nextchr = UCHARAT(++locinput);
2038 PL_reg_flags |= RF_tainted;
2041 if (!nextchr && locinput >= PL_regeol)
2043 if (nextchr & 0x80) {
2044 if (swash_fetch(PL_utf8_digit,(U8*)locinput))
2046 locinput += PL_utf8skip[nextchr];
2047 nextchr = UCHARAT(locinput);
2050 if (isDIGIT(nextchr))
2052 nextchr = UCHARAT(++locinput);
2055 if (locinput >= PL_regeol || swash_fetch(PL_utf8_mark,(U8*)locinput))
2057 locinput += PL_utf8skip[nextchr];
2058 while (locinput < PL_regeol && swash_fetch(PL_utf8_mark,(U8*)locinput))
2059 locinput += UTF8SKIP(locinput);
2060 if (locinput > PL_regeol)
2062 nextchr = UCHARAT(locinput);
2065 PL_reg_flags |= RF_tainted;
2069 n = ARG(scan); /* which paren pair */
2070 ln = PL_regstartp[n];
2071 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
2072 if (*PL_reglastparen < n || ln == -1)
2073 sayNO; /* Do not match unless seen CLOSEn. */
2074 if (ln == PL_regendp[n])
2078 if (UTF && OP(scan) != REF) { /* REF can do byte comparison */
2080 char *e = PL_bostr + PL_regendp[n];
2082 * Note that we can't do the "other character" lookup trick as
2083 * in the 8-bit case (no pun intended) because in Unicode we
2084 * have to map both upper and title case to lower case.
2086 if (OP(scan) == REFF) {
2090 if (toLOWER_utf8((U8*)s) != toLOWER_utf8((U8*)l))
2100 if (toLOWER_LC_utf8((U8*)s) != toLOWER_LC_utf8((U8*)l))
2107 nextchr = UCHARAT(locinput);
2111 /* Inline the first character, for speed. */
2112 if (UCHARAT(s) != nextchr &&
2114 (UCHARAT(s) != ((OP(scan) == REFF
2115 ? PL_fold : PL_fold_locale)[nextchr]))))
2117 ln = PL_regendp[n] - ln;
2118 if (locinput + ln > PL_regeol)
2120 if (ln > 1 && (OP(scan) == REF
2121 ? memNE(s, locinput, ln)
2123 ? ibcmp(s, locinput, ln)
2124 : ibcmp_locale(s, locinput, ln))))
2127 nextchr = UCHARAT(locinput);
2138 OP_4tree *oop = PL_op;
2139 COP *ocurcop = PL_curcop;
2140 SV **ocurpad = PL_curpad;
2144 PL_op = (OP_4tree*)PL_regdata->data[n];
2145 DEBUG_r( PerlIO_printf(Perl_debug_log, " re_eval 0x%"UVxf"\n", (UV)PL_op) );
2146 PL_curpad = AvARRAY((AV*)PL_regdata->data[n + 2]);
2147 PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
2149 CALLRUNOPS(aTHX); /* Scalar context. */
2155 PL_curpad = ocurpad;
2156 PL_curcop = ocurcop;
2158 if (logical == 2) { /* Postponed subexpression. */
2160 MAGIC *mg = Null(MAGIC*);
2162 CHECKPOINT cp, lastcp;
2164 if(SvROK(ret) || SvRMAGICAL(ret)) {
2165 SV *sv = SvROK(ret) ? SvRV(ret) : ret;
2168 mg = mg_find(sv, 'r');
2171 re = (regexp *)mg->mg_obj;
2172 (void)ReREFCNT_inc(re);
2176 char *t = SvPV(ret, len);
2178 char *oprecomp = PL_regprecomp;
2179 I32 osize = PL_regsize;
2180 I32 onpar = PL_regnpar;
2183 re = CALLREGCOMP(aTHX_ t, t + len, &pm);
2185 & (SVs_TEMP | SVs_PADTMP | SVf_READONLY)))
2186 sv_magic(ret,(SV*)ReREFCNT_inc(re),'r',0,0);
2187 PL_regprecomp = oprecomp;
2192 PerlIO_printf(Perl_debug_log,
2193 "Entering embedded `%s%.60s%s%s'\n",
2197 (strlen(re->precomp) > 60 ? "..." : ""))
2200 state.prev = PL_reg_call_cc;
2201 state.cc = PL_regcc;
2202 state.re = PL_reg_re;
2206 cp = regcppush(0); /* Save *all* the positions. */
2209 state.ss = PL_savestack_ix;
2210 *PL_reglastparen = 0;
2211 PL_reg_call_cc = &state;
2212 PL_reginput = locinput;
2214 /* XXXX This is too dramatic a measure... */
2217 if (regmatch(re->program + 1)) {
2218 /* Even though we succeeded, we need to restore
2219 global variables, since we may be wrapped inside
2220 SUSPEND, thus the match may be not finished yet. */
2222 /* XXXX Do this only if SUSPENDed? */
2223 PL_reg_call_cc = state.prev;
2224 PL_regcc = state.cc;
2225 PL_reg_re = state.re;
2226 cache_re(PL_reg_re);
2228 /* XXXX This is too dramatic a measure... */
2231 /* These are needed even if not SUSPEND. */
2239 PL_reg_call_cc = state.prev;
2240 PL_regcc = state.cc;
2241 PL_reg_re = state.re;
2242 cache_re(PL_reg_re);
2244 /* XXXX This is too dramatic a measure... */
2253 sv_setsv(save_scalar(PL_replgv), ret);
2257 n = ARG(scan); /* which paren pair */
2258 PL_reg_start_tmp[n] = locinput;
2263 n = ARG(scan); /* which paren pair */
2264 PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
2265 PL_regendp[n] = locinput - PL_bostr;
2266 if (n > *PL_reglastparen)
2267 *PL_reglastparen = n;
2270 n = ARG(scan); /* which paren pair */
2271 sw = (*PL_reglastparen >= n && PL_regendp[n] != -1);
2274 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
2276 next = NEXTOPER(NEXTOPER(scan));
2278 next = scan + ARG(scan);
2279 if (OP(next) == IFTHEN) /* Fake one. */
2280 next = NEXTOPER(NEXTOPER(next));
2284 logical = scan->flags;
2286 /*******************************************************************
2287 PL_regcc contains infoblock about the innermost (...)* loop, and
2288 a pointer to the next outer infoblock.
2290 Here is how Y(A)*Z is processed (if it is compiled into CURLYX/WHILEM):
2292 1) After matching X, regnode for CURLYX is processed;
2294 2) This regnode creates infoblock on the stack, and calls
2295 regmatch() recursively with the starting point at WHILEM node;
2297 3) Each hit of WHILEM node tries to match A and Z (in the order
2298 depending on the current iteration, min/max of {min,max} and
2299 greediness). The information about where are nodes for "A"
2300 and "Z" is read from the infoblock, as is info on how many times "A"
2301 was already matched, and greediness.
2303 4) After A matches, the same WHILEM node is hit again.
2305 5) Each time WHILEM is hit, PL_regcc is the infoblock created by CURLYX
2306 of the same pair. Thus when WHILEM tries to match Z, it temporarily
2307 resets PL_regcc, since this Y(A)*Z can be a part of some other loop:
2308 as in (Y(A)*Z)*. If Z matches, the automaton will hit the WHILEM node
2309 of the external loop.
2311 Currently present infoblocks form a tree with a stem formed by PL_curcc
2312 and whatever it mentions via ->next, and additional attached trees
2313 corresponding to temporarily unset infoblocks as in "5" above.
2315 In the following picture infoblocks for outer loop of
2316 (Y(A)*?Z)*?T are denoted O, for inner I. NULL starting block
2317 is denoted by x. The matched string is YAAZYAZT. Temporarily postponed
2318 infoblocks are drawn below the "reset" infoblock.
2320 In fact in the picture below we do not show failed matches for Z and T
2321 by WHILEM blocks. [We illustrate minimal matches, since for them it is
2322 more obvious *why* one needs to *temporary* unset infoblocks.]
2324 Matched REx position InfoBlocks Comment
2328 Y A)*?Z)*?T x <- O <- I
2329 YA )*?Z)*?T x <- O <- I
2330 YA A)*?Z)*?T x <- O <- I
2331 YAA )*?Z)*?T x <- O <- I
2332 YAA Z)*?T x <- O # Temporary unset I
2335 YAAZ Y(A)*?Z)*?T x <- O
2338 YAAZY (A)*?Z)*?T x <- O
2341 YAAZY A)*?Z)*?T x <- O <- I
2344 YAAZYA )*?Z)*?T x <- O <- I
2347 YAAZYA Z)*?T x <- O # Temporary unset I
2353 YAAZYAZ T x # Temporary unset O
2360 *******************************************************************/
2363 CHECKPOINT cp = PL_savestack_ix;
2365 if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
2367 cc.oldcc = PL_regcc;
2369 cc.parenfloor = *PL_reglastparen;
2371 cc.min = ARG1(scan);
2372 cc.max = ARG2(scan);
2373 cc.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
2377 PL_reginput = locinput;
2378 n = regmatch(PREVOPER(next)); /* start on the WHILEM */
2380 PL_regcc = cc.oldcc;
2386 * This is really hard to understand, because after we match
2387 * what we're trying to match, we must make sure the rest of
2388 * the REx is going to match for sure, and to do that we have
2389 * to go back UP the parse tree by recursing ever deeper. And
2390 * if it fails, we have to reset our parent's current state
2391 * that we can try again after backing off.
2394 CHECKPOINT cp, lastcp;
2395 CURCUR* cc = PL_regcc;
2396 char *lastloc = cc->lastloc; /* Detection of 0-len. */
2398 n = cc->cur + 1; /* how many we know we matched */
2399 PL_reginput = locinput;
2402 PerlIO_printf(Perl_debug_log,
2403 "%*s %ld out of %ld..%ld cc=%lx\n",
2404 REPORT_CODE_OFF+PL_regindent*2, "",
2405 (long)n, (long)cc->min,
2406 (long)cc->max, (long)cc)
2409 /* If degenerate scan matches "", assume scan done. */
2411 if (locinput == cc->lastloc && n >= cc->min) {
2412 PL_regcc = cc->oldcc;
2416 PerlIO_printf(Perl_debug_log,
2417 "%*s empty match detected, try continuation...\n",
2418 REPORT_CODE_OFF+PL_regindent*2, "")
2420 if (regmatch(cc->next))
2428 /* First just match a string of min scans. */
2432 cc->lastloc = locinput;
2433 if (regmatch(cc->scan))
2436 cc->lastloc = lastloc;
2441 /* Check whether we already were at this position.
2442 Postpone detection until we know the match is not
2443 *that* much linear. */
2444 if (!PL_reg_maxiter) {
2445 PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
2446 PL_reg_leftiter = PL_reg_maxiter;
2448 if (PL_reg_leftiter-- == 0) {
2449 I32 size = (PL_reg_maxiter + 7)/8;
2450 if (PL_reg_poscache) {
2451 if (PL_reg_poscache_size < size) {
2452 Renew(PL_reg_poscache, size, char);
2453 PL_reg_poscache_size = size;
2455 Zero(PL_reg_poscache, size, char);
2458 PL_reg_poscache_size = size;
2459 Newz(29, PL_reg_poscache, size, char);
2462 PerlIO_printf(Perl_debug_log,
2463 "%sDetected a super-linear match, switching on caching%s...\n",
2464 PL_colors[4], PL_colors[5])
2467 if (PL_reg_leftiter < 0) {
2468 I32 o = locinput - PL_bostr, b;
2470 o = (scan->flags & 0xf) - 1 + o * (scan->flags>>4);
2473 if (PL_reg_poscache[o] & (1<<b)) {
2475 PerlIO_printf(Perl_debug_log,
2476 "%*s already tried at this position...\n",
2477 REPORT_CODE_OFF+PL_regindent*2, "")
2481 PL_reg_poscache[o] |= (1<<b);
2485 /* Prefer next over scan for minimal matching. */
2488 PL_regcc = cc->oldcc;
2491 cp = regcppush(cc->parenfloor);
2493 if (regmatch(cc->next)) {
2495 sayYES; /* All done. */
2503 if (n >= cc->max) { /* Maximum greed exceeded? */
2504 if (ckWARN(WARN_UNSAFE) && n >= REG_INFTY
2505 && !(PL_reg_flags & RF_warned)) {
2506 PL_reg_flags |= RF_warned;
2507 Perl_warner(aTHX_ WARN_UNSAFE, "%s limit (%d) exceeded",
2508 "Complex regular subexpression recursion",
2515 PerlIO_printf(Perl_debug_log,
2516 "%*s trying longer...\n",
2517 REPORT_CODE_OFF+PL_regindent*2, "")
2519 /* Try scanning more and see if it helps. */
2520 PL_reginput = locinput;
2522 cc->lastloc = locinput;
2523 cp = regcppush(cc->parenfloor);
2525 if (regmatch(cc->scan)) {
2532 cc->lastloc = lastloc;
2536 /* Prefer scan over next for maximal matching. */
2538 if (n < cc->max) { /* More greed allowed? */
2539 cp = regcppush(cc->parenfloor);
2541 cc->lastloc = locinput;
2543 if (regmatch(cc->scan)) {
2548 regcppop(); /* Restore some previous $<digit>s? */
2549 PL_reginput = locinput;
2551 PerlIO_printf(Perl_debug_log,
2552 "%*s failed, try continuation...\n",
2553 REPORT_CODE_OFF+PL_regindent*2, "")
2556 if (ckWARN(WARN_UNSAFE) && n >= REG_INFTY
2557 && !(PL_reg_flags & RF_warned)) {
2558 PL_reg_flags |= RF_warned;
2559 Perl_warner(aTHX_ WARN_UNSAFE, "%s limit (%d) exceeded",
2560 "Complex regular subexpression recursion",
2564 /* Failed deeper matches of scan, so see if this one works. */
2565 PL_regcc = cc->oldcc;
2568 if (regmatch(cc->next))
2574 cc->lastloc = lastloc;
2579 next = scan + ARG(scan);
2582 inner = NEXTOPER(NEXTOPER(scan));
2585 inner = NEXTOPER(scan);
2590 if (OP(next) != c1) /* No choice. */
2591 next = inner; /* Avoid recursion. */
2593 int lastparen = *PL_reglastparen;
2597 PL_reginput = locinput;
2598 if (regmatch(inner))
2601 for (n = *PL_reglastparen; n > lastparen; n--)
2603 *PL_reglastparen = n;
2606 if (n = (c1 == BRANCH ? NEXT_OFF(next) : ARG(next)))
2610 inner = NEXTOPER(scan);
2611 if (c1 == BRANCHJ) {
2612 inner = NEXTOPER(inner);
2614 } while (scan != NULL && OP(scan) == c1);
2628 /* We suppose that the next guy does not need
2629 backtracking: in particular, it is of constant length,
2630 and has no parenths to influence future backrefs. */
2631 ln = ARG1(scan); /* min to match */
2632 n = ARG2(scan); /* max to match */
2633 paren = scan->flags;
2635 if (paren > PL_regsize)
2637 if (paren > *PL_reglastparen)
2638 *PL_reglastparen = paren;
2640 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
2642 scan += NEXT_OFF(scan); /* Skip former OPEN. */
2643 PL_reginput = locinput;
2646 if (ln && regrepeat_hard(scan, ln, &l) < ln)
2648 if (ln && l == 0 && n >= ln
2649 /* In fact, this is tricky. If paren, then the
2650 fact that we did/didnot match may influence
2651 future execution. */
2652 && !(paren && ln == 0))
2654 locinput = PL_reginput;
2655 if (PL_regkind[(U8)OP(next)] == EXACT) {
2656 c1 = (U8)*STRING(next);
2657 if (OP(next) == EXACTF)
2659 else if (OP(next) == EXACTFL)
2660 c2 = PL_fold_locale[c1];
2667 /* This may be improved if l == 0. */
2668 while (n >= ln || (n == REG_INFTY && ln > 0 && l)) { /* ln overflow ? */
2669 /* If it could work, try it. */
2671 UCHARAT(PL_reginput) == c1 ||
2672 UCHARAT(PL_reginput) == c2)
2676 PL_regstartp[paren] =
2677 HOPc(PL_reginput, -l) - PL_bostr;
2678 PL_regendp[paren] = PL_reginput - PL_bostr;
2681 PL_regendp[paren] = -1;
2687 /* Couldn't or didn't -- move forward. */
2688 PL_reginput = locinput;
2689 if (regrepeat_hard(scan, 1, &l)) {
2691 locinput = PL_reginput;
2698 n = regrepeat_hard(scan, n, &l);
2699 if (n != 0 && l == 0
2700 /* In fact, this is tricky. If paren, then the
2701 fact that we did/didnot match may influence
2702 future execution. */
2703 && !(paren && ln == 0))
2705 locinput = PL_reginput;
2707 PerlIO_printf(Perl_debug_log,
2708 "%*s matched %d times, len=%"IVdf"...\n",
2709 (int)(REPORT_CODE_OFF+PL_regindent*2), "",
2713 if (PL_regkind[(U8)OP(next)] == EXACT) {
2714 c1 = (U8)*STRING(next);
2715 if (OP(next) == EXACTF)
2717 else if (OP(next) == EXACTFL)
2718 c2 = PL_fold_locale[c1];
2727 /* If it could work, try it. */
2729 UCHARAT(PL_reginput) == c1 ||
2730 UCHARAT(PL_reginput) == c2)
2733 PerlIO_printf(Perl_debug_log,
2734 "%*s trying tail with n=%"IVdf"...\n",
2735 (int)(REPORT_CODE_OFF+PL_regindent*2), "", (IV)n)
2739 PL_regstartp[paren] = HOPc(PL_reginput, -l) - PL_bostr;
2740 PL_regendp[paren] = PL_reginput - PL_bostr;
2743 PL_regendp[paren] = -1;
2749 /* Couldn't or didn't -- back up. */
2751 locinput = HOPc(locinput, -l);
2752 PL_reginput = locinput;
2759 paren = scan->flags; /* Which paren to set */
2760 if (paren > PL_regsize)
2762 if (paren > *PL_reglastparen)
2763 *PL_reglastparen = paren;
2764 ln = ARG1(scan); /* min to match */
2765 n = ARG2(scan); /* max to match */
2766 scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
2770 ln = ARG1(scan); /* min to match */
2771 n = ARG2(scan); /* max to match */
2772 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
2777 scan = NEXTOPER(scan);
2783 scan = NEXTOPER(scan);
2787 * Lookahead to avoid useless match attempts
2788 * when we know what character comes next.
2790 if (PL_regkind[(U8)OP(next)] == EXACT) {
2791 c1 = (U8)*STRING(next);
2792 if (OP(next) == EXACTF)
2794 else if (OP(next) == EXACTFL)
2795 c2 = PL_fold_locale[c1];
2801 PL_reginput = locinput;
2805 if (ln && regrepeat(scan, ln) < ln)
2807 locinput = PL_reginput;
2810 char *e = locinput + n - ln; /* Should not check after this */
2811 char *old = locinput;
2813 if (e >= PL_regeol || (n == REG_INFTY))
2816 /* Find place 'next' could work */
2818 while (locinput <= e && *locinput != c1)
2821 while (locinput <= e
2828 /* PL_reginput == old now */
2829 if (locinput != old) {
2830 ln = 1; /* Did some */
2831 if (regrepeat(scan, locinput - old) <
2835 /* PL_reginput == locinput now */
2838 PL_regstartp[paren] = HOPc(locinput, -1) - PL_bostr;
2839 PL_regendp[paren] = locinput - PL_bostr;
2842 PL_regendp[paren] = -1;
2846 PL_reginput = locinput; /* Could be reset... */
2848 /* Couldn't or didn't -- move forward. */
2853 while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */
2854 /* If it could work, try it. */
2856 UCHARAT(PL_reginput) == c1 ||
2857 UCHARAT(PL_reginput) == c2)
2861 PL_regstartp[paren] = HOPc(PL_reginput, -1) - PL_bostr;
2862 PL_regendp[paren] = PL_reginput - PL_bostr;
2865 PL_regendp[paren] = -1;
2871 /* Couldn't or didn't -- move forward. */
2872 PL_reginput = locinput;
2873 if (regrepeat(scan, 1)) {
2875 locinput = PL_reginput;
2883 n = regrepeat(scan, n);
2884 locinput = PL_reginput;
2885 if (ln < n && PL_regkind[(U8)OP(next)] == EOL &&
2886 (!PL_multiline || OP(next) == SEOL))
2887 ln = n; /* why back off? */
2891 /* If it could work, try it. */
2893 UCHARAT(PL_reginput) == c1 ||
2894 UCHARAT(PL_reginput) == c2)
2898 PL_regstartp[paren] = HOPc(PL_reginput, -1) - PL_bostr;
2899 PL_regendp[paren] = PL_reginput - PL_bostr;
2902 PL_regendp[paren] = -1;
2908 /* Couldn't or didn't -- back up. */
2910 PL_reginput = locinput = HOPc(locinput, -1);
2915 /* If it could work, try it. */
2917 UCHARAT(PL_reginput) == c1 ||
2918 UCHARAT(PL_reginput) == c2)
2924 /* Couldn't or didn't -- back up. */
2926 PL_reginput = locinput = HOPc(locinput, -1);
2933 if (PL_reg_call_cc) {
2934 re_cc_state *cur_call_cc = PL_reg_call_cc;
2935 CURCUR *cctmp = PL_regcc;
2936 regexp *re = PL_reg_re;
2937 CHECKPOINT cp, lastcp;
2939 cp = regcppush(0); /* Save *all* the positions. */
2941 regcp_set_to(PL_reg_call_cc->ss); /* Restore parens of
2943 PL_reginput = locinput; /* Make position available to
2945 cache_re(PL_reg_call_cc->re);
2946 PL_regcc = PL_reg_call_cc->cc;
2947 PL_reg_call_cc = PL_reg_call_cc->prev;
2948 if (regmatch(cur_call_cc->node)) {
2949 PL_reg_call_cc = cur_call_cc;
2955 PL_reg_call_cc = cur_call_cc;
2961 PerlIO_printf(Perl_debug_log,
2962 "%*s continuation failed...\n",
2963 REPORT_CODE_OFF+PL_regindent*2, "")
2967 if (locinput < PL_regtill) {
2968 DEBUG_r(PerlIO_printf(Perl_debug_log,
2969 "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
2971 (long)(locinput - PL_reg_starttry),
2972 (long)(PL_regtill - PL_reg_starttry),
2974 sayNO_FINAL; /* Cannot match: too short. */
2976 PL_reginput = locinput; /* put where regtry can find it */
2977 sayYES_FINAL; /* Success! */
2979 PL_reginput = locinput; /* put where regtry can find it */
2980 sayYES_LOUD; /* Success! */
2983 PL_reginput = locinput;
2988 if (UTF) { /* XXXX This is absolutely
2989 broken, we read before
2991 s = HOPMAYBEc(locinput, -scan->flags);
2997 if (locinput < PL_bostr + scan->flags)
2999 PL_reginput = locinput - scan->flags;
3004 PL_reginput = locinput;
3009 if (UTF) { /* XXXX This is absolutely
3010 broken, we read before
3012 s = HOPMAYBEc(locinput, -scan->flags);
3013 if (!s || s < PL_bostr)
3018 if (locinput < PL_bostr + scan->flags)
3020 PL_reginput = locinput - scan->flags;
3025 PL_reginput = locinput;
3028 inner = NEXTOPER(NEXTOPER(scan));
3029 if (regmatch(inner) != n) {
3044 if (OP(scan) == SUSPEND) {
3045 locinput = PL_reginput;
3046 nextchr = UCHARAT(locinput);
3051 next = scan + ARG(scan);
3056 PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
3057 (UV)scan, OP(scan));
3058 Perl_croak(aTHX_ "regexp memory corruption");
3064 * We get here only if there's trouble -- normally "case END" is
3065 * the terminating point.
3067 Perl_croak(aTHX_ "corrupted regexp pointers");
3073 PerlIO_printf(Perl_debug_log,
3074 "%*s %scould match...%s\n",
3075 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],PL_colors[5])
3079 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
3080 PL_colors[4],PL_colors[5]));
3089 PerlIO_printf(Perl_debug_log,
3090 "%*s %sfailed...%s\n",
3091 REPORT_CODE_OFF+PL_regindent*2, "",PL_colors[4],PL_colors[5])
3103 - regrepeat - repeatedly match something simple, report how many
3106 * [This routine now assumes that it will only match on things of length 1.
3107 * That was true before, but now we assume scan - reginput is the count,
3108 * rather than incrementing count on every character. [Er, except utf8.]]
3111 S_regrepeat(pTHX_ regnode *p, I32 max)
3114 register char *scan;
3115 register char *opnd;
3117 register char *loceol = PL_regeol;
3118 register I32 hardcount = 0;
3121 if (max != REG_INFTY && max < loceol - scan)
3122 loceol = scan + max;
3125 while (scan < loceol && *scan != '\n')
3133 while (scan < loceol && *scan != '\n') {
3134 scan += UTF8SKIP(scan);
3140 while (scan < loceol) {
3141 scan += UTF8SKIP(scan);
3145 case EXACT: /* length of string is 1 */
3147 while (scan < loceol && UCHARAT(scan) == c)
3150 case EXACTF: /* length of string is 1 */
3152 while (scan < loceol &&
3153 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
3156 case EXACTFL: /* length of string is 1 */
3157 PL_reg_flags |= RF_tainted;
3159 while (scan < loceol &&
3160 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
3165 while (scan < loceol && REGINCLASSUTF8(p, (U8*)scan)) {
3166 scan += UTF8SKIP(scan);
3172 while (scan < loceol && REGINCLASS(opnd, *scan))
3176 while (scan < loceol && isALNUM(*scan))
3181 while (scan < loceol && swash_fetch(PL_utf8_alnum, (U8*)scan)) {
3182 scan += UTF8SKIP(scan);
3187 PL_reg_flags |= RF_tainted;
3188 while (scan < loceol && isALNUM_LC(*scan))
3192 PL_reg_flags |= RF_tainted;
3194 while (scan < loceol && isALNUM_LC_utf8((U8*)scan)) {
3195 scan += UTF8SKIP(scan);
3201 while (scan < loceol && !isALNUM(*scan))
3206 while (scan < loceol && !swash_fetch(PL_utf8_alnum, (U8*)scan)) {
3207 scan += UTF8SKIP(scan);
3212 PL_reg_flags |= RF_tainted;
3213 while (scan < loceol && !isALNUM_LC(*scan))
3217 PL_reg_flags |= RF_tainted;
3219 while (scan < loceol && !isALNUM_LC_utf8((U8*)scan)) {
3220 scan += UTF8SKIP(scan);
3225 while (scan < loceol && isSPACE(*scan))
3230 while (scan < loceol && (*scan == ' ' || swash_fetch(PL_utf8_space,(U8*)scan))) {
3231 scan += UTF8SKIP(scan);
3236 PL_reg_flags |= RF_tainted;
3237 while (scan < loceol && isSPACE_LC(*scan))
3241 PL_reg_flags |= RF_tainted;
3243 while (scan < loceol && (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
3244 scan += UTF8SKIP(scan);
3249 while (scan < loceol && !isSPACE(*scan))
3254 while (scan < loceol && !(*scan == ' ' || swash_fetch(PL_utf8_space,(U8*)scan))) {
3255 scan += UTF8SKIP(scan);
3260 PL_reg_flags |= RF_tainted;
3261 while (scan < loceol && !isSPACE_LC(*scan))
3265 PL_reg_flags |= RF_tainted;
3267 while (scan < loceol && !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
3268 scan += UTF8SKIP(scan);
3273 while (scan < loceol && isDIGIT(*scan))
3278 while (scan < loceol && swash_fetch(PL_utf8_digit,(U8*)scan)) {
3279 scan += UTF8SKIP(scan);
3285 while (scan < loceol && !isDIGIT(*scan))
3290 while (scan < loceol && !swash_fetch(PL_utf8_digit,(U8*)scan)) {
3291 scan += UTF8SKIP(scan);
3295 default: /* Called on something of 0 width. */
3296 break; /* So match right here or not at all. */
3302 c = scan - PL_reginput;
3307 SV *prop = sv_newmortal();
3310 PerlIO_printf(Perl_debug_log,
3311 "%*s %s can match %"IVdf" times out of %"IVdf"...\n",
3312 REPORT_CODE_OFF+1, "", SvPVX(prop),(IV)c,(IV)max);
3319 - regrepeat_hard - repeatedly match something, report total lenth and length
3321 * The repeater is supposed to have constant length.
3325 S_regrepeat_hard(pTHX_ regnode *p, I32 max, I32 *lp)
3328 register char *scan;
3329 register char *start;
3330 register char *loceol = PL_regeol;
3332 I32 count = 0, res = 1;
3337 start = PL_reginput;
3339 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
3342 while (start < PL_reginput) {
3344 start += UTF8SKIP(start);
3355 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
3357 *lp = l = PL_reginput - start;
3358 if (max != REG_INFTY && l*max < loceol - scan)
3359 loceol = scan + l*max;
3372 - reginclass - determine if a character falls into a character class
3376 S_reginclass(pTHX_ register char *p, register I32 c)
3379 char flags = ANYOF_FLAGS(p);
3383 if (ANYOF_BITMAP_TEST(p, c))
3385 else if (flags & ANYOF_FOLD) {
3387 if (flags & ANYOF_LOCALE) {
3388 PL_reg_flags |= RF_tainted;
3389 cf = PL_fold_locale[c];
3393 if (ANYOF_BITMAP_TEST(p, cf))
3397 if (!match && (flags & ANYOF_CLASS)) {
3398 PL_reg_flags |= RF_tainted;
3400 (ANYOF_CLASS_TEST(p, ANYOF_ALNUM) && isALNUM_LC(c)) ||
3401 (ANYOF_CLASS_TEST(p, ANYOF_NALNUM) && !isALNUM_LC(c)) ||
3402 (ANYOF_CLASS_TEST(p, ANYOF_SPACE) && isSPACE_LC(c)) ||
3403 (ANYOF_CLASS_TEST(p, ANYOF_NSPACE) && !isSPACE_LC(c)) ||
3404 (ANYOF_CLASS_TEST(p, ANYOF_DIGIT) && isDIGIT_LC(c)) ||
3405 (ANYOF_CLASS_TEST(p, ANYOF_NDIGIT) && !isDIGIT_LC(c)) ||
3406 (ANYOF_CLASS_TEST(p, ANYOF_ALNUMC) && isALNUMC_LC(c)) ||
3407 (ANYOF_CLASS_TEST(p, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
3408 (ANYOF_CLASS_TEST(p, ANYOF_ALPHA) && isALPHA_LC(c)) ||
3409 (ANYOF_CLASS_TEST(p, ANYOF_NALPHA) && !isALPHA_LC(c)) ||
3410 (ANYOF_CLASS_TEST(p, ANYOF_ASCII) && isASCII(c)) ||
3411 (ANYOF_CLASS_TEST(p, ANYOF_NASCII) && !isASCII(c)) ||
3412 (ANYOF_CLASS_TEST(p, ANYOF_CNTRL) && isCNTRL_LC(c)) ||
3413 (ANYOF_CLASS_TEST(p, ANYOF_NCNTRL) && !isCNTRL_LC(c)) ||
3414 (ANYOF_CLASS_TEST(p, ANYOF_GRAPH) && isGRAPH_LC(c)) ||
3415 (ANYOF_CLASS_TEST(p, ANYOF_NGRAPH) && !isGRAPH_LC(c)) ||
3416 (ANYOF_CLASS_TEST(p, ANYOF_LOWER) && isLOWER_LC(c)) ||
3417 (ANYOF_CLASS_TEST(p, ANYOF_NLOWER) && !isLOWER_LC(c)) ||
3418 (ANYOF_CLASS_TEST(p, ANYOF_PRINT) && isPRINT_LC(c)) ||
3419 (ANYOF_CLASS_TEST(p, ANYOF_NPRINT) && !isPRINT_LC(c)) ||
3420 (ANYOF_CLASS_TEST(p, ANYOF_PUNCT) && isPUNCT_LC(c)) ||
3421 (ANYOF_CLASS_TEST(p, ANYOF_NPUNCT) && !isPUNCT_LC(c)) ||
3422 (ANYOF_CLASS_TEST(p, ANYOF_UPPER) && isUPPER_LC(c)) ||
3423 (ANYOF_CLASS_TEST(p, ANYOF_NUPPER) && !isUPPER_LC(c)) ||
3424 (ANYOF_CLASS_TEST(p, ANYOF_XDIGIT) && isXDIGIT(c)) ||
3425 (ANYOF_CLASS_TEST(p, ANYOF_NXDIGIT) && !isXDIGIT(c))
3426 ) /* How's that for a conditional? */
3432 return (flags & ANYOF_INVERT) ? !match : match;
3436 S_reginclassutf8(pTHX_ regnode *f, U8 *p)
3439 char flags = ARG1(f);
3441 SV *sv = (SV*)PL_regdata->data[ARG2(f)];
3443 if (swash_fetch(sv, p))
3445 else if (flags & ANYOF_FOLD) {
3448 if (flags & ANYOF_LOCALE) {
3449 PL_reg_flags |= RF_tainted;
3450 uv_to_utf8(tmpbuf, toLOWER_LC_utf8(p));
3453 uv_to_utf8(tmpbuf, toLOWER_utf8(p));
3454 if (swash_fetch(sv, tmpbuf))
3458 /* UTF8 combined with ANYOF_CLASS is ill-defined. */
3460 return (flags & ANYOF_INVERT) ? !match : match;
3464 S_reghop(pTHX_ U8 *s, I32 off)
3468 while (off-- && s < (U8*)PL_regeol)
3473 if (s > (U8*)PL_bostr) {
3476 while (s > (U8*)PL_bostr && (*s & 0xc0) == 0x80)
3478 } /* XXX could check well-formedness here */
3486 S_reghopmaybe(pTHX_ U8* s, I32 off)
3490 while (off-- && s < (U8*)PL_regeol)
3497 if (s > (U8*)PL_bostr) {
3500 while (s > (U8*)PL_bostr && (*s & 0xc0) == 0x80)
3502 } /* XXX could check well-formedness here */
3519 restore_pos(pTHXo_ void *arg)
3522 if (PL_reg_eval_set) {
3523 if (PL_reg_oldsaved) {
3524 PL_reg_re->subbeg = PL_reg_oldsaved;
3525 PL_reg_re->sublen = PL_reg_oldsavedlen;
3526 RX_MATCH_COPIED_on(PL_reg_re);
3528 PL_reg_magic->mg_len = PL_reg_oldpos;
3529 PL_reg_eval_set = 0;
3530 PL_curpm = PL_reg_oldcurpm;