5 * "One Ring to rule them all, One Ring to find them..."
8 /* NOTE: this is derived from Henry Spencer's regexp code, and should not
9 * confused with the original package (see point 3 below). Thanks, Henry!
12 /* Additional note: this code is very heavily munged from Henry's version
13 * in places. In some spots I've traded clarity for efficiency, so don't
14 * blame Henry for some of the lack of readability.
17 /* The names of the functions have been changed from regcomp and
18 * regexec to pregcomp and pregexec in order to avoid conflicts
19 * with the POSIX routines of the same names.
22 #ifdef PERL_EXT_RE_BUILD
23 /* need to replace pregcomp et al, so enable that */
24 # ifndef PERL_IN_XSUB_RE
25 # define PERL_IN_XSUB_RE
27 /* need access to debugger hooks */
28 # if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING)
33 #ifdef PERL_IN_XSUB_RE
34 /* We *really* need to overwrite these symbols: */
35 # define Perl_regexec_flags my_regexec
36 # define Perl_regdump my_regdump
37 # define Perl_regprop my_regprop
38 # define Perl_re_intuit_start my_re_intuit_start
39 /* *These* symbols are masked to allow static link. */
40 # define Perl_pregexec my_pregexec
41 # define Perl_reginitcolors my_reginitcolors
43 # define PERL_NO_GET_CONTEXT
48 * pregcomp and pregexec -- regsub and regerror are not used in perl
50 * Copyright (c) 1986 by University of Toronto.
51 * Written by Henry Spencer. Not derived from licensed software.
53 * Permission is granted to anyone to use this software for any
54 * purpose on any computer system, and to redistribute it freely,
55 * subject to the following restrictions:
57 * 1. The author is not responsible for the consequences of use of
58 * this software, no matter how awful, even if they arise
61 * 2. The origin of this software must not be misrepresented, either
62 * by explicit claim or by omission.
64 * 3. Altered versions must be plainly marked as such, and must not
65 * be misrepresented as being the original software.
67 **** Alterations to Henry's code are...
69 **** Copyright (c) 1991-1999, Larry Wall
71 **** You may distribute under the terms of either the GNU General Public
72 **** License or the Artistic License, as specified in the README file.
74 * Beware that some of this code is subtly aware of the way operator
75 * precedence is structured in regular expressions. Serious changes in
76 * regular-expression syntax might require a total rethink.
79 #define PERL_IN_REGEXEC_C
82 #ifdef PERL_IN_XSUB_RE
83 # if defined(PERL_CAPI) || defined(PERL_OBJECT)
90 #define RF_tainted 1 /* tainted information used? */
91 #define RF_warned 2 /* warned about big count? */
92 #define RF_evaled 4 /* Did an EVAL with setting? */
93 #define RF_utf8 8 /* String contains multibyte chars? */
95 #define UTF (PL_reg_flags & RF_utf8)
97 #define RS_init 1 /* eval environment created */
98 #define RS_set 2 /* replsv value is set */
101 #define STATIC static
108 #define REGINCLASS(p,c) (ANYOF_FLAGS(p) ? reginclass(p,c) : ANYOF_BITMAP_TEST(p,c))
109 #define REGINCLASSUTF8(f,p) (ARG1(f) ? reginclassutf8(f,p) : swash_fetch((SV*)PL_regdata->data[ARG2(f)],p))
111 #define CHR_SVLEN(sv) (UTF ? sv_len_utf8(sv) : SvCUR(sv))
112 #define CHR_DIST(a,b) (UTF ? utf8_distance(a,b) : a - b)
114 #define reghop_c(pos,off) ((char*)reghop((U8*)pos, off))
115 #define reghopmaybe_c(pos,off) ((char*)reghopmaybe((U8*)pos, off))
116 #define HOP(pos,off) (UTF ? reghop((U8*)pos, off) : (U8*)(pos + off))
117 #define HOPMAYBE(pos,off) (UTF ? reghopmaybe((U8*)pos, off) : (U8*)(pos + off))
118 #define HOPc(pos,off) ((char*)HOP(pos,off))
119 #define HOPMAYBEc(pos,off) ((char*)HOPMAYBE(pos,off))
121 static void restore_pos(pTHXo_ void *arg);
125 S_regcppush(pTHX_ I32 parenfloor)
128 int retval = PL_savestack_ix;
129 int i = (PL_regsize - parenfloor) * 4;
133 for (p = PL_regsize; p > parenfloor; p--) {
134 SSPUSHINT(PL_regendp[p]);
135 SSPUSHINT(PL_regstartp[p]);
136 SSPUSHPTR(PL_reg_start_tmp[p]);
139 SSPUSHINT(PL_regsize);
140 SSPUSHINT(*PL_reglastparen);
141 SSPUSHPTR(PL_reginput);
143 SSPUSHINT(SAVEt_REGCONTEXT);
147 /* These are needed since we do not localize EVAL nodes: */
148 # define REGCP_SET DEBUG_r(PerlIO_printf(Perl_debug_log, \
149 " Setting an EVAL scope, savestack=%i\n", \
150 PL_savestack_ix)); lastcp = PL_savestack_ix
152 # define REGCP_UNWIND DEBUG_r(lastcp != PL_savestack_ix ? \
153 PerlIO_printf(Perl_debug_log, \
154 " Clearing an EVAL scope, savestack=%i..%i\n", \
155 lastcp, PL_savestack_ix) : 0); regcpblow(lastcp)
165 assert(i == SAVEt_REGCONTEXT);
167 input = (char *) SSPOPPTR;
168 *PL_reglastparen = SSPOPINT;
169 PL_regsize = SSPOPINT;
170 for (i -= 3; i > 0; i -= 4) {
171 paren = (U32)SSPOPINT;
172 PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
173 PL_regstartp[paren] = SSPOPINT;
175 if (paren <= *PL_reglastparen)
176 PL_regendp[paren] = tmps;
178 PerlIO_printf(Perl_debug_log,
179 " restoring \\%d to %d(%d)..%d%s\n",
180 paren, PL_regstartp[paren],
181 PL_reg_start_tmp[paren] - PL_bostr,
183 (paren > *PL_reglastparen ? "(no)" : ""));
187 if (*PL_reglastparen + 1 <= PL_regnpar) {
188 PerlIO_printf(Perl_debug_log,
189 " restoring \\%d..\\%d to undef\n",
190 *PL_reglastparen + 1, PL_regnpar);
193 for (paren = *PL_reglastparen + 1; paren <= PL_regnpar; paren++) {
194 if (paren > PL_regsize)
195 PL_regstartp[paren] = -1;
196 PL_regendp[paren] = -1;
202 S_regcp_set_to(pTHX_ I32 ss)
205 I32 tmp = PL_savestack_ix;
207 PL_savestack_ix = ss;
209 PL_savestack_ix = tmp;
213 typedef struct re_cc_state
217 struct re_cc_state *prev;
222 #define regcpblow(cp) LEAVE_SCOPE(cp)
225 * pregexec and friends
229 - pregexec - match a regexp against a string
232 Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend,
233 char *strbeg, I32 minend, SV *screamer, U32 nosave)
234 /* strend: pointer to null at end of string */
235 /* strbeg: real beginning of string */
236 /* minend: end of match must be >=minend after stringarg. */
237 /* nosave: For optimizations. */
240 regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
241 nosave ? 0 : REXEC_COPY_STR);
245 S_cache_re(pTHX_ regexp *prog)
248 PL_regprecomp = prog->precomp; /* Needed for FAIL. */
250 PL_regprogram = prog->program;
252 PL_regnpar = prog->nparens;
253 PL_regdata = prog->data;
258 * Need to implement the following flags for reg_anch:
260 * USE_INTUIT_NOML - Useful to call re_intuit_start() first
262 * INTUIT_AUTORITATIVE_NOML - Can trust a positive answer
263 * INTUIT_AUTORITATIVE_ML
264 * INTUIT_ONCE_NOML - Intuit can match in one location only.
267 * Another flag for this function: SECOND_TIME (so that float substrs
268 * with giant delta may be not rechecked).
271 /* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
273 /* If SCREAM, then SvPVX(sv) should be compatible with strpos and strend.
274 Otherwise, only SvCUR(sv) is used to get strbeg. */
276 /* XXXX We assume that strpos is strbeg unless sv. */
278 /* A failure to find a constant substring means that there is no need to make
279 an expensive call to REx engine, thus we celebrate a failure. Similarly,
280 finding a substring too deep into the string means that less calls to
281 regtry() should be needed.
283 REx compiler's optimizer found 4 possible hints:
284 a) Anchored substring;
286 c) Whether we are anchored (beginning-of-line or \G);
287 d) First node (of those at offset 0) which may distingush positions;
288 We use 'a', 'b', multiline-part of 'c', and try to find a position in the
289 string which does not contradict any of them.
293 Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
294 char *strend, U32 flags, re_scream_pos_data *data)
296 register I32 start_shift;
297 /* Should be nonnegative! */
298 register I32 end_shift;
304 register char *other_last = Nullch;
306 char *i_strpos = strpos;
309 DEBUG_r( if (!PL_colorset) reginitcolors() );
310 DEBUG_r(PerlIO_printf(Perl_debug_log,
311 "%sGuessing start of match, REx%s `%s%.60s%s%s' against `%s%.*s%s%s'...\n",
312 PL_colors[4],PL_colors[5],PL_colors[0],
315 (strlen(prog->precomp) > 60 ? "..." : ""),
317 (strend - strpos > 60 ? 60 : strend - strpos),
318 strpos, PL_colors[1],
319 (strend - strpos > 60 ? "..." : ""))
322 if (prog->minlen > strend - strpos) {
323 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too short...\n"));
326 if (prog->reganch & ROPT_ANCH) { /* Match at beg-of-str or after \n */
327 ml_anch = !( (prog->reganch & ROPT_ANCH_SINGLE)
328 || ( (prog->reganch & ROPT_ANCH_BOL)
329 && !PL_multiline ) ); /* Check after \n? */
331 if ((prog->check_offset_min == prog->check_offset_max) && !ml_anch) {
332 /* Substring at constant offset from beg-of-str... */
335 if ( !(prog->reganch & ROPT_ANCH_GPOS) /* Checked by the caller */
336 && (sv && (strpos + SvCUR(sv) != strend)) ) {
337 DEBUG_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
340 PL_regeol = strend; /* Used in HOP() */
341 s = HOPc(strpos, prog->check_offset_min);
342 if (SvTAIL(prog->check_substr)) {
343 slen = SvCUR(prog->check_substr); /* >= 1 */
345 if ( strend - s > slen || strend - s < slen - 1
346 || (strend - s == slen && strend[-1] != '\n')) {
347 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
350 /* Now should match s[0..slen-2] */
352 if (slen && (*SvPVX(prog->check_substr) != *s
354 && memNE(SvPVX(prog->check_substr), s, slen)))) {
356 DEBUG_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
360 else if (*SvPVX(prog->check_substr) != *s
361 || ((slen = SvCUR(prog->check_substr)) > 1
362 && memNE(SvPVX(prog->check_substr), s, slen)))
364 goto success_at_start;
366 /* Match is anchored, but substr is not anchored wrt beg-of-str. */
368 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
369 /* Should be nonnegative! */
370 end_shift = prog->minlen - start_shift -
371 CHR_SVLEN(prog->check_substr) + (SvTAIL(prog->check_substr) != 0);
373 I32 end = prog->check_offset_max + CHR_SVLEN(prog->check_substr)
374 - (SvTAIL(prog->check_substr) != 0);
375 I32 eshift = strend - s - end;
377 if (end_shift < eshift)
381 else { /* Can match at random position */
384 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
385 /* Should be nonnegative! */
386 end_shift = prog->minlen - start_shift -
387 CHR_SVLEN(prog->check_substr) + (SvTAIL(prog->check_substr) != 0);
390 #ifdef DEBUGGING /* 7/99: reports of failure (with the older version) */
392 Perl_croak(aTHX_ "panic: end_shift");
395 check = prog->check_substr;
397 /* Find a possible match in the region s..strend by looking for
398 the "check" substring in the region corrected by start/end_shift. */
399 if (flags & REXEC_SCREAM) {
400 char *strbeg = SvPVX(sv); /* XXXX Assume PV_force() on SCREAM! */
401 I32 p = -1; /* Internal iterator of scream. */
402 I32 *pp = data ? data->scream_pos : &p;
404 if (PL_screamfirst[BmRARE(check)] >= 0
405 || ( BmRARE(check) == '\n'
406 && (BmPREVIOUS(check) == SvCUR(check) - 1)
408 s = screaminstr(sv, check,
409 start_shift + (s - strbeg), end_shift, pp, 0);
413 *data->scream_olds = s;
416 s = fbm_instr((unsigned char*)s + start_shift,
417 (unsigned char*)strend - end_shift,
418 check, PL_multiline ? FBMrf_MULTILINE : 0);
420 /* Update the count-of-usability, remove useless subpatterns,
423 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s %s substr `%s%.*s%s'%s%s",
424 (s ? "Found" : "Did not find"),
425 ((check == prog->anchored_substr) ? "anchored" : "floating"),
427 SvCUR(check) - (SvTAIL(check)!=0), SvPVX(check),
428 PL_colors[1], (SvTAIL(check) ? "$" : ""),
429 (s ? " at offset " : "...\n") ) );
434 /* Finish the diagnostic message */
435 DEBUG_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
437 /* Got a candidate. Check MBOL anchoring, and the *other* substr.
438 Start with the other substr.
439 XXXX no SCREAM optimization yet - and a very coarse implementation
440 XXXX /ttx+/ results in anchored=`ttx', floating=`x'. floating will
441 *always* match. Probably should be marked during compile...
442 Probably it is right to do no SCREAM here...
445 if (prog->float_substr && prog->anchored_substr) {
446 /* Take into account the "other" substring. */
447 /* XXXX May be hopelessly wrong for UTF... */
449 other_last = strpos - 1;
450 if (check == prog->float_substr) {
453 char *last = s - start_shift, *last1, *last2;
457 t = s - prog->check_offset_max;
458 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
459 && (!(prog->reganch & ROPT_UTF8)
460 || (PL_bostr = strpos, /* Used in regcopmaybe() */
461 (t = reghopmaybe_c(s, -(prog->check_offset_max)))
466 t += prog->anchored_offset;
470 last2 = last1 = strend - prog->minlen;
473 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
474 /* On end-of-str: see comment below. */
475 s = fbm_instr((unsigned char*)t,
476 (unsigned char*)last1 + prog->anchored_offset
477 + SvCUR(prog->anchored_substr)
478 - (SvTAIL(prog->anchored_substr)!=0),
479 prog->anchored_substr, PL_multiline ? FBMrf_MULTILINE : 0);
480 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s anchored substr `%s%.*s%s'%s",
481 (s ? "Found" : "Contradicts"),
483 SvCUR(prog->anchored_substr)
484 - (SvTAIL(prog->anchored_substr)!=0),
485 SvPVX(prog->anchored_substr),
486 PL_colors[1], (SvTAIL(prog->anchored_substr) ? "$" : "")));
488 if (last1 >= last2) {
489 DEBUG_r(PerlIO_printf(Perl_debug_log,
490 ", giving up...\n"));
493 DEBUG_r(PerlIO_printf(Perl_debug_log,
494 ", trying floating at offset %ld...\n",
495 (long)(s1 + 1 - i_strpos)));
496 PL_regeol = strend; /* Used in HOP() */
497 other_last = last1 + prog->anchored_offset;
502 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
503 (long)(s - i_strpos)));
504 t = s - prog->anchored_offset;
513 else { /* Take into account the floating substring. */
518 last1 = last = strend - prog->minlen + prog->float_min_offset;
519 if (last - t > prog->float_max_offset)
520 last = t + prog->float_max_offset;
521 s = t + prog->float_min_offset;
524 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
525 /* fbm_instr() takes into account exact value of end-of-str
526 if the check is SvTAIL(ed). Since false positives are OK,
527 and end-of-str is not later than strend we are OK. */
528 s = fbm_instr((unsigned char*)s,
529 (unsigned char*)last + SvCUR(prog->float_substr)
530 - (SvTAIL(prog->float_substr)!=0),
531 prog->float_substr, PL_multiline ? FBMrf_MULTILINE : 0);
532 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s floating substr `%s%.*s%s'%s",
533 (s ? "Found" : "Contradicts"),
535 SvCUR(prog->float_substr)
536 - (SvTAIL(prog->float_substr)!=0),
537 SvPVX(prog->float_substr),
538 PL_colors[1], (SvTAIL(prog->float_substr) ? "$" : "")));
541 DEBUG_r(PerlIO_printf(Perl_debug_log,
542 ", giving up...\n"));
545 DEBUG_r(PerlIO_printf(Perl_debug_log,
546 ", trying anchored starting at offset %ld...\n",
547 (long)(s1 + 1 - i_strpos)));
549 PL_regeol = strend; /* Used in HOP() */
554 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
555 (long)(s - i_strpos)));
565 t = s - prog->check_offset_max;
567 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
568 && (!(prog->reganch & ROPT_UTF8)
569 || (PL_bostr = strpos, /* Used in regcopmaybe() */
570 ((t = reghopmaybe_c(s, -(prog->check_offset_max)))
573 /* Fixed substring is found far enough so that the match
574 cannot start at strpos. */
576 if (ml_anch && t[-1] != '\n') {
577 /* Eventually fbm_*() should handle this, but often
578 anchored_offset is not 0, so this check will not be wasted. */
579 /* XXXX In the code below we prefer to look for "^" even in
580 presence of anchored substrings. And we search even
581 beyond the found float position. These pessimizations
582 are historical artefacts only. */
584 while (t < strend - prog->minlen) {
586 if (t < s - prog->check_offset_min) {
587 if (prog->anchored_substr) {
588 /* We definitely contradict the found anchored
589 substr. Due to the above check we do not
590 contradict "check" substr.
591 Thus we can arrive here only if check substr
592 is float. Redo checking for "other"=="fixed".
595 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
596 PL_colors[0],PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
597 goto do_other_anchored;
600 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
601 PL_colors[0],PL_colors[1], (long)(s - i_strpos)));
604 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting at offset %ld...\n",
605 PL_colors[0],PL_colors[1], (long)(t + 1 - i_strpos)));
611 DEBUG_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
612 PL_colors[0],PL_colors[1]));
617 ++BmUSEFUL(prog->check_substr); /* hooray/5 */
621 /* The found string does not prohibit matching at beg-of-str
622 - no optimization of calling REx engine can be performed,
623 unless it was an MBOL and we are not after MBOL. */
625 /* Even in this situation we may use MBOL flag if strpos is offset
626 wrt the start of the string. */
628 && (strpos + SvCUR(sv) != strend) && strpos[-1] != '\n') {
632 DEBUG_r( if (ml_anch)
633 PerlIO_printf(Perl_debug_log, "Does not contradict /%s^%s/m...\n",
634 PL_colors[0],PL_colors[1]);
637 if (!(prog->reganch & ROPT_NAUGHTY) /* XXXX If strpos moved? */
638 && --BmUSEFUL(prog->check_substr) < 0
639 && prog->check_substr == prog->float_substr) { /* boo */
640 /* If flags & SOMETHING - do not do it many times on the same match */
641 SvREFCNT_dec(prog->check_substr);
642 prog->check_substr = Nullsv; /* disable */
643 prog->float_substr = Nullsv; /* clear */
645 prog->reganch &= ~RE_USE_INTUIT;
651 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sGuessed:%s match at offset %ld\n",
652 PL_colors[4], PL_colors[5], (long)(s - i_strpos)) );
655 fail_finish: /* Substring not found */
656 BmUSEFUL(prog->check_substr) += 5; /* hooray */
658 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
659 PL_colors[4],PL_colors[5]));
664 - regexec_flags - match a regexp against a string
667 Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend,
668 char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
669 /* strend: pointer to null at end of string */
670 /* strbeg: real beginning of string */
671 /* minend: end of match must be >=minend after stringarg. */
672 /* data: May be used for some additional optimizations. */
673 /* nosave: For optimizations. */
678 register char *startpos = stringarg;
680 I32 minlen; /* must match at least this many chars */
681 I32 dontbother = 0; /* how many characters not to try at end */
682 I32 start_shift = 0; /* Offset of the start to find
683 constant substr. */ /* CC */
684 I32 end_shift = 0; /* Same for the end. */ /* CC */
685 I32 scream_pos = -1; /* Internal iterator of scream. */
687 SV* oreplsv = GvSV(PL_replgv);
693 PL_regnarrate = PL_debug & 512;
697 if (prog == NULL || startpos == NULL) {
698 Perl_croak(aTHX_ "NULL regexp parameter");
702 minlen = prog->minlen;
703 if (strend - startpos < minlen) goto phooey;
705 if (startpos == strbeg) /* is ^ valid at stringarg? */
708 PL_regprev = (U32)stringarg[-1];
709 if (!PL_multiline && PL_regprev == '\n')
710 PL_regprev = '\0'; /* force ^ to NOT match */
713 /* Check validity of program. */
714 if (UCHARAT(prog->program) != REG_MAGIC) {
715 Perl_croak(aTHX_ "corrupted regexp program");
722 if (prog->reganch & ROPT_UTF8)
723 PL_reg_flags |= RF_utf8;
725 /* Mark beginning of line for ^ and lookbehind. */
726 PL_regbol = startpos;
730 /* Mark end of line for $ (and such) */
733 /* see how far we have to get to not match where we matched before */
734 PL_regtill = startpos+minend;
736 /* We start without call_cc context. */
739 /* If there is a "must appear" string, look for it. */
742 if (prog->reganch & ROPT_GPOS_SEEN) { /* Need to have PL_reg_ganch */
745 if (flags & REXEC_IGNOREPOS) /* Means: check only at start */
746 PL_reg_ganch = startpos;
747 else if (sv && SvTYPE(sv) >= SVt_PVMG
749 && (mg = mg_find(sv, 'g')) && mg->mg_len >= 0) {
750 PL_reg_ganch = strbeg + mg->mg_len; /* Defined pos() */
751 if (prog->reganch & ROPT_ANCH_GPOS) {
752 if (s > PL_reg_ganch)
757 else /* pos() not defined */
758 PL_reg_ganch = strbeg;
761 if (!(flags & REXEC_CHECKED) && prog->check_substr != Nullsv) {
762 re_scream_pos_data d;
764 d.scream_olds = &scream_olds;
765 d.scream_pos = &scream_pos;
766 s = re_intuit_start(prog, sv, s, strend, flags, &d);
768 goto phooey; /* not present */
771 DEBUG_r( if (!PL_colorset) reginitcolors() );
772 DEBUG_r(PerlIO_printf(Perl_debug_log,
773 "%sMatching REx%s `%s%.60s%s%s' against `%s%.*s%s%s'\n",
774 PL_colors[4],PL_colors[5],PL_colors[0],
777 (strlen(prog->precomp) > 60 ? "..." : ""),
779 (strend - startpos > 60 ? 60 : strend - startpos),
780 startpos, PL_colors[1],
781 (strend - startpos > 60 ? "..." : ""))
784 /* Simplest case: anchored match need be tried only once. */
785 /* [unless only anchor is BOL and multiline is set] */
786 if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) {
787 if (s == startpos && regtry(prog, startpos))
789 else if (PL_multiline || (prog->reganch & ROPT_IMPLICIT)
790 || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */
795 dontbother = minlen - 1;
796 end = HOPc(strend, -dontbother) - 1;
797 /* for multiline we only have to try after newlines */
798 if (prog->check_substr) {
807 s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
815 if (*s++ == '\n') { /* don't need PL_utf8skip here */
823 } else if (prog->reganch & ROPT_ANCH_GPOS) {
824 if (regtry(prog, PL_reg_ganch))
829 /* Messy cases: unanchored match. */
830 if (prog->anchored_substr && prog->reganch & ROPT_SKIP) {
831 /* we have /x+whatever/ */
832 /* it must be a one character string (XXXX Except UTF?) */
833 char ch = SvPVX(prog->anchored_substr)[0];
837 if (regtry(prog, s)) goto got_it;
839 while (s < strend && *s == ch)
848 if (regtry(prog, s)) goto got_it;
850 while (s < strend && *s == ch)
858 else if (prog->anchored_substr != Nullsv
859 || (prog->float_substr != Nullsv
860 && prog->float_max_offset < strend - s)) {
861 SV *must = prog->anchored_substr
862 ? prog->anchored_substr : prog->float_substr;
864 prog->anchored_substr ? prog->anchored_offset : prog->float_max_offset;
866 prog->anchored_substr ? prog->anchored_offset : prog->float_min_offset;
867 I32 delta = back_max - back_min;
868 char *last = HOPc(strend, /* Cannot start after this */
869 -(I32)(CHR_SVLEN(must)
870 - (SvTAIL(must) != 0) + back_min));
871 char *last1; /* Last position checked before */
876 last1 = s - 1; /* bogus */
878 /* XXXX check_substr already used to find `s', can optimize if
879 check_substr==must. */
881 dontbother = end_shift;
882 strend = HOPc(strend, -dontbother);
883 while ( (s <= last) &&
884 ((flags & REXEC_SCREAM)
885 ? (s = screaminstr(sv, must, HOPc(s, back_min) - strbeg,
886 end_shift, &scream_pos, 0))
887 : (s = fbm_instr((unsigned char*)HOP(s, back_min),
888 (unsigned char*)strend, must,
889 PL_multiline ? FBMrf_MULTILINE : 0))) ) {
890 if (HOPc(s, -back_max) > last1) {
891 last1 = HOPc(s, -back_min);
892 s = HOPc(s, -back_max);
895 char *t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
897 last1 = HOPc(s, -back_min);
917 else if (c = prog->regstclass) {
918 I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
922 dontbother = minlen - 1;
923 strend = HOPc(strend, -dontbother); /* don't bother with what can't match */
925 /* We know what class it must start with. */
930 if (REGINCLASSUTF8(c, (U8*)s)) {
931 if (tmp && regtry(prog, s))
944 if (REGINCLASS(cc, *s)) {
945 if (tmp && regtry(prog, s))
956 PL_reg_flags |= RF_tainted;
963 tmp = (s != startpos) ? UCHARAT(s - 1) : PL_regprev;
964 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
966 if (tmp == !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
973 if ((minlen || tmp) && regtry(prog,s))
977 PL_reg_flags |= RF_tainted;
982 strend = reghop_c(strend, -1);
984 tmp = (I32)(s != startpos) ? utf8_to_uv(reghop((U8*)s, -1), 0) : PL_regprev;
985 tmp = ((OP(c) == BOUND ? isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
987 if (tmp == !(OP(c) == BOUND ?
988 swash_fetch(PL_utf8_alnum, (U8*)s) :
989 isALNUM_LC_utf8((U8*)s)))
997 if ((minlen || tmp) && regtry(prog,s))
1001 PL_reg_flags |= RF_tainted;
1008 tmp = (s != startpos) ? UCHARAT(s - 1) : PL_regprev;
1009 tmp = ((OP(c) == NBOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1010 while (s < strend) {
1011 if (tmp == !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
1013 else if (regtry(prog, s))
1017 if ((minlen || !tmp) && regtry(prog,s))
1021 PL_reg_flags |= RF_tainted;
1026 strend = reghop_c(strend, -1);
1028 tmp = (I32)(s != startpos) ? utf8_to_uv(reghop((U8*)s, -1), 0) : PL_regprev;
1029 tmp = ((OP(c) == NBOUND ? isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
1030 while (s < strend) {
1031 if (tmp == !(OP(c) == NBOUND ?
1032 swash_fetch(PL_utf8_alnum, (U8*)s) :
1033 isALNUM_LC_utf8((U8*)s)))
1035 else if (regtry(prog, s))
1039 if ((minlen || !tmp) && regtry(prog,s))
1043 while (s < strend) {
1045 if (tmp && regtry(prog, s))
1056 while (s < strend) {
1057 if (swash_fetch(PL_utf8_alnum, (U8*)s)) {
1058 if (tmp && regtry(prog, s))
1069 PL_reg_flags |= RF_tainted;
1070 while (s < strend) {
1071 if (isALNUM_LC(*s)) {
1072 if (tmp && regtry(prog, s))
1083 PL_reg_flags |= RF_tainted;
1084 while (s < strend) {
1085 if (isALNUM_LC_utf8((U8*)s)) {
1086 if (tmp && regtry(prog, s))
1097 while (s < strend) {
1099 if (tmp && regtry(prog, s))
1110 while (s < strend) {
1111 if (!swash_fetch(PL_utf8_alnum, (U8*)s)) {
1112 if (tmp && regtry(prog, s))
1123 PL_reg_flags |= RF_tainted;
1124 while (s < strend) {
1125 if (!isALNUM_LC(*s)) {
1126 if (tmp && regtry(prog, s))
1137 PL_reg_flags |= RF_tainted;
1138 while (s < strend) {
1139 if (!isALNUM_LC_utf8((U8*)s)) {
1140 if (tmp && regtry(prog, s))
1151 while (s < strend) {
1153 if (tmp && regtry(prog, s))
1164 while (s < strend) {
1165 if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s)) {
1166 if (tmp && regtry(prog, s))
1177 PL_reg_flags |= RF_tainted;
1178 while (s < strend) {
1179 if (isSPACE_LC(*s)) {
1180 if (tmp && regtry(prog, s))
1191 PL_reg_flags |= RF_tainted;
1192 while (s < strend) {
1193 if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) {
1194 if (tmp && regtry(prog, s))
1205 while (s < strend) {
1207 if (tmp && regtry(prog, s))
1218 while (s < strend) {
1219 if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s))) {
1220 if (tmp && regtry(prog, s))
1231 PL_reg_flags |= RF_tainted;
1232 while (s < strend) {
1233 if (!isSPACE_LC(*s)) {
1234 if (tmp && regtry(prog, s))
1245 PL_reg_flags |= RF_tainted;
1246 while (s < strend) {
1247 if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) {
1248 if (tmp && regtry(prog, s))
1259 while (s < strend) {
1261 if (tmp && regtry(prog, s))
1272 while (s < strend) {
1273 if (swash_fetch(PL_utf8_digit,(U8*)s)) {
1274 if (tmp && regtry(prog, s))
1285 PL_reg_flags |= RF_tainted;
1286 while (s < strend) {
1287 if (isDIGIT_LC(*s)) {
1288 if (tmp && regtry(prog, s))
1299 PL_reg_flags |= RF_tainted;
1300 while (s < strend) {
1301 if (isDIGIT_LC_utf8((U8*)s)) {
1302 if (tmp && regtry(prog, s))
1313 while (s < strend) {
1315 if (tmp && regtry(prog, s))
1326 while (s < strend) {
1327 if (!swash_fetch(PL_utf8_digit,(U8*)s)) {
1328 if (tmp && regtry(prog, s))
1339 PL_reg_flags |= RF_tainted;
1340 while (s < strend) {
1341 if (!isDIGIT_LC(*s)) {
1342 if (tmp && regtry(prog, s))
1353 PL_reg_flags |= RF_tainted;
1354 while (s < strend) {
1355 if (!isDIGIT_LC_utf8((U8*)s)) {
1356 if (tmp && regtry(prog, s))
1370 if (prog->float_substr != Nullsv) { /* Trim the end. */
1372 I32 oldpos = scream_pos;
1374 if (flags & REXEC_SCREAM) {
1375 last = screaminstr(sv, prog->float_substr, s - strbeg,
1376 end_shift, &scream_pos, 1); /* last one */
1378 last = scream_olds; /* Only one occurence. */
1382 char *little = SvPV(prog->float_substr, len);
1384 if (SvTAIL(prog->float_substr)) {
1385 if (memEQ(strend - len + 1, little, len - 1))
1386 last = strend - len + 1;
1387 else if (!PL_multiline)
1388 last = memEQ(strend - len, little, len)
1389 ? strend - len : Nullch;
1395 last = rninstr(s, strend, little, little + len);
1397 last = strend; /* matching `$' */
1400 if (last == NULL) goto phooey; /* Should not happen! */
1401 dontbother = strend - last + prog->float_min_offset;
1403 if (minlen && (dontbother < minlen))
1404 dontbother = minlen - 1;
1405 strend -= dontbother; /* this one's always in bytes! */
1406 /* We don't know much -- general case. */
1409 if (regtry(prog, s))
1418 if (regtry(prog, s))
1420 } while (s++ < strend);
1428 RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
1430 if (PL_reg_eval_set) {
1431 /* Preserve the current value of $^R */
1432 if (oreplsv != GvSV(PL_replgv))
1433 sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
1434 restored, the value remains
1436 restore_pos(aTHXo_ 0);
1439 /* make sure $`, $&, $', and $digit will work later */
1440 if ( !(flags & REXEC_NOT_FIRST) ) {
1441 if (RX_MATCH_COPIED(prog)) {
1442 Safefree(prog->subbeg);
1443 RX_MATCH_COPIED_off(prog);
1445 if (flags & REXEC_COPY_STR) {
1446 I32 i = PL_regeol - startpos + (stringarg - strbeg);
1448 s = savepvn(strbeg, i);
1451 RX_MATCH_COPIED_on(prog);
1454 prog->subbeg = strbeg;
1455 prog->sublen = PL_regeol - strbeg; /* strend may have been modified */
1462 if (PL_reg_eval_set)
1463 restore_pos(aTHXo_ 0);
1468 - regtry - try match at specific point
1470 STATIC I32 /* 0 failure, 1 success */
1471 S_regtry(pTHX_ regexp *prog, char *startpos)
1479 if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
1482 PL_reg_eval_set = RS_init;
1484 PerlIO_printf(Perl_debug_log, " setting stack tmpbase at %i\n",
1485 PL_stack_sp - PL_stack_base);
1487 SAVEINT(cxstack[cxstack_ix].blk_oldsp);
1488 cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
1489 /* Otherwise OP_NEXTSTATE will free whatever on stack now. */
1491 /* Apparently this is not needed, judging by wantarray. */
1492 /* SAVEINT(cxstack[cxstack_ix].blk_gimme);
1493 cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
1496 /* Make $_ available to executed code. */
1497 if (PL_reg_sv != DEFSV) {
1498 /* SAVE_DEFSV does *not* suffice here for USE_THREADS */
1503 if (!(SvTYPE(PL_reg_sv) >= SVt_PVMG && SvMAGIC(PL_reg_sv)
1504 && (mg = mg_find(PL_reg_sv, 'g')))) {
1505 /* prepare for quick setting of pos */
1506 sv_magic(PL_reg_sv, (SV*)0, 'g', Nullch, 0);
1507 mg = mg_find(PL_reg_sv, 'g');
1511 PL_reg_oldpos = mg->mg_len;
1512 SAVEDESTRUCTOR(restore_pos, 0);
1515 New(22,PL_reg_curpm, 1, PMOP);
1516 PL_reg_curpm->op_pmregexp = prog;
1517 PL_reg_oldcurpm = PL_curpm;
1518 PL_curpm = PL_reg_curpm;
1519 if (RX_MATCH_COPIED(prog)) {
1520 /* Here is a serious problem: we cannot rewrite subbeg,
1521 since it may be needed if this match fails. Thus
1522 $` inside (?{}) could fail... */
1523 PL_reg_oldsaved = prog->subbeg;
1524 PL_reg_oldsavedlen = prog->sublen;
1525 RX_MATCH_COPIED_off(prog);
1528 PL_reg_oldsaved = Nullch;
1529 prog->subbeg = PL_bostr;
1530 prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
1532 prog->startp[0] = startpos - PL_bostr;
1533 PL_reginput = startpos;
1534 PL_regstartp = prog->startp;
1535 PL_regendp = prog->endp;
1536 PL_reglastparen = &prog->lastparen;
1537 prog->lastparen = 0;
1539 DEBUG_r(PL_reg_starttry = startpos);
1540 if (PL_reg_start_tmpl <= prog->nparens) {
1541 PL_reg_start_tmpl = prog->nparens*3/2 + 3;
1542 if(PL_reg_start_tmp)
1543 Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1545 New(22,PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1548 /* XXXX What this code is doing here?!!! There should be no need
1549 to do this again and again, PL_reglastparen should take care of
1553 if (prog->nparens) {
1554 for (i = prog->nparens; i >= 1; i--) {
1560 if (regmatch(prog->program + 1)) {
1561 prog->endp[0] = PL_reginput - PL_bostr;
1569 - regmatch - main matching routine
1571 * Conceptually the strategy is simple: check to see whether the current
1572 * node matches, call self recursively to see whether the rest matches,
1573 * and then act accordingly. In practice we make some effort to avoid
1574 * recursion, in particular by going through "ordinary" nodes (that don't
1575 * need to know whether the rest of the match failed) by a loop instead of
1578 /* [lwall] I've hoisted the register declarations to the outer block in order to
1579 * maybe save a little bit of pushing and popping on the stack. It also takes
1580 * advantage of machines that use a register save mask on subroutine entry.
1582 STATIC I32 /* 0 failure, 1 success */
1583 S_regmatch(pTHX_ regnode *prog)
1586 register regnode *scan; /* Current node. */
1587 regnode *next; /* Next node. */
1588 regnode *inner; /* Next node in internal branch. */
1589 register I32 nextchr; /* renamed nextchr - nextchar colides with
1590 function of same name */
1591 register I32 n; /* no or next */
1592 register I32 ln; /* len or last */
1593 register char *s; /* operand or save */
1594 register char *locinput = PL_reginput;
1595 register I32 c1, c2, paren; /* case fold search, parenth */
1596 int minmod = 0, sw = 0, logical = 0;
1601 /* Note that nextchr is a byte even in UTF */
1602 nextchr = UCHARAT(locinput);
1604 while (scan != NULL) {
1605 #define sayNO_L (logical ? (logical = 0, sw = 0, goto cont) : sayNO)
1607 # define sayYES goto yes
1608 # define sayNO goto no
1609 # define sayYES_FINAL goto yes_final
1610 # define sayYES_LOUD goto yes_loud
1611 # define sayNO_FINAL goto no_final
1612 # define sayNO_SILENT goto do_no
1613 # define saySAME(x) if (x) goto yes; else goto no
1614 # define REPORT_CODE_OFF 24
1616 # define sayYES return 1
1617 # define sayNO return 0
1618 # define sayYES_FINAL return 1
1619 # define sayYES_LOUD return 1
1620 # define sayNO_FINAL return 0
1621 # define sayNO_SILENT return 0
1622 # define saySAME(x) return x
1625 SV *prop = sv_newmortal();
1626 int docolor = *PL_colors[0];
1627 int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
1628 int l = (PL_regeol - locinput > taill ? taill : PL_regeol - locinput);
1629 /* The part of the string before starttry has one color
1630 (pref0_len chars), between starttry and current
1631 position another one (pref_len - pref0_len chars),
1632 after the current position the third one.
1633 We assume that pref0_len <= pref_len, otherwise we
1634 decrease pref0_len. */
1635 int pref_len = (locinput - PL_bostr > (5 + taill) - l
1636 ? (5 + taill) - l : locinput - PL_bostr);
1637 int pref0_len = pref_len - (locinput - PL_reg_starttry);
1639 if (l + pref_len < (5 + taill) && l < PL_regeol - locinput)
1640 l = ( PL_regeol - locinput > (5 + taill) - pref_len
1641 ? (5 + taill) - pref_len : PL_regeol - locinput);
1644 if (pref0_len > pref_len)
1645 pref0_len = pref_len;
1646 regprop(prop, scan);
1647 PerlIO_printf(Perl_debug_log,
1648 "%4i <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|%3d:%*s%s\n",
1649 locinput - PL_bostr,
1650 PL_colors[4], pref0_len,
1651 locinput - pref_len, PL_colors[5],
1652 PL_colors[2], pref_len - pref0_len,
1653 locinput - pref_len + pref0_len, PL_colors[3],
1654 (docolor ? "" : "> <"),
1655 PL_colors[0], l, locinput, PL_colors[1],
1656 15 - l - pref_len + 1,
1658 scan - PL_regprogram, PL_regindent*2, "",
1662 next = scan + NEXT_OFF(scan);
1668 if (locinput == PL_bostr
1669 ? PL_regprev == '\n'
1671 (nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
1673 /* regtill = regbol; */
1678 if (locinput == PL_bostr
1679 ? PL_regprev == '\n'
1680 : ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
1686 if (locinput == PL_regbol && PL_regprev == '\n')
1690 if (locinput == PL_reg_ganch)
1700 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
1705 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
1707 if (PL_regeol - locinput > 1)
1711 if (PL_regeol != locinput)
1715 if (nextchr & 0x80) {
1716 locinput += PL_utf8skip[nextchr];
1717 if (locinput > PL_regeol)
1719 nextchr = UCHARAT(locinput);
1722 if (!nextchr && locinput >= PL_regeol)
1724 nextchr = UCHARAT(++locinput);
1727 if (!nextchr && locinput >= PL_regeol)
1729 nextchr = UCHARAT(++locinput);
1732 if (nextchr & 0x80) {
1733 locinput += PL_utf8skip[nextchr];
1734 if (locinput > PL_regeol)
1736 nextchr = UCHARAT(locinput);
1739 if (!nextchr && locinput >= PL_regeol || nextchr == '\n')
1741 nextchr = UCHARAT(++locinput);
1744 if (!nextchr && locinput >= PL_regeol || nextchr == '\n')
1746 nextchr = UCHARAT(++locinput);
1751 /* Inline the first character, for speed. */
1752 if (UCHARAT(s) != nextchr)
1754 if (PL_regeol - locinput < ln)
1756 if (ln > 1 && memNE(s, locinput, ln))
1759 nextchr = UCHARAT(locinput);
1762 PL_reg_flags |= RF_tainted;
1771 c1 = OP(scan) == EXACTF;
1775 if (utf8_to_uv((U8*)s, 0) != (c1 ?
1776 toLOWER_utf8((U8*)l) :
1777 toLOWER_LC_utf8((U8*)l)))
1785 nextchr = UCHARAT(locinput);
1789 /* Inline the first character, for speed. */
1790 if (UCHARAT(s) != nextchr &&
1791 UCHARAT(s) != ((OP(scan) == EXACTF)
1792 ? PL_fold : PL_fold_locale)[nextchr])
1794 if (PL_regeol - locinput < ln)
1796 if (ln > 1 && (OP(scan) == EXACTF
1797 ? ibcmp(s, locinput, ln)
1798 : ibcmp_locale(s, locinput, ln)))
1801 nextchr = UCHARAT(locinput);
1805 if (!REGINCLASSUTF8(scan, (U8*)locinput))
1807 if (locinput >= PL_regeol)
1809 locinput += PL_utf8skip[nextchr];
1810 nextchr = UCHARAT(locinput);
1815 nextchr = UCHARAT(locinput);
1816 if (!REGINCLASS(s, nextchr))
1818 if (!nextchr && locinput >= PL_regeol)
1820 nextchr = UCHARAT(++locinput);
1823 PL_reg_flags |= RF_tainted;
1828 if (!(OP(scan) == ALNUM
1829 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
1831 nextchr = UCHARAT(++locinput);
1834 PL_reg_flags |= RF_tainted;
1839 if (nextchr & 0x80) {
1840 if (!(OP(scan) == ALNUMUTF8
1841 ? swash_fetch(PL_utf8_alnum, (U8*)locinput)
1842 : isALNUM_LC_utf8((U8*)locinput)))
1846 locinput += PL_utf8skip[nextchr];
1847 nextchr = UCHARAT(locinput);
1850 if (!(OP(scan) == ALNUMUTF8
1851 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
1853 nextchr = UCHARAT(++locinput);
1856 PL_reg_flags |= RF_tainted;
1859 if (!nextchr && locinput >= PL_regeol)
1861 if (OP(scan) == NALNUM
1862 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
1864 nextchr = UCHARAT(++locinput);
1867 PL_reg_flags |= RF_tainted;
1870 if (!nextchr && locinput >= PL_regeol)
1872 if (nextchr & 0x80) {
1873 if (OP(scan) == NALNUMUTF8
1874 ? swash_fetch(PL_utf8_alnum, (U8*)locinput)
1875 : isALNUM_LC_utf8((U8*)locinput))
1879 locinput += PL_utf8skip[nextchr];
1880 nextchr = UCHARAT(locinput);
1883 if (OP(scan) == NALNUMUTF8
1884 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
1886 nextchr = UCHARAT(++locinput);
1890 PL_reg_flags |= RF_tainted;
1894 /* was last char in word? */
1895 ln = (locinput != PL_regbol) ? UCHARAT(locinput - 1) : PL_regprev;
1896 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
1898 n = isALNUM(nextchr);
1901 ln = isALNUM_LC(ln);
1902 n = isALNUM_LC(nextchr);
1904 if (((!ln) == (!n)) == (OP(scan) == BOUND || OP(scan) == BOUNDL))
1909 PL_reg_flags |= RF_tainted;
1913 /* was last char in word? */
1914 ln = (locinput != PL_regbol)
1915 ? utf8_to_uv(reghop((U8*)locinput, -1), 0) : PL_regprev;
1916 if (OP(scan) == BOUNDUTF8 || OP(scan) == NBOUNDUTF8) {
1917 ln = isALNUM_uni(ln);
1918 n = swash_fetch(PL_utf8_alnum, (U8*)locinput);
1921 ln = isALNUM_LC_uni(ln);
1922 n = isALNUM_LC_utf8((U8*)locinput);
1924 if (((!ln) == (!n)) == (OP(scan) == BOUNDUTF8 || OP(scan) == BOUNDLUTF8))
1928 PL_reg_flags |= RF_tainted;
1931 if (!nextchr && locinput >= PL_regeol)
1933 if (!(OP(scan) == SPACE
1934 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
1936 nextchr = UCHARAT(++locinput);
1939 PL_reg_flags |= RF_tainted;
1942 if (!nextchr && locinput >= PL_regeol)
1944 if (nextchr & 0x80) {
1945 if (!(OP(scan) == SPACEUTF8
1946 ? swash_fetch(PL_utf8_space,(U8*)locinput)
1947 : isSPACE_LC_utf8((U8*)locinput)))
1951 locinput += PL_utf8skip[nextchr];
1952 nextchr = UCHARAT(locinput);
1955 if (!(OP(scan) == SPACEUTF8
1956 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
1958 nextchr = UCHARAT(++locinput);
1961 PL_reg_flags |= RF_tainted;
1966 if (OP(scan) == SPACE
1967 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
1969 nextchr = UCHARAT(++locinput);
1972 PL_reg_flags |= RF_tainted;
1977 if (nextchr & 0x80) {
1978 if (OP(scan) == NSPACEUTF8
1979 ? swash_fetch(PL_utf8_space,(U8*)locinput)
1980 : isSPACE_LC_utf8((U8*)locinput))
1984 locinput += PL_utf8skip[nextchr];
1985 nextchr = UCHARAT(locinput);
1988 if (OP(scan) == NSPACEUTF8
1989 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
1991 nextchr = UCHARAT(++locinput);
1994 PL_reg_flags |= RF_tainted;
1997 if (!nextchr && locinput >= PL_regeol)
1999 if (!(OP(scan) == DIGIT
2000 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
2002 nextchr = UCHARAT(++locinput);
2005 PL_reg_flags |= RF_tainted;
2010 if (nextchr & 0x80) {
2011 if (OP(scan) == NDIGITUTF8
2012 ? swash_fetch(PL_utf8_digit,(U8*)locinput)
2013 : isDIGIT_LC_utf8((U8*)locinput))
2017 locinput += PL_utf8skip[nextchr];
2018 nextchr = UCHARAT(locinput);
2021 if (!isDIGIT(nextchr))
2023 nextchr = UCHARAT(++locinput);
2026 PL_reg_flags |= RF_tainted;
2031 if (OP(scan) == DIGIT
2032 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
2034 nextchr = UCHARAT(++locinput);
2037 PL_reg_flags |= RF_tainted;
2040 if (!nextchr && locinput >= PL_regeol)
2042 if (nextchr & 0x80) {
2043 if (swash_fetch(PL_utf8_digit,(U8*)locinput))
2045 locinput += PL_utf8skip[nextchr];
2046 nextchr = UCHARAT(locinput);
2049 if (isDIGIT(nextchr))
2051 nextchr = UCHARAT(++locinput);
2054 if (locinput >= PL_regeol || swash_fetch(PL_utf8_mark,(U8*)locinput))
2056 locinput += PL_utf8skip[nextchr];
2057 while (locinput < PL_regeol && swash_fetch(PL_utf8_mark,(U8*)locinput))
2058 locinput += UTF8SKIP(locinput);
2059 if (locinput > PL_regeol)
2061 nextchr = UCHARAT(locinput);
2064 PL_reg_flags |= RF_tainted;
2068 n = ARG(scan); /* which paren pair */
2069 ln = PL_regstartp[n];
2070 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
2071 if (*PL_reglastparen < n || ln == -1)
2072 sayNO; /* Do not match unless seen CLOSEn. */
2073 if (ln == PL_regendp[n])
2077 if (UTF && OP(scan) != REF) { /* REF can do byte comparison */
2079 char *e = PL_bostr + PL_regendp[n];
2081 * Note that we can't do the "other character" lookup trick as
2082 * in the 8-bit case (no pun intended) because in Unicode we
2083 * have to map both upper and title case to lower case.
2085 if (OP(scan) == REFF) {
2089 if (toLOWER_utf8((U8*)s) != toLOWER_utf8((U8*)l))
2099 if (toLOWER_LC_utf8((U8*)s) != toLOWER_LC_utf8((U8*)l))
2106 nextchr = UCHARAT(locinput);
2110 /* Inline the first character, for speed. */
2111 if (UCHARAT(s) != nextchr &&
2113 (UCHARAT(s) != ((OP(scan) == REFF
2114 ? PL_fold : PL_fold_locale)[nextchr]))))
2116 ln = PL_regendp[n] - ln;
2117 if (locinput + ln > PL_regeol)
2119 if (ln > 1 && (OP(scan) == REF
2120 ? memNE(s, locinput, ln)
2122 ? ibcmp(s, locinput, ln)
2123 : ibcmp_locale(s, locinput, ln))))
2126 nextchr = UCHARAT(locinput);
2137 OP_4tree *oop = PL_op;
2138 COP *ocurcop = PL_curcop;
2139 SV **ocurpad = PL_curpad;
2143 PL_op = (OP_4tree*)PL_regdata->data[n];
2144 DEBUG_r( PerlIO_printf(Perl_debug_log, " re_eval 0x%x\n", PL_op) );
2145 PL_curpad = AvARRAY((AV*)PL_regdata->data[n + 2]);
2146 PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
2148 CALLRUNOPS(aTHX); /* Scalar context. */
2154 PL_curpad = ocurpad;
2155 PL_curcop = ocurcop;
2157 if (logical == 2) { /* Postponed subexpression. */
2159 MAGIC *mg = Null(MAGIC*);
2161 CHECKPOINT cp, lastcp;
2163 if(SvROK(ret) || SvRMAGICAL(ret)) {
2164 SV *sv = SvROK(ret) ? SvRV(ret) : ret;
2167 mg = mg_find(sv, 'r');
2170 re = (regexp *)mg->mg_obj;
2171 (void)ReREFCNT_inc(re);
2175 char *t = SvPV(ret, len);
2177 char *oprecomp = PL_regprecomp;
2178 I32 osize = PL_regsize;
2179 I32 onpar = PL_regnpar;
2182 re = CALLREGCOMP(aTHX_ t, t + len, &pm);
2184 & (SVs_TEMP | SVs_PADTMP | SVf_READONLY)))
2185 sv_magic(ret,(SV*)ReREFCNT_inc(re),'r',0,0);
2186 PL_regprecomp = oprecomp;
2191 PerlIO_printf(Perl_debug_log,
2192 "Entering embedded `%s%.60s%s%s'\n",
2196 (strlen(re->precomp) > 60 ? "..." : ""))
2199 state.prev = PL_reg_call_cc;
2200 state.cc = PL_regcc;
2201 state.re = PL_reg_re;
2205 cp = regcppush(0); /* Save *all* the positions. */
2208 state.ss = PL_savestack_ix;
2209 *PL_reglastparen = 0;
2210 PL_reg_call_cc = &state;
2211 PL_reginput = locinput;
2213 /* XXXX This is too dramatic a measure... */
2216 if (regmatch(re->program + 1)) {
2217 /* Even though we succeeded, we need to restore
2218 global variables, since we may be wrapped inside
2219 SUSPEND, thus the match may be not finished yet. */
2221 /* XXXX Do this only if SUSPENDed? */
2222 PL_reg_call_cc = state.prev;
2223 PL_regcc = state.cc;
2224 PL_reg_re = state.re;
2225 cache_re(PL_reg_re);
2227 /* XXXX This is too dramatic a measure... */
2230 /* These are needed even if not SUSPEND. */
2238 PL_reg_call_cc = state.prev;
2239 PL_regcc = state.cc;
2240 PL_reg_re = state.re;
2241 cache_re(PL_reg_re);
2243 /* XXXX This is too dramatic a measure... */
2252 sv_setsv(save_scalar(PL_replgv), ret);
2256 n = ARG(scan); /* which paren pair */
2257 PL_reg_start_tmp[n] = locinput;
2262 n = ARG(scan); /* which paren pair */
2263 PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
2264 PL_regendp[n] = locinput - PL_bostr;
2265 if (n > *PL_reglastparen)
2266 *PL_reglastparen = n;
2269 n = ARG(scan); /* which paren pair */
2270 sw = (*PL_reglastparen >= n && PL_regendp[n] != -1);
2273 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
2275 next = NEXTOPER(NEXTOPER(scan));
2277 next = scan + ARG(scan);
2278 if (OP(next) == IFTHEN) /* Fake one. */
2279 next = NEXTOPER(NEXTOPER(next));
2283 logical = scan->flags;
2285 /*******************************************************************
2286 PL_regcc contains infoblock about the innermost (...)* loop, and
2287 a pointer to the next outer infoblock.
2289 Here is how Y(A)*Z is processed (if it is compiled into CURLYX/WHILEM):
2291 1) After matching X, regnode for CURLYX is processed;
2293 2) This regnode creates infoblock on the stack, and calls
2294 regmatch() recursively with the starting point at WHILEM node;
2296 3) Each hit of WHILEM node tries to match A and Z (in the order
2297 depending on the current iteration, min/max of {min,max} and
2298 greediness). The information about where are nodes for "A"
2299 and "Z" is read from the infoblock, as is info on how many times "A"
2300 was already matched, and greediness.
2302 4) After A matches, the same WHILEM node is hit again.
2304 5) Each time WHILEM is hit, PL_regcc is the infoblock created by CURLYX
2305 of the same pair. Thus when WHILEM tries to match Z, it temporarily
2306 resets PL_regcc, since this Y(A)*Z can be a part of some other loop:
2307 as in (Y(A)*Z)*. If Z matches, the automaton will hit the WHILEM node
2308 of the external loop.
2310 Currently present infoblocks form a tree with a stem formed by PL_curcc
2311 and whatever it mentions via ->next, and additional attached trees
2312 corresponding to temporarily unset infoblocks as in "5" above.
2314 In the following picture infoblocks for outer loop of
2315 (Y(A)*?Z)*?T are denoted O, for inner I. NULL starting block
2316 is denoted by x. The matched string is YAAZYAZT. Temporarily postponed
2317 infoblocks are drawn below the "reset" infoblock.
2319 In fact in the picture below we do not show failed matches for Z and T
2320 by WHILEM blocks. [We illustrate minimal matches, since for them it is
2321 more obvious *why* one needs to *temporary* unset infoblocks.]
2323 Matched REx position InfoBlocks Comment
2327 Y A)*?Z)*?T x <- O <- I
2328 YA )*?Z)*?T x <- O <- I
2329 YA A)*?Z)*?T x <- O <- I
2330 YAA )*?Z)*?T x <- O <- I
2331 YAA Z)*?T x <- O # Temporary unset I
2334 YAAZ Y(A)*?Z)*?T x <- O
2337 YAAZY (A)*?Z)*?T x <- O
2340 YAAZY A)*?Z)*?T x <- O <- I
2343 YAAZYA )*?Z)*?T x <- O <- I
2346 YAAZYA Z)*?T x <- O # Temporary unset I
2352 YAAZYAZ T x # Temporary unset O
2359 *******************************************************************/
2362 CHECKPOINT cp = PL_savestack_ix;
2364 if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
2366 cc.oldcc = PL_regcc;
2368 cc.parenfloor = *PL_reglastparen;
2370 cc.min = ARG1(scan);
2371 cc.max = ARG2(scan);
2372 cc.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
2376 PL_reginput = locinput;
2377 n = regmatch(PREVOPER(next)); /* start on the WHILEM */
2379 PL_regcc = cc.oldcc;
2385 * This is really hard to understand, because after we match
2386 * what we're trying to match, we must make sure the rest of
2387 * the REx is going to match for sure, and to do that we have
2388 * to go back UP the parse tree by recursing ever deeper. And
2389 * if it fails, we have to reset our parent's current state
2390 * that we can try again after backing off.
2393 CHECKPOINT cp, lastcp;
2394 CURCUR* cc = PL_regcc;
2395 char *lastloc = cc->lastloc; /* Detection of 0-len. */
2397 n = cc->cur + 1; /* how many we know we matched */
2398 PL_reginput = locinput;
2401 PerlIO_printf(Perl_debug_log,
2402 "%*s %ld out of %ld..%ld cc=%lx\n",
2403 REPORT_CODE_OFF+PL_regindent*2, "",
2404 (long)n, (long)cc->min,
2405 (long)cc->max, (long)cc)
2408 /* If degenerate scan matches "", assume scan done. */
2410 if (locinput == cc->lastloc && n >= cc->min) {
2411 PL_regcc = cc->oldcc;
2415 PerlIO_printf(Perl_debug_log,
2416 "%*s empty match detected, try continuation...\n",
2417 REPORT_CODE_OFF+PL_regindent*2, "")
2419 if (regmatch(cc->next))
2427 /* First just match a string of min scans. */
2431 cc->lastloc = locinput;
2432 if (regmatch(cc->scan))
2435 cc->lastloc = lastloc;
2440 /* Check whether we already were at this position.
2441 Postpone detection until we know the match is not
2442 *that* much linear. */
2443 if (!PL_reg_maxiter) {
2444 PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
2445 PL_reg_leftiter = PL_reg_maxiter;
2447 if (PL_reg_leftiter-- == 0) {
2448 I32 size = (PL_reg_maxiter + 7)/8;
2449 if (PL_reg_poscache) {
2450 if (PL_reg_poscache_size < size) {
2451 Renew(PL_reg_poscache, size, char);
2452 PL_reg_poscache_size = size;
2454 Zero(PL_reg_poscache, size, char);
2457 PL_reg_poscache_size = size;
2458 Newz(29, PL_reg_poscache, size, char);
2461 PerlIO_printf(Perl_debug_log,
2462 "%sDetected a super-linear match, switching on caching%s...\n",
2463 PL_colors[4], PL_colors[5])
2466 if (PL_reg_leftiter < 0) {
2467 I32 o = locinput - PL_bostr, b;
2469 o = (scan->flags & 0xf) - 1 + o * (scan->flags>>4);
2472 if (PL_reg_poscache[o] & (1<<b)) {
2474 PerlIO_printf(Perl_debug_log,
2475 "%*s already tried at this position...\n",
2476 REPORT_CODE_OFF+PL_regindent*2, "")
2480 PL_reg_poscache[o] |= (1<<b);
2484 /* Prefer next over scan for minimal matching. */
2487 PL_regcc = cc->oldcc;
2490 cp = regcppush(cc->parenfloor);
2492 if (regmatch(cc->next)) {
2494 sayYES; /* All done. */
2502 if (n >= cc->max) { /* Maximum greed exceeded? */
2503 if (ckWARN(WARN_UNSAFE) && n >= REG_INFTY
2504 && !(PL_reg_flags & RF_warned)) {
2505 PL_reg_flags |= RF_warned;
2506 Perl_warner(aTHX_ WARN_UNSAFE, "%s limit (%d) exceeded",
2507 "Complex regular subexpression recursion",
2514 PerlIO_printf(Perl_debug_log,
2515 "%*s trying longer...\n",
2516 REPORT_CODE_OFF+PL_regindent*2, "")
2518 /* Try scanning more and see if it helps. */
2519 PL_reginput = locinput;
2521 cc->lastloc = locinput;
2522 cp = regcppush(cc->parenfloor);
2524 if (regmatch(cc->scan)) {
2531 cc->lastloc = lastloc;
2535 /* Prefer scan over next for maximal matching. */
2537 if (n < cc->max) { /* More greed allowed? */
2538 cp = regcppush(cc->parenfloor);
2540 cc->lastloc = locinput;
2542 if (regmatch(cc->scan)) {
2547 regcppop(); /* Restore some previous $<digit>s? */
2548 PL_reginput = locinput;
2550 PerlIO_printf(Perl_debug_log,
2551 "%*s failed, try continuation...\n",
2552 REPORT_CODE_OFF+PL_regindent*2, "")
2555 if (ckWARN(WARN_UNSAFE) && n >= REG_INFTY
2556 && !(PL_reg_flags & RF_warned)) {
2557 PL_reg_flags |= RF_warned;
2558 Perl_warner(aTHX_ WARN_UNSAFE, "%s limit (%d) exceeded",
2559 "Complex regular subexpression recursion",
2563 /* Failed deeper matches of scan, so see if this one works. */
2564 PL_regcc = cc->oldcc;
2567 if (regmatch(cc->next))
2573 cc->lastloc = lastloc;
2578 next = scan + ARG(scan);
2581 inner = NEXTOPER(NEXTOPER(scan));
2584 inner = NEXTOPER(scan);
2589 if (OP(next) != c1) /* No choice. */
2590 next = inner; /* Avoid recursion. */
2592 int lastparen = *PL_reglastparen;
2596 PL_reginput = locinput;
2597 if (regmatch(inner))
2600 for (n = *PL_reglastparen; n > lastparen; n--)
2602 *PL_reglastparen = n;
2605 if (n = (c1 == BRANCH ? NEXT_OFF(next) : ARG(next)))
2609 inner = NEXTOPER(scan);
2610 if (c1 == BRANCHJ) {
2611 inner = NEXTOPER(inner);
2613 } while (scan != NULL && OP(scan) == c1);
2627 /* We suppose that the next guy does not need
2628 backtracking: in particular, it is of constant length,
2629 and has no parenths to influence future backrefs. */
2630 ln = ARG1(scan); /* min to match */
2631 n = ARG2(scan); /* max to match */
2632 paren = scan->flags;
2634 if (paren > PL_regsize)
2636 if (paren > *PL_reglastparen)
2637 *PL_reglastparen = paren;
2639 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
2641 scan += NEXT_OFF(scan); /* Skip former OPEN. */
2642 PL_reginput = locinput;
2645 if (ln && regrepeat_hard(scan, ln, &l) < ln)
2647 if (ln && l == 0 && n >= ln
2648 /* In fact, this is tricky. If paren, then the
2649 fact that we did/didnot match may influence
2650 future execution. */
2651 && !(paren && ln == 0))
2653 locinput = PL_reginput;
2654 if (PL_regkind[(U8)OP(next)] == EXACT) {
2655 c1 = (U8)*STRING(next);
2656 if (OP(next) == EXACTF)
2658 else if (OP(next) == EXACTFL)
2659 c2 = PL_fold_locale[c1];
2666 /* This may be improved if l == 0. */
2667 while (n >= ln || (n == REG_INFTY && ln > 0 && l)) { /* ln overflow ? */
2668 /* If it could work, try it. */
2670 UCHARAT(PL_reginput) == c1 ||
2671 UCHARAT(PL_reginput) == c2)
2675 PL_regstartp[paren] =
2676 HOPc(PL_reginput, -l) - PL_bostr;
2677 PL_regendp[paren] = PL_reginput - PL_bostr;
2680 PL_regendp[paren] = -1;
2686 /* Couldn't or didn't -- move forward. */
2687 PL_reginput = locinput;
2688 if (regrepeat_hard(scan, 1, &l)) {
2690 locinput = PL_reginput;
2697 n = regrepeat_hard(scan, n, &l);
2698 if (n != 0 && l == 0
2699 /* In fact, this is tricky. If paren, then the
2700 fact that we did/didnot match may influence
2701 future execution. */
2702 && !(paren && ln == 0))
2704 locinput = PL_reginput;
2706 PerlIO_printf(Perl_debug_log,
2707 "%*s matched %ld times, len=%ld...\n",
2708 REPORT_CODE_OFF+PL_regindent*2, "", n, l)
2711 if (PL_regkind[(U8)OP(next)] == EXACT) {
2712 c1 = (U8)*STRING(next);
2713 if (OP(next) == EXACTF)
2715 else if (OP(next) == EXACTFL)
2716 c2 = PL_fold_locale[c1];
2725 /* If it could work, try it. */
2727 UCHARAT(PL_reginput) == c1 ||
2728 UCHARAT(PL_reginput) == c2)
2731 PerlIO_printf(Perl_debug_log,
2732 "%*s trying tail with n=%ld...\n",
2733 REPORT_CODE_OFF+PL_regindent*2, "", n)
2737 PL_regstartp[paren] = HOPc(PL_reginput, -l) - PL_bostr;
2738 PL_regendp[paren] = PL_reginput - PL_bostr;
2741 PL_regendp[paren] = -1;
2747 /* Couldn't or didn't -- back up. */
2749 locinput = HOPc(locinput, -l);
2750 PL_reginput = locinput;
2757 paren = scan->flags; /* Which paren to set */
2758 if (paren > PL_regsize)
2760 if (paren > *PL_reglastparen)
2761 *PL_reglastparen = paren;
2762 ln = ARG1(scan); /* min to match */
2763 n = ARG2(scan); /* max to match */
2764 scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
2768 ln = ARG1(scan); /* min to match */
2769 n = ARG2(scan); /* max to match */
2770 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
2775 scan = NEXTOPER(scan);
2781 scan = NEXTOPER(scan);
2785 * Lookahead to avoid useless match attempts
2786 * when we know what character comes next.
2788 if (PL_regkind[(U8)OP(next)] == EXACT) {
2789 c1 = (U8)*STRING(next);
2790 if (OP(next) == EXACTF)
2792 else if (OP(next) == EXACTFL)
2793 c2 = PL_fold_locale[c1];
2799 PL_reginput = locinput;
2803 if (ln && regrepeat(scan, ln) < ln)
2805 locinput = PL_reginput;
2808 char *e = locinput + n - ln; /* Should not check after this */
2809 char *old = locinput;
2811 if (e >= PL_regeol || (n == REG_INFTY))
2814 /* Find place 'next' could work */
2816 while (locinput <= e && *locinput != c1)
2819 while (locinput <= e
2826 /* PL_reginput == old now */
2827 if (locinput != old) {
2828 ln = 1; /* Did some */
2829 if (regrepeat(scan, locinput - old) <
2833 /* PL_reginput == locinput now */
2836 PL_regstartp[paren] = HOPc(locinput, -1) - PL_bostr;
2837 PL_regendp[paren] = locinput - PL_bostr;
2840 PL_regendp[paren] = -1;
2844 PL_reginput = locinput; /* Could be reset... */
2846 /* Couldn't or didn't -- move forward. */
2851 while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */
2852 /* If it could work, try it. */
2854 UCHARAT(PL_reginput) == c1 ||
2855 UCHARAT(PL_reginput) == c2)
2859 PL_regstartp[paren] = HOPc(PL_reginput, -1) - PL_bostr;
2860 PL_regendp[paren] = PL_reginput - PL_bostr;
2863 PL_regendp[paren] = -1;
2869 /* Couldn't or didn't -- move forward. */
2870 PL_reginput = locinput;
2871 if (regrepeat(scan, 1)) {
2873 locinput = PL_reginput;
2881 n = regrepeat(scan, n);
2882 locinput = PL_reginput;
2883 if (ln < n && PL_regkind[(U8)OP(next)] == EOL &&
2884 (!PL_multiline || OP(next) == SEOL))
2885 ln = n; /* why back off? */
2889 /* If it could work, try it. */
2891 UCHARAT(PL_reginput) == c1 ||
2892 UCHARAT(PL_reginput) == c2)
2896 PL_regstartp[paren] = HOPc(PL_reginput, -1) - PL_bostr;
2897 PL_regendp[paren] = PL_reginput - PL_bostr;
2900 PL_regendp[paren] = -1;
2906 /* Couldn't or didn't -- back up. */
2908 PL_reginput = locinput = HOPc(locinput, -1);
2913 /* If it could work, try it. */
2915 UCHARAT(PL_reginput) == c1 ||
2916 UCHARAT(PL_reginput) == c2)
2922 /* Couldn't or didn't -- back up. */
2924 PL_reginput = locinput = HOPc(locinput, -1);
2931 if (PL_reg_call_cc) {
2932 re_cc_state *cur_call_cc = PL_reg_call_cc;
2933 CURCUR *cctmp = PL_regcc;
2934 regexp *re = PL_reg_re;
2935 CHECKPOINT cp, lastcp;
2937 cp = regcppush(0); /* Save *all* the positions. */
2939 regcp_set_to(PL_reg_call_cc->ss); /* Restore parens of
2941 PL_reginput = locinput; /* Make position available to
2943 cache_re(PL_reg_call_cc->re);
2944 PL_regcc = PL_reg_call_cc->cc;
2945 PL_reg_call_cc = PL_reg_call_cc->prev;
2946 if (regmatch(cur_call_cc->node)) {
2947 PL_reg_call_cc = cur_call_cc;
2953 PL_reg_call_cc = cur_call_cc;
2959 PerlIO_printf(Perl_debug_log,
2960 "%*s continuation failed...\n",
2961 REPORT_CODE_OFF+PL_regindent*2, "")
2965 if (locinput < PL_regtill) {
2966 DEBUG_r(PerlIO_printf(Perl_debug_log,
2967 "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
2969 (long)(locinput - PL_reg_starttry),
2970 (long)(PL_regtill - PL_reg_starttry),
2972 sayNO_FINAL; /* Cannot match: too short. */
2974 PL_reginput = locinput; /* put where regtry can find it */
2975 sayYES_FINAL; /* Success! */
2977 PL_reginput = locinput; /* put where regtry can find it */
2978 sayYES_LOUD; /* Success! */
2981 PL_reginput = locinput;
2986 if (UTF) { /* XXXX This is absolutely
2987 broken, we read before
2989 s = HOPMAYBEc(locinput, -scan->flags);
2995 if (locinput < PL_bostr + scan->flags)
2997 PL_reginput = locinput - scan->flags;
3002 PL_reginput = locinput;
3007 if (UTF) { /* XXXX This is absolutely
3008 broken, we read before
3010 s = HOPMAYBEc(locinput, -scan->flags);
3011 if (!s || s < PL_bostr)
3016 if (locinput < PL_bostr + scan->flags)
3018 PL_reginput = locinput - scan->flags;
3023 PL_reginput = locinput;
3026 inner = NEXTOPER(NEXTOPER(scan));
3027 if (regmatch(inner) != n) {
3042 if (OP(scan) == SUSPEND) {
3043 locinput = PL_reginput;
3044 nextchr = UCHARAT(locinput);
3049 next = scan + ARG(scan);
3054 PerlIO_printf(Perl_error_log, "%lx %d\n",
3055 (unsigned long)scan, OP(scan));
3056 Perl_croak(aTHX_ "regexp memory corruption");
3062 * We get here only if there's trouble -- normally "case END" is
3063 * the terminating point.
3065 Perl_croak(aTHX_ "corrupted regexp pointers");
3071 PerlIO_printf(Perl_debug_log,
3072 "%*s %scould match...%s\n",
3073 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],PL_colors[5])
3077 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
3078 PL_colors[4],PL_colors[5]));
3087 PerlIO_printf(Perl_debug_log,
3088 "%*s %sfailed...%s\n",
3089 REPORT_CODE_OFF+PL_regindent*2, "",PL_colors[4],PL_colors[5])
3101 - regrepeat - repeatedly match something simple, report how many
3104 * [This routine now assumes that it will only match on things of length 1.
3105 * That was true before, but now we assume scan - reginput is the count,
3106 * rather than incrementing count on every character. [Er, except utf8.]]
3109 S_regrepeat(pTHX_ regnode *p, I32 max)
3112 register char *scan;
3113 register char *opnd;
3115 register char *loceol = PL_regeol;
3116 register I32 hardcount = 0;
3119 if (max != REG_INFTY && max < loceol - scan)
3120 loceol = scan + max;
3123 while (scan < loceol && *scan != '\n')
3131 while (scan < loceol && *scan != '\n') {
3132 scan += UTF8SKIP(scan);
3138 while (scan < loceol) {
3139 scan += UTF8SKIP(scan);
3143 case EXACT: /* length of string is 1 */
3145 while (scan < loceol && UCHARAT(scan) == c)
3148 case EXACTF: /* length of string is 1 */
3150 while (scan < loceol &&
3151 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
3154 case EXACTFL: /* length of string is 1 */
3155 PL_reg_flags |= RF_tainted;
3157 while (scan < loceol &&
3158 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
3163 while (scan < loceol && REGINCLASSUTF8(p, (U8*)scan)) {
3164 scan += UTF8SKIP(scan);
3170 while (scan < loceol && REGINCLASS(opnd, *scan))
3174 while (scan < loceol && isALNUM(*scan))
3179 while (scan < loceol && swash_fetch(PL_utf8_alnum, (U8*)scan)) {
3180 scan += UTF8SKIP(scan);
3185 PL_reg_flags |= RF_tainted;
3186 while (scan < loceol && isALNUM_LC(*scan))
3190 PL_reg_flags |= RF_tainted;
3192 while (scan < loceol && isALNUM_LC_utf8((U8*)scan)) {
3193 scan += UTF8SKIP(scan);
3199 while (scan < loceol && !isALNUM(*scan))
3204 while (scan < loceol && !swash_fetch(PL_utf8_alnum, (U8*)scan)) {
3205 scan += UTF8SKIP(scan);
3210 PL_reg_flags |= RF_tainted;
3211 while (scan < loceol && !isALNUM_LC(*scan))
3215 PL_reg_flags |= RF_tainted;
3217 while (scan < loceol && !isALNUM_LC_utf8((U8*)scan)) {
3218 scan += UTF8SKIP(scan);
3223 while (scan < loceol && isSPACE(*scan))
3228 while (scan < loceol && (*scan == ' ' || swash_fetch(PL_utf8_space,(U8*)scan))) {
3229 scan += UTF8SKIP(scan);
3234 PL_reg_flags |= RF_tainted;
3235 while (scan < loceol && isSPACE_LC(*scan))
3239 PL_reg_flags |= RF_tainted;
3241 while (scan < loceol && (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
3242 scan += UTF8SKIP(scan);
3247 while (scan < loceol && !isSPACE(*scan))
3252 while (scan < loceol && !(*scan == ' ' || swash_fetch(PL_utf8_space,(U8*)scan))) {
3253 scan += UTF8SKIP(scan);
3258 PL_reg_flags |= RF_tainted;
3259 while (scan < loceol && !isSPACE_LC(*scan))
3263 PL_reg_flags |= RF_tainted;
3265 while (scan < loceol && !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
3266 scan += UTF8SKIP(scan);
3271 while (scan < loceol && isDIGIT(*scan))
3276 while (scan < loceol && swash_fetch(PL_utf8_digit,(U8*)scan)) {
3277 scan += UTF8SKIP(scan);
3283 while (scan < loceol && !isDIGIT(*scan))
3288 while (scan < loceol && !swash_fetch(PL_utf8_digit,(U8*)scan)) {
3289 scan += UTF8SKIP(scan);
3293 default: /* Called on something of 0 width. */
3294 break; /* So match right here or not at all. */
3300 c = scan - PL_reginput;
3305 SV *prop = sv_newmortal();
3308 PerlIO_printf(Perl_debug_log,
3309 "%*s %s can match %ld times out of %ld...\n",
3310 REPORT_CODE_OFF+1, "", SvPVX(prop),c,max);
3317 - regrepeat_hard - repeatedly match something, report total lenth and length
3319 * The repeater is supposed to have constant length.
3323 S_regrepeat_hard(pTHX_ regnode *p, I32 max, I32 *lp)
3326 register char *scan;
3327 register char *start;
3328 register char *loceol = PL_regeol;
3330 I32 count = 0, res = 1;
3335 start = PL_reginput;
3337 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
3340 while (start < PL_reginput) {
3342 start += UTF8SKIP(start);
3353 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
3355 *lp = l = PL_reginput - start;
3356 if (max != REG_INFTY && l*max < loceol - scan)
3357 loceol = scan + l*max;
3370 - reginclass - determine if a character falls into a character class
3374 S_reginclass(pTHX_ register char *p, register I32 c)
3377 char flags = ANYOF_FLAGS(p);
3381 if (ANYOF_BITMAP_TEST(p, c))
3383 else if (flags & ANYOF_FOLD) {
3385 if (flags & ANYOF_LOCALE) {
3386 PL_reg_flags |= RF_tainted;
3387 cf = PL_fold_locale[c];
3391 if (ANYOF_BITMAP_TEST(p, cf))
3395 if (!match && (flags & ANYOF_CLASS)) {
3396 PL_reg_flags |= RF_tainted;
3398 (ANYOF_CLASS_TEST(p, ANYOF_ALNUM) && isALNUM_LC(c)) ||
3399 (ANYOF_CLASS_TEST(p, ANYOF_NALNUM) && !isALNUM_LC(c)) ||
3400 (ANYOF_CLASS_TEST(p, ANYOF_SPACE) && isSPACE_LC(c)) ||
3401 (ANYOF_CLASS_TEST(p, ANYOF_NSPACE) && !isSPACE_LC(c)) ||
3402 (ANYOF_CLASS_TEST(p, ANYOF_DIGIT) && isDIGIT_LC(c)) ||
3403 (ANYOF_CLASS_TEST(p, ANYOF_NDIGIT) && !isDIGIT_LC(c)) ||
3404 (ANYOF_CLASS_TEST(p, ANYOF_ALNUMC) && isALNUMC_LC(c)) ||
3405 (ANYOF_CLASS_TEST(p, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
3406 (ANYOF_CLASS_TEST(p, ANYOF_ALPHA) && isALPHA_LC(c)) ||
3407 (ANYOF_CLASS_TEST(p, ANYOF_NALPHA) && !isALPHA_LC(c)) ||
3408 (ANYOF_CLASS_TEST(p, ANYOF_ASCII) && isASCII(c)) ||
3409 (ANYOF_CLASS_TEST(p, ANYOF_NASCII) && !isASCII(c)) ||
3410 (ANYOF_CLASS_TEST(p, ANYOF_CNTRL) && isCNTRL_LC(c)) ||
3411 (ANYOF_CLASS_TEST(p, ANYOF_NCNTRL) && !isCNTRL_LC(c)) ||
3412 (ANYOF_CLASS_TEST(p, ANYOF_GRAPH) && isGRAPH_LC(c)) ||
3413 (ANYOF_CLASS_TEST(p, ANYOF_NGRAPH) && !isGRAPH_LC(c)) ||
3414 (ANYOF_CLASS_TEST(p, ANYOF_LOWER) && isLOWER_LC(c)) ||
3415 (ANYOF_CLASS_TEST(p, ANYOF_NLOWER) && !isLOWER_LC(c)) ||
3416 (ANYOF_CLASS_TEST(p, ANYOF_PRINT) && isPRINT_LC(c)) ||
3417 (ANYOF_CLASS_TEST(p, ANYOF_NPRINT) && !isPRINT_LC(c)) ||
3418 (ANYOF_CLASS_TEST(p, ANYOF_PUNCT) && isPUNCT_LC(c)) ||
3419 (ANYOF_CLASS_TEST(p, ANYOF_NPUNCT) && !isPUNCT_LC(c)) ||
3420 (ANYOF_CLASS_TEST(p, ANYOF_UPPER) && isUPPER_LC(c)) ||
3421 (ANYOF_CLASS_TEST(p, ANYOF_NUPPER) && !isUPPER_LC(c)) ||
3422 (ANYOF_CLASS_TEST(p, ANYOF_XDIGIT) && isXDIGIT(c)) ||
3423 (ANYOF_CLASS_TEST(p, ANYOF_NXDIGIT) && !isXDIGIT(c))
3424 ) /* How's that for a conditional? */
3430 return (flags & ANYOF_INVERT) ? !match : match;
3434 S_reginclassutf8(pTHX_ regnode *f, U8 *p)
3437 char flags = ARG1(f);
3439 SV *sv = (SV*)PL_regdata->data[ARG2(f)];
3441 if (swash_fetch(sv, p))
3443 else if (flags & ANYOF_FOLD) {
3446 if (flags & ANYOF_LOCALE) {
3447 PL_reg_flags |= RF_tainted;
3448 uv_to_utf8(tmpbuf, toLOWER_LC_utf8(p));
3451 uv_to_utf8(tmpbuf, toLOWER_utf8(p));
3452 if (swash_fetch(sv, tmpbuf))
3456 /* UTF8 combined with ANYOF_CLASS is ill-defined. */
3458 return (flags & ANYOF_INVERT) ? !match : match;
3462 S_reghop(pTHX_ U8 *s, I32 off)
3466 while (off-- && s < (U8*)PL_regeol)
3471 if (s > (U8*)PL_bostr) {
3474 while (s > (U8*)PL_bostr && (*s & 0xc0) == 0x80)
3476 } /* XXX could check well-formedness here */
3484 S_reghopmaybe(pTHX_ U8* s, I32 off)
3488 while (off-- && s < (U8*)PL_regeol)
3495 if (s > (U8*)PL_bostr) {
3498 while (s > (U8*)PL_bostr && (*s & 0xc0) == 0x80)
3500 } /* XXX could check well-formedness here */
3517 restore_pos(pTHXo_ void *arg)
3520 if (PL_reg_eval_set) {
3521 if (PL_reg_oldsaved) {
3522 PL_reg_re->subbeg = PL_reg_oldsaved;
3523 PL_reg_re->sublen = PL_reg_oldsavedlen;
3524 RX_MATCH_COPIED_on(PL_reg_re);
3526 PL_reg_magic->mg_len = PL_reg_oldpos;
3527 PL_reg_eval_set = 0;
3528 PL_curpm = PL_reg_oldcurpm;