5 * "One Ring to rule them all, One Ring to find them..."
8 /* NOTE: this is derived from Henry Spencer's regexp code, and should not
9 * confused with the original package (see point 3 below). Thanks, Henry!
12 /* Additional note: this code is very heavily munged from Henry's version
13 * in places. In some spots I've traded clarity for efficiency, so don't
14 * blame Henry for some of the lack of readability.
17 /* The names of the functions have been changed from regcomp and
18 * regexec to pregcomp and pregexec in order to avoid conflicts
19 * with the POSIX routines of the same names.
22 #ifdef PERL_EXT_RE_BUILD
23 /* need to replace pregcomp et al, so enable that */
24 # ifndef PERL_IN_XSUB_RE
25 # define PERL_IN_XSUB_RE
27 /* need access to debugger hooks */
28 # if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING)
33 #ifdef PERL_IN_XSUB_RE
34 /* We *really* need to overwrite these symbols: */
35 # define Perl_regexec_flags my_regexec
36 # define Perl_regdump my_regdump
37 # define Perl_regprop my_regprop
38 # define Perl_re_intuit_start my_re_intuit_start
39 /* *These* symbols are masked to allow static link. */
40 # define Perl_pregexec my_pregexec
41 # define Perl_reginitcolors my_reginitcolors
43 # define PERL_NO_GET_CONTEXT
48 * pregcomp and pregexec -- regsub and regerror are not used in perl
50 * Copyright (c) 1986 by University of Toronto.
51 * Written by Henry Spencer. Not derived from licensed software.
53 * Permission is granted to anyone to use this software for any
54 * purpose on any computer system, and to redistribute it freely,
55 * subject to the following restrictions:
57 * 1. The author is not responsible for the consequences of use of
58 * this software, no matter how awful, even if they arise
61 * 2. The origin of this software must not be misrepresented, either
62 * by explicit claim or by omission.
64 * 3. Altered versions must be plainly marked as such, and must not
65 * be misrepresented as being the original software.
67 **** Alterations to Henry's code are...
69 **** Copyright (c) 1991-2000, Larry Wall
71 **** You may distribute under the terms of either the GNU General Public
72 **** License or the Artistic License, as specified in the README file.
74 * Beware that some of this code is subtly aware of the way operator
75 * precedence is structured in regular expressions. Serious changes in
76 * regular-expression syntax might require a total rethink.
79 #define PERL_IN_REGEXEC_C
82 #ifdef PERL_IN_XSUB_RE
83 # if defined(PERL_CAPI) || defined(PERL_OBJECT)
90 #define RF_tainted 1 /* tainted information used? */
91 #define RF_warned 2 /* warned about big count? */
92 #define RF_evaled 4 /* Did an EVAL with setting? */
93 #define RF_utf8 8 /* String contains multibyte chars? */
95 #define UTF (PL_reg_flags & RF_utf8)
97 #define RS_init 1 /* eval environment created */
98 #define RS_set 2 /* replsv value is set */
101 #define STATIC static
108 #define REGINCLASS(p,c) (ANYOF_FLAGS(p) ? reginclass(p,c) : ANYOF_BITMAP_TEST(p,c))
109 #define REGINCLASSUTF8(f,p) (ARG1(f) ? reginclassutf8(f,p) : swash_fetch((SV*)PL_regdata->data[ARG2(f)],p))
111 #define CHR_SVLEN(sv) (UTF ? sv_len_utf8(sv) : SvCUR(sv))
112 #define CHR_DIST(a,b) (UTF ? utf8_distance(a,b) : a - b)
114 #define reghop_c(pos,off) ((char*)reghop((U8*)pos, off))
115 #define reghopmaybe_c(pos,off) ((char*)reghopmaybe((U8*)pos, off))
116 #define HOP(pos,off) (UTF ? reghop((U8*)pos, off) : (U8*)(pos + off))
117 #define HOPMAYBE(pos,off) (UTF ? reghopmaybe((U8*)pos, off) : (U8*)(pos + off))
118 #define HOPc(pos,off) ((char*)HOP(pos,off))
119 #define HOPMAYBEc(pos,off) ((char*)HOPMAYBE(pos,off))
121 static void restore_pos(pTHXo_ void *arg);
125 S_regcppush(pTHX_ I32 parenfloor)
128 int retval = PL_savestack_ix;
129 int i = (PL_regsize - parenfloor) * 4;
133 for (p = PL_regsize; p > parenfloor; p--) {
134 SSPUSHINT(PL_regendp[p]);
135 SSPUSHINT(PL_regstartp[p]);
136 SSPUSHPTR(PL_reg_start_tmp[p]);
139 SSPUSHINT(PL_regsize);
140 SSPUSHINT(*PL_reglastparen);
141 SSPUSHPTR(PL_reginput);
143 SSPUSHINT(SAVEt_REGCONTEXT);
147 /* These are needed since we do not localize EVAL nodes: */
148 # define REGCP_SET(cp) DEBUG_r(PerlIO_printf(Perl_debug_log, \
149 " Setting an EVAL scope, savestack=%"IVdf"\n", \
150 (IV)PL_savestack_ix)); cp = PL_savestack_ix
152 # define REGCP_UNWIND(cp) DEBUG_r(cp != PL_savestack_ix ? \
153 PerlIO_printf(Perl_debug_log, \
154 " Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
155 (IV)(cp), (IV)PL_savestack_ix) : 0); regcpblow(cp)
165 assert(i == SAVEt_REGCONTEXT);
167 input = (char *) SSPOPPTR;
168 *PL_reglastparen = SSPOPINT;
169 PL_regsize = SSPOPINT;
170 for (i -= 3; i > 0; i -= 4) {
171 paren = (U32)SSPOPINT;
172 PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
173 PL_regstartp[paren] = SSPOPINT;
175 if (paren <= *PL_reglastparen)
176 PL_regendp[paren] = tmps;
178 PerlIO_printf(Perl_debug_log,
179 " restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
180 (UV)paren, (IV)PL_regstartp[paren],
181 (IV)(PL_reg_start_tmp[paren] - PL_bostr),
182 (IV)PL_regendp[paren],
183 (paren > *PL_reglastparen ? "(no)" : ""));
187 if (*PL_reglastparen + 1 <= PL_regnpar) {
188 PerlIO_printf(Perl_debug_log,
189 " restoring \\%"IVdf"..\\%"IVdf" to undef\n",
190 (IV)(*PL_reglastparen + 1), (IV)PL_regnpar);
194 /* It would seem that the similar code in regtry()
195 * already takes care of this, and in fact it is in
196 * a better location to since this code can #if 0-ed out
197 * but the code in regtry() is needed or otherwise tests
198 * requiring null fields (pat.t#187 and split.t#{13,14}
199 * (as of 7877) will fail. --jhi */
200 for (paren = *PL_reglastparen + 1; paren <= PL_regnpar; paren++) {
201 if (paren > PL_regsize)
202 PL_regstartp[paren] = -1;
203 PL_regendp[paren] = -1;
210 S_regcp_set_to(pTHX_ I32 ss)
213 I32 tmp = PL_savestack_ix;
215 PL_savestack_ix = ss;
217 PL_savestack_ix = tmp;
221 typedef struct re_cc_state
225 struct re_cc_state *prev;
230 #define regcpblow(cp) LEAVE_SCOPE(cp) /* Ignores regcppush()ed data. */
232 #define TRYPAREN(paren, n, input) { \
235 PL_regstartp[paren] = HOPc(input, -1) - PL_bostr; \
236 PL_regendp[paren] = input - PL_bostr; \
239 PL_regendp[paren] = -1; \
241 if (regmatch(next)) \
244 PL_regendp[paren] = -1; \
249 * pregexec and friends
253 - pregexec - match a regexp against a string
256 Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend,
257 char *strbeg, I32 minend, SV *screamer, U32 nosave)
258 /* strend: pointer to null at end of string */
259 /* strbeg: real beginning of string */
260 /* minend: end of match must be >=minend after stringarg. */
261 /* nosave: For optimizations. */
264 regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
265 nosave ? 0 : REXEC_COPY_STR);
269 S_cache_re(pTHX_ regexp *prog)
272 PL_regprecomp = prog->precomp; /* Needed for FAIL. */
274 PL_regprogram = prog->program;
276 PL_regnpar = prog->nparens;
277 PL_regdata = prog->data;
282 * Need to implement the following flags for reg_anch:
284 * USE_INTUIT_NOML - Useful to call re_intuit_start() first
286 * INTUIT_AUTORITATIVE_NOML - Can trust a positive answer
287 * INTUIT_AUTORITATIVE_ML
288 * INTUIT_ONCE_NOML - Intuit can match in one location only.
291 * Another flag for this function: SECOND_TIME (so that float substrs
292 * with giant delta may be not rechecked).
295 /* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
297 /* If SCREAM, then SvPVX(sv) should be compatible with strpos and strend.
298 Otherwise, only SvCUR(sv) is used to get strbeg. */
300 /* XXXX We assume that strpos is strbeg unless sv. */
302 /* XXXX Some places assume that there is a fixed substring.
303 An update may be needed if optimizer marks as "INTUITable"
304 RExen without fixed substrings. Similarly, it is assumed that
305 lengths of all the strings are no more than minlen, thus they
306 cannot come from lookahead.
307 (Or minlen should take into account lookahead.) */
309 /* A failure to find a constant substring means that there is no need to make
310 an expensive call to REx engine, thus we celebrate a failure. Similarly,
311 finding a substring too deep into the string means that less calls to
312 regtry() should be needed.
314 REx compiler's optimizer found 4 possible hints:
315 a) Anchored substring;
317 c) Whether we are anchored (beginning-of-line or \G);
318 d) First node (of those at offset 0) which may distingush positions;
319 We use a)b)d) and multiline-part of c), and try to find a position in the
320 string which does not contradict any of them.
323 /* Most of decisions we do here should have been done at compile time.
324 The nodes of the REx which we used for the search should have been
325 deleted from the finite automaton. */
328 Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
329 char *strend, U32 flags, re_scream_pos_data *data)
331 register I32 start_shift;
332 /* Should be nonnegative! */
333 register I32 end_shift;
340 register char *other_last = Nullch; /* other substr checked before this */
341 char *check_at; /* check substr found at this pos */
343 char *i_strpos = strpos;
346 DEBUG_r( if (!PL_colorset) reginitcolors() );
347 DEBUG_r(PerlIO_printf(Perl_debug_log,
348 "%sGuessing start of match, REx%s `%s%.60s%s%s' against `%s%.*s%s%s'...\n",
349 PL_colors[4],PL_colors[5],PL_colors[0],
352 (strlen(prog->precomp) > 60 ? "..." : ""),
354 (int)(strend - strpos > 60 ? 60 : strend - strpos),
355 strpos, PL_colors[1],
356 (strend - strpos > 60 ? "..." : ""))
359 if (prog->minlen > strend - strpos) {
360 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too short...\n"));
363 strbeg = (sv && SvPOK(sv)) ? strend - SvCUR(sv) : strpos;
364 check = prog->check_substr;
365 if (prog->reganch & ROPT_ANCH) { /* Match at beg-of-str or after \n */
366 ml_anch = !( (prog->reganch & ROPT_ANCH_SINGLE)
367 || ( (prog->reganch & ROPT_ANCH_BOL)
368 && !PL_multiline ) ); /* Check after \n? */
371 if ( !(prog->reganch & ROPT_ANCH_GPOS) /* Checked by the caller */
372 /* SvCUR is not set on references: SvRV and SvPVX overlap */
374 && (strpos != strbeg)) {
375 DEBUG_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
378 if (prog->check_offset_min == prog->check_offset_max) {
379 /* Substring at constant offset from beg-of-str... */
382 PL_regeol = strend; /* Used in HOP() */
383 s = HOPc(strpos, prog->check_offset_min);
385 slen = SvCUR(check); /* >= 1 */
387 if ( strend - s > slen || strend - s < slen - 1
388 || (strend - s == slen && strend[-1] != '\n')) {
389 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
392 /* Now should match s[0..slen-2] */
394 if (slen && (*SvPVX(check) != *s
396 && memNE(SvPVX(check), s, slen)))) {
398 DEBUG_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
402 else if (*SvPVX(check) != *s
403 || ((slen = SvCUR(check)) > 1
404 && memNE(SvPVX(check), s, slen)))
406 goto success_at_start;
409 /* Match is anchored, but substr is not anchored wrt beg-of-str. */
411 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
412 end_shift = prog->minlen - start_shift -
413 CHR_SVLEN(check) + (SvTAIL(check) != 0);
415 I32 end = prog->check_offset_max + CHR_SVLEN(check)
416 - (SvTAIL(check) != 0);
417 I32 eshift = strend - s - end;
419 if (end_shift < eshift)
423 else { /* Can match at random position */
426 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
427 /* Should be nonnegative! */
428 end_shift = prog->minlen - start_shift -
429 CHR_SVLEN(check) + (SvTAIL(check) != 0);
432 #ifdef DEBUGGING /* 7/99: reports of failure (with the older version) */
434 Perl_croak(aTHX_ "panic: end_shift");
438 /* Find a possible match in the region s..strend by looking for
439 the "check" substring in the region corrected by start/end_shift. */
440 if (flags & REXEC_SCREAM) {
441 I32 p = -1; /* Internal iterator of scream. */
442 I32 *pp = data ? data->scream_pos : &p;
444 if (PL_screamfirst[BmRARE(check)] >= 0
445 || ( BmRARE(check) == '\n'
446 && (BmPREVIOUS(check) == SvCUR(check) - 1)
448 s = screaminstr(sv, check,
449 start_shift + (s - strbeg), end_shift, pp, 0);
453 *data->scream_olds = s;
456 s = fbm_instr((unsigned char*)s + start_shift,
457 (unsigned char*)strend - end_shift,
458 check, PL_multiline ? FBMrf_MULTILINE : 0);
460 /* Update the count-of-usability, remove useless subpatterns,
463 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s %s substr `%s%.*s%s'%s%s",
464 (s ? "Found" : "Did not find"),
465 ((check == prog->anchored_substr) ? "anchored" : "floating"),
467 (int)(SvCUR(check) - (SvTAIL(check)!=0)),
469 PL_colors[1], (SvTAIL(check) ? "$" : ""),
470 (s ? " at offset " : "...\n") ) );
477 /* Finish the diagnostic message */
478 DEBUG_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
480 /* Got a candidate. Check MBOL anchoring, and the *other* substr.
481 Start with the other substr.
482 XXXX no SCREAM optimization yet - and a very coarse implementation
483 XXXX /ttx+/ results in anchored=`ttx', floating=`x'. floating will
484 *always* match. Probably should be marked during compile...
485 Probably it is right to do no SCREAM here...
488 if (prog->float_substr && prog->anchored_substr) {
489 /* Take into account the "other" substring. */
490 /* XXXX May be hopelessly wrong for UTF... */
493 if (check == prog->float_substr) {
496 char *last = s - start_shift, *last1, *last2;
500 t = s - prog->check_offset_max;
501 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
502 && (!(prog->reganch & ROPT_UTF8)
503 || (PL_bostr = strpos, /* Used in regcopmaybe() */
504 (t = reghopmaybe_c(s, -(prog->check_offset_max)))
509 t += prog->anchored_offset;
510 if (t < other_last) /* These positions already checked */
513 last2 = last1 = strend - prog->minlen;
516 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
517 /* On end-of-str: see comment below. */
518 s = fbm_instr((unsigned char*)t,
519 (unsigned char*)last1 + prog->anchored_offset
520 + SvCUR(prog->anchored_substr)
521 - (SvTAIL(prog->anchored_substr)!=0),
522 prog->anchored_substr, PL_multiline ? FBMrf_MULTILINE : 0);
523 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s anchored substr `%s%.*s%s'%s",
524 (s ? "Found" : "Contradicts"),
526 (int)(SvCUR(prog->anchored_substr)
527 - (SvTAIL(prog->anchored_substr)!=0)),
528 SvPVX(prog->anchored_substr),
529 PL_colors[1], (SvTAIL(prog->anchored_substr) ? "$" : "")));
531 if (last1 >= last2) {
532 DEBUG_r(PerlIO_printf(Perl_debug_log,
533 ", giving up...\n"));
536 DEBUG_r(PerlIO_printf(Perl_debug_log,
537 ", trying floating at offset %ld...\n",
538 (long)(s1 + 1 - i_strpos)));
539 PL_regeol = strend; /* Used in HOP() */
540 other_last = last1 + prog->anchored_offset + 1;
545 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
546 (long)(s - i_strpos)));
547 t = s - prog->anchored_offset;
556 else { /* Take into account the floating substring. */
561 last1 = last = strend - prog->minlen + prog->float_min_offset;
562 if (last - t > prog->float_max_offset)
563 last = t + prog->float_max_offset;
564 s = t + prog->float_min_offset;
567 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
568 /* fbm_instr() takes into account exact value of end-of-str
569 if the check is SvTAIL(ed). Since false positives are OK,
570 and end-of-str is not later than strend we are OK. */
571 s = fbm_instr((unsigned char*)s,
572 (unsigned char*)last + SvCUR(prog->float_substr)
573 - (SvTAIL(prog->float_substr)!=0),
574 prog->float_substr, PL_multiline ? FBMrf_MULTILINE : 0);
575 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s floating substr `%s%.*s%s'%s",
576 (s ? "Found" : "Contradicts"),
578 (int)(SvCUR(prog->float_substr)
579 - (SvTAIL(prog->float_substr)!=0)),
580 SvPVX(prog->float_substr),
581 PL_colors[1], (SvTAIL(prog->float_substr) ? "$" : "")));
584 DEBUG_r(PerlIO_printf(Perl_debug_log,
585 ", giving up...\n"));
588 DEBUG_r(PerlIO_printf(Perl_debug_log,
589 ", trying anchored starting at offset %ld...\n",
590 (long)(s1 + 1 - i_strpos)));
591 other_last = last + 1;
592 PL_regeol = strend; /* Used in HOP() */
597 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
598 (long)(s - i_strpos)));
608 t = s - prog->check_offset_max;
610 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
611 && (!(prog->reganch & ROPT_UTF8)
612 || (PL_bostr = strpos, /* Used in regcopmaybe() */
613 ((t = reghopmaybe_c(s, -(prog->check_offset_max)))
616 /* Fixed substring is found far enough so that the match
617 cannot start at strpos. */
619 if (ml_anch && t[-1] != '\n') {
620 /* Eventually fbm_*() should handle this, but often
621 anchored_offset is not 0, so this check will not be wasted. */
622 /* XXXX In the code below we prefer to look for "^" even in
623 presence of anchored substrings. And we search even
624 beyond the found float position. These pessimizations
625 are historical artefacts only. */
627 while (t < strend - prog->minlen) {
629 if (t < check_at - prog->check_offset_min) {
630 if (prog->anchored_substr) {
631 /* Since we moved from the found position,
632 we definitely contradict the found anchored
633 substr. Due to the above check we do not
634 contradict "check" substr.
635 Thus we can arrive here only if check substr
636 is float. Redo checking for "other"=="fixed".
639 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
640 PL_colors[0],PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
641 goto do_other_anchored;
643 /* We don't contradict the found floating substring. */
644 /* XXXX Why not check for STCLASS? */
646 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
647 PL_colors[0],PL_colors[1], (long)(s - i_strpos)));
650 /* Position contradicts check-string */
651 /* XXXX probably better to look for check-string
652 than for "\n", so one should lower the limit for t? */
653 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n",
654 PL_colors[0],PL_colors[1], (long)(t + 1 - i_strpos)));
655 other_last = strpos = s = t + 1;
660 DEBUG_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
661 PL_colors[0],PL_colors[1]));
665 DEBUG_r(PerlIO_printf(Perl_debug_log, "Starting position does not contradict /%s^%s/m...\n",
666 PL_colors[0],PL_colors[1]));
670 ++BmUSEFUL(prog->check_substr); /* hooray/5 */
674 /* The found string does not prohibit matching at strpos,
675 - no optimization of calling REx engine can be performed,
676 unless it was an MBOL and we are not after MBOL,
677 or a future STCLASS check will fail this. */
679 /* Even in this situation we may use MBOL flag if strpos is offset
680 wrt the start of the string. */
681 if (ml_anch && sv && !SvROK(sv) /* See prev comment on SvROK */
682 && (strpos != strbeg) && strpos[-1] != '\n'
683 /* May be due to an implicit anchor of m{.*foo} */
684 && !(prog->reganch & ROPT_IMPLICIT))
689 DEBUG_r( if (ml_anch)
690 PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n",
691 (long)(strpos - i_strpos), PL_colors[0],PL_colors[1]);
694 if (!(prog->reganch & ROPT_NAUGHTY) /* XXXX If strpos moved? */
695 && prog->check_substr /* Could be deleted already */
696 && --BmUSEFUL(prog->check_substr) < 0
697 && prog->check_substr == prog->float_substr)
699 /* If flags & SOMETHING - do not do it many times on the same match */
700 DEBUG_r(PerlIO_printf(Perl_debug_log, "... Disabling check substring...\n"));
701 SvREFCNT_dec(prog->check_substr);
702 prog->check_substr = Nullsv; /* disable */
703 prog->float_substr = Nullsv; /* clear */
704 check = Nullsv; /* abort */
706 /* XXXX This is a remnant of the old implementation. It
707 looks wasteful, since now INTUIT can use many
709 prog->reganch &= ~RE_USE_INTUIT;
716 /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
717 if (prog->regstclass) {
718 /* minlen == 0 is possible if regstclass is \b or \B,
719 and the fixed substr is ''$.
720 Since minlen is already taken into account, s+1 is before strend;
721 accidentally, minlen >= 1 guaranties no false positives at s + 1
722 even for \b or \B. But (minlen? 1 : 0) below assumes that
723 regstclass does not come from lookahead... */
724 /* If regstclass takes bytelength more than 1: If charlength==1, OK.
725 This leaves EXACTF only, which is dealt with in find_byclass(). */
726 int cl_l = (PL_regkind[(U8)OP(prog->regstclass)] == EXACT
727 ? STR_LEN(prog->regstclass)
729 char *endpos = (prog->anchored_substr || ml_anch)
730 ? s + (prog->minlen? cl_l : 0)
731 : (prog->float_substr ? check_at - start_shift + cl_l
733 char *startpos = strbeg;
736 if (prog->reganch & ROPT_UTF8) {
737 PL_regdata = prog->data; /* Used by REGINCLASS UTF logic */
740 s = find_byclass(prog, prog->regstclass, s, endpos, startpos, 1);
745 if (endpos == strend) {
746 DEBUG_r( PerlIO_printf(Perl_debug_log,
747 "Could not match STCLASS...\n") );
750 DEBUG_r( PerlIO_printf(Perl_debug_log,
751 "This position contradicts STCLASS...\n") );
752 if ((prog->reganch & ROPT_ANCH) && !ml_anch)
754 /* Contradict one of substrings */
755 if (prog->anchored_substr) {
756 if (prog->anchored_substr == check) {
757 DEBUG_r( what = "anchored" );
759 PL_regeol = strend; /* Used in HOP() */
761 if (s + start_shift + end_shift > strend) {
762 /* XXXX Should be taken into account earlier? */
763 DEBUG_r( PerlIO_printf(Perl_debug_log,
764 "Could not match STCLASS...\n") );
769 DEBUG_r( PerlIO_printf(Perl_debug_log,
770 "Looking for %s substr starting at offset %ld...\n",
771 what, (long)(s + start_shift - i_strpos)) );
774 /* Have both, check_string is floating */
775 if (t + start_shift >= check_at) /* Contradicts floating=check */
776 goto retry_floating_check;
777 /* Recheck anchored substring, but not floating... */
781 DEBUG_r( PerlIO_printf(Perl_debug_log,
782 "Looking for anchored substr starting at offset %ld...\n",
783 (long)(other_last - i_strpos)) );
784 goto do_other_anchored;
786 /* Another way we could have checked stclass at the
787 current position only: */
792 DEBUG_r( PerlIO_printf(Perl_debug_log,
793 "Looking for /%s^%s/m starting at offset %ld...\n",
794 PL_colors[0],PL_colors[1], (long)(t - i_strpos)) );
797 if (!prog->float_substr) /* Could have been deleted */
799 /* Check is floating subtring. */
800 retry_floating_check:
801 t = check_at - start_shift;
802 DEBUG_r( what = "floating" );
803 goto hop_and_restart;
806 PerlIO_printf(Perl_debug_log,
807 "By STCLASS: moving %ld --> %ld\n",
808 (long)(t - i_strpos), (long)(s - i_strpos));
810 PerlIO_printf(Perl_debug_log,
811 "Does not contradict STCLASS...\n") );
814 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s%s:%s match at offset %ld\n",
815 PL_colors[4], (check ? "Guessed" : "Giving up"),
816 PL_colors[5], (long)(s - i_strpos)) );
819 fail_finish: /* Substring not found */
820 if (prog->check_substr) /* could be removed already */
821 BmUSEFUL(prog->check_substr) += 5; /* hooray */
823 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
824 PL_colors[4],PL_colors[5]));
828 /* We know what class REx starts with. Try to find this position... */
830 S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *startpos, I32 norun)
832 I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
838 register I32 tmp = 1; /* Scratch variable? */
840 /* We know what class it must start with. */
844 if (REGINCLASSUTF8(c, (U8*)s)) {
845 if (tmp && (norun || regtry(prog, s)))
857 if (REGINCLASS(c, *(U8*)s)) {
858 if (tmp && (norun || regtry(prog, s)))
878 c2 = PL_fold_locale[c1];
883 e = s; /* Due to minlen logic of intuit() */
884 /* Here it is NOT UTF! */
888 && (ln == 1 || !(OP(c) == EXACTF
890 : ibcmp_locale(s, m, ln)))
891 && (norun || regtry(prog, s)) )
897 if ( (*(U8*)s == c1 || *(U8*)s == c2)
898 && (ln == 1 || !(OP(c) == EXACTF
900 : ibcmp_locale(s, m, ln)))
901 && (norun || regtry(prog, s)) )
908 PL_reg_flags |= RF_tainted;
911 tmp = (s != startpos) ? UCHARAT(s - 1) : '\n';
912 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
914 if (tmp == !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
916 if ((norun || regtry(prog, s)))
921 if ((!prog->minlen && tmp) && (norun || regtry(prog, s)))
925 PL_reg_flags |= RF_tainted;
931 U8 *r = reghop((U8*)s, -1);
933 tmp = (I32)utf8_to_uv(r, s - (char*)r, 0, 0);
935 tmp = ((OP(c) == BOUNDUTF8 ?
936 isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
938 if (tmp == !(OP(c) == BOUNDUTF8 ?
939 swash_fetch(PL_utf8_alnum, (U8*)s) :
940 isALNUM_LC_utf8((U8*)s)))
943 if ((norun || regtry(prog, s)))
948 if ((!prog->minlen && tmp) && (norun || regtry(prog, s)))
952 PL_reg_flags |= RF_tainted;
955 tmp = (s != startpos) ? UCHARAT(s - 1) : '\n';
956 tmp = ((OP(c) == NBOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
958 if (tmp == !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
960 else if ((norun || regtry(prog, s)))
964 if ((!prog->minlen && !tmp) && (norun || regtry(prog, s)))
968 PL_reg_flags |= RF_tainted;
974 U8 *r = reghop((U8*)s, -1);
976 tmp = (I32)utf8_to_uv(r, s - (char*)r, 0, 0);
978 tmp = ((OP(c) == NBOUNDUTF8 ?
979 isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
981 if (tmp == !(OP(c) == NBOUNDUTF8 ?
982 swash_fetch(PL_utf8_alnum, (U8*)s) :
983 isALNUM_LC_utf8((U8*)s)))
985 else if ((norun || regtry(prog, s)))
989 if ((!prog->minlen && !tmp) && (norun || regtry(prog, s)))
995 if (tmp && (norun || regtry(prog, s)))
1006 while (s < strend) {
1007 if (swash_fetch(PL_utf8_alnum, (U8*)s)) {
1008 if (tmp && (norun || regtry(prog, s)))
1019 PL_reg_flags |= RF_tainted;
1020 while (s < strend) {
1021 if (isALNUM_LC(*s)) {
1022 if (tmp && (norun || regtry(prog, s)))
1033 PL_reg_flags |= RF_tainted;
1034 while (s < strend) {
1035 if (isALNUM_LC_utf8((U8*)s)) {
1036 if (tmp && (norun || regtry(prog, s)))
1047 while (s < strend) {
1049 if (tmp && (norun || regtry(prog, s)))
1060 while (s < strend) {
1061 if (!swash_fetch(PL_utf8_alnum, (U8*)s)) {
1062 if (tmp && (norun || regtry(prog, s)))
1073 PL_reg_flags |= RF_tainted;
1074 while (s < strend) {
1075 if (!isALNUM_LC(*s)) {
1076 if (tmp && (norun || regtry(prog, s)))
1087 PL_reg_flags |= RF_tainted;
1088 while (s < strend) {
1089 if (!isALNUM_LC_utf8((U8*)s)) {
1090 if (tmp && (norun || regtry(prog, s)))
1101 while (s < strend) {
1103 if (tmp && (norun || regtry(prog, s)))
1114 while (s < strend) {
1115 if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s)) {
1116 if (tmp && (norun || regtry(prog, s)))
1127 PL_reg_flags |= RF_tainted;
1128 while (s < strend) {
1129 if (isSPACE_LC(*s)) {
1130 if (tmp && (norun || regtry(prog, s)))
1141 PL_reg_flags |= RF_tainted;
1142 while (s < strend) {
1143 if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) {
1144 if (tmp && (norun || regtry(prog, s)))
1155 while (s < strend) {
1157 if (tmp && (norun || regtry(prog, s)))
1168 while (s < strend) {
1169 if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s))) {
1170 if (tmp && (norun || regtry(prog, s)))
1181 PL_reg_flags |= RF_tainted;
1182 while (s < strend) {
1183 if (!isSPACE_LC(*s)) {
1184 if (tmp && (norun || regtry(prog, s)))
1195 PL_reg_flags |= RF_tainted;
1196 while (s < strend) {
1197 if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) {
1198 if (tmp && (norun || regtry(prog, s)))
1209 while (s < strend) {
1211 if (tmp && (norun || regtry(prog, s)))
1222 while (s < strend) {
1223 if (swash_fetch(PL_utf8_digit,(U8*)s)) {
1224 if (tmp && (norun || regtry(prog, s)))
1235 PL_reg_flags |= RF_tainted;
1236 while (s < strend) {
1237 if (isDIGIT_LC(*s)) {
1238 if (tmp && (norun || regtry(prog, s)))
1249 PL_reg_flags |= RF_tainted;
1250 while (s < strend) {
1251 if (isDIGIT_LC_utf8((U8*)s)) {
1252 if (tmp && (norun || regtry(prog, s)))
1263 while (s < strend) {
1265 if (tmp && (norun || regtry(prog, s)))
1276 while (s < strend) {
1277 if (!swash_fetch(PL_utf8_digit,(U8*)s)) {
1278 if (tmp && (norun || regtry(prog, s)))
1289 PL_reg_flags |= RF_tainted;
1290 while (s < strend) {
1291 if (!isDIGIT_LC(*s)) {
1292 if (tmp && (norun || regtry(prog, s)))
1303 PL_reg_flags |= RF_tainted;
1304 while (s < strend) {
1305 if (!isDIGIT_LC_utf8((U8*)s)) {
1306 if (tmp && (norun || regtry(prog, s)))
1317 Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
1326 - regexec_flags - match a regexp against a string
1329 Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend,
1330 char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
1331 /* strend: pointer to null at end of string */
1332 /* strbeg: real beginning of string */
1333 /* minend: end of match must be >=minend after stringarg. */
1334 /* data: May be used for some additional optimizations. */
1335 /* nosave: For optimizations. */
1339 register regnode *c;
1340 register char *startpos = stringarg;
1341 I32 minlen; /* must match at least this many chars */
1342 I32 dontbother = 0; /* how many characters not to try at end */
1343 /* I32 start_shift = 0; */ /* Offset of the start to find
1344 constant substr. */ /* CC */
1345 I32 end_shift = 0; /* Same for the end. */ /* CC */
1346 I32 scream_pos = -1; /* Internal iterator of scream. */
1348 SV* oreplsv = GvSV(PL_replgv);
1354 PL_regnarrate = PL_debug & 512;
1357 /* Be paranoid... */
1358 if (prog == NULL || startpos == NULL) {
1359 Perl_croak(aTHX_ "NULL regexp parameter");
1363 minlen = prog->minlen;
1364 if (strend - startpos < minlen) goto phooey;
1366 if (startpos == strbeg) /* is ^ valid at stringarg? */
1369 PL_regprev = (U32)stringarg[-1];
1370 if (!PL_multiline && PL_regprev == '\n')
1371 PL_regprev = '\0'; /* force ^ to NOT match */
1374 /* Check validity of program. */
1375 if (UCHARAT(prog->program) != REG_MAGIC) {
1376 Perl_croak(aTHX_ "corrupted regexp program");
1380 PL_reg_eval_set = 0;
1383 if (prog->reganch & ROPT_UTF8)
1384 PL_reg_flags |= RF_utf8;
1386 /* Mark beginning of line for ^ and lookbehind. */
1387 PL_regbol = startpos;
1391 /* Mark end of line for $ (and such) */
1394 /* see how far we have to get to not match where we matched before */
1395 PL_regtill = startpos+minend;
1397 /* We start without call_cc context. */
1400 /* If there is a "must appear" string, look for it. */
1403 if (prog->reganch & ROPT_GPOS_SEEN) { /* Need to have PL_reg_ganch */
1406 if (flags & REXEC_IGNOREPOS) /* Means: check only at start */
1407 PL_reg_ganch = startpos;
1408 else if (sv && SvTYPE(sv) >= SVt_PVMG
1410 && (mg = mg_find(sv, 'g')) && mg->mg_len >= 0) {
1411 PL_reg_ganch = strbeg + mg->mg_len; /* Defined pos() */
1412 if (prog->reganch & ROPT_ANCH_GPOS) {
1413 if (s > PL_reg_ganch)
1418 else /* pos() not defined */
1419 PL_reg_ganch = strbeg;
1422 if (!(flags & REXEC_CHECKED) && prog->check_substr != Nullsv) {
1423 re_scream_pos_data d;
1425 d.scream_olds = &scream_olds;
1426 d.scream_pos = &scream_pos;
1427 s = re_intuit_start(prog, sv, s, strend, flags, &d);
1429 goto phooey; /* not present */
1432 DEBUG_r( if (!PL_colorset) reginitcolors() );
1433 DEBUG_r(PerlIO_printf(Perl_debug_log,
1434 "%sMatching REx%s `%s%.60s%s%s' against `%s%.*s%s%s'\n",
1435 PL_colors[4],PL_colors[5],PL_colors[0],
1438 (strlen(prog->precomp) > 60 ? "..." : ""),
1440 (int)(strend - startpos > 60 ? 60 : strend - startpos),
1441 startpos, PL_colors[1],
1442 (strend - startpos > 60 ? "..." : ""))
1445 /* Simplest case: anchored match need be tried only once. */
1446 /* [unless only anchor is BOL and multiline is set] */
1447 if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) {
1448 if (s == startpos && regtry(prog, startpos))
1450 else if (PL_multiline || (prog->reganch & ROPT_IMPLICIT)
1451 || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */
1456 dontbother = minlen - 1;
1457 end = HOPc(strend, -dontbother) - 1;
1458 /* for multiline we only have to try after newlines */
1459 if (prog->check_substr) {
1463 if (regtry(prog, s))
1468 if (prog->reganch & RE_USE_INTUIT) {
1469 s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
1480 if (*s++ == '\n') { /* don't need PL_utf8skip here */
1481 if (regtry(prog, s))
1488 } else if (prog->reganch & ROPT_ANCH_GPOS) {
1489 if (regtry(prog, PL_reg_ganch))
1494 /* Messy cases: unanchored match. */
1495 if (prog->anchored_substr && prog->reganch & ROPT_SKIP) {
1496 /* we have /x+whatever/ */
1497 /* it must be a one character string (XXXX Except UTF?) */
1498 char ch = SvPVX(prog->anchored_substr)[0];
1504 while (s < strend) {
1506 DEBUG_r( did_match = 1 );
1507 if (regtry(prog, s)) goto got_it;
1509 while (s < strend && *s == ch)
1516 while (s < strend) {
1518 DEBUG_r( did_match = 1 );
1519 if (regtry(prog, s)) goto got_it;
1521 while (s < strend && *s == ch)
1527 DEBUG_r(did_match ||
1528 PerlIO_printf(Perl_debug_log,
1529 "Did not find anchored character...\n"));
1532 else if (prog->anchored_substr != Nullsv
1533 || (prog->float_substr != Nullsv
1534 && prog->float_max_offset < strend - s)) {
1535 SV *must = prog->anchored_substr
1536 ? prog->anchored_substr : prog->float_substr;
1538 prog->anchored_substr ? prog->anchored_offset : prog->float_max_offset;
1540 prog->anchored_substr ? prog->anchored_offset : prog->float_min_offset;
1541 char *last = HOPc(strend, /* Cannot start after this */
1542 -(I32)(CHR_SVLEN(must)
1543 - (SvTAIL(must) != 0) + back_min));
1544 char *last1; /* Last position checked before */
1550 last1 = HOPc(s, -1);
1552 last1 = s - 1; /* bogus */
1554 /* XXXX check_substr already used to find `s', can optimize if
1555 check_substr==must. */
1557 dontbother = end_shift;
1558 strend = HOPc(strend, -dontbother);
1559 while ( (s <= last) &&
1560 ((flags & REXEC_SCREAM)
1561 ? (s = screaminstr(sv, must, HOPc(s, back_min) - strbeg,
1562 end_shift, &scream_pos, 0))
1563 : (s = fbm_instr((unsigned char*)HOP(s, back_min),
1564 (unsigned char*)strend, must,
1565 PL_multiline ? FBMrf_MULTILINE : 0))) ) {
1566 DEBUG_r( did_match = 1 );
1567 if (HOPc(s, -back_max) > last1) {
1568 last1 = HOPc(s, -back_min);
1569 s = HOPc(s, -back_max);
1572 char *t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
1574 last1 = HOPc(s, -back_min);
1578 while (s <= last1) {
1579 if (regtry(prog, s))
1585 while (s <= last1) {
1586 if (regtry(prog, s))
1592 DEBUG_r(did_match ||
1593 PerlIO_printf(Perl_debug_log, "Did not find %s substr `%s%.*s%s'%s...\n",
1594 ((must == prog->anchored_substr)
1595 ? "anchored" : "floating"),
1597 (int)(SvCUR(must) - (SvTAIL(must)!=0)),
1599 PL_colors[1], (SvTAIL(must) ? "$" : "")));
1602 else if ((c = prog->regstclass)) {
1603 if (minlen && PL_regkind[(U8)OP(prog->regstclass)] != EXACT)
1604 /* don't bother with what can't match */
1605 strend = HOPc(strend, -(minlen - 1));
1606 if (find_byclass(prog, c, s, strend, startpos, 0))
1608 DEBUG_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass...\n"));
1612 if (prog->float_substr != Nullsv) { /* Trim the end. */
1615 if (flags & REXEC_SCREAM) {
1616 last = screaminstr(sv, prog->float_substr, s - strbeg,
1617 end_shift, &scream_pos, 1); /* last one */
1619 last = scream_olds; /* Only one occurence. */
1623 char *little = SvPV(prog->float_substr, len);
1625 if (SvTAIL(prog->float_substr)) {
1626 if (memEQ(strend - len + 1, little, len - 1))
1627 last = strend - len + 1;
1628 else if (!PL_multiline)
1629 last = memEQ(strend - len, little, len)
1630 ? strend - len : Nullch;
1636 last = rninstr(s, strend, little, little + len);
1638 last = strend; /* matching `$' */
1642 DEBUG_r(PerlIO_printf(Perl_debug_log,
1643 "%sCan't trim the tail, match fails (should not happen)%s\n",
1644 PL_colors[4],PL_colors[5]));
1645 goto phooey; /* Should not happen! */
1647 dontbother = strend - last + prog->float_min_offset;
1649 if (minlen && (dontbother < minlen))
1650 dontbother = minlen - 1;
1651 strend -= dontbother; /* this one's always in bytes! */
1652 /* We don't know much -- general case. */
1655 if (regtry(prog, s))
1664 if (regtry(prog, s))
1666 } while (s++ < strend);
1674 RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
1676 if (PL_reg_eval_set) {
1677 /* Preserve the current value of $^R */
1678 if (oreplsv != GvSV(PL_replgv))
1679 sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
1680 restored, the value remains
1682 restore_pos(aTHXo_ 0);
1685 /* make sure $`, $&, $', and $digit will work later */
1686 if ( !(flags & REXEC_NOT_FIRST) ) {
1687 if (RX_MATCH_COPIED(prog)) {
1688 Safefree(prog->subbeg);
1689 RX_MATCH_COPIED_off(prog);
1691 if (flags & REXEC_COPY_STR) {
1692 I32 i = PL_regeol - startpos + (stringarg - strbeg);
1694 s = savepvn(strbeg, i);
1697 RX_MATCH_COPIED_on(prog);
1700 prog->subbeg = strbeg;
1701 prog->sublen = PL_regeol - strbeg; /* strend may have been modified */
1708 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n",
1709 PL_colors[4],PL_colors[5]));
1710 if (PL_reg_eval_set)
1711 restore_pos(aTHXo_ 0);
1716 - regtry - try match at specific point
1718 STATIC I32 /* 0 failure, 1 success */
1719 S_regtry(pTHX_ regexp *prog, char *startpos)
1728 PL_regindent = 0; /* XXXX Not good when matches are reenterable... */
1730 if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
1733 PL_reg_eval_set = RS_init;
1735 PerlIO_printf(Perl_debug_log, " setting stack tmpbase at %"IVdf"\n",
1736 (IV)(PL_stack_sp - PL_stack_base));
1738 SAVEI32(cxstack[cxstack_ix].blk_oldsp);
1739 cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
1740 /* Otherwise OP_NEXTSTATE will free whatever on stack now. */
1742 /* Apparently this is not needed, judging by wantarray. */
1743 /* SAVEI8(cxstack[cxstack_ix].blk_gimme);
1744 cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
1747 /* Make $_ available to executed code. */
1748 if (PL_reg_sv != DEFSV) {
1749 /* SAVE_DEFSV does *not* suffice here for USE_THREADS */
1754 if (!(SvTYPE(PL_reg_sv) >= SVt_PVMG && SvMAGIC(PL_reg_sv)
1755 && (mg = mg_find(PL_reg_sv, 'g')))) {
1756 /* prepare for quick setting of pos */
1757 sv_magic(PL_reg_sv, (SV*)0, 'g', Nullch, 0);
1758 mg = mg_find(PL_reg_sv, 'g');
1762 PL_reg_oldpos = mg->mg_len;
1763 SAVEDESTRUCTOR_X(restore_pos, 0);
1766 Newz(22,PL_reg_curpm, 1, PMOP);
1767 PL_reg_curpm->op_pmregexp = prog;
1768 PL_reg_oldcurpm = PL_curpm;
1769 PL_curpm = PL_reg_curpm;
1770 if (RX_MATCH_COPIED(prog)) {
1771 /* Here is a serious problem: we cannot rewrite subbeg,
1772 since it may be needed if this match fails. Thus
1773 $` inside (?{}) could fail... */
1774 PL_reg_oldsaved = prog->subbeg;
1775 PL_reg_oldsavedlen = prog->sublen;
1776 RX_MATCH_COPIED_off(prog);
1779 PL_reg_oldsaved = Nullch;
1780 prog->subbeg = PL_bostr;
1781 prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
1783 prog->startp[0] = startpos - PL_bostr;
1784 PL_reginput = startpos;
1785 PL_regstartp = prog->startp;
1786 PL_regendp = prog->endp;
1787 PL_reglastparen = &prog->lastparen;
1788 prog->lastparen = 0;
1790 DEBUG_r(PL_reg_starttry = startpos);
1791 if (PL_reg_start_tmpl <= prog->nparens) {
1792 PL_reg_start_tmpl = prog->nparens*3/2 + 3;
1793 if(PL_reg_start_tmp)
1794 Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1796 New(22,PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1799 /* XXXX What this code is doing here?!!! There should be no need
1800 to do this again and again, PL_reglastparen should take care of
1803 /* Tests pat.t#187 and split.t#{13,14} seem to depend on this code.
1804 * Actually, the code in regcppop() (which Ilya may be meaning by
1805 * PL_reglastparen), does not seem to be needed at all (?!), whereas
1806 * this code *is* needed for the above-mentioned tests to succeed.
1807 * The common theme on those tests seems to be returning null fields
1808 * from matches. --jhi */
1812 if (prog->nparens) {
1813 for (i = prog->nparens; i >= 1; i--) {
1820 if (regmatch(prog->program + 1)) {
1821 prog->endp[0] = PL_reginput - PL_bostr;
1824 REGCP_UNWIND(lastcp);
1828 #define RE_UNWIND_BRANCH 1
1829 #define RE_UNWIND_BRANCHJ 2
1833 typedef struct { /* XX: makes sense to enlarge it... */
1837 } re_unwind_generic_t;
1850 } re_unwind_branch_t;
1852 typedef union re_unwind_t {
1854 re_unwind_generic_t generic;
1855 re_unwind_branch_t branch;
1859 - regmatch - main matching routine
1861 * Conceptually the strategy is simple: check to see whether the current
1862 * node matches, call self recursively to see whether the rest matches,
1863 * and then act accordingly. In practice we make some effort to avoid
1864 * recursion, in particular by going through "ordinary" nodes (that don't
1865 * need to know whether the rest of the match failed) by a loop instead of
1868 /* [lwall] I've hoisted the register declarations to the outer block in order to
1869 * maybe save a little bit of pushing and popping on the stack. It also takes
1870 * advantage of machines that use a register save mask on subroutine entry.
1872 STATIC I32 /* 0 failure, 1 success */
1873 S_regmatch(pTHX_ regnode *prog)
1876 register regnode *scan; /* Current node. */
1877 regnode *next; /* Next node. */
1878 regnode *inner; /* Next node in internal branch. */
1879 register I32 nextchr; /* renamed nextchr - nextchar colides with
1880 function of same name */
1881 register I32 n; /* no or next */
1882 register I32 ln; /* len or last */
1883 register char *s; /* operand or save */
1884 register char *locinput = PL_reginput;
1885 register I32 c1, c2, paren; /* case fold search, parenth */
1886 int minmod = 0, sw = 0, logical = 0;
1888 I32 firstcp = PL_savestack_ix;
1894 /* Note that nextchr is a byte even in UTF */
1895 nextchr = UCHARAT(locinput);
1897 while (scan != NULL) {
1898 #define sayNO_L (logical ? (logical = 0, sw = 0, goto cont) : sayNO)
1900 # define sayYES goto yes
1901 # define sayNO goto no
1902 # define sayYES_FINAL goto yes_final
1903 # define sayYES_LOUD goto yes_loud
1904 # define sayNO_FINAL goto no_final
1905 # define sayNO_SILENT goto do_no
1906 # define saySAME(x) if (x) goto yes; else goto no
1907 # define REPORT_CODE_OFF 24
1909 # define sayYES return 1
1910 # define sayNO return 0
1911 # define sayYES_FINAL return 1
1912 # define sayYES_LOUD return 1
1913 # define sayNO_FINAL return 0
1914 # define sayNO_SILENT return 0
1915 # define saySAME(x) return x
1918 SV *prop = sv_newmortal();
1919 int docolor = *PL_colors[0];
1920 int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
1921 int l = (PL_regeol - locinput > taill ? taill : PL_regeol - locinput);
1922 /* The part of the string before starttry has one color
1923 (pref0_len chars), between starttry and current
1924 position another one (pref_len - pref0_len chars),
1925 after the current position the third one.
1926 We assume that pref0_len <= pref_len, otherwise we
1927 decrease pref0_len. */
1928 int pref_len = (locinput - PL_bostr > (5 + taill) - l
1929 ? (5 + taill) - l : locinput - PL_bostr);
1930 int pref0_len = pref_len - (locinput - PL_reg_starttry);
1932 if (l + pref_len < (5 + taill) && l < PL_regeol - locinput)
1933 l = ( PL_regeol - locinput > (5 + taill) - pref_len
1934 ? (5 + taill) - pref_len : PL_regeol - locinput);
1937 if (pref0_len > pref_len)
1938 pref0_len = pref_len;
1939 regprop(prop, scan);
1940 PerlIO_printf(Perl_debug_log,
1941 "%4"IVdf" <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|%3"IVdf":%*s%s\n",
1942 (IV)(locinput - PL_bostr),
1943 PL_colors[4], pref0_len,
1944 locinput - pref_len, PL_colors[5],
1945 PL_colors[2], pref_len - pref0_len,
1946 locinput - pref_len + pref0_len, PL_colors[3],
1947 (docolor ? "" : "> <"),
1948 PL_colors[0], l, locinput, PL_colors[1],
1949 15 - l - pref_len + 1,
1951 (IV)(scan - PL_regprogram), PL_regindent*2, "",
1955 next = scan + NEXT_OFF(scan);
1961 if (locinput == PL_bostr
1962 ? PL_regprev == '\n'
1964 (nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
1966 /* regtill = regbol; */
1971 if (locinput == PL_bostr
1972 ? PL_regprev == '\n'
1973 : ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
1979 if (locinput == PL_bostr)
1983 if (locinput == PL_reg_ganch)
1993 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
1998 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
2000 if (PL_regeol - locinput > 1)
2004 if (PL_regeol != locinput)
2008 if (nextchr & 0x80) {
2009 locinput += PL_utf8skip[nextchr];
2010 if (locinput > PL_regeol)
2012 nextchr = UCHARAT(locinput);
2015 if (!nextchr && locinput >= PL_regeol)
2017 nextchr = UCHARAT(++locinput);
2020 if (!nextchr && locinput >= PL_regeol)
2022 nextchr = UCHARAT(++locinput);
2025 if (nextchr & 0x80) {
2026 locinput += PL_utf8skip[nextchr];
2027 if (locinput > PL_regeol)
2029 nextchr = UCHARAT(locinput);
2032 if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
2034 nextchr = UCHARAT(++locinput);
2037 if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
2039 nextchr = UCHARAT(++locinput);
2044 /* Inline the first character, for speed. */
2045 if (UCHARAT(s) != nextchr)
2047 if (PL_regeol - locinput < ln)
2049 if (ln > 1 && memNE(s, locinput, ln))
2052 nextchr = UCHARAT(locinput);
2055 PL_reg_flags |= RF_tainted;
2064 c1 = OP(scan) == EXACTF;
2068 if (utf8_to_uv((U8*)s, e - s, 0, 0) !=
2070 toLOWER_utf8((U8*)l) :
2071 toLOWER_LC_utf8((U8*)l)))
2079 nextchr = UCHARAT(locinput);
2083 /* Inline the first character, for speed. */
2084 if (UCHARAT(s) != nextchr &&
2085 UCHARAT(s) != ((OP(scan) == EXACTF)
2086 ? PL_fold : PL_fold_locale)[nextchr])
2088 if (PL_regeol - locinput < ln)
2090 if (ln > 1 && (OP(scan) == EXACTF
2091 ? ibcmp(s, locinput, ln)
2092 : ibcmp_locale(s, locinput, ln)))
2095 nextchr = UCHARAT(locinput);
2098 if (!REGINCLASSUTF8(scan, (U8*)locinput))
2100 if (locinput >= PL_regeol)
2102 locinput += PL_utf8skip[nextchr];
2103 nextchr = UCHARAT(locinput);
2107 nextchr = UCHARAT(locinput);
2108 if (!REGINCLASS(scan, nextchr))
2110 if (!nextchr && locinput >= PL_regeol)
2112 nextchr = UCHARAT(++locinput);
2115 PL_reg_flags |= RF_tainted;
2120 if (!(OP(scan) == ALNUM
2121 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
2123 nextchr = UCHARAT(++locinput);
2126 PL_reg_flags |= RF_tainted;
2131 if (nextchr & 0x80) {
2132 if (!(OP(scan) == ALNUMUTF8
2133 ? swash_fetch(PL_utf8_alnum, (U8*)locinput)
2134 : isALNUM_LC_utf8((U8*)locinput)))
2138 locinput += PL_utf8skip[nextchr];
2139 nextchr = UCHARAT(locinput);
2142 if (!(OP(scan) == ALNUMUTF8
2143 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
2145 nextchr = UCHARAT(++locinput);
2148 PL_reg_flags |= RF_tainted;
2151 if (!nextchr && locinput >= PL_regeol)
2153 if (OP(scan) == NALNUM
2154 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
2156 nextchr = UCHARAT(++locinput);
2159 PL_reg_flags |= RF_tainted;
2162 if (!nextchr && locinput >= PL_regeol)
2164 if (nextchr & 0x80) {
2165 if (OP(scan) == NALNUMUTF8
2166 ? swash_fetch(PL_utf8_alnum, (U8*)locinput)
2167 : isALNUM_LC_utf8((U8*)locinput))
2171 locinput += PL_utf8skip[nextchr];
2172 nextchr = UCHARAT(locinput);
2175 if (OP(scan) == NALNUMUTF8
2176 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
2178 nextchr = UCHARAT(++locinput);
2182 PL_reg_flags |= RF_tainted;
2186 /* was last char in word? */
2187 ln = (locinput != PL_regbol) ? UCHARAT(locinput - 1) : PL_regprev;
2188 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2190 n = isALNUM(nextchr);
2193 ln = isALNUM_LC(ln);
2194 n = isALNUM_LC(nextchr);
2196 if (((!ln) == (!n)) == (OP(scan) == BOUND || OP(scan) == BOUNDL))
2201 PL_reg_flags |= RF_tainted;
2205 /* was last char in word? */
2206 if (locinput == PL_regbol)
2209 U8 *r = reghop((U8*)locinput, -1);
2211 ln = utf8_to_uv(r, s - (char*)r, 0, 0);
2213 if (OP(scan) == BOUNDUTF8 || OP(scan) == NBOUNDUTF8) {
2214 ln = isALNUM_uni(ln);
2215 n = swash_fetch(PL_utf8_alnum, (U8*)locinput);
2218 ln = isALNUM_LC_uni(ln);
2219 n = isALNUM_LC_utf8((U8*)locinput);
2221 if (((!ln) == (!n)) == (OP(scan) == BOUNDUTF8 || OP(scan) == BOUNDLUTF8))
2225 PL_reg_flags |= RF_tainted;
2230 if (!(OP(scan) == SPACE
2231 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
2233 nextchr = UCHARAT(++locinput);
2236 PL_reg_flags |= RF_tainted;
2241 if (nextchr & 0x80) {
2242 if (!(OP(scan) == SPACEUTF8
2243 ? swash_fetch(PL_utf8_space, (U8*)locinput)
2244 : isSPACE_LC_utf8((U8*)locinput)))
2248 locinput += PL_utf8skip[nextchr];
2249 nextchr = UCHARAT(locinput);
2252 if (!(OP(scan) == SPACEUTF8
2253 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
2255 nextchr = UCHARAT(++locinput);
2258 PL_reg_flags |= RF_tainted;
2261 if (!nextchr && locinput >= PL_regeol)
2263 if (OP(scan) == NSPACE
2264 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
2266 nextchr = UCHARAT(++locinput);
2269 PL_reg_flags |= RF_tainted;
2272 if (!nextchr && locinput >= PL_regeol)
2274 if (nextchr & 0x80) {
2275 if (OP(scan) == NSPACEUTF8
2276 ? swash_fetch(PL_utf8_space, (U8*)locinput)
2277 : isSPACE_LC_utf8((U8*)locinput))
2281 locinput += PL_utf8skip[nextchr];
2282 nextchr = UCHARAT(locinput);
2285 if (OP(scan) == NSPACEUTF8
2286 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
2288 nextchr = UCHARAT(++locinput);
2291 PL_reg_flags |= RF_tainted;
2296 if (!(OP(scan) == DIGIT
2297 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
2299 nextchr = UCHARAT(++locinput);
2302 PL_reg_flags |= RF_tainted;
2307 if (nextchr & 0x80) {
2308 if (!(OP(scan) == DIGITUTF8
2309 ? swash_fetch(PL_utf8_digit, (U8*)locinput)
2310 : isDIGIT_LC_utf8((U8*)locinput)))
2314 locinput += PL_utf8skip[nextchr];
2315 nextchr = UCHARAT(locinput);
2318 if (!(OP(scan) == DIGITUTF8
2319 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
2321 nextchr = UCHARAT(++locinput);
2324 PL_reg_flags |= RF_tainted;
2327 if (!nextchr && locinput >= PL_regeol)
2329 if (OP(scan) == NDIGIT
2330 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
2332 nextchr = UCHARAT(++locinput);
2335 PL_reg_flags |= RF_tainted;
2338 if (!nextchr && locinput >= PL_regeol)
2340 if (nextchr & 0x80) {
2341 if (OP(scan) == NDIGITUTF8
2342 ? swash_fetch(PL_utf8_digit, (U8*)locinput)
2343 : isDIGIT_LC_utf8((U8*)locinput))
2347 locinput += PL_utf8skip[nextchr];
2348 nextchr = UCHARAT(locinput);
2351 if (OP(scan) == NDIGITUTF8
2352 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
2354 nextchr = UCHARAT(++locinput);
2357 if (locinput >= PL_regeol || swash_fetch(PL_utf8_mark,(U8*)locinput))
2359 locinput += PL_utf8skip[nextchr];
2360 while (locinput < PL_regeol && swash_fetch(PL_utf8_mark,(U8*)locinput))
2361 locinput += UTF8SKIP(locinput);
2362 if (locinput > PL_regeol)
2364 nextchr = UCHARAT(locinput);
2367 PL_reg_flags |= RF_tainted;
2371 n = ARG(scan); /* which paren pair */
2372 ln = PL_regstartp[n];
2373 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
2374 if (*PL_reglastparen < n || ln == -1)
2375 sayNO; /* Do not match unless seen CLOSEn. */
2376 if (ln == PL_regendp[n])
2380 if (UTF && OP(scan) != REF) { /* REF can do byte comparison */
2382 char *e = PL_bostr + PL_regendp[n];
2384 * Note that we can't do the "other character" lookup trick as
2385 * in the 8-bit case (no pun intended) because in Unicode we
2386 * have to map both upper and title case to lower case.
2388 if (OP(scan) == REFF) {
2392 if (toLOWER_utf8((U8*)s) != toLOWER_utf8((U8*)l))
2402 if (toLOWER_LC_utf8((U8*)s) != toLOWER_LC_utf8((U8*)l))
2409 nextchr = UCHARAT(locinput);
2413 /* Inline the first character, for speed. */
2414 if (UCHARAT(s) != nextchr &&
2416 (UCHARAT(s) != ((OP(scan) == REFF
2417 ? PL_fold : PL_fold_locale)[nextchr]))))
2419 ln = PL_regendp[n] - ln;
2420 if (locinput + ln > PL_regeol)
2422 if (ln > 1 && (OP(scan) == REF
2423 ? memNE(s, locinput, ln)
2425 ? ibcmp(s, locinput, ln)
2426 : ibcmp_locale(s, locinput, ln))))
2429 nextchr = UCHARAT(locinput);
2440 OP_4tree *oop = PL_op;
2441 COP *ocurcop = PL_curcop;
2442 SV **ocurpad = PL_curpad;
2446 PL_op = (OP_4tree*)PL_regdata->data[n];
2447 DEBUG_r( PerlIO_printf(Perl_debug_log, " re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
2448 PL_curpad = AvARRAY((AV*)PL_regdata->data[n + 2]);
2449 PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
2451 CALLRUNOPS(aTHX); /* Scalar context. */
2457 PL_curpad = ocurpad;
2458 PL_curcop = ocurcop;
2460 if (logical == 2) { /* Postponed subexpression. */
2462 MAGIC *mg = Null(MAGIC*);
2464 CHECKPOINT cp, lastcp;
2466 if(SvROK(ret) || SvRMAGICAL(ret)) {
2467 SV *sv = SvROK(ret) ? SvRV(ret) : ret;
2470 mg = mg_find(sv, 'r');
2473 re = (regexp *)mg->mg_obj;
2474 (void)ReREFCNT_inc(re);
2478 char *t = SvPV(ret, len);
2480 char *oprecomp = PL_regprecomp;
2481 I32 osize = PL_regsize;
2482 I32 onpar = PL_regnpar;
2485 pm.op_pmdynflags = (UTF||DO_UTF8(ret) ? PMdf_UTF8 : 0);
2486 re = CALLREGCOMP(aTHX_ t, t + len, &pm);
2488 & (SVs_TEMP | SVs_PADTMP | SVf_READONLY)))
2489 sv_magic(ret,(SV*)ReREFCNT_inc(re),'r',0,0);
2490 PL_regprecomp = oprecomp;
2495 PerlIO_printf(Perl_debug_log,
2496 "Entering embedded `%s%.60s%s%s'\n",
2500 (strlen(re->precomp) > 60 ? "..." : ""))
2503 state.prev = PL_reg_call_cc;
2504 state.cc = PL_regcc;
2505 state.re = PL_reg_re;
2509 cp = regcppush(0); /* Save *all* the positions. */
2512 state.ss = PL_savestack_ix;
2513 *PL_reglastparen = 0;
2514 PL_reg_call_cc = &state;
2515 PL_reginput = locinput;
2517 /* XXXX This is too dramatic a measure... */
2520 if (regmatch(re->program + 1)) {
2521 /* Even though we succeeded, we need to restore
2522 global variables, since we may be wrapped inside
2523 SUSPEND, thus the match may be not finished yet. */
2525 /* XXXX Do this only if SUSPENDed? */
2526 PL_reg_call_cc = state.prev;
2527 PL_regcc = state.cc;
2528 PL_reg_re = state.re;
2529 cache_re(PL_reg_re);
2531 /* XXXX This is too dramatic a measure... */
2534 /* These are needed even if not SUSPEND. */
2540 REGCP_UNWIND(lastcp);
2542 PL_reg_call_cc = state.prev;
2543 PL_regcc = state.cc;
2544 PL_reg_re = state.re;
2545 cache_re(PL_reg_re);
2547 /* XXXX This is too dramatic a measure... */
2556 sv_setsv(save_scalar(PL_replgv), ret);
2560 n = ARG(scan); /* which paren pair */
2561 PL_reg_start_tmp[n] = locinput;
2566 n = ARG(scan); /* which paren pair */
2567 PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
2568 PL_regendp[n] = locinput - PL_bostr;
2569 if (n > *PL_reglastparen)
2570 *PL_reglastparen = n;
2573 n = ARG(scan); /* which paren pair */
2574 sw = (*PL_reglastparen >= n && PL_regendp[n] != -1);
2577 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
2579 next = NEXTOPER(NEXTOPER(scan));
2581 next = scan + ARG(scan);
2582 if (OP(next) == IFTHEN) /* Fake one. */
2583 next = NEXTOPER(NEXTOPER(next));
2587 logical = scan->flags;
2589 /*******************************************************************
2590 PL_regcc contains infoblock about the innermost (...)* loop, and
2591 a pointer to the next outer infoblock.
2593 Here is how Y(A)*Z is processed (if it is compiled into CURLYX/WHILEM):
2595 1) After matching X, regnode for CURLYX is processed;
2597 2) This regnode creates infoblock on the stack, and calls
2598 regmatch() recursively with the starting point at WHILEM node;
2600 3) Each hit of WHILEM node tries to match A and Z (in the order
2601 depending on the current iteration, min/max of {min,max} and
2602 greediness). The information about where are nodes for "A"
2603 and "Z" is read from the infoblock, as is info on how many times "A"
2604 was already matched, and greediness.
2606 4) After A matches, the same WHILEM node is hit again.
2608 5) Each time WHILEM is hit, PL_regcc is the infoblock created by CURLYX
2609 of the same pair. Thus when WHILEM tries to match Z, it temporarily
2610 resets PL_regcc, since this Y(A)*Z can be a part of some other loop:
2611 as in (Y(A)*Z)*. If Z matches, the automaton will hit the WHILEM node
2612 of the external loop.
2614 Currently present infoblocks form a tree with a stem formed by PL_curcc
2615 and whatever it mentions via ->next, and additional attached trees
2616 corresponding to temporarily unset infoblocks as in "5" above.
2618 In the following picture infoblocks for outer loop of
2619 (Y(A)*?Z)*?T are denoted O, for inner I. NULL starting block
2620 is denoted by x. The matched string is YAAZYAZT. Temporarily postponed
2621 infoblocks are drawn below the "reset" infoblock.
2623 In fact in the picture below we do not show failed matches for Z and T
2624 by WHILEM blocks. [We illustrate minimal matches, since for them it is
2625 more obvious *why* one needs to *temporary* unset infoblocks.]
2627 Matched REx position InfoBlocks Comment
2631 Y A)*?Z)*?T x <- O <- I
2632 YA )*?Z)*?T x <- O <- I
2633 YA A)*?Z)*?T x <- O <- I
2634 YAA )*?Z)*?T x <- O <- I
2635 YAA Z)*?T x <- O # Temporary unset I
2638 YAAZ Y(A)*?Z)*?T x <- O
2641 YAAZY (A)*?Z)*?T x <- O
2644 YAAZY A)*?Z)*?T x <- O <- I
2647 YAAZYA )*?Z)*?T x <- O <- I
2650 YAAZYA Z)*?T x <- O # Temporary unset I
2656 YAAZYAZ T x # Temporary unset O
2663 *******************************************************************/
2666 CHECKPOINT cp = PL_savestack_ix;
2667 /* No need to save/restore up to this paren */
2668 I32 parenfloor = scan->flags;
2670 if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
2672 cc.oldcc = PL_regcc;
2674 /* XXXX Probably it is better to teach regpush to support
2675 parenfloor > PL_regsize... */
2676 if (parenfloor > *PL_reglastparen)
2677 parenfloor = *PL_reglastparen; /* Pessimization... */
2678 cc.parenfloor = parenfloor;
2680 cc.min = ARG1(scan);
2681 cc.max = ARG2(scan);
2682 cc.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
2686 PL_reginput = locinput;
2687 n = regmatch(PREVOPER(next)); /* start on the WHILEM */
2689 PL_regcc = cc.oldcc;
2695 * This is really hard to understand, because after we match
2696 * what we're trying to match, we must make sure the rest of
2697 * the REx is going to match for sure, and to do that we have
2698 * to go back UP the parse tree by recursing ever deeper. And
2699 * if it fails, we have to reset our parent's current state
2700 * that we can try again after backing off.
2703 CHECKPOINT cp, lastcp;
2704 CURCUR* cc = PL_regcc;
2705 char *lastloc = cc->lastloc; /* Detection of 0-len. */
2707 n = cc->cur + 1; /* how many we know we matched */
2708 PL_reginput = locinput;
2711 PerlIO_printf(Perl_debug_log,
2712 "%*s %ld out of %ld..%ld cc=%lx\n",
2713 REPORT_CODE_OFF+PL_regindent*2, "",
2714 (long)n, (long)cc->min,
2715 (long)cc->max, (long)cc)
2718 /* If degenerate scan matches "", assume scan done. */
2720 if (locinput == cc->lastloc && n >= cc->min) {
2721 PL_regcc = cc->oldcc;
2725 PerlIO_printf(Perl_debug_log,
2726 "%*s empty match detected, try continuation...\n",
2727 REPORT_CODE_OFF+PL_regindent*2, "")
2729 if (regmatch(cc->next))
2737 /* First just match a string of min scans. */
2741 cc->lastloc = locinput;
2742 if (regmatch(cc->scan))
2745 cc->lastloc = lastloc;
2750 /* Check whether we already were at this position.
2751 Postpone detection until we know the match is not
2752 *that* much linear. */
2753 if (!PL_reg_maxiter) {
2754 PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
2755 PL_reg_leftiter = PL_reg_maxiter;
2757 if (PL_reg_leftiter-- == 0) {
2758 I32 size = (PL_reg_maxiter + 7)/8;
2759 if (PL_reg_poscache) {
2760 if (PL_reg_poscache_size < size) {
2761 Renew(PL_reg_poscache, size, char);
2762 PL_reg_poscache_size = size;
2764 Zero(PL_reg_poscache, size, char);
2767 PL_reg_poscache_size = size;
2768 Newz(29, PL_reg_poscache, size, char);
2771 PerlIO_printf(Perl_debug_log,
2772 "%sDetected a super-linear match, switching on caching%s...\n",
2773 PL_colors[4], PL_colors[5])
2776 if (PL_reg_leftiter < 0) {
2777 I32 o = locinput - PL_bostr, b;
2779 o = (scan->flags & 0xf) - 1 + o * (scan->flags>>4);
2782 if (PL_reg_poscache[o] & (1<<b)) {
2784 PerlIO_printf(Perl_debug_log,
2785 "%*s already tried at this position...\n",
2786 REPORT_CODE_OFF+PL_regindent*2, "")
2790 PL_reg_poscache[o] |= (1<<b);
2794 /* Prefer next over scan for minimal matching. */
2797 PL_regcc = cc->oldcc;
2800 cp = regcppush(cc->parenfloor);
2802 if (regmatch(cc->next)) {
2804 sayYES; /* All done. */
2806 REGCP_UNWIND(lastcp);
2812 if (n >= cc->max) { /* Maximum greed exceeded? */
2813 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
2814 && !(PL_reg_flags & RF_warned)) {
2815 PL_reg_flags |= RF_warned;
2816 Perl_warner(aTHX_ WARN_REGEXP, "%s limit (%d) exceeded",
2817 "Complex regular subexpression recursion",
2824 PerlIO_printf(Perl_debug_log,
2825 "%*s trying longer...\n",
2826 REPORT_CODE_OFF+PL_regindent*2, "")
2828 /* Try scanning more and see if it helps. */
2829 PL_reginput = locinput;
2831 cc->lastloc = locinput;
2832 cp = regcppush(cc->parenfloor);
2834 if (regmatch(cc->scan)) {
2838 REGCP_UNWIND(lastcp);
2841 cc->lastloc = lastloc;
2845 /* Prefer scan over next for maximal matching. */
2847 if (n < cc->max) { /* More greed allowed? */
2848 cp = regcppush(cc->parenfloor);
2850 cc->lastloc = locinput;
2852 if (regmatch(cc->scan)) {
2856 REGCP_UNWIND(lastcp);
2857 regcppop(); /* Restore some previous $<digit>s? */
2858 PL_reginput = locinput;
2860 PerlIO_printf(Perl_debug_log,
2861 "%*s failed, try continuation...\n",
2862 REPORT_CODE_OFF+PL_regindent*2, "")
2865 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
2866 && !(PL_reg_flags & RF_warned)) {
2867 PL_reg_flags |= RF_warned;
2868 Perl_warner(aTHX_ WARN_REGEXP, "%s limit (%d) exceeded",
2869 "Complex regular subexpression recursion",
2873 /* Failed deeper matches of scan, so see if this one works. */
2874 PL_regcc = cc->oldcc;
2877 if (regmatch(cc->next))
2883 cc->lastloc = lastloc;
2888 next = scan + ARG(scan);
2891 inner = NEXTOPER(NEXTOPER(scan));
2894 inner = NEXTOPER(scan);
2899 if (OP(next) != c1) /* No choice. */
2900 next = inner; /* Avoid recursion. */
2902 I32 lastparen = *PL_reglastparen;
2904 re_unwind_branch_t *uw;
2906 /* Put unwinding data on stack */
2907 unwind1 = SSNEWt(1,re_unwind_branch_t);
2908 uw = SSPTRt(unwind1,re_unwind_branch_t);
2911 uw->type = ((c1 == BRANCH)
2913 : RE_UNWIND_BRANCHJ);
2914 uw->lastparen = lastparen;
2916 uw->locinput = locinput;
2917 uw->nextchr = nextchr;
2919 uw->regindent = ++PL_regindent;
2922 REGCP_SET(uw->lastcp);
2924 /* Now go into the first branch */
2937 /* We suppose that the next guy does not need
2938 backtracking: in particular, it is of constant length,
2939 and has no parenths to influence future backrefs. */
2940 ln = ARG1(scan); /* min to match */
2941 n = ARG2(scan); /* max to match */
2942 paren = scan->flags;
2944 if (paren > PL_regsize)
2946 if (paren > *PL_reglastparen)
2947 *PL_reglastparen = paren;
2949 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
2951 scan += NEXT_OFF(scan); /* Skip former OPEN. */
2952 PL_reginput = locinput;
2955 if (ln && regrepeat_hard(scan, ln, &l) < ln)
2957 if (ln && l == 0 && n >= ln
2958 /* In fact, this is tricky. If paren, then the
2959 fact that we did/didnot match may influence
2960 future execution. */
2961 && !(paren && ln == 0))
2963 locinput = PL_reginput;
2964 if (PL_regkind[(U8)OP(next)] == EXACT) {
2965 c1 = (U8)*STRING(next);
2966 if (OP(next) == EXACTF)
2968 else if (OP(next) == EXACTFL)
2969 c2 = PL_fold_locale[c1];
2976 /* This may be improved if l == 0. */
2977 while (n >= ln || (n == REG_INFTY && ln > 0 && l)) { /* ln overflow ? */
2978 /* If it could work, try it. */
2980 UCHARAT(PL_reginput) == c1 ||
2981 UCHARAT(PL_reginput) == c2)
2985 PL_regstartp[paren] =
2986 HOPc(PL_reginput, -l) - PL_bostr;
2987 PL_regendp[paren] = PL_reginput - PL_bostr;
2990 PL_regendp[paren] = -1;
2994 REGCP_UNWIND(lastcp);
2996 /* Couldn't or didn't -- move forward. */
2997 PL_reginput = locinput;
2998 if (regrepeat_hard(scan, 1, &l)) {
3000 locinput = PL_reginput;
3007 n = regrepeat_hard(scan, n, &l);
3008 if (n != 0 && l == 0
3009 /* In fact, this is tricky. If paren, then the
3010 fact that we did/didnot match may influence
3011 future execution. */
3012 && !(paren && ln == 0))
3014 locinput = PL_reginput;
3016 PerlIO_printf(Perl_debug_log,
3017 "%*s matched %"IVdf" times, len=%"IVdf"...\n",
3018 (int)(REPORT_CODE_OFF+PL_regindent*2), "",
3022 if (PL_regkind[(U8)OP(next)] == EXACT) {
3023 c1 = (U8)*STRING(next);
3024 if (OP(next) == EXACTF)
3026 else if (OP(next) == EXACTFL)
3027 c2 = PL_fold_locale[c1];
3036 /* If it could work, try it. */
3038 UCHARAT(PL_reginput) == c1 ||
3039 UCHARAT(PL_reginput) == c2)
3042 PerlIO_printf(Perl_debug_log,
3043 "%*s trying tail with n=%"IVdf"...\n",
3044 (int)(REPORT_CODE_OFF+PL_regindent*2), "", (IV)n)
3048 PL_regstartp[paren] = HOPc(PL_reginput, -l) - PL_bostr;
3049 PL_regendp[paren] = PL_reginput - PL_bostr;
3052 PL_regendp[paren] = -1;
3056 REGCP_UNWIND(lastcp);
3058 /* Couldn't or didn't -- back up. */
3060 locinput = HOPc(locinput, -l);
3061 PL_reginput = locinput;
3068 paren = scan->flags; /* Which paren to set */
3069 if (paren > PL_regsize)
3071 if (paren > *PL_reglastparen)
3072 *PL_reglastparen = paren;
3073 ln = ARG1(scan); /* min to match */
3074 n = ARG2(scan); /* max to match */
3075 scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
3079 ln = ARG1(scan); /* min to match */
3080 n = ARG2(scan); /* max to match */
3081 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
3086 scan = NEXTOPER(scan);
3092 scan = NEXTOPER(scan);
3096 * Lookahead to avoid useless match attempts
3097 * when we know what character comes next.
3099 if (PL_regkind[(U8)OP(next)] == EXACT) {
3100 c1 = (U8)*STRING(next);
3101 if (OP(next) == EXACTF)
3103 else if (OP(next) == EXACTFL)
3104 c2 = PL_fold_locale[c1];
3110 PL_reginput = locinput;
3114 if (ln && regrepeat(scan, ln) < ln)
3116 locinput = PL_reginput;
3119 char *e = locinput + n - ln; /* Should not check after this */
3120 char *old = locinput;
3122 if (e >= PL_regeol || (n == REG_INFTY))
3125 /* Find place 'next' could work */
3127 while (locinput <= e && *locinput != c1)
3130 while (locinput <= e
3137 /* PL_reginput == old now */
3138 if (locinput != old) {
3139 ln = 1; /* Did some */
3140 if (regrepeat(scan, locinput - old) <
3144 /* PL_reginput == locinput now */
3145 TRYPAREN(paren, ln, locinput);
3146 PL_reginput = locinput; /* Could be reset... */
3147 REGCP_UNWIND(lastcp);
3148 /* Couldn't or didn't -- move forward. */
3153 while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */
3154 /* If it could work, try it. */
3156 UCHARAT(PL_reginput) == c1 ||
3157 UCHARAT(PL_reginput) == c2)
3159 TRYPAREN(paren, n, PL_reginput);
3160 REGCP_UNWIND(lastcp);
3162 /* Couldn't or didn't -- move forward. */
3163 PL_reginput = locinput;
3164 if (regrepeat(scan, 1)) {
3166 locinput = PL_reginput;
3174 n = regrepeat(scan, n);
3175 locinput = PL_reginput;
3176 if (ln < n && PL_regkind[(U8)OP(next)] == EOL &&
3177 (!PL_multiline || OP(next) == SEOL || OP(next) == EOS)) {
3178 ln = n; /* why back off? */
3179 /* ...because $ and \Z can match before *and* after
3180 newline at the end. Consider "\n\n" =~ /\n+\Z\n/.
3181 We should back off by one in this case. */
3182 if (UCHARAT(PL_reginput - 1) == '\n' && OP(next) != EOS)
3188 /* If it could work, try it. */
3190 UCHARAT(PL_reginput) == c1 ||
3191 UCHARAT(PL_reginput) == c2)
3193 TRYPAREN(paren, n, PL_reginput);
3194 REGCP_UNWIND(lastcp);
3196 /* Couldn't or didn't -- back up. */
3198 PL_reginput = locinput = HOPc(locinput, -1);
3203 /* If it could work, try it. */
3205 UCHARAT(PL_reginput) == c1 ||
3206 UCHARAT(PL_reginput) == c2)
3208 TRYPAREN(paren, n, PL_reginput);
3209 REGCP_UNWIND(lastcp);
3211 /* Couldn't or didn't -- back up. */
3213 PL_reginput = locinput = HOPc(locinput, -1);
3220 if (PL_reg_call_cc) {
3221 re_cc_state *cur_call_cc = PL_reg_call_cc;
3222 CURCUR *cctmp = PL_regcc;
3223 regexp *re = PL_reg_re;
3224 CHECKPOINT cp, lastcp;
3226 cp = regcppush(0); /* Save *all* the positions. */
3228 regcp_set_to(PL_reg_call_cc->ss); /* Restore parens of
3230 PL_reginput = locinput; /* Make position available to
3232 cache_re(PL_reg_call_cc->re);
3233 PL_regcc = PL_reg_call_cc->cc;
3234 PL_reg_call_cc = PL_reg_call_cc->prev;
3235 if (regmatch(cur_call_cc->node)) {
3236 PL_reg_call_cc = cur_call_cc;
3240 REGCP_UNWIND(lastcp);
3242 PL_reg_call_cc = cur_call_cc;
3248 PerlIO_printf(Perl_debug_log,
3249 "%*s continuation failed...\n",
3250 REPORT_CODE_OFF+PL_regindent*2, "")
3254 if (locinput < PL_regtill) {
3255 DEBUG_r(PerlIO_printf(Perl_debug_log,
3256 "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
3258 (long)(locinput - PL_reg_starttry),
3259 (long)(PL_regtill - PL_reg_starttry),
3261 sayNO_FINAL; /* Cannot match: too short. */
3263 PL_reginput = locinput; /* put where regtry can find it */
3264 sayYES_FINAL; /* Success! */
3266 PL_reginput = locinput; /* put where regtry can find it */
3267 sayYES_LOUD; /* Success! */
3270 PL_reginput = locinput;
3275 if (UTF) { /* XXXX This is absolutely
3276 broken, we read before
3278 s = HOPMAYBEc(locinput, -scan->flags);
3284 if (locinput < PL_bostr + scan->flags)
3286 PL_reginput = locinput - scan->flags;
3291 PL_reginput = locinput;
3296 if (UTF) { /* XXXX This is absolutely
3297 broken, we read before
3299 s = HOPMAYBEc(locinput, -scan->flags);
3300 if (!s || s < PL_bostr)
3305 if (locinput < PL_bostr + scan->flags)
3307 PL_reginput = locinput - scan->flags;
3312 PL_reginput = locinput;
3315 inner = NEXTOPER(NEXTOPER(scan));
3316 if (regmatch(inner) != n) {
3331 if (OP(scan) == SUSPEND) {
3332 locinput = PL_reginput;
3333 nextchr = UCHARAT(locinput);
3338 next = scan + ARG(scan);
3343 PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
3344 PTR2UV(scan), OP(scan));
3345 Perl_croak(aTHX_ "regexp memory corruption");
3352 * We get here only if there's trouble -- normally "case END" is
3353 * the terminating point.
3355 Perl_croak(aTHX_ "corrupted regexp pointers");
3361 PerlIO_printf(Perl_debug_log,
3362 "%*s %scould match...%s\n",
3363 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],PL_colors[5])
3367 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
3368 PL_colors[4],PL_colors[5]));
3374 #if 0 /* Breaks $^R */
3382 PerlIO_printf(Perl_debug_log,
3383 "%*s %sfailed...%s\n",
3384 REPORT_CODE_OFF+PL_regindent*2, "",PL_colors[4],PL_colors[5])
3390 re_unwind_t *uw = SSPTRt(unwind,re_unwind_t);
3393 case RE_UNWIND_BRANCH:
3394 case RE_UNWIND_BRANCHJ:
3396 re_unwind_branch_t *uwb = &(uw->branch);
3397 I32 lastparen = uwb->lastparen;
3399 REGCP_UNWIND(uwb->lastcp);
3400 for (n = *PL_reglastparen; n > lastparen; n--)
3402 *PL_reglastparen = n;
3403 scan = next = uwb->next;
3405 OP(scan) != (uwb->type == RE_UNWIND_BRANCH
3406 ? BRANCH : BRANCHJ) ) { /* Failure */
3413 /* Have more choice yet. Reuse the same uwb. */
3415 if ((n = (uwb->type == RE_UNWIND_BRANCH
3416 ? NEXT_OFF(next) : ARG(next))))
3419 next = NULL; /* XXXX Needn't unwinding in this case... */
3421 next = NEXTOPER(scan);
3422 if (uwb->type == RE_UNWIND_BRANCHJ)
3423 next = NEXTOPER(next);
3424 locinput = uwb->locinput;
3425 nextchr = uwb->nextchr;
3427 PL_regindent = uwb->regindent;
3434 Perl_croak(aTHX_ "regexp unwind memory corruption");
3445 - regrepeat - repeatedly match something simple, report how many
3448 * [This routine now assumes that it will only match on things of length 1.
3449 * That was true before, but now we assume scan - reginput is the count,
3450 * rather than incrementing count on every character. [Er, except utf8.]]
3453 S_regrepeat(pTHX_ regnode *p, I32 max)
3456 register char *scan;
3458 register char *loceol = PL_regeol;
3459 register I32 hardcount = 0;
3462 if (max != REG_INFTY && max < loceol - scan)
3463 loceol = scan + max;
3466 while (scan < loceol && *scan != '\n')
3474 while (scan < loceol && *scan != '\n') {
3475 scan += UTF8SKIP(scan);
3481 while (scan < loceol) {
3482 scan += UTF8SKIP(scan);
3486 case EXACT: /* length of string is 1 */
3488 while (scan < loceol && UCHARAT(scan) == c)
3491 case EXACTF: /* length of string is 1 */
3493 while (scan < loceol &&
3494 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
3497 case EXACTFL: /* length of string is 1 */
3498 PL_reg_flags |= RF_tainted;
3500 while (scan < loceol &&
3501 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
3506 while (scan < loceol && REGINCLASSUTF8(p, (U8*)scan)) {
3507 scan += UTF8SKIP(scan);
3512 while (scan < loceol && REGINCLASS(p, *scan))
3516 while (scan < loceol && isALNUM(*scan))
3521 while (scan < loceol && swash_fetch(PL_utf8_alnum, (U8*)scan)) {
3522 scan += UTF8SKIP(scan);
3527 PL_reg_flags |= RF_tainted;
3528 while (scan < loceol && isALNUM_LC(*scan))
3532 PL_reg_flags |= RF_tainted;
3534 while (scan < loceol && isALNUM_LC_utf8((U8*)scan)) {
3535 scan += UTF8SKIP(scan);
3541 while (scan < loceol && !isALNUM(*scan))
3546 while (scan < loceol && !swash_fetch(PL_utf8_alnum, (U8*)scan)) {
3547 scan += UTF8SKIP(scan);
3552 PL_reg_flags |= RF_tainted;
3553 while (scan < loceol && !isALNUM_LC(*scan))
3557 PL_reg_flags |= RF_tainted;
3559 while (scan < loceol && !isALNUM_LC_utf8((U8*)scan)) {
3560 scan += UTF8SKIP(scan);
3565 while (scan < loceol && isSPACE(*scan))
3570 while (scan < loceol && (*scan == ' ' || swash_fetch(PL_utf8_space,(U8*)scan))) {
3571 scan += UTF8SKIP(scan);
3576 PL_reg_flags |= RF_tainted;
3577 while (scan < loceol && isSPACE_LC(*scan))
3581 PL_reg_flags |= RF_tainted;
3583 while (scan < loceol && (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
3584 scan += UTF8SKIP(scan);
3589 while (scan < loceol && !isSPACE(*scan))
3594 while (scan < loceol && !(*scan == ' ' || swash_fetch(PL_utf8_space,(U8*)scan))) {
3595 scan += UTF8SKIP(scan);
3600 PL_reg_flags |= RF_tainted;
3601 while (scan < loceol && !isSPACE_LC(*scan))
3605 PL_reg_flags |= RF_tainted;
3607 while (scan < loceol && !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
3608 scan += UTF8SKIP(scan);
3613 while (scan < loceol && isDIGIT(*scan))
3618 while (scan < loceol && swash_fetch(PL_utf8_digit,(U8*)scan)) {
3619 scan += UTF8SKIP(scan);
3625 while (scan < loceol && !isDIGIT(*scan))
3630 while (scan < loceol && !swash_fetch(PL_utf8_digit,(U8*)scan)) {
3631 scan += UTF8SKIP(scan);
3635 default: /* Called on something of 0 width. */
3636 break; /* So match right here or not at all. */
3642 c = scan - PL_reginput;
3647 SV *prop = sv_newmortal();
3650 PerlIO_printf(Perl_debug_log,
3651 "%*s %s can match %"IVdf" times out of %"IVdf"...\n",
3652 REPORT_CODE_OFF+1, "", SvPVX(prop),(IV)c,(IV)max);
3659 - regrepeat_hard - repeatedly match something, report total lenth and length
3661 * The repeater is supposed to have constant length.
3665 S_regrepeat_hard(pTHX_ regnode *p, I32 max, I32 *lp)
3668 register char *scan;
3669 register char *start;
3670 register char *loceol = PL_regeol;
3672 I32 count = 0, res = 1;
3677 start = PL_reginput;
3679 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
3682 while (start < PL_reginput) {
3684 start += UTF8SKIP(start);
3695 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
3697 *lp = l = PL_reginput - start;
3698 if (max != REG_INFTY && l*max < loceol - scan)
3699 loceol = scan + l*max;
3712 - reginclass - determine if a character falls into a character class
3716 S_reginclass(pTHX_ register regnode *p, register I32 c)
3719 char flags = ANYOF_FLAGS(p);
3723 if (ANYOF_BITMAP_TEST(p, c))
3725 else if (flags & ANYOF_FOLD) {
3727 if (flags & ANYOF_LOCALE) {
3728 PL_reg_flags |= RF_tainted;
3729 cf = PL_fold_locale[c];
3733 if (ANYOF_BITMAP_TEST(p, cf))
3737 if (!match && (flags & ANYOF_CLASS)) {
3738 PL_reg_flags |= RF_tainted;
3740 (ANYOF_CLASS_TEST(p, ANYOF_ALNUM) && isALNUM_LC(c)) ||
3741 (ANYOF_CLASS_TEST(p, ANYOF_NALNUM) && !isALNUM_LC(c)) ||
3742 (ANYOF_CLASS_TEST(p, ANYOF_SPACE) && isSPACE_LC(c)) ||
3743 (ANYOF_CLASS_TEST(p, ANYOF_NSPACE) && !isSPACE_LC(c)) ||
3744 (ANYOF_CLASS_TEST(p, ANYOF_DIGIT) && isDIGIT_LC(c)) ||
3745 (ANYOF_CLASS_TEST(p, ANYOF_NDIGIT) && !isDIGIT_LC(c)) ||
3746 (ANYOF_CLASS_TEST(p, ANYOF_ALNUMC) && isALNUMC_LC(c)) ||
3747 (ANYOF_CLASS_TEST(p, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
3748 (ANYOF_CLASS_TEST(p, ANYOF_ALPHA) && isALPHA_LC(c)) ||
3749 (ANYOF_CLASS_TEST(p, ANYOF_NALPHA) && !isALPHA_LC(c)) ||
3750 (ANYOF_CLASS_TEST(p, ANYOF_ASCII) && isASCII(c)) ||
3751 (ANYOF_CLASS_TEST(p, ANYOF_NASCII) && !isASCII(c)) ||
3752 (ANYOF_CLASS_TEST(p, ANYOF_CNTRL) && isCNTRL_LC(c)) ||
3753 (ANYOF_CLASS_TEST(p, ANYOF_NCNTRL) && !isCNTRL_LC(c)) ||
3754 (ANYOF_CLASS_TEST(p, ANYOF_GRAPH) && isGRAPH_LC(c)) ||
3755 (ANYOF_CLASS_TEST(p, ANYOF_NGRAPH) && !isGRAPH_LC(c)) ||
3756 (ANYOF_CLASS_TEST(p, ANYOF_LOWER) && isLOWER_LC(c)) ||
3757 (ANYOF_CLASS_TEST(p, ANYOF_NLOWER) && !isLOWER_LC(c)) ||
3758 (ANYOF_CLASS_TEST(p, ANYOF_PRINT) && isPRINT_LC(c)) ||
3759 (ANYOF_CLASS_TEST(p, ANYOF_NPRINT) && !isPRINT_LC(c)) ||
3760 (ANYOF_CLASS_TEST(p, ANYOF_PUNCT) && isPUNCT_LC(c)) ||
3761 (ANYOF_CLASS_TEST(p, ANYOF_NPUNCT) && !isPUNCT_LC(c)) ||
3762 (ANYOF_CLASS_TEST(p, ANYOF_UPPER) && isUPPER_LC(c)) ||
3763 (ANYOF_CLASS_TEST(p, ANYOF_NUPPER) && !isUPPER_LC(c)) ||
3764 (ANYOF_CLASS_TEST(p, ANYOF_XDIGIT) && isXDIGIT(c)) ||
3765 (ANYOF_CLASS_TEST(p, ANYOF_NXDIGIT) && !isXDIGIT(c)) ||
3766 (ANYOF_CLASS_TEST(p, ANYOF_PSXSPC) && isPSXSPC(c)) ||
3767 (ANYOF_CLASS_TEST(p, ANYOF_NPSXSPC) && !isPSXSPC(c)) ||
3768 (ANYOF_CLASS_TEST(p, ANYOF_BLANK) && isBLANK(c)) ||
3769 (ANYOF_CLASS_TEST(p, ANYOF_NBLANK) && !isBLANK(c))
3770 ) /* How's that for a conditional? */
3776 return (flags & ANYOF_INVERT) ? !match : match;
3780 S_reginclassutf8(pTHX_ regnode *f, U8 *p)
3783 char flags = ARG1(f);
3785 SV *sv = (SV*)PL_regdata->data[ARG2(f)];
3787 if (swash_fetch(sv, p))
3789 else if (flags & ANYOF_FOLD) {
3790 U8 tmpbuf[UTF8_MAXLEN];
3791 if (flags & ANYOF_LOCALE) {
3792 PL_reg_flags |= RF_tainted;
3793 uv_to_utf8(tmpbuf, toLOWER_LC_utf8(p));
3796 uv_to_utf8(tmpbuf, toLOWER_utf8(p));
3797 if (swash_fetch(sv, tmpbuf))
3801 /* UTF8 combined with ANYOF_CLASS is ill-defined. */
3803 return (flags & ANYOF_INVERT) ? !match : match;
3807 S_reghop(pTHX_ U8 *s, I32 off)
3811 while (off-- && s < (U8*)PL_regeol)
3816 if (s > (U8*)PL_bostr) {
3819 while (s > (U8*)PL_bostr && (*s & 0xc0) == 0x80)
3821 } /* XXX could check well-formedness here */
3829 S_reghopmaybe(pTHX_ U8* s, I32 off)
3833 while (off-- && s < (U8*)PL_regeol)
3840 if (s > (U8*)PL_bostr) {
3843 while (s > (U8*)PL_bostr && (*s & 0xc0) == 0x80)
3845 } /* XXX could check well-formedness here */
3861 restore_pos(pTHXo_ void *arg)
3864 if (PL_reg_eval_set) {
3865 if (PL_reg_oldsaved) {
3866 PL_reg_re->subbeg = PL_reg_oldsaved;
3867 PL_reg_re->sublen = PL_reg_oldsavedlen;
3868 RX_MATCH_COPIED_on(PL_reg_re);
3870 PL_reg_magic->mg_len = PL_reg_oldpos;
3871 PL_reg_eval_set = 0;
3872 PL_curpm = PL_reg_oldcurpm;