5 * "One Ring to rule them all, One Ring to find them..."
8 /* NOTE: this is derived from Henry Spencer's regexp code, and should not
9 * confused with the original package (see point 3 below). Thanks, Henry!
12 /* Additional note: this code is very heavily munged from Henry's version
13 * in places. In some spots I've traded clarity for efficiency, so don't
14 * blame Henry for some of the lack of readability.
17 /* The names of the functions have been changed from regcomp and
18 * regexec to pregcomp and pregexec in order to avoid conflicts
19 * with the POSIX routines of the same names.
22 #ifdef PERL_EXT_RE_BUILD
23 /* need to replace pregcomp et al, so enable that */
24 # ifndef PERL_IN_XSUB_RE
25 # define PERL_IN_XSUB_RE
27 /* need access to debugger hooks */
28 # if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING)
33 #ifdef PERL_IN_XSUB_RE
34 /* We *really* need to overwrite these symbols: */
35 # define Perl_regexec_flags my_regexec
36 # define Perl_regdump my_regdump
37 # define Perl_regprop my_regprop
38 # define Perl_re_intuit_start my_re_intuit_start
39 /* *These* symbols are masked to allow static link. */
40 # define Perl_pregexec my_pregexec
41 # define Perl_reginitcolors my_reginitcolors
42 # define Perl_regclass_swash my_regclass_swash
44 # define PERL_NO_GET_CONTEXT
49 * pregcomp and pregexec -- regsub and regerror are not used in perl
51 * Copyright (c) 1986 by University of Toronto.
52 * Written by Henry Spencer. Not derived from licensed software.
54 * Permission is granted to anyone to use this software for any
55 * purpose on any computer system, and to redistribute it freely,
56 * subject to the following restrictions:
58 * 1. The author is not responsible for the consequences of use of
59 * this software, no matter how awful, even if they arise
62 * 2. The origin of this software must not be misrepresented, either
63 * by explicit claim or by omission.
65 * 3. Altered versions must be plainly marked as such, and must not
66 * be misrepresented as being the original software.
68 **** Alterations to Henry's code are...
70 **** Copyright (c) 1991-2000, Larry Wall
72 **** You may distribute under the terms of either the GNU General Public
73 **** License or the Artistic License, as specified in the README file.
75 * Beware that some of this code is subtly aware of the way operator
76 * precedence is structured in regular expressions. Serious changes in
77 * regular-expression syntax might require a total rethink.
80 #define PERL_IN_REGEXEC_C
83 #ifdef PERL_IN_XSUB_RE
84 # if defined(PERL_CAPI) || defined(PERL_OBJECT)
91 #define RF_tainted 1 /* tainted information used? */
92 #define RF_warned 2 /* warned about big count? */
93 #define RF_evaled 4 /* Did an EVAL with setting? */
94 #define RF_utf8 8 /* String contains multibyte chars? */
96 #define UTF (PL_reg_flags & RF_utf8)
98 #define RS_init 1 /* eval environment created */
99 #define RS_set 2 /* replsv value is set */
102 #define STATIC static
109 #define CHR_SVLEN(sv) (UTF ? sv_len_utf8(sv) : SvCUR(sv))
110 #define CHR_DIST(a,b) (UTF ? utf8_distance(a,b) : a - b)
112 #define reghop_c(pos,off) ((char*)reghop((U8*)pos, off))
113 #define reghopmaybe_c(pos,off) ((char*)reghopmaybe((U8*)pos, off))
114 #define HOP(pos,off) (UTF ? reghop((U8*)pos, off) : (U8*)(pos + off))
115 #define HOPMAYBE(pos,off) (UTF ? reghopmaybe((U8*)pos, off) : (U8*)(pos + off))
116 #define HOPc(pos,off) ((char*)HOP(pos,off))
117 #define HOPMAYBEc(pos,off) ((char*)HOPMAYBE(pos,off))
119 static void restore_pos(pTHXo_ void *arg);
123 S_regcppush(pTHX_ I32 parenfloor)
125 int retval = PL_savestack_ix;
126 int i = (PL_regsize - parenfloor) * 4;
130 for (p = PL_regsize; p > parenfloor; p--) {
131 SSPUSHINT(PL_regendp[p]);
132 SSPUSHINT(PL_regstartp[p]);
133 SSPUSHPTR(PL_reg_start_tmp[p]);
136 SSPUSHINT(PL_regsize);
137 SSPUSHINT(*PL_reglastparen);
138 SSPUSHPTR(PL_reginput);
140 SSPUSHINT(SAVEt_REGCONTEXT);
144 /* These are needed since we do not localize EVAL nodes: */
145 # define REGCP_SET(cp) DEBUG_r(PerlIO_printf(Perl_debug_log, \
146 " Setting an EVAL scope, savestack=%"IVdf"\n", \
147 (IV)PL_savestack_ix)); cp = PL_savestack_ix
149 # define REGCP_UNWIND(cp) DEBUG_r(cp != PL_savestack_ix ? \
150 PerlIO_printf(Perl_debug_log, \
151 " Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
152 (IV)(cp), (IV)PL_savestack_ix) : 0); regcpblow(cp)
161 assert(i == SAVEt_REGCONTEXT);
163 input = (char *) SSPOPPTR;
164 *PL_reglastparen = SSPOPINT;
165 PL_regsize = SSPOPINT;
166 for (i -= 3; i > 0; i -= 4) {
167 paren = (U32)SSPOPINT;
168 PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
169 PL_regstartp[paren] = SSPOPINT;
171 if (paren <= *PL_reglastparen)
172 PL_regendp[paren] = tmps;
174 PerlIO_printf(Perl_debug_log,
175 " restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
176 (UV)paren, (IV)PL_regstartp[paren],
177 (IV)(PL_reg_start_tmp[paren] - PL_bostr),
178 (IV)PL_regendp[paren],
179 (paren > *PL_reglastparen ? "(no)" : ""));
183 if (*PL_reglastparen + 1 <= PL_regnpar) {
184 PerlIO_printf(Perl_debug_log,
185 " restoring \\%"IVdf"..\\%"IVdf" to undef\n",
186 (IV)(*PL_reglastparen + 1), (IV)PL_regnpar);
190 /* It would seem that the similar code in regtry()
191 * already takes care of this, and in fact it is in
192 * a better location to since this code can #if 0-ed out
193 * but the code in regtry() is needed or otherwise tests
194 * requiring null fields (pat.t#187 and split.t#{13,14}
195 * (as of patchlevel 7877) will fail. Then again,
196 * this code seems to be necessary or otherwise
197 * building DynaLoader will fail:
198 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
200 for (paren = *PL_reglastparen + 1; paren <= PL_regnpar; paren++) {
201 if (paren > PL_regsize)
202 PL_regstartp[paren] = -1;
203 PL_regendp[paren] = -1;
210 S_regcp_set_to(pTHX_ I32 ss)
212 I32 tmp = PL_savestack_ix;
214 PL_savestack_ix = ss;
216 PL_savestack_ix = tmp;
220 typedef struct re_cc_state
224 struct re_cc_state *prev;
229 #define regcpblow(cp) LEAVE_SCOPE(cp) /* Ignores regcppush()ed data. */
231 #define TRYPAREN(paren, n, input) { \
234 PL_regstartp[paren] = HOPc(input, -1) - PL_bostr; \
235 PL_regendp[paren] = input - PL_bostr; \
238 PL_regendp[paren] = -1; \
240 if (regmatch(next)) \
243 PL_regendp[paren] = -1; \
248 * pregexec and friends
252 - pregexec - match a regexp against a string
255 Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend,
256 char *strbeg, I32 minend, SV *screamer, U32 nosave)
257 /* strend: pointer to null at end of string */
258 /* strbeg: real beginning of string */
259 /* minend: end of match must be >=minend after stringarg. */
260 /* nosave: For optimizations. */
263 regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
264 nosave ? 0 : REXEC_COPY_STR);
268 S_cache_re(pTHX_ regexp *prog)
270 PL_regprecomp = prog->precomp; /* Needed for FAIL. */
272 PL_regprogram = prog->program;
274 PL_regnpar = prog->nparens;
275 PL_regdata = prog->data;
280 * Need to implement the following flags for reg_anch:
282 * USE_INTUIT_NOML - Useful to call re_intuit_start() first
284 * INTUIT_AUTORITATIVE_NOML - Can trust a positive answer
285 * INTUIT_AUTORITATIVE_ML
286 * INTUIT_ONCE_NOML - Intuit can match in one location only.
289 * Another flag for this function: SECOND_TIME (so that float substrs
290 * with giant delta may be not rechecked).
293 /* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
295 /* If SCREAM, then SvPVX(sv) should be compatible with strpos and strend.
296 Otherwise, only SvCUR(sv) is used to get strbeg. */
298 /* XXXX We assume that strpos is strbeg unless sv. */
300 /* XXXX Some places assume that there is a fixed substring.
301 An update may be needed if optimizer marks as "INTUITable"
302 RExen without fixed substrings. Similarly, it is assumed that
303 lengths of all the strings are no more than minlen, thus they
304 cannot come from lookahead.
305 (Or minlen should take into account lookahead.) */
307 /* A failure to find a constant substring means that there is no need to make
308 an expensive call to REx engine, thus we celebrate a failure. Similarly,
309 finding a substring too deep into the string means that less calls to
310 regtry() should be needed.
312 REx compiler's optimizer found 4 possible hints:
313 a) Anchored substring;
315 c) Whether we are anchored (beginning-of-line or \G);
316 d) First node (of those at offset 0) which may distingush positions;
317 We use a)b)d) and multiline-part of c), and try to find a position in the
318 string which does not contradict any of them.
321 /* Most of decisions we do here should have been done at compile time.
322 The nodes of the REx which we used for the search should have been
323 deleted from the finite automaton. */
326 Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
327 char *strend, U32 flags, re_scream_pos_data *data)
329 register I32 start_shift;
330 /* Should be nonnegative! */
331 register I32 end_shift;
338 register char *other_last = Nullch; /* other substr checked before this */
339 char *check_at; /* check substr found at this pos */
341 char *i_strpos = strpos;
344 DEBUG_r( if (!PL_colorset) reginitcolors() );
345 DEBUG_r(PerlIO_printf(Perl_debug_log,
346 "%sGuessing start of match, REx%s `%s%.60s%s%s' against `%s%.*s%s%s'...\n",
347 PL_colors[4],PL_colors[5],PL_colors[0],
350 (strlen(prog->precomp) > 60 ? "..." : ""),
352 (int)(strend - strpos > 60 ? 60 : strend - strpos),
353 strpos, PL_colors[1],
354 (strend - strpos > 60 ? "..." : ""))
357 if (prog->minlen > strend - strpos) {
358 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too short...\n"));
361 strbeg = (sv && SvPOK(sv)) ? strend - SvCUR(sv) : strpos;
362 check = prog->check_substr;
363 if (prog->reganch & ROPT_ANCH) { /* Match at beg-of-str or after \n */
364 ml_anch = !( (prog->reganch & ROPT_ANCH_SINGLE)
365 || ( (prog->reganch & ROPT_ANCH_BOL)
366 && !PL_multiline ) ); /* Check after \n? */
369 if ( !(prog->reganch & ROPT_ANCH_GPOS) /* Checked by the caller */
370 /* SvCUR is not set on references: SvRV and SvPVX overlap */
372 && (strpos != strbeg)) {
373 DEBUG_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
376 if (prog->check_offset_min == prog->check_offset_max) {
377 /* Substring at constant offset from beg-of-str... */
380 PL_regeol = strend; /* Used in HOP() */
381 s = HOPc(strpos, prog->check_offset_min);
383 slen = SvCUR(check); /* >= 1 */
385 if ( strend - s > slen || strend - s < slen - 1
386 || (strend - s == slen && strend[-1] != '\n')) {
387 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
390 /* Now should match s[0..slen-2] */
392 if (slen && (*SvPVX(check) != *s
394 && memNE(SvPVX(check), s, slen)))) {
396 DEBUG_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
400 else if (*SvPVX(check) != *s
401 || ((slen = SvCUR(check)) > 1
402 && memNE(SvPVX(check), s, slen)))
404 goto success_at_start;
407 /* Match is anchored, but substr is not anchored wrt beg-of-str. */
409 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
410 end_shift = prog->minlen - start_shift -
411 CHR_SVLEN(check) + (SvTAIL(check) != 0);
413 I32 end = prog->check_offset_max + CHR_SVLEN(check)
414 - (SvTAIL(check) != 0);
415 I32 eshift = strend - s - end;
417 if (end_shift < eshift)
421 else { /* Can match at random position */
424 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
425 /* Should be nonnegative! */
426 end_shift = prog->minlen - start_shift -
427 CHR_SVLEN(check) + (SvTAIL(check) != 0);
430 #ifdef DEBUGGING /* 7/99: reports of failure (with the older version) */
432 Perl_croak(aTHX_ "panic: end_shift");
436 /* Find a possible match in the region s..strend by looking for
437 the "check" substring in the region corrected by start/end_shift. */
438 if (flags & REXEC_SCREAM) {
439 I32 p = -1; /* Internal iterator of scream. */
440 I32 *pp = data ? data->scream_pos : &p;
442 if (PL_screamfirst[BmRARE(check)] >= 0
443 || ( BmRARE(check) == '\n'
444 && (BmPREVIOUS(check) == SvCUR(check) - 1)
446 s = screaminstr(sv, check,
447 start_shift + (s - strbeg), end_shift, pp, 0);
451 *data->scream_olds = s;
454 s = fbm_instr((unsigned char*)s + start_shift,
455 (unsigned char*)strend - end_shift,
456 check, PL_multiline ? FBMrf_MULTILINE : 0);
458 /* Update the count-of-usability, remove useless subpatterns,
461 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s %s substr `%s%.*s%s'%s%s",
462 (s ? "Found" : "Did not find"),
463 ((check == prog->anchored_substr) ? "anchored" : "floating"),
465 (int)(SvCUR(check) - (SvTAIL(check)!=0)),
467 PL_colors[1], (SvTAIL(check) ? "$" : ""),
468 (s ? " at offset " : "...\n") ) );
475 /* Finish the diagnostic message */
476 DEBUG_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
478 /* Got a candidate. Check MBOL anchoring, and the *other* substr.
479 Start with the other substr.
480 XXXX no SCREAM optimization yet - and a very coarse implementation
481 XXXX /ttx+/ results in anchored=`ttx', floating=`x'. floating will
482 *always* match. Probably should be marked during compile...
483 Probably it is right to do no SCREAM here...
486 if (prog->float_substr && prog->anchored_substr) {
487 /* Take into account the "other" substring. */
488 /* XXXX May be hopelessly wrong for UTF... */
491 if (check == prog->float_substr) {
494 char *last = s - start_shift, *last1, *last2;
498 t = s - prog->check_offset_max;
499 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
500 && (!(prog->reganch & ROPT_UTF8)
501 || (PL_bostr = strpos, /* Used in regcopmaybe() */
502 (t = reghopmaybe_c(s, -(prog->check_offset_max)))
507 t += prog->anchored_offset;
508 if (t < other_last) /* These positions already checked */
511 last2 = last1 = strend - prog->minlen;
514 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
515 /* On end-of-str: see comment below. */
516 s = fbm_instr((unsigned char*)t,
517 (unsigned char*)last1 + prog->anchored_offset
518 + SvCUR(prog->anchored_substr)
519 - (SvTAIL(prog->anchored_substr)!=0),
520 prog->anchored_substr, PL_multiline ? FBMrf_MULTILINE : 0);
521 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s anchored substr `%s%.*s%s'%s",
522 (s ? "Found" : "Contradicts"),
524 (int)(SvCUR(prog->anchored_substr)
525 - (SvTAIL(prog->anchored_substr)!=0)),
526 SvPVX(prog->anchored_substr),
527 PL_colors[1], (SvTAIL(prog->anchored_substr) ? "$" : "")));
529 if (last1 >= last2) {
530 DEBUG_r(PerlIO_printf(Perl_debug_log,
531 ", giving up...\n"));
534 DEBUG_r(PerlIO_printf(Perl_debug_log,
535 ", trying floating at offset %ld...\n",
536 (long)(s1 + 1 - i_strpos)));
537 PL_regeol = strend; /* Used in HOP() */
538 other_last = last1 + prog->anchored_offset + 1;
543 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
544 (long)(s - i_strpos)));
545 t = s - prog->anchored_offset;
554 else { /* Take into account the floating substring. */
559 last1 = last = strend - prog->minlen + prog->float_min_offset;
560 if (last - t > prog->float_max_offset)
561 last = t + prog->float_max_offset;
562 s = t + prog->float_min_offset;
565 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
566 /* fbm_instr() takes into account exact value of end-of-str
567 if the check is SvTAIL(ed). Since false positives are OK,
568 and end-of-str is not later than strend we are OK. */
569 s = fbm_instr((unsigned char*)s,
570 (unsigned char*)last + SvCUR(prog->float_substr)
571 - (SvTAIL(prog->float_substr)!=0),
572 prog->float_substr, PL_multiline ? FBMrf_MULTILINE : 0);
573 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s floating substr `%s%.*s%s'%s",
574 (s ? "Found" : "Contradicts"),
576 (int)(SvCUR(prog->float_substr)
577 - (SvTAIL(prog->float_substr)!=0)),
578 SvPVX(prog->float_substr),
579 PL_colors[1], (SvTAIL(prog->float_substr) ? "$" : "")));
582 DEBUG_r(PerlIO_printf(Perl_debug_log,
583 ", giving up...\n"));
586 DEBUG_r(PerlIO_printf(Perl_debug_log,
587 ", trying anchored starting at offset %ld...\n",
588 (long)(s1 + 1 - i_strpos)));
589 other_last = last + 1;
590 PL_regeol = strend; /* Used in HOP() */
595 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
596 (long)(s - i_strpos)));
606 t = s - prog->check_offset_max;
608 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
609 && (!(prog->reganch & ROPT_UTF8)
610 || (PL_bostr = strpos, /* Used in regcopmaybe() */
611 ((t = reghopmaybe_c(s, -(prog->check_offset_max)))
614 /* Fixed substring is found far enough so that the match
615 cannot start at strpos. */
617 if (ml_anch && t[-1] != '\n') {
618 /* Eventually fbm_*() should handle this, but often
619 anchored_offset is not 0, so this check will not be wasted. */
620 /* XXXX In the code below we prefer to look for "^" even in
621 presence of anchored substrings. And we search even
622 beyond the found float position. These pessimizations
623 are historical artefacts only. */
625 while (t < strend - prog->minlen) {
627 if (t < check_at - prog->check_offset_min) {
628 if (prog->anchored_substr) {
629 /* Since we moved from the found position,
630 we definitely contradict the found anchored
631 substr. Due to the above check we do not
632 contradict "check" substr.
633 Thus we can arrive here only if check substr
634 is float. Redo checking for "other"=="fixed".
637 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
638 PL_colors[0],PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
639 goto do_other_anchored;
641 /* We don't contradict the found floating substring. */
642 /* XXXX Why not check for STCLASS? */
644 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
645 PL_colors[0],PL_colors[1], (long)(s - i_strpos)));
648 /* Position contradicts check-string */
649 /* XXXX probably better to look for check-string
650 than for "\n", so one should lower the limit for t? */
651 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n",
652 PL_colors[0],PL_colors[1], (long)(t + 1 - i_strpos)));
653 other_last = strpos = s = t + 1;
658 DEBUG_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
659 PL_colors[0],PL_colors[1]));
663 DEBUG_r(PerlIO_printf(Perl_debug_log, "Starting position does not contradict /%s^%s/m...\n",
664 PL_colors[0],PL_colors[1]));
668 ++BmUSEFUL(prog->check_substr); /* hooray/5 */
672 /* The found string does not prohibit matching at strpos,
673 - no optimization of calling REx engine can be performed,
674 unless it was an MBOL and we are not after MBOL,
675 or a future STCLASS check will fail this. */
677 /* Even in this situation we may use MBOL flag if strpos is offset
678 wrt the start of the string. */
679 if (ml_anch && sv && !SvROK(sv) /* See prev comment on SvROK */
680 && (strpos != strbeg) && strpos[-1] != '\n'
681 /* May be due to an implicit anchor of m{.*foo} */
682 && !(prog->reganch & ROPT_IMPLICIT))
687 DEBUG_r( if (ml_anch)
688 PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n",
689 (long)(strpos - i_strpos), PL_colors[0],PL_colors[1]);
692 if (!(prog->reganch & ROPT_NAUGHTY) /* XXXX If strpos moved? */
693 && prog->check_substr /* Could be deleted already */
694 && --BmUSEFUL(prog->check_substr) < 0
695 && prog->check_substr == prog->float_substr)
697 /* If flags & SOMETHING - do not do it many times on the same match */
698 DEBUG_r(PerlIO_printf(Perl_debug_log, "... Disabling check substring...\n"));
699 SvREFCNT_dec(prog->check_substr);
700 prog->check_substr = Nullsv; /* disable */
701 prog->float_substr = Nullsv; /* clear */
702 check = Nullsv; /* abort */
704 /* XXXX This is a remnant of the old implementation. It
705 looks wasteful, since now INTUIT can use many
707 prog->reganch &= ~RE_USE_INTUIT;
714 /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
715 if (prog->regstclass) {
716 /* minlen == 0 is possible if regstclass is \b or \B,
717 and the fixed substr is ''$.
718 Since minlen is already taken into account, s+1 is before strend;
719 accidentally, minlen >= 1 guaranties no false positives at s + 1
720 even for \b or \B. But (minlen? 1 : 0) below assumes that
721 regstclass does not come from lookahead... */
722 /* If regstclass takes bytelength more than 1: If charlength==1, OK.
723 This leaves EXACTF only, which is dealt with in find_byclass(). */
724 int cl_l = (PL_regkind[(U8)OP(prog->regstclass)] == EXACT
725 ? STR_LEN(prog->regstclass)
727 char *endpos = (prog->anchored_substr || ml_anch)
728 ? s + (prog->minlen? cl_l : 0)
729 : (prog->float_substr ? check_at - start_shift + cl_l
731 char *startpos = strbeg;
734 if (prog->reganch & ROPT_UTF8) {
735 PL_regdata = prog->data;
738 s = find_byclass(prog, prog->regstclass, s, endpos, startpos, 1);
743 if (endpos == strend) {
744 DEBUG_r( PerlIO_printf(Perl_debug_log,
745 "Could not match STCLASS...\n") );
748 DEBUG_r( PerlIO_printf(Perl_debug_log,
749 "This position contradicts STCLASS...\n") );
750 if ((prog->reganch & ROPT_ANCH) && !ml_anch)
752 /* Contradict one of substrings */
753 if (prog->anchored_substr) {
754 if (prog->anchored_substr == check) {
755 DEBUG_r( what = "anchored" );
757 PL_regeol = strend; /* Used in HOP() */
759 if (s + start_shift + end_shift > strend) {
760 /* XXXX Should be taken into account earlier? */
761 DEBUG_r( PerlIO_printf(Perl_debug_log,
762 "Could not match STCLASS...\n") );
767 DEBUG_r( PerlIO_printf(Perl_debug_log,
768 "Looking for %s substr starting at offset %ld...\n",
769 what, (long)(s + start_shift - i_strpos)) );
772 /* Have both, check_string is floating */
773 if (t + start_shift >= check_at) /* Contradicts floating=check */
774 goto retry_floating_check;
775 /* Recheck anchored substring, but not floating... */
779 DEBUG_r( PerlIO_printf(Perl_debug_log,
780 "Looking for anchored substr starting at offset %ld...\n",
781 (long)(other_last - i_strpos)) );
782 goto do_other_anchored;
784 /* Another way we could have checked stclass at the
785 current position only: */
790 DEBUG_r( PerlIO_printf(Perl_debug_log,
791 "Looking for /%s^%s/m starting at offset %ld...\n",
792 PL_colors[0],PL_colors[1], (long)(t - i_strpos)) );
795 if (!prog->float_substr) /* Could have been deleted */
797 /* Check is floating subtring. */
798 retry_floating_check:
799 t = check_at - start_shift;
800 DEBUG_r( what = "floating" );
801 goto hop_and_restart;
804 PerlIO_printf(Perl_debug_log,
805 "By STCLASS: moving %ld --> %ld\n",
806 (long)(t - i_strpos), (long)(s - i_strpos));
808 PerlIO_printf(Perl_debug_log,
809 "Does not contradict STCLASS...\n") );
812 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s%s:%s match at offset %ld\n",
813 PL_colors[4], (check ? "Guessed" : "Giving up"),
814 PL_colors[5], (long)(s - i_strpos)) );
817 fail_finish: /* Substring not found */
818 if (prog->check_substr) /* could be removed already */
819 BmUSEFUL(prog->check_substr) += 5; /* hooray */
821 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
822 PL_colors[4],PL_colors[5]));
826 /* We know what class REx starts with. Try to find this position... */
828 S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *startpos, I32 norun)
830 I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
836 register I32 tmp = 1; /* Scratch variable? */
837 register bool do_utf8 = DO_UTF8(PL_reg_sv);
839 /* We know what class it must start with. */
843 if (reginclass(c, (U8*)s, do_utf8)) {
844 if (tmp && (norun || regtry(prog, s)))
851 s += do_utf8 ? UTF8SKIP(s) : 1;
864 c2 = PL_fold_locale[c1];
869 e = s; /* Due to minlen logic of intuit() */
870 /* Here it is NOT UTF! */
874 && (ln == 1 || !(OP(c) == EXACTF
876 : ibcmp_locale(s, m, ln)))
877 && (norun || regtry(prog, s)) )
883 if ( (*(U8*)s == c1 || *(U8*)s == c2)
884 && (ln == 1 || !(OP(c) == EXACTF
886 : ibcmp_locale(s, m, ln)))
887 && (norun || regtry(prog, s)) )
894 PL_reg_flags |= RF_tainted;
901 U8 *r = reghop((U8*)s, -1);
903 tmp = (I32)utf8_to_uv(r, s - (char*)r, 0, 0);
905 tmp = ((OP(c) == BOUND ?
906 isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
908 if (tmp == !(OP(c) == BOUND ?
909 swash_fetch(PL_utf8_alnum, (U8*)s) :
910 isALNUM_LC_utf8((U8*)s)))
913 if ((norun || regtry(prog, s)))
920 tmp = (s != startpos) ? UCHARAT(s - 1) : '\n';
921 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
924 !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
926 if ((norun || regtry(prog, s)))
932 if ((!prog->minlen && tmp) && (norun || regtry(prog, s)))
936 PL_reg_flags |= RF_tainted;
943 U8 *r = reghop((U8*)s, -1);
945 tmp = (I32)utf8_to_uv(r, s - (char*)r, 0, 0);
947 tmp = ((OP(c) == NBOUND ?
948 isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
950 if (tmp == !(OP(c) == NBOUND ?
951 swash_fetch(PL_utf8_alnum, (U8*)s) :
952 isALNUM_LC_utf8((U8*)s)))
954 else if ((norun || regtry(prog, s)))
960 tmp = (s != startpos) ? UCHARAT(s - 1) : '\n';
961 tmp = ((OP(c) == NBOUND ?
962 isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
965 !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
967 else if ((norun || regtry(prog, s)))
972 if ((!prog->minlen && !tmp) && (norun || regtry(prog, s)))
978 if (swash_fetch(PL_utf8_alnum, (U8*)s)) {
979 if (tmp && (norun || regtry(prog, s)))
992 if (tmp && (norun || regtry(prog, s)))
1004 PL_reg_flags |= RF_tainted;
1006 while (s < strend) {
1007 if (isALNUM_LC_utf8((U8*)s)) {
1008 if (tmp && (norun || regtry(prog, s)))
1019 while (s < strend) {
1020 if (isALNUM_LC(*s)) {
1021 if (tmp && (norun || regtry(prog, s)))
1034 while (s < strend) {
1035 if (!swash_fetch(PL_utf8_alnum, (U8*)s)) {
1036 if (tmp && (norun || regtry(prog, s)))
1047 while (s < strend) {
1049 if (tmp && (norun || regtry(prog, s)))
1061 PL_reg_flags |= RF_tainted;
1063 while (s < strend) {
1064 if (!isALNUM_LC_utf8((U8*)s)) {
1065 if (tmp && (norun || regtry(prog, s)))
1076 while (s < strend) {
1077 if (!isALNUM_LC(*s)) {
1078 if (tmp && (norun || regtry(prog, s)))
1091 while (s < strend) {
1092 if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s)) {
1093 if (tmp && (norun || regtry(prog, s)))
1104 while (s < strend) {
1106 if (tmp && (norun || regtry(prog, s)))
1118 PL_reg_flags |= RF_tainted;
1120 while (s < strend) {
1121 if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) {
1122 if (tmp && (norun || regtry(prog, s)))
1133 while (s < strend) {
1134 if (isSPACE_LC(*s)) {
1135 if (tmp && (norun || regtry(prog, s)))
1148 while (s < strend) {
1149 if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s))) {
1150 if (tmp && (norun || regtry(prog, s)))
1161 while (s < strend) {
1163 if (tmp && (norun || regtry(prog, s)))
1175 PL_reg_flags |= RF_tainted;
1177 while (s < strend) {
1178 if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) {
1179 if (tmp && (norun || regtry(prog, s)))
1190 while (s < strend) {
1191 if (!isSPACE_LC(*s)) {
1192 if (tmp && (norun || regtry(prog, s)))
1205 while (s < strend) {
1206 if (swash_fetch(PL_utf8_digit,(U8*)s)) {
1207 if (tmp && (norun || regtry(prog, s)))
1218 while (s < strend) {
1220 if (tmp && (norun || regtry(prog, s)))
1232 PL_reg_flags |= RF_tainted;
1234 while (s < strend) {
1235 if (isDIGIT_LC_utf8((U8*)s)) {
1236 if (tmp && (norun || regtry(prog, s)))
1247 while (s < strend) {
1248 if (isDIGIT_LC(*s)) {
1249 if (tmp && (norun || regtry(prog, s)))
1262 while (s < strend) {
1263 if (!swash_fetch(PL_utf8_digit,(U8*)s)) {
1264 if (tmp && (norun || regtry(prog, s)))
1275 while (s < strend) {
1277 if (tmp && (norun || regtry(prog, s)))
1289 PL_reg_flags |= RF_tainted;
1291 while (s < strend) {
1292 if (!isDIGIT_LC_utf8((U8*)s)) {
1293 if (tmp && (norun || regtry(prog, s)))
1304 while (s < strend) {
1305 if (!isDIGIT_LC(*s)) {
1306 if (tmp && (norun || regtry(prog, s)))
1318 Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
1327 - regexec_flags - match a regexp against a string
1330 Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend,
1331 char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
1332 /* strend: pointer to null at end of string */
1333 /* strbeg: real beginning of string */
1334 /* minend: end of match must be >=minend after stringarg. */
1335 /* data: May be used for some additional optimizations. */
1336 /* nosave: For optimizations. */
1339 register regnode *c;
1340 register char *startpos = stringarg;
1341 I32 minlen; /* must match at least this many chars */
1342 I32 dontbother = 0; /* how many characters not to try at end */
1343 /* I32 start_shift = 0; */ /* Offset of the start to find
1344 constant substr. */ /* CC */
1345 I32 end_shift = 0; /* Same for the end. */ /* CC */
1346 I32 scream_pos = -1; /* Internal iterator of scream. */
1348 SV* oreplsv = GvSV(PL_replgv);
1354 PL_regnarrate = PL_debug & 512;
1357 /* Be paranoid... */
1358 if (prog == NULL || startpos == NULL) {
1359 Perl_croak(aTHX_ "NULL regexp parameter");
1363 minlen = prog->minlen;
1364 if (strend - startpos < minlen) goto phooey;
1366 if (startpos == strbeg) /* is ^ valid at stringarg? */
1369 PL_regprev = (U32)stringarg[-1];
1370 if (!PL_multiline && PL_regprev == '\n')
1371 PL_regprev = '\0'; /* force ^ to NOT match */
1374 /* Check validity of program. */
1375 if (UCHARAT(prog->program) != REG_MAGIC) {
1376 Perl_croak(aTHX_ "corrupted regexp program");
1380 PL_reg_eval_set = 0;
1383 if (prog->reganch & ROPT_UTF8)
1384 PL_reg_flags |= RF_utf8;
1386 /* Mark beginning of line for ^ and lookbehind. */
1387 PL_regbol = startpos;
1391 /* Mark end of line for $ (and such) */
1394 /* see how far we have to get to not match where we matched before */
1395 PL_regtill = startpos+minend;
1397 /* We start without call_cc context. */
1400 /* If there is a "must appear" string, look for it. */
1403 if (prog->reganch & ROPT_GPOS_SEEN) { /* Need to have PL_reg_ganch */
1406 if (flags & REXEC_IGNOREPOS) /* Means: check only at start */
1407 PL_reg_ganch = startpos;
1408 else if (sv && SvTYPE(sv) >= SVt_PVMG
1410 && (mg = mg_find(sv, 'g')) && mg->mg_len >= 0) {
1411 PL_reg_ganch = strbeg + mg->mg_len; /* Defined pos() */
1412 if (prog->reganch & ROPT_ANCH_GPOS) {
1413 if (s > PL_reg_ganch)
1418 else /* pos() not defined */
1419 PL_reg_ganch = strbeg;
1422 if (!(flags & REXEC_CHECKED) && prog->check_substr != Nullsv) {
1423 re_scream_pos_data d;
1425 d.scream_olds = &scream_olds;
1426 d.scream_pos = &scream_pos;
1427 s = re_intuit_start(prog, sv, s, strend, flags, &d);
1429 goto phooey; /* not present */
1432 DEBUG_r( if (!PL_colorset) reginitcolors() );
1433 DEBUG_r(PerlIO_printf(Perl_debug_log,
1434 "%sMatching REx%s `%s%.60s%s%s' against `%s%.*s%s%s'\n",
1435 PL_colors[4],PL_colors[5],PL_colors[0],
1438 (strlen(prog->precomp) > 60 ? "..." : ""),
1440 (int)(strend - startpos > 60 ? 60 : strend - startpos),
1441 startpos, PL_colors[1],
1442 (strend - startpos > 60 ? "..." : ""))
1445 /* Simplest case: anchored match need be tried only once. */
1446 /* [unless only anchor is BOL and multiline is set] */
1447 if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) {
1448 if (s == startpos && regtry(prog, startpos))
1450 else if (PL_multiline || (prog->reganch & ROPT_IMPLICIT)
1451 || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */
1456 dontbother = minlen - 1;
1457 end = HOPc(strend, -dontbother) - 1;
1458 /* for multiline we only have to try after newlines */
1459 if (prog->check_substr) {
1463 if (regtry(prog, s))
1468 if (prog->reganch & RE_USE_INTUIT) {
1469 s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
1480 if (*s++ == '\n') { /* don't need PL_utf8skip here */
1481 if (regtry(prog, s))
1488 } else if (prog->reganch & ROPT_ANCH_GPOS) {
1489 if (regtry(prog, PL_reg_ganch))
1494 /* Messy cases: unanchored match. */
1495 if (prog->anchored_substr && prog->reganch & ROPT_SKIP) {
1496 /* we have /x+whatever/ */
1497 /* it must be a one character string (XXXX Except UTF?) */
1498 char ch = SvPVX(prog->anchored_substr)[0];
1504 while (s < strend) {
1506 DEBUG_r( did_match = 1 );
1507 if (regtry(prog, s)) goto got_it;
1509 while (s < strend && *s == ch)
1516 while (s < strend) {
1518 DEBUG_r( did_match = 1 );
1519 if (regtry(prog, s)) goto got_it;
1521 while (s < strend && *s == ch)
1527 DEBUG_r(did_match ||
1528 PerlIO_printf(Perl_debug_log,
1529 "Did not find anchored character...\n"));
1532 else if (prog->anchored_substr != Nullsv
1533 || (prog->float_substr != Nullsv
1534 && prog->float_max_offset < strend - s)) {
1535 SV *must = prog->anchored_substr
1536 ? prog->anchored_substr : prog->float_substr;
1538 prog->anchored_substr ? prog->anchored_offset : prog->float_max_offset;
1540 prog->anchored_substr ? prog->anchored_offset : prog->float_min_offset;
1541 char *last = HOPc(strend, /* Cannot start after this */
1542 -(I32)(CHR_SVLEN(must)
1543 - (SvTAIL(must) != 0) + back_min));
1544 char *last1; /* Last position checked before */
1550 last1 = HOPc(s, -1);
1552 last1 = s - 1; /* bogus */
1554 /* XXXX check_substr already used to find `s', can optimize if
1555 check_substr==must. */
1557 dontbother = end_shift;
1558 strend = HOPc(strend, -dontbother);
1559 while ( (s <= last) &&
1560 ((flags & REXEC_SCREAM)
1561 ? (s = screaminstr(sv, must, HOPc(s, back_min) - strbeg,
1562 end_shift, &scream_pos, 0))
1563 : (s = fbm_instr((unsigned char*)HOP(s, back_min),
1564 (unsigned char*)strend, must,
1565 PL_multiline ? FBMrf_MULTILINE : 0))) ) {
1566 DEBUG_r( did_match = 1 );
1567 if (HOPc(s, -back_max) > last1) {
1568 last1 = HOPc(s, -back_min);
1569 s = HOPc(s, -back_max);
1572 char *t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
1574 last1 = HOPc(s, -back_min);
1578 while (s <= last1) {
1579 if (regtry(prog, s))
1585 while (s <= last1) {
1586 if (regtry(prog, s))
1592 DEBUG_r(did_match ||
1593 PerlIO_printf(Perl_debug_log, "Did not find %s substr `%s%.*s%s'%s...\n",
1594 ((must == prog->anchored_substr)
1595 ? "anchored" : "floating"),
1597 (int)(SvCUR(must) - (SvTAIL(must)!=0)),
1599 PL_colors[1], (SvTAIL(must) ? "$" : "")));
1602 else if ((c = prog->regstclass)) {
1603 if (minlen && PL_regkind[(U8)OP(prog->regstclass)] != EXACT)
1604 /* don't bother with what can't match */
1605 strend = HOPc(strend, -(minlen - 1));
1607 SV *prop = sv_newmortal();
1609 PerlIO_printf(Perl_debug_log, "Matching stclass `%s' against `%s'\n", SvPVX(prop), s);
1611 if (find_byclass(prog, c, s, strend, startpos, 0))
1613 DEBUG_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass...\n"));
1617 if (prog->float_substr != Nullsv) { /* Trim the end. */
1620 if (flags & REXEC_SCREAM) {
1621 last = screaminstr(sv, prog->float_substr, s - strbeg,
1622 end_shift, &scream_pos, 1); /* last one */
1624 last = scream_olds; /* Only one occurrence. */
1628 char *little = SvPV(prog->float_substr, len);
1630 if (SvTAIL(prog->float_substr)) {
1631 if (memEQ(strend - len + 1, little, len - 1))
1632 last = strend - len + 1;
1633 else if (!PL_multiline)
1634 last = memEQ(strend - len, little, len)
1635 ? strend - len : Nullch;
1641 last = rninstr(s, strend, little, little + len);
1643 last = strend; /* matching `$' */
1647 DEBUG_r(PerlIO_printf(Perl_debug_log,
1648 "%sCan't trim the tail, match fails (should not happen)%s\n",
1649 PL_colors[4],PL_colors[5]));
1650 goto phooey; /* Should not happen! */
1652 dontbother = strend - last + prog->float_min_offset;
1654 if (minlen && (dontbother < minlen))
1655 dontbother = minlen - 1;
1656 strend -= dontbother; /* this one's always in bytes! */
1657 /* We don't know much -- general case. */
1660 if (regtry(prog, s))
1669 if (regtry(prog, s))
1671 } while (s++ < strend);
1679 RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
1681 if (PL_reg_eval_set) {
1682 /* Preserve the current value of $^R */
1683 if (oreplsv != GvSV(PL_replgv))
1684 sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
1685 restored, the value remains
1687 restore_pos(aTHXo_ 0);
1690 /* make sure $`, $&, $', and $digit will work later */
1691 if ( !(flags & REXEC_NOT_FIRST) ) {
1692 if (RX_MATCH_COPIED(prog)) {
1693 Safefree(prog->subbeg);
1694 RX_MATCH_COPIED_off(prog);
1696 if (flags & REXEC_COPY_STR) {
1697 I32 i = PL_regeol - startpos + (stringarg - strbeg);
1699 s = savepvn(strbeg, i);
1702 RX_MATCH_COPIED_on(prog);
1705 prog->subbeg = strbeg;
1706 prog->sublen = PL_regeol - strbeg; /* strend may have been modified */
1713 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n",
1714 PL_colors[4],PL_colors[5]));
1715 if (PL_reg_eval_set)
1716 restore_pos(aTHXo_ 0);
1721 - regtry - try match at specific point
1723 STATIC I32 /* 0 failure, 1 success */
1724 S_regtry(pTHX_ regexp *prog, char *startpos)
1732 PL_regindent = 0; /* XXXX Not good when matches are reenterable... */
1734 if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
1737 PL_reg_eval_set = RS_init;
1739 PerlIO_printf(Perl_debug_log, " setting stack tmpbase at %"IVdf"\n",
1740 (IV)(PL_stack_sp - PL_stack_base));
1742 SAVEI32(cxstack[cxstack_ix].blk_oldsp);
1743 cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
1744 /* Otherwise OP_NEXTSTATE will free whatever on stack now. */
1746 /* Apparently this is not needed, judging by wantarray. */
1747 /* SAVEI8(cxstack[cxstack_ix].blk_gimme);
1748 cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
1751 /* Make $_ available to executed code. */
1752 if (PL_reg_sv != DEFSV) {
1753 /* SAVE_DEFSV does *not* suffice here for USE_THREADS */
1758 if (!(SvTYPE(PL_reg_sv) >= SVt_PVMG && SvMAGIC(PL_reg_sv)
1759 && (mg = mg_find(PL_reg_sv, 'g')))) {
1760 /* prepare for quick setting of pos */
1761 sv_magic(PL_reg_sv, (SV*)0, 'g', Nullch, 0);
1762 mg = mg_find(PL_reg_sv, 'g');
1766 PL_reg_oldpos = mg->mg_len;
1767 SAVEDESTRUCTOR_X(restore_pos, 0);
1770 Newz(22,PL_reg_curpm, 1, PMOP);
1771 PL_reg_curpm->op_pmregexp = prog;
1772 PL_reg_oldcurpm = PL_curpm;
1773 PL_curpm = PL_reg_curpm;
1774 if (RX_MATCH_COPIED(prog)) {
1775 /* Here is a serious problem: we cannot rewrite subbeg,
1776 since it may be needed if this match fails. Thus
1777 $` inside (?{}) could fail... */
1778 PL_reg_oldsaved = prog->subbeg;
1779 PL_reg_oldsavedlen = prog->sublen;
1780 RX_MATCH_COPIED_off(prog);
1783 PL_reg_oldsaved = Nullch;
1784 prog->subbeg = PL_bostr;
1785 prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
1787 prog->startp[0] = startpos - PL_bostr;
1788 PL_reginput = startpos;
1789 PL_regstartp = prog->startp;
1790 PL_regendp = prog->endp;
1791 PL_reglastparen = &prog->lastparen;
1792 prog->lastparen = 0;
1794 DEBUG_r(PL_reg_starttry = startpos);
1795 if (PL_reg_start_tmpl <= prog->nparens) {
1796 PL_reg_start_tmpl = prog->nparens*3/2 + 3;
1797 if(PL_reg_start_tmp)
1798 Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1800 New(22,PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1803 /* XXXX What this code is doing here?!!! There should be no need
1804 to do this again and again, PL_reglastparen should take care of
1807 /* Tests pat.t#187 and split.t#{13,14} seem to depend on this code.
1808 * Actually, the code in regcppop() (which Ilya may be meaning by
1809 * PL_reglastparen), is not needed at all by the test suite
1810 * (op/regexp, op/pat, op/split), but that code is needed, oddly
1811 * enough, for building DynaLoader, or otherwise this
1812 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
1813 * will happen. Meanwhile, this code *is* needed for the
1814 * above-mentioned test suite tests to succeed. The common theme
1815 * on those tests seems to be returning null fields from matches.
1820 if (prog->nparens) {
1821 for (i = prog->nparens; i > *PL_reglastparen; i--) {
1828 if (regmatch(prog->program + 1)) {
1829 prog->endp[0] = PL_reginput - PL_bostr;
1832 REGCP_UNWIND(lastcp);
1836 #define RE_UNWIND_BRANCH 1
1837 #define RE_UNWIND_BRANCHJ 2
1841 typedef struct { /* XX: makes sense to enlarge it... */
1845 } re_unwind_generic_t;
1858 } re_unwind_branch_t;
1860 typedef union re_unwind_t {
1862 re_unwind_generic_t generic;
1863 re_unwind_branch_t branch;
1867 - regmatch - main matching routine
1869 * Conceptually the strategy is simple: check to see whether the current
1870 * node matches, call self recursively to see whether the rest matches,
1871 * and then act accordingly. In practice we make some effort to avoid
1872 * recursion, in particular by going through "ordinary" nodes (that don't
1873 * need to know whether the rest of the match failed) by a loop instead of
1876 /* [lwall] I've hoisted the register declarations to the outer block in order to
1877 * maybe save a little bit of pushing and popping on the stack. It also takes
1878 * advantage of machines that use a register save mask on subroutine entry.
1880 STATIC I32 /* 0 failure, 1 success */
1881 S_regmatch(pTHX_ regnode *prog)
1883 register regnode *scan; /* Current node. */
1884 regnode *next; /* Next node. */
1885 regnode *inner; /* Next node in internal branch. */
1886 register I32 nextchr; /* renamed nextchr - nextchar colides with
1887 function of same name */
1888 register I32 n; /* no or next */
1889 register I32 ln; /* len or last */
1890 register char *s; /* operand or save */
1891 register char *locinput = PL_reginput;
1892 register I32 c1, c2, paren; /* case fold search, parenth */
1893 int minmod = 0, sw = 0, logical = 0;
1895 I32 firstcp = PL_savestack_ix;
1896 register bool do_utf8 = DO_UTF8(PL_reg_sv);
1902 /* Note that nextchr is a byte even in UTF */
1903 nextchr = UCHARAT(locinput);
1905 while (scan != NULL) {
1906 #define sayNO_L (logical ? (logical = 0, sw = 0, goto cont) : sayNO)
1908 # define sayYES goto yes
1909 # define sayNO goto no
1910 # define sayYES_FINAL goto yes_final
1911 # define sayYES_LOUD goto yes_loud
1912 # define sayNO_FINAL goto no_final
1913 # define sayNO_SILENT goto do_no
1914 # define saySAME(x) if (x) goto yes; else goto no
1915 # define REPORT_CODE_OFF 24
1917 # define sayYES return 1
1918 # define sayNO return 0
1919 # define sayYES_FINAL return 1
1920 # define sayYES_LOUD return 1
1921 # define sayNO_FINAL return 0
1922 # define sayNO_SILENT return 0
1923 # define saySAME(x) return x
1926 SV *prop = sv_newmortal();
1927 int docolor = *PL_colors[0];
1928 int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
1929 int l = (PL_regeol - locinput > taill ? taill : PL_regeol - locinput);
1930 /* The part of the string before starttry has one color
1931 (pref0_len chars), between starttry and current
1932 position another one (pref_len - pref0_len chars),
1933 after the current position the third one.
1934 We assume that pref0_len <= pref_len, otherwise we
1935 decrease pref0_len. */
1936 int pref_len = (locinput - PL_bostr > (5 + taill) - l
1937 ? (5 + taill) - l : locinput - PL_bostr);
1938 int pref0_len = pref_len - (locinput - PL_reg_starttry);
1940 if (l + pref_len < (5 + taill) && l < PL_regeol - locinput)
1941 l = ( PL_regeol - locinput > (5 + taill) - pref_len
1942 ? (5 + taill) - pref_len : PL_regeol - locinput);
1945 if (pref0_len > pref_len)
1946 pref0_len = pref_len;
1947 regprop(prop, scan);
1948 PerlIO_printf(Perl_debug_log,
1949 "%4"IVdf" <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|%3"IVdf":%*s%s\n",
1950 (IV)(locinput - PL_bostr),
1951 PL_colors[4], pref0_len,
1952 locinput - pref_len, PL_colors[5],
1953 PL_colors[2], pref_len - pref0_len,
1954 locinput - pref_len + pref0_len, PL_colors[3],
1955 (docolor ? "" : "> <"),
1956 PL_colors[0], l, locinput, PL_colors[1],
1957 15 - l - pref_len + 1,
1959 (IV)(scan - PL_regprogram), PL_regindent*2, "",
1963 next = scan + NEXT_OFF(scan);
1969 if (locinput == PL_bostr
1970 ? PL_regprev == '\n'
1972 (nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
1974 /* regtill = regbol; */
1979 if (locinput == PL_bostr
1980 ? PL_regprev == '\n'
1981 : ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
1987 if (locinput == PL_bostr)
1991 if (locinput == PL_reg_ganch)
2001 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
2006 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
2008 if (PL_regeol - locinput > 1)
2012 if (PL_regeol != locinput)
2016 if (DO_UTF8(PL_reg_sv)) {
2017 locinput += PL_utf8skip[nextchr];
2018 if (locinput > PL_regeol)
2020 nextchr = UCHARAT(locinput);
2023 if (!nextchr && locinput >= PL_regeol)
2025 nextchr = UCHARAT(++locinput);
2028 if (DO_UTF8(PL_reg_sv)) {
2029 locinput += PL_utf8skip[nextchr];
2030 if (locinput > PL_regeol)
2032 nextchr = UCHARAT(locinput);
2035 if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
2037 nextchr = UCHARAT(++locinput);
2042 /* Inline the first character, for speed. */
2043 if (UCHARAT(s) != nextchr)
2045 if (PL_regeol - locinput < ln)
2047 if (ln > 1 && memNE(s, locinput, ln))
2050 nextchr = UCHARAT(locinput);
2053 PL_reg_flags |= RF_tainted;
2062 c1 = OP(scan) == EXACTF;
2066 if (utf8_to_uv((U8*)s, e - s, 0, 0) !=
2068 toLOWER_utf8((U8*)l) :
2069 toLOWER_LC_utf8((U8*)l)))
2077 nextchr = UCHARAT(locinput);
2081 /* Inline the first character, for speed. */
2082 if (UCHARAT(s) != nextchr &&
2083 UCHARAT(s) != ((OP(scan) == EXACTF)
2084 ? PL_fold : PL_fold_locale)[nextchr])
2086 if (PL_regeol - locinput < ln)
2088 if (ln > 1 && (OP(scan) == EXACTF
2089 ? ibcmp(s, locinput, ln)
2090 : ibcmp_locale(s, locinput, ln)))
2093 nextchr = UCHARAT(locinput);
2097 if (!reginclass(scan, (U8*)locinput, do_utf8))
2099 if (locinput >= PL_regeol)
2101 locinput += PL_utf8skip[nextchr];
2102 nextchr = UCHARAT(locinput);
2106 nextchr = UCHARAT(locinput);
2107 if (!reginclass(scan, (U8*)locinput, do_utf8))
2109 if (!nextchr && locinput >= PL_regeol)
2111 nextchr = UCHARAT(++locinput);
2115 PL_reg_flags |= RF_tainted;
2121 if (!(OP(scan) == ALNUM
2122 ? swash_fetch(PL_utf8_alnum, (U8*)locinput)
2123 : isALNUM_LC_utf8((U8*)locinput)))
2127 locinput += PL_utf8skip[nextchr];
2128 nextchr = UCHARAT(locinput);
2131 if (!(OP(scan) == ALNUM
2132 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
2134 nextchr = UCHARAT(++locinput);
2137 PL_reg_flags |= RF_tainted;
2140 if (!nextchr && locinput >= PL_regeol)
2143 if (OP(scan) == NALNUM
2144 ? swash_fetch(PL_utf8_alnum, (U8*)locinput)
2145 : isALNUM_LC_utf8((U8*)locinput))
2149 locinput += PL_utf8skip[nextchr];
2150 nextchr = UCHARAT(locinput);
2153 if (OP(scan) == NALNUM
2154 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
2156 nextchr = UCHARAT(++locinput);
2160 PL_reg_flags |= RF_tainted;
2164 /* was last char in word? */
2166 if (locinput == PL_regbol)
2169 U8 *r = reghop((U8*)locinput, -1);
2171 ln = utf8_to_uv(r, s - (char*)r, 0, 0);
2173 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2174 ln = isALNUM_uni(ln);
2175 n = swash_fetch(PL_utf8_alnum, (U8*)locinput);
2178 ln = isALNUM_LC_uni(ln);
2179 n = isALNUM_LC_utf8((U8*)locinput);
2183 ln = (locinput != PL_regbol) ?
2184 UCHARAT(locinput - 1) : PL_regprev;
2185 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2187 n = isALNUM(nextchr);
2190 ln = isALNUM_LC(ln);
2191 n = isALNUM_LC(nextchr);
2194 if (((!ln) == (!n)) == (OP(scan) == BOUND ||
2195 OP(scan) == BOUNDL))
2199 PL_reg_flags |= RF_tainted;
2204 if (DO_UTF8(PL_reg_sv)) {
2205 if (nextchr & 0x80) {
2206 if (!(OP(scan) == SPACE
2207 ? swash_fetch(PL_utf8_space, (U8*)locinput)
2208 : isSPACE_LC_utf8((U8*)locinput)))
2212 locinput += PL_utf8skip[nextchr];
2213 nextchr = UCHARAT(locinput);
2216 if (!(OP(scan) == SPACE
2217 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
2219 nextchr = UCHARAT(++locinput);
2222 if (!(OP(scan) == SPACE
2223 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
2225 nextchr = UCHARAT(++locinput);
2229 PL_reg_flags |= RF_tainted;
2232 if (!nextchr && locinput >= PL_regeol)
2234 if (DO_UTF8(PL_reg_sv)) {
2235 if (OP(scan) == NSPACE
2236 ? swash_fetch(PL_utf8_space, (U8*)locinput)
2237 : isSPACE_LC_utf8((U8*)locinput))
2241 locinput += PL_utf8skip[nextchr];
2242 nextchr = UCHARAT(locinput);
2245 if (OP(scan) == NSPACE
2246 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
2248 nextchr = UCHARAT(++locinput);
2251 PL_reg_flags |= RF_tainted;
2256 if (DO_UTF8(PL_reg_sv)) {
2257 if (!(OP(scan) == DIGIT
2258 ? swash_fetch(PL_utf8_digit, (U8*)locinput)
2259 : isDIGIT_LC_utf8((U8*)locinput)))
2263 locinput += PL_utf8skip[nextchr];
2264 nextchr = UCHARAT(locinput);
2267 if (!(OP(scan) == DIGIT
2268 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
2270 nextchr = UCHARAT(++locinput);
2273 PL_reg_flags |= RF_tainted;
2276 if (!nextchr && locinput >= PL_regeol)
2278 if (DO_UTF8(PL_reg_sv)) {
2279 if (OP(scan) == NDIGIT
2280 ? swash_fetch(PL_utf8_digit, (U8*)locinput)
2281 : isDIGIT_LC_utf8((U8*)locinput))
2285 locinput += PL_utf8skip[nextchr];
2286 nextchr = UCHARAT(locinput);
2289 if (OP(scan) == NDIGIT
2290 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
2292 nextchr = UCHARAT(++locinput);
2295 if (locinput >= PL_regeol || swash_fetch(PL_utf8_mark,(U8*)locinput))
2297 locinput += PL_utf8skip[nextchr];
2298 while (locinput < PL_regeol && swash_fetch(PL_utf8_mark,(U8*)locinput))
2299 locinput += UTF8SKIP(locinput);
2300 if (locinput > PL_regeol)
2302 nextchr = UCHARAT(locinput);
2305 PL_reg_flags |= RF_tainted;
2309 n = ARG(scan); /* which paren pair */
2310 ln = PL_regstartp[n];
2311 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
2312 if (*PL_reglastparen < n || ln == -1)
2313 sayNO; /* Do not match unless seen CLOSEn. */
2314 if (ln == PL_regendp[n])
2318 if (UTF && OP(scan) != REF) { /* REF can do byte comparison */
2320 char *e = PL_bostr + PL_regendp[n];
2322 * Note that we can't do the "other character" lookup trick as
2323 * in the 8-bit case (no pun intended) because in Unicode we
2324 * have to map both upper and title case to lower case.
2326 if (OP(scan) == REFF) {
2330 if (toLOWER_utf8((U8*)s) != toLOWER_utf8((U8*)l))
2340 if (toLOWER_LC_utf8((U8*)s) != toLOWER_LC_utf8((U8*)l))
2347 nextchr = UCHARAT(locinput);
2351 /* Inline the first character, for speed. */
2352 if (UCHARAT(s) != nextchr &&
2354 (UCHARAT(s) != ((OP(scan) == REFF
2355 ? PL_fold : PL_fold_locale)[nextchr]))))
2357 ln = PL_regendp[n] - ln;
2358 if (locinput + ln > PL_regeol)
2360 if (ln > 1 && (OP(scan) == REF
2361 ? memNE(s, locinput, ln)
2363 ? ibcmp(s, locinput, ln)
2364 : ibcmp_locale(s, locinput, ln))))
2367 nextchr = UCHARAT(locinput);
2378 OP_4tree *oop = PL_op;
2379 COP *ocurcop = PL_curcop;
2380 SV **ocurpad = PL_curpad;
2384 PL_op = (OP_4tree*)PL_regdata->data[n];
2385 DEBUG_r( PerlIO_printf(Perl_debug_log, " re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
2386 PL_curpad = AvARRAY((AV*)PL_regdata->data[n + 2]);
2387 PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
2389 CALLRUNOPS(aTHX); /* Scalar context. */
2395 PL_curpad = ocurpad;
2396 PL_curcop = ocurcop;
2398 if (logical == 2) { /* Postponed subexpression. */
2400 MAGIC *mg = Null(MAGIC*);
2402 CHECKPOINT cp, lastcp;
2404 if(SvROK(ret) || SvRMAGICAL(ret)) {
2405 SV *sv = SvROK(ret) ? SvRV(ret) : ret;
2408 mg = mg_find(sv, 'r');
2411 re = (regexp *)mg->mg_obj;
2412 (void)ReREFCNT_inc(re);
2416 char *t = SvPV(ret, len);
2418 char *oprecomp = PL_regprecomp;
2419 I32 osize = PL_regsize;
2420 I32 onpar = PL_regnpar;
2423 pm.op_pmdynflags = (UTF||DO_UTF8(ret) ? PMdf_UTF8 : 0);
2424 re = CALLREGCOMP(aTHX_ t, t + len, &pm);
2426 & (SVs_TEMP | SVs_PADTMP | SVf_READONLY)))
2427 sv_magic(ret,(SV*)ReREFCNT_inc(re),'r',0,0);
2428 PL_regprecomp = oprecomp;
2433 PerlIO_printf(Perl_debug_log,
2434 "Entering embedded `%s%.60s%s%s'\n",
2438 (strlen(re->precomp) > 60 ? "..." : ""))
2441 state.prev = PL_reg_call_cc;
2442 state.cc = PL_regcc;
2443 state.re = PL_reg_re;
2447 cp = regcppush(0); /* Save *all* the positions. */
2450 state.ss = PL_savestack_ix;
2451 *PL_reglastparen = 0;
2452 PL_reg_call_cc = &state;
2453 PL_reginput = locinput;
2455 /* XXXX This is too dramatic a measure... */
2458 if (regmatch(re->program + 1)) {
2459 /* Even though we succeeded, we need to restore
2460 global variables, since we may be wrapped inside
2461 SUSPEND, thus the match may be not finished yet. */
2463 /* XXXX Do this only if SUSPENDed? */
2464 PL_reg_call_cc = state.prev;
2465 PL_regcc = state.cc;
2466 PL_reg_re = state.re;
2467 cache_re(PL_reg_re);
2469 /* XXXX This is too dramatic a measure... */
2472 /* These are needed even if not SUSPEND. */
2478 REGCP_UNWIND(lastcp);
2480 PL_reg_call_cc = state.prev;
2481 PL_regcc = state.cc;
2482 PL_reg_re = state.re;
2483 cache_re(PL_reg_re);
2485 /* XXXX This is too dramatic a measure... */
2494 sv_setsv(save_scalar(PL_replgv), ret);
2498 n = ARG(scan); /* which paren pair */
2499 PL_reg_start_tmp[n] = locinput;
2504 n = ARG(scan); /* which paren pair */
2505 PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
2506 PL_regendp[n] = locinput - PL_bostr;
2507 if (n > *PL_reglastparen)
2508 *PL_reglastparen = n;
2511 n = ARG(scan); /* which paren pair */
2512 sw = (*PL_reglastparen >= n && PL_regendp[n] != -1);
2515 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
2517 next = NEXTOPER(NEXTOPER(scan));
2519 next = scan + ARG(scan);
2520 if (OP(next) == IFTHEN) /* Fake one. */
2521 next = NEXTOPER(NEXTOPER(next));
2525 logical = scan->flags;
2527 /*******************************************************************
2528 PL_regcc contains infoblock about the innermost (...)* loop, and
2529 a pointer to the next outer infoblock.
2531 Here is how Y(A)*Z is processed (if it is compiled into CURLYX/WHILEM):
2533 1) After matching X, regnode for CURLYX is processed;
2535 2) This regnode creates infoblock on the stack, and calls
2536 regmatch() recursively with the starting point at WHILEM node;
2538 3) Each hit of WHILEM node tries to match A and Z (in the order
2539 depending on the current iteration, min/max of {min,max} and
2540 greediness). The information about where are nodes for "A"
2541 and "Z" is read from the infoblock, as is info on how many times "A"
2542 was already matched, and greediness.
2544 4) After A matches, the same WHILEM node is hit again.
2546 5) Each time WHILEM is hit, PL_regcc is the infoblock created by CURLYX
2547 of the same pair. Thus when WHILEM tries to match Z, it temporarily
2548 resets PL_regcc, since this Y(A)*Z can be a part of some other loop:
2549 as in (Y(A)*Z)*. If Z matches, the automaton will hit the WHILEM node
2550 of the external loop.
2552 Currently present infoblocks form a tree with a stem formed by PL_curcc
2553 and whatever it mentions via ->next, and additional attached trees
2554 corresponding to temporarily unset infoblocks as in "5" above.
2556 In the following picture infoblocks for outer loop of
2557 (Y(A)*?Z)*?T are denoted O, for inner I. NULL starting block
2558 is denoted by x. The matched string is YAAZYAZT. Temporarily postponed
2559 infoblocks are drawn below the "reset" infoblock.
2561 In fact in the picture below we do not show failed matches for Z and T
2562 by WHILEM blocks. [We illustrate minimal matches, since for them it is
2563 more obvious *why* one needs to *temporary* unset infoblocks.]
2565 Matched REx position InfoBlocks Comment
2569 Y A)*?Z)*?T x <- O <- I
2570 YA )*?Z)*?T x <- O <- I
2571 YA A)*?Z)*?T x <- O <- I
2572 YAA )*?Z)*?T x <- O <- I
2573 YAA Z)*?T x <- O # Temporary unset I
2576 YAAZ Y(A)*?Z)*?T x <- O
2579 YAAZY (A)*?Z)*?T x <- O
2582 YAAZY A)*?Z)*?T x <- O <- I
2585 YAAZYA )*?Z)*?T x <- O <- I
2588 YAAZYA Z)*?T x <- O # Temporary unset I
2594 YAAZYAZ T x # Temporary unset O
2601 *******************************************************************/
2604 CHECKPOINT cp = PL_savestack_ix;
2605 /* No need to save/restore up to this paren */
2606 I32 parenfloor = scan->flags;
2608 if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
2610 cc.oldcc = PL_regcc;
2612 /* XXXX Probably it is better to teach regpush to support
2613 parenfloor > PL_regsize... */
2614 if (parenfloor > *PL_reglastparen)
2615 parenfloor = *PL_reglastparen; /* Pessimization... */
2616 cc.parenfloor = parenfloor;
2618 cc.min = ARG1(scan);
2619 cc.max = ARG2(scan);
2620 cc.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
2624 PL_reginput = locinput;
2625 n = regmatch(PREVOPER(next)); /* start on the WHILEM */
2627 PL_regcc = cc.oldcc;
2633 * This is really hard to understand, because after we match
2634 * what we're trying to match, we must make sure the rest of
2635 * the REx is going to match for sure, and to do that we have
2636 * to go back UP the parse tree by recursing ever deeper. And
2637 * if it fails, we have to reset our parent's current state
2638 * that we can try again after backing off.
2641 CHECKPOINT cp, lastcp;
2642 CURCUR* cc = PL_regcc;
2643 char *lastloc = cc->lastloc; /* Detection of 0-len. */
2645 n = cc->cur + 1; /* how many we know we matched */
2646 PL_reginput = locinput;
2649 PerlIO_printf(Perl_debug_log,
2650 "%*s %ld out of %ld..%ld cc=%lx\n",
2651 REPORT_CODE_OFF+PL_regindent*2, "",
2652 (long)n, (long)cc->min,
2653 (long)cc->max, (long)cc)
2656 /* If degenerate scan matches "", assume scan done. */
2658 if (locinput == cc->lastloc && n >= cc->min) {
2659 PL_regcc = cc->oldcc;
2663 PerlIO_printf(Perl_debug_log,
2664 "%*s empty match detected, try continuation...\n",
2665 REPORT_CODE_OFF+PL_regindent*2, "")
2667 if (regmatch(cc->next))
2675 /* First just match a string of min scans. */
2679 cc->lastloc = locinput;
2680 if (regmatch(cc->scan))
2683 cc->lastloc = lastloc;
2688 /* Check whether we already were at this position.
2689 Postpone detection until we know the match is not
2690 *that* much linear. */
2691 if (!PL_reg_maxiter) {
2692 PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
2693 PL_reg_leftiter = PL_reg_maxiter;
2695 if (PL_reg_leftiter-- == 0) {
2696 I32 size = (PL_reg_maxiter + 7)/8;
2697 if (PL_reg_poscache) {
2698 if (PL_reg_poscache_size < size) {
2699 Renew(PL_reg_poscache, size, char);
2700 PL_reg_poscache_size = size;
2702 Zero(PL_reg_poscache, size, char);
2705 PL_reg_poscache_size = size;
2706 Newz(29, PL_reg_poscache, size, char);
2709 PerlIO_printf(Perl_debug_log,
2710 "%sDetected a super-linear match, switching on caching%s...\n",
2711 PL_colors[4], PL_colors[5])
2714 if (PL_reg_leftiter < 0) {
2715 I32 o = locinput - PL_bostr, b;
2717 o = (scan->flags & 0xf) - 1 + o * (scan->flags>>4);
2720 if (PL_reg_poscache[o] & (1<<b)) {
2722 PerlIO_printf(Perl_debug_log,
2723 "%*s already tried at this position...\n",
2724 REPORT_CODE_OFF+PL_regindent*2, "")
2728 PL_reg_poscache[o] |= (1<<b);
2732 /* Prefer next over scan for minimal matching. */
2735 PL_regcc = cc->oldcc;
2738 cp = regcppush(cc->parenfloor);
2740 if (regmatch(cc->next)) {
2742 sayYES; /* All done. */
2744 REGCP_UNWIND(lastcp);
2750 if (n >= cc->max) { /* Maximum greed exceeded? */
2751 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
2752 && !(PL_reg_flags & RF_warned)) {
2753 PL_reg_flags |= RF_warned;
2754 Perl_warner(aTHX_ WARN_REGEXP, "%s limit (%d) exceeded",
2755 "Complex regular subexpression recursion",
2762 PerlIO_printf(Perl_debug_log,
2763 "%*s trying longer...\n",
2764 REPORT_CODE_OFF+PL_regindent*2, "")
2766 /* Try scanning more and see if it helps. */
2767 PL_reginput = locinput;
2769 cc->lastloc = locinput;
2770 cp = regcppush(cc->parenfloor);
2772 if (regmatch(cc->scan)) {
2776 REGCP_UNWIND(lastcp);
2779 cc->lastloc = lastloc;
2783 /* Prefer scan over next for maximal matching. */
2785 if (n < cc->max) { /* More greed allowed? */
2786 cp = regcppush(cc->parenfloor);
2788 cc->lastloc = locinput;
2790 if (regmatch(cc->scan)) {
2794 REGCP_UNWIND(lastcp);
2795 regcppop(); /* Restore some previous $<digit>s? */
2796 PL_reginput = locinput;
2798 PerlIO_printf(Perl_debug_log,
2799 "%*s failed, try continuation...\n",
2800 REPORT_CODE_OFF+PL_regindent*2, "")
2803 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
2804 && !(PL_reg_flags & RF_warned)) {
2805 PL_reg_flags |= RF_warned;
2806 Perl_warner(aTHX_ WARN_REGEXP, "%s limit (%d) exceeded",
2807 "Complex regular subexpression recursion",
2811 /* Failed deeper matches of scan, so see if this one works. */
2812 PL_regcc = cc->oldcc;
2815 if (regmatch(cc->next))
2821 cc->lastloc = lastloc;
2826 next = scan + ARG(scan);
2829 inner = NEXTOPER(NEXTOPER(scan));
2832 inner = NEXTOPER(scan);
2837 if (OP(next) != c1) /* No choice. */
2838 next = inner; /* Avoid recursion. */
2840 I32 lastparen = *PL_reglastparen;
2842 re_unwind_branch_t *uw;
2844 /* Put unwinding data on stack */
2845 unwind1 = SSNEWt(1,re_unwind_branch_t);
2846 uw = SSPTRt(unwind1,re_unwind_branch_t);
2849 uw->type = ((c1 == BRANCH)
2851 : RE_UNWIND_BRANCHJ);
2852 uw->lastparen = lastparen;
2854 uw->locinput = locinput;
2855 uw->nextchr = nextchr;
2857 uw->regindent = ++PL_regindent;
2860 REGCP_SET(uw->lastcp);
2862 /* Now go into the first branch */
2875 /* We suppose that the next guy does not need
2876 backtracking: in particular, it is of constant length,
2877 and has no parenths to influence future backrefs. */
2878 ln = ARG1(scan); /* min to match */
2879 n = ARG2(scan); /* max to match */
2880 paren = scan->flags;
2882 if (paren > PL_regsize)
2884 if (paren > *PL_reglastparen)
2885 *PL_reglastparen = paren;
2887 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
2889 scan += NEXT_OFF(scan); /* Skip former OPEN. */
2890 PL_reginput = locinput;
2893 if (ln && regrepeat_hard(scan, ln, &l) < ln)
2895 if (ln && l == 0 && n >= ln
2896 /* In fact, this is tricky. If paren, then the
2897 fact that we did/didnot match may influence
2898 future execution. */
2899 && !(paren && ln == 0))
2901 locinput = PL_reginput;
2902 if (PL_regkind[(U8)OP(next)] == EXACT) {
2903 c1 = (U8)*STRING(next);
2904 if (OP(next) == EXACTF)
2906 else if (OP(next) == EXACTFL)
2907 c2 = PL_fold_locale[c1];
2914 /* This may be improved if l == 0. */
2915 while (n >= ln || (n == REG_INFTY && ln > 0 && l)) { /* ln overflow ? */
2916 /* If it could work, try it. */
2918 UCHARAT(PL_reginput) == c1 ||
2919 UCHARAT(PL_reginput) == c2)
2923 PL_regstartp[paren] =
2924 HOPc(PL_reginput, -l) - PL_bostr;
2925 PL_regendp[paren] = PL_reginput - PL_bostr;
2928 PL_regendp[paren] = -1;
2932 REGCP_UNWIND(lastcp);
2934 /* Couldn't or didn't -- move forward. */
2935 PL_reginput = locinput;
2936 if (regrepeat_hard(scan, 1, &l)) {
2938 locinput = PL_reginput;
2945 n = regrepeat_hard(scan, n, &l);
2946 if (n != 0 && l == 0
2947 /* In fact, this is tricky. If paren, then the
2948 fact that we did/didnot match may influence
2949 future execution. */
2950 && !(paren && ln == 0))
2952 locinput = PL_reginput;
2954 PerlIO_printf(Perl_debug_log,
2955 "%*s matched %"IVdf" times, len=%"IVdf"...\n",
2956 (int)(REPORT_CODE_OFF+PL_regindent*2), "",
2960 if (PL_regkind[(U8)OP(next)] == EXACT) {
2961 c1 = (U8)*STRING(next);
2962 if (OP(next) == EXACTF)
2964 else if (OP(next) == EXACTFL)
2965 c2 = PL_fold_locale[c1];
2974 /* If it could work, try it. */
2976 UCHARAT(PL_reginput) == c1 ||
2977 UCHARAT(PL_reginput) == c2)
2980 PerlIO_printf(Perl_debug_log,
2981 "%*s trying tail with n=%"IVdf"...\n",
2982 (int)(REPORT_CODE_OFF+PL_regindent*2), "", (IV)n)
2986 PL_regstartp[paren] = HOPc(PL_reginput, -l) - PL_bostr;
2987 PL_regendp[paren] = PL_reginput - PL_bostr;
2990 PL_regendp[paren] = -1;
2994 REGCP_UNWIND(lastcp);
2996 /* Couldn't or didn't -- back up. */
2998 locinput = HOPc(locinput, -l);
2999 PL_reginput = locinput;
3006 paren = scan->flags; /* Which paren to set */
3007 if (paren > PL_regsize)
3009 if (paren > *PL_reglastparen)
3010 *PL_reglastparen = paren;
3011 ln = ARG1(scan); /* min to match */
3012 n = ARG2(scan); /* max to match */
3013 scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
3017 ln = ARG1(scan); /* min to match */
3018 n = ARG2(scan); /* max to match */
3019 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
3024 scan = NEXTOPER(scan);
3030 scan = NEXTOPER(scan);
3034 * Lookahead to avoid useless match attempts
3035 * when we know what character comes next.
3037 if (PL_regkind[(U8)OP(next)] == EXACT) {
3038 c1 = (U8)*STRING(next);
3039 if (OP(next) == EXACTF)
3041 else if (OP(next) == EXACTFL)
3042 c2 = PL_fold_locale[c1];
3048 PL_reginput = locinput;
3052 if (ln && regrepeat(scan, ln) < ln)
3054 locinput = PL_reginput;
3057 char *e = locinput + n - ln; /* Should not check after this */
3058 char *old = locinput;
3060 if (e >= PL_regeol || (n == REG_INFTY))
3063 /* Find place 'next' could work */
3065 while (locinput <= e && *locinput != c1)
3068 while (locinput <= e
3075 /* PL_reginput == old now */
3076 if (locinput != old) {
3077 ln = 1; /* Did some */
3078 if (regrepeat(scan, locinput - old) <
3082 /* PL_reginput == locinput now */
3083 TRYPAREN(paren, ln, locinput);
3084 PL_reginput = locinput; /* Could be reset... */
3085 REGCP_UNWIND(lastcp);
3086 /* Couldn't or didn't -- move forward. */
3091 while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */
3092 /* If it could work, try it. */
3094 UCHARAT(PL_reginput) == c1 ||
3095 UCHARAT(PL_reginput) == c2)
3097 TRYPAREN(paren, n, PL_reginput);
3098 REGCP_UNWIND(lastcp);
3100 /* Couldn't or didn't -- move forward. */
3101 PL_reginput = locinput;
3102 if (regrepeat(scan, 1)) {
3104 locinput = PL_reginput;
3112 n = regrepeat(scan, n);
3113 locinput = PL_reginput;
3114 if (ln < n && PL_regkind[(U8)OP(next)] == EOL &&
3115 (!PL_multiline || OP(next) == SEOL || OP(next) == EOS)) {
3116 ln = n; /* why back off? */
3117 /* ...because $ and \Z can match before *and* after
3118 newline at the end. Consider "\n\n" =~ /\n+\Z\n/.
3119 We should back off by one in this case. */
3120 if (UCHARAT(PL_reginput - 1) == '\n' && OP(next) != EOS)
3126 /* If it could work, try it. */
3128 UCHARAT(PL_reginput) == c1 ||
3129 UCHARAT(PL_reginput) == c2)
3131 TRYPAREN(paren, n, PL_reginput);
3132 REGCP_UNWIND(lastcp);
3134 /* Couldn't or didn't -- back up. */
3136 PL_reginput = locinput = HOPc(locinput, -1);
3141 /* If it could work, try it. */
3143 UCHARAT(PL_reginput) == c1 ||
3144 UCHARAT(PL_reginput) == c2)
3146 TRYPAREN(paren, n, PL_reginput);
3147 REGCP_UNWIND(lastcp);
3149 /* Couldn't or didn't -- back up. */
3151 PL_reginput = locinput = HOPc(locinput, -1);
3158 if (PL_reg_call_cc) {
3159 re_cc_state *cur_call_cc = PL_reg_call_cc;
3160 CURCUR *cctmp = PL_regcc;
3161 regexp *re = PL_reg_re;
3162 CHECKPOINT cp, lastcp;
3164 cp = regcppush(0); /* Save *all* the positions. */
3166 regcp_set_to(PL_reg_call_cc->ss); /* Restore parens of
3168 PL_reginput = locinput; /* Make position available to
3170 cache_re(PL_reg_call_cc->re);
3171 PL_regcc = PL_reg_call_cc->cc;
3172 PL_reg_call_cc = PL_reg_call_cc->prev;
3173 if (regmatch(cur_call_cc->node)) {
3174 PL_reg_call_cc = cur_call_cc;
3178 REGCP_UNWIND(lastcp);
3180 PL_reg_call_cc = cur_call_cc;
3186 PerlIO_printf(Perl_debug_log,
3187 "%*s continuation failed...\n",
3188 REPORT_CODE_OFF+PL_regindent*2, "")
3192 if (locinput < PL_regtill) {
3193 DEBUG_r(PerlIO_printf(Perl_debug_log,
3194 "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
3196 (long)(locinput - PL_reg_starttry),
3197 (long)(PL_regtill - PL_reg_starttry),
3199 sayNO_FINAL; /* Cannot match: too short. */
3201 PL_reginput = locinput; /* put where regtry can find it */
3202 sayYES_FINAL; /* Success! */
3204 PL_reginput = locinput; /* put where regtry can find it */
3205 sayYES_LOUD; /* Success! */
3208 PL_reginput = locinput;
3213 if (UTF) { /* XXXX This is absolutely
3214 broken, we read before
3216 s = HOPMAYBEc(locinput, -scan->flags);
3222 if (locinput < PL_bostr + scan->flags)
3224 PL_reginput = locinput - scan->flags;
3229 PL_reginput = locinput;
3234 if (UTF) { /* XXXX This is absolutely
3235 broken, we read before
3237 s = HOPMAYBEc(locinput, -scan->flags);
3238 if (!s || s < PL_bostr)
3243 if (locinput < PL_bostr + scan->flags)
3245 PL_reginput = locinput - scan->flags;
3250 PL_reginput = locinput;
3253 inner = NEXTOPER(NEXTOPER(scan));
3254 if (regmatch(inner) != n) {
3269 if (OP(scan) == SUSPEND) {
3270 locinput = PL_reginput;
3271 nextchr = UCHARAT(locinput);
3276 next = scan + ARG(scan);
3281 PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
3282 PTR2UV(scan), OP(scan));
3283 Perl_croak(aTHX_ "regexp memory corruption");
3290 * We get here only if there's trouble -- normally "case END" is
3291 * the terminating point.
3293 Perl_croak(aTHX_ "corrupted regexp pointers");
3299 PerlIO_printf(Perl_debug_log,
3300 "%*s %scould match...%s\n",
3301 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],PL_colors[5])
3305 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
3306 PL_colors[4],PL_colors[5]));
3312 #if 0 /* Breaks $^R */
3320 PerlIO_printf(Perl_debug_log,
3321 "%*s %sfailed...%s\n",
3322 REPORT_CODE_OFF+PL_regindent*2, "",PL_colors[4],PL_colors[5])
3328 re_unwind_t *uw = SSPTRt(unwind,re_unwind_t);
3331 case RE_UNWIND_BRANCH:
3332 case RE_UNWIND_BRANCHJ:
3334 re_unwind_branch_t *uwb = &(uw->branch);
3335 I32 lastparen = uwb->lastparen;
3337 REGCP_UNWIND(uwb->lastcp);
3338 for (n = *PL_reglastparen; n > lastparen; n--)
3340 *PL_reglastparen = n;
3341 scan = next = uwb->next;
3343 OP(scan) != (uwb->type == RE_UNWIND_BRANCH
3344 ? BRANCH : BRANCHJ) ) { /* Failure */
3351 /* Have more choice yet. Reuse the same uwb. */
3353 if ((n = (uwb->type == RE_UNWIND_BRANCH
3354 ? NEXT_OFF(next) : ARG(next))))
3357 next = NULL; /* XXXX Needn't unwinding in this case... */
3359 next = NEXTOPER(scan);
3360 if (uwb->type == RE_UNWIND_BRANCHJ)
3361 next = NEXTOPER(next);
3362 locinput = uwb->locinput;
3363 nextchr = uwb->nextchr;
3365 PL_regindent = uwb->regindent;
3372 Perl_croak(aTHX_ "regexp unwind memory corruption");
3383 - regrepeat - repeatedly match something simple, report how many
3386 * [This routine now assumes that it will only match on things of length 1.
3387 * That was true before, but now we assume scan - reginput is the count,
3388 * rather than incrementing count on every character. [Er, except utf8.]]
3391 S_regrepeat(pTHX_ regnode *p, I32 max)
3393 register char *scan;
3395 register char *loceol = PL_regeol;
3396 register I32 hardcount = 0;
3397 register bool do_utf8 = DO_UTF8(PL_reg_sv);
3400 if (max != REG_INFTY && max < loceol - scan)
3401 loceol = scan + max;
3404 if (DO_UTF8(PL_reg_sv)) {
3406 while (scan < loceol && *scan != '\n') {
3407 scan += UTF8SKIP(scan);
3411 while (scan < loceol && *scan != '\n')
3416 if (DO_UTF8(PL_reg_sv)) {
3418 while (scan < loceol) {
3419 scan += UTF8SKIP(scan);
3426 case EXACT: /* length of string is 1 */
3428 while (scan < loceol && UCHARAT(scan) == c)
3431 case EXACTF: /* length of string is 1 */
3433 while (scan < loceol &&
3434 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
3437 case EXACTFL: /* length of string is 1 */
3438 PL_reg_flags |= RF_tainted;
3440 while (scan < loceol &&
3441 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
3447 while (scan < loceol && reginclass(p, (U8*)scan, do_utf8)) {
3448 scan += UTF8SKIP(scan);
3452 while (scan < loceol && reginclass(p, (U8*)scan, do_utf8))
3457 if (DO_UTF8(PL_reg_sv)) {
3459 while (scan < loceol && swash_fetch(PL_utf8_alnum, (U8*)scan)) {
3460 scan += UTF8SKIP(scan);
3464 while (scan < loceol && isALNUM(*scan))
3469 PL_reg_flags |= RF_tainted;
3470 if (DO_UTF8(PL_reg_sv)) {
3472 while (scan < loceol && isALNUM_LC_utf8((U8*)scan)) {
3473 scan += UTF8SKIP(scan);
3477 while (scan < loceol && isALNUM_LC(*scan))
3482 if (DO_UTF8(PL_reg_sv)) {
3484 while (scan < loceol && !swash_fetch(PL_utf8_alnum, (U8*)scan)) {
3485 scan += UTF8SKIP(scan);
3489 while (scan < loceol && !isALNUM(*scan))
3494 PL_reg_flags |= RF_tainted;
3495 if (DO_UTF8(PL_reg_sv)) {
3497 while (scan < loceol && !isALNUM_LC_utf8((U8*)scan)) {
3498 scan += UTF8SKIP(scan);
3502 while (scan < loceol && !isALNUM_LC(*scan))
3507 if (DO_UTF8(PL_reg_sv)) {
3509 while (scan < loceol &&
3510 (*scan == ' ' || swash_fetch(PL_utf8_space,(U8*)scan))) {
3511 scan += UTF8SKIP(scan);
3515 while (scan < loceol && isSPACE(*scan))
3520 PL_reg_flags |= RF_tainted;
3521 if (DO_UTF8(PL_reg_sv)) {
3523 while (scan < loceol &&
3524 (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
3525 scan += UTF8SKIP(scan);
3529 while (scan < loceol && isSPACE_LC(*scan))
3534 if (DO_UTF8(PL_reg_sv)) {
3536 while (scan < loceol &&
3537 !(*scan == ' ' || swash_fetch(PL_utf8_space,(U8*)scan))) {
3538 scan += UTF8SKIP(scan);
3542 while (scan < loceol && !isSPACE(*scan))
3547 PL_reg_flags |= RF_tainted;
3548 if (DO_UTF8(PL_reg_sv)) {
3550 while (scan < loceol &&
3551 !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
3552 scan += UTF8SKIP(scan);
3556 while (scan < loceol && !isSPACE_LC(*scan))
3561 if (DO_UTF8(PL_reg_sv)) {
3563 while (scan < loceol && swash_fetch(PL_utf8_digit,(U8*)scan)) {
3564 scan += UTF8SKIP(scan);
3568 while (scan < loceol && isDIGIT(*scan))
3573 if (DO_UTF8(PL_reg_sv)) {
3575 while (scan < loceol && !swash_fetch(PL_utf8_digit,(U8*)scan)) {
3576 scan += UTF8SKIP(scan);
3580 while (scan < loceol && !isDIGIT(*scan))
3584 default: /* Called on something of 0 width. */
3585 break; /* So match right here or not at all. */
3591 c = scan - PL_reginput;
3596 SV *prop = sv_newmortal();
3599 PerlIO_printf(Perl_debug_log,
3600 "%*s %s can match %"IVdf" times out of %"IVdf"...\n",
3601 REPORT_CODE_OFF+1, "", SvPVX(prop),(IV)c,(IV)max);
3608 - regrepeat_hard - repeatedly match something, report total lenth and length
3610 * The repeater is supposed to have constant length.
3614 S_regrepeat_hard(pTHX_ regnode *p, I32 max, I32 *lp)
3616 register char *scan;
3617 register char *start;
3618 register char *loceol = PL_regeol;
3620 I32 count = 0, res = 1;
3625 start = PL_reginput;
3627 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
3630 while (start < PL_reginput) {
3632 start += UTF8SKIP(start);
3643 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
3645 *lp = l = PL_reginput - start;
3646 if (max != REG_INFTY && l*max < loceol - scan)
3647 loceol = scan + l*max;
3660 - regclass_swash - prepare the utf8 swash
3664 Perl_regclass_swash(pTHX_ register regnode* node, bool doinit, SV** initsvp)
3669 if (PL_regdata && PL_regdata->count) {
3672 if (PL_regdata->what[n] == 's') {
3673 SV *rv = (SV*)PL_regdata->data[n];
3674 AV *av = (AV*)SvRV((SV*)rv);
3677 si = *av_fetch(av, 0, FALSE);
3678 a = av_fetch(av, 1, FALSE);
3682 else if (si && doinit) {
3683 sw = swash_init("utf8", "", si, 1, 0);
3684 (void)av_store(av, 1, sw);
3696 - reginclass - determine if a character falls into a character class
3700 S_reginclass(pTHX_ register regnode *n, register U8* p, register bool do_utf8)
3702 char flags = ANYOF_FLAGS(n);
3705 if (do_utf8 || (flags & ANYOF_UNICODE)) {
3706 if (do_utf8 && !ANYOF_RUNTIME(n)) {
3708 UV c = utf8_to_uv_simple(p, &len);
3710 if (len != (STRLEN)-1 && c < 256 && ANYOF_BITMAP_TEST(n, c))
3715 SV *sw = regclass_swash(n, TRUE, 0);
3718 if (swash_fetch(sw, p))
3720 else if (flags & ANYOF_FOLD) {
3721 U8 tmpbuf[UTF8_MAXLEN+1];
3723 if (flags & ANYOF_LOCALE) {
3724 PL_reg_flags |= RF_tainted;
3725 uv_to_utf8(tmpbuf, toLOWER_LC_utf8(p));
3728 uv_to_utf8(tmpbuf, toLOWER_utf8(p));
3729 if (swash_fetch(sw, tmpbuf))
3738 if (ANYOF_BITMAP_TEST(n, c))
3740 else if (flags & ANYOF_FOLD) {
3743 if (flags & ANYOF_LOCALE) {
3744 PL_reg_flags |= RF_tainted;
3745 f = PL_fold_locale[c];
3749 if (f != c && ANYOF_BITMAP_TEST(n, f))
3753 if (!match && (flags & ANYOF_CLASS)) {
3754 PL_reg_flags |= RF_tainted;
3756 (ANYOF_CLASS_TEST(n, ANYOF_ALNUM) && isALNUM_LC(c)) ||
3757 (ANYOF_CLASS_TEST(n, ANYOF_NALNUM) && !isALNUM_LC(c)) ||
3758 (ANYOF_CLASS_TEST(n, ANYOF_SPACE) && isSPACE_LC(c)) ||
3759 (ANYOF_CLASS_TEST(n, ANYOF_NSPACE) && !isSPACE_LC(c)) ||
3760 (ANYOF_CLASS_TEST(n, ANYOF_DIGIT) && isDIGIT_LC(c)) ||
3761 (ANYOF_CLASS_TEST(n, ANYOF_NDIGIT) && !isDIGIT_LC(c)) ||
3762 (ANYOF_CLASS_TEST(n, ANYOF_ALNUMC) && isALNUMC_LC(c)) ||
3763 (ANYOF_CLASS_TEST(n, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
3764 (ANYOF_CLASS_TEST(n, ANYOF_ALPHA) && isALPHA_LC(c)) ||
3765 (ANYOF_CLASS_TEST(n, ANYOF_NALPHA) && !isALPHA_LC(c)) ||
3766 (ANYOF_CLASS_TEST(n, ANYOF_ASCII) && isASCII(c)) ||
3767 (ANYOF_CLASS_TEST(n, ANYOF_NASCII) && !isASCII(c)) ||
3768 (ANYOF_CLASS_TEST(n, ANYOF_CNTRL) && isCNTRL_LC(c)) ||
3769 (ANYOF_CLASS_TEST(n, ANYOF_NCNTRL) && !isCNTRL_LC(c)) ||
3770 (ANYOF_CLASS_TEST(n, ANYOF_GRAPH) && isGRAPH_LC(c)) ||
3771 (ANYOF_CLASS_TEST(n, ANYOF_NGRAPH) && !isGRAPH_LC(c)) ||
3772 (ANYOF_CLASS_TEST(n, ANYOF_LOWER) && isLOWER_LC(c)) ||
3773 (ANYOF_CLASS_TEST(n, ANYOF_NLOWER) && !isLOWER_LC(c)) ||
3774 (ANYOF_CLASS_TEST(n, ANYOF_PRINT) && isPRINT_LC(c)) ||
3775 (ANYOF_CLASS_TEST(n, ANYOF_NPRINT) && !isPRINT_LC(c)) ||
3776 (ANYOF_CLASS_TEST(n, ANYOF_PUNCT) && isPUNCT_LC(c)) ||
3777 (ANYOF_CLASS_TEST(n, ANYOF_NPUNCT) && !isPUNCT_LC(c)) ||
3778 (ANYOF_CLASS_TEST(n, ANYOF_UPPER) && isUPPER_LC(c)) ||
3779 (ANYOF_CLASS_TEST(n, ANYOF_NUPPER) && !isUPPER_LC(c)) ||
3780 (ANYOF_CLASS_TEST(n, ANYOF_XDIGIT) && isXDIGIT(c)) ||
3781 (ANYOF_CLASS_TEST(n, ANYOF_NXDIGIT) && !isXDIGIT(c)) ||
3782 (ANYOF_CLASS_TEST(n, ANYOF_PSXSPC) && isPSXSPC(c)) ||
3783 (ANYOF_CLASS_TEST(n, ANYOF_NPSXSPC) && !isPSXSPC(c)) ||
3784 (ANYOF_CLASS_TEST(n, ANYOF_BLANK) && isBLANK(c)) ||
3785 (ANYOF_CLASS_TEST(n, ANYOF_NBLANK) && !isBLANK(c))
3786 ) /* How's that for a conditional? */
3793 return (flags & ANYOF_INVERT) ? !match : match;
3797 S_reghop(pTHX_ U8 *s, I32 off)
3800 while (off-- && s < (U8*)PL_regeol) {
3801 /* XXX could check well-formedness here */
3807 if (s > (U8*)PL_bostr) {
3809 if (UTF8_IS_CONTINUED(*s)) {
3810 while (s > (U8*)PL_bostr && UTF8_IS_CONTINUATION(*s))
3813 /* XXX could check well-formedness here */
3821 S_reghopmaybe(pTHX_ U8* s, I32 off)
3824 while (off-- && s < (U8*)PL_regeol) {
3825 /* XXX could check well-formedness here */
3833 if (s > (U8*)PL_bostr) {
3835 if (UTF8_IS_CONTINUED(*s)) {
3836 while (s > (U8*)PL_bostr && UTF8_IS_CONTINUATION(*s))
3839 /* XXX could check well-formedness here */
3855 restore_pos(pTHXo_ void *arg)
3857 if (PL_reg_eval_set) {
3858 if (PL_reg_oldsaved) {
3859 PL_reg_re->subbeg = PL_reg_oldsaved;
3860 PL_reg_re->sublen = PL_reg_oldsavedlen;
3861 RX_MATCH_COPIED_on(PL_reg_re);
3863 PL_reg_magic->mg_len = PL_reg_oldpos;
3864 PL_reg_eval_set = 0;
3865 PL_curpm = PL_reg_oldcurpm;