5 * "One Ring to rule them all, One Ring to find them..."
8 /* NOTE: this is derived from Henry Spencer's regexp code, and should not
9 * confused with the original package (see point 3 below). Thanks, Henry!
12 /* Additional note: this code is very heavily munged from Henry's version
13 * in places. In some spots I've traded clarity for efficiency, so don't
14 * blame Henry for some of the lack of readability.
17 /* The names of the functions have been changed from regcomp and
18 * regexec to pregcomp and pregexec in order to avoid conflicts
19 * with the POSIX routines of the same names.
22 #ifdef PERL_EXT_RE_BUILD
23 /* need to replace pregcomp et al, so enable that */
24 # ifndef PERL_IN_XSUB_RE
25 # define PERL_IN_XSUB_RE
27 /* need access to debugger hooks */
28 # if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING)
33 #ifdef PERL_IN_XSUB_RE
34 /* We *really* need to overwrite these symbols: */
35 # define Perl_regexec_flags my_regexec
36 # define Perl_regdump my_regdump
37 # define Perl_regprop my_regprop
38 # define Perl_re_intuit_start my_re_intuit_start
39 /* *These* symbols are masked to allow static link. */
40 # define Perl_pregexec my_pregexec
41 # define Perl_reginitcolors my_reginitcolors
43 # define PERL_NO_GET_CONTEXT
48 * pregcomp and pregexec -- regsub and regerror are not used in perl
50 * Copyright (c) 1986 by University of Toronto.
51 * Written by Henry Spencer. Not derived from licensed software.
53 * Permission is granted to anyone to use this software for any
54 * purpose on any computer system, and to redistribute it freely,
55 * subject to the following restrictions:
57 * 1. The author is not responsible for the consequences of use of
58 * this software, no matter how awful, even if they arise
61 * 2. The origin of this software must not be misrepresented, either
62 * by explicit claim or by omission.
64 * 3. Altered versions must be plainly marked as such, and must not
65 * be misrepresented as being the original software.
67 **** Alterations to Henry's code are...
69 **** Copyright (c) 1991-2000, Larry Wall
71 **** You may distribute under the terms of either the GNU General Public
72 **** License or the Artistic License, as specified in the README file.
74 * Beware that some of this code is subtly aware of the way operator
75 * precedence is structured in regular expressions. Serious changes in
76 * regular-expression syntax might require a total rethink.
79 #define PERL_IN_REGEXEC_C
82 #ifdef PERL_IN_XSUB_RE
83 # if defined(PERL_CAPI) || defined(PERL_OBJECT)
90 #define RF_tainted 1 /* tainted information used? */
91 #define RF_warned 2 /* warned about big count? */
92 #define RF_evaled 4 /* Did an EVAL with setting? */
93 #define RF_utf8 8 /* String contains multibyte chars? */
95 #define UTF (PL_reg_flags & RF_utf8)
97 #define RS_init 1 /* eval environment created */
98 #define RS_set 2 /* replsv value is set */
101 #define STATIC static
108 #define CHR_SVLEN(sv) (UTF ? sv_len_utf8(sv) : SvCUR(sv))
109 #define CHR_DIST(a,b) (UTF ? utf8_distance(a,b) : a - b)
111 #define reghop_c(pos,off) ((char*)reghop((U8*)pos, off))
112 #define reghopmaybe_c(pos,off) ((char*)reghopmaybe((U8*)pos, off))
113 #define HOP(pos,off) (UTF ? reghop((U8*)pos, off) : (U8*)(pos + off))
114 #define HOPMAYBE(pos,off) (UTF ? reghopmaybe((U8*)pos, off) : (U8*)(pos + off))
115 #define HOPc(pos,off) ((char*)HOP(pos,off))
116 #define HOPMAYBEc(pos,off) ((char*)HOPMAYBE(pos,off))
118 static void restore_pos(pTHXo_ void *arg);
122 S_regcppush(pTHX_ I32 parenfloor)
124 int retval = PL_savestack_ix;
125 int i = (PL_regsize - parenfloor) * 4;
129 for (p = PL_regsize; p > parenfloor; p--) {
130 SSPUSHINT(PL_regendp[p]);
131 SSPUSHINT(PL_regstartp[p]);
132 SSPUSHPTR(PL_reg_start_tmp[p]);
135 SSPUSHINT(PL_regsize);
136 SSPUSHINT(*PL_reglastparen);
137 SSPUSHPTR(PL_reginput);
139 SSPUSHINT(SAVEt_REGCONTEXT);
143 /* These are needed since we do not localize EVAL nodes: */
144 # define REGCP_SET(cp) DEBUG_r(PerlIO_printf(Perl_debug_log, \
145 " Setting an EVAL scope, savestack=%"IVdf"\n", \
146 (IV)PL_savestack_ix)); cp = PL_savestack_ix
148 # define REGCP_UNWIND(cp) DEBUG_r(cp != PL_savestack_ix ? \
149 PerlIO_printf(Perl_debug_log, \
150 " Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
151 (IV)(cp), (IV)PL_savestack_ix) : 0); regcpblow(cp)
160 assert(i == SAVEt_REGCONTEXT);
162 input = (char *) SSPOPPTR;
163 *PL_reglastparen = SSPOPINT;
164 PL_regsize = SSPOPINT;
165 for (i -= 3; i > 0; i -= 4) {
166 paren = (U32)SSPOPINT;
167 PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
168 PL_regstartp[paren] = SSPOPINT;
170 if (paren <= *PL_reglastparen)
171 PL_regendp[paren] = tmps;
173 PerlIO_printf(Perl_debug_log,
174 " restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
175 (UV)paren, (IV)PL_regstartp[paren],
176 (IV)(PL_reg_start_tmp[paren] - PL_bostr),
177 (IV)PL_regendp[paren],
178 (paren > *PL_reglastparen ? "(no)" : ""));
182 if (*PL_reglastparen + 1 <= PL_regnpar) {
183 PerlIO_printf(Perl_debug_log,
184 " restoring \\%"IVdf"..\\%"IVdf" to undef\n",
185 (IV)(*PL_reglastparen + 1), (IV)PL_regnpar);
189 /* It would seem that the similar code in regtry()
190 * already takes care of this, and in fact it is in
191 * a better location to since this code can #if 0-ed out
192 * but the code in regtry() is needed or otherwise tests
193 * requiring null fields (pat.t#187 and split.t#{13,14}
194 * (as of patchlevel 7877) will fail. Then again,
195 * this code seems to be necessary or otherwise
196 * building DynaLoader will fail:
197 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
199 for (paren = *PL_reglastparen + 1; paren <= PL_regnpar; paren++) {
200 if (paren > PL_regsize)
201 PL_regstartp[paren] = -1;
202 PL_regendp[paren] = -1;
209 S_regcp_set_to(pTHX_ I32 ss)
211 I32 tmp = PL_savestack_ix;
213 PL_savestack_ix = ss;
215 PL_savestack_ix = tmp;
219 typedef struct re_cc_state
223 struct re_cc_state *prev;
228 #define regcpblow(cp) LEAVE_SCOPE(cp) /* Ignores regcppush()ed data. */
230 #define TRYPAREN(paren, n, input) { \
233 PL_regstartp[paren] = HOPc(input, -1) - PL_bostr; \
234 PL_regendp[paren] = input - PL_bostr; \
237 PL_regendp[paren] = -1; \
239 if (regmatch(next)) \
242 PL_regendp[paren] = -1; \
247 * pregexec and friends
251 - pregexec - match a regexp against a string
254 Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend,
255 char *strbeg, I32 minend, SV *screamer, U32 nosave)
256 /* strend: pointer to null at end of string */
257 /* strbeg: real beginning of string */
258 /* minend: end of match must be >=minend after stringarg. */
259 /* nosave: For optimizations. */
262 regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
263 nosave ? 0 : REXEC_COPY_STR);
267 S_cache_re(pTHX_ regexp *prog)
269 PL_regprecomp = prog->precomp; /* Needed for FAIL. */
271 PL_regprogram = prog->program;
273 PL_regnpar = prog->nparens;
274 PL_regdata = prog->data;
279 * Need to implement the following flags for reg_anch:
281 * USE_INTUIT_NOML - Useful to call re_intuit_start() first
283 * INTUIT_AUTORITATIVE_NOML - Can trust a positive answer
284 * INTUIT_AUTORITATIVE_ML
285 * INTUIT_ONCE_NOML - Intuit can match in one location only.
288 * Another flag for this function: SECOND_TIME (so that float substrs
289 * with giant delta may be not rechecked).
292 /* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
294 /* If SCREAM, then SvPVX(sv) should be compatible with strpos and strend.
295 Otherwise, only SvCUR(sv) is used to get strbeg. */
297 /* XXXX We assume that strpos is strbeg unless sv. */
299 /* XXXX Some places assume that there is a fixed substring.
300 An update may be needed if optimizer marks as "INTUITable"
301 RExen without fixed substrings. Similarly, it is assumed that
302 lengths of all the strings are no more than minlen, thus they
303 cannot come from lookahead.
304 (Or minlen should take into account lookahead.) */
306 /* A failure to find a constant substring means that there is no need to make
307 an expensive call to REx engine, thus we celebrate a failure. Similarly,
308 finding a substring too deep into the string means that less calls to
309 regtry() should be needed.
311 REx compiler's optimizer found 4 possible hints:
312 a) Anchored substring;
314 c) Whether we are anchored (beginning-of-line or \G);
315 d) First node (of those at offset 0) which may distingush positions;
316 We use a)b)d) and multiline-part of c), and try to find a position in the
317 string which does not contradict any of them.
320 /* Most of decisions we do here should have been done at compile time.
321 The nodes of the REx which we used for the search should have been
322 deleted from the finite automaton. */
325 Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
326 char *strend, U32 flags, re_scream_pos_data *data)
328 register I32 start_shift;
329 /* Should be nonnegative! */
330 register I32 end_shift;
337 register char *other_last = Nullch; /* other substr checked before this */
338 char *check_at; /* check substr found at this pos */
340 char *i_strpos = strpos;
343 DEBUG_r( if (!PL_colorset) reginitcolors() );
344 DEBUG_r(PerlIO_printf(Perl_debug_log,
345 "%sGuessing start of match, REx%s `%s%.60s%s%s' against `%s%.*s%s%s'...\n",
346 PL_colors[4],PL_colors[5],PL_colors[0],
349 (strlen(prog->precomp) > 60 ? "..." : ""),
351 (int)(strend - strpos > 60 ? 60 : strend - strpos),
352 strpos, PL_colors[1],
353 (strend - strpos > 60 ? "..." : ""))
356 if (prog->minlen > strend - strpos) {
357 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too short...\n"));
360 strbeg = (sv && SvPOK(sv)) ? strend - SvCUR(sv) : strpos;
361 check = prog->check_substr;
362 if (prog->reganch & ROPT_ANCH) { /* Match at beg-of-str or after \n */
363 ml_anch = !( (prog->reganch & ROPT_ANCH_SINGLE)
364 || ( (prog->reganch & ROPT_ANCH_BOL)
365 && !PL_multiline ) ); /* Check after \n? */
368 if ( !(prog->reganch & ROPT_ANCH_GPOS) /* Checked by the caller */
369 /* SvCUR is not set on references: SvRV and SvPVX overlap */
371 && (strpos != strbeg)) {
372 DEBUG_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
375 if (prog->check_offset_min == prog->check_offset_max) {
376 /* Substring at constant offset from beg-of-str... */
379 PL_regeol = strend; /* Used in HOP() */
380 s = HOPc(strpos, prog->check_offset_min);
382 slen = SvCUR(check); /* >= 1 */
384 if ( strend - s > slen || strend - s < slen - 1
385 || (strend - s == slen && strend[-1] != '\n')) {
386 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
389 /* Now should match s[0..slen-2] */
391 if (slen && (*SvPVX(check) != *s
393 && memNE(SvPVX(check), s, slen)))) {
395 DEBUG_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
399 else if (*SvPVX(check) != *s
400 || ((slen = SvCUR(check)) > 1
401 && memNE(SvPVX(check), s, slen)))
403 goto success_at_start;
406 /* Match is anchored, but substr is not anchored wrt beg-of-str. */
408 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
409 end_shift = prog->minlen - start_shift -
410 CHR_SVLEN(check) + (SvTAIL(check) != 0);
412 I32 end = prog->check_offset_max + CHR_SVLEN(check)
413 - (SvTAIL(check) != 0);
414 I32 eshift = strend - s - end;
416 if (end_shift < eshift)
420 else { /* Can match at random position */
423 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
424 /* Should be nonnegative! */
425 end_shift = prog->minlen - start_shift -
426 CHR_SVLEN(check) + (SvTAIL(check) != 0);
429 #ifdef DEBUGGING /* 7/99: reports of failure (with the older version) */
431 Perl_croak(aTHX_ "panic: end_shift");
435 /* Find a possible match in the region s..strend by looking for
436 the "check" substring in the region corrected by start/end_shift. */
437 if (flags & REXEC_SCREAM) {
438 I32 p = -1; /* Internal iterator of scream. */
439 I32 *pp = data ? data->scream_pos : &p;
441 if (PL_screamfirst[BmRARE(check)] >= 0
442 || ( BmRARE(check) == '\n'
443 && (BmPREVIOUS(check) == SvCUR(check) - 1)
445 s = screaminstr(sv, check,
446 start_shift + (s - strbeg), end_shift, pp, 0);
450 *data->scream_olds = s;
453 s = fbm_instr((unsigned char*)s + start_shift,
454 (unsigned char*)strend - end_shift,
455 check, PL_multiline ? FBMrf_MULTILINE : 0);
457 /* Update the count-of-usability, remove useless subpatterns,
460 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s %s substr `%s%.*s%s'%s%s",
461 (s ? "Found" : "Did not find"),
462 ((check == prog->anchored_substr) ? "anchored" : "floating"),
464 (int)(SvCUR(check) - (SvTAIL(check)!=0)),
466 PL_colors[1], (SvTAIL(check) ? "$" : ""),
467 (s ? " at offset " : "...\n") ) );
474 /* Finish the diagnostic message */
475 DEBUG_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
477 /* Got a candidate. Check MBOL anchoring, and the *other* substr.
478 Start with the other substr.
479 XXXX no SCREAM optimization yet - and a very coarse implementation
480 XXXX /ttx+/ results in anchored=`ttx', floating=`x'. floating will
481 *always* match. Probably should be marked during compile...
482 Probably it is right to do no SCREAM here...
485 if (prog->float_substr && prog->anchored_substr) {
486 /* Take into account the "other" substring. */
487 /* XXXX May be hopelessly wrong for UTF... */
490 if (check == prog->float_substr) {
493 char *last = s - start_shift, *last1, *last2;
497 t = s - prog->check_offset_max;
498 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
499 && (!(prog->reganch & ROPT_UTF8)
500 || (PL_bostr = strpos, /* Used in regcopmaybe() */
501 (t = reghopmaybe_c(s, -(prog->check_offset_max)))
506 t += prog->anchored_offset;
507 if (t < other_last) /* These positions already checked */
510 last2 = last1 = strend - prog->minlen;
513 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
514 /* On end-of-str: see comment below. */
515 s = fbm_instr((unsigned char*)t,
516 (unsigned char*)last1 + prog->anchored_offset
517 + SvCUR(prog->anchored_substr)
518 - (SvTAIL(prog->anchored_substr)!=0),
519 prog->anchored_substr, PL_multiline ? FBMrf_MULTILINE : 0);
520 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s anchored substr `%s%.*s%s'%s",
521 (s ? "Found" : "Contradicts"),
523 (int)(SvCUR(prog->anchored_substr)
524 - (SvTAIL(prog->anchored_substr)!=0)),
525 SvPVX(prog->anchored_substr),
526 PL_colors[1], (SvTAIL(prog->anchored_substr) ? "$" : "")));
528 if (last1 >= last2) {
529 DEBUG_r(PerlIO_printf(Perl_debug_log,
530 ", giving up...\n"));
533 DEBUG_r(PerlIO_printf(Perl_debug_log,
534 ", trying floating at offset %ld...\n",
535 (long)(s1 + 1 - i_strpos)));
536 PL_regeol = strend; /* Used in HOP() */
537 other_last = last1 + prog->anchored_offset + 1;
542 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
543 (long)(s - i_strpos)));
544 t = s - prog->anchored_offset;
553 else { /* Take into account the floating substring. */
558 last1 = last = strend - prog->minlen + prog->float_min_offset;
559 if (last - t > prog->float_max_offset)
560 last = t + prog->float_max_offset;
561 s = t + prog->float_min_offset;
564 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
565 /* fbm_instr() takes into account exact value of end-of-str
566 if the check is SvTAIL(ed). Since false positives are OK,
567 and end-of-str is not later than strend we are OK. */
568 s = fbm_instr((unsigned char*)s,
569 (unsigned char*)last + SvCUR(prog->float_substr)
570 - (SvTAIL(prog->float_substr)!=0),
571 prog->float_substr, PL_multiline ? FBMrf_MULTILINE : 0);
572 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s floating substr `%s%.*s%s'%s",
573 (s ? "Found" : "Contradicts"),
575 (int)(SvCUR(prog->float_substr)
576 - (SvTAIL(prog->float_substr)!=0)),
577 SvPVX(prog->float_substr),
578 PL_colors[1], (SvTAIL(prog->float_substr) ? "$" : "")));
581 DEBUG_r(PerlIO_printf(Perl_debug_log,
582 ", giving up...\n"));
585 DEBUG_r(PerlIO_printf(Perl_debug_log,
586 ", trying anchored starting at offset %ld...\n",
587 (long)(s1 + 1 - i_strpos)));
588 other_last = last + 1;
589 PL_regeol = strend; /* Used in HOP() */
594 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
595 (long)(s - i_strpos)));
605 t = s - prog->check_offset_max;
607 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
608 && (!(prog->reganch & ROPT_UTF8)
609 || (PL_bostr = strpos, /* Used in regcopmaybe() */
610 ((t = reghopmaybe_c(s, -(prog->check_offset_max)))
613 /* Fixed substring is found far enough so that the match
614 cannot start at strpos. */
616 if (ml_anch && t[-1] != '\n') {
617 /* Eventually fbm_*() should handle this, but often
618 anchored_offset is not 0, so this check will not be wasted. */
619 /* XXXX In the code below we prefer to look for "^" even in
620 presence of anchored substrings. And we search even
621 beyond the found float position. These pessimizations
622 are historical artefacts only. */
624 while (t < strend - prog->minlen) {
626 if (t < check_at - prog->check_offset_min) {
627 if (prog->anchored_substr) {
628 /* Since we moved from the found position,
629 we definitely contradict the found anchored
630 substr. Due to the above check we do not
631 contradict "check" substr.
632 Thus we can arrive here only if check substr
633 is float. Redo checking for "other"=="fixed".
636 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
637 PL_colors[0],PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
638 goto do_other_anchored;
640 /* We don't contradict the found floating substring. */
641 /* XXXX Why not check for STCLASS? */
643 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
644 PL_colors[0],PL_colors[1], (long)(s - i_strpos)));
647 /* Position contradicts check-string */
648 /* XXXX probably better to look for check-string
649 than for "\n", so one should lower the limit for t? */
650 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n",
651 PL_colors[0],PL_colors[1], (long)(t + 1 - i_strpos)));
652 other_last = strpos = s = t + 1;
657 DEBUG_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
658 PL_colors[0],PL_colors[1]));
662 DEBUG_r(PerlIO_printf(Perl_debug_log, "Starting position does not contradict /%s^%s/m...\n",
663 PL_colors[0],PL_colors[1]));
667 ++BmUSEFUL(prog->check_substr); /* hooray/5 */
671 /* The found string does not prohibit matching at strpos,
672 - no optimization of calling REx engine can be performed,
673 unless it was an MBOL and we are not after MBOL,
674 or a future STCLASS check will fail this. */
676 /* Even in this situation we may use MBOL flag if strpos is offset
677 wrt the start of the string. */
678 if (ml_anch && sv && !SvROK(sv) /* See prev comment on SvROK */
679 && (strpos != strbeg) && strpos[-1] != '\n'
680 /* May be due to an implicit anchor of m{.*foo} */
681 && !(prog->reganch & ROPT_IMPLICIT))
686 DEBUG_r( if (ml_anch)
687 PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n",
688 (long)(strpos - i_strpos), PL_colors[0],PL_colors[1]);
691 if (!(prog->reganch & ROPT_NAUGHTY) /* XXXX If strpos moved? */
692 && prog->check_substr /* Could be deleted already */
693 && --BmUSEFUL(prog->check_substr) < 0
694 && prog->check_substr == prog->float_substr)
696 /* If flags & SOMETHING - do not do it many times on the same match */
697 DEBUG_r(PerlIO_printf(Perl_debug_log, "... Disabling check substring...\n"));
698 SvREFCNT_dec(prog->check_substr);
699 prog->check_substr = Nullsv; /* disable */
700 prog->float_substr = Nullsv; /* clear */
701 check = Nullsv; /* abort */
703 /* XXXX This is a remnant of the old implementation. It
704 looks wasteful, since now INTUIT can use many
706 prog->reganch &= ~RE_USE_INTUIT;
713 /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
714 if (prog->regstclass) {
715 /* minlen == 0 is possible if regstclass is \b or \B,
716 and the fixed substr is ''$.
717 Since minlen is already taken into account, s+1 is before strend;
718 accidentally, minlen >= 1 guaranties no false positives at s + 1
719 even for \b or \B. But (minlen? 1 : 0) below assumes that
720 regstclass does not come from lookahead... */
721 /* If regstclass takes bytelength more than 1: If charlength==1, OK.
722 This leaves EXACTF only, which is dealt with in find_byclass(). */
723 int cl_l = (PL_regkind[(U8)OP(prog->regstclass)] == EXACT
724 ? STR_LEN(prog->regstclass)
726 char *endpos = (prog->anchored_substr || ml_anch)
727 ? s + (prog->minlen? cl_l : 0)
728 : (prog->float_substr ? check_at - start_shift + cl_l
730 char *startpos = strbeg;
733 if (prog->reganch & ROPT_UTF8) {
734 PL_regdata = prog->data;
737 s = find_byclass(prog, prog->regstclass, s, endpos, startpos, 1);
742 if (endpos == strend) {
743 DEBUG_r( PerlIO_printf(Perl_debug_log,
744 "Could not match STCLASS...\n") );
747 DEBUG_r( PerlIO_printf(Perl_debug_log,
748 "This position contradicts STCLASS...\n") );
749 if ((prog->reganch & ROPT_ANCH) && !ml_anch)
751 /* Contradict one of substrings */
752 if (prog->anchored_substr) {
753 if (prog->anchored_substr == check) {
754 DEBUG_r( what = "anchored" );
756 PL_regeol = strend; /* Used in HOP() */
758 if (s + start_shift + end_shift > strend) {
759 /* XXXX Should be taken into account earlier? */
760 DEBUG_r( PerlIO_printf(Perl_debug_log,
761 "Could not match STCLASS...\n") );
766 DEBUG_r( PerlIO_printf(Perl_debug_log,
767 "Looking for %s substr starting at offset %ld...\n",
768 what, (long)(s + start_shift - i_strpos)) );
771 /* Have both, check_string is floating */
772 if (t + start_shift >= check_at) /* Contradicts floating=check */
773 goto retry_floating_check;
774 /* Recheck anchored substring, but not floating... */
778 DEBUG_r( PerlIO_printf(Perl_debug_log,
779 "Looking for anchored substr starting at offset %ld...\n",
780 (long)(other_last - i_strpos)) );
781 goto do_other_anchored;
783 /* Another way we could have checked stclass at the
784 current position only: */
789 DEBUG_r( PerlIO_printf(Perl_debug_log,
790 "Looking for /%s^%s/m starting at offset %ld...\n",
791 PL_colors[0],PL_colors[1], (long)(t - i_strpos)) );
794 if (!prog->float_substr) /* Could have been deleted */
796 /* Check is floating subtring. */
797 retry_floating_check:
798 t = check_at - start_shift;
799 DEBUG_r( what = "floating" );
800 goto hop_and_restart;
803 PerlIO_printf(Perl_debug_log,
804 "By STCLASS: moving %ld --> %ld\n",
805 (long)(t - i_strpos), (long)(s - i_strpos));
807 PerlIO_printf(Perl_debug_log,
808 "Does not contradict STCLASS...\n") );
811 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s%s:%s match at offset %ld\n",
812 PL_colors[4], (check ? "Guessed" : "Giving up"),
813 PL_colors[5], (long)(s - i_strpos)) );
816 fail_finish: /* Substring not found */
817 if (prog->check_substr) /* could be removed already */
818 BmUSEFUL(prog->check_substr) += 5; /* hooray */
820 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
821 PL_colors[4],PL_colors[5]));
825 /* We know what class REx starts with. Try to find this position... */
827 S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *startpos, I32 norun)
829 I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
835 register I32 tmp = 1; /* Scratch variable? */
836 register bool do_utf8 = DO_UTF8(PL_reg_sv);
838 /* We know what class it must start with. */
842 if (reginclass(c, (U8*)s, do_utf8)) {
843 if (tmp && (norun || regtry(prog, s)))
850 s += do_utf8 ? UTF8SKIP(s) : 1;
863 c2 = PL_fold_locale[c1];
868 e = s; /* Due to minlen logic of intuit() */
869 /* Here it is NOT UTF! */
873 && (ln == 1 || !(OP(c) == EXACTF
875 : ibcmp_locale(s, m, ln)))
876 && (norun || regtry(prog, s)) )
882 if ( (*(U8*)s == c1 || *(U8*)s == c2)
883 && (ln == 1 || !(OP(c) == EXACTF
885 : ibcmp_locale(s, m, ln)))
886 && (norun || regtry(prog, s)) )
893 PL_reg_flags |= RF_tainted;
900 U8 *r = reghop((U8*)s, -1);
902 tmp = (I32)utf8_to_uv(r, s - (char*)r, 0, 0);
904 tmp = ((OP(c) == BOUND ?
905 isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
907 if (tmp == !(OP(c) == BOUND ?
908 swash_fetch(PL_utf8_alnum, (U8*)s) :
909 isALNUM_LC_utf8((U8*)s)))
912 if ((norun || regtry(prog, s)))
919 tmp = (s != startpos) ? UCHARAT(s - 1) : '\n';
920 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
923 !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
925 if ((norun || regtry(prog, s)))
931 if ((!prog->minlen && tmp) && (norun || regtry(prog, s)))
935 PL_reg_flags |= RF_tainted;
942 U8 *r = reghop((U8*)s, -1);
944 tmp = (I32)utf8_to_uv(r, s - (char*)r, 0, 0);
946 tmp = ((OP(c) == NBOUND ?
947 isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
949 if (tmp == !(OP(c) == NBOUND ?
950 swash_fetch(PL_utf8_alnum, (U8*)s) :
951 isALNUM_LC_utf8((U8*)s)))
953 else if ((norun || regtry(prog, s)))
959 tmp = (s != startpos) ? UCHARAT(s - 1) : '\n';
960 tmp = ((OP(c) == NBOUND ?
961 isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
964 !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
966 else if ((norun || regtry(prog, s)))
971 if ((!prog->minlen && !tmp) && (norun || regtry(prog, s)))
977 if (swash_fetch(PL_utf8_alnum, (U8*)s)) {
978 if (tmp && (norun || regtry(prog, s)))
991 if (tmp && (norun || regtry(prog, s)))
1003 PL_reg_flags |= RF_tainted;
1005 while (s < strend) {
1006 if (isALNUM_LC_utf8((U8*)s)) {
1007 if (tmp && (norun || regtry(prog, s)))
1018 while (s < strend) {
1019 if (isALNUM_LC(*s)) {
1020 if (tmp && (norun || regtry(prog, s)))
1033 while (s < strend) {
1034 if (!swash_fetch(PL_utf8_alnum, (U8*)s)) {
1035 if (tmp && (norun || regtry(prog, s)))
1046 while (s < strend) {
1048 if (tmp && (norun || regtry(prog, s)))
1060 PL_reg_flags |= RF_tainted;
1062 while (s < strend) {
1063 if (!isALNUM_LC_utf8((U8*)s)) {
1064 if (tmp && (norun || regtry(prog, s)))
1075 while (s < strend) {
1076 if (!isALNUM_LC(*s)) {
1077 if (tmp && (norun || regtry(prog, s)))
1090 while (s < strend) {
1091 if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s)) {
1092 if (tmp && (norun || regtry(prog, s)))
1103 while (s < strend) {
1105 if (tmp && (norun || regtry(prog, s)))
1117 PL_reg_flags |= RF_tainted;
1119 while (s < strend) {
1120 if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) {
1121 if (tmp && (norun || regtry(prog, s)))
1132 while (s < strend) {
1133 if (isSPACE_LC(*s)) {
1134 if (tmp && (norun || regtry(prog, s)))
1147 while (s < strend) {
1148 if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s))) {
1149 if (tmp && (norun || regtry(prog, s)))
1160 while (s < strend) {
1162 if (tmp && (norun || regtry(prog, s)))
1174 PL_reg_flags |= RF_tainted;
1176 while (s < strend) {
1177 if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) {
1178 if (tmp && (norun || regtry(prog, s)))
1189 while (s < strend) {
1190 if (!isSPACE_LC(*s)) {
1191 if (tmp && (norun || regtry(prog, s)))
1204 while (s < strend) {
1205 if (swash_fetch(PL_utf8_digit,(U8*)s)) {
1206 if (tmp && (norun || regtry(prog, s)))
1217 while (s < strend) {
1219 if (tmp && (norun || regtry(prog, s)))
1231 PL_reg_flags |= RF_tainted;
1233 while (s < strend) {
1234 if (isDIGIT_LC_utf8((U8*)s)) {
1235 if (tmp && (norun || regtry(prog, s)))
1246 while (s < strend) {
1247 if (isDIGIT_LC(*s)) {
1248 if (tmp && (norun || regtry(prog, s)))
1261 while (s < strend) {
1262 if (!swash_fetch(PL_utf8_digit,(U8*)s)) {
1263 if (tmp && (norun || regtry(prog, s)))
1274 while (s < strend) {
1276 if (tmp && (norun || regtry(prog, s)))
1288 PL_reg_flags |= RF_tainted;
1290 while (s < strend) {
1291 if (!isDIGIT_LC_utf8((U8*)s)) {
1292 if (tmp && (norun || regtry(prog, s)))
1303 while (s < strend) {
1304 if (!isDIGIT_LC(*s)) {
1305 if (tmp && (norun || regtry(prog, s)))
1317 Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
1326 - regexec_flags - match a regexp against a string
1329 Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend,
1330 char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
1331 /* strend: pointer to null at end of string */
1332 /* strbeg: real beginning of string */
1333 /* minend: end of match must be >=minend after stringarg. */
1334 /* data: May be used for some additional optimizations. */
1335 /* nosave: For optimizations. */
1338 register regnode *c;
1339 register char *startpos = stringarg;
1340 I32 minlen; /* must match at least this many chars */
1341 I32 dontbother = 0; /* how many characters not to try at end */
1342 /* I32 start_shift = 0; */ /* Offset of the start to find
1343 constant substr. */ /* CC */
1344 I32 end_shift = 0; /* Same for the end. */ /* CC */
1345 I32 scream_pos = -1; /* Internal iterator of scream. */
1347 SV* oreplsv = GvSV(PL_replgv);
1353 PL_regnarrate = PL_debug & 512;
1356 /* Be paranoid... */
1357 if (prog == NULL || startpos == NULL) {
1358 Perl_croak(aTHX_ "NULL regexp parameter");
1362 minlen = prog->minlen;
1363 if (strend - startpos < minlen) goto phooey;
1365 if (startpos == strbeg) /* is ^ valid at stringarg? */
1368 PL_regprev = (U32)stringarg[-1];
1369 if (!PL_multiline && PL_regprev == '\n')
1370 PL_regprev = '\0'; /* force ^ to NOT match */
1373 /* Check validity of program. */
1374 if (UCHARAT(prog->program) != REG_MAGIC) {
1375 Perl_croak(aTHX_ "corrupted regexp program");
1379 PL_reg_eval_set = 0;
1382 if (prog->reganch & ROPT_UTF8)
1383 PL_reg_flags |= RF_utf8;
1385 /* Mark beginning of line for ^ and lookbehind. */
1386 PL_regbol = startpos;
1390 /* Mark end of line for $ (and such) */
1393 /* see how far we have to get to not match where we matched before */
1394 PL_regtill = startpos+minend;
1396 /* We start without call_cc context. */
1399 /* If there is a "must appear" string, look for it. */
1402 if (prog->reganch & ROPT_GPOS_SEEN) { /* Need to have PL_reg_ganch */
1405 if (flags & REXEC_IGNOREPOS) /* Means: check only at start */
1406 PL_reg_ganch = startpos;
1407 else if (sv && SvTYPE(sv) >= SVt_PVMG
1409 && (mg = mg_find(sv, 'g')) && mg->mg_len >= 0) {
1410 PL_reg_ganch = strbeg + mg->mg_len; /* Defined pos() */
1411 if (prog->reganch & ROPT_ANCH_GPOS) {
1412 if (s > PL_reg_ganch)
1417 else /* pos() not defined */
1418 PL_reg_ganch = strbeg;
1421 if (!(flags & REXEC_CHECKED) && prog->check_substr != Nullsv) {
1422 re_scream_pos_data d;
1424 d.scream_olds = &scream_olds;
1425 d.scream_pos = &scream_pos;
1426 s = re_intuit_start(prog, sv, s, strend, flags, &d);
1428 goto phooey; /* not present */
1431 DEBUG_r( if (!PL_colorset) reginitcolors() );
1432 DEBUG_r(PerlIO_printf(Perl_debug_log,
1433 "%sMatching REx%s `%s%.60s%s%s' against `%s%.*s%s%s'\n",
1434 PL_colors[4],PL_colors[5],PL_colors[0],
1437 (strlen(prog->precomp) > 60 ? "..." : ""),
1439 (int)(strend - startpos > 60 ? 60 : strend - startpos),
1440 startpos, PL_colors[1],
1441 (strend - startpos > 60 ? "..." : ""))
1444 /* Simplest case: anchored match need be tried only once. */
1445 /* [unless only anchor is BOL and multiline is set] */
1446 if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) {
1447 if (s == startpos && regtry(prog, startpos))
1449 else if (PL_multiline || (prog->reganch & ROPT_IMPLICIT)
1450 || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */
1455 dontbother = minlen - 1;
1456 end = HOPc(strend, -dontbother) - 1;
1457 /* for multiline we only have to try after newlines */
1458 if (prog->check_substr) {
1462 if (regtry(prog, s))
1467 if (prog->reganch & RE_USE_INTUIT) {
1468 s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
1479 if (*s++ == '\n') { /* don't need PL_utf8skip here */
1480 if (regtry(prog, s))
1487 } else if (prog->reganch & ROPT_ANCH_GPOS) {
1488 if (regtry(prog, PL_reg_ganch))
1493 /* Messy cases: unanchored match. */
1494 if (prog->anchored_substr && prog->reganch & ROPT_SKIP) {
1495 /* we have /x+whatever/ */
1496 /* it must be a one character string (XXXX Except UTF?) */
1497 char ch = SvPVX(prog->anchored_substr)[0];
1503 while (s < strend) {
1505 DEBUG_r( did_match = 1 );
1506 if (regtry(prog, s)) goto got_it;
1508 while (s < strend && *s == ch)
1515 while (s < strend) {
1517 DEBUG_r( did_match = 1 );
1518 if (regtry(prog, s)) goto got_it;
1520 while (s < strend && *s == ch)
1526 DEBUG_r(did_match ||
1527 PerlIO_printf(Perl_debug_log,
1528 "Did not find anchored character...\n"));
1531 else if (prog->anchored_substr != Nullsv
1532 || (prog->float_substr != Nullsv
1533 && prog->float_max_offset < strend - s)) {
1534 SV *must = prog->anchored_substr
1535 ? prog->anchored_substr : prog->float_substr;
1537 prog->anchored_substr ? prog->anchored_offset : prog->float_max_offset;
1539 prog->anchored_substr ? prog->anchored_offset : prog->float_min_offset;
1540 char *last = HOPc(strend, /* Cannot start after this */
1541 -(I32)(CHR_SVLEN(must)
1542 - (SvTAIL(must) != 0) + back_min));
1543 char *last1; /* Last position checked before */
1549 last1 = HOPc(s, -1);
1551 last1 = s - 1; /* bogus */
1553 /* XXXX check_substr already used to find `s', can optimize if
1554 check_substr==must. */
1556 dontbother = end_shift;
1557 strend = HOPc(strend, -dontbother);
1558 while ( (s <= last) &&
1559 ((flags & REXEC_SCREAM)
1560 ? (s = screaminstr(sv, must, HOPc(s, back_min) - strbeg,
1561 end_shift, &scream_pos, 0))
1562 : (s = fbm_instr((unsigned char*)HOP(s, back_min),
1563 (unsigned char*)strend, must,
1564 PL_multiline ? FBMrf_MULTILINE : 0))) ) {
1565 DEBUG_r( did_match = 1 );
1566 if (HOPc(s, -back_max) > last1) {
1567 last1 = HOPc(s, -back_min);
1568 s = HOPc(s, -back_max);
1571 char *t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
1573 last1 = HOPc(s, -back_min);
1577 while (s <= last1) {
1578 if (regtry(prog, s))
1584 while (s <= last1) {
1585 if (regtry(prog, s))
1591 DEBUG_r(did_match ||
1592 PerlIO_printf(Perl_debug_log, "Did not find %s substr `%s%.*s%s'%s...\n",
1593 ((must == prog->anchored_substr)
1594 ? "anchored" : "floating"),
1596 (int)(SvCUR(must) - (SvTAIL(must)!=0)),
1598 PL_colors[1], (SvTAIL(must) ? "$" : "")));
1601 else if ((c = prog->regstclass)) {
1602 if (minlen && PL_regkind[(U8)OP(prog->regstclass)] != EXACT)
1603 /* don't bother with what can't match */
1604 strend = HOPc(strend, -(minlen - 1));
1606 SV *prop = sv_newmortal();
1608 PerlIO_printf(Perl_debug_log, "Matching stclass `%s' against `%s'\n", SvPVX(prop), s);
1610 if (find_byclass(prog, c, s, strend, startpos, 0))
1612 DEBUG_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass...\n"));
1616 if (prog->float_substr != Nullsv) { /* Trim the end. */
1619 if (flags & REXEC_SCREAM) {
1620 last = screaminstr(sv, prog->float_substr, s - strbeg,
1621 end_shift, &scream_pos, 1); /* last one */
1623 last = scream_olds; /* Only one occurrence. */
1627 char *little = SvPV(prog->float_substr, len);
1629 if (SvTAIL(prog->float_substr)) {
1630 if (memEQ(strend - len + 1, little, len - 1))
1631 last = strend - len + 1;
1632 else if (!PL_multiline)
1633 last = memEQ(strend - len, little, len)
1634 ? strend - len : Nullch;
1640 last = rninstr(s, strend, little, little + len);
1642 last = strend; /* matching `$' */
1646 DEBUG_r(PerlIO_printf(Perl_debug_log,
1647 "%sCan't trim the tail, match fails (should not happen)%s\n",
1648 PL_colors[4],PL_colors[5]));
1649 goto phooey; /* Should not happen! */
1651 dontbother = strend - last + prog->float_min_offset;
1653 if (minlen && (dontbother < minlen))
1654 dontbother = minlen - 1;
1655 strend -= dontbother; /* this one's always in bytes! */
1656 /* We don't know much -- general case. */
1659 if (regtry(prog, s))
1668 if (regtry(prog, s))
1670 } while (s++ < strend);
1678 RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
1680 if (PL_reg_eval_set) {
1681 /* Preserve the current value of $^R */
1682 if (oreplsv != GvSV(PL_replgv))
1683 sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
1684 restored, the value remains
1686 restore_pos(aTHXo_ 0);
1689 /* make sure $`, $&, $', and $digit will work later */
1690 if ( !(flags & REXEC_NOT_FIRST) ) {
1691 if (RX_MATCH_COPIED(prog)) {
1692 Safefree(prog->subbeg);
1693 RX_MATCH_COPIED_off(prog);
1695 if (flags & REXEC_COPY_STR) {
1696 I32 i = PL_regeol - startpos + (stringarg - strbeg);
1698 s = savepvn(strbeg, i);
1701 RX_MATCH_COPIED_on(prog);
1704 prog->subbeg = strbeg;
1705 prog->sublen = PL_regeol - strbeg; /* strend may have been modified */
1712 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n",
1713 PL_colors[4],PL_colors[5]));
1714 if (PL_reg_eval_set)
1715 restore_pos(aTHXo_ 0);
1720 - regtry - try match at specific point
1722 STATIC I32 /* 0 failure, 1 success */
1723 S_regtry(pTHX_ regexp *prog, char *startpos)
1731 PL_regindent = 0; /* XXXX Not good when matches are reenterable... */
1733 if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
1736 PL_reg_eval_set = RS_init;
1738 PerlIO_printf(Perl_debug_log, " setting stack tmpbase at %"IVdf"\n",
1739 (IV)(PL_stack_sp - PL_stack_base));
1741 SAVEI32(cxstack[cxstack_ix].blk_oldsp);
1742 cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
1743 /* Otherwise OP_NEXTSTATE will free whatever on stack now. */
1745 /* Apparently this is not needed, judging by wantarray. */
1746 /* SAVEI8(cxstack[cxstack_ix].blk_gimme);
1747 cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
1750 /* Make $_ available to executed code. */
1751 if (PL_reg_sv != DEFSV) {
1752 /* SAVE_DEFSV does *not* suffice here for USE_THREADS */
1757 if (!(SvTYPE(PL_reg_sv) >= SVt_PVMG && SvMAGIC(PL_reg_sv)
1758 && (mg = mg_find(PL_reg_sv, 'g')))) {
1759 /* prepare for quick setting of pos */
1760 sv_magic(PL_reg_sv, (SV*)0, 'g', Nullch, 0);
1761 mg = mg_find(PL_reg_sv, 'g');
1765 PL_reg_oldpos = mg->mg_len;
1766 SAVEDESTRUCTOR_X(restore_pos, 0);
1769 Newz(22,PL_reg_curpm, 1, PMOP);
1770 PL_reg_curpm->op_pmregexp = prog;
1771 PL_reg_oldcurpm = PL_curpm;
1772 PL_curpm = PL_reg_curpm;
1773 if (RX_MATCH_COPIED(prog)) {
1774 /* Here is a serious problem: we cannot rewrite subbeg,
1775 since it may be needed if this match fails. Thus
1776 $` inside (?{}) could fail... */
1777 PL_reg_oldsaved = prog->subbeg;
1778 PL_reg_oldsavedlen = prog->sublen;
1779 RX_MATCH_COPIED_off(prog);
1782 PL_reg_oldsaved = Nullch;
1783 prog->subbeg = PL_bostr;
1784 prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
1786 prog->startp[0] = startpos - PL_bostr;
1787 PL_reginput = startpos;
1788 PL_regstartp = prog->startp;
1789 PL_regendp = prog->endp;
1790 PL_reglastparen = &prog->lastparen;
1791 prog->lastparen = 0;
1793 DEBUG_r(PL_reg_starttry = startpos);
1794 if (PL_reg_start_tmpl <= prog->nparens) {
1795 PL_reg_start_tmpl = prog->nparens*3/2 + 3;
1796 if(PL_reg_start_tmp)
1797 Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1799 New(22,PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1802 /* XXXX What this code is doing here?!!! There should be no need
1803 to do this again and again, PL_reglastparen should take care of
1806 /* Tests pat.t#187 and split.t#{13,14} seem to depend on this code.
1807 * Actually, the code in regcppop() (which Ilya may be meaning by
1808 * PL_reglastparen), is not needed at all by the test suite
1809 * (op/regexp, op/pat, op/split), but that code is needed, oddly
1810 * enough, for building DynaLoader, or otherwise this
1811 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
1812 * will happen. Meanwhile, this code *is* needed for the
1813 * above-mentioned test suite tests to succeed. The common theme
1814 * on those tests seems to be returning null fields from matches.
1819 if (prog->nparens) {
1820 for (i = prog->nparens; i > *PL_reglastparen; i--) {
1827 if (regmatch(prog->program + 1)) {
1828 prog->endp[0] = PL_reginput - PL_bostr;
1831 REGCP_UNWIND(lastcp);
1835 #define RE_UNWIND_BRANCH 1
1836 #define RE_UNWIND_BRANCHJ 2
1840 typedef struct { /* XX: makes sense to enlarge it... */
1844 } re_unwind_generic_t;
1857 } re_unwind_branch_t;
1859 typedef union re_unwind_t {
1861 re_unwind_generic_t generic;
1862 re_unwind_branch_t branch;
1866 - regmatch - main matching routine
1868 * Conceptually the strategy is simple: check to see whether the current
1869 * node matches, call self recursively to see whether the rest matches,
1870 * and then act accordingly. In practice we make some effort to avoid
1871 * recursion, in particular by going through "ordinary" nodes (that don't
1872 * need to know whether the rest of the match failed) by a loop instead of
1875 /* [lwall] I've hoisted the register declarations to the outer block in order to
1876 * maybe save a little bit of pushing and popping on the stack. It also takes
1877 * advantage of machines that use a register save mask on subroutine entry.
1879 STATIC I32 /* 0 failure, 1 success */
1880 S_regmatch(pTHX_ regnode *prog)
1882 register regnode *scan; /* Current node. */
1883 regnode *next; /* Next node. */
1884 regnode *inner; /* Next node in internal branch. */
1885 register I32 nextchr; /* renamed nextchr - nextchar colides with
1886 function of same name */
1887 register I32 n; /* no or next */
1888 register I32 ln; /* len or last */
1889 register char *s; /* operand or save */
1890 register char *locinput = PL_reginput;
1891 register I32 c1, c2, paren; /* case fold search, parenth */
1892 int minmod = 0, sw = 0, logical = 0;
1894 I32 firstcp = PL_savestack_ix;
1895 register bool do_utf8 = DO_UTF8(PL_reg_sv);
1901 /* Note that nextchr is a byte even in UTF */
1902 nextchr = UCHARAT(locinput);
1904 while (scan != NULL) {
1905 #define sayNO_L (logical ? (logical = 0, sw = 0, goto cont) : sayNO)
1907 # define sayYES goto yes
1908 # define sayNO goto no
1909 # define sayYES_FINAL goto yes_final
1910 # define sayYES_LOUD goto yes_loud
1911 # define sayNO_FINAL goto no_final
1912 # define sayNO_SILENT goto do_no
1913 # define saySAME(x) if (x) goto yes; else goto no
1914 # define REPORT_CODE_OFF 24
1916 # define sayYES return 1
1917 # define sayNO return 0
1918 # define sayYES_FINAL return 1
1919 # define sayYES_LOUD return 1
1920 # define sayNO_FINAL return 0
1921 # define sayNO_SILENT return 0
1922 # define saySAME(x) return x
1925 SV *prop = sv_newmortal();
1926 int docolor = *PL_colors[0];
1927 int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
1928 int l = (PL_regeol - locinput > taill ? taill : PL_regeol - locinput);
1929 /* The part of the string before starttry has one color
1930 (pref0_len chars), between starttry and current
1931 position another one (pref_len - pref0_len chars),
1932 after the current position the third one.
1933 We assume that pref0_len <= pref_len, otherwise we
1934 decrease pref0_len. */
1935 int pref_len = (locinput - PL_bostr > (5 + taill) - l
1936 ? (5 + taill) - l : locinput - PL_bostr);
1937 int pref0_len = pref_len - (locinput - PL_reg_starttry);
1939 if (l + pref_len < (5 + taill) && l < PL_regeol - locinput)
1940 l = ( PL_regeol - locinput > (5 + taill) - pref_len
1941 ? (5 + taill) - pref_len : PL_regeol - locinput);
1944 if (pref0_len > pref_len)
1945 pref0_len = pref_len;
1946 regprop(prop, scan);
1947 PerlIO_printf(Perl_debug_log,
1948 "%4"IVdf" <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|%3"IVdf":%*s%s\n",
1949 (IV)(locinput - PL_bostr),
1950 PL_colors[4], pref0_len,
1951 locinput - pref_len, PL_colors[5],
1952 PL_colors[2], pref_len - pref0_len,
1953 locinput - pref_len + pref0_len, PL_colors[3],
1954 (docolor ? "" : "> <"),
1955 PL_colors[0], l, locinput, PL_colors[1],
1956 15 - l - pref_len + 1,
1958 (IV)(scan - PL_regprogram), PL_regindent*2, "",
1962 next = scan + NEXT_OFF(scan);
1968 if (locinput == PL_bostr
1969 ? PL_regprev == '\n'
1971 (nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
1973 /* regtill = regbol; */
1978 if (locinput == PL_bostr
1979 ? PL_regprev == '\n'
1980 : ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
1986 if (locinput == PL_bostr)
1990 if (locinput == PL_reg_ganch)
2000 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
2005 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
2007 if (PL_regeol - locinput > 1)
2011 if (PL_regeol != locinput)
2015 if (DO_UTF8(PL_reg_sv)) {
2016 locinput += PL_utf8skip[nextchr];
2017 if (locinput > PL_regeol)
2019 nextchr = UCHARAT(locinput);
2022 if (!nextchr && locinput >= PL_regeol)
2024 nextchr = UCHARAT(++locinput);
2027 if (DO_UTF8(PL_reg_sv)) {
2028 locinput += PL_utf8skip[nextchr];
2029 if (locinput > PL_regeol)
2031 nextchr = UCHARAT(locinput);
2034 if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
2036 nextchr = UCHARAT(++locinput);
2041 /* Inline the first character, for speed. */
2042 if (UCHARAT(s) != nextchr)
2044 if (PL_regeol - locinput < ln)
2046 if (ln > 1 && memNE(s, locinput, ln))
2049 nextchr = UCHARAT(locinput);
2052 PL_reg_flags |= RF_tainted;
2061 c1 = OP(scan) == EXACTF;
2065 if (utf8_to_uv((U8*)s, e - s, 0, 0) !=
2067 toLOWER_utf8((U8*)l) :
2068 toLOWER_LC_utf8((U8*)l)))
2076 nextchr = UCHARAT(locinput);
2080 /* Inline the first character, for speed. */
2081 if (UCHARAT(s) != nextchr &&
2082 UCHARAT(s) != ((OP(scan) == EXACTF)
2083 ? PL_fold : PL_fold_locale)[nextchr])
2085 if (PL_regeol - locinput < ln)
2087 if (ln > 1 && (OP(scan) == EXACTF
2088 ? ibcmp(s, locinput, ln)
2089 : ibcmp_locale(s, locinput, ln)))
2092 nextchr = UCHARAT(locinput);
2096 if (!reginclass(scan, (U8*)locinput, do_utf8))
2098 if (locinput >= PL_regeol)
2100 locinput += PL_utf8skip[nextchr];
2101 nextchr = UCHARAT(locinput);
2105 nextchr = UCHARAT(locinput);
2106 if (!reginclass(scan, (U8*)locinput, do_utf8))
2108 if (!nextchr && locinput >= PL_regeol)
2110 nextchr = UCHARAT(++locinput);
2114 PL_reg_flags |= RF_tainted;
2120 if (!(OP(scan) == ALNUM
2121 ? swash_fetch(PL_utf8_alnum, (U8*)locinput)
2122 : isALNUM_LC_utf8((U8*)locinput)))
2126 locinput += PL_utf8skip[nextchr];
2127 nextchr = UCHARAT(locinput);
2130 if (!(OP(scan) == ALNUM
2131 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
2133 nextchr = UCHARAT(++locinput);
2136 PL_reg_flags |= RF_tainted;
2139 if (!nextchr && locinput >= PL_regeol)
2142 if (OP(scan) == NALNUM
2143 ? swash_fetch(PL_utf8_alnum, (U8*)locinput)
2144 : isALNUM_LC_utf8((U8*)locinput))
2148 locinput += PL_utf8skip[nextchr];
2149 nextchr = UCHARAT(locinput);
2152 if (OP(scan) == NALNUM
2153 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
2155 nextchr = UCHARAT(++locinput);
2159 PL_reg_flags |= RF_tainted;
2163 /* was last char in word? */
2165 if (locinput == PL_regbol)
2168 U8 *r = reghop((U8*)locinput, -1);
2170 ln = utf8_to_uv(r, s - (char*)r, 0, 0);
2172 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2173 ln = isALNUM_uni(ln);
2174 n = swash_fetch(PL_utf8_alnum, (U8*)locinput);
2177 ln = isALNUM_LC_uni(ln);
2178 n = isALNUM_LC_utf8((U8*)locinput);
2182 ln = (locinput != PL_regbol) ?
2183 UCHARAT(locinput - 1) : PL_regprev;
2184 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2186 n = isALNUM(nextchr);
2189 ln = isALNUM_LC(ln);
2190 n = isALNUM_LC(nextchr);
2193 if (((!ln) == (!n)) == (OP(scan) == BOUND ||
2194 OP(scan) == BOUNDL))
2198 PL_reg_flags |= RF_tainted;
2203 if (DO_UTF8(PL_reg_sv)) {
2204 if (nextchr & 0x80) {
2205 if (!(OP(scan) == SPACE
2206 ? swash_fetch(PL_utf8_space, (U8*)locinput)
2207 : isSPACE_LC_utf8((U8*)locinput)))
2211 locinput += PL_utf8skip[nextchr];
2212 nextchr = UCHARAT(locinput);
2215 if (!(OP(scan) == SPACE
2216 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
2218 nextchr = UCHARAT(++locinput);
2221 if (!(OP(scan) == SPACE
2222 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
2224 nextchr = UCHARAT(++locinput);
2228 PL_reg_flags |= RF_tainted;
2231 if (!nextchr && locinput >= PL_regeol)
2233 if (DO_UTF8(PL_reg_sv)) {
2234 if (OP(scan) == NSPACE
2235 ? swash_fetch(PL_utf8_space, (U8*)locinput)
2236 : isSPACE_LC_utf8((U8*)locinput))
2240 locinput += PL_utf8skip[nextchr];
2241 nextchr = UCHARAT(locinput);
2244 if (OP(scan) == NSPACE
2245 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
2247 nextchr = UCHARAT(++locinput);
2250 PL_reg_flags |= RF_tainted;
2255 if (DO_UTF8(PL_reg_sv)) {
2256 if (!(OP(scan) == DIGIT
2257 ? swash_fetch(PL_utf8_digit, (U8*)locinput)
2258 : isDIGIT_LC_utf8((U8*)locinput)))
2262 locinput += PL_utf8skip[nextchr];
2263 nextchr = UCHARAT(locinput);
2266 if (!(OP(scan) == DIGIT
2267 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
2269 nextchr = UCHARAT(++locinput);
2272 PL_reg_flags |= RF_tainted;
2275 if (!nextchr && locinput >= PL_regeol)
2277 if (DO_UTF8(PL_reg_sv)) {
2278 if (OP(scan) == NDIGIT
2279 ? swash_fetch(PL_utf8_digit, (U8*)locinput)
2280 : isDIGIT_LC_utf8((U8*)locinput))
2284 locinput += PL_utf8skip[nextchr];
2285 nextchr = UCHARAT(locinput);
2288 if (OP(scan) == NDIGIT
2289 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
2291 nextchr = UCHARAT(++locinput);
2294 if (locinput >= PL_regeol || swash_fetch(PL_utf8_mark,(U8*)locinput))
2296 locinput += PL_utf8skip[nextchr];
2297 while (locinput < PL_regeol && swash_fetch(PL_utf8_mark,(U8*)locinput))
2298 locinput += UTF8SKIP(locinput);
2299 if (locinput > PL_regeol)
2301 nextchr = UCHARAT(locinput);
2304 PL_reg_flags |= RF_tainted;
2308 n = ARG(scan); /* which paren pair */
2309 ln = PL_regstartp[n];
2310 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
2311 if (*PL_reglastparen < n || ln == -1)
2312 sayNO; /* Do not match unless seen CLOSEn. */
2313 if (ln == PL_regendp[n])
2317 if (UTF && OP(scan) != REF) { /* REF can do byte comparison */
2319 char *e = PL_bostr + PL_regendp[n];
2321 * Note that we can't do the "other character" lookup trick as
2322 * in the 8-bit case (no pun intended) because in Unicode we
2323 * have to map both upper and title case to lower case.
2325 if (OP(scan) == REFF) {
2329 if (toLOWER_utf8((U8*)s) != toLOWER_utf8((U8*)l))
2339 if (toLOWER_LC_utf8((U8*)s) != toLOWER_LC_utf8((U8*)l))
2346 nextchr = UCHARAT(locinput);
2350 /* Inline the first character, for speed. */
2351 if (UCHARAT(s) != nextchr &&
2353 (UCHARAT(s) != ((OP(scan) == REFF
2354 ? PL_fold : PL_fold_locale)[nextchr]))))
2356 ln = PL_regendp[n] - ln;
2357 if (locinput + ln > PL_regeol)
2359 if (ln > 1 && (OP(scan) == REF
2360 ? memNE(s, locinput, ln)
2362 ? ibcmp(s, locinput, ln)
2363 : ibcmp_locale(s, locinput, ln))))
2366 nextchr = UCHARAT(locinput);
2377 OP_4tree *oop = PL_op;
2378 COP *ocurcop = PL_curcop;
2379 SV **ocurpad = PL_curpad;
2383 PL_op = (OP_4tree*)PL_regdata->data[n];
2384 DEBUG_r( PerlIO_printf(Perl_debug_log, " re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
2385 PL_curpad = AvARRAY((AV*)PL_regdata->data[n + 2]);
2386 PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
2388 CALLRUNOPS(aTHX); /* Scalar context. */
2394 PL_curpad = ocurpad;
2395 PL_curcop = ocurcop;
2397 if (logical == 2) { /* Postponed subexpression. */
2399 MAGIC *mg = Null(MAGIC*);
2401 CHECKPOINT cp, lastcp;
2403 if(SvROK(ret) || SvRMAGICAL(ret)) {
2404 SV *sv = SvROK(ret) ? SvRV(ret) : ret;
2407 mg = mg_find(sv, 'r');
2410 re = (regexp *)mg->mg_obj;
2411 (void)ReREFCNT_inc(re);
2415 char *t = SvPV(ret, len);
2417 char *oprecomp = PL_regprecomp;
2418 I32 osize = PL_regsize;
2419 I32 onpar = PL_regnpar;
2422 pm.op_pmdynflags = (UTF||DO_UTF8(ret) ? PMdf_UTF8 : 0);
2423 re = CALLREGCOMP(aTHX_ t, t + len, &pm);
2425 & (SVs_TEMP | SVs_PADTMP | SVf_READONLY)))
2426 sv_magic(ret,(SV*)ReREFCNT_inc(re),'r',0,0);
2427 PL_regprecomp = oprecomp;
2432 PerlIO_printf(Perl_debug_log,
2433 "Entering embedded `%s%.60s%s%s'\n",
2437 (strlen(re->precomp) > 60 ? "..." : ""))
2440 state.prev = PL_reg_call_cc;
2441 state.cc = PL_regcc;
2442 state.re = PL_reg_re;
2446 cp = regcppush(0); /* Save *all* the positions. */
2449 state.ss = PL_savestack_ix;
2450 *PL_reglastparen = 0;
2451 PL_reg_call_cc = &state;
2452 PL_reginput = locinput;
2454 /* XXXX This is too dramatic a measure... */
2457 if (regmatch(re->program + 1)) {
2458 /* Even though we succeeded, we need to restore
2459 global variables, since we may be wrapped inside
2460 SUSPEND, thus the match may be not finished yet. */
2462 /* XXXX Do this only if SUSPENDed? */
2463 PL_reg_call_cc = state.prev;
2464 PL_regcc = state.cc;
2465 PL_reg_re = state.re;
2466 cache_re(PL_reg_re);
2468 /* XXXX This is too dramatic a measure... */
2471 /* These are needed even if not SUSPEND. */
2477 REGCP_UNWIND(lastcp);
2479 PL_reg_call_cc = state.prev;
2480 PL_regcc = state.cc;
2481 PL_reg_re = state.re;
2482 cache_re(PL_reg_re);
2484 /* XXXX This is too dramatic a measure... */
2493 sv_setsv(save_scalar(PL_replgv), ret);
2497 n = ARG(scan); /* which paren pair */
2498 PL_reg_start_tmp[n] = locinput;
2503 n = ARG(scan); /* which paren pair */
2504 PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
2505 PL_regendp[n] = locinput - PL_bostr;
2506 if (n > *PL_reglastparen)
2507 *PL_reglastparen = n;
2510 n = ARG(scan); /* which paren pair */
2511 sw = (*PL_reglastparen >= n && PL_regendp[n] != -1);
2514 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
2516 next = NEXTOPER(NEXTOPER(scan));
2518 next = scan + ARG(scan);
2519 if (OP(next) == IFTHEN) /* Fake one. */
2520 next = NEXTOPER(NEXTOPER(next));
2524 logical = scan->flags;
2526 /*******************************************************************
2527 PL_regcc contains infoblock about the innermost (...)* loop, and
2528 a pointer to the next outer infoblock.
2530 Here is how Y(A)*Z is processed (if it is compiled into CURLYX/WHILEM):
2532 1) After matching X, regnode for CURLYX is processed;
2534 2) This regnode creates infoblock on the stack, and calls
2535 regmatch() recursively with the starting point at WHILEM node;
2537 3) Each hit of WHILEM node tries to match A and Z (in the order
2538 depending on the current iteration, min/max of {min,max} and
2539 greediness). The information about where are nodes for "A"
2540 and "Z" is read from the infoblock, as is info on how many times "A"
2541 was already matched, and greediness.
2543 4) After A matches, the same WHILEM node is hit again.
2545 5) Each time WHILEM is hit, PL_regcc is the infoblock created by CURLYX
2546 of the same pair. Thus when WHILEM tries to match Z, it temporarily
2547 resets PL_regcc, since this Y(A)*Z can be a part of some other loop:
2548 as in (Y(A)*Z)*. If Z matches, the automaton will hit the WHILEM node
2549 of the external loop.
2551 Currently present infoblocks form a tree with a stem formed by PL_curcc
2552 and whatever it mentions via ->next, and additional attached trees
2553 corresponding to temporarily unset infoblocks as in "5" above.
2555 In the following picture infoblocks for outer loop of
2556 (Y(A)*?Z)*?T are denoted O, for inner I. NULL starting block
2557 is denoted by x. The matched string is YAAZYAZT. Temporarily postponed
2558 infoblocks are drawn below the "reset" infoblock.
2560 In fact in the picture below we do not show failed matches for Z and T
2561 by WHILEM blocks. [We illustrate minimal matches, since for them it is
2562 more obvious *why* one needs to *temporary* unset infoblocks.]
2564 Matched REx position InfoBlocks Comment
2568 Y A)*?Z)*?T x <- O <- I
2569 YA )*?Z)*?T x <- O <- I
2570 YA A)*?Z)*?T x <- O <- I
2571 YAA )*?Z)*?T x <- O <- I
2572 YAA Z)*?T x <- O # Temporary unset I
2575 YAAZ Y(A)*?Z)*?T x <- O
2578 YAAZY (A)*?Z)*?T x <- O
2581 YAAZY A)*?Z)*?T x <- O <- I
2584 YAAZYA )*?Z)*?T x <- O <- I
2587 YAAZYA Z)*?T x <- O # Temporary unset I
2593 YAAZYAZ T x # Temporary unset O
2600 *******************************************************************/
2603 CHECKPOINT cp = PL_savestack_ix;
2604 /* No need to save/restore up to this paren */
2605 I32 parenfloor = scan->flags;
2607 if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
2609 cc.oldcc = PL_regcc;
2611 /* XXXX Probably it is better to teach regpush to support
2612 parenfloor > PL_regsize... */
2613 if (parenfloor > *PL_reglastparen)
2614 parenfloor = *PL_reglastparen; /* Pessimization... */
2615 cc.parenfloor = parenfloor;
2617 cc.min = ARG1(scan);
2618 cc.max = ARG2(scan);
2619 cc.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
2623 PL_reginput = locinput;
2624 n = regmatch(PREVOPER(next)); /* start on the WHILEM */
2626 PL_regcc = cc.oldcc;
2632 * This is really hard to understand, because after we match
2633 * what we're trying to match, we must make sure the rest of
2634 * the REx is going to match for sure, and to do that we have
2635 * to go back UP the parse tree by recursing ever deeper. And
2636 * if it fails, we have to reset our parent's current state
2637 * that we can try again after backing off.
2640 CHECKPOINT cp, lastcp;
2641 CURCUR* cc = PL_regcc;
2642 char *lastloc = cc->lastloc; /* Detection of 0-len. */
2644 n = cc->cur + 1; /* how many we know we matched */
2645 PL_reginput = locinput;
2648 PerlIO_printf(Perl_debug_log,
2649 "%*s %ld out of %ld..%ld cc=%lx\n",
2650 REPORT_CODE_OFF+PL_regindent*2, "",
2651 (long)n, (long)cc->min,
2652 (long)cc->max, (long)cc)
2655 /* If degenerate scan matches "", assume scan done. */
2657 if (locinput == cc->lastloc && n >= cc->min) {
2658 PL_regcc = cc->oldcc;
2662 PerlIO_printf(Perl_debug_log,
2663 "%*s empty match detected, try continuation...\n",
2664 REPORT_CODE_OFF+PL_regindent*2, "")
2666 if (regmatch(cc->next))
2674 /* First just match a string of min scans. */
2678 cc->lastloc = locinput;
2679 if (regmatch(cc->scan))
2682 cc->lastloc = lastloc;
2687 /* Check whether we already were at this position.
2688 Postpone detection until we know the match is not
2689 *that* much linear. */
2690 if (!PL_reg_maxiter) {
2691 PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
2692 PL_reg_leftiter = PL_reg_maxiter;
2694 if (PL_reg_leftiter-- == 0) {
2695 I32 size = (PL_reg_maxiter + 7)/8;
2696 if (PL_reg_poscache) {
2697 if (PL_reg_poscache_size < size) {
2698 Renew(PL_reg_poscache, size, char);
2699 PL_reg_poscache_size = size;
2701 Zero(PL_reg_poscache, size, char);
2704 PL_reg_poscache_size = size;
2705 Newz(29, PL_reg_poscache, size, char);
2708 PerlIO_printf(Perl_debug_log,
2709 "%sDetected a super-linear match, switching on caching%s...\n",
2710 PL_colors[4], PL_colors[5])
2713 if (PL_reg_leftiter < 0) {
2714 I32 o = locinput - PL_bostr, b;
2716 o = (scan->flags & 0xf) - 1 + o * (scan->flags>>4);
2719 if (PL_reg_poscache[o] & (1<<b)) {
2721 PerlIO_printf(Perl_debug_log,
2722 "%*s already tried at this position...\n",
2723 REPORT_CODE_OFF+PL_regindent*2, "")
2727 PL_reg_poscache[o] |= (1<<b);
2731 /* Prefer next over scan for minimal matching. */
2734 PL_regcc = cc->oldcc;
2737 cp = regcppush(cc->parenfloor);
2739 if (regmatch(cc->next)) {
2741 sayYES; /* All done. */
2743 REGCP_UNWIND(lastcp);
2749 if (n >= cc->max) { /* Maximum greed exceeded? */
2750 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
2751 && !(PL_reg_flags & RF_warned)) {
2752 PL_reg_flags |= RF_warned;
2753 Perl_warner(aTHX_ WARN_REGEXP, "%s limit (%d) exceeded",
2754 "Complex regular subexpression recursion",
2761 PerlIO_printf(Perl_debug_log,
2762 "%*s trying longer...\n",
2763 REPORT_CODE_OFF+PL_regindent*2, "")
2765 /* Try scanning more and see if it helps. */
2766 PL_reginput = locinput;
2768 cc->lastloc = locinput;
2769 cp = regcppush(cc->parenfloor);
2771 if (regmatch(cc->scan)) {
2775 REGCP_UNWIND(lastcp);
2778 cc->lastloc = lastloc;
2782 /* Prefer scan over next for maximal matching. */
2784 if (n < cc->max) { /* More greed allowed? */
2785 cp = regcppush(cc->parenfloor);
2787 cc->lastloc = locinput;
2789 if (regmatch(cc->scan)) {
2793 REGCP_UNWIND(lastcp);
2794 regcppop(); /* Restore some previous $<digit>s? */
2795 PL_reginput = locinput;
2797 PerlIO_printf(Perl_debug_log,
2798 "%*s failed, try continuation...\n",
2799 REPORT_CODE_OFF+PL_regindent*2, "")
2802 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
2803 && !(PL_reg_flags & RF_warned)) {
2804 PL_reg_flags |= RF_warned;
2805 Perl_warner(aTHX_ WARN_REGEXP, "%s limit (%d) exceeded",
2806 "Complex regular subexpression recursion",
2810 /* Failed deeper matches of scan, so see if this one works. */
2811 PL_regcc = cc->oldcc;
2814 if (regmatch(cc->next))
2820 cc->lastloc = lastloc;
2825 next = scan + ARG(scan);
2828 inner = NEXTOPER(NEXTOPER(scan));
2831 inner = NEXTOPER(scan);
2836 if (OP(next) != c1) /* No choice. */
2837 next = inner; /* Avoid recursion. */
2839 I32 lastparen = *PL_reglastparen;
2841 re_unwind_branch_t *uw;
2843 /* Put unwinding data on stack */
2844 unwind1 = SSNEWt(1,re_unwind_branch_t);
2845 uw = SSPTRt(unwind1,re_unwind_branch_t);
2848 uw->type = ((c1 == BRANCH)
2850 : RE_UNWIND_BRANCHJ);
2851 uw->lastparen = lastparen;
2853 uw->locinput = locinput;
2854 uw->nextchr = nextchr;
2856 uw->regindent = ++PL_regindent;
2859 REGCP_SET(uw->lastcp);
2861 /* Now go into the first branch */
2874 /* We suppose that the next guy does not need
2875 backtracking: in particular, it is of constant length,
2876 and has no parenths to influence future backrefs. */
2877 ln = ARG1(scan); /* min to match */
2878 n = ARG2(scan); /* max to match */
2879 paren = scan->flags;
2881 if (paren > PL_regsize)
2883 if (paren > *PL_reglastparen)
2884 *PL_reglastparen = paren;
2886 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
2888 scan += NEXT_OFF(scan); /* Skip former OPEN. */
2889 PL_reginput = locinput;
2892 if (ln && regrepeat_hard(scan, ln, &l) < ln)
2894 if (ln && l == 0 && n >= ln
2895 /* In fact, this is tricky. If paren, then the
2896 fact that we did/didnot match may influence
2897 future execution. */
2898 && !(paren && ln == 0))
2900 locinput = PL_reginput;
2901 if (PL_regkind[(U8)OP(next)] == EXACT) {
2902 c1 = (U8)*STRING(next);
2903 if (OP(next) == EXACTF)
2905 else if (OP(next) == EXACTFL)
2906 c2 = PL_fold_locale[c1];
2913 /* This may be improved if l == 0. */
2914 while (n >= ln || (n == REG_INFTY && ln > 0 && l)) { /* ln overflow ? */
2915 /* If it could work, try it. */
2917 UCHARAT(PL_reginput) == c1 ||
2918 UCHARAT(PL_reginput) == c2)
2922 PL_regstartp[paren] =
2923 HOPc(PL_reginput, -l) - PL_bostr;
2924 PL_regendp[paren] = PL_reginput - PL_bostr;
2927 PL_regendp[paren] = -1;
2931 REGCP_UNWIND(lastcp);
2933 /* Couldn't or didn't -- move forward. */
2934 PL_reginput = locinput;
2935 if (regrepeat_hard(scan, 1, &l)) {
2937 locinput = PL_reginput;
2944 n = regrepeat_hard(scan, n, &l);
2945 if (n != 0 && l == 0
2946 /* In fact, this is tricky. If paren, then the
2947 fact that we did/didnot match may influence
2948 future execution. */
2949 && !(paren && ln == 0))
2951 locinput = PL_reginput;
2953 PerlIO_printf(Perl_debug_log,
2954 "%*s matched %"IVdf" times, len=%"IVdf"...\n",
2955 (int)(REPORT_CODE_OFF+PL_regindent*2), "",
2959 if (PL_regkind[(U8)OP(next)] == EXACT) {
2960 c1 = (U8)*STRING(next);
2961 if (OP(next) == EXACTF)
2963 else if (OP(next) == EXACTFL)
2964 c2 = PL_fold_locale[c1];
2973 /* If it could work, try it. */
2975 UCHARAT(PL_reginput) == c1 ||
2976 UCHARAT(PL_reginput) == c2)
2979 PerlIO_printf(Perl_debug_log,
2980 "%*s trying tail with n=%"IVdf"...\n",
2981 (int)(REPORT_CODE_OFF+PL_regindent*2), "", (IV)n)
2985 PL_regstartp[paren] = HOPc(PL_reginput, -l) - PL_bostr;
2986 PL_regendp[paren] = PL_reginput - PL_bostr;
2989 PL_regendp[paren] = -1;
2993 REGCP_UNWIND(lastcp);
2995 /* Couldn't or didn't -- back up. */
2997 locinput = HOPc(locinput, -l);
2998 PL_reginput = locinput;
3005 paren = scan->flags; /* Which paren to set */
3006 if (paren > PL_regsize)
3008 if (paren > *PL_reglastparen)
3009 *PL_reglastparen = paren;
3010 ln = ARG1(scan); /* min to match */
3011 n = ARG2(scan); /* max to match */
3012 scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
3016 ln = ARG1(scan); /* min to match */
3017 n = ARG2(scan); /* max to match */
3018 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
3023 scan = NEXTOPER(scan);
3029 scan = NEXTOPER(scan);
3033 * Lookahead to avoid useless match attempts
3034 * when we know what character comes next.
3036 if (PL_regkind[(U8)OP(next)] == EXACT) {
3037 c1 = (U8)*STRING(next);
3038 if (OP(next) == EXACTF)
3040 else if (OP(next) == EXACTFL)
3041 c2 = PL_fold_locale[c1];
3047 PL_reginput = locinput;
3051 if (ln && regrepeat(scan, ln) < ln)
3053 locinput = PL_reginput;
3056 char *e = locinput + n - ln; /* Should not check after this */
3057 char *old = locinput;
3059 if (e >= PL_regeol || (n == REG_INFTY))
3062 /* Find place 'next' could work */
3064 while (locinput <= e && *locinput != c1)
3067 while (locinput <= e
3074 /* PL_reginput == old now */
3075 if (locinput != old) {
3076 ln = 1; /* Did some */
3077 if (regrepeat(scan, locinput - old) <
3081 /* PL_reginput == locinput now */
3082 TRYPAREN(paren, ln, locinput);
3083 PL_reginput = locinput; /* Could be reset... */
3084 REGCP_UNWIND(lastcp);
3085 /* Couldn't or didn't -- move forward. */
3090 while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */
3091 /* If it could work, try it. */
3093 UCHARAT(PL_reginput) == c1 ||
3094 UCHARAT(PL_reginput) == c2)
3096 TRYPAREN(paren, n, PL_reginput);
3097 REGCP_UNWIND(lastcp);
3099 /* Couldn't or didn't -- move forward. */
3100 PL_reginput = locinput;
3101 if (regrepeat(scan, 1)) {
3103 locinput = PL_reginput;
3111 n = regrepeat(scan, n);
3112 locinput = PL_reginput;
3113 if (ln < n && PL_regkind[(U8)OP(next)] == EOL &&
3114 (!PL_multiline || OP(next) == SEOL || OP(next) == EOS)) {
3115 ln = n; /* why back off? */
3116 /* ...because $ and \Z can match before *and* after
3117 newline at the end. Consider "\n\n" =~ /\n+\Z\n/.
3118 We should back off by one in this case. */
3119 if (UCHARAT(PL_reginput - 1) == '\n' && OP(next) != EOS)
3125 /* If it could work, try it. */
3127 UCHARAT(PL_reginput) == c1 ||
3128 UCHARAT(PL_reginput) == c2)
3130 TRYPAREN(paren, n, PL_reginput);
3131 REGCP_UNWIND(lastcp);
3133 /* Couldn't or didn't -- back up. */
3135 PL_reginput = locinput = HOPc(locinput, -1);
3140 /* If it could work, try it. */
3142 UCHARAT(PL_reginput) == c1 ||
3143 UCHARAT(PL_reginput) == c2)
3145 TRYPAREN(paren, n, PL_reginput);
3146 REGCP_UNWIND(lastcp);
3148 /* Couldn't or didn't -- back up. */
3150 PL_reginput = locinput = HOPc(locinput, -1);
3157 if (PL_reg_call_cc) {
3158 re_cc_state *cur_call_cc = PL_reg_call_cc;
3159 CURCUR *cctmp = PL_regcc;
3160 regexp *re = PL_reg_re;
3161 CHECKPOINT cp, lastcp;
3163 cp = regcppush(0); /* Save *all* the positions. */
3165 regcp_set_to(PL_reg_call_cc->ss); /* Restore parens of
3167 PL_reginput = locinput; /* Make position available to
3169 cache_re(PL_reg_call_cc->re);
3170 PL_regcc = PL_reg_call_cc->cc;
3171 PL_reg_call_cc = PL_reg_call_cc->prev;
3172 if (regmatch(cur_call_cc->node)) {
3173 PL_reg_call_cc = cur_call_cc;
3177 REGCP_UNWIND(lastcp);
3179 PL_reg_call_cc = cur_call_cc;
3185 PerlIO_printf(Perl_debug_log,
3186 "%*s continuation failed...\n",
3187 REPORT_CODE_OFF+PL_regindent*2, "")
3191 if (locinput < PL_regtill) {
3192 DEBUG_r(PerlIO_printf(Perl_debug_log,
3193 "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
3195 (long)(locinput - PL_reg_starttry),
3196 (long)(PL_regtill - PL_reg_starttry),
3198 sayNO_FINAL; /* Cannot match: too short. */
3200 PL_reginput = locinput; /* put where regtry can find it */
3201 sayYES_FINAL; /* Success! */
3203 PL_reginput = locinput; /* put where regtry can find it */
3204 sayYES_LOUD; /* Success! */
3207 PL_reginput = locinput;
3212 if (UTF) { /* XXXX This is absolutely
3213 broken, we read before
3215 s = HOPMAYBEc(locinput, -scan->flags);
3221 if (locinput < PL_bostr + scan->flags)
3223 PL_reginput = locinput - scan->flags;
3228 PL_reginput = locinput;
3233 if (UTF) { /* XXXX This is absolutely
3234 broken, we read before
3236 s = HOPMAYBEc(locinput, -scan->flags);
3237 if (!s || s < PL_bostr)
3242 if (locinput < PL_bostr + scan->flags)
3244 PL_reginput = locinput - scan->flags;
3249 PL_reginput = locinput;
3252 inner = NEXTOPER(NEXTOPER(scan));
3253 if (regmatch(inner) != n) {
3268 if (OP(scan) == SUSPEND) {
3269 locinput = PL_reginput;
3270 nextchr = UCHARAT(locinput);
3275 next = scan + ARG(scan);
3280 PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
3281 PTR2UV(scan), OP(scan));
3282 Perl_croak(aTHX_ "regexp memory corruption");
3289 * We get here only if there's trouble -- normally "case END" is
3290 * the terminating point.
3292 Perl_croak(aTHX_ "corrupted regexp pointers");
3298 PerlIO_printf(Perl_debug_log,
3299 "%*s %scould match...%s\n",
3300 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],PL_colors[5])
3304 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
3305 PL_colors[4],PL_colors[5]));
3311 #if 0 /* Breaks $^R */
3319 PerlIO_printf(Perl_debug_log,
3320 "%*s %sfailed...%s\n",
3321 REPORT_CODE_OFF+PL_regindent*2, "",PL_colors[4],PL_colors[5])
3327 re_unwind_t *uw = SSPTRt(unwind,re_unwind_t);
3330 case RE_UNWIND_BRANCH:
3331 case RE_UNWIND_BRANCHJ:
3333 re_unwind_branch_t *uwb = &(uw->branch);
3334 I32 lastparen = uwb->lastparen;
3336 REGCP_UNWIND(uwb->lastcp);
3337 for (n = *PL_reglastparen; n > lastparen; n--)
3339 *PL_reglastparen = n;
3340 scan = next = uwb->next;
3342 OP(scan) != (uwb->type == RE_UNWIND_BRANCH
3343 ? BRANCH : BRANCHJ) ) { /* Failure */
3350 /* Have more choice yet. Reuse the same uwb. */
3352 if ((n = (uwb->type == RE_UNWIND_BRANCH
3353 ? NEXT_OFF(next) : ARG(next))))
3356 next = NULL; /* XXXX Needn't unwinding in this case... */
3358 next = NEXTOPER(scan);
3359 if (uwb->type == RE_UNWIND_BRANCHJ)
3360 next = NEXTOPER(next);
3361 locinput = uwb->locinput;
3362 nextchr = uwb->nextchr;
3364 PL_regindent = uwb->regindent;
3371 Perl_croak(aTHX_ "regexp unwind memory corruption");
3382 - regrepeat - repeatedly match something simple, report how many
3385 * [This routine now assumes that it will only match on things of length 1.
3386 * That was true before, but now we assume scan - reginput is the count,
3387 * rather than incrementing count on every character. [Er, except utf8.]]
3390 S_regrepeat(pTHX_ regnode *p, I32 max)
3392 register char *scan;
3394 register char *loceol = PL_regeol;
3395 register I32 hardcount = 0;
3396 register bool do_utf8 = DO_UTF8(PL_reg_sv);
3399 if (max != REG_INFTY && max < loceol - scan)
3400 loceol = scan + max;
3403 if (DO_UTF8(PL_reg_sv)) {
3405 while (scan < loceol && *scan != '\n') {
3406 scan += UTF8SKIP(scan);
3410 while (scan < loceol && *scan != '\n')
3415 if (DO_UTF8(PL_reg_sv)) {
3417 while (scan < loceol) {
3418 scan += UTF8SKIP(scan);
3425 case EXACT: /* length of string is 1 */
3427 while (scan < loceol && UCHARAT(scan) == c)
3430 case EXACTF: /* length of string is 1 */
3432 while (scan < loceol &&
3433 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
3436 case EXACTFL: /* length of string is 1 */
3437 PL_reg_flags |= RF_tainted;
3439 while (scan < loceol &&
3440 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
3446 while (scan < loceol && reginclass(p, (U8*)scan, do_utf8)) {
3447 scan += UTF8SKIP(scan);
3451 while (scan < loceol && reginclass(p, (U8*)scan, do_utf8))
3456 if (DO_UTF8(PL_reg_sv)) {
3458 while (scan < loceol && swash_fetch(PL_utf8_alnum, (U8*)scan)) {
3459 scan += UTF8SKIP(scan);
3463 while (scan < loceol && isALNUM(*scan))
3468 PL_reg_flags |= RF_tainted;
3469 if (DO_UTF8(PL_reg_sv)) {
3471 while (scan < loceol && isALNUM_LC_utf8((U8*)scan)) {
3472 scan += UTF8SKIP(scan);
3476 while (scan < loceol && isALNUM_LC(*scan))
3481 if (DO_UTF8(PL_reg_sv)) {
3483 while (scan < loceol && !swash_fetch(PL_utf8_alnum, (U8*)scan)) {
3484 scan += UTF8SKIP(scan);
3488 while (scan < loceol && !isALNUM(*scan))
3493 PL_reg_flags |= RF_tainted;
3494 if (DO_UTF8(PL_reg_sv)) {
3496 while (scan < loceol && !isALNUM_LC_utf8((U8*)scan)) {
3497 scan += UTF8SKIP(scan);
3501 while (scan < loceol && !isALNUM_LC(*scan))
3506 if (DO_UTF8(PL_reg_sv)) {
3508 while (scan < loceol &&
3509 (*scan == ' ' || swash_fetch(PL_utf8_space,(U8*)scan))) {
3510 scan += UTF8SKIP(scan);
3514 while (scan < loceol && isSPACE(*scan))
3519 PL_reg_flags |= RF_tainted;
3520 if (DO_UTF8(PL_reg_sv)) {
3522 while (scan < loceol &&
3523 (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
3524 scan += UTF8SKIP(scan);
3528 while (scan < loceol && isSPACE_LC(*scan))
3533 if (DO_UTF8(PL_reg_sv)) {
3535 while (scan < loceol &&
3536 !(*scan == ' ' || swash_fetch(PL_utf8_space,(U8*)scan))) {
3537 scan += UTF8SKIP(scan);
3541 while (scan < loceol && !isSPACE(*scan))
3546 PL_reg_flags |= RF_tainted;
3547 if (DO_UTF8(PL_reg_sv)) {
3549 while (scan < loceol &&
3550 !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
3551 scan += UTF8SKIP(scan);
3555 while (scan < loceol && !isSPACE_LC(*scan))
3560 if (DO_UTF8(PL_reg_sv)) {
3562 while (scan < loceol && swash_fetch(PL_utf8_digit,(U8*)scan)) {
3563 scan += UTF8SKIP(scan);
3567 while (scan < loceol && isDIGIT(*scan))
3572 if (DO_UTF8(PL_reg_sv)) {
3574 while (scan < loceol && !swash_fetch(PL_utf8_digit,(U8*)scan)) {
3575 scan += UTF8SKIP(scan);
3579 while (scan < loceol && !isDIGIT(*scan))
3583 default: /* Called on something of 0 width. */
3584 break; /* So match right here or not at all. */
3590 c = scan - PL_reginput;
3595 SV *prop = sv_newmortal();
3598 PerlIO_printf(Perl_debug_log,
3599 "%*s %s can match %"IVdf" times out of %"IVdf"...\n",
3600 REPORT_CODE_OFF+1, "", SvPVX(prop),(IV)c,(IV)max);
3607 - regrepeat_hard - repeatedly match something, report total lenth and length
3609 * The repeater is supposed to have constant length.
3613 S_regrepeat_hard(pTHX_ regnode *p, I32 max, I32 *lp)
3615 register char *scan;
3616 register char *start;
3617 register char *loceol = PL_regeol;
3619 I32 count = 0, res = 1;
3624 start = PL_reginput;
3626 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
3629 while (start < PL_reginput) {
3631 start += UTF8SKIP(start);
3642 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
3644 *lp = l = PL_reginput - start;
3645 if (max != REG_INFTY && l*max < loceol - scan)
3646 loceol = scan + l*max;
3659 - regclass_swash - prepare the utf8 swash
3663 Perl_regclass_swash(pTHX_ register regnode* node, bool doinit, SV** initsvp)
3668 if (PL_regdata && PL_regdata->count) {
3671 if (PL_regdata->what[n] == 's') {
3672 SV *rv = (SV*)PL_regdata->data[n];
3673 AV *av = (AV*)SvRV((SV*)rv);
3676 si = *av_fetch(av, 0, FALSE);
3677 a = av_fetch(av, 1, FALSE);
3681 else if (si && doinit) {
3682 sw = swash_init("utf8", "", si, 1, 0);
3683 (void)av_store(av, 1, sw);
3695 - reginclass - determine if a character falls into a character class
3699 S_reginclass(pTHX_ register regnode *n, register U8* p, register bool do_utf8)
3701 char flags = ANYOF_FLAGS(n);
3704 if (do_utf8 || (flags & ANYOF_UNICODE)) {
3705 if (do_utf8 && !ANYOF_RUNTIME(n)) {
3707 UV c = utf8_to_uv_simple(p, &len);
3709 if (len != (STRLEN)-1 && c < 256 && ANYOF_BITMAP_TEST(n, c))
3714 SV *sw = regclass_swash(n, TRUE, 0);
3717 if (swash_fetch(sw, p))
3719 else if (flags & ANYOF_FOLD) {
3720 U8 tmpbuf[UTF8_MAXLEN+1];
3722 if (flags & ANYOF_LOCALE) {
3723 PL_reg_flags |= RF_tainted;
3724 uv_to_utf8(tmpbuf, toLOWER_LC_utf8(p));
3727 uv_to_utf8(tmpbuf, toLOWER_utf8(p));
3728 if (swash_fetch(sw, tmpbuf))
3737 if (ANYOF_BITMAP_TEST(n, c))
3739 else if (flags & ANYOF_FOLD) {
3742 if (flags & ANYOF_LOCALE) {
3743 PL_reg_flags |= RF_tainted;
3744 f = PL_fold_locale[c];
3748 if (f != c && ANYOF_BITMAP_TEST(n, f))
3752 if (!match && (flags & ANYOF_CLASS)) {
3753 PL_reg_flags |= RF_tainted;
3755 (ANYOF_CLASS_TEST(n, ANYOF_ALNUM) && isALNUM_LC(c)) ||
3756 (ANYOF_CLASS_TEST(n, ANYOF_NALNUM) && !isALNUM_LC(c)) ||
3757 (ANYOF_CLASS_TEST(n, ANYOF_SPACE) && isSPACE_LC(c)) ||
3758 (ANYOF_CLASS_TEST(n, ANYOF_NSPACE) && !isSPACE_LC(c)) ||
3759 (ANYOF_CLASS_TEST(n, ANYOF_DIGIT) && isDIGIT_LC(c)) ||
3760 (ANYOF_CLASS_TEST(n, ANYOF_NDIGIT) && !isDIGIT_LC(c)) ||
3761 (ANYOF_CLASS_TEST(n, ANYOF_ALNUMC) && isALNUMC_LC(c)) ||
3762 (ANYOF_CLASS_TEST(n, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
3763 (ANYOF_CLASS_TEST(n, ANYOF_ALPHA) && isALPHA_LC(c)) ||
3764 (ANYOF_CLASS_TEST(n, ANYOF_NALPHA) && !isALPHA_LC(c)) ||
3765 (ANYOF_CLASS_TEST(n, ANYOF_ASCII) && isASCII(c)) ||
3766 (ANYOF_CLASS_TEST(n, ANYOF_NASCII) && !isASCII(c)) ||
3767 (ANYOF_CLASS_TEST(n, ANYOF_CNTRL) && isCNTRL_LC(c)) ||
3768 (ANYOF_CLASS_TEST(n, ANYOF_NCNTRL) && !isCNTRL_LC(c)) ||
3769 (ANYOF_CLASS_TEST(n, ANYOF_GRAPH) && isGRAPH_LC(c)) ||
3770 (ANYOF_CLASS_TEST(n, ANYOF_NGRAPH) && !isGRAPH_LC(c)) ||
3771 (ANYOF_CLASS_TEST(n, ANYOF_LOWER) && isLOWER_LC(c)) ||
3772 (ANYOF_CLASS_TEST(n, ANYOF_NLOWER) && !isLOWER_LC(c)) ||
3773 (ANYOF_CLASS_TEST(n, ANYOF_PRINT) && isPRINT_LC(c)) ||
3774 (ANYOF_CLASS_TEST(n, ANYOF_NPRINT) && !isPRINT_LC(c)) ||
3775 (ANYOF_CLASS_TEST(n, ANYOF_PUNCT) && isPUNCT_LC(c)) ||
3776 (ANYOF_CLASS_TEST(n, ANYOF_NPUNCT) && !isPUNCT_LC(c)) ||
3777 (ANYOF_CLASS_TEST(n, ANYOF_UPPER) && isUPPER_LC(c)) ||
3778 (ANYOF_CLASS_TEST(n, ANYOF_NUPPER) && !isUPPER_LC(c)) ||
3779 (ANYOF_CLASS_TEST(n, ANYOF_XDIGIT) && isXDIGIT(c)) ||
3780 (ANYOF_CLASS_TEST(n, ANYOF_NXDIGIT) && !isXDIGIT(c)) ||
3781 (ANYOF_CLASS_TEST(n, ANYOF_PSXSPC) && isPSXSPC(c)) ||
3782 (ANYOF_CLASS_TEST(n, ANYOF_NPSXSPC) && !isPSXSPC(c)) ||
3783 (ANYOF_CLASS_TEST(n, ANYOF_BLANK) && isBLANK(c)) ||
3784 (ANYOF_CLASS_TEST(n, ANYOF_NBLANK) && !isBLANK(c))
3785 ) /* How's that for a conditional? */
3792 return (flags & ANYOF_INVERT) ? !match : match;
3796 S_reghop(pTHX_ U8 *s, I32 off)
3799 while (off-- && s < (U8*)PL_regeol) {
3800 /* XXX could check well-formedness here */
3806 if (s > (U8*)PL_bostr) {
3808 if (UTF8_IS_CONTINUED(*s)) {
3809 while (s > (U8*)PL_bostr && UTF8_IS_CONTINUATION(*s))
3812 /* XXX could check well-formedness here */
3820 S_reghopmaybe(pTHX_ U8* s, I32 off)
3823 while (off-- && s < (U8*)PL_regeol) {
3824 /* XXX could check well-formedness here */
3832 if (s > (U8*)PL_bostr) {
3834 if (UTF8_IS_CONTINUED(*s)) {
3835 while (s > (U8*)PL_bostr && UTF8_IS_CONTINUATION(*s))
3838 /* XXX could check well-formedness here */
3854 restore_pos(pTHXo_ void *arg)
3856 if (PL_reg_eval_set) {
3857 if (PL_reg_oldsaved) {
3858 PL_reg_re->subbeg = PL_reg_oldsaved;
3859 PL_reg_re->sublen = PL_reg_oldsavedlen;
3860 RX_MATCH_COPIED_on(PL_reg_re);
3862 PL_reg_magic->mg_len = PL_reg_oldpos;
3863 PL_reg_eval_set = 0;
3864 PL_curpm = PL_reg_oldcurpm;