5 * "One Ring to rule them all, One Ring to find them..."
8 /* NOTE: this is derived from Henry Spencer's regexp code, and should not
9 * confused with the original package (see point 3 below). Thanks, Henry!
12 /* Additional note: this code is very heavily munged from Henry's version
13 * in places. In some spots I've traded clarity for efficiency, so don't
14 * blame Henry for some of the lack of readability.
17 /* The names of the functions have been changed from regcomp and
18 * regexec to pregcomp and pregexec in order to avoid conflicts
19 * with the POSIX routines of the same names.
22 #ifdef PERL_EXT_RE_BUILD
23 /* need to replace pregcomp et al, so enable that */
24 # ifndef PERL_IN_XSUB_RE
25 # define PERL_IN_XSUB_RE
27 /* need access to debugger hooks */
28 # if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING)
33 #ifdef PERL_IN_XSUB_RE
34 /* We *really* need to overwrite these symbols: */
35 # define Perl_regexec_flags my_regexec
36 # define Perl_regdump my_regdump
37 # define Perl_regprop my_regprop
38 # define Perl_re_intuit_start my_re_intuit_start
39 /* *These* symbols are masked to allow static link. */
40 # define Perl_pregexec my_pregexec
41 # define Perl_reginitcolors my_reginitcolors
43 # define PERL_NO_GET_CONTEXT
48 * pregcomp and pregexec -- regsub and regerror are not used in perl
50 * Copyright (c) 1986 by University of Toronto.
51 * Written by Henry Spencer. Not derived from licensed software.
53 * Permission is granted to anyone to use this software for any
54 * purpose on any computer system, and to redistribute it freely,
55 * subject to the following restrictions:
57 * 1. The author is not responsible for the consequences of use of
58 * this software, no matter how awful, even if they arise
61 * 2. The origin of this software must not be misrepresented, either
62 * by explicit claim or by omission.
64 * 3. Altered versions must be plainly marked as such, and must not
65 * be misrepresented as being the original software.
67 **** Alterations to Henry's code are...
69 **** Copyright (c) 1991-2000, Larry Wall
71 **** You may distribute under the terms of either the GNU General Public
72 **** License or the Artistic License, as specified in the README file.
74 * Beware that some of this code is subtly aware of the way operator
75 * precedence is structured in regular expressions. Serious changes in
76 * regular-expression syntax might require a total rethink.
79 #define PERL_IN_REGEXEC_C
82 #ifdef PERL_IN_XSUB_RE
83 # if defined(PERL_CAPI) || defined(PERL_OBJECT)
90 #define RF_tainted 1 /* tainted information used? */
91 #define RF_warned 2 /* warned about big count? */
92 #define RF_evaled 4 /* Did an EVAL with setting? */
93 #define RF_utf8 8 /* String contains multibyte chars? */
95 #define UTF (PL_reg_flags & RF_utf8)
97 #define RS_init 1 /* eval environment created */
98 #define RS_set 2 /* replsv value is set */
101 #define STATIC static
108 #define REGINCLASS(p,c) (ANYOF_FLAGS(p) ? reginclass(p,c) : ANYOF_BITMAP_TEST(p,c))
110 # define REGINCLASSUTF8(f,p) (ARG1(f) ? reginclassutf8(f,p) : swash_fetch(*av_fetch((AV*)SvRV((SV*)PL_regdata->data[ARG2(f)]),0,FALSE),p))
112 # define REGINCLASSUTF8(f,p) (ARG1(f) ? reginclassutf8(f,p) : swash_fetch((SV*)PL_regdata->data[ARG2(f)],p))
115 #define CHR_SVLEN(sv) (UTF ? sv_len_utf8(sv) : SvCUR(sv))
116 #define CHR_DIST(a,b) (UTF ? utf8_distance(a,b) : a - b)
118 #define reghop_c(pos,off) ((char*)reghop((U8*)pos, off))
119 #define reghopmaybe_c(pos,off) ((char*)reghopmaybe((U8*)pos, off))
120 #define HOP(pos,off) (UTF ? reghop((U8*)pos, off) : (U8*)(pos + off))
121 #define HOPMAYBE(pos,off) (UTF ? reghopmaybe((U8*)pos, off) : (U8*)(pos + off))
122 #define HOPc(pos,off) ((char*)HOP(pos,off))
123 #define HOPMAYBEc(pos,off) ((char*)HOPMAYBE(pos,off))
125 static void restore_pos(pTHXo_ void *arg);
129 S_regcppush(pTHX_ I32 parenfloor)
131 int retval = PL_savestack_ix;
132 int i = (PL_regsize - parenfloor) * 4;
136 for (p = PL_regsize; p > parenfloor; p--) {
137 SSPUSHINT(PL_regendp[p]);
138 SSPUSHINT(PL_regstartp[p]);
139 SSPUSHPTR(PL_reg_start_tmp[p]);
142 SSPUSHINT(PL_regsize);
143 SSPUSHINT(*PL_reglastparen);
144 SSPUSHPTR(PL_reginput);
146 SSPUSHINT(SAVEt_REGCONTEXT);
150 /* These are needed since we do not localize EVAL nodes: */
151 # define REGCP_SET(cp) DEBUG_r(PerlIO_printf(Perl_debug_log, \
152 " Setting an EVAL scope, savestack=%"IVdf"\n", \
153 (IV)PL_savestack_ix)); cp = PL_savestack_ix
155 # define REGCP_UNWIND(cp) DEBUG_r(cp != PL_savestack_ix ? \
156 PerlIO_printf(Perl_debug_log, \
157 " Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
158 (IV)(cp), (IV)PL_savestack_ix) : 0); regcpblow(cp)
167 assert(i == SAVEt_REGCONTEXT);
169 input = (char *) SSPOPPTR;
170 *PL_reglastparen = SSPOPINT;
171 PL_regsize = SSPOPINT;
172 for (i -= 3; i > 0; i -= 4) {
173 paren = (U32)SSPOPINT;
174 PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
175 PL_regstartp[paren] = SSPOPINT;
177 if (paren <= *PL_reglastparen)
178 PL_regendp[paren] = tmps;
180 PerlIO_printf(Perl_debug_log,
181 " restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
182 (UV)paren, (IV)PL_regstartp[paren],
183 (IV)(PL_reg_start_tmp[paren] - PL_bostr),
184 (IV)PL_regendp[paren],
185 (paren > *PL_reglastparen ? "(no)" : ""));
189 if (*PL_reglastparen + 1 <= PL_regnpar) {
190 PerlIO_printf(Perl_debug_log,
191 " restoring \\%"IVdf"..\\%"IVdf" to undef\n",
192 (IV)(*PL_reglastparen + 1), (IV)PL_regnpar);
196 /* It would seem that the similar code in regtry()
197 * already takes care of this, and in fact it is in
198 * a better location to since this code can #if 0-ed out
199 * but the code in regtry() is needed or otherwise tests
200 * requiring null fields (pat.t#187 and split.t#{13,14}
201 * (as of patchlevel 7877) will fail. Then again,
202 * this code seems to be necessary or otherwise
203 * building DynaLoader will fail:
204 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
206 for (paren = *PL_reglastparen + 1; paren <= PL_regnpar; paren++) {
207 if (paren > PL_regsize)
208 PL_regstartp[paren] = -1;
209 PL_regendp[paren] = -1;
216 S_regcp_set_to(pTHX_ I32 ss)
218 I32 tmp = PL_savestack_ix;
220 PL_savestack_ix = ss;
222 PL_savestack_ix = tmp;
226 typedef struct re_cc_state
230 struct re_cc_state *prev;
235 #define regcpblow(cp) LEAVE_SCOPE(cp) /* Ignores regcppush()ed data. */
237 #define TRYPAREN(paren, n, input) { \
240 PL_regstartp[paren] = HOPc(input, -1) - PL_bostr; \
241 PL_regendp[paren] = input - PL_bostr; \
244 PL_regendp[paren] = -1; \
246 if (regmatch(next)) \
249 PL_regendp[paren] = -1; \
254 * pregexec and friends
258 - pregexec - match a regexp against a string
261 Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend,
262 char *strbeg, I32 minend, SV *screamer, U32 nosave)
263 /* strend: pointer to null at end of string */
264 /* strbeg: real beginning of string */
265 /* minend: end of match must be >=minend after stringarg. */
266 /* nosave: For optimizations. */
269 regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
270 nosave ? 0 : REXEC_COPY_STR);
274 S_cache_re(pTHX_ regexp *prog)
276 PL_regprecomp = prog->precomp; /* Needed for FAIL. */
278 PL_regprogram = prog->program;
280 PL_regnpar = prog->nparens;
281 PL_regdata = prog->data;
286 * Need to implement the following flags for reg_anch:
288 * USE_INTUIT_NOML - Useful to call re_intuit_start() first
290 * INTUIT_AUTORITATIVE_NOML - Can trust a positive answer
291 * INTUIT_AUTORITATIVE_ML
292 * INTUIT_ONCE_NOML - Intuit can match in one location only.
295 * Another flag for this function: SECOND_TIME (so that float substrs
296 * with giant delta may be not rechecked).
299 /* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
301 /* If SCREAM, then SvPVX(sv) should be compatible with strpos and strend.
302 Otherwise, only SvCUR(sv) is used to get strbeg. */
304 /* XXXX We assume that strpos is strbeg unless sv. */
306 /* XXXX Some places assume that there is a fixed substring.
307 An update may be needed if optimizer marks as "INTUITable"
308 RExen without fixed substrings. Similarly, it is assumed that
309 lengths of all the strings are no more than minlen, thus they
310 cannot come from lookahead.
311 (Or minlen should take into account lookahead.) */
313 /* A failure to find a constant substring means that there is no need to make
314 an expensive call to REx engine, thus we celebrate a failure. Similarly,
315 finding a substring too deep into the string means that less calls to
316 regtry() should be needed.
318 REx compiler's optimizer found 4 possible hints:
319 a) Anchored substring;
321 c) Whether we are anchored (beginning-of-line or \G);
322 d) First node (of those at offset 0) which may distingush positions;
323 We use a)b)d) and multiline-part of c), and try to find a position in the
324 string which does not contradict any of them.
327 /* Most of decisions we do here should have been done at compile time.
328 The nodes of the REx which we used for the search should have been
329 deleted from the finite automaton. */
332 Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
333 char *strend, U32 flags, re_scream_pos_data *data)
335 register I32 start_shift;
336 /* Should be nonnegative! */
337 register I32 end_shift;
344 register char *other_last = Nullch; /* other substr checked before this */
345 char *check_at; /* check substr found at this pos */
347 char *i_strpos = strpos;
350 DEBUG_r( if (!PL_colorset) reginitcolors() );
351 DEBUG_r(PerlIO_printf(Perl_debug_log,
352 "%sGuessing start of match, REx%s `%s%.60s%s%s' against `%s%.*s%s%s'...\n",
353 PL_colors[4],PL_colors[5],PL_colors[0],
356 (strlen(prog->precomp) > 60 ? "..." : ""),
358 (int)(strend - strpos > 60 ? 60 : strend - strpos),
359 strpos, PL_colors[1],
360 (strend - strpos > 60 ? "..." : ""))
363 if (prog->minlen > strend - strpos) {
364 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too short...\n"));
367 strbeg = (sv && SvPOK(sv)) ? strend - SvCUR(sv) : strpos;
368 check = prog->check_substr;
369 if (prog->reganch & ROPT_ANCH) { /* Match at beg-of-str or after \n */
370 ml_anch = !( (prog->reganch & ROPT_ANCH_SINGLE)
371 || ( (prog->reganch & ROPT_ANCH_BOL)
372 && !PL_multiline ) ); /* Check after \n? */
375 if ( !(prog->reganch & ROPT_ANCH_GPOS) /* Checked by the caller */
376 /* SvCUR is not set on references: SvRV and SvPVX overlap */
378 && (strpos != strbeg)) {
379 DEBUG_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
382 if (prog->check_offset_min == prog->check_offset_max) {
383 /* Substring at constant offset from beg-of-str... */
386 PL_regeol = strend; /* Used in HOP() */
387 s = HOPc(strpos, prog->check_offset_min);
389 slen = SvCUR(check); /* >= 1 */
391 if ( strend - s > slen || strend - s < slen - 1
392 || (strend - s == slen && strend[-1] != '\n')) {
393 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
396 /* Now should match s[0..slen-2] */
398 if (slen && (*SvPVX(check) != *s
400 && memNE(SvPVX(check), s, slen)))) {
402 DEBUG_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
406 else if (*SvPVX(check) != *s
407 || ((slen = SvCUR(check)) > 1
408 && memNE(SvPVX(check), s, slen)))
410 goto success_at_start;
413 /* Match is anchored, but substr is not anchored wrt beg-of-str. */
415 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
416 end_shift = prog->minlen - start_shift -
417 CHR_SVLEN(check) + (SvTAIL(check) != 0);
419 I32 end = prog->check_offset_max + CHR_SVLEN(check)
420 - (SvTAIL(check) != 0);
421 I32 eshift = strend - s - end;
423 if (end_shift < eshift)
427 else { /* Can match at random position */
430 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
431 /* Should be nonnegative! */
432 end_shift = prog->minlen - start_shift -
433 CHR_SVLEN(check) + (SvTAIL(check) != 0);
436 #ifdef DEBUGGING /* 7/99: reports of failure (with the older version) */
438 Perl_croak(aTHX_ "panic: end_shift");
442 /* Find a possible match in the region s..strend by looking for
443 the "check" substring in the region corrected by start/end_shift. */
444 if (flags & REXEC_SCREAM) {
445 I32 p = -1; /* Internal iterator of scream. */
446 I32 *pp = data ? data->scream_pos : &p;
448 if (PL_screamfirst[BmRARE(check)] >= 0
449 || ( BmRARE(check) == '\n'
450 && (BmPREVIOUS(check) == SvCUR(check) - 1)
452 s = screaminstr(sv, check,
453 start_shift + (s - strbeg), end_shift, pp, 0);
457 *data->scream_olds = s;
460 s = fbm_instr((unsigned char*)s + start_shift,
461 (unsigned char*)strend - end_shift,
462 check, PL_multiline ? FBMrf_MULTILINE : 0);
464 /* Update the count-of-usability, remove useless subpatterns,
467 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s %s substr `%s%.*s%s'%s%s",
468 (s ? "Found" : "Did not find"),
469 ((check == prog->anchored_substr) ? "anchored" : "floating"),
471 (int)(SvCUR(check) - (SvTAIL(check)!=0)),
473 PL_colors[1], (SvTAIL(check) ? "$" : ""),
474 (s ? " at offset " : "...\n") ) );
481 /* Finish the diagnostic message */
482 DEBUG_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
484 /* Got a candidate. Check MBOL anchoring, and the *other* substr.
485 Start with the other substr.
486 XXXX no SCREAM optimization yet - and a very coarse implementation
487 XXXX /ttx+/ results in anchored=`ttx', floating=`x'. floating will
488 *always* match. Probably should be marked during compile...
489 Probably it is right to do no SCREAM here...
492 if (prog->float_substr && prog->anchored_substr) {
493 /* Take into account the "other" substring. */
494 /* XXXX May be hopelessly wrong for UTF... */
497 if (check == prog->float_substr) {
500 char *last = s - start_shift, *last1, *last2;
504 t = s - prog->check_offset_max;
505 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
506 && (!(prog->reganch & ROPT_UTF8)
507 || (PL_bostr = strpos, /* Used in regcopmaybe() */
508 (t = reghopmaybe_c(s, -(prog->check_offset_max)))
513 t += prog->anchored_offset;
514 if (t < other_last) /* These positions already checked */
517 last2 = last1 = strend - prog->minlen;
520 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
521 /* On end-of-str: see comment below. */
522 s = fbm_instr((unsigned char*)t,
523 (unsigned char*)last1 + prog->anchored_offset
524 + SvCUR(prog->anchored_substr)
525 - (SvTAIL(prog->anchored_substr)!=0),
526 prog->anchored_substr, PL_multiline ? FBMrf_MULTILINE : 0);
527 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s anchored substr `%s%.*s%s'%s",
528 (s ? "Found" : "Contradicts"),
530 (int)(SvCUR(prog->anchored_substr)
531 - (SvTAIL(prog->anchored_substr)!=0)),
532 SvPVX(prog->anchored_substr),
533 PL_colors[1], (SvTAIL(prog->anchored_substr) ? "$" : "")));
535 if (last1 >= last2) {
536 DEBUG_r(PerlIO_printf(Perl_debug_log,
537 ", giving up...\n"));
540 DEBUG_r(PerlIO_printf(Perl_debug_log,
541 ", trying floating at offset %ld...\n",
542 (long)(s1 + 1 - i_strpos)));
543 PL_regeol = strend; /* Used in HOP() */
544 other_last = last1 + prog->anchored_offset + 1;
549 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
550 (long)(s - i_strpos)));
551 t = s - prog->anchored_offset;
560 else { /* Take into account the floating substring. */
565 last1 = last = strend - prog->minlen + prog->float_min_offset;
566 if (last - t > prog->float_max_offset)
567 last = t + prog->float_max_offset;
568 s = t + prog->float_min_offset;
571 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
572 /* fbm_instr() takes into account exact value of end-of-str
573 if the check is SvTAIL(ed). Since false positives are OK,
574 and end-of-str is not later than strend we are OK. */
575 s = fbm_instr((unsigned char*)s,
576 (unsigned char*)last + SvCUR(prog->float_substr)
577 - (SvTAIL(prog->float_substr)!=0),
578 prog->float_substr, PL_multiline ? FBMrf_MULTILINE : 0);
579 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s floating substr `%s%.*s%s'%s",
580 (s ? "Found" : "Contradicts"),
582 (int)(SvCUR(prog->float_substr)
583 - (SvTAIL(prog->float_substr)!=0)),
584 SvPVX(prog->float_substr),
585 PL_colors[1], (SvTAIL(prog->float_substr) ? "$" : "")));
588 DEBUG_r(PerlIO_printf(Perl_debug_log,
589 ", giving up...\n"));
592 DEBUG_r(PerlIO_printf(Perl_debug_log,
593 ", trying anchored starting at offset %ld...\n",
594 (long)(s1 + 1 - i_strpos)));
595 other_last = last + 1;
596 PL_regeol = strend; /* Used in HOP() */
601 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
602 (long)(s - i_strpos)));
612 t = s - prog->check_offset_max;
614 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
615 && (!(prog->reganch & ROPT_UTF8)
616 || (PL_bostr = strpos, /* Used in regcopmaybe() */
617 ((t = reghopmaybe_c(s, -(prog->check_offset_max)))
620 /* Fixed substring is found far enough so that the match
621 cannot start at strpos. */
623 if (ml_anch && t[-1] != '\n') {
624 /* Eventually fbm_*() should handle this, but often
625 anchored_offset is not 0, so this check will not be wasted. */
626 /* XXXX In the code below we prefer to look for "^" even in
627 presence of anchored substrings. And we search even
628 beyond the found float position. These pessimizations
629 are historical artefacts only. */
631 while (t < strend - prog->minlen) {
633 if (t < check_at - prog->check_offset_min) {
634 if (prog->anchored_substr) {
635 /* Since we moved from the found position,
636 we definitely contradict the found anchored
637 substr. Due to the above check we do not
638 contradict "check" substr.
639 Thus we can arrive here only if check substr
640 is float. Redo checking for "other"=="fixed".
643 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
644 PL_colors[0],PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
645 goto do_other_anchored;
647 /* We don't contradict the found floating substring. */
648 /* XXXX Why not check for STCLASS? */
650 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
651 PL_colors[0],PL_colors[1], (long)(s - i_strpos)));
654 /* Position contradicts check-string */
655 /* XXXX probably better to look for check-string
656 than for "\n", so one should lower the limit for t? */
657 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n",
658 PL_colors[0],PL_colors[1], (long)(t + 1 - i_strpos)));
659 other_last = strpos = s = t + 1;
664 DEBUG_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
665 PL_colors[0],PL_colors[1]));
669 DEBUG_r(PerlIO_printf(Perl_debug_log, "Starting position does not contradict /%s^%s/m...\n",
670 PL_colors[0],PL_colors[1]));
674 ++BmUSEFUL(prog->check_substr); /* hooray/5 */
678 /* The found string does not prohibit matching at strpos,
679 - no optimization of calling REx engine can be performed,
680 unless it was an MBOL and we are not after MBOL,
681 or a future STCLASS check will fail this. */
683 /* Even in this situation we may use MBOL flag if strpos is offset
684 wrt the start of the string. */
685 if (ml_anch && sv && !SvROK(sv) /* See prev comment on SvROK */
686 && (strpos != strbeg) && strpos[-1] != '\n'
687 /* May be due to an implicit anchor of m{.*foo} */
688 && !(prog->reganch & ROPT_IMPLICIT))
693 DEBUG_r( if (ml_anch)
694 PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n",
695 (long)(strpos - i_strpos), PL_colors[0],PL_colors[1]);
698 if (!(prog->reganch & ROPT_NAUGHTY) /* XXXX If strpos moved? */
699 && prog->check_substr /* Could be deleted already */
700 && --BmUSEFUL(prog->check_substr) < 0
701 && prog->check_substr == prog->float_substr)
703 /* If flags & SOMETHING - do not do it many times on the same match */
704 DEBUG_r(PerlIO_printf(Perl_debug_log, "... Disabling check substring...\n"));
705 SvREFCNT_dec(prog->check_substr);
706 prog->check_substr = Nullsv; /* disable */
707 prog->float_substr = Nullsv; /* clear */
708 check = Nullsv; /* abort */
710 /* XXXX This is a remnant of the old implementation. It
711 looks wasteful, since now INTUIT can use many
713 prog->reganch &= ~RE_USE_INTUIT;
720 /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
721 if (prog->regstclass) {
722 /* minlen == 0 is possible if regstclass is \b or \B,
723 and the fixed substr is ''$.
724 Since minlen is already taken into account, s+1 is before strend;
725 accidentally, minlen >= 1 guaranties no false positives at s + 1
726 even for \b or \B. But (minlen? 1 : 0) below assumes that
727 regstclass does not come from lookahead... */
728 /* If regstclass takes bytelength more than 1: If charlength==1, OK.
729 This leaves EXACTF only, which is dealt with in find_byclass(). */
730 int cl_l = (PL_regkind[(U8)OP(prog->regstclass)] == EXACT
731 ? STR_LEN(prog->regstclass)
733 char *endpos = (prog->anchored_substr || ml_anch)
734 ? s + (prog->minlen? cl_l : 0)
735 : (prog->float_substr ? check_at - start_shift + cl_l
737 char *startpos = strbeg;
740 if (prog->reganch & ROPT_UTF8) {
741 PL_regdata = prog->data; /* Used by REGINCLASS UTF logic */
744 s = find_byclass(prog, prog->regstclass, s, endpos, startpos, 1);
749 if (endpos == strend) {
750 DEBUG_r( PerlIO_printf(Perl_debug_log,
751 "Could not match STCLASS...\n") );
754 DEBUG_r( PerlIO_printf(Perl_debug_log,
755 "This position contradicts STCLASS...\n") );
756 if ((prog->reganch & ROPT_ANCH) && !ml_anch)
758 /* Contradict one of substrings */
759 if (prog->anchored_substr) {
760 if (prog->anchored_substr == check) {
761 DEBUG_r( what = "anchored" );
763 PL_regeol = strend; /* Used in HOP() */
765 if (s + start_shift + end_shift > strend) {
766 /* XXXX Should be taken into account earlier? */
767 DEBUG_r( PerlIO_printf(Perl_debug_log,
768 "Could not match STCLASS...\n") );
773 DEBUG_r( PerlIO_printf(Perl_debug_log,
774 "Looking for %s substr starting at offset %ld...\n",
775 what, (long)(s + start_shift - i_strpos)) );
778 /* Have both, check_string is floating */
779 if (t + start_shift >= check_at) /* Contradicts floating=check */
780 goto retry_floating_check;
781 /* Recheck anchored substring, but not floating... */
785 DEBUG_r( PerlIO_printf(Perl_debug_log,
786 "Looking for anchored substr starting at offset %ld...\n",
787 (long)(other_last - i_strpos)) );
788 goto do_other_anchored;
790 /* Another way we could have checked stclass at the
791 current position only: */
796 DEBUG_r( PerlIO_printf(Perl_debug_log,
797 "Looking for /%s^%s/m starting at offset %ld...\n",
798 PL_colors[0],PL_colors[1], (long)(t - i_strpos)) );
801 if (!prog->float_substr) /* Could have been deleted */
803 /* Check is floating subtring. */
804 retry_floating_check:
805 t = check_at - start_shift;
806 DEBUG_r( what = "floating" );
807 goto hop_and_restart;
810 PerlIO_printf(Perl_debug_log,
811 "By STCLASS: moving %ld --> %ld\n",
812 (long)(t - i_strpos), (long)(s - i_strpos));
814 PerlIO_printf(Perl_debug_log,
815 "Does not contradict STCLASS...\n") );
818 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s%s:%s match at offset %ld\n",
819 PL_colors[4], (check ? "Guessed" : "Giving up"),
820 PL_colors[5], (long)(s - i_strpos)) );
823 fail_finish: /* Substring not found */
824 if (prog->check_substr) /* could be removed already */
825 BmUSEFUL(prog->check_substr) += 5; /* hooray */
827 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
828 PL_colors[4],PL_colors[5]));
832 /* We know what class REx starts with. Try to find this position... */
834 S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *startpos, I32 norun)
836 I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
842 register I32 tmp = 1; /* Scratch variable? */
844 /* We know what class it must start with. */
848 if (REGINCLASSUTF8(c, (U8*)s)) {
849 if (tmp && (norun || regtry(prog, s)))
861 if (REGINCLASS(c, *(U8*)s)) {
862 if (tmp && (norun || regtry(prog, s)))
882 c2 = PL_fold_locale[c1];
887 e = s; /* Due to minlen logic of intuit() */
888 /* Here it is NOT UTF! */
892 && (ln == 1 || !(OP(c) == EXACTF
894 : ibcmp_locale(s, m, ln)))
895 && (norun || regtry(prog, s)) )
901 if ( (*(U8*)s == c1 || *(U8*)s == c2)
902 && (ln == 1 || !(OP(c) == EXACTF
904 : ibcmp_locale(s, m, ln)))
905 && (norun || regtry(prog, s)) )
912 PL_reg_flags |= RF_tainted;
915 tmp = (s != startpos) ? UCHARAT(s - 1) : '\n';
916 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
918 if (tmp == !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
920 if ((norun || regtry(prog, s)))
925 if ((!prog->minlen && tmp) && (norun || regtry(prog, s)))
929 PL_reg_flags |= RF_tainted;
935 U8 *r = reghop((U8*)s, -1);
937 tmp = (I32)utf8_to_uv(r, s - (char*)r, 0, 0);
939 tmp = ((OP(c) == BOUNDUTF8 ?
940 isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
942 if (tmp == !(OP(c) == BOUNDUTF8 ?
943 swash_fetch(PL_utf8_alnum, (U8*)s) :
944 isALNUM_LC_utf8((U8*)s)))
947 if ((norun || regtry(prog, s)))
952 if ((!prog->minlen && tmp) && (norun || regtry(prog, s)))
956 PL_reg_flags |= RF_tainted;
959 tmp = (s != startpos) ? UCHARAT(s - 1) : '\n';
960 tmp = ((OP(c) == NBOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
962 if (tmp == !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
964 else if ((norun || regtry(prog, s)))
968 if ((!prog->minlen && !tmp) && (norun || regtry(prog, s)))
972 PL_reg_flags |= RF_tainted;
978 U8 *r = reghop((U8*)s, -1);
980 tmp = (I32)utf8_to_uv(r, s - (char*)r, 0, 0);
982 tmp = ((OP(c) == NBOUNDUTF8 ?
983 isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
985 if (tmp == !(OP(c) == NBOUNDUTF8 ?
986 swash_fetch(PL_utf8_alnum, (U8*)s) :
987 isALNUM_LC_utf8((U8*)s)))
989 else if ((norun || regtry(prog, s)))
993 if ((!prog->minlen && !tmp) && (norun || regtry(prog, s)))
999 if (tmp && (norun || regtry(prog, s)))
1010 while (s < strend) {
1011 if (swash_fetch(PL_utf8_alnum, (U8*)s)) {
1012 if (tmp && (norun || regtry(prog, s)))
1023 PL_reg_flags |= RF_tainted;
1024 while (s < strend) {
1025 if (isALNUM_LC(*s)) {
1026 if (tmp && (norun || regtry(prog, s)))
1037 PL_reg_flags |= RF_tainted;
1038 while (s < strend) {
1039 if (isALNUM_LC_utf8((U8*)s)) {
1040 if (tmp && (norun || regtry(prog, s)))
1051 while (s < strend) {
1053 if (tmp && (norun || regtry(prog, s)))
1064 while (s < strend) {
1065 if (!swash_fetch(PL_utf8_alnum, (U8*)s)) {
1066 if (tmp && (norun || regtry(prog, s)))
1077 PL_reg_flags |= RF_tainted;
1078 while (s < strend) {
1079 if (!isALNUM_LC(*s)) {
1080 if (tmp && (norun || regtry(prog, s)))
1091 PL_reg_flags |= RF_tainted;
1092 while (s < strend) {
1093 if (!isALNUM_LC_utf8((U8*)s)) {
1094 if (tmp && (norun || regtry(prog, s)))
1105 while (s < strend) {
1107 if (tmp && (norun || regtry(prog, s)))
1118 while (s < strend) {
1119 if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s)) {
1120 if (tmp && (norun || regtry(prog, s)))
1131 PL_reg_flags |= RF_tainted;
1132 while (s < strend) {
1133 if (isSPACE_LC(*s)) {
1134 if (tmp && (norun || regtry(prog, s)))
1145 PL_reg_flags |= RF_tainted;
1146 while (s < strend) {
1147 if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) {
1148 if (tmp && (norun || regtry(prog, s)))
1159 while (s < strend) {
1161 if (tmp && (norun || regtry(prog, s)))
1172 while (s < strend) {
1173 if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s))) {
1174 if (tmp && (norun || regtry(prog, s)))
1185 PL_reg_flags |= RF_tainted;
1186 while (s < strend) {
1187 if (!isSPACE_LC(*s)) {
1188 if (tmp && (norun || regtry(prog, s)))
1199 PL_reg_flags |= RF_tainted;
1200 while (s < strend) {
1201 if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) {
1202 if (tmp && (norun || regtry(prog, s)))
1213 while (s < strend) {
1215 if (tmp && (norun || regtry(prog, s)))
1226 while (s < strend) {
1227 if (swash_fetch(PL_utf8_digit,(U8*)s)) {
1228 if (tmp && (norun || regtry(prog, s)))
1239 PL_reg_flags |= RF_tainted;
1240 while (s < strend) {
1241 if (isDIGIT_LC(*s)) {
1242 if (tmp && (norun || regtry(prog, s)))
1253 PL_reg_flags |= RF_tainted;
1254 while (s < strend) {
1255 if (isDIGIT_LC_utf8((U8*)s)) {
1256 if (tmp && (norun || regtry(prog, s)))
1267 while (s < strend) {
1269 if (tmp && (norun || regtry(prog, s)))
1280 while (s < strend) {
1281 if (!swash_fetch(PL_utf8_digit,(U8*)s)) {
1282 if (tmp && (norun || regtry(prog, s)))
1293 PL_reg_flags |= RF_tainted;
1294 while (s < strend) {
1295 if (!isDIGIT_LC(*s)) {
1296 if (tmp && (norun || regtry(prog, s)))
1307 PL_reg_flags |= RF_tainted;
1308 while (s < strend) {
1309 if (!isDIGIT_LC_utf8((U8*)s)) {
1310 if (tmp && (norun || regtry(prog, s)))
1321 Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
1330 - regexec_flags - match a regexp against a string
1333 Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend,
1334 char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
1335 /* strend: pointer to null at end of string */
1336 /* strbeg: real beginning of string */
1337 /* minend: end of match must be >=minend after stringarg. */
1338 /* data: May be used for some additional optimizations. */
1339 /* nosave: For optimizations. */
1342 register regnode *c;
1343 register char *startpos = stringarg;
1344 I32 minlen; /* must match at least this many chars */
1345 I32 dontbother = 0; /* how many characters not to try at end */
1346 /* I32 start_shift = 0; */ /* Offset of the start to find
1347 constant substr. */ /* CC */
1348 I32 end_shift = 0; /* Same for the end. */ /* CC */
1349 I32 scream_pos = -1; /* Internal iterator of scream. */
1351 SV* oreplsv = GvSV(PL_replgv);
1357 PL_regnarrate = PL_debug & 512;
1360 /* Be paranoid... */
1361 if (prog == NULL || startpos == NULL) {
1362 Perl_croak(aTHX_ "NULL regexp parameter");
1366 minlen = prog->minlen;
1367 if (strend - startpos < minlen) goto phooey;
1369 if (startpos == strbeg) /* is ^ valid at stringarg? */
1372 PL_regprev = (U32)stringarg[-1];
1373 if (!PL_multiline && PL_regprev == '\n')
1374 PL_regprev = '\0'; /* force ^ to NOT match */
1377 /* Check validity of program. */
1378 if (UCHARAT(prog->program) != REG_MAGIC) {
1379 Perl_croak(aTHX_ "corrupted regexp program");
1383 PL_reg_eval_set = 0;
1386 if (prog->reganch & ROPT_UTF8)
1387 PL_reg_flags |= RF_utf8;
1389 /* Mark beginning of line for ^ and lookbehind. */
1390 PL_regbol = startpos;
1394 /* Mark end of line for $ (and such) */
1397 /* see how far we have to get to not match where we matched before */
1398 PL_regtill = startpos+minend;
1400 /* We start without call_cc context. */
1403 /* If there is a "must appear" string, look for it. */
1406 if (prog->reganch & ROPT_GPOS_SEEN) { /* Need to have PL_reg_ganch */
1409 if (flags & REXEC_IGNOREPOS) /* Means: check only at start */
1410 PL_reg_ganch = startpos;
1411 else if (sv && SvTYPE(sv) >= SVt_PVMG
1413 && (mg = mg_find(sv, 'g')) && mg->mg_len >= 0) {
1414 PL_reg_ganch = strbeg + mg->mg_len; /* Defined pos() */
1415 if (prog->reganch & ROPT_ANCH_GPOS) {
1416 if (s > PL_reg_ganch)
1421 else /* pos() not defined */
1422 PL_reg_ganch = strbeg;
1425 if (!(flags & REXEC_CHECKED) && prog->check_substr != Nullsv) {
1426 re_scream_pos_data d;
1428 d.scream_olds = &scream_olds;
1429 d.scream_pos = &scream_pos;
1430 s = re_intuit_start(prog, sv, s, strend, flags, &d);
1432 goto phooey; /* not present */
1435 DEBUG_r( if (!PL_colorset) reginitcolors() );
1436 DEBUG_r(PerlIO_printf(Perl_debug_log,
1437 "%sMatching REx%s `%s%.60s%s%s' against `%s%.*s%s%s'\n",
1438 PL_colors[4],PL_colors[5],PL_colors[0],
1441 (strlen(prog->precomp) > 60 ? "..." : ""),
1443 (int)(strend - startpos > 60 ? 60 : strend - startpos),
1444 startpos, PL_colors[1],
1445 (strend - startpos > 60 ? "..." : ""))
1448 /* Simplest case: anchored match need be tried only once. */
1449 /* [unless only anchor is BOL and multiline is set] */
1450 if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) {
1451 if (s == startpos && regtry(prog, startpos))
1453 else if (PL_multiline || (prog->reganch & ROPT_IMPLICIT)
1454 || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */
1459 dontbother = minlen - 1;
1460 end = HOPc(strend, -dontbother) - 1;
1461 /* for multiline we only have to try after newlines */
1462 if (prog->check_substr) {
1466 if (regtry(prog, s))
1471 if (prog->reganch & RE_USE_INTUIT) {
1472 s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
1483 if (*s++ == '\n') { /* don't need PL_utf8skip here */
1484 if (regtry(prog, s))
1491 } else if (prog->reganch & ROPT_ANCH_GPOS) {
1492 if (regtry(prog, PL_reg_ganch))
1497 /* Messy cases: unanchored match. */
1498 if (prog->anchored_substr && prog->reganch & ROPT_SKIP) {
1499 /* we have /x+whatever/ */
1500 /* it must be a one character string (XXXX Except UTF?) */
1501 char ch = SvPVX(prog->anchored_substr)[0];
1507 while (s < strend) {
1509 DEBUG_r( did_match = 1 );
1510 if (regtry(prog, s)) goto got_it;
1512 while (s < strend && *s == ch)
1519 while (s < strend) {
1521 DEBUG_r( did_match = 1 );
1522 if (regtry(prog, s)) goto got_it;
1524 while (s < strend && *s == ch)
1530 DEBUG_r(did_match ||
1531 PerlIO_printf(Perl_debug_log,
1532 "Did not find anchored character...\n"));
1535 else if (prog->anchored_substr != Nullsv
1536 || (prog->float_substr != Nullsv
1537 && prog->float_max_offset < strend - s)) {
1538 SV *must = prog->anchored_substr
1539 ? prog->anchored_substr : prog->float_substr;
1541 prog->anchored_substr ? prog->anchored_offset : prog->float_max_offset;
1543 prog->anchored_substr ? prog->anchored_offset : prog->float_min_offset;
1544 char *last = HOPc(strend, /* Cannot start after this */
1545 -(I32)(CHR_SVLEN(must)
1546 - (SvTAIL(must) != 0) + back_min));
1547 char *last1; /* Last position checked before */
1553 last1 = HOPc(s, -1);
1555 last1 = s - 1; /* bogus */
1557 /* XXXX check_substr already used to find `s', can optimize if
1558 check_substr==must. */
1560 dontbother = end_shift;
1561 strend = HOPc(strend, -dontbother);
1562 while ( (s <= last) &&
1563 ((flags & REXEC_SCREAM)
1564 ? (s = screaminstr(sv, must, HOPc(s, back_min) - strbeg,
1565 end_shift, &scream_pos, 0))
1566 : (s = fbm_instr((unsigned char*)HOP(s, back_min),
1567 (unsigned char*)strend, must,
1568 PL_multiline ? FBMrf_MULTILINE : 0))) ) {
1569 DEBUG_r( did_match = 1 );
1570 if (HOPc(s, -back_max) > last1) {
1571 last1 = HOPc(s, -back_min);
1572 s = HOPc(s, -back_max);
1575 char *t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
1577 last1 = HOPc(s, -back_min);
1581 while (s <= last1) {
1582 if (regtry(prog, s))
1588 while (s <= last1) {
1589 if (regtry(prog, s))
1595 DEBUG_r(did_match ||
1596 PerlIO_printf(Perl_debug_log, "Did not find %s substr `%s%.*s%s'%s...\n",
1597 ((must == prog->anchored_substr)
1598 ? "anchored" : "floating"),
1600 (int)(SvCUR(must) - (SvTAIL(must)!=0)),
1602 PL_colors[1], (SvTAIL(must) ? "$" : "")));
1605 else if ((c = prog->regstclass)) {
1606 if (minlen && PL_regkind[(U8)OP(prog->regstclass)] != EXACT)
1607 /* don't bother with what can't match */
1608 strend = HOPc(strend, -(minlen - 1));
1609 if (find_byclass(prog, c, s, strend, startpos, 0))
1611 DEBUG_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass...\n"));
1615 if (prog->float_substr != Nullsv) { /* Trim the end. */
1618 if (flags & REXEC_SCREAM) {
1619 last = screaminstr(sv, prog->float_substr, s - strbeg,
1620 end_shift, &scream_pos, 1); /* last one */
1622 last = scream_olds; /* Only one occurence. */
1626 char *little = SvPV(prog->float_substr, len);
1628 if (SvTAIL(prog->float_substr)) {
1629 if (memEQ(strend - len + 1, little, len - 1))
1630 last = strend - len + 1;
1631 else if (!PL_multiline)
1632 last = memEQ(strend - len, little, len)
1633 ? strend - len : Nullch;
1639 last = rninstr(s, strend, little, little + len);
1641 last = strend; /* matching `$' */
1645 DEBUG_r(PerlIO_printf(Perl_debug_log,
1646 "%sCan't trim the tail, match fails (should not happen)%s\n",
1647 PL_colors[4],PL_colors[5]));
1648 goto phooey; /* Should not happen! */
1650 dontbother = strend - last + prog->float_min_offset;
1652 if (minlen && (dontbother < minlen))
1653 dontbother = minlen - 1;
1654 strend -= dontbother; /* this one's always in bytes! */
1655 /* We don't know much -- general case. */
1658 if (regtry(prog, s))
1667 if (regtry(prog, s))
1669 } while (s++ < strend);
1677 RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
1679 if (PL_reg_eval_set) {
1680 /* Preserve the current value of $^R */
1681 if (oreplsv != GvSV(PL_replgv))
1682 sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
1683 restored, the value remains
1685 restore_pos(aTHXo_ 0);
1688 /* make sure $`, $&, $', and $digit will work later */
1689 if ( !(flags & REXEC_NOT_FIRST) ) {
1690 if (RX_MATCH_COPIED(prog)) {
1691 Safefree(prog->subbeg);
1692 RX_MATCH_COPIED_off(prog);
1694 if (flags & REXEC_COPY_STR) {
1695 I32 i = PL_regeol - startpos + (stringarg - strbeg);
1697 s = savepvn(strbeg, i);
1700 RX_MATCH_COPIED_on(prog);
1703 prog->subbeg = strbeg;
1704 prog->sublen = PL_regeol - strbeg; /* strend may have been modified */
1711 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n",
1712 PL_colors[4],PL_colors[5]));
1713 if (PL_reg_eval_set)
1714 restore_pos(aTHXo_ 0);
1719 - regtry - try match at specific point
1721 STATIC I32 /* 0 failure, 1 success */
1722 S_regtry(pTHX_ regexp *prog, char *startpos)
1730 PL_regindent = 0; /* XXXX Not good when matches are reenterable... */
1732 if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
1735 PL_reg_eval_set = RS_init;
1737 PerlIO_printf(Perl_debug_log, " setting stack tmpbase at %"IVdf"\n",
1738 (IV)(PL_stack_sp - PL_stack_base));
1740 SAVEI32(cxstack[cxstack_ix].blk_oldsp);
1741 cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
1742 /* Otherwise OP_NEXTSTATE will free whatever on stack now. */
1744 /* Apparently this is not needed, judging by wantarray. */
1745 /* SAVEI8(cxstack[cxstack_ix].blk_gimme);
1746 cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
1749 /* Make $_ available to executed code. */
1750 if (PL_reg_sv != DEFSV) {
1751 /* SAVE_DEFSV does *not* suffice here for USE_THREADS */
1756 if (!(SvTYPE(PL_reg_sv) >= SVt_PVMG && SvMAGIC(PL_reg_sv)
1757 && (mg = mg_find(PL_reg_sv, 'g')))) {
1758 /* prepare for quick setting of pos */
1759 sv_magic(PL_reg_sv, (SV*)0, 'g', Nullch, 0);
1760 mg = mg_find(PL_reg_sv, 'g');
1764 PL_reg_oldpos = mg->mg_len;
1765 SAVEDESTRUCTOR_X(restore_pos, 0);
1768 Newz(22,PL_reg_curpm, 1, PMOP);
1769 PL_reg_curpm->op_pmregexp = prog;
1770 PL_reg_oldcurpm = PL_curpm;
1771 PL_curpm = PL_reg_curpm;
1772 if (RX_MATCH_COPIED(prog)) {
1773 /* Here is a serious problem: we cannot rewrite subbeg,
1774 since it may be needed if this match fails. Thus
1775 $` inside (?{}) could fail... */
1776 PL_reg_oldsaved = prog->subbeg;
1777 PL_reg_oldsavedlen = prog->sublen;
1778 RX_MATCH_COPIED_off(prog);
1781 PL_reg_oldsaved = Nullch;
1782 prog->subbeg = PL_bostr;
1783 prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
1785 prog->startp[0] = startpos - PL_bostr;
1786 PL_reginput = startpos;
1787 PL_regstartp = prog->startp;
1788 PL_regendp = prog->endp;
1789 PL_reglastparen = &prog->lastparen;
1790 prog->lastparen = 0;
1792 DEBUG_r(PL_reg_starttry = startpos);
1793 if (PL_reg_start_tmpl <= prog->nparens) {
1794 PL_reg_start_tmpl = prog->nparens*3/2 + 3;
1795 if(PL_reg_start_tmp)
1796 Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1798 New(22,PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1801 /* XXXX What this code is doing here?!!! There should be no need
1802 to do this again and again, PL_reglastparen should take care of
1805 /* Tests pat.t#187 and split.t#{13,14} seem to depend on this code.
1806 * Actually, the code in regcppop() (which Ilya may be meaning by
1807 * PL_reglastparen), is not needed at all by the test suite
1808 * (op/regexp, op/pat, op/split), but that code is needed, oddly
1809 * enough, for building DynaLoader, or otherwise this
1810 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
1811 * will happen. Meanwhile, this code *is* needed for the
1812 * above-mentioned test suite tests to succeed. The common theme
1813 * on those tests seems to be returning null fields from matches.
1818 if (prog->nparens) {
1819 for (i = prog->nparens; i > *PL_reglastparen; i--) {
1826 if (regmatch(prog->program + 1)) {
1827 prog->endp[0] = PL_reginput - PL_bostr;
1830 REGCP_UNWIND(lastcp);
1834 #define RE_UNWIND_BRANCH 1
1835 #define RE_UNWIND_BRANCHJ 2
1839 typedef struct { /* XX: makes sense to enlarge it... */
1843 } re_unwind_generic_t;
1856 } re_unwind_branch_t;
1858 typedef union re_unwind_t {
1860 re_unwind_generic_t generic;
1861 re_unwind_branch_t branch;
1865 - regmatch - main matching routine
1867 * Conceptually the strategy is simple: check to see whether the current
1868 * node matches, call self recursively to see whether the rest matches,
1869 * and then act accordingly. In practice we make some effort to avoid
1870 * recursion, in particular by going through "ordinary" nodes (that don't
1871 * need to know whether the rest of the match failed) by a loop instead of
1874 /* [lwall] I've hoisted the register declarations to the outer block in order to
1875 * maybe save a little bit of pushing and popping on the stack. It also takes
1876 * advantage of machines that use a register save mask on subroutine entry.
1878 STATIC I32 /* 0 failure, 1 success */
1879 S_regmatch(pTHX_ regnode *prog)
1881 register regnode *scan; /* Current node. */
1882 regnode *next; /* Next node. */
1883 regnode *inner; /* Next node in internal branch. */
1884 register I32 nextchr; /* renamed nextchr - nextchar colides with
1885 function of same name */
1886 register I32 n; /* no or next */
1887 register I32 ln; /* len or last */
1888 register char *s; /* operand or save */
1889 register char *locinput = PL_reginput;
1890 register I32 c1, c2, paren; /* case fold search, parenth */
1891 int minmod = 0, sw = 0, logical = 0;
1893 I32 firstcp = PL_savestack_ix;
1899 /* Note that nextchr is a byte even in UTF */
1900 nextchr = UCHARAT(locinput);
1902 while (scan != NULL) {
1903 #define sayNO_L (logical ? (logical = 0, sw = 0, goto cont) : sayNO)
1905 # define sayYES goto yes
1906 # define sayNO goto no
1907 # define sayYES_FINAL goto yes_final
1908 # define sayYES_LOUD goto yes_loud
1909 # define sayNO_FINAL goto no_final
1910 # define sayNO_SILENT goto do_no
1911 # define saySAME(x) if (x) goto yes; else goto no
1912 # define REPORT_CODE_OFF 24
1914 # define sayYES return 1
1915 # define sayNO return 0
1916 # define sayYES_FINAL return 1
1917 # define sayYES_LOUD return 1
1918 # define sayNO_FINAL return 0
1919 # define sayNO_SILENT return 0
1920 # define saySAME(x) return x
1923 SV *prop = sv_newmortal();
1924 int docolor = *PL_colors[0];
1925 int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
1926 int l = (PL_regeol - locinput > taill ? taill : PL_regeol - locinput);
1927 /* The part of the string before starttry has one color
1928 (pref0_len chars), between starttry and current
1929 position another one (pref_len - pref0_len chars),
1930 after the current position the third one.
1931 We assume that pref0_len <= pref_len, otherwise we
1932 decrease pref0_len. */
1933 int pref_len = (locinput - PL_bostr > (5 + taill) - l
1934 ? (5 + taill) - l : locinput - PL_bostr);
1935 int pref0_len = pref_len - (locinput - PL_reg_starttry);
1937 if (l + pref_len < (5 + taill) && l < PL_regeol - locinput)
1938 l = ( PL_regeol - locinput > (5 + taill) - pref_len
1939 ? (5 + taill) - pref_len : PL_regeol - locinput);
1942 if (pref0_len > pref_len)
1943 pref0_len = pref_len;
1944 regprop(prop, scan);
1945 PerlIO_printf(Perl_debug_log,
1946 "%4"IVdf" <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|%3"IVdf":%*s%s\n",
1947 (IV)(locinput - PL_bostr),
1948 PL_colors[4], pref0_len,
1949 locinput - pref_len, PL_colors[5],
1950 PL_colors[2], pref_len - pref0_len,
1951 locinput - pref_len + pref0_len, PL_colors[3],
1952 (docolor ? "" : "> <"),
1953 PL_colors[0], l, locinput, PL_colors[1],
1954 15 - l - pref_len + 1,
1956 (IV)(scan - PL_regprogram), PL_regindent*2, "",
1960 next = scan + NEXT_OFF(scan);
1966 if (locinput == PL_bostr
1967 ? PL_regprev == '\n'
1969 (nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
1971 /* regtill = regbol; */
1976 if (locinput == PL_bostr
1977 ? PL_regprev == '\n'
1978 : ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
1984 if (locinput == PL_bostr)
1988 if (locinput == PL_reg_ganch)
1998 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
2003 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
2005 if (PL_regeol - locinput > 1)
2009 if (PL_regeol != locinput)
2013 if (nextchr & 0x80) {
2014 locinput += PL_utf8skip[nextchr];
2015 if (locinput > PL_regeol)
2017 nextchr = UCHARAT(locinput);
2020 if (!nextchr && locinput >= PL_regeol)
2022 nextchr = UCHARAT(++locinput);
2025 if (!nextchr && locinput >= PL_regeol)
2027 nextchr = UCHARAT(++locinput);
2030 if (nextchr & 0x80) {
2031 locinput += PL_utf8skip[nextchr];
2032 if (locinput > PL_regeol)
2034 nextchr = UCHARAT(locinput);
2037 if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
2039 nextchr = UCHARAT(++locinput);
2042 if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
2044 nextchr = UCHARAT(++locinput);
2049 /* Inline the first character, for speed. */
2050 if (UCHARAT(s) != nextchr)
2052 if (PL_regeol - locinput < ln)
2054 if (ln > 1 && memNE(s, locinput, ln))
2057 nextchr = UCHARAT(locinput);
2060 PL_reg_flags |= RF_tainted;
2069 c1 = OP(scan) == EXACTF;
2073 if (utf8_to_uv((U8*)s, e - s, 0, 0) !=
2075 toLOWER_utf8((U8*)l) :
2076 toLOWER_LC_utf8((U8*)l)))
2084 nextchr = UCHARAT(locinput);
2088 /* Inline the first character, for speed. */
2089 if (UCHARAT(s) != nextchr &&
2090 UCHARAT(s) != ((OP(scan) == EXACTF)
2091 ? PL_fold : PL_fold_locale)[nextchr])
2093 if (PL_regeol - locinput < ln)
2095 if (ln > 1 && (OP(scan) == EXACTF
2096 ? ibcmp(s, locinput, ln)
2097 : ibcmp_locale(s, locinput, ln)))
2100 nextchr = UCHARAT(locinput);
2103 if (!REGINCLASSUTF8(scan, (U8*)locinput))
2105 if (locinput >= PL_regeol)
2107 locinput += PL_utf8skip[nextchr];
2108 nextchr = UCHARAT(locinput);
2112 nextchr = UCHARAT(locinput);
2113 if (!REGINCLASS(scan, nextchr))
2115 if (!nextchr && locinput >= PL_regeol)
2117 nextchr = UCHARAT(++locinput);
2120 PL_reg_flags |= RF_tainted;
2125 if (!(OP(scan) == ALNUM
2126 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
2128 nextchr = UCHARAT(++locinput);
2131 PL_reg_flags |= RF_tainted;
2136 if (nextchr & 0x80) {
2137 if (!(OP(scan) == ALNUMUTF8
2138 ? swash_fetch(PL_utf8_alnum, (U8*)locinput)
2139 : isALNUM_LC_utf8((U8*)locinput)))
2143 locinput += PL_utf8skip[nextchr];
2144 nextchr = UCHARAT(locinput);
2147 if (!(OP(scan) == ALNUMUTF8
2148 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
2150 nextchr = UCHARAT(++locinput);
2153 PL_reg_flags |= RF_tainted;
2156 if (!nextchr && locinput >= PL_regeol)
2158 if (OP(scan) == NALNUM
2159 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
2161 nextchr = UCHARAT(++locinput);
2164 PL_reg_flags |= RF_tainted;
2167 if (!nextchr && locinput >= PL_regeol)
2169 if (nextchr & 0x80) {
2170 if (OP(scan) == NALNUMUTF8
2171 ? swash_fetch(PL_utf8_alnum, (U8*)locinput)
2172 : isALNUM_LC_utf8((U8*)locinput))
2176 locinput += PL_utf8skip[nextchr];
2177 nextchr = UCHARAT(locinput);
2180 if (OP(scan) == NALNUMUTF8
2181 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
2183 nextchr = UCHARAT(++locinput);
2187 PL_reg_flags |= RF_tainted;
2191 /* was last char in word? */
2192 ln = (locinput != PL_regbol) ? UCHARAT(locinput - 1) : PL_regprev;
2193 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2195 n = isALNUM(nextchr);
2198 ln = isALNUM_LC(ln);
2199 n = isALNUM_LC(nextchr);
2201 if (((!ln) == (!n)) == (OP(scan) == BOUND || OP(scan) == BOUNDL))
2206 PL_reg_flags |= RF_tainted;
2210 /* was last char in word? */
2211 if (locinput == PL_regbol)
2214 U8 *r = reghop((U8*)locinput, -1);
2216 ln = utf8_to_uv(r, s - (char*)r, 0, 0);
2218 if (OP(scan) == BOUNDUTF8 || OP(scan) == NBOUNDUTF8) {
2219 ln = isALNUM_uni(ln);
2220 n = swash_fetch(PL_utf8_alnum, (U8*)locinput);
2223 ln = isALNUM_LC_uni(ln);
2224 n = isALNUM_LC_utf8((U8*)locinput);
2226 if (((!ln) == (!n)) == (OP(scan) == BOUNDUTF8 || OP(scan) == BOUNDLUTF8))
2230 PL_reg_flags |= RF_tainted;
2235 if (!(OP(scan) == SPACE
2236 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
2238 nextchr = UCHARAT(++locinput);
2241 PL_reg_flags |= RF_tainted;
2246 if (nextchr & 0x80) {
2247 if (!(OP(scan) == SPACEUTF8
2248 ? swash_fetch(PL_utf8_space, (U8*)locinput)
2249 : isSPACE_LC_utf8((U8*)locinput)))
2253 locinput += PL_utf8skip[nextchr];
2254 nextchr = UCHARAT(locinput);
2257 if (!(OP(scan) == SPACEUTF8
2258 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
2260 nextchr = UCHARAT(++locinput);
2263 PL_reg_flags |= RF_tainted;
2266 if (!nextchr && locinput >= PL_regeol)
2268 if (OP(scan) == NSPACE
2269 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
2271 nextchr = UCHARAT(++locinput);
2274 PL_reg_flags |= RF_tainted;
2277 if (!nextchr && locinput >= PL_regeol)
2279 if (nextchr & 0x80) {
2280 if (OP(scan) == NSPACEUTF8
2281 ? swash_fetch(PL_utf8_space, (U8*)locinput)
2282 : isSPACE_LC_utf8((U8*)locinput))
2286 locinput += PL_utf8skip[nextchr];
2287 nextchr = UCHARAT(locinput);
2290 if (OP(scan) == NSPACEUTF8
2291 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
2293 nextchr = UCHARAT(++locinput);
2296 PL_reg_flags |= RF_tainted;
2301 if (!(OP(scan) == DIGIT
2302 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
2304 nextchr = UCHARAT(++locinput);
2307 PL_reg_flags |= RF_tainted;
2312 if (nextchr & 0x80) {
2313 if (!(OP(scan) == DIGITUTF8
2314 ? swash_fetch(PL_utf8_digit, (U8*)locinput)
2315 : isDIGIT_LC_utf8((U8*)locinput)))
2319 locinput += PL_utf8skip[nextchr];
2320 nextchr = UCHARAT(locinput);
2323 if (!(OP(scan) == DIGITUTF8
2324 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
2326 nextchr = UCHARAT(++locinput);
2329 PL_reg_flags |= RF_tainted;
2332 if (!nextchr && locinput >= PL_regeol)
2334 if (OP(scan) == NDIGIT
2335 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
2337 nextchr = UCHARAT(++locinput);
2340 PL_reg_flags |= RF_tainted;
2343 if (!nextchr && locinput >= PL_regeol)
2345 if (nextchr & 0x80) {
2346 if (OP(scan) == NDIGITUTF8
2347 ? swash_fetch(PL_utf8_digit, (U8*)locinput)
2348 : isDIGIT_LC_utf8((U8*)locinput))
2352 locinput += PL_utf8skip[nextchr];
2353 nextchr = UCHARAT(locinput);
2356 if (OP(scan) == NDIGITUTF8
2357 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
2359 nextchr = UCHARAT(++locinput);
2362 if (locinput >= PL_regeol || swash_fetch(PL_utf8_mark,(U8*)locinput))
2364 locinput += PL_utf8skip[nextchr];
2365 while (locinput < PL_regeol && swash_fetch(PL_utf8_mark,(U8*)locinput))
2366 locinput += UTF8SKIP(locinput);
2367 if (locinput > PL_regeol)
2369 nextchr = UCHARAT(locinput);
2372 PL_reg_flags |= RF_tainted;
2376 n = ARG(scan); /* which paren pair */
2377 ln = PL_regstartp[n];
2378 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
2379 if (*PL_reglastparen < n || ln == -1)
2380 sayNO; /* Do not match unless seen CLOSEn. */
2381 if (ln == PL_regendp[n])
2385 if (UTF && OP(scan) != REF) { /* REF can do byte comparison */
2387 char *e = PL_bostr + PL_regendp[n];
2389 * Note that we can't do the "other character" lookup trick as
2390 * in the 8-bit case (no pun intended) because in Unicode we
2391 * have to map both upper and title case to lower case.
2393 if (OP(scan) == REFF) {
2397 if (toLOWER_utf8((U8*)s) != toLOWER_utf8((U8*)l))
2407 if (toLOWER_LC_utf8((U8*)s) != toLOWER_LC_utf8((U8*)l))
2414 nextchr = UCHARAT(locinput);
2418 /* Inline the first character, for speed. */
2419 if (UCHARAT(s) != nextchr &&
2421 (UCHARAT(s) != ((OP(scan) == REFF
2422 ? PL_fold : PL_fold_locale)[nextchr]))))
2424 ln = PL_regendp[n] - ln;
2425 if (locinput + ln > PL_regeol)
2427 if (ln > 1 && (OP(scan) == REF
2428 ? memNE(s, locinput, ln)
2430 ? ibcmp(s, locinput, ln)
2431 : ibcmp_locale(s, locinput, ln))))
2434 nextchr = UCHARAT(locinput);
2445 OP_4tree *oop = PL_op;
2446 COP *ocurcop = PL_curcop;
2447 SV **ocurpad = PL_curpad;
2451 PL_op = (OP_4tree*)PL_regdata->data[n];
2452 DEBUG_r( PerlIO_printf(Perl_debug_log, " re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
2453 PL_curpad = AvARRAY((AV*)PL_regdata->data[n + 2]);
2454 PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
2456 CALLRUNOPS(aTHX); /* Scalar context. */
2462 PL_curpad = ocurpad;
2463 PL_curcop = ocurcop;
2465 if (logical == 2) { /* Postponed subexpression. */
2467 MAGIC *mg = Null(MAGIC*);
2469 CHECKPOINT cp, lastcp;
2471 if(SvROK(ret) || SvRMAGICAL(ret)) {
2472 SV *sv = SvROK(ret) ? SvRV(ret) : ret;
2475 mg = mg_find(sv, 'r');
2478 re = (regexp *)mg->mg_obj;
2479 (void)ReREFCNT_inc(re);
2483 char *t = SvPV(ret, len);
2485 char *oprecomp = PL_regprecomp;
2486 I32 osize = PL_regsize;
2487 I32 onpar = PL_regnpar;
2490 pm.op_pmdynflags = (UTF||DO_UTF8(ret) ? PMdf_UTF8 : 0);
2491 re = CALLREGCOMP(aTHX_ t, t + len, &pm);
2493 & (SVs_TEMP | SVs_PADTMP | SVf_READONLY)))
2494 sv_magic(ret,(SV*)ReREFCNT_inc(re),'r',0,0);
2495 PL_regprecomp = oprecomp;
2500 PerlIO_printf(Perl_debug_log,
2501 "Entering embedded `%s%.60s%s%s'\n",
2505 (strlen(re->precomp) > 60 ? "..." : ""))
2508 state.prev = PL_reg_call_cc;
2509 state.cc = PL_regcc;
2510 state.re = PL_reg_re;
2514 cp = regcppush(0); /* Save *all* the positions. */
2517 state.ss = PL_savestack_ix;
2518 *PL_reglastparen = 0;
2519 PL_reg_call_cc = &state;
2520 PL_reginput = locinput;
2522 /* XXXX This is too dramatic a measure... */
2525 if (regmatch(re->program + 1)) {
2526 /* Even though we succeeded, we need to restore
2527 global variables, since we may be wrapped inside
2528 SUSPEND, thus the match may be not finished yet. */
2530 /* XXXX Do this only if SUSPENDed? */
2531 PL_reg_call_cc = state.prev;
2532 PL_regcc = state.cc;
2533 PL_reg_re = state.re;
2534 cache_re(PL_reg_re);
2536 /* XXXX This is too dramatic a measure... */
2539 /* These are needed even if not SUSPEND. */
2545 REGCP_UNWIND(lastcp);
2547 PL_reg_call_cc = state.prev;
2548 PL_regcc = state.cc;
2549 PL_reg_re = state.re;
2550 cache_re(PL_reg_re);
2552 /* XXXX This is too dramatic a measure... */
2561 sv_setsv(save_scalar(PL_replgv), ret);
2565 n = ARG(scan); /* which paren pair */
2566 PL_reg_start_tmp[n] = locinput;
2571 n = ARG(scan); /* which paren pair */
2572 PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
2573 PL_regendp[n] = locinput - PL_bostr;
2574 if (n > *PL_reglastparen)
2575 *PL_reglastparen = n;
2578 n = ARG(scan); /* which paren pair */
2579 sw = (*PL_reglastparen >= n && PL_regendp[n] != -1);
2582 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
2584 next = NEXTOPER(NEXTOPER(scan));
2586 next = scan + ARG(scan);
2587 if (OP(next) == IFTHEN) /* Fake one. */
2588 next = NEXTOPER(NEXTOPER(next));
2592 logical = scan->flags;
2594 /*******************************************************************
2595 PL_regcc contains infoblock about the innermost (...)* loop, and
2596 a pointer to the next outer infoblock.
2598 Here is how Y(A)*Z is processed (if it is compiled into CURLYX/WHILEM):
2600 1) After matching X, regnode for CURLYX is processed;
2602 2) This regnode creates infoblock on the stack, and calls
2603 regmatch() recursively with the starting point at WHILEM node;
2605 3) Each hit of WHILEM node tries to match A and Z (in the order
2606 depending on the current iteration, min/max of {min,max} and
2607 greediness). The information about where are nodes for "A"
2608 and "Z" is read from the infoblock, as is info on how many times "A"
2609 was already matched, and greediness.
2611 4) After A matches, the same WHILEM node is hit again.
2613 5) Each time WHILEM is hit, PL_regcc is the infoblock created by CURLYX
2614 of the same pair. Thus when WHILEM tries to match Z, it temporarily
2615 resets PL_regcc, since this Y(A)*Z can be a part of some other loop:
2616 as in (Y(A)*Z)*. If Z matches, the automaton will hit the WHILEM node
2617 of the external loop.
2619 Currently present infoblocks form a tree with a stem formed by PL_curcc
2620 and whatever it mentions via ->next, and additional attached trees
2621 corresponding to temporarily unset infoblocks as in "5" above.
2623 In the following picture infoblocks for outer loop of
2624 (Y(A)*?Z)*?T are denoted O, for inner I. NULL starting block
2625 is denoted by x. The matched string is YAAZYAZT. Temporarily postponed
2626 infoblocks are drawn below the "reset" infoblock.
2628 In fact in the picture below we do not show failed matches for Z and T
2629 by WHILEM blocks. [We illustrate minimal matches, since for them it is
2630 more obvious *why* one needs to *temporary* unset infoblocks.]
2632 Matched REx position InfoBlocks Comment
2636 Y A)*?Z)*?T x <- O <- I
2637 YA )*?Z)*?T x <- O <- I
2638 YA A)*?Z)*?T x <- O <- I
2639 YAA )*?Z)*?T x <- O <- I
2640 YAA Z)*?T x <- O # Temporary unset I
2643 YAAZ Y(A)*?Z)*?T x <- O
2646 YAAZY (A)*?Z)*?T x <- O
2649 YAAZY A)*?Z)*?T x <- O <- I
2652 YAAZYA )*?Z)*?T x <- O <- I
2655 YAAZYA Z)*?T x <- O # Temporary unset I
2661 YAAZYAZ T x # Temporary unset O
2668 *******************************************************************/
2671 CHECKPOINT cp = PL_savestack_ix;
2672 /* No need to save/restore up to this paren */
2673 I32 parenfloor = scan->flags;
2675 if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
2677 cc.oldcc = PL_regcc;
2679 /* XXXX Probably it is better to teach regpush to support
2680 parenfloor > PL_regsize... */
2681 if (parenfloor > *PL_reglastparen)
2682 parenfloor = *PL_reglastparen; /* Pessimization... */
2683 cc.parenfloor = parenfloor;
2685 cc.min = ARG1(scan);
2686 cc.max = ARG2(scan);
2687 cc.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
2691 PL_reginput = locinput;
2692 n = regmatch(PREVOPER(next)); /* start on the WHILEM */
2694 PL_regcc = cc.oldcc;
2700 * This is really hard to understand, because after we match
2701 * what we're trying to match, we must make sure the rest of
2702 * the REx is going to match for sure, and to do that we have
2703 * to go back UP the parse tree by recursing ever deeper. And
2704 * if it fails, we have to reset our parent's current state
2705 * that we can try again after backing off.
2708 CHECKPOINT cp, lastcp;
2709 CURCUR* cc = PL_regcc;
2710 char *lastloc = cc->lastloc; /* Detection of 0-len. */
2712 n = cc->cur + 1; /* how many we know we matched */
2713 PL_reginput = locinput;
2716 PerlIO_printf(Perl_debug_log,
2717 "%*s %ld out of %ld..%ld cc=%lx\n",
2718 REPORT_CODE_OFF+PL_regindent*2, "",
2719 (long)n, (long)cc->min,
2720 (long)cc->max, (long)cc)
2723 /* If degenerate scan matches "", assume scan done. */
2725 if (locinput == cc->lastloc && n >= cc->min) {
2726 PL_regcc = cc->oldcc;
2730 PerlIO_printf(Perl_debug_log,
2731 "%*s empty match detected, try continuation...\n",
2732 REPORT_CODE_OFF+PL_regindent*2, "")
2734 if (regmatch(cc->next))
2742 /* First just match a string of min scans. */
2746 cc->lastloc = locinput;
2747 if (regmatch(cc->scan))
2750 cc->lastloc = lastloc;
2755 /* Check whether we already were at this position.
2756 Postpone detection until we know the match is not
2757 *that* much linear. */
2758 if (!PL_reg_maxiter) {
2759 PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
2760 PL_reg_leftiter = PL_reg_maxiter;
2762 if (PL_reg_leftiter-- == 0) {
2763 I32 size = (PL_reg_maxiter + 7)/8;
2764 if (PL_reg_poscache) {
2765 if (PL_reg_poscache_size < size) {
2766 Renew(PL_reg_poscache, size, char);
2767 PL_reg_poscache_size = size;
2769 Zero(PL_reg_poscache, size, char);
2772 PL_reg_poscache_size = size;
2773 Newz(29, PL_reg_poscache, size, char);
2776 PerlIO_printf(Perl_debug_log,
2777 "%sDetected a super-linear match, switching on caching%s...\n",
2778 PL_colors[4], PL_colors[5])
2781 if (PL_reg_leftiter < 0) {
2782 I32 o = locinput - PL_bostr, b;
2784 o = (scan->flags & 0xf) - 1 + o * (scan->flags>>4);
2787 if (PL_reg_poscache[o] & (1<<b)) {
2789 PerlIO_printf(Perl_debug_log,
2790 "%*s already tried at this position...\n",
2791 REPORT_CODE_OFF+PL_regindent*2, "")
2795 PL_reg_poscache[o] |= (1<<b);
2799 /* Prefer next over scan for minimal matching. */
2802 PL_regcc = cc->oldcc;
2805 cp = regcppush(cc->parenfloor);
2807 if (regmatch(cc->next)) {
2809 sayYES; /* All done. */
2811 REGCP_UNWIND(lastcp);
2817 if (n >= cc->max) { /* Maximum greed exceeded? */
2818 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
2819 && !(PL_reg_flags & RF_warned)) {
2820 PL_reg_flags |= RF_warned;
2821 Perl_warner(aTHX_ WARN_REGEXP, "%s limit (%d) exceeded",
2822 "Complex regular subexpression recursion",
2829 PerlIO_printf(Perl_debug_log,
2830 "%*s trying longer...\n",
2831 REPORT_CODE_OFF+PL_regindent*2, "")
2833 /* Try scanning more and see if it helps. */
2834 PL_reginput = locinput;
2836 cc->lastloc = locinput;
2837 cp = regcppush(cc->parenfloor);
2839 if (regmatch(cc->scan)) {
2843 REGCP_UNWIND(lastcp);
2846 cc->lastloc = lastloc;
2850 /* Prefer scan over next for maximal matching. */
2852 if (n < cc->max) { /* More greed allowed? */
2853 cp = regcppush(cc->parenfloor);
2855 cc->lastloc = locinput;
2857 if (regmatch(cc->scan)) {
2861 REGCP_UNWIND(lastcp);
2862 regcppop(); /* Restore some previous $<digit>s? */
2863 PL_reginput = locinput;
2865 PerlIO_printf(Perl_debug_log,
2866 "%*s failed, try continuation...\n",
2867 REPORT_CODE_OFF+PL_regindent*2, "")
2870 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
2871 && !(PL_reg_flags & RF_warned)) {
2872 PL_reg_flags |= RF_warned;
2873 Perl_warner(aTHX_ WARN_REGEXP, "%s limit (%d) exceeded",
2874 "Complex regular subexpression recursion",
2878 /* Failed deeper matches of scan, so see if this one works. */
2879 PL_regcc = cc->oldcc;
2882 if (regmatch(cc->next))
2888 cc->lastloc = lastloc;
2893 next = scan + ARG(scan);
2896 inner = NEXTOPER(NEXTOPER(scan));
2899 inner = NEXTOPER(scan);
2904 if (OP(next) != c1) /* No choice. */
2905 next = inner; /* Avoid recursion. */
2907 I32 lastparen = *PL_reglastparen;
2909 re_unwind_branch_t *uw;
2911 /* Put unwinding data on stack */
2912 unwind1 = SSNEWt(1,re_unwind_branch_t);
2913 uw = SSPTRt(unwind1,re_unwind_branch_t);
2916 uw->type = ((c1 == BRANCH)
2918 : RE_UNWIND_BRANCHJ);
2919 uw->lastparen = lastparen;
2921 uw->locinput = locinput;
2922 uw->nextchr = nextchr;
2924 uw->regindent = ++PL_regindent;
2927 REGCP_SET(uw->lastcp);
2929 /* Now go into the first branch */
2942 /* We suppose that the next guy does not need
2943 backtracking: in particular, it is of constant length,
2944 and has no parenths to influence future backrefs. */
2945 ln = ARG1(scan); /* min to match */
2946 n = ARG2(scan); /* max to match */
2947 paren = scan->flags;
2949 if (paren > PL_regsize)
2951 if (paren > *PL_reglastparen)
2952 *PL_reglastparen = paren;
2954 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
2956 scan += NEXT_OFF(scan); /* Skip former OPEN. */
2957 PL_reginput = locinput;
2960 if (ln && regrepeat_hard(scan, ln, &l) < ln)
2962 if (ln && l == 0 && n >= ln
2963 /* In fact, this is tricky. If paren, then the
2964 fact that we did/didnot match may influence
2965 future execution. */
2966 && !(paren && ln == 0))
2968 locinput = PL_reginput;
2969 if (PL_regkind[(U8)OP(next)] == EXACT) {
2970 c1 = (U8)*STRING(next);
2971 if (OP(next) == EXACTF)
2973 else if (OP(next) == EXACTFL)
2974 c2 = PL_fold_locale[c1];
2981 /* This may be improved if l == 0. */
2982 while (n >= ln || (n == REG_INFTY && ln > 0 && l)) { /* ln overflow ? */
2983 /* If it could work, try it. */
2985 UCHARAT(PL_reginput) == c1 ||
2986 UCHARAT(PL_reginput) == c2)
2990 PL_regstartp[paren] =
2991 HOPc(PL_reginput, -l) - PL_bostr;
2992 PL_regendp[paren] = PL_reginput - PL_bostr;
2995 PL_regendp[paren] = -1;
2999 REGCP_UNWIND(lastcp);
3001 /* Couldn't or didn't -- move forward. */
3002 PL_reginput = locinput;
3003 if (regrepeat_hard(scan, 1, &l)) {
3005 locinput = PL_reginput;
3012 n = regrepeat_hard(scan, n, &l);
3013 if (n != 0 && l == 0
3014 /* In fact, this is tricky. If paren, then the
3015 fact that we did/didnot match may influence
3016 future execution. */
3017 && !(paren && ln == 0))
3019 locinput = PL_reginput;
3021 PerlIO_printf(Perl_debug_log,
3022 "%*s matched %"IVdf" times, len=%"IVdf"...\n",
3023 (int)(REPORT_CODE_OFF+PL_regindent*2), "",
3027 if (PL_regkind[(U8)OP(next)] == EXACT) {
3028 c1 = (U8)*STRING(next);
3029 if (OP(next) == EXACTF)
3031 else if (OP(next) == EXACTFL)
3032 c2 = PL_fold_locale[c1];
3041 /* If it could work, try it. */
3043 UCHARAT(PL_reginput) == c1 ||
3044 UCHARAT(PL_reginput) == c2)
3047 PerlIO_printf(Perl_debug_log,
3048 "%*s trying tail with n=%"IVdf"...\n",
3049 (int)(REPORT_CODE_OFF+PL_regindent*2), "", (IV)n)
3053 PL_regstartp[paren] = HOPc(PL_reginput, -l) - PL_bostr;
3054 PL_regendp[paren] = PL_reginput - PL_bostr;
3057 PL_regendp[paren] = -1;
3061 REGCP_UNWIND(lastcp);
3063 /* Couldn't or didn't -- back up. */
3065 locinput = HOPc(locinput, -l);
3066 PL_reginput = locinput;
3073 paren = scan->flags; /* Which paren to set */
3074 if (paren > PL_regsize)
3076 if (paren > *PL_reglastparen)
3077 *PL_reglastparen = paren;
3078 ln = ARG1(scan); /* min to match */
3079 n = ARG2(scan); /* max to match */
3080 scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
3084 ln = ARG1(scan); /* min to match */
3085 n = ARG2(scan); /* max to match */
3086 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
3091 scan = NEXTOPER(scan);
3097 scan = NEXTOPER(scan);
3101 * Lookahead to avoid useless match attempts
3102 * when we know what character comes next.
3104 if (PL_regkind[(U8)OP(next)] == EXACT) {
3105 c1 = (U8)*STRING(next);
3106 if (OP(next) == EXACTF)
3108 else if (OP(next) == EXACTFL)
3109 c2 = PL_fold_locale[c1];
3115 PL_reginput = locinput;
3119 if (ln && regrepeat(scan, ln) < ln)
3121 locinput = PL_reginput;
3124 char *e = locinput + n - ln; /* Should not check after this */
3125 char *old = locinput;
3127 if (e >= PL_regeol || (n == REG_INFTY))
3130 /* Find place 'next' could work */
3132 while (locinput <= e && *locinput != c1)
3135 while (locinput <= e
3142 /* PL_reginput == old now */
3143 if (locinput != old) {
3144 ln = 1; /* Did some */
3145 if (regrepeat(scan, locinput - old) <
3149 /* PL_reginput == locinput now */
3150 TRYPAREN(paren, ln, locinput);
3151 PL_reginput = locinput; /* Could be reset... */
3152 REGCP_UNWIND(lastcp);
3153 /* Couldn't or didn't -- move forward. */
3158 while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */
3159 /* If it could work, try it. */
3161 UCHARAT(PL_reginput) == c1 ||
3162 UCHARAT(PL_reginput) == c2)
3164 TRYPAREN(paren, n, PL_reginput);
3165 REGCP_UNWIND(lastcp);
3167 /* Couldn't or didn't -- move forward. */
3168 PL_reginput = locinput;
3169 if (regrepeat(scan, 1)) {
3171 locinput = PL_reginput;
3179 n = regrepeat(scan, n);
3180 locinput = PL_reginput;
3181 if (ln < n && PL_regkind[(U8)OP(next)] == EOL &&
3182 (!PL_multiline || OP(next) == SEOL || OP(next) == EOS)) {
3183 ln = n; /* why back off? */
3184 /* ...because $ and \Z can match before *and* after
3185 newline at the end. Consider "\n\n" =~ /\n+\Z\n/.
3186 We should back off by one in this case. */
3187 if (UCHARAT(PL_reginput - 1) == '\n' && OP(next) != EOS)
3193 /* If it could work, try it. */
3195 UCHARAT(PL_reginput) == c1 ||
3196 UCHARAT(PL_reginput) == c2)
3198 TRYPAREN(paren, n, PL_reginput);
3199 REGCP_UNWIND(lastcp);
3201 /* Couldn't or didn't -- back up. */
3203 PL_reginput = locinput = HOPc(locinput, -1);
3208 /* If it could work, try it. */
3210 UCHARAT(PL_reginput) == c1 ||
3211 UCHARAT(PL_reginput) == c2)
3213 TRYPAREN(paren, n, PL_reginput);
3214 REGCP_UNWIND(lastcp);
3216 /* Couldn't or didn't -- back up. */
3218 PL_reginput = locinput = HOPc(locinput, -1);
3225 if (PL_reg_call_cc) {
3226 re_cc_state *cur_call_cc = PL_reg_call_cc;
3227 CURCUR *cctmp = PL_regcc;
3228 regexp *re = PL_reg_re;
3229 CHECKPOINT cp, lastcp;
3231 cp = regcppush(0); /* Save *all* the positions. */
3233 regcp_set_to(PL_reg_call_cc->ss); /* Restore parens of
3235 PL_reginput = locinput; /* Make position available to
3237 cache_re(PL_reg_call_cc->re);
3238 PL_regcc = PL_reg_call_cc->cc;
3239 PL_reg_call_cc = PL_reg_call_cc->prev;
3240 if (regmatch(cur_call_cc->node)) {
3241 PL_reg_call_cc = cur_call_cc;
3245 REGCP_UNWIND(lastcp);
3247 PL_reg_call_cc = cur_call_cc;
3253 PerlIO_printf(Perl_debug_log,
3254 "%*s continuation failed...\n",
3255 REPORT_CODE_OFF+PL_regindent*2, "")
3259 if (locinput < PL_regtill) {
3260 DEBUG_r(PerlIO_printf(Perl_debug_log,
3261 "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
3263 (long)(locinput - PL_reg_starttry),
3264 (long)(PL_regtill - PL_reg_starttry),
3266 sayNO_FINAL; /* Cannot match: too short. */
3268 PL_reginput = locinput; /* put where regtry can find it */
3269 sayYES_FINAL; /* Success! */
3271 PL_reginput = locinput; /* put where regtry can find it */
3272 sayYES_LOUD; /* Success! */
3275 PL_reginput = locinput;
3280 if (UTF) { /* XXXX This is absolutely
3281 broken, we read before
3283 s = HOPMAYBEc(locinput, -scan->flags);
3289 if (locinput < PL_bostr + scan->flags)
3291 PL_reginput = locinput - scan->flags;
3296 PL_reginput = locinput;
3301 if (UTF) { /* XXXX This is absolutely
3302 broken, we read before
3304 s = HOPMAYBEc(locinput, -scan->flags);
3305 if (!s || s < PL_bostr)
3310 if (locinput < PL_bostr + scan->flags)
3312 PL_reginput = locinput - scan->flags;
3317 PL_reginput = locinput;
3320 inner = NEXTOPER(NEXTOPER(scan));
3321 if (regmatch(inner) != n) {
3336 if (OP(scan) == SUSPEND) {
3337 locinput = PL_reginput;
3338 nextchr = UCHARAT(locinput);
3343 next = scan + ARG(scan);
3348 PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
3349 PTR2UV(scan), OP(scan));
3350 Perl_croak(aTHX_ "regexp memory corruption");
3357 * We get here only if there's trouble -- normally "case END" is
3358 * the terminating point.
3360 Perl_croak(aTHX_ "corrupted regexp pointers");
3366 PerlIO_printf(Perl_debug_log,
3367 "%*s %scould match...%s\n",
3368 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],PL_colors[5])
3372 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
3373 PL_colors[4],PL_colors[5]));
3379 #if 0 /* Breaks $^R */
3387 PerlIO_printf(Perl_debug_log,
3388 "%*s %sfailed...%s\n",
3389 REPORT_CODE_OFF+PL_regindent*2, "",PL_colors[4],PL_colors[5])
3395 re_unwind_t *uw = SSPTRt(unwind,re_unwind_t);
3398 case RE_UNWIND_BRANCH:
3399 case RE_UNWIND_BRANCHJ:
3401 re_unwind_branch_t *uwb = &(uw->branch);
3402 I32 lastparen = uwb->lastparen;
3404 REGCP_UNWIND(uwb->lastcp);
3405 for (n = *PL_reglastparen; n > lastparen; n--)
3407 *PL_reglastparen = n;
3408 scan = next = uwb->next;
3410 OP(scan) != (uwb->type == RE_UNWIND_BRANCH
3411 ? BRANCH : BRANCHJ) ) { /* Failure */
3418 /* Have more choice yet. Reuse the same uwb. */
3420 if ((n = (uwb->type == RE_UNWIND_BRANCH
3421 ? NEXT_OFF(next) : ARG(next))))
3424 next = NULL; /* XXXX Needn't unwinding in this case... */
3426 next = NEXTOPER(scan);
3427 if (uwb->type == RE_UNWIND_BRANCHJ)
3428 next = NEXTOPER(next);
3429 locinput = uwb->locinput;
3430 nextchr = uwb->nextchr;
3432 PL_regindent = uwb->regindent;
3439 Perl_croak(aTHX_ "regexp unwind memory corruption");
3450 - regrepeat - repeatedly match something simple, report how many
3453 * [This routine now assumes that it will only match on things of length 1.
3454 * That was true before, but now we assume scan - reginput is the count,
3455 * rather than incrementing count on every character. [Er, except utf8.]]
3458 S_regrepeat(pTHX_ regnode *p, I32 max)
3460 register char *scan;
3462 register char *loceol = PL_regeol;
3463 register I32 hardcount = 0;
3466 if (max != REG_INFTY && max < loceol - scan)
3467 loceol = scan + max;
3470 while (scan < loceol && *scan != '\n')
3478 while (scan < loceol && *scan != '\n') {
3479 scan += UTF8SKIP(scan);
3485 while (scan < loceol) {
3486 scan += UTF8SKIP(scan);
3490 case EXACT: /* length of string is 1 */
3492 while (scan < loceol && UCHARAT(scan) == c)
3495 case EXACTF: /* length of string is 1 */
3497 while (scan < loceol &&
3498 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
3501 case EXACTFL: /* length of string is 1 */
3502 PL_reg_flags |= RF_tainted;
3504 while (scan < loceol &&
3505 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
3510 while (scan < loceol && REGINCLASSUTF8(p, (U8*)scan)) {
3511 scan += UTF8SKIP(scan);
3516 while (scan < loceol && REGINCLASS(p, *scan))
3520 while (scan < loceol && isALNUM(*scan))
3525 while (scan < loceol && swash_fetch(PL_utf8_alnum, (U8*)scan)) {
3526 scan += UTF8SKIP(scan);
3531 PL_reg_flags |= RF_tainted;
3532 while (scan < loceol && isALNUM_LC(*scan))
3536 PL_reg_flags |= RF_tainted;
3538 while (scan < loceol && isALNUM_LC_utf8((U8*)scan)) {
3539 scan += UTF8SKIP(scan);
3545 while (scan < loceol && !isALNUM(*scan))
3550 while (scan < loceol && !swash_fetch(PL_utf8_alnum, (U8*)scan)) {
3551 scan += UTF8SKIP(scan);
3556 PL_reg_flags |= RF_tainted;
3557 while (scan < loceol && !isALNUM_LC(*scan))
3561 PL_reg_flags |= RF_tainted;
3563 while (scan < loceol && !isALNUM_LC_utf8((U8*)scan)) {
3564 scan += UTF8SKIP(scan);
3569 while (scan < loceol && isSPACE(*scan))
3574 while (scan < loceol && (*scan == ' ' || swash_fetch(PL_utf8_space,(U8*)scan))) {
3575 scan += UTF8SKIP(scan);
3580 PL_reg_flags |= RF_tainted;
3581 while (scan < loceol && isSPACE_LC(*scan))
3585 PL_reg_flags |= RF_tainted;
3587 while (scan < loceol && (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
3588 scan += UTF8SKIP(scan);
3593 while (scan < loceol && !isSPACE(*scan))
3598 while (scan < loceol && !(*scan == ' ' || swash_fetch(PL_utf8_space,(U8*)scan))) {
3599 scan += UTF8SKIP(scan);
3604 PL_reg_flags |= RF_tainted;
3605 while (scan < loceol && !isSPACE_LC(*scan))
3609 PL_reg_flags |= RF_tainted;
3611 while (scan < loceol && !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
3612 scan += UTF8SKIP(scan);
3617 while (scan < loceol && isDIGIT(*scan))
3622 while (scan < loceol && swash_fetch(PL_utf8_digit,(U8*)scan)) {
3623 scan += UTF8SKIP(scan);
3629 while (scan < loceol && !isDIGIT(*scan))
3634 while (scan < loceol && !swash_fetch(PL_utf8_digit,(U8*)scan)) {
3635 scan += UTF8SKIP(scan);
3639 default: /* Called on something of 0 width. */
3640 break; /* So match right here or not at all. */
3646 c = scan - PL_reginput;
3651 SV *prop = sv_newmortal();
3654 PerlIO_printf(Perl_debug_log,
3655 "%*s %s can match %"IVdf" times out of %"IVdf"...\n",
3656 REPORT_CODE_OFF+1, "", SvPVX(prop),(IV)c,(IV)max);
3663 - regrepeat_hard - repeatedly match something, report total lenth and length
3665 * The repeater is supposed to have constant length.
3669 S_regrepeat_hard(pTHX_ regnode *p, I32 max, I32 *lp)
3671 register char *scan;
3672 register char *start;
3673 register char *loceol = PL_regeol;
3675 I32 count = 0, res = 1;
3680 start = PL_reginput;
3682 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
3685 while (start < PL_reginput) {
3687 start += UTF8SKIP(start);
3698 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
3700 *lp = l = PL_reginput - start;
3701 if (max != REG_INFTY && l*max < loceol - scan)
3702 loceol = scan + l*max;
3715 - reginclass - determine if a character falls into a character class
3719 S_reginclass(pTHX_ register regnode *p, register I32 c)
3721 char flags = ANYOF_FLAGS(p);
3725 if (ANYOF_BITMAP_TEST(p, c))
3727 else if (flags & ANYOF_FOLD) {
3729 if (flags & ANYOF_LOCALE) {
3730 PL_reg_flags |= RF_tainted;
3731 cf = PL_fold_locale[c];
3735 if (ANYOF_BITMAP_TEST(p, cf))
3739 if (!match && (flags & ANYOF_CLASS)) {
3740 PL_reg_flags |= RF_tainted;
3742 (ANYOF_CLASS_TEST(p, ANYOF_ALNUM) && isALNUM_LC(c)) ||
3743 (ANYOF_CLASS_TEST(p, ANYOF_NALNUM) && !isALNUM_LC(c)) ||
3744 (ANYOF_CLASS_TEST(p, ANYOF_SPACE) && isSPACE_LC(c)) ||
3745 (ANYOF_CLASS_TEST(p, ANYOF_NSPACE) && !isSPACE_LC(c)) ||
3746 (ANYOF_CLASS_TEST(p, ANYOF_DIGIT) && isDIGIT_LC(c)) ||
3747 (ANYOF_CLASS_TEST(p, ANYOF_NDIGIT) && !isDIGIT_LC(c)) ||
3748 (ANYOF_CLASS_TEST(p, ANYOF_ALNUMC) && isALNUMC_LC(c)) ||
3749 (ANYOF_CLASS_TEST(p, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
3750 (ANYOF_CLASS_TEST(p, ANYOF_ALPHA) && isALPHA_LC(c)) ||
3751 (ANYOF_CLASS_TEST(p, ANYOF_NALPHA) && !isALPHA_LC(c)) ||
3752 (ANYOF_CLASS_TEST(p, ANYOF_ASCII) && isASCII(c)) ||
3753 (ANYOF_CLASS_TEST(p, ANYOF_NASCII) && !isASCII(c)) ||
3754 (ANYOF_CLASS_TEST(p, ANYOF_CNTRL) && isCNTRL_LC(c)) ||
3755 (ANYOF_CLASS_TEST(p, ANYOF_NCNTRL) && !isCNTRL_LC(c)) ||
3756 (ANYOF_CLASS_TEST(p, ANYOF_GRAPH) && isGRAPH_LC(c)) ||
3757 (ANYOF_CLASS_TEST(p, ANYOF_NGRAPH) && !isGRAPH_LC(c)) ||
3758 (ANYOF_CLASS_TEST(p, ANYOF_LOWER) && isLOWER_LC(c)) ||
3759 (ANYOF_CLASS_TEST(p, ANYOF_NLOWER) && !isLOWER_LC(c)) ||
3760 (ANYOF_CLASS_TEST(p, ANYOF_PRINT) && isPRINT_LC(c)) ||
3761 (ANYOF_CLASS_TEST(p, ANYOF_NPRINT) && !isPRINT_LC(c)) ||
3762 (ANYOF_CLASS_TEST(p, ANYOF_PUNCT) && isPUNCT_LC(c)) ||
3763 (ANYOF_CLASS_TEST(p, ANYOF_NPUNCT) && !isPUNCT_LC(c)) ||
3764 (ANYOF_CLASS_TEST(p, ANYOF_UPPER) && isUPPER_LC(c)) ||
3765 (ANYOF_CLASS_TEST(p, ANYOF_NUPPER) && !isUPPER_LC(c)) ||
3766 (ANYOF_CLASS_TEST(p, ANYOF_XDIGIT) && isXDIGIT(c)) ||
3767 (ANYOF_CLASS_TEST(p, ANYOF_NXDIGIT) && !isXDIGIT(c)) ||
3768 (ANYOF_CLASS_TEST(p, ANYOF_PSXSPC) && isPSXSPC(c)) ||
3769 (ANYOF_CLASS_TEST(p, ANYOF_NPSXSPC) && !isPSXSPC(c)) ||
3770 (ANYOF_CLASS_TEST(p, ANYOF_BLANK) && isBLANK(c)) ||
3771 (ANYOF_CLASS_TEST(p, ANYOF_NBLANK) && !isBLANK(c))
3772 ) /* How's that for a conditional? */
3778 return (flags & ANYOF_INVERT) ? !match : match;
3782 S_reginclassutf8(pTHX_ regnode *f, U8 *p)
3784 char flags = ARG1(f);
3787 SV *rv = (SV*)PL_regdata->data[ARG2(f)];
3788 AV *av = (AV*)SvRV((SV*)rv);
3789 SV *sw = *av_fetch(av, 0, FALSE);
3790 SV *lv = *av_fetch(av, 1, FALSE);
3792 SV *sw = (SV*)PL_regdata->data[ARG2(f)];
3795 if (swash_fetch(sw, p))
3797 else if (flags & ANYOF_FOLD) {
3798 U8 tmpbuf[UTF8_MAXLEN+1];
3799 if (flags & ANYOF_LOCALE) {
3800 PL_reg_flags |= RF_tainted;
3801 uv_to_utf8(tmpbuf, toLOWER_LC_utf8(p));
3804 uv_to_utf8(tmpbuf, toLOWER_utf8(p));
3805 if (swash_fetch(sw, tmpbuf))
3809 /* UTF8 combined with ANYOF_CLASS is ill-defined. */
3811 return (flags & ANYOF_INVERT) ? !match : match;
3815 S_reghop(pTHX_ U8 *s, I32 off)
3818 while (off-- && s < (U8*)PL_regeol)
3823 if (s > (U8*)PL_bostr) {
3826 while (s > (U8*)PL_bostr && (*s & 0xc0) == 0x80)
3828 } /* XXX could check well-formedness here */
3836 S_reghopmaybe(pTHX_ U8* s, I32 off)
3839 while (off-- && s < (U8*)PL_regeol)
3846 if (s > (U8*)PL_bostr) {
3849 while (s > (U8*)PL_bostr && (*s & 0xc0) == 0x80)
3851 } /* XXX could check well-formedness here */
3867 restore_pos(pTHXo_ void *arg)
3869 if (PL_reg_eval_set) {
3870 if (PL_reg_oldsaved) {
3871 PL_reg_re->subbeg = PL_reg_oldsaved;
3872 PL_reg_re->sublen = PL_reg_oldsavedlen;
3873 RX_MATCH_COPIED_on(PL_reg_re);
3875 PL_reg_magic->mg_len = PL_reg_oldpos;
3876 PL_reg_eval_set = 0;
3877 PL_curpm = PL_reg_oldcurpm;