5 * "One Ring to rule them all, One Ring to find them..."
8 /* This file contains functions for executing a regular expression. See
9 * also regcomp.c which funnily enough, contains functions for compiling
10 * a regular expression.
12 * This file is also copied at build time to ext/re/re_exec.c, where
13 * it's built with -DPERL_EXT_RE_BUILD -DPERL_EXT_RE_DEBUG -DPERL_EXT.
14 * This causes the main functions to be compiled under new names and with
15 * debugging support added, which makes "use re 'debug'" work.
19 /* NOTE: this is derived from Henry Spencer's regexp code, and should not
20 * confused with the original package (see point 3 below). Thanks, Henry!
23 /* Additional note: this code is very heavily munged from Henry's version
24 * in places. In some spots I've traded clarity for efficiency, so don't
25 * blame Henry for some of the lack of readability.
28 /* The names of the functions have been changed from regcomp and
29 * regexec to pregcomp and pregexec in order to avoid conflicts
30 * with the POSIX routines of the same names.
33 #ifdef PERL_EXT_RE_BUILD
34 /* need to replace pregcomp et al, so enable that */
35 # ifndef PERL_IN_XSUB_RE
36 # define PERL_IN_XSUB_RE
38 /* need access to debugger hooks */
39 # if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING)
44 #ifdef PERL_IN_XSUB_RE
45 /* We *really* need to overwrite these symbols: */
46 # define Perl_regexec_flags my_regexec
47 # define Perl_regdump my_regdump
48 # define Perl_regprop my_regprop
49 # define Perl_re_intuit_start my_re_intuit_start
50 /* *These* symbols are masked to allow static link. */
51 # define Perl_pregexec my_pregexec
52 # define Perl_reginitcolors my_reginitcolors
53 # define Perl_regclass_swash my_regclass_swash
55 # define PERL_NO_GET_CONTEXT
59 * pregcomp and pregexec -- regsub and regerror are not used in perl
61 * Copyright (c) 1986 by University of Toronto.
62 * Written by Henry Spencer. Not derived from licensed software.
64 * Permission is granted to anyone to use this software for any
65 * purpose on any computer system, and to redistribute it freely,
66 * subject to the following restrictions:
68 * 1. The author is not responsible for the consequences of use of
69 * this software, no matter how awful, even if they arise
72 * 2. The origin of this software must not be misrepresented, either
73 * by explicit claim or by omission.
75 * 3. Altered versions must be plainly marked as such, and must not
76 * be misrepresented as being the original software.
78 **** Alterations to Henry's code are...
80 **** Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
81 **** 2000, 2001, 2002, 2003, 2004, 2005, 2006, by Larry Wall and others
83 **** You may distribute under the terms of either the GNU General Public
84 **** License or the Artistic License, as specified in the README file.
86 * Beware that some of this code is subtly aware of the way operator
87 * precedence is structured in regular expressions. Serious changes in
88 * regular-expression syntax might require a total rethink.
91 #define PERL_IN_REGEXEC_C
96 #define RF_tainted 1 /* tainted information used? */
97 #define RF_warned 2 /* warned about big count? */
98 #define RF_evaled 4 /* Did an EVAL with setting? */
99 #define RF_utf8 8 /* String contains multibyte chars? */
101 #define UTF ((PL_reg_flags & RF_utf8) != 0)
103 #define RS_init 1 /* eval environment created */
104 #define RS_set 2 /* replsv value is set */
107 #define STATIC static
110 #define REGINCLASS(p,c) (ANYOF_FLAGS(p) ? reginclass(p,c,0,0) : ANYOF_BITMAP_TEST(p,*(c)))
116 #define CHR_SVLEN(sv) (do_utf8 ? sv_len_utf8(sv) : SvCUR(sv))
117 #define CHR_DIST(a,b) (PL_reg_match_utf8 ? utf8_distance(a,b) : a - b)
119 #define HOPc(pos,off) ((char *)(PL_reg_match_utf8 \
120 ? reghop3((U8*)pos, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr)) \
122 #define HOPBACKc(pos, off) ((char*) \
123 ((PL_reg_match_utf8) \
124 ? reghopmaybe3((U8*)pos, -off, ((U8*)(off < 0 ? PL_regeol : PL_bostr))) \
125 : (pos - off >= PL_bostr) \
130 #define reghopmaybe3_c(pos,off,lim) ((char*)reghopmaybe3((U8*)pos, off, (U8*)lim))
131 #define HOP3(pos,off,lim) (PL_reg_match_utf8 ? reghop3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
132 #define HOP3c(pos,off,lim) ((char*)HOP3(pos,off,lim))
134 #define LOAD_UTF8_CHARCLASS(class,str) STMT_START { \
135 if (!CAT2(PL_utf8_,class)) { bool ok; ENTER; save_re_context(); ok=CAT2(is_utf8_,class)((const U8*)str); assert(ok); LEAVE; } } STMT_END
136 #define LOAD_UTF8_CHARCLASS_ALNUM() LOAD_UTF8_CHARCLASS(alnum,"a")
137 #define LOAD_UTF8_CHARCLASS_DIGIT() LOAD_UTF8_CHARCLASS(digit,"0")
138 #define LOAD_UTF8_CHARCLASS_SPACE() LOAD_UTF8_CHARCLASS(space," ")
139 #define LOAD_UTF8_CHARCLASS_MARK() LOAD_UTF8_CHARCLASS(mark, "\xcd\x86")
141 /* for use after a quantifier and before an EXACT-like node -- japhy */
142 #define JUMPABLE(rn) ( \
143 OP(rn) == OPEN || OP(rn) == CLOSE || OP(rn) == EVAL || \
144 OP(rn) == SUSPEND || OP(rn) == IFMATCH || \
145 OP(rn) == PLUS || OP(rn) == MINMOD || \
146 (PL_regkind[(U8)OP(rn)] == CURLY && ARG1(rn) > 0) \
149 #define HAS_TEXT(rn) ( \
150 PL_regkind[(U8)OP(rn)] == EXACT || PL_regkind[(U8)OP(rn)] == REF \
154 Search for mandatory following text node; for lookahead, the text must
155 follow but for lookbehind (rn->flags != 0) we skip to the next step.
157 #define FIND_NEXT_IMPT(rn) STMT_START { \
158 while (JUMPABLE(rn)) \
159 if (OP(rn) == SUSPEND || PL_regkind[(U8)OP(rn)] == CURLY) \
160 rn = NEXTOPER(NEXTOPER(rn)); \
161 else if (OP(rn) == PLUS) \
163 else if (OP(rn) == IFMATCH) \
164 rn = (rn->flags == 0) ? NEXTOPER(NEXTOPER(rn)) : rn + ARG(rn); \
165 else rn += NEXT_OFF(rn); \
168 static void restore_pos(pTHX_ void *arg);
171 S_regcppush(pTHX_ I32 parenfloor)
174 const int retval = PL_savestack_ix;
175 #define REGCP_PAREN_ELEMS 4
176 const int paren_elems_to_push = (PL_regsize - parenfloor) * REGCP_PAREN_ELEMS;
179 if (paren_elems_to_push < 0)
180 Perl_croak(aTHX_ "panic: paren_elems_to_push < 0");
182 #define REGCP_OTHER_ELEMS 6
183 SSGROW(paren_elems_to_push + REGCP_OTHER_ELEMS);
184 for (p = PL_regsize; p > parenfloor; p--) {
185 /* REGCP_PARENS_ELEMS are pushed per pairs of parentheses. */
186 SSPUSHINT(PL_regendp[p]);
187 SSPUSHINT(PL_regstartp[p]);
188 SSPUSHPTR(PL_reg_start_tmp[p]);
191 /* REGCP_OTHER_ELEMS are pushed in any case, parentheses or no. */
192 SSPUSHINT(PL_regsize);
193 SSPUSHINT(*PL_reglastparen);
194 SSPUSHINT(*PL_reglastcloseparen);
195 SSPUSHPTR(PL_reginput);
196 #define REGCP_FRAME_ELEMS 2
197 /* REGCP_FRAME_ELEMS are part of the REGCP_OTHER_ELEMS and
198 * are needed for the regexp context stack bookkeeping. */
199 SSPUSHINT(paren_elems_to_push + REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
200 SSPUSHINT(SAVEt_REGCONTEXT); /* Magic cookie. */
205 /* These are needed since we do not localize EVAL nodes: */
206 # define REGCP_SET(cp) DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, \
207 " Setting an EVAL scope, savestack=%"IVdf"\n", \
208 (IV)PL_savestack_ix)); cp = PL_savestack_ix
210 # define REGCP_UNWIND(cp) DEBUG_EXECUTE_r(cp != PL_savestack_ix ? \
211 PerlIO_printf(Perl_debug_log, \
212 " Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
213 (IV)(cp), (IV)PL_savestack_ix) : 0); regcpblow(cp)
223 GET_RE_DEBUG_FLAGS_DECL;
225 /* Pop REGCP_OTHER_ELEMS before the parentheses loop starts. */
227 assert(i == SAVEt_REGCONTEXT); /* Check that the magic cookie is there. */
228 i = SSPOPINT; /* Parentheses elements to pop. */
229 input = (char *) SSPOPPTR;
230 *PL_reglastcloseparen = SSPOPINT;
231 *PL_reglastparen = SSPOPINT;
232 PL_regsize = SSPOPINT;
234 /* Now restore the parentheses context. */
235 for (i -= (REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
236 i > 0; i -= REGCP_PAREN_ELEMS) {
238 paren = (U32)SSPOPINT;
239 PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
240 PL_regstartp[paren] = SSPOPINT;
242 if (paren <= *PL_reglastparen)
243 PL_regendp[paren] = tmps;
245 PerlIO_printf(Perl_debug_log,
246 " restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
247 (UV)paren, (IV)PL_regstartp[paren],
248 (IV)(PL_reg_start_tmp[paren] - PL_bostr),
249 (IV)PL_regendp[paren],
250 (paren > *PL_reglastparen ? "(no)" : ""));
254 if ((I32)(*PL_reglastparen + 1) <= PL_regnpar) {
255 PerlIO_printf(Perl_debug_log,
256 " restoring \\%"IVdf"..\\%"IVdf" to undef\n",
257 (IV)(*PL_reglastparen + 1), (IV)PL_regnpar);
261 /* It would seem that the similar code in regtry()
262 * already takes care of this, and in fact it is in
263 * a better location to since this code can #if 0-ed out
264 * but the code in regtry() is needed or otherwise tests
265 * requiring null fields (pat.t#187 and split.t#{13,14}
266 * (as of patchlevel 7877) will fail. Then again,
267 * this code seems to be necessary or otherwise
268 * building DynaLoader will fail:
269 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
271 for (paren = *PL_reglastparen + 1; (I32)paren <= PL_regnpar; paren++) {
272 if ((I32)paren > PL_regsize)
273 PL_regstartp[paren] = -1;
274 PL_regendp[paren] = -1;
280 #define regcpblow(cp) LEAVE_SCOPE(cp) /* Ignores regcppush()ed data. */
282 #define TRYPAREN(paren, n, input, where) { \
285 PL_regstartp[paren] = HOPc(input, -1) - PL_bostr; \
286 PL_regendp[paren] = input - PL_bostr; \
289 PL_regendp[paren] = -1; \
291 REGMATCH(next, where); \
295 PL_regendp[paren] = -1; \
300 * pregexec and friends
304 - pregexec - match a regexp against a string
307 Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend,
308 char *strbeg, I32 minend, SV *screamer, U32 nosave)
309 /* strend: pointer to null at end of string */
310 /* strbeg: real beginning of string */
311 /* minend: end of match must be >=minend after stringarg. */
312 /* nosave: For optimizations. */
315 regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
316 nosave ? 0 : REXEC_COPY_STR);
320 S_cache_re(pTHX_ regexp *prog)
323 PL_regprecomp = prog->precomp; /* Needed for FAIL. */
325 PL_regprogram = prog->program;
327 PL_regnpar = prog->nparens;
328 PL_regdata = prog->data;
333 * Need to implement the following flags for reg_anch:
335 * USE_INTUIT_NOML - Useful to call re_intuit_start() first
337 * INTUIT_AUTORITATIVE_NOML - Can trust a positive answer
338 * INTUIT_AUTORITATIVE_ML
339 * INTUIT_ONCE_NOML - Intuit can match in one location only.
342 * Another flag for this function: SECOND_TIME (so that float substrs
343 * with giant delta may be not rechecked).
346 /* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
348 /* If SCREAM, then SvPVX_const(sv) should be compatible with strpos and strend.
349 Otherwise, only SvCUR(sv) is used to get strbeg. */
351 /* XXXX We assume that strpos is strbeg unless sv. */
353 /* XXXX Some places assume that there is a fixed substring.
354 An update may be needed if optimizer marks as "INTUITable"
355 RExen without fixed substrings. Similarly, it is assumed that
356 lengths of all the strings are no more than minlen, thus they
357 cannot come from lookahead.
358 (Or minlen should take into account lookahead.) */
360 /* A failure to find a constant substring means that there is no need to make
361 an expensive call to REx engine, thus we celebrate a failure. Similarly,
362 finding a substring too deep into the string means that less calls to
363 regtry() should be needed.
365 REx compiler's optimizer found 4 possible hints:
366 a) Anchored substring;
368 c) Whether we are anchored (beginning-of-line or \G);
369 d) First node (of those at offset 0) which may distingush positions;
370 We use a)b)d) and multiline-part of c), and try to find a position in the
371 string which does not contradict any of them.
374 /* Most of decisions we do here should have been done at compile time.
375 The nodes of the REx which we used for the search should have been
376 deleted from the finite automaton. */
379 Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
380 char *strend, U32 flags, re_scream_pos_data *data)
383 register I32 start_shift = 0;
384 /* Should be nonnegative! */
385 register I32 end_shift = 0;
390 const int do_utf8 = sv ? SvUTF8(sv) : 0; /* if no sv we have to assume bytes */
392 register char *other_last = NULL; /* other substr checked before this */
393 char *check_at = NULL; /* check substr found at this pos */
394 const I32 multiline = prog->reganch & PMf_MULTILINE;
396 const char * const i_strpos = strpos;
397 SV * const dsv = PERL_DEBUG_PAD_ZERO(0);
400 GET_RE_DEBUG_FLAGS_DECL;
402 RX_MATCH_UTF8_set(prog,do_utf8);
404 if (prog->reganch & ROPT_UTF8) {
405 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
406 "UTF-8 regex...\n"));
407 PL_reg_flags |= RF_utf8;
411 const char *s = PL_reg_match_utf8 ?
412 sv_uni_display(dsv, sv, 60, UNI_DISPLAY_REGEX) :
414 const int len = PL_reg_match_utf8 ?
415 strlen(s) : strend - strpos;
418 if (PL_reg_match_utf8)
419 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
420 "UTF-8 target...\n"));
421 PerlIO_printf(Perl_debug_log,
422 "%sGuessing start of match, REx%s \"%s%.60s%s%s\" against \"%s%.*s%s%s\"...\n",
423 PL_colors[4], PL_colors[5], PL_colors[0],
426 (strlen(prog->precomp) > 60 ? "..." : ""),
428 (int)(len > 60 ? 60 : len),
430 (len > 60 ? "..." : "")
434 /* CHR_DIST() would be more correct here but it makes things slow. */
435 if (prog->minlen > strend - strpos) {
436 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
437 "String too short... [re_intuit_start]\n"));
440 strbeg = (sv && SvPOK(sv)) ? strend - SvCUR(sv) : strpos;
443 if (!prog->check_utf8 && prog->check_substr)
444 to_utf8_substr(prog);
445 check = prog->check_utf8;
447 if (!prog->check_substr && prog->check_utf8)
448 to_byte_substr(prog);
449 check = prog->check_substr;
451 if (check == &PL_sv_undef) {
452 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
453 "Non-utf string cannot match utf check string\n"));
456 if (prog->reganch & ROPT_ANCH) { /* Match at beg-of-str or after \n */
457 ml_anch = !( (prog->reganch & ROPT_ANCH_SINGLE)
458 || ( (prog->reganch & ROPT_ANCH_BOL)
459 && !multiline ) ); /* Check after \n? */
462 if ( !(prog->reganch & (ROPT_ANCH_GPOS /* Checked by the caller */
463 | ROPT_IMPLICIT)) /* not a real BOL */
464 /* SvCUR is not set on references: SvRV and SvPVX_const overlap */
466 && (strpos != strbeg)) {
467 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
470 if (prog->check_offset_min == prog->check_offset_max &&
471 !(prog->reganch & ROPT_CANY_SEEN)) {
472 /* Substring at constant offset from beg-of-str... */
475 s = HOP3c(strpos, prog->check_offset_min, strend);
477 slen = SvCUR(check); /* >= 1 */
479 if ( strend - s > slen || strend - s < slen - 1
480 || (strend - s == slen && strend[-1] != '\n')) {
481 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
484 /* Now should match s[0..slen-2] */
486 if (slen && (*SvPVX_const(check) != *s
488 && memNE(SvPVX_const(check), s, slen)))) {
490 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
494 else if (*SvPVX_const(check) != *s
495 || ((slen = SvCUR(check)) > 1
496 && memNE(SvPVX_const(check), s, slen)))
499 goto success_at_start;
502 /* Match is anchored, but substr is not anchored wrt beg-of-str. */
504 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
505 end_shift = prog->minlen - start_shift -
506 CHR_SVLEN(check) + (SvTAIL(check) != 0);
508 const I32 end = prog->check_offset_max + CHR_SVLEN(check)
509 - (SvTAIL(check) != 0);
510 const I32 eshift = CHR_DIST((U8*)strend, (U8*)s) - end;
512 if (end_shift < eshift)
516 else { /* Can match at random position */
519 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
520 /* Should be nonnegative! */
521 end_shift = prog->minlen - start_shift -
522 CHR_SVLEN(check) + (SvTAIL(check) != 0);
525 #ifdef DEBUGGING /* 7/99: reports of failure (with the older version) */
527 Perl_croak(aTHX_ "panic: end_shift");
531 /* Find a possible match in the region s..strend by looking for
532 the "check" substring in the region corrected by start/end_shift. */
533 if (flags & REXEC_SCREAM) {
534 I32 p = -1; /* Internal iterator of scream. */
535 I32 * const pp = data ? data->scream_pos : &p;
537 if (PL_screamfirst[BmRARE(check)] >= 0
538 || ( BmRARE(check) == '\n'
539 && (BmPREVIOUS(check) == SvCUR(check) - 1)
541 s = screaminstr(sv, check,
542 start_shift + (s - strbeg), end_shift, pp, 0);
545 /* we may be pointing at the wrong string */
546 if (s && RX_MATCH_COPIED(prog))
547 s = strbeg + (s - SvPVX_const(sv));
549 *data->scream_olds = s;
551 else if (prog->reganch & ROPT_CANY_SEEN)
552 s = fbm_instr((U8*)(s + start_shift),
553 (U8*)(strend - end_shift),
554 check, multiline ? FBMrf_MULTILINE : 0);
556 s = fbm_instr(HOP3(s, start_shift, strend),
557 HOP3(strend, -end_shift, strbeg),
558 check, multiline ? FBMrf_MULTILINE : 0);
560 /* Update the count-of-usability, remove useless subpatterns,
563 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%s %s substr \"%s%.*s%s\"%s%s",
564 (s ? "Found" : "Did not find"),
565 (check == (do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) ? "anchored" : "floating"),
567 (int)(SvCUR(check) - (SvTAIL(check)!=0)),
569 PL_colors[1], (SvTAIL(check) ? "$" : ""),
570 (s ? " at offset " : "...\n") ) );
577 /* Finish the diagnostic message */
578 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
580 /* Got a candidate. Check MBOL anchoring, and the *other* substr.
581 Start with the other substr.
582 XXXX no SCREAM optimization yet - and a very coarse implementation
583 XXXX /ttx+/ results in anchored="ttx", floating="x". floating will
584 *always* match. Probably should be marked during compile...
585 Probably it is right to do no SCREAM here...
588 if (do_utf8 ? (prog->float_utf8 && prog->anchored_utf8) : (prog->float_substr && prog->anchored_substr)) {
589 /* Take into account the "other" substring. */
590 /* XXXX May be hopelessly wrong for UTF... */
593 if (check == (do_utf8 ? prog->float_utf8 : prog->float_substr)) {
596 char * const last = HOP3c(s, -start_shift, strbeg);
601 t = s - prog->check_offset_max;
602 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
604 || ((t = reghopmaybe3_c(s, -(prog->check_offset_max), strpos))
609 t = HOP3c(t, prog->anchored_offset, strend);
610 if (t < other_last) /* These positions already checked */
612 last2 = last1 = HOP3c(strend, -prog->minlen, strbeg);
615 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
616 /* On end-of-str: see comment below. */
617 must = do_utf8 ? prog->anchored_utf8 : prog->anchored_substr;
618 if (must == &PL_sv_undef) {
620 DEBUG_EXECUTE_r(must = prog->anchored_utf8); /* for debug */
625 HOP3(HOP3(last1, prog->anchored_offset, strend)
626 + SvCUR(must), -(SvTAIL(must)!=0), strbeg),
628 multiline ? FBMrf_MULTILINE : 0
630 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
631 "%s anchored substr \"%s%.*s%s\"%s",
632 (s ? "Found" : "Contradicts"),
635 - (SvTAIL(must)!=0)),
637 PL_colors[1], (SvTAIL(must) ? "$" : "")));
639 if (last1 >= last2) {
640 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
641 ", giving up...\n"));
644 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
645 ", trying floating at offset %ld...\n",
646 (long)(HOP3c(s1, 1, strend) - i_strpos)));
647 other_last = HOP3c(last1, prog->anchored_offset+1, strend);
648 s = HOP3c(last, 1, strend);
652 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
653 (long)(s - i_strpos)));
654 t = HOP3c(s, -prog->anchored_offset, strbeg);
655 other_last = HOP3c(s, 1, strend);
663 else { /* Take into account the floating substring. */
668 t = HOP3c(s, -start_shift, strbeg);
670 HOP3c(strend, -prog->minlen + prog->float_min_offset, strbeg);
671 if (CHR_DIST((U8*)last, (U8*)t) > prog->float_max_offset)
672 last = HOP3c(t, prog->float_max_offset, strend);
673 s = HOP3c(t, prog->float_min_offset, strend);
676 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
677 must = do_utf8 ? prog->float_utf8 : prog->float_substr;
678 /* fbm_instr() takes into account exact value of end-of-str
679 if the check is SvTAIL(ed). Since false positives are OK,
680 and end-of-str is not later than strend we are OK. */
681 if (must == &PL_sv_undef) {
683 DEBUG_EXECUTE_r(must = prog->float_utf8); /* for debug message */
686 s = fbm_instr((unsigned char*)s,
687 (unsigned char*)last + SvCUR(must)
689 must, multiline ? FBMrf_MULTILINE : 0);
690 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%s floating substr \"%s%.*s%s\"%s",
691 (s ? "Found" : "Contradicts"),
693 (int)(SvCUR(must) - (SvTAIL(must)!=0)),
695 PL_colors[1], (SvTAIL(must) ? "$" : "")));
698 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
699 ", giving up...\n"));
702 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
703 ", trying anchored starting at offset %ld...\n",
704 (long)(s1 + 1 - i_strpos)));
706 s = HOP3c(t, 1, strend);
710 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
711 (long)(s - i_strpos)));
712 other_last = s; /* Fix this later. --Hugo */
721 t = s - prog->check_offset_max;
722 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
724 || ((t = reghopmaybe3_c(s, -prog->check_offset_max, strpos))
726 /* Fixed substring is found far enough so that the match
727 cannot start at strpos. */
729 if (ml_anch && t[-1] != '\n') {
730 /* Eventually fbm_*() should handle this, but often
731 anchored_offset is not 0, so this check will not be wasted. */
732 /* XXXX In the code below we prefer to look for "^" even in
733 presence of anchored substrings. And we search even
734 beyond the found float position. These pessimizations
735 are historical artefacts only. */
737 while (t < strend - prog->minlen) {
739 if (t < check_at - prog->check_offset_min) {
740 if (do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) {
741 /* Since we moved from the found position,
742 we definitely contradict the found anchored
743 substr. Due to the above check we do not
744 contradict "check" substr.
745 Thus we can arrive here only if check substr
746 is float. Redo checking for "other"=="fixed".
749 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
750 PL_colors[0], PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
751 goto do_other_anchored;
753 /* We don't contradict the found floating substring. */
754 /* XXXX Why not check for STCLASS? */
756 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
757 PL_colors[0], PL_colors[1], (long)(s - i_strpos)));
760 /* Position contradicts check-string */
761 /* XXXX probably better to look for check-string
762 than for "\n", so one should lower the limit for t? */
763 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n",
764 PL_colors[0], PL_colors[1], (long)(t + 1 - i_strpos)));
765 other_last = strpos = s = t + 1;
770 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
771 PL_colors[0], PL_colors[1]));
775 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Starting position does not contradict /%s^%s/m...\n",
776 PL_colors[0], PL_colors[1]));
780 ++BmUSEFUL(do_utf8 ? prog->check_utf8 : prog->check_substr); /* hooray/5 */
783 /* The found string does not prohibit matching at strpos,
784 - no optimization of calling REx engine can be performed,
785 unless it was an MBOL and we are not after MBOL,
786 or a future STCLASS check will fail this. */
788 /* Even in this situation we may use MBOL flag if strpos is offset
789 wrt the start of the string. */
790 if (ml_anch && sv && !SvROK(sv) /* See prev comment on SvROK */
791 && (strpos != strbeg) && strpos[-1] != '\n'
792 /* May be due to an implicit anchor of m{.*foo} */
793 && !(prog->reganch & ROPT_IMPLICIT))
798 DEBUG_EXECUTE_r( if (ml_anch)
799 PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n",
800 (long)(strpos - i_strpos), PL_colors[0], PL_colors[1]);
803 if (!(prog->reganch & ROPT_NAUGHTY) /* XXXX If strpos moved? */
805 prog->check_utf8 /* Could be deleted already */
806 && --BmUSEFUL(prog->check_utf8) < 0
807 && (prog->check_utf8 == prog->float_utf8)
809 prog->check_substr /* Could be deleted already */
810 && --BmUSEFUL(prog->check_substr) < 0
811 && (prog->check_substr == prog->float_substr)
814 /* If flags & SOMETHING - do not do it many times on the same match */
815 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "... Disabling check substring...\n"));
816 SvREFCNT_dec(do_utf8 ? prog->check_utf8 : prog->check_substr);
817 if (do_utf8 ? prog->check_substr : prog->check_utf8)
818 SvREFCNT_dec(do_utf8 ? prog->check_substr : prog->check_utf8);
819 prog->check_substr = prog->check_utf8 = NULL; /* disable */
820 prog->float_substr = prog->float_utf8 = NULL; /* clear */
821 check = NULL; /* abort */
823 /* XXXX This is a remnant of the old implementation. It
824 looks wasteful, since now INTUIT can use many
826 prog->reganch &= ~RE_USE_INTUIT;
833 /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
834 if (prog->regstclass) {
835 /* minlen == 0 is possible if regstclass is \b or \B,
836 and the fixed substr is ''$.
837 Since minlen is already taken into account, s+1 is before strend;
838 accidentally, minlen >= 1 guaranties no false positives at s + 1
839 even for \b or \B. But (minlen? 1 : 0) below assumes that
840 regstclass does not come from lookahead... */
841 /* If regstclass takes bytelength more than 1: If charlength==1, OK.
842 This leaves EXACTF only, which is dealt with in find_byclass(). */
843 const U8* const str = (U8*)STRING(prog->regstclass);
844 const int cl_l = (PL_regkind[(U8)OP(prog->regstclass)] == EXACT
845 ? CHR_DIST(str+STR_LEN(prog->regstclass), str)
847 const char * const endpos = (prog->anchored_substr || prog->anchored_utf8 || ml_anch)
848 ? HOP3c(s, (prog->minlen ? cl_l : 0), strend)
849 : (prog->float_substr || prog->float_utf8
850 ? HOP3c(HOP3c(check_at, -start_shift, strbeg),
856 s = find_byclass(prog, prog->regstclass, s, endpos, 1);
859 const char *what = NULL;
861 if (endpos == strend) {
862 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
863 "Could not match STCLASS...\n") );
866 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
867 "This position contradicts STCLASS...\n") );
868 if ((prog->reganch & ROPT_ANCH) && !ml_anch)
870 /* Contradict one of substrings */
871 if (prog->anchored_substr || prog->anchored_utf8) {
872 if ((do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) == check) {
873 DEBUG_EXECUTE_r( what = "anchored" );
875 s = HOP3c(t, 1, strend);
876 if (s + start_shift + end_shift > strend) {
877 /* XXXX Should be taken into account earlier? */
878 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
879 "Could not match STCLASS...\n") );
884 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
885 "Looking for %s substr starting at offset %ld...\n",
886 what, (long)(s + start_shift - i_strpos)) );
889 /* Have both, check_string is floating */
890 if (t + start_shift >= check_at) /* Contradicts floating=check */
891 goto retry_floating_check;
892 /* Recheck anchored substring, but not floating... */
896 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
897 "Looking for anchored substr starting at offset %ld...\n",
898 (long)(other_last - i_strpos)) );
899 goto do_other_anchored;
901 /* Another way we could have checked stclass at the
902 current position only: */
907 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
908 "Looking for /%s^%s/m starting at offset %ld...\n",
909 PL_colors[0], PL_colors[1], (long)(t - i_strpos)) );
912 if (!(do_utf8 ? prog->float_utf8 : prog->float_substr)) /* Could have been deleted */
914 /* Check is floating subtring. */
915 retry_floating_check:
916 t = check_at - start_shift;
917 DEBUG_EXECUTE_r( what = "floating" );
918 goto hop_and_restart;
921 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
922 "By STCLASS: moving %ld --> %ld\n",
923 (long)(t - i_strpos), (long)(s - i_strpos))
927 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
928 "Does not contradict STCLASS...\n");
933 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%s%s:%s match at offset %ld\n",
934 PL_colors[4], (check ? "Guessed" : "Giving up"),
935 PL_colors[5], (long)(s - i_strpos)) );
938 fail_finish: /* Substring not found */
939 if (prog->check_substr || prog->check_utf8) /* could be removed already */
940 BmUSEFUL(do_utf8 ? prog->check_utf8 : prog->check_substr) += 5; /* hooray */
942 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
943 PL_colors[4], PL_colors[5]));
947 /* We know what class REx starts with. Try to find this position... */
949 S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, const char *strend, I32 norun)
952 const I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
956 register STRLEN uskip;
960 register I32 tmp = 1; /* Scratch variable? */
961 register const bool do_utf8 = PL_reg_match_utf8;
963 /* We know what class it must start with. */
967 while (s + (uskip = UTF8SKIP(s)) <= strend) {
968 if ((ANYOF_FLAGS(c) & ANYOF_UNICODE) ||
969 !UTF8_IS_INVARIANT((U8)s[0]) ?
970 reginclass(c, (U8*)s, 0, do_utf8) :
971 REGINCLASS(c, (U8*)s)) {
972 if (tmp && (norun || regtry(prog, s)))
986 if (REGINCLASS(c, (U8*)s) ||
987 (ANYOF_FOLD_SHARP_S(c, s, strend) &&
988 /* The assignment of 2 is intentional:
989 * for the folded sharp s, the skip is 2. */
990 (skip = SHARP_S_SKIP))) {
991 if (tmp && (norun || regtry(prog, s)))
1003 while (s < strend) {
1004 if (tmp && (norun || regtry(prog, s)))
1013 ln = STR_LEN(c); /* length to match in octets/bytes */
1014 lnc = (I32) ln; /* length to match in characters */
1016 STRLEN ulen1, ulen2;
1018 U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
1019 U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
1020 const U32 uniflags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
1022 to_utf8_lower((U8*)m, tmpbuf1, &ulen1);
1023 to_utf8_upper((U8*)m, tmpbuf2, &ulen2);
1025 c1 = utf8n_to_uvchr(tmpbuf1, UTF8_MAXBYTES_CASE,
1027 c2 = utf8n_to_uvchr(tmpbuf2, UTF8_MAXBYTES_CASE,
1030 while (sm < ((U8 *) m + ln)) {
1045 c2 = PL_fold_locale[c1];
1047 e = HOP3c(strend, -((I32)lnc), s);
1050 e = s; /* Due to minlen logic of intuit() */
1052 /* The idea in the EXACTF* cases is to first find the
1053 * first character of the EXACTF* node and then, if
1054 * necessary, case-insensitively compare the full
1055 * text of the node. The c1 and c2 are the first
1056 * characters (though in Unicode it gets a bit
1057 * more complicated because there are more cases
1058 * than just upper and lower: one needs to use
1059 * the so-called folding case for case-insensitive
1060 * matching (called "loose matching" in Unicode).
1061 * ibcmp_utf8() will do just that. */
1065 U8 tmpbuf [UTF8_MAXBYTES+1];
1066 STRLEN len, foldlen;
1067 const U32 uniflags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
1069 /* Upper and lower of 1st char are equal -
1070 * probably not a "letter". */
1072 c = utf8n_to_uvchr((U8*)s, UTF8_MAXBYTES, &len,
1076 ibcmp_utf8(s, (char **)0, 0, do_utf8,
1077 m, (char **)0, ln, (bool)UTF))
1078 && (norun || regtry(prog, s)) )
1081 U8 foldbuf[UTF8_MAXBYTES_CASE+1];
1082 uvchr_to_utf8(tmpbuf, c);
1083 f = to_utf8_fold(tmpbuf, foldbuf, &foldlen);
1085 && (f == c1 || f == c2)
1086 && (ln == foldlen ||
1087 !ibcmp_utf8((char *) foldbuf,
1088 (char **)0, foldlen, do_utf8,
1090 (char **)0, ln, (bool)UTF))
1091 && (norun || regtry(prog, s)) )
1099 c = utf8n_to_uvchr((U8*)s, UTF8_MAXBYTES, &len,
1102 /* Handle some of the three Greek sigmas cases.
1103 * Note that not all the possible combinations
1104 * are handled here: some of them are handled
1105 * by the standard folding rules, and some of
1106 * them (the character class or ANYOF cases)
1107 * are handled during compiletime in
1108 * regexec.c:S_regclass(). */
1109 if (c == (UV)UNICODE_GREEK_CAPITAL_LETTER_SIGMA ||
1110 c == (UV)UNICODE_GREEK_SMALL_LETTER_FINAL_SIGMA)
1111 c = (UV)UNICODE_GREEK_SMALL_LETTER_SIGMA;
1113 if ( (c == c1 || c == c2)
1115 ibcmp_utf8(s, (char **)0, 0, do_utf8,
1116 m, (char **)0, ln, (bool)UTF))
1117 && (norun || regtry(prog, s)) )
1120 U8 foldbuf[UTF8_MAXBYTES_CASE+1];
1121 uvchr_to_utf8(tmpbuf, c);
1122 f = to_utf8_fold(tmpbuf, foldbuf, &foldlen);
1124 && (f == c1 || f == c2)
1125 && (ln == foldlen ||
1126 !ibcmp_utf8((char *) foldbuf,
1127 (char **)0, foldlen, do_utf8,
1129 (char **)0, ln, (bool)UTF))
1130 && (norun || regtry(prog, s)) )
1141 && (ln == 1 || !(OP(c) == EXACTF
1143 : ibcmp_locale(s, m, ln)))
1144 && (norun || regtry(prog, s)) )
1150 if ( (*(U8*)s == c1 || *(U8*)s == c2)
1151 && (ln == 1 || !(OP(c) == EXACTF
1153 : ibcmp_locale(s, m, ln)))
1154 && (norun || regtry(prog, s)) )
1161 PL_reg_flags |= RF_tainted;
1168 U8 * const r = reghop3((U8*)s, -1, (U8*)PL_bostr);
1169 tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, 0);
1171 tmp = ((OP(c) == BOUND ?
1172 isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
1173 LOAD_UTF8_CHARCLASS_ALNUM();
1174 while (s + (uskip = UTF8SKIP(s)) <= strend) {
1175 if (tmp == !(OP(c) == BOUND ?
1176 swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
1177 isALNUM_LC_utf8((U8*)s)))
1180 if ((norun || regtry(prog, s)))
1187 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
1188 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1189 while (s < strend) {
1191 !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
1193 if ((norun || regtry(prog, s)))
1199 if ((!prog->minlen && tmp) && (norun || regtry(prog, s)))
1203 PL_reg_flags |= RF_tainted;
1210 U8 * const r = reghop3((U8*)s, -1, (U8*)PL_bostr);
1211 tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, 0);
1213 tmp = ((OP(c) == NBOUND ?
1214 isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
1215 LOAD_UTF8_CHARCLASS_ALNUM();
1216 while (s + (uskip = UTF8SKIP(s)) <= strend) {
1217 if (tmp == !(OP(c) == NBOUND ?
1218 swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
1219 isALNUM_LC_utf8((U8*)s)))
1221 else if ((norun || regtry(prog, s)))
1227 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
1228 tmp = ((OP(c) == NBOUND ?
1229 isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1230 while (s < strend) {
1232 !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
1234 else if ((norun || regtry(prog, s)))
1239 if ((!prog->minlen && !tmp) && (norun || regtry(prog, s)))
1244 LOAD_UTF8_CHARCLASS_ALNUM();
1245 while (s + (uskip = UTF8SKIP(s)) <= strend) {
1246 if (swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
1247 if (tmp && (norun || regtry(prog, s)))
1258 while (s < strend) {
1260 if (tmp && (norun || regtry(prog, s)))
1272 PL_reg_flags |= RF_tainted;
1274 while (s + (uskip = UTF8SKIP(s)) <= strend) {
1275 if (isALNUM_LC_utf8((U8*)s)) {
1276 if (tmp && (norun || regtry(prog, s)))
1287 while (s < strend) {
1288 if (isALNUM_LC(*s)) {
1289 if (tmp && (norun || regtry(prog, s)))
1302 LOAD_UTF8_CHARCLASS_ALNUM();
1303 while (s + (uskip = UTF8SKIP(s)) <= strend) {
1304 if (!swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
1305 if (tmp && (norun || regtry(prog, s)))
1316 while (s < strend) {
1318 if (tmp && (norun || regtry(prog, s)))
1330 PL_reg_flags |= RF_tainted;
1332 while (s + (uskip = UTF8SKIP(s)) <= strend) {
1333 if (!isALNUM_LC_utf8((U8*)s)) {
1334 if (tmp && (norun || regtry(prog, s)))
1345 while (s < strend) {
1346 if (!isALNUM_LC(*s)) {
1347 if (tmp && (norun || regtry(prog, s)))
1360 LOAD_UTF8_CHARCLASS_SPACE();
1361 while (s + (uskip = UTF8SKIP(s)) <= strend) {
1362 if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8)) {
1363 if (tmp && (norun || regtry(prog, s)))
1374 while (s < strend) {
1376 if (tmp && (norun || regtry(prog, s)))
1388 PL_reg_flags |= RF_tainted;
1390 while (s + (uskip = UTF8SKIP(s)) <= strend) {
1391 if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) {
1392 if (tmp && (norun || regtry(prog, s)))
1403 while (s < strend) {
1404 if (isSPACE_LC(*s)) {
1405 if (tmp && (norun || regtry(prog, s)))
1418 LOAD_UTF8_CHARCLASS_SPACE();
1419 while (s + (uskip = UTF8SKIP(s)) <= strend) {
1420 if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8))) {
1421 if (tmp && (norun || regtry(prog, s)))
1432 while (s < strend) {
1434 if (tmp && (norun || regtry(prog, s)))
1446 PL_reg_flags |= RF_tainted;
1448 while (s + (uskip = UTF8SKIP(s)) <= strend) {
1449 if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) {
1450 if (tmp && (norun || regtry(prog, s)))
1461 while (s < strend) {
1462 if (!isSPACE_LC(*s)) {
1463 if (tmp && (norun || regtry(prog, s)))
1476 LOAD_UTF8_CHARCLASS_DIGIT();
1477 while (s + (uskip = UTF8SKIP(s)) <= strend) {
1478 if (swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
1479 if (tmp && (norun || regtry(prog, s)))
1490 while (s < strend) {
1492 if (tmp && (norun || regtry(prog, s)))
1504 PL_reg_flags |= RF_tainted;
1506 while (s + (uskip = UTF8SKIP(s)) <= strend) {
1507 if (isDIGIT_LC_utf8((U8*)s)) {
1508 if (tmp && (norun || regtry(prog, s)))
1519 while (s < strend) {
1520 if (isDIGIT_LC(*s)) {
1521 if (tmp && (norun || regtry(prog, s)))
1534 LOAD_UTF8_CHARCLASS_DIGIT();
1535 while (s + (uskip = UTF8SKIP(s)) <= strend) {
1536 if (!swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
1537 if (tmp && (norun || regtry(prog, s)))
1548 while (s < strend) {
1550 if (tmp && (norun || regtry(prog, s)))
1562 PL_reg_flags |= RF_tainted;
1564 while (s + (uskip = UTF8SKIP(s)) <= strend) {
1565 if (!isDIGIT_LC_utf8((U8*)s)) {
1566 if (tmp && (norun || regtry(prog, s)))
1577 while (s < strend) {
1578 if (!isDIGIT_LC(*s)) {
1579 if (tmp && (norun || regtry(prog, s)))
1591 Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
1600 - regexec_flags - match a regexp against a string
1603 Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend,
1604 char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
1605 /* strend: pointer to null at end of string */
1606 /* strbeg: real beginning of string */
1607 /* minend: end of match must be >=minend after stringarg. */
1608 /* data: May be used for some additional optimizations. */
1609 /* nosave: For optimizations. */
1613 register regnode *c;
1614 register char *startpos = stringarg;
1615 I32 minlen; /* must match at least this many chars */
1616 I32 dontbother = 0; /* how many characters not to try at end */
1617 I32 end_shift = 0; /* Same for the end. */ /* CC */
1618 I32 scream_pos = -1; /* Internal iterator of scream. */
1619 char *scream_olds = NULL;
1620 SV* oreplsv = GvSV(PL_replgv);
1621 const bool do_utf8 = DO_UTF8(sv);
1622 const I32 multiline = prog->reganch & PMf_MULTILINE;
1624 SV * const dsv0 = PERL_DEBUG_PAD_ZERO(0);
1625 SV * const dsv1 = PERL_DEBUG_PAD_ZERO(1);
1628 GET_RE_DEBUG_FLAGS_DECL;
1630 PERL_UNUSED_ARG(data);
1631 RX_MATCH_UTF8_set(prog,do_utf8);
1635 PL_regnarrate = DEBUG_r_TEST;
1638 /* Be paranoid... */
1639 if (prog == NULL || startpos == NULL) {
1640 Perl_croak(aTHX_ "NULL regexp parameter");
1644 minlen = prog->minlen;
1645 if (strend - startpos < minlen) {
1646 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
1647 "String too short [regexec_flags]...\n"));
1651 /* Check validity of program. */
1652 if (UCHARAT(prog->program) != REG_MAGIC) {
1653 Perl_croak(aTHX_ "corrupted regexp program");
1657 PL_reg_eval_set = 0;
1660 if (prog->reganch & ROPT_UTF8)
1661 PL_reg_flags |= RF_utf8;
1663 /* Mark beginning of line for ^ and lookbehind. */
1664 PL_regbol = startpos;
1668 /* Mark end of line for $ (and such) */
1671 /* see how far we have to get to not match where we matched before */
1672 PL_regtill = startpos+minend;
1674 /* We start without call_cc context. */
1677 /* If there is a "must appear" string, look for it. */
1680 if (prog->reganch & ROPT_GPOS_SEEN) { /* Need to have PL_reg_ganch */
1683 if (flags & REXEC_IGNOREPOS) /* Means: check only at start */
1684 PL_reg_ganch = startpos;
1685 else if (sv && SvTYPE(sv) >= SVt_PVMG
1687 && (mg = mg_find(sv, PERL_MAGIC_regex_global))
1688 && mg->mg_len >= 0) {
1689 PL_reg_ganch = strbeg + mg->mg_len; /* Defined pos() */
1690 if (prog->reganch & ROPT_ANCH_GPOS) {
1691 if (s > PL_reg_ganch)
1696 else /* pos() not defined */
1697 PL_reg_ganch = strbeg;
1700 if (!(flags & REXEC_CHECKED) && (prog->check_substr != NULL || prog->check_utf8 != NULL)) {
1701 re_scream_pos_data d;
1703 d.scream_olds = &scream_olds;
1704 d.scream_pos = &scream_pos;
1705 s = re_intuit_start(prog, sv, s, strend, flags, &d);
1707 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Not present...\n"));
1708 goto phooey; /* not present */
1713 const char * const s0 = UTF
1714 ? pv_uni_display(dsv0, (U8*)prog->precomp, prog->prelen, 60,
1717 const int len0 = UTF ? SvCUR(dsv0) : prog->prelen;
1718 const char * const s1 = do_utf8 ? sv_uni_display(dsv1, sv, 60,
1719 UNI_DISPLAY_REGEX) : startpos;
1720 const int len1 = do_utf8 ? SvCUR(dsv1) : strend - startpos;
1723 PerlIO_printf(Perl_debug_log,
1724 "%sMatching REx%s \"%s%*.*s%s%s\" against \"%s%.*s%s%s\"\n",
1725 PL_colors[4], PL_colors[5], PL_colors[0],
1728 len0 > 60 ? "..." : "",
1730 (int)(len1 > 60 ? 60 : len1),
1732 (len1 > 60 ? "..." : "")
1736 /* Simplest case: anchored match need be tried only once. */
1737 /* [unless only anchor is BOL and multiline is set] */
1738 if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) {
1739 if (s == startpos && regtry(prog, startpos))
1741 else if (multiline || (prog->reganch & ROPT_IMPLICIT)
1742 || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */
1747 dontbother = minlen - 1;
1748 end = HOP3c(strend, -dontbother, strbeg) - 1;
1749 /* for multiline we only have to try after newlines */
1750 if (prog->check_substr || prog->check_utf8) {
1754 if (regtry(prog, s))
1759 if (prog->reganch & RE_USE_INTUIT) {
1760 s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
1771 if (*s++ == '\n') { /* don't need PL_utf8skip here */
1772 if (regtry(prog, s))
1779 } else if (prog->reganch & ROPT_ANCH_GPOS) {
1780 if (regtry(prog, PL_reg_ganch))
1785 /* Messy cases: unanchored match. */
1786 if ((prog->anchored_substr || prog->anchored_utf8) && prog->reganch & ROPT_SKIP) {
1787 /* we have /x+whatever/ */
1788 /* it must be a one character string (XXXX Except UTF?) */
1793 if (!(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr))
1794 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1795 ch = SvPVX_const(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr)[0];
1798 while (s < strend) {
1800 DEBUG_EXECUTE_r( did_match = 1 );
1801 if (regtry(prog, s)) goto got_it;
1803 while (s < strend && *s == ch)
1810 while (s < strend) {
1812 DEBUG_EXECUTE_r( did_match = 1 );
1813 if (regtry(prog, s)) goto got_it;
1815 while (s < strend && *s == ch)
1821 DEBUG_EXECUTE_r(if (!did_match)
1822 PerlIO_printf(Perl_debug_log,
1823 "Did not find anchored character...\n")
1826 else if (prog->anchored_substr != NULL
1827 || prog->anchored_utf8 != NULL
1828 || ((prog->float_substr != NULL || prog->float_utf8 != NULL)
1829 && prog->float_max_offset < strend - s)) {
1834 char *last1; /* Last position checked before */
1838 if (prog->anchored_substr || prog->anchored_utf8) {
1839 if (!(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr))
1840 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1841 must = do_utf8 ? prog->anchored_utf8 : prog->anchored_substr;
1842 back_max = back_min = prog->anchored_offset;
1844 if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))
1845 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1846 must = do_utf8 ? prog->float_utf8 : prog->float_substr;
1847 back_max = prog->float_max_offset;
1848 back_min = prog->float_min_offset;
1850 if (must == &PL_sv_undef)
1851 /* could not downgrade utf8 check substring, so must fail */
1854 last = HOP3c(strend, /* Cannot start after this */
1855 -(I32)(CHR_SVLEN(must)
1856 - (SvTAIL(must) != 0) + back_min), strbeg);
1859 last1 = HOPc(s, -1);
1861 last1 = s - 1; /* bogus */
1863 /* XXXX check_substr already used to find "s", can optimize if
1864 check_substr==must. */
1866 dontbother = end_shift;
1867 strend = HOPc(strend, -dontbother);
1868 while ( (s <= last) &&
1869 ((flags & REXEC_SCREAM)
1870 ? (s = screaminstr(sv, must, HOP3c(s, back_min, strend) - strbeg,
1871 end_shift, &scream_pos, 0))
1872 : (s = fbm_instr((unsigned char*)HOP3(s, back_min, strend),
1873 (unsigned char*)strend, must,
1874 multiline ? FBMrf_MULTILINE : 0))) ) {
1875 /* we may be pointing at the wrong string */
1876 if ((flags & REXEC_SCREAM) && RX_MATCH_COPIED(prog))
1877 s = strbeg + (s - SvPVX_const(sv));
1878 DEBUG_EXECUTE_r( did_match = 1 );
1879 if (HOPc(s, -back_max) > last1) {
1880 last1 = HOPc(s, -back_min);
1881 s = HOPc(s, -back_max);
1884 char * const t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
1886 last1 = HOPc(s, -back_min);
1890 while (s <= last1) {
1891 if (regtry(prog, s))
1897 while (s <= last1) {
1898 if (regtry(prog, s))
1904 DEBUG_EXECUTE_r(if (!did_match)
1905 PerlIO_printf(Perl_debug_log,
1906 "Did not find %s substr \"%s%.*s%s\"%s...\n",
1907 ((must == prog->anchored_substr || must == prog->anchored_utf8)
1908 ? "anchored" : "floating"),
1910 (int)(SvCUR(must) - (SvTAIL(must)!=0)),
1912 PL_colors[1], (SvTAIL(must) ? "$" : ""))
1916 else if ((c = prog->regstclass)) {
1918 I32 op = (U8)OP(prog->regstclass);
1919 /* don't bother with what can't match */
1920 if (PL_regkind[op] != EXACT && op != CANY)
1921 strend = HOPc(strend, -(minlen - 1));
1924 SV *prop = sv_newmortal();
1932 pv_uni_display(dsv0, (U8*)SvPVX_const(prop), SvCUR(prop), 60,
1933 UNI_DISPLAY_REGEX) :
1935 len0 = UTF ? SvCUR(dsv0) : SvCUR(prop);
1937 sv_uni_display(dsv1, sv, 60, UNI_DISPLAY_REGEX) : s;
1938 len1 = UTF ? SvCUR(dsv1) : strend - s;
1939 PerlIO_printf(Perl_debug_log,
1940 "Matching stclass \"%*.*s\" against \"%*.*s\"\n",
1944 if (find_byclass(prog, c, s, strend, 0))
1946 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass...\n"));
1950 if (prog->float_substr != NULL || prog->float_utf8 != NULL) {
1955 if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))
1956 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1957 float_real = do_utf8 ? prog->float_utf8 : prog->float_substr;
1959 if (flags & REXEC_SCREAM) {
1960 last = screaminstr(sv, float_real, s - strbeg,
1961 end_shift, &scream_pos, 1); /* last one */
1963 last = scream_olds; /* Only one occurrence. */
1964 /* we may be pointing at the wrong string */
1965 else if (RX_MATCH_COPIED(prog))
1966 s = strbeg + (s - SvPVX_const(sv));
1970 const char * const little = SvPV_const(float_real, len);
1972 if (SvTAIL(float_real)) {
1973 if (memEQ(strend - len + 1, little, len - 1))
1974 last = strend - len + 1;
1975 else if (!multiline)
1976 last = memEQ(strend - len, little, len)
1977 ? strend - len : NULL;
1983 last = rninstr(s, strend, little, little + len);
1985 last = strend; /* matching "$" */
1989 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
1990 "%sCan't trim the tail, match fails (should not happen)%s\n",
1991 PL_colors[4], PL_colors[5]));
1992 goto phooey; /* Should not happen! */
1994 dontbother = strend - last + prog->float_min_offset;
1996 if (minlen && (dontbother < minlen))
1997 dontbother = minlen - 1;
1998 strend -= dontbother; /* this one's always in bytes! */
1999 /* We don't know much -- general case. */
2002 if (regtry(prog, s))
2011 if (regtry(prog, s))
2013 } while (s++ < strend);
2021 RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
2023 if (PL_reg_eval_set) {
2024 /* Preserve the current value of $^R */
2025 if (oreplsv != GvSV(PL_replgv))
2026 sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
2027 restored, the value remains
2029 restore_pos(aTHX_ 0);
2032 /* make sure $`, $&, $', and $digit will work later */
2033 if ( !(flags & REXEC_NOT_FIRST) ) {
2034 RX_MATCH_COPY_FREE(prog);
2035 if (flags & REXEC_COPY_STR) {
2036 I32 i = PL_regeol - startpos + (stringarg - strbeg);
2037 #ifdef PERL_OLD_COPY_ON_WRITE
2039 || (SvFLAGS(sv) & CAN_COW_MASK) == CAN_COW_FLAGS)) {
2041 PerlIO_printf(Perl_debug_log,
2042 "Copy on write: regexp capture, type %d\n",
2045 prog->saved_copy = sv_setsv_cow(prog->saved_copy, sv);
2046 prog->subbeg = (char *)SvPVX_const(prog->saved_copy);
2047 assert (SvPOKp(prog->saved_copy));
2051 RX_MATCH_COPIED_on(prog);
2052 s = savepvn(strbeg, i);
2058 prog->subbeg = strbeg;
2059 prog->sublen = PL_regeol - strbeg; /* strend may have been modified */
2066 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n",
2067 PL_colors[4], PL_colors[5]));
2068 if (PL_reg_eval_set)
2069 restore_pos(aTHX_ 0);
2074 - regtry - try match at specific point
2076 STATIC I32 /* 0 failure, 1 success */
2077 S_regtry(pTHX_ regexp *prog, char *startpos)
2084 GET_RE_DEBUG_FLAGS_DECL;
2087 PL_regindent = 0; /* XXXX Not good when matches are reenterable... */
2089 if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
2092 PL_reg_eval_set = RS_init;
2093 DEBUG_EXECUTE_r(DEBUG_s(
2094 PerlIO_printf(Perl_debug_log, " setting stack tmpbase at %"IVdf"\n",
2095 (IV)(PL_stack_sp - PL_stack_base));
2097 SAVEI32(cxstack[cxstack_ix].blk_oldsp);
2098 cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
2099 /* Otherwise OP_NEXTSTATE will free whatever on stack now. */
2101 /* Apparently this is not needed, judging by wantarray. */
2102 /* SAVEI8(cxstack[cxstack_ix].blk_gimme);
2103 cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
2106 /* Make $_ available to executed code. */
2107 if (PL_reg_sv != DEFSV) {
2112 if (!(SvTYPE(PL_reg_sv) >= SVt_PVMG && SvMAGIC(PL_reg_sv)
2113 && (mg = mg_find(PL_reg_sv, PERL_MAGIC_regex_global)))) {
2114 /* prepare for quick setting of pos */
2115 #ifdef PERL_OLD_COPY_ON_WRITE
2117 sv_force_normal_flags(sv, 0);
2119 mg = sv_magicext(PL_reg_sv, (SV*)0, PERL_MAGIC_regex_global,
2120 &PL_vtbl_mglob, NULL, 0);
2124 PL_reg_oldpos = mg->mg_len;
2125 SAVEDESTRUCTOR_X(restore_pos, 0);
2127 if (!PL_reg_curpm) {
2128 Newxz(PL_reg_curpm, 1, PMOP);
2131 SV* repointer = newSViv(0);
2132 /* so we know which PL_regex_padav element is PL_reg_curpm */
2133 SvFLAGS(repointer) |= SVf_BREAK;
2134 av_push(PL_regex_padav,repointer);
2135 PL_reg_curpm->op_pmoffset = av_len(PL_regex_padav);
2136 PL_regex_pad = AvARRAY(PL_regex_padav);
2140 PM_SETRE(PL_reg_curpm, prog);
2141 PL_reg_oldcurpm = PL_curpm;
2142 PL_curpm = PL_reg_curpm;
2143 if (RX_MATCH_COPIED(prog)) {
2144 /* Here is a serious problem: we cannot rewrite subbeg,
2145 since it may be needed if this match fails. Thus
2146 $` inside (?{}) could fail... */
2147 PL_reg_oldsaved = prog->subbeg;
2148 PL_reg_oldsavedlen = prog->sublen;
2149 #ifdef PERL_OLD_COPY_ON_WRITE
2150 PL_nrs = prog->saved_copy;
2152 RX_MATCH_COPIED_off(prog);
2155 PL_reg_oldsaved = NULL;
2156 prog->subbeg = PL_bostr;
2157 prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
2159 prog->startp[0] = startpos - PL_bostr;
2160 PL_reginput = startpos;
2161 PL_regstartp = prog->startp;
2162 PL_regendp = prog->endp;
2163 PL_reglastparen = &prog->lastparen;
2164 PL_reglastcloseparen = &prog->lastcloseparen;
2165 prog->lastparen = 0;
2166 prog->lastcloseparen = 0;
2168 DEBUG_EXECUTE_r(PL_reg_starttry = startpos);
2169 if (PL_reg_start_tmpl <= prog->nparens) {
2170 PL_reg_start_tmpl = prog->nparens*3/2 + 3;
2171 if(PL_reg_start_tmp)
2172 Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
2174 Newx(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
2177 /* XXXX What this code is doing here?!!! There should be no need
2178 to do this again and again, PL_reglastparen should take care of
2181 /* Tests pat.t#187 and split.t#{13,14} seem to depend on this code.
2182 * Actually, the code in regcppop() (which Ilya may be meaning by
2183 * PL_reglastparen), is not needed at all by the test suite
2184 * (op/regexp, op/pat, op/split), but that code is needed, oddly
2185 * enough, for building DynaLoader, or otherwise this
2186 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
2187 * will happen. Meanwhile, this code *is* needed for the
2188 * above-mentioned test suite tests to succeed. The common theme
2189 * on those tests seems to be returning null fields from matches.
2194 if (prog->nparens) {
2195 for (i = prog->nparens; i > (I32)*PL_reglastparen; i--) {
2202 if (regmatch(prog->program + 1)) {
2203 prog->endp[0] = PL_reginput - PL_bostr;
2206 REGCP_UNWIND(lastcp);
2210 #define RE_UNWIND_BRANCH 1
2211 #define RE_UNWIND_BRANCHJ 2
2215 typedef struct { /* XX: makes sense to enlarge it... */
2219 } re_unwind_generic_t;
2232 } re_unwind_branch_t;
2234 typedef union re_unwind_t {
2236 re_unwind_generic_t generic;
2237 re_unwind_branch_t branch;
2240 #define sayYES goto yes
2241 #define sayNO goto no
2242 #define sayNO_ANYOF goto no_anyof
2243 #define sayYES_FINAL goto yes_final
2244 #define sayYES_LOUD goto yes_loud
2245 #define sayNO_FINAL goto no_final
2246 #define sayNO_SILENT goto do_no
2247 #define saySAME(x) if (x) goto yes; else goto no
2249 #define POSCACHE_SUCCESS 0 /* caching success rather than failure */
2250 #define POSCACHE_SEEN 1 /* we know what we're caching */
2251 #define POSCACHE_START 2 /* the real cache: this bit maps to pos 0 */
2252 #define CACHEsayYES STMT_START { \
2253 if (st->cache_offset | st->cache_bit) { \
2254 if (!(PL_reg_poscache[0] & (1<<POSCACHE_SEEN))) \
2255 PL_reg_poscache[0] |= (1<<POSCACHE_SUCCESS) || (1<<POSCACHE_SEEN); \
2256 else if (!(PL_reg_poscache[0] & (1<<POSCACHE_SUCCESS))) { \
2257 /* cache records failure, but this is success */ \
2259 PerlIO_printf(Perl_debug_log, \
2260 "%*s (remove success from failure cache)\n", \
2261 REPORT_CODE_OFF+PL_regindent*2, "") \
2263 PL_reg_poscache[st->cache_offset] &= ~(1<<st->cache_bit); \
2268 #define CACHEsayNO STMT_START { \
2269 if (st->cache_offset | st->cache_bit) { \
2270 if (!(PL_reg_poscache[0] & (1<<POSCACHE_SEEN))) \
2271 PL_reg_poscache[0] |= (1<<POSCACHE_SEEN); \
2272 else if ((PL_reg_poscache[0] & (1<<POSCACHE_SUCCESS))) { \
2273 /* cache records success, but this is failure */ \
2275 PerlIO_printf(Perl_debug_log, \
2276 "%*s (remove failure from success cache)\n", \
2277 REPORT_CODE_OFF+PL_regindent*2, "") \
2279 PL_reg_poscache[st->cache_offset] &= ~(1<<st->cache_bit); \
2285 /* this is used to determine how far from the left messages like
2286 'failed...' are printed. Currently 29 makes these messages line
2287 up with the opcode they refer to. Earlier perls used 25 which
2288 left these messages outdented making reviewing a debug output
2291 #define REPORT_CODE_OFF 29
2294 /* Make sure there is a test for this +1 options in re_tests */
2295 #define TRIE_INITAL_ACCEPT_BUFFLEN 4;
2297 /* grab a new slab and return the first slot in it */
2299 STATIC regmatch_state *
2302 regmatch_slab *s = PL_regmatch_slab->next;
2304 Newx(s, 1, regmatch_slab);
2305 s->prev = PL_regmatch_slab;
2307 PL_regmatch_slab->next = s;
2309 PL_regmatch_slab = s;
2310 return &s->states[0];
2313 /* simulate a recursive call to regmatch */
2315 #define REGMATCH(ns, where) \
2318 st->resume_state = resume_##where; \
2319 goto start_recurse; \
2320 resume_point_##where:
2323 - regmatch - main matching routine
2325 * Conceptually the strategy is simple: check to see whether the current
2326 * node matches, call self recursively to see whether the rest matches,
2327 * and then act accordingly. In practice we make some effort to avoid
2328 * recursion, in particular by going through "ordinary" nodes (that don't
2329 * need to know whether the rest of the match failed) by a loop instead of
2332 /* [lwall] I've hoisted the register declarations to the outer block in order to
2333 * maybe save a little bit of pushing and popping on the stack. It also takes
2334 * advantage of machines that use a register save mask on subroutine entry.
2336 * This function used to be heavily recursive, but since this had the
2337 * effect of blowing the CPU stack on complex regexes, it has been
2338 * restructured to be iterative, and to save state onto the heap rather
2339 * than the stack. Essentially whereever regmatch() used to be called, it
2340 * pushes the current state, notes where to return, then jumps back into
2343 * Originally the structure of this function used to look something like
2348 while (scan != NULL) {
2349 a++; // do stuff with a and b
2355 if (regmatch(...)) // recurse
2365 * Now it looks something like this:
2373 regmatch_state *st = new();
2375 st->a++; // do stuff with a and b
2377 while (scan != NULL) {
2385 st->resume_state = resume_FOO;
2386 goto start_recurse; // recurse
2395 st = new(); push a new state
2396 st->a = 1; st->b = 2;
2403 switch (resume_state) {
2405 goto resume_point_FOO;
2412 * WARNING: this means that any line in this function that contains a
2413 * REGMATCH() or TRYPAREN() is actually simulating a recursive call to
2414 * regmatch() using gotos instead. Thus the values of any local variables
2415 * not saved in the regmatch_state structure will have been lost when
2416 * execution resumes on the next line .
2418 * States (ie the st pointer) are allocated in slabs of about 4K in size.
2419 * PL_regmatch_state always points to the currently active state, and
2420 * PL_regmatch_slab points to the slab currently containing PL_regmatch_state.
2421 * The first time regmatch is called, the first slab is allocated, and is
2422 * never freed until interpreter desctruction. When the slab is full,
2423 * a new one is allocated chained to the end. At exit from regmatch, slabs
2424 * allocated since entry are freed.
2428 STATIC I32 /* 0 failure, 1 success */
2429 S_regmatch(pTHX_ regnode *prog)
2432 register const bool do_utf8 = PL_reg_match_utf8;
2433 const U32 uniflags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
2435 regmatch_slab *orig_slab;
2436 regmatch_state *orig_state;
2438 /* the current state. This is a cached copy of PL_regmatch_state */
2439 register regmatch_state *st;
2441 /* cache heavy used fields of st in registers */
2442 register regnode *scan;
2443 register regnode *next;
2444 register I32 n = 0; /* initialize to shut up compiler warning */
2445 register char *locinput = PL_reginput;
2447 /* these variables are NOT saved during a recusive RFEGMATCH: */
2448 register I32 nextchr; /* is always set to UCHARAT(locinput) */
2449 bool result; /* return value of S_regmatch */
2450 regnode *inner; /* Next node in internal branch. */
2451 int depth = 0; /* depth of recursion */
2454 SV *re_debug_flags = NULL;
2459 /* on first ever call to regmatch, allocate first slab */
2460 if (!PL_regmatch_slab) {
2461 Newx(PL_regmatch_slab, 1, regmatch_slab);
2462 PL_regmatch_slab->prev = NULL;
2463 PL_regmatch_slab->next = NULL;
2464 PL_regmatch_state = &PL_regmatch_slab->states[0] - 1;
2467 /* remember current high-water mark for exit */
2468 /* XXX this should be done with SAVE* instead */
2469 orig_slab = PL_regmatch_slab;
2470 orig_state = PL_regmatch_state;
2472 /* grab next free state slot */
2473 st = ++PL_regmatch_state;
2474 if (st > &(PL_regmatch_slab->states[PERL_REGMATCH_SLAB_SLOTS-1]))
2475 st = PL_regmatch_state = S_push_slab(aTHX);
2482 /* Note that nextchr is a byte even in UTF */
2483 nextchr = UCHARAT(locinput);
2485 while (scan != NULL) {
2488 SV * const prop = sv_newmortal();
2489 const int docolor = *PL_colors[0];
2490 const int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
2491 int l = (PL_regeol - locinput) > taill ? taill : (PL_regeol - locinput);
2492 /* The part of the string before starttry has one color
2493 (pref0_len chars), between starttry and current
2494 position another one (pref_len - pref0_len chars),
2495 after the current position the third one.
2496 We assume that pref0_len <= pref_len, otherwise we
2497 decrease pref0_len. */
2498 int pref_len = (locinput - PL_bostr) > (5 + taill) - l
2499 ? (5 + taill) - l : locinput - PL_bostr;
2502 while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput - pref_len)))
2504 pref0_len = pref_len - (locinput - PL_reg_starttry);
2505 if (l + pref_len < (5 + taill) && l < PL_regeol - locinput)
2506 l = ( PL_regeol - locinput > (5 + taill) - pref_len
2507 ? (5 + taill) - pref_len : PL_regeol - locinput);
2508 while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput + l)))
2512 if (pref0_len > pref_len)
2513 pref0_len = pref_len;
2514 regprop(prop, scan);
2516 const char * const s0 =
2517 do_utf8 && OP(scan) != CANY ?
2518 pv_uni_display(PERL_DEBUG_PAD(0), (U8*)(locinput - pref_len),
2519 pref0_len, 60, UNI_DISPLAY_REGEX) :
2520 locinput - pref_len;
2521 const int len0 = do_utf8 ? strlen(s0) : pref0_len;
2522 const char * const s1 = do_utf8 && OP(scan) != CANY ?
2523 pv_uni_display(PERL_DEBUG_PAD(1),
2524 (U8*)(locinput - pref_len + pref0_len),
2525 pref_len - pref0_len, 60, UNI_DISPLAY_REGEX) :
2526 locinput - pref_len + pref0_len;
2527 const int len1 = do_utf8 ? strlen(s1) : pref_len - pref0_len;
2528 const char * const s2 = do_utf8 && OP(scan) != CANY ?
2529 pv_uni_display(PERL_DEBUG_PAD(2), (U8*)locinput,
2530 PL_regeol - locinput, 60, UNI_DISPLAY_REGEX) :
2532 const int len2 = do_utf8 ? strlen(s2) : l;
2533 PerlIO_printf(Perl_debug_log,
2534 "%4"IVdf" <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|%3"IVdf":%*s%s\n",
2535 (IV)(locinput - PL_bostr),
2542 (docolor ? "" : "> <"),
2546 15 - l - pref_len + 1,
2548 (IV)(scan - PL_regprogram), PL_regindent*2, "",
2553 next = scan + NEXT_OFF(scan);
2559 if (locinput == PL_bostr)
2561 /* regtill = regbol; */
2566 if (locinput == PL_bostr ||
2567 ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n'))
2573 if (locinput == PL_bostr)
2577 if (locinput == PL_reg_ganch)
2583 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
2588 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
2590 if (PL_regeol - locinput > 1)
2594 if (PL_regeol != locinput)
2598 if (!nextchr && locinput >= PL_regeol)
2601 locinput += PL_utf8skip[nextchr];
2602 if (locinput > PL_regeol)
2604 nextchr = UCHARAT(locinput);
2607 nextchr = UCHARAT(++locinput);
2610 if (!nextchr && locinput >= PL_regeol)
2612 nextchr = UCHARAT(++locinput);
2615 if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
2618 locinput += PL_utf8skip[nextchr];
2619 if (locinput > PL_regeol)
2621 nextchr = UCHARAT(locinput);
2624 nextchr = UCHARAT(++locinput);
2630 traverse the TRIE keeping track of all accepting states
2631 we transition through until we get to a failing node.
2639 U8 *uc = ( U8* )locinput;
2646 U8 *uscan = (U8*)NULL;
2648 SV *sv_accept_buff = NULL;
2649 const enum { trie_plain, trie_utf8, trie_uft8_fold }
2650 trie_type = do_utf8 ?
2651 (OP(scan) == TRIE ? trie_utf8 : trie_uft8_fold)
2654 /* what trie are we using right now */
2656 = (reg_trie_data*)PL_regdata->data[ ARG( scan ) ];
2657 st->accepted = 0; /* how many accepting states we have seen */
2660 while ( state && uc <= (U8*)PL_regeol ) {
2662 if (trie->states[ state ].wordnum) {
2663 if (!st->accepted ) {
2666 bufflen = TRIE_INITAL_ACCEPT_BUFFLEN;
2667 sv_accept_buff=newSV(bufflen *
2668 sizeof(reg_trie_accepted) - 1);
2669 SvCUR_set(sv_accept_buff,
2670 sizeof(reg_trie_accepted));
2671 SvPOK_on(sv_accept_buff);
2672 sv_2mortal(sv_accept_buff);
2674 (reg_trie_accepted*)SvPV_nolen(sv_accept_buff );
2677 if (st->accepted >= bufflen) {
2679 st->accept_buff =(reg_trie_accepted*)
2680 SvGROW(sv_accept_buff,
2681 bufflen * sizeof(reg_trie_accepted));
2683 SvCUR_set(sv_accept_buff,SvCUR(sv_accept_buff)
2684 + sizeof(reg_trie_accepted));
2686 st->accept_buff[st->accepted].wordnum = trie->states[state].wordnum;
2687 st->accept_buff[st->accepted].endpos = uc;
2691 base = trie->states[ state ].trans.base;
2693 DEBUG_TRIE_EXECUTE_r(
2694 PerlIO_printf( Perl_debug_log,
2695 "%*s %sState: %4"UVxf", Base: %4"UVxf", Accepted: %4"UVxf" ",
2696 REPORT_CODE_OFF + PL_regindent * 2, "", PL_colors[4],
2697 (UV)state, (UV)base, (UV)st->accepted );
2701 switch (trie_type) {
2702 case trie_uft8_fold:
2704 uvc = utf8n_to_uvuni( uscan, UTF8_MAXLEN, &len, uniflags );
2709 U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ];
2710 uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN, &len, uniflags );
2711 uvc = to_uni_fold( uvc, foldbuf, &foldlen );
2712 foldlen -= UNISKIP( uvc );
2713 uscan = foldbuf + UNISKIP( uvc );
2717 uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN,
2726 charid = trie->charmap[ uvc ];
2730 if (trie->widecharmap) {
2731 SV** svpp = (SV**)NULL;
2732 svpp = hv_fetch(trie->widecharmap,
2733 (char*)&uvc, sizeof(UV), 0);
2735 charid = (U16)SvIV(*svpp);
2740 (base + charid > trie->uniquecharcount )
2741 && (base + charid - 1 - trie->uniquecharcount
2743 && trie->trans[base + charid - 1 -
2744 trie->uniquecharcount].check == state)
2746 state = trie->trans[base + charid - 1 -
2747 trie->uniquecharcount ].next;
2758 DEBUG_TRIE_EXECUTE_r(
2759 PerlIO_printf( Perl_debug_log,
2760 "Charid:%3x CV:%4"UVxf" After State: %4"UVxf"%s\n",
2761 charid, uvc, (UV)state, PL_colors[5] );
2768 There was at least one accepting state that we
2769 transitioned through. Presumably the number of accepting
2770 states is going to be low, typically one or two. So we
2771 simply scan through to find the one with lowest wordnum.
2772 Once we find it, we swap the last state into its place
2773 and decrement the size. We then try to match the rest of
2774 the pattern at the point where the word ends, if we
2775 succeed then we end the loop, otherwise the loop
2776 eventually terminates once all of the accepting states
2780 if ( st->accepted == 1 ) {
2782 SV **tmp = av_fetch( trie->words, st->accept_buff[ 0 ].wordnum-1, 0 );
2783 PerlIO_printf( Perl_debug_log,
2784 "%*s %sonly one match : #%d <%s>%s\n",
2785 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],
2786 st->accept_buff[ 0 ].wordnum,
2787 tmp ? SvPV_nolen_const( *tmp ) : "not compiled under -Dr",
2790 PL_reginput = (char *)st->accept_buff[ 0 ].endpos;
2791 /* in this case we free tmps/leave before we call regmatch
2792 as we wont be using accept_buff again. */
2795 REGMATCH(scan + NEXT_OFF(scan), TRIE1);
2796 /*** all unsaved local vars undefined at this point */
2799 PerlIO_printf( Perl_debug_log,"%*s %sgot %"IVdf" possible matches%s\n",
2800 REPORT_CODE_OFF + PL_regindent * 2, "", PL_colors[4], (IV)st->accepted,
2803 while ( !result && st->accepted-- ) {
2806 for( cur = 1 ; cur <= st->accepted ; cur++ ) {
2807 DEBUG_TRIE_EXECUTE_r(
2808 PerlIO_printf( Perl_debug_log,
2809 "%*s %sgot %"IVdf" (%d) as best, looking at %"IVdf" (%d)%s\n",
2810 REPORT_CODE_OFF + PL_regindent * 2, "", PL_colors[4],
2811 (IV)best, st->accept_buff[ best ].wordnum, (IV)cur,
2812 st->accept_buff[ cur ].wordnum, PL_colors[5] );
2815 if ( st->accept_buff[ cur ].wordnum < st->accept_buff[ best ].wordnum )
2819 SV ** const tmp = av_fetch( trie->words, st->accept_buff[ best ].wordnum - 1, 0 );
2820 PerlIO_printf( Perl_debug_log, "%*s %strying alternation #%d <%s> at 0x%p%s\n",
2821 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],
2822 st->accept_buff[best].wordnum,
2823 tmp ? SvPV_nolen_const( *tmp ) : "not compiled under -Dr",scan,
2826 if ( best<st->accepted ) {
2827 reg_trie_accepted tmp = st->accept_buff[ best ];
2828 st->accept_buff[ best ] = st->accept_buff[ st->accepted ];
2829 st->accept_buff[ st->accepted ] = tmp;
2830 best = st->accepted;
2832 PL_reginput = (char *)st->accept_buff[ best ].endpos;
2835 as far as I can tell we only need the SAVETMPS/FREETMPS
2836 for re's with EVAL in them but I'm leaving them in for
2837 all until I can be sure.
2840 REGMATCH(scan + NEXT_OFF(scan), TRIE2);
2841 /*** all unsaved local vars undefined at this point */
2854 /* unreached codepoint */
2856 char *s = STRING(scan);
2857 st->ln = STR_LEN(scan);
2858 if (do_utf8 != UTF) {
2859 /* The target and the pattern have differing utf8ness. */
2861 const char *e = s + st->ln;
2864 /* The target is utf8, the pattern is not utf8. */
2869 if (NATIVE_TO_UNI(*(U8*)s) !=
2870 utf8n_to_uvuni((U8*)l, UTF8_MAXBYTES, &ulen,
2878 /* The target is not utf8, the pattern is utf8. */
2883 if (NATIVE_TO_UNI(*((U8*)l)) !=
2884 utf8n_to_uvuni((U8*)s, UTF8_MAXBYTES, &ulen,
2892 nextchr = UCHARAT(locinput);
2895 /* The target and the pattern have the same utf8ness. */
2896 /* Inline the first character, for speed. */
2897 if (UCHARAT(s) != nextchr)
2899 if (PL_regeol - locinput < st->ln)
2901 if (st->ln > 1 && memNE(s, locinput, st->ln))
2904 nextchr = UCHARAT(locinput);
2908 PL_reg_flags |= RF_tainted;
2911 char *s = STRING(scan);
2912 st->ln = STR_LEN(scan);
2914 if (do_utf8 || UTF) {
2915 /* Either target or the pattern are utf8. */
2917 char *e = PL_regeol;
2919 if (ibcmp_utf8(s, 0, st->ln, (bool)UTF,
2920 l, &e, 0, do_utf8)) {
2921 /* One more case for the sharp s:
2922 * pack("U0U*", 0xDF) =~ /ss/i,
2923 * the 0xC3 0x9F are the UTF-8
2924 * byte sequence for the U+00DF. */
2926 toLOWER(s[0]) == 's' &&
2928 toLOWER(s[1]) == 's' &&
2935 nextchr = UCHARAT(locinput);
2939 /* Neither the target and the pattern are utf8. */
2941 /* Inline the first character, for speed. */
2942 if (UCHARAT(s) != nextchr &&
2943 UCHARAT(s) != ((OP(scan) == EXACTF)
2944 ? PL_fold : PL_fold_locale)[nextchr])
2946 if (PL_regeol - locinput < st->ln)
2948 if (st->ln > 1 && (OP(scan) == EXACTF
2949 ? ibcmp(s, locinput, st->ln)
2950 : ibcmp_locale(s, locinput, st->ln)))
2953 nextchr = UCHARAT(locinput);
2958 STRLEN inclasslen = PL_regeol - locinput;
2960 if (!reginclass(scan, (U8*)locinput, &inclasslen, do_utf8))
2962 if (locinput >= PL_regeol)
2964 locinput += inclasslen ? inclasslen : UTF8SKIP(locinput);
2965 nextchr = UCHARAT(locinput);
2970 nextchr = UCHARAT(locinput);
2971 if (!REGINCLASS(scan, (U8*)locinput))
2973 if (!nextchr && locinput >= PL_regeol)
2975 nextchr = UCHARAT(++locinput);
2979 /* If we might have the case of the German sharp s
2980 * in a casefolding Unicode character class. */
2982 if (ANYOF_FOLD_SHARP_S(scan, locinput, PL_regeol)) {
2983 locinput += SHARP_S_SKIP;
2984 nextchr = UCHARAT(locinput);
2990 PL_reg_flags |= RF_tainted;
2996 LOAD_UTF8_CHARCLASS_ALNUM();
2997 if (!(OP(scan) == ALNUM
2998 ? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
2999 : isALNUM_LC_utf8((U8*)locinput)))
3003 locinput += PL_utf8skip[nextchr];
3004 nextchr = UCHARAT(locinput);
3007 if (!(OP(scan) == ALNUM
3008 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
3010 nextchr = UCHARAT(++locinput);
3013 PL_reg_flags |= RF_tainted;
3016 if (!nextchr && locinput >= PL_regeol)
3019 LOAD_UTF8_CHARCLASS_ALNUM();
3020 if (OP(scan) == NALNUM
3021 ? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
3022 : isALNUM_LC_utf8((U8*)locinput))
3026 locinput += PL_utf8skip[nextchr];
3027 nextchr = UCHARAT(locinput);
3030 if (OP(scan) == NALNUM
3031 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
3033 nextchr = UCHARAT(++locinput);
3037 PL_reg_flags |= RF_tainted;
3041 /* was last char in word? */
3043 if (locinput == PL_bostr)
3046 const U8 * const r = reghop3((U8*)locinput, -1, (U8*)PL_bostr);
3048 st->ln = utf8n_to_uvchr(r, UTF8SKIP(r), 0, 0);
3050 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
3051 st->ln = isALNUM_uni(st->ln);
3052 LOAD_UTF8_CHARCLASS_ALNUM();
3053 n = swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8);
3056 st->ln = isALNUM_LC_uvchr(UNI_TO_NATIVE(st->ln));
3057 n = isALNUM_LC_utf8((U8*)locinput);
3061 st->ln = (locinput != PL_bostr) ?
3062 UCHARAT(locinput - 1) : '\n';
3063 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
3064 st->ln = isALNUM(st->ln);
3065 n = isALNUM(nextchr);
3068 st->ln = isALNUM_LC(st->ln);
3069 n = isALNUM_LC(nextchr);
3072 if (((!st->ln) == (!n)) == (OP(scan) == BOUND ||
3073 OP(scan) == BOUNDL))
3077 PL_reg_flags |= RF_tainted;
3083 if (UTF8_IS_CONTINUED(nextchr)) {
3084 LOAD_UTF8_CHARCLASS_SPACE();
3085 if (!(OP(scan) == SPACE
3086 ? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
3087 : isSPACE_LC_utf8((U8*)locinput)))
3091 locinput += PL_utf8skip[nextchr];
3092 nextchr = UCHARAT(locinput);
3095 if (!(OP(scan) == SPACE
3096 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
3098 nextchr = UCHARAT(++locinput);
3101 if (!(OP(scan) == SPACE
3102 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
3104 nextchr = UCHARAT(++locinput);
3108 PL_reg_flags |= RF_tainted;
3111 if (!nextchr && locinput >= PL_regeol)
3114 LOAD_UTF8_CHARCLASS_SPACE();
3115 if (OP(scan) == NSPACE
3116 ? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
3117 : isSPACE_LC_utf8((U8*)locinput))
3121 locinput += PL_utf8skip[nextchr];
3122 nextchr = UCHARAT(locinput);
3125 if (OP(scan) == NSPACE
3126 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
3128 nextchr = UCHARAT(++locinput);
3131 PL_reg_flags |= RF_tainted;
3137 LOAD_UTF8_CHARCLASS_DIGIT();
3138 if (!(OP(scan) == DIGIT
3139 ? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
3140 : isDIGIT_LC_utf8((U8*)locinput)))
3144 locinput += PL_utf8skip[nextchr];
3145 nextchr = UCHARAT(locinput);
3148 if (!(OP(scan) == DIGIT
3149 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
3151 nextchr = UCHARAT(++locinput);
3154 PL_reg_flags |= RF_tainted;
3157 if (!nextchr && locinput >= PL_regeol)
3160 LOAD_UTF8_CHARCLASS_DIGIT();
3161 if (OP(scan) == NDIGIT
3162 ? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
3163 : isDIGIT_LC_utf8((U8*)locinput))
3167 locinput += PL_utf8skip[nextchr];
3168 nextchr = UCHARAT(locinput);
3171 if (OP(scan) == NDIGIT
3172 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
3174 nextchr = UCHARAT(++locinput);
3177 if (locinput >= PL_regeol)
3180 LOAD_UTF8_CHARCLASS_MARK();
3181 if (swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
3183 locinput += PL_utf8skip[nextchr];
3184 while (locinput < PL_regeol &&
3185 swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
3186 locinput += UTF8SKIP(locinput);
3187 if (locinput > PL_regeol)
3192 nextchr = UCHARAT(locinput);
3195 PL_reg_flags |= RF_tainted;
3200 n = ARG(scan); /* which paren pair */
3201 st->ln = PL_regstartp[n];
3202 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
3203 if ((I32)*PL_reglastparen < n || st->ln == -1)
3204 sayNO; /* Do not match unless seen CLOSEn. */
3205 if (st->ln == PL_regendp[n])
3208 s = PL_bostr + st->ln;
3209 if (do_utf8 && OP(scan) != REF) { /* REF can do byte comparison */
3211 const char *e = PL_bostr + PL_regendp[n];
3213 * Note that we can't do the "other character" lookup trick as
3214 * in the 8-bit case (no pun intended) because in Unicode we
3215 * have to map both upper and title case to lower case.
3217 if (OP(scan) == REFF) {
3219 STRLEN ulen1, ulen2;
3220 U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
3221 U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
3225 toLOWER_utf8((U8*)s, tmpbuf1, &ulen1);
3226 toLOWER_utf8((U8*)l, tmpbuf2, &ulen2);
3227 if (ulen1 != ulen2 || memNE((char *)tmpbuf1, (char *)tmpbuf2, ulen1))
3234 nextchr = UCHARAT(locinput);
3238 /* Inline the first character, for speed. */
3239 if (UCHARAT(s) != nextchr &&
3241 (UCHARAT(s) != ((OP(scan) == REFF
3242 ? PL_fold : PL_fold_locale)[nextchr]))))
3244 st->ln = PL_regendp[n] - st->ln;
3245 if (locinput + st->ln > PL_regeol)
3247 if (st->ln > 1 && (OP(scan) == REF
3248 ? memNE(s, locinput, st->ln)
3250 ? ibcmp(s, locinput, st->ln)
3251 : ibcmp_locale(s, locinput, st->ln))))
3254 nextchr = UCHARAT(locinput);
3266 OP_4tree * const oop = PL_op;
3267 COP * const ocurcop = PL_curcop;
3270 struct regexp * const oreg = PL_reg_re;
3273 PL_op = (OP_4tree*)PL_regdata->data[n];
3274 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log, " re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
3275 PAD_SAVE_LOCAL(old_comppad, (PAD*)PL_regdata->data[n + 2]);
3276 PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
3279 SV ** const before = SP;
3280 CALLRUNOPS(aTHX); /* Scalar context. */
3283 ret = &PL_sv_undef; /* protect against empty (?{}) blocks. */
3291 PAD_RESTORE_LOCAL(old_comppad);
3292 PL_curcop = ocurcop;
3294 if (st->logical == 2) { /* Postponed subexpression. */
3301 if(SvROK(ret) && SvSMAGICAL(sv = SvRV(ret)))
3302 mg = mg_find(sv, PERL_MAGIC_qr);
3303 else if (SvSMAGICAL(ret)) {
3304 if (SvGMAGICAL(ret))
3305 sv_unmagic(ret, PERL_MAGIC_qr);
3307 mg = mg_find(ret, PERL_MAGIC_qr);
3311 re = (regexp *)mg->mg_obj;
3312 (void)ReREFCNT_inc(re);
3316 const char * const t = SvPV_const(ret, len);
3318 char * const oprecomp = PL_regprecomp;
3319 const I32 osize = PL_regsize;
3320 const I32 onpar = PL_regnpar;
3323 if (DO_UTF8(ret)) pm.op_pmdynflags |= PMdf_DYN_UTF8;
3324 re = CALLREGCOMP(aTHX_ (char*)t, (char*)t + len, &pm);
3326 & (SVs_TEMP | SVs_PADTMP | SVf_READONLY
3328 sv_magic(ret,(SV*)ReREFCNT_inc(re),
3330 PL_regprecomp = oprecomp;
3335 PerlIO_printf(Perl_debug_log,
3336 "Entering embedded \"%s%.60s%s%s\"\n",
3340 (strlen(re->precomp) > 60 ? "..." : ""))
3343 state.prev = PL_reg_call_cc;
3345 state.re = PL_reg_re;
3349 st->cp = regcppush(0); /* Save *all* the positions. */
3350 REGCP_SET(st->lastcp);
3352 state.ss = PL_savestack_ix;
3353 *PL_reglastparen = 0;
3354 *PL_reglastcloseparen = 0;
3355 PL_reg_call_cc = &state;
3356 PL_reginput = locinput;
3357 toggleutf = ((PL_reg_flags & RF_utf8) != 0) ^
3358 ((re->reganch & ROPT_UTF8) != 0);
3359 if (toggleutf) PL_reg_flags ^= RF_utf8;
3361 /* XXXX This is too dramatic a measure... */
3364 /* XXX the only recursion left in regmatch() */
3365 if (regmatch(re->program + 1)) {
3366 /* Even though we succeeded, we need to restore
3367 global variables, since we may be wrapped inside
3368 SUSPEND, thus the match may be not finished yet. */
3370 /* XXXX Do this only if SUSPENDed? */
3371 PL_reg_call_cc = state.prev;
3373 PL_reg_re = state.re;
3374 cache_re(PL_reg_re);
3375 if (toggleutf) PL_reg_flags ^= RF_utf8;
3377 /* XXXX This is too dramatic a measure... */
3380 /* These are needed even if not SUSPEND. */
3386 REGCP_UNWIND(st->lastcp);
3388 PL_reg_call_cc = state.prev;
3390 PL_reg_re = state.re;
3391 cache_re(PL_reg_re);
3392 if (toggleutf) PL_reg_flags ^= RF_utf8;
3394 /* XXXX This is too dramatic a measure... */
3400 st->sw = SvTRUE(ret);
3404 sv_setsv(save_scalar(PL_replgv), ret);
3410 n = ARG(scan); /* which paren pair */
3411 PL_reg_start_tmp[n] = locinput;
3416 n = ARG(scan); /* which paren pair */
3417 PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
3418 PL_regendp[n] = locinput - PL_bostr;
3419 if (n > (I32)*PL_reglastparen)
3420 *PL_reglastparen = n;
3421 *PL_reglastcloseparen = n;
3424 n = ARG(scan); /* which paren pair */
3425 st->sw = ((I32)*PL_reglastparen >= n && PL_regendp[n] != -1);
3428 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
3430 next = NEXTOPER(NEXTOPER(scan));
3432 next = scan + ARG(scan);
3433 if (OP(next) == IFTHEN) /* Fake one. */
3434 next = NEXTOPER(NEXTOPER(next));
3438 st->logical = scan->flags;
3440 /*******************************************************************
3441 cc contains infoblock about the innermost (...)* loop, and
3442 a pointer to the next outer infoblock.
3444 Here is how Y(A)*Z is processed (if it is compiled into CURLYX/WHILEM):
3446 1) After matching Y, regnode for CURLYX is processed;
3448 2) This regnode mallocs an infoblock, and calls regmatch() recursively
3449 with the starting point at WHILEM node;
3451 3) Each hit of WHILEM node tries to match A and Z (in the order
3452 depending on the current iteration, min/max of {min,max} and
3453 greediness). The information about where are nodes for "A"
3454 and "Z" is read from the infoblock, as is info on how many times "A"
3455 was already matched, and greediness.
3457 4) After A matches, the same WHILEM node is hit again.
3459 5) Each time WHILEM is hit, cc is the infoblock created by CURLYX
3460 of the same pair. Thus when WHILEM tries to match Z, it temporarily
3461 resets cc, since this Y(A)*Z can be a part of some other loop:
3462 as in (Y(A)*Z)*. If Z matches, the automaton will hit the WHILEM node
3463 of the external loop.
3465 Currently present infoblocks form a tree with a stem formed by PL_curcc
3466 and whatever it mentions via ->next, and additional attached trees
3467 corresponding to temporarily unset infoblocks as in "5" above.
3469 In the following picture, infoblocks for outer loop of
3470 (Y(A)*?Z)*?T are denoted O, for inner I. NULL starting block
3471 is denoted by x. The matched string is YAAZYAZT. Temporarily postponed
3472 infoblocks are drawn below the "reset" infoblock.
3474 In fact in the picture below we do not show failed matches for Z and T
3475 by WHILEM blocks. [We illustrate minimal matches, since for them it is
3476 more obvious *why* one needs to *temporary* unset infoblocks.]
3478 Matched REx position InfoBlocks Comment
3482 Y A)*?Z)*?T x <- O <- I
3483 YA )*?Z)*?T x <- O <- I
3484 YA A)*?Z)*?T x <- O <- I
3485 YAA )*?Z)*?T x <- O <- I
3486 YAA Z)*?T x <- O # Temporary unset I
3489 YAAZ Y(A)*?Z)*?T x <- O
3492 YAAZY (A)*?Z)*?T x <- O
3495 YAAZY A)*?Z)*?T x <- O <- I
3498 YAAZYA )*?Z)*?T x <- O <- I
3501 YAAZYA Z)*?T x <- O # Temporary unset I
3507 YAAZYAZ T x # Temporary unset O
3514 *******************************************************************/
3517 /* No need to save/restore up to this paren */
3518 I32 parenfloor = scan->flags;
3522 Newx(newcc, 1, CURCUR);
3524 newcc->oldcc = st->cc;
3527 st->cp = PL_savestack_ix;
3528 if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
3530 /* XXXX Probably it is better to teach regpush to support
3531 parenfloor > PL_regsize... */
3532 if (parenfloor > (I32)*PL_reglastparen)
3533 parenfloor = *PL_reglastparen; /* Pessimization... */
3534 st->cc->parenfloor = parenfloor;
3536 st->cc->min = ARG1(scan);
3537 st->cc->max = ARG2(scan);
3538 st->cc->scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
3539 st->cc->next = next;
3540 st->cc->minmod = st->minmod;
3541 st->cc->lastloc = 0;
3542 PL_reginput = locinput;
3543 REGMATCH(PREVOPER(next), CURLYX); /* start on the WHILEM */
3544 /*** all unsaved local vars undefined at this point */
3553 * This is really hard to understand, because after we match
3554 * what we're trying to match, we must make sure the rest of
3555 * the REx is going to match for sure, and to do that we have
3556 * to go back UP the parse tree by recursing ever deeper. And
3557 * if it fails, we have to reset our parent's current state
3558 * that we can try again after backing off.
3561 st->lastloc = st->cc->lastloc; /* Detection of 0-len. */
3562 st->cache_offset = 0;
3565 n = st->cc->cur + 1; /* how many we know we matched */
3566 PL_reginput = locinput;
3569 PerlIO_printf(Perl_debug_log,
3570 "%*s %ld out of %ld..%ld cc=%"UVxf"\n",
3571 REPORT_CODE_OFF+PL_regindent*2, "",
3572 (long)n, (long)st->cc->min,
3573 (long)st->cc->max, PTR2UV(st->cc))
3576 /* If degenerate scan matches "", assume scan done. */
3578 if (locinput == st->cc->lastloc && n >= st->cc->min) {
3580 st->cc = st->cc->oldcc;
3582 st->ln = st->cc->cur;
3584 PerlIO_printf(Perl_debug_log,
3585 "%*s empty match detected, try continuation...\n",
3586 REPORT_CODE_OFF+PL_regindent*2, "")
3588 REGMATCH(st->oldcc->next, WHILEM1);
3589 /*** all unsaved local vars undefined at this point */
3594 st->cc->oldcc->cur = st->ln;
3598 /* First just match a string of min scans. */
3600 if (n < st->cc->min) {
3602 st->cc->lastloc = locinput;
3603 REGMATCH(st->cc->scan, WHILEM2);
3604 /*** all unsaved local vars undefined at this point */
3607 st->cc->cur = n - 1;
3608 st->cc->lastloc = st->lastloc;
3613 /* Check whether we already were at this position.
3614 Postpone detection until we know the match is not
3615 *that* much linear. */
3616 if (!PL_reg_maxiter) {
3617 PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
3618 PL_reg_leftiter = PL_reg_maxiter;
3620 if (PL_reg_leftiter-- == 0) {
3621 const I32 size = (PL_reg_maxiter + 7 + POSCACHE_START)/8;
3622 if (PL_reg_poscache) {
3623 if ((I32)PL_reg_poscache_size < size) {
3624 Renew(PL_reg_poscache, size, char);
3625 PL_reg_poscache_size = size;
3627 Zero(PL_reg_poscache, size, char);
3630 PL_reg_poscache_size = size;
3631 Newxz(PL_reg_poscache, size, char);
3634 PerlIO_printf(Perl_debug_log,
3635 "%sDetected a super-linear match, switching on caching%s...\n",
3636 PL_colors[4], PL_colors[5])
3639 if (PL_reg_leftiter < 0) {
3640 st->cache_offset = locinput - PL_bostr;
3642 st->cache_offset = (scan->flags & 0xf) - 1 + POSCACHE_START
3643 + st->cache_offset * (scan->flags>>4);
3644 st->cache_bit = st->cache_offset % 8;
3645 st->cache_offset /= 8;
3646 if (PL_reg_poscache[st->cache_offset] & (1<<st->cache_bit)) {
3648 PerlIO_printf(Perl_debug_log,
3649 "%*s already tried at this position...\n",
3650 REPORT_CODE_OFF+PL_regindent*2, "")
3652 if (PL_reg_poscache[0] & (1<<POSCACHE_SUCCESS))
3653 /* cache records success */
3656 /* cache records failure */
3659 PL_reg_poscache[st->cache_offset] |= (1<<st->cache_bit);
3663 /* Prefer next over scan for minimal matching. */
3665 if (st->cc->minmod) {
3667 st->cc = st->cc->oldcc;
3669 st->ln = st->cc->cur;
3670 st->cp = regcppush(st->oldcc->parenfloor);
3671 REGCP_SET(st->lastcp);
3672 REGMATCH(st->oldcc->next, WHILEM3);
3673 /*** all unsaved local vars undefined at this point */
3677 CACHEsayYES; /* All done. */
3679 REGCP_UNWIND(st->lastcp);
3682 st->cc->oldcc->cur = st->ln;
3684 if (n >= st->cc->max) { /* Maximum greed exceeded? */
3685 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
3686 && !(PL_reg_flags & RF_warned)) {
3687 PL_reg_flags |= RF_warned;
3688 Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s limit (%d) exceeded",
3689 "Complex regular subexpression recursion",
3696 PerlIO_printf(Perl_debug_log,
3697 "%*s trying longer...\n",
3698 REPORT_CODE_OFF+PL_regindent*2, "")
3700 /* Try scanning more and see if it helps. */
3701 PL_reginput = locinput;
3703 st->cc->lastloc = locinput;
3704 st->cp = regcppush(st->cc->parenfloor);
3705 REGCP_SET(st->lastcp);
3706 REGMATCH(st->cc->scan, WHILEM4);
3707 /*** all unsaved local vars undefined at this point */
3712 REGCP_UNWIND(st->lastcp);
3714 st->cc->cur = n - 1;
3715 st->cc->lastloc = st->lastloc;
3719 /* Prefer scan over next for maximal matching. */
3721 if (n < st->cc->max) { /* More greed allowed? */
3722 st->cp = regcppush(st->cc->parenfloor);
3724 st->cc->lastloc = locinput;
3725 REGCP_SET(st->lastcp);
3726 REGMATCH(st->cc->scan, WHILEM5);
3727 /*** all unsaved local vars undefined at this point */
3732 REGCP_UNWIND(st->lastcp);
3733 regcppop(); /* Restore some previous $<digit>s? */
3734 PL_reginput = locinput;
3736 PerlIO_printf(Perl_debug_log,
3737 "%*s failed, try continuation...\n",
3738 REPORT_CODE_OFF+PL_regindent*2, "")
3741 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
3742 && !(PL_reg_flags & RF_warned)) {
3743 PL_reg_flags |= RF_warned;
3744 Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s limit (%d) exceeded",
3745 "Complex regular subexpression recursion",
3749 /* Failed deeper matches of scan, so see if this one works. */
3751 st->cc = st->cc->oldcc;
3753 st->ln = st->cc->cur;
3754 REGMATCH(st->oldcc->next, WHILEM6);
3755 /*** all unsaved local vars undefined at this point */
3760 st->cc->oldcc->cur = st->ln;
3761 st->cc->cur = n - 1;
3762 st->cc->lastloc = st->lastloc;
3767 next = scan + ARG(scan);
3770 inner = NEXTOPER(NEXTOPER(scan));
3773 inner = NEXTOPER(scan);
3777 if (OP(next) != st->c1) /* No choice. */
3778 next = inner; /* Avoid recursion. */
3780 const I32 lastparen = *PL_reglastparen;
3781 /* Put unwinding data on stack */
3782 const I32 unwind1 = SSNEWt(1,re_unwind_branch_t);
3783 re_unwind_branch_t * const uw = SSPTRt(unwind1,re_unwind_branch_t);
3785 uw->prev = st->unwind;
3786 st->unwind = unwind1;
3787 uw->type = ((st->c1 == BRANCH)
3789 : RE_UNWIND_BRANCHJ);
3790 uw->lastparen = lastparen;
3792 uw->locinput = locinput;
3793 uw->nextchr = nextchr;
3795 uw->regindent = ++PL_regindent;
3798 REGCP_SET(uw->lastcp);
3800 /* Now go into the first branch */
3810 st->curlym_l = st->matches = 0;
3812 /* We suppose that the next guy does not need
3813 backtracking: in particular, it is of constant non-zero length,
3814 and has no parenths to influence future backrefs. */
3815 st->ln = ARG1(scan); /* min to match */
3816 n = ARG2(scan); /* max to match */
3817 st->paren = scan->flags;
3819 if (st->paren > PL_regsize)
3820 PL_regsize = st->paren;
3821 if (st->paren > (I32)*PL_reglastparen)
3822 *PL_reglastparen = st->paren;
3824 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
3826 scan += NEXT_OFF(scan); /* Skip former OPEN. */
3827 PL_reginput = locinput;
3828 st->maxwanted = st->minmod ? st->ln : n;
3829 if (st->maxwanted) {
3830 while (PL_reginput < PL_regeol && st->matches < st->maxwanted) {
3831 REGMATCH(scan, CURLYM1);
3832 /*** all unsaved local vars undefined at this point */
3835 /* on first match, determine length, curlym_l */
3836 if (!st->matches++) {
3837 if (PL_reg_match_utf8) {
3839 while (s < PL_reginput) {
3845 st->curlym_l = PL_reginput - locinput;
3847 if (st->curlym_l == 0) {
3848 st->matches = st->maxwanted;
3852 locinput = PL_reginput;
3856 PL_reginput = locinput;
3860 if (st->ln && st->matches < st->ln)
3862 if (HAS_TEXT(next) || JUMPABLE(next)) {
3863 regnode *text_node = next;
3865 if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
3867 if (! HAS_TEXT(text_node)) st->c1 = st->c2 = -1000;
3869 if (PL_regkind[(U8)OP(text_node)] == REF) {
3870 st->c1 = st->c2 = -1000;
3873 else { st->c1 = (U8)*STRING(text_node); }
3874 if (OP(text_node) == EXACTF || OP(text_node) == REFF)
3875 st->c2 = PL_fold[st->c1];
3876 else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
3877 st->c2 = PL_fold_locale[st->c1];
3883 st->c1 = st->c2 = -1000;
3885 REGCP_SET(st->lastcp);
3886 while (n >= st->ln || (n == REG_INFTY && st->ln > 0)) { /* ln overflow ? */
3887 /* If it could work, try it. */
3888 if (st->c1 == -1000 ||
3889 UCHARAT(PL_reginput) == st->c1 ||
3890 UCHARAT(PL_reginput) == st->c2)
3894 PL_regstartp[st->paren] =
3895 HOPc(PL_reginput, -st->curlym_l) - PL_bostr;
3896 PL_regendp[st->paren] = PL_reginput - PL_bostr;
3899 PL_regendp[st->paren] = -1;
3901 REGMATCH(next, CURLYM2);
3902 /*** all unsaved local vars undefined at this point */
3905 REGCP_UNWIND(st->lastcp);
3907 /* Couldn't or didn't -- move forward. */
3908 PL_reginput = locinput;
3909 REGMATCH(scan, CURLYM3);
3910 /*** all unsaved local vars undefined at this point */
3913 locinput = PL_reginput;
3921 PerlIO_printf(Perl_debug_log,
3922 "%*s matched %"IVdf" times, len=%"IVdf"...\n",
3923 (int)(REPORT_CODE_OFF+PL_regindent*2), "",
3924 (IV) st->matches, (IV)st->curlym_l)
3926 if (st->matches >= st->ln) {
3927 if (HAS_TEXT(next) || JUMPABLE(next)) {
3928 regnode *text_node = next;
3930 if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
3932 if (! HAS_TEXT(text_node)) st->c1 = st->c2 = -1000;
3934 if (PL_regkind[(U8)OP(text_node)] == REF) {
3935 st->c1 = st->c2 = -1000;
3938 else { st->c1 = (U8)*STRING(text_node); }
3940 if (OP(text_node) == EXACTF || OP(text_node) == REFF)
3941 st->c2 = PL_fold[st->c1];
3942 else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
3943 st->c2 = PL_fold_locale[st->c1];
3949 st->c1 = st->c2 = -1000;
3952 REGCP_SET(st->lastcp);
3953 while (st->matches >= st->ln) {
3954 /* If it could work, try it. */
3955 if (st->c1 == -1000 ||
3956 UCHARAT(PL_reginput) == st->c1 ||
3957 UCHARAT(PL_reginput) == st->c2)
3960 PerlIO_printf(Perl_debug_log,
3961 "%*s trying tail with matches=%"IVdf"...\n",
3962 (int)(REPORT_CODE_OFF+PL_regindent*2),
3963 "", (IV)st->matches)
3967 PL_regstartp[st->paren]
3968 = HOPc(PL_reginput, -st->curlym_l) - PL_bostr;
3969 PL_regendp[st->paren] = PL_reginput - PL_bostr;
3972 PL_regendp[st->paren] = -1;
3974 REGMATCH(next, CURLYM4);
3975 /*** all unsaved local vars undefined at this point */
3978 REGCP_UNWIND(st->lastcp);
3980 /* Couldn't or didn't -- back up. */
3982 locinput = HOPc(locinput, -st->curlym_l);
3983 PL_reginput = locinput;
3991 st->paren = scan->flags; /* Which paren to set */
3992 if (st->paren > PL_regsize)
3993 PL_regsize = st->paren;
3994 if (st->paren > (I32)*PL_reglastparen)
3995 *PL_reglastparen = st->paren;
3996 st->ln = ARG1(scan); /* min to match */
3997 n = ARG2(scan); /* max to match */
3998 scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
4002 st->ln = ARG1(scan); /* min to match */
4003 n = ARG2(scan); /* max to match */
4004 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
4009 scan = NEXTOPER(scan);
4015 scan = NEXTOPER(scan);
4019 * Lookahead to avoid useless match attempts
4020 * when we know what character comes next.
4024 * Used to only do .*x and .*?x, but now it allows
4025 * for )'s, ('s and (?{ ... })'s to be in the way
4026 * of the quantifier and the EXACT-like node. -- japhy
4029 if (HAS_TEXT(next) || JUMPABLE(next)) {
4031 regnode *text_node = next;
4033 if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
4035 if (! HAS_TEXT(text_node)) st->c1 = st->c2 = -1000;
4037 if (PL_regkind[(U8)OP(text_node)] == REF) {
4038 st->c1 = st->c2 = -1000;
4039 goto assume_ok_easy;
4041 else { s = (U8*)STRING(text_node); }
4044 st->c2 = st->c1 = *s;
4045 if (OP(text_node) == EXACTF || OP(text_node) == REFF)
4046 st->c2 = PL_fold[st->c1];
4047 else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
4048 st->c2 = PL_fold_locale[st->c1];
4051 if (OP(text_node) == EXACTF || OP(text_node) == REFF) {
4052 STRLEN ulen1, ulen2;
4053 U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
4054 U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
4056 to_utf8_lower((U8*)s, tmpbuf1, &ulen1);
4057 to_utf8_upper((U8*)s, tmpbuf2, &ulen2);
4059 st->c1 = utf8n_to_uvuni(tmpbuf1, UTF8_MAXBYTES, 0,
4061 st->c2 = utf8n_to_uvuni(tmpbuf2, UTF8_MAXBYTES, 0,
4065 st->c2 = st->c1 = utf8n_to_uvchr(s, UTF8_MAXBYTES, 0,
4072 st->c1 = st->c2 = -1000;
4074 PL_reginput = locinput;
4077 if (st->ln && regrepeat(scan, st->ln) < st->ln)
4079 locinput = PL_reginput;
4080 REGCP_SET(st->lastcp);
4081 if (st->c1 != -1000) {
4085 if (n == REG_INFTY) {
4086 st->e = PL_regeol - 1;
4088 while (UTF8_IS_CONTINUATION(*(U8*)st->e))
4093 for (st->e = locinput;
4094 m >0 && st->e + UTF8SKIP(st->e) <= PL_regeol; m--)
4095 st->e += UTF8SKIP(st->e);
4098 st->e = locinput + n - st->ln;
4099 if (st->e >= PL_regeol)
4100 st->e = PL_regeol - 1;
4103 /* Find place 'next' could work */
4105 if (st->c1 == st->c2) {
4106 while (locinput <= st->e &&
4107 UCHARAT(locinput) != st->c1)
4110 while (locinput <= st->e
4111 && UCHARAT(locinput) != st->c1
4112 && UCHARAT(locinput) != st->c2)
4115 st->count = locinput - st->old;
4118 if (st->c1 == st->c2) {
4120 /* count initialised to
4121 * utf8_distance(old, locinput) */
4122 while (locinput <= st->e &&
4123 utf8n_to_uvchr((U8*)locinput,
4124 UTF8_MAXBYTES, &len,
4125 uniflags) != (UV)st->c1) {
4131 /* count initialised to
4132 * utf8_distance(old, locinput) */
4133 while (locinput <= st->e) {
4134 UV c = utf8n_to_uvchr((U8*)locinput,
4135 UTF8_MAXBYTES, &len,
4137 if (c == (UV)st->c1 || c == (UV)st->c2)
4144 if (locinput > st->e)
4146 /* PL_reginput == old now */
4147 if (locinput != st->old) {
4148 st->ln = 1; /* Did some */
4149 if (regrepeat(scan, st->count) < st->count)
4152 /* PL_reginput == locinput now */
4153 TRYPAREN(st->paren, st->ln, locinput, PLUS1);
4154 /*** all unsaved local vars undefined at this point */
4155 PL_reginput = locinput; /* Could be reset... */
4156 REGCP_UNWIND(st->lastcp);
4157 /* Couldn't or didn't -- move forward. */
4160 locinput += UTF8SKIP(locinput);
4167 while (n >= st->ln || (n == REG_INFTY && st->ln > 0)) { /* ln overflow ? */
4169 if (st->c1 != -1000) {
4171 c = utf8n_to_uvchr((U8*)PL_reginput,
4175 c = UCHARAT(PL_reginput);
4176 /* If it could work, try it. */
4177 if (c == (UV)st->c1 || c == (UV)st->c2)
4179 TRYPAREN(st->paren, st->ln, PL_reginput, PLUS2);
4180 /*** all unsaved local vars undefined at this point */
4181 REGCP_UNWIND(st->lastcp);
4184 /* If it could work, try it. */
4185 else if (st->c1 == -1000)
4187 TRYPAREN(st->paren, st->ln, PL_reginput, PLUS3);
4188 /*** all unsaved local vars undefined at this point */
4189 REGCP_UNWIND(st->lastcp);
4191 /* Couldn't or didn't -- move forward. */
4192 PL_reginput = locinput;
4193 if (regrepeat(scan, 1)) {
4195 locinput = PL_reginput;
4202 n = regrepeat(scan, n);
4203 locinput = PL_reginput;
4204 if (st->ln < n && PL_regkind[(U8)OP(next)] == EOL &&
4205 (OP(next) != MEOL ||
4206 OP(next) == SEOL || OP(next) == EOS))
4208 st->ln = n; /* why back off? */
4209 /* ...because $ and \Z can match before *and* after
4210 newline at the end. Consider "\n\n" =~ /\n+\Z\n/.
4211 We should back off by one in this case. */
4212 if (UCHARAT(PL_reginput - 1) == '\n' && OP(next) != EOS)
4215 REGCP_SET(st->lastcp);
4218 while (n >= st->ln) {
4219 if (st->c1 != -1000) {
4221 c = utf8n_to_uvchr((U8*)PL_reginput,
4225 c = UCHARAT(PL_reginput);
4227 /* If it could work, try it. */
4228 if (st->c1 == -1000 || c == (UV)st->c1 || c == (UV)st->c2)
4230 TRYPAREN(st->paren, n, PL_reginput, PLUS4);
4231 /*** all unsaved local vars undefined at this point */
4232 REGCP_UNWIND(st->lastcp);
4234 /* Couldn't or didn't -- back up. */
4236 PL_reginput = locinput = HOPc(locinput, -1);
4243 if (PL_reg_call_cc) {
4244 st->cur_call_cc = PL_reg_call_cc;
4245 st->end_re = PL_reg_re;
4247 /* Save *all* the positions. */
4248 st->cp = regcppush(0);
4249 REGCP_SET(st->lastcp);
4251 /* Restore parens of the caller. */
4253 I32 tmp = PL_savestack_ix;
4254 PL_savestack_ix = PL_reg_call_cc->ss;
4256 PL_savestack_ix = tmp;
4259 /* Make position available to the callcc. */
4260 PL_reginput = locinput;
4262 cache_re(PL_reg_call_cc->re);
4264 st->cc = PL_reg_call_cc->cc;
4265 PL_reg_call_cc = PL_reg_call_cc->prev;
4266 REGMATCH(st->cur_call_cc->node, END);
4267 /*** all unsaved local vars undefined at this point */
4269 PL_reg_call_cc = st->cur_call_cc;
4273 REGCP_UNWIND(st->lastcp);
4275 PL_reg_call_cc = st->cur_call_cc;
4277 PL_reg_re = st->end_re;
4278 cache_re(st->end_re);
4281 PerlIO_printf(Perl_debug_log,
4282 "%*s continuation failed...\n",
4283 REPORT_CODE_OFF+PL_regindent*2, "")
4287 if (locinput < PL_regtill) {
4288 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
4289 "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
4291 (long)(locinput - PL_reg_starttry),
4292 (long)(PL_regtill - PL_reg_starttry),
4294 sayNO_FINAL; /* Cannot match: too short. */
4296 PL_reginput = locinput; /* put where regtry can find it */
4297 sayYES_FINAL; /* Success! */
4299 PL_reginput = locinput; /* put where regtry can find it */
4300 sayYES_LOUD; /* Success! */
4303 PL_reginput = locinput;
4308 char * const s = HOPBACKc(locinput, scan->flags);
4314 PL_reginput = locinput;
4319 char * const s = HOPBACKc(locinput, scan->flags);
4325 PL_reginput = locinput;
4328 REGMATCH(NEXTOPER(NEXTOPER(scan)), IFMATCH);
4329 /*** all unsaved local vars undefined at this point */
4345 if (OP(scan) == SUSPEND) {
4346 locinput = PL_reginput;
4347 nextchr = UCHARAT(locinput);
4352 next = scan + ARG(scan);
4357 PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
4358 PTR2UV(scan), OP(scan));
4359 Perl_croak(aTHX_ "regexp memory corruption");
4367 /* simulate recursively calling regmatch(), but without actually
4368 * recursing - ie save the current state on the heap rather than on
4369 * the stack, then re-enter the loop. This avoids complex regexes
4370 * blowing the processor stack */
4374 /* push new state */
4375 regmatch_state *oldst = st;
4379 /* grab the next free state slot */
4381 if (st > &(PL_regmatch_slab->states[PERL_REGMATCH_SLAB_SLOTS-1]))
4382 st = S_push_slab(aTHX);
4383 PL_regmatch_state = st;
4387 oldst->locinput = locinput;
4388 oldst->reg_call_cc = PL_reg_call_cc;
4391 locinput = PL_reginput;
4392 nextchr = UCHARAT(locinput);
4404 * We get here only if there's trouble -- normally "case END" is
4405 * the terminating point.
4407 Perl_croak(aTHX_ "corrupted regexp pointers");
4413 PerlIO_printf(Perl_debug_log,
4414 "%*s %scould match...%s\n",
4415 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4], PL_colors[5])
4419 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
4420 PL_colors[4], PL_colors[5]));
4431 PerlIO_printf(Perl_debug_log,
4432 "%*s %sfailed...%s\n",
4433 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4], PL_colors[5])
4439 re_unwind_t * const uw = SSPTRt(st->unwind,re_unwind_t);
4442 case RE_UNWIND_BRANCH:
4443 case RE_UNWIND_BRANCHJ:
4445 re_unwind_branch_t * const uwb = &(uw->branch);
4446 const I32 lastparen = uwb->lastparen;
4448 REGCP_UNWIND(uwb->lastcp);
4449 for (n = *PL_reglastparen; n > lastparen; n--)
4451 *PL_reglastparen = n;
4452 scan = next = uwb->next;
4454 OP(scan) != (uwb->type == RE_UNWIND_BRANCH
4455 ? BRANCH : BRANCHJ) ) { /* Failure */
4456 st->unwind = uwb->prev;
4462 /* Have more choice yet. Reuse the same uwb. */
4463 if ((n = (uwb->type == RE_UNWIND_BRANCH
4464 ? NEXT_OFF(next) : ARG(next))))
4467 next = NULL; /* XXXX Needn't unwinding in this case... */
4469 next = NEXTOPER(scan);
4470 if (uwb->type == RE_UNWIND_BRANCHJ)
4471 next = NEXTOPER(next);
4472 locinput = uwb->locinput;
4473 nextchr = uwb->nextchr;
4475 PL_regindent = uwb->regindent;
4482 Perl_croak(aTHX_ "regexp unwind memory corruption");
4493 /* restore previous state and re-enter */
4495 if (st < &PL_regmatch_slab->states[0]) {
4496 PL_regmatch_slab = PL_regmatch_slab->prev;
4497 st = &PL_regmatch_slab->states[PERL_REGMATCH_SLAB_SLOTS-1];
4499 PL_regmatch_state = st;
4501 PL_reg_call_cc = st->reg_call_cc;
4505 locinput = st->locinput;
4506 nextchr = UCHARAT(locinput);
4508 switch (st->resume_state) {
4510 goto resume_point_TRIE1;
4512 goto resume_point_TRIE2;
4514 goto resume_point_CURLYX;
4515 case resume_WHILEM1:
4516 goto resume_point_WHILEM1;
4517 case resume_WHILEM2:
4518 goto resume_point_WHILEM2;
4519 case resume_WHILEM3:
4520 goto resume_point_WHILEM3;
4521 case resume_WHILEM4:
4522 goto resume_point_WHILEM4;
4523 case resume_WHILEM5:
4524 goto resume_point_WHILEM5;
4525 case resume_WHILEM6:
4526 goto resume_point_WHILEM6;
4527 case resume_CURLYM1:
4528 goto resume_point_CURLYM1;
4529 case resume_CURLYM2:
4530 goto resume_point_CURLYM2;
4531 case resume_CURLYM3:
4532 goto resume_point_CURLYM3;
4533 case resume_CURLYM4:
4534 goto resume_point_CURLYM4;
4535 case resume_IFMATCH:
4536 goto resume_point_IFMATCH;
4538 goto resume_point_PLUS1;
4540 goto resume_point_PLUS2;
4542 goto resume_point_PLUS3;
4544 goto resume_point_PLUS4;
4546 goto resume_point_END;
4548 Perl_croak(aTHX_ "regexp resume memory corruption");
4552 /* restore original high-water mark */
4553 PL_regmatch_slab = orig_slab;
4554 PL_regmatch_state = orig_state;
4556 /* free all slabs above current one */
4557 if (orig_slab->next) {
4558 regmatch_slab *osl, *sl = orig_slab->next;
4559 orig_slab->next = NULL;
4571 - regrepeat - repeatedly match something simple, report how many
4574 * [This routine now assumes that it will only match on things of length 1.
4575 * That was true before, but now we assume scan - reginput is the count,
4576 * rather than incrementing count on every character. [Er, except utf8.]]
4579 S_regrepeat(pTHX_ const regnode *p, I32 max)
4582 register char *scan;
4584 register char *loceol = PL_regeol;
4585 register I32 hardcount = 0;
4586 register bool do_utf8 = PL_reg_match_utf8;
4589 if (max == REG_INFTY)
4591 else if (max < loceol - scan)
4592 loceol = scan + max;
4597 while (scan < loceol && hardcount < max && *scan != '\n') {
4598 scan += UTF8SKIP(scan);
4602 while (scan < loceol && *scan != '\n')
4609 while (scan < loceol && hardcount < max) {
4610 scan += UTF8SKIP(scan);
4620 case EXACT: /* length of string is 1 */
4622 while (scan < loceol && UCHARAT(scan) == c)
4625 case EXACTF: /* length of string is 1 */
4627 while (scan < loceol &&
4628 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
4631 case EXACTFL: /* length of string is 1 */
4632 PL_reg_flags |= RF_tainted;
4634 while (scan < loceol &&
4635 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
4641 while (hardcount < max && scan < loceol &&
4642 reginclass(p, (U8*)scan, 0, do_utf8)) {
4643 scan += UTF8SKIP(scan);
4647 while (scan < loceol && REGINCLASS(p, (U8*)scan))
4654 LOAD_UTF8_CHARCLASS_ALNUM();
4655 while (hardcount < max && scan < loceol &&
4656 swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
4657 scan += UTF8SKIP(scan);
4661 while (scan < loceol && isALNUM(*scan))
4666 PL_reg_flags |= RF_tainted;
4669 while (hardcount < max && scan < loceol &&
4670 isALNUM_LC_utf8((U8*)scan)) {
4671 scan += UTF8SKIP(scan);
4675 while (scan < loceol && isALNUM_LC(*scan))
4682 LOAD_UTF8_CHARCLASS_ALNUM();
4683 while (hardcount < max && scan < loceol &&
4684 !swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
4685 scan += UTF8SKIP(scan);
4689 while (scan < loceol && !isALNUM(*scan))
4694 PL_reg_flags |= RF_tainted;
4697 while (hardcount < max && scan < loceol &&
4698 !isALNUM_LC_utf8((U8*)scan)) {
4699 scan += UTF8SKIP(scan);
4703 while (scan < loceol && !isALNUM_LC(*scan))
4710 LOAD_UTF8_CHARCLASS_SPACE();
4711 while (hardcount < max && scan < loceol &&
4713 swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
4714 scan += UTF8SKIP(scan);
4718 while (scan < loceol && isSPACE(*scan))
4723 PL_reg_flags |= RF_tainted;
4726 while (hardcount < max && scan < loceol &&
4727 (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
4728 scan += UTF8SKIP(scan);
4732 while (scan < loceol && isSPACE_LC(*scan))
4739 LOAD_UTF8_CHARCLASS_SPACE();
4740 while (hardcount < max && scan < loceol &&
4742 swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
4743 scan += UTF8SKIP(scan);
4747 while (scan < loceol && !isSPACE(*scan))
4752 PL_reg_flags |= RF_tainted;
4755 while (hardcount < max && scan < loceol &&
4756 !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
4757 scan += UTF8SKIP(scan);
4761 while (scan < loceol && !isSPACE_LC(*scan))
4768 LOAD_UTF8_CHARCLASS_DIGIT();
4769 while (hardcount < max && scan < loceol &&
4770 swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
4771 scan += UTF8SKIP(scan);
4775 while (scan < loceol && isDIGIT(*scan))
4782 LOAD_UTF8_CHARCLASS_DIGIT();
4783 while (hardcount < max && scan < loceol &&
4784 !swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
4785 scan += UTF8SKIP(scan);
4789 while (scan < loceol && !isDIGIT(*scan))
4793 default: /* Called on something of 0 width. */
4794 break; /* So match right here or not at all. */
4800 c = scan - PL_reginput;
4804 SV *re_debug_flags = NULL;
4805 SV * const prop = sv_newmortal();
4809 PerlIO_printf(Perl_debug_log,
4810 "%*s %s can match %"IVdf" times out of %"IVdf"...\n",
4811 REPORT_CODE_OFF+1, "", SvPVX_const(prop),(IV)c,(IV)max);
4820 - regclass_swash - prepare the utf8 swash
4824 Perl_regclass_swash(pTHX_ register const regnode* node, bool doinit, SV** listsvp, SV **altsvp)
4831 if (PL_regdata && PL_regdata->count) {
4832 const U32 n = ARG(node);
4834 if (PL_regdata->what[n] == 's') {
4835 SV * const rv = (SV*)PL_regdata->data[n];
4836 AV * const av = (AV*)SvRV((SV*)rv);
4837 SV **const ary = AvARRAY(av);
4840 /* See the end of regcomp.c:S_regclass() for
4841 * documentation of these array elements. */
4844 a = SvROK(ary[1]) ? &ary[1] : 0;
4845 b = SvTYPE(ary[2]) == SVt_PVAV ? &ary[2] : 0;
4849 else if (si && doinit) {
4850 sw = swash_init("utf8", "", si, 1, 0);
4851 (void)av_store(av, 1, sw);
4867 - reginclass - determine if a character falls into a character class
4869 The n is the ANYOF regnode, the p is the target string, lenp
4870 is pointer to the maximum length of how far to go in the p
4871 (if the lenp is zero, UTF8SKIP(p) is used),
4872 do_utf8 tells whether the target string is in UTF-8.
4877 S_reginclass(pTHX_ register const regnode *n, register const U8* p, STRLEN* lenp, register bool do_utf8)
4880 const char flags = ANYOF_FLAGS(n);
4886 if (do_utf8 && !UTF8_IS_INVARIANT(c)) {
4887 c = utf8n_to_uvchr(p, UTF8_MAXBYTES, &len,
4888 ckWARN(WARN_UTF8) ? UTF8_CHECK_ONLY :
4889 UTF8_ALLOW_ANYUV|UTF8_CHECK_ONLY);
4890 if (len == (STRLEN)-1)
4891 Perl_croak(aTHX_ "Malformed UTF-8 character (fatal)");
4894 plen = lenp ? *lenp : UNISKIP(NATIVE_TO_UNI(c));
4895 if (do_utf8 || (flags & ANYOF_UNICODE)) {
4898 if (do_utf8 && !ANYOF_RUNTIME(n)) {
4899 if (len != (STRLEN)-1 && c < 256 && ANYOF_BITMAP_TEST(n, c))
4902 if (!match && do_utf8 && (flags & ANYOF_UNICODE_ALL) && c >= 256)
4906 SV * const sw = regclass_swash(n, TRUE, 0, (SV**)&av);
4909 if (swash_fetch(sw, p, do_utf8))
4911 else if (flags & ANYOF_FOLD) {
4912 if (!match && lenp && av) {
4914 for (i = 0; i <= av_len(av); i++) {
4915 SV* const sv = *av_fetch(av, i, FALSE);
4917 const char * const s = SvPV_const(sv, len);
4919 if (len <= plen && memEQ(s, (char*)p, len)) {
4927 U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
4930 to_utf8_fold(p, tmpbuf, &tmplen);
4931 if (swash_fetch(sw, tmpbuf, do_utf8))
4937 if (match && lenp && *lenp == 0)
4938 *lenp = UNISKIP(NATIVE_TO_UNI(c));
4940 if (!match && c < 256) {
4941 if (ANYOF_BITMAP_TEST(n, c))
4943 else if (flags & ANYOF_FOLD) {
4946 if (flags & ANYOF_LOCALE) {
4947 PL_reg_flags |= RF_tainted;
4948 f = PL_fold_locale[c];
4952 if (f != c && ANYOF_BITMAP_TEST(n, f))
4956 if (!match && (flags & ANYOF_CLASS)) {
4957 PL_reg_flags |= RF_tainted;
4959 (ANYOF_CLASS_TEST(n, ANYOF_ALNUM) && isALNUM_LC(c)) ||
4960 (ANYOF_CLASS_TEST(n, ANYOF_NALNUM) && !isALNUM_LC(c)) ||
4961 (ANYOF_CLASS_TEST(n, ANYOF_SPACE) && isSPACE_LC(c)) ||
4962 (ANYOF_CLASS_TEST(n, ANYOF_NSPACE) && !isSPACE_LC(c)) ||
4963 (ANYOF_CLASS_TEST(n, ANYOF_DIGIT) && isDIGIT_LC(c)) ||
4964 (ANYOF_CLASS_TEST(n, ANYOF_NDIGIT) && !isDIGIT_LC(c)) ||
4965 (ANYOF_CLASS_TEST(n, ANYOF_ALNUMC) && isALNUMC_LC(c)) ||
4966 (ANYOF_CLASS_TEST(n, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
4967 (ANYOF_CLASS_TEST(n, ANYOF_ALPHA) && isALPHA_LC(c)) ||
4968 (ANYOF_CLASS_TEST(n, ANYOF_NALPHA) && !isALPHA_LC(c)) ||
4969 (ANYOF_CLASS_TEST(n, ANYOF_ASCII) && isASCII(c)) ||
4970 (ANYOF_CLASS_TEST(n, ANYOF_NASCII) && !isASCII(c)) ||
4971 (ANYOF_CLASS_TEST(n, ANYOF_CNTRL) && isCNTRL_LC(c)) ||
4972 (ANYOF_CLASS_TEST(n, ANYOF_NCNTRL) && !isCNTRL_LC(c)) ||
4973 (ANYOF_CLASS_TEST(n, ANYOF_GRAPH) && isGRAPH_LC(c)) ||
4974 (ANYOF_CLASS_TEST(n, ANYOF_NGRAPH) && !isGRAPH_LC(c)) ||
4975 (ANYOF_CLASS_TEST(n, ANYOF_LOWER) && isLOWER_LC(c)) ||
4976 (ANYOF_CLASS_TEST(n, ANYOF_NLOWER) && !isLOWER_LC(c)) ||
4977 (ANYOF_CLASS_TEST(n, ANYOF_PRINT) && isPRINT_LC(c)) ||
4978 (ANYOF_CLASS_TEST(n, ANYOF_NPRINT) && !isPRINT_LC(c)) ||
4979 (ANYOF_CLASS_TEST(n, ANYOF_PUNCT) && isPUNCT_LC(c)) ||
4980 (ANYOF_CLASS_TEST(n, ANYOF_NPUNCT) && !isPUNCT_LC(c)) ||
4981 (ANYOF_CLASS_TEST(n, ANYOF_UPPER) && isUPPER_LC(c)) ||
4982 (ANYOF_CLASS_TEST(n, ANYOF_NUPPER) && !isUPPER_LC(c)) ||
4983 (ANYOF_CLASS_TEST(n, ANYOF_XDIGIT) && isXDIGIT(c)) ||
4984 (ANYOF_CLASS_TEST(n, ANYOF_NXDIGIT) && !isXDIGIT(c)) ||
4985 (ANYOF_CLASS_TEST(n, ANYOF_PSXSPC) && isPSXSPC(c)) ||
4986 (ANYOF_CLASS_TEST(n, ANYOF_NPSXSPC) && !isPSXSPC(c)) ||
4987 (ANYOF_CLASS_TEST(n, ANYOF_BLANK) && isBLANK(c)) ||
4988 (ANYOF_CLASS_TEST(n, ANYOF_NBLANK) && !isBLANK(c))
4989 ) /* How's that for a conditional? */
4996 return (flags & ANYOF_INVERT) ? !match : match;
5000 S_reghop3(U8 *s, I32 off, U8* lim)
5004 while (off-- && s < lim) {
5005 /* XXX could check well-formedness here */
5013 if (UTF8_IS_CONTINUED(*s)) {
5014 while (s > (U8*)lim && UTF8_IS_CONTINUATION(*s))
5017 /* XXX could check well-formedness here */
5025 S_reghopmaybe3(U8* s, I32 off, U8* lim)
5029 while (off-- && s < lim) {
5030 /* XXX could check well-formedness here */
5040 if (UTF8_IS_CONTINUED(*s)) {
5041 while (s > (U8*)lim && UTF8_IS_CONTINUATION(*s))
5044 /* XXX could check well-formedness here */
5056 restore_pos(pTHX_ void *arg)
5059 PERL_UNUSED_ARG(arg);
5060 if (PL_reg_eval_set) {
5061 if (PL_reg_oldsaved) {
5062 PL_reg_re->subbeg = PL_reg_oldsaved;
5063 PL_reg_re->sublen = PL_reg_oldsavedlen;
5064 #ifdef PERL_OLD_COPY_ON_WRITE
5065 PL_reg_re->saved_copy = PL_nrs;
5067 RX_MATCH_COPIED_on(PL_reg_re);
5069 PL_reg_magic->mg_len = PL_reg_oldpos;
5070 PL_reg_eval_set = 0;
5071 PL_curpm = PL_reg_oldcurpm;
5076 S_to_utf8_substr(pTHX_ register regexp *prog)
5078 if (prog->float_substr && !prog->float_utf8) {
5080 prog->float_utf8 = sv = newSVsv(prog->float_substr);
5081 sv_utf8_upgrade(sv);
5082 if (SvTAIL(prog->float_substr))
5084 if (prog->float_substr == prog->check_substr)
5085 prog->check_utf8 = sv;
5087 if (prog->anchored_substr && !prog->anchored_utf8) {
5089 prog->anchored_utf8 = sv = newSVsv(prog->anchored_substr);
5090 sv_utf8_upgrade(sv);
5091 if (SvTAIL(prog->anchored_substr))
5093 if (prog->anchored_substr == prog->check_substr)
5094 prog->check_utf8 = sv;
5099 S_to_byte_substr(pTHX_ register regexp *prog)
5102 if (prog->float_utf8 && !prog->float_substr) {
5104 prog->float_substr = sv = newSVsv(prog->float_utf8);
5105 if (sv_utf8_downgrade(sv, TRUE)) {
5106 if (SvTAIL(prog->float_utf8))
5110 prog->float_substr = sv = &PL_sv_undef;
5112 if (prog->float_utf8 == prog->check_utf8)
5113 prog->check_substr = sv;
5115 if (prog->anchored_utf8 && !prog->anchored_substr) {
5117 prog->anchored_substr = sv = newSVsv(prog->anchored_utf8);
5118 if (sv_utf8_downgrade(sv, TRUE)) {
5119 if (SvTAIL(prog->anchored_utf8))
5123 prog->anchored_substr = sv = &PL_sv_undef;
5125 if (prog->anchored_utf8 == prog->check_utf8)
5126 prog->check_substr = sv;
5132 * c-indentation-style: bsd
5134 * indent-tabs-mode: t
5137 * ex: set ts=8 sts=4 sw=4 noet: