5 * "One Ring to rule them all, One Ring to find them..."
8 /* NOTE: this is derived from Henry Spencer's regexp code, and should not
9 * confused with the original package (see point 3 below). Thanks, Henry!
12 /* Additional note: this code is very heavily munged from Henry's version
13 * in places. In some spots I've traded clarity for efficiency, so don't
14 * blame Henry for some of the lack of readability.
17 /* The names of the functions have been changed from regcomp and
18 * regexec to pregcomp and pregexec in order to avoid conflicts
19 * with the POSIX routines of the same names.
22 #ifdef PERL_EXT_RE_BUILD
23 /* need to replace pregcomp et al, so enable that */
24 # ifndef PERL_IN_XSUB_RE
25 # define PERL_IN_XSUB_RE
27 /* need access to debugger hooks */
28 # if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING)
33 #ifdef PERL_IN_XSUB_RE
34 /* We *really* need to overwrite these symbols: */
35 # define Perl_regexec_flags my_regexec
36 # define Perl_regdump my_regdump
37 # define Perl_regprop my_regprop
38 # define Perl_re_intuit_start my_re_intuit_start
39 /* *These* symbols are masked to allow static link. */
40 # define Perl_pregexec my_pregexec
41 # define Perl_reginitcolors my_reginitcolors
42 # define Perl_regclass_swash my_regclass_swash
44 # define PERL_NO_GET_CONTEXT
49 * pregcomp and pregexec -- regsub and regerror are not used in perl
51 * Copyright (c) 1986 by University of Toronto.
52 * Written by Henry Spencer. Not derived from licensed software.
54 * Permission is granted to anyone to use this software for any
55 * purpose on any computer system, and to redistribute it freely,
56 * subject to the following restrictions:
58 * 1. The author is not responsible for the consequences of use of
59 * this software, no matter how awful, even if they arise
62 * 2. The origin of this software must not be misrepresented, either
63 * by explicit claim or by omission.
65 * 3. Altered versions must be plainly marked as such, and must not
66 * be misrepresented as being the original software.
68 **** Alterations to Henry's code are...
70 **** Copyright (c) 1991-2002, Larry Wall
72 **** You may distribute under the terms of either the GNU General Public
73 **** License or the Artistic License, as specified in the README file.
75 * Beware that some of this code is subtly aware of the way operator
76 * precedence is structured in regular expressions. Serious changes in
77 * regular-expression syntax might require a total rethink.
80 #define PERL_IN_REGEXEC_C
85 #define RF_tainted 1 /* tainted information used? */
86 #define RF_warned 2 /* warned about big count? */
87 #define RF_evaled 4 /* Did an EVAL with setting? */
88 #define RF_utf8 8 /* String contains multibyte chars? */
90 #define UTF (PL_reg_flags & RF_utf8)
92 #define RS_init 1 /* eval environment created */
93 #define RS_set 2 /* replsv value is set */
99 #define REGINCLASS(p,c) (ANYOF_FLAGS(p) ? reginclass(p,c,0,0) : ANYOF_BITMAP_TEST(p,*(c)))
105 #define CHR_SVLEN(sv) (do_utf8 ? sv_len_utf8(sv) : SvCUR(sv))
106 #define CHR_DIST(a,b) (PL_reg_match_utf8 ? utf8_distance(a,b) : a - b)
108 #define reghop_c(pos,off) ((char*)reghop((U8*)pos, off))
109 #define reghopmaybe_c(pos,off) ((char*)reghopmaybe((U8*)pos, off))
110 #define HOP(pos,off) (PL_reg_match_utf8 ? reghop((U8*)pos, off) : (U8*)(pos + off))
111 #define HOPMAYBE(pos,off) (PL_reg_match_utf8 ? reghopmaybe((U8*)pos, off) : (U8*)(pos + off))
112 #define HOPc(pos,off) ((char*)HOP(pos,off))
113 #define HOPMAYBEc(pos,off) ((char*)HOPMAYBE(pos,off))
115 #define HOPBACK(pos, off) ( \
116 (PL_reg_match_utf8) \
117 ? reghopmaybe((U8*)pos, -off) \
118 : (pos - off >= PL_bostr) \
122 #define HOPBACKc(pos, off) (char*)HOPBACK(pos, off)
124 #define reghop3_c(pos,off,lim) ((char*)reghop3((U8*)pos, off, (U8*)lim))
125 #define reghopmaybe3_c(pos,off,lim) ((char*)reghopmaybe3((U8*)pos, off, (U8*)lim))
126 #define HOP3(pos,off,lim) (PL_reg_match_utf8 ? reghop3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
127 #define HOPMAYBE3(pos,off,lim) (PL_reg_match_utf8 ? reghopmaybe3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
128 #define HOP3c(pos,off,lim) ((char*)HOP3(pos,off,lim))
129 #define HOPMAYBE3c(pos,off,lim) ((char*)HOPMAYBE3(pos,off,lim))
131 #define LOAD_UTF8_CHARCLASS(a,b) STMT_START { if (!CAT2(PL_utf8_,a)) { ENTER; save_re_context(); (void)CAT2(is_utf8_, a)((U8*)b); LEAVE; } } STMT_END
133 /* for use after a quantifier and before an EXACT-like node -- japhy */
134 #define JUMPABLE(rn) ( \
135 OP(rn) == OPEN || OP(rn) == CLOSE || OP(rn) == EVAL || \
136 OP(rn) == SUSPEND || OP(rn) == IFMATCH || \
137 OP(rn) == PLUS || OP(rn) == MINMOD || \
138 (PL_regkind[(U8)OP(rn)] == CURLY && ARG1(rn) > 0) \
141 #define HAS_TEXT(rn) ( \
142 PL_regkind[(U8)OP(rn)] == EXACT || PL_regkind[(U8)OP(rn)] == REF \
146 Search for mandatory following text node; for lookahead, the text must
147 follow but for lookbehind (rn->flags != 0) we skip to the next step.
149 #define FIND_NEXT_IMPT(rn) STMT_START { \
150 while (JUMPABLE(rn)) \
151 if (OP(rn) == SUSPEND || PL_regkind[(U8)OP(rn)] == CURLY) \
152 rn = NEXTOPER(NEXTOPER(rn)); \
153 else if (OP(rn) == PLUS) \
155 else if (OP(rn) == IFMATCH) \
156 rn = (rn->flags == 0) ? NEXTOPER(NEXTOPER(rn)) : rn + ARG(rn); \
157 else rn += NEXT_OFF(rn); \
160 static void restore_pos(pTHX_ void *arg);
163 S_regcppush(pTHX_ I32 parenfloor)
165 int retval = PL_savestack_ix;
166 #define REGCP_PAREN_ELEMS 4
167 int paren_elems_to_push = (PL_regsize - parenfloor) * REGCP_PAREN_ELEMS;
170 if (paren_elems_to_push < 0)
171 Perl_croak(aTHX_ "panic: paren_elems_to_push < 0");
173 #define REGCP_OTHER_ELEMS 6
174 SSCHECK(paren_elems_to_push + REGCP_OTHER_ELEMS);
175 for (p = PL_regsize; p > parenfloor; p--) {
176 /* REGCP_PARENS_ELEMS are pushed per pairs of parentheses. */
177 SSPUSHINT(PL_regendp[p]);
178 SSPUSHINT(PL_regstartp[p]);
179 SSPUSHPTR(PL_reg_start_tmp[p]);
182 /* REGCP_OTHER_ELEMS are pushed in any case, parentheses or no. */
183 SSPUSHINT(PL_regsize);
184 SSPUSHINT(*PL_reglastparen);
185 SSPUSHINT(*PL_reglastcloseparen);
186 SSPUSHPTR(PL_reginput);
187 #define REGCP_FRAME_ELEMS 2
188 /* REGCP_FRAME_ELEMS are part of the REGCP_OTHER_ELEMS and
189 * are needed for the regexp context stack bookkeeping. */
190 SSPUSHINT(paren_elems_to_push + REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
191 SSPUSHINT(SAVEt_REGCONTEXT); /* Magic cookie. */
196 /* These are needed since we do not localize EVAL nodes: */
197 # define REGCP_SET(cp) DEBUG_r(PerlIO_printf(Perl_debug_log, \
198 " Setting an EVAL scope, savestack=%"IVdf"\n", \
199 (IV)PL_savestack_ix)); cp = PL_savestack_ix
201 # define REGCP_UNWIND(cp) DEBUG_r(cp != PL_savestack_ix ? \
202 PerlIO_printf(Perl_debug_log, \
203 " Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
204 (IV)(cp), (IV)PL_savestack_ix) : 0); regcpblow(cp)
214 /* Pop REGCP_OTHER_ELEMS before the parentheses loop starts. */
216 assert(i == SAVEt_REGCONTEXT); /* Check that the magic cookie is there. */
217 i = SSPOPINT; /* Parentheses elements to pop. */
218 input = (char *) SSPOPPTR;
219 *PL_reglastcloseparen = SSPOPINT;
220 *PL_reglastparen = SSPOPINT;
221 PL_regsize = SSPOPINT;
223 /* Now restore the parentheses context. */
224 for (i -= (REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
225 i > 0; i -= REGCP_PAREN_ELEMS) {
226 paren = (U32)SSPOPINT;
227 PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
228 PL_regstartp[paren] = SSPOPINT;
230 if (paren <= *PL_reglastparen)
231 PL_regendp[paren] = tmps;
233 PerlIO_printf(Perl_debug_log,
234 " restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
235 (UV)paren, (IV)PL_regstartp[paren],
236 (IV)(PL_reg_start_tmp[paren] - PL_bostr),
237 (IV)PL_regendp[paren],
238 (paren > *PL_reglastparen ? "(no)" : ""));
242 if (*PL_reglastparen + 1 <= PL_regnpar) {
243 PerlIO_printf(Perl_debug_log,
244 " restoring \\%"IVdf"..\\%"IVdf" to undef\n",
245 (IV)(*PL_reglastparen + 1), (IV)PL_regnpar);
249 /* It would seem that the similar code in regtry()
250 * already takes care of this, and in fact it is in
251 * a better location to since this code can #if 0-ed out
252 * but the code in regtry() is needed or otherwise tests
253 * requiring null fields (pat.t#187 and split.t#{13,14}
254 * (as of patchlevel 7877) will fail. Then again,
255 * this code seems to be necessary or otherwise
256 * building DynaLoader will fail:
257 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
259 for (paren = *PL_reglastparen + 1; paren <= PL_regnpar; paren++) {
260 if (paren > PL_regsize)
261 PL_regstartp[paren] = -1;
262 PL_regendp[paren] = -1;
269 S_regcp_set_to(pTHX_ I32 ss)
271 I32 tmp = PL_savestack_ix;
273 PL_savestack_ix = ss;
275 PL_savestack_ix = tmp;
279 typedef struct re_cc_state
283 struct re_cc_state *prev;
288 #define regcpblow(cp) LEAVE_SCOPE(cp) /* Ignores regcppush()ed data. */
290 #define TRYPAREN(paren, n, input) { \
293 PL_regstartp[paren] = HOPc(input, -1) - PL_bostr; \
294 PL_regendp[paren] = input - PL_bostr; \
297 PL_regendp[paren] = -1; \
299 if (regmatch(next)) \
302 PL_regendp[paren] = -1; \
307 * pregexec and friends
311 - pregexec - match a regexp against a string
314 Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend,
315 char *strbeg, I32 minend, SV *screamer, U32 nosave)
316 /* strend: pointer to null at end of string */
317 /* strbeg: real beginning of string */
318 /* minend: end of match must be >=minend after stringarg. */
319 /* nosave: For optimizations. */
322 regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
323 nosave ? 0 : REXEC_COPY_STR);
327 S_cache_re(pTHX_ regexp *prog)
329 PL_regprecomp = prog->precomp; /* Needed for FAIL. */
331 PL_regprogram = prog->program;
333 PL_regnpar = prog->nparens;
334 PL_regdata = prog->data;
339 * Need to implement the following flags for reg_anch:
341 * USE_INTUIT_NOML - Useful to call re_intuit_start() first
343 * INTUIT_AUTORITATIVE_NOML - Can trust a positive answer
344 * INTUIT_AUTORITATIVE_ML
345 * INTUIT_ONCE_NOML - Intuit can match in one location only.
348 * Another flag for this function: SECOND_TIME (so that float substrs
349 * with giant delta may be not rechecked).
352 /* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
354 /* If SCREAM, then SvPVX(sv) should be compatible with strpos and strend.
355 Otherwise, only SvCUR(sv) is used to get strbeg. */
357 /* XXXX We assume that strpos is strbeg unless sv. */
359 /* XXXX Some places assume that there is a fixed substring.
360 An update may be needed if optimizer marks as "INTUITable"
361 RExen without fixed substrings. Similarly, it is assumed that
362 lengths of all the strings are no more than minlen, thus they
363 cannot come from lookahead.
364 (Or minlen should take into account lookahead.) */
366 /* A failure to find a constant substring means that there is no need to make
367 an expensive call to REx engine, thus we celebrate a failure. Similarly,
368 finding a substring too deep into the string means that less calls to
369 regtry() should be needed.
371 REx compiler's optimizer found 4 possible hints:
372 a) Anchored substring;
374 c) Whether we are anchored (beginning-of-line or \G);
375 d) First node (of those at offset 0) which may distingush positions;
376 We use a)b)d) and multiline-part of c), and try to find a position in the
377 string which does not contradict any of them.
380 /* Most of decisions we do here should have been done at compile time.
381 The nodes of the REx which we used for the search should have been
382 deleted from the finite automaton. */
385 Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
386 char *strend, U32 flags, re_scream_pos_data *data)
388 register I32 start_shift = 0;
389 /* Should be nonnegative! */
390 register I32 end_shift = 0;
395 int do_utf8 = sv ? SvUTF8(sv) : 0; /* if no sv we have to assume bytes */
397 register char *other_last = Nullch; /* other substr checked before this */
398 char *check_at = Nullch; /* check substr found at this pos */
400 char *i_strpos = strpos;
401 SV *dsv = PERL_DEBUG_PAD_ZERO(0);
404 if (prog->reganch & ROPT_UTF8) {
405 DEBUG_r(PerlIO_printf(Perl_debug_log,
406 "UTF-8 regex...\n"));
407 PL_reg_flags |= RF_utf8;
411 char *s = PL_reg_match_utf8 ?
412 sv_uni_display(dsv, sv, 60, UNI_DISPLAY_REGEX) :
414 int len = PL_reg_match_utf8 ?
415 strlen(s) : strend - strpos;
418 if (PL_reg_match_utf8)
419 DEBUG_r(PerlIO_printf(Perl_debug_log,
420 "UTF-8 target...\n"));
421 PerlIO_printf(Perl_debug_log,
422 "%sGuessing start of match, REx%s `%s%.60s%s%s' against `%s%.*s%s%s'...\n",
423 PL_colors[4],PL_colors[5],PL_colors[0],
426 (strlen(prog->precomp) > 60 ? "..." : ""),
428 (int)(len > 60 ? 60 : len),
430 (len > 60 ? "..." : "")
434 if (prog->minlen > CHR_DIST((U8*)strend, (U8*)strpos)) {
435 DEBUG_r(PerlIO_printf(Perl_debug_log,
436 "String too short... [re_intuit_start]\n"));
439 strbeg = (sv && SvPOK(sv)) ? strend - SvCUR(sv) : strpos;
442 if (!prog->check_utf8 && prog->check_substr)
443 to_utf8_substr(prog);
444 check = prog->check_utf8;
446 if (!prog->check_substr && prog->check_utf8)
447 to_byte_substr(prog);
448 check = prog->check_substr;
450 if (check == &PL_sv_undef) {
451 DEBUG_r(PerlIO_printf(Perl_debug_log,
452 "Non-utf string cannot match utf check string\n"));
455 if (prog->reganch & ROPT_ANCH) { /* Match at beg-of-str or after \n */
456 ml_anch = !( (prog->reganch & ROPT_ANCH_SINGLE)
457 || ( (prog->reganch & ROPT_ANCH_BOL)
458 && !PL_multiline ) ); /* Check after \n? */
461 if ( !(prog->reganch & (ROPT_ANCH_GPOS /* Checked by the caller */
462 | ROPT_IMPLICIT)) /* not a real BOL */
463 /* SvCUR is not set on references: SvRV and SvPVX overlap */
465 && (strpos != strbeg)) {
466 DEBUG_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
469 if (prog->check_offset_min == prog->check_offset_max &&
470 !(prog->reganch & ROPT_CANY_SEEN)) {
471 /* Substring at constant offset from beg-of-str... */
474 s = HOP3c(strpos, prog->check_offset_min, strend);
476 slen = SvCUR(check); /* >= 1 */
478 if ( strend - s > slen || strend - s < slen - 1
479 || (strend - s == slen && strend[-1] != '\n')) {
480 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
483 /* Now should match s[0..slen-2] */
485 if (slen && (*SvPVX(check) != *s
487 && memNE(SvPVX(check), s, slen)))) {
489 DEBUG_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
493 else if (*SvPVX(check) != *s
494 || ((slen = SvCUR(check)) > 1
495 && memNE(SvPVX(check), s, slen)))
497 goto success_at_start;
500 /* Match is anchored, but substr is not anchored wrt beg-of-str. */
502 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
503 end_shift = prog->minlen - start_shift -
504 CHR_SVLEN(check) + (SvTAIL(check) != 0);
506 I32 end = prog->check_offset_max + CHR_SVLEN(check)
507 - (SvTAIL(check) != 0);
508 I32 eshift = CHR_DIST((U8*)strend, (U8*)s) - end;
510 if (end_shift < eshift)
514 else { /* Can match at random position */
517 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
518 /* Should be nonnegative! */
519 end_shift = prog->minlen - start_shift -
520 CHR_SVLEN(check) + (SvTAIL(check) != 0);
523 #ifdef DEBUGGING /* 7/99: reports of failure (with the older version) */
525 Perl_croak(aTHX_ "panic: end_shift");
529 /* Find a possible match in the region s..strend by looking for
530 the "check" substring in the region corrected by start/end_shift. */
531 if (flags & REXEC_SCREAM) {
532 I32 p = -1; /* Internal iterator of scream. */
533 I32 *pp = data ? data->scream_pos : &p;
535 if (PL_screamfirst[BmRARE(check)] >= 0
536 || ( BmRARE(check) == '\n'
537 && (BmPREVIOUS(check) == SvCUR(check) - 1)
539 s = screaminstr(sv, check,
540 start_shift + (s - strbeg), end_shift, pp, 0);
544 *data->scream_olds = s;
546 else if (prog->reganch & ROPT_CANY_SEEN)
547 s = fbm_instr((U8*)(s + start_shift),
548 (U8*)(strend - end_shift),
549 check, PL_multiline ? FBMrf_MULTILINE : 0);
551 s = fbm_instr(HOP3(s, start_shift, strend),
552 HOP3(strend, -end_shift, strbeg),
553 check, PL_multiline ? FBMrf_MULTILINE : 0);
555 /* Update the count-of-usability, remove useless subpatterns,
558 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s %s substr `%s%.*s%s'%s%s",
559 (s ? "Found" : "Did not find"),
560 (check == (do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) ? "anchored" : "floating"),
562 (int)(SvCUR(check) - (SvTAIL(check)!=0)),
564 PL_colors[1], (SvTAIL(check) ? "$" : ""),
565 (s ? " at offset " : "...\n") ) );
572 /* Finish the diagnostic message */
573 DEBUG_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
575 /* Got a candidate. Check MBOL anchoring, and the *other* substr.
576 Start with the other substr.
577 XXXX no SCREAM optimization yet - and a very coarse implementation
578 XXXX /ttx+/ results in anchored=`ttx', floating=`x'. floating will
579 *always* match. Probably should be marked during compile...
580 Probably it is right to do no SCREAM here...
583 if (do_utf8 ? (prog->float_utf8 && prog->anchored_utf8) : (prog->float_substr && prog->anchored_substr)) {
584 /* Take into account the "other" substring. */
585 /* XXXX May be hopelessly wrong for UTF... */
588 if (check == (do_utf8 ? prog->float_utf8 : prog->float_substr)) {
591 char *last = HOP3c(s, -start_shift, strbeg), *last1, *last2;
595 t = s - prog->check_offset_max;
596 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
597 && (!(prog->reganch & ROPT_UTF8)
598 || ((t = reghopmaybe3_c(s, -(prog->check_offset_max), strpos))
603 t = HOP3c(t, prog->anchored_offset, strend);
604 if (t < other_last) /* These positions already checked */
606 last2 = last1 = HOP3c(strend, -prog->minlen, strbeg);
609 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
610 /* On end-of-str: see comment below. */
611 must = do_utf8 ? prog->anchored_utf8 : prog->anchored_substr;
612 if (must == &PL_sv_undef) {
614 DEBUG_r(must = prog->anchored_utf8); /* for debug */
619 HOP3(HOP3(last1, prog->anchored_offset, strend)
620 + SvCUR(must), -(SvTAIL(must)!=0), strbeg),
622 PL_multiline ? FBMrf_MULTILINE : 0
624 DEBUG_r(PerlIO_printf(Perl_debug_log,
625 "%s anchored substr `%s%.*s%s'%s",
626 (s ? "Found" : "Contradicts"),
629 - (SvTAIL(must)!=0)),
631 PL_colors[1], (SvTAIL(must) ? "$" : "")));
633 if (last1 >= last2) {
634 DEBUG_r(PerlIO_printf(Perl_debug_log,
635 ", giving up...\n"));
638 DEBUG_r(PerlIO_printf(Perl_debug_log,
639 ", trying floating at offset %ld...\n",
640 (long)(HOP3c(s1, 1, strend) - i_strpos)));
641 other_last = HOP3c(last1, prog->anchored_offset+1, strend);
642 s = HOP3c(last, 1, strend);
646 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
647 (long)(s - i_strpos)));
648 t = HOP3c(s, -prog->anchored_offset, strbeg);
649 other_last = HOP3c(s, 1, strend);
657 else { /* Take into account the floating substring. */
662 t = HOP3c(s, -start_shift, strbeg);
664 HOP3c(strend, -prog->minlen + prog->float_min_offset, strbeg);
665 if (CHR_DIST((U8*)last, (U8*)t) > prog->float_max_offset)
666 last = HOP3c(t, prog->float_max_offset, strend);
667 s = HOP3c(t, prog->float_min_offset, strend);
670 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
671 must = do_utf8 ? prog->float_utf8 : prog->float_substr;
672 /* fbm_instr() takes into account exact value of end-of-str
673 if the check is SvTAIL(ed). Since false positives are OK,
674 and end-of-str is not later than strend we are OK. */
675 if (must == &PL_sv_undef) {
677 DEBUG_r(must = prog->float_utf8); /* for debug message */
680 s = fbm_instr((unsigned char*)s,
681 (unsigned char*)last + SvCUR(must)
683 must, PL_multiline ? FBMrf_MULTILINE : 0);
684 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s floating substr `%s%.*s%s'%s",
685 (s ? "Found" : "Contradicts"),
687 (int)(SvCUR(must) - (SvTAIL(must)!=0)),
689 PL_colors[1], (SvTAIL(must) ? "$" : "")));
692 DEBUG_r(PerlIO_printf(Perl_debug_log,
693 ", giving up...\n"));
696 DEBUG_r(PerlIO_printf(Perl_debug_log,
697 ", trying anchored starting at offset %ld...\n",
698 (long)(s1 + 1 - i_strpos)));
700 s = HOP3c(t, 1, strend);
704 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
705 (long)(s - i_strpos)));
706 other_last = s; /* Fix this later. --Hugo */
715 t = s - prog->check_offset_max;
716 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
717 && (!(prog->reganch & ROPT_UTF8)
718 || ((t = reghopmaybe3_c(s, -prog->check_offset_max, strpos))
720 /* Fixed substring is found far enough so that the match
721 cannot start at strpos. */
723 if (ml_anch && t[-1] != '\n') {
724 /* Eventually fbm_*() should handle this, but often
725 anchored_offset is not 0, so this check will not be wasted. */
726 /* XXXX In the code below we prefer to look for "^" even in
727 presence of anchored substrings. And we search even
728 beyond the found float position. These pessimizations
729 are historical artefacts only. */
731 while (t < strend - prog->minlen) {
733 if (t < check_at - prog->check_offset_min) {
734 if (do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) {
735 /* Since we moved from the found position,
736 we definitely contradict the found anchored
737 substr. Due to the above check we do not
738 contradict "check" substr.
739 Thus we can arrive here only if check substr
740 is float. Redo checking for "other"=="fixed".
743 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
744 PL_colors[0],PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
745 goto do_other_anchored;
747 /* We don't contradict the found floating substring. */
748 /* XXXX Why not check for STCLASS? */
750 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
751 PL_colors[0],PL_colors[1], (long)(s - i_strpos)));
754 /* Position contradicts check-string */
755 /* XXXX probably better to look for check-string
756 than for "\n", so one should lower the limit for t? */
757 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n",
758 PL_colors[0],PL_colors[1], (long)(t + 1 - i_strpos)));
759 other_last = strpos = s = t + 1;
764 DEBUG_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
765 PL_colors[0],PL_colors[1]));
769 DEBUG_r(PerlIO_printf(Perl_debug_log, "Starting position does not contradict /%s^%s/m...\n",
770 PL_colors[0],PL_colors[1]));
774 ++BmUSEFUL(do_utf8 ? prog->check_utf8 : prog->check_substr); /* hooray/5 */
777 /* The found string does not prohibit matching at strpos,
778 - no optimization of calling REx engine can be performed,
779 unless it was an MBOL and we are not after MBOL,
780 or a future STCLASS check will fail this. */
782 /* Even in this situation we may use MBOL flag if strpos is offset
783 wrt the start of the string. */
784 if (ml_anch && sv && !SvROK(sv) /* See prev comment on SvROK */
785 && (strpos != strbeg) && strpos[-1] != '\n'
786 /* May be due to an implicit anchor of m{.*foo} */
787 && !(prog->reganch & ROPT_IMPLICIT))
792 DEBUG_r( if (ml_anch)
793 PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n",
794 (long)(strpos - i_strpos), PL_colors[0],PL_colors[1]);
797 if (!(prog->reganch & ROPT_NAUGHTY) /* XXXX If strpos moved? */
799 prog->check_utf8 /* Could be deleted already */
800 && --BmUSEFUL(prog->check_utf8) < 0
801 && (prog->check_utf8 == prog->float_utf8)
803 prog->check_substr /* Could be deleted already */
804 && --BmUSEFUL(prog->check_substr) < 0
805 && (prog->check_substr == prog->float_substr)
808 /* If flags & SOMETHING - do not do it many times on the same match */
809 DEBUG_r(PerlIO_printf(Perl_debug_log, "... Disabling check substring...\n"));
810 SvREFCNT_dec(do_utf8 ? prog->check_utf8 : prog->check_substr);
811 if (do_utf8 ? prog->check_substr : prog->check_utf8)
812 SvREFCNT_dec(do_utf8 ? prog->check_substr : prog->check_utf8);
813 prog->check_substr = prog->check_utf8 = Nullsv; /* disable */
814 prog->float_substr = prog->float_utf8 = Nullsv; /* clear */
815 check = Nullsv; /* abort */
817 /* XXXX This is a remnant of the old implementation. It
818 looks wasteful, since now INTUIT can use many
820 prog->reganch &= ~RE_USE_INTUIT;
827 /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
828 if (prog->regstclass) {
829 /* minlen == 0 is possible if regstclass is \b or \B,
830 and the fixed substr is ''$.
831 Since minlen is already taken into account, s+1 is before strend;
832 accidentally, minlen >= 1 guaranties no false positives at s + 1
833 even for \b or \B. But (minlen? 1 : 0) below assumes that
834 regstclass does not come from lookahead... */
835 /* If regstclass takes bytelength more than 1: If charlength==1, OK.
836 This leaves EXACTF only, which is dealt with in find_byclass(). */
837 U8* str = (U8*)STRING(prog->regstclass);
838 int cl_l = (PL_regkind[(U8)OP(prog->regstclass)] == EXACT
839 ? CHR_DIST(str+STR_LEN(prog->regstclass), str)
841 char *endpos = (prog->anchored_substr || prog->anchored_utf8 || ml_anch)
842 ? HOP3c(s, (prog->minlen ? cl_l : 0), strend)
843 : (prog->float_substr || prog->float_utf8
844 ? HOP3c(HOP3c(check_at, -start_shift, strbeg),
847 char *startpos = strbeg;
850 if (prog->reganch & ROPT_UTF8) {
851 PL_regdata = prog->data;
854 s = find_byclass(prog, prog->regstclass, s, endpos, startpos, 1);
859 if (endpos == strend) {
860 DEBUG_r( PerlIO_printf(Perl_debug_log,
861 "Could not match STCLASS...\n") );
864 DEBUG_r( PerlIO_printf(Perl_debug_log,
865 "This position contradicts STCLASS...\n") );
866 if ((prog->reganch & ROPT_ANCH) && !ml_anch)
868 /* Contradict one of substrings */
869 if (prog->anchored_substr || prog->anchored_utf8) {
870 if ((do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) == check) {
871 DEBUG_r( what = "anchored" );
873 s = HOP3c(t, 1, strend);
874 if (s + start_shift + end_shift > strend) {
875 /* XXXX Should be taken into account earlier? */
876 DEBUG_r( PerlIO_printf(Perl_debug_log,
877 "Could not match STCLASS...\n") );
882 DEBUG_r( PerlIO_printf(Perl_debug_log,
883 "Looking for %s substr starting at offset %ld...\n",
884 what, (long)(s + start_shift - i_strpos)) );
887 /* Have both, check_string is floating */
888 if (t + start_shift >= check_at) /* Contradicts floating=check */
889 goto retry_floating_check;
890 /* Recheck anchored substring, but not floating... */
894 DEBUG_r( PerlIO_printf(Perl_debug_log,
895 "Looking for anchored substr starting at offset %ld...\n",
896 (long)(other_last - i_strpos)) );
897 goto do_other_anchored;
899 /* Another way we could have checked stclass at the
900 current position only: */
905 DEBUG_r( PerlIO_printf(Perl_debug_log,
906 "Looking for /%s^%s/m starting at offset %ld...\n",
907 PL_colors[0],PL_colors[1], (long)(t - i_strpos)) );
910 if (!(do_utf8 ? prog->float_utf8 : prog->float_substr)) /* Could have been deleted */
912 /* Check is floating subtring. */
913 retry_floating_check:
914 t = check_at - start_shift;
915 DEBUG_r( what = "floating" );
916 goto hop_and_restart;
919 DEBUG_r(PerlIO_printf(Perl_debug_log,
920 "By STCLASS: moving %ld --> %ld\n",
921 (long)(t - i_strpos), (long)(s - i_strpos))
925 DEBUG_r(PerlIO_printf(Perl_debug_log,
926 "Does not contradict STCLASS...\n");
931 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s%s:%s match at offset %ld\n",
932 PL_colors[4], (check ? "Guessed" : "Giving up"),
933 PL_colors[5], (long)(s - i_strpos)) );
936 fail_finish: /* Substring not found */
937 if (prog->check_substr || prog->check_utf8) /* could be removed already */
938 BmUSEFUL(do_utf8 ? prog->check_utf8 : prog->check_substr) += 5; /* hooray */
940 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
941 PL_colors[4],PL_colors[5]));
945 /* We know what class REx starts with. Try to find this position... */
947 S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *startpos, I32 norun)
949 I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
955 register I32 tmp = 1; /* Scratch variable? */
956 register bool do_utf8 = PL_reg_match_utf8;
958 /* We know what class it must start with. */
962 STRLEN skip = do_utf8 ? UTF8SKIP(s) : 1;
965 reginclass(c, (U8*)s, 0, do_utf8) :
966 REGINCLASS(c, (U8*)s) ||
967 (ANYOF_FOLD_SHARP_S(c, s, strend) &&
968 /* The assignment of 2 is intentional:
969 * for the sharp s, the skip is 2. */
970 (skip = SHARP_S_SKIP)
972 if (tmp && (norun || regtry(prog, s)))
984 if (tmp && (norun || regtry(prog, s)))
996 U8 tmpbuf1[UTF8_MAXLEN_UCLC+1];
997 U8 tmpbuf2[UTF8_MAXLEN_UCLC+1];
999 to_utf8_lower((U8*)m, tmpbuf1, &ulen1);
1000 to_utf8_upper((U8*)m, tmpbuf2, &ulen2);
1002 c1 = utf8_to_uvchr(tmpbuf1, 0);
1003 c2 = utf8_to_uvchr(tmpbuf2, 0);
1014 c2 = PL_fold_locale[c1];
1016 e = do_utf8 ? s + ln : strend - ln;
1019 e = s; /* Due to minlen logic of intuit() */
1021 /* The idea in the EXACTF* cases is to first find the
1022 * first character of the EXACTF* node and then, if
1023 * necessary, case-insensitively compare the full
1024 * text of the node. The c1 and c2 are the first
1025 * characters (though in Unicode it gets a bit
1026 * more complicated because there are more cases
1027 * than just upper and lower: one needs to use
1028 * the so-called folding case for case-insensitive
1029 * matching (called "loose matching" in Unicode).
1030 * ibcmp_utf8() will do just that. */
1034 U8 tmpbuf [UTF8_MAXLEN+1];
1035 U8 foldbuf[UTF8_MAXLEN_FOLD+1];
1036 STRLEN len, foldlen;
1040 c = utf8_to_uvchr((U8*)s, &len);
1043 ibcmp_utf8(s, (char **)0, 0, do_utf8,
1044 m, (char **)0, ln, UTF))
1045 && (norun || regtry(prog, s)) )
1048 uvchr_to_utf8(tmpbuf, c);
1049 f = to_utf8_fold(tmpbuf, foldbuf, &foldlen);
1051 && (f == c1 || f == c2)
1052 && (ln == foldlen ||
1053 !ibcmp_utf8((char *) foldbuf,
1054 (char **)0, foldlen, do_utf8,
1056 (char **)0, ln, UTF))
1057 && (norun || regtry(prog, s)) )
1065 c = utf8_to_uvchr((U8*)s, &len);
1067 /* Handle some of the three Greek sigmas cases.
1068 * Note that not all the possible combinations
1069 * are handled here: some of them are handled
1070 * by the standard folding rules, and some of
1071 * them (the character class or ANYOF cases)
1072 * are handled during compiletime in
1073 * regexec.c:S_regclass(). */
1074 if (c == (UV)UNICODE_GREEK_CAPITAL_LETTER_SIGMA ||
1075 c == (UV)UNICODE_GREEK_SMALL_LETTER_FINAL_SIGMA)
1076 c = (UV)UNICODE_GREEK_SMALL_LETTER_SIGMA;
1078 if ( (c == c1 || c == c2)
1080 ibcmp_utf8(s, (char **)0, 0, do_utf8,
1081 m, (char **)0, ln, UTF))
1082 && (norun || regtry(prog, s)) )
1085 uvchr_to_utf8(tmpbuf, c);
1086 f = to_utf8_fold(tmpbuf, foldbuf, &foldlen);
1088 && (f == c1 || f == c2)
1089 && (ln == foldlen ||
1090 !ibcmp_utf8((char *) foldbuf,
1091 (char **)0, foldlen, do_utf8,
1093 (char **)0, ln, UTF))
1094 && (norun || regtry(prog, s)) )
1105 && (ln == 1 || !(OP(c) == EXACTF
1107 : ibcmp_locale(s, m, ln)))
1108 && (norun || regtry(prog, s)) )
1114 if ( (*(U8*)s == c1 || *(U8*)s == c2)
1115 && (ln == 1 || !(OP(c) == EXACTF
1117 : ibcmp_locale(s, m, ln)))
1118 && (norun || regtry(prog, s)) )
1125 PL_reg_flags |= RF_tainted;
1132 U8 *r = reghop3((U8*)s, -1, (U8*)startpos);
1134 tmp = s > (char*)r ?
1135 utf8n_to_uvchr(r, s - (char*)r, 0, 0) :
1136 utf8n_to_uvchr(s, UTF8SKIP(s), 0, 0);
1138 tmp = ((OP(c) == BOUND ?
1139 isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
1140 LOAD_UTF8_CHARCLASS(alnum,"a");
1141 while (s < strend) {
1142 if (tmp == !(OP(c) == BOUND ?
1143 swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
1144 isALNUM_LC_utf8((U8*)s)))
1147 if ((norun || regtry(prog, s)))
1154 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
1155 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1156 while (s < strend) {
1158 !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
1160 if ((norun || regtry(prog, s)))
1166 if ((!prog->minlen && tmp) && (norun || regtry(prog, s)))
1170 PL_reg_flags |= RF_tainted;
1177 U8 *r = reghop3((U8*)s, -1, (U8*)startpos);
1179 tmp = s > (char*)r ?
1180 utf8n_to_uvchr(r, s - (char*)r, 0, 0) :
1181 utf8n_to_uvchr(s, UTF8SKIP(s), 0, 0);
1183 tmp = ((OP(c) == NBOUND ?
1184 isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
1185 LOAD_UTF8_CHARCLASS(alnum,"a");
1186 while (s < strend) {
1187 if (tmp == !(OP(c) == NBOUND ?
1188 swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
1189 isALNUM_LC_utf8((U8*)s)))
1191 else if ((norun || regtry(prog, s)))
1197 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
1198 tmp = ((OP(c) == NBOUND ?
1199 isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1200 while (s < strend) {
1202 !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
1204 else if ((norun || regtry(prog, s)))
1209 if ((!prog->minlen && !tmp) && (norun || regtry(prog, s)))
1214 LOAD_UTF8_CHARCLASS(alnum,"a");
1215 while (s < strend) {
1216 if (swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
1217 if (tmp && (norun || regtry(prog, s)))
1228 while (s < strend) {
1230 if (tmp && (norun || regtry(prog, s)))
1242 PL_reg_flags |= RF_tainted;
1244 while (s < strend) {
1245 if (isALNUM_LC_utf8((U8*)s)) {
1246 if (tmp && (norun || regtry(prog, s)))
1257 while (s < strend) {
1258 if (isALNUM_LC(*s)) {
1259 if (tmp && (norun || regtry(prog, s)))
1272 LOAD_UTF8_CHARCLASS(alnum,"a");
1273 while (s < strend) {
1274 if (!swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
1275 if (tmp && (norun || regtry(prog, s)))
1286 while (s < strend) {
1288 if (tmp && (norun || regtry(prog, s)))
1300 PL_reg_flags |= RF_tainted;
1302 while (s < strend) {
1303 if (!isALNUM_LC_utf8((U8*)s)) {
1304 if (tmp && (norun || regtry(prog, s)))
1315 while (s < strend) {
1316 if (!isALNUM_LC(*s)) {
1317 if (tmp && (norun || regtry(prog, s)))
1330 LOAD_UTF8_CHARCLASS(space," ");
1331 while (s < strend) {
1332 if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8)) {
1333 if (tmp && (norun || regtry(prog, s)))
1344 while (s < strend) {
1346 if (tmp && (norun || regtry(prog, s)))
1358 PL_reg_flags |= RF_tainted;
1360 while (s < strend) {
1361 if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) {
1362 if (tmp && (norun || regtry(prog, s)))
1373 while (s < strend) {
1374 if (isSPACE_LC(*s)) {
1375 if (tmp && (norun || regtry(prog, s)))
1388 LOAD_UTF8_CHARCLASS(space," ");
1389 while (s < strend) {
1390 if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8))) {
1391 if (tmp && (norun || regtry(prog, s)))
1402 while (s < strend) {
1404 if (tmp && (norun || regtry(prog, s)))
1416 PL_reg_flags |= RF_tainted;
1418 while (s < strend) {
1419 if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) {
1420 if (tmp && (norun || regtry(prog, s)))
1431 while (s < strend) {
1432 if (!isSPACE_LC(*s)) {
1433 if (tmp && (norun || regtry(prog, s)))
1446 LOAD_UTF8_CHARCLASS(digit,"0");
1447 while (s < strend) {
1448 if (swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
1449 if (tmp && (norun || regtry(prog, s)))
1460 while (s < strend) {
1462 if (tmp && (norun || regtry(prog, s)))
1474 PL_reg_flags |= RF_tainted;
1476 while (s < strend) {
1477 if (isDIGIT_LC_utf8((U8*)s)) {
1478 if (tmp && (norun || regtry(prog, s)))
1489 while (s < strend) {
1490 if (isDIGIT_LC(*s)) {
1491 if (tmp && (norun || regtry(prog, s)))
1504 LOAD_UTF8_CHARCLASS(digit,"0");
1505 while (s < strend) {
1506 if (!swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
1507 if (tmp && (norun || regtry(prog, s)))
1518 while (s < strend) {
1520 if (tmp && (norun || regtry(prog, s)))
1532 PL_reg_flags |= RF_tainted;
1534 while (s < strend) {
1535 if (!isDIGIT_LC_utf8((U8*)s)) {
1536 if (tmp && (norun || regtry(prog, s)))
1547 while (s < strend) {
1548 if (!isDIGIT_LC(*s)) {
1549 if (tmp && (norun || regtry(prog, s)))
1561 Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
1570 - regexec_flags - match a regexp against a string
1573 Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend,
1574 char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
1575 /* strend: pointer to null at end of string */
1576 /* strbeg: real beginning of string */
1577 /* minend: end of match must be >=minend after stringarg. */
1578 /* data: May be used for some additional optimizations. */
1579 /* nosave: For optimizations. */
1582 register regnode *c;
1583 register char *startpos = stringarg;
1584 I32 minlen; /* must match at least this many chars */
1585 I32 dontbother = 0; /* how many characters not to try at end */
1586 /* I32 start_shift = 0; */ /* Offset of the start to find
1587 constant substr. */ /* CC */
1588 I32 end_shift = 0; /* Same for the end. */ /* CC */
1589 I32 scream_pos = -1; /* Internal iterator of scream. */
1591 SV* oreplsv = GvSV(PL_replgv);
1592 bool do_utf8 = DO_UTF8(sv);
1594 SV *dsv0 = PERL_DEBUG_PAD_ZERO(0);
1595 SV *dsv1 = PERL_DEBUG_PAD_ZERO(1);
1602 PL_regnarrate = DEBUG_r_TEST;
1605 /* Be paranoid... */
1606 if (prog == NULL || startpos == NULL) {
1607 Perl_croak(aTHX_ "NULL regexp parameter");
1611 minlen = prog->minlen;
1612 if (strend - startpos < minlen) {
1613 DEBUG_r(PerlIO_printf(Perl_debug_log,
1614 "String too short [regexec_flags]...\n"));
1618 /* Check validity of program. */
1619 if (UCHARAT(prog->program) != REG_MAGIC) {
1620 Perl_croak(aTHX_ "corrupted regexp program");
1624 PL_reg_eval_set = 0;
1627 if (prog->reganch & ROPT_UTF8)
1628 PL_reg_flags |= RF_utf8;
1630 /* Mark beginning of line for ^ and lookbehind. */
1631 PL_regbol = startpos;
1635 /* Mark end of line for $ (and such) */
1638 /* see how far we have to get to not match where we matched before */
1639 PL_regtill = startpos+minend;
1641 /* We start without call_cc context. */
1644 /* If there is a "must appear" string, look for it. */
1647 if (prog->reganch & ROPT_GPOS_SEEN) { /* Need to have PL_reg_ganch */
1650 if (flags & REXEC_IGNOREPOS) /* Means: check only at start */
1651 PL_reg_ganch = startpos;
1652 else if (sv && SvTYPE(sv) >= SVt_PVMG
1654 && (mg = mg_find(sv, PERL_MAGIC_regex_global))
1655 && mg->mg_len >= 0) {
1656 PL_reg_ganch = strbeg + mg->mg_len; /* Defined pos() */
1657 if (prog->reganch & ROPT_ANCH_GPOS) {
1658 if (s > PL_reg_ganch)
1663 else /* pos() not defined */
1664 PL_reg_ganch = strbeg;
1667 if (!(flags & REXEC_CHECKED) && (prog->check_substr != Nullsv || prog->check_utf8 != Nullsv)) {
1668 re_scream_pos_data d;
1670 d.scream_olds = &scream_olds;
1671 d.scream_pos = &scream_pos;
1672 s = re_intuit_start(prog, sv, s, strend, flags, &d);
1674 DEBUG_r(PerlIO_printf(Perl_debug_log, "Not present...\n"));
1675 goto phooey; /* not present */
1681 pv_uni_display(dsv0, (U8*)prog->precomp, prog->prelen, 60,
1682 UNI_DISPLAY_REGEX) :
1684 int len0 = UTF ? SvCUR(dsv0) : prog->prelen;
1685 char *s1 = do_utf8 ? sv_uni_display(dsv1, sv, 60,
1686 UNI_DISPLAY_REGEX) : startpos;
1687 int len1 = do_utf8 ? SvCUR(dsv1) : strend - startpos;
1690 PerlIO_printf(Perl_debug_log,
1691 "%sMatching REx%s `%s%*.*s%s%s' against `%s%.*s%s%s'\n",
1692 PL_colors[4],PL_colors[5],PL_colors[0],
1695 len0 > 60 ? "..." : "",
1697 (int)(len1 > 60 ? 60 : len1),
1699 (len1 > 60 ? "..." : "")
1703 /* Simplest case: anchored match need be tried only once. */
1704 /* [unless only anchor is BOL and multiline is set] */
1705 if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) {
1706 if (s == startpos && regtry(prog, startpos))
1708 else if (PL_multiline || (prog->reganch & ROPT_IMPLICIT)
1709 || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */
1714 dontbother = minlen - 1;
1715 end = HOP3c(strend, -dontbother, strbeg) - 1;
1716 /* for multiline we only have to try after newlines */
1717 if (prog->check_substr || prog->check_utf8) {
1721 if (regtry(prog, s))
1726 if (prog->reganch & RE_USE_INTUIT) {
1727 s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
1738 if (*s++ == '\n') { /* don't need PL_utf8skip here */
1739 if (regtry(prog, s))
1746 } else if (prog->reganch & ROPT_ANCH_GPOS) {
1747 if (regtry(prog, PL_reg_ganch))
1752 /* Messy cases: unanchored match. */
1753 if ((prog->anchored_substr || prog->anchored_utf8) && prog->reganch & ROPT_SKIP) {
1754 /* we have /x+whatever/ */
1755 /* it must be a one character string (XXXX Except UTF?) */
1760 if (!(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr))
1761 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1762 ch = SvPVX(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr)[0];
1765 while (s < strend) {
1767 DEBUG_r( did_match = 1 );
1768 if (regtry(prog, s)) goto got_it;
1770 while (s < strend && *s == ch)
1777 while (s < strend) {
1779 DEBUG_r( did_match = 1 );
1780 if (regtry(prog, s)) goto got_it;
1782 while (s < strend && *s == ch)
1788 DEBUG_r(if (!did_match)
1789 PerlIO_printf(Perl_debug_log,
1790 "Did not find anchored character...\n")
1794 else if (prog->anchored_substr != Nullsv
1795 || prog->anchored_utf8 != Nullsv
1796 || ((prog->float_substr != Nullsv || prog->float_utf8 != Nullsv)
1797 && prog->float_max_offset < strend - s)) {
1802 char *last1; /* Last position checked before */
1806 if (prog->anchored_substr || prog->anchored_utf8) {
1807 if (!(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr))
1808 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1809 must = do_utf8 ? prog->anchored_utf8 : prog->anchored_substr;
1810 back_max = back_min = prog->anchored_offset;
1812 if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))
1813 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1814 must = do_utf8 ? prog->float_utf8 : prog->float_substr;
1815 back_max = prog->float_max_offset;
1816 back_min = prog->float_min_offset;
1818 if (must == &PL_sv_undef)
1819 /* could not downgrade utf8 check substring, so must fail */
1822 last = HOP3c(strend, /* Cannot start after this */
1823 -(I32)(CHR_SVLEN(must)
1824 - (SvTAIL(must) != 0) + back_min), strbeg);
1827 last1 = HOPc(s, -1);
1829 last1 = s - 1; /* bogus */
1831 /* XXXX check_substr already used to find `s', can optimize if
1832 check_substr==must. */
1834 dontbother = end_shift;
1835 strend = HOPc(strend, -dontbother);
1836 while ( (s <= last) &&
1837 ((flags & REXEC_SCREAM)
1838 ? (s = screaminstr(sv, must, HOP3c(s, back_min, strend) - strbeg,
1839 end_shift, &scream_pos, 0))
1840 : (s = fbm_instr((unsigned char*)HOP3(s, back_min, strend),
1841 (unsigned char*)strend, must,
1842 PL_multiline ? FBMrf_MULTILINE : 0))) ) {
1843 DEBUG_r( did_match = 1 );
1844 if (HOPc(s, -back_max) > last1) {
1845 last1 = HOPc(s, -back_min);
1846 s = HOPc(s, -back_max);
1849 char *t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
1851 last1 = HOPc(s, -back_min);
1855 while (s <= last1) {
1856 if (regtry(prog, s))
1862 while (s <= last1) {
1863 if (regtry(prog, s))
1869 DEBUG_r(if (!did_match)
1870 PerlIO_printf(Perl_debug_log,
1871 "Did not find %s substr `%s%.*s%s'%s...\n",
1872 ((must == prog->anchored_substr || must == prog->anchored_utf8)
1873 ? "anchored" : "floating"),
1875 (int)(SvCUR(must) - (SvTAIL(must)!=0)),
1877 PL_colors[1], (SvTAIL(must) ? "$" : ""))
1881 else if ((c = prog->regstclass)) {
1882 if (minlen && PL_regkind[(U8)OP(prog->regstclass)] != EXACT)
1883 /* don't bother with what can't match */
1884 strend = HOPc(strend, -(minlen - 1));
1886 SV *prop = sv_newmortal();
1894 pv_uni_display(dsv0, (U8*)SvPVX(prop), SvCUR(prop), 60,
1895 UNI_DISPLAY_REGEX) :
1897 len0 = UTF ? SvCUR(dsv0) : SvCUR(prop);
1899 sv_uni_display(dsv1, sv, 60, UNI_DISPLAY_REGEX) : s;
1900 len1 = UTF ? SvCUR(dsv1) : strend - s;
1901 PerlIO_printf(Perl_debug_log,
1902 "Matching stclass `%*.*s' against `%*.*s'\n",
1906 if (find_byclass(prog, c, s, strend, startpos, 0))
1908 DEBUG_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass...\n"));
1912 if (prog->float_substr != Nullsv || prog->float_utf8 != Nullsv) {
1917 if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))
1918 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1919 float_real = do_utf8 ? prog->float_utf8 : prog->float_substr;
1921 if (flags & REXEC_SCREAM) {
1922 last = screaminstr(sv, float_real, s - strbeg,
1923 end_shift, &scream_pos, 1); /* last one */
1925 last = scream_olds; /* Only one occurrence. */
1929 char *little = SvPV(float_real, len);
1931 if (SvTAIL(float_real)) {
1932 if (memEQ(strend - len + 1, little, len - 1))
1933 last = strend - len + 1;
1934 else if (!PL_multiline)
1935 last = memEQ(strend - len, little, len)
1936 ? strend - len : Nullch;
1942 last = rninstr(s, strend, little, little + len);
1944 last = strend; /* matching `$' */
1948 DEBUG_r(PerlIO_printf(Perl_debug_log,
1949 "%sCan't trim the tail, match fails (should not happen)%s\n",
1950 PL_colors[4],PL_colors[5]));
1951 goto phooey; /* Should not happen! */
1953 dontbother = strend - last + prog->float_min_offset;
1955 if (minlen && (dontbother < minlen))
1956 dontbother = minlen - 1;
1957 strend -= dontbother; /* this one's always in bytes! */
1958 /* We don't know much -- general case. */
1961 if (regtry(prog, s))
1970 if (regtry(prog, s))
1972 } while (s++ < strend);
1980 RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
1982 if (PL_reg_eval_set) {
1983 /* Preserve the current value of $^R */
1984 if (oreplsv != GvSV(PL_replgv))
1985 sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
1986 restored, the value remains
1988 restore_pos(aTHX_ 0);
1991 /* make sure $`, $&, $', and $digit will work later */
1992 if ( !(flags & REXEC_NOT_FIRST) ) {
1993 if (RX_MATCH_COPIED(prog)) {
1994 Safefree(prog->subbeg);
1995 RX_MATCH_COPIED_off(prog);
1997 if (flags & REXEC_COPY_STR) {
1998 I32 i = PL_regeol - startpos + (stringarg - strbeg);
2000 s = savepvn(strbeg, i);
2003 RX_MATCH_COPIED_on(prog);
2006 prog->subbeg = strbeg;
2007 prog->sublen = PL_regeol - strbeg; /* strend may have been modified */
2014 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n",
2015 PL_colors[4],PL_colors[5]));
2016 if (PL_reg_eval_set)
2017 restore_pos(aTHX_ 0);
2022 - regtry - try match at specific point
2024 STATIC I32 /* 0 failure, 1 success */
2025 S_regtry(pTHX_ regexp *prog, char *startpos)
2033 PL_regindent = 0; /* XXXX Not good when matches are reenterable... */
2035 if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
2038 PL_reg_eval_set = RS_init;
2040 PerlIO_printf(Perl_debug_log, " setting stack tmpbase at %"IVdf"\n",
2041 (IV)(PL_stack_sp - PL_stack_base));
2043 SAVEI32(cxstack[cxstack_ix].blk_oldsp);
2044 cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
2045 /* Otherwise OP_NEXTSTATE will free whatever on stack now. */
2047 /* Apparently this is not needed, judging by wantarray. */
2048 /* SAVEI8(cxstack[cxstack_ix].blk_gimme);
2049 cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
2052 /* Make $_ available to executed code. */
2053 if (PL_reg_sv != DEFSV) {
2054 /* SAVE_DEFSV does *not* suffice here for USE_5005THREADS */
2059 if (!(SvTYPE(PL_reg_sv) >= SVt_PVMG && SvMAGIC(PL_reg_sv)
2060 && (mg = mg_find(PL_reg_sv, PERL_MAGIC_regex_global)))) {
2061 /* prepare for quick setting of pos */
2062 sv_magic(PL_reg_sv, (SV*)0,
2063 PERL_MAGIC_regex_global, Nullch, 0);
2064 mg = mg_find(PL_reg_sv, PERL_MAGIC_regex_global);
2068 PL_reg_oldpos = mg->mg_len;
2069 SAVEDESTRUCTOR_X(restore_pos, 0);
2071 if (!PL_reg_curpm) {
2072 Newz(22,PL_reg_curpm, 1, PMOP);
2075 SV* repointer = newSViv(0);
2076 /* so we know which PL_regex_padav element is PL_reg_curpm */
2077 SvFLAGS(repointer) |= SVf_BREAK;
2078 av_push(PL_regex_padav,repointer);
2079 PL_reg_curpm->op_pmoffset = av_len(PL_regex_padav);
2080 PL_regex_pad = AvARRAY(PL_regex_padav);
2084 PM_SETRE(PL_reg_curpm, prog);
2085 PL_reg_oldcurpm = PL_curpm;
2086 PL_curpm = PL_reg_curpm;
2087 if (RX_MATCH_COPIED(prog)) {
2088 /* Here is a serious problem: we cannot rewrite subbeg,
2089 since it may be needed if this match fails. Thus
2090 $` inside (?{}) could fail... */
2091 PL_reg_oldsaved = prog->subbeg;
2092 PL_reg_oldsavedlen = prog->sublen;
2093 RX_MATCH_COPIED_off(prog);
2096 PL_reg_oldsaved = Nullch;
2097 prog->subbeg = PL_bostr;
2098 prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
2100 prog->startp[0] = startpos - PL_bostr;
2101 PL_reginput = startpos;
2102 PL_regstartp = prog->startp;
2103 PL_regendp = prog->endp;
2104 PL_reglastparen = &prog->lastparen;
2105 PL_reglastcloseparen = &prog->lastcloseparen;
2106 prog->lastparen = 0;
2108 DEBUG_r(PL_reg_starttry = startpos);
2109 if (PL_reg_start_tmpl <= prog->nparens) {
2110 PL_reg_start_tmpl = prog->nparens*3/2 + 3;
2111 if(PL_reg_start_tmp)
2112 Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
2114 New(22,PL_reg_start_tmp, PL_reg_start_tmpl, char*);
2117 /* XXXX What this code is doing here?!!! There should be no need
2118 to do this again and again, PL_reglastparen should take care of
2121 /* Tests pat.t#187 and split.t#{13,14} seem to depend on this code.
2122 * Actually, the code in regcppop() (which Ilya may be meaning by
2123 * PL_reglastparen), is not needed at all by the test suite
2124 * (op/regexp, op/pat, op/split), but that code is needed, oddly
2125 * enough, for building DynaLoader, or otherwise this
2126 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
2127 * will happen. Meanwhile, this code *is* needed for the
2128 * above-mentioned test suite tests to succeed. The common theme
2129 * on those tests seems to be returning null fields from matches.
2134 if (prog->nparens) {
2135 for (i = prog->nparens; i > *PL_reglastparen; i--) {
2142 if (regmatch(prog->program + 1)) {
2143 prog->endp[0] = PL_reginput - PL_bostr;
2146 REGCP_UNWIND(lastcp);
2150 #define RE_UNWIND_BRANCH 1
2151 #define RE_UNWIND_BRANCHJ 2
2155 typedef struct { /* XX: makes sense to enlarge it... */
2159 } re_unwind_generic_t;
2172 } re_unwind_branch_t;
2174 typedef union re_unwind_t {
2176 re_unwind_generic_t generic;
2177 re_unwind_branch_t branch;
2180 #define sayYES goto yes
2181 #define sayNO goto no
2182 #define sayNO_ANYOF goto no_anyof
2183 #define sayYES_FINAL goto yes_final
2184 #define sayYES_LOUD goto yes_loud
2185 #define sayNO_FINAL goto no_final
2186 #define sayNO_SILENT goto do_no
2187 #define saySAME(x) if (x) goto yes; else goto no
2189 #define REPORT_CODE_OFF 24
2192 - regmatch - main matching routine
2194 * Conceptually the strategy is simple: check to see whether the current
2195 * node matches, call self recursively to see whether the rest matches,
2196 * and then act accordingly. In practice we make some effort to avoid
2197 * recursion, in particular by going through "ordinary" nodes (that don't
2198 * need to know whether the rest of the match failed) by a loop instead of
2201 /* [lwall] I've hoisted the register declarations to the outer block in order to
2202 * maybe save a little bit of pushing and popping on the stack. It also takes
2203 * advantage of machines that use a register save mask on subroutine entry.
2205 STATIC I32 /* 0 failure, 1 success */
2206 S_regmatch(pTHX_ regnode *prog)
2208 register regnode *scan; /* Current node. */
2209 regnode *next; /* Next node. */
2210 regnode *inner; /* Next node in internal branch. */
2211 register I32 nextchr; /* renamed nextchr - nextchar colides with
2212 function of same name */
2213 register I32 n; /* no or next */
2214 register I32 ln = 0; /* len or last */
2215 register char *s = Nullch; /* operand or save */
2216 register char *locinput = PL_reginput;
2217 register I32 c1 = 0, c2 = 0, paren; /* case fold search, parenth */
2218 int minmod = 0, sw = 0, logical = 0;
2221 I32 firstcp = PL_savestack_ix;
2223 register bool do_utf8 = PL_reg_match_utf8;
2225 SV *dsv0 = PERL_DEBUG_PAD_ZERO(0);
2226 SV *dsv1 = PERL_DEBUG_PAD_ZERO(1);
2227 SV *dsv2 = PERL_DEBUG_PAD_ZERO(2);
2234 /* Note that nextchr is a byte even in UTF */
2235 nextchr = UCHARAT(locinput);
2237 while (scan != NULL) {
2240 SV *prop = sv_newmortal();
2241 int docolor = *PL_colors[0];
2242 int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
2243 int l = (PL_regeol - locinput) > taill ? taill : (PL_regeol - locinput);
2244 /* The part of the string before starttry has one color
2245 (pref0_len chars), between starttry and current
2246 position another one (pref_len - pref0_len chars),
2247 after the current position the third one.
2248 We assume that pref0_len <= pref_len, otherwise we
2249 decrease pref0_len. */
2250 int pref_len = (locinput - PL_bostr) > (5 + taill) - l
2251 ? (5 + taill) - l : locinput - PL_bostr;
2254 while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput - pref_len)))
2256 pref0_len = pref_len - (locinput - PL_reg_starttry);
2257 if (l + pref_len < (5 + taill) && l < PL_regeol - locinput)
2258 l = ( PL_regeol - locinput > (5 + taill) - pref_len
2259 ? (5 + taill) - pref_len : PL_regeol - locinput);
2260 while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput + l)))
2264 if (pref0_len > pref_len)
2265 pref0_len = pref_len;
2266 regprop(prop, scan);
2270 pv_uni_display(dsv0, (U8*)(locinput - pref_len),
2271 pref0_len, 60, UNI_DISPLAY_REGEX) :
2272 locinput - pref_len;
2273 int len0 = do_utf8 ? strlen(s0) : pref0_len;
2274 char *s1 = do_utf8 ?
2275 pv_uni_display(dsv1, (U8*)(locinput - pref_len + pref0_len),
2276 pref_len - pref0_len, 60, UNI_DISPLAY_REGEX) :
2277 locinput - pref_len + pref0_len;
2278 int len1 = do_utf8 ? strlen(s1) : pref_len - pref0_len;
2279 char *s2 = do_utf8 ?
2280 pv_uni_display(dsv2, (U8*)locinput,
2281 PL_regeol - locinput, 60, UNI_DISPLAY_REGEX) :
2283 int len2 = do_utf8 ? strlen(s2) : l;
2284 PerlIO_printf(Perl_debug_log,
2285 "%4"IVdf" <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|%3"IVdf":%*s%s\n",
2286 (IV)(locinput - PL_bostr),
2293 (docolor ? "" : "> <"),
2297 15 - l - pref_len + 1,
2299 (IV)(scan - PL_regprogram), PL_regindent*2, "",
2304 next = scan + NEXT_OFF(scan);
2310 if (locinput == PL_bostr || (PL_multiline &&
2311 (nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
2313 /* regtill = regbol; */
2318 if (locinput == PL_bostr ||
2319 ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n'))
2325 if (locinput == PL_bostr)
2329 if (locinput == PL_reg_ganch)
2339 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
2344 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
2346 if (PL_regeol - locinput > 1)
2350 if (PL_regeol != locinput)
2354 if (!nextchr && locinput >= PL_regeol)
2357 locinput += PL_utf8skip[nextchr];
2358 if (locinput > PL_regeol)
2360 nextchr = UCHARAT(locinput);
2363 nextchr = UCHARAT(++locinput);
2366 if (!nextchr && locinput >= PL_regeol)
2368 nextchr = UCHARAT(++locinput);
2371 if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
2374 locinput += PL_utf8skip[nextchr];
2375 if (locinput > PL_regeol)
2377 nextchr = UCHARAT(locinput);
2380 nextchr = UCHARAT(++locinput);
2385 if (do_utf8 != (UTF!=0)) {
2386 /* The target and the pattern have differing utf8ness. */
2392 /* The target is utf8, the pattern is not utf8. */
2396 if (NATIVE_TO_UNI(*(U8*)s) !=
2397 utf8_to_uvuni((U8*)l, &ulen))
2404 /* The target is not utf8, the pattern is utf8. */
2408 if (NATIVE_TO_UNI(*((U8*)l)) !=
2409 utf8_to_uvuni((U8*)s, &ulen))
2416 nextchr = UCHARAT(locinput);
2419 /* The target and the pattern have the same utf8ness. */
2420 /* Inline the first character, for speed. */
2421 if (UCHARAT(s) != nextchr)
2423 if (PL_regeol - locinput < ln)
2425 if (ln > 1 && memNE(s, locinput, ln))
2428 nextchr = UCHARAT(locinput);
2431 PL_reg_flags |= RF_tainted;
2437 if (do_utf8 || UTF) {
2438 /* Either target or the pattern are utf8. */
2440 char *e = PL_regeol;
2442 if (ibcmp_utf8(s, 0, ln, UTF,
2443 l, &e, 0, do_utf8)) {
2444 /* One more case for the sharp s:
2445 * pack("U0U*", 0xDF) =~ /ss/i,
2446 * the 0xC3 0x9F are the UTF-8
2447 * byte sequence for the U+00DF. */
2449 toLOWER(s[0]) == 's' &&
2451 toLOWER(s[1]) == 's' &&
2458 nextchr = UCHARAT(locinput);
2462 /* Neither the target and the pattern are utf8. */
2464 /* Inline the first character, for speed. */
2465 if (UCHARAT(s) != nextchr &&
2466 UCHARAT(s) != ((OP(scan) == EXACTF)
2467 ? PL_fold : PL_fold_locale)[nextchr])
2469 if (PL_regeol - locinput < ln)
2471 if (ln > 1 && (OP(scan) == EXACTF
2472 ? ibcmp(s, locinput, ln)
2473 : ibcmp_locale(s, locinput, ln)))
2476 nextchr = UCHARAT(locinput);
2480 STRLEN inclasslen = PL_regeol - locinput;
2482 if (!reginclass(scan, (U8*)locinput, &inclasslen, do_utf8))
2484 if (locinput >= PL_regeol)
2486 locinput += inclasslen ? inclasslen : UTF8SKIP(locinput);
2487 nextchr = UCHARAT(locinput);
2492 nextchr = UCHARAT(locinput);
2493 if (!REGINCLASS(scan, (U8*)locinput))
2495 if (!nextchr && locinput >= PL_regeol)
2497 nextchr = UCHARAT(++locinput);
2501 /* If we might have the case of the German sharp s
2502 * in a casefolding Unicode character class. */
2504 if (ANYOF_FOLD_SHARP_S(scan, locinput, PL_regeol)) {
2505 locinput += SHARP_S_SKIP;
2506 nextchr = UCHARAT(locinput);
2512 PL_reg_flags |= RF_tainted;
2518 LOAD_UTF8_CHARCLASS(alnum,"a");
2519 if (!(OP(scan) == ALNUM
2520 ? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
2521 : isALNUM_LC_utf8((U8*)locinput)))
2525 locinput += PL_utf8skip[nextchr];
2526 nextchr = UCHARAT(locinput);
2529 if (!(OP(scan) == ALNUM
2530 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
2532 nextchr = UCHARAT(++locinput);
2535 PL_reg_flags |= RF_tainted;
2538 if (!nextchr && locinput >= PL_regeol)
2541 LOAD_UTF8_CHARCLASS(alnum,"a");
2542 if (OP(scan) == NALNUM
2543 ? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
2544 : isALNUM_LC_utf8((U8*)locinput))
2548 locinput += PL_utf8skip[nextchr];
2549 nextchr = UCHARAT(locinput);
2552 if (OP(scan) == NALNUM
2553 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
2555 nextchr = UCHARAT(++locinput);
2559 PL_reg_flags |= RF_tainted;
2563 /* was last char in word? */
2565 if (locinput == PL_bostr)
2568 U8 *r = reghop3((U8*)locinput, -1, (U8*)PL_reg_starttry);
2570 ln = locinput > (char*)r ?
2571 utf8n_to_uvchr(r, locinput - (char*)r, 0, 0) :
2572 utf8n_to_uvchr(locinput, UTF8SKIP(locinput), 0, 0);
2574 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2575 ln = isALNUM_uni(ln);
2576 LOAD_UTF8_CHARCLASS(alnum,"a");
2577 n = swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8);
2580 ln = isALNUM_LC_uvchr(UNI_TO_NATIVE(ln));
2581 n = isALNUM_LC_utf8((U8*)locinput);
2585 ln = (locinput != PL_bostr) ?
2586 UCHARAT(locinput - 1) : '\n';
2587 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2589 n = isALNUM(nextchr);
2592 ln = isALNUM_LC(ln);
2593 n = isALNUM_LC(nextchr);
2596 if (((!ln) == (!n)) == (OP(scan) == BOUND ||
2597 OP(scan) == BOUNDL))
2601 PL_reg_flags |= RF_tainted;
2607 if (UTF8_IS_CONTINUED(nextchr)) {
2608 LOAD_UTF8_CHARCLASS(space," ");
2609 if (!(OP(scan) == SPACE
2610 ? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
2611 : isSPACE_LC_utf8((U8*)locinput)))
2615 locinput += PL_utf8skip[nextchr];
2616 nextchr = UCHARAT(locinput);
2619 if (!(OP(scan) == SPACE
2620 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
2622 nextchr = UCHARAT(++locinput);
2625 if (!(OP(scan) == SPACE
2626 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
2628 nextchr = UCHARAT(++locinput);
2632 PL_reg_flags |= RF_tainted;
2635 if (!nextchr && locinput >= PL_regeol)
2638 LOAD_UTF8_CHARCLASS(space," ");
2639 if (OP(scan) == NSPACE
2640 ? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
2641 : isSPACE_LC_utf8((U8*)locinput))
2645 locinput += PL_utf8skip[nextchr];
2646 nextchr = UCHARAT(locinput);
2649 if (OP(scan) == NSPACE
2650 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
2652 nextchr = UCHARAT(++locinput);
2655 PL_reg_flags |= RF_tainted;
2661 LOAD_UTF8_CHARCLASS(digit,"0");
2662 if (!(OP(scan) == DIGIT
2663 ? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
2664 : isDIGIT_LC_utf8((U8*)locinput)))
2668 locinput += PL_utf8skip[nextchr];
2669 nextchr = UCHARAT(locinput);
2672 if (!(OP(scan) == DIGIT
2673 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
2675 nextchr = UCHARAT(++locinput);
2678 PL_reg_flags |= RF_tainted;
2681 if (!nextchr && locinput >= PL_regeol)
2684 LOAD_UTF8_CHARCLASS(digit,"0");
2685 if (OP(scan) == NDIGIT
2686 ? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
2687 : isDIGIT_LC_utf8((U8*)locinput))
2691 locinput += PL_utf8skip[nextchr];
2692 nextchr = UCHARAT(locinput);
2695 if (OP(scan) == NDIGIT
2696 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
2698 nextchr = UCHARAT(++locinput);
2701 if (locinput >= PL_regeol)
2704 LOAD_UTF8_CHARCLASS(mark,"~");
2705 if (swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
2707 locinput += PL_utf8skip[nextchr];
2708 while (locinput < PL_regeol &&
2709 swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
2710 locinput += UTF8SKIP(locinput);
2711 if (locinput > PL_regeol)
2716 nextchr = UCHARAT(locinput);
2719 PL_reg_flags |= RF_tainted;
2723 n = ARG(scan); /* which paren pair */
2724 ln = PL_regstartp[n];
2725 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
2726 if (*PL_reglastparen < n || ln == -1)
2727 sayNO; /* Do not match unless seen CLOSEn. */
2728 if (ln == PL_regendp[n])
2732 if (do_utf8 && OP(scan) != REF) { /* REF can do byte comparison */
2734 char *e = PL_bostr + PL_regendp[n];
2736 * Note that we can't do the "other character" lookup trick as
2737 * in the 8-bit case (no pun intended) because in Unicode we
2738 * have to map both upper and title case to lower case.
2740 if (OP(scan) == REFF) {
2741 STRLEN ulen1, ulen2;
2742 U8 tmpbuf1[UTF8_MAXLEN_UCLC+1];
2743 U8 tmpbuf2[UTF8_MAXLEN_UCLC+1];
2747 toLOWER_utf8((U8*)s, tmpbuf1, &ulen1);
2748 toLOWER_utf8((U8*)l, tmpbuf2, &ulen2);
2749 if (ulen1 != ulen2 || memNE((char *)tmpbuf1, (char *)tmpbuf2, ulen1))
2756 nextchr = UCHARAT(locinput);
2760 /* Inline the first character, for speed. */
2761 if (UCHARAT(s) != nextchr &&
2763 (UCHARAT(s) != ((OP(scan) == REFF
2764 ? PL_fold : PL_fold_locale)[nextchr]))))
2766 ln = PL_regendp[n] - ln;
2767 if (locinput + ln > PL_regeol)
2769 if (ln > 1 && (OP(scan) == REF
2770 ? memNE(s, locinput, ln)
2772 ? ibcmp(s, locinput, ln)
2773 : ibcmp_locale(s, locinput, ln))))
2776 nextchr = UCHARAT(locinput);
2787 OP_4tree *oop = PL_op;
2788 COP *ocurcop = PL_curcop;
2789 SV **ocurpad = PL_curpad;
2793 PL_op = (OP_4tree*)PL_regdata->data[n];
2794 DEBUG_r( PerlIO_printf(Perl_debug_log, " re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
2795 PL_curpad = AvARRAY((AV*)PL_regdata->data[n + 2]);
2796 PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
2800 CALLRUNOPS(aTHX); /* Scalar context. */
2803 ret = Nullsv; /* protect against empty (?{}) blocks. */
2811 PL_curpad = ocurpad;
2812 PL_curcop = ocurcop;
2814 if (logical == 2) { /* Postponed subexpression. */
2816 MAGIC *mg = Null(MAGIC*);
2818 CHECKPOINT cp, lastcp;
2820 if(SvROK(ret) || SvRMAGICAL(ret)) {
2821 SV *sv = SvROK(ret) ? SvRV(ret) : ret;
2824 mg = mg_find(sv, PERL_MAGIC_qr);
2827 re = (regexp *)mg->mg_obj;
2828 (void)ReREFCNT_inc(re);
2832 char *t = SvPV(ret, len);
2834 char *oprecomp = PL_regprecomp;
2835 I32 osize = PL_regsize;
2836 I32 onpar = PL_regnpar;
2839 re = CALLREGCOMP(aTHX_ t, t + len, &pm);
2841 & (SVs_TEMP | SVs_PADTMP | SVf_READONLY)))
2842 sv_magic(ret,(SV*)ReREFCNT_inc(re),
2844 PL_regprecomp = oprecomp;
2849 PerlIO_printf(Perl_debug_log,
2850 "Entering embedded `%s%.60s%s%s'\n",
2854 (strlen(re->precomp) > 60 ? "..." : ""))
2857 state.prev = PL_reg_call_cc;
2858 state.cc = PL_regcc;
2859 state.re = PL_reg_re;
2863 cp = regcppush(0); /* Save *all* the positions. */
2866 state.ss = PL_savestack_ix;
2867 *PL_reglastparen = 0;
2868 *PL_reglastcloseparen = 0;
2869 PL_reg_call_cc = &state;
2870 PL_reginput = locinput;
2872 /* XXXX This is too dramatic a measure... */
2875 if (regmatch(re->program + 1)) {
2876 /* Even though we succeeded, we need to restore
2877 global variables, since we may be wrapped inside
2878 SUSPEND, thus the match may be not finished yet. */
2880 /* XXXX Do this only if SUSPENDed? */
2881 PL_reg_call_cc = state.prev;
2882 PL_regcc = state.cc;
2883 PL_reg_re = state.re;
2884 cache_re(PL_reg_re);
2886 /* XXXX This is too dramatic a measure... */
2889 /* These are needed even if not SUSPEND. */
2895 REGCP_UNWIND(lastcp);
2897 PL_reg_call_cc = state.prev;
2898 PL_regcc = state.cc;
2899 PL_reg_re = state.re;
2900 cache_re(PL_reg_re);
2902 /* XXXX This is too dramatic a measure... */
2912 sv_setsv(save_scalar(PL_replgv), ret);
2916 n = ARG(scan); /* which paren pair */
2917 PL_reg_start_tmp[n] = locinput;
2922 n = ARG(scan); /* which paren pair */
2923 PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
2924 PL_regendp[n] = locinput - PL_bostr;
2925 if (n > *PL_reglastparen)
2926 *PL_reglastparen = n;
2927 *PL_reglastcloseparen = n;
2930 n = ARG(scan); /* which paren pair */
2931 sw = (*PL_reglastparen >= n && PL_regendp[n] != -1);
2934 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
2936 next = NEXTOPER(NEXTOPER(scan));
2938 next = scan + ARG(scan);
2939 if (OP(next) == IFTHEN) /* Fake one. */
2940 next = NEXTOPER(NEXTOPER(next));
2944 logical = scan->flags;
2946 /*******************************************************************
2947 PL_regcc contains infoblock about the innermost (...)* loop, and
2948 a pointer to the next outer infoblock.
2950 Here is how Y(A)*Z is processed (if it is compiled into CURLYX/WHILEM):
2952 1) After matching X, regnode for CURLYX is processed;
2954 2) This regnode creates infoblock on the stack, and calls
2955 regmatch() recursively with the starting point at WHILEM node;
2957 3) Each hit of WHILEM node tries to match A and Z (in the order
2958 depending on the current iteration, min/max of {min,max} and
2959 greediness). The information about where are nodes for "A"
2960 and "Z" is read from the infoblock, as is info on how many times "A"
2961 was already matched, and greediness.
2963 4) After A matches, the same WHILEM node is hit again.
2965 5) Each time WHILEM is hit, PL_regcc is the infoblock created by CURLYX
2966 of the same pair. Thus when WHILEM tries to match Z, it temporarily
2967 resets PL_regcc, since this Y(A)*Z can be a part of some other loop:
2968 as in (Y(A)*Z)*. If Z matches, the automaton will hit the WHILEM node
2969 of the external loop.
2971 Currently present infoblocks form a tree with a stem formed by PL_curcc
2972 and whatever it mentions via ->next, and additional attached trees
2973 corresponding to temporarily unset infoblocks as in "5" above.
2975 In the following picture infoblocks for outer loop of
2976 (Y(A)*?Z)*?T are denoted O, for inner I. NULL starting block
2977 is denoted by x. The matched string is YAAZYAZT. Temporarily postponed
2978 infoblocks are drawn below the "reset" infoblock.
2980 In fact in the picture below we do not show failed matches for Z and T
2981 by WHILEM blocks. [We illustrate minimal matches, since for them it is
2982 more obvious *why* one needs to *temporary* unset infoblocks.]
2984 Matched REx position InfoBlocks Comment
2988 Y A)*?Z)*?T x <- O <- I
2989 YA )*?Z)*?T x <- O <- I
2990 YA A)*?Z)*?T x <- O <- I
2991 YAA )*?Z)*?T x <- O <- I
2992 YAA Z)*?T x <- O # Temporary unset I
2995 YAAZ Y(A)*?Z)*?T x <- O
2998 YAAZY (A)*?Z)*?T x <- O
3001 YAAZY A)*?Z)*?T x <- O <- I
3004 YAAZYA )*?Z)*?T x <- O <- I
3007 YAAZYA Z)*?T x <- O # Temporary unset I
3013 YAAZYAZ T x # Temporary unset O
3020 *******************************************************************/
3023 CHECKPOINT cp = PL_savestack_ix;
3024 /* No need to save/restore up to this paren */
3025 I32 parenfloor = scan->flags;
3027 if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
3029 cc.oldcc = PL_regcc;
3031 /* XXXX Probably it is better to teach regpush to support
3032 parenfloor > PL_regsize... */
3033 if (parenfloor > *PL_reglastparen)
3034 parenfloor = *PL_reglastparen; /* Pessimization... */
3035 cc.parenfloor = parenfloor;
3037 cc.min = ARG1(scan);
3038 cc.max = ARG2(scan);
3039 cc.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
3043 PL_reginput = locinput;
3044 n = regmatch(PREVOPER(next)); /* start on the WHILEM */
3046 PL_regcc = cc.oldcc;
3052 * This is really hard to understand, because after we match
3053 * what we're trying to match, we must make sure the rest of
3054 * the REx is going to match for sure, and to do that we have
3055 * to go back UP the parse tree by recursing ever deeper. And
3056 * if it fails, we have to reset our parent's current state
3057 * that we can try again after backing off.
3060 CHECKPOINT cp, lastcp;
3061 CURCUR* cc = PL_regcc;
3062 char *lastloc = cc->lastloc; /* Detection of 0-len. */
3064 n = cc->cur + 1; /* how many we know we matched */
3065 PL_reginput = locinput;
3068 PerlIO_printf(Perl_debug_log,
3069 "%*s %ld out of %ld..%ld cc=%lx\n",
3070 REPORT_CODE_OFF+PL_regindent*2, "",
3071 (long)n, (long)cc->min,
3072 (long)cc->max, (long)cc)
3075 /* If degenerate scan matches "", assume scan done. */
3077 if (locinput == cc->lastloc && n >= cc->min) {
3078 PL_regcc = cc->oldcc;
3082 PerlIO_printf(Perl_debug_log,
3083 "%*s empty match detected, try continuation...\n",
3084 REPORT_CODE_OFF+PL_regindent*2, "")
3086 if (regmatch(cc->next))
3094 /* First just match a string of min scans. */
3098 cc->lastloc = locinput;
3099 if (regmatch(cc->scan))
3102 cc->lastloc = lastloc;
3107 /* Check whether we already were at this position.
3108 Postpone detection until we know the match is not
3109 *that* much linear. */
3110 if (!PL_reg_maxiter) {
3111 PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
3112 PL_reg_leftiter = PL_reg_maxiter;
3114 if (PL_reg_leftiter-- == 0) {
3115 I32 size = (PL_reg_maxiter + 7)/8;
3116 if (PL_reg_poscache) {
3117 if (PL_reg_poscache_size < size) {
3118 Renew(PL_reg_poscache, size, char);
3119 PL_reg_poscache_size = size;
3121 Zero(PL_reg_poscache, size, char);
3124 PL_reg_poscache_size = size;
3125 Newz(29, PL_reg_poscache, size, char);
3128 PerlIO_printf(Perl_debug_log,
3129 "%sDetected a super-linear match, switching on caching%s...\n",
3130 PL_colors[4], PL_colors[5])
3133 if (PL_reg_leftiter < 0) {
3134 I32 o = locinput - PL_bostr, b;
3136 o = (scan->flags & 0xf) - 1 + o * (scan->flags>>4);
3139 if (PL_reg_poscache[o] & (1<<b)) {
3141 PerlIO_printf(Perl_debug_log,
3142 "%*s already tried at this position...\n",
3143 REPORT_CODE_OFF+PL_regindent*2, "")
3147 PL_reg_poscache[o] |= (1<<b);
3151 /* Prefer next over scan for minimal matching. */
3154 PL_regcc = cc->oldcc;
3157 cp = regcppush(cc->parenfloor);
3159 if (regmatch(cc->next)) {
3161 sayYES; /* All done. */
3163 REGCP_UNWIND(lastcp);
3169 if (n >= cc->max) { /* Maximum greed exceeded? */
3170 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
3171 && !(PL_reg_flags & RF_warned)) {
3172 PL_reg_flags |= RF_warned;
3173 Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s limit (%d) exceeded",
3174 "Complex regular subexpression recursion",
3181 PerlIO_printf(Perl_debug_log,
3182 "%*s trying longer...\n",
3183 REPORT_CODE_OFF+PL_regindent*2, "")
3185 /* Try scanning more and see if it helps. */
3186 PL_reginput = locinput;
3188 cc->lastloc = locinput;
3189 cp = regcppush(cc->parenfloor);
3191 if (regmatch(cc->scan)) {
3195 REGCP_UNWIND(lastcp);
3198 cc->lastloc = lastloc;
3202 /* Prefer scan over next for maximal matching. */
3204 if (n < cc->max) { /* More greed allowed? */
3205 cp = regcppush(cc->parenfloor);
3207 cc->lastloc = locinput;
3209 if (regmatch(cc->scan)) {
3213 REGCP_UNWIND(lastcp);
3214 regcppop(); /* Restore some previous $<digit>s? */
3215 PL_reginput = locinput;
3217 PerlIO_printf(Perl_debug_log,
3218 "%*s failed, try continuation...\n",
3219 REPORT_CODE_OFF+PL_regindent*2, "")
3222 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
3223 && !(PL_reg_flags & RF_warned)) {
3224 PL_reg_flags |= RF_warned;
3225 Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s limit (%d) exceeded",
3226 "Complex regular subexpression recursion",
3230 /* Failed deeper matches of scan, so see if this one works. */
3231 PL_regcc = cc->oldcc;
3234 if (regmatch(cc->next))
3240 cc->lastloc = lastloc;
3245 next = scan + ARG(scan);
3248 inner = NEXTOPER(NEXTOPER(scan));
3251 inner = NEXTOPER(scan);
3255 if (OP(next) != c1) /* No choice. */
3256 next = inner; /* Avoid recursion. */
3258 I32 lastparen = *PL_reglastparen;
3260 re_unwind_branch_t *uw;
3262 /* Put unwinding data on stack */
3263 unwind1 = SSNEWt(1,re_unwind_branch_t);
3264 uw = SSPTRt(unwind1,re_unwind_branch_t);
3267 uw->type = ((c1 == BRANCH)
3269 : RE_UNWIND_BRANCHJ);
3270 uw->lastparen = lastparen;
3272 uw->locinput = locinput;
3273 uw->nextchr = nextchr;
3275 uw->regindent = ++PL_regindent;
3278 REGCP_SET(uw->lastcp);
3280 /* Now go into the first branch */
3293 /* We suppose that the next guy does not need
3294 backtracking: in particular, it is of constant length,
3295 and has no parenths to influence future backrefs. */
3296 ln = ARG1(scan); /* min to match */
3297 n = ARG2(scan); /* max to match */
3298 paren = scan->flags;
3300 if (paren > PL_regsize)
3302 if (paren > *PL_reglastparen)
3303 *PL_reglastparen = paren;
3305 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
3307 scan += NEXT_OFF(scan); /* Skip former OPEN. */
3308 PL_reginput = locinput;
3311 if (ln && regrepeat_hard(scan, ln, &l) < ln)
3313 /* if we matched something zero-length we don't need to
3314 backtrack - capturing parens are already defined, so
3315 the caveat in the maximal case doesn't apply
3317 XXXX if ln == 0, we can redo this check first time
3318 through the following loop
3321 n = ln; /* don't backtrack */
3322 locinput = PL_reginput;
3323 if (HAS_TEXT(next) || JUMPABLE(next)) {
3324 regnode *text_node = next;
3326 if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
3328 if (! HAS_TEXT(text_node)) c1 = c2 = -1000;
3330 if (PL_regkind[(U8)OP(text_node)] == REF) {
3332 n = ARG(text_node); /* which paren pair */
3333 ln = PL_regstartp[n];
3334 /* assume yes if we haven't seen CLOSEn */
3336 *PL_reglastparen < n ||
3343 c1 = *(PL_bostr + ln);
3345 else { c1 = (U8)*STRING(text_node); }
3346 if (OP(text_node) == EXACTF || OP(text_node) == REFF)
3348 else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
3349 c2 = PL_fold_locale[c1];
3358 /* This may be improved if l == 0. */
3359 while (n >= ln || (n == REG_INFTY && ln > 0 && l)) { /* ln overflow ? */
3360 /* If it could work, try it. */
3362 UCHARAT(PL_reginput) == c1 ||
3363 UCHARAT(PL_reginput) == c2)
3367 PL_regstartp[paren] =
3368 HOPc(PL_reginput, -l) - PL_bostr;
3369 PL_regendp[paren] = PL_reginput - PL_bostr;
3372 PL_regendp[paren] = -1;
3376 REGCP_UNWIND(lastcp);
3378 /* Couldn't or didn't -- move forward. */
3379 PL_reginput = locinput;
3380 if (regrepeat_hard(scan, 1, &l)) {
3382 locinput = PL_reginput;
3389 n = regrepeat_hard(scan, n, &l);
3390 /* if we matched something zero-length we don't need to
3391 backtrack, unless the minimum count is zero and we
3392 are capturing the result - in that case the capture
3393 being defined or not may affect later execution
3395 if (n != 0 && l == 0 && !(paren && ln == 0))
3396 ln = n; /* don't backtrack */
3397 locinput = PL_reginput;
3399 PerlIO_printf(Perl_debug_log,
3400 "%*s matched %"IVdf" times, len=%"IVdf"...\n",
3401 (int)(REPORT_CODE_OFF+PL_regindent*2), "",
3405 if (HAS_TEXT(next) || JUMPABLE(next)) {
3406 regnode *text_node = next;
3408 if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
3410 if (! HAS_TEXT(text_node)) c1 = c2 = -1000;
3412 if (PL_regkind[(U8)OP(text_node)] == REF) {
3414 n = ARG(text_node); /* which paren pair */
3415 ln = PL_regstartp[n];
3416 /* assume yes if we haven't seen CLOSEn */
3418 *PL_reglastparen < n ||
3425 c1 = *(PL_bostr + ln);
3427 else { c1 = (U8)*STRING(text_node); }
3429 if (OP(text_node) == EXACTF || OP(text_node) == REFF)
3431 else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
3432 c2 = PL_fold_locale[c1];
3443 /* If it could work, try it. */
3445 UCHARAT(PL_reginput) == c1 ||
3446 UCHARAT(PL_reginput) == c2)
3449 PerlIO_printf(Perl_debug_log,
3450 "%*s trying tail with n=%"IVdf"...\n",
3451 (int)(REPORT_CODE_OFF+PL_regindent*2), "", (IV)n)
3455 PL_regstartp[paren] = HOPc(PL_reginput, -l) - PL_bostr;
3456 PL_regendp[paren] = PL_reginput - PL_bostr;
3459 PL_regendp[paren] = -1;
3463 REGCP_UNWIND(lastcp);
3465 /* Couldn't or didn't -- back up. */
3467 locinput = HOPc(locinput, -l);
3468 PL_reginput = locinput;
3475 paren = scan->flags; /* Which paren to set */
3476 if (paren > PL_regsize)
3478 if (paren > *PL_reglastparen)
3479 *PL_reglastparen = paren;
3480 ln = ARG1(scan); /* min to match */
3481 n = ARG2(scan); /* max to match */
3482 scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
3486 ln = ARG1(scan); /* min to match */
3487 n = ARG2(scan); /* max to match */
3488 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
3493 scan = NEXTOPER(scan);
3499 scan = NEXTOPER(scan);
3503 * Lookahead to avoid useless match attempts
3504 * when we know what character comes next.
3508 * Used to only do .*x and .*?x, but now it allows
3509 * for )'s, ('s and (?{ ... })'s to be in the way
3510 * of the quantifier and the EXACT-like node. -- japhy
3513 if (HAS_TEXT(next) || JUMPABLE(next)) {
3515 regnode *text_node = next;
3517 if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
3519 if (! HAS_TEXT(text_node)) c1 = c2 = -1000;
3521 if (PL_regkind[(U8)OP(text_node)] == REF) {
3523 n = ARG(text_node); /* which paren pair */
3524 ln = PL_regstartp[n];
3525 /* assume yes if we haven't seen CLOSEn */
3527 *PL_reglastparen < n ||
3532 goto assume_ok_easy;
3534 s = (U8*)PL_bostr + ln;
3536 else { s = (U8*)STRING(text_node); }
3540 if (OP(text_node) == EXACTF || OP(text_node) == REFF)
3542 else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
3543 c2 = PL_fold_locale[c1];
3546 if (OP(text_node) == EXACTF || OP(text_node) == REFF) {
3547 STRLEN ulen1, ulen2;
3548 U8 tmpbuf1[UTF8_MAXLEN_UCLC+1];
3549 U8 tmpbuf2[UTF8_MAXLEN_UCLC+1];
3551 to_utf8_lower((U8*)s, tmpbuf1, &ulen1);
3552 to_utf8_upper((U8*)s, tmpbuf2, &ulen2);
3554 c1 = utf8_to_uvuni(tmpbuf1, 0);
3555 c2 = utf8_to_uvuni(tmpbuf2, 0);
3558 c2 = c1 = utf8_to_uvchr(s, NULL);
3566 PL_reginput = locinput;
3570 if (ln && regrepeat(scan, ln) < ln)
3572 locinput = PL_reginput;
3575 char *e; /* Should not check after this */
3576 char *old = locinput;
3578 if (n == REG_INFTY) {
3581 while (UTF8_IS_CONTINUATION(*(U8*)e))
3587 m >0 && e + UTF8SKIP(e) <= PL_regeol; m--)
3591 e = locinput + n - ln;
3597 /* Find place 'next' could work */
3600 while (locinput <= e &&
3601 UCHARAT(locinput) != c1)
3604 while (locinput <= e
3605 && UCHARAT(locinput) != c1
3606 && UCHARAT(locinput) != c2)
3609 count = locinput - old;
3616 utf8_to_uvchr((U8*)locinput, &len) != c1;
3621 for (count = 0; locinput <= e; count++) {
3622 UV c = utf8_to_uvchr((U8*)locinput, &len);
3623 if (c == c1 || c == c2)
3631 /* PL_reginput == old now */
3632 if (locinput != old) {
3633 ln = 1; /* Did some */
3634 if (regrepeat(scan, count) < count)
3637 /* PL_reginput == locinput now */
3638 TRYPAREN(paren, ln, locinput);
3639 PL_reginput = locinput; /* Could be reset... */
3640 REGCP_UNWIND(lastcp);
3641 /* Couldn't or didn't -- move forward. */
3644 locinput += UTF8SKIP(locinput);
3650 while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */
3654 c = utf8_to_uvchr((U8*)PL_reginput, NULL);
3656 c = UCHARAT(PL_reginput);
3657 /* If it could work, try it. */
3658 if (c == c1 || c == c2)
3660 TRYPAREN(paren, n, PL_reginput);
3661 REGCP_UNWIND(lastcp);
3664 /* If it could work, try it. */
3665 else if (c1 == -1000)
3667 TRYPAREN(paren, n, PL_reginput);
3668 REGCP_UNWIND(lastcp);
3670 /* Couldn't or didn't -- move forward. */
3671 PL_reginput = locinput;
3672 if (regrepeat(scan, 1)) {
3674 locinput = PL_reginput;
3682 n = regrepeat(scan, n);
3683 locinput = PL_reginput;
3684 if (ln < n && PL_regkind[(U8)OP(next)] == EOL &&
3685 ((!PL_multiline && OP(next) != MEOL) ||
3686 OP(next) == SEOL || OP(next) == EOS))
3688 ln = n; /* why back off? */
3689 /* ...because $ and \Z can match before *and* after
3690 newline at the end. Consider "\n\n" =~ /\n+\Z\n/.
3691 We should back off by one in this case. */
3692 if (UCHARAT(PL_reginput - 1) == '\n' && OP(next) != EOS)
3701 c = utf8_to_uvchr((U8*)PL_reginput, NULL);
3703 c = UCHARAT(PL_reginput);
3705 /* If it could work, try it. */
3706 if (c1 == -1000 || c == c1 || c == c2)
3708 TRYPAREN(paren, n, PL_reginput);
3709 REGCP_UNWIND(lastcp);
3711 /* Couldn't or didn't -- back up. */
3713 PL_reginput = locinput = HOPc(locinput, -1);
3721 c = utf8_to_uvchr((U8*)PL_reginput, NULL);
3723 c = UCHARAT(PL_reginput);
3725 /* If it could work, try it. */
3726 if (c1 == -1000 || c == c1 || c == c2)
3728 TRYPAREN(paren, n, PL_reginput);
3729 REGCP_UNWIND(lastcp);
3731 /* Couldn't or didn't -- back up. */
3733 PL_reginput = locinput = HOPc(locinput, -1);
3740 if (PL_reg_call_cc) {
3741 re_cc_state *cur_call_cc = PL_reg_call_cc;
3742 CURCUR *cctmp = PL_regcc;
3743 regexp *re = PL_reg_re;
3744 CHECKPOINT cp, lastcp;
3746 cp = regcppush(0); /* Save *all* the positions. */
3748 regcp_set_to(PL_reg_call_cc->ss); /* Restore parens of
3750 PL_reginput = locinput; /* Make position available to
3752 cache_re(PL_reg_call_cc->re);
3753 PL_regcc = PL_reg_call_cc->cc;
3754 PL_reg_call_cc = PL_reg_call_cc->prev;
3755 if (regmatch(cur_call_cc->node)) {
3756 PL_reg_call_cc = cur_call_cc;
3760 REGCP_UNWIND(lastcp);
3762 PL_reg_call_cc = cur_call_cc;
3768 PerlIO_printf(Perl_debug_log,
3769 "%*s continuation failed...\n",
3770 REPORT_CODE_OFF+PL_regindent*2, "")
3774 if (locinput < PL_regtill) {
3775 DEBUG_r(PerlIO_printf(Perl_debug_log,
3776 "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
3778 (long)(locinput - PL_reg_starttry),
3779 (long)(PL_regtill - PL_reg_starttry),
3781 sayNO_FINAL; /* Cannot match: too short. */
3783 PL_reginput = locinput; /* put where regtry can find it */
3784 sayYES_FINAL; /* Success! */
3786 PL_reginput = locinput; /* put where regtry can find it */
3787 sayYES_LOUD; /* Success! */
3790 PL_reginput = locinput;
3795 s = HOPBACKc(locinput, scan->flags);
3801 PL_reginput = locinput;
3806 s = HOPBACKc(locinput, scan->flags);
3812 PL_reginput = locinput;
3815 inner = NEXTOPER(NEXTOPER(scan));
3816 if (regmatch(inner) != n) {
3831 if (OP(scan) == SUSPEND) {
3832 locinput = PL_reginput;
3833 nextchr = UCHARAT(locinput);
3838 next = scan + ARG(scan);
3843 PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
3844 PTR2UV(scan), OP(scan));
3845 Perl_croak(aTHX_ "regexp memory corruption");
3852 * We get here only if there's trouble -- normally "case END" is
3853 * the terminating point.
3855 Perl_croak(aTHX_ "corrupted regexp pointers");
3861 PerlIO_printf(Perl_debug_log,
3862 "%*s %scould match...%s\n",
3863 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],PL_colors[5])
3867 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
3868 PL_colors[4],PL_colors[5]));
3874 #if 0 /* Breaks $^R */
3882 PerlIO_printf(Perl_debug_log,
3883 "%*s %sfailed...%s\n",
3884 REPORT_CODE_OFF+PL_regindent*2, "",PL_colors[4],PL_colors[5])
3890 re_unwind_t *uw = SSPTRt(unwind,re_unwind_t);
3893 case RE_UNWIND_BRANCH:
3894 case RE_UNWIND_BRANCHJ:
3896 re_unwind_branch_t *uwb = &(uw->branch);
3897 I32 lastparen = uwb->lastparen;
3899 REGCP_UNWIND(uwb->lastcp);
3900 for (n = *PL_reglastparen; n > lastparen; n--)
3902 *PL_reglastparen = n;
3903 scan = next = uwb->next;
3905 OP(scan) != (uwb->type == RE_UNWIND_BRANCH
3906 ? BRANCH : BRANCHJ) ) { /* Failure */
3913 /* Have more choice yet. Reuse the same uwb. */
3915 if ((n = (uwb->type == RE_UNWIND_BRANCH
3916 ? NEXT_OFF(next) : ARG(next))))
3919 next = NULL; /* XXXX Needn't unwinding in this case... */
3921 next = NEXTOPER(scan);
3922 if (uwb->type == RE_UNWIND_BRANCHJ)
3923 next = NEXTOPER(next);
3924 locinput = uwb->locinput;
3925 nextchr = uwb->nextchr;
3927 PL_regindent = uwb->regindent;
3934 Perl_croak(aTHX_ "regexp unwind memory corruption");
3945 - regrepeat - repeatedly match something simple, report how many
3948 * [This routine now assumes that it will only match on things of length 1.
3949 * That was true before, but now we assume scan - reginput is the count,
3950 * rather than incrementing count on every character. [Er, except utf8.]]
3953 S_regrepeat(pTHX_ regnode *p, I32 max)
3955 register char *scan;
3957 register char *loceol = PL_regeol;
3958 register I32 hardcount = 0;
3959 register bool do_utf8 = PL_reg_match_utf8;
3962 if (max != REG_INFTY && max < loceol - scan)
3963 loceol = scan + max;
3968 while (scan < loceol && hardcount < max && *scan != '\n') {
3969 scan += UTF8SKIP(scan);
3973 while (scan < loceol && *scan != '\n')
3980 while (scan < loceol && hardcount < max) {
3981 scan += UTF8SKIP(scan);
3991 case EXACT: /* length of string is 1 */
3993 while (scan < loceol && UCHARAT(scan) == c)
3996 case EXACTF: /* length of string is 1 */
3998 while (scan < loceol &&
3999 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
4002 case EXACTFL: /* length of string is 1 */
4003 PL_reg_flags |= RF_tainted;
4005 while (scan < loceol &&
4006 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
4012 while (hardcount < max && scan < loceol &&
4013 reginclass(p, (U8*)scan, 0, do_utf8)) {
4014 scan += UTF8SKIP(scan);
4018 while (scan < loceol && REGINCLASS(p, (U8*)scan))
4025 LOAD_UTF8_CHARCLASS(alnum,"a");
4026 while (hardcount < max && scan < loceol &&
4027 swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
4028 scan += UTF8SKIP(scan);
4032 while (scan < loceol && isALNUM(*scan))
4037 PL_reg_flags |= RF_tainted;
4040 while (hardcount < max && scan < loceol &&
4041 isALNUM_LC_utf8((U8*)scan)) {
4042 scan += UTF8SKIP(scan);
4046 while (scan < loceol && isALNUM_LC(*scan))
4053 LOAD_UTF8_CHARCLASS(alnum,"a");
4054 while (hardcount < max && scan < loceol &&
4055 !swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
4056 scan += UTF8SKIP(scan);
4060 while (scan < loceol && !isALNUM(*scan))
4065 PL_reg_flags |= RF_tainted;
4068 while (hardcount < max && scan < loceol &&
4069 !isALNUM_LC_utf8((U8*)scan)) {
4070 scan += UTF8SKIP(scan);
4074 while (scan < loceol && !isALNUM_LC(*scan))
4081 LOAD_UTF8_CHARCLASS(space," ");
4082 while (hardcount < max && scan < loceol &&
4084 swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
4085 scan += UTF8SKIP(scan);
4089 while (scan < loceol && isSPACE(*scan))
4094 PL_reg_flags |= RF_tainted;
4097 while (hardcount < max && scan < loceol &&
4098 (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
4099 scan += UTF8SKIP(scan);
4103 while (scan < loceol && isSPACE_LC(*scan))
4110 LOAD_UTF8_CHARCLASS(space," ");
4111 while (hardcount < max && scan < loceol &&
4113 swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
4114 scan += UTF8SKIP(scan);
4118 while (scan < loceol && !isSPACE(*scan))
4123 PL_reg_flags |= RF_tainted;
4126 while (hardcount < max && scan < loceol &&
4127 !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
4128 scan += UTF8SKIP(scan);
4132 while (scan < loceol && !isSPACE_LC(*scan))
4139 LOAD_UTF8_CHARCLASS(digit,"0");
4140 while (hardcount < max && scan < loceol &&
4141 swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
4142 scan += UTF8SKIP(scan);
4146 while (scan < loceol && isDIGIT(*scan))
4153 LOAD_UTF8_CHARCLASS(digit,"0");
4154 while (hardcount < max && scan < loceol &&
4155 !swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
4156 scan += UTF8SKIP(scan);
4160 while (scan < loceol && !isDIGIT(*scan))
4164 default: /* Called on something of 0 width. */
4165 break; /* So match right here or not at all. */
4171 c = scan - PL_reginput;
4176 SV *prop = sv_newmortal();
4179 PerlIO_printf(Perl_debug_log,
4180 "%*s %s can match %"IVdf" times out of %"IVdf"...\n",
4181 REPORT_CODE_OFF+1, "", SvPVX(prop),(IV)c,(IV)max);
4188 - regrepeat_hard - repeatedly match something, report total lenth and length
4190 * The repeater is supposed to have constant length.
4194 S_regrepeat_hard(pTHX_ regnode *p, I32 max, I32 *lp)
4196 register char *scan = Nullch;
4197 register char *start;
4198 register char *loceol = PL_regeol;
4200 I32 count = 0, res = 1;
4205 start = PL_reginput;
4206 if (PL_reg_match_utf8) {
4207 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
4210 while (start < PL_reginput) {
4212 start += UTF8SKIP(start);
4223 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
4225 *lp = l = PL_reginput - start;
4226 if (max != REG_INFTY && l*max < loceol - scan)
4227 loceol = scan + l*max;
4240 - regclass_swash - prepare the utf8 swash
4244 Perl_regclass_swash(pTHX_ register regnode* node, bool doinit, SV** listsvp, SV **altsvp)
4250 if (PL_regdata && PL_regdata->count) {
4253 if (PL_regdata->what[n] == 's') {
4254 SV *rv = (SV*)PL_regdata->data[n];
4255 AV *av = (AV*)SvRV((SV*)rv);
4258 /* See the end of regcomp.c:S_reglass() for
4259 * documentation of these array elements. */
4261 si = *av_fetch(av, 0, FALSE);
4262 a = av_fetch(av, 1, FALSE);
4263 b = av_fetch(av, 2, FALSE);
4267 else if (si && doinit) {
4268 sw = swash_init("utf8", "", si, 1, 0);
4269 (void)av_store(av, 1, sw);
4285 - reginclass - determine if a character falls into a character class
4287 The n is the ANYOF regnode, the p is the target string, lenp
4288 is pointer to the maximum length of how far to go in the p
4289 (if the lenp is zero, UTF8SKIP(p) is used),
4290 do_utf8 tells whether the target string is in UTF-8.
4295 S_reginclass(pTHX_ register regnode *n, register U8* p, STRLEN* lenp, register bool do_utf8)
4297 char flags = ANYOF_FLAGS(n);
4303 c = do_utf8 ? utf8_to_uvchr(p, &len) : *p;
4305 plen = lenp ? *lenp : UNISKIP(NATIVE_TO_UNI(c));
4306 if (do_utf8 || (flags & ANYOF_UNICODE)) {
4309 if (do_utf8 && !ANYOF_RUNTIME(n)) {
4310 if (len != (STRLEN)-1 && c < 256 && ANYOF_BITMAP_TEST(n, c))
4313 if (!match && do_utf8 && (flags & ANYOF_UNICODE_ALL) && c >= 256)
4317 SV *sw = regclass_swash(n, TRUE, 0, (SV**)&av);
4320 if (swash_fetch(sw, p, do_utf8))
4322 else if (flags & ANYOF_FOLD) {
4323 if (!match && lenp && av) {
4326 for (i = 0; i <= av_len(av); i++) {
4327 SV* sv = *av_fetch(av, i, FALSE);
4329 char *s = SvPV(sv, len);
4331 if (len <= plen && memEQ(s, (char*)p, len)) {
4339 U8 tmpbuf[UTF8_MAXLEN_FOLD+1];
4342 to_utf8_fold(p, tmpbuf, &tmplen);
4343 if (swash_fetch(sw, tmpbuf, do_utf8))
4349 if (match && lenp && *lenp == 0)
4350 *lenp = UNISKIP(NATIVE_TO_UNI(c));
4352 if (!match && c < 256) {
4353 if (ANYOF_BITMAP_TEST(n, c))
4355 else if (flags & ANYOF_FOLD) {
4358 if (flags & ANYOF_LOCALE) {
4359 PL_reg_flags |= RF_tainted;
4360 f = PL_fold_locale[c];
4364 if (f != c && ANYOF_BITMAP_TEST(n, f))
4368 if (!match && (flags & ANYOF_CLASS)) {
4369 PL_reg_flags |= RF_tainted;
4371 (ANYOF_CLASS_TEST(n, ANYOF_ALNUM) && isALNUM_LC(c)) ||
4372 (ANYOF_CLASS_TEST(n, ANYOF_NALNUM) && !isALNUM_LC(c)) ||
4373 (ANYOF_CLASS_TEST(n, ANYOF_SPACE) && isSPACE_LC(c)) ||
4374 (ANYOF_CLASS_TEST(n, ANYOF_NSPACE) && !isSPACE_LC(c)) ||
4375 (ANYOF_CLASS_TEST(n, ANYOF_DIGIT) && isDIGIT_LC(c)) ||
4376 (ANYOF_CLASS_TEST(n, ANYOF_NDIGIT) && !isDIGIT_LC(c)) ||
4377 (ANYOF_CLASS_TEST(n, ANYOF_ALNUMC) && isALNUMC_LC(c)) ||
4378 (ANYOF_CLASS_TEST(n, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
4379 (ANYOF_CLASS_TEST(n, ANYOF_ALPHA) && isALPHA_LC(c)) ||
4380 (ANYOF_CLASS_TEST(n, ANYOF_NALPHA) && !isALPHA_LC(c)) ||
4381 (ANYOF_CLASS_TEST(n, ANYOF_ASCII) && isASCII(c)) ||
4382 (ANYOF_CLASS_TEST(n, ANYOF_NASCII) && !isASCII(c)) ||
4383 (ANYOF_CLASS_TEST(n, ANYOF_CNTRL) && isCNTRL_LC(c)) ||
4384 (ANYOF_CLASS_TEST(n, ANYOF_NCNTRL) && !isCNTRL_LC(c)) ||
4385 (ANYOF_CLASS_TEST(n, ANYOF_GRAPH) && isGRAPH_LC(c)) ||
4386 (ANYOF_CLASS_TEST(n, ANYOF_NGRAPH) && !isGRAPH_LC(c)) ||
4387 (ANYOF_CLASS_TEST(n, ANYOF_LOWER) && isLOWER_LC(c)) ||
4388 (ANYOF_CLASS_TEST(n, ANYOF_NLOWER) && !isLOWER_LC(c)) ||
4389 (ANYOF_CLASS_TEST(n, ANYOF_PRINT) && isPRINT_LC(c)) ||
4390 (ANYOF_CLASS_TEST(n, ANYOF_NPRINT) && !isPRINT_LC(c)) ||
4391 (ANYOF_CLASS_TEST(n, ANYOF_PUNCT) && isPUNCT_LC(c)) ||
4392 (ANYOF_CLASS_TEST(n, ANYOF_NPUNCT) && !isPUNCT_LC(c)) ||
4393 (ANYOF_CLASS_TEST(n, ANYOF_UPPER) && isUPPER_LC(c)) ||
4394 (ANYOF_CLASS_TEST(n, ANYOF_NUPPER) && !isUPPER_LC(c)) ||
4395 (ANYOF_CLASS_TEST(n, ANYOF_XDIGIT) && isXDIGIT(c)) ||
4396 (ANYOF_CLASS_TEST(n, ANYOF_NXDIGIT) && !isXDIGIT(c)) ||
4397 (ANYOF_CLASS_TEST(n, ANYOF_PSXSPC) && isPSXSPC(c)) ||
4398 (ANYOF_CLASS_TEST(n, ANYOF_NPSXSPC) && !isPSXSPC(c)) ||
4399 (ANYOF_CLASS_TEST(n, ANYOF_BLANK) && isBLANK(c)) ||
4400 (ANYOF_CLASS_TEST(n, ANYOF_NBLANK) && !isBLANK(c))
4401 ) /* How's that for a conditional? */
4408 return (flags & ANYOF_INVERT) ? !match : match;
4412 S_reghop(pTHX_ U8 *s, I32 off)
4414 return S_reghop3(aTHX_ s, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr));
4418 S_reghop3(pTHX_ U8 *s, I32 off, U8* lim)
4421 while (off-- && s < lim) {
4422 /* XXX could check well-formedness here */
4430 if (UTF8_IS_CONTINUED(*s)) {
4431 while (s > (U8*)lim && UTF8_IS_CONTINUATION(*s))
4434 /* XXX could check well-formedness here */
4442 S_reghopmaybe(pTHX_ U8 *s, I32 off)
4444 return S_reghopmaybe3(aTHX_ s, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr));
4448 S_reghopmaybe3(pTHX_ U8* s, I32 off, U8* lim)
4451 while (off-- && s < lim) {
4452 /* XXX could check well-formedness here */
4462 if (UTF8_IS_CONTINUED(*s)) {
4463 while (s > (U8*)lim && UTF8_IS_CONTINUATION(*s))
4466 /* XXX could check well-formedness here */
4478 restore_pos(pTHX_ void *arg)
4480 if (PL_reg_eval_set) {
4481 if (PL_reg_oldsaved) {
4482 PL_reg_re->subbeg = PL_reg_oldsaved;
4483 PL_reg_re->sublen = PL_reg_oldsavedlen;
4484 RX_MATCH_COPIED_on(PL_reg_re);
4486 PL_reg_magic->mg_len = PL_reg_oldpos;
4487 PL_reg_eval_set = 0;
4488 PL_curpm = PL_reg_oldcurpm;
4493 S_to_utf8_substr(pTHX_ register regexp *prog)
4496 if (prog->float_substr && !prog->float_utf8) {
4497 prog->float_utf8 = sv = NEWSV(117, 0);
4498 SvSetMagicSV(sv, prog->float_substr);
4499 sv_utf8_upgrade(sv);
4500 if (SvTAIL(prog->float_substr))
4502 if (prog->float_substr == prog->check_substr)
4503 prog->check_utf8 = sv;
4505 if (prog->anchored_substr && !prog->anchored_utf8) {
4506 prog->anchored_utf8 = sv = NEWSV(118, 0);
4507 SvSetMagicSV(sv, prog->anchored_substr);
4508 sv_utf8_upgrade(sv);
4509 if (SvTAIL(prog->anchored_substr))
4511 if (prog->anchored_substr == prog->check_substr)
4512 prog->check_utf8 = sv;
4517 S_to_byte_substr(pTHX_ register regexp *prog)
4520 if (prog->float_utf8 && !prog->float_substr) {
4521 prog->float_substr = sv = NEWSV(117, 0);
4522 SvSetMagicSV(sv, prog->float_utf8);
4523 if (sv_utf8_downgrade(sv, TRUE)) {
4524 if (SvTAIL(prog->float_utf8))
4528 prog->float_substr = sv = &PL_sv_undef;
4530 if (prog->float_utf8 == prog->check_utf8)
4531 prog->check_substr = sv;
4533 if (prog->anchored_utf8 && !prog->anchored_substr) {
4534 prog->anchored_substr = sv = NEWSV(118, 0);
4535 SvSetMagicSV(sv, prog->anchored_utf8);
4536 if (sv_utf8_downgrade(sv, TRUE)) {
4537 if (SvTAIL(prog->anchored_utf8))
4541 prog->anchored_substr = sv = &PL_sv_undef;
4543 if (prog->anchored_utf8 == prog->check_utf8)
4544 prog->check_substr = sv;