5 * "One Ring to rule them all, One Ring to find them..."
8 /* NOTE: this is derived from Henry Spencer's regexp code, and should not
9 * confused with the original package (see point 3 below). Thanks, Henry!
12 /* Additional note: this code is very heavily munged from Henry's version
13 * in places. In some spots I've traded clarity for efficiency, so don't
14 * blame Henry for some of the lack of readability.
17 /* The names of the functions have been changed from regcomp and
18 * regexec to pregcomp and pregexec in order to avoid conflicts
19 * with the POSIX routines of the same names.
22 #ifdef PERL_EXT_RE_BUILD
23 /* need to replace pregcomp et al, so enable that */
24 # ifndef PERL_IN_XSUB_RE
25 # define PERL_IN_XSUB_RE
27 /* need access to debugger hooks */
28 # if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING)
33 #ifdef PERL_IN_XSUB_RE
34 /* We *really* need to overwrite these symbols: */
35 # define Perl_regexec_flags my_regexec
36 # define Perl_regdump my_regdump
37 # define Perl_regprop my_regprop
38 # define Perl_re_intuit_start my_re_intuit_start
39 /* *These* symbols are masked to allow static link. */
40 # define Perl_pregexec my_pregexec
41 # define Perl_reginitcolors my_reginitcolors
42 # define Perl_regclass_swash my_regclass_swash
44 # define PERL_NO_GET_CONTEXT
49 * pregcomp and pregexec -- regsub and regerror are not used in perl
51 * Copyright (c) 1986 by University of Toronto.
52 * Written by Henry Spencer. Not derived from licensed software.
54 * Permission is granted to anyone to use this software for any
55 * purpose on any computer system, and to redistribute it freely,
56 * subject to the following restrictions:
58 * 1. The author is not responsible for the consequences of use of
59 * this software, no matter how awful, even if they arise
62 * 2. The origin of this software must not be misrepresented, either
63 * by explicit claim or by omission.
65 * 3. Altered versions must be plainly marked as such, and must not
66 * be misrepresented as being the original software.
68 **** Alterations to Henry's code are...
70 **** Copyright (c) 1991-2001, Larry Wall
72 **** You may distribute under the terms of either the GNU General Public
73 **** License or the Artistic License, as specified in the README file.
75 * Beware that some of this code is subtly aware of the way operator
76 * precedence is structured in regular expressions. Serious changes in
77 * regular-expression syntax might require a total rethink.
80 #define PERL_IN_REGEXEC_C
83 #ifdef PERL_IN_XSUB_RE
84 # if defined(PERL_CAPI) || defined(PERL_OBJECT)
91 #define RF_tainted 1 /* tainted information used? */
92 #define RF_warned 2 /* warned about big count? */
93 #define RF_evaled 4 /* Did an EVAL with setting? */
94 #define RF_utf8 8 /* String contains multibyte chars? */
96 #define UTF (PL_reg_flags & RF_utf8)
98 #define RS_init 1 /* eval environment created */
99 #define RS_set 2 /* replsv value is set */
102 #define STATIC static
109 #define CHR_SVLEN(sv) (UTF ? sv_len_utf8(sv) : SvCUR(sv))
110 #define CHR_DIST(a,b) (DO_UTF8(PL_reg_sv) ? utf8_distance(a,b) : a - b)
112 #define reghop_c(pos,off) ((char*)reghop((U8*)pos, off))
113 #define reghopmaybe_c(pos,off) ((char*)reghopmaybe((U8*)pos, off))
114 #define HOP(pos,off) (DO_UTF8(PL_reg_sv) ? reghop((U8*)pos, off) : (U8*)(pos + off))
115 #define HOPMAYBE(pos,off) (DO_UTF8(PL_reg_sv) ? reghopmaybe((U8*)pos, off) : (U8*)(pos + off))
116 #define HOPc(pos,off) ((char*)HOP(pos,off))
117 #define HOPMAYBEc(pos,off) ((char*)HOPMAYBE(pos,off))
119 #define reghop3_c(pos,off,lim) ((char*)reghop3((U8*)pos, off, (U8*)lim))
120 #define reghopmaybe3_c(pos,off,lim) ((char*)reghopmaybe3((U8*)pos, off, (U8*)lim))
121 #define HOP3(pos,off,lim) (DO_UTF8(PL_reg_sv) ? reghop3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
122 #define HOPMAYBE3(pos,off,lim) (DO_UTF8(PL_reg_sv) ? reghopmaybe3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
123 #define HOP3c(pos,off,lim) ((char*)HOP3(pos,off,lim))
124 #define HOPMAYBE3c(pos,off,lim) ((char*)HOPMAYBE3(pos,off,lim))
126 static void restore_pos(pTHXo_ void *arg);
130 S_regcppush(pTHX_ I32 parenfloor)
132 int retval = PL_savestack_ix;
133 int i = (PL_regsize - parenfloor) * 4;
137 for (p = PL_regsize; p > parenfloor; p--) {
138 SSPUSHINT(PL_regendp[p]);
139 SSPUSHINT(PL_regstartp[p]);
140 SSPUSHPTR(PL_reg_start_tmp[p]);
143 SSPUSHINT(PL_regsize);
144 SSPUSHINT(*PL_reglastparen);
145 SSPUSHPTR(PL_reginput);
147 SSPUSHINT(SAVEt_REGCONTEXT);
151 /* These are needed since we do not localize EVAL nodes: */
152 # define REGCP_SET(cp) DEBUG_r(PerlIO_printf(Perl_debug_log, \
153 " Setting an EVAL scope, savestack=%"IVdf"\n", \
154 (IV)PL_savestack_ix)); cp = PL_savestack_ix
156 # define REGCP_UNWIND(cp) DEBUG_r(cp != PL_savestack_ix ? \
157 PerlIO_printf(Perl_debug_log, \
158 " Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
159 (IV)(cp), (IV)PL_savestack_ix) : 0); regcpblow(cp)
168 assert(i == SAVEt_REGCONTEXT);
170 input = (char *) SSPOPPTR;
171 *PL_reglastparen = SSPOPINT;
172 PL_regsize = SSPOPINT;
173 for (i -= 3; i > 0; i -= 4) {
174 paren = (U32)SSPOPINT;
175 PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
176 PL_regstartp[paren] = SSPOPINT;
178 if (paren <= *PL_reglastparen)
179 PL_regendp[paren] = tmps;
181 PerlIO_printf(Perl_debug_log,
182 " restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
183 (UV)paren, (IV)PL_regstartp[paren],
184 (IV)(PL_reg_start_tmp[paren] - PL_bostr),
185 (IV)PL_regendp[paren],
186 (paren > *PL_reglastparen ? "(no)" : ""));
190 if (*PL_reglastparen + 1 <= PL_regnpar) {
191 PerlIO_printf(Perl_debug_log,
192 " restoring \\%"IVdf"..\\%"IVdf" to undef\n",
193 (IV)(*PL_reglastparen + 1), (IV)PL_regnpar);
197 /* It would seem that the similar code in regtry()
198 * already takes care of this, and in fact it is in
199 * a better location to since this code can #if 0-ed out
200 * but the code in regtry() is needed or otherwise tests
201 * requiring null fields (pat.t#187 and split.t#{13,14}
202 * (as of patchlevel 7877) will fail. Then again,
203 * this code seems to be necessary or otherwise
204 * building DynaLoader will fail:
205 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
207 for (paren = *PL_reglastparen + 1; paren <= PL_regnpar; paren++) {
208 if (paren > PL_regsize)
209 PL_regstartp[paren] = -1;
210 PL_regendp[paren] = -1;
217 S_regcp_set_to(pTHX_ I32 ss)
219 I32 tmp = PL_savestack_ix;
221 PL_savestack_ix = ss;
223 PL_savestack_ix = tmp;
227 typedef struct re_cc_state
231 struct re_cc_state *prev;
236 #define regcpblow(cp) LEAVE_SCOPE(cp) /* Ignores regcppush()ed data. */
238 #define TRYPAREN(paren, n, input) { \
241 PL_regstartp[paren] = HOPc(input, -1) - PL_bostr; \
242 PL_regendp[paren] = input - PL_bostr; \
245 PL_regendp[paren] = -1; \
247 if (regmatch(next)) \
250 PL_regendp[paren] = -1; \
255 * pregexec and friends
259 - pregexec - match a regexp against a string
262 Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend,
263 char *strbeg, I32 minend, SV *screamer, U32 nosave)
264 /* strend: pointer to null at end of string */
265 /* strbeg: real beginning of string */
266 /* minend: end of match must be >=minend after stringarg. */
267 /* nosave: For optimizations. */
270 regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
271 nosave ? 0 : REXEC_COPY_STR);
275 S_cache_re(pTHX_ regexp *prog)
277 PL_regprecomp = prog->precomp; /* Needed for FAIL. */
279 PL_regprogram = prog->program;
281 PL_regnpar = prog->nparens;
282 PL_regdata = prog->data;
287 * Need to implement the following flags for reg_anch:
289 * USE_INTUIT_NOML - Useful to call re_intuit_start() first
291 * INTUIT_AUTORITATIVE_NOML - Can trust a positive answer
292 * INTUIT_AUTORITATIVE_ML
293 * INTUIT_ONCE_NOML - Intuit can match in one location only.
296 * Another flag for this function: SECOND_TIME (so that float substrs
297 * with giant delta may be not rechecked).
300 /* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
302 /* If SCREAM, then SvPVX(sv) should be compatible with strpos and strend.
303 Otherwise, only SvCUR(sv) is used to get strbeg. */
305 /* XXXX We assume that strpos is strbeg unless sv. */
307 /* XXXX Some places assume that there is a fixed substring.
308 An update may be needed if optimizer marks as "INTUITable"
309 RExen without fixed substrings. Similarly, it is assumed that
310 lengths of all the strings are no more than minlen, thus they
311 cannot come from lookahead.
312 (Or minlen should take into account lookahead.) */
314 /* A failure to find a constant substring means that there is no need to make
315 an expensive call to REx engine, thus we celebrate a failure. Similarly,
316 finding a substring too deep into the string means that less calls to
317 regtry() should be needed.
319 REx compiler's optimizer found 4 possible hints:
320 a) Anchored substring;
322 c) Whether we are anchored (beginning-of-line or \G);
323 d) First node (of those at offset 0) which may distingush positions;
324 We use a)b)d) and multiline-part of c), and try to find a position in the
325 string which does not contradict any of them.
328 /* Most of decisions we do here should have been done at compile time.
329 The nodes of the REx which we used for the search should have been
330 deleted from the finite automaton. */
333 Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
334 char *strend, U32 flags, re_scream_pos_data *data)
336 register I32 start_shift;
337 /* Should be nonnegative! */
338 register I32 end_shift;
345 register char *other_last = Nullch; /* other substr checked before this */
346 char *check_at; /* check substr found at this pos */
348 char *i_strpos = strpos;
351 DEBUG_r( if (!PL_colorset) reginitcolors() );
352 DEBUG_r(PerlIO_printf(Perl_debug_log,
353 "%sGuessing start of match, REx%s `%s%.60s%s%s' against `%s%.*s%s%s'...\n",
354 PL_colors[4],PL_colors[5],PL_colors[0],
357 (strlen(prog->precomp) > 60 ? "..." : ""),
359 (int)(strend - strpos > 60 ? 60 : strend - strpos),
360 strpos, PL_colors[1],
361 (strend - strpos > 60 ? "..." : ""))
364 if (prog->reganch & ROPT_UTF8)
365 PL_reg_flags |= RF_utf8;
367 if (prog->minlen > CHR_DIST((U8*)strend, (U8*)strpos)) {
368 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too short...\n"));
371 strbeg = (sv && SvPOK(sv)) ? strend - SvCUR(sv) : strpos;
373 check = prog->check_substr;
374 if (prog->reganch & ROPT_ANCH) { /* Match at beg-of-str or after \n */
375 ml_anch = !( (prog->reganch & ROPT_ANCH_SINGLE)
376 || ( (prog->reganch & ROPT_ANCH_BOL)
377 && !PL_multiline ) ); /* Check after \n? */
380 if ( !(prog->reganch & ROPT_ANCH_GPOS) /* Checked by the caller */
381 /* SvCUR is not set on references: SvRV and SvPVX overlap */
383 && (strpos != strbeg)) {
384 DEBUG_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
387 if (prog->check_offset_min == prog->check_offset_max) {
388 /* Substring at constant offset from beg-of-str... */
391 s = HOP3c(strpos, prog->check_offset_min, strend);
393 slen = SvCUR(check); /* >= 1 */
395 if ( strend - s > slen || strend - s < slen - 1
396 || (strend - s == slen && strend[-1] != '\n')) {
397 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
400 /* Now should match s[0..slen-2] */
402 if (slen && (*SvPVX(check) != *s
404 && memNE(SvPVX(check), s, slen)))) {
406 DEBUG_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
410 else if (*SvPVX(check) != *s
411 || ((slen = SvCUR(check)) > 1
412 && memNE(SvPVX(check), s, slen)))
414 goto success_at_start;
417 /* Match is anchored, but substr is not anchored wrt beg-of-str. */
419 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
420 end_shift = prog->minlen - start_shift -
421 CHR_SVLEN(check) + (SvTAIL(check) != 0);
423 I32 end = prog->check_offset_max + CHR_SVLEN(check)
424 - (SvTAIL(check) != 0);
425 I32 eshift = CHR_DIST((U8*)strend, (U8*)s) - end;
427 if (end_shift < eshift)
431 else { /* Can match at random position */
434 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
435 /* Should be nonnegative! */
436 end_shift = prog->minlen - start_shift -
437 CHR_SVLEN(check) + (SvTAIL(check) != 0);
440 #ifdef DEBUGGING /* 7/99: reports of failure (with the older version) */
442 Perl_croak(aTHX_ "panic: end_shift");
448 /* Find a possible match in the region s..strend by looking for
449 the "check" substring in the region corrected by start/end_shift. */
450 if (flags & REXEC_SCREAM) {
451 I32 p = -1; /* Internal iterator of scream. */
452 I32 *pp = data ? data->scream_pos : &p;
454 if (PL_screamfirst[BmRARE(check)] >= 0
455 || ( BmRARE(check) == '\n'
456 && (BmPREVIOUS(check) == SvCUR(check) - 1)
458 s = screaminstr(sv, check,
459 start_shift + (s - strbeg), end_shift, pp, 0);
463 *data->scream_olds = s;
466 s = fbm_instr(HOP3(s, start_shift, strend),
467 HOP3(strend, -end_shift, strbeg),
468 check, PL_multiline ? FBMrf_MULTILINE : 0);
470 /* Update the count-of-usability, remove useless subpatterns,
473 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s %s substr `%s%.*s%s'%s%s",
474 (s ? "Found" : "Did not find"),
475 ((check == prog->anchored_substr) ? "anchored" : "floating"),
477 (int)(SvCUR(check) - (SvTAIL(check)!=0)),
479 PL_colors[1], (SvTAIL(check) ? "$" : ""),
480 (s ? " at offset " : "...\n") ) );
487 /* Finish the diagnostic message */
488 DEBUG_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
490 /* Got a candidate. Check MBOL anchoring, and the *other* substr.
491 Start with the other substr.
492 XXXX no SCREAM optimization yet - and a very coarse implementation
493 XXXX /ttx+/ results in anchored=`ttx', floating=`x'. floating will
494 *always* match. Probably should be marked during compile...
495 Probably it is right to do no SCREAM here...
498 if (prog->float_substr && prog->anchored_substr) {
499 /* Take into account the "other" substring. */
500 /* XXXX May be hopelessly wrong for UTF... */
503 if (check == prog->float_substr) {
506 char *last = HOP3c(s, -start_shift, strbeg), *last1, *last2;
509 t = s - prog->check_offset_max;
510 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
511 && (!(prog->reganch & ROPT_UTF8)
512 || ((t = reghopmaybe3_c(s, -(prog->check_offset_max), strpos))
517 t = HOP3c(t, prog->anchored_offset, strend);
518 if (t < other_last) /* These positions already checked */
520 last2 = last1 = HOP3c(strend, -prog->minlen, strbeg);
523 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
524 /* On end-of-str: see comment below. */
525 s = fbm_instr((unsigned char*)t,
526 HOP3(HOP3(last1, prog->anchored_offset, strend)
527 + SvCUR(prog->anchored_substr),
528 -(SvTAIL(prog->anchored_substr)!=0), strbeg),
529 prog->anchored_substr,
530 PL_multiline ? FBMrf_MULTILINE : 0);
531 DEBUG_r(PerlIO_printf(Perl_debug_log,
532 "%s anchored substr `%s%.*s%s'%s",
533 (s ? "Found" : "Contradicts"),
535 (int)(SvCUR(prog->anchored_substr)
536 - (SvTAIL(prog->anchored_substr)!=0)),
537 SvPVX(prog->anchored_substr),
538 PL_colors[1], (SvTAIL(prog->anchored_substr) ? "$" : "")));
540 if (last1 >= last2) {
541 DEBUG_r(PerlIO_printf(Perl_debug_log,
542 ", giving up...\n"));
545 DEBUG_r(PerlIO_printf(Perl_debug_log,
546 ", trying floating at offset %ld...\n",
547 (long)(HOP3c(s1, 1, strend) - i_strpos)));
548 other_last = HOP3c(last1, prog->anchored_offset+1, strend);
549 s = HOP3c(last, 1, strend);
553 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
554 (long)(s - i_strpos)));
555 t = HOP3c(s, -prog->anchored_offset, strbeg);
556 other_last = HOP3c(s, 1, strend);
564 else { /* Take into account the floating substring. */
568 t = HOP3c(s, -start_shift, strbeg);
570 HOP3c(strend, -prog->minlen + prog->float_min_offset, strbeg);
571 if (CHR_DIST((U8*)last, (U8*)t) > prog->float_max_offset)
572 last = HOP3c(t, prog->float_max_offset, strend);
573 s = HOP3c(t, prog->float_min_offset, strend);
576 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
577 /* fbm_instr() takes into account exact value of end-of-str
578 if the check is SvTAIL(ed). Since false positives are OK,
579 and end-of-str is not later than strend we are OK. */
580 s = fbm_instr((unsigned char*)s,
581 (unsigned char*)last + SvCUR(prog->float_substr)
582 - (SvTAIL(prog->float_substr)!=0),
583 prog->float_substr, PL_multiline ? FBMrf_MULTILINE : 0);
584 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s floating substr `%s%.*s%s'%s",
585 (s ? "Found" : "Contradicts"),
587 (int)(SvCUR(prog->float_substr)
588 - (SvTAIL(prog->float_substr)!=0)),
589 SvPVX(prog->float_substr),
590 PL_colors[1], (SvTAIL(prog->float_substr) ? "$" : "")));
593 DEBUG_r(PerlIO_printf(Perl_debug_log,
594 ", giving up...\n"));
597 DEBUG_r(PerlIO_printf(Perl_debug_log,
598 ", trying anchored starting at offset %ld...\n",
599 (long)(s1 + 1 - i_strpos)));
601 s = HOP3c(t, 1, strend);
605 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
606 (long)(s - i_strpos)));
607 other_last = s; /* Fix this later. --Hugo */
616 t = s - prog->check_offset_max;
617 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
618 && (!(prog->reganch & ROPT_UTF8)
619 || ((t = reghopmaybe3_c(s, -prog->check_offset_max, strpos))
621 /* Fixed substring is found far enough so that the match
622 cannot start at strpos. */
624 if (ml_anch && t[-1] != '\n') {
625 /* Eventually fbm_*() should handle this, but often
626 anchored_offset is not 0, so this check will not be wasted. */
627 /* XXXX In the code below we prefer to look for "^" even in
628 presence of anchored substrings. And we search even
629 beyond the found float position. These pessimizations
630 are historical artefacts only. */
632 while (t < strend - prog->minlen) {
634 if (t < check_at - prog->check_offset_min) {
635 if (prog->anchored_substr) {
636 /* Since we moved from the found position,
637 we definitely contradict the found anchored
638 substr. Due to the above check we do not
639 contradict "check" substr.
640 Thus we can arrive here only if check substr
641 is float. Redo checking for "other"=="fixed".
644 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
645 PL_colors[0],PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
646 goto do_other_anchored;
648 /* We don't contradict the found floating substring. */
649 /* XXXX Why not check for STCLASS? */
651 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
652 PL_colors[0],PL_colors[1], (long)(s - i_strpos)));
655 /* Position contradicts check-string */
656 /* XXXX probably better to look for check-string
657 than for "\n", so one should lower the limit for t? */
658 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n",
659 PL_colors[0],PL_colors[1], (long)(t + 1 - i_strpos)));
660 other_last = strpos = s = t + 1;
665 DEBUG_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
666 PL_colors[0],PL_colors[1]));
670 DEBUG_r(PerlIO_printf(Perl_debug_log, "Starting position does not contradict /%s^%s/m...\n",
671 PL_colors[0],PL_colors[1]));
675 ++BmUSEFUL(prog->check_substr); /* hooray/5 */
678 /* The found string does not prohibit matching at strpos,
679 - no optimization of calling REx engine can be performed,
680 unless it was an MBOL and we are not after MBOL,
681 or a future STCLASS check will fail this. */
683 /* Even in this situation we may use MBOL flag if strpos is offset
684 wrt the start of the string. */
685 if (ml_anch && sv && !SvROK(sv) /* See prev comment on SvROK */
686 && (strpos != strbeg) && strpos[-1] != '\n'
687 /* May be due to an implicit anchor of m{.*foo} */
688 && !(prog->reganch & ROPT_IMPLICIT))
693 DEBUG_r( if (ml_anch)
694 PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n",
695 (long)(strpos - i_strpos), PL_colors[0],PL_colors[1]);
698 if (!(prog->reganch & ROPT_NAUGHTY) /* XXXX If strpos moved? */
699 && prog->check_substr /* Could be deleted already */
700 && --BmUSEFUL(prog->check_substr) < 0
701 && prog->check_substr == prog->float_substr)
703 /* If flags & SOMETHING - do not do it many times on the same match */
704 DEBUG_r(PerlIO_printf(Perl_debug_log, "... Disabling check substring...\n"));
705 SvREFCNT_dec(prog->check_substr);
706 prog->check_substr = Nullsv; /* disable */
707 prog->float_substr = Nullsv; /* clear */
708 check = Nullsv; /* abort */
710 /* XXXX This is a remnant of the old implementation. It
711 looks wasteful, since now INTUIT can use many
713 prog->reganch &= ~RE_USE_INTUIT;
720 /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
721 if (prog->regstclass) {
722 /* minlen == 0 is possible if regstclass is \b or \B,
723 and the fixed substr is ''$.
724 Since minlen is already taken into account, s+1 is before strend;
725 accidentally, minlen >= 1 guaranties no false positives at s + 1
726 even for \b or \B. But (minlen? 1 : 0) below assumes that
727 regstclass does not come from lookahead... */
728 /* If regstclass takes bytelength more than 1: If charlength==1, OK.
729 This leaves EXACTF only, which is dealt with in find_byclass(). */
730 U8* str = (U8*)STRING(prog->regstclass);
731 int cl_l = (PL_regkind[(U8)OP(prog->regstclass)] == EXACT
732 ? CHR_DIST(str+STR_LEN(prog->regstclass), str)
734 char *endpos = (prog->anchored_substr || ml_anch)
735 ? HOP3c(s, (prog->minlen ? cl_l : 0), strend)
736 : (prog->float_substr
737 ? HOP3c(HOP3c(check_at, -start_shift, strbeg),
740 char *startpos = strbeg;
743 if (prog->reganch & ROPT_UTF8) {
744 PL_regdata = prog->data;
747 s = find_byclass(prog, prog->regstclass, s, endpos, startpos, 1);
752 if (endpos == strend) {
753 DEBUG_r( PerlIO_printf(Perl_debug_log,
754 "Could not match STCLASS...\n") );
757 DEBUG_r( PerlIO_printf(Perl_debug_log,
758 "This position contradicts STCLASS...\n") );
759 if ((prog->reganch & ROPT_ANCH) && !ml_anch)
761 /* Contradict one of substrings */
762 if (prog->anchored_substr) {
763 if (prog->anchored_substr == check) {
764 DEBUG_r( what = "anchored" );
766 s = HOP3c(t, 1, strend);
767 if (s + start_shift + end_shift > strend) {
768 /* XXXX Should be taken into account earlier? */
769 DEBUG_r( PerlIO_printf(Perl_debug_log,
770 "Could not match STCLASS...\n") );
775 DEBUG_r( PerlIO_printf(Perl_debug_log,
776 "Looking for %s substr starting at offset %ld...\n",
777 what, (long)(s + start_shift - i_strpos)) );
780 /* Have both, check_string is floating */
781 if (t + start_shift >= check_at) /* Contradicts floating=check */
782 goto retry_floating_check;
783 /* Recheck anchored substring, but not floating... */
787 DEBUG_r( PerlIO_printf(Perl_debug_log,
788 "Looking for anchored substr starting at offset %ld...\n",
789 (long)(other_last - i_strpos)) );
790 goto do_other_anchored;
792 /* Another way we could have checked stclass at the
793 current position only: */
798 DEBUG_r( PerlIO_printf(Perl_debug_log,
799 "Looking for /%s^%s/m starting at offset %ld...\n",
800 PL_colors[0],PL_colors[1], (long)(t - i_strpos)) );
803 if (!prog->float_substr) /* Could have been deleted */
805 /* Check is floating subtring. */
806 retry_floating_check:
807 t = check_at - start_shift;
808 DEBUG_r( what = "floating" );
809 goto hop_and_restart;
812 PerlIO_printf(Perl_debug_log,
813 "By STCLASS: moving %ld --> %ld\n",
814 (long)(t - i_strpos), (long)(s - i_strpos));
816 PerlIO_printf(Perl_debug_log,
817 "Does not contradict STCLASS...\n") );
820 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s%s:%s match at offset %ld\n",
821 PL_colors[4], (check ? "Guessed" : "Giving up"),
822 PL_colors[5], (long)(s - i_strpos)) );
825 fail_finish: /* Substring not found */
826 if (prog->check_substr) /* could be removed already */
827 BmUSEFUL(prog->check_substr) += 5; /* hooray */
829 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
830 PL_colors[4],PL_colors[5]));
834 /* We know what class REx starts with. Try to find this position... */
836 S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *startpos, I32 norun)
838 I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
844 register I32 tmp = 1; /* Scratch variable? */
845 register bool do_utf8 = DO_UTF8(PL_reg_sv);
847 /* We know what class it must start with. */
851 if (reginclass(c, (U8*)s, do_utf8)) {
852 if (tmp && (norun || regtry(prog, s)))
859 s += do_utf8 ? UTF8SKIP(s) : 1;
866 c1 = to_utf8_lower((U8*)m);
867 c2 = to_utf8_upper((U8*)m);
878 c2 = PL_fold_locale[c1];
883 e = s; /* Due to minlen logic of intuit() */
889 if ( utf8_to_uv_simple((U8*)s, &len) == c1
896 UV c = utf8_to_uv_simple((U8*)s, &len);
897 if ( (c == c1 || c == c2) && regtry(prog, s) )
906 && (ln == 1 || !(OP(c) == EXACTF
908 : ibcmp_locale(s, m, ln)))
909 && (norun || regtry(prog, s)) )
915 if ( (*(U8*)s == c1 || *(U8*)s == c2)
916 && (ln == 1 || !(OP(c) == EXACTF
918 : ibcmp_locale(s, m, ln)))
919 && (norun || regtry(prog, s)) )
926 PL_reg_flags |= RF_tainted;
933 U8 *r = reghop3((U8*)s, -1, (U8*)startpos);
935 tmp = (I32)utf8_to_uv(r, s - (char*)r, 0, 0);
937 tmp = ((OP(c) == BOUND ?
938 isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
940 if (tmp == !(OP(c) == BOUND ?
941 swash_fetch(PL_utf8_alnum, (U8*)s) :
942 isALNUM_LC_utf8((U8*)s)))
945 if ((norun || regtry(prog, s)))
952 tmp = (s != startpos) ? UCHARAT(s - 1) : '\n';
953 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
956 !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
958 if ((norun || regtry(prog, s)))
964 if ((!prog->minlen && tmp) && (norun || regtry(prog, s)))
968 PL_reg_flags |= RF_tainted;
975 U8 *r = reghop3((U8*)s, -1, (U8*)startpos);
977 tmp = (I32)utf8_to_uv(r, s - (char*)r, 0, 0);
979 tmp = ((OP(c) == NBOUND ?
980 isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
982 if (tmp == !(OP(c) == NBOUND ?
983 swash_fetch(PL_utf8_alnum, (U8*)s) :
984 isALNUM_LC_utf8((U8*)s)))
986 else if ((norun || regtry(prog, s)))
992 tmp = (s != startpos) ? UCHARAT(s - 1) : '\n';
993 tmp = ((OP(c) == NBOUND ?
994 isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
997 !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
999 else if ((norun || regtry(prog, s)))
1004 if ((!prog->minlen && !tmp) && (norun || regtry(prog, s)))
1009 while (s < strend) {
1010 if (swash_fetch(PL_utf8_alnum, (U8*)s)) {
1011 if (tmp && (norun || regtry(prog, s)))
1022 while (s < strend) {
1024 if (tmp && (norun || regtry(prog, s)))
1036 PL_reg_flags |= RF_tainted;
1038 while (s < strend) {
1039 if (isALNUM_LC_utf8((U8*)s)) {
1040 if (tmp && (norun || regtry(prog, s)))
1051 while (s < strend) {
1052 if (isALNUM_LC(*s)) {
1053 if (tmp && (norun || regtry(prog, s)))
1066 while (s < strend) {
1067 if (!swash_fetch(PL_utf8_alnum, (U8*)s)) {
1068 if (tmp && (norun || regtry(prog, s)))
1079 while (s < strend) {
1081 if (tmp && (norun || regtry(prog, s)))
1093 PL_reg_flags |= RF_tainted;
1095 while (s < strend) {
1096 if (!isALNUM_LC_utf8((U8*)s)) {
1097 if (tmp && (norun || regtry(prog, s)))
1108 while (s < strend) {
1109 if (!isALNUM_LC(*s)) {
1110 if (tmp && (norun || regtry(prog, s)))
1123 while (s < strend) {
1124 if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s)) {
1125 if (tmp && (norun || regtry(prog, s)))
1136 while (s < strend) {
1138 if (tmp && (norun || regtry(prog, s)))
1150 PL_reg_flags |= RF_tainted;
1152 while (s < strend) {
1153 if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) {
1154 if (tmp && (norun || regtry(prog, s)))
1165 while (s < strend) {
1166 if (isSPACE_LC(*s)) {
1167 if (tmp && (norun || regtry(prog, s)))
1180 while (s < strend) {
1181 if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s))) {
1182 if (tmp && (norun || regtry(prog, s)))
1193 while (s < strend) {
1195 if (tmp && (norun || regtry(prog, s)))
1207 PL_reg_flags |= RF_tainted;
1209 while (s < strend) {
1210 if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) {
1211 if (tmp && (norun || regtry(prog, s)))
1222 while (s < strend) {
1223 if (!isSPACE_LC(*s)) {
1224 if (tmp && (norun || regtry(prog, s)))
1237 while (s < strend) {
1238 if (swash_fetch(PL_utf8_digit,(U8*)s)) {
1239 if (tmp && (norun || regtry(prog, s)))
1250 while (s < strend) {
1252 if (tmp && (norun || regtry(prog, s)))
1264 PL_reg_flags |= RF_tainted;
1266 while (s < strend) {
1267 if (isDIGIT_LC_utf8((U8*)s)) {
1268 if (tmp && (norun || regtry(prog, s)))
1279 while (s < strend) {
1280 if (isDIGIT_LC(*s)) {
1281 if (tmp && (norun || regtry(prog, s)))
1294 while (s < strend) {
1295 if (!swash_fetch(PL_utf8_digit,(U8*)s)) {
1296 if (tmp && (norun || regtry(prog, s)))
1307 while (s < strend) {
1309 if (tmp && (norun || regtry(prog, s)))
1321 PL_reg_flags |= RF_tainted;
1323 while (s < strend) {
1324 if (!isDIGIT_LC_utf8((U8*)s)) {
1325 if (tmp && (norun || regtry(prog, s)))
1336 while (s < strend) {
1337 if (!isDIGIT_LC(*s)) {
1338 if (tmp && (norun || regtry(prog, s)))
1350 Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
1359 - regexec_flags - match a regexp against a string
1362 Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend,
1363 char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
1364 /* strend: pointer to null at end of string */
1365 /* strbeg: real beginning of string */
1366 /* minend: end of match must be >=minend after stringarg. */
1367 /* data: May be used for some additional optimizations. */
1368 /* nosave: For optimizations. */
1371 register regnode *c;
1372 register char *startpos = stringarg;
1373 I32 minlen; /* must match at least this many chars */
1374 I32 dontbother = 0; /* how many characters not to try at end */
1375 /* I32 start_shift = 0; */ /* Offset of the start to find
1376 constant substr. */ /* CC */
1377 I32 end_shift = 0; /* Same for the end. */ /* CC */
1378 I32 scream_pos = -1; /* Internal iterator of scream. */
1380 SV* oreplsv = GvSV(PL_replgv);
1381 bool do_utf8 = DO_UTF8(sv);
1387 PL_regnarrate = PL_debug & 512;
1390 /* Be paranoid... */
1391 if (prog == NULL || startpos == NULL) {
1392 Perl_croak(aTHX_ "NULL regexp parameter");
1396 minlen = prog->minlen;
1398 if (utf8_distance((U8*)strend, (U8*)startpos) < minlen) goto phooey;
1401 if (strend - startpos < minlen) goto phooey;
1404 if (startpos == strbeg) /* is ^ valid at stringarg? */
1407 if (prog->reganch & ROPT_UTF8 && do_utf8) {
1408 U8 *s = reghop3((U8*)stringarg, -1, (U8*)strbeg);
1409 PL_regprev = utf8_to_uv(s, (U8*)stringarg - s, NULL, 0);
1412 PL_regprev = (U32)stringarg[-1];
1413 if (!PL_multiline && PL_regprev == '\n')
1414 PL_regprev = '\0'; /* force ^ to NOT match */
1417 /* Check validity of program. */
1418 if (UCHARAT(prog->program) != REG_MAGIC) {
1419 Perl_croak(aTHX_ "corrupted regexp program");
1423 PL_reg_eval_set = 0;
1426 if (prog->reganch & ROPT_UTF8)
1427 PL_reg_flags |= RF_utf8;
1429 /* Mark beginning of line for ^ and lookbehind. */
1430 PL_regbol = startpos;
1434 /* Mark end of line for $ (and such) */
1437 /* see how far we have to get to not match where we matched before */
1438 PL_regtill = startpos+minend;
1440 /* We start without call_cc context. */
1443 /* If there is a "must appear" string, look for it. */
1446 if (prog->reganch & ROPT_GPOS_SEEN) { /* Need to have PL_reg_ganch */
1449 if (flags & REXEC_IGNOREPOS) /* Means: check only at start */
1450 PL_reg_ganch = startpos;
1451 else if (sv && SvTYPE(sv) >= SVt_PVMG
1453 && (mg = mg_find(sv, 'g')) && mg->mg_len >= 0) {
1454 PL_reg_ganch = strbeg + mg->mg_len; /* Defined pos() */
1455 if (prog->reganch & ROPT_ANCH_GPOS) {
1456 if (s > PL_reg_ganch)
1461 else /* pos() not defined */
1462 PL_reg_ganch = strbeg;
1465 if (!(flags & REXEC_CHECKED) && prog->check_substr != Nullsv) {
1466 re_scream_pos_data d;
1468 d.scream_olds = &scream_olds;
1469 d.scream_pos = &scream_pos;
1470 s = re_intuit_start(prog, sv, s, strend, flags, &d);
1472 goto phooey; /* not present */
1475 DEBUG_r( if (!PL_colorset) reginitcolors() );
1476 DEBUG_r(PerlIO_printf(Perl_debug_log,
1477 "%sMatching REx%s `%s%.60s%s%s' against `%s%.*s%s%s'\n",
1478 PL_colors[4],PL_colors[5],PL_colors[0],
1481 (strlen(prog->precomp) > 60 ? "..." : ""),
1483 (int)(strend - startpos > 60 ? 60 : strend - startpos),
1484 startpos, PL_colors[1],
1485 (strend - startpos > 60 ? "..." : ""))
1488 /* Simplest case: anchored match need be tried only once. */
1489 /* [unless only anchor is BOL and multiline is set] */
1490 if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) {
1491 if (s == startpos && regtry(prog, startpos))
1493 else if (PL_multiline || (prog->reganch & ROPT_IMPLICIT)
1494 || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */
1499 dontbother = minlen - 1;
1500 end = HOP3c(strend, -dontbother, strbeg) - 1;
1501 /* for multiline we only have to try after newlines */
1502 if (prog->check_substr) {
1506 if (regtry(prog, s))
1511 if (prog->reganch & RE_USE_INTUIT) {
1512 s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
1523 if (*s++ == '\n') { /* don't need PL_utf8skip here */
1524 if (regtry(prog, s))
1531 } else if (prog->reganch & ROPT_ANCH_GPOS) {
1532 if (regtry(prog, PL_reg_ganch))
1537 /* Messy cases: unanchored match. */
1538 if (prog->anchored_substr && prog->reganch & ROPT_SKIP) {
1539 /* we have /x+whatever/ */
1540 /* it must be a one character string (XXXX Except UTF?) */
1541 char ch = SvPVX(prog->anchored_substr)[0];
1547 while (s < strend) {
1549 DEBUG_r( did_match = 1 );
1550 if (regtry(prog, s)) goto got_it;
1552 while (s < strend && *s == ch)
1559 while (s < strend) {
1561 DEBUG_r( did_match = 1 );
1562 if (regtry(prog, s)) goto got_it;
1564 while (s < strend && *s == ch)
1570 DEBUG_r(did_match ||
1571 PerlIO_printf(Perl_debug_log,
1572 "Did not find anchored character...\n"));
1575 else if (do_utf8 == (UTF!=0) &&
1576 (prog->anchored_substr != Nullsv
1577 || (prog->float_substr != Nullsv
1578 && prog->float_max_offset < strend - s))) {
1579 SV *must = prog->anchored_substr
1580 ? prog->anchored_substr : prog->float_substr;
1582 prog->anchored_substr ? prog->anchored_offset : prog->float_max_offset;
1584 prog->anchored_substr ? prog->anchored_offset : prog->float_min_offset;
1585 char *last = HOP3c(strend, /* Cannot start after this */
1586 -(I32)(CHR_SVLEN(must)
1587 - (SvTAIL(must) != 0) + back_min), strbeg);
1588 char *last1; /* Last position checked before */
1594 last1 = HOPc(s, -1);
1596 last1 = s - 1; /* bogus */
1598 /* XXXX check_substr already used to find `s', can optimize if
1599 check_substr==must. */
1601 dontbother = end_shift;
1602 strend = HOPc(strend, -dontbother);
1603 while ( (s <= last) &&
1604 ((flags & REXEC_SCREAM)
1605 ? (s = screaminstr(sv, must, HOP3c(s, back_min, strend) - strbeg,
1606 end_shift, &scream_pos, 0))
1607 : (s = fbm_instr((unsigned char*)HOP3(s, back_min, strend),
1608 (unsigned char*)strend, must,
1609 PL_multiline ? FBMrf_MULTILINE : 0))) ) {
1610 DEBUG_r( did_match = 1 );
1611 if (HOPc(s, -back_max) > last1) {
1612 last1 = HOPc(s, -back_min);
1613 s = HOPc(s, -back_max);
1616 char *t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
1618 last1 = HOPc(s, -back_min);
1622 while (s <= last1) {
1623 if (regtry(prog, s))
1629 while (s <= last1) {
1630 if (regtry(prog, s))
1636 DEBUG_r(did_match ||
1637 PerlIO_printf(Perl_debug_log, "Did not find %s substr `%s%.*s%s'%s...\n",
1638 ((must == prog->anchored_substr)
1639 ? "anchored" : "floating"),
1641 (int)(SvCUR(must) - (SvTAIL(must)!=0)),
1643 PL_colors[1], (SvTAIL(must) ? "$" : "")));
1646 else if ((c = prog->regstclass)) {
1647 if (minlen && PL_regkind[(U8)OP(prog->regstclass)] != EXACT)
1648 /* don't bother with what can't match */
1649 strend = HOPc(strend, -(minlen - 1));
1651 SV *prop = sv_newmortal();
1653 PerlIO_printf(Perl_debug_log, "Matching stclass `%s' against `%s'\n", SvPVX(prop), s);
1655 if (find_byclass(prog, c, s, strend, startpos, 0))
1657 DEBUG_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass...\n"));
1661 if (prog->float_substr != Nullsv) { /* Trim the end. */
1664 if (flags & REXEC_SCREAM) {
1665 last = screaminstr(sv, prog->float_substr, s - strbeg,
1666 end_shift, &scream_pos, 1); /* last one */
1668 last = scream_olds; /* Only one occurrence. */
1672 char *little = SvPV(prog->float_substr, len);
1674 if (SvTAIL(prog->float_substr)) {
1675 if (memEQ(strend - len + 1, little, len - 1))
1676 last = strend - len + 1;
1677 else if (!PL_multiline)
1678 last = memEQ(strend - len, little, len)
1679 ? strend - len : Nullch;
1685 last = rninstr(s, strend, little, little + len);
1687 last = strend; /* matching `$' */
1691 DEBUG_r(PerlIO_printf(Perl_debug_log,
1692 "%sCan't trim the tail, match fails (should not happen)%s\n",
1693 PL_colors[4],PL_colors[5]));
1694 goto phooey; /* Should not happen! */
1696 dontbother = strend - last + prog->float_min_offset;
1698 if (minlen && (dontbother < minlen))
1699 dontbother = minlen - 1;
1700 strend -= dontbother; /* this one's always in bytes! */
1701 /* We don't know much -- general case. */
1704 if (regtry(prog, s))
1713 if (regtry(prog, s))
1715 } while (s++ < strend);
1723 RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
1725 if (PL_reg_eval_set) {
1726 /* Preserve the current value of $^R */
1727 if (oreplsv != GvSV(PL_replgv))
1728 sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
1729 restored, the value remains
1731 restore_pos(aTHXo_ 0);
1734 /* make sure $`, $&, $', and $digit will work later */
1735 if ( !(flags & REXEC_NOT_FIRST) ) {
1736 if (RX_MATCH_COPIED(prog)) {
1737 Safefree(prog->subbeg);
1738 RX_MATCH_COPIED_off(prog);
1740 if (flags & REXEC_COPY_STR) {
1741 I32 i = PL_regeol - startpos + (stringarg - strbeg);
1743 s = savepvn(strbeg, i);
1746 RX_MATCH_COPIED_on(prog);
1749 prog->subbeg = strbeg;
1750 prog->sublen = PL_regeol - strbeg; /* strend may have been modified */
1757 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n",
1758 PL_colors[4],PL_colors[5]));
1759 if (PL_reg_eval_set)
1760 restore_pos(aTHXo_ 0);
1765 - regtry - try match at specific point
1767 STATIC I32 /* 0 failure, 1 success */
1768 S_regtry(pTHX_ regexp *prog, char *startpos)
1776 PL_regindent = 0; /* XXXX Not good when matches are reenterable... */
1778 if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
1781 PL_reg_eval_set = RS_init;
1783 PerlIO_printf(Perl_debug_log, " setting stack tmpbase at %"IVdf"\n",
1784 (IV)(PL_stack_sp - PL_stack_base));
1786 SAVEI32(cxstack[cxstack_ix].blk_oldsp);
1787 cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
1788 /* Otherwise OP_NEXTSTATE will free whatever on stack now. */
1790 /* Apparently this is not needed, judging by wantarray. */
1791 /* SAVEI8(cxstack[cxstack_ix].blk_gimme);
1792 cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
1795 /* Make $_ available to executed code. */
1796 if (PL_reg_sv != DEFSV) {
1797 /* SAVE_DEFSV does *not* suffice here for USE_THREADS */
1802 if (!(SvTYPE(PL_reg_sv) >= SVt_PVMG && SvMAGIC(PL_reg_sv)
1803 && (mg = mg_find(PL_reg_sv, 'g')))) {
1804 /* prepare for quick setting of pos */
1805 sv_magic(PL_reg_sv, (SV*)0, 'g', Nullch, 0);
1806 mg = mg_find(PL_reg_sv, 'g');
1810 PL_reg_oldpos = mg->mg_len;
1811 SAVEDESTRUCTOR_X(restore_pos, 0);
1814 Newz(22,PL_reg_curpm, 1, PMOP);
1815 PL_reg_curpm->op_pmregexp = prog;
1816 PL_reg_oldcurpm = PL_curpm;
1817 PL_curpm = PL_reg_curpm;
1818 if (RX_MATCH_COPIED(prog)) {
1819 /* Here is a serious problem: we cannot rewrite subbeg,
1820 since it may be needed if this match fails. Thus
1821 $` inside (?{}) could fail... */
1822 PL_reg_oldsaved = prog->subbeg;
1823 PL_reg_oldsavedlen = prog->sublen;
1824 RX_MATCH_COPIED_off(prog);
1827 PL_reg_oldsaved = Nullch;
1828 prog->subbeg = PL_bostr;
1829 prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
1831 prog->startp[0] = startpos - PL_bostr;
1832 PL_reginput = startpos;
1833 PL_regstartp = prog->startp;
1834 PL_regendp = prog->endp;
1835 PL_reglastparen = &prog->lastparen;
1836 prog->lastparen = 0;
1838 DEBUG_r(PL_reg_starttry = startpos);
1839 if (PL_reg_start_tmpl <= prog->nparens) {
1840 PL_reg_start_tmpl = prog->nparens*3/2 + 3;
1841 if(PL_reg_start_tmp)
1842 Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1844 New(22,PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1847 /* XXXX What this code is doing here?!!! There should be no need
1848 to do this again and again, PL_reglastparen should take care of
1851 /* Tests pat.t#187 and split.t#{13,14} seem to depend on this code.
1852 * Actually, the code in regcppop() (which Ilya may be meaning by
1853 * PL_reglastparen), is not needed at all by the test suite
1854 * (op/regexp, op/pat, op/split), but that code is needed, oddly
1855 * enough, for building DynaLoader, or otherwise this
1856 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
1857 * will happen. Meanwhile, this code *is* needed for the
1858 * above-mentioned test suite tests to succeed. The common theme
1859 * on those tests seems to be returning null fields from matches.
1864 if (prog->nparens) {
1865 for (i = prog->nparens; i > *PL_reglastparen; i--) {
1872 if (regmatch(prog->program + 1)) {
1873 prog->endp[0] = PL_reginput - PL_bostr;
1876 REGCP_UNWIND(lastcp);
1880 #define RE_UNWIND_BRANCH 1
1881 #define RE_UNWIND_BRANCHJ 2
1885 typedef struct { /* XX: makes sense to enlarge it... */
1889 } re_unwind_generic_t;
1902 } re_unwind_branch_t;
1904 typedef union re_unwind_t {
1906 re_unwind_generic_t generic;
1907 re_unwind_branch_t branch;
1911 - regmatch - main matching routine
1913 * Conceptually the strategy is simple: check to see whether the current
1914 * node matches, call self recursively to see whether the rest matches,
1915 * and then act accordingly. In practice we make some effort to avoid
1916 * recursion, in particular by going through "ordinary" nodes (that don't
1917 * need to know whether the rest of the match failed) by a loop instead of
1920 /* [lwall] I've hoisted the register declarations to the outer block in order to
1921 * maybe save a little bit of pushing and popping on the stack. It also takes
1922 * advantage of machines that use a register save mask on subroutine entry.
1924 STATIC I32 /* 0 failure, 1 success */
1925 S_regmatch(pTHX_ regnode *prog)
1927 register regnode *scan; /* Current node. */
1928 regnode *next; /* Next node. */
1929 regnode *inner; /* Next node in internal branch. */
1930 register I32 nextchr; /* renamed nextchr - nextchar colides with
1931 function of same name */
1932 register I32 n; /* no or next */
1933 register I32 ln; /* len or last */
1934 register char *s; /* operand or save */
1935 register char *locinput = PL_reginput;
1936 register I32 c1, c2, paren; /* case fold search, parenth */
1937 int minmod = 0, sw = 0, logical = 0;
1939 I32 firstcp = PL_savestack_ix;
1940 register bool do_utf8 = DO_UTF8(PL_reg_sv);
1946 /* Note that nextchr is a byte even in UTF */
1947 nextchr = UCHARAT(locinput);
1949 while (scan != NULL) {
1950 #define sayNO_L (logical ? (logical = 0, sw = 0, goto cont) : sayNO)
1952 # define sayYES goto yes
1953 # define sayNO goto no
1954 # define sayYES_FINAL goto yes_final
1955 # define sayYES_LOUD goto yes_loud
1956 # define sayNO_FINAL goto no_final
1957 # define sayNO_SILENT goto do_no
1958 # define saySAME(x) if (x) goto yes; else goto no
1959 # define REPORT_CODE_OFF 24
1961 # define sayYES return 1
1962 # define sayNO return 0
1963 # define sayYES_FINAL return 1
1964 # define sayYES_LOUD return 1
1965 # define sayNO_FINAL return 0
1966 # define sayNO_SILENT return 0
1967 # define saySAME(x) return x
1970 SV *prop = sv_newmortal();
1971 int docolor = *PL_colors[0];
1972 int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
1973 int l = (PL_regeol - locinput) > taill ? taill : (PL_regeol - locinput);
1974 /* The part of the string before starttry has one color
1975 (pref0_len chars), between starttry and current
1976 position another one (pref_len - pref0_len chars),
1977 after the current position the third one.
1978 We assume that pref0_len <= pref_len, otherwise we
1979 decrease pref0_len. */
1980 int pref_len = (locinput - PL_bostr) > (5 + taill) - l
1981 ? (5 + taill) - l : locinput - PL_bostr;
1984 while (UTF8_IS_CONTINUATION(*(U8*)(locinput - pref_len)))
1986 pref0_len = pref_len - (locinput - PL_reg_starttry);
1987 if (l + pref_len < (5 + taill) && l < PL_regeol - locinput)
1988 l = ( PL_regeol - locinput > (5 + taill) - pref_len
1989 ? (5 + taill) - pref_len : PL_regeol - locinput);
1990 while (UTF8_IS_CONTINUATION(*(U8*)(locinput + l)))
1994 if (pref0_len > pref_len)
1995 pref0_len = pref_len;
1996 regprop(prop, scan);
1997 PerlIO_printf(Perl_debug_log,
1998 "%4"IVdf" <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|%3"IVdf":%*s%s\n",
1999 (IV)(locinput - PL_bostr),
2000 PL_colors[4], pref0_len,
2001 locinput - pref_len, PL_colors[5],
2002 PL_colors[2], pref_len - pref0_len,
2003 locinput - pref_len + pref0_len, PL_colors[3],
2004 (docolor ? "" : "> <"),
2005 PL_colors[0], l, locinput, PL_colors[1],
2006 15 - l - pref_len + 1,
2008 (IV)(scan - PL_regprogram), PL_regindent*2, "",
2012 next = scan + NEXT_OFF(scan);
2018 if (locinput == PL_bostr
2019 ? PL_regprev == '\n'
2021 (nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
2023 /* regtill = regbol; */
2028 if (locinput == PL_bostr
2029 ? PL_regprev == '\n'
2030 : ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
2036 if (locinput == PL_bostr)
2040 if (locinput == PL_reg_ganch)
2050 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
2055 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
2057 if (PL_regeol - locinput > 1)
2061 if (PL_regeol != locinput)
2066 locinput += PL_utf8skip[nextchr];
2067 if (locinput > PL_regeol)
2069 nextchr = UCHARAT(locinput);
2072 if (!nextchr && locinput >= PL_regeol)
2074 nextchr = UCHARAT(++locinput);
2077 if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
2080 locinput += PL_utf8skip[nextchr];
2081 if (locinput > PL_regeol)
2083 nextchr = UCHARAT(locinput);
2086 nextchr = UCHARAT(++locinput);
2091 if (do_utf8 != (UTF!=0)) {
2099 if (*((U8*)s) != utf8_to_uv_simple((U8*)l, &len))
2108 if (*((U8*)l) != utf8_to_uv_simple((U8*)s, &len))
2114 nextchr = UCHARAT(locinput);
2117 /* Inline the first character, for speed. */
2118 if (UCHARAT(s) != nextchr)
2120 if (PL_regeol - locinput < ln)
2122 if (ln > 1 && memNE(s, locinput, ln))
2125 nextchr = UCHARAT(locinput);
2128 PL_reg_flags |= RF_tainted;
2138 c1 = OP(scan) == EXACTF;
2140 if (l >= PL_regeol) {
2143 if ((UTF ? utf8_to_uv((U8*)s, e - s, 0, 0) : *((U8*)s)) !=
2144 (c1 ? toLOWER_utf8((U8*)l) : toLOWER_LC_utf8((U8*)l)))
2146 s += UTF ? UTF8SKIP(s) : 1;
2150 nextchr = UCHARAT(locinput);
2154 /* Inline the first character, for speed. */
2155 if (UCHARAT(s) != nextchr &&
2156 UCHARAT(s) != ((OP(scan) == EXACTF)
2157 ? PL_fold : PL_fold_locale)[nextchr])
2159 if (PL_regeol - locinput < ln)
2161 if (ln > 1 && (OP(scan) == EXACTF
2162 ? ibcmp(s, locinput, ln)
2163 : ibcmp_locale(s, locinput, ln)))
2166 nextchr = UCHARAT(locinput);
2170 if (!reginclass(scan, (U8*)locinput, do_utf8))
2172 if (locinput >= PL_regeol)
2174 locinput += PL_utf8skip[nextchr];
2175 nextchr = UCHARAT(locinput);
2179 nextchr = UCHARAT(locinput);
2180 if (!reginclass(scan, (U8*)locinput, do_utf8))
2182 if (!nextchr && locinput >= PL_regeol)
2184 nextchr = UCHARAT(++locinput);
2188 PL_reg_flags |= RF_tainted;
2194 if (!(OP(scan) == ALNUM
2195 ? swash_fetch(PL_utf8_alnum, (U8*)locinput)
2196 : isALNUM_LC_utf8((U8*)locinput)))
2200 locinput += PL_utf8skip[nextchr];
2201 nextchr = UCHARAT(locinput);
2204 if (!(OP(scan) == ALNUM
2205 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
2207 nextchr = UCHARAT(++locinput);
2210 PL_reg_flags |= RF_tainted;
2213 if (!nextchr && locinput >= PL_regeol)
2216 if (OP(scan) == NALNUM
2217 ? swash_fetch(PL_utf8_alnum, (U8*)locinput)
2218 : isALNUM_LC_utf8((U8*)locinput))
2222 locinput += PL_utf8skip[nextchr];
2223 nextchr = UCHARAT(locinput);
2226 if (OP(scan) == NALNUM
2227 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
2229 nextchr = UCHARAT(++locinput);
2233 PL_reg_flags |= RF_tainted;
2237 /* was last char in word? */
2239 if (locinput == PL_regbol)
2242 U8 *r = reghop((U8*)locinput, -1);
2244 ln = utf8_to_uv(r, s - (char*)r, 0, 0);
2246 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2247 ln = isALNUM_uni(ln);
2248 n = swash_fetch(PL_utf8_alnum, (U8*)locinput);
2251 ln = isALNUM_LC_uni(ln);
2252 n = isALNUM_LC_utf8((U8*)locinput);
2256 ln = (locinput != PL_regbol) ?
2257 UCHARAT(locinput - 1) : PL_regprev;
2258 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2260 n = isALNUM(nextchr);
2263 ln = isALNUM_LC(ln);
2264 n = isALNUM_LC(nextchr);
2267 if (((!ln) == (!n)) == (OP(scan) == BOUND ||
2268 OP(scan) == BOUNDL))
2272 PL_reg_flags |= RF_tainted;
2278 if (UTF8_IS_CONTINUED(nextchr)) {
2279 if (!(OP(scan) == SPACE
2280 ? swash_fetch(PL_utf8_space, (U8*)locinput)
2281 : isSPACE_LC_utf8((U8*)locinput)))
2285 locinput += PL_utf8skip[nextchr];
2286 nextchr = UCHARAT(locinput);
2289 if (!(OP(scan) == SPACE
2290 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
2292 nextchr = UCHARAT(++locinput);
2295 if (!(OP(scan) == SPACE
2296 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
2298 nextchr = UCHARAT(++locinput);
2302 PL_reg_flags |= RF_tainted;
2305 if (!nextchr && locinput >= PL_regeol)
2308 if (OP(scan) == NSPACE
2309 ? swash_fetch(PL_utf8_space, (U8*)locinput)
2310 : isSPACE_LC_utf8((U8*)locinput))
2314 locinput += PL_utf8skip[nextchr];
2315 nextchr = UCHARAT(locinput);
2318 if (OP(scan) == NSPACE
2319 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
2321 nextchr = UCHARAT(++locinput);
2324 PL_reg_flags |= RF_tainted;
2330 if (!(OP(scan) == DIGIT
2331 ? swash_fetch(PL_utf8_digit, (U8*)locinput)
2332 : isDIGIT_LC_utf8((U8*)locinput)))
2336 locinput += PL_utf8skip[nextchr];
2337 nextchr = UCHARAT(locinput);
2340 if (!(OP(scan) == DIGIT
2341 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
2343 nextchr = UCHARAT(++locinput);
2346 PL_reg_flags |= RF_tainted;
2349 if (!nextchr && locinput >= PL_regeol)
2352 if (OP(scan) == NDIGIT
2353 ? swash_fetch(PL_utf8_digit, (U8*)locinput)
2354 : isDIGIT_LC_utf8((U8*)locinput))
2358 locinput += PL_utf8skip[nextchr];
2359 nextchr = UCHARAT(locinput);
2362 if (OP(scan) == NDIGIT
2363 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
2365 nextchr = UCHARAT(++locinput);
2368 if (locinput >= PL_regeol || swash_fetch(PL_utf8_mark,(U8*)locinput))
2370 locinput += PL_utf8skip[nextchr];
2371 while (locinput < PL_regeol && swash_fetch(PL_utf8_mark,(U8*)locinput))
2372 locinput += UTF8SKIP(locinput);
2373 if (locinput > PL_regeol)
2375 nextchr = UCHARAT(locinput);
2378 PL_reg_flags |= RF_tainted;
2382 n = ARG(scan); /* which paren pair */
2383 ln = PL_regstartp[n];
2384 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
2385 if (*PL_reglastparen < n || ln == -1)
2386 sayNO; /* Do not match unless seen CLOSEn. */
2387 if (ln == PL_regendp[n])
2391 if (do_utf8 && OP(scan) != REF) { /* REF can do byte comparison */
2393 char *e = PL_bostr + PL_regendp[n];
2395 * Note that we can't do the "other character" lookup trick as
2396 * in the 8-bit case (no pun intended) because in Unicode we
2397 * have to map both upper and title case to lower case.
2399 if (OP(scan) == REFF) {
2403 if (toLOWER_utf8((U8*)s) != toLOWER_utf8((U8*)l))
2413 if (toLOWER_LC_utf8((U8*)s) != toLOWER_LC_utf8((U8*)l))
2420 nextchr = UCHARAT(locinput);
2424 /* Inline the first character, for speed. */
2425 if (UCHARAT(s) != nextchr &&
2427 (UCHARAT(s) != ((OP(scan) == REFF
2428 ? PL_fold : PL_fold_locale)[nextchr]))))
2430 ln = PL_regendp[n] - ln;
2431 if (locinput + ln > PL_regeol)
2433 if (ln > 1 && (OP(scan) == REF
2434 ? memNE(s, locinput, ln)
2436 ? ibcmp(s, locinput, ln)
2437 : ibcmp_locale(s, locinput, ln))))
2440 nextchr = UCHARAT(locinput);
2451 OP_4tree *oop = PL_op;
2452 COP *ocurcop = PL_curcop;
2453 SV **ocurpad = PL_curpad;
2457 PL_op = (OP_4tree*)PL_regdata->data[n];
2458 DEBUG_r( PerlIO_printf(Perl_debug_log, " re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
2459 PL_curpad = AvARRAY((AV*)PL_regdata->data[n + 2]);
2460 PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
2462 CALLRUNOPS(aTHX); /* Scalar context. */
2468 PL_curpad = ocurpad;
2469 PL_curcop = ocurcop;
2471 if (logical == 2) { /* Postponed subexpression. */
2473 MAGIC *mg = Null(MAGIC*);
2475 CHECKPOINT cp, lastcp;
2477 if(SvROK(ret) || SvRMAGICAL(ret)) {
2478 SV *sv = SvROK(ret) ? SvRV(ret) : ret;
2481 mg = mg_find(sv, 'r');
2484 re = (regexp *)mg->mg_obj;
2485 (void)ReREFCNT_inc(re);
2489 char *t = SvPV(ret, len);
2491 char *oprecomp = PL_regprecomp;
2492 I32 osize = PL_regsize;
2493 I32 onpar = PL_regnpar;
2496 re = CALLREGCOMP(aTHX_ t, t + len, &pm);
2498 & (SVs_TEMP | SVs_PADTMP | SVf_READONLY)))
2499 sv_magic(ret,(SV*)ReREFCNT_inc(re),'r',0,0);
2500 PL_regprecomp = oprecomp;
2505 PerlIO_printf(Perl_debug_log,
2506 "Entering embedded `%s%.60s%s%s'\n",
2510 (strlen(re->precomp) > 60 ? "..." : ""))
2513 state.prev = PL_reg_call_cc;
2514 state.cc = PL_regcc;
2515 state.re = PL_reg_re;
2519 cp = regcppush(0); /* Save *all* the positions. */
2522 state.ss = PL_savestack_ix;
2523 *PL_reglastparen = 0;
2524 PL_reg_call_cc = &state;
2525 PL_reginput = locinput;
2527 /* XXXX This is too dramatic a measure... */
2530 if (regmatch(re->program + 1)) {
2531 /* Even though we succeeded, we need to restore
2532 global variables, since we may be wrapped inside
2533 SUSPEND, thus the match may be not finished yet. */
2535 /* XXXX Do this only if SUSPENDed? */
2536 PL_reg_call_cc = state.prev;
2537 PL_regcc = state.cc;
2538 PL_reg_re = state.re;
2539 cache_re(PL_reg_re);
2541 /* XXXX This is too dramatic a measure... */
2544 /* These are needed even if not SUSPEND. */
2550 REGCP_UNWIND(lastcp);
2552 PL_reg_call_cc = state.prev;
2553 PL_regcc = state.cc;
2554 PL_reg_re = state.re;
2555 cache_re(PL_reg_re);
2557 /* XXXX This is too dramatic a measure... */
2566 sv_setsv(save_scalar(PL_replgv), ret);
2570 n = ARG(scan); /* which paren pair */
2571 PL_reg_start_tmp[n] = locinput;
2576 n = ARG(scan); /* which paren pair */
2577 PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
2578 PL_regendp[n] = locinput - PL_bostr;
2579 if (n > *PL_reglastparen)
2580 *PL_reglastparen = n;
2583 n = ARG(scan); /* which paren pair */
2584 sw = (*PL_reglastparen >= n && PL_regendp[n] != -1);
2587 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
2589 next = NEXTOPER(NEXTOPER(scan));
2591 next = scan + ARG(scan);
2592 if (OP(next) == IFTHEN) /* Fake one. */
2593 next = NEXTOPER(NEXTOPER(next));
2597 logical = scan->flags;
2599 /*******************************************************************
2600 PL_regcc contains infoblock about the innermost (...)* loop, and
2601 a pointer to the next outer infoblock.
2603 Here is how Y(A)*Z is processed (if it is compiled into CURLYX/WHILEM):
2605 1) After matching X, regnode for CURLYX is processed;
2607 2) This regnode creates infoblock on the stack, and calls
2608 regmatch() recursively with the starting point at WHILEM node;
2610 3) Each hit of WHILEM node tries to match A and Z (in the order
2611 depending on the current iteration, min/max of {min,max} and
2612 greediness). The information about where are nodes for "A"
2613 and "Z" is read from the infoblock, as is info on how many times "A"
2614 was already matched, and greediness.
2616 4) After A matches, the same WHILEM node is hit again.
2618 5) Each time WHILEM is hit, PL_regcc is the infoblock created by CURLYX
2619 of the same pair. Thus when WHILEM tries to match Z, it temporarily
2620 resets PL_regcc, since this Y(A)*Z can be a part of some other loop:
2621 as in (Y(A)*Z)*. If Z matches, the automaton will hit the WHILEM node
2622 of the external loop.
2624 Currently present infoblocks form a tree with a stem formed by PL_curcc
2625 and whatever it mentions via ->next, and additional attached trees
2626 corresponding to temporarily unset infoblocks as in "5" above.
2628 In the following picture infoblocks for outer loop of
2629 (Y(A)*?Z)*?T are denoted O, for inner I. NULL starting block
2630 is denoted by x. The matched string is YAAZYAZT. Temporarily postponed
2631 infoblocks are drawn below the "reset" infoblock.
2633 In fact in the picture below we do not show failed matches for Z and T
2634 by WHILEM blocks. [We illustrate minimal matches, since for them it is
2635 more obvious *why* one needs to *temporary* unset infoblocks.]
2637 Matched REx position InfoBlocks Comment
2641 Y A)*?Z)*?T x <- O <- I
2642 YA )*?Z)*?T x <- O <- I
2643 YA A)*?Z)*?T x <- O <- I
2644 YAA )*?Z)*?T x <- O <- I
2645 YAA Z)*?T x <- O # Temporary unset I
2648 YAAZ Y(A)*?Z)*?T x <- O
2651 YAAZY (A)*?Z)*?T x <- O
2654 YAAZY A)*?Z)*?T x <- O <- I
2657 YAAZYA )*?Z)*?T x <- O <- I
2660 YAAZYA Z)*?T x <- O # Temporary unset I
2666 YAAZYAZ T x # Temporary unset O
2673 *******************************************************************/
2676 CHECKPOINT cp = PL_savestack_ix;
2677 /* No need to save/restore up to this paren */
2678 I32 parenfloor = scan->flags;
2680 if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
2682 cc.oldcc = PL_regcc;
2684 /* XXXX Probably it is better to teach regpush to support
2685 parenfloor > PL_regsize... */
2686 if (parenfloor > *PL_reglastparen)
2687 parenfloor = *PL_reglastparen; /* Pessimization... */
2688 cc.parenfloor = parenfloor;
2690 cc.min = ARG1(scan);
2691 cc.max = ARG2(scan);
2692 cc.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
2696 PL_reginput = locinput;
2697 n = regmatch(PREVOPER(next)); /* start on the WHILEM */
2699 PL_regcc = cc.oldcc;
2705 * This is really hard to understand, because after we match
2706 * what we're trying to match, we must make sure the rest of
2707 * the REx is going to match for sure, and to do that we have
2708 * to go back UP the parse tree by recursing ever deeper. And
2709 * if it fails, we have to reset our parent's current state
2710 * that we can try again after backing off.
2713 CHECKPOINT cp, lastcp;
2714 CURCUR* cc = PL_regcc;
2715 char *lastloc = cc->lastloc; /* Detection of 0-len. */
2717 n = cc->cur + 1; /* how many we know we matched */
2718 PL_reginput = locinput;
2721 PerlIO_printf(Perl_debug_log,
2722 "%*s %ld out of %ld..%ld cc=%lx\n",
2723 REPORT_CODE_OFF+PL_regindent*2, "",
2724 (long)n, (long)cc->min,
2725 (long)cc->max, (long)cc)
2728 /* If degenerate scan matches "", assume scan done. */
2730 if (locinput == cc->lastloc && n >= cc->min) {
2731 PL_regcc = cc->oldcc;
2735 PerlIO_printf(Perl_debug_log,
2736 "%*s empty match detected, try continuation...\n",
2737 REPORT_CODE_OFF+PL_regindent*2, "")
2739 if (regmatch(cc->next))
2747 /* First just match a string of min scans. */
2751 cc->lastloc = locinput;
2752 if (regmatch(cc->scan))
2755 cc->lastloc = lastloc;
2760 /* Check whether we already were at this position.
2761 Postpone detection until we know the match is not
2762 *that* much linear. */
2763 if (!PL_reg_maxiter) {
2764 PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
2765 PL_reg_leftiter = PL_reg_maxiter;
2767 if (PL_reg_leftiter-- == 0) {
2768 I32 size = (PL_reg_maxiter + 7)/8;
2769 if (PL_reg_poscache) {
2770 if (PL_reg_poscache_size < size) {
2771 Renew(PL_reg_poscache, size, char);
2772 PL_reg_poscache_size = size;
2774 Zero(PL_reg_poscache, size, char);
2777 PL_reg_poscache_size = size;
2778 Newz(29, PL_reg_poscache, size, char);
2781 PerlIO_printf(Perl_debug_log,
2782 "%sDetected a super-linear match, switching on caching%s...\n",
2783 PL_colors[4], PL_colors[5])
2786 if (PL_reg_leftiter < 0) {
2787 I32 o = locinput - PL_bostr, b;
2789 o = (scan->flags & 0xf) - 1 + o * (scan->flags>>4);
2792 if (PL_reg_poscache[o] & (1<<b)) {
2794 PerlIO_printf(Perl_debug_log,
2795 "%*s already tried at this position...\n",
2796 REPORT_CODE_OFF+PL_regindent*2, "")
2800 PL_reg_poscache[o] |= (1<<b);
2804 /* Prefer next over scan for minimal matching. */
2807 PL_regcc = cc->oldcc;
2810 cp = regcppush(cc->parenfloor);
2812 if (regmatch(cc->next)) {
2814 sayYES; /* All done. */
2816 REGCP_UNWIND(lastcp);
2822 if (n >= cc->max) { /* Maximum greed exceeded? */
2823 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
2824 && !(PL_reg_flags & RF_warned)) {
2825 PL_reg_flags |= RF_warned;
2826 Perl_warner(aTHX_ WARN_REGEXP, "%s limit (%d) exceeded",
2827 "Complex regular subexpression recursion",
2834 PerlIO_printf(Perl_debug_log,
2835 "%*s trying longer...\n",
2836 REPORT_CODE_OFF+PL_regindent*2, "")
2838 /* Try scanning more and see if it helps. */
2839 PL_reginput = locinput;
2841 cc->lastloc = locinput;
2842 cp = regcppush(cc->parenfloor);
2844 if (regmatch(cc->scan)) {
2848 REGCP_UNWIND(lastcp);
2851 cc->lastloc = lastloc;
2855 /* Prefer scan over next for maximal matching. */
2857 if (n < cc->max) { /* More greed allowed? */
2858 cp = regcppush(cc->parenfloor);
2860 cc->lastloc = locinput;
2862 if (regmatch(cc->scan)) {
2866 REGCP_UNWIND(lastcp);
2867 regcppop(); /* Restore some previous $<digit>s? */
2868 PL_reginput = locinput;
2870 PerlIO_printf(Perl_debug_log,
2871 "%*s failed, try continuation...\n",
2872 REPORT_CODE_OFF+PL_regindent*2, "")
2875 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
2876 && !(PL_reg_flags & RF_warned)) {
2877 PL_reg_flags |= RF_warned;
2878 Perl_warner(aTHX_ WARN_REGEXP, "%s limit (%d) exceeded",
2879 "Complex regular subexpression recursion",
2883 /* Failed deeper matches of scan, so see if this one works. */
2884 PL_regcc = cc->oldcc;
2887 if (regmatch(cc->next))
2893 cc->lastloc = lastloc;
2898 next = scan + ARG(scan);
2901 inner = NEXTOPER(NEXTOPER(scan));
2904 inner = NEXTOPER(scan);
2909 if (OP(next) != c1) /* No choice. */
2910 next = inner; /* Avoid recursion. */
2912 I32 lastparen = *PL_reglastparen;
2914 re_unwind_branch_t *uw;
2916 /* Put unwinding data on stack */
2917 unwind1 = SSNEWt(1,re_unwind_branch_t);
2918 uw = SSPTRt(unwind1,re_unwind_branch_t);
2921 uw->type = ((c1 == BRANCH)
2923 : RE_UNWIND_BRANCHJ);
2924 uw->lastparen = lastparen;
2926 uw->locinput = locinput;
2927 uw->nextchr = nextchr;
2929 uw->regindent = ++PL_regindent;
2932 REGCP_SET(uw->lastcp);
2934 /* Now go into the first branch */
2947 /* We suppose that the next guy does not need
2948 backtracking: in particular, it is of constant length,
2949 and has no parenths to influence future backrefs. */
2950 ln = ARG1(scan); /* min to match */
2951 n = ARG2(scan); /* max to match */
2952 paren = scan->flags;
2954 if (paren > PL_regsize)
2956 if (paren > *PL_reglastparen)
2957 *PL_reglastparen = paren;
2959 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
2961 scan += NEXT_OFF(scan); /* Skip former OPEN. */
2962 PL_reginput = locinput;
2965 if (ln && regrepeat_hard(scan, ln, &l) < ln)
2967 if (ln && l == 0 && n >= ln
2968 /* In fact, this is tricky. If paren, then the
2969 fact that we did/didnot match may influence
2970 future execution. */
2971 && !(paren && ln == 0))
2973 locinput = PL_reginput;
2974 if (PL_regkind[(U8)OP(next)] == EXACT) {
2975 c1 = (U8)*STRING(next);
2976 if (OP(next) == EXACTF)
2978 else if (OP(next) == EXACTFL)
2979 c2 = PL_fold_locale[c1];
2986 /* This may be improved if l == 0. */
2987 while (n >= ln || (n == REG_INFTY && ln > 0 && l)) { /* ln overflow ? */
2988 /* If it could work, try it. */
2990 UCHARAT(PL_reginput) == c1 ||
2991 UCHARAT(PL_reginput) == c2)
2995 PL_regstartp[paren] =
2996 HOPc(PL_reginput, -l) - PL_bostr;
2997 PL_regendp[paren] = PL_reginput - PL_bostr;
3000 PL_regendp[paren] = -1;
3004 REGCP_UNWIND(lastcp);
3006 /* Couldn't or didn't -- move forward. */
3007 PL_reginput = locinput;
3008 if (regrepeat_hard(scan, 1, &l)) {
3010 locinput = PL_reginput;
3017 n = regrepeat_hard(scan, n, &l);
3018 if (n != 0 && l == 0
3019 /* In fact, this is tricky. If paren, then the
3020 fact that we did/didnot match may influence
3021 future execution. */
3022 && !(paren && ln == 0))
3024 locinput = PL_reginput;
3026 PerlIO_printf(Perl_debug_log,
3027 "%*s matched %"IVdf" times, len=%"IVdf"...\n",
3028 (int)(REPORT_CODE_OFF+PL_regindent*2), "",
3032 if (PL_regkind[(U8)OP(next)] == EXACT) {
3033 c1 = (U8)*STRING(next);
3034 if (OP(next) == EXACTF)
3036 else if (OP(next) == EXACTFL)
3037 c2 = PL_fold_locale[c1];
3046 /* If it could work, try it. */
3048 UCHARAT(PL_reginput) == c1 ||
3049 UCHARAT(PL_reginput) == c2)
3052 PerlIO_printf(Perl_debug_log,
3053 "%*s trying tail with n=%"IVdf"...\n",
3054 (int)(REPORT_CODE_OFF+PL_regindent*2), "", (IV)n)
3058 PL_regstartp[paren] = HOPc(PL_reginput, -l) - PL_bostr;
3059 PL_regendp[paren] = PL_reginput - PL_bostr;
3062 PL_regendp[paren] = -1;
3066 REGCP_UNWIND(lastcp);
3068 /* Couldn't or didn't -- back up. */
3070 locinput = HOPc(locinput, -l);
3071 PL_reginput = locinput;
3078 paren = scan->flags; /* Which paren to set */
3079 if (paren > PL_regsize)
3081 if (paren > *PL_reglastparen)
3082 *PL_reglastparen = paren;
3083 ln = ARG1(scan); /* min to match */
3084 n = ARG2(scan); /* max to match */
3085 scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
3089 ln = ARG1(scan); /* min to match */
3090 n = ARG2(scan); /* max to match */
3091 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
3096 scan = NEXTOPER(scan);
3102 scan = NEXTOPER(scan);
3106 * Lookahead to avoid useless match attempts
3107 * when we know what character comes next.
3109 if (PL_regkind[(U8)OP(next)] == EXACT) {
3110 U8 *s = (U8*)STRING(next);
3113 if (OP(next) == EXACTF)
3115 else if (OP(next) == EXACTFL)
3116 c2 = PL_fold_locale[c1];
3119 if (OP(next) == EXACTF) {
3120 c1 = to_utf8_lower(s);
3121 c2 = to_utf8_upper(s);
3124 c2 = c1 = utf8_to_uv_simple(s, NULL);
3130 PL_reginput = locinput;
3134 if (ln && regrepeat(scan, ln) < ln)
3136 locinput = PL_reginput;
3139 char *e; /* Should not check after this */
3140 char *old = locinput;
3142 if (n == REG_INFTY) {
3145 while (UTF8_IS_CONTINUATION(*(U8*)e))
3151 m >0 && e + UTF8SKIP(e) <= PL_regeol; m--)
3155 e = locinput + n - ln;
3161 /* Find place 'next' could work */
3164 while (locinput <= e && *locinput != c1)
3167 while (locinput <= e
3172 count = locinput - old;
3179 utf8_to_uv_simple((U8*)locinput, &len) != c1;
3184 for (count = 0; locinput <= e; count++) {
3185 UV c = utf8_to_uv_simple((U8*)locinput, &len);
3186 if (c == c1 || c == c2)
3194 /* PL_reginput == old now */
3195 if (locinput != old) {
3196 ln = 1; /* Did some */
3197 if (regrepeat(scan, count) < count)
3200 /* PL_reginput == locinput now */
3201 TRYPAREN(paren, ln, locinput);
3202 PL_reginput = locinput; /* Could be reset... */
3203 REGCP_UNWIND(lastcp);
3204 /* Couldn't or didn't -- move forward. */
3207 locinput += UTF8SKIP(locinput);
3213 while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */
3217 c = utf8_to_uv_simple((U8*)PL_reginput, NULL);
3219 c = UCHARAT(PL_reginput);
3221 /* If it could work, try it. */
3222 if (c1 == -1000 || c == c1 || c == c2)
3224 TRYPAREN(paren, n, PL_reginput);
3225 REGCP_UNWIND(lastcp);
3227 /* Couldn't or didn't -- move forward. */
3228 PL_reginput = locinput;
3229 if (regrepeat(scan, 1)) {
3231 locinput = PL_reginput;
3239 n = regrepeat(scan, n);
3240 locinput = PL_reginput;
3241 if (ln < n && PL_regkind[(U8)OP(next)] == EOL &&
3242 (!PL_multiline || OP(next) == SEOL || OP(next) == EOS)) {
3243 ln = n; /* why back off? */
3244 /* ...because $ and \Z can match before *and* after
3245 newline at the end. Consider "\n\n" =~ /\n+\Z\n/.
3246 We should back off by one in this case. */
3247 if (UCHARAT(PL_reginput - 1) == '\n' && OP(next) != EOS)
3256 c = utf8_to_uv_simple((U8*)PL_reginput, NULL);
3258 c = UCHARAT(PL_reginput);
3260 /* If it could work, try it. */
3261 if (c1 == -1000 || c == c1 || c == c2)
3263 TRYPAREN(paren, n, PL_reginput);
3264 REGCP_UNWIND(lastcp);
3266 /* Couldn't or didn't -- back up. */
3268 PL_reginput = locinput = HOPc(locinput, -1);
3276 c = utf8_to_uv_simple((U8*)PL_reginput, NULL);
3278 c = UCHARAT(PL_reginput);
3280 /* If it could work, try it. */
3281 if (c1 == -1000 || c == c1 || c == c2)
3283 TRYPAREN(paren, n, PL_reginput);
3284 REGCP_UNWIND(lastcp);
3286 /* Couldn't or didn't -- back up. */
3288 PL_reginput = locinput = HOPc(locinput, -1);
3295 if (PL_reg_call_cc) {
3296 re_cc_state *cur_call_cc = PL_reg_call_cc;
3297 CURCUR *cctmp = PL_regcc;
3298 regexp *re = PL_reg_re;
3299 CHECKPOINT cp, lastcp;
3301 cp = regcppush(0); /* Save *all* the positions. */
3303 regcp_set_to(PL_reg_call_cc->ss); /* Restore parens of
3305 PL_reginput = locinput; /* Make position available to
3307 cache_re(PL_reg_call_cc->re);
3308 PL_regcc = PL_reg_call_cc->cc;
3309 PL_reg_call_cc = PL_reg_call_cc->prev;
3310 if (regmatch(cur_call_cc->node)) {
3311 PL_reg_call_cc = cur_call_cc;
3315 REGCP_UNWIND(lastcp);
3317 PL_reg_call_cc = cur_call_cc;
3323 PerlIO_printf(Perl_debug_log,
3324 "%*s continuation failed...\n",
3325 REPORT_CODE_OFF+PL_regindent*2, "")
3329 if (locinput < PL_regtill) {
3330 DEBUG_r(PerlIO_printf(Perl_debug_log,
3331 "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
3333 (long)(locinput - PL_reg_starttry),
3334 (long)(PL_regtill - PL_reg_starttry),
3336 sayNO_FINAL; /* Cannot match: too short. */
3338 PL_reginput = locinput; /* put where regtry can find it */
3339 sayYES_FINAL; /* Success! */
3341 PL_reginput = locinput; /* put where regtry can find it */
3342 sayYES_LOUD; /* Success! */
3345 PL_reginput = locinput;
3350 if (UTF) { /* XXXX This is absolutely
3351 broken, we read before
3353 s = HOPMAYBEc(locinput, -scan->flags);
3359 if (locinput < PL_bostr + scan->flags)
3361 PL_reginput = locinput - scan->flags;
3366 PL_reginput = locinput;
3371 if (UTF) { /* XXXX This is absolutely
3372 broken, we read before
3374 s = HOPMAYBEc(locinput, -scan->flags);
3375 if (!s || s < PL_bostr)
3380 if (locinput < PL_bostr + scan->flags)
3382 PL_reginput = locinput - scan->flags;
3387 PL_reginput = locinput;
3390 inner = NEXTOPER(NEXTOPER(scan));
3391 if (regmatch(inner) != n) {
3406 if (OP(scan) == SUSPEND) {
3407 locinput = PL_reginput;
3408 nextchr = UCHARAT(locinput);
3413 next = scan + ARG(scan);
3418 PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
3419 PTR2UV(scan), OP(scan));
3420 Perl_croak(aTHX_ "regexp memory corruption");
3427 * We get here only if there's trouble -- normally "case END" is
3428 * the terminating point.
3430 Perl_croak(aTHX_ "corrupted regexp pointers");
3436 PerlIO_printf(Perl_debug_log,
3437 "%*s %scould match...%s\n",
3438 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],PL_colors[5])
3442 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
3443 PL_colors[4],PL_colors[5]));
3449 #if 0 /* Breaks $^R */
3457 PerlIO_printf(Perl_debug_log,
3458 "%*s %sfailed...%s\n",
3459 REPORT_CODE_OFF+PL_regindent*2, "",PL_colors[4],PL_colors[5])
3465 re_unwind_t *uw = SSPTRt(unwind,re_unwind_t);
3468 case RE_UNWIND_BRANCH:
3469 case RE_UNWIND_BRANCHJ:
3471 re_unwind_branch_t *uwb = &(uw->branch);
3472 I32 lastparen = uwb->lastparen;
3474 REGCP_UNWIND(uwb->lastcp);
3475 for (n = *PL_reglastparen; n > lastparen; n--)
3477 *PL_reglastparen = n;
3478 scan = next = uwb->next;
3480 OP(scan) != (uwb->type == RE_UNWIND_BRANCH
3481 ? BRANCH : BRANCHJ) ) { /* Failure */
3488 /* Have more choice yet. Reuse the same uwb. */
3490 if ((n = (uwb->type == RE_UNWIND_BRANCH
3491 ? NEXT_OFF(next) : ARG(next))))
3494 next = NULL; /* XXXX Needn't unwinding in this case... */
3496 next = NEXTOPER(scan);
3497 if (uwb->type == RE_UNWIND_BRANCHJ)
3498 next = NEXTOPER(next);
3499 locinput = uwb->locinput;
3500 nextchr = uwb->nextchr;
3502 PL_regindent = uwb->regindent;
3509 Perl_croak(aTHX_ "regexp unwind memory corruption");
3520 - regrepeat - repeatedly match something simple, report how many
3523 * [This routine now assumes that it will only match on things of length 1.
3524 * That was true before, but now we assume scan - reginput is the count,
3525 * rather than incrementing count on every character. [Er, except utf8.]]
3528 S_regrepeat(pTHX_ regnode *p, I32 max)
3530 register char *scan;
3532 register char *loceol = PL_regeol;
3533 register I32 hardcount = 0;
3534 register bool do_utf8 = DO_UTF8(PL_reg_sv);
3537 if (max != REG_INFTY && max < loceol - scan)
3538 loceol = scan + max;
3543 while (scan < loceol && hardcount < max && *scan != '\n') {
3544 scan += UTF8SKIP(scan);
3548 while (scan < loceol && *scan != '\n')
3555 while (hardcount < max && scan < loceol) {
3556 scan += UTF8SKIP(scan);
3563 case EXACT: /* length of string is 1 */
3565 while (scan < loceol && UCHARAT(scan) == c)
3568 case EXACTF: /* length of string is 1 */
3570 while (scan < loceol &&
3571 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
3574 case EXACTFL: /* length of string is 1 */
3575 PL_reg_flags |= RF_tainted;
3577 while (scan < loceol &&
3578 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
3584 while (hardcount < max && scan < loceol &&
3585 reginclass(p, (U8*)scan, do_utf8)) {
3586 scan += UTF8SKIP(scan);
3590 while (scan < loceol && reginclass(p, (U8*)scan, do_utf8))
3597 while (hardcount < max && scan < loceol &&
3598 swash_fetch(PL_utf8_alnum, (U8*)scan)) {
3599 scan += UTF8SKIP(scan);
3603 while (scan < loceol && isALNUM(*scan))
3608 PL_reg_flags |= RF_tainted;
3611 while (hardcount < max && scan < loceol &&
3612 isALNUM_LC_utf8((U8*)scan)) {
3613 scan += UTF8SKIP(scan);
3617 while (scan < loceol && isALNUM_LC(*scan))
3624 while (hardcount < max && scan < loceol &&
3625 !swash_fetch(PL_utf8_alnum, (U8*)scan)) {
3626 scan += UTF8SKIP(scan);
3630 while (scan < loceol && !isALNUM(*scan))
3635 PL_reg_flags |= RF_tainted;
3638 while (hardcount < max && scan < loceol &&
3639 !isALNUM_LC_utf8((U8*)scan)) {
3640 scan += UTF8SKIP(scan);
3644 while (scan < loceol && !isALNUM_LC(*scan))
3651 while (hardcount < max && scan < loceol &&
3652 (*scan == ' ' || swash_fetch(PL_utf8_space,(U8*)scan))) {
3653 scan += UTF8SKIP(scan);
3657 while (scan < loceol && isSPACE(*scan))
3662 PL_reg_flags |= RF_tainted;
3665 while (hardcount < max && scan < loceol &&
3666 (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
3667 scan += UTF8SKIP(scan);
3671 while (scan < loceol && isSPACE_LC(*scan))
3678 while (hardcount < max && scan < loceol &&
3679 !(*scan == ' ' || swash_fetch(PL_utf8_space,(U8*)scan))) {
3680 scan += UTF8SKIP(scan);
3684 while (scan < loceol && !isSPACE(*scan))
3689 PL_reg_flags |= RF_tainted;
3692 while (hardcount < max && scan < loceol &&
3693 !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
3694 scan += UTF8SKIP(scan);
3698 while (scan < loceol && !isSPACE_LC(*scan))
3705 while (hardcount < max && scan < loceol &&
3706 swash_fetch(PL_utf8_digit,(U8*)scan)) {
3707 scan += UTF8SKIP(scan);
3711 while (scan < loceol && isDIGIT(*scan))
3718 while (hardcount < max && scan < loceol &&
3719 !swash_fetch(PL_utf8_digit,(U8*)scan)) {
3720 scan += UTF8SKIP(scan);
3724 while (scan < loceol && !isDIGIT(*scan))
3728 default: /* Called on something of 0 width. */
3729 break; /* So match right here or not at all. */
3735 c = scan - PL_reginput;
3740 SV *prop = sv_newmortal();
3743 PerlIO_printf(Perl_debug_log,
3744 "%*s %s can match %"IVdf" times out of %"IVdf"...\n",
3745 REPORT_CODE_OFF+1, "", SvPVX(prop),(IV)c,(IV)max);
3752 - regrepeat_hard - repeatedly match something, report total lenth and length
3754 * The repeater is supposed to have constant length.
3758 S_regrepeat_hard(pTHX_ regnode *p, I32 max, I32 *lp)
3760 register char *scan;
3761 register char *start;
3762 register char *loceol = PL_regeol;
3764 I32 count = 0, res = 1;
3769 start = PL_reginput;
3770 if (DO_UTF8(PL_reg_sv)) {
3771 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
3774 while (start < PL_reginput) {
3776 start += UTF8SKIP(start);
3787 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
3789 *lp = l = PL_reginput - start;
3790 if (max != REG_INFTY && l*max < loceol - scan)
3791 loceol = scan + l*max;
3804 - regclass_swash - prepare the utf8 swash
3808 Perl_regclass_swash(pTHX_ register regnode* node, bool doinit, SV** initsvp)
3813 if (PL_regdata && PL_regdata->count) {
3816 if (PL_regdata->what[n] == 's') {
3817 SV *rv = (SV*)PL_regdata->data[n];
3818 AV *av = (AV*)SvRV((SV*)rv);
3821 si = *av_fetch(av, 0, FALSE);
3822 a = av_fetch(av, 1, FALSE);
3826 else if (si && doinit) {
3827 sw = swash_init("utf8", "", si, 1, 0);
3828 (void)av_store(av, 1, sw);
3840 - reginclass - determine if a character falls into a character class
3844 S_reginclass(pTHX_ register regnode *n, register U8* p, register bool do_utf8)
3846 char flags = ANYOF_FLAGS(n);
3852 c = utf8_to_uv_simple(p, &len);
3856 if (do_utf8 || (flags & ANYOF_UNICODE)) {
3857 if (do_utf8 && !ANYOF_RUNTIME(n)) {
3858 if (len != (STRLEN)-1 && c < 256 && ANYOF_BITMAP_TEST(n, c))
3861 if (do_utf8 && flags & ANYOF_UNICODE_ALL && c >= 256)
3864 SV *sw = regclass_swash(n, TRUE, 0);
3867 if (swash_fetch(sw, p))
3869 else if (flags & ANYOF_FOLD) {
3870 U8 tmpbuf[UTF8_MAXLEN+1];
3872 if (flags & ANYOF_LOCALE) {
3873 PL_reg_flags |= RF_tainted;
3874 uv_to_utf8(tmpbuf, toLOWER_LC_utf8(p));
3877 uv_to_utf8(tmpbuf, toLOWER_utf8(p));
3878 if (swash_fetch(sw, tmpbuf))
3884 if (!match && c < 256) {
3885 if (ANYOF_BITMAP_TEST(n, c))
3887 else if (flags & ANYOF_FOLD) {
3890 if (flags & ANYOF_LOCALE) {
3891 PL_reg_flags |= RF_tainted;
3892 f = PL_fold_locale[c];
3896 if (f != c && ANYOF_BITMAP_TEST(n, f))
3900 if (!match && (flags & ANYOF_CLASS)) {
3901 PL_reg_flags |= RF_tainted;
3903 (ANYOF_CLASS_TEST(n, ANYOF_ALNUM) && isALNUM_LC(c)) ||
3904 (ANYOF_CLASS_TEST(n, ANYOF_NALNUM) && !isALNUM_LC(c)) ||
3905 (ANYOF_CLASS_TEST(n, ANYOF_SPACE) && isSPACE_LC(c)) ||
3906 (ANYOF_CLASS_TEST(n, ANYOF_NSPACE) && !isSPACE_LC(c)) ||
3907 (ANYOF_CLASS_TEST(n, ANYOF_DIGIT) && isDIGIT_LC(c)) ||
3908 (ANYOF_CLASS_TEST(n, ANYOF_NDIGIT) && !isDIGIT_LC(c)) ||
3909 (ANYOF_CLASS_TEST(n, ANYOF_ALNUMC) && isALNUMC_LC(c)) ||
3910 (ANYOF_CLASS_TEST(n, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
3911 (ANYOF_CLASS_TEST(n, ANYOF_ALPHA) && isALPHA_LC(c)) ||
3912 (ANYOF_CLASS_TEST(n, ANYOF_NALPHA) && !isALPHA_LC(c)) ||
3913 (ANYOF_CLASS_TEST(n, ANYOF_ASCII) && isASCII(c)) ||
3914 (ANYOF_CLASS_TEST(n, ANYOF_NASCII) && !isASCII(c)) ||
3915 (ANYOF_CLASS_TEST(n, ANYOF_CNTRL) && isCNTRL_LC(c)) ||
3916 (ANYOF_CLASS_TEST(n, ANYOF_NCNTRL) && !isCNTRL_LC(c)) ||
3917 (ANYOF_CLASS_TEST(n, ANYOF_GRAPH) && isGRAPH_LC(c)) ||
3918 (ANYOF_CLASS_TEST(n, ANYOF_NGRAPH) && !isGRAPH_LC(c)) ||
3919 (ANYOF_CLASS_TEST(n, ANYOF_LOWER) && isLOWER_LC(c)) ||
3920 (ANYOF_CLASS_TEST(n, ANYOF_NLOWER) && !isLOWER_LC(c)) ||
3921 (ANYOF_CLASS_TEST(n, ANYOF_PRINT) && isPRINT_LC(c)) ||
3922 (ANYOF_CLASS_TEST(n, ANYOF_NPRINT) && !isPRINT_LC(c)) ||
3923 (ANYOF_CLASS_TEST(n, ANYOF_PUNCT) && isPUNCT_LC(c)) ||
3924 (ANYOF_CLASS_TEST(n, ANYOF_NPUNCT) && !isPUNCT_LC(c)) ||
3925 (ANYOF_CLASS_TEST(n, ANYOF_UPPER) && isUPPER_LC(c)) ||
3926 (ANYOF_CLASS_TEST(n, ANYOF_NUPPER) && !isUPPER_LC(c)) ||
3927 (ANYOF_CLASS_TEST(n, ANYOF_XDIGIT) && isXDIGIT(c)) ||
3928 (ANYOF_CLASS_TEST(n, ANYOF_NXDIGIT) && !isXDIGIT(c)) ||
3929 (ANYOF_CLASS_TEST(n, ANYOF_PSXSPC) && isPSXSPC(c)) ||
3930 (ANYOF_CLASS_TEST(n, ANYOF_NPSXSPC) && !isPSXSPC(c)) ||
3931 (ANYOF_CLASS_TEST(n, ANYOF_BLANK) && isBLANK(c)) ||
3932 (ANYOF_CLASS_TEST(n, ANYOF_NBLANK) && !isBLANK(c))
3933 ) /* How's that for a conditional? */
3940 return (flags & ANYOF_INVERT) ? !match : match;
3944 S_reghop(pTHX_ U8 *s, I32 off)
3946 return S_reghop3(aTHX_ s, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr));
3950 S_reghop3(pTHX_ U8 *s, I32 off, U8* lim)
3953 while (off-- && s < lim) {
3954 /* XXX could check well-formedness here */
3962 if (UTF8_IS_CONTINUED(*s)) {
3963 while (s > (U8*)lim && UTF8_IS_CONTINUATION(*s))
3966 /* XXX could check well-formedness here */
3974 S_reghopmaybe(pTHX_ U8 *s, I32 off)
3976 return S_reghopmaybe3(aTHX_ s, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr));
3980 S_reghopmaybe3(pTHX_ U8* s, I32 off, U8* lim)
3983 while (off-- && s < lim) {
3984 /* XXX could check well-formedness here */
3994 if (UTF8_IS_CONTINUED(*s)) {
3995 while (s > (U8*)lim && UTF8_IS_CONTINUATION(*s))
3998 /* XXX could check well-formedness here */
4014 restore_pos(pTHXo_ void *arg)
4016 if (PL_reg_eval_set) {
4017 if (PL_reg_oldsaved) {
4018 PL_reg_re->subbeg = PL_reg_oldsaved;
4019 PL_reg_re->sublen = PL_reg_oldsavedlen;
4020 RX_MATCH_COPIED_on(PL_reg_re);
4022 PL_reg_magic->mg_len = PL_reg_oldpos;
4023 PL_reg_eval_set = 0;
4024 PL_curpm = PL_reg_oldcurpm;