5 * "One Ring to rule them all, One Ring to find them..."
8 /* NOTE: this is derived from Henry Spencer's regexp code, and should not
9 * confused with the original package (see point 3 below). Thanks, Henry!
12 /* Additional note: this code is very heavily munged from Henry's version
13 * in places. In some spots I've traded clarity for efficiency, so don't
14 * blame Henry for some of the lack of readability.
17 /* The names of the functions have been changed from regcomp and
18 * regexec to pregcomp and pregexec in order to avoid conflicts
19 * with the POSIX routines of the same names.
22 #ifdef PERL_EXT_RE_BUILD
23 /* need to replace pregcomp et al, so enable that */
24 # ifndef PERL_IN_XSUB_RE
25 # define PERL_IN_XSUB_RE
27 /* need access to debugger hooks */
28 # if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING)
33 #ifdef PERL_IN_XSUB_RE
34 /* We *really* need to overwrite these symbols: */
35 # define Perl_regexec_flags my_regexec
36 # define Perl_regdump my_regdump
37 # define Perl_regprop my_regprop
38 # define Perl_re_intuit_start my_re_intuit_start
39 /* *These* symbols are masked to allow static link. */
40 # define Perl_pregexec my_pregexec
41 # define Perl_reginitcolors my_reginitcolors
42 # define Perl_regclass_swash my_regclass_swash
44 # define PERL_NO_GET_CONTEXT
49 * pregcomp and pregexec -- regsub and regerror are not used in perl
51 * Copyright (c) 1986 by University of Toronto.
52 * Written by Henry Spencer. Not derived from licensed software.
54 * Permission is granted to anyone to use this software for any
55 * purpose on any computer system, and to redistribute it freely,
56 * subject to the following restrictions:
58 * 1. The author is not responsible for the consequences of use of
59 * this software, no matter how awful, even if they arise
62 * 2. The origin of this software must not be misrepresented, either
63 * by explicit claim or by omission.
65 * 3. Altered versions must be plainly marked as such, and must not
66 * be misrepresented as being the original software.
68 **** Alterations to Henry's code are...
70 **** Copyright (c) 1991-2001, Larry Wall
72 **** You may distribute under the terms of either the GNU General Public
73 **** License or the Artistic License, as specified in the README file.
75 * Beware that some of this code is subtly aware of the way operator
76 * precedence is structured in regular expressions. Serious changes in
77 * regular-expression syntax might require a total rethink.
80 #define PERL_IN_REGEXEC_C
83 #ifdef PERL_IN_XSUB_RE
84 # if defined(PERL_CAPI) || defined(PERL_OBJECT)
91 #define RF_tainted 1 /* tainted information used? */
92 #define RF_warned 2 /* warned about big count? */
93 #define RF_evaled 4 /* Did an EVAL with setting? */
94 #define RF_utf8 8 /* String contains multibyte chars? */
96 #define UTF (PL_reg_flags & RF_utf8)
98 #define RS_init 1 /* eval environment created */
99 #define RS_set 2 /* replsv value is set */
102 #define STATIC static
109 #define CHR_SVLEN(sv) (UTF ? sv_len_utf8(sv) : SvCUR(sv))
110 #define CHR_DIST(a,b) (DO_UTF8(PL_reg_sv) ? utf8_distance(a,b) : a - b)
112 #define reghop_c(pos,off) ((char*)reghop((U8*)pos, off))
113 #define reghopmaybe_c(pos,off) ((char*)reghopmaybe((U8*)pos, off))
114 #define HOP(pos,off) (DO_UTF8(PL_reg_sv) ? reghop((U8*)pos, off) : (U8*)(pos + off))
115 #define HOPMAYBE(pos,off) (DO_UTF8(PL_reg_sv) ? reghopmaybe((U8*)pos, off) : (U8*)(pos + off))
116 #define HOPc(pos,off) ((char*)HOP(pos,off))
117 #define HOPMAYBEc(pos,off) ((char*)HOPMAYBE(pos,off))
119 #define reghop3_c(pos,off,lim) ((char*)reghop3((U8*)pos, off, (U8*)lim))
120 #define reghopmaybe3_c(pos,off,lim) ((char*)reghopmaybe3((U8*)pos, off, (U8*)lim))
121 #define HOP3(pos,off,lim) (DO_UTF8(PL_reg_sv) ? reghop3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
122 #define HOPMAYBE3(pos,off,lim) (DO_UTF8(PL_reg_sv) ? reghopmaybe3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
123 #define HOP3c(pos,off,lim) ((char*)HOP3(pos,off,lim))
124 #define HOPMAYBE3c(pos,off,lim) ((char*)HOPMAYBE3(pos,off,lim))
126 static void restore_pos(pTHXo_ void *arg);
130 S_regcppush(pTHX_ I32 parenfloor)
132 int retval = PL_savestack_ix;
133 int i = (PL_regsize - parenfloor) * 4;
137 for (p = PL_regsize; p > parenfloor; p--) {
138 SSPUSHINT(PL_regendp[p]);
139 SSPUSHINT(PL_regstartp[p]);
140 SSPUSHPTR(PL_reg_start_tmp[p]);
143 SSPUSHINT(PL_regsize);
144 SSPUSHINT(*PL_reglastparen);
145 SSPUSHPTR(PL_reginput);
147 SSPUSHINT(SAVEt_REGCONTEXT);
151 /* These are needed since we do not localize EVAL nodes: */
152 # define REGCP_SET(cp) DEBUG_r(PerlIO_printf(Perl_debug_log, \
153 " Setting an EVAL scope, savestack=%"IVdf"\n", \
154 (IV)PL_savestack_ix)); cp = PL_savestack_ix
156 # define REGCP_UNWIND(cp) DEBUG_r(cp != PL_savestack_ix ? \
157 PerlIO_printf(Perl_debug_log, \
158 " Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
159 (IV)(cp), (IV)PL_savestack_ix) : 0); regcpblow(cp)
168 assert(i == SAVEt_REGCONTEXT);
170 input = (char *) SSPOPPTR;
171 *PL_reglastparen = SSPOPINT;
172 PL_regsize = SSPOPINT;
173 for (i -= 3; i > 0; i -= 4) {
174 paren = (U32)SSPOPINT;
175 PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
176 PL_regstartp[paren] = SSPOPINT;
178 if (paren <= *PL_reglastparen)
179 PL_regendp[paren] = tmps;
181 PerlIO_printf(Perl_debug_log,
182 " restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
183 (UV)paren, (IV)PL_regstartp[paren],
184 (IV)(PL_reg_start_tmp[paren] - PL_bostr),
185 (IV)PL_regendp[paren],
186 (paren > *PL_reglastparen ? "(no)" : ""));
190 if (*PL_reglastparen + 1 <= PL_regnpar) {
191 PerlIO_printf(Perl_debug_log,
192 " restoring \\%"IVdf"..\\%"IVdf" to undef\n",
193 (IV)(*PL_reglastparen + 1), (IV)PL_regnpar);
197 /* It would seem that the similar code in regtry()
198 * already takes care of this, and in fact it is in
199 * a better location to since this code can #if 0-ed out
200 * but the code in regtry() is needed or otherwise tests
201 * requiring null fields (pat.t#187 and split.t#{13,14}
202 * (as of patchlevel 7877) will fail. Then again,
203 * this code seems to be necessary or otherwise
204 * building DynaLoader will fail:
205 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
207 for (paren = *PL_reglastparen + 1; paren <= PL_regnpar; paren++) {
208 if (paren > PL_regsize)
209 PL_regstartp[paren] = -1;
210 PL_regendp[paren] = -1;
217 S_regcp_set_to(pTHX_ I32 ss)
219 I32 tmp = PL_savestack_ix;
221 PL_savestack_ix = ss;
223 PL_savestack_ix = tmp;
227 typedef struct re_cc_state
231 struct re_cc_state *prev;
236 #define regcpblow(cp) LEAVE_SCOPE(cp) /* Ignores regcppush()ed data. */
238 #define TRYPAREN(paren, n, input) { \
241 PL_regstartp[paren] = HOPc(input, -1) - PL_bostr; \
242 PL_regendp[paren] = input - PL_bostr; \
245 PL_regendp[paren] = -1; \
247 if (regmatch(next)) \
250 PL_regendp[paren] = -1; \
255 * pregexec and friends
259 - pregexec - match a regexp against a string
262 Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend,
263 char *strbeg, I32 minend, SV *screamer, U32 nosave)
264 /* strend: pointer to null at end of string */
265 /* strbeg: real beginning of string */
266 /* minend: end of match must be >=minend after stringarg. */
267 /* nosave: For optimizations. */
270 regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
271 nosave ? 0 : REXEC_COPY_STR);
275 S_cache_re(pTHX_ regexp *prog)
277 PL_regprecomp = prog->precomp; /* Needed for FAIL. */
279 PL_regprogram = prog->program;
281 PL_regnpar = prog->nparens;
282 PL_regdata = prog->data;
287 * Need to implement the following flags for reg_anch:
289 * USE_INTUIT_NOML - Useful to call re_intuit_start() first
291 * INTUIT_AUTORITATIVE_NOML - Can trust a positive answer
292 * INTUIT_AUTORITATIVE_ML
293 * INTUIT_ONCE_NOML - Intuit can match in one location only.
296 * Another flag for this function: SECOND_TIME (so that float substrs
297 * with giant delta may be not rechecked).
300 /* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
302 /* If SCREAM, then SvPVX(sv) should be compatible with strpos and strend.
303 Otherwise, only SvCUR(sv) is used to get strbeg. */
305 /* XXXX We assume that strpos is strbeg unless sv. */
307 /* XXXX Some places assume that there is a fixed substring.
308 An update may be needed if optimizer marks as "INTUITable"
309 RExen without fixed substrings. Similarly, it is assumed that
310 lengths of all the strings are no more than minlen, thus they
311 cannot come from lookahead.
312 (Or minlen should take into account lookahead.) */
314 /* A failure to find a constant substring means that there is no need to make
315 an expensive call to REx engine, thus we celebrate a failure. Similarly,
316 finding a substring too deep into the string means that less calls to
317 regtry() should be needed.
319 REx compiler's optimizer found 4 possible hints:
320 a) Anchored substring;
322 c) Whether we are anchored (beginning-of-line or \G);
323 d) First node (of those at offset 0) which may distingush positions;
324 We use a)b)d) and multiline-part of c), and try to find a position in the
325 string which does not contradict any of them.
328 /* Most of decisions we do here should have been done at compile time.
329 The nodes of the REx which we used for the search should have been
330 deleted from the finite automaton. */
333 Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
334 char *strend, U32 flags, re_scream_pos_data *data)
336 register I32 start_shift;
337 /* Should be nonnegative! */
338 register I32 end_shift;
345 register char *other_last = Nullch; /* other substr checked before this */
346 char *check_at; /* check substr found at this pos */
348 char *i_strpos = strpos;
351 DEBUG_r( if (!PL_colorset) reginitcolors() );
352 DEBUG_r(PerlIO_printf(Perl_debug_log,
353 "%sGuessing start of match, REx%s `%s%.60s%s%s' against `%s%.*s%s%s'...\n",
354 PL_colors[4],PL_colors[5],PL_colors[0],
357 (strlen(prog->precomp) > 60 ? "..." : ""),
359 (int)(strend - strpos > 60 ? 60 : strend - strpos),
360 strpos, PL_colors[1],
361 (strend - strpos > 60 ? "..." : ""))
364 if (prog->reganch & ROPT_UTF8)
365 PL_reg_flags |= RF_utf8;
367 if (prog->minlen > CHR_DIST((U8*)strend, (U8*)strpos)) {
368 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too short...\n"));
371 strbeg = (sv && SvPOK(sv)) ? strend - SvCUR(sv) : strpos;
373 check = prog->check_substr;
374 if (prog->reganch & ROPT_ANCH) { /* Match at beg-of-str or after \n */
375 ml_anch = !( (prog->reganch & ROPT_ANCH_SINGLE)
376 || ( (prog->reganch & ROPT_ANCH_BOL)
377 && !PL_multiline ) ); /* Check after \n? */
380 if ( !(prog->reganch & ROPT_ANCH_GPOS) /* Checked by the caller */
381 /* SvCUR is not set on references: SvRV and SvPVX overlap */
383 && (strpos != strbeg)) {
384 DEBUG_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
387 if (prog->check_offset_min == prog->check_offset_max) {
388 /* Substring at constant offset from beg-of-str... */
391 s = HOP3c(strpos, prog->check_offset_min, strend);
393 slen = SvCUR(check); /* >= 1 */
395 if ( strend - s > slen || strend - s < slen - 1
396 || (strend - s == slen && strend[-1] != '\n')) {
397 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
400 /* Now should match s[0..slen-2] */
402 if (slen && (*SvPVX(check) != *s
404 && memNE(SvPVX(check), s, slen)))) {
406 DEBUG_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
410 else if (*SvPVX(check) != *s
411 || ((slen = SvCUR(check)) > 1
412 && memNE(SvPVX(check), s, slen)))
414 goto success_at_start;
417 /* Match is anchored, but substr is not anchored wrt beg-of-str. */
419 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
420 end_shift = prog->minlen - start_shift -
421 CHR_SVLEN(check) + (SvTAIL(check) != 0);
423 I32 end = prog->check_offset_max + CHR_SVLEN(check)
424 - (SvTAIL(check) != 0);
425 I32 eshift = CHR_DIST((U8*)strend, (U8*)s) - end;
427 if (end_shift < eshift)
431 else { /* Can match at random position */
434 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
435 /* Should be nonnegative! */
436 end_shift = prog->minlen - start_shift -
437 CHR_SVLEN(check) + (SvTAIL(check) != 0);
440 #ifdef DEBUGGING /* 7/99: reports of failure (with the older version) */
442 Perl_croak(aTHX_ "panic: end_shift");
446 /* Find a possible match in the region s..strend by looking for
447 the "check" substring in the region corrected by start/end_shift. */
448 if (flags & REXEC_SCREAM) {
449 I32 p = -1; /* Internal iterator of scream. */
450 I32 *pp = data ? data->scream_pos : &p;
452 if (PL_screamfirst[BmRARE(check)] >= 0
453 || ( BmRARE(check) == '\n'
454 && (BmPREVIOUS(check) == SvCUR(check) - 1)
456 s = screaminstr(sv, check,
457 start_shift + (s - strbeg), end_shift, pp, 0);
461 *data->scream_olds = s;
464 s = fbm_instr(HOP3(s, start_shift, strend),
465 HOP3(strend, -end_shift, strbeg),
466 check, PL_multiline ? FBMrf_MULTILINE : 0);
468 /* Update the count-of-usability, remove useless subpatterns,
471 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s %s substr `%s%.*s%s'%s%s",
472 (s ? "Found" : "Did not find"),
473 ((check == prog->anchored_substr) ? "anchored" : "floating"),
475 (int)(SvCUR(check) - (SvTAIL(check)!=0)),
477 PL_colors[1], (SvTAIL(check) ? "$" : ""),
478 (s ? " at offset " : "...\n") ) );
485 /* Finish the diagnostic message */
486 DEBUG_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
488 /* Got a candidate. Check MBOL anchoring, and the *other* substr.
489 Start with the other substr.
490 XXXX no SCREAM optimization yet - and a very coarse implementation
491 XXXX /ttx+/ results in anchored=`ttx', floating=`x'. floating will
492 *always* match. Probably should be marked during compile...
493 Probably it is right to do no SCREAM here...
496 if (prog->float_substr && prog->anchored_substr) {
497 /* Take into account the "other" substring. */
498 /* XXXX May be hopelessly wrong for UTF... */
501 if (check == prog->float_substr) {
504 char *last = HOP3c(s, -start_shift, strbeg), *last1, *last2;
507 t = s - prog->check_offset_max;
508 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
509 && (!(prog->reganch & ROPT_UTF8)
510 || ((t = reghopmaybe3_c(s, -(prog->check_offset_max), strpos))
515 t = HOP3c(t, prog->anchored_offset, strend);
516 if (t < other_last) /* These positions already checked */
518 last2 = last1 = HOP3c(strend, -prog->minlen, strbeg);
521 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
522 /* On end-of-str: see comment below. */
523 s = fbm_instr((unsigned char*)t,
524 HOP3(HOP3(last1, prog->anchored_offset, strend)
525 + SvCUR(prog->anchored_substr),
526 -(SvTAIL(prog->anchored_substr)!=0), strbeg),
527 prog->anchored_substr,
528 PL_multiline ? FBMrf_MULTILINE : 0);
529 DEBUG_r(PerlIO_printf(Perl_debug_log,
530 "%s anchored substr `%s%.*s%s'%s",
531 (s ? "Found" : "Contradicts"),
533 (int)(SvCUR(prog->anchored_substr)
534 - (SvTAIL(prog->anchored_substr)!=0)),
535 SvPVX(prog->anchored_substr),
536 PL_colors[1], (SvTAIL(prog->anchored_substr) ? "$" : "")));
538 if (last1 >= last2) {
539 DEBUG_r(PerlIO_printf(Perl_debug_log,
540 ", giving up...\n"));
543 DEBUG_r(PerlIO_printf(Perl_debug_log,
544 ", trying floating at offset %ld...\n",
545 (long)(HOP3c(s1, 1, strend) - i_strpos)));
546 other_last = HOP3c(last1, prog->anchored_offset+1, strend);
547 s = HOP3c(last, 1, strend);
551 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
552 (long)(s - i_strpos)));
553 t = HOP3c(s, -prog->anchored_offset, strbeg);
554 other_last = HOP3c(s, 1, strend);
562 else { /* Take into account the floating substring. */
566 t = HOP3c(s, -start_shift, strbeg);
568 HOP3c(strend, -prog->minlen + prog->float_min_offset, strbeg);
569 if (CHR_DIST((U8*)last, (U8*)t) > prog->float_max_offset)
570 last = HOP3c(t, prog->float_max_offset, strend);
571 s = HOP3c(t, prog->float_min_offset, strend);
574 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
575 /* fbm_instr() takes into account exact value of end-of-str
576 if the check is SvTAIL(ed). Since false positives are OK,
577 and end-of-str is not later than strend we are OK. */
578 s = fbm_instr((unsigned char*)s,
579 (unsigned char*)last + SvCUR(prog->float_substr)
580 - (SvTAIL(prog->float_substr)!=0),
581 prog->float_substr, PL_multiline ? FBMrf_MULTILINE : 0);
582 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s floating substr `%s%.*s%s'%s",
583 (s ? "Found" : "Contradicts"),
585 (int)(SvCUR(prog->float_substr)
586 - (SvTAIL(prog->float_substr)!=0)),
587 SvPVX(prog->float_substr),
588 PL_colors[1], (SvTAIL(prog->float_substr) ? "$" : "")));
591 DEBUG_r(PerlIO_printf(Perl_debug_log,
592 ", giving up...\n"));
595 DEBUG_r(PerlIO_printf(Perl_debug_log,
596 ", trying anchored starting at offset %ld...\n",
597 (long)(s1 + 1 - i_strpos)));
598 other_last = last + 1;
599 s = HOP3c(t, 1, strend);
603 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
604 (long)(s - i_strpos)));
614 t = s - prog->check_offset_max;
615 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
616 && (!(prog->reganch & ROPT_UTF8)
617 || ((t = reghopmaybe3_c(s, -prog->check_offset_max, strpos))
619 /* Fixed substring is found far enough so that the match
620 cannot start at strpos. */
622 if (ml_anch && t[-1] != '\n') {
623 /* Eventually fbm_*() should handle this, but often
624 anchored_offset is not 0, so this check will not be wasted. */
625 /* XXXX In the code below we prefer to look for "^" even in
626 presence of anchored substrings. And we search even
627 beyond the found float position. These pessimizations
628 are historical artefacts only. */
630 while (t < strend - prog->minlen) {
632 if (t < check_at - prog->check_offset_min) {
633 if (prog->anchored_substr) {
634 /* Since we moved from the found position,
635 we definitely contradict the found anchored
636 substr. Due to the above check we do not
637 contradict "check" substr.
638 Thus we can arrive here only if check substr
639 is float. Redo checking for "other"=="fixed".
642 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
643 PL_colors[0],PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
644 goto do_other_anchored;
646 /* We don't contradict the found floating substring. */
647 /* XXXX Why not check for STCLASS? */
649 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
650 PL_colors[0],PL_colors[1], (long)(s - i_strpos)));
653 /* Position contradicts check-string */
654 /* XXXX probably better to look for check-string
655 than for "\n", so one should lower the limit for t? */
656 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n",
657 PL_colors[0],PL_colors[1], (long)(t + 1 - i_strpos)));
658 other_last = strpos = s = t + 1;
663 DEBUG_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
664 PL_colors[0],PL_colors[1]));
668 DEBUG_r(PerlIO_printf(Perl_debug_log, "Starting position does not contradict /%s^%s/m...\n",
669 PL_colors[0],PL_colors[1]));
673 ++BmUSEFUL(prog->check_substr); /* hooray/5 */
676 /* The found string does not prohibit matching at strpos,
677 - no optimization of calling REx engine can be performed,
678 unless it was an MBOL and we are not after MBOL,
679 or a future STCLASS check will fail this. */
681 /* Even in this situation we may use MBOL flag if strpos is offset
682 wrt the start of the string. */
683 if (ml_anch && sv && !SvROK(sv) /* See prev comment on SvROK */
684 && (strpos != strbeg) && strpos[-1] != '\n'
685 /* May be due to an implicit anchor of m{.*foo} */
686 && !(prog->reganch & ROPT_IMPLICIT))
691 DEBUG_r( if (ml_anch)
692 PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n",
693 (long)(strpos - i_strpos), PL_colors[0],PL_colors[1]);
696 if (!(prog->reganch & ROPT_NAUGHTY) /* XXXX If strpos moved? */
697 && prog->check_substr /* Could be deleted already */
698 && --BmUSEFUL(prog->check_substr) < 0
699 && prog->check_substr == prog->float_substr)
701 /* If flags & SOMETHING - do not do it many times on the same match */
702 DEBUG_r(PerlIO_printf(Perl_debug_log, "... Disabling check substring...\n"));
703 SvREFCNT_dec(prog->check_substr);
704 prog->check_substr = Nullsv; /* disable */
705 prog->float_substr = Nullsv; /* clear */
706 check = Nullsv; /* abort */
708 /* XXXX This is a remnant of the old implementation. It
709 looks wasteful, since now INTUIT can use many
711 prog->reganch &= ~RE_USE_INTUIT;
718 /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
719 if (prog->regstclass) {
720 /* minlen == 0 is possible if regstclass is \b or \B,
721 and the fixed substr is ''$.
722 Since minlen is already taken into account, s+1 is before strend;
723 accidentally, minlen >= 1 guaranties no false positives at s + 1
724 even for \b or \B. But (minlen? 1 : 0) below assumes that
725 regstclass does not come from lookahead... */
726 /* If regstclass takes bytelength more than 1: If charlength==1, OK.
727 This leaves EXACTF only, which is dealt with in find_byclass(). */
728 U8* str = (U8*)STRING(prog->regstclass);
729 int cl_l = (PL_regkind[(U8)OP(prog->regstclass)] == EXACT
730 ? CHR_DIST(str+STR_LEN(prog->regstclass), str)
732 char *endpos = (prog->anchored_substr || ml_anch)
733 ? HOP3c(s, (prog->minlen ? cl_l : 0), strend)
734 : (prog->float_substr
735 ? HOP3c(HOP3c(check_at, -start_shift, strbeg),
738 char *startpos = strbeg;
741 if (prog->reganch & ROPT_UTF8) {
742 PL_regdata = prog->data;
745 s = find_byclass(prog, prog->regstclass, s, endpos, startpos, 1);
750 if (endpos == strend) {
751 DEBUG_r( PerlIO_printf(Perl_debug_log,
752 "Could not match STCLASS...\n") );
755 DEBUG_r( PerlIO_printf(Perl_debug_log,
756 "This position contradicts STCLASS...\n") );
757 if ((prog->reganch & ROPT_ANCH) && !ml_anch)
759 /* Contradict one of substrings */
760 if (prog->anchored_substr) {
761 if (prog->anchored_substr == check) {
762 DEBUG_r( what = "anchored" );
764 s = HOP3c(t, 1, strend);
765 if (s + start_shift + end_shift > strend) {
766 /* XXXX Should be taken into account earlier? */
767 DEBUG_r( PerlIO_printf(Perl_debug_log,
768 "Could not match STCLASS...\n") );
773 DEBUG_r( PerlIO_printf(Perl_debug_log,
774 "Looking for %s substr starting at offset %ld...\n",
775 what, (long)(s + start_shift - i_strpos)) );
778 /* Have both, check_string is floating */
779 if (t + start_shift >= check_at) /* Contradicts floating=check */
780 goto retry_floating_check;
781 /* Recheck anchored substring, but not floating... */
785 DEBUG_r( PerlIO_printf(Perl_debug_log,
786 "Looking for anchored substr starting at offset %ld...\n",
787 (long)(other_last - i_strpos)) );
788 goto do_other_anchored;
790 /* Another way we could have checked stclass at the
791 current position only: */
796 DEBUG_r( PerlIO_printf(Perl_debug_log,
797 "Looking for /%s^%s/m starting at offset %ld...\n",
798 PL_colors[0],PL_colors[1], (long)(t - i_strpos)) );
801 if (!prog->float_substr) /* Could have been deleted */
803 /* Check is floating subtring. */
804 retry_floating_check:
805 t = check_at - start_shift;
806 DEBUG_r( what = "floating" );
807 goto hop_and_restart;
810 PerlIO_printf(Perl_debug_log,
811 "By STCLASS: moving %ld --> %ld\n",
812 (long)(t - i_strpos), (long)(s - i_strpos));
814 PerlIO_printf(Perl_debug_log,
815 "Does not contradict STCLASS...\n") );
818 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s%s:%s match at offset %ld\n",
819 PL_colors[4], (check ? "Guessed" : "Giving up"),
820 PL_colors[5], (long)(s - i_strpos)) );
823 fail_finish: /* Substring not found */
824 if (prog->check_substr) /* could be removed already */
825 BmUSEFUL(prog->check_substr) += 5; /* hooray */
827 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
828 PL_colors[4],PL_colors[5]));
832 /* We know what class REx starts with. Try to find this position... */
834 S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *startpos, I32 norun)
836 I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
842 register I32 tmp = 1; /* Scratch variable? */
843 register bool do_utf8 = DO_UTF8(PL_reg_sv);
845 /* We know what class it must start with. */
849 if (reginclass(c, (U8*)s, do_utf8)) {
850 if (tmp && (norun || regtry(prog, s)))
857 s += do_utf8 ? UTF8SKIP(s) : 1;
864 c1 = to_utf8_lower((U8*)m);
865 c2 = to_utf8_upper((U8*)m);
876 c2 = PL_fold_locale[c1];
881 e = s; /* Due to minlen logic of intuit() */
887 if ( utf8_to_uv_simple((U8*)s, &len) == c1
894 UV c = utf8_to_uv_simple((U8*)s, &len);
895 if ( (c == c1 || c == c2) && regtry(prog, s) )
904 && (ln == 1 || !(OP(c) == EXACTF
906 : ibcmp_locale(s, m, ln)))
907 && (norun || regtry(prog, s)) )
913 if ( (*(U8*)s == c1 || *(U8*)s == c2)
914 && (ln == 1 || !(OP(c) == EXACTF
916 : ibcmp_locale(s, m, ln)))
917 && (norun || regtry(prog, s)) )
924 PL_reg_flags |= RF_tainted;
931 U8 *r = reghop3((U8*)s, -1, (U8*)startpos);
933 tmp = (I32)utf8_to_uv(r, s - (char*)r, 0, 0);
935 tmp = ((OP(c) == BOUND ?
936 isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
938 if (tmp == !(OP(c) == BOUND ?
939 swash_fetch(PL_utf8_alnum, (U8*)s) :
940 isALNUM_LC_utf8((U8*)s)))
943 if ((norun || regtry(prog, s)))
950 tmp = (s != startpos) ? UCHARAT(s - 1) : '\n';
951 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
954 !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
956 if ((norun || regtry(prog, s)))
962 if ((!prog->minlen && tmp) && (norun || regtry(prog, s)))
966 PL_reg_flags |= RF_tainted;
973 U8 *r = reghop3((U8*)s, -1, (U8*)startpos);
975 tmp = (I32)utf8_to_uv(r, s - (char*)r, 0, 0);
977 tmp = ((OP(c) == NBOUND ?
978 isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
980 if (tmp == !(OP(c) == NBOUND ?
981 swash_fetch(PL_utf8_alnum, (U8*)s) :
982 isALNUM_LC_utf8((U8*)s)))
984 else if ((norun || regtry(prog, s)))
990 tmp = (s != startpos) ? UCHARAT(s - 1) : '\n';
991 tmp = ((OP(c) == NBOUND ?
992 isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
995 !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
997 else if ((norun || regtry(prog, s)))
1002 if ((!prog->minlen && !tmp) && (norun || regtry(prog, s)))
1007 while (s < strend) {
1008 if (swash_fetch(PL_utf8_alnum, (U8*)s)) {
1009 if (tmp && (norun || regtry(prog, s)))
1020 while (s < strend) {
1022 if (tmp && (norun || regtry(prog, s)))
1034 PL_reg_flags |= RF_tainted;
1036 while (s < strend) {
1037 if (isALNUM_LC_utf8((U8*)s)) {
1038 if (tmp && (norun || regtry(prog, s)))
1049 while (s < strend) {
1050 if (isALNUM_LC(*s)) {
1051 if (tmp && (norun || regtry(prog, s)))
1064 while (s < strend) {
1065 if (!swash_fetch(PL_utf8_alnum, (U8*)s)) {
1066 if (tmp && (norun || regtry(prog, s)))
1077 while (s < strend) {
1079 if (tmp && (norun || regtry(prog, s)))
1091 PL_reg_flags |= RF_tainted;
1093 while (s < strend) {
1094 if (!isALNUM_LC_utf8((U8*)s)) {
1095 if (tmp && (norun || regtry(prog, s)))
1106 while (s < strend) {
1107 if (!isALNUM_LC(*s)) {
1108 if (tmp && (norun || regtry(prog, s)))
1121 while (s < strend) {
1122 if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s)) {
1123 if (tmp && (norun || regtry(prog, s)))
1134 while (s < strend) {
1136 if (tmp && (norun || regtry(prog, s)))
1148 PL_reg_flags |= RF_tainted;
1150 while (s < strend) {
1151 if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) {
1152 if (tmp && (norun || regtry(prog, s)))
1163 while (s < strend) {
1164 if (isSPACE_LC(*s)) {
1165 if (tmp && (norun || regtry(prog, s)))
1178 while (s < strend) {
1179 if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s))) {
1180 if (tmp && (norun || regtry(prog, s)))
1191 while (s < strend) {
1193 if (tmp && (norun || regtry(prog, s)))
1205 PL_reg_flags |= RF_tainted;
1207 while (s < strend) {
1208 if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) {
1209 if (tmp && (norun || regtry(prog, s)))
1220 while (s < strend) {
1221 if (!isSPACE_LC(*s)) {
1222 if (tmp && (norun || regtry(prog, s)))
1235 while (s < strend) {
1236 if (swash_fetch(PL_utf8_digit,(U8*)s)) {
1237 if (tmp && (norun || regtry(prog, s)))
1248 while (s < strend) {
1250 if (tmp && (norun || regtry(prog, s)))
1262 PL_reg_flags |= RF_tainted;
1264 while (s < strend) {
1265 if (isDIGIT_LC_utf8((U8*)s)) {
1266 if (tmp && (norun || regtry(prog, s)))
1277 while (s < strend) {
1278 if (isDIGIT_LC(*s)) {
1279 if (tmp && (norun || regtry(prog, s)))
1292 while (s < strend) {
1293 if (!swash_fetch(PL_utf8_digit,(U8*)s)) {
1294 if (tmp && (norun || regtry(prog, s)))
1305 while (s < strend) {
1307 if (tmp && (norun || regtry(prog, s)))
1319 PL_reg_flags |= RF_tainted;
1321 while (s < strend) {
1322 if (!isDIGIT_LC_utf8((U8*)s)) {
1323 if (tmp && (norun || regtry(prog, s)))
1334 while (s < strend) {
1335 if (!isDIGIT_LC(*s)) {
1336 if (tmp && (norun || regtry(prog, s)))
1348 Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
1357 - regexec_flags - match a regexp against a string
1360 Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend,
1361 char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
1362 /* strend: pointer to null at end of string */
1363 /* strbeg: real beginning of string */
1364 /* minend: end of match must be >=minend after stringarg. */
1365 /* data: May be used for some additional optimizations. */
1366 /* nosave: For optimizations. */
1369 register regnode *c;
1370 register char *startpos = stringarg;
1371 I32 minlen; /* must match at least this many chars */
1372 I32 dontbother = 0; /* how many characters not to try at end */
1373 /* I32 start_shift = 0; */ /* Offset of the start to find
1374 constant substr. */ /* CC */
1375 I32 end_shift = 0; /* Same for the end. */ /* CC */
1376 I32 scream_pos = -1; /* Internal iterator of scream. */
1378 SV* oreplsv = GvSV(PL_replgv);
1379 bool do_utf8 = DO_UTF8(sv);
1385 PL_regnarrate = PL_debug & 512;
1388 /* Be paranoid... */
1389 if (prog == NULL || startpos == NULL) {
1390 Perl_croak(aTHX_ "NULL regexp parameter");
1394 minlen = prog->minlen;
1396 if (utf8_distance((U8*)strend, (U8*)startpos) < minlen) goto phooey;
1399 if (strend - startpos < minlen) goto phooey;
1402 if (startpos == strbeg) /* is ^ valid at stringarg? */
1405 if (prog->reganch & ROPT_UTF8 && do_utf8) {
1406 U8 *s = reghop3((U8*)stringarg, -1, (U8*)strbeg);
1407 PL_regprev = utf8_to_uv(s, (U8*)stringarg - s, NULL, 0);
1410 PL_regprev = (U32)stringarg[-1];
1411 if (!PL_multiline && PL_regprev == '\n')
1412 PL_regprev = '\0'; /* force ^ to NOT match */
1415 /* Check validity of program. */
1416 if (UCHARAT(prog->program) != REG_MAGIC) {
1417 Perl_croak(aTHX_ "corrupted regexp program");
1421 PL_reg_eval_set = 0;
1424 if (prog->reganch & ROPT_UTF8)
1425 PL_reg_flags |= RF_utf8;
1427 /* Mark beginning of line for ^ and lookbehind. */
1428 PL_regbol = startpos;
1432 /* Mark end of line for $ (and such) */
1435 /* see how far we have to get to not match where we matched before */
1436 PL_regtill = startpos+minend;
1438 /* We start without call_cc context. */
1441 /* If there is a "must appear" string, look for it. */
1444 if (prog->reganch & ROPT_GPOS_SEEN) { /* Need to have PL_reg_ganch */
1447 if (flags & REXEC_IGNOREPOS) /* Means: check only at start */
1448 PL_reg_ganch = startpos;
1449 else if (sv && SvTYPE(sv) >= SVt_PVMG
1451 && (mg = mg_find(sv, 'g')) && mg->mg_len >= 0) {
1452 PL_reg_ganch = strbeg + mg->mg_len; /* Defined pos() */
1453 if (prog->reganch & ROPT_ANCH_GPOS) {
1454 if (s > PL_reg_ganch)
1459 else /* pos() not defined */
1460 PL_reg_ganch = strbeg;
1463 if (!(flags & REXEC_CHECKED) && prog->check_substr != Nullsv) {
1464 re_scream_pos_data d;
1466 d.scream_olds = &scream_olds;
1467 d.scream_pos = &scream_pos;
1468 s = re_intuit_start(prog, sv, s, strend, flags, &d);
1470 goto phooey; /* not present */
1473 DEBUG_r( if (!PL_colorset) reginitcolors() );
1474 DEBUG_r(PerlIO_printf(Perl_debug_log,
1475 "%sMatching REx%s `%s%.60s%s%s' against `%s%.*s%s%s'\n",
1476 PL_colors[4],PL_colors[5],PL_colors[0],
1479 (strlen(prog->precomp) > 60 ? "..." : ""),
1481 (int)(strend - startpos > 60 ? 60 : strend - startpos),
1482 startpos, PL_colors[1],
1483 (strend - startpos > 60 ? "..." : ""))
1486 /* Simplest case: anchored match need be tried only once. */
1487 /* [unless only anchor is BOL and multiline is set] */
1488 if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) {
1489 if (s == startpos && regtry(prog, startpos))
1491 else if (PL_multiline || (prog->reganch & ROPT_IMPLICIT)
1492 || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */
1497 dontbother = minlen - 1;
1498 end = HOP3c(strend, -dontbother, strbeg) - 1;
1499 /* for multiline we only have to try after newlines */
1500 if (prog->check_substr) {
1504 if (regtry(prog, s))
1509 if (prog->reganch & RE_USE_INTUIT) {
1510 s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
1521 if (*s++ == '\n') { /* don't need PL_utf8skip here */
1522 if (regtry(prog, s))
1529 } else if (prog->reganch & ROPT_ANCH_GPOS) {
1530 if (regtry(prog, PL_reg_ganch))
1535 /* Messy cases: unanchored match. */
1536 if (prog->anchored_substr && prog->reganch & ROPT_SKIP) {
1537 /* we have /x+whatever/ */
1538 /* it must be a one character string (XXXX Except UTF?) */
1539 char ch = SvPVX(prog->anchored_substr)[0];
1545 while (s < strend) {
1547 DEBUG_r( did_match = 1 );
1548 if (regtry(prog, s)) goto got_it;
1550 while (s < strend && *s == ch)
1557 while (s < strend) {
1559 DEBUG_r( did_match = 1 );
1560 if (regtry(prog, s)) goto got_it;
1562 while (s < strend && *s == ch)
1568 DEBUG_r(did_match ||
1569 PerlIO_printf(Perl_debug_log,
1570 "Did not find anchored character...\n"));
1573 else if (do_utf8 == (UTF!=0) &&
1574 (prog->anchored_substr != Nullsv
1575 || (prog->float_substr != Nullsv
1576 && prog->float_max_offset < strend - s))) {
1577 SV *must = prog->anchored_substr
1578 ? prog->anchored_substr : prog->float_substr;
1580 prog->anchored_substr ? prog->anchored_offset : prog->float_max_offset;
1582 prog->anchored_substr ? prog->anchored_offset : prog->float_min_offset;
1583 char *last = HOP3c(strend, /* Cannot start after this */
1584 -(I32)(CHR_SVLEN(must)
1585 - (SvTAIL(must) != 0) + back_min), strbeg);
1586 char *last1; /* Last position checked before */
1592 last1 = HOPc(s, -1);
1594 last1 = s - 1; /* bogus */
1596 /* XXXX check_substr already used to find `s', can optimize if
1597 check_substr==must. */
1599 dontbother = end_shift;
1600 strend = HOPc(strend, -dontbother);
1601 while ( (s <= last) &&
1602 ((flags & REXEC_SCREAM)
1603 ? (s = screaminstr(sv, must, HOP3c(s, back_min, strend) - strbeg,
1604 end_shift, &scream_pos, 0))
1605 : (s = fbm_instr((unsigned char*)HOP3(s, back_min, strend),
1606 (unsigned char*)strend, must,
1607 PL_multiline ? FBMrf_MULTILINE : 0))) ) {
1608 DEBUG_r( did_match = 1 );
1609 if (HOPc(s, -back_max) > last1) {
1610 last1 = HOPc(s, -back_min);
1611 s = HOPc(s, -back_max);
1614 char *t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
1616 last1 = HOPc(s, -back_min);
1620 while (s <= last1) {
1621 if (regtry(prog, s))
1627 while (s <= last1) {
1628 if (regtry(prog, s))
1634 DEBUG_r(did_match ||
1635 PerlIO_printf(Perl_debug_log, "Did not find %s substr `%s%.*s%s'%s...\n",
1636 ((must == prog->anchored_substr)
1637 ? "anchored" : "floating"),
1639 (int)(SvCUR(must) - (SvTAIL(must)!=0)),
1641 PL_colors[1], (SvTAIL(must) ? "$" : "")));
1644 else if ((c = prog->regstclass)) {
1645 if (minlen && PL_regkind[(U8)OP(prog->regstclass)] != EXACT)
1646 /* don't bother with what can't match */
1647 strend = HOPc(strend, -(minlen - 1));
1649 SV *prop = sv_newmortal();
1651 PerlIO_printf(Perl_debug_log, "Matching stclass `%s' against `%s'\n", SvPVX(prop), s);
1653 if (find_byclass(prog, c, s, strend, startpos, 0))
1655 DEBUG_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass...\n"));
1659 if (prog->float_substr != Nullsv) { /* Trim the end. */
1662 if (flags & REXEC_SCREAM) {
1663 last = screaminstr(sv, prog->float_substr, s - strbeg,
1664 end_shift, &scream_pos, 1); /* last one */
1666 last = scream_olds; /* Only one occurrence. */
1670 char *little = SvPV(prog->float_substr, len);
1672 if (SvTAIL(prog->float_substr)) {
1673 if (memEQ(strend - len + 1, little, len - 1))
1674 last = strend - len + 1;
1675 else if (!PL_multiline)
1676 last = memEQ(strend - len, little, len)
1677 ? strend - len : Nullch;
1683 last = rninstr(s, strend, little, little + len);
1685 last = strend; /* matching `$' */
1689 DEBUG_r(PerlIO_printf(Perl_debug_log,
1690 "%sCan't trim the tail, match fails (should not happen)%s\n",
1691 PL_colors[4],PL_colors[5]));
1692 goto phooey; /* Should not happen! */
1694 dontbother = strend - last + prog->float_min_offset;
1696 if (minlen && (dontbother < minlen))
1697 dontbother = minlen - 1;
1698 strend -= dontbother; /* this one's always in bytes! */
1699 /* We don't know much -- general case. */
1702 if (regtry(prog, s))
1711 if (regtry(prog, s))
1713 } while (s++ < strend);
1721 RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
1723 if (PL_reg_eval_set) {
1724 /* Preserve the current value of $^R */
1725 if (oreplsv != GvSV(PL_replgv))
1726 sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
1727 restored, the value remains
1729 restore_pos(aTHXo_ 0);
1732 /* make sure $`, $&, $', and $digit will work later */
1733 if ( !(flags & REXEC_NOT_FIRST) ) {
1734 if (RX_MATCH_COPIED(prog)) {
1735 Safefree(prog->subbeg);
1736 RX_MATCH_COPIED_off(prog);
1738 if (flags & REXEC_COPY_STR) {
1739 I32 i = PL_regeol - startpos + (stringarg - strbeg);
1741 s = savepvn(strbeg, i);
1744 RX_MATCH_COPIED_on(prog);
1747 prog->subbeg = strbeg;
1748 prog->sublen = PL_regeol - strbeg; /* strend may have been modified */
1755 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n",
1756 PL_colors[4],PL_colors[5]));
1757 if (PL_reg_eval_set)
1758 restore_pos(aTHXo_ 0);
1763 - regtry - try match at specific point
1765 STATIC I32 /* 0 failure, 1 success */
1766 S_regtry(pTHX_ regexp *prog, char *startpos)
1774 PL_regindent = 0; /* XXXX Not good when matches are reenterable... */
1776 if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
1779 PL_reg_eval_set = RS_init;
1781 PerlIO_printf(Perl_debug_log, " setting stack tmpbase at %"IVdf"\n",
1782 (IV)(PL_stack_sp - PL_stack_base));
1784 SAVEI32(cxstack[cxstack_ix].blk_oldsp);
1785 cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
1786 /* Otherwise OP_NEXTSTATE will free whatever on stack now. */
1788 /* Apparently this is not needed, judging by wantarray. */
1789 /* SAVEI8(cxstack[cxstack_ix].blk_gimme);
1790 cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
1793 /* Make $_ available to executed code. */
1794 if (PL_reg_sv != DEFSV) {
1795 /* SAVE_DEFSV does *not* suffice here for USE_THREADS */
1800 if (!(SvTYPE(PL_reg_sv) >= SVt_PVMG && SvMAGIC(PL_reg_sv)
1801 && (mg = mg_find(PL_reg_sv, 'g')))) {
1802 /* prepare for quick setting of pos */
1803 sv_magic(PL_reg_sv, (SV*)0, 'g', Nullch, 0);
1804 mg = mg_find(PL_reg_sv, 'g');
1808 PL_reg_oldpos = mg->mg_len;
1809 SAVEDESTRUCTOR_X(restore_pos, 0);
1812 Newz(22,PL_reg_curpm, 1, PMOP);
1813 PL_reg_curpm->op_pmregexp = prog;
1814 PL_reg_oldcurpm = PL_curpm;
1815 PL_curpm = PL_reg_curpm;
1816 if (RX_MATCH_COPIED(prog)) {
1817 /* Here is a serious problem: we cannot rewrite subbeg,
1818 since it may be needed if this match fails. Thus
1819 $` inside (?{}) could fail... */
1820 PL_reg_oldsaved = prog->subbeg;
1821 PL_reg_oldsavedlen = prog->sublen;
1822 RX_MATCH_COPIED_off(prog);
1825 PL_reg_oldsaved = Nullch;
1826 prog->subbeg = PL_bostr;
1827 prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
1829 prog->startp[0] = startpos - PL_bostr;
1830 PL_reginput = startpos;
1831 PL_regstartp = prog->startp;
1832 PL_regendp = prog->endp;
1833 PL_reglastparen = &prog->lastparen;
1834 prog->lastparen = 0;
1836 DEBUG_r(PL_reg_starttry = startpos);
1837 if (PL_reg_start_tmpl <= prog->nparens) {
1838 PL_reg_start_tmpl = prog->nparens*3/2 + 3;
1839 if(PL_reg_start_tmp)
1840 Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1842 New(22,PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1845 /* XXXX What this code is doing here?!!! There should be no need
1846 to do this again and again, PL_reglastparen should take care of
1849 /* Tests pat.t#187 and split.t#{13,14} seem to depend on this code.
1850 * Actually, the code in regcppop() (which Ilya may be meaning by
1851 * PL_reglastparen), is not needed at all by the test suite
1852 * (op/regexp, op/pat, op/split), but that code is needed, oddly
1853 * enough, for building DynaLoader, or otherwise this
1854 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
1855 * will happen. Meanwhile, this code *is* needed for the
1856 * above-mentioned test suite tests to succeed. The common theme
1857 * on those tests seems to be returning null fields from matches.
1862 if (prog->nparens) {
1863 for (i = prog->nparens; i > *PL_reglastparen; i--) {
1870 if (regmatch(prog->program + 1)) {
1871 prog->endp[0] = PL_reginput - PL_bostr;
1874 REGCP_UNWIND(lastcp);
1878 #define RE_UNWIND_BRANCH 1
1879 #define RE_UNWIND_BRANCHJ 2
1883 typedef struct { /* XX: makes sense to enlarge it... */
1887 } re_unwind_generic_t;
1900 } re_unwind_branch_t;
1902 typedef union re_unwind_t {
1904 re_unwind_generic_t generic;
1905 re_unwind_branch_t branch;
1909 - regmatch - main matching routine
1911 * Conceptually the strategy is simple: check to see whether the current
1912 * node matches, call self recursively to see whether the rest matches,
1913 * and then act accordingly. In practice we make some effort to avoid
1914 * recursion, in particular by going through "ordinary" nodes (that don't
1915 * need to know whether the rest of the match failed) by a loop instead of
1918 /* [lwall] I've hoisted the register declarations to the outer block in order to
1919 * maybe save a little bit of pushing and popping on the stack. It also takes
1920 * advantage of machines that use a register save mask on subroutine entry.
1922 STATIC I32 /* 0 failure, 1 success */
1923 S_regmatch(pTHX_ regnode *prog)
1925 register regnode *scan; /* Current node. */
1926 regnode *next; /* Next node. */
1927 regnode *inner; /* Next node in internal branch. */
1928 register I32 nextchr; /* renamed nextchr - nextchar colides with
1929 function of same name */
1930 register I32 n; /* no or next */
1931 register I32 ln; /* len or last */
1932 register char *s; /* operand or save */
1933 register char *locinput = PL_reginput;
1934 register I32 c1, c2, paren; /* case fold search, parenth */
1935 int minmod = 0, sw = 0, logical = 0;
1937 I32 firstcp = PL_savestack_ix;
1938 register bool do_utf8 = DO_UTF8(PL_reg_sv);
1944 /* Note that nextchr is a byte even in UTF */
1945 nextchr = UCHARAT(locinput);
1947 while (scan != NULL) {
1948 #define sayNO_L (logical ? (logical = 0, sw = 0, goto cont) : sayNO)
1950 # define sayYES goto yes
1951 # define sayNO goto no
1952 # define sayYES_FINAL goto yes_final
1953 # define sayYES_LOUD goto yes_loud
1954 # define sayNO_FINAL goto no_final
1955 # define sayNO_SILENT goto do_no
1956 # define saySAME(x) if (x) goto yes; else goto no
1957 # define REPORT_CODE_OFF 24
1959 # define sayYES return 1
1960 # define sayNO return 0
1961 # define sayYES_FINAL return 1
1962 # define sayYES_LOUD return 1
1963 # define sayNO_FINAL return 0
1964 # define sayNO_SILENT return 0
1965 # define saySAME(x) return x
1968 SV *prop = sv_newmortal();
1969 int docolor = *PL_colors[0];
1970 int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
1971 int l = (PL_regeol - locinput) > taill ? taill : (PL_regeol - locinput);
1972 /* The part of the string before starttry has one color
1973 (pref0_len chars), between starttry and current
1974 position another one (pref_len - pref0_len chars),
1975 after the current position the third one.
1976 We assume that pref0_len <= pref_len, otherwise we
1977 decrease pref0_len. */
1978 int pref_len = (locinput - PL_bostr) > (5 + taill) - l
1979 ? (5 + taill) - l : locinput - PL_bostr;
1982 while (UTF8_IS_CONTINUATION(*(U8*)(locinput - pref_len)))
1984 pref0_len = pref_len - (locinput - PL_reg_starttry);
1985 if (l + pref_len < (5 + taill) && l < PL_regeol - locinput)
1986 l = ( PL_regeol - locinput > (5 + taill) - pref_len
1987 ? (5 + taill) - pref_len : PL_regeol - locinput);
1988 while (UTF8_IS_CONTINUATION(*(U8*)(locinput + l)))
1992 if (pref0_len > pref_len)
1993 pref0_len = pref_len;
1994 regprop(prop, scan);
1995 PerlIO_printf(Perl_debug_log,
1996 "%4"IVdf" <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|%3"IVdf":%*s%s\n",
1997 (IV)(locinput - PL_bostr),
1998 PL_colors[4], pref0_len,
1999 locinput - pref_len, PL_colors[5],
2000 PL_colors[2], pref_len - pref0_len,
2001 locinput - pref_len + pref0_len, PL_colors[3],
2002 (docolor ? "" : "> <"),
2003 PL_colors[0], l, locinput, PL_colors[1],
2004 15 - l - pref_len + 1,
2006 (IV)(scan - PL_regprogram), PL_regindent*2, "",
2010 next = scan + NEXT_OFF(scan);
2016 if (locinput == PL_bostr
2017 ? PL_regprev == '\n'
2019 (nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
2021 /* regtill = regbol; */
2026 if (locinput == PL_bostr
2027 ? PL_regprev == '\n'
2028 : ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
2034 if (locinput == PL_bostr)
2038 if (locinput == PL_reg_ganch)
2048 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
2053 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
2055 if (PL_regeol - locinput > 1)
2059 if (PL_regeol != locinput)
2064 locinput += PL_utf8skip[nextchr];
2065 if (locinput > PL_regeol)
2067 nextchr = UCHARAT(locinput);
2070 if (!nextchr && locinput >= PL_regeol)
2072 nextchr = UCHARAT(++locinput);
2075 if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
2078 locinput += PL_utf8skip[nextchr];
2079 if (locinput > PL_regeol)
2081 nextchr = UCHARAT(locinput);
2084 nextchr = UCHARAT(++locinput);
2089 if (do_utf8 != (UTF!=0)) {
2097 if (*((U8*)s) != utf8_to_uv_simple((U8*)l, &len))
2106 if (*((U8*)l) != utf8_to_uv_simple((U8*)s, &len))
2112 nextchr = UCHARAT(locinput);
2115 /* Inline the first character, for speed. */
2116 if (UCHARAT(s) != nextchr)
2118 if (PL_regeol - locinput < ln)
2120 if (ln > 1 && memNE(s, locinput, ln))
2123 nextchr = UCHARAT(locinput);
2126 PL_reg_flags |= RF_tainted;
2136 c1 = OP(scan) == EXACTF;
2138 if (l >= PL_regeol) {
2141 if ((UTF ? utf8_to_uv((U8*)s, e - s, 0, 0) : *((U8*)s)) !=
2142 (c1 ? toLOWER_utf8((U8*)l) : toLOWER_LC_utf8((U8*)l)))
2144 s += UTF ? UTF8SKIP(s) : 1;
2148 nextchr = UCHARAT(locinput);
2152 /* Inline the first character, for speed. */
2153 if (UCHARAT(s) != nextchr &&
2154 UCHARAT(s) != ((OP(scan) == EXACTF)
2155 ? PL_fold : PL_fold_locale)[nextchr])
2157 if (PL_regeol - locinput < ln)
2159 if (ln > 1 && (OP(scan) == EXACTF
2160 ? ibcmp(s, locinput, ln)
2161 : ibcmp_locale(s, locinput, ln)))
2164 nextchr = UCHARAT(locinput);
2168 if (!reginclass(scan, (U8*)locinput, do_utf8))
2170 if (locinput >= PL_regeol)
2172 locinput += PL_utf8skip[nextchr];
2173 nextchr = UCHARAT(locinput);
2177 nextchr = UCHARAT(locinput);
2178 if (!reginclass(scan, (U8*)locinput, do_utf8))
2180 if (!nextchr && locinput >= PL_regeol)
2182 nextchr = UCHARAT(++locinput);
2186 PL_reg_flags |= RF_tainted;
2192 if (!(OP(scan) == ALNUM
2193 ? swash_fetch(PL_utf8_alnum, (U8*)locinput)
2194 : isALNUM_LC_utf8((U8*)locinput)))
2198 locinput += PL_utf8skip[nextchr];
2199 nextchr = UCHARAT(locinput);
2202 if (!(OP(scan) == ALNUM
2203 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
2205 nextchr = UCHARAT(++locinput);
2208 PL_reg_flags |= RF_tainted;
2211 if (!nextchr && locinput >= PL_regeol)
2214 if (OP(scan) == NALNUM
2215 ? swash_fetch(PL_utf8_alnum, (U8*)locinput)
2216 : isALNUM_LC_utf8((U8*)locinput))
2220 locinput += PL_utf8skip[nextchr];
2221 nextchr = UCHARAT(locinput);
2224 if (OP(scan) == NALNUM
2225 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
2227 nextchr = UCHARAT(++locinput);
2231 PL_reg_flags |= RF_tainted;
2235 /* was last char in word? */
2237 if (locinput == PL_regbol)
2240 U8 *r = reghop((U8*)locinput, -1);
2242 ln = utf8_to_uv(r, s - (char*)r, 0, 0);
2244 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2245 ln = isALNUM_uni(ln);
2246 n = swash_fetch(PL_utf8_alnum, (U8*)locinput);
2249 ln = isALNUM_LC_uni(ln);
2250 n = isALNUM_LC_utf8((U8*)locinput);
2254 ln = (locinput != PL_regbol) ?
2255 UCHARAT(locinput - 1) : PL_regprev;
2256 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2258 n = isALNUM(nextchr);
2261 ln = isALNUM_LC(ln);
2262 n = isALNUM_LC(nextchr);
2265 if (((!ln) == (!n)) == (OP(scan) == BOUND ||
2266 OP(scan) == BOUNDL))
2270 PL_reg_flags |= RF_tainted;
2276 if (UTF8_IS_CONTINUED(nextchr)) {
2277 if (!(OP(scan) == SPACE
2278 ? swash_fetch(PL_utf8_space, (U8*)locinput)
2279 : isSPACE_LC_utf8((U8*)locinput)))
2283 locinput += PL_utf8skip[nextchr];
2284 nextchr = UCHARAT(locinput);
2287 if (!(OP(scan) == SPACE
2288 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
2290 nextchr = UCHARAT(++locinput);
2293 if (!(OP(scan) == SPACE
2294 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
2296 nextchr = UCHARAT(++locinput);
2300 PL_reg_flags |= RF_tainted;
2303 if (!nextchr && locinput >= PL_regeol)
2306 if (OP(scan) == NSPACE
2307 ? swash_fetch(PL_utf8_space, (U8*)locinput)
2308 : isSPACE_LC_utf8((U8*)locinput))
2312 locinput += PL_utf8skip[nextchr];
2313 nextchr = UCHARAT(locinput);
2316 if (OP(scan) == NSPACE
2317 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
2319 nextchr = UCHARAT(++locinput);
2322 PL_reg_flags |= RF_tainted;
2328 if (!(OP(scan) == DIGIT
2329 ? swash_fetch(PL_utf8_digit, (U8*)locinput)
2330 : isDIGIT_LC_utf8((U8*)locinput)))
2334 locinput += PL_utf8skip[nextchr];
2335 nextchr = UCHARAT(locinput);
2338 if (!(OP(scan) == DIGIT
2339 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
2341 nextchr = UCHARAT(++locinput);
2344 PL_reg_flags |= RF_tainted;
2347 if (!nextchr && locinput >= PL_regeol)
2350 if (OP(scan) == NDIGIT
2351 ? swash_fetch(PL_utf8_digit, (U8*)locinput)
2352 : isDIGIT_LC_utf8((U8*)locinput))
2356 locinput += PL_utf8skip[nextchr];
2357 nextchr = UCHARAT(locinput);
2360 if (OP(scan) == NDIGIT
2361 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
2363 nextchr = UCHARAT(++locinput);
2366 if (locinput >= PL_regeol || swash_fetch(PL_utf8_mark,(U8*)locinput))
2368 locinput += PL_utf8skip[nextchr];
2369 while (locinput < PL_regeol && swash_fetch(PL_utf8_mark,(U8*)locinput))
2370 locinput += UTF8SKIP(locinput);
2371 if (locinput > PL_regeol)
2373 nextchr = UCHARAT(locinput);
2376 PL_reg_flags |= RF_tainted;
2380 n = ARG(scan); /* which paren pair */
2381 ln = PL_regstartp[n];
2382 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
2383 if (*PL_reglastparen < n || ln == -1)
2384 sayNO; /* Do not match unless seen CLOSEn. */
2385 if (ln == PL_regendp[n])
2389 if (do_utf8 && OP(scan) != REF) { /* REF can do byte comparison */
2391 char *e = PL_bostr + PL_regendp[n];
2393 * Note that we can't do the "other character" lookup trick as
2394 * in the 8-bit case (no pun intended) because in Unicode we
2395 * have to map both upper and title case to lower case.
2397 if (OP(scan) == REFF) {
2401 if (toLOWER_utf8((U8*)s) != toLOWER_utf8((U8*)l))
2411 if (toLOWER_LC_utf8((U8*)s) != toLOWER_LC_utf8((U8*)l))
2418 nextchr = UCHARAT(locinput);
2422 /* Inline the first character, for speed. */
2423 if (UCHARAT(s) != nextchr &&
2425 (UCHARAT(s) != ((OP(scan) == REFF
2426 ? PL_fold : PL_fold_locale)[nextchr]))))
2428 ln = PL_regendp[n] - ln;
2429 if (locinput + ln > PL_regeol)
2431 if (ln > 1 && (OP(scan) == REF
2432 ? memNE(s, locinput, ln)
2434 ? ibcmp(s, locinput, ln)
2435 : ibcmp_locale(s, locinput, ln))))
2438 nextchr = UCHARAT(locinput);
2449 OP_4tree *oop = PL_op;
2450 COP *ocurcop = PL_curcop;
2451 SV **ocurpad = PL_curpad;
2455 PL_op = (OP_4tree*)PL_regdata->data[n];
2456 DEBUG_r( PerlIO_printf(Perl_debug_log, " re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
2457 PL_curpad = AvARRAY((AV*)PL_regdata->data[n + 2]);
2458 PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
2460 CALLRUNOPS(aTHX); /* Scalar context. */
2466 PL_curpad = ocurpad;
2467 PL_curcop = ocurcop;
2469 if (logical == 2) { /* Postponed subexpression. */
2471 MAGIC *mg = Null(MAGIC*);
2473 CHECKPOINT cp, lastcp;
2475 if(SvROK(ret) || SvRMAGICAL(ret)) {
2476 SV *sv = SvROK(ret) ? SvRV(ret) : ret;
2479 mg = mg_find(sv, 'r');
2482 re = (regexp *)mg->mg_obj;
2483 (void)ReREFCNT_inc(re);
2487 char *t = SvPV(ret, len);
2489 char *oprecomp = PL_regprecomp;
2490 I32 osize = PL_regsize;
2491 I32 onpar = PL_regnpar;
2494 re = CALLREGCOMP(aTHX_ t, t + len, &pm);
2496 & (SVs_TEMP | SVs_PADTMP | SVf_READONLY)))
2497 sv_magic(ret,(SV*)ReREFCNT_inc(re),'r',0,0);
2498 PL_regprecomp = oprecomp;
2503 PerlIO_printf(Perl_debug_log,
2504 "Entering embedded `%s%.60s%s%s'\n",
2508 (strlen(re->precomp) > 60 ? "..." : ""))
2511 state.prev = PL_reg_call_cc;
2512 state.cc = PL_regcc;
2513 state.re = PL_reg_re;
2517 cp = regcppush(0); /* Save *all* the positions. */
2520 state.ss = PL_savestack_ix;
2521 *PL_reglastparen = 0;
2522 PL_reg_call_cc = &state;
2523 PL_reginput = locinput;
2525 /* XXXX This is too dramatic a measure... */
2528 if (regmatch(re->program + 1)) {
2529 /* Even though we succeeded, we need to restore
2530 global variables, since we may be wrapped inside
2531 SUSPEND, thus the match may be not finished yet. */
2533 /* XXXX Do this only if SUSPENDed? */
2534 PL_reg_call_cc = state.prev;
2535 PL_regcc = state.cc;
2536 PL_reg_re = state.re;
2537 cache_re(PL_reg_re);
2539 /* XXXX This is too dramatic a measure... */
2542 /* These are needed even if not SUSPEND. */
2548 REGCP_UNWIND(lastcp);
2550 PL_reg_call_cc = state.prev;
2551 PL_regcc = state.cc;
2552 PL_reg_re = state.re;
2553 cache_re(PL_reg_re);
2555 /* XXXX This is too dramatic a measure... */
2564 sv_setsv(save_scalar(PL_replgv), ret);
2568 n = ARG(scan); /* which paren pair */
2569 PL_reg_start_tmp[n] = locinput;
2574 n = ARG(scan); /* which paren pair */
2575 PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
2576 PL_regendp[n] = locinput - PL_bostr;
2577 if (n > *PL_reglastparen)
2578 *PL_reglastparen = n;
2581 n = ARG(scan); /* which paren pair */
2582 sw = (*PL_reglastparen >= n && PL_regendp[n] != -1);
2585 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
2587 next = NEXTOPER(NEXTOPER(scan));
2589 next = scan + ARG(scan);
2590 if (OP(next) == IFTHEN) /* Fake one. */
2591 next = NEXTOPER(NEXTOPER(next));
2595 logical = scan->flags;
2597 /*******************************************************************
2598 PL_regcc contains infoblock about the innermost (...)* loop, and
2599 a pointer to the next outer infoblock.
2601 Here is how Y(A)*Z is processed (if it is compiled into CURLYX/WHILEM):
2603 1) After matching X, regnode for CURLYX is processed;
2605 2) This regnode creates infoblock on the stack, and calls
2606 regmatch() recursively with the starting point at WHILEM node;
2608 3) Each hit of WHILEM node tries to match A and Z (in the order
2609 depending on the current iteration, min/max of {min,max} and
2610 greediness). The information about where are nodes for "A"
2611 and "Z" is read from the infoblock, as is info on how many times "A"
2612 was already matched, and greediness.
2614 4) After A matches, the same WHILEM node is hit again.
2616 5) Each time WHILEM is hit, PL_regcc is the infoblock created by CURLYX
2617 of the same pair. Thus when WHILEM tries to match Z, it temporarily
2618 resets PL_regcc, since this Y(A)*Z can be a part of some other loop:
2619 as in (Y(A)*Z)*. If Z matches, the automaton will hit the WHILEM node
2620 of the external loop.
2622 Currently present infoblocks form a tree with a stem formed by PL_curcc
2623 and whatever it mentions via ->next, and additional attached trees
2624 corresponding to temporarily unset infoblocks as in "5" above.
2626 In the following picture infoblocks for outer loop of
2627 (Y(A)*?Z)*?T are denoted O, for inner I. NULL starting block
2628 is denoted by x. The matched string is YAAZYAZT. Temporarily postponed
2629 infoblocks are drawn below the "reset" infoblock.
2631 In fact in the picture below we do not show failed matches for Z and T
2632 by WHILEM blocks. [We illustrate minimal matches, since for them it is
2633 more obvious *why* one needs to *temporary* unset infoblocks.]
2635 Matched REx position InfoBlocks Comment
2639 Y A)*?Z)*?T x <- O <- I
2640 YA )*?Z)*?T x <- O <- I
2641 YA A)*?Z)*?T x <- O <- I
2642 YAA )*?Z)*?T x <- O <- I
2643 YAA Z)*?T x <- O # Temporary unset I
2646 YAAZ Y(A)*?Z)*?T x <- O
2649 YAAZY (A)*?Z)*?T x <- O
2652 YAAZY A)*?Z)*?T x <- O <- I
2655 YAAZYA )*?Z)*?T x <- O <- I
2658 YAAZYA Z)*?T x <- O # Temporary unset I
2664 YAAZYAZ T x # Temporary unset O
2671 *******************************************************************/
2674 CHECKPOINT cp = PL_savestack_ix;
2675 /* No need to save/restore up to this paren */
2676 I32 parenfloor = scan->flags;
2678 if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
2680 cc.oldcc = PL_regcc;
2682 /* XXXX Probably it is better to teach regpush to support
2683 parenfloor > PL_regsize... */
2684 if (parenfloor > *PL_reglastparen)
2685 parenfloor = *PL_reglastparen; /* Pessimization... */
2686 cc.parenfloor = parenfloor;
2688 cc.min = ARG1(scan);
2689 cc.max = ARG2(scan);
2690 cc.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
2694 PL_reginput = locinput;
2695 n = regmatch(PREVOPER(next)); /* start on the WHILEM */
2697 PL_regcc = cc.oldcc;
2703 * This is really hard to understand, because after we match
2704 * what we're trying to match, we must make sure the rest of
2705 * the REx is going to match for sure, and to do that we have
2706 * to go back UP the parse tree by recursing ever deeper. And
2707 * if it fails, we have to reset our parent's current state
2708 * that we can try again after backing off.
2711 CHECKPOINT cp, lastcp;
2712 CURCUR* cc = PL_regcc;
2713 char *lastloc = cc->lastloc; /* Detection of 0-len. */
2715 n = cc->cur + 1; /* how many we know we matched */
2716 PL_reginput = locinput;
2719 PerlIO_printf(Perl_debug_log,
2720 "%*s %ld out of %ld..%ld cc=%lx\n",
2721 REPORT_CODE_OFF+PL_regindent*2, "",
2722 (long)n, (long)cc->min,
2723 (long)cc->max, (long)cc)
2726 /* If degenerate scan matches "", assume scan done. */
2728 if (locinput == cc->lastloc && n >= cc->min) {
2729 PL_regcc = cc->oldcc;
2733 PerlIO_printf(Perl_debug_log,
2734 "%*s empty match detected, try continuation...\n",
2735 REPORT_CODE_OFF+PL_regindent*2, "")
2737 if (regmatch(cc->next))
2745 /* First just match a string of min scans. */
2749 cc->lastloc = locinput;
2750 if (regmatch(cc->scan))
2753 cc->lastloc = lastloc;
2758 /* Check whether we already were at this position.
2759 Postpone detection until we know the match is not
2760 *that* much linear. */
2761 if (!PL_reg_maxiter) {
2762 PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
2763 PL_reg_leftiter = PL_reg_maxiter;
2765 if (PL_reg_leftiter-- == 0) {
2766 I32 size = (PL_reg_maxiter + 7)/8;
2767 if (PL_reg_poscache) {
2768 if (PL_reg_poscache_size < size) {
2769 Renew(PL_reg_poscache, size, char);
2770 PL_reg_poscache_size = size;
2772 Zero(PL_reg_poscache, size, char);
2775 PL_reg_poscache_size = size;
2776 Newz(29, PL_reg_poscache, size, char);
2779 PerlIO_printf(Perl_debug_log,
2780 "%sDetected a super-linear match, switching on caching%s...\n",
2781 PL_colors[4], PL_colors[5])
2784 if (PL_reg_leftiter < 0) {
2785 I32 o = locinput - PL_bostr, b;
2787 o = (scan->flags & 0xf) - 1 + o * (scan->flags>>4);
2790 if (PL_reg_poscache[o] & (1<<b)) {
2792 PerlIO_printf(Perl_debug_log,
2793 "%*s already tried at this position...\n",
2794 REPORT_CODE_OFF+PL_regindent*2, "")
2798 PL_reg_poscache[o] |= (1<<b);
2802 /* Prefer next over scan for minimal matching. */
2805 PL_regcc = cc->oldcc;
2808 cp = regcppush(cc->parenfloor);
2810 if (regmatch(cc->next)) {
2812 sayYES; /* All done. */
2814 REGCP_UNWIND(lastcp);
2820 if (n >= cc->max) { /* Maximum greed exceeded? */
2821 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
2822 && !(PL_reg_flags & RF_warned)) {
2823 PL_reg_flags |= RF_warned;
2824 Perl_warner(aTHX_ WARN_REGEXP, "%s limit (%d) exceeded",
2825 "Complex regular subexpression recursion",
2832 PerlIO_printf(Perl_debug_log,
2833 "%*s trying longer...\n",
2834 REPORT_CODE_OFF+PL_regindent*2, "")
2836 /* Try scanning more and see if it helps. */
2837 PL_reginput = locinput;
2839 cc->lastloc = locinput;
2840 cp = regcppush(cc->parenfloor);
2842 if (regmatch(cc->scan)) {
2846 REGCP_UNWIND(lastcp);
2849 cc->lastloc = lastloc;
2853 /* Prefer scan over next for maximal matching. */
2855 if (n < cc->max) { /* More greed allowed? */
2856 cp = regcppush(cc->parenfloor);
2858 cc->lastloc = locinput;
2860 if (regmatch(cc->scan)) {
2864 REGCP_UNWIND(lastcp);
2865 regcppop(); /* Restore some previous $<digit>s? */
2866 PL_reginput = locinput;
2868 PerlIO_printf(Perl_debug_log,
2869 "%*s failed, try continuation...\n",
2870 REPORT_CODE_OFF+PL_regindent*2, "")
2873 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
2874 && !(PL_reg_flags & RF_warned)) {
2875 PL_reg_flags |= RF_warned;
2876 Perl_warner(aTHX_ WARN_REGEXP, "%s limit (%d) exceeded",
2877 "Complex regular subexpression recursion",
2881 /* Failed deeper matches of scan, so see if this one works. */
2882 PL_regcc = cc->oldcc;
2885 if (regmatch(cc->next))
2891 cc->lastloc = lastloc;
2896 next = scan + ARG(scan);
2899 inner = NEXTOPER(NEXTOPER(scan));
2902 inner = NEXTOPER(scan);
2907 if (OP(next) != c1) /* No choice. */
2908 next = inner; /* Avoid recursion. */
2910 I32 lastparen = *PL_reglastparen;
2912 re_unwind_branch_t *uw;
2914 /* Put unwinding data on stack */
2915 unwind1 = SSNEWt(1,re_unwind_branch_t);
2916 uw = SSPTRt(unwind1,re_unwind_branch_t);
2919 uw->type = ((c1 == BRANCH)
2921 : RE_UNWIND_BRANCHJ);
2922 uw->lastparen = lastparen;
2924 uw->locinput = locinput;
2925 uw->nextchr = nextchr;
2927 uw->regindent = ++PL_regindent;
2930 REGCP_SET(uw->lastcp);
2932 /* Now go into the first branch */
2945 /* We suppose that the next guy does not need
2946 backtracking: in particular, it is of constant length,
2947 and has no parenths to influence future backrefs. */
2948 ln = ARG1(scan); /* min to match */
2949 n = ARG2(scan); /* max to match */
2950 paren = scan->flags;
2952 if (paren > PL_regsize)
2954 if (paren > *PL_reglastparen)
2955 *PL_reglastparen = paren;
2957 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
2959 scan += NEXT_OFF(scan); /* Skip former OPEN. */
2960 PL_reginput = locinput;
2963 if (ln && regrepeat_hard(scan, ln, &l) < ln)
2965 if (ln && l == 0 && n >= ln
2966 /* In fact, this is tricky. If paren, then the
2967 fact that we did/didnot match may influence
2968 future execution. */
2969 && !(paren && ln == 0))
2971 locinput = PL_reginput;
2972 if (PL_regkind[(U8)OP(next)] == EXACT) {
2973 c1 = (U8)*STRING(next);
2974 if (OP(next) == EXACTF)
2976 else if (OP(next) == EXACTFL)
2977 c2 = PL_fold_locale[c1];
2984 /* This may be improved if l == 0. */
2985 while (n >= ln || (n == REG_INFTY && ln > 0 && l)) { /* ln overflow ? */
2986 /* If it could work, try it. */
2988 UCHARAT(PL_reginput) == c1 ||
2989 UCHARAT(PL_reginput) == c2)
2993 PL_regstartp[paren] =
2994 HOPc(PL_reginput, -l) - PL_bostr;
2995 PL_regendp[paren] = PL_reginput - PL_bostr;
2998 PL_regendp[paren] = -1;
3002 REGCP_UNWIND(lastcp);
3004 /* Couldn't or didn't -- move forward. */
3005 PL_reginput = locinput;
3006 if (regrepeat_hard(scan, 1, &l)) {
3008 locinput = PL_reginput;
3015 n = regrepeat_hard(scan, n, &l);
3016 if (n != 0 && l == 0
3017 /* In fact, this is tricky. If paren, then the
3018 fact that we did/didnot match may influence
3019 future execution. */
3020 && !(paren && ln == 0))
3022 locinput = PL_reginput;
3024 PerlIO_printf(Perl_debug_log,
3025 "%*s matched %"IVdf" times, len=%"IVdf"...\n",
3026 (int)(REPORT_CODE_OFF+PL_regindent*2), "",
3030 if (PL_regkind[(U8)OP(next)] == EXACT) {
3031 c1 = (U8)*STRING(next);
3032 if (OP(next) == EXACTF)
3034 else if (OP(next) == EXACTFL)
3035 c2 = PL_fold_locale[c1];
3044 /* If it could work, try it. */
3046 UCHARAT(PL_reginput) == c1 ||
3047 UCHARAT(PL_reginput) == c2)
3050 PerlIO_printf(Perl_debug_log,
3051 "%*s trying tail with n=%"IVdf"...\n",
3052 (int)(REPORT_CODE_OFF+PL_regindent*2), "", (IV)n)
3056 PL_regstartp[paren] = HOPc(PL_reginput, -l) - PL_bostr;
3057 PL_regendp[paren] = PL_reginput - PL_bostr;
3060 PL_regendp[paren] = -1;
3064 REGCP_UNWIND(lastcp);
3066 /* Couldn't or didn't -- back up. */
3068 locinput = HOPc(locinput, -l);
3069 PL_reginput = locinput;
3076 paren = scan->flags; /* Which paren to set */
3077 if (paren > PL_regsize)
3079 if (paren > *PL_reglastparen)
3080 *PL_reglastparen = paren;
3081 ln = ARG1(scan); /* min to match */
3082 n = ARG2(scan); /* max to match */
3083 scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
3087 ln = ARG1(scan); /* min to match */
3088 n = ARG2(scan); /* max to match */
3089 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
3094 scan = NEXTOPER(scan);
3100 scan = NEXTOPER(scan);
3104 * Lookahead to avoid useless match attempts
3105 * when we know what character comes next.
3107 if (PL_regkind[(U8)OP(next)] == EXACT) {
3108 U8 *s = (U8*)STRING(next);
3111 if (OP(next) == EXACTF)
3113 else if (OP(next) == EXACTFL)
3114 c2 = PL_fold_locale[c1];
3117 if (OP(next) == EXACTF) {
3118 c1 = to_utf8_lower(s);
3119 c2 = to_utf8_upper(s);
3122 c2 = c1 = utf8_to_uv_simple(s, NULL);
3128 PL_reginput = locinput;
3132 if (ln && regrepeat(scan, ln) < ln)
3134 locinput = PL_reginput;
3137 char *e; /* Should not check after this */
3138 char *old = locinput;
3140 if (n == REG_INFTY) {
3143 while (UTF8_IS_CONTINUATION(*(U8*)e))
3149 m >0 && e + UTF8SKIP(e) <= PL_regeol; m--)
3153 e = locinput + n - ln;
3159 /* Find place 'next' could work */
3162 while (locinput <= e && *locinput != c1)
3165 while (locinput <= e
3170 count = locinput - old;
3177 utf8_to_uv_simple((U8*)locinput, &len) != c1;
3182 for (count = 0; locinput <= e; count++) {
3183 UV c = utf8_to_uv_simple((U8*)locinput, &len);
3184 if (c == c1 || c == c2)
3192 /* PL_reginput == old now */
3193 if (locinput != old) {
3194 ln = 1; /* Did some */
3195 if (regrepeat(scan, count) < count)
3198 /* PL_reginput == locinput now */
3199 TRYPAREN(paren, ln, locinput);
3200 PL_reginput = locinput; /* Could be reset... */
3201 REGCP_UNWIND(lastcp);
3202 /* Couldn't or didn't -- move forward. */
3205 locinput += UTF8SKIP(locinput);
3211 while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */
3215 c = utf8_to_uv_simple((U8*)PL_reginput, NULL);
3217 c = UCHARAT(PL_reginput);
3219 /* If it could work, try it. */
3220 if (c1 == -1000 || c == c1 || c == c2)
3222 TRYPAREN(paren, n, PL_reginput);
3223 REGCP_UNWIND(lastcp);
3225 /* Couldn't or didn't -- move forward. */
3226 PL_reginput = locinput;
3227 if (regrepeat(scan, 1)) {
3229 locinput = PL_reginput;
3237 n = regrepeat(scan, n);
3238 locinput = PL_reginput;
3239 if (ln < n && PL_regkind[(U8)OP(next)] == EOL &&
3240 (!PL_multiline || OP(next) == SEOL || OP(next) == EOS)) {
3241 ln = n; /* why back off? */
3242 /* ...because $ and \Z can match before *and* after
3243 newline at the end. Consider "\n\n" =~ /\n+\Z\n/.
3244 We should back off by one in this case. */
3245 if (UCHARAT(PL_reginput - 1) == '\n' && OP(next) != EOS)
3254 c = utf8_to_uv_simple((U8*)PL_reginput, NULL);
3256 c = UCHARAT(PL_reginput);
3258 /* If it could work, try it. */
3259 if (c1 == -1000 || c == c1 || c == c2)
3261 TRYPAREN(paren, n, PL_reginput);
3262 REGCP_UNWIND(lastcp);
3264 /* Couldn't or didn't -- back up. */
3266 PL_reginput = locinput = HOPc(locinput, -1);
3274 c = utf8_to_uv_simple((U8*)PL_reginput, NULL);
3276 c = UCHARAT(PL_reginput);
3278 /* If it could work, try it. */
3279 if (c1 == -1000 || c == c1 || c == c2)
3281 TRYPAREN(paren, n, PL_reginput);
3282 REGCP_UNWIND(lastcp);
3284 /* Couldn't or didn't -- back up. */
3286 PL_reginput = locinput = HOPc(locinput, -1);
3293 if (PL_reg_call_cc) {
3294 re_cc_state *cur_call_cc = PL_reg_call_cc;
3295 CURCUR *cctmp = PL_regcc;
3296 regexp *re = PL_reg_re;
3297 CHECKPOINT cp, lastcp;
3299 cp = regcppush(0); /* Save *all* the positions. */
3301 regcp_set_to(PL_reg_call_cc->ss); /* Restore parens of
3303 PL_reginput = locinput; /* Make position available to
3305 cache_re(PL_reg_call_cc->re);
3306 PL_regcc = PL_reg_call_cc->cc;
3307 PL_reg_call_cc = PL_reg_call_cc->prev;
3308 if (regmatch(cur_call_cc->node)) {
3309 PL_reg_call_cc = cur_call_cc;
3313 REGCP_UNWIND(lastcp);
3315 PL_reg_call_cc = cur_call_cc;
3321 PerlIO_printf(Perl_debug_log,
3322 "%*s continuation failed...\n",
3323 REPORT_CODE_OFF+PL_regindent*2, "")
3327 if (locinput < PL_regtill) {
3328 DEBUG_r(PerlIO_printf(Perl_debug_log,
3329 "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
3331 (long)(locinput - PL_reg_starttry),
3332 (long)(PL_regtill - PL_reg_starttry),
3334 sayNO_FINAL; /* Cannot match: too short. */
3336 PL_reginput = locinput; /* put where regtry can find it */
3337 sayYES_FINAL; /* Success! */
3339 PL_reginput = locinput; /* put where regtry can find it */
3340 sayYES_LOUD; /* Success! */
3343 PL_reginput = locinput;
3348 if (UTF) { /* XXXX This is absolutely
3349 broken, we read before
3351 s = HOPMAYBEc(locinput, -scan->flags);
3357 if (locinput < PL_bostr + scan->flags)
3359 PL_reginput = locinput - scan->flags;
3364 PL_reginput = locinput;
3369 if (UTF) { /* XXXX This is absolutely
3370 broken, we read before
3372 s = HOPMAYBEc(locinput, -scan->flags);
3373 if (!s || s < PL_bostr)
3378 if (locinput < PL_bostr + scan->flags)
3380 PL_reginput = locinput - scan->flags;
3385 PL_reginput = locinput;
3388 inner = NEXTOPER(NEXTOPER(scan));
3389 if (regmatch(inner) != n) {
3404 if (OP(scan) == SUSPEND) {
3405 locinput = PL_reginput;
3406 nextchr = UCHARAT(locinput);
3411 next = scan + ARG(scan);
3416 PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
3417 PTR2UV(scan), OP(scan));
3418 Perl_croak(aTHX_ "regexp memory corruption");
3425 * We get here only if there's trouble -- normally "case END" is
3426 * the terminating point.
3428 Perl_croak(aTHX_ "corrupted regexp pointers");
3434 PerlIO_printf(Perl_debug_log,
3435 "%*s %scould match...%s\n",
3436 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],PL_colors[5])
3440 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
3441 PL_colors[4],PL_colors[5]));
3447 #if 0 /* Breaks $^R */
3455 PerlIO_printf(Perl_debug_log,
3456 "%*s %sfailed...%s\n",
3457 REPORT_CODE_OFF+PL_regindent*2, "",PL_colors[4],PL_colors[5])
3463 re_unwind_t *uw = SSPTRt(unwind,re_unwind_t);
3466 case RE_UNWIND_BRANCH:
3467 case RE_UNWIND_BRANCHJ:
3469 re_unwind_branch_t *uwb = &(uw->branch);
3470 I32 lastparen = uwb->lastparen;
3472 REGCP_UNWIND(uwb->lastcp);
3473 for (n = *PL_reglastparen; n > lastparen; n--)
3475 *PL_reglastparen = n;
3476 scan = next = uwb->next;
3478 OP(scan) != (uwb->type == RE_UNWIND_BRANCH
3479 ? BRANCH : BRANCHJ) ) { /* Failure */
3486 /* Have more choice yet. Reuse the same uwb. */
3488 if ((n = (uwb->type == RE_UNWIND_BRANCH
3489 ? NEXT_OFF(next) : ARG(next))))
3492 next = NULL; /* XXXX Needn't unwinding in this case... */
3494 next = NEXTOPER(scan);
3495 if (uwb->type == RE_UNWIND_BRANCHJ)
3496 next = NEXTOPER(next);
3497 locinput = uwb->locinput;
3498 nextchr = uwb->nextchr;
3500 PL_regindent = uwb->regindent;
3507 Perl_croak(aTHX_ "regexp unwind memory corruption");
3518 - regrepeat - repeatedly match something simple, report how many
3521 * [This routine now assumes that it will only match on things of length 1.
3522 * That was true before, but now we assume scan - reginput is the count,
3523 * rather than incrementing count on every character. [Er, except utf8.]]
3526 S_regrepeat(pTHX_ regnode *p, I32 max)
3528 register char *scan;
3530 register char *loceol = PL_regeol;
3531 register I32 hardcount = 0;
3532 register bool do_utf8 = DO_UTF8(PL_reg_sv);
3535 if (max != REG_INFTY && max < loceol - scan)
3536 loceol = scan + max;
3541 while (scan < loceol && hardcount < max && *scan != '\n') {
3542 scan += UTF8SKIP(scan);
3546 while (scan < loceol && *scan != '\n')
3553 while (hardcount < max && scan < loceol) {
3554 scan += UTF8SKIP(scan);
3561 case EXACT: /* length of string is 1 */
3563 while (scan < loceol && UCHARAT(scan) == c)
3566 case EXACTF: /* length of string is 1 */
3568 while (scan < loceol &&
3569 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
3572 case EXACTFL: /* length of string is 1 */
3573 PL_reg_flags |= RF_tainted;
3575 while (scan < loceol &&
3576 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
3582 while (hardcount < max && scan < loceol &&
3583 reginclass(p, (U8*)scan, do_utf8)) {
3584 scan += UTF8SKIP(scan);
3588 while (scan < loceol && reginclass(p, (U8*)scan, do_utf8))
3595 while (hardcount < max && scan < loceol &&
3596 swash_fetch(PL_utf8_alnum, (U8*)scan)) {
3597 scan += UTF8SKIP(scan);
3601 while (scan < loceol && isALNUM(*scan))
3606 PL_reg_flags |= RF_tainted;
3609 while (hardcount < max && scan < loceol &&
3610 isALNUM_LC_utf8((U8*)scan)) {
3611 scan += UTF8SKIP(scan);
3615 while (scan < loceol && isALNUM_LC(*scan))
3622 while (hardcount < max && scan < loceol &&
3623 !swash_fetch(PL_utf8_alnum, (U8*)scan)) {
3624 scan += UTF8SKIP(scan);
3628 while (scan < loceol && !isALNUM(*scan))
3633 PL_reg_flags |= RF_tainted;
3636 while (hardcount < max && scan < loceol &&
3637 !isALNUM_LC_utf8((U8*)scan)) {
3638 scan += UTF8SKIP(scan);
3642 while (scan < loceol && !isALNUM_LC(*scan))
3649 while (hardcount < max && scan < loceol &&
3650 (*scan == ' ' || swash_fetch(PL_utf8_space,(U8*)scan))) {
3651 scan += UTF8SKIP(scan);
3655 while (scan < loceol && isSPACE(*scan))
3660 PL_reg_flags |= RF_tainted;
3663 while (hardcount < max && scan < loceol &&
3664 (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
3665 scan += UTF8SKIP(scan);
3669 while (scan < loceol && isSPACE_LC(*scan))
3676 while (hardcount < max && scan < loceol &&
3677 !(*scan == ' ' || swash_fetch(PL_utf8_space,(U8*)scan))) {
3678 scan += UTF8SKIP(scan);
3682 while (scan < loceol && !isSPACE(*scan))
3687 PL_reg_flags |= RF_tainted;
3690 while (hardcount < max && scan < loceol &&
3691 !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
3692 scan += UTF8SKIP(scan);
3696 while (scan < loceol && !isSPACE_LC(*scan))
3703 while (hardcount < max && scan < loceol &&
3704 swash_fetch(PL_utf8_digit,(U8*)scan)) {
3705 scan += UTF8SKIP(scan);
3709 while (scan < loceol && isDIGIT(*scan))
3716 while (hardcount < max && scan < loceol &&
3717 !swash_fetch(PL_utf8_digit,(U8*)scan)) {
3718 scan += UTF8SKIP(scan);
3722 while (scan < loceol && !isDIGIT(*scan))
3726 default: /* Called on something of 0 width. */
3727 break; /* So match right here or not at all. */
3733 c = scan - PL_reginput;
3738 SV *prop = sv_newmortal();
3741 PerlIO_printf(Perl_debug_log,
3742 "%*s %s can match %"IVdf" times out of %"IVdf"...\n",
3743 REPORT_CODE_OFF+1, "", SvPVX(prop),(IV)c,(IV)max);
3750 - regrepeat_hard - repeatedly match something, report total lenth and length
3752 * The repeater is supposed to have constant length.
3756 S_regrepeat_hard(pTHX_ regnode *p, I32 max, I32 *lp)
3758 register char *scan;
3759 register char *start;
3760 register char *loceol = PL_regeol;
3762 I32 count = 0, res = 1;
3767 start = PL_reginput;
3768 if (DO_UTF8(PL_reg_sv)) {
3769 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
3772 while (start < PL_reginput) {
3774 start += UTF8SKIP(start);
3785 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
3787 *lp = l = PL_reginput - start;
3788 if (max != REG_INFTY && l*max < loceol - scan)
3789 loceol = scan + l*max;
3802 - regclass_swash - prepare the utf8 swash
3806 Perl_regclass_swash(pTHX_ register regnode* node, bool doinit, SV** initsvp)
3811 if (PL_regdata && PL_regdata->count) {
3814 if (PL_regdata->what[n] == 's') {
3815 SV *rv = (SV*)PL_regdata->data[n];
3816 AV *av = (AV*)SvRV((SV*)rv);
3819 si = *av_fetch(av, 0, FALSE);
3820 a = av_fetch(av, 1, FALSE);
3824 else if (si && doinit) {
3825 sw = swash_init("utf8", "", si, 1, 0);
3826 (void)av_store(av, 1, sw);
3838 - reginclass - determine if a character falls into a character class
3842 S_reginclass(pTHX_ register regnode *n, register U8* p, register bool do_utf8)
3844 char flags = ANYOF_FLAGS(n);
3850 c = utf8_to_uv_simple(p, &len);
3854 if (do_utf8 || (flags & ANYOF_UNICODE)) {
3855 if (do_utf8 && !ANYOF_RUNTIME(n)) {
3856 if (len != (STRLEN)-1 && c < 256 && ANYOF_BITMAP_TEST(n, c))
3859 if (do_utf8 && flags & ANYOF_UNICODE_ALL && c >= 256)
3862 SV *sw = regclass_swash(n, TRUE, 0);
3865 if (swash_fetch(sw, p))
3867 else if (flags & ANYOF_FOLD) {
3868 U8 tmpbuf[UTF8_MAXLEN+1];
3870 if (flags & ANYOF_LOCALE) {
3871 PL_reg_flags |= RF_tainted;
3872 uv_to_utf8(tmpbuf, toLOWER_LC_utf8(p));
3875 uv_to_utf8(tmpbuf, toLOWER_utf8(p));
3876 if (swash_fetch(sw, tmpbuf))
3882 if (!match && c < 256) {
3883 if (ANYOF_BITMAP_TEST(n, c))
3885 else if (flags & ANYOF_FOLD) {
3888 if (flags & ANYOF_LOCALE) {
3889 PL_reg_flags |= RF_tainted;
3890 f = PL_fold_locale[c];
3894 if (f != c && ANYOF_BITMAP_TEST(n, f))
3898 if (!match && (flags & ANYOF_CLASS)) {
3899 PL_reg_flags |= RF_tainted;
3901 (ANYOF_CLASS_TEST(n, ANYOF_ALNUM) && isALNUM_LC(c)) ||
3902 (ANYOF_CLASS_TEST(n, ANYOF_NALNUM) && !isALNUM_LC(c)) ||
3903 (ANYOF_CLASS_TEST(n, ANYOF_SPACE) && isSPACE_LC(c)) ||
3904 (ANYOF_CLASS_TEST(n, ANYOF_NSPACE) && !isSPACE_LC(c)) ||
3905 (ANYOF_CLASS_TEST(n, ANYOF_DIGIT) && isDIGIT_LC(c)) ||
3906 (ANYOF_CLASS_TEST(n, ANYOF_NDIGIT) && !isDIGIT_LC(c)) ||
3907 (ANYOF_CLASS_TEST(n, ANYOF_ALNUMC) && isALNUMC_LC(c)) ||
3908 (ANYOF_CLASS_TEST(n, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
3909 (ANYOF_CLASS_TEST(n, ANYOF_ALPHA) && isALPHA_LC(c)) ||
3910 (ANYOF_CLASS_TEST(n, ANYOF_NALPHA) && !isALPHA_LC(c)) ||
3911 (ANYOF_CLASS_TEST(n, ANYOF_ASCII) && isASCII(c)) ||
3912 (ANYOF_CLASS_TEST(n, ANYOF_NASCII) && !isASCII(c)) ||
3913 (ANYOF_CLASS_TEST(n, ANYOF_CNTRL) && isCNTRL_LC(c)) ||
3914 (ANYOF_CLASS_TEST(n, ANYOF_NCNTRL) && !isCNTRL_LC(c)) ||
3915 (ANYOF_CLASS_TEST(n, ANYOF_GRAPH) && isGRAPH_LC(c)) ||
3916 (ANYOF_CLASS_TEST(n, ANYOF_NGRAPH) && !isGRAPH_LC(c)) ||
3917 (ANYOF_CLASS_TEST(n, ANYOF_LOWER) && isLOWER_LC(c)) ||
3918 (ANYOF_CLASS_TEST(n, ANYOF_NLOWER) && !isLOWER_LC(c)) ||
3919 (ANYOF_CLASS_TEST(n, ANYOF_PRINT) && isPRINT_LC(c)) ||
3920 (ANYOF_CLASS_TEST(n, ANYOF_NPRINT) && !isPRINT_LC(c)) ||
3921 (ANYOF_CLASS_TEST(n, ANYOF_PUNCT) && isPUNCT_LC(c)) ||
3922 (ANYOF_CLASS_TEST(n, ANYOF_NPUNCT) && !isPUNCT_LC(c)) ||
3923 (ANYOF_CLASS_TEST(n, ANYOF_UPPER) && isUPPER_LC(c)) ||
3924 (ANYOF_CLASS_TEST(n, ANYOF_NUPPER) && !isUPPER_LC(c)) ||
3925 (ANYOF_CLASS_TEST(n, ANYOF_XDIGIT) && isXDIGIT(c)) ||
3926 (ANYOF_CLASS_TEST(n, ANYOF_NXDIGIT) && !isXDIGIT(c)) ||
3927 (ANYOF_CLASS_TEST(n, ANYOF_PSXSPC) && isPSXSPC(c)) ||
3928 (ANYOF_CLASS_TEST(n, ANYOF_NPSXSPC) && !isPSXSPC(c)) ||
3929 (ANYOF_CLASS_TEST(n, ANYOF_BLANK) && isBLANK(c)) ||
3930 (ANYOF_CLASS_TEST(n, ANYOF_NBLANK) && !isBLANK(c))
3931 ) /* How's that for a conditional? */
3938 return (flags & ANYOF_INVERT) ? !match : match;
3942 S_reghop(pTHX_ U8 *s, I32 off)
3944 return S_reghop3(aTHX_ s, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr));
3948 S_reghop3(pTHX_ U8 *s, I32 off, U8* lim)
3951 while (off-- && s < lim) {
3952 /* XXX could check well-formedness here */
3960 if (UTF8_IS_CONTINUED(*s)) {
3961 while (s > (U8*)lim && UTF8_IS_CONTINUATION(*s))
3964 /* XXX could check well-formedness here */
3972 S_reghopmaybe(pTHX_ U8 *s, I32 off)
3974 return S_reghopmaybe3(aTHX_ s, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr));
3978 S_reghopmaybe3(pTHX_ U8* s, I32 off, U8* lim)
3981 while (off-- && s < lim) {
3982 /* XXX could check well-formedness here */
3992 if (UTF8_IS_CONTINUED(*s)) {
3993 while (s > (U8*)lim && UTF8_IS_CONTINUATION(*s))
3996 /* XXX could check well-formedness here */
4012 restore_pos(pTHXo_ void *arg)
4014 if (PL_reg_eval_set) {
4015 if (PL_reg_oldsaved) {
4016 PL_reg_re->subbeg = PL_reg_oldsaved;
4017 PL_reg_re->sublen = PL_reg_oldsavedlen;
4018 RX_MATCH_COPIED_on(PL_reg_re);
4020 PL_reg_magic->mg_len = PL_reg_oldpos;
4021 PL_reg_eval_set = 0;
4022 PL_curpm = PL_reg_oldcurpm;