5 * "One Ring to rule them all, One Ring to find them..."
8 /* NOTE: this is derived from Henry Spencer's regexp code, and should not
9 * confused with the original package (see point 3 below). Thanks, Henry!
12 /* Additional note: this code is very heavily munged from Henry's version
13 * in places. In some spots I've traded clarity for efficiency, so don't
14 * blame Henry for some of the lack of readability.
17 /* The names of the functions have been changed from regcomp and
18 * regexec to pregcomp and pregexec in order to avoid conflicts
19 * with the POSIX routines of the same names.
22 #ifdef PERL_EXT_RE_BUILD
23 /* need to replace pregcomp et al, so enable that */
24 # ifndef PERL_IN_XSUB_RE
25 # define PERL_IN_XSUB_RE
27 /* need access to debugger hooks */
28 # if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING)
33 #ifdef PERL_IN_XSUB_RE
34 /* We *really* need to overwrite these symbols: */
35 # define Perl_regexec_flags my_regexec
36 # define Perl_regdump my_regdump
37 # define Perl_regprop my_regprop
38 # define Perl_re_intuit_start my_re_intuit_start
39 /* *These* symbols are masked to allow static link. */
40 # define Perl_pregexec my_pregexec
41 # define Perl_reginitcolors my_reginitcolors
42 # define Perl_regclass_swash my_regclass_swash
44 # define PERL_NO_GET_CONTEXT
49 * pregcomp and pregexec -- regsub and regerror are not used in perl
51 * Copyright (c) 1986 by University of Toronto.
52 * Written by Henry Spencer. Not derived from licensed software.
54 * Permission is granted to anyone to use this software for any
55 * purpose on any computer system, and to redistribute it freely,
56 * subject to the following restrictions:
58 * 1. The author is not responsible for the consequences of use of
59 * this software, no matter how awful, even if they arise
62 * 2. The origin of this software must not be misrepresented, either
63 * by explicit claim or by omission.
65 * 3. Altered versions must be plainly marked as such, and must not
66 * be misrepresented as being the original software.
68 **** Alterations to Henry's code are...
70 **** Copyright (c) 1991-2001, Larry Wall
72 **** You may distribute under the terms of either the GNU General Public
73 **** License or the Artistic License, as specified in the README file.
75 * Beware that some of this code is subtly aware of the way operator
76 * precedence is structured in regular expressions. Serious changes in
77 * regular-expression syntax might require a total rethink.
80 #define PERL_IN_REGEXEC_C
83 #ifdef PERL_IN_XSUB_RE
84 # if defined(PERL_CAPI) || defined(PERL_OBJECT)
91 #define RF_tainted 1 /* tainted information used? */
92 #define RF_warned 2 /* warned about big count? */
93 #define RF_evaled 4 /* Did an EVAL with setting? */
94 #define RF_utf8 8 /* String contains multibyte chars? */
96 #define UTF (PL_reg_flags & RF_utf8)
98 #define RS_init 1 /* eval environment created */
99 #define RS_set 2 /* replsv value is set */
102 #define STATIC static
109 #define CHR_SVLEN(sv) (UTF ? sv_len_utf8(sv) : SvCUR(sv))
110 #define CHR_DIST(a,b) (DO_UTF8(PL_reg_sv) ? utf8_distance(a,b) : a - b)
112 #define reghop_c(pos,off) ((char*)reghop((U8*)pos, off))
113 #define reghopmaybe_c(pos,off) ((char*)reghopmaybe((U8*)pos, off))
114 #define HOP(pos,off) (DO_UTF8(PL_reg_sv) ? reghop((U8*)pos, off) : (U8*)(pos + off))
115 #define HOPMAYBE(pos,off) (DO_UTF8(PL_reg_sv) ? reghopmaybe((U8*)pos, off) : (U8*)(pos + off))
116 #define HOPc(pos,off) ((char*)HOP(pos,off))
117 #define HOPMAYBEc(pos,off) ((char*)HOPMAYBE(pos,off))
119 #define reghop3_c(pos,off,lim) ((char*)reghop3((U8*)pos, off, (U8*)lim))
120 #define reghopmaybe3_c(pos,off,lim) ((char*)reghopmaybe3((U8*)pos, off, (U8*)lim))
121 #define HOP3(pos,off,lim) (DO_UTF8(PL_reg_sv) ? reghop3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
122 #define HOPMAYBE3(pos,off,lim) (DO_UTF8(PL_reg_sv) ? reghopmaybe3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
123 #define HOP3c(pos,off,lim) ((char*)HOP3(pos,off,lim))
124 #define HOPMAYBE3c(pos,off,lim) ((char*)HOPMAYBE3(pos,off,lim))
126 static void restore_pos(pTHXo_ void *arg);
130 S_regcppush(pTHX_ I32 parenfloor)
132 int retval = PL_savestack_ix;
133 int i = (PL_regsize - parenfloor) * 4;
137 for (p = PL_regsize; p > parenfloor; p--) {
138 SSPUSHINT(PL_regendp[p]);
139 SSPUSHINT(PL_regstartp[p]);
140 SSPUSHPTR(PL_reg_start_tmp[p]);
143 SSPUSHINT(PL_regsize);
144 SSPUSHINT(*PL_reglastparen);
145 SSPUSHPTR(PL_reginput);
147 SSPUSHINT(SAVEt_REGCONTEXT);
151 /* These are needed since we do not localize EVAL nodes: */
152 # define REGCP_SET(cp) DEBUG_r(PerlIO_printf(Perl_debug_log, \
153 " Setting an EVAL scope, savestack=%"IVdf"\n", \
154 (IV)PL_savestack_ix)); cp = PL_savestack_ix
156 # define REGCP_UNWIND(cp) DEBUG_r(cp != PL_savestack_ix ? \
157 PerlIO_printf(Perl_debug_log, \
158 " Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
159 (IV)(cp), (IV)PL_savestack_ix) : 0); regcpblow(cp)
168 assert(i == SAVEt_REGCONTEXT);
170 input = (char *) SSPOPPTR;
171 *PL_reglastparen = SSPOPINT;
172 PL_regsize = SSPOPINT;
173 for (i -= 3; i > 0; i -= 4) {
174 paren = (U32)SSPOPINT;
175 PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
176 PL_regstartp[paren] = SSPOPINT;
178 if (paren <= *PL_reglastparen)
179 PL_regendp[paren] = tmps;
181 PerlIO_printf(Perl_debug_log,
182 " restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
183 (UV)paren, (IV)PL_regstartp[paren],
184 (IV)(PL_reg_start_tmp[paren] - PL_bostr),
185 (IV)PL_regendp[paren],
186 (paren > *PL_reglastparen ? "(no)" : ""));
190 if (*PL_reglastparen + 1 <= PL_regnpar) {
191 PerlIO_printf(Perl_debug_log,
192 " restoring \\%"IVdf"..\\%"IVdf" to undef\n",
193 (IV)(*PL_reglastparen + 1), (IV)PL_regnpar);
197 /* It would seem that the similar code in regtry()
198 * already takes care of this, and in fact it is in
199 * a better location to since this code can #if 0-ed out
200 * but the code in regtry() is needed or otherwise tests
201 * requiring null fields (pat.t#187 and split.t#{13,14}
202 * (as of patchlevel 7877) will fail. Then again,
203 * this code seems to be necessary or otherwise
204 * building DynaLoader will fail:
205 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
207 for (paren = *PL_reglastparen + 1; paren <= PL_regnpar; paren++) {
208 if (paren > PL_regsize)
209 PL_regstartp[paren] = -1;
210 PL_regendp[paren] = -1;
217 S_regcp_set_to(pTHX_ I32 ss)
219 I32 tmp = PL_savestack_ix;
221 PL_savestack_ix = ss;
223 PL_savestack_ix = tmp;
227 typedef struct re_cc_state
231 struct re_cc_state *prev;
236 #define regcpblow(cp) LEAVE_SCOPE(cp) /* Ignores regcppush()ed data. */
238 #define TRYPAREN(paren, n, input) { \
241 PL_regstartp[paren] = HOPc(input, -1) - PL_bostr; \
242 PL_regendp[paren] = input - PL_bostr; \
245 PL_regendp[paren] = -1; \
247 if (regmatch(next)) \
250 PL_regendp[paren] = -1; \
255 * pregexec and friends
259 - pregexec - match a regexp against a string
262 Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend,
263 char *strbeg, I32 minend, SV *screamer, U32 nosave)
264 /* strend: pointer to null at end of string */
265 /* strbeg: real beginning of string */
266 /* minend: end of match must be >=minend after stringarg. */
267 /* nosave: For optimizations. */
270 regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
271 nosave ? 0 : REXEC_COPY_STR);
275 S_cache_re(pTHX_ regexp *prog)
277 PL_regprecomp = prog->precomp; /* Needed for FAIL. */
279 PL_regprogram = prog->program;
281 PL_regnpar = prog->nparens;
282 PL_regdata = prog->data;
287 * Need to implement the following flags for reg_anch:
289 * USE_INTUIT_NOML - Useful to call re_intuit_start() first
291 * INTUIT_AUTORITATIVE_NOML - Can trust a positive answer
292 * INTUIT_AUTORITATIVE_ML
293 * INTUIT_ONCE_NOML - Intuit can match in one location only.
296 * Another flag for this function: SECOND_TIME (so that float substrs
297 * with giant delta may be not rechecked).
300 /* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
302 /* If SCREAM, then SvPVX(sv) should be compatible with strpos and strend.
303 Otherwise, only SvCUR(sv) is used to get strbeg. */
305 /* XXXX We assume that strpos is strbeg unless sv. */
307 /* XXXX Some places assume that there is a fixed substring.
308 An update may be needed if optimizer marks as "INTUITable"
309 RExen without fixed substrings. Similarly, it is assumed that
310 lengths of all the strings are no more than minlen, thus they
311 cannot come from lookahead.
312 (Or minlen should take into account lookahead.) */
314 /* A failure to find a constant substring means that there is no need to make
315 an expensive call to REx engine, thus we celebrate a failure. Similarly,
316 finding a substring too deep into the string means that less calls to
317 regtry() should be needed.
319 REx compiler's optimizer found 4 possible hints:
320 a) Anchored substring;
322 c) Whether we are anchored (beginning-of-line or \G);
323 d) First node (of those at offset 0) which may distingush positions;
324 We use a)b)d) and multiline-part of c), and try to find a position in the
325 string which does not contradict any of them.
328 /* Most of decisions we do here should have been done at compile time.
329 The nodes of the REx which we used for the search should have been
330 deleted from the finite automaton. */
333 Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
334 char *strend, U32 flags, re_scream_pos_data *data)
336 register I32 start_shift;
337 /* Should be nonnegative! */
338 register I32 end_shift;
345 register char *other_last = Nullch; /* other substr checked before this */
346 char *check_at; /* check substr found at this pos */
348 char *i_strpos = strpos;
351 DEBUG_r( if (!PL_colorset) reginitcolors() );
352 DEBUG_r(PerlIO_printf(Perl_debug_log,
353 "%sGuessing start of match, REx%s `%s%.60s%s%s' against `%s%.*s%s%s'...\n",
354 PL_colors[4],PL_colors[5],PL_colors[0],
357 (strlen(prog->precomp) > 60 ? "..." : ""),
359 (int)(strend - strpos > 60 ? 60 : strend - strpos),
360 strpos, PL_colors[1],
361 (strend - strpos > 60 ? "..." : ""))
364 if (prog->reganch & ROPT_UTF8)
365 PL_reg_flags |= RF_utf8;
367 if (prog->minlen > CHR_DIST((U8*)strend, (U8*)strpos)) {
368 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too short...\n"));
371 strbeg = (sv && SvPOK(sv)) ? strend - SvCUR(sv) : strpos;
373 check = prog->check_substr;
374 if (prog->reganch & ROPT_ANCH) { /* Match at beg-of-str or after \n */
375 ml_anch = !( (prog->reganch & ROPT_ANCH_SINGLE)
376 || ( (prog->reganch & ROPT_ANCH_BOL)
377 && !PL_multiline ) ); /* Check after \n? */
380 if ( !(prog->reganch & ROPT_ANCH_GPOS) /* Checked by the caller */
381 /* SvCUR is not set on references: SvRV and SvPVX overlap */
383 && (strpos != strbeg)) {
384 DEBUG_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
387 if (prog->check_offset_min == prog->check_offset_max) {
388 /* Substring at constant offset from beg-of-str... */
391 s = HOP3c(strpos, prog->check_offset_min, strend);
393 slen = SvCUR(check); /* >= 1 */
395 if ( strend - s > slen || strend - s < slen - 1
396 || (strend - s == slen && strend[-1] != '\n')) {
397 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
400 /* Now should match s[0..slen-2] */
402 if (slen && (*SvPVX(check) != *s
404 && memNE(SvPVX(check), s, slen)))) {
406 DEBUG_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
410 else if (*SvPVX(check) != *s
411 || ((slen = SvCUR(check)) > 1
412 && memNE(SvPVX(check), s, slen)))
414 goto success_at_start;
417 /* Match is anchored, but substr is not anchored wrt beg-of-str. */
419 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
420 end_shift = prog->minlen - start_shift -
421 CHR_SVLEN(check) + (SvTAIL(check) != 0);
423 I32 end = prog->check_offset_max + CHR_SVLEN(check)
424 - (SvTAIL(check) != 0);
425 I32 eshift = CHR_DIST((U8*)strend, (U8*)s) - end;
427 if (end_shift < eshift)
431 else { /* Can match at random position */
434 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
435 /* Should be nonnegative! */
436 end_shift = prog->minlen - start_shift -
437 CHR_SVLEN(check) + (SvTAIL(check) != 0);
440 #ifdef DEBUGGING /* 7/99: reports of failure (with the older version) */
442 Perl_croak(aTHX_ "panic: end_shift");
446 /* Find a possible match in the region s..strend by looking for
447 the "check" substring in the region corrected by start/end_shift. */
448 if (flags & REXEC_SCREAM) {
449 I32 p = -1; /* Internal iterator of scream. */
450 I32 *pp = data ? data->scream_pos : &p;
452 if (PL_screamfirst[BmRARE(check)] >= 0
453 || ( BmRARE(check) == '\n'
454 && (BmPREVIOUS(check) == SvCUR(check) - 1)
456 s = screaminstr(sv, check,
457 start_shift + (s - strbeg), end_shift, pp, 0);
461 *data->scream_olds = s;
464 s = fbm_instr(HOP3(s, start_shift, strend),
465 HOP3(strend, -end_shift, strbeg),
466 check, PL_multiline ? FBMrf_MULTILINE : 0);
468 /* Update the count-of-usability, remove useless subpatterns,
471 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s %s substr `%s%.*s%s'%s%s",
472 (s ? "Found" : "Did not find"),
473 ((check == prog->anchored_substr) ? "anchored" : "floating"),
475 (int)(SvCUR(check) - (SvTAIL(check)!=0)),
477 PL_colors[1], (SvTAIL(check) ? "$" : ""),
478 (s ? " at offset " : "...\n") ) );
485 /* Finish the diagnostic message */
486 DEBUG_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
488 /* Got a candidate. Check MBOL anchoring, and the *other* substr.
489 Start with the other substr.
490 XXXX no SCREAM optimization yet - and a very coarse implementation
491 XXXX /ttx+/ results in anchored=`ttx', floating=`x'. floating will
492 *always* match. Probably should be marked during compile...
493 Probably it is right to do no SCREAM here...
496 if (prog->float_substr && prog->anchored_substr) {
497 /* Take into account the "other" substring. */
498 /* XXXX May be hopelessly wrong for UTF... */
501 if (check == prog->float_substr) {
504 char *last = HOP3c(s, -start_shift, strbeg), *last1, *last2;
507 t = s - prog->check_offset_max;
508 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
509 && (!(prog->reganch & ROPT_UTF8)
510 || ((t = reghopmaybe3_c(s, -(prog->check_offset_max), strpos))
515 t = HOP3c(t, prog->anchored_offset, strend);
516 if (t < other_last) /* These positions already checked */
518 last2 = last1 = HOP3c(strend, -prog->minlen, strbeg);
521 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
522 /* On end-of-str: see comment below. */
523 s = fbm_instr((unsigned char*)t,
524 HOP3(HOP3(last1, prog->anchored_offset, strend)
525 + SvCUR(prog->anchored_substr),
526 -(SvTAIL(prog->anchored_substr)!=0), strbeg),
527 prog->anchored_substr,
528 PL_multiline ? FBMrf_MULTILINE : 0);
529 DEBUG_r(PerlIO_printf(Perl_debug_log,
530 "%s anchored substr `%s%.*s%s'%s",
531 (s ? "Found" : "Contradicts"),
533 (int)(SvCUR(prog->anchored_substr)
534 - (SvTAIL(prog->anchored_substr)!=0)),
535 SvPVX(prog->anchored_substr),
536 PL_colors[1], (SvTAIL(prog->anchored_substr) ? "$" : "")));
538 if (last1 >= last2) {
539 DEBUG_r(PerlIO_printf(Perl_debug_log,
540 ", giving up...\n"));
543 DEBUG_r(PerlIO_printf(Perl_debug_log,
544 ", trying floating at offset %ld...\n",
545 (long)(HOP3c(s1, 1, strend) - i_strpos)));
546 other_last = HOP3c(last1, prog->anchored_offset+1, strend);
547 s = HOP3c(last, 1, strend);
551 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
552 (long)(s - i_strpos)));
553 t = HOP3c(s, -prog->anchored_offset, strbeg);
554 other_last = HOP3c(s, 1, strend);
562 else { /* Take into account the floating substring. */
566 t = HOP3c(s, -start_shift, strbeg);
568 HOP3c(strend, -prog->minlen + prog->float_min_offset, strbeg);
569 if (CHR_DIST((U8*)last, (U8*)t) > prog->float_max_offset)
570 last = HOP3c(t, prog->float_max_offset, strend);
571 s = HOP3c(t, prog->float_min_offset, strend);
574 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
575 /* fbm_instr() takes into account exact value of end-of-str
576 if the check is SvTAIL(ed). Since false positives are OK,
577 and end-of-str is not later than strend we are OK. */
578 s = fbm_instr((unsigned char*)s,
579 (unsigned char*)last + SvCUR(prog->float_substr)
580 - (SvTAIL(prog->float_substr)!=0),
581 prog->float_substr, PL_multiline ? FBMrf_MULTILINE : 0);
582 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s floating substr `%s%.*s%s'%s",
583 (s ? "Found" : "Contradicts"),
585 (int)(SvCUR(prog->float_substr)
586 - (SvTAIL(prog->float_substr)!=0)),
587 SvPVX(prog->float_substr),
588 PL_colors[1], (SvTAIL(prog->float_substr) ? "$" : "")));
591 DEBUG_r(PerlIO_printf(Perl_debug_log,
592 ", giving up...\n"));
595 DEBUG_r(PerlIO_printf(Perl_debug_log,
596 ", trying anchored starting at offset %ld...\n",
597 (long)(s1 + 1 - i_strpos)));
599 s = HOP3c(t, 1, strend);
603 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
604 (long)(s - i_strpos)));
605 other_last = s; /* Fix this later. --Hugo */
614 t = s - prog->check_offset_max;
615 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
616 && (!(prog->reganch & ROPT_UTF8)
617 || ((t = reghopmaybe3_c(s, -prog->check_offset_max, strpos))
619 /* Fixed substring is found far enough so that the match
620 cannot start at strpos. */
622 if (ml_anch && t[-1] != '\n') {
623 /* Eventually fbm_*() should handle this, but often
624 anchored_offset is not 0, so this check will not be wasted. */
625 /* XXXX In the code below we prefer to look for "^" even in
626 presence of anchored substrings. And we search even
627 beyond the found float position. These pessimizations
628 are historical artefacts only. */
630 while (t < strend - prog->minlen) {
632 if (t < check_at - prog->check_offset_min) {
633 if (prog->anchored_substr) {
634 /* Since we moved from the found position,
635 we definitely contradict the found anchored
636 substr. Due to the above check we do not
637 contradict "check" substr.
638 Thus we can arrive here only if check substr
639 is float. Redo checking for "other"=="fixed".
642 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
643 PL_colors[0],PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
644 goto do_other_anchored;
646 /* We don't contradict the found floating substring. */
647 /* XXXX Why not check for STCLASS? */
649 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
650 PL_colors[0],PL_colors[1], (long)(s - i_strpos)));
653 /* Position contradicts check-string */
654 /* XXXX probably better to look for check-string
655 than for "\n", so one should lower the limit for t? */
656 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n",
657 PL_colors[0],PL_colors[1], (long)(t + 1 - i_strpos)));
658 other_last = strpos = s = t + 1;
663 DEBUG_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
664 PL_colors[0],PL_colors[1]));
668 DEBUG_r(PerlIO_printf(Perl_debug_log, "Starting position does not contradict /%s^%s/m...\n",
669 PL_colors[0],PL_colors[1]));
673 ++BmUSEFUL(prog->check_substr); /* hooray/5 */
676 /* The found string does not prohibit matching at strpos,
677 - no optimization of calling REx engine can be performed,
678 unless it was an MBOL and we are not after MBOL,
679 or a future STCLASS check will fail this. */
681 /* Even in this situation we may use MBOL flag if strpos is offset
682 wrt the start of the string. */
683 if (ml_anch && sv && !SvROK(sv) /* See prev comment on SvROK */
684 && (strpos != strbeg) && strpos[-1] != '\n'
685 /* May be due to an implicit anchor of m{.*foo} */
686 && !(prog->reganch & ROPT_IMPLICIT))
691 DEBUG_r( if (ml_anch)
692 PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n",
693 (long)(strpos - i_strpos), PL_colors[0],PL_colors[1]);
696 if (!(prog->reganch & ROPT_NAUGHTY) /* XXXX If strpos moved? */
697 && prog->check_substr /* Could be deleted already */
698 && --BmUSEFUL(prog->check_substr) < 0
699 && prog->check_substr == prog->float_substr)
701 /* If flags & SOMETHING - do not do it many times on the same match */
702 DEBUG_r(PerlIO_printf(Perl_debug_log, "... Disabling check substring...\n"));
703 SvREFCNT_dec(prog->check_substr);
704 prog->check_substr = Nullsv; /* disable */
705 prog->float_substr = Nullsv; /* clear */
706 check = Nullsv; /* abort */
708 /* XXXX This is a remnant of the old implementation. It
709 looks wasteful, since now INTUIT can use many
711 prog->reganch &= ~RE_USE_INTUIT;
718 /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
719 if (prog->regstclass) {
720 /* minlen == 0 is possible if regstclass is \b or \B,
721 and the fixed substr is ''$.
722 Since minlen is already taken into account, s+1 is before strend;
723 accidentally, minlen >= 1 guaranties no false positives at s + 1
724 even for \b or \B. But (minlen? 1 : 0) below assumes that
725 regstclass does not come from lookahead... */
726 /* If regstclass takes bytelength more than 1: If charlength==1, OK.
727 This leaves EXACTF only, which is dealt with in find_byclass(). */
728 U8* str = (U8*)STRING(prog->regstclass);
729 int cl_l = (PL_regkind[(U8)OP(prog->regstclass)] == EXACT
730 ? CHR_DIST(str+STR_LEN(prog->regstclass), str)
732 char *endpos = (prog->anchored_substr || ml_anch)
733 ? HOP3c(s, (prog->minlen ? cl_l : 0), strend)
734 : (prog->float_substr
735 ? HOP3c(HOP3c(check_at, -start_shift, strbeg),
738 char *startpos = strbeg;
741 if (prog->reganch & ROPT_UTF8) {
742 PL_regdata = prog->data;
745 s = find_byclass(prog, prog->regstclass, s, endpos, startpos, 1);
750 if (endpos == strend) {
751 DEBUG_r( PerlIO_printf(Perl_debug_log,
752 "Could not match STCLASS...\n") );
755 DEBUG_r( PerlIO_printf(Perl_debug_log,
756 "This position contradicts STCLASS...\n") );
757 if ((prog->reganch & ROPT_ANCH) && !ml_anch)
759 /* Contradict one of substrings */
760 if (prog->anchored_substr) {
761 if (prog->anchored_substr == check) {
762 DEBUG_r( what = "anchored" );
764 s = HOP3c(t, 1, strend);
765 if (s + start_shift + end_shift > strend) {
766 /* XXXX Should be taken into account earlier? */
767 DEBUG_r( PerlIO_printf(Perl_debug_log,
768 "Could not match STCLASS...\n") );
773 DEBUG_r( PerlIO_printf(Perl_debug_log,
774 "Looking for %s substr starting at offset %ld...\n",
775 what, (long)(s + start_shift - i_strpos)) );
778 /* Have both, check_string is floating */
779 if (t + start_shift >= check_at) /* Contradicts floating=check */
780 goto retry_floating_check;
781 /* Recheck anchored substring, but not floating... */
785 DEBUG_r( PerlIO_printf(Perl_debug_log,
786 "Looking for anchored substr starting at offset %ld...\n",
787 (long)(other_last - i_strpos)) );
788 goto do_other_anchored;
790 /* Another way we could have checked stclass at the
791 current position only: */
796 DEBUG_r( PerlIO_printf(Perl_debug_log,
797 "Looking for /%s^%s/m starting at offset %ld...\n",
798 PL_colors[0],PL_colors[1], (long)(t - i_strpos)) );
801 if (!prog->float_substr) /* Could have been deleted */
803 /* Check is floating subtring. */
804 retry_floating_check:
805 t = check_at - start_shift;
806 DEBUG_r( what = "floating" );
807 goto hop_and_restart;
810 PerlIO_printf(Perl_debug_log,
811 "By STCLASS: moving %ld --> %ld\n",
812 (long)(t - i_strpos), (long)(s - i_strpos));
814 PerlIO_printf(Perl_debug_log,
815 "Does not contradict STCLASS...\n") );
818 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s%s:%s match at offset %ld\n",
819 PL_colors[4], (check ? "Guessed" : "Giving up"),
820 PL_colors[5], (long)(s - i_strpos)) );
823 fail_finish: /* Substring not found */
824 if (prog->check_substr) /* could be removed already */
825 BmUSEFUL(prog->check_substr) += 5; /* hooray */
827 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
828 PL_colors[4],PL_colors[5]));
832 /* We know what class REx starts with. Try to find this position... */
834 S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *startpos, I32 norun)
836 I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
842 register I32 tmp = 1; /* Scratch variable? */
843 register bool do_utf8 = DO_UTF8(PL_reg_sv);
845 /* We know what class it must start with. */
849 if (reginclass(c, (U8*)s, do_utf8)) {
850 if (tmp && (norun || regtry(prog, s)))
857 s += do_utf8 ? UTF8SKIP(s) : 1;
864 c1 = to_utf8_lower((U8*)m);
865 c2 = to_utf8_upper((U8*)m);
876 c2 = PL_fold_locale[c1];
881 e = s; /* Due to minlen logic of intuit() */
887 if ( utf8_to_uv_simple((U8*)s, &len) == c1
894 UV c = utf8_to_uv_simple((U8*)s, &len);
895 if ( (c == c1 || c == c2) && regtry(prog, s) )
904 && (ln == 1 || !(OP(c) == EXACTF
906 : ibcmp_locale(s, m, ln)))
907 && (norun || regtry(prog, s)) )
913 if ( (*(U8*)s == c1 || *(U8*)s == c2)
914 && (ln == 1 || !(OP(c) == EXACTF
916 : ibcmp_locale(s, m, ln)))
917 && (norun || regtry(prog, s)) )
924 PL_reg_flags |= RF_tainted;
931 U8 *r = reghop3((U8*)s, -1, (U8*)startpos);
933 tmp = (I32)utf8_to_uv(r, s - (char*)r, 0, 0);
935 tmp = ((OP(c) == BOUND ?
936 isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
938 if (tmp == !(OP(c) == BOUND ?
939 swash_fetch(PL_utf8_alnum, (U8*)s) :
940 isALNUM_LC_utf8((U8*)s)))
943 if ((norun || regtry(prog, s)))
950 tmp = (s != startpos) ? UCHARAT(s - 1) : '\n';
951 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
954 !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
956 if ((norun || regtry(prog, s)))
962 if ((!prog->minlen && tmp) && (norun || regtry(prog, s)))
966 PL_reg_flags |= RF_tainted;
973 U8 *r = reghop3((U8*)s, -1, (U8*)startpos);
975 tmp = (I32)utf8_to_uv(r, s - (char*)r, 0, 0);
977 tmp = ((OP(c) == NBOUND ?
978 isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
980 if (tmp == !(OP(c) == NBOUND ?
981 swash_fetch(PL_utf8_alnum, (U8*)s) :
982 isALNUM_LC_utf8((U8*)s)))
984 else if ((norun || regtry(prog, s)))
990 tmp = (s != startpos) ? UCHARAT(s - 1) : '\n';
991 tmp = ((OP(c) == NBOUND ?
992 isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
995 !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
997 else if ((norun || regtry(prog, s)))
1002 if ((!prog->minlen && !tmp) && (norun || regtry(prog, s)))
1007 while (s < strend) {
1008 if (swash_fetch(PL_utf8_alnum, (U8*)s)) {
1009 if (tmp && (norun || regtry(prog, s)))
1020 while (s < strend) {
1022 if (tmp && (norun || regtry(prog, s)))
1034 PL_reg_flags |= RF_tainted;
1036 while (s < strend) {
1037 if (isALNUM_LC_utf8((U8*)s)) {
1038 if (tmp && (norun || regtry(prog, s)))
1049 while (s < strend) {
1050 if (isALNUM_LC(*s)) {
1051 if (tmp && (norun || regtry(prog, s)))
1064 while (s < strend) {
1065 if (!swash_fetch(PL_utf8_alnum, (U8*)s)) {
1066 if (tmp && (norun || regtry(prog, s)))
1077 while (s < strend) {
1079 if (tmp && (norun || regtry(prog, s)))
1091 PL_reg_flags |= RF_tainted;
1093 while (s < strend) {
1094 if (!isALNUM_LC_utf8((U8*)s)) {
1095 if (tmp && (norun || regtry(prog, s)))
1106 while (s < strend) {
1107 if (!isALNUM_LC(*s)) {
1108 if (tmp && (norun || regtry(prog, s)))
1121 while (s < strend) {
1122 if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s)) {
1123 if (tmp && (norun || regtry(prog, s)))
1134 while (s < strend) {
1136 if (tmp && (norun || regtry(prog, s)))
1148 PL_reg_flags |= RF_tainted;
1150 while (s < strend) {
1151 if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) {
1152 if (tmp && (norun || regtry(prog, s)))
1163 while (s < strend) {
1164 if (isSPACE_LC(*s)) {
1165 if (tmp && (norun || regtry(prog, s)))
1178 while (s < strend) {
1179 if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s))) {
1180 if (tmp && (norun || regtry(prog, s)))
1191 while (s < strend) {
1193 if (tmp && (norun || regtry(prog, s)))
1205 PL_reg_flags |= RF_tainted;
1207 while (s < strend) {
1208 if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) {
1209 if (tmp && (norun || regtry(prog, s)))
1220 while (s < strend) {
1221 if (!isSPACE_LC(*s)) {
1222 if (tmp && (norun || regtry(prog, s)))
1235 while (s < strend) {
1236 if (swash_fetch(PL_utf8_digit,(U8*)s)) {
1237 if (tmp && (norun || regtry(prog, s)))
1248 while (s < strend) {
1250 if (tmp && (norun || regtry(prog, s)))
1262 PL_reg_flags |= RF_tainted;
1264 while (s < strend) {
1265 if (isDIGIT_LC_utf8((U8*)s)) {
1266 if (tmp && (norun || regtry(prog, s)))
1277 while (s < strend) {
1278 if (isDIGIT_LC(*s)) {
1279 if (tmp && (norun || regtry(prog, s)))
1292 while (s < strend) {
1293 if (!swash_fetch(PL_utf8_digit,(U8*)s)) {
1294 if (tmp && (norun || regtry(prog, s)))
1305 while (s < strend) {
1307 if (tmp && (norun || regtry(prog, s)))
1319 PL_reg_flags |= RF_tainted;
1321 while (s < strend) {
1322 if (!isDIGIT_LC_utf8((U8*)s)) {
1323 if (tmp && (norun || regtry(prog, s)))
1334 while (s < strend) {
1335 if (!isDIGIT_LC(*s)) {
1336 if (tmp && (norun || regtry(prog, s)))
1348 Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
1357 - regexec_flags - match a regexp against a string
1360 Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend,
1361 char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
1362 /* strend: pointer to null at end of string */
1363 /* strbeg: real beginning of string */
1364 /* minend: end of match must be >=minend after stringarg. */
1365 /* data: May be used for some additional optimizations. */
1366 /* nosave: For optimizations. */
1369 register regnode *c;
1370 register char *startpos = stringarg;
1371 I32 minlen; /* must match at least this many chars */
1372 I32 dontbother = 0; /* how many characters not to try at end */
1373 /* I32 start_shift = 0; */ /* Offset of the start to find
1374 constant substr. */ /* CC */
1375 I32 end_shift = 0; /* Same for the end. */ /* CC */
1376 I32 scream_pos = -1; /* Internal iterator of scream. */
1378 SV* oreplsv = GvSV(PL_replgv);
1379 bool do_utf8 = DO_UTF8(sv);
1385 PL_regnarrate = PL_debug & 512;
1388 /* Be paranoid... */
1389 if (prog == NULL || startpos == NULL) {
1390 Perl_croak(aTHX_ "NULL regexp parameter");
1394 minlen = prog->minlen;
1396 if (utf8_distance((U8*)strend, (U8*)startpos) < minlen) goto phooey;
1399 if (strend - startpos < minlen) goto phooey;
1402 if (startpos == strbeg) /* is ^ valid at stringarg? */
1405 if (prog->reganch & ROPT_UTF8 && do_utf8) {
1406 U8 *s = reghop3((U8*)stringarg, -1, (U8*)strbeg);
1407 PL_regprev = utf8_to_uv(s, (U8*)stringarg - s, NULL, 0);
1410 PL_regprev = (U32)stringarg[-1];
1411 if (!PL_multiline && PL_regprev == '\n')
1412 PL_regprev = '\0'; /* force ^ to NOT match */
1415 /* Check validity of program. */
1416 if (UCHARAT(prog->program) != REG_MAGIC) {
1417 Perl_croak(aTHX_ "corrupted regexp program");
1421 PL_reg_eval_set = 0;
1424 if (prog->reganch & ROPT_UTF8)
1425 PL_reg_flags |= RF_utf8;
1427 /* Mark beginning of line for ^ and lookbehind. */
1428 PL_regbol = startpos;
1432 /* Mark end of line for $ (and such) */
1435 /* see how far we have to get to not match where we matched before */
1436 PL_regtill = startpos+minend;
1438 /* We start without call_cc context. */
1441 /* If there is a "must appear" string, look for it. */
1444 if (prog->reganch & ROPT_GPOS_SEEN) { /* Need to have PL_reg_ganch */
1447 if (flags & REXEC_IGNOREPOS) /* Means: check only at start */
1448 PL_reg_ganch = startpos;
1449 else if (sv && SvTYPE(sv) >= SVt_PVMG
1451 && (mg = mg_find(sv, 'g')) && mg->mg_len >= 0) {
1452 PL_reg_ganch = strbeg + mg->mg_len; /* Defined pos() */
1453 if (prog->reganch & ROPT_ANCH_GPOS) {
1454 if (s > PL_reg_ganch)
1459 else /* pos() not defined */
1460 PL_reg_ganch = strbeg;
1463 if (do_utf8 == (UTF!=0) &&
1464 !(flags & REXEC_CHECKED) && prog->check_substr != Nullsv) {
1465 re_scream_pos_data d;
1467 d.scream_olds = &scream_olds;
1468 d.scream_pos = &scream_pos;
1469 s = re_intuit_start(prog, sv, s, strend, flags, &d);
1471 goto phooey; /* not present */
1474 DEBUG_r( if (!PL_colorset) reginitcolors() );
1475 DEBUG_r(PerlIO_printf(Perl_debug_log,
1476 "%sMatching REx%s `%s%.60s%s%s' against `%s%.*s%s%s'\n",
1477 PL_colors[4],PL_colors[5],PL_colors[0],
1480 (strlen(prog->precomp) > 60 ? "..." : ""),
1482 (int)(strend - startpos > 60 ? 60 : strend - startpos),
1483 startpos, PL_colors[1],
1484 (strend - startpos > 60 ? "..." : ""))
1487 /* Simplest case: anchored match need be tried only once. */
1488 /* [unless only anchor is BOL and multiline is set] */
1489 if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) {
1490 if (s == startpos && regtry(prog, startpos))
1492 else if (PL_multiline || (prog->reganch & ROPT_IMPLICIT)
1493 || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */
1498 dontbother = minlen - 1;
1499 end = HOP3c(strend, -dontbother, strbeg) - 1;
1500 /* for multiline we only have to try after newlines */
1501 if (prog->check_substr) {
1505 if (regtry(prog, s))
1510 if (prog->reganch & RE_USE_INTUIT) {
1511 s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
1522 if (*s++ == '\n') { /* don't need PL_utf8skip here */
1523 if (regtry(prog, s))
1530 } else if (prog->reganch & ROPT_ANCH_GPOS) {
1531 if (regtry(prog, PL_reg_ganch))
1536 /* Messy cases: unanchored match. */
1537 if (prog->anchored_substr && prog->reganch & ROPT_SKIP) {
1538 /* we have /x+whatever/ */
1539 /* it must be a one character string (XXXX Except UTF?) */
1540 char ch = SvPVX(prog->anchored_substr)[0];
1546 while (s < strend) {
1548 DEBUG_r( did_match = 1 );
1549 if (regtry(prog, s)) goto got_it;
1551 while (s < strend && *s == ch)
1558 while (s < strend) {
1560 DEBUG_r( did_match = 1 );
1561 if (regtry(prog, s)) goto got_it;
1563 while (s < strend && *s == ch)
1569 DEBUG_r(did_match ||
1570 PerlIO_printf(Perl_debug_log,
1571 "Did not find anchored character...\n"));
1574 else if (do_utf8 == (UTF!=0) &&
1575 (prog->anchored_substr != Nullsv
1576 || (prog->float_substr != Nullsv
1577 && prog->float_max_offset < strend - s))) {
1578 SV *must = prog->anchored_substr
1579 ? prog->anchored_substr : prog->float_substr;
1581 prog->anchored_substr ? prog->anchored_offset : prog->float_max_offset;
1583 prog->anchored_substr ? prog->anchored_offset : prog->float_min_offset;
1584 char *last = HOP3c(strend, /* Cannot start after this */
1585 -(I32)(CHR_SVLEN(must)
1586 - (SvTAIL(must) != 0) + back_min), strbeg);
1587 char *last1; /* Last position checked before */
1593 last1 = HOPc(s, -1);
1595 last1 = s - 1; /* bogus */
1597 /* XXXX check_substr already used to find `s', can optimize if
1598 check_substr==must. */
1600 dontbother = end_shift;
1601 strend = HOPc(strend, -dontbother);
1602 while ( (s <= last) &&
1603 ((flags & REXEC_SCREAM)
1604 ? (s = screaminstr(sv, must, HOP3c(s, back_min, strend) - strbeg,
1605 end_shift, &scream_pos, 0))
1606 : (s = fbm_instr((unsigned char*)HOP3(s, back_min, strend),
1607 (unsigned char*)strend, must,
1608 PL_multiline ? FBMrf_MULTILINE : 0))) ) {
1609 DEBUG_r( did_match = 1 );
1610 if (HOPc(s, -back_max) > last1) {
1611 last1 = HOPc(s, -back_min);
1612 s = HOPc(s, -back_max);
1615 char *t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
1617 last1 = HOPc(s, -back_min);
1621 while (s <= last1) {
1622 if (regtry(prog, s))
1628 while (s <= last1) {
1629 if (regtry(prog, s))
1635 DEBUG_r(did_match ||
1636 PerlIO_printf(Perl_debug_log, "Did not find %s substr `%s%.*s%s'%s...\n",
1637 ((must == prog->anchored_substr)
1638 ? "anchored" : "floating"),
1640 (int)(SvCUR(must) - (SvTAIL(must)!=0)),
1642 PL_colors[1], (SvTAIL(must) ? "$" : "")));
1645 else if ((c = prog->regstclass)) {
1646 if (minlen && PL_regkind[(U8)OP(prog->regstclass)] != EXACT)
1647 /* don't bother with what can't match */
1648 strend = HOPc(strend, -(minlen - 1));
1650 SV *prop = sv_newmortal();
1652 PerlIO_printf(Perl_debug_log, "Matching stclass `%s' against `%s'\n", SvPVX(prop), s);
1654 if (find_byclass(prog, c, s, strend, startpos, 0))
1656 DEBUG_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass...\n"));
1660 if (prog->float_substr != Nullsv) { /* Trim the end. */
1663 if (flags & REXEC_SCREAM) {
1664 last = screaminstr(sv, prog->float_substr, s - strbeg,
1665 end_shift, &scream_pos, 1); /* last one */
1667 last = scream_olds; /* Only one occurrence. */
1671 char *little = SvPV(prog->float_substr, len);
1673 if (SvTAIL(prog->float_substr)) {
1674 if (memEQ(strend - len + 1, little, len - 1))
1675 last = strend - len + 1;
1676 else if (!PL_multiline)
1677 last = memEQ(strend - len, little, len)
1678 ? strend - len : Nullch;
1684 last = rninstr(s, strend, little, little + len);
1686 last = strend; /* matching `$' */
1690 DEBUG_r(PerlIO_printf(Perl_debug_log,
1691 "%sCan't trim the tail, match fails (should not happen)%s\n",
1692 PL_colors[4],PL_colors[5]));
1693 goto phooey; /* Should not happen! */
1695 dontbother = strend - last + prog->float_min_offset;
1697 if (minlen && (dontbother < minlen))
1698 dontbother = minlen - 1;
1699 strend -= dontbother; /* this one's always in bytes! */
1700 /* We don't know much -- general case. */
1703 if (regtry(prog, s))
1712 if (regtry(prog, s))
1714 } while (s++ < strend);
1722 RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
1724 if (PL_reg_eval_set) {
1725 /* Preserve the current value of $^R */
1726 if (oreplsv != GvSV(PL_replgv))
1727 sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
1728 restored, the value remains
1730 restore_pos(aTHXo_ 0);
1733 /* make sure $`, $&, $', and $digit will work later */
1734 if ( !(flags & REXEC_NOT_FIRST) ) {
1735 if (RX_MATCH_COPIED(prog)) {
1736 Safefree(prog->subbeg);
1737 RX_MATCH_COPIED_off(prog);
1739 if (flags & REXEC_COPY_STR) {
1740 I32 i = PL_regeol - startpos + (stringarg - strbeg);
1742 s = savepvn(strbeg, i);
1745 RX_MATCH_COPIED_on(prog);
1748 prog->subbeg = strbeg;
1749 prog->sublen = PL_regeol - strbeg; /* strend may have been modified */
1756 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n",
1757 PL_colors[4],PL_colors[5]));
1758 if (PL_reg_eval_set)
1759 restore_pos(aTHXo_ 0);
1764 - regtry - try match at specific point
1766 STATIC I32 /* 0 failure, 1 success */
1767 S_regtry(pTHX_ regexp *prog, char *startpos)
1775 PL_regindent = 0; /* XXXX Not good when matches are reenterable... */
1777 if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
1780 PL_reg_eval_set = RS_init;
1782 PerlIO_printf(Perl_debug_log, " setting stack tmpbase at %"IVdf"\n",
1783 (IV)(PL_stack_sp - PL_stack_base));
1785 SAVEI32(cxstack[cxstack_ix].blk_oldsp);
1786 cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
1787 /* Otherwise OP_NEXTSTATE will free whatever on stack now. */
1789 /* Apparently this is not needed, judging by wantarray. */
1790 /* SAVEI8(cxstack[cxstack_ix].blk_gimme);
1791 cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
1794 /* Make $_ available to executed code. */
1795 if (PL_reg_sv != DEFSV) {
1796 /* SAVE_DEFSV does *not* suffice here for USE_THREADS */
1801 if (!(SvTYPE(PL_reg_sv) >= SVt_PVMG && SvMAGIC(PL_reg_sv)
1802 && (mg = mg_find(PL_reg_sv, 'g')))) {
1803 /* prepare for quick setting of pos */
1804 sv_magic(PL_reg_sv, (SV*)0, 'g', Nullch, 0);
1805 mg = mg_find(PL_reg_sv, 'g');
1809 PL_reg_oldpos = mg->mg_len;
1810 SAVEDESTRUCTOR_X(restore_pos, 0);
1813 Newz(22,PL_reg_curpm, 1, PMOP);
1814 PL_reg_curpm->op_pmregexp = prog;
1815 PL_reg_oldcurpm = PL_curpm;
1816 PL_curpm = PL_reg_curpm;
1817 if (RX_MATCH_COPIED(prog)) {
1818 /* Here is a serious problem: we cannot rewrite subbeg,
1819 since it may be needed if this match fails. Thus
1820 $` inside (?{}) could fail... */
1821 PL_reg_oldsaved = prog->subbeg;
1822 PL_reg_oldsavedlen = prog->sublen;
1823 RX_MATCH_COPIED_off(prog);
1826 PL_reg_oldsaved = Nullch;
1827 prog->subbeg = PL_bostr;
1828 prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
1830 prog->startp[0] = startpos - PL_bostr;
1831 PL_reginput = startpos;
1832 PL_regstartp = prog->startp;
1833 PL_regendp = prog->endp;
1834 PL_reglastparen = &prog->lastparen;
1835 prog->lastparen = 0;
1837 DEBUG_r(PL_reg_starttry = startpos);
1838 if (PL_reg_start_tmpl <= prog->nparens) {
1839 PL_reg_start_tmpl = prog->nparens*3/2 + 3;
1840 if(PL_reg_start_tmp)
1841 Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1843 New(22,PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1846 /* XXXX What this code is doing here?!!! There should be no need
1847 to do this again and again, PL_reglastparen should take care of
1850 /* Tests pat.t#187 and split.t#{13,14} seem to depend on this code.
1851 * Actually, the code in regcppop() (which Ilya may be meaning by
1852 * PL_reglastparen), is not needed at all by the test suite
1853 * (op/regexp, op/pat, op/split), but that code is needed, oddly
1854 * enough, for building DynaLoader, or otherwise this
1855 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
1856 * will happen. Meanwhile, this code *is* needed for the
1857 * above-mentioned test suite tests to succeed. The common theme
1858 * on those tests seems to be returning null fields from matches.
1863 if (prog->nparens) {
1864 for (i = prog->nparens; i > *PL_reglastparen; i--) {
1871 if (regmatch(prog->program + 1)) {
1872 prog->endp[0] = PL_reginput - PL_bostr;
1875 REGCP_UNWIND(lastcp);
1879 #define RE_UNWIND_BRANCH 1
1880 #define RE_UNWIND_BRANCHJ 2
1884 typedef struct { /* XX: makes sense to enlarge it... */
1888 } re_unwind_generic_t;
1901 } re_unwind_branch_t;
1903 typedef union re_unwind_t {
1905 re_unwind_generic_t generic;
1906 re_unwind_branch_t branch;
1910 - regmatch - main matching routine
1912 * Conceptually the strategy is simple: check to see whether the current
1913 * node matches, call self recursively to see whether the rest matches,
1914 * and then act accordingly. In practice we make some effort to avoid
1915 * recursion, in particular by going through "ordinary" nodes (that don't
1916 * need to know whether the rest of the match failed) by a loop instead of
1919 /* [lwall] I've hoisted the register declarations to the outer block in order to
1920 * maybe save a little bit of pushing and popping on the stack. It also takes
1921 * advantage of machines that use a register save mask on subroutine entry.
1923 STATIC I32 /* 0 failure, 1 success */
1924 S_regmatch(pTHX_ regnode *prog)
1926 register regnode *scan; /* Current node. */
1927 regnode *next; /* Next node. */
1928 regnode *inner; /* Next node in internal branch. */
1929 register I32 nextchr; /* renamed nextchr - nextchar colides with
1930 function of same name */
1931 register I32 n; /* no or next */
1932 register I32 ln; /* len or last */
1933 register char *s; /* operand or save */
1934 register char *locinput = PL_reginput;
1935 register I32 c1, c2, paren; /* case fold search, parenth */
1936 int minmod = 0, sw = 0, logical = 0;
1938 I32 firstcp = PL_savestack_ix;
1939 register bool do_utf8 = DO_UTF8(PL_reg_sv);
1945 /* Note that nextchr is a byte even in UTF */
1946 nextchr = UCHARAT(locinput);
1948 while (scan != NULL) {
1949 #define sayNO_L (logical ? (logical = 0, sw = 0, goto cont) : sayNO)
1951 # define sayYES goto yes
1952 # define sayNO goto no
1953 # define sayYES_FINAL goto yes_final
1954 # define sayYES_LOUD goto yes_loud
1955 # define sayNO_FINAL goto no_final
1956 # define sayNO_SILENT goto do_no
1957 # define saySAME(x) if (x) goto yes; else goto no
1958 # define REPORT_CODE_OFF 24
1960 # define sayYES return 1
1961 # define sayNO return 0
1962 # define sayYES_FINAL return 1
1963 # define sayYES_LOUD return 1
1964 # define sayNO_FINAL return 0
1965 # define sayNO_SILENT return 0
1966 # define saySAME(x) return x
1969 SV *prop = sv_newmortal();
1970 int docolor = *PL_colors[0];
1971 int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
1972 int l = (PL_regeol - locinput) > taill ? taill : (PL_regeol - locinput);
1973 /* The part of the string before starttry has one color
1974 (pref0_len chars), between starttry and current
1975 position another one (pref_len - pref0_len chars),
1976 after the current position the third one.
1977 We assume that pref0_len <= pref_len, otherwise we
1978 decrease pref0_len. */
1979 int pref_len = (locinput - PL_bostr) > (5 + taill) - l
1980 ? (5 + taill) - l : locinput - PL_bostr;
1983 while (UTF8_IS_CONTINUATION(*(U8*)(locinput - pref_len)))
1985 pref0_len = pref_len - (locinput - PL_reg_starttry);
1986 if (l + pref_len < (5 + taill) && l < PL_regeol - locinput)
1987 l = ( PL_regeol - locinput > (5 + taill) - pref_len
1988 ? (5 + taill) - pref_len : PL_regeol - locinput);
1989 while (UTF8_IS_CONTINUATION(*(U8*)(locinput + l)))
1993 if (pref0_len > pref_len)
1994 pref0_len = pref_len;
1995 regprop(prop, scan);
1996 PerlIO_printf(Perl_debug_log,
1997 "%4"IVdf" <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|%3"IVdf":%*s%s\n",
1998 (IV)(locinput - PL_bostr),
1999 PL_colors[4], pref0_len,
2000 locinput - pref_len, PL_colors[5],
2001 PL_colors[2], pref_len - pref0_len,
2002 locinput - pref_len + pref0_len, PL_colors[3],
2003 (docolor ? "" : "> <"),
2004 PL_colors[0], l, locinput, PL_colors[1],
2005 15 - l - pref_len + 1,
2007 (IV)(scan - PL_regprogram), PL_regindent*2, "",
2011 next = scan + NEXT_OFF(scan);
2017 if (locinput == PL_bostr
2018 ? PL_regprev == '\n'
2020 (nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
2022 /* regtill = regbol; */
2027 if (locinput == PL_bostr
2028 ? PL_regprev == '\n'
2029 : ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
2035 if (locinput == PL_bostr)
2039 if (locinput == PL_reg_ganch)
2049 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
2054 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
2056 if (PL_regeol - locinput > 1)
2060 if (PL_regeol != locinput)
2065 locinput += PL_utf8skip[nextchr];
2066 if (locinput > PL_regeol)
2068 nextchr = UCHARAT(locinput);
2071 if (!nextchr && locinput >= PL_regeol)
2073 nextchr = UCHARAT(++locinput);
2076 if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
2079 locinput += PL_utf8skip[nextchr];
2080 if (locinput > PL_regeol)
2082 nextchr = UCHARAT(locinput);
2085 nextchr = UCHARAT(++locinput);
2090 if (do_utf8 != (UTF!=0)) {
2098 if (*((U8*)s) != utf8_to_uv_simple((U8*)l, &len))
2107 if (*((U8*)l) != utf8_to_uv_simple((U8*)s, &len))
2113 nextchr = UCHARAT(locinput);
2116 /* Inline the first character, for speed. */
2117 if (UCHARAT(s) != nextchr)
2119 if (PL_regeol - locinput < ln)
2121 if (ln > 1 && memNE(s, locinput, ln))
2124 nextchr = UCHARAT(locinput);
2127 PL_reg_flags |= RF_tainted;
2137 c1 = OP(scan) == EXACTF;
2139 if (l >= PL_regeol) {
2142 if ((UTF ? utf8_to_uv((U8*)s, e - s, 0, 0) : *((U8*)s)) !=
2143 (c1 ? toLOWER_utf8((U8*)l) : toLOWER_LC_utf8((U8*)l)))
2145 s += UTF ? UTF8SKIP(s) : 1;
2149 nextchr = UCHARAT(locinput);
2153 /* Inline the first character, for speed. */
2154 if (UCHARAT(s) != nextchr &&
2155 UCHARAT(s) != ((OP(scan) == EXACTF)
2156 ? PL_fold : PL_fold_locale)[nextchr])
2158 if (PL_regeol - locinput < ln)
2160 if (ln > 1 && (OP(scan) == EXACTF
2161 ? ibcmp(s, locinput, ln)
2162 : ibcmp_locale(s, locinput, ln)))
2165 nextchr = UCHARAT(locinput);
2169 if (!reginclass(scan, (U8*)locinput, do_utf8))
2171 if (locinput >= PL_regeol)
2173 locinput += PL_utf8skip[nextchr];
2174 nextchr = UCHARAT(locinput);
2178 nextchr = UCHARAT(locinput);
2179 if (!reginclass(scan, (U8*)locinput, do_utf8))
2181 if (!nextchr && locinput >= PL_regeol)
2183 nextchr = UCHARAT(++locinput);
2187 PL_reg_flags |= RF_tainted;
2193 if (!(OP(scan) == ALNUM
2194 ? swash_fetch(PL_utf8_alnum, (U8*)locinput)
2195 : isALNUM_LC_utf8((U8*)locinput)))
2199 locinput += PL_utf8skip[nextchr];
2200 nextchr = UCHARAT(locinput);
2203 if (!(OP(scan) == ALNUM
2204 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
2206 nextchr = UCHARAT(++locinput);
2209 PL_reg_flags |= RF_tainted;
2212 if (!nextchr && locinput >= PL_regeol)
2215 if (OP(scan) == NALNUM
2216 ? swash_fetch(PL_utf8_alnum, (U8*)locinput)
2217 : isALNUM_LC_utf8((U8*)locinput))
2221 locinput += PL_utf8skip[nextchr];
2222 nextchr = UCHARAT(locinput);
2225 if (OP(scan) == NALNUM
2226 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
2228 nextchr = UCHARAT(++locinput);
2232 PL_reg_flags |= RF_tainted;
2236 /* was last char in word? */
2238 if (locinput == PL_regbol)
2241 U8 *r = reghop((U8*)locinput, -1);
2243 ln = utf8_to_uv(r, s - (char*)r, 0, 0);
2245 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2246 ln = isALNUM_uni(ln);
2247 n = swash_fetch(PL_utf8_alnum, (U8*)locinput);
2250 ln = isALNUM_LC_uni(ln);
2251 n = isALNUM_LC_utf8((U8*)locinput);
2255 ln = (locinput != PL_regbol) ?
2256 UCHARAT(locinput - 1) : PL_regprev;
2257 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2259 n = isALNUM(nextchr);
2262 ln = isALNUM_LC(ln);
2263 n = isALNUM_LC(nextchr);
2266 if (((!ln) == (!n)) == (OP(scan) == BOUND ||
2267 OP(scan) == BOUNDL))
2271 PL_reg_flags |= RF_tainted;
2277 if (UTF8_IS_CONTINUED(nextchr)) {
2278 if (!(OP(scan) == SPACE
2279 ? swash_fetch(PL_utf8_space, (U8*)locinput)
2280 : isSPACE_LC_utf8((U8*)locinput)))
2284 locinput += PL_utf8skip[nextchr];
2285 nextchr = UCHARAT(locinput);
2288 if (!(OP(scan) == SPACE
2289 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
2291 nextchr = UCHARAT(++locinput);
2294 if (!(OP(scan) == SPACE
2295 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
2297 nextchr = UCHARAT(++locinput);
2301 PL_reg_flags |= RF_tainted;
2304 if (!nextchr && locinput >= PL_regeol)
2307 if (OP(scan) == NSPACE
2308 ? swash_fetch(PL_utf8_space, (U8*)locinput)
2309 : isSPACE_LC_utf8((U8*)locinput))
2313 locinput += PL_utf8skip[nextchr];
2314 nextchr = UCHARAT(locinput);
2317 if (OP(scan) == NSPACE
2318 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
2320 nextchr = UCHARAT(++locinput);
2323 PL_reg_flags |= RF_tainted;
2329 if (!(OP(scan) == DIGIT
2330 ? swash_fetch(PL_utf8_digit, (U8*)locinput)
2331 : isDIGIT_LC_utf8((U8*)locinput)))
2335 locinput += PL_utf8skip[nextchr];
2336 nextchr = UCHARAT(locinput);
2339 if (!(OP(scan) == DIGIT
2340 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
2342 nextchr = UCHARAT(++locinput);
2345 PL_reg_flags |= RF_tainted;
2348 if (!nextchr && locinput >= PL_regeol)
2351 if (OP(scan) == NDIGIT
2352 ? swash_fetch(PL_utf8_digit, (U8*)locinput)
2353 : isDIGIT_LC_utf8((U8*)locinput))
2357 locinput += PL_utf8skip[nextchr];
2358 nextchr = UCHARAT(locinput);
2361 if (OP(scan) == NDIGIT
2362 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
2364 nextchr = UCHARAT(++locinput);
2367 if (locinput >= PL_regeol || swash_fetch(PL_utf8_mark,(U8*)locinput))
2369 locinput += PL_utf8skip[nextchr];
2370 while (locinput < PL_regeol && swash_fetch(PL_utf8_mark,(U8*)locinput))
2371 locinput += UTF8SKIP(locinput);
2372 if (locinput > PL_regeol)
2374 nextchr = UCHARAT(locinput);
2377 PL_reg_flags |= RF_tainted;
2381 n = ARG(scan); /* which paren pair */
2382 ln = PL_regstartp[n];
2383 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
2384 if (*PL_reglastparen < n || ln == -1)
2385 sayNO; /* Do not match unless seen CLOSEn. */
2386 if (ln == PL_regendp[n])
2390 if (do_utf8 && OP(scan) != REF) { /* REF can do byte comparison */
2392 char *e = PL_bostr + PL_regendp[n];
2394 * Note that we can't do the "other character" lookup trick as
2395 * in the 8-bit case (no pun intended) because in Unicode we
2396 * have to map both upper and title case to lower case.
2398 if (OP(scan) == REFF) {
2402 if (toLOWER_utf8((U8*)s) != toLOWER_utf8((U8*)l))
2412 if (toLOWER_LC_utf8((U8*)s) != toLOWER_LC_utf8((U8*)l))
2419 nextchr = UCHARAT(locinput);
2423 /* Inline the first character, for speed. */
2424 if (UCHARAT(s) != nextchr &&
2426 (UCHARAT(s) != ((OP(scan) == REFF
2427 ? PL_fold : PL_fold_locale)[nextchr]))))
2429 ln = PL_regendp[n] - ln;
2430 if (locinput + ln > PL_regeol)
2432 if (ln > 1 && (OP(scan) == REF
2433 ? memNE(s, locinput, ln)
2435 ? ibcmp(s, locinput, ln)
2436 : ibcmp_locale(s, locinput, ln))))
2439 nextchr = UCHARAT(locinput);
2450 OP_4tree *oop = PL_op;
2451 COP *ocurcop = PL_curcop;
2452 SV **ocurpad = PL_curpad;
2456 PL_op = (OP_4tree*)PL_regdata->data[n];
2457 DEBUG_r( PerlIO_printf(Perl_debug_log, " re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
2458 PL_curpad = AvARRAY((AV*)PL_regdata->data[n + 2]);
2459 PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
2461 CALLRUNOPS(aTHX); /* Scalar context. */
2467 PL_curpad = ocurpad;
2468 PL_curcop = ocurcop;
2470 if (logical == 2) { /* Postponed subexpression. */
2472 MAGIC *mg = Null(MAGIC*);
2474 CHECKPOINT cp, lastcp;
2476 if(SvROK(ret) || SvRMAGICAL(ret)) {
2477 SV *sv = SvROK(ret) ? SvRV(ret) : ret;
2480 mg = mg_find(sv, 'r');
2483 re = (regexp *)mg->mg_obj;
2484 (void)ReREFCNT_inc(re);
2488 char *t = SvPV(ret, len);
2490 char *oprecomp = PL_regprecomp;
2491 I32 osize = PL_regsize;
2492 I32 onpar = PL_regnpar;
2495 re = CALLREGCOMP(aTHX_ t, t + len, &pm);
2497 & (SVs_TEMP | SVs_PADTMP | SVf_READONLY)))
2498 sv_magic(ret,(SV*)ReREFCNT_inc(re),'r',0,0);
2499 PL_regprecomp = oprecomp;
2504 PerlIO_printf(Perl_debug_log,
2505 "Entering embedded `%s%.60s%s%s'\n",
2509 (strlen(re->precomp) > 60 ? "..." : ""))
2512 state.prev = PL_reg_call_cc;
2513 state.cc = PL_regcc;
2514 state.re = PL_reg_re;
2518 cp = regcppush(0); /* Save *all* the positions. */
2521 state.ss = PL_savestack_ix;
2522 *PL_reglastparen = 0;
2523 PL_reg_call_cc = &state;
2524 PL_reginput = locinput;
2526 /* XXXX This is too dramatic a measure... */
2529 if (regmatch(re->program + 1)) {
2530 /* Even though we succeeded, we need to restore
2531 global variables, since we may be wrapped inside
2532 SUSPEND, thus the match may be not finished yet. */
2534 /* XXXX Do this only if SUSPENDed? */
2535 PL_reg_call_cc = state.prev;
2536 PL_regcc = state.cc;
2537 PL_reg_re = state.re;
2538 cache_re(PL_reg_re);
2540 /* XXXX This is too dramatic a measure... */
2543 /* These are needed even if not SUSPEND. */
2549 REGCP_UNWIND(lastcp);
2551 PL_reg_call_cc = state.prev;
2552 PL_regcc = state.cc;
2553 PL_reg_re = state.re;
2554 cache_re(PL_reg_re);
2556 /* XXXX This is too dramatic a measure... */
2565 sv_setsv(save_scalar(PL_replgv), ret);
2569 n = ARG(scan); /* which paren pair */
2570 PL_reg_start_tmp[n] = locinput;
2575 n = ARG(scan); /* which paren pair */
2576 PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
2577 PL_regendp[n] = locinput - PL_bostr;
2578 if (n > *PL_reglastparen)
2579 *PL_reglastparen = n;
2582 n = ARG(scan); /* which paren pair */
2583 sw = (*PL_reglastparen >= n && PL_regendp[n] != -1);
2586 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
2588 next = NEXTOPER(NEXTOPER(scan));
2590 next = scan + ARG(scan);
2591 if (OP(next) == IFTHEN) /* Fake one. */
2592 next = NEXTOPER(NEXTOPER(next));
2596 logical = scan->flags;
2598 /*******************************************************************
2599 PL_regcc contains infoblock about the innermost (...)* loop, and
2600 a pointer to the next outer infoblock.
2602 Here is how Y(A)*Z is processed (if it is compiled into CURLYX/WHILEM):
2604 1) After matching X, regnode for CURLYX is processed;
2606 2) This regnode creates infoblock on the stack, and calls
2607 regmatch() recursively with the starting point at WHILEM node;
2609 3) Each hit of WHILEM node tries to match A and Z (in the order
2610 depending on the current iteration, min/max of {min,max} and
2611 greediness). The information about where are nodes for "A"
2612 and "Z" is read from the infoblock, as is info on how many times "A"
2613 was already matched, and greediness.
2615 4) After A matches, the same WHILEM node is hit again.
2617 5) Each time WHILEM is hit, PL_regcc is the infoblock created by CURLYX
2618 of the same pair. Thus when WHILEM tries to match Z, it temporarily
2619 resets PL_regcc, since this Y(A)*Z can be a part of some other loop:
2620 as in (Y(A)*Z)*. If Z matches, the automaton will hit the WHILEM node
2621 of the external loop.
2623 Currently present infoblocks form a tree with a stem formed by PL_curcc
2624 and whatever it mentions via ->next, and additional attached trees
2625 corresponding to temporarily unset infoblocks as in "5" above.
2627 In the following picture infoblocks for outer loop of
2628 (Y(A)*?Z)*?T are denoted O, for inner I. NULL starting block
2629 is denoted by x. The matched string is YAAZYAZT. Temporarily postponed
2630 infoblocks are drawn below the "reset" infoblock.
2632 In fact in the picture below we do not show failed matches for Z and T
2633 by WHILEM blocks. [We illustrate minimal matches, since for them it is
2634 more obvious *why* one needs to *temporary* unset infoblocks.]
2636 Matched REx position InfoBlocks Comment
2640 Y A)*?Z)*?T x <- O <- I
2641 YA )*?Z)*?T x <- O <- I
2642 YA A)*?Z)*?T x <- O <- I
2643 YAA )*?Z)*?T x <- O <- I
2644 YAA Z)*?T x <- O # Temporary unset I
2647 YAAZ Y(A)*?Z)*?T x <- O
2650 YAAZY (A)*?Z)*?T x <- O
2653 YAAZY A)*?Z)*?T x <- O <- I
2656 YAAZYA )*?Z)*?T x <- O <- I
2659 YAAZYA Z)*?T x <- O # Temporary unset I
2665 YAAZYAZ T x # Temporary unset O
2672 *******************************************************************/
2675 CHECKPOINT cp = PL_savestack_ix;
2676 /* No need to save/restore up to this paren */
2677 I32 parenfloor = scan->flags;
2679 if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
2681 cc.oldcc = PL_regcc;
2683 /* XXXX Probably it is better to teach regpush to support
2684 parenfloor > PL_regsize... */
2685 if (parenfloor > *PL_reglastparen)
2686 parenfloor = *PL_reglastparen; /* Pessimization... */
2687 cc.parenfloor = parenfloor;
2689 cc.min = ARG1(scan);
2690 cc.max = ARG2(scan);
2691 cc.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
2695 PL_reginput = locinput;
2696 n = regmatch(PREVOPER(next)); /* start on the WHILEM */
2698 PL_regcc = cc.oldcc;
2704 * This is really hard to understand, because after we match
2705 * what we're trying to match, we must make sure the rest of
2706 * the REx is going to match for sure, and to do that we have
2707 * to go back UP the parse tree by recursing ever deeper. And
2708 * if it fails, we have to reset our parent's current state
2709 * that we can try again after backing off.
2712 CHECKPOINT cp, lastcp;
2713 CURCUR* cc = PL_regcc;
2714 char *lastloc = cc->lastloc; /* Detection of 0-len. */
2716 n = cc->cur + 1; /* how many we know we matched */
2717 PL_reginput = locinput;
2720 PerlIO_printf(Perl_debug_log,
2721 "%*s %ld out of %ld..%ld cc=%lx\n",
2722 REPORT_CODE_OFF+PL_regindent*2, "",
2723 (long)n, (long)cc->min,
2724 (long)cc->max, (long)cc)
2727 /* If degenerate scan matches "", assume scan done. */
2729 if (locinput == cc->lastloc && n >= cc->min) {
2730 PL_regcc = cc->oldcc;
2734 PerlIO_printf(Perl_debug_log,
2735 "%*s empty match detected, try continuation...\n",
2736 REPORT_CODE_OFF+PL_regindent*2, "")
2738 if (regmatch(cc->next))
2746 /* First just match a string of min scans. */
2750 cc->lastloc = locinput;
2751 if (regmatch(cc->scan))
2754 cc->lastloc = lastloc;
2759 /* Check whether we already were at this position.
2760 Postpone detection until we know the match is not
2761 *that* much linear. */
2762 if (!PL_reg_maxiter) {
2763 PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
2764 PL_reg_leftiter = PL_reg_maxiter;
2766 if (PL_reg_leftiter-- == 0) {
2767 I32 size = (PL_reg_maxiter + 7)/8;
2768 if (PL_reg_poscache) {
2769 if (PL_reg_poscache_size < size) {
2770 Renew(PL_reg_poscache, size, char);
2771 PL_reg_poscache_size = size;
2773 Zero(PL_reg_poscache, size, char);
2776 PL_reg_poscache_size = size;
2777 Newz(29, PL_reg_poscache, size, char);
2780 PerlIO_printf(Perl_debug_log,
2781 "%sDetected a super-linear match, switching on caching%s...\n",
2782 PL_colors[4], PL_colors[5])
2785 if (PL_reg_leftiter < 0) {
2786 I32 o = locinput - PL_bostr, b;
2788 o = (scan->flags & 0xf) - 1 + o * (scan->flags>>4);
2791 if (PL_reg_poscache[o] & (1<<b)) {
2793 PerlIO_printf(Perl_debug_log,
2794 "%*s already tried at this position...\n",
2795 REPORT_CODE_OFF+PL_regindent*2, "")
2799 PL_reg_poscache[o] |= (1<<b);
2803 /* Prefer next over scan for minimal matching. */
2806 PL_regcc = cc->oldcc;
2809 cp = regcppush(cc->parenfloor);
2811 if (regmatch(cc->next)) {
2813 sayYES; /* All done. */
2815 REGCP_UNWIND(lastcp);
2821 if (n >= cc->max) { /* Maximum greed exceeded? */
2822 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
2823 && !(PL_reg_flags & RF_warned)) {
2824 PL_reg_flags |= RF_warned;
2825 Perl_warner(aTHX_ WARN_REGEXP, "%s limit (%d) exceeded",
2826 "Complex regular subexpression recursion",
2833 PerlIO_printf(Perl_debug_log,
2834 "%*s trying longer...\n",
2835 REPORT_CODE_OFF+PL_regindent*2, "")
2837 /* Try scanning more and see if it helps. */
2838 PL_reginput = locinput;
2840 cc->lastloc = locinput;
2841 cp = regcppush(cc->parenfloor);
2843 if (regmatch(cc->scan)) {
2847 REGCP_UNWIND(lastcp);
2850 cc->lastloc = lastloc;
2854 /* Prefer scan over next for maximal matching. */
2856 if (n < cc->max) { /* More greed allowed? */
2857 cp = regcppush(cc->parenfloor);
2859 cc->lastloc = locinput;
2861 if (regmatch(cc->scan)) {
2865 REGCP_UNWIND(lastcp);
2866 regcppop(); /* Restore some previous $<digit>s? */
2867 PL_reginput = locinput;
2869 PerlIO_printf(Perl_debug_log,
2870 "%*s failed, try continuation...\n",
2871 REPORT_CODE_OFF+PL_regindent*2, "")
2874 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
2875 && !(PL_reg_flags & RF_warned)) {
2876 PL_reg_flags |= RF_warned;
2877 Perl_warner(aTHX_ WARN_REGEXP, "%s limit (%d) exceeded",
2878 "Complex regular subexpression recursion",
2882 /* Failed deeper matches of scan, so see if this one works. */
2883 PL_regcc = cc->oldcc;
2886 if (regmatch(cc->next))
2892 cc->lastloc = lastloc;
2897 next = scan + ARG(scan);
2900 inner = NEXTOPER(NEXTOPER(scan));
2903 inner = NEXTOPER(scan);
2908 if (OP(next) != c1) /* No choice. */
2909 next = inner; /* Avoid recursion. */
2911 I32 lastparen = *PL_reglastparen;
2913 re_unwind_branch_t *uw;
2915 /* Put unwinding data on stack */
2916 unwind1 = SSNEWt(1,re_unwind_branch_t);
2917 uw = SSPTRt(unwind1,re_unwind_branch_t);
2920 uw->type = ((c1 == BRANCH)
2922 : RE_UNWIND_BRANCHJ);
2923 uw->lastparen = lastparen;
2925 uw->locinput = locinput;
2926 uw->nextchr = nextchr;
2928 uw->regindent = ++PL_regindent;
2931 REGCP_SET(uw->lastcp);
2933 /* Now go into the first branch */
2946 /* We suppose that the next guy does not need
2947 backtracking: in particular, it is of constant length,
2948 and has no parenths to influence future backrefs. */
2949 ln = ARG1(scan); /* min to match */
2950 n = ARG2(scan); /* max to match */
2951 paren = scan->flags;
2953 if (paren > PL_regsize)
2955 if (paren > *PL_reglastparen)
2956 *PL_reglastparen = paren;
2958 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
2960 scan += NEXT_OFF(scan); /* Skip former OPEN. */
2961 PL_reginput = locinput;
2964 if (ln && regrepeat_hard(scan, ln, &l) < ln)
2966 if (ln && l == 0 && n >= ln
2967 /* In fact, this is tricky. If paren, then the
2968 fact that we did/didnot match may influence
2969 future execution. */
2970 && !(paren && ln == 0))
2972 locinput = PL_reginput;
2973 if (PL_regkind[(U8)OP(next)] == EXACT) {
2974 c1 = (U8)*STRING(next);
2975 if (OP(next) == EXACTF)
2977 else if (OP(next) == EXACTFL)
2978 c2 = PL_fold_locale[c1];
2985 /* This may be improved if l == 0. */
2986 while (n >= ln || (n == REG_INFTY && ln > 0 && l)) { /* ln overflow ? */
2987 /* If it could work, try it. */
2989 UCHARAT(PL_reginput) == c1 ||
2990 UCHARAT(PL_reginput) == c2)
2994 PL_regstartp[paren] =
2995 HOPc(PL_reginput, -l) - PL_bostr;
2996 PL_regendp[paren] = PL_reginput - PL_bostr;
2999 PL_regendp[paren] = -1;
3003 REGCP_UNWIND(lastcp);
3005 /* Couldn't or didn't -- move forward. */
3006 PL_reginput = locinput;
3007 if (regrepeat_hard(scan, 1, &l)) {
3009 locinput = PL_reginput;
3016 n = regrepeat_hard(scan, n, &l);
3017 if (n != 0 && l == 0
3018 /* In fact, this is tricky. If paren, then the
3019 fact that we did/didnot match may influence
3020 future execution. */
3021 && !(paren && ln == 0))
3023 locinput = PL_reginput;
3025 PerlIO_printf(Perl_debug_log,
3026 "%*s matched %"IVdf" times, len=%"IVdf"...\n",
3027 (int)(REPORT_CODE_OFF+PL_regindent*2), "",
3031 if (PL_regkind[(U8)OP(next)] == EXACT) {
3032 c1 = (U8)*STRING(next);
3033 if (OP(next) == EXACTF)
3035 else if (OP(next) == EXACTFL)
3036 c2 = PL_fold_locale[c1];
3045 /* If it could work, try it. */
3047 UCHARAT(PL_reginput) == c1 ||
3048 UCHARAT(PL_reginput) == c2)
3051 PerlIO_printf(Perl_debug_log,
3052 "%*s trying tail with n=%"IVdf"...\n",
3053 (int)(REPORT_CODE_OFF+PL_regindent*2), "", (IV)n)
3057 PL_regstartp[paren] = HOPc(PL_reginput, -l) - PL_bostr;
3058 PL_regendp[paren] = PL_reginput - PL_bostr;
3061 PL_regendp[paren] = -1;
3065 REGCP_UNWIND(lastcp);
3067 /* Couldn't or didn't -- back up. */
3069 locinput = HOPc(locinput, -l);
3070 PL_reginput = locinput;
3077 paren = scan->flags; /* Which paren to set */
3078 if (paren > PL_regsize)
3080 if (paren > *PL_reglastparen)
3081 *PL_reglastparen = paren;
3082 ln = ARG1(scan); /* min to match */
3083 n = ARG2(scan); /* max to match */
3084 scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
3088 ln = ARG1(scan); /* min to match */
3089 n = ARG2(scan); /* max to match */
3090 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
3095 scan = NEXTOPER(scan);
3101 scan = NEXTOPER(scan);
3105 * Lookahead to avoid useless match attempts
3106 * when we know what character comes next.
3108 if (PL_regkind[(U8)OP(next)] == EXACT) {
3109 U8 *s = (U8*)STRING(next);
3112 if (OP(next) == EXACTF)
3114 else if (OP(next) == EXACTFL)
3115 c2 = PL_fold_locale[c1];
3118 if (OP(next) == EXACTF) {
3119 c1 = to_utf8_lower(s);
3120 c2 = to_utf8_upper(s);
3123 c2 = c1 = utf8_to_uv_simple(s, NULL);
3129 PL_reginput = locinput;
3133 if (ln && regrepeat(scan, ln) < ln)
3135 locinput = PL_reginput;
3138 char *e; /* Should not check after this */
3139 char *old = locinput;
3141 if (n == REG_INFTY) {
3144 while (UTF8_IS_CONTINUATION(*(U8*)e))
3150 m >0 && e + UTF8SKIP(e) <= PL_regeol; m--)
3154 e = locinput + n - ln;
3160 /* Find place 'next' could work */
3163 while (locinput <= e && *locinput != c1)
3166 while (locinput <= e
3171 count = locinput - old;
3178 utf8_to_uv_simple((U8*)locinput, &len) != c1;
3183 for (count = 0; locinput <= e; count++) {
3184 UV c = utf8_to_uv_simple((U8*)locinput, &len);
3185 if (c == c1 || c == c2)
3193 /* PL_reginput == old now */
3194 if (locinput != old) {
3195 ln = 1; /* Did some */
3196 if (regrepeat(scan, count) < count)
3199 /* PL_reginput == locinput now */
3200 TRYPAREN(paren, ln, locinput);
3201 PL_reginput = locinput; /* Could be reset... */
3202 REGCP_UNWIND(lastcp);
3203 /* Couldn't or didn't -- move forward. */
3206 locinput += UTF8SKIP(locinput);
3212 while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */
3216 c = utf8_to_uv_simple((U8*)PL_reginput, NULL);
3218 c = UCHARAT(PL_reginput);
3220 /* If it could work, try it. */
3221 if (c1 == -1000 || c == c1 || c == c2)
3223 TRYPAREN(paren, n, PL_reginput);
3224 REGCP_UNWIND(lastcp);
3226 /* Couldn't or didn't -- move forward. */
3227 PL_reginput = locinput;
3228 if (regrepeat(scan, 1)) {
3230 locinput = PL_reginput;
3238 n = regrepeat(scan, n);
3239 locinput = PL_reginput;
3240 if (ln < n && PL_regkind[(U8)OP(next)] == EOL &&
3241 (!PL_multiline || OP(next) == SEOL || OP(next) == EOS)) {
3242 ln = n; /* why back off? */
3243 /* ...because $ and \Z can match before *and* after
3244 newline at the end. Consider "\n\n" =~ /\n+\Z\n/.
3245 We should back off by one in this case. */
3246 if (UCHARAT(PL_reginput - 1) == '\n' && OP(next) != EOS)
3255 c = utf8_to_uv_simple((U8*)PL_reginput, NULL);
3257 c = UCHARAT(PL_reginput);
3259 /* If it could work, try it. */
3260 if (c1 == -1000 || c == c1 || c == c2)
3262 TRYPAREN(paren, n, PL_reginput);
3263 REGCP_UNWIND(lastcp);
3265 /* Couldn't or didn't -- back up. */
3267 PL_reginput = locinput = HOPc(locinput, -1);
3275 c = utf8_to_uv_simple((U8*)PL_reginput, NULL);
3277 c = UCHARAT(PL_reginput);
3279 /* If it could work, try it. */
3280 if (c1 == -1000 || c == c1 || c == c2)
3282 TRYPAREN(paren, n, PL_reginput);
3283 REGCP_UNWIND(lastcp);
3285 /* Couldn't or didn't -- back up. */
3287 PL_reginput = locinput = HOPc(locinput, -1);
3294 if (PL_reg_call_cc) {
3295 re_cc_state *cur_call_cc = PL_reg_call_cc;
3296 CURCUR *cctmp = PL_regcc;
3297 regexp *re = PL_reg_re;
3298 CHECKPOINT cp, lastcp;
3300 cp = regcppush(0); /* Save *all* the positions. */
3302 regcp_set_to(PL_reg_call_cc->ss); /* Restore parens of
3304 PL_reginput = locinput; /* Make position available to
3306 cache_re(PL_reg_call_cc->re);
3307 PL_regcc = PL_reg_call_cc->cc;
3308 PL_reg_call_cc = PL_reg_call_cc->prev;
3309 if (regmatch(cur_call_cc->node)) {
3310 PL_reg_call_cc = cur_call_cc;
3314 REGCP_UNWIND(lastcp);
3316 PL_reg_call_cc = cur_call_cc;
3322 PerlIO_printf(Perl_debug_log,
3323 "%*s continuation failed...\n",
3324 REPORT_CODE_OFF+PL_regindent*2, "")
3328 if (locinput < PL_regtill) {
3329 DEBUG_r(PerlIO_printf(Perl_debug_log,
3330 "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
3332 (long)(locinput - PL_reg_starttry),
3333 (long)(PL_regtill - PL_reg_starttry),
3335 sayNO_FINAL; /* Cannot match: too short. */
3337 PL_reginput = locinput; /* put where regtry can find it */
3338 sayYES_FINAL; /* Success! */
3340 PL_reginput = locinput; /* put where regtry can find it */
3341 sayYES_LOUD; /* Success! */
3344 PL_reginput = locinput;
3349 if (UTF) { /* XXXX This is absolutely
3350 broken, we read before
3352 s = HOPMAYBEc(locinput, -scan->flags);
3358 if (locinput < PL_bostr + scan->flags)
3360 PL_reginput = locinput - scan->flags;
3365 PL_reginput = locinput;
3370 if (UTF) { /* XXXX This is absolutely
3371 broken, we read before
3373 s = HOPMAYBEc(locinput, -scan->flags);
3374 if (!s || s < PL_bostr)
3379 if (locinput < PL_bostr + scan->flags)
3381 PL_reginput = locinput - scan->flags;
3386 PL_reginput = locinput;
3389 inner = NEXTOPER(NEXTOPER(scan));
3390 if (regmatch(inner) != n) {
3405 if (OP(scan) == SUSPEND) {
3406 locinput = PL_reginput;
3407 nextchr = UCHARAT(locinput);
3412 next = scan + ARG(scan);
3417 PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
3418 PTR2UV(scan), OP(scan));
3419 Perl_croak(aTHX_ "regexp memory corruption");
3426 * We get here only if there's trouble -- normally "case END" is
3427 * the terminating point.
3429 Perl_croak(aTHX_ "corrupted regexp pointers");
3435 PerlIO_printf(Perl_debug_log,
3436 "%*s %scould match...%s\n",
3437 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],PL_colors[5])
3441 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
3442 PL_colors[4],PL_colors[5]));
3448 #if 0 /* Breaks $^R */
3456 PerlIO_printf(Perl_debug_log,
3457 "%*s %sfailed...%s\n",
3458 REPORT_CODE_OFF+PL_regindent*2, "",PL_colors[4],PL_colors[5])
3464 re_unwind_t *uw = SSPTRt(unwind,re_unwind_t);
3467 case RE_UNWIND_BRANCH:
3468 case RE_UNWIND_BRANCHJ:
3470 re_unwind_branch_t *uwb = &(uw->branch);
3471 I32 lastparen = uwb->lastparen;
3473 REGCP_UNWIND(uwb->lastcp);
3474 for (n = *PL_reglastparen; n > lastparen; n--)
3476 *PL_reglastparen = n;
3477 scan = next = uwb->next;
3479 OP(scan) != (uwb->type == RE_UNWIND_BRANCH
3480 ? BRANCH : BRANCHJ) ) { /* Failure */
3487 /* Have more choice yet. Reuse the same uwb. */
3489 if ((n = (uwb->type == RE_UNWIND_BRANCH
3490 ? NEXT_OFF(next) : ARG(next))))
3493 next = NULL; /* XXXX Needn't unwinding in this case... */
3495 next = NEXTOPER(scan);
3496 if (uwb->type == RE_UNWIND_BRANCHJ)
3497 next = NEXTOPER(next);
3498 locinput = uwb->locinput;
3499 nextchr = uwb->nextchr;
3501 PL_regindent = uwb->regindent;
3508 Perl_croak(aTHX_ "regexp unwind memory corruption");
3519 - regrepeat - repeatedly match something simple, report how many
3522 * [This routine now assumes that it will only match on things of length 1.
3523 * That was true before, but now we assume scan - reginput is the count,
3524 * rather than incrementing count on every character. [Er, except utf8.]]
3527 S_regrepeat(pTHX_ regnode *p, I32 max)
3529 register char *scan;
3531 register char *loceol = PL_regeol;
3532 register I32 hardcount = 0;
3533 register bool do_utf8 = DO_UTF8(PL_reg_sv);
3536 if (max != REG_INFTY && max < loceol - scan)
3537 loceol = scan + max;
3542 while (scan < loceol && hardcount < max && *scan != '\n') {
3543 scan += UTF8SKIP(scan);
3547 while (scan < loceol && *scan != '\n')
3554 while (hardcount < max && scan < loceol) {
3555 scan += UTF8SKIP(scan);
3562 case EXACT: /* length of string is 1 */
3564 while (scan < loceol && UCHARAT(scan) == c)
3567 case EXACTF: /* length of string is 1 */
3569 while (scan < loceol &&
3570 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
3573 case EXACTFL: /* length of string is 1 */
3574 PL_reg_flags |= RF_tainted;
3576 while (scan < loceol &&
3577 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
3583 while (hardcount < max && scan < loceol &&
3584 reginclass(p, (U8*)scan, do_utf8)) {
3585 scan += UTF8SKIP(scan);
3589 while (scan < loceol && reginclass(p, (U8*)scan, do_utf8))
3596 while (hardcount < max && scan < loceol &&
3597 swash_fetch(PL_utf8_alnum, (U8*)scan)) {
3598 scan += UTF8SKIP(scan);
3602 while (scan < loceol && isALNUM(*scan))
3607 PL_reg_flags |= RF_tainted;
3610 while (hardcount < max && scan < loceol &&
3611 isALNUM_LC_utf8((U8*)scan)) {
3612 scan += UTF8SKIP(scan);
3616 while (scan < loceol && isALNUM_LC(*scan))
3623 while (hardcount < max && scan < loceol &&
3624 !swash_fetch(PL_utf8_alnum, (U8*)scan)) {
3625 scan += UTF8SKIP(scan);
3629 while (scan < loceol && !isALNUM(*scan))
3634 PL_reg_flags |= RF_tainted;
3637 while (hardcount < max && scan < loceol &&
3638 !isALNUM_LC_utf8((U8*)scan)) {
3639 scan += UTF8SKIP(scan);
3643 while (scan < loceol && !isALNUM_LC(*scan))
3650 while (hardcount < max && scan < loceol &&
3651 (*scan == ' ' || swash_fetch(PL_utf8_space,(U8*)scan))) {
3652 scan += UTF8SKIP(scan);
3656 while (scan < loceol && isSPACE(*scan))
3661 PL_reg_flags |= RF_tainted;
3664 while (hardcount < max && scan < loceol &&
3665 (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
3666 scan += UTF8SKIP(scan);
3670 while (scan < loceol && isSPACE_LC(*scan))
3677 while (hardcount < max && scan < loceol &&
3678 !(*scan == ' ' || swash_fetch(PL_utf8_space,(U8*)scan))) {
3679 scan += UTF8SKIP(scan);
3683 while (scan < loceol && !isSPACE(*scan))
3688 PL_reg_flags |= RF_tainted;
3691 while (hardcount < max && scan < loceol &&
3692 !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
3693 scan += UTF8SKIP(scan);
3697 while (scan < loceol && !isSPACE_LC(*scan))
3704 while (hardcount < max && scan < loceol &&
3705 swash_fetch(PL_utf8_digit,(U8*)scan)) {
3706 scan += UTF8SKIP(scan);
3710 while (scan < loceol && isDIGIT(*scan))
3717 while (hardcount < max && scan < loceol &&
3718 !swash_fetch(PL_utf8_digit,(U8*)scan)) {
3719 scan += UTF8SKIP(scan);
3723 while (scan < loceol && !isDIGIT(*scan))
3727 default: /* Called on something of 0 width. */
3728 break; /* So match right here or not at all. */
3734 c = scan - PL_reginput;
3739 SV *prop = sv_newmortal();
3742 PerlIO_printf(Perl_debug_log,
3743 "%*s %s can match %"IVdf" times out of %"IVdf"...\n",
3744 REPORT_CODE_OFF+1, "", SvPVX(prop),(IV)c,(IV)max);
3751 - regrepeat_hard - repeatedly match something, report total lenth and length
3753 * The repeater is supposed to have constant length.
3757 S_regrepeat_hard(pTHX_ regnode *p, I32 max, I32 *lp)
3759 register char *scan;
3760 register char *start;
3761 register char *loceol = PL_regeol;
3763 I32 count = 0, res = 1;
3768 start = PL_reginput;
3769 if (DO_UTF8(PL_reg_sv)) {
3770 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
3773 while (start < PL_reginput) {
3775 start += UTF8SKIP(start);
3786 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
3788 *lp = l = PL_reginput - start;
3789 if (max != REG_INFTY && l*max < loceol - scan)
3790 loceol = scan + l*max;
3803 - regclass_swash - prepare the utf8 swash
3807 Perl_regclass_swash(pTHX_ register regnode* node, bool doinit, SV** initsvp)
3812 if (PL_regdata && PL_regdata->count) {
3815 if (PL_regdata->what[n] == 's') {
3816 SV *rv = (SV*)PL_regdata->data[n];
3817 AV *av = (AV*)SvRV((SV*)rv);
3820 si = *av_fetch(av, 0, FALSE);
3821 a = av_fetch(av, 1, FALSE);
3825 else if (si && doinit) {
3826 sw = swash_init("utf8", "", si, 1, 0);
3827 (void)av_store(av, 1, sw);
3839 - reginclass - determine if a character falls into a character class
3843 S_reginclass(pTHX_ register regnode *n, register U8* p, register bool do_utf8)
3845 char flags = ANYOF_FLAGS(n);
3851 c = utf8_to_uv_simple(p, &len);
3855 if (do_utf8 || (flags & ANYOF_UNICODE)) {
3856 if (do_utf8 && !ANYOF_RUNTIME(n)) {
3857 if (len != (STRLEN)-1 && c < 256 && ANYOF_BITMAP_TEST(n, c))
3860 if (do_utf8 && flags & ANYOF_UNICODE_ALL && c >= 256)
3863 SV *sw = regclass_swash(n, TRUE, 0);
3866 if (swash_fetch(sw, p))
3868 else if (flags & ANYOF_FOLD) {
3869 U8 tmpbuf[UTF8_MAXLEN+1];
3871 if (flags & ANYOF_LOCALE) {
3872 PL_reg_flags |= RF_tainted;
3873 uv_to_utf8(tmpbuf, toLOWER_LC_utf8(p));
3876 uv_to_utf8(tmpbuf, toLOWER_utf8(p));
3877 if (swash_fetch(sw, tmpbuf))
3883 if (!match && c < 256) {
3884 if (ANYOF_BITMAP_TEST(n, c))
3886 else if (flags & ANYOF_FOLD) {
3889 if (flags & ANYOF_LOCALE) {
3890 PL_reg_flags |= RF_tainted;
3891 f = PL_fold_locale[c];
3895 if (f != c && ANYOF_BITMAP_TEST(n, f))
3899 if (!match && (flags & ANYOF_CLASS)) {
3900 PL_reg_flags |= RF_tainted;
3902 (ANYOF_CLASS_TEST(n, ANYOF_ALNUM) && isALNUM_LC(c)) ||
3903 (ANYOF_CLASS_TEST(n, ANYOF_NALNUM) && !isALNUM_LC(c)) ||
3904 (ANYOF_CLASS_TEST(n, ANYOF_SPACE) && isSPACE_LC(c)) ||
3905 (ANYOF_CLASS_TEST(n, ANYOF_NSPACE) && !isSPACE_LC(c)) ||
3906 (ANYOF_CLASS_TEST(n, ANYOF_DIGIT) && isDIGIT_LC(c)) ||
3907 (ANYOF_CLASS_TEST(n, ANYOF_NDIGIT) && !isDIGIT_LC(c)) ||
3908 (ANYOF_CLASS_TEST(n, ANYOF_ALNUMC) && isALNUMC_LC(c)) ||
3909 (ANYOF_CLASS_TEST(n, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
3910 (ANYOF_CLASS_TEST(n, ANYOF_ALPHA) && isALPHA_LC(c)) ||
3911 (ANYOF_CLASS_TEST(n, ANYOF_NALPHA) && !isALPHA_LC(c)) ||
3912 (ANYOF_CLASS_TEST(n, ANYOF_ASCII) && isASCII(c)) ||
3913 (ANYOF_CLASS_TEST(n, ANYOF_NASCII) && !isASCII(c)) ||
3914 (ANYOF_CLASS_TEST(n, ANYOF_CNTRL) && isCNTRL_LC(c)) ||
3915 (ANYOF_CLASS_TEST(n, ANYOF_NCNTRL) && !isCNTRL_LC(c)) ||
3916 (ANYOF_CLASS_TEST(n, ANYOF_GRAPH) && isGRAPH_LC(c)) ||
3917 (ANYOF_CLASS_TEST(n, ANYOF_NGRAPH) && !isGRAPH_LC(c)) ||
3918 (ANYOF_CLASS_TEST(n, ANYOF_LOWER) && isLOWER_LC(c)) ||
3919 (ANYOF_CLASS_TEST(n, ANYOF_NLOWER) && !isLOWER_LC(c)) ||
3920 (ANYOF_CLASS_TEST(n, ANYOF_PRINT) && isPRINT_LC(c)) ||
3921 (ANYOF_CLASS_TEST(n, ANYOF_NPRINT) && !isPRINT_LC(c)) ||
3922 (ANYOF_CLASS_TEST(n, ANYOF_PUNCT) && isPUNCT_LC(c)) ||
3923 (ANYOF_CLASS_TEST(n, ANYOF_NPUNCT) && !isPUNCT_LC(c)) ||
3924 (ANYOF_CLASS_TEST(n, ANYOF_UPPER) && isUPPER_LC(c)) ||
3925 (ANYOF_CLASS_TEST(n, ANYOF_NUPPER) && !isUPPER_LC(c)) ||
3926 (ANYOF_CLASS_TEST(n, ANYOF_XDIGIT) && isXDIGIT(c)) ||
3927 (ANYOF_CLASS_TEST(n, ANYOF_NXDIGIT) && !isXDIGIT(c)) ||
3928 (ANYOF_CLASS_TEST(n, ANYOF_PSXSPC) && isPSXSPC(c)) ||
3929 (ANYOF_CLASS_TEST(n, ANYOF_NPSXSPC) && !isPSXSPC(c)) ||
3930 (ANYOF_CLASS_TEST(n, ANYOF_BLANK) && isBLANK(c)) ||
3931 (ANYOF_CLASS_TEST(n, ANYOF_NBLANK) && !isBLANK(c))
3932 ) /* How's that for a conditional? */
3939 return (flags & ANYOF_INVERT) ? !match : match;
3943 S_reghop(pTHX_ U8 *s, I32 off)
3945 return S_reghop3(aTHX_ s, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr));
3949 S_reghop3(pTHX_ U8 *s, I32 off, U8* lim)
3952 while (off-- && s < lim) {
3953 /* XXX could check well-formedness here */
3961 if (UTF8_IS_CONTINUED(*s)) {
3962 while (s > (U8*)lim && UTF8_IS_CONTINUATION(*s))
3965 /* XXX could check well-formedness here */
3973 S_reghopmaybe(pTHX_ U8 *s, I32 off)
3975 return S_reghopmaybe3(aTHX_ s, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr));
3979 S_reghopmaybe3(pTHX_ U8* s, I32 off, U8* lim)
3982 while (off-- && s < lim) {
3983 /* XXX could check well-formedness here */
3993 if (UTF8_IS_CONTINUED(*s)) {
3994 while (s > (U8*)lim && UTF8_IS_CONTINUATION(*s))
3997 /* XXX could check well-formedness here */
4013 restore_pos(pTHXo_ void *arg)
4015 if (PL_reg_eval_set) {
4016 if (PL_reg_oldsaved) {
4017 PL_reg_re->subbeg = PL_reg_oldsaved;
4018 PL_reg_re->sublen = PL_reg_oldsavedlen;
4019 RX_MATCH_COPIED_on(PL_reg_re);
4021 PL_reg_magic->mg_len = PL_reg_oldpos;
4022 PL_reg_eval_set = 0;
4023 PL_curpm = PL_reg_oldcurpm;