INSTALL tweak
[p5sagit/p5-mst-13.2.git] / regexec.c
CommitLineData
a0d0e21e 1/* regexec.c
2 */
3
4/*
5 * "One Ring to rule them all, One Ring to find them..."
6 */
7
a687059c 8/* NOTE: this is derived from Henry Spencer's regexp code, and should not
9 * confused with the original package (see point 3 below). Thanks, Henry!
10 */
11
12/* Additional note: this code is very heavily munged from Henry's version
13 * in places. In some spots I've traded clarity for efficiency, so don't
14 * blame Henry for some of the lack of readability.
15 */
16
e50aee73 17/* The names of the functions have been changed from regcomp and
18 * regexec to pregcomp and pregexec in order to avoid conflicts
19 * with the POSIX routines of the same names.
20*/
21
b9d5759e 22#ifdef PERL_EXT_RE_BUILD
23/* need to replace pregcomp et al, so enable that */
24# ifndef PERL_IN_XSUB_RE
25# define PERL_IN_XSUB_RE
26# endif
27/* need access to debugger hooks */
cad2e5aa 28# if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING)
b9d5759e 29# define DEBUGGING
30# endif
31#endif
32
33#ifdef PERL_IN_XSUB_RE
d06ea78c 34/* We *really* need to overwrite these symbols: */
56953603 35# define Perl_regexec_flags my_regexec
36# define Perl_regdump my_regdump
37# define Perl_regprop my_regprop
cad2e5aa 38# define Perl_re_intuit_start my_re_intuit_start
d06ea78c 39/* *These* symbols are masked to allow static link. */
40# define Perl_pregexec my_pregexec
d88dccdf 41# define Perl_reginitcolors my_reginitcolors
c5be433b 42
43# define PERL_NO_GET_CONTEXT
56953603 44#endif
45
f0fcb552 46/*SUPPRESS 112*/
a687059c 47/*
e50aee73 48 * pregcomp and pregexec -- regsub and regerror are not used in perl
a687059c 49 *
50 * Copyright (c) 1986 by University of Toronto.
51 * Written by Henry Spencer. Not derived from licensed software.
52 *
53 * Permission is granted to anyone to use this software for any
54 * purpose on any computer system, and to redistribute it freely,
55 * subject to the following restrictions:
56 *
57 * 1. The author is not responsible for the consequences of use of
58 * this software, no matter how awful, even if they arise
59 * from defects in it.
60 *
61 * 2. The origin of this software must not be misrepresented, either
62 * by explicit claim or by omission.
63 *
64 * 3. Altered versions must be plainly marked as such, and must not
65 * be misrepresented as being the original software.
66 *
67 **** Alterations to Henry's code are...
68 ****
4eb8286e 69 **** Copyright (c) 1991-1999, Larry Wall
a687059c 70 ****
9ef589d8 71 **** You may distribute under the terms of either the GNU General Public
72 **** License or the Artistic License, as specified in the README file.
a687059c 73 *
74 * Beware that some of this code is subtly aware of the way operator
75 * precedence is structured in regular expressions. Serious changes in
76 * regular-expression syntax might require a total rethink.
77 */
78#include "EXTERN.h"
864dbfa3 79#define PERL_IN_REGEXEC_C
a687059c 80#include "perl.h"
0f5d15d6 81
c5be433b 82#ifdef PERL_IN_XSUB_RE
83# if defined(PERL_CAPI) || defined(PERL_OBJECT)
84# include "XSUB.h"
85# endif
86#endif
87
a687059c 88#include "regcomp.h"
89
c277df42 90#define RF_tainted 1 /* tainted information used? */
91#define RF_warned 2 /* warned about big count? */
ce862d02 92#define RF_evaled 4 /* Did an EVAL with setting? */
a0ed51b3 93#define RF_utf8 8 /* String contains multibyte chars? */
94
95#define UTF (PL_reg_flags & RF_utf8)
ce862d02 96
97#define RS_init 1 /* eval environment created */
98#define RS_set 2 /* replsv value is set */
c277df42 99
a687059c 100#ifndef STATIC
101#define STATIC static
102#endif
103
c277df42 104/*
105 * Forwards.
106 */
107
b8c5462f 108#define REGINCLASS(p,c) (ANYOF_FLAGS(p) ? reginclass(p,c) : ANYOF_BITMAP_TEST(p,c))
a0ed51b3 109#define REGINCLASSUTF8(f,p) (ARG1(f) ? reginclassutf8(f,p) : swash_fetch((SV*)PL_regdata->data[ARG2(f)],p))
110
111#define CHR_SVLEN(sv) (UTF ? sv_len_utf8(sv) : SvCUR(sv))
112#define CHR_DIST(a,b) (UTF ? utf8_distance(a,b) : a - b)
113
dfe13c55 114#define reghop_c(pos,off) ((char*)reghop((U8*)pos, off))
115#define reghopmaybe_c(pos,off) ((char*)reghopmaybe((U8*)pos, off))
116#define HOP(pos,off) (UTF ? reghop((U8*)pos, off) : (U8*)(pos + off))
117#define HOPMAYBE(pos,off) (UTF ? reghopmaybe((U8*)pos, off) : (U8*)(pos + off))
118#define HOPc(pos,off) ((char*)HOP(pos,off))
119#define HOPMAYBEc(pos,off) ((char*)HOPMAYBE(pos,off))
a0d0e21e 120
51371543 121static void restore_pos(pTHXo_ void *arg);
122
123
76e3520e 124STATIC CHECKPOINT
cea2e8a9 125S_regcppush(pTHX_ I32 parenfloor)
a0d0e21e 126{
11343788 127 dTHR;
3280af22 128 int retval = PL_savestack_ix;
129 int i = (PL_regsize - parenfloor) * 4;
a0d0e21e 130 int p;
131
132 SSCHECK(i + 5);
3280af22 133 for (p = PL_regsize; p > parenfloor; p--) {
cf93c79d 134 SSPUSHINT(PL_regendp[p]);
135 SSPUSHINT(PL_regstartp[p]);
3280af22 136 SSPUSHPTR(PL_reg_start_tmp[p]);
a0d0e21e 137 SSPUSHINT(p);
138 }
3280af22 139 SSPUSHINT(PL_regsize);
140 SSPUSHINT(*PL_reglastparen);
141 SSPUSHPTR(PL_reginput);
a0d0e21e 142 SSPUSHINT(i + 3);
143 SSPUSHINT(SAVEt_REGCONTEXT);
144 return retval;
145}
146
c277df42 147/* These are needed since we do not localize EVAL nodes: */
c3464db5 148# define REGCP_SET DEBUG_r(PerlIO_printf(Perl_debug_log, \
149 " Setting an EVAL scope, savestack=%i\n", \
3280af22 150 PL_savestack_ix)); lastcp = PL_savestack_ix
c3464db5 151
3280af22 152# define REGCP_UNWIND DEBUG_r(lastcp != PL_savestack_ix ? \
c3464db5 153 PerlIO_printf(Perl_debug_log, \
154 " Clearing an EVAL scope, savestack=%i..%i\n", \
3280af22 155 lastcp, PL_savestack_ix) : 0); regcpblow(lastcp)
c277df42 156
76e3520e 157STATIC char *
cea2e8a9 158S_regcppop(pTHX)
a0d0e21e 159{
11343788 160 dTHR;
a0d0e21e 161 I32 i = SSPOPINT;
162 U32 paren = 0;
163 char *input;
cf93c79d 164 I32 tmps;
a0d0e21e 165 assert(i == SAVEt_REGCONTEXT);
166 i = SSPOPINT;
167 input = (char *) SSPOPPTR;
3280af22 168 *PL_reglastparen = SSPOPINT;
169 PL_regsize = SSPOPINT;
c277df42 170 for (i -= 3; i > 0; i -= 4) {
a0d0e21e 171 paren = (U32)SSPOPINT;
3280af22 172 PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
cf93c79d 173 PL_regstartp[paren] = SSPOPINT;
174 tmps = SSPOPINT;
3280af22 175 if (paren <= *PL_reglastparen)
176 PL_regendp[paren] = tmps;
c277df42 177 DEBUG_r(
c3464db5 178 PerlIO_printf(Perl_debug_log,
179 " restoring \\%d to %d(%d)..%d%s\n",
cf93c79d 180 paren, PL_regstartp[paren],
181 PL_reg_start_tmp[paren] - PL_bostr,
182 PL_regendp[paren],
3280af22 183 (paren > *PL_reglastparen ? "(no)" : ""));
c277df42 184 );
a0d0e21e 185 }
c277df42 186 DEBUG_r(
3280af22 187 if (*PL_reglastparen + 1 <= PL_regnpar) {
c3464db5 188 PerlIO_printf(Perl_debug_log,
189 " restoring \\%d..\\%d to undef\n",
3280af22 190 *PL_reglastparen + 1, PL_regnpar);
c277df42 191 }
192 );
3280af22 193 for (paren = *PL_reglastparen + 1; paren <= PL_regnpar; paren++) {
194 if (paren > PL_regsize)
cf93c79d 195 PL_regstartp[paren] = -1;
196 PL_regendp[paren] = -1;
a0d0e21e 197 }
198 return input;
199}
200
0f5d15d6 201STATIC char *
cea2e8a9 202S_regcp_set_to(pTHX_ I32 ss)
0f5d15d6 203{
46124e9e 204 dTHR;
0f5d15d6 205 I32 tmp = PL_savestack_ix;
206
207 PL_savestack_ix = ss;
208 regcppop();
209 PL_savestack_ix = tmp;
942e002e 210 return Nullch;
0f5d15d6 211}
212
213typedef struct re_cc_state
214{
215 I32 ss;
216 regnode *node;
217 struct re_cc_state *prev;
218 CURCUR *cc;
219 regexp *re;
220} re_cc_state;
221
c277df42 222#define regcpblow(cp) LEAVE_SCOPE(cp)
a0d0e21e 223
a687059c 224/*
e50aee73 225 * pregexec and friends
a687059c 226 */
227
228/*
c277df42 229 - pregexec - match a regexp against a string
a687059c 230 */
c277df42 231I32
864dbfa3 232Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend,
c3464db5 233 char *strbeg, I32 minend, SV *screamer, U32 nosave)
c277df42 234/* strend: pointer to null at end of string */
235/* strbeg: real beginning of string */
236/* minend: end of match must be >=minend after stringarg. */
237/* nosave: For optimizations. */
238{
239 return
240 regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
241 nosave ? 0 : REXEC_COPY_STR);
242}
0f5d15d6 243
244STATIC void
cea2e8a9 245S_cache_re(pTHX_ regexp *prog)
0f5d15d6 246{
46124e9e 247 dTHR;
0f5d15d6 248 PL_regprecomp = prog->precomp; /* Needed for FAIL. */
249#ifdef DEBUGGING
250 PL_regprogram = prog->program;
251#endif
252 PL_regnpar = prog->nparens;
253 PL_regdata = prog->data;
254 PL_reg_re = prog;
255}
22e551b9 256
cad2e5aa 257/*
258 * Need to implement the following flags for reg_anch:
259 *
260 * USE_INTUIT_NOML - Useful to call re_intuit_start() first
261 * USE_INTUIT_ML
262 * INTUIT_AUTORITATIVE_NOML - Can trust a positive answer
263 * INTUIT_AUTORITATIVE_ML
264 * INTUIT_ONCE_NOML - Intuit can match in one location only.
265 * INTUIT_ONCE_ML
266 *
267 * Another flag for this function: SECOND_TIME (so that float substrs
268 * with giant delta may be not rechecked).
269 */
270
271/* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
272
2c2d71f5 273/* If SCREAM, then SvPVX(sv) should be compatible with strpos and strend.
cad2e5aa 274 Otherwise, only SvCUR(sv) is used to get strbeg. */
275
276/* XXXX We assume that strpos is strbeg unless sv. */
277
2c2d71f5 278/* A failure to find a constant substring means that there is no need to make
279 an expensive call to REx engine, thus we celebrate a failure. Similarly,
280 finding a substring too deep into the string means that less calls to
281 regtry() should be needed. */
282
cad2e5aa 283char *
284Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
285 char *strend, U32 flags, re_scream_pos_data *data)
286{
2c2d71f5 287 register I32 start_shift;
cad2e5aa 288 /* Should be nonnegative! */
2c2d71f5 289 register I32 end_shift;
290 register char *s;
291 register SV *check;
cad2e5aa 292 char *t;
293 I32 ml_anch;
2c2d71f5 294 char *tmp;
295 register char *other_last = Nullch;
cad2e5aa 296
297 DEBUG_r( if (!PL_colorset) reginitcolors() );
298 DEBUG_r(PerlIO_printf(Perl_debug_log,
2c2d71f5 299 "%sGuessing start of match, REx%s `%s%.60s%s%s' against `%s%.*s%s%s'...\n",
cad2e5aa 300 PL_colors[4],PL_colors[5],PL_colors[0],
301 prog->precomp,
302 PL_colors[1],
303 (strlen(prog->precomp) > 60 ? "..." : ""),
304 PL_colors[0],
305 (strend - strpos > 60 ? 60 : strend - strpos),
306 strpos, PL_colors[1],
307 (strend - strpos > 60 ? "..." : ""))
308 );
309
2c2d71f5 310 if (prog->minlen > strend - strpos) {
311 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too short...\n"));
cad2e5aa 312 goto fail;
2c2d71f5 313 }
314 if (prog->reganch & ROPT_ANCH) { /* Match at beg-of-str or after \n */
cad2e5aa 315 ml_anch = !( (prog->reganch & ROPT_ANCH_SINGLE)
316 || ( (prog->reganch & ROPT_ANCH_BOL)
2c2d71f5 317 && !PL_multiline ) ); /* Check after \n? */
cad2e5aa 318
319 if ((prog->check_offset_min == prog->check_offset_max) && !ml_anch) {
2c2d71f5 320 /* Substring at constant offset from beg-of-str... */
cad2e5aa 321 I32 slen;
322
323 if ( !(prog->reganch & ROPT_ANCH_GPOS) /* Checked by the caller */
2c2d71f5 324 && (sv && (strpos + SvCUR(sv) != strend)) ) {
325 DEBUG_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
cad2e5aa 326 goto fail;
2c2d71f5 327 }
adac82c7 328 PL_regeol = strend; /* Used in HOP() */
2c2d71f5 329 s = HOPc(strpos, prog->check_offset_min);
cad2e5aa 330 if (SvTAIL(prog->check_substr)) {
331 slen = SvCUR(prog->check_substr); /* >= 1 */
332
2c2d71f5 333 if ( strend - s > slen || strend - s < slen - 1
334 || (strend - s == slen && strend[-1] != '\n')) {
335 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
336 goto fail_finish;
cad2e5aa 337 }
338 /* Now should match s[0..slen-2] */
339 slen--;
340 if (slen && (*SvPVX(prog->check_substr) != *s
341 || (slen > 1
2c2d71f5 342 && memNE(SvPVX(prog->check_substr), s, slen)))) {
343 report_neq:
344 DEBUG_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
345 goto fail_finish;
346 }
cad2e5aa 347 }
348 else if (*SvPVX(prog->check_substr) != *s
349 || ((slen = SvCUR(prog->check_substr)) > 1
350 && memNE(SvPVX(prog->check_substr), s, slen)))
2c2d71f5 351 goto report_neq;
352 goto success_at_start;
cad2e5aa 353 }
2c2d71f5 354 /* Match is anchored, but substr is not anchored wrt beg-of-str. */
cad2e5aa 355 s = strpos;
2c2d71f5 356 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
357 /* Should be nonnegative! */
358 end_shift = prog->minlen - start_shift -
359 CHR_SVLEN(prog->check_substr) + (SvTAIL(prog->check_substr) != 0);
360 if (!ml_anch) {
361 I32 end = prog->check_offset_max + CHR_SVLEN(prog->check_substr)
362 - (SvTAIL(prog->check_substr) != 0);
363 I32 eshift = strend - s - end;
364
365 if (end_shift < eshift)
366 end_shift = eshift;
367 }
cad2e5aa 368 }
2c2d71f5 369 else { /* Can match at random position */
cad2e5aa 370 ml_anch = 0;
371 s = strpos;
2c2d71f5 372 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
373 /* Should be nonnegative! */
374 end_shift = prog->minlen - start_shift -
375 CHR_SVLEN(prog->check_substr) + (SvTAIL(prog->check_substr) != 0);
cad2e5aa 376 }
377
2c2d71f5 378#ifdef DEBUGGING /* 7/99: reports of failure (with the older version) */
0033605d 379 if (end_shift < 0)
6bbae5e6 380 Perl_croak(aTHX_ "panic: end_shift");
2c2d71f5 381#endif
382
383 check = prog->check_substr;
384 restart:
385 /* Find a possible match in the region s..strend by looking for
386 the "check" substring in the region corrected by start/end_shift. */
cad2e5aa 387 if (flags & REXEC_SCREAM) {
cad2e5aa 388 char *strbeg = SvPVX(sv); /* XXXX Assume PV_force() on SCREAM! */
389 I32 p = -1; /* Internal iterator of scream. */
390 I32 *pp = data ? data->scream_pos : &p;
391
2c2d71f5 392 if (PL_screamfirst[BmRARE(check)] >= 0
393 || ( BmRARE(check) == '\n'
394 && (BmPREVIOUS(check) == SvCUR(check) - 1)
395 && SvTAIL(check) ))
396 s = screaminstr(sv, check,
397 start_shift + (s - strbeg), end_shift, pp, 0);
cad2e5aa 398 else
2c2d71f5 399 goto fail_finish;
cad2e5aa 400 if (data)
401 *data->scream_olds = s;
402 }
403 else
404 s = fbm_instr((unsigned char*)s + start_shift,
405 (unsigned char*)strend - end_shift,
2c2d71f5 406 check, PL_multiline ? FBMrf_MULTILINE : 0);
cad2e5aa 407
408 /* Update the count-of-usability, remove useless subpatterns,
409 unshift s. */
2c2d71f5 410
411 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s %s substr `%s%.*s%s'%s%s",
412 (s ? "Found" : "Did not find"),
413 ((check == prog->anchored_substr) ? "anchored" : "floating"),
414 PL_colors[0],
415 SvCUR(check) - (SvTAIL(check)!=0), SvPVX(check),
416 PL_colors[1], (SvTAIL(check) ? "$" : ""),
417 (s ? " at offset " : "...\n") ) );
418
419 if (!s)
420 goto fail_finish;
421
422 /* Finish the diagnostic message */
423 DEBUG_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - strpos)) );
424
425 /* Got a candidate. Check MBOL anchoring, and the *other* substr.
426 Start with the other substr.
427 XXXX no SCREAM optimization yet - and a very coarse implementation
428 XXXX /ttx+/ results in anchored=`ttx', floating=`x'. floating will
429 *always* match. Probably should be marked during compile...
430 Probably it is right to do no SCREAM here...
431 */
432
433 if (prog->float_substr && prog->anchored_substr) {
434 /* Take into account the anchored substring. */
435 /* XXXX May be hopelessly wrong for UTF... */
436 if (!other_last)
437 other_last = strpos - 1;
438 if (check == prog->float_substr) {
439 char *last = s - start_shift, *last1, *last2;
440 char *s1 = s;
441
442 tmp = PL_bostr;
443 t = s - prog->check_offset_max;
444 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
445 && (!(prog->reganch & ROPT_UTF8)
446 || (PL_bostr = strpos, /* Used in regcopmaybe() */
447 (t = reghopmaybe_c(s, -(prog->check_offset_max)))
448 && t > strpos)))
449 ;
450 else
451 t = strpos;
452 t += prog->anchored_offset;
453 if (t <= other_last)
454 t = other_last + 1;
455 PL_bostr = tmp;
456 last2 = last1 = strend - prog->minlen;
457 if (last < last1)
458 last1 = last;
459 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
460 /* On end-of-str: see comment below. */
461 s = fbm_instr((unsigned char*)t,
462 (unsigned char*)last1 + prog->anchored_offset
463 + SvCUR(prog->anchored_substr)
464 - (SvTAIL(prog->anchored_substr)!=0),
465 prog->anchored_substr, PL_multiline ? FBMrf_MULTILINE : 0);
466 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s anchored substr `%s%.*s%s'%s",
467 (s ? "Found" : "Contradicts"),
468 PL_colors[0],
469 SvCUR(prog->anchored_substr)
470 - (SvTAIL(prog->anchored_substr)!=0),
471 SvPVX(prog->anchored_substr),
472 PL_colors[1], (SvTAIL(prog->anchored_substr) ? "$" : "")));
473 if (!s) {
474 if (last1 >= last2) {
475 DEBUG_r(PerlIO_printf(Perl_debug_log,
476 ", giving up...\n"));
477 goto fail_finish;
478 }
479 DEBUG_r(PerlIO_printf(Perl_debug_log,
480 ", trying floating at offset %ld...\n",
481 (long)(s1 + 1 - strpos)));
482 PL_regeol = strend; /* Used in HOP() */
483 other_last = last1 + prog->anchored_offset;
484 s = HOPc(last, 1);
485 goto restart;
486 }
487 else {
488 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
489 (long)(s - strpos)));
490 t = s - prog->anchored_offset;
491 other_last = s - 1;
492 if (t == strpos)
493 goto try_at_start;
494 s = s1;
495 goto try_at_offset;
496 }
497 }
498 else { /* Take into account the floating substring. */
499 char *last, *last1;
500 char *s1 = s;
501
502 t = s - start_shift;
503 last1 = last = strend - prog->minlen + prog->float_min_offset;
504 if (last - t > prog->float_max_offset)
505 last = t + prog->float_max_offset;
506 s = t + prog->float_min_offset;
507 if (s <= other_last)
508 s = other_last + 1;
509 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
510 /* fbm_instr() takes into account exact value of end-of-str
511 if the check is SvTAIL(ed). Since false positives are OK,
512 and end-of-str is not later than strend we are OK. */
513 s = fbm_instr((unsigned char*)s,
514 (unsigned char*)last + SvCUR(prog->float_substr)
515 - (SvTAIL(prog->float_substr)!=0),
516 prog->float_substr, PL_multiline ? FBMrf_MULTILINE : 0);
517 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s floating substr `%s%.*s%s'%s",
518 (s ? "Found" : "Contradicts"),
519 PL_colors[0],
520 SvCUR(prog->float_substr)
521 - (SvTAIL(prog->float_substr)!=0),
522 SvPVX(prog->float_substr),
523 PL_colors[1], (SvTAIL(prog->float_substr) ? "$" : "")));
524 if (!s) {
525 if (last1 == last) {
526 DEBUG_r(PerlIO_printf(Perl_debug_log,
527 ", giving up...\n"));
528 goto fail_finish;
529 }
530 DEBUG_r(PerlIO_printf(Perl_debug_log,
531 ", trying anchored starting at offset %ld...\n",
532 (long)(s1 + 1 - strpos)));
533 other_last = last;
534 PL_regeol = strend; /* Used in HOP() */
535 s = HOPc(t, 1);
536 goto restart;
537 }
538 else {
539 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
540 (long)(s - strpos)));
541 other_last = s - 1;
542 if (t == strpos)
543 goto try_at_start;
544 s = s1;
545 goto try_at_offset;
546 }
547 }
cad2e5aa 548 }
2c2d71f5 549
550 t = s - prog->check_offset_max;
551 tmp = PL_bostr;
552 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
553 && (!(prog->reganch & ROPT_UTF8)
554 || (PL_bostr = strpos, /* Used in regcopmaybe() */
555 ((t = reghopmaybe_c(s, -(prog->check_offset_max)))
556 && t > strpos)))) {
557 PL_bostr = tmp;
558 /* Fixed substring is found far enough so that the match
559 cannot start at strpos. */
560 try_at_offset:
cad2e5aa 561 if (ml_anch && t[-1] != '\n') {
2c2d71f5 562 find_anchor: /* Eventually fbm_*() should handle this */
563 while (t < strend - prog->minlen) {
cad2e5aa 564 if (*t == '\n') {
565 if (t < s - prog->check_offset_min) {
566 s = t + 1;
2c2d71f5 567 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
568 PL_colors[0],PL_colors[1], (long)(s - strpos)));
cad2e5aa 569 goto set_useful;
570 }
2c2d71f5 571 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting at offset %ld...\n",
572 PL_colors[0],PL_colors[1], (long)(t + 1 - strpos)));
cad2e5aa 573 s = t + 1;
574 goto restart;
575 }
576 t++;
577 }
2c2d71f5 578 DEBUG_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
579 PL_colors[0],PL_colors[1]));
580 goto fail_finish;
cad2e5aa 581 }
582 s = t;
583 set_useful:
2c2d71f5 584 ++BmUSEFUL(prog->check_substr); /* hooray/5 */
cad2e5aa 585 }
586 else {
2c2d71f5 587 PL_bostr = tmp;
588 /* The found string does not prohibit matching at beg-of-str
589 - no optimization of calling REx engine can be performed,
590 unless it was an MBOL and we are not after MBOL. */
591 try_at_start:
592 /* Even in this situation we may use MBOL flag if strpos is offset
593 wrt the start of the string. */
594 if (ml_anch && sv
cad2e5aa 595 && (strpos + SvCUR(sv) != strend) && strpos[-1] != '\n') {
596 t = strpos;
597 goto find_anchor;
598 }
2c2d71f5 599 success_at_start:
cad2e5aa 600 if (!(prog->reganch & ROPT_NAUGHTY)
601 && --BmUSEFUL(prog->check_substr) < 0
602 && prog->check_substr == prog->float_substr) { /* boo */
603 /* If flags & SOMETHING - do not do it many times on the same match */
604 SvREFCNT_dec(prog->check_substr);
605 prog->check_substr = Nullsv; /* disable */
606 prog->float_substr = Nullsv; /* clear */
607 s = strpos;
608 prog->reganch &= ~RE_USE_INTUIT;
609 }
610 else
611 s = strpos;
612 }
613
2c2d71f5 614 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sGuessed:%s match at offset %ld\n",
615 PL_colors[4], PL_colors[5], (long)(s - strpos)) );
cad2e5aa 616 return s;
2c2d71f5 617
618 fail_finish: /* Substring not found */
619 BmUSEFUL(prog->check_substr) += 5; /* hooray */
cad2e5aa 620 fail:
2c2d71f5 621 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
cad2e5aa 622 PL_colors[4],PL_colors[5]));
623 return Nullch;
624}
9661b544 625
a687059c 626/*
c277df42 627 - regexec_flags - match a regexp against a string
a687059c 628 */
79072805 629I32
864dbfa3 630Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend,
22e551b9 631 char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
c277df42 632/* strend: pointer to null at end of string */
633/* strbeg: real beginning of string */
634/* minend: end of match must be >=minend after stringarg. */
635/* data: May be used for some additional optimizations. */
636/* nosave: For optimizations. */
a687059c 637{
5c0ca799 638 dTHR;
a0d0e21e 639 register char *s;
c277df42 640 register regnode *c;
a0d0e21e 641 register char *startpos = stringarg;
642 register I32 tmp;
c277df42 643 I32 minlen; /* must match at least this many chars */
a0d0e21e 644 I32 dontbother = 0; /* how many characters not to try at end */
c277df42 645 I32 start_shift = 0; /* Offset of the start to find
a0ed51b3 646 constant substr. */ /* CC */
647 I32 end_shift = 0; /* Same for the end. */ /* CC */
c277df42 648 I32 scream_pos = -1; /* Internal iterator of scream. */
649 char *scream_olds;
3280af22 650 SV* oreplsv = GvSV(PL_replgv);
a687059c 651
2ab05381 652 PL_regcc = 0;
a0d0e21e 653
0f5d15d6 654 cache_re(prog);
a0d0e21e 655#ifdef DEBUGGING
3280af22 656 PL_regnarrate = PL_debug & 512;
a0d0e21e 657#endif
658
659 /* Be paranoid... */
660 if (prog == NULL || startpos == NULL) {
cea2e8a9 661 Perl_croak(aTHX_ "NULL regexp parameter");
a0d0e21e 662 return 0;
663 }
664
c277df42 665 minlen = prog->minlen;
666 if (strend - startpos < minlen) goto phooey;
667
a0d0e21e 668 if (startpos == strbeg) /* is ^ valid at stringarg? */
3280af22 669 PL_regprev = '\n';
a0d0e21e 670 else {
a0ed51b3 671 PL_regprev = (U32)stringarg[-1];
3280af22 672 if (!PL_multiline && PL_regprev == '\n')
673 PL_regprev = '\0'; /* force ^ to NOT match */
a0d0e21e 674 }
bbce6d69 675
a0d0e21e 676 /* Check validity of program. */
22c35a8c 677 if (UCHARAT(prog->program) != REG_MAGIC) {
cea2e8a9 678 Perl_croak(aTHX_ "corrupted regexp program");
a0d0e21e 679 }
680
3280af22 681 PL_reg_flags = 0;
682 PL_reg_eval_set = 0;
2c2d71f5 683 PL_reg_maxiter = 0;
a0d0e21e 684
a0ed51b3 685 if (prog->reganch & ROPT_UTF8)
686 PL_reg_flags |= RF_utf8;
687
688 /* Mark beginning of line for ^ and lookbehind. */
689 PL_regbol = startpos;
690 PL_bostr = strbeg;
9661b544 691 PL_reg_sv = sv;
a0ed51b3 692
693 /* Mark end of line for $ (and such) */
694 PL_regeol = strend;
695
696 /* see how far we have to get to not match where we matched before */
697 PL_regtill = startpos+minend;
698
0f5d15d6 699 /* We start without call_cc context. */
700 PL_reg_call_cc = 0;
701
a0d0e21e 702 /* If there is a "must appear" string, look for it. */
703 s = startpos;
cad2e5aa 704
705 if (prog->reganch & ROPT_GPOS_SEEN) {
706 MAGIC *mg;
707
708 if (!(flags & REXEC_IGNOREPOS) && sv && SvTYPE(sv) >= SVt_PVMG
709 && SvMAGIC(sv) && (mg = mg_find(sv, 'g')) && mg->mg_len >= 0)
710 PL_reg_ganch = strbeg + mg->mg_len;
a0d0e21e 711 else
cad2e5aa 712 PL_reg_ganch = startpos;
713 if (prog->reganch & ROPT_ANCH_GPOS) {
714 if (s > PL_reg_ganch)
715 goto phooey;
716 s = PL_reg_ganch;
a0ed51b3 717 }
a0d0e21e 718 }
a687059c 719
cad2e5aa 720 if (!(flags & REXEC_CHECKED) && prog->check_substr != Nullsv) {
721 re_scream_pos_data d;
722
723 d.scream_olds = &scream_olds;
724 d.scream_pos = &scream_pos;
725 s = re_intuit_start(prog, sv, s, strend, flags, &d);
726 if (!s)
727 goto phooey; /* not present */
728 }
729
730 DEBUG_r( if (!PL_colorset) reginitcolors() );
731 DEBUG_r(PerlIO_printf(Perl_debug_log,
2c2d71f5 732 "%sMatching REx%s `%s%.60s%s%s' against `%s%.*s%s%s'\n",
8d300b32 733 PL_colors[4],PL_colors[5],PL_colors[0],
734 prog->precomp,
735 PL_colors[1],
c277df42 736 (strlen(prog->precomp) > 60 ? "..." : ""),
cad2e5aa 737 PL_colors[0],
c277df42 738 (strend - startpos > 60 ? 60 : strend - startpos),
8d300b32 739 startpos, PL_colors[1],
c277df42 740 (strend - startpos > 60 ? "..." : ""))
741 );
742
a0d0e21e 743 /* Simplest case: anchored match need be tried only once. */
774d564b 744 /* [unless only anchor is BOL and multiline is set] */
22e551b9 745 if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) {
cad2e5aa 746 if (s == startpos && regtry(prog, startpos))
a0d0e21e 747 goto got_it;
22e551b9 748 else if (PL_multiline || (prog->reganch & ROPT_IMPLICIT)
749 || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */
774d564b 750 {
cad2e5aa 751 char *end;
752
a0d0e21e 753 if (minlen)
754 dontbother = minlen - 1;
cad2e5aa 755 end = HOPc(strend, -dontbother) - 1;
a0d0e21e 756 /* for multiline we only have to try after newlines */
cad2e5aa 757 if (prog->check_substr) {
758 while (1) {
759 if (regtry(prog, s))
a0d0e21e 760 goto got_it;
cad2e5aa 761 if (s >= end)
762 goto phooey;
763 s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
764 if (!s)
765 goto phooey;
766 }
767 } else {
768 if (s > startpos)
769 s--;
770 while (s < end) {
771 if (*s++ == '\n') { /* don't need PL_utf8skip here */
772 if (regtry(prog, s))
773 goto got_it;
774 }
775 }
35c8bce7 776 }
35c8bce7 777 }
a0d0e21e 778 goto phooey;
22e551b9 779 } else if (prog->reganch & ROPT_ANCH_GPOS) {
780 if (regtry(prog, PL_reg_ganch))
781 goto got_it;
782 goto phooey;
a0d0e21e 783 }
35c8bce7 784
a0d0e21e 785 /* Messy cases: unanchored match. */
c277df42 786 if (prog->anchored_substr && prog->reganch & ROPT_SKIP) {
787 /* we have /x+whatever/ */
cad2e5aa 788 /* it must be a one character string (XXXX Except UTF?) */
c277df42 789 char ch = SvPVX(prog->anchored_substr)[0];
a0ed51b3 790 if (UTF) {
791 while (s < strend) {
792 if (*s == ch) {
793 if (regtry(prog, s)) goto got_it;
794 s += UTF8SKIP(s);
795 while (s < strend && *s == ch)
796 s += UTF8SKIP(s);
797 }
798 s += UTF8SKIP(s);
799 }
800 }
801 else {
802 while (s < strend) {
803 if (*s == ch) {
804 if (regtry(prog, s)) goto got_it;
c277df42 805 s++;
a0ed51b3 806 while (s < strend && *s == ch)
807 s++;
808 }
809 s++;
a0d0e21e 810 }
a687059c 811 }
c277df42 812 }
813 /*SUPPRESS 560*/
814 else if (prog->anchored_substr != Nullsv
815 || (prog->float_substr != Nullsv
816 && prog->float_max_offset < strend - s)) {
817 SV *must = prog->anchored_substr
818 ? prog->anchored_substr : prog->float_substr;
819 I32 back_max =
820 prog->anchored_substr ? prog->anchored_offset : prog->float_max_offset;
821 I32 back_min =
822 prog->anchored_substr ? prog->anchored_offset : prog->float_min_offset;
823 I32 delta = back_max - back_min;
cf93c79d 824 char *last = HOPc(strend, /* Cannot start after this */
73c4f7a1 825 -(I32)(CHR_SVLEN(must)
826 - (SvTAIL(must) != 0) + back_min));
a0ed51b3 827 char *last1; /* Last position checked before */
828
829 if (s > PL_bostr)
dfe13c55 830 last1 = HOPc(s, -1);
a0ed51b3 831 else
832 last1 = s - 1; /* bogus */
c277df42 833
834 /* XXXX check_substr already used to find `s', can optimize if
835 check_substr==must. */
836 scream_pos = -1;
837 dontbother = end_shift;
dfe13c55 838 strend = HOPc(strend, -dontbother);
c277df42 839 while ( (s <= last) &&
22e551b9 840 ((flags & REXEC_SCREAM)
841 ? (s = screaminstr(sv, must, HOPc(s, back_min) - strbeg,
c277df42 842 end_shift, &scream_pos, 0))
a0ed51b3 843 : (s = fbm_instr((unsigned char*)HOP(s, back_min),
cf93c79d 844 (unsigned char*)strend, must,
845 PL_multiline ? FBMrf_MULTILINE : 0))) ) {
dfe13c55 846 if (HOPc(s, -back_max) > last1) {
847 last1 = HOPc(s, -back_min);
848 s = HOPc(s, -back_max);
a0ed51b3 849 }
850 else {
dfe13c55 851 char *t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
c277df42 852
dfe13c55 853 last1 = HOPc(s, -back_min);
c277df42 854 s = t;
a0d0e21e 855 }
a0ed51b3 856 if (UTF) {
857 while (s <= last1) {
858 if (regtry(prog, s))
859 goto got_it;
860 s += UTF8SKIP(s);
861 }
862 }
863 else {
864 while (s <= last1) {
865 if (regtry(prog, s))
866 goto got_it;
867 s++;
868 }
a0d0e21e 869 }
870 }
871 goto phooey;
a0ed51b3 872 }
873 else if (c = prog->regstclass) {
a0d0e21e 874 I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
a0ed51b3 875 char *cc;
a687059c 876
a0d0e21e 877 if (minlen)
878 dontbother = minlen - 1;
dfe13c55 879 strend = HOPc(strend, -dontbother); /* don't bother with what can't match */
a0d0e21e 880 tmp = 1;
881 /* We know what class it must start with. */
882 switch (OP(c)) {
a0ed51b3 883 case ANYOFUTF8:
cd439c50 884 cc = MASK(c);
a0ed51b3 885 while (s < strend) {
886 if (REGINCLASSUTF8(c, (U8*)s)) {
887 if (tmp && regtry(prog, s))
888 goto got_it;
889 else
890 tmp = doevery;
891 }
892 else
893 tmp = 1;
894 s += UTF8SKIP(s);
895 }
896 break;
a0d0e21e 897 case ANYOF:
cd439c50 898 cc = MASK(c);
a0d0e21e 899 while (s < strend) {
a0ed51b3 900 if (REGINCLASS(cc, *s)) {
a0d0e21e 901 if (tmp && regtry(prog, s))
902 goto got_it;
903 else
904 tmp = doevery;
a687059c 905 }
a0d0e21e 906 else
907 tmp = 1;
908 s++;
909 }
910 break;
bbce6d69 911 case BOUNDL:
3280af22 912 PL_reg_flags |= RF_tainted;
bbce6d69 913 /* FALL THROUGH */
a0d0e21e 914 case BOUND:
a0ed51b3 915 if (minlen) {
916 dontbother++;
917 strend -= 1;
918 }
3280af22 919 tmp = (s != startpos) ? UCHARAT(s - 1) : PL_regprev;
95bac841 920 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
a0d0e21e 921 while (s < strend) {
95bac841 922 if (tmp == !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
a0d0e21e 923 tmp = !tmp;
924 if (regtry(prog, s))
925 goto got_it;
a687059c 926 }
a0d0e21e 927 s++;
928 }
929 if ((minlen || tmp) && regtry(prog,s))
930 goto got_it;
931 break;
a0ed51b3 932 case BOUNDLUTF8:
933 PL_reg_flags |= RF_tainted;
934 /* FALL THROUGH */
935 case BOUNDUTF8:
936 if (minlen) {
937 dontbother++;
dfe13c55 938 strend = reghop_c(strend, -1);
a0ed51b3 939 }
dfe13c55 940 tmp = (I32)(s != startpos) ? utf8_to_uv(reghop((U8*)s, -1), 0) : PL_regprev;
a0ed51b3 941 tmp = ((OP(c) == BOUND ? isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
942 while (s < strend) {
dfe13c55 943 if (tmp == !(OP(c) == BOUND ?
944 swash_fetch(PL_utf8_alnum, (U8*)s) :
945 isALNUM_LC_utf8((U8*)s)))
946 {
a0ed51b3 947 tmp = !tmp;
948 if (regtry(prog, s))
949 goto got_it;
950 }
951 s += UTF8SKIP(s);
952 }
953 if ((minlen || tmp) && regtry(prog,s))
954 goto got_it;
955 break;
bbce6d69 956 case NBOUNDL:
3280af22 957 PL_reg_flags |= RF_tainted;
bbce6d69 958 /* FALL THROUGH */
a0d0e21e 959 case NBOUND:
a0ed51b3 960 if (minlen) {
961 dontbother++;
962 strend -= 1;
963 }
3280af22 964 tmp = (s != startpos) ? UCHARAT(s - 1) : PL_regprev;
95bac841 965 tmp = ((OP(c) == NBOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
a0d0e21e 966 while (s < strend) {
95bac841 967 if (tmp == !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
a0d0e21e 968 tmp = !tmp;
969 else if (regtry(prog, s))
970 goto got_it;
971 s++;
972 }
973 if ((minlen || !tmp) && regtry(prog,s))
974 goto got_it;
975 break;
a0ed51b3 976 case NBOUNDLUTF8:
977 PL_reg_flags |= RF_tainted;
978 /* FALL THROUGH */
979 case NBOUNDUTF8:
980 if (minlen) {
981 dontbother++;
dfe13c55 982 strend = reghop_c(strend, -1);
a0ed51b3 983 }
dfe13c55 984 tmp = (I32)(s != startpos) ? utf8_to_uv(reghop((U8*)s, -1), 0) : PL_regprev;
a0ed51b3 985 tmp = ((OP(c) == NBOUND ? isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
986 while (s < strend) {
dfe13c55 987 if (tmp == !(OP(c) == NBOUND ?
988 swash_fetch(PL_utf8_alnum, (U8*)s) :
989 isALNUM_LC_utf8((U8*)s)))
a0ed51b3 990 tmp = !tmp;
991 else if (regtry(prog, s))
992 goto got_it;
993 s += UTF8SKIP(s);
994 }
995 if ((minlen || !tmp) && regtry(prog,s))
996 goto got_it;
997 break;
a0d0e21e 998 case ALNUM:
999 while (s < strend) {
bbce6d69 1000 if (isALNUM(*s)) {
1001 if (tmp && regtry(prog, s))
1002 goto got_it;
1003 else
1004 tmp = doevery;
1005 }
1006 else
1007 tmp = 1;
1008 s++;
1009 }
1010 break;
a0ed51b3 1011 case ALNUMUTF8:
1012 while (s < strend) {
dfe13c55 1013 if (swash_fetch(PL_utf8_alnum, (U8*)s)) {
a0ed51b3 1014 if (tmp && regtry(prog, s))
1015 goto got_it;
1016 else
1017 tmp = doevery;
1018 }
1019 else
1020 tmp = 1;
1021 s += UTF8SKIP(s);
1022 }
1023 break;
bbce6d69 1024 case ALNUML:
3280af22 1025 PL_reg_flags |= RF_tainted;
bbce6d69 1026 while (s < strend) {
1027 if (isALNUM_LC(*s)) {
a0d0e21e 1028 if (tmp && regtry(prog, s))
1029 goto got_it;
a687059c 1030 else
a0d0e21e 1031 tmp = doevery;
1032 }
1033 else
1034 tmp = 1;
1035 s++;
1036 }
1037 break;
a0ed51b3 1038 case ALNUMLUTF8:
1039 PL_reg_flags |= RF_tainted;
1040 while (s < strend) {
dfe13c55 1041 if (isALNUM_LC_utf8((U8*)s)) {
a0ed51b3 1042 if (tmp && regtry(prog, s))
1043 goto got_it;
1044 else
1045 tmp = doevery;
1046 }
1047 else
1048 tmp = 1;
1049 s += UTF8SKIP(s);
1050 }
1051 break;
a0d0e21e 1052 case NALNUM:
1053 while (s < strend) {
bbce6d69 1054 if (!isALNUM(*s)) {
1055 if (tmp && regtry(prog, s))
1056 goto got_it;
1057 else
1058 tmp = doevery;
1059 }
1060 else
1061 tmp = 1;
1062 s++;
1063 }
1064 break;
a0ed51b3 1065 case NALNUMUTF8:
1066 while (s < strend) {
dfe13c55 1067 if (!swash_fetch(PL_utf8_alnum, (U8*)s)) {
a0ed51b3 1068 if (tmp && regtry(prog, s))
1069 goto got_it;
1070 else
1071 tmp = doevery;
1072 }
1073 else
1074 tmp = 1;
1075 s += UTF8SKIP(s);
1076 }
1077 break;
bbce6d69 1078 case NALNUML:
3280af22 1079 PL_reg_flags |= RF_tainted;
bbce6d69 1080 while (s < strend) {
1081 if (!isALNUM_LC(*s)) {
a0d0e21e 1082 if (tmp && regtry(prog, s))
1083 goto got_it;
a687059c 1084 else
a0d0e21e 1085 tmp = doevery;
a687059c 1086 }
a0d0e21e 1087 else
1088 tmp = 1;
1089 s++;
1090 }
1091 break;
a0ed51b3 1092 case NALNUMLUTF8:
1093 PL_reg_flags |= RF_tainted;
1094 while (s < strend) {
dfe13c55 1095 if (!isALNUM_LC_utf8((U8*)s)) {
a0ed51b3 1096 if (tmp && regtry(prog, s))
1097 goto got_it;
1098 else
1099 tmp = doevery;
1100 }
1101 else
1102 tmp = 1;
1103 s += UTF8SKIP(s);
1104 }
1105 break;
a0d0e21e 1106 case SPACE:
1107 while (s < strend) {
1108 if (isSPACE(*s)) {
1109 if (tmp && regtry(prog, s))
1110 goto got_it;
1111 else
1112 tmp = doevery;
2304df62 1113 }
a0d0e21e 1114 else
1115 tmp = 1;
1116 s++;
1117 }
1118 break;
a0ed51b3 1119 case SPACEUTF8:
1120 while (s < strend) {
dfe13c55 1121 if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s)) {
a0ed51b3 1122 if (tmp && regtry(prog, s))
1123 goto got_it;
1124 else
1125 tmp = doevery;
1126 }
1127 else
1128 tmp = 1;
1129 s += UTF8SKIP(s);
1130 }
1131 break;
bbce6d69 1132 case SPACEL:
3280af22 1133 PL_reg_flags |= RF_tainted;
bbce6d69 1134 while (s < strend) {
1135 if (isSPACE_LC(*s)) {
1136 if (tmp && regtry(prog, s))
1137 goto got_it;
1138 else
1139 tmp = doevery;
1140 }
1141 else
1142 tmp = 1;
1143 s++;
1144 }
1145 break;
a0ed51b3 1146 case SPACELUTF8:
1147 PL_reg_flags |= RF_tainted;
1148 while (s < strend) {
dfe13c55 1149 if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) {
a0ed51b3 1150 if (tmp && regtry(prog, s))
1151 goto got_it;
1152 else
1153 tmp = doevery;
1154 }
1155 else
1156 tmp = 1;
1157 s += UTF8SKIP(s);
1158 }
1159 break;
a0d0e21e 1160 case NSPACE:
1161 while (s < strend) {
1162 if (!isSPACE(*s)) {
1163 if (tmp && regtry(prog, s))
1164 goto got_it;
1165 else
1166 tmp = doevery;
a687059c 1167 }
a0d0e21e 1168 else
1169 tmp = 1;
1170 s++;
1171 }
1172 break;
a0ed51b3 1173 case NSPACEUTF8:
1174 while (s < strend) {
dfe13c55 1175 if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s))) {
a0ed51b3 1176 if (tmp && regtry(prog, s))
1177 goto got_it;
1178 else
1179 tmp = doevery;
1180 }
1181 else
1182 tmp = 1;
1183 s += UTF8SKIP(s);
1184 }
1185 break;
bbce6d69 1186 case NSPACEL:
3280af22 1187 PL_reg_flags |= RF_tainted;
bbce6d69 1188 while (s < strend) {
1189 if (!isSPACE_LC(*s)) {
1190 if (tmp && regtry(prog, s))
1191 goto got_it;
1192 else
1193 tmp = doevery;
1194 }
1195 else
1196 tmp = 1;
1197 s++;
1198 }
1199 break;
a0ed51b3 1200 case NSPACELUTF8:
1201 PL_reg_flags |= RF_tainted;
1202 while (s < strend) {
dfe13c55 1203 if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) {
a0ed51b3 1204 if (tmp && regtry(prog, s))
1205 goto got_it;
1206 else
1207 tmp = doevery;
1208 }
1209 else
1210 tmp = 1;
1211 s += UTF8SKIP(s);
1212 }
1213 break;
a0d0e21e 1214 case DIGIT:
1215 while (s < strend) {
1216 if (isDIGIT(*s)) {
1217 if (tmp && regtry(prog, s))
1218 goto got_it;
1219 else
1220 tmp = doevery;
2b69d0c2 1221 }
a0d0e21e 1222 else
1223 tmp = 1;
1224 s++;
1225 }
1226 break;
a0ed51b3 1227 case DIGITUTF8:
1228 while (s < strend) {
dfe13c55 1229 if (swash_fetch(PL_utf8_digit,(U8*)s)) {
a0ed51b3 1230 if (tmp && regtry(prog, s))
1231 goto got_it;
1232 else
1233 tmp = doevery;
1234 }
1235 else
1236 tmp = 1;
1237 s += UTF8SKIP(s);
1238 }
1239 break;
b8c5462f 1240 case DIGITL:
1241 PL_reg_flags |= RF_tainted;
1242 while (s < strend) {
1243 if (isDIGIT_LC(*s)) {
1244 if (tmp && regtry(prog, s))
1245 goto got_it;
1246 else
1247 tmp = doevery;
1248 }
1249 else
1250 tmp = 1;
1251 s++;
1252 }
1253 break;
1254 case DIGITLUTF8:
1255 PL_reg_flags |= RF_tainted;
1256 while (s < strend) {
1257 if (isDIGIT_LC_utf8((U8*)s)) {
1258 if (tmp && regtry(prog, s))
1259 goto got_it;
1260 else
1261 tmp = doevery;
1262 }
1263 else
1264 tmp = 1;
1265 s += UTF8SKIP(s);
1266 }
1267 break;
a0d0e21e 1268 case NDIGIT:
1269 while (s < strend) {
1270 if (!isDIGIT(*s)) {
1271 if (tmp && regtry(prog, s))
1272 goto got_it;
1273 else
1274 tmp = doevery;
a687059c 1275 }
a0d0e21e 1276 else
1277 tmp = 1;
1278 s++;
1279 }
1280 break;
a0ed51b3 1281 case NDIGITUTF8:
1282 while (s < strend) {
dfe13c55 1283 if (!swash_fetch(PL_utf8_digit,(U8*)s)) {
a0ed51b3 1284 if (tmp && regtry(prog, s))
1285 goto got_it;
1286 else
1287 tmp = doevery;
1288 }
1289 else
1290 tmp = 1;
1291 s += UTF8SKIP(s);
1292 }
1293 break;
b8c5462f 1294 case NDIGITL:
1295 PL_reg_flags |= RF_tainted;
1296 while (s < strend) {
1297 if (!isDIGIT_LC(*s)) {
1298 if (tmp && regtry(prog, s))
1299 goto got_it;
1300 else
1301 tmp = doevery;
1302 }
1303 else
1304 tmp = 1;
1305 s++;
a0ed51b3 1306 }
b8c5462f 1307 break;
1308 case NDIGITLUTF8:
1309 PL_reg_flags |= RF_tainted;
1310 while (s < strend) {
1311 if (!isDIGIT_LC_utf8((U8*)s)) {
1312 if (tmp && regtry(prog, s))
1313 goto got_it;
cf93c79d 1314 else
b8c5462f 1315 tmp = doevery;
1316 }
1317 else
1318 tmp = 1;
1319 s += UTF8SKIP(s);
1320 }
1321 break;
d6a28714 1322 }
1323 }
1324 else {
1325 dontbother = 0;
1326 if (prog->float_substr != Nullsv) { /* Trim the end. */
1327 char *last;
1328 I32 oldpos = scream_pos;
1329
1330 if (flags & REXEC_SCREAM) {
1331 last = screaminstr(sv, prog->float_substr, s - strbeg,
1332 end_shift, &scream_pos, 1); /* last one */
1333 if (!last)
1334 last = scream_olds; /* Only one occurence. */
b8c5462f 1335 }
d6a28714 1336 else {
1337 STRLEN len;
1338 char *little = SvPV(prog->float_substr, len);
1339
1340 if (SvTAIL(prog->float_substr)) {
1341 if (memEQ(strend - len + 1, little, len - 1))
1342 last = strend - len + 1;
1343 else if (!PL_multiline)
1344 last = memEQ(strend - len, little, len)
1345 ? strend - len : Nullch;
b8c5462f 1346 else
d6a28714 1347 goto find_last;
1348 } else {
1349 find_last:
1350 if (len)
1351 last = rninstr(s, strend, little, little + len);
b8c5462f 1352 else
d6a28714 1353 last = strend; /* matching `$' */
b8c5462f 1354 }
b8c5462f 1355 }
d6a28714 1356 if (last == NULL) goto phooey; /* Should not happen! */
1357 dontbother = strend - last + prog->float_min_offset;
1358 }
1359 if (minlen && (dontbother < minlen))
1360 dontbother = minlen - 1;
1361 strend -= dontbother; /* this one's always in bytes! */
1362 /* We don't know much -- general case. */
1363 if (UTF) {
1364 for (;;) {
1365 if (regtry(prog, s))
1366 goto got_it;
1367 if (s >= strend)
1368 break;
b8c5462f 1369 s += UTF8SKIP(s);
d6a28714 1370 };
1371 }
1372 else {
1373 do {
1374 if (regtry(prog, s))
1375 goto got_it;
1376 } while (s++ < strend);
1377 }
1378 }
1379
1380 /* Failure. */
1381 goto phooey;
1382
1383got_it:
1384 RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
1385
1386 if (PL_reg_eval_set) {
1387 /* Preserve the current value of $^R */
1388 if (oreplsv != GvSV(PL_replgv))
1389 sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
1390 restored, the value remains
1391 the same. */
1392 restore_pos(aTHXo_ 0);
1393 }
1394
1395 /* make sure $`, $&, $', and $digit will work later */
1396 if ( !(flags & REXEC_NOT_FIRST) ) {
1397 if (RX_MATCH_COPIED(prog)) {
1398 Safefree(prog->subbeg);
1399 RX_MATCH_COPIED_off(prog);
1400 }
1401 if (flags & REXEC_COPY_STR) {
1402 I32 i = PL_regeol - startpos + (stringarg - strbeg);
1403
1404 s = savepvn(strbeg, i);
1405 prog->subbeg = s;
1406 prog->sublen = i;
1407 RX_MATCH_COPIED_on(prog);
1408 }
1409 else {
1410 prog->subbeg = strbeg;
1411 prog->sublen = PL_regeol - strbeg; /* strend may have been modified */
1412 }
1413 }
1414
1415 return 1;
1416
1417phooey:
1418 if (PL_reg_eval_set)
1419 restore_pos(aTHXo_ 0);
1420 return 0;
1421}
1422
1423/*
1424 - regtry - try match at specific point
1425 */
1426STATIC I32 /* 0 failure, 1 success */
1427S_regtry(pTHX_ regexp *prog, char *startpos)
1428{
1429 dTHR;
1430 register I32 i;
1431 register I32 *sp;
1432 register I32 *ep;
1433 CHECKPOINT lastcp;
1434
1435 if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
1436 MAGIC *mg;
1437
1438 PL_reg_eval_set = RS_init;
1439 DEBUG_r(DEBUG_s(
1440 PerlIO_printf(Perl_debug_log, " setting stack tmpbase at %i\n",
1441 PL_stack_sp - PL_stack_base);
1442 ));
1443 SAVEINT(cxstack[cxstack_ix].blk_oldsp);
1444 cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
1445 /* Otherwise OP_NEXTSTATE will free whatever on stack now. */
1446 SAVETMPS;
1447 /* Apparently this is not needed, judging by wantarray. */
1448 /* SAVEINT(cxstack[cxstack_ix].blk_gimme);
1449 cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
1450
1451 if (PL_reg_sv) {
1452 /* Make $_ available to executed code. */
1453 if (PL_reg_sv != DEFSV) {
1454 /* SAVE_DEFSV does *not* suffice here for USE_THREADS */
1455 SAVESPTR(DEFSV);
1456 DEFSV = PL_reg_sv;
b8c5462f 1457 }
d6a28714 1458
1459 if (!(SvTYPE(PL_reg_sv) >= SVt_PVMG && SvMAGIC(PL_reg_sv)
1460 && (mg = mg_find(PL_reg_sv, 'g')))) {
1461 /* prepare for quick setting of pos */
1462 sv_magic(PL_reg_sv, (SV*)0, 'g', Nullch, 0);
1463 mg = mg_find(PL_reg_sv, 'g');
1464 mg->mg_len = -1;
b8c5462f 1465 }
d6a28714 1466 PL_reg_magic = mg;
1467 PL_reg_oldpos = mg->mg_len;
1468 SAVEDESTRUCTOR(restore_pos, 0);
1469 }
1470 if (!PL_reg_curpm)
1471 New(22,PL_reg_curpm, 1, PMOP);
1472 PL_reg_curpm->op_pmregexp = prog;
1473 PL_reg_oldcurpm = PL_curpm;
1474 PL_curpm = PL_reg_curpm;
1475 if (RX_MATCH_COPIED(prog)) {
1476 /* Here is a serious problem: we cannot rewrite subbeg,
1477 since it may be needed if this match fails. Thus
1478 $` inside (?{}) could fail... */
1479 PL_reg_oldsaved = prog->subbeg;
1480 PL_reg_oldsavedlen = prog->sublen;
1481 RX_MATCH_COPIED_off(prog);
1482 }
1483 else
1484 PL_reg_oldsaved = Nullch;
1485 prog->subbeg = PL_bostr;
1486 prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
1487 }
1488 prog->startp[0] = startpos - PL_bostr;
1489 PL_reginput = startpos;
1490 PL_regstartp = prog->startp;
1491 PL_regendp = prog->endp;
1492 PL_reglastparen = &prog->lastparen;
1493 prog->lastparen = 0;
1494 PL_regsize = 0;
1495 DEBUG_r(PL_reg_starttry = startpos);
1496 if (PL_reg_start_tmpl <= prog->nparens) {
1497 PL_reg_start_tmpl = prog->nparens*3/2 + 3;
1498 if(PL_reg_start_tmp)
1499 Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1500 else
1501 New(22,PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1502 }
1503
1504 /* XXXX What this code is doing here?!!! There should be no need
1505 to do this again and again, PL_reglastparen should take care of
1506 this! */
1507 sp = prog->startp;
1508 ep = prog->endp;
1509 if (prog->nparens) {
1510 for (i = prog->nparens; i >= 1; i--) {
1511 *++sp = -1;
1512 *++ep = -1;
1513 }
1514 }
1515 REGCP_SET;
1516 if (regmatch(prog->program + 1)) {
1517 prog->endp[0] = PL_reginput - PL_bostr;
1518 return 1;
1519 }
1520 REGCP_UNWIND;
1521 return 0;
1522}
1523
1524/*
1525 - regmatch - main matching routine
1526 *
1527 * Conceptually the strategy is simple: check to see whether the current
1528 * node matches, call self recursively to see whether the rest matches,
1529 * and then act accordingly. In practice we make some effort to avoid
1530 * recursion, in particular by going through "ordinary" nodes (that don't
1531 * need to know whether the rest of the match failed) by a loop instead of
1532 * by recursion.
1533 */
1534/* [lwall] I've hoisted the register declarations to the outer block in order to
1535 * maybe save a little bit of pushing and popping on the stack. It also takes
1536 * advantage of machines that use a register save mask on subroutine entry.
1537 */
1538STATIC I32 /* 0 failure, 1 success */
1539S_regmatch(pTHX_ regnode *prog)
1540{
1541 dTHR;
1542 register regnode *scan; /* Current node. */
1543 regnode *next; /* Next node. */
1544 regnode *inner; /* Next node in internal branch. */
1545 register I32 nextchr; /* renamed nextchr - nextchar colides with
1546 function of same name */
1547 register I32 n; /* no or next */
1548 register I32 ln; /* len or last */
1549 register char *s; /* operand or save */
1550 register char *locinput = PL_reginput;
1551 register I32 c1, c2, paren; /* case fold search, parenth */
1552 int minmod = 0, sw = 0, logical = 0;
1553#ifdef DEBUGGING
1554 PL_regindent++;
1555#endif
1556
1557 /* Note that nextchr is a byte even in UTF */
1558 nextchr = UCHARAT(locinput);
1559 scan = prog;
1560 while (scan != NULL) {
1561#define sayNO_L (logical ? (logical = 0, sw = 0, goto cont) : sayNO)
1562#ifdef DEBUGGING
1563# define sayYES goto yes
1564# define sayNO goto no
1565# define saySAME(x) if (x) goto yes; else goto no
1566# define REPORT_CODE_OFF 24
1567#else
1568# define sayYES return 1
1569# define sayNO return 0
1570# define saySAME(x) return x
1571#endif
1572 DEBUG_r( {
1573 SV *prop = sv_newmortal();
1574 int docolor = *PL_colors[0];
1575 int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
1576 int l = (PL_regeol - locinput > taill ? taill : PL_regeol - locinput);
1577 /* The part of the string before starttry has one color
1578 (pref0_len chars), between starttry and current
1579 position another one (pref_len - pref0_len chars),
1580 after the current position the third one.
1581 We assume that pref0_len <= pref_len, otherwise we
1582 decrease pref0_len. */
1583 int pref_len = (locinput - PL_bostr > (5 + taill) - l
1584 ? (5 + taill) - l : locinput - PL_bostr);
1585 int pref0_len = pref_len - (locinput - PL_reg_starttry);
1586
1587 if (l + pref_len < (5 + taill) && l < PL_regeol - locinput)
1588 l = ( PL_regeol - locinput > (5 + taill) - pref_len
1589 ? (5 + taill) - pref_len : PL_regeol - locinput);
1590 if (pref0_len < 0)
1591 pref0_len = 0;
1592 if (pref0_len > pref_len)
1593 pref0_len = pref_len;
1594 regprop(prop, scan);
1595 PerlIO_printf(Perl_debug_log,
1596 "%4i <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|%3d:%*s%s\n",
1597 locinput - PL_bostr,
1598 PL_colors[4], pref0_len,
1599 locinput - pref_len, PL_colors[5],
1600 PL_colors[2], pref_len - pref0_len,
1601 locinput - pref_len + pref0_len, PL_colors[3],
1602 (docolor ? "" : "> <"),
1603 PL_colors[0], l, locinput, PL_colors[1],
1604 15 - l - pref_len + 1,
1605 "",
1606 scan - PL_regprogram, PL_regindent*2, "",
1607 SvPVX(prop));
1608 } );
1609
1610 next = scan + NEXT_OFF(scan);
1611 if (next == scan)
1612 next = NULL;
1613
1614 switch (OP(scan)) {
1615 case BOL:
1616 if (locinput == PL_bostr
1617 ? PL_regprev == '\n'
1618 : (PL_multiline &&
1619 (nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
1620 {
1621 /* regtill = regbol; */
b8c5462f 1622 break;
1623 }
d6a28714 1624 sayNO;
1625 case MBOL:
1626 if (locinput == PL_bostr
1627 ? PL_regprev == '\n'
1628 : ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
1629 {
b8c5462f 1630 break;
1631 }
d6a28714 1632 sayNO;
1633 case SBOL:
1634 if (locinput == PL_regbol && PL_regprev == '\n')
b8c5462f 1635 break;
d6a28714 1636 sayNO;
1637 case GPOS:
1638 if (locinput == PL_reg_ganch)
1639 break;
1640 sayNO;
1641 case EOL:
1642 if (PL_multiline)
1643 goto meol;
1644 else
1645 goto seol;
1646 case MEOL:
1647 meol:
1648 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 1649 sayNO;
b8c5462f 1650 break;
d6a28714 1651 case SEOL:
1652 seol:
1653 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 1654 sayNO;
d6a28714 1655 if (PL_regeol - locinput > 1)
b8c5462f 1656 sayNO;
b8c5462f 1657 break;
d6a28714 1658 case EOS:
1659 if (PL_regeol != locinput)
b8c5462f 1660 sayNO;
d6a28714 1661 break;
1662 case SANYUTF8:
b8c5462f 1663 if (nextchr & 0x80) {
b8c5462f 1664 locinput += PL_utf8skip[nextchr];
d6a28714 1665 if (locinput > PL_regeol)
1666 sayNO;
b8c5462f 1667 nextchr = UCHARAT(locinput);
1668 break;
1669 }
d6a28714 1670 if (!nextchr && locinput >= PL_regeol)
4633a7c4 1671 sayNO;
b8c5462f 1672 nextchr = UCHARAT(++locinput);
a0d0e21e 1673 break;
d6a28714 1674 case SANY:
1675 if (!nextchr && locinput >= PL_regeol)
b8c5462f 1676 sayNO;
1677 nextchr = UCHARAT(++locinput);
b85d18e9 1678 break;
d6a28714 1679 case ANYUTF8:
a0ed51b3 1680 if (nextchr & 0x80) {
b8c5462f 1681 locinput += PL_utf8skip[nextchr];
d6a28714 1682 if (locinput > PL_regeol)
1683 sayNO;
a0ed51b3 1684 nextchr = UCHARAT(locinput);
1685 break;
1686 }
d6a28714 1687 if (!nextchr && locinput >= PL_regeol || nextchr == '\n')
a0ed51b3 1688 sayNO;
1689 nextchr = UCHARAT(++locinput);
1690 break;
d6a28714 1691 case REG_ANY:
1692 if (!nextchr && locinput >= PL_regeol || nextchr == '\n')
4633a7c4 1693 sayNO;
76e3520e 1694 nextchr = UCHARAT(++locinput);
a0d0e21e 1695 break;
d6a28714 1696 case EXACT:
cd439c50 1697 s = STRING(scan);
1698 ln = STR_LEN(scan);
d6a28714 1699 /* Inline the first character, for speed. */
1700 if (UCHARAT(s) != nextchr)
1701 sayNO;
1702 if (PL_regeol - locinput < ln)
1703 sayNO;
1704 if (ln > 1 && memNE(s, locinput, ln))
1705 sayNO;
1706 locinput += ln;
1707 nextchr = UCHARAT(locinput);
1708 break;
1709 case EXACTFL:
b8c5462f 1710 PL_reg_flags |= RF_tainted;
1711 /* FALL THROUGH */
d6a28714 1712 case EXACTF:
cd439c50 1713 s = STRING(scan);
1714 ln = STR_LEN(scan);
d6a28714 1715
1716 if (UTF) {
1717 char *l = locinput;
1718 char *e = s + ln;
1719 c1 = OP(scan) == EXACTF;
1720 while (s < e) {
1721 if (l >= PL_regeol)
1722 sayNO;
1723 if (utf8_to_uv((U8*)s, 0) != (c1 ?
1724 toLOWER_utf8((U8*)l) :
1725 toLOWER_LC_utf8((U8*)l)))
1726 {
1727 sayNO;
1728 }
1729 s += UTF8SKIP(s);
1730 l += UTF8SKIP(l);
b8c5462f 1731 }
d6a28714 1732 locinput = l;
a0ed51b3 1733 nextchr = UCHARAT(locinput);
1734 break;
1735 }
d6a28714 1736
1737 /* Inline the first character, for speed. */
1738 if (UCHARAT(s) != nextchr &&
1739 UCHARAT(s) != ((OP(scan) == EXACTF)
1740 ? PL_fold : PL_fold_locale)[nextchr])
a0ed51b3 1741 sayNO;
d6a28714 1742 if (PL_regeol - locinput < ln)
b8c5462f 1743 sayNO;
d6a28714 1744 if (ln > 1 && (OP(scan) == EXACTF
1745 ? ibcmp(s, locinput, ln)
1746 : ibcmp_locale(s, locinput, ln)))
4633a7c4 1747 sayNO;
d6a28714 1748 locinput += ln;
1749 nextchr = UCHARAT(locinput);
a0d0e21e 1750 break;
d6a28714 1751 case ANYOFUTF8:
cd439c50 1752 s = MASK(scan);
d6a28714 1753 if (!REGINCLASSUTF8(scan, (U8*)locinput))
4633a7c4 1754 sayNO;
d6a28714 1755 if (locinput >= PL_regeol)
1756 sayNO;
1757 locinput += PL_utf8skip[nextchr];
1758 nextchr = UCHARAT(locinput);
1759 break;
1760 case ANYOF:
cd439c50 1761 s = MASK(scan);
d6a28714 1762 if (nextchr < 0)
b8c5462f 1763 nextchr = UCHARAT(locinput);
d6a28714 1764 if (!REGINCLASS(s, nextchr))
1765 sayNO;
1766 if (!nextchr && locinput >= PL_regeol)
4633a7c4 1767 sayNO;
b8c5462f 1768 nextchr = UCHARAT(++locinput);
1769 break;
d6a28714 1770 case ALNUML:
b8c5462f 1771 PL_reg_flags |= RF_tainted;
1772 /* FALL THROUGH */
d6a28714 1773 case ALNUM:
b8c5462f 1774 if (!nextchr)
4633a7c4 1775 sayNO;
d6a28714 1776 if (!(OP(scan) == ALNUM
1777 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
b8c5462f 1778 sayNO;
1779 nextchr = UCHARAT(++locinput);
bbce6d69 1780 break;
d6a28714 1781 case ALNUMLUTF8:
3280af22 1782 PL_reg_flags |= RF_tainted;
bbce6d69 1783 /* FALL THROUGH */
d6a28714 1784 case ALNUMUTF8:
b8c5462f 1785 if (!nextchr)
1786 sayNO;
1787 if (nextchr & 0x80) {
d6a28714 1788 if (!(OP(scan) == ALNUMUTF8
1789 ? swash_fetch(PL_utf8_alnum, (U8*)locinput)
1790 : isALNUM_LC_utf8((U8*)locinput)))
b8c5462f 1791 {
1792 sayNO;
a0ed51b3 1793 }
b8c5462f 1794 locinput += PL_utf8skip[nextchr];
a0ed51b3 1795 nextchr = UCHARAT(locinput);
1796 break;
1797 }
d6a28714 1798 if (!(OP(scan) == ALNUMUTF8
1799 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
bbce6d69 1800 sayNO;
b8c5462f 1801 nextchr = UCHARAT(++locinput);
a0d0e21e 1802 break;
d6a28714 1803 case NALNUML:
b8c5462f 1804 PL_reg_flags |= RF_tainted;
1805 /* FALL THROUGH */
d6a28714 1806 case NALNUM:
1807 if (!nextchr && locinput >= PL_regeol)
a0ed51b3 1808 sayNO;
d6a28714 1809 if (OP(scan) == NALNUM
1810 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
a0ed51b3 1811 sayNO;
b8c5462f 1812 nextchr = UCHARAT(++locinput);
a0ed51b3 1813 break;
d6a28714 1814 case NALNUMLUTF8:
b8c5462f 1815 PL_reg_flags |= RF_tainted;
1816 /* FALL THROUGH */
d6a28714 1817 case NALNUMUTF8:
3280af22 1818 if (!nextchr && locinput >= PL_regeol)
4633a7c4 1819 sayNO;
b8c5462f 1820 if (nextchr & 0x80) {
d6a28714 1821 if (OP(scan) == NALNUMUTF8
1822 ? swash_fetch(PL_utf8_alnum, (U8*)locinput)
1823 : isALNUM_LC_utf8((U8*)locinput))
1824 {
b8c5462f 1825 sayNO;
d6a28714 1826 }
b8c5462f 1827 locinput += PL_utf8skip[nextchr];
1828 nextchr = UCHARAT(locinput);
1829 break;
1830 }
d6a28714 1831 if (OP(scan) == NALNUMUTF8
1832 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
b8c5462f 1833 sayNO;
76e3520e 1834 nextchr = UCHARAT(++locinput);
a0d0e21e 1835 break;
d6a28714 1836 case BOUNDL:
1837 case NBOUNDL:
3280af22 1838 PL_reg_flags |= RF_tainted;
bbce6d69 1839 /* FALL THROUGH */
d6a28714 1840 case BOUND:
1841 case NBOUND:
1842 /* was last char in word? */
1843 ln = (locinput != PL_regbol) ? UCHARAT(locinput - 1) : PL_regprev;
1844 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
1845 ln = isALNUM(ln);
1846 n = isALNUM(nextchr);
1847 }
1848 else {
1849 ln = isALNUM_LC(ln);
1850 n = isALNUM_LC(nextchr);
1851 }
1852 if (((!ln) == (!n)) == (OP(scan) == BOUND || OP(scan) == BOUNDL))
4633a7c4 1853 sayNO;
a0d0e21e 1854 break;
d6a28714 1855 case BOUNDLUTF8:
1856 case NBOUNDLUTF8:
a0ed51b3 1857 PL_reg_flags |= RF_tainted;
1858 /* FALL THROUGH */
d6a28714 1859 case BOUNDUTF8:
1860 case NBOUNDUTF8:
1861 /* was last char in word? */
1862 ln = (locinput != PL_regbol)
1863 ? utf8_to_uv(reghop((U8*)locinput, -1), 0) : PL_regprev;
1864 if (OP(scan) == BOUNDUTF8 || OP(scan) == NBOUNDUTF8) {
1865 ln = isALNUM_uni(ln);
1866 n = swash_fetch(PL_utf8_alnum, (U8*)locinput);
a0ed51b3 1867 }
d6a28714 1868 else {
1869 ln = isALNUM_LC_uni(ln);
1870 n = isALNUM_LC_utf8((U8*)locinput);
1871 }
1872 if (((!ln) == (!n)) == (OP(scan) == BOUNDUTF8 || OP(scan) == BOUNDLUTF8))
a0ed51b3 1873 sayNO;
a0ed51b3 1874 break;
d6a28714 1875 case SPACEL:
3280af22 1876 PL_reg_flags |= RF_tainted;
bbce6d69 1877 /* FALL THROUGH */
d6a28714 1878 case SPACE:
1879 if (!nextchr && locinput >= PL_regeol)
4633a7c4 1880 sayNO;
d6a28714 1881 if (!(OP(scan) == SPACE
1882 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
4633a7c4 1883 sayNO;
76e3520e 1884 nextchr = UCHARAT(++locinput);
a0d0e21e 1885 break;
d6a28714 1886 case SPACELUTF8:
a0ed51b3 1887 PL_reg_flags |= RF_tainted;
1888 /* FALL THROUGH */
d6a28714 1889 case SPACEUTF8:
a0ed51b3 1890 if (!nextchr && locinput >= PL_regeol)
1891 sayNO;
1892 if (nextchr & 0x80) {
d6a28714 1893 if (!(OP(scan) == SPACEUTF8
1894 ? swash_fetch(PL_utf8_space,(U8*)locinput)
1895 : isSPACE_LC_utf8((U8*)locinput)))
1896 {
a0ed51b3 1897 sayNO;
d6a28714 1898 }
6f06b55f 1899 locinput += PL_utf8skip[nextchr];
a0ed51b3 1900 nextchr = UCHARAT(locinput);
1901 break;
1902 }
d6a28714 1903 if (!(OP(scan) == SPACEUTF8
1904 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
a0ed51b3 1905 sayNO;
1906 nextchr = UCHARAT(++locinput);
1907 break;
d6a28714 1908 case NSPACEL:
3280af22 1909 PL_reg_flags |= RF_tainted;
bbce6d69 1910 /* FALL THROUGH */
d6a28714 1911 case NSPACE:
b8c5462f 1912 if (!nextchr)
1913 sayNO;
d6a28714 1914 if (OP(scan) == SPACE
1915 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
4633a7c4 1916 sayNO;
b8c5462f 1917 nextchr = UCHARAT(++locinput);
a0d0e21e 1918 break;
d6a28714 1919 case NSPACELUTF8:
a0ed51b3 1920 PL_reg_flags |= RF_tainted;
1921 /* FALL THROUGH */
d6a28714 1922 case NSPACEUTF8:
b8c5462f 1923 if (!nextchr)
1924 sayNO;
1925 if (nextchr & 0x80) {
d6a28714 1926 if (OP(scan) == NSPACEUTF8
1927 ? swash_fetch(PL_utf8_space,(U8*)locinput)
1928 : isSPACE_LC_utf8((U8*)locinput))
b8c5462f 1929 {
1930 sayNO;
1931 }
1932 locinput += PL_utf8skip[nextchr];
1933 nextchr = UCHARAT(locinput);
1934 break;
a0ed51b3 1935 }
d6a28714 1936 if (OP(scan) == NSPACEUTF8
1937 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
4633a7c4 1938 sayNO;
76e3520e 1939 nextchr = UCHARAT(++locinput);
a0d0e21e 1940 break;
d6a28714 1941 case DIGITL:
a0ed51b3 1942 PL_reg_flags |= RF_tainted;
1943 /* FALL THROUGH */
d6a28714 1944 case DIGIT:
a0ed51b3 1945 if (!nextchr && locinput >= PL_regeol)
1946 sayNO;
d6a28714 1947 if (!(OP(scan) == DIGIT
1948 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
4633a7c4 1949 sayNO;
76e3520e 1950 nextchr = UCHARAT(++locinput);
a0d0e21e 1951 break;
d6a28714 1952 case DIGITLUTF8:
a0ed51b3 1953 PL_reg_flags |= RF_tainted;
1954 /* FALL THROUGH */
d6a28714 1955 case DIGITUTF8:
a0ed51b3 1956 if (!nextchr)
1957 sayNO;
1958 if (nextchr & 0x80) {
d6a28714 1959 if (OP(scan) == NDIGITUTF8
1960 ? swash_fetch(PL_utf8_digit,(U8*)locinput)
1961 : isDIGIT_LC_utf8((U8*)locinput))
dfe13c55 1962 {
a0ed51b3 1963 sayNO;
dfe13c55 1964 }
6f06b55f 1965 locinput += PL_utf8skip[nextchr];
a0ed51b3 1966 nextchr = UCHARAT(locinput);
1967 break;
1968 }
d6a28714 1969 if (!isDIGIT(nextchr))
a0ed51b3 1970 sayNO;
1971 nextchr = UCHARAT(++locinput);
1972 break;
d6a28714 1973 case NDIGITL:
b8c5462f 1974 PL_reg_flags |= RF_tainted;
1975 /* FALL THROUGH */
d6a28714 1976 case NDIGIT:
b8c5462f 1977 if (!nextchr)
1978 sayNO;
d6a28714 1979 if (OP(scan) == DIGIT
1980 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
4633a7c4 1981 sayNO;
76e3520e 1982 nextchr = UCHARAT(++locinput);
a0d0e21e 1983 break;
d6a28714 1984 case NDIGITLUTF8:
b8c5462f 1985 PL_reg_flags |= RF_tainted;
1986 /* FALL THROUGH */
d6a28714 1987 case NDIGITUTF8:
b8c5462f 1988 if (!nextchr && locinput >= PL_regeol)
1989 sayNO;
a0ed51b3 1990 if (nextchr & 0x80) {
d6a28714 1991 if (swash_fetch(PL_utf8_digit,(U8*)locinput))
a0ed51b3 1992 sayNO;
6f06b55f 1993 locinput += PL_utf8skip[nextchr];
a0ed51b3 1994 nextchr = UCHARAT(locinput);
1995 break;
1996 }
d6a28714 1997 if (isDIGIT(nextchr))
a0ed51b3 1998 sayNO;
1999 nextchr = UCHARAT(++locinput);
2000 break;
2001 case CLUMP:
dfe13c55 2002 if (locinput >= PL_regeol || swash_fetch(PL_utf8_mark,(U8*)locinput))
a0ed51b3 2003 sayNO;
6f06b55f 2004 locinput += PL_utf8skip[nextchr];
dfe13c55 2005 while (locinput < PL_regeol && swash_fetch(PL_utf8_mark,(U8*)locinput))
a0ed51b3 2006 locinput += UTF8SKIP(locinput);
2007 if (locinput > PL_regeol)
2008 sayNO;
2009 nextchr = UCHARAT(locinput);
2010 break;
c8756f30 2011 case REFFL:
3280af22 2012 PL_reg_flags |= RF_tainted;
c8756f30 2013 /* FALL THROUGH */
c277df42 2014 case REF:
c8756f30 2015 case REFF:
c277df42 2016 n = ARG(scan); /* which paren pair */
cf93c79d 2017 ln = PL_regstartp[n];
2c2d71f5 2018 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
cf93c79d 2019 if (*PL_reglastparen < n || ln == -1)
af3f8c16 2020 sayNO; /* Do not match unless seen CLOSEn. */
cf93c79d 2021 if (ln == PL_regendp[n])
a0d0e21e 2022 break;
a0ed51b3 2023
cf93c79d 2024 s = PL_bostr + ln;
a0ed51b3 2025 if (UTF && OP(scan) != REF) { /* REF can do byte comparison */
2026 char *l = locinput;
cf93c79d 2027 char *e = PL_bostr + PL_regendp[n];
a0ed51b3 2028 /*
2029 * Note that we can't do the "other character" lookup trick as
2030 * in the 8-bit case (no pun intended) because in Unicode we
2031 * have to map both upper and title case to lower case.
2032 */
2033 if (OP(scan) == REFF) {
2034 while (s < e) {
2035 if (l >= PL_regeol)
2036 sayNO;
dfe13c55 2037 if (toLOWER_utf8((U8*)s) != toLOWER_utf8((U8*)l))
a0ed51b3 2038 sayNO;
2039 s += UTF8SKIP(s);
2040 l += UTF8SKIP(l);
2041 }
2042 }
2043 else {
2044 while (s < e) {
2045 if (l >= PL_regeol)
2046 sayNO;
dfe13c55 2047 if (toLOWER_LC_utf8((U8*)s) != toLOWER_LC_utf8((U8*)l))
a0ed51b3 2048 sayNO;
2049 s += UTF8SKIP(s);
2050 l += UTF8SKIP(l);
2051 }
2052 }
2053 locinput = l;
2054 nextchr = UCHARAT(locinput);
2055 break;
2056 }
2057
a0d0e21e 2058 /* Inline the first character, for speed. */
76e3520e 2059 if (UCHARAT(s) != nextchr &&
c8756f30 2060 (OP(scan) == REF ||
2061 (UCHARAT(s) != ((OP(scan) == REFF
22c35a8c 2062 ? PL_fold : PL_fold_locale)[nextchr]))))
4633a7c4 2063 sayNO;
cf93c79d 2064 ln = PL_regendp[n] - ln;
3280af22 2065 if (locinput + ln > PL_regeol)
4633a7c4 2066 sayNO;
c8756f30 2067 if (ln > 1 && (OP(scan) == REF
2068 ? memNE(s, locinput, ln)
2069 : (OP(scan) == REFF
2070 ? ibcmp(s, locinput, ln)
2071 : ibcmp_locale(s, locinput, ln))))
4633a7c4 2072 sayNO;
a0d0e21e 2073 locinput += ln;
76e3520e 2074 nextchr = UCHARAT(locinput);
a0d0e21e 2075 break;
2076
2077 case NOTHING:
c277df42 2078 case TAIL:
a0d0e21e 2079 break;
2080 case BACK:
2081 break;
c277df42 2082 case EVAL:
2083 {
2084 dSP;
533c011a 2085 OP_4tree *oop = PL_op;
3280af22 2086 COP *ocurcop = PL_curcop;
2087 SV **ocurpad = PL_curpad;
c277df42 2088 SV *ret;
2089
2090 n = ARG(scan);
533c011a 2091 PL_op = (OP_4tree*)PL_regdata->data[n];
2092 DEBUG_r( PerlIO_printf(Perl_debug_log, " re_eval 0x%x\n", PL_op) );
dfad63ad 2093 PL_curpad = AvARRAY((AV*)PL_regdata->data[n + 2]);
cf93c79d 2094 PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
c277df42 2095
cea2e8a9 2096 CALLRUNOPS(aTHX); /* Scalar context. */
c277df42 2097 SPAGAIN;
2098 ret = POPs;
2099 PUTBACK;
2100
0f5d15d6 2101 PL_op = oop;
2102 PL_curpad = ocurpad;
2103 PL_curcop = ocurcop;
c277df42 2104 if (logical) {
0f5d15d6 2105 if (logical == 2) { /* Postponed subexpression. */
2106 regexp *re;
22c35a8c 2107 MAGIC *mg = Null(MAGIC*);
0f5d15d6 2108 re_cc_state state;
0f5d15d6 2109 CHECKPOINT cp, lastcp;
2110
2111 if(SvROK(ret) || SvRMAGICAL(ret)) {
2112 SV *sv = SvROK(ret) ? SvRV(ret) : ret;
2113
2114 if(SvMAGICAL(sv))
2115 mg = mg_find(sv, 'r');
2116 }
2117 if (mg) {
2118 re = (regexp *)mg->mg_obj;
df0003d4 2119 (void)ReREFCNT_inc(re);
0f5d15d6 2120 }
2121 else {
2122 STRLEN len;
2123 char *t = SvPV(ret, len);
2124 PMOP pm;
2125 char *oprecomp = PL_regprecomp;
2126 I32 osize = PL_regsize;
2127 I32 onpar = PL_regnpar;
2128
2129 pm.op_pmflags = 0;
cea2e8a9 2130 re = CALLREGCOMP(aTHX_ t, t + len, &pm);
0f5d15d6 2131 if (!(SvFLAGS(ret)
2132 & (SVs_TEMP | SVs_PADTMP | SVf_READONLY)))
2133 sv_magic(ret,(SV*)ReREFCNT_inc(re),'r',0,0);
2134 PL_regprecomp = oprecomp;
2135 PL_regsize = osize;
2136 PL_regnpar = onpar;
2137 }
2138 DEBUG_r(
2139 PerlIO_printf(Perl_debug_log,
2140 "Entering embedded `%s%.60s%s%s'\n",
2141 PL_colors[0],
2142 re->precomp,
2143 PL_colors[1],
2144 (strlen(re->precomp) > 60 ? "..." : ""))
2145 );
2146 state.node = next;
2147 state.prev = PL_reg_call_cc;
2148 state.cc = PL_regcc;
2149 state.re = PL_reg_re;
2150
2ab05381 2151 PL_regcc = 0;
0f5d15d6 2152
2153 cp = regcppush(0); /* Save *all* the positions. */
2154 REGCP_SET;
2155 cache_re(re);
2156 state.ss = PL_savestack_ix;
2157 *PL_reglastparen = 0;
2158 PL_reg_call_cc = &state;
2159 PL_reginput = locinput;
2c2d71f5 2160
2161 /* XXXX This is too dramatic a measure... */
2162 PL_reg_maxiter = 0;
2163
0f5d15d6 2164 if (regmatch(re->program + 1)) {
2c914db6 2165 /* Even though we succeeded, we need to restore
2166 global variables, since we may be wrapped inside
2167 SUSPEND, thus the match may be not finished yet. */
2168
2169 /* XXXX Do this only if SUSPENDed? */
2170 PL_reg_call_cc = state.prev;
2171 PL_regcc = state.cc;
2172 PL_reg_re = state.re;
2173 cache_re(PL_reg_re);
2174
2175 /* XXXX This is too dramatic a measure... */
2176 PL_reg_maxiter = 0;
2177
2178 /* These are needed even if not SUSPEND. */
0f5d15d6 2179 ReREFCNT_dec(re);
2180 regcpblow(cp);
2181 sayYES;
2182 }
2183 DEBUG_r(
2184 PerlIO_printf(Perl_debug_log,
2185 "%*s failed...\n",
2186 REPORT_CODE_OFF+PL_regindent*2, "")
2187 );
2188 ReREFCNT_dec(re);
2189 REGCP_UNWIND;
2190 regcppop();
2191 PL_reg_call_cc = state.prev;
2192 PL_regcc = state.cc;
2193 PL_reg_re = state.re;
d3790889 2194 cache_re(PL_reg_re);
2c2d71f5 2195
2196 /* XXXX This is too dramatic a measure... */
2197 PL_reg_maxiter = 0;
2198
0f5d15d6 2199 sayNO;
2200 }
c277df42 2201 sw = SvTRUE(ret);
0f5d15d6 2202 logical = 0;
a0ed51b3 2203 }
2204 else
3280af22 2205 sv_setsv(save_scalar(PL_replgv), ret);
c277df42 2206 break;
2207 }
a0d0e21e 2208 case OPEN:
c277df42 2209 n = ARG(scan); /* which paren pair */
3280af22 2210 PL_reg_start_tmp[n] = locinput;
2211 if (n > PL_regsize)
2212 PL_regsize = n;
a0d0e21e 2213 break;
2214 case CLOSE:
c277df42 2215 n = ARG(scan); /* which paren pair */
cf93c79d 2216 PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
2217 PL_regendp[n] = locinput - PL_bostr;
3280af22 2218 if (n > *PL_reglastparen)
2219 *PL_reglastparen = n;
a0d0e21e 2220 break;
c277df42 2221 case GROUPP:
2222 n = ARG(scan); /* which paren pair */
cf93c79d 2223 sw = (*PL_reglastparen >= n && PL_regendp[n] != -1);
c277df42 2224 break;
2225 case IFTHEN:
2c2d71f5 2226 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
c277df42 2227 if (sw)
2228 next = NEXTOPER(NEXTOPER(scan));
2229 else {
2230 next = scan + ARG(scan);
2231 if (OP(next) == IFTHEN) /* Fake one. */
2232 next = NEXTOPER(NEXTOPER(next));
2233 }
2234 break;
2235 case LOGICAL:
0f5d15d6 2236 logical = scan->flags;
c277df42 2237 break;
2ab05381 2238/*******************************************************************
2239 PL_regcc contains infoblock about the innermost (...)* loop, and
2240 a pointer to the next outer infoblock.
2241
2242 Here is how Y(A)*Z is processed (if it is compiled into CURLYX/WHILEM):
2243
2244 1) After matching X, regnode for CURLYX is processed;
2245
2246 2) This regnode creates infoblock on the stack, and calls
2247 regmatch() recursively with the starting point at WHILEM node;
2248
2249 3) Each hit of WHILEM node tries to match A and Z (in the order
2250 depending on the current iteration, min/max of {min,max} and
2251 greediness). The information about where are nodes for "A"
2252 and "Z" is read from the infoblock, as is info on how many times "A"
2253 was already matched, and greediness.
2254
2255 4) After A matches, the same WHILEM node is hit again.
2256
2257 5) Each time WHILEM is hit, PL_regcc is the infoblock created by CURLYX
2258 of the same pair. Thus when WHILEM tries to match Z, it temporarily
2259 resets PL_regcc, since this Y(A)*Z can be a part of some other loop:
2260 as in (Y(A)*Z)*. If Z matches, the automaton will hit the WHILEM node
2261 of the external loop.
2262
2263 Currently present infoblocks form a tree with a stem formed by PL_curcc
2264 and whatever it mentions via ->next, and additional attached trees
2265 corresponding to temporarily unset infoblocks as in "5" above.
2266
2267 In the following picture infoblocks for outer loop of
2268 (Y(A)*?Z)*?T are denoted O, for inner I. NULL starting block
2269 is denoted by x. The matched string is YAAZYAZT. Temporarily postponed
2270 infoblocks are drawn below the "reset" infoblock.
2271
2272 In fact in the picture below we do not show failed matches for Z and T
2273 by WHILEM blocks. [We illustrate minimal matches, since for them it is
2274 more obvious *why* one needs to *temporary* unset infoblocks.]
2275
2276 Matched REx position InfoBlocks Comment
2277 (Y(A)*?Z)*?T x
2278 Y(A)*?Z)*?T x <- O
2279 Y (A)*?Z)*?T x <- O
2280 Y A)*?Z)*?T x <- O <- I
2281 YA )*?Z)*?T x <- O <- I
2282 YA A)*?Z)*?T x <- O <- I
2283 YAA )*?Z)*?T x <- O <- I
2284 YAA Z)*?T x <- O # Temporary unset I
2285 I
2286
2287 YAAZ Y(A)*?Z)*?T x <- O
2288 I
2289
2290 YAAZY (A)*?Z)*?T x <- O
2291 I
2292
2293 YAAZY A)*?Z)*?T x <- O <- I
2294 I
2295
2296 YAAZYA )*?Z)*?T x <- O <- I
2297 I
2298
2299 YAAZYA Z)*?T x <- O # Temporary unset I
2300 I,I
2301
2302 YAAZYAZ )*?T x <- O
2303 I,I
2304
2305 YAAZYAZ T x # Temporary unset O
2306 O
2307 I,I
2308
2309 YAAZYAZT x
2310 O
2311 I,I
2312 *******************************************************************/
a0d0e21e 2313 case CURLYX: {
2314 CURCUR cc;
3280af22 2315 CHECKPOINT cp = PL_savestack_ix;
c277df42 2316
2317 if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
2318 next += ARG(next);
3280af22 2319 cc.oldcc = PL_regcc;
2320 PL_regcc = &cc;
2321 cc.parenfloor = *PL_reglastparen;
a0d0e21e 2322 cc.cur = -1;
2323 cc.min = ARG1(scan);
2324 cc.max = ARG2(scan);
c277df42 2325 cc.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
a0d0e21e 2326 cc.next = next;
2327 cc.minmod = minmod;
2328 cc.lastloc = 0;
3280af22 2329 PL_reginput = locinput;
a0d0e21e 2330 n = regmatch(PREVOPER(next)); /* start on the WHILEM */
2331 regcpblow(cp);
3280af22 2332 PL_regcc = cc.oldcc;
4633a7c4 2333 saySAME(n);
a0d0e21e 2334 }
2335 /* NOT REACHED */
2336 case WHILEM: {
2337 /*
2338 * This is really hard to understand, because after we match
2339 * what we're trying to match, we must make sure the rest of
2c2d71f5 2340 * the REx is going to match for sure, and to do that we have
a0d0e21e 2341 * to go back UP the parse tree by recursing ever deeper. And
2342 * if it fails, we have to reset our parent's current state
2343 * that we can try again after backing off.
2344 */
2345
c277df42 2346 CHECKPOINT cp, lastcp;
3280af22 2347 CURCUR* cc = PL_regcc;
c277df42 2348 char *lastloc = cc->lastloc; /* Detection of 0-len. */
2349
4633a7c4 2350 n = cc->cur + 1; /* how many we know we matched */
3280af22 2351 PL_reginput = locinput;
a0d0e21e 2352
c277df42 2353 DEBUG_r(
2354 PerlIO_printf(Perl_debug_log,
2355 "%*s %ld out of %ld..%ld cc=%lx\n",
3280af22 2356 REPORT_CODE_OFF+PL_regindent*2, "",
c277df42 2357 (long)n, (long)cc->min,
2358 (long)cc->max, (long)cc)
2359 );
4633a7c4 2360
a0d0e21e 2361 /* If degenerate scan matches "", assume scan done. */
2362
579cf2c3 2363 if (locinput == cc->lastloc && n >= cc->min) {
3280af22 2364 PL_regcc = cc->oldcc;
2ab05381 2365 if (PL_regcc)
2366 ln = PL_regcc->cur;
c277df42 2367 DEBUG_r(
c3464db5 2368 PerlIO_printf(Perl_debug_log,
2369 "%*s empty match detected, try continuation...\n",
3280af22 2370 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2371 );
a0d0e21e 2372 if (regmatch(cc->next))
4633a7c4 2373 sayYES;
c277df42 2374 DEBUG_r(
c3464db5 2375 PerlIO_printf(Perl_debug_log,
2376 "%*s failed...\n",
3280af22 2377 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2378 );
2ab05381 2379 if (PL_regcc)
2380 PL_regcc->cur = ln;
3280af22 2381 PL_regcc = cc;
4633a7c4 2382 sayNO;
a0d0e21e 2383 }
2384
2385 /* First just match a string of min scans. */
2386
2387 if (n < cc->min) {
2388 cc->cur = n;
2389 cc->lastloc = locinput;
4633a7c4 2390 if (regmatch(cc->scan))
2391 sayYES;
2392 cc->cur = n - 1;
c277df42 2393 cc->lastloc = lastloc;
2394 DEBUG_r(
c3464db5 2395 PerlIO_printf(Perl_debug_log,
2396 "%*s failed...\n",
3280af22 2397 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2398 );
4633a7c4 2399 sayNO;
a0d0e21e 2400 }
2401
2c2d71f5 2402 if (scan->flags) {
2403 /* Check whether we already were at this position.
2404 Postpone detection until we know the match is not
2405 *that* much linear. */
2406 if (!PL_reg_maxiter) {
2407 PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
2408 PL_reg_leftiter = PL_reg_maxiter;
2409 }
2410 if (PL_reg_leftiter-- == 0) {
2411 I32 size = (PL_reg_maxiter + 7)/8;
2412 if (PL_reg_poscache) {
2413 if (PL_reg_poscache_size < size) {
2414 Renew(PL_reg_poscache, size, char);
2415 PL_reg_poscache_size = size;
2416 }
2417 Zero(PL_reg_poscache, size, char);
2418 }
2419 else {
2420 PL_reg_poscache_size = size;
2421 Newz(29, PL_reg_poscache, size, char);
2422 }
2423 DEBUG_r(
2424 PerlIO_printf(Perl_debug_log,
2425 "%sDetected a super-linear match, switching on caching%s...\n",
2426 PL_colors[4], PL_colors[5])
2427 );
2428 }
2429 if (PL_reg_leftiter < 0) {
2430 I32 o = locinput - PL_bostr, b;
2431
2432 o = (scan->flags & 0xf) - 1 + o * (scan->flags>>4);
2433 b = o % 8;
2434 o /= 8;
2435 if (PL_reg_poscache[o] & (1<<b)) {
2436 DEBUG_r(
2437 PerlIO_printf(Perl_debug_log,
2438 "%*s already tried at this position...\n",
2439 REPORT_CODE_OFF+PL_regindent*2, "")
2440 );
2441 sayNO;
2442 }
2443 PL_reg_poscache[o] |= (1<<b);
2444 }
2445 }
2446
a0d0e21e 2447 /* Prefer next over scan for minimal matching. */
2448
2449 if (cc->minmod) {
3280af22 2450 PL_regcc = cc->oldcc;
2ab05381 2451 if (PL_regcc)
2452 ln = PL_regcc->cur;
5f05dabc 2453 cp = regcppush(cc->parenfloor);
c277df42 2454 REGCP_SET;
5f05dabc 2455 if (regmatch(cc->next)) {
c277df42 2456 regcpblow(cp);
4633a7c4 2457 sayYES; /* All done. */
5f05dabc 2458 }
c277df42 2459 REGCP_UNWIND;
5f05dabc 2460 regcppop();
2ab05381 2461 if (PL_regcc)
2462 PL_regcc->cur = ln;
3280af22 2463 PL_regcc = cc;
a0d0e21e 2464
c277df42 2465 if (n >= cc->max) { /* Maximum greed exceeded? */
599cee73 2466 if (ckWARN(WARN_UNSAFE) && n >= REG_INFTY
3280af22 2467 && !(PL_reg_flags & RF_warned)) {
2468 PL_reg_flags |= RF_warned;
cea2e8a9 2469 Perl_warner(aTHX_ WARN_UNSAFE, "%s limit (%d) exceeded",
2f3ca594 2470 "Complex regular subexpression recursion",
2471 REG_INFTY - 1);
c277df42 2472 }
4633a7c4 2473 sayNO;
c277df42 2474 }
a687059c 2475
c277df42 2476 DEBUG_r(
c3464db5 2477 PerlIO_printf(Perl_debug_log,
2478 "%*s trying longer...\n",
3280af22 2479 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2480 );
a0d0e21e 2481 /* Try scanning more and see if it helps. */
3280af22 2482 PL_reginput = locinput;
a0d0e21e 2483 cc->cur = n;
2484 cc->lastloc = locinput;
5f05dabc 2485 cp = regcppush(cc->parenfloor);
c277df42 2486 REGCP_SET;
5f05dabc 2487 if (regmatch(cc->scan)) {
c277df42 2488 regcpblow(cp);
4633a7c4 2489 sayYES;
5f05dabc 2490 }
c277df42 2491 DEBUG_r(
c3464db5 2492 PerlIO_printf(Perl_debug_log,
2493 "%*s failed...\n",
3280af22 2494 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2495 );
2496 REGCP_UNWIND;
5f05dabc 2497 regcppop();
4633a7c4 2498 cc->cur = n - 1;
c277df42 2499 cc->lastloc = lastloc;
4633a7c4 2500 sayNO;
a0d0e21e 2501 }
2502
2503 /* Prefer scan over next for maximal matching. */
2504
2505 if (n < cc->max) { /* More greed allowed? */
5f05dabc 2506 cp = regcppush(cc->parenfloor);
a0d0e21e 2507 cc->cur = n;
2508 cc->lastloc = locinput;
c277df42 2509 REGCP_SET;
5f05dabc 2510 if (regmatch(cc->scan)) {
c277df42 2511 regcpblow(cp);
4633a7c4 2512 sayYES;
5f05dabc 2513 }
c277df42 2514 REGCP_UNWIND;
a0d0e21e 2515 regcppop(); /* Restore some previous $<digit>s? */
3280af22 2516 PL_reginput = locinput;
c277df42 2517 DEBUG_r(
c3464db5 2518 PerlIO_printf(Perl_debug_log,
2519 "%*s failed, try continuation...\n",
3280af22 2520 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2521 );
2522 }
599cee73 2523 if (ckWARN(WARN_UNSAFE) && n >= REG_INFTY
2524 && !(PL_reg_flags & RF_warned)) {
3280af22 2525 PL_reg_flags |= RF_warned;
cea2e8a9 2526 Perl_warner(aTHX_ WARN_UNSAFE, "%s limit (%d) exceeded",
cb5d145d 2527 "Complex regular subexpression recursion",
2528 REG_INFTY - 1);
a0d0e21e 2529 }
2530
2531 /* Failed deeper matches of scan, so see if this one works. */
3280af22 2532 PL_regcc = cc->oldcc;
2ab05381 2533 if (PL_regcc)
2534 ln = PL_regcc->cur;
a0d0e21e 2535 if (regmatch(cc->next))
4633a7c4 2536 sayYES;
c277df42 2537 DEBUG_r(
c3464db5 2538 PerlIO_printf(Perl_debug_log, "%*s failed...\n",
3280af22 2539 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2540 );
2ab05381 2541 if (PL_regcc)
2542 PL_regcc->cur = ln;
3280af22 2543 PL_regcc = cc;
4633a7c4 2544 cc->cur = n - 1;
c277df42 2545 cc->lastloc = lastloc;
4633a7c4 2546 sayNO;
a0d0e21e 2547 }
2548 /* NOT REACHED */
c277df42 2549 case BRANCHJ:
2550 next = scan + ARG(scan);
2551 if (next == scan)
2552 next = NULL;
2553 inner = NEXTOPER(NEXTOPER(scan));
2554 goto do_branch;
2555 case BRANCH:
2556 inner = NEXTOPER(scan);
2557 do_branch:
2558 {
2559 CHECKPOINT lastcp;
2560 c1 = OP(scan);
2561 if (OP(next) != c1) /* No choice. */
2562 next = inner; /* Avoid recursion. */
a0d0e21e 2563 else {
3280af22 2564 int lastparen = *PL_reglastparen;
c277df42 2565
2566 REGCP_SET;
a0d0e21e 2567 do {
3280af22 2568 PL_reginput = locinput;
c277df42 2569 if (regmatch(inner))
4633a7c4 2570 sayYES;
c277df42 2571 REGCP_UNWIND;
3280af22 2572 for (n = *PL_reglastparen; n > lastparen; n--)
cf93c79d 2573 PL_regendp[n] = -1;
3280af22 2574 *PL_reglastparen = n;
c277df42 2575 scan = next;
a0d0e21e 2576 /*SUPPRESS 560*/
c277df42 2577 if (n = (c1 == BRANCH ? NEXT_OFF(next) : ARG(next)))
2578 next += n;
a0d0e21e 2579 else
c277df42 2580 next = NULL;
c277df42 2581 inner = NEXTOPER(scan);
2582 if (c1 == BRANCHJ) {
2583 inner = NEXTOPER(inner);
2584 }
2585 } while (scan != NULL && OP(scan) == c1);
4633a7c4 2586 sayNO;
a0d0e21e 2587 /* NOTREACHED */
a687059c 2588 }
a0d0e21e 2589 }
2590 break;
2591 case MINMOD:
2592 minmod = 1;
2593 break;
c277df42 2594 case CURLYM:
2595 {
00db4c45 2596 I32 l = 0;
c277df42 2597 CHECKPOINT lastcp;
2598
2599 /* We suppose that the next guy does not need
2600 backtracking: in particular, it is of constant length,
2601 and has no parenths to influence future backrefs. */
2602 ln = ARG1(scan); /* min to match */
2603 n = ARG2(scan); /* max to match */
c277df42 2604 paren = scan->flags;
2605 if (paren) {
3280af22 2606 if (paren > PL_regsize)
2607 PL_regsize = paren;
2608 if (paren > *PL_reglastparen)
2609 *PL_reglastparen = paren;
c277df42 2610 }
dc45a647 2611 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
c277df42 2612 if (paren)
2613 scan += NEXT_OFF(scan); /* Skip former OPEN. */
3280af22 2614 PL_reginput = locinput;
c277df42 2615 if (minmod) {
2616 minmod = 0;
2617 if (ln && regrepeat_hard(scan, ln, &l) < ln)
2618 sayNO;
5f4b28b2 2619 if (ln && l == 0 && n >= ln
c277df42 2620 /* In fact, this is tricky. If paren, then the
2621 fact that we did/didnot match may influence
2622 future execution. */
2623 && !(paren && ln == 0))
2624 ln = n;
3280af22 2625 locinput = PL_reginput;
22c35a8c 2626 if (PL_regkind[(U8)OP(next)] == EXACT) {
cd439c50 2627 c1 = (U8)*STRING(next);
c277df42 2628 if (OP(next) == EXACTF)
22c35a8c 2629 c2 = PL_fold[c1];
c277df42 2630 else if (OP(next) == EXACTFL)
22c35a8c 2631 c2 = PL_fold_locale[c1];
c277df42 2632 else
2633 c2 = c1;
a0ed51b3 2634 }
2635 else
c277df42 2636 c1 = c2 = -1000;
2637 REGCP_SET;
5f4b28b2 2638 /* This may be improved if l == 0. */
c277df42 2639 while (n >= ln || (n == REG_INFTY && ln > 0 && l)) { /* ln overflow ? */
2640 /* If it could work, try it. */
2641 if (c1 == -1000 ||
3280af22 2642 UCHARAT(PL_reginput) == c1 ||
2643 UCHARAT(PL_reginput) == c2)
c277df42 2644 {
2645 if (paren) {
2646 if (n) {
cf93c79d 2647 PL_regstartp[paren] =
2648 HOPc(PL_reginput, -l) - PL_bostr;
2649 PL_regendp[paren] = PL_reginput - PL_bostr;
a0ed51b3 2650 }
2651 else
cf93c79d 2652 PL_regendp[paren] = -1;
c277df42 2653 }
2654 if (regmatch(next))
2655 sayYES;
2656 REGCP_UNWIND;
2657 }
2658 /* Couldn't or didn't -- move forward. */
3280af22 2659 PL_reginput = locinput;
c277df42 2660 if (regrepeat_hard(scan, 1, &l)) {
2661 ln++;
3280af22 2662 locinput = PL_reginput;
c277df42 2663 }
2664 else
2665 sayNO;
2666 }
a0ed51b3 2667 }
2668 else {
c277df42 2669 n = regrepeat_hard(scan, n, &l);
2670 if (n != 0 && l == 0
2671 /* In fact, this is tricky. If paren, then the
2672 fact that we did/didnot match may influence
2673 future execution. */
2674 && !(paren && ln == 0))
2675 ln = n;
3280af22 2676 locinput = PL_reginput;
c277df42 2677 DEBUG_r(
5c0ca799 2678 PerlIO_printf(Perl_debug_log,
2679 "%*s matched %ld times, len=%ld...\n",
3280af22 2680 REPORT_CODE_OFF+PL_regindent*2, "", n, l)
c277df42 2681 );
2682 if (n >= ln) {
22c35a8c 2683 if (PL_regkind[(U8)OP(next)] == EXACT) {
cd439c50 2684 c1 = (U8)*STRING(next);
c277df42 2685 if (OP(next) == EXACTF)
22c35a8c 2686 c2 = PL_fold[c1];
c277df42 2687 else if (OP(next) == EXACTFL)
22c35a8c 2688 c2 = PL_fold_locale[c1];
c277df42 2689 else
2690 c2 = c1;
a0ed51b3 2691 }
2692 else
c277df42 2693 c1 = c2 = -1000;
2694 }
2695 REGCP_SET;
2696 while (n >= ln) {
2697 /* If it could work, try it. */
2698 if (c1 == -1000 ||
3280af22 2699 UCHARAT(PL_reginput) == c1 ||
2700 UCHARAT(PL_reginput) == c2)
a0ed51b3 2701 {
2702 DEBUG_r(
c3464db5 2703 PerlIO_printf(Perl_debug_log,
2704 "%*s trying tail with n=%ld...\n",
3280af22 2705 REPORT_CODE_OFF+PL_regindent*2, "", n)
a0ed51b3 2706 );
2707 if (paren) {
2708 if (n) {
cf93c79d 2709 PL_regstartp[paren] = HOPc(PL_reginput, -l) - PL_bostr;
2710 PL_regendp[paren] = PL_reginput - PL_bostr;
c277df42 2711 }
a0ed51b3 2712 else
cf93c79d 2713 PL_regendp[paren] = -1;
c277df42 2714 }
a0ed51b3 2715 if (regmatch(next))
2716 sayYES;
2717 REGCP_UNWIND;
2718 }
c277df42 2719 /* Couldn't or didn't -- back up. */
2720 n--;
dfe13c55 2721 locinput = HOPc(locinput, -l);
3280af22 2722 PL_reginput = locinput;
c277df42 2723 }
2724 }
2725 sayNO;
2726 break;
2727 }
2728 case CURLYN:
2729 paren = scan->flags; /* Which paren to set */
3280af22 2730 if (paren > PL_regsize)
2731 PL_regsize = paren;
2732 if (paren > *PL_reglastparen)
2733 *PL_reglastparen = paren;
c277df42 2734 ln = ARG1(scan); /* min to match */
2735 n = ARG2(scan); /* max to match */
dc45a647 2736 scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
c277df42 2737 goto repeat;
a0d0e21e 2738 case CURLY:
c277df42 2739 paren = 0;
a0d0e21e 2740 ln = ARG1(scan); /* min to match */
2741 n = ARG2(scan); /* max to match */
dc45a647 2742 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
a0d0e21e 2743 goto repeat;
2744 case STAR:
2745 ln = 0;
c277df42 2746 n = REG_INFTY;
a0d0e21e 2747 scan = NEXTOPER(scan);
c277df42 2748 paren = 0;
a0d0e21e 2749 goto repeat;
2750 case PLUS:
c277df42 2751 ln = 1;
2752 n = REG_INFTY;
2753 scan = NEXTOPER(scan);
2754 paren = 0;
2755 repeat:
a0d0e21e 2756 /*
2757 * Lookahead to avoid useless match attempts
2758 * when we know what character comes next.
2759 */
22c35a8c 2760 if (PL_regkind[(U8)OP(next)] == EXACT) {
cd439c50 2761 c1 = (U8)*STRING(next);
bbce6d69 2762 if (OP(next) == EXACTF)
22c35a8c 2763 c2 = PL_fold[c1];
bbce6d69 2764 else if (OP(next) == EXACTFL)
22c35a8c 2765 c2 = PL_fold_locale[c1];
bbce6d69 2766 else
2767 c2 = c1;
2768 }
a0d0e21e 2769 else
bbce6d69 2770 c1 = c2 = -1000;
3280af22 2771 PL_reginput = locinput;
a0d0e21e 2772 if (minmod) {
c277df42 2773 CHECKPOINT lastcp;
a0d0e21e 2774 minmod = 0;
2775 if (ln && regrepeat(scan, ln) < ln)
4633a7c4 2776 sayNO;
a0ed51b3 2777 locinput = PL_reginput;
c277df42 2778 REGCP_SET;
0fe9bf95 2779 if (c1 != -1000) {
2780 char *e = locinput + n - ln; /* Should not check after this */
2781 char *old = locinput;
2782
2783 if (e >= PL_regeol || (n == REG_INFTY))
2784 e = PL_regeol - 1;
2785 while (1) {
2786 /* Find place 'next' could work */
2787 if (c1 == c2) {
2788 while (locinput <= e && *locinput != c1)
2789 locinput++;
2790 } else {
2791 while (locinput <= e
2792 && *locinput != c1
2793 && *locinput != c2)
2794 locinput++;
2795 }
2796 if (locinput > e)
2797 sayNO;
2798 /* PL_reginput == old now */
2799 if (locinput != old) {
2800 ln = 1; /* Did some */
2801 if (regrepeat(scan, locinput - old) <
2802 locinput - old)
2803 sayNO;
2804 }
2805 /* PL_reginput == locinput now */
2806 if (paren) {
2807 if (ln) {
cf93c79d 2808 PL_regstartp[paren] = HOPc(locinput, -1) - PL_bostr;
2809 PL_regendp[paren] = locinput - PL_bostr;
0fe9bf95 2810 }
2811 else
cf93c79d 2812 PL_regendp[paren] = -1;
0fe9bf95 2813 }
2814 if (regmatch(next))
2815 sayYES;
2816 PL_reginput = locinput; /* Could be reset... */
2817 REGCP_UNWIND;
2818 /* Couldn't or didn't -- move forward. */
2819 old = locinput++;
2820 }
2821 }
2822 else
c277df42 2823 while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */
a0d0e21e 2824 /* If it could work, try it. */
bbce6d69 2825 if (c1 == -1000 ||
3280af22 2826 UCHARAT(PL_reginput) == c1 ||
2827 UCHARAT(PL_reginput) == c2)
bbce6d69 2828 {
c277df42 2829 if (paren) {
2830 if (n) {
cf93c79d 2831 PL_regstartp[paren] = HOPc(PL_reginput, -1) - PL_bostr;
2832 PL_regendp[paren] = PL_reginput - PL_bostr;
a0ed51b3 2833 }
2834 else
cf93c79d 2835 PL_regendp[paren] = -1;
c277df42 2836 }
a0d0e21e 2837 if (regmatch(next))
4633a7c4 2838 sayYES;
c277df42 2839 REGCP_UNWIND;
bbce6d69 2840 }
c277df42 2841 /* Couldn't or didn't -- move forward. */
a0ed51b3 2842 PL_reginput = locinput;
a0d0e21e 2843 if (regrepeat(scan, 1)) {
2844 ln++;
a0ed51b3 2845 locinput = PL_reginput;
2846 }
2847 else
4633a7c4 2848 sayNO;
a0d0e21e 2849 }
2850 }
2851 else {
c277df42 2852 CHECKPOINT lastcp;
a0d0e21e 2853 n = regrepeat(scan, n);
a0ed51b3 2854 locinput = PL_reginput;
22c35a8c 2855 if (ln < n && PL_regkind[(U8)OP(next)] == EOL &&
3280af22 2856 (!PL_multiline || OP(next) == SEOL))
a0d0e21e 2857 ln = n; /* why back off? */
c277df42 2858 REGCP_SET;
2859 if (paren) {
2860 while (n >= ln) {
2861 /* If it could work, try it. */
2862 if (c1 == -1000 ||
3280af22 2863 UCHARAT(PL_reginput) == c1 ||
2864 UCHARAT(PL_reginput) == c2)
c277df42 2865 {
2866 if (paren && n) {
2867 if (n) {
cf93c79d 2868 PL_regstartp[paren] = HOPc(PL_reginput, -1) - PL_bostr;
2869 PL_regendp[paren] = PL_reginput - PL_bostr;
a0ed51b3 2870 }
2871 else
cf93c79d 2872 PL_regendp[paren] = -1;
c277df42 2873 }
2874 if (regmatch(next))
2875 sayYES;
2876 REGCP_UNWIND;
2877 }
2878 /* Couldn't or didn't -- back up. */
2879 n--;
dfe13c55 2880 PL_reginput = locinput = HOPc(locinput, -1);
c277df42 2881 }
a0ed51b3 2882 }
2883 else {
c277df42 2884 while (n >= ln) {
2885 /* If it could work, try it. */
2886 if (c1 == -1000 ||
3280af22 2887 UCHARAT(PL_reginput) == c1 ||
2888 UCHARAT(PL_reginput) == c2)
c277df42 2889 {
2890 if (regmatch(next))
2891 sayYES;
2892 REGCP_UNWIND;
2893 }
2894 /* Couldn't or didn't -- back up. */
2895 n--;
dfe13c55 2896 PL_reginput = locinput = HOPc(locinput, -1);
bbce6d69 2897 }
a0d0e21e 2898 }
2899 }
4633a7c4 2900 sayNO;
c277df42 2901 break;
a0d0e21e 2902 case END:
0f5d15d6 2903 if (PL_reg_call_cc) {
2904 re_cc_state *cur_call_cc = PL_reg_call_cc;
2905 CURCUR *cctmp = PL_regcc;
2906 regexp *re = PL_reg_re;
2907 CHECKPOINT cp, lastcp;
2908
2909 cp = regcppush(0); /* Save *all* the positions. */
2910 REGCP_SET;
2911 regcp_set_to(PL_reg_call_cc->ss); /* Restore parens of
2912 the caller. */
2913 PL_reginput = locinput; /* Make position available to
2914 the callcc. */
2915 cache_re(PL_reg_call_cc->re);
2916 PL_regcc = PL_reg_call_cc->cc;
2917 PL_reg_call_cc = PL_reg_call_cc->prev;
2918 if (regmatch(cur_call_cc->node)) {
2919 PL_reg_call_cc = cur_call_cc;
2920 regcpblow(cp);
2921 sayYES;
2922 }
2923 REGCP_UNWIND;
2924 regcppop();
2925 PL_reg_call_cc = cur_call_cc;
2926 PL_regcc = cctmp;
2927 PL_reg_re = re;
2928 cache_re(re);
2929
2930 DEBUG_r(
2931 PerlIO_printf(Perl_debug_log,
2932 "%*s continuation failed...\n",
2933 REPORT_CODE_OFF+PL_regindent*2, "")
2934 );
2935 sayNO;
2936 }
3280af22 2937 if (locinput < PL_regtill)
7e5428c5 2938 sayNO; /* Cannot match: too short. */
2939 /* Fall through */
2940 case SUCCEED:
3280af22 2941 PL_reginput = locinput; /* put where regtry can find it */
4633a7c4 2942 sayYES; /* Success! */
c277df42 2943 case SUSPEND:
2944 n = 1;
9fe1d20c 2945 PL_reginput = locinput;
c277df42 2946 goto do_ifmatch;
a0d0e21e 2947 case UNLESSM:
c277df42 2948 n = 0;
a0ed51b3 2949 if (scan->flags) {
0fe9bf95 2950 if (UTF) { /* XXXX This is absolutely
2951 broken, we read before
2952 start of string. */
2953 s = HOPMAYBEc(locinput, -scan->flags);
2954 if (!s)
2955 goto say_yes;
2956 PL_reginput = s;
2957 }
2958 else {
2959 if (locinput < PL_bostr + scan->flags)
2960 goto say_yes;
2961 PL_reginput = locinput - scan->flags;
2962 goto do_ifmatch;
2963 }
a0ed51b3 2964 }
2965 else
2966 PL_reginput = locinput;
c277df42 2967 goto do_ifmatch;
2968 case IFMATCH:
2969 n = 1;
a0ed51b3 2970 if (scan->flags) {
0fe9bf95 2971 if (UTF) { /* XXXX This is absolutely
2972 broken, we read before
2973 start of string. */
2974 s = HOPMAYBEc(locinput, -scan->flags);
2975 if (!s || s < PL_bostr)
2976 goto say_no;
2977 PL_reginput = s;
2978 }
2979 else {
2980 if (locinput < PL_bostr + scan->flags)
2981 goto say_no;
2982 PL_reginput = locinput - scan->flags;
2983 goto do_ifmatch;
2984 }
a0ed51b3 2985 }
2986 else
2987 PL_reginput = locinput;
2988
c277df42 2989 do_ifmatch:
c277df42 2990 inner = NEXTOPER(NEXTOPER(scan));
2991 if (regmatch(inner) != n) {
2992 say_no:
2993 if (logical) {
2994 logical = 0;
2995 sw = 0;
2996 goto do_longjump;
a0ed51b3 2997 }
2998 else
c277df42 2999 sayNO;
3000 }
3001 say_yes:
3002 if (logical) {
3003 logical = 0;
3004 sw = 1;
3005 }
fe44a5e8 3006 if (OP(scan) == SUSPEND) {
3280af22 3007 locinput = PL_reginput;
565764a8 3008 nextchr = UCHARAT(locinput);
fe44a5e8 3009 }
c277df42 3010 /* FALL THROUGH. */
3011 case LONGJMP:
3012 do_longjump:
3013 next = scan + ARG(scan);
3014 if (next == scan)
3015 next = NULL;
a0d0e21e 3016 break;
3017 default:
c030ccd9 3018 PerlIO_printf(PerlIO_stderr(), "%lx %d\n",
c277df42 3019 (unsigned long)scan, OP(scan));
cea2e8a9 3020 Perl_croak(aTHX_ "regexp memory corruption");
a687059c 3021 }
a0d0e21e 3022 scan = next;
3023 }
a687059c 3024
a0d0e21e 3025 /*
3026 * We get here only if there's trouble -- normally "case END" is
3027 * the terminating point.
3028 */
cea2e8a9 3029 Perl_croak(aTHX_ "corrupted regexp pointers");
a0d0e21e 3030 /*NOTREACHED*/
4633a7c4 3031 sayNO;
3032
3033yes:
3034#ifdef DEBUGGING
3280af22 3035 PL_regindent--;
4633a7c4 3036#endif
3037 return 1;
3038
3039no:
3040#ifdef DEBUGGING
3280af22 3041 PL_regindent--;
4633a7c4 3042#endif
a0d0e21e 3043 return 0;
a687059c 3044}
3045
3046/*
3047 - regrepeat - repeatedly match something simple, report how many
3048 */
3049/*
3050 * [This routine now assumes that it will only match on things of length 1.
3051 * That was true before, but now we assume scan - reginput is the count,
a0ed51b3 3052 * rather than incrementing count on every character. [Er, except utf8.]]
a687059c 3053 */
76e3520e 3054STATIC I32
cea2e8a9 3055S_regrepeat(pTHX_ regnode *p, I32 max)
a687059c 3056{
5c0ca799 3057 dTHR;
a0d0e21e 3058 register char *scan;
3059 register char *opnd;
3060 register I32 c;
3280af22 3061 register char *loceol = PL_regeol;
a0ed51b3 3062 register I32 hardcount = 0;
a0d0e21e 3063
3280af22 3064 scan = PL_reginput;
c277df42 3065 if (max != REG_INFTY && max < loceol - scan)
a0d0e21e 3066 loceol = scan + max;
a0d0e21e 3067 switch (OP(p)) {
22c35a8c 3068 case REG_ANY:
a0d0e21e 3069 while (scan < loceol && *scan != '\n')
3070 scan++;
3071 break;
3072 case SANY:
3073 scan = loceol;
3074 break;
a0ed51b3 3075 case ANYUTF8:
3076 loceol = PL_regeol;
3077 while (scan < loceol && *scan != '\n') {
3078 scan += UTF8SKIP(scan);
3079 hardcount++;
3080 }
3081 break;
3082 case SANYUTF8:
3083 loceol = PL_regeol;
3084 while (scan < loceol) {
3085 scan += UTF8SKIP(scan);
3086 hardcount++;
3087 }
3088 break;
bbce6d69 3089 case EXACT: /* length of string is 1 */
cd439c50 3090 c = (U8)*STRING(p);
bbce6d69 3091 while (scan < loceol && UCHARAT(scan) == c)
3092 scan++;
3093 break;
3094 case EXACTF: /* length of string is 1 */
cd439c50 3095 c = (U8)*STRING(p);
bbce6d69 3096 while (scan < loceol &&
22c35a8c 3097 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
bbce6d69 3098 scan++;
3099 break;
3100 case EXACTFL: /* length of string is 1 */
3280af22 3101 PL_reg_flags |= RF_tainted;
cd439c50 3102 c = (U8)*STRING(p);
bbce6d69 3103 while (scan < loceol &&
22c35a8c 3104 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
a0d0e21e 3105 scan++;
3106 break;
a0ed51b3 3107 case ANYOFUTF8:
3108 loceol = PL_regeol;
3109 while (scan < loceol && REGINCLASSUTF8(p, (U8*)scan)) {
3110 scan += UTF8SKIP(scan);
3111 hardcount++;
3112 }
3113 break;
a0d0e21e 3114 case ANYOF:
cd439c50 3115 opnd = MASK(p);
ae5c130c 3116 while (scan < loceol && REGINCLASS(opnd, *scan))
a0d0e21e 3117 scan++;
a0d0e21e 3118 break;
3119 case ALNUM:
3120 while (scan < loceol && isALNUM(*scan))
3121 scan++;
3122 break;
a0ed51b3 3123 case ALNUMUTF8:
3124 loceol = PL_regeol;
dfe13c55 3125 while (scan < loceol && swash_fetch(PL_utf8_alnum, (U8*)scan)) {
a0ed51b3 3126 scan += UTF8SKIP(scan);
3127 hardcount++;
3128 }
3129 break;
bbce6d69 3130 case ALNUML:
3280af22 3131 PL_reg_flags |= RF_tainted;
bbce6d69 3132 while (scan < loceol && isALNUM_LC(*scan))
3133 scan++;
3134 break;
a0ed51b3 3135 case ALNUMLUTF8:
3136 PL_reg_flags |= RF_tainted;
3137 loceol = PL_regeol;
dfe13c55 3138 while (scan < loceol && isALNUM_LC_utf8((U8*)scan)) {
a0ed51b3 3139 scan += UTF8SKIP(scan);
3140 hardcount++;
3141 }
3142 break;
3143 break;
a0d0e21e 3144 case NALNUM:
3145 while (scan < loceol && !isALNUM(*scan))
3146 scan++;
3147 break;
a0ed51b3 3148 case NALNUMUTF8:
3149 loceol = PL_regeol;
dfe13c55 3150 while (scan < loceol && !swash_fetch(PL_utf8_alnum, (U8*)scan)) {
a0ed51b3 3151 scan += UTF8SKIP(scan);
3152 hardcount++;
3153 }
3154 break;
bbce6d69 3155 case NALNUML:
3280af22 3156 PL_reg_flags |= RF_tainted;
bbce6d69 3157 while (scan < loceol && !isALNUM_LC(*scan))
3158 scan++;
3159 break;
a0ed51b3 3160 case NALNUMLUTF8:
3161 PL_reg_flags |= RF_tainted;
3162 loceol = PL_regeol;
dfe13c55 3163 while (scan < loceol && !isALNUM_LC_utf8((U8*)scan)) {
a0ed51b3 3164 scan += UTF8SKIP(scan);
3165 hardcount++;
3166 }
3167 break;
a0d0e21e 3168 case SPACE:
3169 while (scan < loceol && isSPACE(*scan))
3170 scan++;
3171 break;
a0ed51b3 3172 case SPACEUTF8:
3173 loceol = PL_regeol;
dfe13c55 3174 while (scan < loceol && (*scan == ' ' || swash_fetch(PL_utf8_space,(U8*)scan))) {
a0ed51b3 3175 scan += UTF8SKIP(scan);
3176 hardcount++;
3177 }
3178 break;
bbce6d69 3179 case SPACEL:
3280af22 3180 PL_reg_flags |= RF_tainted;
bbce6d69 3181 while (scan < loceol && isSPACE_LC(*scan))
3182 scan++;
3183 break;
a0ed51b3 3184 case SPACELUTF8:
3185 PL_reg_flags |= RF_tainted;
3186 loceol = PL_regeol;
dfe13c55 3187 while (scan < loceol && (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
a0ed51b3 3188 scan += UTF8SKIP(scan);
3189 hardcount++;
3190 }
3191 break;
a0d0e21e 3192 case NSPACE:
3193 while (scan < loceol && !isSPACE(*scan))
3194 scan++;
3195 break;
a0ed51b3 3196 case NSPACEUTF8:
3197 loceol = PL_regeol;
dfe13c55 3198 while (scan < loceol && !(*scan == ' ' || swash_fetch(PL_utf8_space,(U8*)scan))) {
a0ed51b3 3199 scan += UTF8SKIP(scan);
3200 hardcount++;
3201 }
3202 break;
bbce6d69 3203 case NSPACEL:
3280af22 3204 PL_reg_flags |= RF_tainted;
bbce6d69 3205 while (scan < loceol && !isSPACE_LC(*scan))
3206 scan++;
3207 break;
a0ed51b3 3208 case NSPACELUTF8:
3209 PL_reg_flags |= RF_tainted;
3210 loceol = PL_regeol;
dfe13c55 3211 while (scan < loceol && !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
a0ed51b3 3212 scan += UTF8SKIP(scan);
3213 hardcount++;
3214 }
3215 break;
a0d0e21e 3216 case DIGIT:
3217 while (scan < loceol && isDIGIT(*scan))
3218 scan++;
3219 break;
a0ed51b3 3220 case DIGITUTF8:
3221 loceol = PL_regeol;
dfe13c55 3222 while (scan < loceol && swash_fetch(PL_utf8_digit,(U8*)scan)) {
a0ed51b3 3223 scan += UTF8SKIP(scan);
3224 hardcount++;
3225 }
3226 break;
3227 break;
a0d0e21e 3228 case NDIGIT:
3229 while (scan < loceol && !isDIGIT(*scan))
3230 scan++;
3231 break;
a0ed51b3 3232 case NDIGITUTF8:
3233 loceol = PL_regeol;
dfe13c55 3234 while (scan < loceol && !swash_fetch(PL_utf8_digit,(U8*)scan)) {
a0ed51b3 3235 scan += UTF8SKIP(scan);
3236 hardcount++;
3237 }
3238 break;
a0d0e21e 3239 default: /* Called on something of 0 width. */
3240 break; /* So match right here or not at all. */
3241 }
a687059c 3242
a0ed51b3 3243 if (hardcount)
3244 c = hardcount;
3245 else
3246 c = scan - PL_reginput;
3280af22 3247 PL_reginput = scan;
a687059c 3248
c277df42 3249 DEBUG_r(
3250 {
3251 SV *prop = sv_newmortal();
3252
3253 regprop(prop, p);
3254 PerlIO_printf(Perl_debug_log,
3255 "%*s %s can match %ld times out of %ld...\n",
3256 REPORT_CODE_OFF+1, "", SvPVX(prop),c,max);
3257 });
3258
a0d0e21e 3259 return(c);
a687059c 3260}
3261
3262/*
c277df42 3263 - regrepeat_hard - repeatedly match something, report total lenth and length
3264 *
3265 * The repeater is supposed to have constant length.
3266 */
3267
76e3520e 3268STATIC I32
cea2e8a9 3269S_regrepeat_hard(pTHX_ regnode *p, I32 max, I32 *lp)
c277df42 3270{
5c0ca799 3271 dTHR;
c277df42 3272 register char *scan;
3273 register char *start;
3280af22 3274 register char *loceol = PL_regeol;
a0ed51b3 3275 I32 l = 0;
708e3b05 3276 I32 count = 0, res = 1;
a0ed51b3 3277
3278 if (!max)
3279 return 0;
c277df42 3280
3280af22 3281 start = PL_reginput;
a0ed51b3 3282 if (UTF) {
708e3b05 3283 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3 3284 if (!count++) {
3285 l = 0;
3286 while (start < PL_reginput) {
3287 l++;
3288 start += UTF8SKIP(start);
3289 }
3290 *lp = l;
3291 if (l == 0)
3292 return max;
3293 }
3294 if (count == max)
3295 return count;
3296 }
3297 }
3298 else {
708e3b05 3299 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3 3300 if (!count++) {
3301 *lp = l = PL_reginput - start;
3302 if (max != REG_INFTY && l*max < loceol - scan)
3303 loceol = scan + l*max;
3304 if (l == 0)
3305 return max;
c277df42 3306 }
3307 }
3308 }
708e3b05 3309 if (!res)
3280af22 3310 PL_reginput = scan;
c277df42 3311
a0ed51b3 3312 return count;
c277df42 3313}
3314
3315/*
cb8d8820 3316 - reginclass - determine if a character falls into a character class
bbce6d69 3317 */
3318
76e3520e 3319STATIC bool
cea2e8a9 3320S_reginclass(pTHX_ register char *p, register I32 c)
bbce6d69 3321{
5c0ca799 3322 dTHR;
b8c5462f 3323 char flags = ANYOF_FLAGS(p);
bbce6d69 3324 bool match = FALSE;
3325
3326 c &= 0xFF;
b8c5462f 3327 if (ANYOF_BITMAP_TEST(p, c))
bbce6d69 3328 match = TRUE;
3329 else if (flags & ANYOF_FOLD) {
3330 I32 cf;
3331 if (flags & ANYOF_LOCALE) {
3280af22 3332 PL_reg_flags |= RF_tainted;
22c35a8c 3333 cf = PL_fold_locale[c];
bbce6d69 3334 }
3335 else
22c35a8c 3336 cf = PL_fold[c];
b8c5462f 3337 if (ANYOF_BITMAP_TEST(p, cf))
bbce6d69 3338 match = TRUE;
3339 }
3340
b8c5462f 3341 if (!match && (flags & ANYOF_CLASS)) {
3280af22 3342 PL_reg_flags |= RF_tainted;
b8c5462f 3343 if (
3344 (ANYOF_CLASS_TEST(p, ANYOF_ALNUM) && isALNUM_LC(c)) ||
3345 (ANYOF_CLASS_TEST(p, ANYOF_NALNUM) && !isALNUM_LC(c)) ||
3346 (ANYOF_CLASS_TEST(p, ANYOF_SPACE) && isSPACE_LC(c)) ||
3347 (ANYOF_CLASS_TEST(p, ANYOF_NSPACE) && !isSPACE_LC(c)) ||
3348 (ANYOF_CLASS_TEST(p, ANYOF_DIGIT) && isDIGIT_LC(c)) ||
3349 (ANYOF_CLASS_TEST(p, ANYOF_NDIGIT) && !isDIGIT_LC(c)) ||
3350 (ANYOF_CLASS_TEST(p, ANYOF_ALNUMC) && isALNUMC_LC(c)) ||
3351 (ANYOF_CLASS_TEST(p, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
3352 (ANYOF_CLASS_TEST(p, ANYOF_ALPHA) && isALPHA_LC(c)) ||
3353 (ANYOF_CLASS_TEST(p, ANYOF_NALPHA) && !isALPHA_LC(c)) ||
3354 (ANYOF_CLASS_TEST(p, ANYOF_ASCII) && isASCII(c)) ||
3355 (ANYOF_CLASS_TEST(p, ANYOF_NASCII) && !isASCII(c)) ||
3356 (ANYOF_CLASS_TEST(p, ANYOF_CNTRL) && isCNTRL_LC(c)) ||
3357 (ANYOF_CLASS_TEST(p, ANYOF_NCNTRL) && !isCNTRL_LC(c)) ||
3358 (ANYOF_CLASS_TEST(p, ANYOF_GRAPH) && isGRAPH_LC(c)) ||
3359 (ANYOF_CLASS_TEST(p, ANYOF_NGRAPH) && !isGRAPH_LC(c)) ||
3360 (ANYOF_CLASS_TEST(p, ANYOF_LOWER) && isLOWER_LC(c)) ||
3361 (ANYOF_CLASS_TEST(p, ANYOF_NLOWER) && !isLOWER_LC(c)) ||
3362 (ANYOF_CLASS_TEST(p, ANYOF_PRINT) && isPRINT_LC(c)) ||
3363 (ANYOF_CLASS_TEST(p, ANYOF_NPRINT) && !isPRINT_LC(c)) ||
3364 (ANYOF_CLASS_TEST(p, ANYOF_PUNCT) && isPUNCT_LC(c)) ||
3365 (ANYOF_CLASS_TEST(p, ANYOF_NPUNCT) && !isPUNCT_LC(c)) ||
3366 (ANYOF_CLASS_TEST(p, ANYOF_UPPER) && isUPPER_LC(c)) ||
3367 (ANYOF_CLASS_TEST(p, ANYOF_NUPPER) && !isUPPER_LC(c)) ||
3368 (ANYOF_CLASS_TEST(p, ANYOF_XDIGIT) && isXDIGIT(c)) ||
3369 (ANYOF_CLASS_TEST(p, ANYOF_NXDIGIT) && !isXDIGIT(c))
3370 ) /* How's that for a conditional? */
bbce6d69 3371 {
3372 match = TRUE;
3373 }
3374 }
3375
ae5c130c 3376 return (flags & ANYOF_INVERT) ? !match : match;
bbce6d69 3377}
3378
a0ed51b3 3379STATIC bool
cea2e8a9 3380S_reginclassutf8(pTHX_ regnode *f, U8 *p)
c485e607 3381{
3382 dTHR;
a0ed51b3 3383 char flags = ARG1(f);
3384 bool match = FALSE;
3385 SV *sv = (SV*)PL_regdata->data[ARG2(f)];
3386
3387 if (swash_fetch(sv, p))
3388 match = TRUE;
3389 else if (flags & ANYOF_FOLD) {
3390 I32 cf;
dfe13c55 3391 U8 tmpbuf[10];
a0ed51b3 3392 if (flags & ANYOF_LOCALE) {
3393 PL_reg_flags |= RF_tainted;
3394 uv_to_utf8(tmpbuf, toLOWER_LC_utf8(p));
3395 }
3396 else
3397 uv_to_utf8(tmpbuf, toLOWER_utf8(p));
3398 if (swash_fetch(sv, tmpbuf))
3399 match = TRUE;
3400 }
3401
b8c5462f 3402 /* UTF8 combined with ANYOF_CLASS is ill-defined. */
a0ed51b3 3403
3404 return (flags & ANYOF_INVERT) ? !match : match;
3405}
161b471a 3406
dfe13c55 3407STATIC U8 *
cea2e8a9 3408S_reghop(pTHX_ U8 *s, I32 off)
c485e607 3409{
3410 dTHR;
a0ed51b3 3411 if (off >= 0) {
3412 while (off-- && s < (U8*)PL_regeol)
3413 s += UTF8SKIP(s);
3414 }
3415 else {
3416 while (off++) {
3417 if (s > (U8*)PL_bostr) {
3418 s--;
3419 if (*s & 0x80) {
3420 while (s > (U8*)PL_bostr && (*s & 0xc0) == 0x80)
3421 s--;
3422 } /* XXX could check well-formedness here */
3423 }
3424 }
3425 }
3426 return s;
3427}
161b471a 3428
dfe13c55 3429STATIC U8 *
cea2e8a9 3430S_reghopmaybe(pTHX_ U8* s, I32 off)
a0ed51b3 3431{
c485e607 3432 dTHR;
a0ed51b3 3433 if (off >= 0) {
3434 while (off-- && s < (U8*)PL_regeol)
3435 s += UTF8SKIP(s);
3436 if (off >= 0)
3437 return 0;
3438 }
3439 else {
3440 while (off++) {
3441 if (s > (U8*)PL_bostr) {
3442 s--;
3443 if (*s & 0x80) {
3444 while (s > (U8*)PL_bostr && (*s & 0xc0) == 0x80)
3445 s--;
3446 } /* XXX could check well-formedness here */
3447 }
3448 else
3449 break;
3450 }
3451 if (off <= 0)
3452 return 0;
3453 }
3454 return s;
3455}
51371543 3456
3457#ifdef PERL_OBJECT
3458#define NO_XSLOCKS
3459#include "XSUB.h"
3460#endif
3461
3462static void
3463restore_pos(pTHXo_ void *arg)
3464{
3465 dTHR;
3466 if (PL_reg_eval_set) {
3467 if (PL_reg_oldsaved) {
3468 PL_reg_re->subbeg = PL_reg_oldsaved;
3469 PL_reg_re->sublen = PL_reg_oldsavedlen;
3470 RX_MATCH_COPIED_on(PL_reg_re);
3471 }
3472 PL_reg_magic->mg_len = PL_reg_oldpos;
3473 PL_reg_eval_set = 0;
3474 PL_curpm = PL_reg_oldcurpm;
3475 }
3476}
3477