integrate cfgperl contents into mainline; resolve h2xs.PL conflict
[p5sagit/p5-mst-13.2.git] / regexec.c
CommitLineData
a0d0e21e 1/* regexec.c
2 */
3
4/*
5 * "One Ring to rule them all, One Ring to find them..."
6 */
7
a687059c 8/* NOTE: this is derived from Henry Spencer's regexp code, and should not
9 * confused with the original package (see point 3 below). Thanks, Henry!
10 */
11
12/* Additional note: this code is very heavily munged from Henry's version
13 * in places. In some spots I've traded clarity for efficiency, so don't
14 * blame Henry for some of the lack of readability.
15 */
16
e50aee73 17/* The names of the functions have been changed from regcomp and
18 * regexec to pregcomp and pregexec in order to avoid conflicts
19 * with the POSIX routines of the same names.
20*/
21
b9d5759e 22#ifdef PERL_EXT_RE_BUILD
23/* need to replace pregcomp et al, so enable that */
24# ifndef PERL_IN_XSUB_RE
25# define PERL_IN_XSUB_RE
26# endif
27/* need access to debugger hooks */
cad2e5aa 28# if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING)
b9d5759e 29# define DEBUGGING
30# endif
31#endif
32
33#ifdef PERL_IN_XSUB_RE
d06ea78c 34/* We *really* need to overwrite these symbols: */
56953603 35# define Perl_regexec_flags my_regexec
36# define Perl_regdump my_regdump
37# define Perl_regprop my_regprop
cad2e5aa 38# define Perl_re_intuit_start my_re_intuit_start
d06ea78c 39/* *These* symbols are masked to allow static link. */
40# define Perl_pregexec my_pregexec
d88dccdf 41# define Perl_reginitcolors my_reginitcolors
c5be433b 42
43# define PERL_NO_GET_CONTEXT
56953603 44#endif
45
f0fcb552 46/*SUPPRESS 112*/
a687059c 47/*
e50aee73 48 * pregcomp and pregexec -- regsub and regerror are not used in perl
a687059c 49 *
50 * Copyright (c) 1986 by University of Toronto.
51 * Written by Henry Spencer. Not derived from licensed software.
52 *
53 * Permission is granted to anyone to use this software for any
54 * purpose on any computer system, and to redistribute it freely,
55 * subject to the following restrictions:
56 *
57 * 1. The author is not responsible for the consequences of use of
58 * this software, no matter how awful, even if they arise
59 * from defects in it.
60 *
61 * 2. The origin of this software must not be misrepresented, either
62 * by explicit claim or by omission.
63 *
64 * 3. Altered versions must be plainly marked as such, and must not
65 * be misrepresented as being the original software.
66 *
67 **** Alterations to Henry's code are...
68 ****
4eb8286e 69 **** Copyright (c) 1991-1999, Larry Wall
a687059c 70 ****
9ef589d8 71 **** You may distribute under the terms of either the GNU General Public
72 **** License or the Artistic License, as specified in the README file.
a687059c 73 *
74 * Beware that some of this code is subtly aware of the way operator
75 * precedence is structured in regular expressions. Serious changes in
76 * regular-expression syntax might require a total rethink.
77 */
78#include "EXTERN.h"
864dbfa3 79#define PERL_IN_REGEXEC_C
a687059c 80#include "perl.h"
0f5d15d6 81
c5be433b 82#ifdef PERL_IN_XSUB_RE
83# if defined(PERL_CAPI) || defined(PERL_OBJECT)
84# include "XSUB.h"
85# endif
86#endif
87
a687059c 88#include "regcomp.h"
89
c277df42 90#define RF_tainted 1 /* tainted information used? */
91#define RF_warned 2 /* warned about big count? */
ce862d02 92#define RF_evaled 4 /* Did an EVAL with setting? */
a0ed51b3 93#define RF_utf8 8 /* String contains multibyte chars? */
94
95#define UTF (PL_reg_flags & RF_utf8)
ce862d02 96
97#define RS_init 1 /* eval environment created */
98#define RS_set 2 /* replsv value is set */
c277df42 99
a687059c 100#ifndef STATIC
101#define STATIC static
102#endif
103
c277df42 104/*
105 * Forwards.
106 */
107
b8c5462f 108#define REGINCLASS(p,c) (ANYOF_FLAGS(p) ? reginclass(p,c) : ANYOF_BITMAP_TEST(p,c))
a0ed51b3 109#define REGINCLASSUTF8(f,p) (ARG1(f) ? reginclassutf8(f,p) : swash_fetch((SV*)PL_regdata->data[ARG2(f)],p))
110
111#define CHR_SVLEN(sv) (UTF ? sv_len_utf8(sv) : SvCUR(sv))
112#define CHR_DIST(a,b) (UTF ? utf8_distance(a,b) : a - b)
113
dfe13c55 114#define reghop_c(pos,off) ((char*)reghop((U8*)pos, off))
115#define reghopmaybe_c(pos,off) ((char*)reghopmaybe((U8*)pos, off))
116#define HOP(pos,off) (UTF ? reghop((U8*)pos, off) : (U8*)(pos + off))
117#define HOPMAYBE(pos,off) (UTF ? reghopmaybe((U8*)pos, off) : (U8*)(pos + off))
118#define HOPc(pos,off) ((char*)HOP(pos,off))
119#define HOPMAYBEc(pos,off) ((char*)HOPMAYBE(pos,off))
a0d0e21e 120
51371543 121static void restore_pos(pTHXo_ void *arg);
122
123
76e3520e 124STATIC CHECKPOINT
cea2e8a9 125S_regcppush(pTHX_ I32 parenfloor)
a0d0e21e 126{
11343788 127 dTHR;
3280af22 128 int retval = PL_savestack_ix;
129 int i = (PL_regsize - parenfloor) * 4;
a0d0e21e 130 int p;
131
132 SSCHECK(i + 5);
3280af22 133 for (p = PL_regsize; p > parenfloor; p--) {
cf93c79d 134 SSPUSHINT(PL_regendp[p]);
135 SSPUSHINT(PL_regstartp[p]);
3280af22 136 SSPUSHPTR(PL_reg_start_tmp[p]);
a0d0e21e 137 SSPUSHINT(p);
138 }
3280af22 139 SSPUSHINT(PL_regsize);
140 SSPUSHINT(*PL_reglastparen);
141 SSPUSHPTR(PL_reginput);
a0d0e21e 142 SSPUSHINT(i + 3);
143 SSPUSHINT(SAVEt_REGCONTEXT);
144 return retval;
145}
146
c277df42 147/* These are needed since we do not localize EVAL nodes: */
c3464db5 148# define REGCP_SET DEBUG_r(PerlIO_printf(Perl_debug_log, \
149 " Setting an EVAL scope, savestack=%i\n", \
3280af22 150 PL_savestack_ix)); lastcp = PL_savestack_ix
c3464db5 151
3280af22 152# define REGCP_UNWIND DEBUG_r(lastcp != PL_savestack_ix ? \
c3464db5 153 PerlIO_printf(Perl_debug_log, \
154 " Clearing an EVAL scope, savestack=%i..%i\n", \
3280af22 155 lastcp, PL_savestack_ix) : 0); regcpblow(lastcp)
c277df42 156
76e3520e 157STATIC char *
cea2e8a9 158S_regcppop(pTHX)
a0d0e21e 159{
11343788 160 dTHR;
a0d0e21e 161 I32 i = SSPOPINT;
162 U32 paren = 0;
163 char *input;
cf93c79d 164 I32 tmps;
a0d0e21e 165 assert(i == SAVEt_REGCONTEXT);
166 i = SSPOPINT;
167 input = (char *) SSPOPPTR;
3280af22 168 *PL_reglastparen = SSPOPINT;
169 PL_regsize = SSPOPINT;
c277df42 170 for (i -= 3; i > 0; i -= 4) {
a0d0e21e 171 paren = (U32)SSPOPINT;
3280af22 172 PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
cf93c79d 173 PL_regstartp[paren] = SSPOPINT;
174 tmps = SSPOPINT;
3280af22 175 if (paren <= *PL_reglastparen)
176 PL_regendp[paren] = tmps;
c277df42 177 DEBUG_r(
c3464db5 178 PerlIO_printf(Perl_debug_log,
179 " restoring \\%d to %d(%d)..%d%s\n",
cf93c79d 180 paren, PL_regstartp[paren],
181 PL_reg_start_tmp[paren] - PL_bostr,
182 PL_regendp[paren],
3280af22 183 (paren > *PL_reglastparen ? "(no)" : ""));
c277df42 184 );
a0d0e21e 185 }
c277df42 186 DEBUG_r(
3280af22 187 if (*PL_reglastparen + 1 <= PL_regnpar) {
c3464db5 188 PerlIO_printf(Perl_debug_log,
189 " restoring \\%d..\\%d to undef\n",
3280af22 190 *PL_reglastparen + 1, PL_regnpar);
c277df42 191 }
192 );
3280af22 193 for (paren = *PL_reglastparen + 1; paren <= PL_regnpar; paren++) {
194 if (paren > PL_regsize)
cf93c79d 195 PL_regstartp[paren] = -1;
196 PL_regendp[paren] = -1;
a0d0e21e 197 }
198 return input;
199}
200
0f5d15d6 201STATIC char *
cea2e8a9 202S_regcp_set_to(pTHX_ I32 ss)
0f5d15d6 203{
46124e9e 204 dTHR;
0f5d15d6 205 I32 tmp = PL_savestack_ix;
206
207 PL_savestack_ix = ss;
208 regcppop();
209 PL_savestack_ix = tmp;
942e002e 210 return Nullch;
0f5d15d6 211}
212
213typedef struct re_cc_state
214{
215 I32 ss;
216 regnode *node;
217 struct re_cc_state *prev;
218 CURCUR *cc;
219 regexp *re;
220} re_cc_state;
221
c277df42 222#define regcpblow(cp) LEAVE_SCOPE(cp)
a0d0e21e 223
a687059c 224/*
e50aee73 225 * pregexec and friends
a687059c 226 */
227
228/*
c277df42 229 - pregexec - match a regexp against a string
a687059c 230 */
c277df42 231I32
864dbfa3 232Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend,
c3464db5 233 char *strbeg, I32 minend, SV *screamer, U32 nosave)
c277df42 234/* strend: pointer to null at end of string */
235/* strbeg: real beginning of string */
236/* minend: end of match must be >=minend after stringarg. */
237/* nosave: For optimizations. */
238{
239 return
240 regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
241 nosave ? 0 : REXEC_COPY_STR);
242}
0f5d15d6 243
244STATIC void
cea2e8a9 245S_cache_re(pTHX_ regexp *prog)
0f5d15d6 246{
46124e9e 247 dTHR;
0f5d15d6 248 PL_regprecomp = prog->precomp; /* Needed for FAIL. */
249#ifdef DEBUGGING
250 PL_regprogram = prog->program;
251#endif
252 PL_regnpar = prog->nparens;
253 PL_regdata = prog->data;
254 PL_reg_re = prog;
255}
22e551b9 256
cad2e5aa 257/*
258 * Need to implement the following flags for reg_anch:
259 *
260 * USE_INTUIT_NOML - Useful to call re_intuit_start() first
261 * USE_INTUIT_ML
262 * INTUIT_AUTORITATIVE_NOML - Can trust a positive answer
263 * INTUIT_AUTORITATIVE_ML
264 * INTUIT_ONCE_NOML - Intuit can match in one location only.
265 * INTUIT_ONCE_ML
266 *
267 * Another flag for this function: SECOND_TIME (so that float substrs
268 * with giant delta may be not rechecked).
269 */
270
271/* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
272
2c2d71f5 273/* If SCREAM, then SvPVX(sv) should be compatible with strpos and strend.
cad2e5aa 274 Otherwise, only SvCUR(sv) is used to get strbeg. */
275
276/* XXXX We assume that strpos is strbeg unless sv. */
277
2c2d71f5 278/* A failure to find a constant substring means that there is no need to make
279 an expensive call to REx engine, thus we celebrate a failure. Similarly,
280 finding a substring too deep into the string means that less calls to
30944b6d 281 regtry() should be needed.
282
283 REx compiler's optimizer found 4 possible hints:
284 a) Anchored substring;
285 b) Fixed substring;
286 c) Whether we are anchored (beginning-of-line or \G);
287 d) First node (of those at offset 0) which may distingush positions;
288 We use 'a', 'b', multiline-part of 'c', and try to find a position in the
289 string which does not contradict any of them.
290 */
2c2d71f5 291
cad2e5aa 292char *
293Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
294 char *strend, U32 flags, re_scream_pos_data *data)
295{
2c2d71f5 296 register I32 start_shift;
cad2e5aa 297 /* Should be nonnegative! */
2c2d71f5 298 register I32 end_shift;
299 register char *s;
300 register SV *check;
cad2e5aa 301 char *t;
302 I32 ml_anch;
2c2d71f5 303 char *tmp;
304 register char *other_last = Nullch;
30944b6d 305#ifdef DEBUGGING
306 char *i_strpos = strpos;
307#endif
cad2e5aa 308
309 DEBUG_r( if (!PL_colorset) reginitcolors() );
310 DEBUG_r(PerlIO_printf(Perl_debug_log,
2c2d71f5 311 "%sGuessing start of match, REx%s `%s%.60s%s%s' against `%s%.*s%s%s'...\n",
cad2e5aa 312 PL_colors[4],PL_colors[5],PL_colors[0],
313 prog->precomp,
314 PL_colors[1],
315 (strlen(prog->precomp) > 60 ? "..." : ""),
316 PL_colors[0],
317 (strend - strpos > 60 ? 60 : strend - strpos),
318 strpos, PL_colors[1],
319 (strend - strpos > 60 ? "..." : ""))
320 );
321
2c2d71f5 322 if (prog->minlen > strend - strpos) {
323 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too short...\n"));
cad2e5aa 324 goto fail;
2c2d71f5 325 }
326 if (prog->reganch & ROPT_ANCH) { /* Match at beg-of-str or after \n */
cad2e5aa 327 ml_anch = !( (prog->reganch & ROPT_ANCH_SINGLE)
328 || ( (prog->reganch & ROPT_ANCH_BOL)
2c2d71f5 329 && !PL_multiline ) ); /* Check after \n? */
cad2e5aa 330
331 if ((prog->check_offset_min == prog->check_offset_max) && !ml_anch) {
2c2d71f5 332 /* Substring at constant offset from beg-of-str... */
cad2e5aa 333 I32 slen;
334
335 if ( !(prog->reganch & ROPT_ANCH_GPOS) /* Checked by the caller */
2c2d71f5 336 && (sv && (strpos + SvCUR(sv) != strend)) ) {
337 DEBUG_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
cad2e5aa 338 goto fail;
2c2d71f5 339 }
adac82c7 340 PL_regeol = strend; /* Used in HOP() */
2c2d71f5 341 s = HOPc(strpos, prog->check_offset_min);
cad2e5aa 342 if (SvTAIL(prog->check_substr)) {
343 slen = SvCUR(prog->check_substr); /* >= 1 */
344
2c2d71f5 345 if ( strend - s > slen || strend - s < slen - 1
346 || (strend - s == slen && strend[-1] != '\n')) {
347 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
348 goto fail_finish;
cad2e5aa 349 }
350 /* Now should match s[0..slen-2] */
351 slen--;
352 if (slen && (*SvPVX(prog->check_substr) != *s
353 || (slen > 1
2c2d71f5 354 && memNE(SvPVX(prog->check_substr), s, slen)))) {
355 report_neq:
356 DEBUG_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
357 goto fail_finish;
358 }
cad2e5aa 359 }
360 else if (*SvPVX(prog->check_substr) != *s
361 || ((slen = SvCUR(prog->check_substr)) > 1
362 && memNE(SvPVX(prog->check_substr), s, slen)))
2c2d71f5 363 goto report_neq;
364 goto success_at_start;
cad2e5aa 365 }
2c2d71f5 366 /* Match is anchored, but substr is not anchored wrt beg-of-str. */
cad2e5aa 367 s = strpos;
2c2d71f5 368 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
369 /* Should be nonnegative! */
370 end_shift = prog->minlen - start_shift -
371 CHR_SVLEN(prog->check_substr) + (SvTAIL(prog->check_substr) != 0);
372 if (!ml_anch) {
373 I32 end = prog->check_offset_max + CHR_SVLEN(prog->check_substr)
374 - (SvTAIL(prog->check_substr) != 0);
375 I32 eshift = strend - s - end;
376
377 if (end_shift < eshift)
378 end_shift = eshift;
379 }
cad2e5aa 380 }
2c2d71f5 381 else { /* Can match at random position */
cad2e5aa 382 ml_anch = 0;
383 s = strpos;
2c2d71f5 384 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
385 /* Should be nonnegative! */
386 end_shift = prog->minlen - start_shift -
387 CHR_SVLEN(prog->check_substr) + (SvTAIL(prog->check_substr) != 0);
cad2e5aa 388 }
389
2c2d71f5 390#ifdef DEBUGGING /* 7/99: reports of failure (with the older version) */
0033605d 391 if (end_shift < 0)
6bbae5e6 392 Perl_croak(aTHX_ "panic: end_shift");
2c2d71f5 393#endif
394
395 check = prog->check_substr;
396 restart:
397 /* Find a possible match in the region s..strend by looking for
398 the "check" substring in the region corrected by start/end_shift. */
cad2e5aa 399 if (flags & REXEC_SCREAM) {
cad2e5aa 400 char *strbeg = SvPVX(sv); /* XXXX Assume PV_force() on SCREAM! */
401 I32 p = -1; /* Internal iterator of scream. */
402 I32 *pp = data ? data->scream_pos : &p;
403
2c2d71f5 404 if (PL_screamfirst[BmRARE(check)] >= 0
405 || ( BmRARE(check) == '\n'
406 && (BmPREVIOUS(check) == SvCUR(check) - 1)
407 && SvTAIL(check) ))
408 s = screaminstr(sv, check,
409 start_shift + (s - strbeg), end_shift, pp, 0);
cad2e5aa 410 else
2c2d71f5 411 goto fail_finish;
cad2e5aa 412 if (data)
413 *data->scream_olds = s;
414 }
415 else
416 s = fbm_instr((unsigned char*)s + start_shift,
417 (unsigned char*)strend - end_shift,
2c2d71f5 418 check, PL_multiline ? FBMrf_MULTILINE : 0);
cad2e5aa 419
420 /* Update the count-of-usability, remove useless subpatterns,
421 unshift s. */
2c2d71f5 422
423 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s %s substr `%s%.*s%s'%s%s",
424 (s ? "Found" : "Did not find"),
425 ((check == prog->anchored_substr) ? "anchored" : "floating"),
426 PL_colors[0],
427 SvCUR(check) - (SvTAIL(check)!=0), SvPVX(check),
428 PL_colors[1], (SvTAIL(check) ? "$" : ""),
429 (s ? " at offset " : "...\n") ) );
430
431 if (!s)
432 goto fail_finish;
433
434 /* Finish the diagnostic message */
30944b6d 435 DEBUG_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
2c2d71f5 436
437 /* Got a candidate. Check MBOL anchoring, and the *other* substr.
438 Start with the other substr.
439 XXXX no SCREAM optimization yet - and a very coarse implementation
440 XXXX /ttx+/ results in anchored=`ttx', floating=`x'. floating will
441 *always* match. Probably should be marked during compile...
442 Probably it is right to do no SCREAM here...
443 */
444
445 if (prog->float_substr && prog->anchored_substr) {
30944b6d 446 /* Take into account the "other" substring. */
2c2d71f5 447 /* XXXX May be hopelessly wrong for UTF... */
448 if (!other_last)
449 other_last = strpos - 1;
450 if (check == prog->float_substr) {
30944b6d 451 do_other_anchored:
452 {
2c2d71f5 453 char *last = s - start_shift, *last1, *last2;
454 char *s1 = s;
455
456 tmp = PL_bostr;
457 t = s - prog->check_offset_max;
458 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
459 && (!(prog->reganch & ROPT_UTF8)
460 || (PL_bostr = strpos, /* Used in regcopmaybe() */
461 (t = reghopmaybe_c(s, -(prog->check_offset_max)))
462 && t > strpos)))
30944b6d 463 /* EMPTY */;
2c2d71f5 464 else
465 t = strpos;
466 t += prog->anchored_offset;
467 if (t <= other_last)
468 t = other_last + 1;
469 PL_bostr = tmp;
470 last2 = last1 = strend - prog->minlen;
471 if (last < last1)
472 last1 = last;
473 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
474 /* On end-of-str: see comment below. */
475 s = fbm_instr((unsigned char*)t,
476 (unsigned char*)last1 + prog->anchored_offset
477 + SvCUR(prog->anchored_substr)
478 - (SvTAIL(prog->anchored_substr)!=0),
479 prog->anchored_substr, PL_multiline ? FBMrf_MULTILINE : 0);
480 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s anchored substr `%s%.*s%s'%s",
481 (s ? "Found" : "Contradicts"),
482 PL_colors[0],
483 SvCUR(prog->anchored_substr)
484 - (SvTAIL(prog->anchored_substr)!=0),
485 SvPVX(prog->anchored_substr),
486 PL_colors[1], (SvTAIL(prog->anchored_substr) ? "$" : "")));
487 if (!s) {
488 if (last1 >= last2) {
489 DEBUG_r(PerlIO_printf(Perl_debug_log,
490 ", giving up...\n"));
491 goto fail_finish;
492 }
493 DEBUG_r(PerlIO_printf(Perl_debug_log,
494 ", trying floating at offset %ld...\n",
30944b6d 495 (long)(s1 + 1 - i_strpos)));
2c2d71f5 496 PL_regeol = strend; /* Used in HOP() */
497 other_last = last1 + prog->anchored_offset;
498 s = HOPc(last, 1);
499 goto restart;
500 }
501 else {
502 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 503 (long)(s - i_strpos)));
2c2d71f5 504 t = s - prog->anchored_offset;
505 other_last = s - 1;
30944b6d 506 s = s1;
2c2d71f5 507 if (t == strpos)
508 goto try_at_start;
2c2d71f5 509 goto try_at_offset;
510 }
30944b6d 511 }
2c2d71f5 512 }
513 else { /* Take into account the floating substring. */
514 char *last, *last1;
515 char *s1 = s;
516
517 t = s - start_shift;
518 last1 = last = strend - prog->minlen + prog->float_min_offset;
519 if (last - t > prog->float_max_offset)
520 last = t + prog->float_max_offset;
521 s = t + prog->float_min_offset;
522 if (s <= other_last)
523 s = other_last + 1;
524 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
525 /* fbm_instr() takes into account exact value of end-of-str
526 if the check is SvTAIL(ed). Since false positives are OK,
527 and end-of-str is not later than strend we are OK. */
528 s = fbm_instr((unsigned char*)s,
529 (unsigned char*)last + SvCUR(prog->float_substr)
530 - (SvTAIL(prog->float_substr)!=0),
531 prog->float_substr, PL_multiline ? FBMrf_MULTILINE : 0);
532 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s floating substr `%s%.*s%s'%s",
533 (s ? "Found" : "Contradicts"),
534 PL_colors[0],
535 SvCUR(prog->float_substr)
536 - (SvTAIL(prog->float_substr)!=0),
537 SvPVX(prog->float_substr),
538 PL_colors[1], (SvTAIL(prog->float_substr) ? "$" : "")));
539 if (!s) {
540 if (last1 == last) {
541 DEBUG_r(PerlIO_printf(Perl_debug_log,
542 ", giving up...\n"));
543 goto fail_finish;
544 }
545 DEBUG_r(PerlIO_printf(Perl_debug_log,
546 ", trying anchored starting at offset %ld...\n",
30944b6d 547 (long)(s1 + 1 - i_strpos)));
2c2d71f5 548 other_last = last;
549 PL_regeol = strend; /* Used in HOP() */
550 s = HOPc(t, 1);
551 goto restart;
552 }
553 else {
554 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 555 (long)(s - i_strpos)));
2c2d71f5 556 other_last = s - 1;
30944b6d 557 s = s1;
2c2d71f5 558 if (t == strpos)
559 goto try_at_start;
2c2d71f5 560 goto try_at_offset;
561 }
562 }
cad2e5aa 563 }
2c2d71f5 564
565 t = s - prog->check_offset_max;
566 tmp = PL_bostr;
567 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
568 && (!(prog->reganch & ROPT_UTF8)
569 || (PL_bostr = strpos, /* Used in regcopmaybe() */
570 ((t = reghopmaybe_c(s, -(prog->check_offset_max)))
571 && t > strpos)))) {
572 PL_bostr = tmp;
573 /* Fixed substring is found far enough so that the match
574 cannot start at strpos. */
575 try_at_offset:
cad2e5aa 576 if (ml_anch && t[-1] != '\n') {
30944b6d 577 /* Eventually fbm_*() should handle this, but often
578 anchored_offset is not 0, so this check will not be wasted. */
579 /* XXXX In the code below we prefer to look for "^" even in
580 presence of anchored substrings. And we search even
581 beyond the found float position. These pessimizations
582 are historical artefacts only. */
583 find_anchor:
2c2d71f5 584 while (t < strend - prog->minlen) {
cad2e5aa 585 if (*t == '\n') {
586 if (t < s - prog->check_offset_min) {
30944b6d 587 if (prog->anchored_substr) {
588 /* We definitely contradict the found anchored
589 substr. Due to the above check we do not
590 contradict "check" substr.
591 Thus we can arrive here only if check substr
592 is float. Redo checking for "other"=="fixed".
593 */
594 strpos = t + 1;
595 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
596 PL_colors[0],PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
597 goto do_other_anchored;
598 }
cad2e5aa 599 s = t + 1;
2c2d71f5 600 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
30944b6d 601 PL_colors[0],PL_colors[1], (long)(s - i_strpos)));
cad2e5aa 602 goto set_useful;
603 }
2c2d71f5 604 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting at offset %ld...\n",
30944b6d 605 PL_colors[0],PL_colors[1], (long)(t + 1 - i_strpos)));
606 strpos = s = t + 1;
cad2e5aa 607 goto restart;
608 }
609 t++;
610 }
2c2d71f5 611 DEBUG_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
612 PL_colors[0],PL_colors[1]));
613 goto fail_finish;
cad2e5aa 614 }
615 s = t;
616 set_useful:
2c2d71f5 617 ++BmUSEFUL(prog->check_substr); /* hooray/5 */
cad2e5aa 618 }
619 else {
2c2d71f5 620 PL_bostr = tmp;
621 /* The found string does not prohibit matching at beg-of-str
622 - no optimization of calling REx engine can be performed,
623 unless it was an MBOL and we are not after MBOL. */
624 try_at_start:
625 /* Even in this situation we may use MBOL flag if strpos is offset
626 wrt the start of the string. */
627 if (ml_anch && sv
cad2e5aa 628 && (strpos + SvCUR(sv) != strend) && strpos[-1] != '\n') {
629 t = strpos;
630 goto find_anchor;
631 }
30944b6d 632 DEBUG_r( if (ml_anch)
633 PerlIO_printf(Perl_debug_log, "Does not contradict /%s^%s/m...\n",
634 PL_colors[0],PL_colors[1]);
635 );
2c2d71f5 636 success_at_start:
30944b6d 637 if (!(prog->reganch & ROPT_NAUGHTY) /* XXXX If strpos moved? */
cad2e5aa 638 && --BmUSEFUL(prog->check_substr) < 0
639 && prog->check_substr == prog->float_substr) { /* boo */
640 /* If flags & SOMETHING - do not do it many times on the same match */
641 SvREFCNT_dec(prog->check_substr);
642 prog->check_substr = Nullsv; /* disable */
643 prog->float_substr = Nullsv; /* clear */
644 s = strpos;
645 prog->reganch &= ~RE_USE_INTUIT;
646 }
647 else
648 s = strpos;
649 }
650
2c2d71f5 651 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sGuessed:%s match at offset %ld\n",
30944b6d 652 PL_colors[4], PL_colors[5], (long)(s - i_strpos)) );
cad2e5aa 653 return s;
2c2d71f5 654
655 fail_finish: /* Substring not found */
656 BmUSEFUL(prog->check_substr) += 5; /* hooray */
cad2e5aa 657 fail:
2c2d71f5 658 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
cad2e5aa 659 PL_colors[4],PL_colors[5]));
660 return Nullch;
661}
9661b544 662
a687059c 663/*
c277df42 664 - regexec_flags - match a regexp against a string
a687059c 665 */
79072805 666I32
864dbfa3 667Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend,
22e551b9 668 char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
c277df42 669/* strend: pointer to null at end of string */
670/* strbeg: real beginning of string */
671/* minend: end of match must be >=minend after stringarg. */
672/* data: May be used for some additional optimizations. */
673/* nosave: For optimizations. */
a687059c 674{
5c0ca799 675 dTHR;
a0d0e21e 676 register char *s;
c277df42 677 register regnode *c;
a0d0e21e 678 register char *startpos = stringarg;
679 register I32 tmp;
c277df42 680 I32 minlen; /* must match at least this many chars */
a0d0e21e 681 I32 dontbother = 0; /* how many characters not to try at end */
c277df42 682 I32 start_shift = 0; /* Offset of the start to find
a0ed51b3 683 constant substr. */ /* CC */
684 I32 end_shift = 0; /* Same for the end. */ /* CC */
c277df42 685 I32 scream_pos = -1; /* Internal iterator of scream. */
686 char *scream_olds;
3280af22 687 SV* oreplsv = GvSV(PL_replgv);
a687059c 688
2ab05381 689 PL_regcc = 0;
a0d0e21e 690
0f5d15d6 691 cache_re(prog);
a0d0e21e 692#ifdef DEBUGGING
3280af22 693 PL_regnarrate = PL_debug & 512;
a0d0e21e 694#endif
695
696 /* Be paranoid... */
697 if (prog == NULL || startpos == NULL) {
cea2e8a9 698 Perl_croak(aTHX_ "NULL regexp parameter");
a0d0e21e 699 return 0;
700 }
701
c277df42 702 minlen = prog->minlen;
703 if (strend - startpos < minlen) goto phooey;
704
a0d0e21e 705 if (startpos == strbeg) /* is ^ valid at stringarg? */
3280af22 706 PL_regprev = '\n';
a0d0e21e 707 else {
a0ed51b3 708 PL_regprev = (U32)stringarg[-1];
3280af22 709 if (!PL_multiline && PL_regprev == '\n')
710 PL_regprev = '\0'; /* force ^ to NOT match */
a0d0e21e 711 }
bbce6d69 712
a0d0e21e 713 /* Check validity of program. */
22c35a8c 714 if (UCHARAT(prog->program) != REG_MAGIC) {
cea2e8a9 715 Perl_croak(aTHX_ "corrupted regexp program");
a0d0e21e 716 }
717
3280af22 718 PL_reg_flags = 0;
719 PL_reg_eval_set = 0;
2c2d71f5 720 PL_reg_maxiter = 0;
a0d0e21e 721
a0ed51b3 722 if (prog->reganch & ROPT_UTF8)
723 PL_reg_flags |= RF_utf8;
724
725 /* Mark beginning of line for ^ and lookbehind. */
726 PL_regbol = startpos;
727 PL_bostr = strbeg;
9661b544 728 PL_reg_sv = sv;
a0ed51b3 729
730 /* Mark end of line for $ (and such) */
731 PL_regeol = strend;
732
733 /* see how far we have to get to not match where we matched before */
734 PL_regtill = startpos+minend;
735
0f5d15d6 736 /* We start without call_cc context. */
737 PL_reg_call_cc = 0;
738
a0d0e21e 739 /* If there is a "must appear" string, look for it. */
740 s = startpos;
cad2e5aa 741
742 if (prog->reganch & ROPT_GPOS_SEEN) {
743 MAGIC *mg;
744
745 if (!(flags & REXEC_IGNOREPOS) && sv && SvTYPE(sv) >= SVt_PVMG
746 && SvMAGIC(sv) && (mg = mg_find(sv, 'g')) && mg->mg_len >= 0)
747 PL_reg_ganch = strbeg + mg->mg_len;
a0d0e21e 748 else
cad2e5aa 749 PL_reg_ganch = startpos;
750 if (prog->reganch & ROPT_ANCH_GPOS) {
751 if (s > PL_reg_ganch)
752 goto phooey;
753 s = PL_reg_ganch;
a0ed51b3 754 }
a0d0e21e 755 }
a687059c 756
cad2e5aa 757 if (!(flags & REXEC_CHECKED) && prog->check_substr != Nullsv) {
758 re_scream_pos_data d;
759
760 d.scream_olds = &scream_olds;
761 d.scream_pos = &scream_pos;
762 s = re_intuit_start(prog, sv, s, strend, flags, &d);
763 if (!s)
764 goto phooey; /* not present */
765 }
766
767 DEBUG_r( if (!PL_colorset) reginitcolors() );
768 DEBUG_r(PerlIO_printf(Perl_debug_log,
2c2d71f5 769 "%sMatching REx%s `%s%.60s%s%s' against `%s%.*s%s%s'\n",
8d300b32 770 PL_colors[4],PL_colors[5],PL_colors[0],
771 prog->precomp,
772 PL_colors[1],
c277df42 773 (strlen(prog->precomp) > 60 ? "..." : ""),
cad2e5aa 774 PL_colors[0],
c277df42 775 (strend - startpos > 60 ? 60 : strend - startpos),
8d300b32 776 startpos, PL_colors[1],
c277df42 777 (strend - startpos > 60 ? "..." : ""))
778 );
779
a0d0e21e 780 /* Simplest case: anchored match need be tried only once. */
774d564b 781 /* [unless only anchor is BOL and multiline is set] */
22e551b9 782 if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) {
cad2e5aa 783 if (s == startpos && regtry(prog, startpos))
a0d0e21e 784 goto got_it;
22e551b9 785 else if (PL_multiline || (prog->reganch & ROPT_IMPLICIT)
786 || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */
774d564b 787 {
cad2e5aa 788 char *end;
789
a0d0e21e 790 if (minlen)
791 dontbother = minlen - 1;
cad2e5aa 792 end = HOPc(strend, -dontbother) - 1;
a0d0e21e 793 /* for multiline we only have to try after newlines */
cad2e5aa 794 if (prog->check_substr) {
30944b6d 795 if (s == startpos)
796 goto after_try;
cad2e5aa 797 while (1) {
798 if (regtry(prog, s))
a0d0e21e 799 goto got_it;
30944b6d 800 after_try:
cad2e5aa 801 if (s >= end)
802 goto phooey;
803 s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
804 if (!s)
805 goto phooey;
806 }
807 } else {
808 if (s > startpos)
809 s--;
810 while (s < end) {
811 if (*s++ == '\n') { /* don't need PL_utf8skip here */
812 if (regtry(prog, s))
813 goto got_it;
814 }
815 }
35c8bce7 816 }
35c8bce7 817 }
a0d0e21e 818 goto phooey;
22e551b9 819 } else if (prog->reganch & ROPT_ANCH_GPOS) {
820 if (regtry(prog, PL_reg_ganch))
821 goto got_it;
822 goto phooey;
a0d0e21e 823 }
35c8bce7 824
a0d0e21e 825 /* Messy cases: unanchored match. */
c277df42 826 if (prog->anchored_substr && prog->reganch & ROPT_SKIP) {
827 /* we have /x+whatever/ */
cad2e5aa 828 /* it must be a one character string (XXXX Except UTF?) */
c277df42 829 char ch = SvPVX(prog->anchored_substr)[0];
a0ed51b3 830 if (UTF) {
831 while (s < strend) {
832 if (*s == ch) {
833 if (regtry(prog, s)) goto got_it;
834 s += UTF8SKIP(s);
835 while (s < strend && *s == ch)
836 s += UTF8SKIP(s);
837 }
838 s += UTF8SKIP(s);
839 }
840 }
841 else {
842 while (s < strend) {
843 if (*s == ch) {
844 if (regtry(prog, s)) goto got_it;
c277df42 845 s++;
a0ed51b3 846 while (s < strend && *s == ch)
847 s++;
848 }
849 s++;
a0d0e21e 850 }
a687059c 851 }
c277df42 852 }
853 /*SUPPRESS 560*/
854 else if (prog->anchored_substr != Nullsv
855 || (prog->float_substr != Nullsv
856 && prog->float_max_offset < strend - s)) {
857 SV *must = prog->anchored_substr
858 ? prog->anchored_substr : prog->float_substr;
859 I32 back_max =
860 prog->anchored_substr ? prog->anchored_offset : prog->float_max_offset;
861 I32 back_min =
862 prog->anchored_substr ? prog->anchored_offset : prog->float_min_offset;
863 I32 delta = back_max - back_min;
cf93c79d 864 char *last = HOPc(strend, /* Cannot start after this */
73c4f7a1 865 -(I32)(CHR_SVLEN(must)
866 - (SvTAIL(must) != 0) + back_min));
a0ed51b3 867 char *last1; /* Last position checked before */
868
869 if (s > PL_bostr)
dfe13c55 870 last1 = HOPc(s, -1);
a0ed51b3 871 else
872 last1 = s - 1; /* bogus */
c277df42 873
874 /* XXXX check_substr already used to find `s', can optimize if
875 check_substr==must. */
876 scream_pos = -1;
877 dontbother = end_shift;
dfe13c55 878 strend = HOPc(strend, -dontbother);
c277df42 879 while ( (s <= last) &&
22e551b9 880 ((flags & REXEC_SCREAM)
881 ? (s = screaminstr(sv, must, HOPc(s, back_min) - strbeg,
c277df42 882 end_shift, &scream_pos, 0))
a0ed51b3 883 : (s = fbm_instr((unsigned char*)HOP(s, back_min),
cf93c79d 884 (unsigned char*)strend, must,
885 PL_multiline ? FBMrf_MULTILINE : 0))) ) {
dfe13c55 886 if (HOPc(s, -back_max) > last1) {
887 last1 = HOPc(s, -back_min);
888 s = HOPc(s, -back_max);
a0ed51b3 889 }
890 else {
dfe13c55 891 char *t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
c277df42 892
dfe13c55 893 last1 = HOPc(s, -back_min);
c277df42 894 s = t;
a0d0e21e 895 }
a0ed51b3 896 if (UTF) {
897 while (s <= last1) {
898 if (regtry(prog, s))
899 goto got_it;
900 s += UTF8SKIP(s);
901 }
902 }
903 else {
904 while (s <= last1) {
905 if (regtry(prog, s))
906 goto got_it;
907 s++;
908 }
a0d0e21e 909 }
910 }
911 goto phooey;
a0ed51b3 912 }
913 else if (c = prog->regstclass) {
a0d0e21e 914 I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
a0ed51b3 915 char *cc;
a687059c 916
a0d0e21e 917 if (minlen)
918 dontbother = minlen - 1;
dfe13c55 919 strend = HOPc(strend, -dontbother); /* don't bother with what can't match */
a0d0e21e 920 tmp = 1;
921 /* We know what class it must start with. */
922 switch (OP(c)) {
a0ed51b3 923 case ANYOFUTF8:
cd439c50 924 cc = MASK(c);
a0ed51b3 925 while (s < strend) {
926 if (REGINCLASSUTF8(c, (U8*)s)) {
927 if (tmp && regtry(prog, s))
928 goto got_it;
929 else
930 tmp = doevery;
931 }
932 else
933 tmp = 1;
934 s += UTF8SKIP(s);
935 }
936 break;
a0d0e21e 937 case ANYOF:
cd439c50 938 cc = MASK(c);
a0d0e21e 939 while (s < strend) {
a0ed51b3 940 if (REGINCLASS(cc, *s)) {
a0d0e21e 941 if (tmp && regtry(prog, s))
942 goto got_it;
943 else
944 tmp = doevery;
a687059c 945 }
a0d0e21e 946 else
947 tmp = 1;
948 s++;
949 }
950 break;
bbce6d69 951 case BOUNDL:
3280af22 952 PL_reg_flags |= RF_tainted;
bbce6d69 953 /* FALL THROUGH */
a0d0e21e 954 case BOUND:
a0ed51b3 955 if (minlen) {
956 dontbother++;
957 strend -= 1;
958 }
3280af22 959 tmp = (s != startpos) ? UCHARAT(s - 1) : PL_regprev;
95bac841 960 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
a0d0e21e 961 while (s < strend) {
95bac841 962 if (tmp == !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
a0d0e21e 963 tmp = !tmp;
964 if (regtry(prog, s))
965 goto got_it;
a687059c 966 }
a0d0e21e 967 s++;
968 }
969 if ((minlen || tmp) && regtry(prog,s))
970 goto got_it;
971 break;
a0ed51b3 972 case BOUNDLUTF8:
973 PL_reg_flags |= RF_tainted;
974 /* FALL THROUGH */
975 case BOUNDUTF8:
976 if (minlen) {
977 dontbother++;
dfe13c55 978 strend = reghop_c(strend, -1);
a0ed51b3 979 }
dfe13c55 980 tmp = (I32)(s != startpos) ? utf8_to_uv(reghop((U8*)s, -1), 0) : PL_regprev;
a0ed51b3 981 tmp = ((OP(c) == BOUND ? isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
982 while (s < strend) {
dfe13c55 983 if (tmp == !(OP(c) == BOUND ?
984 swash_fetch(PL_utf8_alnum, (U8*)s) :
985 isALNUM_LC_utf8((U8*)s)))
986 {
a0ed51b3 987 tmp = !tmp;
988 if (regtry(prog, s))
989 goto got_it;
990 }
991 s += UTF8SKIP(s);
992 }
993 if ((minlen || tmp) && regtry(prog,s))
994 goto got_it;
995 break;
bbce6d69 996 case NBOUNDL:
3280af22 997 PL_reg_flags |= RF_tainted;
bbce6d69 998 /* FALL THROUGH */
a0d0e21e 999 case NBOUND:
a0ed51b3 1000 if (minlen) {
1001 dontbother++;
1002 strend -= 1;
1003 }
3280af22 1004 tmp = (s != startpos) ? UCHARAT(s - 1) : PL_regprev;
95bac841 1005 tmp = ((OP(c) == NBOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
a0d0e21e 1006 while (s < strend) {
95bac841 1007 if (tmp == !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
a0d0e21e 1008 tmp = !tmp;
1009 else if (regtry(prog, s))
1010 goto got_it;
1011 s++;
1012 }
1013 if ((minlen || !tmp) && regtry(prog,s))
1014 goto got_it;
1015 break;
a0ed51b3 1016 case NBOUNDLUTF8:
1017 PL_reg_flags |= RF_tainted;
1018 /* FALL THROUGH */
1019 case NBOUNDUTF8:
1020 if (minlen) {
1021 dontbother++;
dfe13c55 1022 strend = reghop_c(strend, -1);
a0ed51b3 1023 }
dfe13c55 1024 tmp = (I32)(s != startpos) ? utf8_to_uv(reghop((U8*)s, -1), 0) : PL_regprev;
a0ed51b3 1025 tmp = ((OP(c) == NBOUND ? isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
1026 while (s < strend) {
dfe13c55 1027 if (tmp == !(OP(c) == NBOUND ?
1028 swash_fetch(PL_utf8_alnum, (U8*)s) :
1029 isALNUM_LC_utf8((U8*)s)))
a0ed51b3 1030 tmp = !tmp;
1031 else if (regtry(prog, s))
1032 goto got_it;
1033 s += UTF8SKIP(s);
1034 }
1035 if ((minlen || !tmp) && regtry(prog,s))
1036 goto got_it;
1037 break;
a0d0e21e 1038 case ALNUM:
1039 while (s < strend) {
bbce6d69 1040 if (isALNUM(*s)) {
1041 if (tmp && regtry(prog, s))
1042 goto got_it;
1043 else
1044 tmp = doevery;
1045 }
1046 else
1047 tmp = 1;
1048 s++;
1049 }
1050 break;
a0ed51b3 1051 case ALNUMUTF8:
1052 while (s < strend) {
dfe13c55 1053 if (swash_fetch(PL_utf8_alnum, (U8*)s)) {
a0ed51b3 1054 if (tmp && regtry(prog, s))
1055 goto got_it;
1056 else
1057 tmp = doevery;
1058 }
1059 else
1060 tmp = 1;
1061 s += UTF8SKIP(s);
1062 }
1063 break;
bbce6d69 1064 case ALNUML:
3280af22 1065 PL_reg_flags |= RF_tainted;
bbce6d69 1066 while (s < strend) {
1067 if (isALNUM_LC(*s)) {
a0d0e21e 1068 if (tmp && regtry(prog, s))
1069 goto got_it;
a687059c 1070 else
a0d0e21e 1071 tmp = doevery;
1072 }
1073 else
1074 tmp = 1;
1075 s++;
1076 }
1077 break;
a0ed51b3 1078 case ALNUMLUTF8:
1079 PL_reg_flags |= RF_tainted;
1080 while (s < strend) {
dfe13c55 1081 if (isALNUM_LC_utf8((U8*)s)) {
a0ed51b3 1082 if (tmp && regtry(prog, s))
1083 goto got_it;
1084 else
1085 tmp = doevery;
1086 }
1087 else
1088 tmp = 1;
1089 s += UTF8SKIP(s);
1090 }
1091 break;
a0d0e21e 1092 case NALNUM:
1093 while (s < strend) {
bbce6d69 1094 if (!isALNUM(*s)) {
1095 if (tmp && regtry(prog, s))
1096 goto got_it;
1097 else
1098 tmp = doevery;
1099 }
1100 else
1101 tmp = 1;
1102 s++;
1103 }
1104 break;
a0ed51b3 1105 case NALNUMUTF8:
1106 while (s < strend) {
dfe13c55 1107 if (!swash_fetch(PL_utf8_alnum, (U8*)s)) {
a0ed51b3 1108 if (tmp && regtry(prog, s))
1109 goto got_it;
1110 else
1111 tmp = doevery;
1112 }
1113 else
1114 tmp = 1;
1115 s += UTF8SKIP(s);
1116 }
1117 break;
bbce6d69 1118 case NALNUML:
3280af22 1119 PL_reg_flags |= RF_tainted;
bbce6d69 1120 while (s < strend) {
1121 if (!isALNUM_LC(*s)) {
a0d0e21e 1122 if (tmp && regtry(prog, s))
1123 goto got_it;
a687059c 1124 else
a0d0e21e 1125 tmp = doevery;
a687059c 1126 }
a0d0e21e 1127 else
1128 tmp = 1;
1129 s++;
1130 }
1131 break;
a0ed51b3 1132 case NALNUMLUTF8:
1133 PL_reg_flags |= RF_tainted;
1134 while (s < strend) {
dfe13c55 1135 if (!isALNUM_LC_utf8((U8*)s)) {
a0ed51b3 1136 if (tmp && regtry(prog, s))
1137 goto got_it;
1138 else
1139 tmp = doevery;
1140 }
1141 else
1142 tmp = 1;
1143 s += UTF8SKIP(s);
1144 }
1145 break;
a0d0e21e 1146 case SPACE:
1147 while (s < strend) {
1148 if (isSPACE(*s)) {
1149 if (tmp && regtry(prog, s))
1150 goto got_it;
1151 else
1152 tmp = doevery;
2304df62 1153 }
a0d0e21e 1154 else
1155 tmp = 1;
1156 s++;
1157 }
1158 break;
a0ed51b3 1159 case SPACEUTF8:
1160 while (s < strend) {
dfe13c55 1161 if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s)) {
a0ed51b3 1162 if (tmp && regtry(prog, s))
1163 goto got_it;
1164 else
1165 tmp = doevery;
1166 }
1167 else
1168 tmp = 1;
1169 s += UTF8SKIP(s);
1170 }
1171 break;
bbce6d69 1172 case SPACEL:
3280af22 1173 PL_reg_flags |= RF_tainted;
bbce6d69 1174 while (s < strend) {
1175 if (isSPACE_LC(*s)) {
1176 if (tmp && regtry(prog, s))
1177 goto got_it;
1178 else
1179 tmp = doevery;
1180 }
1181 else
1182 tmp = 1;
1183 s++;
1184 }
1185 break;
a0ed51b3 1186 case SPACELUTF8:
1187 PL_reg_flags |= RF_tainted;
1188 while (s < strend) {
dfe13c55 1189 if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) {
a0ed51b3 1190 if (tmp && regtry(prog, s))
1191 goto got_it;
1192 else
1193 tmp = doevery;
1194 }
1195 else
1196 tmp = 1;
1197 s += UTF8SKIP(s);
1198 }
1199 break;
a0d0e21e 1200 case NSPACE:
1201 while (s < strend) {
1202 if (!isSPACE(*s)) {
1203 if (tmp && regtry(prog, s))
1204 goto got_it;
1205 else
1206 tmp = doevery;
a687059c 1207 }
a0d0e21e 1208 else
1209 tmp = 1;
1210 s++;
1211 }
1212 break;
a0ed51b3 1213 case NSPACEUTF8:
1214 while (s < strend) {
dfe13c55 1215 if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s))) {
a0ed51b3 1216 if (tmp && regtry(prog, s))
1217 goto got_it;
1218 else
1219 tmp = doevery;
1220 }
1221 else
1222 tmp = 1;
1223 s += UTF8SKIP(s);
1224 }
1225 break;
bbce6d69 1226 case NSPACEL:
3280af22 1227 PL_reg_flags |= RF_tainted;
bbce6d69 1228 while (s < strend) {
1229 if (!isSPACE_LC(*s)) {
1230 if (tmp && regtry(prog, s))
1231 goto got_it;
1232 else
1233 tmp = doevery;
1234 }
1235 else
1236 tmp = 1;
1237 s++;
1238 }
1239 break;
a0ed51b3 1240 case NSPACELUTF8:
1241 PL_reg_flags |= RF_tainted;
1242 while (s < strend) {
dfe13c55 1243 if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) {
a0ed51b3 1244 if (tmp && regtry(prog, s))
1245 goto got_it;
1246 else
1247 tmp = doevery;
1248 }
1249 else
1250 tmp = 1;
1251 s += UTF8SKIP(s);
1252 }
1253 break;
a0d0e21e 1254 case DIGIT:
1255 while (s < strend) {
1256 if (isDIGIT(*s)) {
1257 if (tmp && regtry(prog, s))
1258 goto got_it;
1259 else
1260 tmp = doevery;
2b69d0c2 1261 }
a0d0e21e 1262 else
1263 tmp = 1;
1264 s++;
1265 }
1266 break;
a0ed51b3 1267 case DIGITUTF8:
1268 while (s < strend) {
dfe13c55 1269 if (swash_fetch(PL_utf8_digit,(U8*)s)) {
a0ed51b3 1270 if (tmp && regtry(prog, s))
1271 goto got_it;
1272 else
1273 tmp = doevery;
1274 }
1275 else
1276 tmp = 1;
1277 s += UTF8SKIP(s);
1278 }
1279 break;
b8c5462f 1280 case DIGITL:
1281 PL_reg_flags |= RF_tainted;
1282 while (s < strend) {
1283 if (isDIGIT_LC(*s)) {
1284 if (tmp && regtry(prog, s))
1285 goto got_it;
1286 else
1287 tmp = doevery;
1288 }
1289 else
1290 tmp = 1;
1291 s++;
1292 }
1293 break;
1294 case DIGITLUTF8:
1295 PL_reg_flags |= RF_tainted;
1296 while (s < strend) {
1297 if (isDIGIT_LC_utf8((U8*)s)) {
1298 if (tmp && regtry(prog, s))
1299 goto got_it;
1300 else
1301 tmp = doevery;
1302 }
1303 else
1304 tmp = 1;
1305 s += UTF8SKIP(s);
1306 }
1307 break;
a0d0e21e 1308 case NDIGIT:
1309 while (s < strend) {
1310 if (!isDIGIT(*s)) {
1311 if (tmp && regtry(prog, s))
1312 goto got_it;
1313 else
1314 tmp = doevery;
a687059c 1315 }
a0d0e21e 1316 else
1317 tmp = 1;
1318 s++;
1319 }
1320 break;
a0ed51b3 1321 case NDIGITUTF8:
1322 while (s < strend) {
dfe13c55 1323 if (!swash_fetch(PL_utf8_digit,(U8*)s)) {
a0ed51b3 1324 if (tmp && regtry(prog, s))
1325 goto got_it;
1326 else
1327 tmp = doevery;
1328 }
1329 else
1330 tmp = 1;
1331 s += UTF8SKIP(s);
1332 }
1333 break;
b8c5462f 1334 case NDIGITL:
1335 PL_reg_flags |= RF_tainted;
1336 while (s < strend) {
1337 if (!isDIGIT_LC(*s)) {
1338 if (tmp && regtry(prog, s))
1339 goto got_it;
1340 else
1341 tmp = doevery;
1342 }
1343 else
1344 tmp = 1;
1345 s++;
a0ed51b3 1346 }
b8c5462f 1347 break;
1348 case NDIGITLUTF8:
1349 PL_reg_flags |= RF_tainted;
1350 while (s < strend) {
1351 if (!isDIGIT_LC_utf8((U8*)s)) {
1352 if (tmp && regtry(prog, s))
1353 goto got_it;
cf93c79d 1354 else
b8c5462f 1355 tmp = doevery;
1356 }
1357 else
1358 tmp = 1;
1359 s += UTF8SKIP(s);
1360 }
1361 break;
d6a28714 1362 }
1363 }
1364 else {
1365 dontbother = 0;
1366 if (prog->float_substr != Nullsv) { /* Trim the end. */
1367 char *last;
1368 I32 oldpos = scream_pos;
1369
1370 if (flags & REXEC_SCREAM) {
1371 last = screaminstr(sv, prog->float_substr, s - strbeg,
1372 end_shift, &scream_pos, 1); /* last one */
1373 if (!last)
1374 last = scream_olds; /* Only one occurence. */
b8c5462f 1375 }
d6a28714 1376 else {
1377 STRLEN len;
1378 char *little = SvPV(prog->float_substr, len);
1379
1380 if (SvTAIL(prog->float_substr)) {
1381 if (memEQ(strend - len + 1, little, len - 1))
1382 last = strend - len + 1;
1383 else if (!PL_multiline)
1384 last = memEQ(strend - len, little, len)
1385 ? strend - len : Nullch;
b8c5462f 1386 else
d6a28714 1387 goto find_last;
1388 } else {
1389 find_last:
1390 if (len)
1391 last = rninstr(s, strend, little, little + len);
b8c5462f 1392 else
d6a28714 1393 last = strend; /* matching `$' */
b8c5462f 1394 }
b8c5462f 1395 }
d6a28714 1396 if (last == NULL) goto phooey; /* Should not happen! */
1397 dontbother = strend - last + prog->float_min_offset;
1398 }
1399 if (minlen && (dontbother < minlen))
1400 dontbother = minlen - 1;
1401 strend -= dontbother; /* this one's always in bytes! */
1402 /* We don't know much -- general case. */
1403 if (UTF) {
1404 for (;;) {
1405 if (regtry(prog, s))
1406 goto got_it;
1407 if (s >= strend)
1408 break;
b8c5462f 1409 s += UTF8SKIP(s);
d6a28714 1410 };
1411 }
1412 else {
1413 do {
1414 if (regtry(prog, s))
1415 goto got_it;
1416 } while (s++ < strend);
1417 }
1418 }
1419
1420 /* Failure. */
1421 goto phooey;
1422
1423got_it:
1424 RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
1425
1426 if (PL_reg_eval_set) {
1427 /* Preserve the current value of $^R */
1428 if (oreplsv != GvSV(PL_replgv))
1429 sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
1430 restored, the value remains
1431 the same. */
1432 restore_pos(aTHXo_ 0);
1433 }
1434
1435 /* make sure $`, $&, $', and $digit will work later */
1436 if ( !(flags & REXEC_NOT_FIRST) ) {
1437 if (RX_MATCH_COPIED(prog)) {
1438 Safefree(prog->subbeg);
1439 RX_MATCH_COPIED_off(prog);
1440 }
1441 if (flags & REXEC_COPY_STR) {
1442 I32 i = PL_regeol - startpos + (stringarg - strbeg);
1443
1444 s = savepvn(strbeg, i);
1445 prog->subbeg = s;
1446 prog->sublen = i;
1447 RX_MATCH_COPIED_on(prog);
1448 }
1449 else {
1450 prog->subbeg = strbeg;
1451 prog->sublen = PL_regeol - strbeg; /* strend may have been modified */
1452 }
1453 }
1454
1455 return 1;
1456
1457phooey:
1458 if (PL_reg_eval_set)
1459 restore_pos(aTHXo_ 0);
1460 return 0;
1461}
1462
1463/*
1464 - regtry - try match at specific point
1465 */
1466STATIC I32 /* 0 failure, 1 success */
1467S_regtry(pTHX_ regexp *prog, char *startpos)
1468{
1469 dTHR;
1470 register I32 i;
1471 register I32 *sp;
1472 register I32 *ep;
1473 CHECKPOINT lastcp;
1474
1475 if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
1476 MAGIC *mg;
1477
1478 PL_reg_eval_set = RS_init;
1479 DEBUG_r(DEBUG_s(
1480 PerlIO_printf(Perl_debug_log, " setting stack tmpbase at %i\n",
1481 PL_stack_sp - PL_stack_base);
1482 ));
1483 SAVEINT(cxstack[cxstack_ix].blk_oldsp);
1484 cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
1485 /* Otherwise OP_NEXTSTATE will free whatever on stack now. */
1486 SAVETMPS;
1487 /* Apparently this is not needed, judging by wantarray. */
1488 /* SAVEINT(cxstack[cxstack_ix].blk_gimme);
1489 cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
1490
1491 if (PL_reg_sv) {
1492 /* Make $_ available to executed code. */
1493 if (PL_reg_sv != DEFSV) {
1494 /* SAVE_DEFSV does *not* suffice here for USE_THREADS */
1495 SAVESPTR(DEFSV);
1496 DEFSV = PL_reg_sv;
b8c5462f 1497 }
d6a28714 1498
1499 if (!(SvTYPE(PL_reg_sv) >= SVt_PVMG && SvMAGIC(PL_reg_sv)
1500 && (mg = mg_find(PL_reg_sv, 'g')))) {
1501 /* prepare for quick setting of pos */
1502 sv_magic(PL_reg_sv, (SV*)0, 'g', Nullch, 0);
1503 mg = mg_find(PL_reg_sv, 'g');
1504 mg->mg_len = -1;
b8c5462f 1505 }
d6a28714 1506 PL_reg_magic = mg;
1507 PL_reg_oldpos = mg->mg_len;
1508 SAVEDESTRUCTOR(restore_pos, 0);
1509 }
1510 if (!PL_reg_curpm)
1511 New(22,PL_reg_curpm, 1, PMOP);
1512 PL_reg_curpm->op_pmregexp = prog;
1513 PL_reg_oldcurpm = PL_curpm;
1514 PL_curpm = PL_reg_curpm;
1515 if (RX_MATCH_COPIED(prog)) {
1516 /* Here is a serious problem: we cannot rewrite subbeg,
1517 since it may be needed if this match fails. Thus
1518 $` inside (?{}) could fail... */
1519 PL_reg_oldsaved = prog->subbeg;
1520 PL_reg_oldsavedlen = prog->sublen;
1521 RX_MATCH_COPIED_off(prog);
1522 }
1523 else
1524 PL_reg_oldsaved = Nullch;
1525 prog->subbeg = PL_bostr;
1526 prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
1527 }
1528 prog->startp[0] = startpos - PL_bostr;
1529 PL_reginput = startpos;
1530 PL_regstartp = prog->startp;
1531 PL_regendp = prog->endp;
1532 PL_reglastparen = &prog->lastparen;
1533 prog->lastparen = 0;
1534 PL_regsize = 0;
1535 DEBUG_r(PL_reg_starttry = startpos);
1536 if (PL_reg_start_tmpl <= prog->nparens) {
1537 PL_reg_start_tmpl = prog->nparens*3/2 + 3;
1538 if(PL_reg_start_tmp)
1539 Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1540 else
1541 New(22,PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1542 }
1543
1544 /* XXXX What this code is doing here?!!! There should be no need
1545 to do this again and again, PL_reglastparen should take care of
1546 this! */
1547 sp = prog->startp;
1548 ep = prog->endp;
1549 if (prog->nparens) {
1550 for (i = prog->nparens; i >= 1; i--) {
1551 *++sp = -1;
1552 *++ep = -1;
1553 }
1554 }
1555 REGCP_SET;
1556 if (regmatch(prog->program + 1)) {
1557 prog->endp[0] = PL_reginput - PL_bostr;
1558 return 1;
1559 }
1560 REGCP_UNWIND;
1561 return 0;
1562}
1563
1564/*
1565 - regmatch - main matching routine
1566 *
1567 * Conceptually the strategy is simple: check to see whether the current
1568 * node matches, call self recursively to see whether the rest matches,
1569 * and then act accordingly. In practice we make some effort to avoid
1570 * recursion, in particular by going through "ordinary" nodes (that don't
1571 * need to know whether the rest of the match failed) by a loop instead of
1572 * by recursion.
1573 */
1574/* [lwall] I've hoisted the register declarations to the outer block in order to
1575 * maybe save a little bit of pushing and popping on the stack. It also takes
1576 * advantage of machines that use a register save mask on subroutine entry.
1577 */
1578STATIC I32 /* 0 failure, 1 success */
1579S_regmatch(pTHX_ regnode *prog)
1580{
1581 dTHR;
1582 register regnode *scan; /* Current node. */
1583 regnode *next; /* Next node. */
1584 regnode *inner; /* Next node in internal branch. */
1585 register I32 nextchr; /* renamed nextchr - nextchar colides with
1586 function of same name */
1587 register I32 n; /* no or next */
1588 register I32 ln; /* len or last */
1589 register char *s; /* operand or save */
1590 register char *locinput = PL_reginput;
1591 register I32 c1, c2, paren; /* case fold search, parenth */
1592 int minmod = 0, sw = 0, logical = 0;
1593#ifdef DEBUGGING
1594 PL_regindent++;
1595#endif
1596
1597 /* Note that nextchr is a byte even in UTF */
1598 nextchr = UCHARAT(locinput);
1599 scan = prog;
1600 while (scan != NULL) {
1601#define sayNO_L (logical ? (logical = 0, sw = 0, goto cont) : sayNO)
1602#ifdef DEBUGGING
1603# define sayYES goto yes
1604# define sayNO goto no
7821416a 1605# define sayYES_FINAL goto yes_final
1606# define sayYES_LOUD goto yes_loud
1607# define sayNO_FINAL goto no_final
1608# define sayNO_SILENT goto do_no
d6a28714 1609# define saySAME(x) if (x) goto yes; else goto no
1610# define REPORT_CODE_OFF 24
1611#else
1612# define sayYES return 1
1613# define sayNO return 0
7821416a 1614# define sayYES_FINAL return 1
1615# define sayYES_LOUD return 1
1616# define sayNO_FINAL return 0
1617# define sayNO_SILENT return 0
d6a28714 1618# define saySAME(x) return x
1619#endif
1620 DEBUG_r( {
1621 SV *prop = sv_newmortal();
1622 int docolor = *PL_colors[0];
1623 int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
1624 int l = (PL_regeol - locinput > taill ? taill : PL_regeol - locinput);
1625 /* The part of the string before starttry has one color
1626 (pref0_len chars), between starttry and current
1627 position another one (pref_len - pref0_len chars),
1628 after the current position the third one.
1629 We assume that pref0_len <= pref_len, otherwise we
1630 decrease pref0_len. */
1631 int pref_len = (locinput - PL_bostr > (5 + taill) - l
1632 ? (5 + taill) - l : locinput - PL_bostr);
1633 int pref0_len = pref_len - (locinput - PL_reg_starttry);
1634
1635 if (l + pref_len < (5 + taill) && l < PL_regeol - locinput)
1636 l = ( PL_regeol - locinput > (5 + taill) - pref_len
1637 ? (5 + taill) - pref_len : PL_regeol - locinput);
1638 if (pref0_len < 0)
1639 pref0_len = 0;
1640 if (pref0_len > pref_len)
1641 pref0_len = pref_len;
1642 regprop(prop, scan);
1643 PerlIO_printf(Perl_debug_log,
1644 "%4i <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|%3d:%*s%s\n",
1645 locinput - PL_bostr,
1646 PL_colors[4], pref0_len,
1647 locinput - pref_len, PL_colors[5],
1648 PL_colors[2], pref_len - pref0_len,
1649 locinput - pref_len + pref0_len, PL_colors[3],
1650 (docolor ? "" : "> <"),
1651 PL_colors[0], l, locinput, PL_colors[1],
1652 15 - l - pref_len + 1,
1653 "",
1654 scan - PL_regprogram, PL_regindent*2, "",
1655 SvPVX(prop));
1656 } );
1657
1658 next = scan + NEXT_OFF(scan);
1659 if (next == scan)
1660 next = NULL;
1661
1662 switch (OP(scan)) {
1663 case BOL:
1664 if (locinput == PL_bostr
1665 ? PL_regprev == '\n'
1666 : (PL_multiline &&
1667 (nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
1668 {
1669 /* regtill = regbol; */
b8c5462f 1670 break;
1671 }
d6a28714 1672 sayNO;
1673 case MBOL:
1674 if (locinput == PL_bostr
1675 ? PL_regprev == '\n'
1676 : ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
1677 {
b8c5462f 1678 break;
1679 }
d6a28714 1680 sayNO;
1681 case SBOL:
1682 if (locinput == PL_regbol && PL_regprev == '\n')
b8c5462f 1683 break;
d6a28714 1684 sayNO;
1685 case GPOS:
1686 if (locinput == PL_reg_ganch)
1687 break;
1688 sayNO;
1689 case EOL:
1690 if (PL_multiline)
1691 goto meol;
1692 else
1693 goto seol;
1694 case MEOL:
1695 meol:
1696 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 1697 sayNO;
b8c5462f 1698 break;
d6a28714 1699 case SEOL:
1700 seol:
1701 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 1702 sayNO;
d6a28714 1703 if (PL_regeol - locinput > 1)
b8c5462f 1704 sayNO;
b8c5462f 1705 break;
d6a28714 1706 case EOS:
1707 if (PL_regeol != locinput)
b8c5462f 1708 sayNO;
d6a28714 1709 break;
1710 case SANYUTF8:
b8c5462f 1711 if (nextchr & 0x80) {
b8c5462f 1712 locinput += PL_utf8skip[nextchr];
d6a28714 1713 if (locinput > PL_regeol)
1714 sayNO;
b8c5462f 1715 nextchr = UCHARAT(locinput);
1716 break;
1717 }
d6a28714 1718 if (!nextchr && locinput >= PL_regeol)
4633a7c4 1719 sayNO;
b8c5462f 1720 nextchr = UCHARAT(++locinput);
a0d0e21e 1721 break;
d6a28714 1722 case SANY:
1723 if (!nextchr && locinput >= PL_regeol)
b8c5462f 1724 sayNO;
1725 nextchr = UCHARAT(++locinput);
b85d18e9 1726 break;
d6a28714 1727 case ANYUTF8:
a0ed51b3 1728 if (nextchr & 0x80) {
b8c5462f 1729 locinput += PL_utf8skip[nextchr];
d6a28714 1730 if (locinput > PL_regeol)
1731 sayNO;
a0ed51b3 1732 nextchr = UCHARAT(locinput);
1733 break;
1734 }
d6a28714 1735 if (!nextchr && locinput >= PL_regeol || nextchr == '\n')
a0ed51b3 1736 sayNO;
1737 nextchr = UCHARAT(++locinput);
1738 break;
d6a28714 1739 case REG_ANY:
1740 if (!nextchr && locinput >= PL_regeol || nextchr == '\n')
4633a7c4 1741 sayNO;
76e3520e 1742 nextchr = UCHARAT(++locinput);
a0d0e21e 1743 break;
d6a28714 1744 case EXACT:
cd439c50 1745 s = STRING(scan);
1746 ln = STR_LEN(scan);
d6a28714 1747 /* Inline the first character, for speed. */
1748 if (UCHARAT(s) != nextchr)
1749 sayNO;
1750 if (PL_regeol - locinput < ln)
1751 sayNO;
1752 if (ln > 1 && memNE(s, locinput, ln))
1753 sayNO;
1754 locinput += ln;
1755 nextchr = UCHARAT(locinput);
1756 break;
1757 case EXACTFL:
b8c5462f 1758 PL_reg_flags |= RF_tainted;
1759 /* FALL THROUGH */
d6a28714 1760 case EXACTF:
cd439c50 1761 s = STRING(scan);
1762 ln = STR_LEN(scan);
d6a28714 1763
1764 if (UTF) {
1765 char *l = locinput;
1766 char *e = s + ln;
1767 c1 = OP(scan) == EXACTF;
1768 while (s < e) {
1769 if (l >= PL_regeol)
1770 sayNO;
1771 if (utf8_to_uv((U8*)s, 0) != (c1 ?
1772 toLOWER_utf8((U8*)l) :
1773 toLOWER_LC_utf8((U8*)l)))
1774 {
1775 sayNO;
1776 }
1777 s += UTF8SKIP(s);
1778 l += UTF8SKIP(l);
b8c5462f 1779 }
d6a28714 1780 locinput = l;
a0ed51b3 1781 nextchr = UCHARAT(locinput);
1782 break;
1783 }
d6a28714 1784
1785 /* Inline the first character, for speed. */
1786 if (UCHARAT(s) != nextchr &&
1787 UCHARAT(s) != ((OP(scan) == EXACTF)
1788 ? PL_fold : PL_fold_locale)[nextchr])
a0ed51b3 1789 sayNO;
d6a28714 1790 if (PL_regeol - locinput < ln)
b8c5462f 1791 sayNO;
d6a28714 1792 if (ln > 1 && (OP(scan) == EXACTF
1793 ? ibcmp(s, locinput, ln)
1794 : ibcmp_locale(s, locinput, ln)))
4633a7c4 1795 sayNO;
d6a28714 1796 locinput += ln;
1797 nextchr = UCHARAT(locinput);
a0d0e21e 1798 break;
d6a28714 1799 case ANYOFUTF8:
cd439c50 1800 s = MASK(scan);
d6a28714 1801 if (!REGINCLASSUTF8(scan, (U8*)locinput))
4633a7c4 1802 sayNO;
d6a28714 1803 if (locinput >= PL_regeol)
1804 sayNO;
1805 locinput += PL_utf8skip[nextchr];
1806 nextchr = UCHARAT(locinput);
1807 break;
1808 case ANYOF:
cd439c50 1809 s = MASK(scan);
d6a28714 1810 if (nextchr < 0)
b8c5462f 1811 nextchr = UCHARAT(locinput);
d6a28714 1812 if (!REGINCLASS(s, nextchr))
1813 sayNO;
1814 if (!nextchr && locinput >= PL_regeol)
4633a7c4 1815 sayNO;
b8c5462f 1816 nextchr = UCHARAT(++locinput);
1817 break;
d6a28714 1818 case ALNUML:
b8c5462f 1819 PL_reg_flags |= RF_tainted;
1820 /* FALL THROUGH */
d6a28714 1821 case ALNUM:
b8c5462f 1822 if (!nextchr)
4633a7c4 1823 sayNO;
d6a28714 1824 if (!(OP(scan) == ALNUM
1825 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
b8c5462f 1826 sayNO;
1827 nextchr = UCHARAT(++locinput);
bbce6d69 1828 break;
d6a28714 1829 case ALNUMLUTF8:
3280af22 1830 PL_reg_flags |= RF_tainted;
bbce6d69 1831 /* FALL THROUGH */
d6a28714 1832 case ALNUMUTF8:
b8c5462f 1833 if (!nextchr)
1834 sayNO;
1835 if (nextchr & 0x80) {
d6a28714 1836 if (!(OP(scan) == ALNUMUTF8
1837 ? swash_fetch(PL_utf8_alnum, (U8*)locinput)
1838 : isALNUM_LC_utf8((U8*)locinput)))
b8c5462f 1839 {
1840 sayNO;
a0ed51b3 1841 }
b8c5462f 1842 locinput += PL_utf8skip[nextchr];
a0ed51b3 1843 nextchr = UCHARAT(locinput);
1844 break;
1845 }
d6a28714 1846 if (!(OP(scan) == ALNUMUTF8
1847 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
bbce6d69 1848 sayNO;
b8c5462f 1849 nextchr = UCHARAT(++locinput);
a0d0e21e 1850 break;
d6a28714 1851 case NALNUML:
b8c5462f 1852 PL_reg_flags |= RF_tainted;
1853 /* FALL THROUGH */
d6a28714 1854 case NALNUM:
1855 if (!nextchr && locinput >= PL_regeol)
a0ed51b3 1856 sayNO;
d6a28714 1857 if (OP(scan) == NALNUM
1858 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
a0ed51b3 1859 sayNO;
b8c5462f 1860 nextchr = UCHARAT(++locinput);
a0ed51b3 1861 break;
d6a28714 1862 case NALNUMLUTF8:
b8c5462f 1863 PL_reg_flags |= RF_tainted;
1864 /* FALL THROUGH */
d6a28714 1865 case NALNUMUTF8:
3280af22 1866 if (!nextchr && locinput >= PL_regeol)
4633a7c4 1867 sayNO;
b8c5462f 1868 if (nextchr & 0x80) {
d6a28714 1869 if (OP(scan) == NALNUMUTF8
1870 ? swash_fetch(PL_utf8_alnum, (U8*)locinput)
1871 : isALNUM_LC_utf8((U8*)locinput))
1872 {
b8c5462f 1873 sayNO;
d6a28714 1874 }
b8c5462f 1875 locinput += PL_utf8skip[nextchr];
1876 nextchr = UCHARAT(locinput);
1877 break;
1878 }
d6a28714 1879 if (OP(scan) == NALNUMUTF8
1880 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
b8c5462f 1881 sayNO;
76e3520e 1882 nextchr = UCHARAT(++locinput);
a0d0e21e 1883 break;
d6a28714 1884 case BOUNDL:
1885 case NBOUNDL:
3280af22 1886 PL_reg_flags |= RF_tainted;
bbce6d69 1887 /* FALL THROUGH */
d6a28714 1888 case BOUND:
1889 case NBOUND:
1890 /* was last char in word? */
1891 ln = (locinput != PL_regbol) ? UCHARAT(locinput - 1) : PL_regprev;
1892 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
1893 ln = isALNUM(ln);
1894 n = isALNUM(nextchr);
1895 }
1896 else {
1897 ln = isALNUM_LC(ln);
1898 n = isALNUM_LC(nextchr);
1899 }
1900 if (((!ln) == (!n)) == (OP(scan) == BOUND || OP(scan) == BOUNDL))
4633a7c4 1901 sayNO;
a0d0e21e 1902 break;
d6a28714 1903 case BOUNDLUTF8:
1904 case NBOUNDLUTF8:
a0ed51b3 1905 PL_reg_flags |= RF_tainted;
1906 /* FALL THROUGH */
d6a28714 1907 case BOUNDUTF8:
1908 case NBOUNDUTF8:
1909 /* was last char in word? */
1910 ln = (locinput != PL_regbol)
1911 ? utf8_to_uv(reghop((U8*)locinput, -1), 0) : PL_regprev;
1912 if (OP(scan) == BOUNDUTF8 || OP(scan) == NBOUNDUTF8) {
1913 ln = isALNUM_uni(ln);
1914 n = swash_fetch(PL_utf8_alnum, (U8*)locinput);
a0ed51b3 1915 }
d6a28714 1916 else {
1917 ln = isALNUM_LC_uni(ln);
1918 n = isALNUM_LC_utf8((U8*)locinput);
1919 }
1920 if (((!ln) == (!n)) == (OP(scan) == BOUNDUTF8 || OP(scan) == BOUNDLUTF8))
a0ed51b3 1921 sayNO;
a0ed51b3 1922 break;
d6a28714 1923 case SPACEL:
3280af22 1924 PL_reg_flags |= RF_tainted;
bbce6d69 1925 /* FALL THROUGH */
d6a28714 1926 case SPACE:
1927 if (!nextchr && locinput >= PL_regeol)
4633a7c4 1928 sayNO;
d6a28714 1929 if (!(OP(scan) == SPACE
1930 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
4633a7c4 1931 sayNO;
76e3520e 1932 nextchr = UCHARAT(++locinput);
a0d0e21e 1933 break;
d6a28714 1934 case SPACELUTF8:
a0ed51b3 1935 PL_reg_flags |= RF_tainted;
1936 /* FALL THROUGH */
d6a28714 1937 case SPACEUTF8:
a0ed51b3 1938 if (!nextchr && locinput >= PL_regeol)
1939 sayNO;
1940 if (nextchr & 0x80) {
d6a28714 1941 if (!(OP(scan) == SPACEUTF8
1942 ? swash_fetch(PL_utf8_space,(U8*)locinput)
1943 : isSPACE_LC_utf8((U8*)locinput)))
1944 {
a0ed51b3 1945 sayNO;
d6a28714 1946 }
6f06b55f 1947 locinput += PL_utf8skip[nextchr];
a0ed51b3 1948 nextchr = UCHARAT(locinput);
1949 break;
1950 }
d6a28714 1951 if (!(OP(scan) == SPACEUTF8
1952 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
a0ed51b3 1953 sayNO;
1954 nextchr = UCHARAT(++locinput);
1955 break;
d6a28714 1956 case NSPACEL:
3280af22 1957 PL_reg_flags |= RF_tainted;
bbce6d69 1958 /* FALL THROUGH */
d6a28714 1959 case NSPACE:
b8c5462f 1960 if (!nextchr)
1961 sayNO;
d6a28714 1962 if (OP(scan) == SPACE
1963 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
4633a7c4 1964 sayNO;
b8c5462f 1965 nextchr = UCHARAT(++locinput);
a0d0e21e 1966 break;
d6a28714 1967 case NSPACELUTF8:
a0ed51b3 1968 PL_reg_flags |= RF_tainted;
1969 /* FALL THROUGH */
d6a28714 1970 case NSPACEUTF8:
b8c5462f 1971 if (!nextchr)
1972 sayNO;
1973 if (nextchr & 0x80) {
d6a28714 1974 if (OP(scan) == NSPACEUTF8
1975 ? swash_fetch(PL_utf8_space,(U8*)locinput)
1976 : isSPACE_LC_utf8((U8*)locinput))
b8c5462f 1977 {
1978 sayNO;
1979 }
1980 locinput += PL_utf8skip[nextchr];
1981 nextchr = UCHARAT(locinput);
1982 break;
a0ed51b3 1983 }
d6a28714 1984 if (OP(scan) == NSPACEUTF8
1985 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
4633a7c4 1986 sayNO;
76e3520e 1987 nextchr = UCHARAT(++locinput);
a0d0e21e 1988 break;
d6a28714 1989 case DIGITL:
a0ed51b3 1990 PL_reg_flags |= RF_tainted;
1991 /* FALL THROUGH */
d6a28714 1992 case DIGIT:
a0ed51b3 1993 if (!nextchr && locinput >= PL_regeol)
1994 sayNO;
d6a28714 1995 if (!(OP(scan) == DIGIT
1996 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
4633a7c4 1997 sayNO;
76e3520e 1998 nextchr = UCHARAT(++locinput);
a0d0e21e 1999 break;
d6a28714 2000 case DIGITLUTF8:
a0ed51b3 2001 PL_reg_flags |= RF_tainted;
2002 /* FALL THROUGH */
d6a28714 2003 case DIGITUTF8:
a0ed51b3 2004 if (!nextchr)
2005 sayNO;
2006 if (nextchr & 0x80) {
d6a28714 2007 if (OP(scan) == NDIGITUTF8
2008 ? swash_fetch(PL_utf8_digit,(U8*)locinput)
2009 : isDIGIT_LC_utf8((U8*)locinput))
dfe13c55 2010 {
a0ed51b3 2011 sayNO;
dfe13c55 2012 }
6f06b55f 2013 locinput += PL_utf8skip[nextchr];
a0ed51b3 2014 nextchr = UCHARAT(locinput);
2015 break;
2016 }
d6a28714 2017 if (!isDIGIT(nextchr))
a0ed51b3 2018 sayNO;
2019 nextchr = UCHARAT(++locinput);
2020 break;
d6a28714 2021 case NDIGITL:
b8c5462f 2022 PL_reg_flags |= RF_tainted;
2023 /* FALL THROUGH */
d6a28714 2024 case NDIGIT:
b8c5462f 2025 if (!nextchr)
2026 sayNO;
d6a28714 2027 if (OP(scan) == DIGIT
2028 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
4633a7c4 2029 sayNO;
76e3520e 2030 nextchr = UCHARAT(++locinput);
a0d0e21e 2031 break;
d6a28714 2032 case NDIGITLUTF8:
b8c5462f 2033 PL_reg_flags |= RF_tainted;
2034 /* FALL THROUGH */
d6a28714 2035 case NDIGITUTF8:
b8c5462f 2036 if (!nextchr && locinput >= PL_regeol)
2037 sayNO;
a0ed51b3 2038 if (nextchr & 0x80) {
d6a28714 2039 if (swash_fetch(PL_utf8_digit,(U8*)locinput))
a0ed51b3 2040 sayNO;
6f06b55f 2041 locinput += PL_utf8skip[nextchr];
a0ed51b3 2042 nextchr = UCHARAT(locinput);
2043 break;
2044 }
d6a28714 2045 if (isDIGIT(nextchr))
a0ed51b3 2046 sayNO;
2047 nextchr = UCHARAT(++locinput);
2048 break;
2049 case CLUMP:
dfe13c55 2050 if (locinput >= PL_regeol || swash_fetch(PL_utf8_mark,(U8*)locinput))
a0ed51b3 2051 sayNO;
6f06b55f 2052 locinput += PL_utf8skip[nextchr];
dfe13c55 2053 while (locinput < PL_regeol && swash_fetch(PL_utf8_mark,(U8*)locinput))
a0ed51b3 2054 locinput += UTF8SKIP(locinput);
2055 if (locinput > PL_regeol)
2056 sayNO;
2057 nextchr = UCHARAT(locinput);
2058 break;
c8756f30 2059 case REFFL:
3280af22 2060 PL_reg_flags |= RF_tainted;
c8756f30 2061 /* FALL THROUGH */
c277df42 2062 case REF:
c8756f30 2063 case REFF:
c277df42 2064 n = ARG(scan); /* which paren pair */
cf93c79d 2065 ln = PL_regstartp[n];
2c2d71f5 2066 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
cf93c79d 2067 if (*PL_reglastparen < n || ln == -1)
af3f8c16 2068 sayNO; /* Do not match unless seen CLOSEn. */
cf93c79d 2069 if (ln == PL_regendp[n])
a0d0e21e 2070 break;
a0ed51b3 2071
cf93c79d 2072 s = PL_bostr + ln;
a0ed51b3 2073 if (UTF && OP(scan) != REF) { /* REF can do byte comparison */
2074 char *l = locinput;
cf93c79d 2075 char *e = PL_bostr + PL_regendp[n];
a0ed51b3 2076 /*
2077 * Note that we can't do the "other character" lookup trick as
2078 * in the 8-bit case (no pun intended) because in Unicode we
2079 * have to map both upper and title case to lower case.
2080 */
2081 if (OP(scan) == REFF) {
2082 while (s < e) {
2083 if (l >= PL_regeol)
2084 sayNO;
dfe13c55 2085 if (toLOWER_utf8((U8*)s) != toLOWER_utf8((U8*)l))
a0ed51b3 2086 sayNO;
2087 s += UTF8SKIP(s);
2088 l += UTF8SKIP(l);
2089 }
2090 }
2091 else {
2092 while (s < e) {
2093 if (l >= PL_regeol)
2094 sayNO;
dfe13c55 2095 if (toLOWER_LC_utf8((U8*)s) != toLOWER_LC_utf8((U8*)l))
a0ed51b3 2096 sayNO;
2097 s += UTF8SKIP(s);
2098 l += UTF8SKIP(l);
2099 }
2100 }
2101 locinput = l;
2102 nextchr = UCHARAT(locinput);
2103 break;
2104 }
2105
a0d0e21e 2106 /* Inline the first character, for speed. */
76e3520e 2107 if (UCHARAT(s) != nextchr &&
c8756f30 2108 (OP(scan) == REF ||
2109 (UCHARAT(s) != ((OP(scan) == REFF
22c35a8c 2110 ? PL_fold : PL_fold_locale)[nextchr]))))
4633a7c4 2111 sayNO;
cf93c79d 2112 ln = PL_regendp[n] - ln;
3280af22 2113 if (locinput + ln > PL_regeol)
4633a7c4 2114 sayNO;
c8756f30 2115 if (ln > 1 && (OP(scan) == REF
2116 ? memNE(s, locinput, ln)
2117 : (OP(scan) == REFF
2118 ? ibcmp(s, locinput, ln)
2119 : ibcmp_locale(s, locinput, ln))))
4633a7c4 2120 sayNO;
a0d0e21e 2121 locinput += ln;
76e3520e 2122 nextchr = UCHARAT(locinput);
a0d0e21e 2123 break;
2124
2125 case NOTHING:
c277df42 2126 case TAIL:
a0d0e21e 2127 break;
2128 case BACK:
2129 break;
c277df42 2130 case EVAL:
2131 {
2132 dSP;
533c011a 2133 OP_4tree *oop = PL_op;
3280af22 2134 COP *ocurcop = PL_curcop;
2135 SV **ocurpad = PL_curpad;
c277df42 2136 SV *ret;
2137
2138 n = ARG(scan);
533c011a 2139 PL_op = (OP_4tree*)PL_regdata->data[n];
2140 DEBUG_r( PerlIO_printf(Perl_debug_log, " re_eval 0x%x\n", PL_op) );
dfad63ad 2141 PL_curpad = AvARRAY((AV*)PL_regdata->data[n + 2]);
cf93c79d 2142 PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
c277df42 2143
cea2e8a9 2144 CALLRUNOPS(aTHX); /* Scalar context. */
c277df42 2145 SPAGAIN;
2146 ret = POPs;
2147 PUTBACK;
2148
0f5d15d6 2149 PL_op = oop;
2150 PL_curpad = ocurpad;
2151 PL_curcop = ocurcop;
c277df42 2152 if (logical) {
0f5d15d6 2153 if (logical == 2) { /* Postponed subexpression. */
2154 regexp *re;
22c35a8c 2155 MAGIC *mg = Null(MAGIC*);
0f5d15d6 2156 re_cc_state state;
0f5d15d6 2157 CHECKPOINT cp, lastcp;
2158
2159 if(SvROK(ret) || SvRMAGICAL(ret)) {
2160 SV *sv = SvROK(ret) ? SvRV(ret) : ret;
2161
2162 if(SvMAGICAL(sv))
2163 mg = mg_find(sv, 'r');
2164 }
2165 if (mg) {
2166 re = (regexp *)mg->mg_obj;
df0003d4 2167 (void)ReREFCNT_inc(re);
0f5d15d6 2168 }
2169 else {
2170 STRLEN len;
2171 char *t = SvPV(ret, len);
2172 PMOP pm;
2173 char *oprecomp = PL_regprecomp;
2174 I32 osize = PL_regsize;
2175 I32 onpar = PL_regnpar;
2176
2177 pm.op_pmflags = 0;
cea2e8a9 2178 re = CALLREGCOMP(aTHX_ t, t + len, &pm);
0f5d15d6 2179 if (!(SvFLAGS(ret)
2180 & (SVs_TEMP | SVs_PADTMP | SVf_READONLY)))
2181 sv_magic(ret,(SV*)ReREFCNT_inc(re),'r',0,0);
2182 PL_regprecomp = oprecomp;
2183 PL_regsize = osize;
2184 PL_regnpar = onpar;
2185 }
2186 DEBUG_r(
2187 PerlIO_printf(Perl_debug_log,
2188 "Entering embedded `%s%.60s%s%s'\n",
2189 PL_colors[0],
2190 re->precomp,
2191 PL_colors[1],
2192 (strlen(re->precomp) > 60 ? "..." : ""))
2193 );
2194 state.node = next;
2195 state.prev = PL_reg_call_cc;
2196 state.cc = PL_regcc;
2197 state.re = PL_reg_re;
2198
2ab05381 2199 PL_regcc = 0;
0f5d15d6 2200
2201 cp = regcppush(0); /* Save *all* the positions. */
2202 REGCP_SET;
2203 cache_re(re);
2204 state.ss = PL_savestack_ix;
2205 *PL_reglastparen = 0;
2206 PL_reg_call_cc = &state;
2207 PL_reginput = locinput;
2c2d71f5 2208
2209 /* XXXX This is too dramatic a measure... */
2210 PL_reg_maxiter = 0;
2211
0f5d15d6 2212 if (regmatch(re->program + 1)) {
2c914db6 2213 /* Even though we succeeded, we need to restore
2214 global variables, since we may be wrapped inside
2215 SUSPEND, thus the match may be not finished yet. */
2216
2217 /* XXXX Do this only if SUSPENDed? */
2218 PL_reg_call_cc = state.prev;
2219 PL_regcc = state.cc;
2220 PL_reg_re = state.re;
2221 cache_re(PL_reg_re);
2222
2223 /* XXXX This is too dramatic a measure... */
2224 PL_reg_maxiter = 0;
2225
2226 /* These are needed even if not SUSPEND. */
0f5d15d6 2227 ReREFCNT_dec(re);
2228 regcpblow(cp);
2229 sayYES;
2230 }
0f5d15d6 2231 ReREFCNT_dec(re);
2232 REGCP_UNWIND;
2233 regcppop();
2234 PL_reg_call_cc = state.prev;
2235 PL_regcc = state.cc;
2236 PL_reg_re = state.re;
d3790889 2237 cache_re(PL_reg_re);
2c2d71f5 2238
2239 /* XXXX This is too dramatic a measure... */
2240 PL_reg_maxiter = 0;
2241
0f5d15d6 2242 sayNO;
2243 }
c277df42 2244 sw = SvTRUE(ret);
0f5d15d6 2245 logical = 0;
a0ed51b3 2246 }
2247 else
3280af22 2248 sv_setsv(save_scalar(PL_replgv), ret);
c277df42 2249 break;
2250 }
a0d0e21e 2251 case OPEN:
c277df42 2252 n = ARG(scan); /* which paren pair */
3280af22 2253 PL_reg_start_tmp[n] = locinput;
2254 if (n > PL_regsize)
2255 PL_regsize = n;
a0d0e21e 2256 break;
2257 case CLOSE:
c277df42 2258 n = ARG(scan); /* which paren pair */
cf93c79d 2259 PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
2260 PL_regendp[n] = locinput - PL_bostr;
3280af22 2261 if (n > *PL_reglastparen)
2262 *PL_reglastparen = n;
a0d0e21e 2263 break;
c277df42 2264 case GROUPP:
2265 n = ARG(scan); /* which paren pair */
cf93c79d 2266 sw = (*PL_reglastparen >= n && PL_regendp[n] != -1);
c277df42 2267 break;
2268 case IFTHEN:
2c2d71f5 2269 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
c277df42 2270 if (sw)
2271 next = NEXTOPER(NEXTOPER(scan));
2272 else {
2273 next = scan + ARG(scan);
2274 if (OP(next) == IFTHEN) /* Fake one. */
2275 next = NEXTOPER(NEXTOPER(next));
2276 }
2277 break;
2278 case LOGICAL:
0f5d15d6 2279 logical = scan->flags;
c277df42 2280 break;
2ab05381 2281/*******************************************************************
2282 PL_regcc contains infoblock about the innermost (...)* loop, and
2283 a pointer to the next outer infoblock.
2284
2285 Here is how Y(A)*Z is processed (if it is compiled into CURLYX/WHILEM):
2286
2287 1) After matching X, regnode for CURLYX is processed;
2288
2289 2) This regnode creates infoblock on the stack, and calls
2290 regmatch() recursively with the starting point at WHILEM node;
2291
2292 3) Each hit of WHILEM node tries to match A and Z (in the order
2293 depending on the current iteration, min/max of {min,max} and
2294 greediness). The information about where are nodes for "A"
2295 and "Z" is read from the infoblock, as is info on how many times "A"
2296 was already matched, and greediness.
2297
2298 4) After A matches, the same WHILEM node is hit again.
2299
2300 5) Each time WHILEM is hit, PL_regcc is the infoblock created by CURLYX
2301 of the same pair. Thus when WHILEM tries to match Z, it temporarily
2302 resets PL_regcc, since this Y(A)*Z can be a part of some other loop:
2303 as in (Y(A)*Z)*. If Z matches, the automaton will hit the WHILEM node
2304 of the external loop.
2305
2306 Currently present infoblocks form a tree with a stem formed by PL_curcc
2307 and whatever it mentions via ->next, and additional attached trees
2308 corresponding to temporarily unset infoblocks as in "5" above.
2309
2310 In the following picture infoblocks for outer loop of
2311 (Y(A)*?Z)*?T are denoted O, for inner I. NULL starting block
2312 is denoted by x. The matched string is YAAZYAZT. Temporarily postponed
2313 infoblocks are drawn below the "reset" infoblock.
2314
2315 In fact in the picture below we do not show failed matches for Z and T
2316 by WHILEM blocks. [We illustrate minimal matches, since for them it is
2317 more obvious *why* one needs to *temporary* unset infoblocks.]
2318
2319 Matched REx position InfoBlocks Comment
2320 (Y(A)*?Z)*?T x
2321 Y(A)*?Z)*?T x <- O
2322 Y (A)*?Z)*?T x <- O
2323 Y A)*?Z)*?T x <- O <- I
2324 YA )*?Z)*?T x <- O <- I
2325 YA A)*?Z)*?T x <- O <- I
2326 YAA )*?Z)*?T x <- O <- I
2327 YAA Z)*?T x <- O # Temporary unset I
2328 I
2329
2330 YAAZ Y(A)*?Z)*?T x <- O
2331 I
2332
2333 YAAZY (A)*?Z)*?T x <- O
2334 I
2335
2336 YAAZY A)*?Z)*?T x <- O <- I
2337 I
2338
2339 YAAZYA )*?Z)*?T x <- O <- I
2340 I
2341
2342 YAAZYA Z)*?T x <- O # Temporary unset I
2343 I,I
2344
2345 YAAZYAZ )*?T x <- O
2346 I,I
2347
2348 YAAZYAZ T x # Temporary unset O
2349 O
2350 I,I
2351
2352 YAAZYAZT x
2353 O
2354 I,I
2355 *******************************************************************/
a0d0e21e 2356 case CURLYX: {
2357 CURCUR cc;
3280af22 2358 CHECKPOINT cp = PL_savestack_ix;
c277df42 2359
2360 if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
2361 next += ARG(next);
3280af22 2362 cc.oldcc = PL_regcc;
2363 PL_regcc = &cc;
2364 cc.parenfloor = *PL_reglastparen;
a0d0e21e 2365 cc.cur = -1;
2366 cc.min = ARG1(scan);
2367 cc.max = ARG2(scan);
c277df42 2368 cc.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
a0d0e21e 2369 cc.next = next;
2370 cc.minmod = minmod;
2371 cc.lastloc = 0;
3280af22 2372 PL_reginput = locinput;
a0d0e21e 2373 n = regmatch(PREVOPER(next)); /* start on the WHILEM */
2374 regcpblow(cp);
3280af22 2375 PL_regcc = cc.oldcc;
4633a7c4 2376 saySAME(n);
a0d0e21e 2377 }
2378 /* NOT REACHED */
2379 case WHILEM: {
2380 /*
2381 * This is really hard to understand, because after we match
2382 * what we're trying to match, we must make sure the rest of
2c2d71f5 2383 * the REx is going to match for sure, and to do that we have
a0d0e21e 2384 * to go back UP the parse tree by recursing ever deeper. And
2385 * if it fails, we have to reset our parent's current state
2386 * that we can try again after backing off.
2387 */
2388
c277df42 2389 CHECKPOINT cp, lastcp;
3280af22 2390 CURCUR* cc = PL_regcc;
c277df42 2391 char *lastloc = cc->lastloc; /* Detection of 0-len. */
2392
4633a7c4 2393 n = cc->cur + 1; /* how many we know we matched */
3280af22 2394 PL_reginput = locinput;
a0d0e21e 2395
c277df42 2396 DEBUG_r(
2397 PerlIO_printf(Perl_debug_log,
2398 "%*s %ld out of %ld..%ld cc=%lx\n",
3280af22 2399 REPORT_CODE_OFF+PL_regindent*2, "",
c277df42 2400 (long)n, (long)cc->min,
2401 (long)cc->max, (long)cc)
2402 );
4633a7c4 2403
a0d0e21e 2404 /* If degenerate scan matches "", assume scan done. */
2405
579cf2c3 2406 if (locinput == cc->lastloc && n >= cc->min) {
3280af22 2407 PL_regcc = cc->oldcc;
2ab05381 2408 if (PL_regcc)
2409 ln = PL_regcc->cur;
c277df42 2410 DEBUG_r(
c3464db5 2411 PerlIO_printf(Perl_debug_log,
2412 "%*s empty match detected, try continuation...\n",
3280af22 2413 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2414 );
a0d0e21e 2415 if (regmatch(cc->next))
4633a7c4 2416 sayYES;
2ab05381 2417 if (PL_regcc)
2418 PL_regcc->cur = ln;
3280af22 2419 PL_regcc = cc;
4633a7c4 2420 sayNO;
a0d0e21e 2421 }
2422
2423 /* First just match a string of min scans. */
2424
2425 if (n < cc->min) {
2426 cc->cur = n;
2427 cc->lastloc = locinput;
4633a7c4 2428 if (regmatch(cc->scan))
2429 sayYES;
2430 cc->cur = n - 1;
c277df42 2431 cc->lastloc = lastloc;
4633a7c4 2432 sayNO;
a0d0e21e 2433 }
2434
2c2d71f5 2435 if (scan->flags) {
2436 /* Check whether we already were at this position.
2437 Postpone detection until we know the match is not
2438 *that* much linear. */
2439 if (!PL_reg_maxiter) {
2440 PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
2441 PL_reg_leftiter = PL_reg_maxiter;
2442 }
2443 if (PL_reg_leftiter-- == 0) {
2444 I32 size = (PL_reg_maxiter + 7)/8;
2445 if (PL_reg_poscache) {
2446 if (PL_reg_poscache_size < size) {
2447 Renew(PL_reg_poscache, size, char);
2448 PL_reg_poscache_size = size;
2449 }
2450 Zero(PL_reg_poscache, size, char);
2451 }
2452 else {
2453 PL_reg_poscache_size = size;
2454 Newz(29, PL_reg_poscache, size, char);
2455 }
2456 DEBUG_r(
2457 PerlIO_printf(Perl_debug_log,
2458 "%sDetected a super-linear match, switching on caching%s...\n",
2459 PL_colors[4], PL_colors[5])
2460 );
2461 }
2462 if (PL_reg_leftiter < 0) {
2463 I32 o = locinput - PL_bostr, b;
2464
2465 o = (scan->flags & 0xf) - 1 + o * (scan->flags>>4);
2466 b = o % 8;
2467 o /= 8;
2468 if (PL_reg_poscache[o] & (1<<b)) {
2469 DEBUG_r(
2470 PerlIO_printf(Perl_debug_log,
2471 "%*s already tried at this position...\n",
2472 REPORT_CODE_OFF+PL_regindent*2, "")
2473 );
7821416a 2474 sayNO_SILENT;
2c2d71f5 2475 }
2476 PL_reg_poscache[o] |= (1<<b);
2477 }
2478 }
2479
a0d0e21e 2480 /* Prefer next over scan for minimal matching. */
2481
2482 if (cc->minmod) {
3280af22 2483 PL_regcc = cc->oldcc;
2ab05381 2484 if (PL_regcc)
2485 ln = PL_regcc->cur;
5f05dabc 2486 cp = regcppush(cc->parenfloor);
c277df42 2487 REGCP_SET;
5f05dabc 2488 if (regmatch(cc->next)) {
c277df42 2489 regcpblow(cp);
4633a7c4 2490 sayYES; /* All done. */
5f05dabc 2491 }
c277df42 2492 REGCP_UNWIND;
5f05dabc 2493 regcppop();
2ab05381 2494 if (PL_regcc)
2495 PL_regcc->cur = ln;
3280af22 2496 PL_regcc = cc;
a0d0e21e 2497
c277df42 2498 if (n >= cc->max) { /* Maximum greed exceeded? */
599cee73 2499 if (ckWARN(WARN_UNSAFE) && n >= REG_INFTY
3280af22 2500 && !(PL_reg_flags & RF_warned)) {
2501 PL_reg_flags |= RF_warned;
cea2e8a9 2502 Perl_warner(aTHX_ WARN_UNSAFE, "%s limit (%d) exceeded",
2f3ca594 2503 "Complex regular subexpression recursion",
2504 REG_INFTY - 1);
c277df42 2505 }
4633a7c4 2506 sayNO;
c277df42 2507 }
a687059c 2508
c277df42 2509 DEBUG_r(
c3464db5 2510 PerlIO_printf(Perl_debug_log,
2511 "%*s trying longer...\n",
3280af22 2512 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2513 );
a0d0e21e 2514 /* Try scanning more and see if it helps. */
3280af22 2515 PL_reginput = locinput;
a0d0e21e 2516 cc->cur = n;
2517 cc->lastloc = locinput;
5f05dabc 2518 cp = regcppush(cc->parenfloor);
c277df42 2519 REGCP_SET;
5f05dabc 2520 if (regmatch(cc->scan)) {
c277df42 2521 regcpblow(cp);
4633a7c4 2522 sayYES;
5f05dabc 2523 }
c277df42 2524 REGCP_UNWIND;
5f05dabc 2525 regcppop();
4633a7c4 2526 cc->cur = n - 1;
c277df42 2527 cc->lastloc = lastloc;
4633a7c4 2528 sayNO;
a0d0e21e 2529 }
2530
2531 /* Prefer scan over next for maximal matching. */
2532
2533 if (n < cc->max) { /* More greed allowed? */
5f05dabc 2534 cp = regcppush(cc->parenfloor);
a0d0e21e 2535 cc->cur = n;
2536 cc->lastloc = locinput;
c277df42 2537 REGCP_SET;
5f05dabc 2538 if (regmatch(cc->scan)) {
c277df42 2539 regcpblow(cp);
4633a7c4 2540 sayYES;
5f05dabc 2541 }
c277df42 2542 REGCP_UNWIND;
a0d0e21e 2543 regcppop(); /* Restore some previous $<digit>s? */
3280af22 2544 PL_reginput = locinput;
c277df42 2545 DEBUG_r(
c3464db5 2546 PerlIO_printf(Perl_debug_log,
2547 "%*s failed, try continuation...\n",
3280af22 2548 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2549 );
2550 }
599cee73 2551 if (ckWARN(WARN_UNSAFE) && n >= REG_INFTY
2552 && !(PL_reg_flags & RF_warned)) {
3280af22 2553 PL_reg_flags |= RF_warned;
cea2e8a9 2554 Perl_warner(aTHX_ WARN_UNSAFE, "%s limit (%d) exceeded",
cb5d145d 2555 "Complex regular subexpression recursion",
2556 REG_INFTY - 1);
a0d0e21e 2557 }
2558
2559 /* Failed deeper matches of scan, so see if this one works. */
3280af22 2560 PL_regcc = cc->oldcc;
2ab05381 2561 if (PL_regcc)
2562 ln = PL_regcc->cur;
a0d0e21e 2563 if (regmatch(cc->next))
4633a7c4 2564 sayYES;
2ab05381 2565 if (PL_regcc)
2566 PL_regcc->cur = ln;
3280af22 2567 PL_regcc = cc;
4633a7c4 2568 cc->cur = n - 1;
c277df42 2569 cc->lastloc = lastloc;
4633a7c4 2570 sayNO;
a0d0e21e 2571 }
2572 /* NOT REACHED */
c277df42 2573 case BRANCHJ:
2574 next = scan + ARG(scan);
2575 if (next == scan)
2576 next = NULL;
2577 inner = NEXTOPER(NEXTOPER(scan));
2578 goto do_branch;
2579 case BRANCH:
2580 inner = NEXTOPER(scan);
2581 do_branch:
2582 {
2583 CHECKPOINT lastcp;
2584 c1 = OP(scan);
2585 if (OP(next) != c1) /* No choice. */
2586 next = inner; /* Avoid recursion. */
a0d0e21e 2587 else {
3280af22 2588 int lastparen = *PL_reglastparen;
c277df42 2589
2590 REGCP_SET;
a0d0e21e 2591 do {
3280af22 2592 PL_reginput = locinput;
c277df42 2593 if (regmatch(inner))
4633a7c4 2594 sayYES;
c277df42 2595 REGCP_UNWIND;
3280af22 2596 for (n = *PL_reglastparen; n > lastparen; n--)
cf93c79d 2597 PL_regendp[n] = -1;
3280af22 2598 *PL_reglastparen = n;
c277df42 2599 scan = next;
a0d0e21e 2600 /*SUPPRESS 560*/
c277df42 2601 if (n = (c1 == BRANCH ? NEXT_OFF(next) : ARG(next)))
2602 next += n;
a0d0e21e 2603 else
c277df42 2604 next = NULL;
c277df42 2605 inner = NEXTOPER(scan);
2606 if (c1 == BRANCHJ) {
2607 inner = NEXTOPER(inner);
2608 }
2609 } while (scan != NULL && OP(scan) == c1);
4633a7c4 2610 sayNO;
a0d0e21e 2611 /* NOTREACHED */
a687059c 2612 }
a0d0e21e 2613 }
2614 break;
2615 case MINMOD:
2616 minmod = 1;
2617 break;
c277df42 2618 case CURLYM:
2619 {
00db4c45 2620 I32 l = 0;
c277df42 2621 CHECKPOINT lastcp;
2622
2623 /* We suppose that the next guy does not need
2624 backtracking: in particular, it is of constant length,
2625 and has no parenths to influence future backrefs. */
2626 ln = ARG1(scan); /* min to match */
2627 n = ARG2(scan); /* max to match */
c277df42 2628 paren = scan->flags;
2629 if (paren) {
3280af22 2630 if (paren > PL_regsize)
2631 PL_regsize = paren;
2632 if (paren > *PL_reglastparen)
2633 *PL_reglastparen = paren;
c277df42 2634 }
dc45a647 2635 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
c277df42 2636 if (paren)
2637 scan += NEXT_OFF(scan); /* Skip former OPEN. */
3280af22 2638 PL_reginput = locinput;
c277df42 2639 if (minmod) {
2640 minmod = 0;
2641 if (ln && regrepeat_hard(scan, ln, &l) < ln)
2642 sayNO;
5f4b28b2 2643 if (ln && l == 0 && n >= ln
c277df42 2644 /* In fact, this is tricky. If paren, then the
2645 fact that we did/didnot match may influence
2646 future execution. */
2647 && !(paren && ln == 0))
2648 ln = n;
3280af22 2649 locinput = PL_reginput;
22c35a8c 2650 if (PL_regkind[(U8)OP(next)] == EXACT) {
cd439c50 2651 c1 = (U8)*STRING(next);
c277df42 2652 if (OP(next) == EXACTF)
22c35a8c 2653 c2 = PL_fold[c1];
c277df42 2654 else if (OP(next) == EXACTFL)
22c35a8c 2655 c2 = PL_fold_locale[c1];
c277df42 2656 else
2657 c2 = c1;
a0ed51b3 2658 }
2659 else
c277df42 2660 c1 = c2 = -1000;
2661 REGCP_SET;
5f4b28b2 2662 /* This may be improved if l == 0. */
c277df42 2663 while (n >= ln || (n == REG_INFTY && ln > 0 && l)) { /* ln overflow ? */
2664 /* If it could work, try it. */
2665 if (c1 == -1000 ||
3280af22 2666 UCHARAT(PL_reginput) == c1 ||
2667 UCHARAT(PL_reginput) == c2)
c277df42 2668 {
2669 if (paren) {
2670 if (n) {
cf93c79d 2671 PL_regstartp[paren] =
2672 HOPc(PL_reginput, -l) - PL_bostr;
2673 PL_regendp[paren] = PL_reginput - PL_bostr;
a0ed51b3 2674 }
2675 else
cf93c79d 2676 PL_regendp[paren] = -1;
c277df42 2677 }
2678 if (regmatch(next))
2679 sayYES;
2680 REGCP_UNWIND;
2681 }
2682 /* Couldn't or didn't -- move forward. */
3280af22 2683 PL_reginput = locinput;
c277df42 2684 if (regrepeat_hard(scan, 1, &l)) {
2685 ln++;
3280af22 2686 locinput = PL_reginput;
c277df42 2687 }
2688 else
2689 sayNO;
2690 }
a0ed51b3 2691 }
2692 else {
c277df42 2693 n = regrepeat_hard(scan, n, &l);
2694 if (n != 0 && l == 0
2695 /* In fact, this is tricky. If paren, then the
2696 fact that we did/didnot match may influence
2697 future execution. */
2698 && !(paren && ln == 0))
2699 ln = n;
3280af22 2700 locinput = PL_reginput;
c277df42 2701 DEBUG_r(
5c0ca799 2702 PerlIO_printf(Perl_debug_log,
2703 "%*s matched %ld times, len=%ld...\n",
3280af22 2704 REPORT_CODE_OFF+PL_regindent*2, "", n, l)
c277df42 2705 );
2706 if (n >= ln) {
22c35a8c 2707 if (PL_regkind[(U8)OP(next)] == EXACT) {
cd439c50 2708 c1 = (U8)*STRING(next);
c277df42 2709 if (OP(next) == EXACTF)
22c35a8c 2710 c2 = PL_fold[c1];
c277df42 2711 else if (OP(next) == EXACTFL)
22c35a8c 2712 c2 = PL_fold_locale[c1];
c277df42 2713 else
2714 c2 = c1;
a0ed51b3 2715 }
2716 else
c277df42 2717 c1 = c2 = -1000;
2718 }
2719 REGCP_SET;
2720 while (n >= ln) {
2721 /* If it could work, try it. */
2722 if (c1 == -1000 ||
3280af22 2723 UCHARAT(PL_reginput) == c1 ||
2724 UCHARAT(PL_reginput) == c2)
a0ed51b3 2725 {
2726 DEBUG_r(
c3464db5 2727 PerlIO_printf(Perl_debug_log,
2728 "%*s trying tail with n=%ld...\n",
3280af22 2729 REPORT_CODE_OFF+PL_regindent*2, "", n)
a0ed51b3 2730 );
2731 if (paren) {
2732 if (n) {
cf93c79d 2733 PL_regstartp[paren] = HOPc(PL_reginput, -l) - PL_bostr;
2734 PL_regendp[paren] = PL_reginput - PL_bostr;
c277df42 2735 }
a0ed51b3 2736 else
cf93c79d 2737 PL_regendp[paren] = -1;
c277df42 2738 }
a0ed51b3 2739 if (regmatch(next))
2740 sayYES;
2741 REGCP_UNWIND;
2742 }
c277df42 2743 /* Couldn't or didn't -- back up. */
2744 n--;
dfe13c55 2745 locinput = HOPc(locinput, -l);
3280af22 2746 PL_reginput = locinput;
c277df42 2747 }
2748 }
2749 sayNO;
2750 break;
2751 }
2752 case CURLYN:
2753 paren = scan->flags; /* Which paren to set */
3280af22 2754 if (paren > PL_regsize)
2755 PL_regsize = paren;
2756 if (paren > *PL_reglastparen)
2757 *PL_reglastparen = paren;
c277df42 2758 ln = ARG1(scan); /* min to match */
2759 n = ARG2(scan); /* max to match */
dc45a647 2760 scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
c277df42 2761 goto repeat;
a0d0e21e 2762 case CURLY:
c277df42 2763 paren = 0;
a0d0e21e 2764 ln = ARG1(scan); /* min to match */
2765 n = ARG2(scan); /* max to match */
dc45a647 2766 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
a0d0e21e 2767 goto repeat;
2768 case STAR:
2769 ln = 0;
c277df42 2770 n = REG_INFTY;
a0d0e21e 2771 scan = NEXTOPER(scan);
c277df42 2772 paren = 0;
a0d0e21e 2773 goto repeat;
2774 case PLUS:
c277df42 2775 ln = 1;
2776 n = REG_INFTY;
2777 scan = NEXTOPER(scan);
2778 paren = 0;
2779 repeat:
a0d0e21e 2780 /*
2781 * Lookahead to avoid useless match attempts
2782 * when we know what character comes next.
2783 */
22c35a8c 2784 if (PL_regkind[(U8)OP(next)] == EXACT) {
cd439c50 2785 c1 = (U8)*STRING(next);
bbce6d69 2786 if (OP(next) == EXACTF)
22c35a8c 2787 c2 = PL_fold[c1];
bbce6d69 2788 else if (OP(next) == EXACTFL)
22c35a8c 2789 c2 = PL_fold_locale[c1];
bbce6d69 2790 else
2791 c2 = c1;
2792 }
a0d0e21e 2793 else
bbce6d69 2794 c1 = c2 = -1000;
3280af22 2795 PL_reginput = locinput;
a0d0e21e 2796 if (minmod) {
c277df42 2797 CHECKPOINT lastcp;
a0d0e21e 2798 minmod = 0;
2799 if (ln && regrepeat(scan, ln) < ln)
4633a7c4 2800 sayNO;
a0ed51b3 2801 locinput = PL_reginput;
c277df42 2802 REGCP_SET;
0fe9bf95 2803 if (c1 != -1000) {
2804 char *e = locinput + n - ln; /* Should not check after this */
2805 char *old = locinput;
2806
2807 if (e >= PL_regeol || (n == REG_INFTY))
2808 e = PL_regeol - 1;
2809 while (1) {
2810 /* Find place 'next' could work */
2811 if (c1 == c2) {
2812 while (locinput <= e && *locinput != c1)
2813 locinput++;
2814 } else {
2815 while (locinput <= e
2816 && *locinput != c1
2817 && *locinput != c2)
2818 locinput++;
2819 }
2820 if (locinput > e)
2821 sayNO;
2822 /* PL_reginput == old now */
2823 if (locinput != old) {
2824 ln = 1; /* Did some */
2825 if (regrepeat(scan, locinput - old) <
2826 locinput - old)
2827 sayNO;
2828 }
2829 /* PL_reginput == locinput now */
2830 if (paren) {
2831 if (ln) {
cf93c79d 2832 PL_regstartp[paren] = HOPc(locinput, -1) - PL_bostr;
2833 PL_regendp[paren] = locinput - PL_bostr;
0fe9bf95 2834 }
2835 else
cf93c79d 2836 PL_regendp[paren] = -1;
0fe9bf95 2837 }
2838 if (regmatch(next))
2839 sayYES;
2840 PL_reginput = locinput; /* Could be reset... */
2841 REGCP_UNWIND;
2842 /* Couldn't or didn't -- move forward. */
2843 old = locinput++;
2844 }
2845 }
2846 else
c277df42 2847 while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */
a0d0e21e 2848 /* If it could work, try it. */
bbce6d69 2849 if (c1 == -1000 ||
3280af22 2850 UCHARAT(PL_reginput) == c1 ||
2851 UCHARAT(PL_reginput) == c2)
bbce6d69 2852 {
c277df42 2853 if (paren) {
2854 if (n) {
cf93c79d 2855 PL_regstartp[paren] = HOPc(PL_reginput, -1) - PL_bostr;
2856 PL_regendp[paren] = PL_reginput - PL_bostr;
a0ed51b3 2857 }
2858 else
cf93c79d 2859 PL_regendp[paren] = -1;
c277df42 2860 }
a0d0e21e 2861 if (regmatch(next))
4633a7c4 2862 sayYES;
c277df42 2863 REGCP_UNWIND;
bbce6d69 2864 }
c277df42 2865 /* Couldn't or didn't -- move forward. */
a0ed51b3 2866 PL_reginput = locinput;
a0d0e21e 2867 if (regrepeat(scan, 1)) {
2868 ln++;
a0ed51b3 2869 locinput = PL_reginput;
2870 }
2871 else
4633a7c4 2872 sayNO;
a0d0e21e 2873 }
2874 }
2875 else {
c277df42 2876 CHECKPOINT lastcp;
a0d0e21e 2877 n = regrepeat(scan, n);
a0ed51b3 2878 locinput = PL_reginput;
22c35a8c 2879 if (ln < n && PL_regkind[(U8)OP(next)] == EOL &&
3280af22 2880 (!PL_multiline || OP(next) == SEOL))
a0d0e21e 2881 ln = n; /* why back off? */
c277df42 2882 REGCP_SET;
2883 if (paren) {
2884 while (n >= ln) {
2885 /* If it could work, try it. */
2886 if (c1 == -1000 ||
3280af22 2887 UCHARAT(PL_reginput) == c1 ||
2888 UCHARAT(PL_reginput) == c2)
c277df42 2889 {
2890 if (paren && n) {
2891 if (n) {
cf93c79d 2892 PL_regstartp[paren] = HOPc(PL_reginput, -1) - PL_bostr;
2893 PL_regendp[paren] = PL_reginput - PL_bostr;
a0ed51b3 2894 }
2895 else
cf93c79d 2896 PL_regendp[paren] = -1;
c277df42 2897 }
2898 if (regmatch(next))
2899 sayYES;
2900 REGCP_UNWIND;
2901 }
2902 /* Couldn't or didn't -- back up. */
2903 n--;
dfe13c55 2904 PL_reginput = locinput = HOPc(locinput, -1);
c277df42 2905 }
a0ed51b3 2906 }
2907 else {
c277df42 2908 while (n >= ln) {
2909 /* If it could work, try it. */
2910 if (c1 == -1000 ||
3280af22 2911 UCHARAT(PL_reginput) == c1 ||
2912 UCHARAT(PL_reginput) == c2)
c277df42 2913 {
2914 if (regmatch(next))
2915 sayYES;
2916 REGCP_UNWIND;
2917 }
2918 /* Couldn't or didn't -- back up. */
2919 n--;
dfe13c55 2920 PL_reginput = locinput = HOPc(locinput, -1);
bbce6d69 2921 }
a0d0e21e 2922 }
2923 }
4633a7c4 2924 sayNO;
c277df42 2925 break;
a0d0e21e 2926 case END:
0f5d15d6 2927 if (PL_reg_call_cc) {
2928 re_cc_state *cur_call_cc = PL_reg_call_cc;
2929 CURCUR *cctmp = PL_regcc;
2930 regexp *re = PL_reg_re;
2931 CHECKPOINT cp, lastcp;
2932
2933 cp = regcppush(0); /* Save *all* the positions. */
2934 REGCP_SET;
2935 regcp_set_to(PL_reg_call_cc->ss); /* Restore parens of
2936 the caller. */
2937 PL_reginput = locinput; /* Make position available to
2938 the callcc. */
2939 cache_re(PL_reg_call_cc->re);
2940 PL_regcc = PL_reg_call_cc->cc;
2941 PL_reg_call_cc = PL_reg_call_cc->prev;
2942 if (regmatch(cur_call_cc->node)) {
2943 PL_reg_call_cc = cur_call_cc;
2944 regcpblow(cp);
2945 sayYES;
2946 }
2947 REGCP_UNWIND;
2948 regcppop();
2949 PL_reg_call_cc = cur_call_cc;
2950 PL_regcc = cctmp;
2951 PL_reg_re = re;
2952 cache_re(re);
2953
2954 DEBUG_r(
2955 PerlIO_printf(Perl_debug_log,
2956 "%*s continuation failed...\n",
2957 REPORT_CODE_OFF+PL_regindent*2, "")
2958 );
7821416a 2959 sayNO_SILENT;
0f5d15d6 2960 }
7821416a 2961 if (locinput < PL_regtill) {
2962 DEBUG_r(PerlIO_printf(Perl_debug_log,
2963 "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
2964 PL_colors[4],
2965 (long)(locinput - PL_reg_starttry),
2966 (long)(PL_regtill - PL_reg_starttry),
2967 PL_colors[5]));
2968 sayNO_FINAL; /* Cannot match: too short. */
2969 }
2970 PL_reginput = locinput; /* put where regtry can find it */
2971 sayYES_FINAL; /* Success! */
7e5428c5 2972 case SUCCEED:
3280af22 2973 PL_reginput = locinput; /* put where regtry can find it */
7821416a 2974 sayYES_LOUD; /* Success! */
c277df42 2975 case SUSPEND:
2976 n = 1;
9fe1d20c 2977 PL_reginput = locinput;
c277df42 2978 goto do_ifmatch;
a0d0e21e 2979 case UNLESSM:
c277df42 2980 n = 0;
a0ed51b3 2981 if (scan->flags) {
0fe9bf95 2982 if (UTF) { /* XXXX This is absolutely
2983 broken, we read before
2984 start of string. */
2985 s = HOPMAYBEc(locinput, -scan->flags);
2986 if (!s)
2987 goto say_yes;
2988 PL_reginput = s;
2989 }
2990 else {
2991 if (locinput < PL_bostr + scan->flags)
2992 goto say_yes;
2993 PL_reginput = locinput - scan->flags;
2994 goto do_ifmatch;
2995 }
a0ed51b3 2996 }
2997 else
2998 PL_reginput = locinput;
c277df42 2999 goto do_ifmatch;
3000 case IFMATCH:
3001 n = 1;
a0ed51b3 3002 if (scan->flags) {
0fe9bf95 3003 if (UTF) { /* XXXX This is absolutely
3004 broken, we read before
3005 start of string. */
3006 s = HOPMAYBEc(locinput, -scan->flags);
3007 if (!s || s < PL_bostr)
3008 goto say_no;
3009 PL_reginput = s;
3010 }
3011 else {
3012 if (locinput < PL_bostr + scan->flags)
3013 goto say_no;
3014 PL_reginput = locinput - scan->flags;
3015 goto do_ifmatch;
3016 }
a0ed51b3 3017 }
3018 else
3019 PL_reginput = locinput;
3020
c277df42 3021 do_ifmatch:
c277df42 3022 inner = NEXTOPER(NEXTOPER(scan));
3023 if (regmatch(inner) != n) {
3024 say_no:
3025 if (logical) {
3026 logical = 0;
3027 sw = 0;
3028 goto do_longjump;
a0ed51b3 3029 }
3030 else
c277df42 3031 sayNO;
3032 }
3033 say_yes:
3034 if (logical) {
3035 logical = 0;
3036 sw = 1;
3037 }
fe44a5e8 3038 if (OP(scan) == SUSPEND) {
3280af22 3039 locinput = PL_reginput;
565764a8 3040 nextchr = UCHARAT(locinput);
fe44a5e8 3041 }
c277df42 3042 /* FALL THROUGH. */
3043 case LONGJMP:
3044 do_longjump:
3045 next = scan + ARG(scan);
3046 if (next == scan)
3047 next = NULL;
a0d0e21e 3048 break;
3049 default:
c030ccd9 3050 PerlIO_printf(PerlIO_stderr(), "%lx %d\n",
c277df42 3051 (unsigned long)scan, OP(scan));
cea2e8a9 3052 Perl_croak(aTHX_ "regexp memory corruption");
a687059c 3053 }
a0d0e21e 3054 scan = next;
3055 }
a687059c 3056
a0d0e21e 3057 /*
3058 * We get here only if there's trouble -- normally "case END" is
3059 * the terminating point.
3060 */
cea2e8a9 3061 Perl_croak(aTHX_ "corrupted regexp pointers");
a0d0e21e 3062 /*NOTREACHED*/
4633a7c4 3063 sayNO;
3064
7821416a 3065yes_loud:
3066 DEBUG_r(
3067 PerlIO_printf(Perl_debug_log,
3068 "%*s %scould match...%s\n",
3069 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],PL_colors[5])
3070 );
3071 goto yes;
3072yes_final:
3073 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
3074 PL_colors[4],PL_colors[5]));
4633a7c4 3075yes:
3076#ifdef DEBUGGING
3280af22 3077 PL_regindent--;
4633a7c4 3078#endif
3079 return 1;
3080
3081no:
7821416a 3082 DEBUG_r(
3083 PerlIO_printf(Perl_debug_log,
3084 "%*s %sfailed...%s\n",
3085 REPORT_CODE_OFF+PL_regindent*2, "",PL_colors[4],PL_colors[5])
3086 );
3087 goto do_no;
3088no_final:
3089do_no:
4633a7c4 3090#ifdef DEBUGGING
3280af22 3091 PL_regindent--;
4633a7c4 3092#endif
a0d0e21e 3093 return 0;
a687059c 3094}
3095
3096/*
3097 - regrepeat - repeatedly match something simple, report how many
3098 */
3099/*
3100 * [This routine now assumes that it will only match on things of length 1.
3101 * That was true before, but now we assume scan - reginput is the count,
a0ed51b3 3102 * rather than incrementing count on every character. [Er, except utf8.]]
a687059c 3103 */
76e3520e 3104STATIC I32
cea2e8a9 3105S_regrepeat(pTHX_ regnode *p, I32 max)
a687059c 3106{
5c0ca799 3107 dTHR;
a0d0e21e 3108 register char *scan;
3109 register char *opnd;
3110 register I32 c;
3280af22 3111 register char *loceol = PL_regeol;
a0ed51b3 3112 register I32 hardcount = 0;
a0d0e21e 3113
3280af22 3114 scan = PL_reginput;
c277df42 3115 if (max != REG_INFTY && max < loceol - scan)
a0d0e21e 3116 loceol = scan + max;
a0d0e21e 3117 switch (OP(p)) {
22c35a8c 3118 case REG_ANY:
a0d0e21e 3119 while (scan < loceol && *scan != '\n')
3120 scan++;
3121 break;
3122 case SANY:
3123 scan = loceol;
3124 break;
a0ed51b3 3125 case ANYUTF8:
3126 loceol = PL_regeol;
3127 while (scan < loceol && *scan != '\n') {
3128 scan += UTF8SKIP(scan);
3129 hardcount++;
3130 }
3131 break;
3132 case SANYUTF8:
3133 loceol = PL_regeol;
3134 while (scan < loceol) {
3135 scan += UTF8SKIP(scan);
3136 hardcount++;
3137 }
3138 break;
bbce6d69 3139 case EXACT: /* length of string is 1 */
cd439c50 3140 c = (U8)*STRING(p);
bbce6d69 3141 while (scan < loceol && UCHARAT(scan) == c)
3142 scan++;
3143 break;
3144 case EXACTF: /* length of string is 1 */
cd439c50 3145 c = (U8)*STRING(p);
bbce6d69 3146 while (scan < loceol &&
22c35a8c 3147 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
bbce6d69 3148 scan++;
3149 break;
3150 case EXACTFL: /* length of string is 1 */
3280af22 3151 PL_reg_flags |= RF_tainted;
cd439c50 3152 c = (U8)*STRING(p);
bbce6d69 3153 while (scan < loceol &&
22c35a8c 3154 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
a0d0e21e 3155 scan++;
3156 break;
a0ed51b3 3157 case ANYOFUTF8:
3158 loceol = PL_regeol;
3159 while (scan < loceol && REGINCLASSUTF8(p, (U8*)scan)) {
3160 scan += UTF8SKIP(scan);
3161 hardcount++;
3162 }
3163 break;
a0d0e21e 3164 case ANYOF:
cd439c50 3165 opnd = MASK(p);
ae5c130c 3166 while (scan < loceol && REGINCLASS(opnd, *scan))
a0d0e21e 3167 scan++;
a0d0e21e 3168 break;
3169 case ALNUM:
3170 while (scan < loceol && isALNUM(*scan))
3171 scan++;
3172 break;
a0ed51b3 3173 case ALNUMUTF8:
3174 loceol = PL_regeol;
dfe13c55 3175 while (scan < loceol && swash_fetch(PL_utf8_alnum, (U8*)scan)) {
a0ed51b3 3176 scan += UTF8SKIP(scan);
3177 hardcount++;
3178 }
3179 break;
bbce6d69 3180 case ALNUML:
3280af22 3181 PL_reg_flags |= RF_tainted;
bbce6d69 3182 while (scan < loceol && isALNUM_LC(*scan))
3183 scan++;
3184 break;
a0ed51b3 3185 case ALNUMLUTF8:
3186 PL_reg_flags |= RF_tainted;
3187 loceol = PL_regeol;
dfe13c55 3188 while (scan < loceol && isALNUM_LC_utf8((U8*)scan)) {
a0ed51b3 3189 scan += UTF8SKIP(scan);
3190 hardcount++;
3191 }
3192 break;
3193 break;
a0d0e21e 3194 case NALNUM:
3195 while (scan < loceol && !isALNUM(*scan))
3196 scan++;
3197 break;
a0ed51b3 3198 case NALNUMUTF8:
3199 loceol = PL_regeol;
dfe13c55 3200 while (scan < loceol && !swash_fetch(PL_utf8_alnum, (U8*)scan)) {
a0ed51b3 3201 scan += UTF8SKIP(scan);
3202 hardcount++;
3203 }
3204 break;
bbce6d69 3205 case NALNUML:
3280af22 3206 PL_reg_flags |= RF_tainted;
bbce6d69 3207 while (scan < loceol && !isALNUM_LC(*scan))
3208 scan++;
3209 break;
a0ed51b3 3210 case NALNUMLUTF8:
3211 PL_reg_flags |= RF_tainted;
3212 loceol = PL_regeol;
dfe13c55 3213 while (scan < loceol && !isALNUM_LC_utf8((U8*)scan)) {
a0ed51b3 3214 scan += UTF8SKIP(scan);
3215 hardcount++;
3216 }
3217 break;
a0d0e21e 3218 case SPACE:
3219 while (scan < loceol && isSPACE(*scan))
3220 scan++;
3221 break;
a0ed51b3 3222 case SPACEUTF8:
3223 loceol = PL_regeol;
dfe13c55 3224 while (scan < loceol && (*scan == ' ' || swash_fetch(PL_utf8_space,(U8*)scan))) {
a0ed51b3 3225 scan += UTF8SKIP(scan);
3226 hardcount++;
3227 }
3228 break;
bbce6d69 3229 case SPACEL:
3280af22 3230 PL_reg_flags |= RF_tainted;
bbce6d69 3231 while (scan < loceol && isSPACE_LC(*scan))
3232 scan++;
3233 break;
a0ed51b3 3234 case SPACELUTF8:
3235 PL_reg_flags |= RF_tainted;
3236 loceol = PL_regeol;
dfe13c55 3237 while (scan < loceol && (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
a0ed51b3 3238 scan += UTF8SKIP(scan);
3239 hardcount++;
3240 }
3241 break;
a0d0e21e 3242 case NSPACE:
3243 while (scan < loceol && !isSPACE(*scan))
3244 scan++;
3245 break;
a0ed51b3 3246 case NSPACEUTF8:
3247 loceol = PL_regeol;
dfe13c55 3248 while (scan < loceol && !(*scan == ' ' || swash_fetch(PL_utf8_space,(U8*)scan))) {
a0ed51b3 3249 scan += UTF8SKIP(scan);
3250 hardcount++;
3251 }
3252 break;
bbce6d69 3253 case NSPACEL:
3280af22 3254 PL_reg_flags |= RF_tainted;
bbce6d69 3255 while (scan < loceol && !isSPACE_LC(*scan))
3256 scan++;
3257 break;
a0ed51b3 3258 case NSPACELUTF8:
3259 PL_reg_flags |= RF_tainted;
3260 loceol = PL_regeol;
dfe13c55 3261 while (scan < loceol && !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
a0ed51b3 3262 scan += UTF8SKIP(scan);
3263 hardcount++;
3264 }
3265 break;
a0d0e21e 3266 case DIGIT:
3267 while (scan < loceol && isDIGIT(*scan))
3268 scan++;
3269 break;
a0ed51b3 3270 case DIGITUTF8:
3271 loceol = PL_regeol;
dfe13c55 3272 while (scan < loceol && swash_fetch(PL_utf8_digit,(U8*)scan)) {
a0ed51b3 3273 scan += UTF8SKIP(scan);
3274 hardcount++;
3275 }
3276 break;
3277 break;
a0d0e21e 3278 case NDIGIT:
3279 while (scan < loceol && !isDIGIT(*scan))
3280 scan++;
3281 break;
a0ed51b3 3282 case NDIGITUTF8:
3283 loceol = PL_regeol;
dfe13c55 3284 while (scan < loceol && !swash_fetch(PL_utf8_digit,(U8*)scan)) {
a0ed51b3 3285 scan += UTF8SKIP(scan);
3286 hardcount++;
3287 }
3288 break;
a0d0e21e 3289 default: /* Called on something of 0 width. */
3290 break; /* So match right here or not at all. */
3291 }
a687059c 3292
a0ed51b3 3293 if (hardcount)
3294 c = hardcount;
3295 else
3296 c = scan - PL_reginput;
3280af22 3297 PL_reginput = scan;
a687059c 3298
c277df42 3299 DEBUG_r(
3300 {
3301 SV *prop = sv_newmortal();
3302
3303 regprop(prop, p);
3304 PerlIO_printf(Perl_debug_log,
3305 "%*s %s can match %ld times out of %ld...\n",
3306 REPORT_CODE_OFF+1, "", SvPVX(prop),c,max);
3307 });
3308
a0d0e21e 3309 return(c);
a687059c 3310}
3311
3312/*
c277df42 3313 - regrepeat_hard - repeatedly match something, report total lenth and length
3314 *
3315 * The repeater is supposed to have constant length.
3316 */
3317
76e3520e 3318STATIC I32
cea2e8a9 3319S_regrepeat_hard(pTHX_ regnode *p, I32 max, I32 *lp)
c277df42 3320{
5c0ca799 3321 dTHR;
c277df42 3322 register char *scan;
3323 register char *start;
3280af22 3324 register char *loceol = PL_regeol;
a0ed51b3 3325 I32 l = 0;
708e3b05 3326 I32 count = 0, res = 1;
a0ed51b3 3327
3328 if (!max)
3329 return 0;
c277df42 3330
3280af22 3331 start = PL_reginput;
a0ed51b3 3332 if (UTF) {
708e3b05 3333 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3 3334 if (!count++) {
3335 l = 0;
3336 while (start < PL_reginput) {
3337 l++;
3338 start += UTF8SKIP(start);
3339 }
3340 *lp = l;
3341 if (l == 0)
3342 return max;
3343 }
3344 if (count == max)
3345 return count;
3346 }
3347 }
3348 else {
708e3b05 3349 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3 3350 if (!count++) {
3351 *lp = l = PL_reginput - start;
3352 if (max != REG_INFTY && l*max < loceol - scan)
3353 loceol = scan + l*max;
3354 if (l == 0)
3355 return max;
c277df42 3356 }
3357 }
3358 }
708e3b05 3359 if (!res)
3280af22 3360 PL_reginput = scan;
c277df42 3361
a0ed51b3 3362 return count;
c277df42 3363}
3364
3365/*
cb8d8820 3366 - reginclass - determine if a character falls into a character class
bbce6d69 3367 */
3368
76e3520e 3369STATIC bool
cea2e8a9 3370S_reginclass(pTHX_ register char *p, register I32 c)
bbce6d69 3371{
5c0ca799 3372 dTHR;
b8c5462f 3373 char flags = ANYOF_FLAGS(p);
bbce6d69 3374 bool match = FALSE;
3375
3376 c &= 0xFF;
b8c5462f 3377 if (ANYOF_BITMAP_TEST(p, c))
bbce6d69 3378 match = TRUE;
3379 else if (flags & ANYOF_FOLD) {
3380 I32 cf;
3381 if (flags & ANYOF_LOCALE) {
3280af22 3382 PL_reg_flags |= RF_tainted;
22c35a8c 3383 cf = PL_fold_locale[c];
bbce6d69 3384 }
3385 else
22c35a8c 3386 cf = PL_fold[c];
b8c5462f 3387 if (ANYOF_BITMAP_TEST(p, cf))
bbce6d69 3388 match = TRUE;
3389 }
3390
b8c5462f 3391 if (!match && (flags & ANYOF_CLASS)) {
3280af22 3392 PL_reg_flags |= RF_tainted;
b8c5462f 3393 if (
3394 (ANYOF_CLASS_TEST(p, ANYOF_ALNUM) && isALNUM_LC(c)) ||
3395 (ANYOF_CLASS_TEST(p, ANYOF_NALNUM) && !isALNUM_LC(c)) ||
3396 (ANYOF_CLASS_TEST(p, ANYOF_SPACE) && isSPACE_LC(c)) ||
3397 (ANYOF_CLASS_TEST(p, ANYOF_NSPACE) && !isSPACE_LC(c)) ||
3398 (ANYOF_CLASS_TEST(p, ANYOF_DIGIT) && isDIGIT_LC(c)) ||
3399 (ANYOF_CLASS_TEST(p, ANYOF_NDIGIT) && !isDIGIT_LC(c)) ||
3400 (ANYOF_CLASS_TEST(p, ANYOF_ALNUMC) && isALNUMC_LC(c)) ||
3401 (ANYOF_CLASS_TEST(p, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
3402 (ANYOF_CLASS_TEST(p, ANYOF_ALPHA) && isALPHA_LC(c)) ||
3403 (ANYOF_CLASS_TEST(p, ANYOF_NALPHA) && !isALPHA_LC(c)) ||
3404 (ANYOF_CLASS_TEST(p, ANYOF_ASCII) && isASCII(c)) ||
3405 (ANYOF_CLASS_TEST(p, ANYOF_NASCII) && !isASCII(c)) ||
3406 (ANYOF_CLASS_TEST(p, ANYOF_CNTRL) && isCNTRL_LC(c)) ||
3407 (ANYOF_CLASS_TEST(p, ANYOF_NCNTRL) && !isCNTRL_LC(c)) ||
3408 (ANYOF_CLASS_TEST(p, ANYOF_GRAPH) && isGRAPH_LC(c)) ||
3409 (ANYOF_CLASS_TEST(p, ANYOF_NGRAPH) && !isGRAPH_LC(c)) ||
3410 (ANYOF_CLASS_TEST(p, ANYOF_LOWER) && isLOWER_LC(c)) ||
3411 (ANYOF_CLASS_TEST(p, ANYOF_NLOWER) && !isLOWER_LC(c)) ||
3412 (ANYOF_CLASS_TEST(p, ANYOF_PRINT) && isPRINT_LC(c)) ||
3413 (ANYOF_CLASS_TEST(p, ANYOF_NPRINT) && !isPRINT_LC(c)) ||
3414 (ANYOF_CLASS_TEST(p, ANYOF_PUNCT) && isPUNCT_LC(c)) ||
3415 (ANYOF_CLASS_TEST(p, ANYOF_NPUNCT) && !isPUNCT_LC(c)) ||
3416 (ANYOF_CLASS_TEST(p, ANYOF_UPPER) && isUPPER_LC(c)) ||
3417 (ANYOF_CLASS_TEST(p, ANYOF_NUPPER) && !isUPPER_LC(c)) ||
3418 (ANYOF_CLASS_TEST(p, ANYOF_XDIGIT) && isXDIGIT(c)) ||
3419 (ANYOF_CLASS_TEST(p, ANYOF_NXDIGIT) && !isXDIGIT(c))
3420 ) /* How's that for a conditional? */
bbce6d69 3421 {
3422 match = TRUE;
3423 }
3424 }
3425
ae5c130c 3426 return (flags & ANYOF_INVERT) ? !match : match;
bbce6d69 3427}
3428
a0ed51b3 3429STATIC bool
cea2e8a9 3430S_reginclassutf8(pTHX_ regnode *f, U8 *p)
c485e607 3431{
3432 dTHR;
a0ed51b3 3433 char flags = ARG1(f);
3434 bool match = FALSE;
3435 SV *sv = (SV*)PL_regdata->data[ARG2(f)];
3436
3437 if (swash_fetch(sv, p))
3438 match = TRUE;
3439 else if (flags & ANYOF_FOLD) {
3440 I32 cf;
dfe13c55 3441 U8 tmpbuf[10];
a0ed51b3 3442 if (flags & ANYOF_LOCALE) {
3443 PL_reg_flags |= RF_tainted;
3444 uv_to_utf8(tmpbuf, toLOWER_LC_utf8(p));
3445 }
3446 else
3447 uv_to_utf8(tmpbuf, toLOWER_utf8(p));
3448 if (swash_fetch(sv, tmpbuf))
3449 match = TRUE;
3450 }
3451
b8c5462f 3452 /* UTF8 combined with ANYOF_CLASS is ill-defined. */
a0ed51b3 3453
3454 return (flags & ANYOF_INVERT) ? !match : match;
3455}
161b471a 3456
dfe13c55 3457STATIC U8 *
cea2e8a9 3458S_reghop(pTHX_ U8 *s, I32 off)
c485e607 3459{
3460 dTHR;
a0ed51b3 3461 if (off >= 0) {
3462 while (off-- && s < (U8*)PL_regeol)
3463 s += UTF8SKIP(s);
3464 }
3465 else {
3466 while (off++) {
3467 if (s > (U8*)PL_bostr) {
3468 s--;
3469 if (*s & 0x80) {
3470 while (s > (U8*)PL_bostr && (*s & 0xc0) == 0x80)
3471 s--;
3472 } /* XXX could check well-formedness here */
3473 }
3474 }
3475 }
3476 return s;
3477}
161b471a 3478
dfe13c55 3479STATIC U8 *
cea2e8a9 3480S_reghopmaybe(pTHX_ U8* s, I32 off)
a0ed51b3 3481{
c485e607 3482 dTHR;
a0ed51b3 3483 if (off >= 0) {
3484 while (off-- && s < (U8*)PL_regeol)
3485 s += UTF8SKIP(s);
3486 if (off >= 0)
3487 return 0;
3488 }
3489 else {
3490 while (off++) {
3491 if (s > (U8*)PL_bostr) {
3492 s--;
3493 if (*s & 0x80) {
3494 while (s > (U8*)PL_bostr && (*s & 0xc0) == 0x80)
3495 s--;
3496 } /* XXX could check well-formedness here */
3497 }
3498 else
3499 break;
3500 }
3501 if (off <= 0)
3502 return 0;
3503 }
3504 return s;
3505}
51371543 3506
3507#ifdef PERL_OBJECT
3508#define NO_XSLOCKS
3509#include "XSUB.h"
3510#endif
3511
3512static void
3513restore_pos(pTHXo_ void *arg)
3514{
3515 dTHR;
3516 if (PL_reg_eval_set) {
3517 if (PL_reg_oldsaved) {
3518 PL_reg_re->subbeg = PL_reg_oldsaved;
3519 PL_reg_re->sublen = PL_reg_oldsavedlen;
3520 RX_MATCH_COPIED_on(PL_reg_re);
3521 }
3522 PL_reg_magic->mg_len = PL_reg_oldpos;
3523 PL_reg_eval_set = 0;
3524 PL_curpm = PL_reg_oldcurpm;
3525 }
3526}
3527