miniperl build fixes for NeXTstep and cygwin (from Hans Mulder
[p5sagit/p5-mst-13.2.git] / regexec.c
CommitLineData
a0d0e21e 1/* regexec.c
2 */
3
4/*
5 * "One Ring to rule them all, One Ring to find them..."
6 */
7
a687059c 8/* NOTE: this is derived from Henry Spencer's regexp code, and should not
9 * confused with the original package (see point 3 below). Thanks, Henry!
10 */
11
12/* Additional note: this code is very heavily munged from Henry's version
13 * in places. In some spots I've traded clarity for efficiency, so don't
14 * blame Henry for some of the lack of readability.
15 */
16
e50aee73 17/* The names of the functions have been changed from regcomp and
18 * regexec to pregcomp and pregexec in order to avoid conflicts
19 * with the POSIX routines of the same names.
20*/
21
b9d5759e 22#ifdef PERL_EXT_RE_BUILD
23/* need to replace pregcomp et al, so enable that */
24# ifndef PERL_IN_XSUB_RE
25# define PERL_IN_XSUB_RE
26# endif
27/* need access to debugger hooks */
cad2e5aa 28# if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING)
b9d5759e 29# define DEBUGGING
30# endif
31#endif
32
33#ifdef PERL_IN_XSUB_RE
d06ea78c 34/* We *really* need to overwrite these symbols: */
56953603 35# define Perl_regexec_flags my_regexec
36# define Perl_regdump my_regdump
37# define Perl_regprop my_regprop
cad2e5aa 38# define Perl_re_intuit_start my_re_intuit_start
d06ea78c 39/* *These* symbols are masked to allow static link. */
40# define Perl_pregexec my_pregexec
d88dccdf 41# define Perl_reginitcolors my_reginitcolors
c5be433b 42
43# define PERL_NO_GET_CONTEXT
56953603 44#endif
45
f0fcb552 46/*SUPPRESS 112*/
a687059c 47/*
e50aee73 48 * pregcomp and pregexec -- regsub and regerror are not used in perl
a687059c 49 *
50 * Copyright (c) 1986 by University of Toronto.
51 * Written by Henry Spencer. Not derived from licensed software.
52 *
53 * Permission is granted to anyone to use this software for any
54 * purpose on any computer system, and to redistribute it freely,
55 * subject to the following restrictions:
56 *
57 * 1. The author is not responsible for the consequences of use of
58 * this software, no matter how awful, even if they arise
59 * from defects in it.
60 *
61 * 2. The origin of this software must not be misrepresented, either
62 * by explicit claim or by omission.
63 *
64 * 3. Altered versions must be plainly marked as such, and must not
65 * be misrepresented as being the original software.
66 *
67 **** Alterations to Henry's code are...
68 ****
4eb8286e 69 **** Copyright (c) 1991-1999, Larry Wall
a687059c 70 ****
9ef589d8 71 **** You may distribute under the terms of either the GNU General Public
72 **** License or the Artistic License, as specified in the README file.
a687059c 73 *
74 * Beware that some of this code is subtly aware of the way operator
75 * precedence is structured in regular expressions. Serious changes in
76 * regular-expression syntax might require a total rethink.
77 */
78#include "EXTERN.h"
864dbfa3 79#define PERL_IN_REGEXEC_C
a687059c 80#include "perl.h"
0f5d15d6 81
c5be433b 82#ifdef PERL_IN_XSUB_RE
83# if defined(PERL_CAPI) || defined(PERL_OBJECT)
84# include "XSUB.h"
85# endif
86#endif
87
a687059c 88#include "regcomp.h"
89
c277df42 90#define RF_tainted 1 /* tainted information used? */
91#define RF_warned 2 /* warned about big count? */
ce862d02 92#define RF_evaled 4 /* Did an EVAL with setting? */
a0ed51b3 93#define RF_utf8 8 /* String contains multibyte chars? */
94
95#define UTF (PL_reg_flags & RF_utf8)
ce862d02 96
97#define RS_init 1 /* eval environment created */
98#define RS_set 2 /* replsv value is set */
c277df42 99
a687059c 100#ifndef STATIC
101#define STATIC static
102#endif
103
c277df42 104/*
105 * Forwards.
106 */
107
b8c5462f 108#define REGINCLASS(p,c) (ANYOF_FLAGS(p) ? reginclass(p,c) : ANYOF_BITMAP_TEST(p,c))
a0ed51b3 109#define REGINCLASSUTF8(f,p) (ARG1(f) ? reginclassutf8(f,p) : swash_fetch((SV*)PL_regdata->data[ARG2(f)],p))
110
111#define CHR_SVLEN(sv) (UTF ? sv_len_utf8(sv) : SvCUR(sv))
112#define CHR_DIST(a,b) (UTF ? utf8_distance(a,b) : a - b)
113
dfe13c55 114#define reghop_c(pos,off) ((char*)reghop((U8*)pos, off))
115#define reghopmaybe_c(pos,off) ((char*)reghopmaybe((U8*)pos, off))
116#define HOP(pos,off) (UTF ? reghop((U8*)pos, off) : (U8*)(pos + off))
117#define HOPMAYBE(pos,off) (UTF ? reghopmaybe((U8*)pos, off) : (U8*)(pos + off))
118#define HOPc(pos,off) ((char*)HOP(pos,off))
119#define HOPMAYBEc(pos,off) ((char*)HOPMAYBE(pos,off))
a0d0e21e 120
51371543 121static void restore_pos(pTHXo_ void *arg);
122
123
76e3520e 124STATIC CHECKPOINT
cea2e8a9 125S_regcppush(pTHX_ I32 parenfloor)
a0d0e21e 126{
11343788 127 dTHR;
3280af22 128 int retval = PL_savestack_ix;
129 int i = (PL_regsize - parenfloor) * 4;
a0d0e21e 130 int p;
131
132 SSCHECK(i + 5);
3280af22 133 for (p = PL_regsize; p > parenfloor; p--) {
cf93c79d 134 SSPUSHINT(PL_regendp[p]);
135 SSPUSHINT(PL_regstartp[p]);
3280af22 136 SSPUSHPTR(PL_reg_start_tmp[p]);
a0d0e21e 137 SSPUSHINT(p);
138 }
3280af22 139 SSPUSHINT(PL_regsize);
140 SSPUSHINT(*PL_reglastparen);
141 SSPUSHPTR(PL_reginput);
a0d0e21e 142 SSPUSHINT(i + 3);
143 SSPUSHINT(SAVEt_REGCONTEXT);
144 return retval;
145}
146
c277df42 147/* These are needed since we do not localize EVAL nodes: */
c3464db5 148# define REGCP_SET DEBUG_r(PerlIO_printf(Perl_debug_log, \
faccc32b 149 " Setting an EVAL scope, savestack=%"IVdf"\n", \
150 (IV)PL_savestack_ix)); lastcp = PL_savestack_ix
c3464db5 151
3280af22 152# define REGCP_UNWIND DEBUG_r(lastcp != PL_savestack_ix ? \
c3464db5 153 PerlIO_printf(Perl_debug_log, \
faccc32b 154 " Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
155 (IV)lastcp, (IV)PL_savestack_ix) : 0); regcpblow(lastcp)
c277df42 156
76e3520e 157STATIC char *
cea2e8a9 158S_regcppop(pTHX)
a0d0e21e 159{
11343788 160 dTHR;
a0d0e21e 161 I32 i = SSPOPINT;
162 U32 paren = 0;
163 char *input;
cf93c79d 164 I32 tmps;
a0d0e21e 165 assert(i == SAVEt_REGCONTEXT);
166 i = SSPOPINT;
167 input = (char *) SSPOPPTR;
3280af22 168 *PL_reglastparen = SSPOPINT;
169 PL_regsize = SSPOPINT;
c277df42 170 for (i -= 3; i > 0; i -= 4) {
a0d0e21e 171 paren = (U32)SSPOPINT;
3280af22 172 PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
cf93c79d 173 PL_regstartp[paren] = SSPOPINT;
174 tmps = SSPOPINT;
3280af22 175 if (paren <= *PL_reglastparen)
176 PL_regendp[paren] = tmps;
c277df42 177 DEBUG_r(
c3464db5 178 PerlIO_printf(Perl_debug_log,
b900a521 179 " restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
180 (UV)paren, (IV)PL_regstartp[paren],
181 (IV)(PL_reg_start_tmp[paren] - PL_bostr),
182 (IV)PL_regendp[paren],
3280af22 183 (paren > *PL_reglastparen ? "(no)" : ""));
c277df42 184 );
a0d0e21e 185 }
c277df42 186 DEBUG_r(
3280af22 187 if (*PL_reglastparen + 1 <= PL_regnpar) {
c3464db5 188 PerlIO_printf(Perl_debug_log,
faccc32b 189 " restoring \\%"IVdf"..\\%"IVdf" to undef\n",
190 (IV)(*PL_reglastparen + 1), (IV)PL_regnpar);
c277df42 191 }
192 );
3280af22 193 for (paren = *PL_reglastparen + 1; paren <= PL_regnpar; paren++) {
194 if (paren > PL_regsize)
cf93c79d 195 PL_regstartp[paren] = -1;
196 PL_regendp[paren] = -1;
a0d0e21e 197 }
198 return input;
199}
200
0f5d15d6 201STATIC char *
cea2e8a9 202S_regcp_set_to(pTHX_ I32 ss)
0f5d15d6 203{
46124e9e 204 dTHR;
0f5d15d6 205 I32 tmp = PL_savestack_ix;
206
207 PL_savestack_ix = ss;
208 regcppop();
209 PL_savestack_ix = tmp;
942e002e 210 return Nullch;
0f5d15d6 211}
212
213typedef struct re_cc_state
214{
215 I32 ss;
216 regnode *node;
217 struct re_cc_state *prev;
218 CURCUR *cc;
219 regexp *re;
220} re_cc_state;
221
c277df42 222#define regcpblow(cp) LEAVE_SCOPE(cp)
a0d0e21e 223
a687059c 224/*
e50aee73 225 * pregexec and friends
a687059c 226 */
227
228/*
c277df42 229 - pregexec - match a regexp against a string
a687059c 230 */
c277df42 231I32
864dbfa3 232Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend,
c3464db5 233 char *strbeg, I32 minend, SV *screamer, U32 nosave)
c277df42 234/* strend: pointer to null at end of string */
235/* strbeg: real beginning of string */
236/* minend: end of match must be >=minend after stringarg. */
237/* nosave: For optimizations. */
238{
239 return
240 regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
241 nosave ? 0 : REXEC_COPY_STR);
242}
0f5d15d6 243
244STATIC void
cea2e8a9 245S_cache_re(pTHX_ regexp *prog)
0f5d15d6 246{
46124e9e 247 dTHR;
0f5d15d6 248 PL_regprecomp = prog->precomp; /* Needed for FAIL. */
249#ifdef DEBUGGING
250 PL_regprogram = prog->program;
251#endif
252 PL_regnpar = prog->nparens;
253 PL_regdata = prog->data;
254 PL_reg_re = prog;
255}
22e551b9 256
cad2e5aa 257/*
258 * Need to implement the following flags for reg_anch:
259 *
260 * USE_INTUIT_NOML - Useful to call re_intuit_start() first
261 * USE_INTUIT_ML
262 * INTUIT_AUTORITATIVE_NOML - Can trust a positive answer
263 * INTUIT_AUTORITATIVE_ML
264 * INTUIT_ONCE_NOML - Intuit can match in one location only.
265 * INTUIT_ONCE_ML
266 *
267 * Another flag for this function: SECOND_TIME (so that float substrs
268 * with giant delta may be not rechecked).
269 */
270
271/* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
272
2c2d71f5 273/* If SCREAM, then SvPVX(sv) should be compatible with strpos and strend.
cad2e5aa 274 Otherwise, only SvCUR(sv) is used to get strbeg. */
275
276/* XXXX We assume that strpos is strbeg unless sv. */
277
6eb5f6b9 278/* XXXX Some places assume that there is a fixed substring.
279 An update may be needed if optimizer marks as "INTUITable"
280 RExen without fixed substrings. Similarly, it is assumed that
281 lengths of all the strings are no more than minlen, thus they
282 cannot come from lookahead.
283 (Or minlen should take into account lookahead.) */
284
2c2d71f5 285/* A failure to find a constant substring means that there is no need to make
286 an expensive call to REx engine, thus we celebrate a failure. Similarly,
287 finding a substring too deep into the string means that less calls to
30944b6d 288 regtry() should be needed.
289
290 REx compiler's optimizer found 4 possible hints:
291 a) Anchored substring;
292 b) Fixed substring;
293 c) Whether we are anchored (beginning-of-line or \G);
294 d) First node (of those at offset 0) which may distingush positions;
6eb5f6b9 295 We use a)b)d) and multiline-part of c), and try to find a position in the
30944b6d 296 string which does not contradict any of them.
297 */
2c2d71f5 298
6eb5f6b9 299/* Most of decisions we do here should have been done at compile time.
300 The nodes of the REx which we used for the search should have been
301 deleted from the finite automaton. */
302
cad2e5aa 303char *
304Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
305 char *strend, U32 flags, re_scream_pos_data *data)
306{
2c2d71f5 307 register I32 start_shift;
cad2e5aa 308 /* Should be nonnegative! */
2c2d71f5 309 register I32 end_shift;
310 register char *s;
311 register SV *check;
cad2e5aa 312 char *t;
313 I32 ml_anch;
2c2d71f5 314 char *tmp;
6eb5f6b9 315 register char *other_last = Nullch; /* other substr checked before this */
316 char *check_at; /* check substr found at this pos */
30944b6d 317#ifdef DEBUGGING
318 char *i_strpos = strpos;
319#endif
cad2e5aa 320
321 DEBUG_r( if (!PL_colorset) reginitcolors() );
322 DEBUG_r(PerlIO_printf(Perl_debug_log,
2c2d71f5 323 "%sGuessing start of match, REx%s `%s%.60s%s%s' against `%s%.*s%s%s'...\n",
cad2e5aa 324 PL_colors[4],PL_colors[5],PL_colors[0],
325 prog->precomp,
326 PL_colors[1],
327 (strlen(prog->precomp) > 60 ? "..." : ""),
328 PL_colors[0],
b900a521 329 (int)(strend - strpos > 60 ? 60 : strend - strpos),
cad2e5aa 330 strpos, PL_colors[1],
331 (strend - strpos > 60 ? "..." : ""))
332 );
333
2c2d71f5 334 if (prog->minlen > strend - strpos) {
335 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too short...\n"));
cad2e5aa 336 goto fail;
2c2d71f5 337 }
653099ff 338 check = prog->check_substr;
2c2d71f5 339 if (prog->reganch & ROPT_ANCH) { /* Match at beg-of-str or after \n */
cad2e5aa 340 ml_anch = !( (prog->reganch & ROPT_ANCH_SINGLE)
341 || ( (prog->reganch & ROPT_ANCH_BOL)
2c2d71f5 342 && !PL_multiline ) ); /* Check after \n? */
cad2e5aa 343
344 if ((prog->check_offset_min == prog->check_offset_max) && !ml_anch) {
2c2d71f5 345 /* Substring at constant offset from beg-of-str... */
cad2e5aa 346 I32 slen;
347
348 if ( !(prog->reganch & ROPT_ANCH_GPOS) /* Checked by the caller */
2c2d71f5 349 && (sv && (strpos + SvCUR(sv) != strend)) ) {
350 DEBUG_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
cad2e5aa 351 goto fail;
2c2d71f5 352 }
adac82c7 353 PL_regeol = strend; /* Used in HOP() */
2c2d71f5 354 s = HOPc(strpos, prog->check_offset_min);
653099ff 355 if (SvTAIL(check)) {
356 slen = SvCUR(check); /* >= 1 */
cad2e5aa 357
2c2d71f5 358 if ( strend - s > slen || strend - s < slen - 1
359 || (strend - s == slen && strend[-1] != '\n')) {
360 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
361 goto fail_finish;
cad2e5aa 362 }
363 /* Now should match s[0..slen-2] */
364 slen--;
653099ff 365 if (slen && (*SvPVX(check) != *s
cad2e5aa 366 || (slen > 1
653099ff 367 && memNE(SvPVX(check), s, slen)))) {
2c2d71f5 368 report_neq:
369 DEBUG_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
370 goto fail_finish;
371 }
cad2e5aa 372 }
653099ff 373 else if (*SvPVX(check) != *s
374 || ((slen = SvCUR(check)) > 1
375 && memNE(SvPVX(check), s, slen)))
2c2d71f5 376 goto report_neq;
377 goto success_at_start;
cad2e5aa 378 }
2c2d71f5 379 /* Match is anchored, but substr is not anchored wrt beg-of-str. */
cad2e5aa 380 s = strpos;
2c2d71f5 381 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
2c2d71f5 382 end_shift = prog->minlen - start_shift -
653099ff 383 CHR_SVLEN(check) + (SvTAIL(check) != 0);
2c2d71f5 384 if (!ml_anch) {
653099ff 385 I32 end = prog->check_offset_max + CHR_SVLEN(check)
386 - (SvTAIL(check) != 0);
2c2d71f5 387 I32 eshift = strend - s - end;
388
389 if (end_shift < eshift)
390 end_shift = eshift;
391 }
cad2e5aa 392 }
2c2d71f5 393 else { /* Can match at random position */
cad2e5aa 394 ml_anch = 0;
395 s = strpos;
2c2d71f5 396 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
397 /* Should be nonnegative! */
398 end_shift = prog->minlen - start_shift -
653099ff 399 CHR_SVLEN(check) + (SvTAIL(check) != 0);
cad2e5aa 400 }
401
2c2d71f5 402#ifdef DEBUGGING /* 7/99: reports of failure (with the older version) */
0033605d 403 if (end_shift < 0)
6bbae5e6 404 Perl_croak(aTHX_ "panic: end_shift");
2c2d71f5 405#endif
406
2c2d71f5 407 restart:
408 /* Find a possible match in the region s..strend by looking for
409 the "check" substring in the region corrected by start/end_shift. */
cad2e5aa 410 if (flags & REXEC_SCREAM) {
cad2e5aa 411 char *strbeg = SvPVX(sv); /* XXXX Assume PV_force() on SCREAM! */
412 I32 p = -1; /* Internal iterator of scream. */
413 I32 *pp = data ? data->scream_pos : &p;
414
2c2d71f5 415 if (PL_screamfirst[BmRARE(check)] >= 0
416 || ( BmRARE(check) == '\n'
417 && (BmPREVIOUS(check) == SvCUR(check) - 1)
418 && SvTAIL(check) ))
419 s = screaminstr(sv, check,
420 start_shift + (s - strbeg), end_shift, pp, 0);
cad2e5aa 421 else
2c2d71f5 422 goto fail_finish;
cad2e5aa 423 if (data)
424 *data->scream_olds = s;
425 }
426 else
427 s = fbm_instr((unsigned char*)s + start_shift,
428 (unsigned char*)strend - end_shift,
2c2d71f5 429 check, PL_multiline ? FBMrf_MULTILINE : 0);
cad2e5aa 430
431 /* Update the count-of-usability, remove useless subpatterns,
432 unshift s. */
2c2d71f5 433
434 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s %s substr `%s%.*s%s'%s%s",
435 (s ? "Found" : "Did not find"),
436 ((check == prog->anchored_substr) ? "anchored" : "floating"),
437 PL_colors[0],
7b0972df 438 (int)(SvCUR(check) - (SvTAIL(check)!=0)),
439 SvPVX(check),
2c2d71f5 440 PL_colors[1], (SvTAIL(check) ? "$" : ""),
441 (s ? " at offset " : "...\n") ) );
442
443 if (!s)
444 goto fail_finish;
445
6eb5f6b9 446 check_at = s;
447
2c2d71f5 448 /* Finish the diagnostic message */
30944b6d 449 DEBUG_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
2c2d71f5 450
451 /* Got a candidate. Check MBOL anchoring, and the *other* substr.
452 Start with the other substr.
453 XXXX no SCREAM optimization yet - and a very coarse implementation
454 XXXX /ttx+/ results in anchored=`ttx', floating=`x'. floating will
455 *always* match. Probably should be marked during compile...
456 Probably it is right to do no SCREAM here...
457 */
458
459 if (prog->float_substr && prog->anchored_substr) {
30944b6d 460 /* Take into account the "other" substring. */
2c2d71f5 461 /* XXXX May be hopelessly wrong for UTF... */
462 if (!other_last)
6eb5f6b9 463 other_last = strpos;
2c2d71f5 464 if (check == prog->float_substr) {
30944b6d 465 do_other_anchored:
466 {
2c2d71f5 467 char *last = s - start_shift, *last1, *last2;
468 char *s1 = s;
469
470 tmp = PL_bostr;
471 t = s - prog->check_offset_max;
472 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
473 && (!(prog->reganch & ROPT_UTF8)
474 || (PL_bostr = strpos, /* Used in regcopmaybe() */
475 (t = reghopmaybe_c(s, -(prog->check_offset_max)))
476 && t > strpos)))
30944b6d 477 /* EMPTY */;
2c2d71f5 478 else
479 t = strpos;
480 t += prog->anchored_offset;
6eb5f6b9 481 if (t < other_last) /* These positions already checked */
482 t = other_last;
2c2d71f5 483 PL_bostr = tmp;
484 last2 = last1 = strend - prog->minlen;
485 if (last < last1)
486 last1 = last;
487 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
488 /* On end-of-str: see comment below. */
489 s = fbm_instr((unsigned char*)t,
490 (unsigned char*)last1 + prog->anchored_offset
491 + SvCUR(prog->anchored_substr)
492 - (SvTAIL(prog->anchored_substr)!=0),
493 prog->anchored_substr, PL_multiline ? FBMrf_MULTILINE : 0);
494 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s anchored substr `%s%.*s%s'%s",
495 (s ? "Found" : "Contradicts"),
496 PL_colors[0],
7b0972df 497 (int)(SvCUR(prog->anchored_substr)
498 - (SvTAIL(prog->anchored_substr)!=0)),
2c2d71f5 499 SvPVX(prog->anchored_substr),
500 PL_colors[1], (SvTAIL(prog->anchored_substr) ? "$" : "")));
501 if (!s) {
502 if (last1 >= last2) {
503 DEBUG_r(PerlIO_printf(Perl_debug_log,
504 ", giving up...\n"));
505 goto fail_finish;
506 }
507 DEBUG_r(PerlIO_printf(Perl_debug_log,
508 ", trying floating at offset %ld...\n",
30944b6d 509 (long)(s1 + 1 - i_strpos)));
2c2d71f5 510 PL_regeol = strend; /* Used in HOP() */
6eb5f6b9 511 other_last = last1 + prog->anchored_offset + 1;
2c2d71f5 512 s = HOPc(last, 1);
513 goto restart;
514 }
515 else {
516 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 517 (long)(s - i_strpos)));
2c2d71f5 518 t = s - prog->anchored_offset;
6eb5f6b9 519 other_last = s + 1;
30944b6d 520 s = s1;
2c2d71f5 521 if (t == strpos)
522 goto try_at_start;
2c2d71f5 523 goto try_at_offset;
524 }
30944b6d 525 }
2c2d71f5 526 }
527 else { /* Take into account the floating substring. */
528 char *last, *last1;
529 char *s1 = s;
530
531 t = s - start_shift;
532 last1 = last = strend - prog->minlen + prog->float_min_offset;
533 if (last - t > prog->float_max_offset)
534 last = t + prog->float_max_offset;
535 s = t + prog->float_min_offset;
6eb5f6b9 536 if (s < other_last)
537 s = other_last;
2c2d71f5 538 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
539 /* fbm_instr() takes into account exact value of end-of-str
540 if the check is SvTAIL(ed). Since false positives are OK,
541 and end-of-str is not later than strend we are OK. */
542 s = fbm_instr((unsigned char*)s,
543 (unsigned char*)last + SvCUR(prog->float_substr)
544 - (SvTAIL(prog->float_substr)!=0),
545 prog->float_substr, PL_multiline ? FBMrf_MULTILINE : 0);
546 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s floating substr `%s%.*s%s'%s",
547 (s ? "Found" : "Contradicts"),
548 PL_colors[0],
7b0972df 549 (int)(SvCUR(prog->float_substr)
550 - (SvTAIL(prog->float_substr)!=0)),
2c2d71f5 551 SvPVX(prog->float_substr),
552 PL_colors[1], (SvTAIL(prog->float_substr) ? "$" : "")));
553 if (!s) {
554 if (last1 == last) {
555 DEBUG_r(PerlIO_printf(Perl_debug_log,
556 ", giving up...\n"));
557 goto fail_finish;
558 }
559 DEBUG_r(PerlIO_printf(Perl_debug_log,
560 ", trying anchored starting at offset %ld...\n",
30944b6d 561 (long)(s1 + 1 - i_strpos)));
6eb5f6b9 562 other_last = last + 1;
2c2d71f5 563 PL_regeol = strend; /* Used in HOP() */
564 s = HOPc(t, 1);
565 goto restart;
566 }
567 else {
568 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 569 (long)(s - i_strpos)));
6eb5f6b9 570 other_last = s + 1;
30944b6d 571 s = s1;
2c2d71f5 572 if (t == strpos)
573 goto try_at_start;
2c2d71f5 574 goto try_at_offset;
575 }
576 }
cad2e5aa 577 }
2c2d71f5 578
579 t = s - prog->check_offset_max;
580 tmp = PL_bostr;
581 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
582 && (!(prog->reganch & ROPT_UTF8)
583 || (PL_bostr = strpos, /* Used in regcopmaybe() */
584 ((t = reghopmaybe_c(s, -(prog->check_offset_max)))
585 && t > strpos)))) {
586 PL_bostr = tmp;
587 /* Fixed substring is found far enough so that the match
588 cannot start at strpos. */
589 try_at_offset:
cad2e5aa 590 if (ml_anch && t[-1] != '\n') {
30944b6d 591 /* Eventually fbm_*() should handle this, but often
592 anchored_offset is not 0, so this check will not be wasted. */
593 /* XXXX In the code below we prefer to look for "^" even in
594 presence of anchored substrings. And we search even
595 beyond the found float position. These pessimizations
596 are historical artefacts only. */
597 find_anchor:
2c2d71f5 598 while (t < strend - prog->minlen) {
cad2e5aa 599 if (*t == '\n') {
600 if (t < s - prog->check_offset_min) {
30944b6d 601 if (prog->anchored_substr) {
602 /* We definitely contradict the found anchored
603 substr. Due to the above check we do not
604 contradict "check" substr.
605 Thus we can arrive here only if check substr
606 is float. Redo checking for "other"=="fixed".
607 */
608 strpos = t + 1;
609 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
610 PL_colors[0],PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
611 goto do_other_anchored;
612 }
cad2e5aa 613 s = t + 1;
2c2d71f5 614 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
30944b6d 615 PL_colors[0],PL_colors[1], (long)(s - i_strpos)));
cad2e5aa 616 goto set_useful;
617 }
2c2d71f5 618 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting at offset %ld...\n",
30944b6d 619 PL_colors[0],PL_colors[1], (long)(t + 1 - i_strpos)));
620 strpos = s = t + 1;
cad2e5aa 621 goto restart;
622 }
623 t++;
624 }
2c2d71f5 625 DEBUG_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
626 PL_colors[0],PL_colors[1]));
627 goto fail_finish;
cad2e5aa 628 }
629 s = t;
630 set_useful:
2c2d71f5 631 ++BmUSEFUL(prog->check_substr); /* hooray/5 */
cad2e5aa 632 }
633 else {
2c2d71f5 634 PL_bostr = tmp;
635 /* The found string does not prohibit matching at beg-of-str
636 - no optimization of calling REx engine can be performed,
637 unless it was an MBOL and we are not after MBOL. */
638 try_at_start:
639 /* Even in this situation we may use MBOL flag if strpos is offset
640 wrt the start of the string. */
641 if (ml_anch && sv
cad2e5aa 642 && (strpos + SvCUR(sv) != strend) && strpos[-1] != '\n') {
643 t = strpos;
644 goto find_anchor;
645 }
30944b6d 646 DEBUG_r( if (ml_anch)
647 PerlIO_printf(Perl_debug_log, "Does not contradict /%s^%s/m...\n",
648 PL_colors[0],PL_colors[1]);
649 );
2c2d71f5 650 success_at_start:
30944b6d 651 if (!(prog->reganch & ROPT_NAUGHTY) /* XXXX If strpos moved? */
66e933ab 652 && prog->check_substr /* Could be deleted already */
cad2e5aa 653 && --BmUSEFUL(prog->check_substr) < 0
66e933ab 654 && prog->check_substr == prog->float_substr)
655 {
cad2e5aa 656 /* If flags & SOMETHING - do not do it many times on the same match */
657 SvREFCNT_dec(prog->check_substr);
658 prog->check_substr = Nullsv; /* disable */
659 prog->float_substr = Nullsv; /* clear */
660 s = strpos;
3cf5c195 661 /* XXXX This is a remnant of the old implementation. It
662 looks wasteful, since now INTUIT can use many
6eb5f6b9 663 other heuristics. */
cad2e5aa 664 prog->reganch &= ~RE_USE_INTUIT;
665 }
666 else
667 s = strpos;
668 }
669
6eb5f6b9 670 /* Last resort... */
671 /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
672 if (prog->regstclass) {
673 /* minlen == 0 is possible if regstclass is \b or \B,
674 and the fixed substr is ''$.
675 Since minlen is already taken into account, s+1 is before strend;
676 accidentally, minlen >= 1 guaranties no false positives at s + 1
677 even for \b or \B. But (minlen? 1 : 0) below assumes that
678 regstclass does not come from lookahead... */
679 /* If regstclass takes bytelength more than 1: If charlength==1, OK.
680 This leaves EXACTF only, which is dealt with in find_byclass(). */
66e933ab 681 int cl_l = (PL_regkind[(U8)OP(prog->regstclass)] == EXACT
682 ? STR_LEN(prog->regstclass)
683 : 1);
6eb5f6b9 684 char *endpos = (prog->anchored_substr || ml_anch)
66e933ab 685 ? s + (prog->minlen? cl_l : 0)
686 : (prog->float_substr ? check_at - start_shift + cl_l
6eb5f6b9 687 : strend) ;
688 char *startpos = sv ? strend - SvCUR(sv) : s;
689
690 t = s;
691 s = find_byclass(prog, prog->regstclass, s, endpos, startpos, 1);
692 if (!s) {
693#ifdef DEBUGGING
694 char *what;
695#endif
696 if (endpos == strend) {
697 DEBUG_r( PerlIO_printf(Perl_debug_log,
698 "Could not match STCLASS...\n") );
699 goto fail;
700 }
66e933ab 701 DEBUG_r( PerlIO_printf(Perl_debug_log,
702 "This position contradicts STCLASS...\n") );
653099ff 703 if ((prog->reganch & ROPT_ANCH) && !ml_anch)
704 goto fail;
6eb5f6b9 705 /* Contradict one of substrings */
706 if (prog->anchored_substr) {
6eb5f6b9 707 if (prog->anchored_substr == check) {
708 DEBUG_r( what = "anchored" );
709 hop_and_restart:
710 PL_regeol = strend; /* Used in HOP() */
711 s = HOPc(t, 1);
66e933ab 712 if (s + start_shift + end_shift > strend) {
713 /* XXXX Should be taken into account earlier? */
714 DEBUG_r( PerlIO_printf(Perl_debug_log,
715 "Could not match STCLASS...\n") );
716 goto fail;
717 }
6eb5f6b9 718 DEBUG_r( PerlIO_printf(Perl_debug_log,
66e933ab 719 "Trying %s substr starting at offset %ld...\n",
6eb5f6b9 720 what, (long)(s + start_shift - i_strpos)) );
721 goto restart;
722 }
66e933ab 723 /* Have both, check_string is floating */
6eb5f6b9 724 if (t + start_shift >= check_at) /* Contradicts floating=check */
725 goto retry_floating_check;
726 /* Recheck anchored substring, but not floating... */
727 s = check_at;
728 DEBUG_r( PerlIO_printf(Perl_debug_log,
66e933ab 729 "Trying anchored substr starting at offset %ld...\n",
6eb5f6b9 730 (long)(other_last - i_strpos)) );
731 goto do_other_anchored;
732 }
66e933ab 733 if (!prog->float_substr) { /* Could have been deleted */
734 if (ml_anch) {
735 s = t = t + 1;
736 goto try_at_offset;
737 }
738 goto fail;
739 }
6eb5f6b9 740 /* Check is floating subtring. */
741 retry_floating_check:
742 t = check_at - start_shift;
743 DEBUG_r( what = "floating" );
744 goto hop_and_restart;
745 }
746 DEBUG_r( if (t != s)
747 PerlIO_printf(Perl_debug_log,
748 "By STCLASS: moving %ld --> %ld\n",
749 (long)(t - i_strpos), (long)(s - i_strpos));
750 else
751 PerlIO_printf(Perl_debug_log,
752 "Does not contradict STCLASS...\n") );
753 }
2c2d71f5 754 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sGuessed:%s match at offset %ld\n",
30944b6d 755 PL_colors[4], PL_colors[5], (long)(s - i_strpos)) );
cad2e5aa 756 return s;
2c2d71f5 757
758 fail_finish: /* Substring not found */
66e933ab 759 if (prog->check_substr) /* could be removed already */
760 BmUSEFUL(prog->check_substr) += 5; /* hooray */
cad2e5aa 761 fail:
2c2d71f5 762 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
cad2e5aa 763 PL_colors[4],PL_colors[5]));
764 return Nullch;
765}
9661b544 766
6eb5f6b9 767/* We know what class REx starts with. Try to find this position... */
3c3eec57 768STATIC char *
769S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *startpos, I32 norun)
a687059c 770{
6eb5f6b9 771 I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
772 char *m;
773 int ln;
774 int c1;
775 int c2;
776 char *e;
777 register I32 tmp = 1; /* Scratch variable? */
cad2e5aa 778
6eb5f6b9 779 /* We know what class it must start with. */
780 switch (OP(c)) {
781 case ANYOFUTF8:
782 while (s < strend) {
783 if (REGINCLASSUTF8(c, (U8*)s)) {
784 if (tmp && (norun || regtry(prog, s)))
a0d0e21e 785 goto got_it;
3cf5c195 786 else
6eb5f6b9 787 tmp = doevery;
a0ed51b3 788 }
6eb5f6b9 789 else
790 tmp = 1;
a0ed51b3 791 s += UTF8SKIP(s);
792 }
6eb5f6b9 793 break;
794 case ANYOF:
a0ed51b3 795 while (s < strend) {
6eb5f6b9 796 if (REGINCLASS(c, *s)) {
797 if (tmp && (norun || regtry(prog, s)))
798 goto got_it;
799 else
800 tmp = doevery;
a0ed51b3 801 }
6eb5f6b9 802 else
803 tmp = 1;
a0ed51b3 804 s++;
a0d0e21e 805 }
6eb5f6b9 806 break;
807 case EXACTF:
808 m = STRING(c);
809 ln = STR_LEN(c);
810 c1 = *m;
811 c2 = PL_fold[c1];
812 goto do_exactf;
813 case EXACTFL:
814 m = STRING(c);
815 ln = STR_LEN(c);
816 c1 = *m;
817 c2 = PL_fold_locale[c1];
818 do_exactf:
819 e = strend - ln;
b3c9acc1 820
6eb5f6b9 821 if (norun && e < s)
822 e = s; /* Due to minlen logic of intuit() */
b3c9acc1 823 /* Here it is NOT UTF! */
824 if (c1 == c2) {
825 while (s <= e) {
826 if ( *s == c1
66e933ab 827 && (ln == 1 || !(OP(c) == EXACTF
828 ? ibcmp(s, m, ln)
829 : ibcmp_locale(s, m, ln)))
6eb5f6b9 830 && (norun || regtry(prog, s)) )
b3c9acc1 831 goto got_it;
832 s++;
833 }
834 } else {
835 while (s <= e) {
836 if ( (*s == c1 || *s == c2)
66e933ab 837 && (ln == 1 || !(OP(c) == EXACTF
838 ? ibcmp(s, m, ln)
839 : ibcmp_locale(s, m, ln)))
6eb5f6b9 840 && (norun || regtry(prog, s)) )
b3c9acc1 841 goto got_it;
842 s++;
843 }
844 }
845 break;
bbce6d69 846 case BOUNDL:
3280af22 847 PL_reg_flags |= RF_tainted;
bbce6d69 848 /* FALL THROUGH */
a0d0e21e 849 case BOUND:
6eb5f6b9 850 tmp = (s != startpos) ? UCHARAT(s - 1) : '\n';
95bac841 851 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
a0d0e21e 852 while (s < strend) {
95bac841 853 if (tmp == !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
a0d0e21e 854 tmp = !tmp;
6eb5f6b9 855 if ((norun || regtry(prog, s)))
a0d0e21e 856 goto got_it;
a687059c 857 }
a0d0e21e 858 s++;
859 }
6eb5f6b9 860 if ((!prog->minlen && tmp) && (norun || regtry(prog, s)))
a0d0e21e 861 goto got_it;
862 break;
a0ed51b3 863 case BOUNDLUTF8:
864 PL_reg_flags |= RF_tainted;
865 /* FALL THROUGH */
866 case BOUNDUTF8:
6eb5f6b9 867 tmp = (I32)(s != startpos) ? utf8_to_uv(reghop((U8*)s, -1), 0) : '\n';
a0ed51b3 868 tmp = ((OP(c) == BOUND ? isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
869 while (s < strend) {
dfe13c55 870 if (tmp == !(OP(c) == BOUND ?
871 swash_fetch(PL_utf8_alnum, (U8*)s) :
872 isALNUM_LC_utf8((U8*)s)))
873 {
a0ed51b3 874 tmp = !tmp;
6eb5f6b9 875 if ((norun || regtry(prog, s)))
a0ed51b3 876 goto got_it;
877 }
878 s += UTF8SKIP(s);
879 }
6eb5f6b9 880 if ((!prog->minlen && tmp) && (norun || regtry(prog, s)))
a0ed51b3 881 goto got_it;
882 break;
bbce6d69 883 case NBOUNDL:
3280af22 884 PL_reg_flags |= RF_tainted;
bbce6d69 885 /* FALL THROUGH */
a0d0e21e 886 case NBOUND:
6eb5f6b9 887 tmp = (s != startpos) ? UCHARAT(s - 1) : '\n';
95bac841 888 tmp = ((OP(c) == NBOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
a0d0e21e 889 while (s < strend) {
95bac841 890 if (tmp == !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
a0d0e21e 891 tmp = !tmp;
6eb5f6b9 892 else if ((norun || regtry(prog, s)))
a0d0e21e 893 goto got_it;
894 s++;
895 }
6eb5f6b9 896 if ((!prog->minlen && !tmp) && (norun || regtry(prog, s)))
a0d0e21e 897 goto got_it;
898 break;
a0ed51b3 899 case NBOUNDLUTF8:
900 PL_reg_flags |= RF_tainted;
901 /* FALL THROUGH */
902 case NBOUNDUTF8:
6eb5f6b9 903 if (prog->minlen)
dfe13c55 904 strend = reghop_c(strend, -1);
6eb5f6b9 905 tmp = (I32)(s != startpos) ? utf8_to_uv(reghop((U8*)s, -1), 0) : '\n';
a0ed51b3 906 tmp = ((OP(c) == NBOUND ? isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
907 while (s < strend) {
dfe13c55 908 if (tmp == !(OP(c) == NBOUND ?
909 swash_fetch(PL_utf8_alnum, (U8*)s) :
910 isALNUM_LC_utf8((U8*)s)))
a0ed51b3 911 tmp = !tmp;
6eb5f6b9 912 else if ((norun || regtry(prog, s)))
a0ed51b3 913 goto got_it;
914 s += UTF8SKIP(s);
915 }
6eb5f6b9 916 if ((!prog->minlen && !tmp) && (norun || regtry(prog, s)))
a0ed51b3 917 goto got_it;
918 break;
a0d0e21e 919 case ALNUM:
920 while (s < strend) {
bbce6d69 921 if (isALNUM(*s)) {
6eb5f6b9 922 if (tmp && (norun || regtry(prog, s)))
bbce6d69 923 goto got_it;
924 else
925 tmp = doevery;
926 }
927 else
928 tmp = 1;
929 s++;
930 }
931 break;
a0ed51b3 932 case ALNUMUTF8:
933 while (s < strend) {
dfe13c55 934 if (swash_fetch(PL_utf8_alnum, (U8*)s)) {
6eb5f6b9 935 if (tmp && (norun || regtry(prog, s)))
a0ed51b3 936 goto got_it;
937 else
938 tmp = doevery;
939 }
940 else
941 tmp = 1;
942 s += UTF8SKIP(s);
943 }
944 break;
bbce6d69 945 case ALNUML:
3280af22 946 PL_reg_flags |= RF_tainted;
bbce6d69 947 while (s < strend) {
948 if (isALNUM_LC(*s)) {
6eb5f6b9 949 if (tmp && (norun || regtry(prog, s)))
a0d0e21e 950 goto got_it;
a687059c 951 else
a0d0e21e 952 tmp = doevery;
953 }
954 else
955 tmp = 1;
956 s++;
957 }
958 break;
a0ed51b3 959 case ALNUMLUTF8:
960 PL_reg_flags |= RF_tainted;
961 while (s < strend) {
dfe13c55 962 if (isALNUM_LC_utf8((U8*)s)) {
6eb5f6b9 963 if (tmp && (norun || regtry(prog, s)))
a0ed51b3 964 goto got_it;
965 else
966 tmp = doevery;
967 }
968 else
969 tmp = 1;
970 s += UTF8SKIP(s);
971 }
972 break;
a0d0e21e 973 case NALNUM:
974 while (s < strend) {
bbce6d69 975 if (!isALNUM(*s)) {
6eb5f6b9 976 if (tmp && (norun || regtry(prog, s)))
bbce6d69 977 goto got_it;
978 else
979 tmp = doevery;
980 }
981 else
982 tmp = 1;
983 s++;
984 }
985 break;
a0ed51b3 986 case NALNUMUTF8:
987 while (s < strend) {
dfe13c55 988 if (!swash_fetch(PL_utf8_alnum, (U8*)s)) {
6eb5f6b9 989 if (tmp && (norun || regtry(prog, s)))
a0ed51b3 990 goto got_it;
991 else
992 tmp = doevery;
993 }
994 else
995 tmp = 1;
996 s += UTF8SKIP(s);
997 }
998 break;
bbce6d69 999 case NALNUML:
3280af22 1000 PL_reg_flags |= RF_tainted;
bbce6d69 1001 while (s < strend) {
1002 if (!isALNUM_LC(*s)) {
6eb5f6b9 1003 if (tmp && (norun || regtry(prog, s)))
a0d0e21e 1004 goto got_it;
a687059c 1005 else
a0d0e21e 1006 tmp = doevery;
a687059c 1007 }
a0d0e21e 1008 else
1009 tmp = 1;
1010 s++;
1011 }
1012 break;
a0ed51b3 1013 case NALNUMLUTF8:
1014 PL_reg_flags |= RF_tainted;
1015 while (s < strend) {
dfe13c55 1016 if (!isALNUM_LC_utf8((U8*)s)) {
6eb5f6b9 1017 if (tmp && (norun || regtry(prog, s)))
a0ed51b3 1018 goto got_it;
1019 else
1020 tmp = doevery;
1021 }
1022 else
1023 tmp = 1;
1024 s += UTF8SKIP(s);
1025 }
1026 break;
a0d0e21e 1027 case SPACE:
1028 while (s < strend) {
1029 if (isSPACE(*s)) {
6eb5f6b9 1030 if (tmp && (norun || regtry(prog, s)))
a0d0e21e 1031 goto got_it;
1032 else
1033 tmp = doevery;
2304df62 1034 }
a0d0e21e 1035 else
1036 tmp = 1;
1037 s++;
1038 }
1039 break;
a0ed51b3 1040 case SPACEUTF8:
1041 while (s < strend) {
dfe13c55 1042 if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s)) {
6eb5f6b9 1043 if (tmp && (norun || regtry(prog, s)))
a0ed51b3 1044 goto got_it;
1045 else
1046 tmp = doevery;
1047 }
1048 else
1049 tmp = 1;
1050 s += UTF8SKIP(s);
1051 }
1052 break;
bbce6d69 1053 case SPACEL:
3280af22 1054 PL_reg_flags |= RF_tainted;
bbce6d69 1055 while (s < strend) {
1056 if (isSPACE_LC(*s)) {
6eb5f6b9 1057 if (tmp && (norun || regtry(prog, s)))
bbce6d69 1058 goto got_it;
1059 else
1060 tmp = doevery;
1061 }
1062 else
1063 tmp = 1;
1064 s++;
1065 }
1066 break;
a0ed51b3 1067 case SPACELUTF8:
1068 PL_reg_flags |= RF_tainted;
1069 while (s < strend) {
dfe13c55 1070 if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) {
6eb5f6b9 1071 if (tmp && (norun || regtry(prog, s)))
a0ed51b3 1072 goto got_it;
1073 else
1074 tmp = doevery;
1075 }
1076 else
1077 tmp = 1;
1078 s += UTF8SKIP(s);
1079 }
1080 break;
a0d0e21e 1081 case NSPACE:
1082 while (s < strend) {
1083 if (!isSPACE(*s)) {
6eb5f6b9 1084 if (tmp && (norun || regtry(prog, s)))
a0d0e21e 1085 goto got_it;
1086 else
1087 tmp = doevery;
a687059c 1088 }
a0d0e21e 1089 else
1090 tmp = 1;
1091 s++;
1092 }
1093 break;
a0ed51b3 1094 case NSPACEUTF8:
1095 while (s < strend) {
dfe13c55 1096 if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s))) {
6eb5f6b9 1097 if (tmp && (norun || regtry(prog, s)))
a0ed51b3 1098 goto got_it;
1099 else
1100 tmp = doevery;
1101 }
1102 else
1103 tmp = 1;
1104 s += UTF8SKIP(s);
1105 }
1106 break;
bbce6d69 1107 case NSPACEL:
3280af22 1108 PL_reg_flags |= RF_tainted;
bbce6d69 1109 while (s < strend) {
1110 if (!isSPACE_LC(*s)) {
6eb5f6b9 1111 if (tmp && (norun || regtry(prog, s)))
bbce6d69 1112 goto got_it;
1113 else
1114 tmp = doevery;
1115 }
1116 else
1117 tmp = 1;
1118 s++;
1119 }
1120 break;
a0ed51b3 1121 case NSPACELUTF8:
1122 PL_reg_flags |= RF_tainted;
1123 while (s < strend) {
dfe13c55 1124 if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) {
6eb5f6b9 1125 if (tmp && (norun || regtry(prog, s)))
a0ed51b3 1126 goto got_it;
1127 else
1128 tmp = doevery;
1129 }
1130 else
1131 tmp = 1;
1132 s += UTF8SKIP(s);
1133 }
1134 break;
a0d0e21e 1135 case DIGIT:
1136 while (s < strend) {
1137 if (isDIGIT(*s)) {
6eb5f6b9 1138 if (tmp && (norun || regtry(prog, s)))
a0d0e21e 1139 goto got_it;
1140 else
1141 tmp = doevery;
2b69d0c2 1142 }
a0d0e21e 1143 else
1144 tmp = 1;
1145 s++;
1146 }
1147 break;
a0ed51b3 1148 case DIGITUTF8:
1149 while (s < strend) {
dfe13c55 1150 if (swash_fetch(PL_utf8_digit,(U8*)s)) {
6eb5f6b9 1151 if (tmp && (norun || regtry(prog, s)))
a0ed51b3 1152 goto got_it;
1153 else
1154 tmp = doevery;
1155 }
1156 else
1157 tmp = 1;
1158 s += UTF8SKIP(s);
1159 }
1160 break;
b8c5462f 1161 case DIGITL:
1162 PL_reg_flags |= RF_tainted;
1163 while (s < strend) {
1164 if (isDIGIT_LC(*s)) {
6eb5f6b9 1165 if (tmp && (norun || regtry(prog, s)))
b8c5462f 1166 goto got_it;
1167 else
1168 tmp = doevery;
1169 }
1170 else
1171 tmp = 1;
1172 s++;
1173 }
1174 break;
1175 case DIGITLUTF8:
1176 PL_reg_flags |= RF_tainted;
1177 while (s < strend) {
1178 if (isDIGIT_LC_utf8((U8*)s)) {
6eb5f6b9 1179 if (tmp && (norun || regtry(prog, s)))
b8c5462f 1180 goto got_it;
1181 else
1182 tmp = doevery;
1183 }
1184 else
1185 tmp = 1;
1186 s += UTF8SKIP(s);
1187 }
1188 break;
a0d0e21e 1189 case NDIGIT:
1190 while (s < strend) {
1191 if (!isDIGIT(*s)) {
6eb5f6b9 1192 if (tmp && (norun || regtry(prog, s)))
a0d0e21e 1193 goto got_it;
1194 else
1195 tmp = doevery;
a687059c 1196 }
a0d0e21e 1197 else
1198 tmp = 1;
1199 s++;
1200 }
1201 break;
a0ed51b3 1202 case NDIGITUTF8:
1203 while (s < strend) {
dfe13c55 1204 if (!swash_fetch(PL_utf8_digit,(U8*)s)) {
6eb5f6b9 1205 if (tmp && (norun || regtry(prog, s)))
a0ed51b3 1206 goto got_it;
1207 else
1208 tmp = doevery;
1209 }
1210 else
1211 tmp = 1;
1212 s += UTF8SKIP(s);
1213 }
1214 break;
b8c5462f 1215 case NDIGITL:
1216 PL_reg_flags |= RF_tainted;
1217 while (s < strend) {
1218 if (!isDIGIT_LC(*s)) {
6eb5f6b9 1219 if (tmp && (norun || regtry(prog, s)))
b8c5462f 1220 goto got_it;
1221 else
1222 tmp = doevery;
1223 }
1224 else
1225 tmp = 1;
1226 s++;
a0ed51b3 1227 }
b8c5462f 1228 break;
1229 case NDIGITLUTF8:
1230 PL_reg_flags |= RF_tainted;
1231 while (s < strend) {
1232 if (!isDIGIT_LC_utf8((U8*)s)) {
6eb5f6b9 1233 if (tmp && (norun || regtry(prog, s)))
b8c5462f 1234 goto got_it;
cf93c79d 1235 else
b8c5462f 1236 tmp = doevery;
1237 }
1238 else
1239 tmp = 1;
1240 s += UTF8SKIP(s);
1241 }
1242 break;
b3c9acc1 1243 default:
3c3eec57 1244 Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
1245 break;
d6a28714 1246 }
6eb5f6b9 1247 return 0;
1248 got_it:
1249 return s;
1250}
1251
1252/*
1253 - regexec_flags - match a regexp against a string
1254 */
1255I32
1256Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend,
1257 char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
1258/* strend: pointer to null at end of string */
1259/* strbeg: real beginning of string */
1260/* minend: end of match must be >=minend after stringarg. */
1261/* data: May be used for some additional optimizations. */
1262/* nosave: For optimizations. */
1263{
1264 dTHR;
1265 register char *s;
1266 register regnode *c;
1267 register char *startpos = stringarg;
1268 register I32 tmp;
1269 I32 minlen; /* must match at least this many chars */
1270 I32 dontbother = 0; /* how many characters not to try at end */
1271 I32 start_shift = 0; /* Offset of the start to find
1272 constant substr. */ /* CC */
1273 I32 end_shift = 0; /* Same for the end. */ /* CC */
1274 I32 scream_pos = -1; /* Internal iterator of scream. */
1275 char *scream_olds;
1276 SV* oreplsv = GvSV(PL_replgv);
1277
1278 PL_regcc = 0;
1279
1280 cache_re(prog);
1281#ifdef DEBUGGING
1282 PL_regnarrate = PL_debug & 512;
1283#endif
1284
1285 /* Be paranoid... */
1286 if (prog == NULL || startpos == NULL) {
1287 Perl_croak(aTHX_ "NULL regexp parameter");
1288 return 0;
1289 }
1290
1291 minlen = prog->minlen;
1292 if (strend - startpos < minlen) goto phooey;
1293
1294 if (startpos == strbeg) /* is ^ valid at stringarg? */
1295 PL_regprev = '\n';
1296 else {
1297 PL_regprev = (U32)stringarg[-1];
1298 if (!PL_multiline && PL_regprev == '\n')
1299 PL_regprev = '\0'; /* force ^ to NOT match */
1300 }
1301
1302 /* Check validity of program. */
1303 if (UCHARAT(prog->program) != REG_MAGIC) {
1304 Perl_croak(aTHX_ "corrupted regexp program");
1305 }
1306
1307 PL_reg_flags = 0;
1308 PL_reg_eval_set = 0;
1309 PL_reg_maxiter = 0;
1310
1311 if (prog->reganch & ROPT_UTF8)
1312 PL_reg_flags |= RF_utf8;
1313
1314 /* Mark beginning of line for ^ and lookbehind. */
1315 PL_regbol = startpos;
1316 PL_bostr = strbeg;
1317 PL_reg_sv = sv;
1318
1319 /* Mark end of line for $ (and such) */
1320 PL_regeol = strend;
1321
1322 /* see how far we have to get to not match where we matched before */
1323 PL_regtill = startpos+minend;
1324
1325 /* We start without call_cc context. */
1326 PL_reg_call_cc = 0;
1327
1328 /* If there is a "must appear" string, look for it. */
1329 s = startpos;
1330
1331 if (prog->reganch & ROPT_GPOS_SEEN) { /* Need to have PL_reg_ganch */
1332 MAGIC *mg;
1333
1334 if (flags & REXEC_IGNOREPOS) /* Means: check only at start */
1335 PL_reg_ganch = startpos;
1336 else if (sv && SvTYPE(sv) >= SVt_PVMG
1337 && SvMAGIC(sv)
1338 && (mg = mg_find(sv, 'g')) && mg->mg_len >= 0) {
1339 PL_reg_ganch = strbeg + mg->mg_len; /* Defined pos() */
1340 if (prog->reganch & ROPT_ANCH_GPOS) {
1341 if (s > PL_reg_ganch)
1342 goto phooey;
1343 s = PL_reg_ganch;
1344 }
1345 }
1346 else /* pos() not defined */
1347 PL_reg_ganch = strbeg;
1348 }
1349
1350 if (!(flags & REXEC_CHECKED) && prog->check_substr != Nullsv) {
1351 re_scream_pos_data d;
1352
1353 d.scream_olds = &scream_olds;
1354 d.scream_pos = &scream_pos;
1355 s = re_intuit_start(prog, sv, s, strend, flags, &d);
1356 if (!s)
1357 goto phooey; /* not present */
1358 }
1359
1360 DEBUG_r( if (!PL_colorset) reginitcolors() );
1361 DEBUG_r(PerlIO_printf(Perl_debug_log,
1362 "%sMatching REx%s `%s%.60s%s%s' against `%s%.*s%s%s'\n",
1363 PL_colors[4],PL_colors[5],PL_colors[0],
1364 prog->precomp,
1365 PL_colors[1],
1366 (strlen(prog->precomp) > 60 ? "..." : ""),
1367 PL_colors[0],
1368 (int)(strend - startpos > 60 ? 60 : strend - startpos),
1369 startpos, PL_colors[1],
1370 (strend - startpos > 60 ? "..." : ""))
1371 );
1372
1373 /* Simplest case: anchored match need be tried only once. */
1374 /* [unless only anchor is BOL and multiline is set] */
1375 if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) {
1376 if (s == startpos && regtry(prog, startpos))
1377 goto got_it;
1378 else if (PL_multiline || (prog->reganch & ROPT_IMPLICIT)
1379 || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */
1380 {
1381 char *end;
1382
1383 if (minlen)
1384 dontbother = minlen - 1;
1385 end = HOPc(strend, -dontbother) - 1;
1386 /* for multiline we only have to try after newlines */
1387 if (prog->check_substr) {
1388 if (s == startpos)
1389 goto after_try;
1390 while (1) {
1391 if (regtry(prog, s))
1392 goto got_it;
1393 after_try:
1394 if (s >= end)
1395 goto phooey;
1396 if (prog->reganch & RE_USE_INTUIT) {
1397 s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
1398 if (!s)
1399 goto phooey;
1400 }
1401 else
1402 s++;
1403 }
1404 } else {
1405 if (s > startpos)
1406 s--;
1407 while (s < end) {
1408 if (*s++ == '\n') { /* don't need PL_utf8skip here */
1409 if (regtry(prog, s))
1410 goto got_it;
1411 }
1412 }
1413 }
1414 }
1415 goto phooey;
1416 } else if (prog->reganch & ROPT_ANCH_GPOS) {
1417 if (regtry(prog, PL_reg_ganch))
1418 goto got_it;
1419 goto phooey;
1420 }
1421
1422 /* Messy cases: unanchored match. */
1423 if (prog->anchored_substr && prog->reganch & ROPT_SKIP) {
1424 /* we have /x+whatever/ */
1425 /* it must be a one character string (XXXX Except UTF?) */
1426 char ch = SvPVX(prog->anchored_substr)[0];
1427 if (UTF) {
1428 while (s < strend) {
1429 if (*s == ch) {
1430 if (regtry(prog, s)) goto got_it;
1431 s += UTF8SKIP(s);
1432 while (s < strend && *s == ch)
1433 s += UTF8SKIP(s);
1434 }
1435 s += UTF8SKIP(s);
1436 }
1437 }
1438 else {
1439 while (s < strend) {
1440 if (*s == ch) {
1441 if (regtry(prog, s)) goto got_it;
1442 s++;
1443 while (s < strend && *s == ch)
1444 s++;
1445 }
1446 s++;
1447 }
1448 }
1449 }
1450 /*SUPPRESS 560*/
1451 else if (prog->anchored_substr != Nullsv
1452 || (prog->float_substr != Nullsv
1453 && prog->float_max_offset < strend - s)) {
1454 SV *must = prog->anchored_substr
1455 ? prog->anchored_substr : prog->float_substr;
1456 I32 back_max =
1457 prog->anchored_substr ? prog->anchored_offset : prog->float_max_offset;
1458 I32 back_min =
1459 prog->anchored_substr ? prog->anchored_offset : prog->float_min_offset;
1460 I32 delta = back_max - back_min;
1461 char *last = HOPc(strend, /* Cannot start after this */
1462 -(I32)(CHR_SVLEN(must)
1463 - (SvTAIL(must) != 0) + back_min));
1464 char *last1; /* Last position checked before */
1465
1466 if (s > PL_bostr)
1467 last1 = HOPc(s, -1);
1468 else
1469 last1 = s - 1; /* bogus */
1470
1471 /* XXXX check_substr already used to find `s', can optimize if
1472 check_substr==must. */
1473 scream_pos = -1;
1474 dontbother = end_shift;
1475 strend = HOPc(strend, -dontbother);
1476 while ( (s <= last) &&
1477 ((flags & REXEC_SCREAM)
1478 ? (s = screaminstr(sv, must, HOPc(s, back_min) - strbeg,
1479 end_shift, &scream_pos, 0))
1480 : (s = fbm_instr((unsigned char*)HOP(s, back_min),
1481 (unsigned char*)strend, must,
1482 PL_multiline ? FBMrf_MULTILINE : 0))) ) {
1483 if (HOPc(s, -back_max) > last1) {
1484 last1 = HOPc(s, -back_min);
1485 s = HOPc(s, -back_max);
1486 }
1487 else {
1488 char *t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
1489
1490 last1 = HOPc(s, -back_min);
1491 s = t;
1492 }
1493 if (UTF) {
1494 while (s <= last1) {
1495 if (regtry(prog, s))
1496 goto got_it;
1497 s += UTF8SKIP(s);
1498 }
1499 }
1500 else {
1501 while (s <= last1) {
1502 if (regtry(prog, s))
1503 goto got_it;
1504 s++;
1505 }
1506 }
1507 }
1508 goto phooey;
1509 }
1510 else if (c = prog->regstclass) {
66e933ab 1511 if (minlen && PL_regkind[(U8)OP(prog->regstclass)] != EXACT)
1512 /* don't bother with what can't match */
6eb5f6b9 1513 strend = HOPc(strend, -(minlen - 1));
1514 if (find_byclass(prog, c, s, strend, startpos, 0))
1515 goto got_it;
d6a28714 1516 }
1517 else {
1518 dontbother = 0;
1519 if (prog->float_substr != Nullsv) { /* Trim the end. */
1520 char *last;
1521 I32 oldpos = scream_pos;
1522
1523 if (flags & REXEC_SCREAM) {
1524 last = screaminstr(sv, prog->float_substr, s - strbeg,
1525 end_shift, &scream_pos, 1); /* last one */
1526 if (!last)
1527 last = scream_olds; /* Only one occurence. */
b8c5462f 1528 }
d6a28714 1529 else {
1530 STRLEN len;
1531 char *little = SvPV(prog->float_substr, len);
1532
1533 if (SvTAIL(prog->float_substr)) {
1534 if (memEQ(strend - len + 1, little, len - 1))
1535 last = strend - len + 1;
1536 else if (!PL_multiline)
1537 last = memEQ(strend - len, little, len)
1538 ? strend - len : Nullch;
b8c5462f 1539 else
d6a28714 1540 goto find_last;
1541 } else {
1542 find_last:
1543 if (len)
1544 last = rninstr(s, strend, little, little + len);
b8c5462f 1545 else
d6a28714 1546 last = strend; /* matching `$' */
b8c5462f 1547 }
b8c5462f 1548 }
d6a28714 1549 if (last == NULL) goto phooey; /* Should not happen! */
1550 dontbother = strend - last + prog->float_min_offset;
1551 }
1552 if (minlen && (dontbother < minlen))
1553 dontbother = minlen - 1;
1554 strend -= dontbother; /* this one's always in bytes! */
1555 /* We don't know much -- general case. */
1556 if (UTF) {
1557 for (;;) {
1558 if (regtry(prog, s))
1559 goto got_it;
1560 if (s >= strend)
1561 break;
b8c5462f 1562 s += UTF8SKIP(s);
d6a28714 1563 };
1564 }
1565 else {
1566 do {
1567 if (regtry(prog, s))
1568 goto got_it;
1569 } while (s++ < strend);
1570 }
1571 }
1572
1573 /* Failure. */
1574 goto phooey;
1575
1576got_it:
1577 RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
1578
1579 if (PL_reg_eval_set) {
1580 /* Preserve the current value of $^R */
1581 if (oreplsv != GvSV(PL_replgv))
1582 sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
1583 restored, the value remains
1584 the same. */
1585 restore_pos(aTHXo_ 0);
1586 }
1587
1588 /* make sure $`, $&, $', and $digit will work later */
1589 if ( !(flags & REXEC_NOT_FIRST) ) {
1590 if (RX_MATCH_COPIED(prog)) {
1591 Safefree(prog->subbeg);
1592 RX_MATCH_COPIED_off(prog);
1593 }
1594 if (flags & REXEC_COPY_STR) {
1595 I32 i = PL_regeol - startpos + (stringarg - strbeg);
1596
1597 s = savepvn(strbeg, i);
1598 prog->subbeg = s;
1599 prog->sublen = i;
1600 RX_MATCH_COPIED_on(prog);
1601 }
1602 else {
1603 prog->subbeg = strbeg;
1604 prog->sublen = PL_regeol - strbeg; /* strend may have been modified */
1605 }
1606 }
1607
1608 return 1;
1609
1610phooey:
1611 if (PL_reg_eval_set)
1612 restore_pos(aTHXo_ 0);
1613 return 0;
1614}
1615
1616/*
1617 - regtry - try match at specific point
1618 */
1619STATIC I32 /* 0 failure, 1 success */
1620S_regtry(pTHX_ regexp *prog, char *startpos)
1621{
1622 dTHR;
1623 register I32 i;
1624 register I32 *sp;
1625 register I32 *ep;
1626 CHECKPOINT lastcp;
1627
1628 if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
1629 MAGIC *mg;
1630
1631 PL_reg_eval_set = RS_init;
1632 DEBUG_r(DEBUG_s(
b900a521 1633 PerlIO_printf(Perl_debug_log, " setting stack tmpbase at %"IVdf"\n",
1634 (IV)(PL_stack_sp - PL_stack_base));
d6a28714 1635 ));
e8347627 1636 SAVEI32(cxstack[cxstack_ix].blk_oldsp);
d6a28714 1637 cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
1638 /* Otherwise OP_NEXTSTATE will free whatever on stack now. */
1639 SAVETMPS;
1640 /* Apparently this is not needed, judging by wantarray. */
e8347627 1641 /* SAVEI8(cxstack[cxstack_ix].blk_gimme);
d6a28714 1642 cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
1643
1644 if (PL_reg_sv) {
1645 /* Make $_ available to executed code. */
1646 if (PL_reg_sv != DEFSV) {
1647 /* SAVE_DEFSV does *not* suffice here for USE_THREADS */
1648 SAVESPTR(DEFSV);
1649 DEFSV = PL_reg_sv;
b8c5462f 1650 }
d6a28714 1651
1652 if (!(SvTYPE(PL_reg_sv) >= SVt_PVMG && SvMAGIC(PL_reg_sv)
1653 && (mg = mg_find(PL_reg_sv, 'g')))) {
1654 /* prepare for quick setting of pos */
1655 sv_magic(PL_reg_sv, (SV*)0, 'g', Nullch, 0);
1656 mg = mg_find(PL_reg_sv, 'g');
1657 mg->mg_len = -1;
b8c5462f 1658 }
d6a28714 1659 PL_reg_magic = mg;
1660 PL_reg_oldpos = mg->mg_len;
c76ac1ee 1661 SAVEDESTRUCTOR_X(restore_pos, 0);
d6a28714 1662 }
1663 if (!PL_reg_curpm)
1664 New(22,PL_reg_curpm, 1, PMOP);
1665 PL_reg_curpm->op_pmregexp = prog;
1666 PL_reg_oldcurpm = PL_curpm;
1667 PL_curpm = PL_reg_curpm;
1668 if (RX_MATCH_COPIED(prog)) {
1669 /* Here is a serious problem: we cannot rewrite subbeg,
1670 since it may be needed if this match fails. Thus
1671 $` inside (?{}) could fail... */
1672 PL_reg_oldsaved = prog->subbeg;
1673 PL_reg_oldsavedlen = prog->sublen;
1674 RX_MATCH_COPIED_off(prog);
1675 }
1676 else
1677 PL_reg_oldsaved = Nullch;
1678 prog->subbeg = PL_bostr;
1679 prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
1680 }
1681 prog->startp[0] = startpos - PL_bostr;
1682 PL_reginput = startpos;
1683 PL_regstartp = prog->startp;
1684 PL_regendp = prog->endp;
1685 PL_reglastparen = &prog->lastparen;
1686 prog->lastparen = 0;
1687 PL_regsize = 0;
1688 DEBUG_r(PL_reg_starttry = startpos);
1689 if (PL_reg_start_tmpl <= prog->nparens) {
1690 PL_reg_start_tmpl = prog->nparens*3/2 + 3;
1691 if(PL_reg_start_tmp)
1692 Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1693 else
1694 New(22,PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1695 }
1696
1697 /* XXXX What this code is doing here?!!! There should be no need
1698 to do this again and again, PL_reglastparen should take care of
1699 this! */
1700 sp = prog->startp;
1701 ep = prog->endp;
1702 if (prog->nparens) {
1703 for (i = prog->nparens; i >= 1; i--) {
1704 *++sp = -1;
1705 *++ep = -1;
1706 }
1707 }
1708 REGCP_SET;
1709 if (regmatch(prog->program + 1)) {
1710 prog->endp[0] = PL_reginput - PL_bostr;
1711 return 1;
1712 }
1713 REGCP_UNWIND;
1714 return 0;
1715}
1716
1717/*
1718 - regmatch - main matching routine
1719 *
1720 * Conceptually the strategy is simple: check to see whether the current
1721 * node matches, call self recursively to see whether the rest matches,
1722 * and then act accordingly. In practice we make some effort to avoid
1723 * recursion, in particular by going through "ordinary" nodes (that don't
1724 * need to know whether the rest of the match failed) by a loop instead of
1725 * by recursion.
1726 */
1727/* [lwall] I've hoisted the register declarations to the outer block in order to
1728 * maybe save a little bit of pushing and popping on the stack. It also takes
1729 * advantage of machines that use a register save mask on subroutine entry.
1730 */
1731STATIC I32 /* 0 failure, 1 success */
1732S_regmatch(pTHX_ regnode *prog)
1733{
1734 dTHR;
1735 register regnode *scan; /* Current node. */
1736 regnode *next; /* Next node. */
1737 regnode *inner; /* Next node in internal branch. */
1738 register I32 nextchr; /* renamed nextchr - nextchar colides with
1739 function of same name */
1740 register I32 n; /* no or next */
1741 register I32 ln; /* len or last */
1742 register char *s; /* operand or save */
1743 register char *locinput = PL_reginput;
1744 register I32 c1, c2, paren; /* case fold search, parenth */
1745 int minmod = 0, sw = 0, logical = 0;
1746#ifdef DEBUGGING
1747 PL_regindent++;
1748#endif
1749
1750 /* Note that nextchr is a byte even in UTF */
1751 nextchr = UCHARAT(locinput);
1752 scan = prog;
1753 while (scan != NULL) {
1754#define sayNO_L (logical ? (logical = 0, sw = 0, goto cont) : sayNO)
1755#ifdef DEBUGGING
1756# define sayYES goto yes
1757# define sayNO goto no
7821416a 1758# define sayYES_FINAL goto yes_final
1759# define sayYES_LOUD goto yes_loud
1760# define sayNO_FINAL goto no_final
1761# define sayNO_SILENT goto do_no
d6a28714 1762# define saySAME(x) if (x) goto yes; else goto no
1763# define REPORT_CODE_OFF 24
1764#else
1765# define sayYES return 1
1766# define sayNO return 0
7821416a 1767# define sayYES_FINAL return 1
1768# define sayYES_LOUD return 1
1769# define sayNO_FINAL return 0
1770# define sayNO_SILENT return 0
d6a28714 1771# define saySAME(x) return x
1772#endif
1773 DEBUG_r( {
1774 SV *prop = sv_newmortal();
1775 int docolor = *PL_colors[0];
1776 int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
1777 int l = (PL_regeol - locinput > taill ? taill : PL_regeol - locinput);
1778 /* The part of the string before starttry has one color
1779 (pref0_len chars), between starttry and current
1780 position another one (pref_len - pref0_len chars),
1781 after the current position the third one.
1782 We assume that pref0_len <= pref_len, otherwise we
1783 decrease pref0_len. */
1784 int pref_len = (locinput - PL_bostr > (5 + taill) - l
1785 ? (5 + taill) - l : locinput - PL_bostr);
1786 int pref0_len = pref_len - (locinput - PL_reg_starttry);
1787
1788 if (l + pref_len < (5 + taill) && l < PL_regeol - locinput)
1789 l = ( PL_regeol - locinput > (5 + taill) - pref_len
1790 ? (5 + taill) - pref_len : PL_regeol - locinput);
1791 if (pref0_len < 0)
1792 pref0_len = 0;
1793 if (pref0_len > pref_len)
1794 pref0_len = pref_len;
1795 regprop(prop, scan);
1796 PerlIO_printf(Perl_debug_log,
b900a521 1797 "%4"IVdf" <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|%3"IVdf":%*s%s\n",
1798 (IV)(locinput - PL_bostr),
d6a28714 1799 PL_colors[4], pref0_len,
1800 locinput - pref_len, PL_colors[5],
1801 PL_colors[2], pref_len - pref0_len,
1802 locinput - pref_len + pref0_len, PL_colors[3],
1803 (docolor ? "" : "> <"),
1804 PL_colors[0], l, locinput, PL_colors[1],
1805 15 - l - pref_len + 1,
1806 "",
b900a521 1807 (IV)(scan - PL_regprogram), PL_regindent*2, "",
d6a28714 1808 SvPVX(prop));
1809 } );
1810
1811 next = scan + NEXT_OFF(scan);
1812 if (next == scan)
1813 next = NULL;
1814
1815 switch (OP(scan)) {
1816 case BOL:
1817 if (locinput == PL_bostr
1818 ? PL_regprev == '\n'
1819 : (PL_multiline &&
1820 (nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
1821 {
1822 /* regtill = regbol; */
b8c5462f 1823 break;
1824 }
d6a28714 1825 sayNO;
1826 case MBOL:
1827 if (locinput == PL_bostr
1828 ? PL_regprev == '\n'
1829 : ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
1830 {
b8c5462f 1831 break;
1832 }
d6a28714 1833 sayNO;
1834 case SBOL:
1835 if (locinput == PL_regbol && PL_regprev == '\n')
b8c5462f 1836 break;
d6a28714 1837 sayNO;
1838 case GPOS:
1839 if (locinput == PL_reg_ganch)
1840 break;
1841 sayNO;
1842 case EOL:
1843 if (PL_multiline)
1844 goto meol;
1845 else
1846 goto seol;
1847 case MEOL:
1848 meol:
1849 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 1850 sayNO;
b8c5462f 1851 break;
d6a28714 1852 case SEOL:
1853 seol:
1854 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 1855 sayNO;
d6a28714 1856 if (PL_regeol - locinput > 1)
b8c5462f 1857 sayNO;
b8c5462f 1858 break;
d6a28714 1859 case EOS:
1860 if (PL_regeol != locinput)
b8c5462f 1861 sayNO;
d6a28714 1862 break;
1863 case SANYUTF8:
b8c5462f 1864 if (nextchr & 0x80) {
b8c5462f 1865 locinput += PL_utf8skip[nextchr];
d6a28714 1866 if (locinput > PL_regeol)
1867 sayNO;
b8c5462f 1868 nextchr = UCHARAT(locinput);
1869 break;
1870 }
d6a28714 1871 if (!nextchr && locinput >= PL_regeol)
4633a7c4 1872 sayNO;
b8c5462f 1873 nextchr = UCHARAT(++locinput);
a0d0e21e 1874 break;
d6a28714 1875 case SANY:
1876 if (!nextchr && locinput >= PL_regeol)
b8c5462f 1877 sayNO;
1878 nextchr = UCHARAT(++locinput);
b85d18e9 1879 break;
d6a28714 1880 case ANYUTF8:
a0ed51b3 1881 if (nextchr & 0x80) {
b8c5462f 1882 locinput += PL_utf8skip[nextchr];
d6a28714 1883 if (locinput > PL_regeol)
1884 sayNO;
a0ed51b3 1885 nextchr = UCHARAT(locinput);
1886 break;
1887 }
d6a28714 1888 if (!nextchr && locinput >= PL_regeol || nextchr == '\n')
a0ed51b3 1889 sayNO;
1890 nextchr = UCHARAT(++locinput);
1891 break;
d6a28714 1892 case REG_ANY:
1893 if (!nextchr && locinput >= PL_regeol || nextchr == '\n')
4633a7c4 1894 sayNO;
76e3520e 1895 nextchr = UCHARAT(++locinput);
a0d0e21e 1896 break;
d6a28714 1897 case EXACT:
cd439c50 1898 s = STRING(scan);
1899 ln = STR_LEN(scan);
d6a28714 1900 /* Inline the first character, for speed. */
1901 if (UCHARAT(s) != nextchr)
1902 sayNO;
1903 if (PL_regeol - locinput < ln)
1904 sayNO;
1905 if (ln > 1 && memNE(s, locinput, ln))
1906 sayNO;
1907 locinput += ln;
1908 nextchr = UCHARAT(locinput);
1909 break;
1910 case EXACTFL:
b8c5462f 1911 PL_reg_flags |= RF_tainted;
1912 /* FALL THROUGH */
d6a28714 1913 case EXACTF:
cd439c50 1914 s = STRING(scan);
1915 ln = STR_LEN(scan);
d6a28714 1916
1917 if (UTF) {
1918 char *l = locinput;
1919 char *e = s + ln;
1920 c1 = OP(scan) == EXACTF;
1921 while (s < e) {
1922 if (l >= PL_regeol)
1923 sayNO;
1924 if (utf8_to_uv((U8*)s, 0) != (c1 ?
1925 toLOWER_utf8((U8*)l) :
1926 toLOWER_LC_utf8((U8*)l)))
1927 {
1928 sayNO;
1929 }
1930 s += UTF8SKIP(s);
1931 l += UTF8SKIP(l);
b8c5462f 1932 }
d6a28714 1933 locinput = l;
a0ed51b3 1934 nextchr = UCHARAT(locinput);
1935 break;
1936 }
d6a28714 1937
1938 /* Inline the first character, for speed. */
1939 if (UCHARAT(s) != nextchr &&
1940 UCHARAT(s) != ((OP(scan) == EXACTF)
1941 ? PL_fold : PL_fold_locale)[nextchr])
a0ed51b3 1942 sayNO;
d6a28714 1943 if (PL_regeol - locinput < ln)
b8c5462f 1944 sayNO;
d6a28714 1945 if (ln > 1 && (OP(scan) == EXACTF
1946 ? ibcmp(s, locinput, ln)
1947 : ibcmp_locale(s, locinput, ln)))
4633a7c4 1948 sayNO;
d6a28714 1949 locinput += ln;
1950 nextchr = UCHARAT(locinput);
a0d0e21e 1951 break;
d6a28714 1952 case ANYOFUTF8:
d6a28714 1953 if (!REGINCLASSUTF8(scan, (U8*)locinput))
4633a7c4 1954 sayNO;
d6a28714 1955 if (locinput >= PL_regeol)
1956 sayNO;
1957 locinput += PL_utf8skip[nextchr];
1958 nextchr = UCHARAT(locinput);
1959 break;
1960 case ANYOF:
d6a28714 1961 if (nextchr < 0)
b8c5462f 1962 nextchr = UCHARAT(locinput);
936ed897 1963 if (!REGINCLASS(scan, nextchr))
d6a28714 1964 sayNO;
1965 if (!nextchr && locinput >= PL_regeol)
4633a7c4 1966 sayNO;
b8c5462f 1967 nextchr = UCHARAT(++locinput);
1968 break;
d6a28714 1969 case ALNUML:
b8c5462f 1970 PL_reg_flags |= RF_tainted;
1971 /* FALL THROUGH */
d6a28714 1972 case ALNUM:
b8c5462f 1973 if (!nextchr)
4633a7c4 1974 sayNO;
d6a28714 1975 if (!(OP(scan) == ALNUM
1976 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
b8c5462f 1977 sayNO;
1978 nextchr = UCHARAT(++locinput);
bbce6d69 1979 break;
d6a28714 1980 case ALNUMLUTF8:
3280af22 1981 PL_reg_flags |= RF_tainted;
bbce6d69 1982 /* FALL THROUGH */
d6a28714 1983 case ALNUMUTF8:
b8c5462f 1984 if (!nextchr)
1985 sayNO;
1986 if (nextchr & 0x80) {
d6a28714 1987 if (!(OP(scan) == ALNUMUTF8
1988 ? swash_fetch(PL_utf8_alnum, (U8*)locinput)
1989 : isALNUM_LC_utf8((U8*)locinput)))
b8c5462f 1990 {
1991 sayNO;
a0ed51b3 1992 }
b8c5462f 1993 locinput += PL_utf8skip[nextchr];
a0ed51b3 1994 nextchr = UCHARAT(locinput);
1995 break;
1996 }
d6a28714 1997 if (!(OP(scan) == ALNUMUTF8
1998 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
bbce6d69 1999 sayNO;
b8c5462f 2000 nextchr = UCHARAT(++locinput);
a0d0e21e 2001 break;
d6a28714 2002 case NALNUML:
b8c5462f 2003 PL_reg_flags |= RF_tainted;
2004 /* FALL THROUGH */
d6a28714 2005 case NALNUM:
2006 if (!nextchr && locinput >= PL_regeol)
a0ed51b3 2007 sayNO;
d6a28714 2008 if (OP(scan) == NALNUM
2009 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
a0ed51b3 2010 sayNO;
b8c5462f 2011 nextchr = UCHARAT(++locinput);
a0ed51b3 2012 break;
d6a28714 2013 case NALNUMLUTF8:
b8c5462f 2014 PL_reg_flags |= RF_tainted;
2015 /* FALL THROUGH */
d6a28714 2016 case NALNUMUTF8:
3280af22 2017 if (!nextchr && locinput >= PL_regeol)
4633a7c4 2018 sayNO;
b8c5462f 2019 if (nextchr & 0x80) {
d6a28714 2020 if (OP(scan) == NALNUMUTF8
2021 ? swash_fetch(PL_utf8_alnum, (U8*)locinput)
2022 : isALNUM_LC_utf8((U8*)locinput))
2023 {
b8c5462f 2024 sayNO;
d6a28714 2025 }
b8c5462f 2026 locinput += PL_utf8skip[nextchr];
2027 nextchr = UCHARAT(locinput);
2028 break;
2029 }
d6a28714 2030 if (OP(scan) == NALNUMUTF8
2031 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
b8c5462f 2032 sayNO;
76e3520e 2033 nextchr = UCHARAT(++locinput);
a0d0e21e 2034 break;
d6a28714 2035 case BOUNDL:
2036 case NBOUNDL:
3280af22 2037 PL_reg_flags |= RF_tainted;
bbce6d69 2038 /* FALL THROUGH */
d6a28714 2039 case BOUND:
2040 case NBOUND:
2041 /* was last char in word? */
2042 ln = (locinput != PL_regbol) ? UCHARAT(locinput - 1) : PL_regprev;
2043 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2044 ln = isALNUM(ln);
2045 n = isALNUM(nextchr);
2046 }
2047 else {
2048 ln = isALNUM_LC(ln);
2049 n = isALNUM_LC(nextchr);
2050 }
2051 if (((!ln) == (!n)) == (OP(scan) == BOUND || OP(scan) == BOUNDL))
4633a7c4 2052 sayNO;
a0d0e21e 2053 break;
d6a28714 2054 case BOUNDLUTF8:
2055 case NBOUNDLUTF8:
a0ed51b3 2056 PL_reg_flags |= RF_tainted;
2057 /* FALL THROUGH */
d6a28714 2058 case BOUNDUTF8:
2059 case NBOUNDUTF8:
2060 /* was last char in word? */
2061 ln = (locinput != PL_regbol)
2062 ? utf8_to_uv(reghop((U8*)locinput, -1), 0) : PL_regprev;
2063 if (OP(scan) == BOUNDUTF8 || OP(scan) == NBOUNDUTF8) {
2064 ln = isALNUM_uni(ln);
2065 n = swash_fetch(PL_utf8_alnum, (U8*)locinput);
a0ed51b3 2066 }
d6a28714 2067 else {
2068 ln = isALNUM_LC_uni(ln);
2069 n = isALNUM_LC_utf8((U8*)locinput);
2070 }
2071 if (((!ln) == (!n)) == (OP(scan) == BOUNDUTF8 || OP(scan) == BOUNDLUTF8))
a0ed51b3 2072 sayNO;
a0ed51b3 2073 break;
d6a28714 2074 case SPACEL:
3280af22 2075 PL_reg_flags |= RF_tainted;
bbce6d69 2076 /* FALL THROUGH */
d6a28714 2077 case SPACE:
2078 if (!nextchr && locinput >= PL_regeol)
4633a7c4 2079 sayNO;
d6a28714 2080 if (!(OP(scan) == SPACE
2081 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
4633a7c4 2082 sayNO;
76e3520e 2083 nextchr = UCHARAT(++locinput);
a0d0e21e 2084 break;
d6a28714 2085 case SPACELUTF8:
a0ed51b3 2086 PL_reg_flags |= RF_tainted;
2087 /* FALL THROUGH */
d6a28714 2088 case SPACEUTF8:
a0ed51b3 2089 if (!nextchr && locinput >= PL_regeol)
2090 sayNO;
2091 if (nextchr & 0x80) {
d6a28714 2092 if (!(OP(scan) == SPACEUTF8
2093 ? swash_fetch(PL_utf8_space,(U8*)locinput)
2094 : isSPACE_LC_utf8((U8*)locinput)))
2095 {
a0ed51b3 2096 sayNO;
d6a28714 2097 }
6f06b55f 2098 locinput += PL_utf8skip[nextchr];
a0ed51b3 2099 nextchr = UCHARAT(locinput);
2100 break;
2101 }
d6a28714 2102 if (!(OP(scan) == SPACEUTF8
2103 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
a0ed51b3 2104 sayNO;
2105 nextchr = UCHARAT(++locinput);
2106 break;
d6a28714 2107 case NSPACEL:
3280af22 2108 PL_reg_flags |= RF_tainted;
bbce6d69 2109 /* FALL THROUGH */
d6a28714 2110 case NSPACE:
b8c5462f 2111 if (!nextchr)
2112 sayNO;
d6a28714 2113 if (OP(scan) == SPACE
2114 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
4633a7c4 2115 sayNO;
b8c5462f 2116 nextchr = UCHARAT(++locinput);
a0d0e21e 2117 break;
d6a28714 2118 case NSPACELUTF8:
a0ed51b3 2119 PL_reg_flags |= RF_tainted;
2120 /* FALL THROUGH */
d6a28714 2121 case NSPACEUTF8:
b8c5462f 2122 if (!nextchr)
2123 sayNO;
2124 if (nextchr & 0x80) {
d6a28714 2125 if (OP(scan) == NSPACEUTF8
2126 ? swash_fetch(PL_utf8_space,(U8*)locinput)
2127 : isSPACE_LC_utf8((U8*)locinput))
b8c5462f 2128 {
2129 sayNO;
2130 }
2131 locinput += PL_utf8skip[nextchr];
2132 nextchr = UCHARAT(locinput);
2133 break;
a0ed51b3 2134 }
d6a28714 2135 if (OP(scan) == NSPACEUTF8
2136 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
4633a7c4 2137 sayNO;
76e3520e 2138 nextchr = UCHARAT(++locinput);
a0d0e21e 2139 break;
d6a28714 2140 case DIGITL:
a0ed51b3 2141 PL_reg_flags |= RF_tainted;
2142 /* FALL THROUGH */
d6a28714 2143 case DIGIT:
a0ed51b3 2144 if (!nextchr && locinput >= PL_regeol)
2145 sayNO;
d6a28714 2146 if (!(OP(scan) == DIGIT
2147 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
4633a7c4 2148 sayNO;
76e3520e 2149 nextchr = UCHARAT(++locinput);
a0d0e21e 2150 break;
d6a28714 2151 case DIGITLUTF8:
a0ed51b3 2152 PL_reg_flags |= RF_tainted;
2153 /* FALL THROUGH */
d6a28714 2154 case DIGITUTF8:
a0ed51b3 2155 if (!nextchr)
2156 sayNO;
2157 if (nextchr & 0x80) {
d6a28714 2158 if (OP(scan) == NDIGITUTF8
2159 ? swash_fetch(PL_utf8_digit,(U8*)locinput)
2160 : isDIGIT_LC_utf8((U8*)locinput))
dfe13c55 2161 {
a0ed51b3 2162 sayNO;
dfe13c55 2163 }
6f06b55f 2164 locinput += PL_utf8skip[nextchr];
a0ed51b3 2165 nextchr = UCHARAT(locinput);
2166 break;
2167 }
d6a28714 2168 if (!isDIGIT(nextchr))
a0ed51b3 2169 sayNO;
2170 nextchr = UCHARAT(++locinput);
2171 break;
d6a28714 2172 case NDIGITL:
b8c5462f 2173 PL_reg_flags |= RF_tainted;
2174 /* FALL THROUGH */
d6a28714 2175 case NDIGIT:
b8c5462f 2176 if (!nextchr)
2177 sayNO;
d6a28714 2178 if (OP(scan) == DIGIT
2179 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
4633a7c4 2180 sayNO;
76e3520e 2181 nextchr = UCHARAT(++locinput);
a0d0e21e 2182 break;
d6a28714 2183 case NDIGITLUTF8:
b8c5462f 2184 PL_reg_flags |= RF_tainted;
2185 /* FALL THROUGH */
d6a28714 2186 case NDIGITUTF8:
b8c5462f 2187 if (!nextchr && locinput >= PL_regeol)
2188 sayNO;
a0ed51b3 2189 if (nextchr & 0x80) {
d6a28714 2190 if (swash_fetch(PL_utf8_digit,(U8*)locinput))
a0ed51b3 2191 sayNO;
6f06b55f 2192 locinput += PL_utf8skip[nextchr];
a0ed51b3 2193 nextchr = UCHARAT(locinput);
2194 break;
2195 }
d6a28714 2196 if (isDIGIT(nextchr))
a0ed51b3 2197 sayNO;
2198 nextchr = UCHARAT(++locinput);
2199 break;
2200 case CLUMP:
dfe13c55 2201 if (locinput >= PL_regeol || swash_fetch(PL_utf8_mark,(U8*)locinput))
a0ed51b3 2202 sayNO;
6f06b55f 2203 locinput += PL_utf8skip[nextchr];
dfe13c55 2204 while (locinput < PL_regeol && swash_fetch(PL_utf8_mark,(U8*)locinput))
a0ed51b3 2205 locinput += UTF8SKIP(locinput);
2206 if (locinput > PL_regeol)
2207 sayNO;
2208 nextchr = UCHARAT(locinput);
2209 break;
c8756f30 2210 case REFFL:
3280af22 2211 PL_reg_flags |= RF_tainted;
c8756f30 2212 /* FALL THROUGH */
c277df42 2213 case REF:
c8756f30 2214 case REFF:
c277df42 2215 n = ARG(scan); /* which paren pair */
cf93c79d 2216 ln = PL_regstartp[n];
2c2d71f5 2217 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
cf93c79d 2218 if (*PL_reglastparen < n || ln == -1)
af3f8c16 2219 sayNO; /* Do not match unless seen CLOSEn. */
cf93c79d 2220 if (ln == PL_regendp[n])
a0d0e21e 2221 break;
a0ed51b3 2222
cf93c79d 2223 s = PL_bostr + ln;
a0ed51b3 2224 if (UTF && OP(scan) != REF) { /* REF can do byte comparison */
2225 char *l = locinput;
cf93c79d 2226 char *e = PL_bostr + PL_regendp[n];
a0ed51b3 2227 /*
2228 * Note that we can't do the "other character" lookup trick as
2229 * in the 8-bit case (no pun intended) because in Unicode we
2230 * have to map both upper and title case to lower case.
2231 */
2232 if (OP(scan) == REFF) {
2233 while (s < e) {
2234 if (l >= PL_regeol)
2235 sayNO;
dfe13c55 2236 if (toLOWER_utf8((U8*)s) != toLOWER_utf8((U8*)l))
a0ed51b3 2237 sayNO;
2238 s += UTF8SKIP(s);
2239 l += UTF8SKIP(l);
2240 }
2241 }
2242 else {
2243 while (s < e) {
2244 if (l >= PL_regeol)
2245 sayNO;
dfe13c55 2246 if (toLOWER_LC_utf8((U8*)s) != toLOWER_LC_utf8((U8*)l))
a0ed51b3 2247 sayNO;
2248 s += UTF8SKIP(s);
2249 l += UTF8SKIP(l);
2250 }
2251 }
2252 locinput = l;
2253 nextchr = UCHARAT(locinput);
2254 break;
2255 }
2256
a0d0e21e 2257 /* Inline the first character, for speed. */
76e3520e 2258 if (UCHARAT(s) != nextchr &&
c8756f30 2259 (OP(scan) == REF ||
2260 (UCHARAT(s) != ((OP(scan) == REFF
22c35a8c 2261 ? PL_fold : PL_fold_locale)[nextchr]))))
4633a7c4 2262 sayNO;
cf93c79d 2263 ln = PL_regendp[n] - ln;
3280af22 2264 if (locinput + ln > PL_regeol)
4633a7c4 2265 sayNO;
c8756f30 2266 if (ln > 1 && (OP(scan) == REF
2267 ? memNE(s, locinput, ln)
2268 : (OP(scan) == REFF
2269 ? ibcmp(s, locinput, ln)
2270 : ibcmp_locale(s, locinput, ln))))
4633a7c4 2271 sayNO;
a0d0e21e 2272 locinput += ln;
76e3520e 2273 nextchr = UCHARAT(locinput);
a0d0e21e 2274 break;
2275
2276 case NOTHING:
c277df42 2277 case TAIL:
a0d0e21e 2278 break;
2279 case BACK:
2280 break;
c277df42 2281 case EVAL:
2282 {
2283 dSP;
533c011a 2284 OP_4tree *oop = PL_op;
3280af22 2285 COP *ocurcop = PL_curcop;
2286 SV **ocurpad = PL_curpad;
c277df42 2287 SV *ret;
2288
2289 n = ARG(scan);
533c011a 2290 PL_op = (OP_4tree*)PL_regdata->data[n];
d7d93a81 2291 DEBUG_r( PerlIO_printf(Perl_debug_log, " re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
dfad63ad 2292 PL_curpad = AvARRAY((AV*)PL_regdata->data[n + 2]);
cf93c79d 2293 PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
c277df42 2294
cea2e8a9 2295 CALLRUNOPS(aTHX); /* Scalar context. */
c277df42 2296 SPAGAIN;
2297 ret = POPs;
2298 PUTBACK;
2299
0f5d15d6 2300 PL_op = oop;
2301 PL_curpad = ocurpad;
2302 PL_curcop = ocurcop;
c277df42 2303 if (logical) {
0f5d15d6 2304 if (logical == 2) { /* Postponed subexpression. */
2305 regexp *re;
22c35a8c 2306 MAGIC *mg = Null(MAGIC*);
0f5d15d6 2307 re_cc_state state;
0f5d15d6 2308 CHECKPOINT cp, lastcp;
2309
2310 if(SvROK(ret) || SvRMAGICAL(ret)) {
2311 SV *sv = SvROK(ret) ? SvRV(ret) : ret;
2312
2313 if(SvMAGICAL(sv))
2314 mg = mg_find(sv, 'r');
2315 }
2316 if (mg) {
2317 re = (regexp *)mg->mg_obj;
df0003d4 2318 (void)ReREFCNT_inc(re);
0f5d15d6 2319 }
2320 else {
2321 STRLEN len;
2322 char *t = SvPV(ret, len);
2323 PMOP pm;
2324 char *oprecomp = PL_regprecomp;
2325 I32 osize = PL_regsize;
2326 I32 onpar = PL_regnpar;
2327
2328 pm.op_pmflags = 0;
cea2e8a9 2329 re = CALLREGCOMP(aTHX_ t, t + len, &pm);
0f5d15d6 2330 if (!(SvFLAGS(ret)
2331 & (SVs_TEMP | SVs_PADTMP | SVf_READONLY)))
2332 sv_magic(ret,(SV*)ReREFCNT_inc(re),'r',0,0);
2333 PL_regprecomp = oprecomp;
2334 PL_regsize = osize;
2335 PL_regnpar = onpar;
2336 }
2337 DEBUG_r(
2338 PerlIO_printf(Perl_debug_log,
2339 "Entering embedded `%s%.60s%s%s'\n",
2340 PL_colors[0],
2341 re->precomp,
2342 PL_colors[1],
2343 (strlen(re->precomp) > 60 ? "..." : ""))
2344 );
2345 state.node = next;
2346 state.prev = PL_reg_call_cc;
2347 state.cc = PL_regcc;
2348 state.re = PL_reg_re;
2349
2ab05381 2350 PL_regcc = 0;
0f5d15d6 2351
2352 cp = regcppush(0); /* Save *all* the positions. */
2353 REGCP_SET;
2354 cache_re(re);
2355 state.ss = PL_savestack_ix;
2356 *PL_reglastparen = 0;
2357 PL_reg_call_cc = &state;
2358 PL_reginput = locinput;
2c2d71f5 2359
2360 /* XXXX This is too dramatic a measure... */
2361 PL_reg_maxiter = 0;
2362
0f5d15d6 2363 if (regmatch(re->program + 1)) {
2c914db6 2364 /* Even though we succeeded, we need to restore
2365 global variables, since we may be wrapped inside
2366 SUSPEND, thus the match may be not finished yet. */
2367
2368 /* XXXX Do this only if SUSPENDed? */
2369 PL_reg_call_cc = state.prev;
2370 PL_regcc = state.cc;
2371 PL_reg_re = state.re;
2372 cache_re(PL_reg_re);
2373
2374 /* XXXX This is too dramatic a measure... */
2375 PL_reg_maxiter = 0;
2376
2377 /* These are needed even if not SUSPEND. */
0f5d15d6 2378 ReREFCNT_dec(re);
2379 regcpblow(cp);
2380 sayYES;
2381 }
0f5d15d6 2382 ReREFCNT_dec(re);
2383 REGCP_UNWIND;
2384 regcppop();
2385 PL_reg_call_cc = state.prev;
2386 PL_regcc = state.cc;
2387 PL_reg_re = state.re;
d3790889 2388 cache_re(PL_reg_re);
2c2d71f5 2389
2390 /* XXXX This is too dramatic a measure... */
2391 PL_reg_maxiter = 0;
2392
0f5d15d6 2393 sayNO;
2394 }
c277df42 2395 sw = SvTRUE(ret);
0f5d15d6 2396 logical = 0;
a0ed51b3 2397 }
2398 else
3280af22 2399 sv_setsv(save_scalar(PL_replgv), ret);
c277df42 2400 break;
2401 }
a0d0e21e 2402 case OPEN:
c277df42 2403 n = ARG(scan); /* which paren pair */
3280af22 2404 PL_reg_start_tmp[n] = locinput;
2405 if (n > PL_regsize)
2406 PL_regsize = n;
a0d0e21e 2407 break;
2408 case CLOSE:
c277df42 2409 n = ARG(scan); /* which paren pair */
cf93c79d 2410 PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
2411 PL_regendp[n] = locinput - PL_bostr;
3280af22 2412 if (n > *PL_reglastparen)
2413 *PL_reglastparen = n;
a0d0e21e 2414 break;
c277df42 2415 case GROUPP:
2416 n = ARG(scan); /* which paren pair */
cf93c79d 2417 sw = (*PL_reglastparen >= n && PL_regendp[n] != -1);
c277df42 2418 break;
2419 case IFTHEN:
2c2d71f5 2420 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
c277df42 2421 if (sw)
2422 next = NEXTOPER(NEXTOPER(scan));
2423 else {
2424 next = scan + ARG(scan);
2425 if (OP(next) == IFTHEN) /* Fake one. */
2426 next = NEXTOPER(NEXTOPER(next));
2427 }
2428 break;
2429 case LOGICAL:
0f5d15d6 2430 logical = scan->flags;
c277df42 2431 break;
2ab05381 2432/*******************************************************************
2433 PL_regcc contains infoblock about the innermost (...)* loop, and
2434 a pointer to the next outer infoblock.
2435
2436 Here is how Y(A)*Z is processed (if it is compiled into CURLYX/WHILEM):
2437
2438 1) After matching X, regnode for CURLYX is processed;
2439
2440 2) This regnode creates infoblock on the stack, and calls
2441 regmatch() recursively with the starting point at WHILEM node;
2442
2443 3) Each hit of WHILEM node tries to match A and Z (in the order
2444 depending on the current iteration, min/max of {min,max} and
2445 greediness). The information about where are nodes for "A"
2446 and "Z" is read from the infoblock, as is info on how many times "A"
2447 was already matched, and greediness.
2448
2449 4) After A matches, the same WHILEM node is hit again.
2450
2451 5) Each time WHILEM is hit, PL_regcc is the infoblock created by CURLYX
2452 of the same pair. Thus when WHILEM tries to match Z, it temporarily
2453 resets PL_regcc, since this Y(A)*Z can be a part of some other loop:
2454 as in (Y(A)*Z)*. If Z matches, the automaton will hit the WHILEM node
2455 of the external loop.
2456
2457 Currently present infoblocks form a tree with a stem formed by PL_curcc
2458 and whatever it mentions via ->next, and additional attached trees
2459 corresponding to temporarily unset infoblocks as in "5" above.
2460
2461 In the following picture infoblocks for outer loop of
2462 (Y(A)*?Z)*?T are denoted O, for inner I. NULL starting block
2463 is denoted by x. The matched string is YAAZYAZT. Temporarily postponed
2464 infoblocks are drawn below the "reset" infoblock.
2465
2466 In fact in the picture below we do not show failed matches for Z and T
2467 by WHILEM blocks. [We illustrate minimal matches, since for them it is
2468 more obvious *why* one needs to *temporary* unset infoblocks.]
2469
2470 Matched REx position InfoBlocks Comment
2471 (Y(A)*?Z)*?T x
2472 Y(A)*?Z)*?T x <- O
2473 Y (A)*?Z)*?T x <- O
2474 Y A)*?Z)*?T x <- O <- I
2475 YA )*?Z)*?T x <- O <- I
2476 YA A)*?Z)*?T x <- O <- I
2477 YAA )*?Z)*?T x <- O <- I
2478 YAA Z)*?T x <- O # Temporary unset I
2479 I
2480
2481 YAAZ Y(A)*?Z)*?T x <- O
2482 I
2483
2484 YAAZY (A)*?Z)*?T x <- O
2485 I
2486
2487 YAAZY A)*?Z)*?T x <- O <- I
2488 I
2489
2490 YAAZYA )*?Z)*?T x <- O <- I
2491 I
2492
2493 YAAZYA Z)*?T x <- O # Temporary unset I
2494 I,I
2495
2496 YAAZYAZ )*?T x <- O
2497 I,I
2498
2499 YAAZYAZ T x # Temporary unset O
2500 O
2501 I,I
2502
2503 YAAZYAZT x
2504 O
2505 I,I
2506 *******************************************************************/
a0d0e21e 2507 case CURLYX: {
2508 CURCUR cc;
3280af22 2509 CHECKPOINT cp = PL_savestack_ix;
c277df42 2510
2511 if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
2512 next += ARG(next);
3280af22 2513 cc.oldcc = PL_regcc;
2514 PL_regcc = &cc;
2515 cc.parenfloor = *PL_reglastparen;
a0d0e21e 2516 cc.cur = -1;
2517 cc.min = ARG1(scan);
2518 cc.max = ARG2(scan);
c277df42 2519 cc.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
a0d0e21e 2520 cc.next = next;
2521 cc.minmod = minmod;
2522 cc.lastloc = 0;
3280af22 2523 PL_reginput = locinput;
a0d0e21e 2524 n = regmatch(PREVOPER(next)); /* start on the WHILEM */
2525 regcpblow(cp);
3280af22 2526 PL_regcc = cc.oldcc;
4633a7c4 2527 saySAME(n);
a0d0e21e 2528 }
2529 /* NOT REACHED */
2530 case WHILEM: {
2531 /*
2532 * This is really hard to understand, because after we match
2533 * what we're trying to match, we must make sure the rest of
2c2d71f5 2534 * the REx is going to match for sure, and to do that we have
a0d0e21e 2535 * to go back UP the parse tree by recursing ever deeper. And
2536 * if it fails, we have to reset our parent's current state
2537 * that we can try again after backing off.
2538 */
2539
c277df42 2540 CHECKPOINT cp, lastcp;
3280af22 2541 CURCUR* cc = PL_regcc;
c277df42 2542 char *lastloc = cc->lastloc; /* Detection of 0-len. */
2543
4633a7c4 2544 n = cc->cur + 1; /* how many we know we matched */
3280af22 2545 PL_reginput = locinput;
a0d0e21e 2546
c277df42 2547 DEBUG_r(
2548 PerlIO_printf(Perl_debug_log,
2549 "%*s %ld out of %ld..%ld cc=%lx\n",
3280af22 2550 REPORT_CODE_OFF+PL_regindent*2, "",
c277df42 2551 (long)n, (long)cc->min,
2552 (long)cc->max, (long)cc)
2553 );
4633a7c4 2554
a0d0e21e 2555 /* If degenerate scan matches "", assume scan done. */
2556
579cf2c3 2557 if (locinput == cc->lastloc && n >= cc->min) {
3280af22 2558 PL_regcc = cc->oldcc;
2ab05381 2559 if (PL_regcc)
2560 ln = PL_regcc->cur;
c277df42 2561 DEBUG_r(
c3464db5 2562 PerlIO_printf(Perl_debug_log,
2563 "%*s empty match detected, try continuation...\n",
3280af22 2564 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2565 );
a0d0e21e 2566 if (regmatch(cc->next))
4633a7c4 2567 sayYES;
2ab05381 2568 if (PL_regcc)
2569 PL_regcc->cur = ln;
3280af22 2570 PL_regcc = cc;
4633a7c4 2571 sayNO;
a0d0e21e 2572 }
2573
2574 /* First just match a string of min scans. */
2575
2576 if (n < cc->min) {
2577 cc->cur = n;
2578 cc->lastloc = locinput;
4633a7c4 2579 if (regmatch(cc->scan))
2580 sayYES;
2581 cc->cur = n - 1;
c277df42 2582 cc->lastloc = lastloc;
4633a7c4 2583 sayNO;
a0d0e21e 2584 }
2585
2c2d71f5 2586 if (scan->flags) {
2587 /* Check whether we already were at this position.
2588 Postpone detection until we know the match is not
2589 *that* much linear. */
2590 if (!PL_reg_maxiter) {
2591 PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
2592 PL_reg_leftiter = PL_reg_maxiter;
2593 }
2594 if (PL_reg_leftiter-- == 0) {
2595 I32 size = (PL_reg_maxiter + 7)/8;
2596 if (PL_reg_poscache) {
2597 if (PL_reg_poscache_size < size) {
2598 Renew(PL_reg_poscache, size, char);
2599 PL_reg_poscache_size = size;
2600 }
2601 Zero(PL_reg_poscache, size, char);
2602 }
2603 else {
2604 PL_reg_poscache_size = size;
2605 Newz(29, PL_reg_poscache, size, char);
2606 }
2607 DEBUG_r(
2608 PerlIO_printf(Perl_debug_log,
2609 "%sDetected a super-linear match, switching on caching%s...\n",
2610 PL_colors[4], PL_colors[5])
2611 );
2612 }
2613 if (PL_reg_leftiter < 0) {
2614 I32 o = locinput - PL_bostr, b;
2615
2616 o = (scan->flags & 0xf) - 1 + o * (scan->flags>>4);
2617 b = o % 8;
2618 o /= 8;
2619 if (PL_reg_poscache[o] & (1<<b)) {
2620 DEBUG_r(
2621 PerlIO_printf(Perl_debug_log,
2622 "%*s already tried at this position...\n",
2623 REPORT_CODE_OFF+PL_regindent*2, "")
2624 );
7821416a 2625 sayNO_SILENT;
2c2d71f5 2626 }
2627 PL_reg_poscache[o] |= (1<<b);
2628 }
2629 }
2630
a0d0e21e 2631 /* Prefer next over scan for minimal matching. */
2632
2633 if (cc->minmod) {
3280af22 2634 PL_regcc = cc->oldcc;
2ab05381 2635 if (PL_regcc)
2636 ln = PL_regcc->cur;
5f05dabc 2637 cp = regcppush(cc->parenfloor);
c277df42 2638 REGCP_SET;
5f05dabc 2639 if (regmatch(cc->next)) {
c277df42 2640 regcpblow(cp);
4633a7c4 2641 sayYES; /* All done. */
5f05dabc 2642 }
c277df42 2643 REGCP_UNWIND;
5f05dabc 2644 regcppop();
2ab05381 2645 if (PL_regcc)
2646 PL_regcc->cur = ln;
3280af22 2647 PL_regcc = cc;
a0d0e21e 2648
c277df42 2649 if (n >= cc->max) { /* Maximum greed exceeded? */
599cee73 2650 if (ckWARN(WARN_UNSAFE) && n >= REG_INFTY
3280af22 2651 && !(PL_reg_flags & RF_warned)) {
2652 PL_reg_flags |= RF_warned;
cea2e8a9 2653 Perl_warner(aTHX_ WARN_UNSAFE, "%s limit (%d) exceeded",
2f3ca594 2654 "Complex regular subexpression recursion",
2655 REG_INFTY - 1);
c277df42 2656 }
4633a7c4 2657 sayNO;
c277df42 2658 }
a687059c 2659
c277df42 2660 DEBUG_r(
c3464db5 2661 PerlIO_printf(Perl_debug_log,
2662 "%*s trying longer...\n",
3280af22 2663 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2664 );
a0d0e21e 2665 /* Try scanning more and see if it helps. */
3280af22 2666 PL_reginput = locinput;
a0d0e21e 2667 cc->cur = n;
2668 cc->lastloc = locinput;
5f05dabc 2669 cp = regcppush(cc->parenfloor);
c277df42 2670 REGCP_SET;
5f05dabc 2671 if (regmatch(cc->scan)) {
c277df42 2672 regcpblow(cp);
4633a7c4 2673 sayYES;
5f05dabc 2674 }
c277df42 2675 REGCP_UNWIND;
5f05dabc 2676 regcppop();
4633a7c4 2677 cc->cur = n - 1;
c277df42 2678 cc->lastloc = lastloc;
4633a7c4 2679 sayNO;
a0d0e21e 2680 }
2681
2682 /* Prefer scan over next for maximal matching. */
2683
2684 if (n < cc->max) { /* More greed allowed? */
5f05dabc 2685 cp = regcppush(cc->parenfloor);
a0d0e21e 2686 cc->cur = n;
2687 cc->lastloc = locinput;
c277df42 2688 REGCP_SET;
5f05dabc 2689 if (regmatch(cc->scan)) {
c277df42 2690 regcpblow(cp);
4633a7c4 2691 sayYES;
5f05dabc 2692 }
c277df42 2693 REGCP_UNWIND;
a0d0e21e 2694 regcppop(); /* Restore some previous $<digit>s? */
3280af22 2695 PL_reginput = locinput;
c277df42 2696 DEBUG_r(
c3464db5 2697 PerlIO_printf(Perl_debug_log,
2698 "%*s failed, try continuation...\n",
3280af22 2699 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2700 );
2701 }
599cee73 2702 if (ckWARN(WARN_UNSAFE) && n >= REG_INFTY
2703 && !(PL_reg_flags & RF_warned)) {
3280af22 2704 PL_reg_flags |= RF_warned;
cea2e8a9 2705 Perl_warner(aTHX_ WARN_UNSAFE, "%s limit (%d) exceeded",
cb5d145d 2706 "Complex regular subexpression recursion",
2707 REG_INFTY - 1);
a0d0e21e 2708 }
2709
2710 /* Failed deeper matches of scan, so see if this one works. */
3280af22 2711 PL_regcc = cc->oldcc;
2ab05381 2712 if (PL_regcc)
2713 ln = PL_regcc->cur;
a0d0e21e 2714 if (regmatch(cc->next))
4633a7c4 2715 sayYES;
2ab05381 2716 if (PL_regcc)
2717 PL_regcc->cur = ln;
3280af22 2718 PL_regcc = cc;
4633a7c4 2719 cc->cur = n - 1;
c277df42 2720 cc->lastloc = lastloc;
4633a7c4 2721 sayNO;
a0d0e21e 2722 }
2723 /* NOT REACHED */
c277df42 2724 case BRANCHJ:
2725 next = scan + ARG(scan);
2726 if (next == scan)
2727 next = NULL;
2728 inner = NEXTOPER(NEXTOPER(scan));
2729 goto do_branch;
2730 case BRANCH:
2731 inner = NEXTOPER(scan);
2732 do_branch:
2733 {
2734 CHECKPOINT lastcp;
2735 c1 = OP(scan);
2736 if (OP(next) != c1) /* No choice. */
2737 next = inner; /* Avoid recursion. */
a0d0e21e 2738 else {
3280af22 2739 int lastparen = *PL_reglastparen;
c277df42 2740
2741 REGCP_SET;
a0d0e21e 2742 do {
3280af22 2743 PL_reginput = locinput;
c277df42 2744 if (regmatch(inner))
4633a7c4 2745 sayYES;
c277df42 2746 REGCP_UNWIND;
3280af22 2747 for (n = *PL_reglastparen; n > lastparen; n--)
cf93c79d 2748 PL_regendp[n] = -1;
3280af22 2749 *PL_reglastparen = n;
c277df42 2750 scan = next;
a0d0e21e 2751 /*SUPPRESS 560*/
c277df42 2752 if (n = (c1 == BRANCH ? NEXT_OFF(next) : ARG(next)))
2753 next += n;
a0d0e21e 2754 else
c277df42 2755 next = NULL;
c277df42 2756 inner = NEXTOPER(scan);
2757 if (c1 == BRANCHJ) {
2758 inner = NEXTOPER(inner);
2759 }
2760 } while (scan != NULL && OP(scan) == c1);
4633a7c4 2761 sayNO;
a0d0e21e 2762 /* NOTREACHED */
a687059c 2763 }
a0d0e21e 2764 }
2765 break;
2766 case MINMOD:
2767 minmod = 1;
2768 break;
c277df42 2769 case CURLYM:
2770 {
00db4c45 2771 I32 l = 0;
c277df42 2772 CHECKPOINT lastcp;
2773
2774 /* We suppose that the next guy does not need
2775 backtracking: in particular, it is of constant length,
2776 and has no parenths to influence future backrefs. */
2777 ln = ARG1(scan); /* min to match */
2778 n = ARG2(scan); /* max to match */
c277df42 2779 paren = scan->flags;
2780 if (paren) {
3280af22 2781 if (paren > PL_regsize)
2782 PL_regsize = paren;
2783 if (paren > *PL_reglastparen)
2784 *PL_reglastparen = paren;
c277df42 2785 }
dc45a647 2786 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
c277df42 2787 if (paren)
2788 scan += NEXT_OFF(scan); /* Skip former OPEN. */
3280af22 2789 PL_reginput = locinput;
c277df42 2790 if (minmod) {
2791 minmod = 0;
2792 if (ln && regrepeat_hard(scan, ln, &l) < ln)
2793 sayNO;
5f4b28b2 2794 if (ln && l == 0 && n >= ln
c277df42 2795 /* In fact, this is tricky. If paren, then the
2796 fact that we did/didnot match may influence
2797 future execution. */
2798 && !(paren && ln == 0))
2799 ln = n;
3280af22 2800 locinput = PL_reginput;
22c35a8c 2801 if (PL_regkind[(U8)OP(next)] == EXACT) {
cd439c50 2802 c1 = (U8)*STRING(next);
c277df42 2803 if (OP(next) == EXACTF)
22c35a8c 2804 c2 = PL_fold[c1];
c277df42 2805 else if (OP(next) == EXACTFL)
22c35a8c 2806 c2 = PL_fold_locale[c1];
c277df42 2807 else
2808 c2 = c1;
a0ed51b3 2809 }
2810 else
c277df42 2811 c1 = c2 = -1000;
2812 REGCP_SET;
5f4b28b2 2813 /* This may be improved if l == 0. */
c277df42 2814 while (n >= ln || (n == REG_INFTY && ln > 0 && l)) { /* ln overflow ? */
2815 /* If it could work, try it. */
2816 if (c1 == -1000 ||
3280af22 2817 UCHARAT(PL_reginput) == c1 ||
2818 UCHARAT(PL_reginput) == c2)
c277df42 2819 {
2820 if (paren) {
2821 if (n) {
cf93c79d 2822 PL_regstartp[paren] =
2823 HOPc(PL_reginput, -l) - PL_bostr;
2824 PL_regendp[paren] = PL_reginput - PL_bostr;
a0ed51b3 2825 }
2826 else
cf93c79d 2827 PL_regendp[paren] = -1;
c277df42 2828 }
2829 if (regmatch(next))
2830 sayYES;
2831 REGCP_UNWIND;
2832 }
2833 /* Couldn't or didn't -- move forward. */
3280af22 2834 PL_reginput = locinput;
c277df42 2835 if (regrepeat_hard(scan, 1, &l)) {
2836 ln++;
3280af22 2837 locinput = PL_reginput;
c277df42 2838 }
2839 else
2840 sayNO;
2841 }
a0ed51b3 2842 }
2843 else {
c277df42 2844 n = regrepeat_hard(scan, n, &l);
2845 if (n != 0 && l == 0
2846 /* In fact, this is tricky. If paren, then the
2847 fact that we did/didnot match may influence
2848 future execution. */
2849 && !(paren && ln == 0))
2850 ln = n;
3280af22 2851 locinput = PL_reginput;
c277df42 2852 DEBUG_r(
5c0ca799 2853 PerlIO_printf(Perl_debug_log,
faccc32b 2854 "%*s matched %"IVdf" times, len=%"IVdf"...\n",
7b0972df 2855 (int)(REPORT_CODE_OFF+PL_regindent*2), "",
faccc32b 2856 (IV) n, (IV)l)
c277df42 2857 );
2858 if (n >= ln) {
22c35a8c 2859 if (PL_regkind[(U8)OP(next)] == EXACT) {
cd439c50 2860 c1 = (U8)*STRING(next);
c277df42 2861 if (OP(next) == EXACTF)
22c35a8c 2862 c2 = PL_fold[c1];
c277df42 2863 else if (OP(next) == EXACTFL)
22c35a8c 2864 c2 = PL_fold_locale[c1];
c277df42 2865 else
2866 c2 = c1;
a0ed51b3 2867 }
2868 else
c277df42 2869 c1 = c2 = -1000;
2870 }
2871 REGCP_SET;
2872 while (n >= ln) {
2873 /* If it could work, try it. */
2874 if (c1 == -1000 ||
3280af22 2875 UCHARAT(PL_reginput) == c1 ||
2876 UCHARAT(PL_reginput) == c2)
a0ed51b3 2877 {
2878 DEBUG_r(
c3464db5 2879 PerlIO_printf(Perl_debug_log,
7b0972df 2880 "%*s trying tail with n=%"IVdf"...\n",
2881 (int)(REPORT_CODE_OFF+PL_regindent*2), "", (IV)n)
a0ed51b3 2882 );
2883 if (paren) {
2884 if (n) {
cf93c79d 2885 PL_regstartp[paren] = HOPc(PL_reginput, -l) - PL_bostr;
2886 PL_regendp[paren] = PL_reginput - PL_bostr;
c277df42 2887 }
a0ed51b3 2888 else
cf93c79d 2889 PL_regendp[paren] = -1;
c277df42 2890 }
a0ed51b3 2891 if (regmatch(next))
2892 sayYES;
2893 REGCP_UNWIND;
2894 }
c277df42 2895 /* Couldn't or didn't -- back up. */
2896 n--;
dfe13c55 2897 locinput = HOPc(locinput, -l);
3280af22 2898 PL_reginput = locinput;
c277df42 2899 }
2900 }
2901 sayNO;
2902 break;
2903 }
2904 case CURLYN:
2905 paren = scan->flags; /* Which paren to set */
3280af22 2906 if (paren > PL_regsize)
2907 PL_regsize = paren;
2908 if (paren > *PL_reglastparen)
2909 *PL_reglastparen = paren;
c277df42 2910 ln = ARG1(scan); /* min to match */
2911 n = ARG2(scan); /* max to match */
dc45a647 2912 scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
c277df42 2913 goto repeat;
a0d0e21e 2914 case CURLY:
c277df42 2915 paren = 0;
a0d0e21e 2916 ln = ARG1(scan); /* min to match */
2917 n = ARG2(scan); /* max to match */
dc45a647 2918 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
a0d0e21e 2919 goto repeat;
2920 case STAR:
2921 ln = 0;
c277df42 2922 n = REG_INFTY;
a0d0e21e 2923 scan = NEXTOPER(scan);
c277df42 2924 paren = 0;
a0d0e21e 2925 goto repeat;
2926 case PLUS:
c277df42 2927 ln = 1;
2928 n = REG_INFTY;
2929 scan = NEXTOPER(scan);
2930 paren = 0;
2931 repeat:
a0d0e21e 2932 /*
2933 * Lookahead to avoid useless match attempts
2934 * when we know what character comes next.
2935 */
22c35a8c 2936 if (PL_regkind[(U8)OP(next)] == EXACT) {
cd439c50 2937 c1 = (U8)*STRING(next);
bbce6d69 2938 if (OP(next) == EXACTF)
22c35a8c 2939 c2 = PL_fold[c1];
bbce6d69 2940 else if (OP(next) == EXACTFL)
22c35a8c 2941 c2 = PL_fold_locale[c1];
bbce6d69 2942 else
2943 c2 = c1;
2944 }
a0d0e21e 2945 else
bbce6d69 2946 c1 = c2 = -1000;
3280af22 2947 PL_reginput = locinput;
a0d0e21e 2948 if (minmod) {
c277df42 2949 CHECKPOINT lastcp;
a0d0e21e 2950 minmod = 0;
2951 if (ln && regrepeat(scan, ln) < ln)
4633a7c4 2952 sayNO;
a0ed51b3 2953 locinput = PL_reginput;
c277df42 2954 REGCP_SET;
0fe9bf95 2955 if (c1 != -1000) {
2956 char *e = locinput + n - ln; /* Should not check after this */
2957 char *old = locinput;
2958
2959 if (e >= PL_regeol || (n == REG_INFTY))
2960 e = PL_regeol - 1;
2961 while (1) {
2962 /* Find place 'next' could work */
2963 if (c1 == c2) {
2964 while (locinput <= e && *locinput != c1)
2965 locinput++;
2966 } else {
2967 while (locinput <= e
2968 && *locinput != c1
2969 && *locinput != c2)
2970 locinput++;
2971 }
2972 if (locinput > e)
2973 sayNO;
2974 /* PL_reginput == old now */
2975 if (locinput != old) {
2976 ln = 1; /* Did some */
2977 if (regrepeat(scan, locinput - old) <
2978 locinput - old)
2979 sayNO;
2980 }
2981 /* PL_reginput == locinput now */
2982 if (paren) {
2983 if (ln) {
cf93c79d 2984 PL_regstartp[paren] = HOPc(locinput, -1) - PL_bostr;
2985 PL_regendp[paren] = locinput - PL_bostr;
0fe9bf95 2986 }
2987 else
cf93c79d 2988 PL_regendp[paren] = -1;
0fe9bf95 2989 }
2990 if (regmatch(next))
2991 sayYES;
2992 PL_reginput = locinput; /* Could be reset... */
2993 REGCP_UNWIND;
2994 /* Couldn't or didn't -- move forward. */
2995 old = locinput++;
2996 }
2997 }
2998 else
c277df42 2999 while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */
a0d0e21e 3000 /* If it could work, try it. */
bbce6d69 3001 if (c1 == -1000 ||
3280af22 3002 UCHARAT(PL_reginput) == c1 ||
3003 UCHARAT(PL_reginput) == c2)
bbce6d69 3004 {
c277df42 3005 if (paren) {
3006 if (n) {
cf93c79d 3007 PL_regstartp[paren] = HOPc(PL_reginput, -1) - PL_bostr;
3008 PL_regendp[paren] = PL_reginput - PL_bostr;
a0ed51b3 3009 }
3010 else
cf93c79d 3011 PL_regendp[paren] = -1;
c277df42 3012 }
a0d0e21e 3013 if (regmatch(next))
4633a7c4 3014 sayYES;
c277df42 3015 REGCP_UNWIND;
bbce6d69 3016 }
c277df42 3017 /* Couldn't or didn't -- move forward. */
a0ed51b3 3018 PL_reginput = locinput;
a0d0e21e 3019 if (regrepeat(scan, 1)) {
3020 ln++;
a0ed51b3 3021 locinput = PL_reginput;
3022 }
3023 else
4633a7c4 3024 sayNO;
a0d0e21e 3025 }
3026 }
3027 else {
c277df42 3028 CHECKPOINT lastcp;
a0d0e21e 3029 n = regrepeat(scan, n);
a0ed51b3 3030 locinput = PL_reginput;
22c35a8c 3031 if (ln < n && PL_regkind[(U8)OP(next)] == EOL &&
3280af22 3032 (!PL_multiline || OP(next) == SEOL))
a0d0e21e 3033 ln = n; /* why back off? */
c277df42 3034 REGCP_SET;
3035 if (paren) {
3036 while (n >= ln) {
3037 /* If it could work, try it. */
3038 if (c1 == -1000 ||
3280af22 3039 UCHARAT(PL_reginput) == c1 ||
3040 UCHARAT(PL_reginput) == c2)
c277df42 3041 {
3042 if (paren && n) {
3043 if (n) {
cf93c79d 3044 PL_regstartp[paren] = HOPc(PL_reginput, -1) - PL_bostr;
3045 PL_regendp[paren] = PL_reginput - PL_bostr;
a0ed51b3 3046 }
3047 else
cf93c79d 3048 PL_regendp[paren] = -1;
c277df42 3049 }
3050 if (regmatch(next))
3051 sayYES;
3052 REGCP_UNWIND;
3053 }
3054 /* Couldn't or didn't -- back up. */
3055 n--;
dfe13c55 3056 PL_reginput = locinput = HOPc(locinput, -1);
c277df42 3057 }
a0ed51b3 3058 }
3059 else {
c277df42 3060 while (n >= ln) {
3061 /* If it could work, try it. */
3062 if (c1 == -1000 ||
3280af22 3063 UCHARAT(PL_reginput) == c1 ||
3064 UCHARAT(PL_reginput) == c2)
c277df42 3065 {
3066 if (regmatch(next))
3067 sayYES;
3068 REGCP_UNWIND;
3069 }
3070 /* Couldn't or didn't -- back up. */
3071 n--;
dfe13c55 3072 PL_reginput = locinput = HOPc(locinput, -1);
bbce6d69 3073 }
a0d0e21e 3074 }
3075 }
4633a7c4 3076 sayNO;
c277df42 3077 break;
a0d0e21e 3078 case END:
0f5d15d6 3079 if (PL_reg_call_cc) {
3080 re_cc_state *cur_call_cc = PL_reg_call_cc;
3081 CURCUR *cctmp = PL_regcc;
3082 regexp *re = PL_reg_re;
3083 CHECKPOINT cp, lastcp;
3084
3085 cp = regcppush(0); /* Save *all* the positions. */
3086 REGCP_SET;
3087 regcp_set_to(PL_reg_call_cc->ss); /* Restore parens of
3088 the caller. */
3089 PL_reginput = locinput; /* Make position available to
3090 the callcc. */
3091 cache_re(PL_reg_call_cc->re);
3092 PL_regcc = PL_reg_call_cc->cc;
3093 PL_reg_call_cc = PL_reg_call_cc->prev;
3094 if (regmatch(cur_call_cc->node)) {
3095 PL_reg_call_cc = cur_call_cc;
3096 regcpblow(cp);
3097 sayYES;
3098 }
3099 REGCP_UNWIND;
3100 regcppop();
3101 PL_reg_call_cc = cur_call_cc;
3102 PL_regcc = cctmp;
3103 PL_reg_re = re;
3104 cache_re(re);
3105
3106 DEBUG_r(
3107 PerlIO_printf(Perl_debug_log,
3108 "%*s continuation failed...\n",
3109 REPORT_CODE_OFF+PL_regindent*2, "")
3110 );
7821416a 3111 sayNO_SILENT;
0f5d15d6 3112 }
7821416a 3113 if (locinput < PL_regtill) {
3114 DEBUG_r(PerlIO_printf(Perl_debug_log,
3115 "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
3116 PL_colors[4],
3117 (long)(locinput - PL_reg_starttry),
3118 (long)(PL_regtill - PL_reg_starttry),
3119 PL_colors[5]));
3120 sayNO_FINAL; /* Cannot match: too short. */
3121 }
3122 PL_reginput = locinput; /* put where regtry can find it */
3123 sayYES_FINAL; /* Success! */
7e5428c5 3124 case SUCCEED:
3280af22 3125 PL_reginput = locinput; /* put where regtry can find it */
7821416a 3126 sayYES_LOUD; /* Success! */
c277df42 3127 case SUSPEND:
3128 n = 1;
9fe1d20c 3129 PL_reginput = locinput;
c277df42 3130 goto do_ifmatch;
a0d0e21e 3131 case UNLESSM:
c277df42 3132 n = 0;
a0ed51b3 3133 if (scan->flags) {
0fe9bf95 3134 if (UTF) { /* XXXX This is absolutely
3135 broken, we read before
3136 start of string. */
3137 s = HOPMAYBEc(locinput, -scan->flags);
3138 if (!s)
3139 goto say_yes;
3140 PL_reginput = s;
3141 }
3142 else {
3143 if (locinput < PL_bostr + scan->flags)
3144 goto say_yes;
3145 PL_reginput = locinput - scan->flags;
3146 goto do_ifmatch;
3147 }
a0ed51b3 3148 }
3149 else
3150 PL_reginput = locinput;
c277df42 3151 goto do_ifmatch;
3152 case IFMATCH:
3153 n = 1;
a0ed51b3 3154 if (scan->flags) {
0fe9bf95 3155 if (UTF) { /* XXXX This is absolutely
3156 broken, we read before
3157 start of string. */
3158 s = HOPMAYBEc(locinput, -scan->flags);
3159 if (!s || s < PL_bostr)
3160 goto say_no;
3161 PL_reginput = s;
3162 }
3163 else {
3164 if (locinput < PL_bostr + scan->flags)
3165 goto say_no;
3166 PL_reginput = locinput - scan->flags;
3167 goto do_ifmatch;
3168 }
a0ed51b3 3169 }
3170 else
3171 PL_reginput = locinput;
3172
c277df42 3173 do_ifmatch:
c277df42 3174 inner = NEXTOPER(NEXTOPER(scan));
3175 if (regmatch(inner) != n) {
3176 say_no:
3177 if (logical) {
3178 logical = 0;
3179 sw = 0;
3180 goto do_longjump;
a0ed51b3 3181 }
3182 else
c277df42 3183 sayNO;
3184 }
3185 say_yes:
3186 if (logical) {
3187 logical = 0;
3188 sw = 1;
3189 }
fe44a5e8 3190 if (OP(scan) == SUSPEND) {
3280af22 3191 locinput = PL_reginput;
565764a8 3192 nextchr = UCHARAT(locinput);
fe44a5e8 3193 }
c277df42 3194 /* FALL THROUGH. */
3195 case LONGJMP:
3196 do_longjump:
3197 next = scan + ARG(scan);
3198 if (next == scan)
3199 next = NULL;
a0d0e21e 3200 break;
3201 default:
b900a521 3202 PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
d7d93a81 3203 PTR2UV(scan), OP(scan));
cea2e8a9 3204 Perl_croak(aTHX_ "regexp memory corruption");
a687059c 3205 }
a0d0e21e 3206 scan = next;
3207 }
a687059c 3208
a0d0e21e 3209 /*
3210 * We get here only if there's trouble -- normally "case END" is
3211 * the terminating point.
3212 */
cea2e8a9 3213 Perl_croak(aTHX_ "corrupted regexp pointers");
a0d0e21e 3214 /*NOTREACHED*/
4633a7c4 3215 sayNO;
3216
7821416a 3217yes_loud:
3218 DEBUG_r(
3219 PerlIO_printf(Perl_debug_log,
3220 "%*s %scould match...%s\n",
3221 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],PL_colors[5])
3222 );
3223 goto yes;
3224yes_final:
3225 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
3226 PL_colors[4],PL_colors[5]));
4633a7c4 3227yes:
3228#ifdef DEBUGGING
3280af22 3229 PL_regindent--;
4633a7c4 3230#endif
3231 return 1;
3232
3233no:
7821416a 3234 DEBUG_r(
3235 PerlIO_printf(Perl_debug_log,
3236 "%*s %sfailed...%s\n",
3237 REPORT_CODE_OFF+PL_regindent*2, "",PL_colors[4],PL_colors[5])
3238 );
3239 goto do_no;
3240no_final:
3241do_no:
4633a7c4 3242#ifdef DEBUGGING
3280af22 3243 PL_regindent--;
4633a7c4 3244#endif
a0d0e21e 3245 return 0;
a687059c 3246}
3247
3248/*
3249 - regrepeat - repeatedly match something simple, report how many
3250 */
3251/*
3252 * [This routine now assumes that it will only match on things of length 1.
3253 * That was true before, but now we assume scan - reginput is the count,
a0ed51b3 3254 * rather than incrementing count on every character. [Er, except utf8.]]
a687059c 3255 */
76e3520e 3256STATIC I32
cea2e8a9 3257S_regrepeat(pTHX_ regnode *p, I32 max)
a687059c 3258{
5c0ca799 3259 dTHR;
a0d0e21e 3260 register char *scan;
a0d0e21e 3261 register I32 c;
3280af22 3262 register char *loceol = PL_regeol;
a0ed51b3 3263 register I32 hardcount = 0;
a0d0e21e 3264
3280af22 3265 scan = PL_reginput;
c277df42 3266 if (max != REG_INFTY && max < loceol - scan)
a0d0e21e 3267 loceol = scan + max;
a0d0e21e 3268 switch (OP(p)) {
22c35a8c 3269 case REG_ANY:
a0d0e21e 3270 while (scan < loceol && *scan != '\n')
3271 scan++;
3272 break;
3273 case SANY:
3274 scan = loceol;
3275 break;
a0ed51b3 3276 case ANYUTF8:
3277 loceol = PL_regeol;
3278 while (scan < loceol && *scan != '\n') {
3279 scan += UTF8SKIP(scan);
3280 hardcount++;
3281 }
3282 break;
3283 case SANYUTF8:
3284 loceol = PL_regeol;
3285 while (scan < loceol) {
3286 scan += UTF8SKIP(scan);
3287 hardcount++;
3288 }
3289 break;
bbce6d69 3290 case EXACT: /* length of string is 1 */
cd439c50 3291 c = (U8)*STRING(p);
bbce6d69 3292 while (scan < loceol && UCHARAT(scan) == c)
3293 scan++;
3294 break;
3295 case EXACTF: /* length of string is 1 */
cd439c50 3296 c = (U8)*STRING(p);
bbce6d69 3297 while (scan < loceol &&
22c35a8c 3298 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
bbce6d69 3299 scan++;
3300 break;
3301 case EXACTFL: /* length of string is 1 */
3280af22 3302 PL_reg_flags |= RF_tainted;
cd439c50 3303 c = (U8)*STRING(p);
bbce6d69 3304 while (scan < loceol &&
22c35a8c 3305 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
a0d0e21e 3306 scan++;
3307 break;
a0ed51b3 3308 case ANYOFUTF8:
3309 loceol = PL_regeol;
3310 while (scan < loceol && REGINCLASSUTF8(p, (U8*)scan)) {
3311 scan += UTF8SKIP(scan);
3312 hardcount++;
3313 }
3314 break;
a0d0e21e 3315 case ANYOF:
936ed897 3316 while (scan < loceol && REGINCLASS(p, *scan))
a0d0e21e 3317 scan++;
a0d0e21e 3318 break;
3319 case ALNUM:
3320 while (scan < loceol && isALNUM(*scan))
3321 scan++;
3322 break;
a0ed51b3 3323 case ALNUMUTF8:
3324 loceol = PL_regeol;
dfe13c55 3325 while (scan < loceol && swash_fetch(PL_utf8_alnum, (U8*)scan)) {
a0ed51b3 3326 scan += UTF8SKIP(scan);
3327 hardcount++;
3328 }
3329 break;
bbce6d69 3330 case ALNUML:
3280af22 3331 PL_reg_flags |= RF_tainted;
bbce6d69 3332 while (scan < loceol && isALNUM_LC(*scan))
3333 scan++;
3334 break;
a0ed51b3 3335 case ALNUMLUTF8:
3336 PL_reg_flags |= RF_tainted;
3337 loceol = PL_regeol;
dfe13c55 3338 while (scan < loceol && isALNUM_LC_utf8((U8*)scan)) {
a0ed51b3 3339 scan += UTF8SKIP(scan);
3340 hardcount++;
3341 }
3342 break;
3343 break;
a0d0e21e 3344 case NALNUM:
3345 while (scan < loceol && !isALNUM(*scan))
3346 scan++;
3347 break;
a0ed51b3 3348 case NALNUMUTF8:
3349 loceol = PL_regeol;
dfe13c55 3350 while (scan < loceol && !swash_fetch(PL_utf8_alnum, (U8*)scan)) {
a0ed51b3 3351 scan += UTF8SKIP(scan);
3352 hardcount++;
3353 }
3354 break;
bbce6d69 3355 case NALNUML:
3280af22 3356 PL_reg_flags |= RF_tainted;
bbce6d69 3357 while (scan < loceol && !isALNUM_LC(*scan))
3358 scan++;
3359 break;
a0ed51b3 3360 case NALNUMLUTF8:
3361 PL_reg_flags |= RF_tainted;
3362 loceol = PL_regeol;
dfe13c55 3363 while (scan < loceol && !isALNUM_LC_utf8((U8*)scan)) {
a0ed51b3 3364 scan += UTF8SKIP(scan);
3365 hardcount++;
3366 }
3367 break;
a0d0e21e 3368 case SPACE:
3369 while (scan < loceol && isSPACE(*scan))
3370 scan++;
3371 break;
a0ed51b3 3372 case SPACEUTF8:
3373 loceol = PL_regeol;
dfe13c55 3374 while (scan < loceol && (*scan == ' ' || swash_fetch(PL_utf8_space,(U8*)scan))) {
a0ed51b3 3375 scan += UTF8SKIP(scan);
3376 hardcount++;
3377 }
3378 break;
bbce6d69 3379 case SPACEL:
3280af22 3380 PL_reg_flags |= RF_tainted;
bbce6d69 3381 while (scan < loceol && isSPACE_LC(*scan))
3382 scan++;
3383 break;
a0ed51b3 3384 case SPACELUTF8:
3385 PL_reg_flags |= RF_tainted;
3386 loceol = PL_regeol;
dfe13c55 3387 while (scan < loceol && (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
a0ed51b3 3388 scan += UTF8SKIP(scan);
3389 hardcount++;
3390 }
3391 break;
a0d0e21e 3392 case NSPACE:
3393 while (scan < loceol && !isSPACE(*scan))
3394 scan++;
3395 break;
a0ed51b3 3396 case NSPACEUTF8:
3397 loceol = PL_regeol;
dfe13c55 3398 while (scan < loceol && !(*scan == ' ' || swash_fetch(PL_utf8_space,(U8*)scan))) {
a0ed51b3 3399 scan += UTF8SKIP(scan);
3400 hardcount++;
3401 }
3402 break;
bbce6d69 3403 case NSPACEL:
3280af22 3404 PL_reg_flags |= RF_tainted;
bbce6d69 3405 while (scan < loceol && !isSPACE_LC(*scan))
3406 scan++;
3407 break;
a0ed51b3 3408 case NSPACELUTF8:
3409 PL_reg_flags |= RF_tainted;
3410 loceol = PL_regeol;
dfe13c55 3411 while (scan < loceol && !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
a0ed51b3 3412 scan += UTF8SKIP(scan);
3413 hardcount++;
3414 }
3415 break;
a0d0e21e 3416 case DIGIT:
3417 while (scan < loceol && isDIGIT(*scan))
3418 scan++;
3419 break;
a0ed51b3 3420 case DIGITUTF8:
3421 loceol = PL_regeol;
dfe13c55 3422 while (scan < loceol && swash_fetch(PL_utf8_digit,(U8*)scan)) {
a0ed51b3 3423 scan += UTF8SKIP(scan);
3424 hardcount++;
3425 }
3426 break;
3427 break;
a0d0e21e 3428 case NDIGIT:
3429 while (scan < loceol && !isDIGIT(*scan))
3430 scan++;
3431 break;
a0ed51b3 3432 case NDIGITUTF8:
3433 loceol = PL_regeol;
dfe13c55 3434 while (scan < loceol && !swash_fetch(PL_utf8_digit,(U8*)scan)) {
a0ed51b3 3435 scan += UTF8SKIP(scan);
3436 hardcount++;
3437 }
3438 break;
a0d0e21e 3439 default: /* Called on something of 0 width. */
3440 break; /* So match right here or not at all. */
3441 }
a687059c 3442
a0ed51b3 3443 if (hardcount)
3444 c = hardcount;
3445 else
3446 c = scan - PL_reginput;
3280af22 3447 PL_reginput = scan;
a687059c 3448
c277df42 3449 DEBUG_r(
3450 {
3451 SV *prop = sv_newmortal();
3452
3453 regprop(prop, p);
3454 PerlIO_printf(Perl_debug_log,
7b0972df 3455 "%*s %s can match %"IVdf" times out of %"IVdf"...\n",
3456 REPORT_CODE_OFF+1, "", SvPVX(prop),(IV)c,(IV)max);
c277df42 3457 });
3458
a0d0e21e 3459 return(c);
a687059c 3460}
3461
3462/*
c277df42 3463 - regrepeat_hard - repeatedly match something, report total lenth and length
3464 *
3465 * The repeater is supposed to have constant length.
3466 */
3467
76e3520e 3468STATIC I32
cea2e8a9 3469S_regrepeat_hard(pTHX_ regnode *p, I32 max, I32 *lp)
c277df42 3470{
5c0ca799 3471 dTHR;
c277df42 3472 register char *scan;
3473 register char *start;
3280af22 3474 register char *loceol = PL_regeol;
a0ed51b3 3475 I32 l = 0;
708e3b05 3476 I32 count = 0, res = 1;
a0ed51b3 3477
3478 if (!max)
3479 return 0;
c277df42 3480
3280af22 3481 start = PL_reginput;
a0ed51b3 3482 if (UTF) {
708e3b05 3483 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3 3484 if (!count++) {
3485 l = 0;
3486 while (start < PL_reginput) {
3487 l++;
3488 start += UTF8SKIP(start);
3489 }
3490 *lp = l;
3491 if (l == 0)
3492 return max;
3493 }
3494 if (count == max)
3495 return count;
3496 }
3497 }
3498 else {
708e3b05 3499 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3 3500 if (!count++) {
3501 *lp = l = PL_reginput - start;
3502 if (max != REG_INFTY && l*max < loceol - scan)
3503 loceol = scan + l*max;
3504 if (l == 0)
3505 return max;
c277df42 3506 }
3507 }
3508 }
708e3b05 3509 if (!res)
3280af22 3510 PL_reginput = scan;
c277df42 3511
a0ed51b3 3512 return count;
c277df42 3513}
3514
3515/*
cb8d8820 3516 - reginclass - determine if a character falls into a character class
bbce6d69 3517 */
3518
76e3520e 3519STATIC bool
936ed897 3520S_reginclass(pTHX_ register regnode *p, register I32 c)
bbce6d69 3521{
5c0ca799 3522 dTHR;
b8c5462f 3523 char flags = ANYOF_FLAGS(p);
bbce6d69 3524 bool match = FALSE;
3525
3526 c &= 0xFF;
b8c5462f 3527 if (ANYOF_BITMAP_TEST(p, c))
bbce6d69 3528 match = TRUE;
3529 else if (flags & ANYOF_FOLD) {
3530 I32 cf;
3531 if (flags & ANYOF_LOCALE) {
3280af22 3532 PL_reg_flags |= RF_tainted;
22c35a8c 3533 cf = PL_fold_locale[c];
bbce6d69 3534 }
3535 else
22c35a8c 3536 cf = PL_fold[c];
b8c5462f 3537 if (ANYOF_BITMAP_TEST(p, cf))
bbce6d69 3538 match = TRUE;
3539 }
3540
b8c5462f 3541 if (!match && (flags & ANYOF_CLASS)) {
3280af22 3542 PL_reg_flags |= RF_tainted;
b8c5462f 3543 if (
3544 (ANYOF_CLASS_TEST(p, ANYOF_ALNUM) && isALNUM_LC(c)) ||
3545 (ANYOF_CLASS_TEST(p, ANYOF_NALNUM) && !isALNUM_LC(c)) ||
3546 (ANYOF_CLASS_TEST(p, ANYOF_SPACE) && isSPACE_LC(c)) ||
3547 (ANYOF_CLASS_TEST(p, ANYOF_NSPACE) && !isSPACE_LC(c)) ||
3548 (ANYOF_CLASS_TEST(p, ANYOF_DIGIT) && isDIGIT_LC(c)) ||
3549 (ANYOF_CLASS_TEST(p, ANYOF_NDIGIT) && !isDIGIT_LC(c)) ||
3550 (ANYOF_CLASS_TEST(p, ANYOF_ALNUMC) && isALNUMC_LC(c)) ||
3551 (ANYOF_CLASS_TEST(p, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
3552 (ANYOF_CLASS_TEST(p, ANYOF_ALPHA) && isALPHA_LC(c)) ||
3553 (ANYOF_CLASS_TEST(p, ANYOF_NALPHA) && !isALPHA_LC(c)) ||
3554 (ANYOF_CLASS_TEST(p, ANYOF_ASCII) && isASCII(c)) ||
3555 (ANYOF_CLASS_TEST(p, ANYOF_NASCII) && !isASCII(c)) ||
3556 (ANYOF_CLASS_TEST(p, ANYOF_CNTRL) && isCNTRL_LC(c)) ||
3557 (ANYOF_CLASS_TEST(p, ANYOF_NCNTRL) && !isCNTRL_LC(c)) ||
3558 (ANYOF_CLASS_TEST(p, ANYOF_GRAPH) && isGRAPH_LC(c)) ||
3559 (ANYOF_CLASS_TEST(p, ANYOF_NGRAPH) && !isGRAPH_LC(c)) ||
3560 (ANYOF_CLASS_TEST(p, ANYOF_LOWER) && isLOWER_LC(c)) ||
3561 (ANYOF_CLASS_TEST(p, ANYOF_NLOWER) && !isLOWER_LC(c)) ||
3562 (ANYOF_CLASS_TEST(p, ANYOF_PRINT) && isPRINT_LC(c)) ||
3563 (ANYOF_CLASS_TEST(p, ANYOF_NPRINT) && !isPRINT_LC(c)) ||
3564 (ANYOF_CLASS_TEST(p, ANYOF_PUNCT) && isPUNCT_LC(c)) ||
3565 (ANYOF_CLASS_TEST(p, ANYOF_NPUNCT) && !isPUNCT_LC(c)) ||
3566 (ANYOF_CLASS_TEST(p, ANYOF_UPPER) && isUPPER_LC(c)) ||
3567 (ANYOF_CLASS_TEST(p, ANYOF_NUPPER) && !isUPPER_LC(c)) ||
3568 (ANYOF_CLASS_TEST(p, ANYOF_XDIGIT) && isXDIGIT(c)) ||
3569 (ANYOF_CLASS_TEST(p, ANYOF_NXDIGIT) && !isXDIGIT(c))
3570 ) /* How's that for a conditional? */
bbce6d69 3571 {
3572 match = TRUE;
3573 }
3574 }
3575
ae5c130c 3576 return (flags & ANYOF_INVERT) ? !match : match;
bbce6d69 3577}
3578
a0ed51b3 3579STATIC bool
cea2e8a9 3580S_reginclassutf8(pTHX_ regnode *f, U8 *p)
c485e607 3581{
3582 dTHR;
a0ed51b3 3583 char flags = ARG1(f);
3584 bool match = FALSE;
3585 SV *sv = (SV*)PL_regdata->data[ARG2(f)];
3586
3587 if (swash_fetch(sv, p))
3588 match = TRUE;
3589 else if (flags & ANYOF_FOLD) {
3590 I32 cf;
dfe13c55 3591 U8 tmpbuf[10];
a0ed51b3 3592 if (flags & ANYOF_LOCALE) {
3593 PL_reg_flags |= RF_tainted;
3594 uv_to_utf8(tmpbuf, toLOWER_LC_utf8(p));
3595 }
3596 else
3597 uv_to_utf8(tmpbuf, toLOWER_utf8(p));
3598 if (swash_fetch(sv, tmpbuf))
3599 match = TRUE;
3600 }
3601
b8c5462f 3602 /* UTF8 combined with ANYOF_CLASS is ill-defined. */
a0ed51b3 3603
3604 return (flags & ANYOF_INVERT) ? !match : match;
3605}
161b471a 3606
dfe13c55 3607STATIC U8 *
cea2e8a9 3608S_reghop(pTHX_ U8 *s, I32 off)
c485e607 3609{
3610 dTHR;
a0ed51b3 3611 if (off >= 0) {
3612 while (off-- && s < (U8*)PL_regeol)
3613 s += UTF8SKIP(s);
3614 }
3615 else {
3616 while (off++) {
3617 if (s > (U8*)PL_bostr) {
3618 s--;
3619 if (*s & 0x80) {
3620 while (s > (U8*)PL_bostr && (*s & 0xc0) == 0x80)
3621 s--;
3622 } /* XXX could check well-formedness here */
3623 }
3624 }
3625 }
3626 return s;
3627}
161b471a 3628
dfe13c55 3629STATIC U8 *
cea2e8a9 3630S_reghopmaybe(pTHX_ U8* s, I32 off)
a0ed51b3 3631{
c485e607 3632 dTHR;
a0ed51b3 3633 if (off >= 0) {
3634 while (off-- && s < (U8*)PL_regeol)
3635 s += UTF8SKIP(s);
3636 if (off >= 0)
3637 return 0;
3638 }
3639 else {
3640 while (off++) {
3641 if (s > (U8*)PL_bostr) {
3642 s--;
3643 if (*s & 0x80) {
3644 while (s > (U8*)PL_bostr && (*s & 0xc0) == 0x80)
3645 s--;
3646 } /* XXX could check well-formedness here */
3647 }
3648 else
3649 break;
3650 }
3651 if (off <= 0)
3652 return 0;
3653 }
3654 return s;
3655}
51371543 3656
3657#ifdef PERL_OBJECT
51371543 3658#include "XSUB.h"
3659#endif
3660
3661static void
3662restore_pos(pTHXo_ void *arg)
3663{
3664 dTHR;
3665 if (PL_reg_eval_set) {
3666 if (PL_reg_oldsaved) {
3667 PL_reg_re->subbeg = PL_reg_oldsaved;
3668 PL_reg_re->sublen = PL_reg_oldsavedlen;
3669 RX_MATCH_COPIED_on(PL_reg_re);
3670 }
3671 PL_reg_magic->mg_len = PL_reg_oldpos;
3672 PL_reg_eval_set = 0;
3673 PL_curpm = PL_reg_oldcurpm;
3674 }
3675}
3676