add missing file from change#1943
[p5sagit/p5-mst-13.2.git] / regexec.c
CommitLineData
a0d0e21e 1/* regexec.c
2 */
3
4/*
5 * "One Ring to rule them all, One Ring to find them..."
6 */
7
a687059c 8/* NOTE: this is derived from Henry Spencer's regexp code, and should not
9 * confused with the original package (see point 3 below). Thanks, Henry!
10 */
11
12/* Additional note: this code is very heavily munged from Henry's version
13 * in places. In some spots I've traded clarity for efficiency, so don't
14 * blame Henry for some of the lack of readability.
15 */
16
e50aee73 17/* The names of the functions have been changed from regcomp and
18 * regexec to pregcomp and pregexec in order to avoid conflicts
19 * with the POSIX routines of the same names.
20*/
21
b9d5759e 22#ifdef PERL_EXT_RE_BUILD
23/* need to replace pregcomp et al, so enable that */
24# ifndef PERL_IN_XSUB_RE
25# define PERL_IN_XSUB_RE
26# endif
27/* need access to debugger hooks */
28# ifndef DEBUGGING
29# define DEBUGGING
30# endif
31#endif
32
33#ifdef PERL_IN_XSUB_RE
d06ea78c 34/* We *really* need to overwrite these symbols: */
56953603 35# define Perl_regexec_flags my_regexec
36# define Perl_regdump my_regdump
37# define Perl_regprop my_regprop
d06ea78c 38/* *These* symbols are masked to allow static link. */
39# define Perl_pregexec my_pregexec
56953603 40#endif
41
f0fcb552 42/*SUPPRESS 112*/
a687059c 43/*
e50aee73 44 * pregcomp and pregexec -- regsub and regerror are not used in perl
a687059c 45 *
46 * Copyright (c) 1986 by University of Toronto.
47 * Written by Henry Spencer. Not derived from licensed software.
48 *
49 * Permission is granted to anyone to use this software for any
50 * purpose on any computer system, and to redistribute it freely,
51 * subject to the following restrictions:
52 *
53 * 1. The author is not responsible for the consequences of use of
54 * this software, no matter how awful, even if they arise
55 * from defects in it.
56 *
57 * 2. The origin of this software must not be misrepresented, either
58 * by explicit claim or by omission.
59 *
60 * 3. Altered versions must be plainly marked as such, and must not
61 * be misrepresented as being the original software.
62 *
63 **** Alterations to Henry's code are...
64 ****
a0ed51b3 65 **** Copyright (c) 1991-1998, Larry Wall
a687059c 66 ****
9ef589d8 67 **** You may distribute under the terms of either the GNU General Public
68 **** License or the Artistic License, as specified in the README file.
a687059c 69 *
70 * Beware that some of this code is subtly aware of the way operator
71 * precedence is structured in regular expressions. Serious changes in
72 * regular-expression syntax might require a total rethink.
73 */
74#include "EXTERN.h"
75#include "perl.h"
0f5d15d6 76typedef MAGIC *my_magic;
77
a687059c 78#include "regcomp.h"
79
c277df42 80#define RF_tainted 1 /* tainted information used? */
81#define RF_warned 2 /* warned about big count? */
ce862d02 82#define RF_evaled 4 /* Did an EVAL with setting? */
a0ed51b3 83#define RF_utf8 8 /* String contains multibyte chars? */
84
85#define UTF (PL_reg_flags & RF_utf8)
ce862d02 86
87#define RS_init 1 /* eval environment created */
88#define RS_set 2 /* replsv value is set */
c277df42 89
a687059c 90#ifndef STATIC
91#define STATIC static
92#endif
93
76e3520e 94#ifndef PERL_OBJECT
a0d0e21e 95typedef I32 CHECKPOINT;
96
c277df42 97/*
98 * Forwards.
99 */
100
101static I32 regmatch _((regnode *prog));
102static I32 regrepeat _((regnode *p, I32 max));
103static I32 regrepeat_hard _((regnode *p, I32 max, I32 *lp));
104static I32 regtry _((regexp *prog, char *startpos));
ae5c130c 105
c277df42 106static bool reginclass _((char *p, I32 c));
a0ed51b3 107static bool reginclassutf8 _((regnode *f, U8* p));
55497cff 108static CHECKPOINT regcppush _((I32 parenfloor));
109static char * regcppop _((void));
942e002e 110static char * regcp_set_to _((I32 ss));
111static void cache_re _((regexp *prog));
76e3520e 112#endif
a0ed51b3 113
ae5c130c 114#define REGINCLASS(p,c) (*(p) ? reginclass(p,c) : ANYOF_TEST(p,c))
a0ed51b3 115#define REGINCLASSUTF8(f,p) (ARG1(f) ? reginclassutf8(f,p) : swash_fetch((SV*)PL_regdata->data[ARG2(f)],p))
116
117#define CHR_SVLEN(sv) (UTF ? sv_len_utf8(sv) : SvCUR(sv))
118#define CHR_DIST(a,b) (UTF ? utf8_distance(a,b) : a - b)
119
dfe13c55 120#ifndef PERL_OBJECT
121static U8 * reghop _((U8 *pos, I32 off));
122static U8 * reghopmaybe _((U8 *pos, I32 off));
123#endif
124#define reghop_c(pos,off) ((char*)reghop((U8*)pos, off))
125#define reghopmaybe_c(pos,off) ((char*)reghopmaybe((U8*)pos, off))
126#define HOP(pos,off) (UTF ? reghop((U8*)pos, off) : (U8*)(pos + off))
127#define HOPMAYBE(pos,off) (UTF ? reghopmaybe((U8*)pos, off) : (U8*)(pos + off))
128#define HOPc(pos,off) ((char*)HOP(pos,off))
129#define HOPMAYBEc(pos,off) ((char*)HOPMAYBE(pos,off))
a0d0e21e 130
76e3520e 131STATIC CHECKPOINT
8ac85365 132regcppush(I32 parenfloor)
a0d0e21e 133{
11343788 134 dTHR;
3280af22 135 int retval = PL_savestack_ix;
136 int i = (PL_regsize - parenfloor) * 4;
a0d0e21e 137 int p;
138
139 SSCHECK(i + 5);
3280af22 140 for (p = PL_regsize; p > parenfloor; p--) {
141 SSPUSHPTR(PL_regendp[p]);
142 SSPUSHPTR(PL_regstartp[p]);
143 SSPUSHPTR(PL_reg_start_tmp[p]);
a0d0e21e 144 SSPUSHINT(p);
145 }
3280af22 146 SSPUSHINT(PL_regsize);
147 SSPUSHINT(*PL_reglastparen);
148 SSPUSHPTR(PL_reginput);
a0d0e21e 149 SSPUSHINT(i + 3);
150 SSPUSHINT(SAVEt_REGCONTEXT);
151 return retval;
152}
153
c277df42 154/* These are needed since we do not localize EVAL nodes: */
c3464db5 155# define REGCP_SET DEBUG_r(PerlIO_printf(Perl_debug_log, \
156 " Setting an EVAL scope, savestack=%i\n", \
3280af22 157 PL_savestack_ix)); lastcp = PL_savestack_ix
c3464db5 158
3280af22 159# define REGCP_UNWIND DEBUG_r(lastcp != PL_savestack_ix ? \
c3464db5 160 PerlIO_printf(Perl_debug_log, \
161 " Clearing an EVAL scope, savestack=%i..%i\n", \
3280af22 162 lastcp, PL_savestack_ix) : 0); regcpblow(lastcp)
c277df42 163
76e3520e 164STATIC char *
8ac85365 165regcppop(void)
a0d0e21e 166{
11343788 167 dTHR;
a0d0e21e 168 I32 i = SSPOPINT;
169 U32 paren = 0;
170 char *input;
171 char *tmps;
172 assert(i == SAVEt_REGCONTEXT);
173 i = SSPOPINT;
174 input = (char *) SSPOPPTR;
3280af22 175 *PL_reglastparen = SSPOPINT;
176 PL_regsize = SSPOPINT;
c277df42 177 for (i -= 3; i > 0; i -= 4) {
a0d0e21e 178 paren = (U32)SSPOPINT;
3280af22 179 PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
180 PL_regstartp[paren] = (char *) SSPOPPTR;
a0d0e21e 181 tmps = (char*)SSPOPPTR;
3280af22 182 if (paren <= *PL_reglastparen)
183 PL_regendp[paren] = tmps;
c277df42 184 DEBUG_r(
c3464db5 185 PerlIO_printf(Perl_debug_log,
186 " restoring \\%d to %d(%d)..%d%s\n",
3280af22 187 paren, PL_regstartp[paren] - PL_regbol,
188 PL_reg_start_tmp[paren] - PL_regbol,
189 PL_regendp[paren] - PL_regbol,
190 (paren > *PL_reglastparen ? "(no)" : ""));
c277df42 191 );
a0d0e21e 192 }
c277df42 193 DEBUG_r(
3280af22 194 if (*PL_reglastparen + 1 <= PL_regnpar) {
c3464db5 195 PerlIO_printf(Perl_debug_log,
196 " restoring \\%d..\\%d to undef\n",
3280af22 197 *PL_reglastparen + 1, PL_regnpar);
c277df42 198 }
199 );
3280af22 200 for (paren = *PL_reglastparen + 1; paren <= PL_regnpar; paren++) {
201 if (paren > PL_regsize)
202 PL_regstartp[paren] = Nullch;
203 PL_regendp[paren] = Nullch;
a0d0e21e 204 }
205 return input;
206}
207
0f5d15d6 208STATIC char *
209regcp_set_to(I32 ss)
210{
46124e9e 211 dTHR;
0f5d15d6 212 I32 tmp = PL_savestack_ix;
213
214 PL_savestack_ix = ss;
215 regcppop();
216 PL_savestack_ix = tmp;
942e002e 217 return Nullch;
0f5d15d6 218}
219
220typedef struct re_cc_state
221{
222 I32 ss;
223 regnode *node;
224 struct re_cc_state *prev;
225 CURCUR *cc;
226 regexp *re;
227} re_cc_state;
228
c277df42 229#define regcpblow(cp) LEAVE_SCOPE(cp)
a0d0e21e 230
a687059c 231/*
e50aee73 232 * pregexec and friends
a687059c 233 */
234
235/*
c277df42 236 - pregexec - match a regexp against a string
a687059c 237 */
c277df42 238I32
c3464db5 239pregexec(register regexp *prog, char *stringarg, register char *strend,
240 char *strbeg, I32 minend, SV *screamer, U32 nosave)
c277df42 241/* strend: pointer to null at end of string */
242/* strbeg: real beginning of string */
243/* minend: end of match must be >=minend after stringarg. */
244/* nosave: For optimizations. */
245{
246 return
247 regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
248 nosave ? 0 : REXEC_COPY_STR);
249}
0f5d15d6 250
251STATIC void
252cache_re(regexp *prog)
253{
46124e9e 254 dTHR;
0f5d15d6 255 PL_regprecomp = prog->precomp; /* Needed for FAIL. */
256#ifdef DEBUGGING
257 PL_regprogram = prog->program;
258#endif
259 PL_regnpar = prog->nparens;
260 PL_regdata = prog->data;
261 PL_reg_re = prog;
262}
c277df42 263
a687059c 264/*
c277df42 265 - regexec_flags - match a regexp against a string
a687059c 266 */
79072805 267I32
c3464db5 268regexec_flags(register regexp *prog, char *stringarg, register char *strend,
269 char *strbeg, I32 minend, SV *screamer, void *data, U32 flags)
c277df42 270/* strend: pointer to null at end of string */
271/* strbeg: real beginning of string */
272/* minend: end of match must be >=minend after stringarg. */
273/* data: May be used for some additional optimizations. */
274/* nosave: For optimizations. */
a687059c 275{
5c0ca799 276 dTHR;
a0d0e21e 277 register char *s;
c277df42 278 register regnode *c;
a0d0e21e 279 register char *startpos = stringarg;
280 register I32 tmp;
c277df42 281 I32 minlen; /* must match at least this many chars */
a0d0e21e 282 I32 dontbother = 0; /* how many characters not to try at end */
283 CURCUR cc;
c277df42 284 I32 start_shift = 0; /* Offset of the start to find
a0ed51b3 285 constant substr. */ /* CC */
286 I32 end_shift = 0; /* Same for the end. */ /* CC */
c277df42 287 I32 scream_pos = -1; /* Internal iterator of scream. */
288 char *scream_olds;
3280af22 289 SV* oreplsv = GvSV(PL_replgv);
a687059c 290
a0d0e21e 291 cc.cur = 0;
4633a7c4 292 cc.oldcc = 0;
3280af22 293 PL_regcc = &cc;
a0d0e21e 294
0f5d15d6 295 cache_re(prog);
a0d0e21e 296#ifdef DEBUGGING
3280af22 297 PL_regnarrate = PL_debug & 512;
a0d0e21e 298#endif
299
300 /* Be paranoid... */
301 if (prog == NULL || startpos == NULL) {
302 croak("NULL regexp parameter");
303 return 0;
304 }
305
c277df42 306 minlen = prog->minlen;
307 if (strend - startpos < minlen) goto phooey;
308
a0d0e21e 309 if (startpos == strbeg) /* is ^ valid at stringarg? */
3280af22 310 PL_regprev = '\n';
a0d0e21e 311 else {
a0ed51b3 312 PL_regprev = (U32)stringarg[-1];
3280af22 313 if (!PL_multiline && PL_regprev == '\n')
314 PL_regprev = '\0'; /* force ^ to NOT match */
a0d0e21e 315 }
bbce6d69 316
a0d0e21e 317 /* Check validity of program. */
318 if (UCHARAT(prog->program) != MAGIC) {
319 FAIL("corrupted regexp program");
320 }
321
3280af22 322 PL_reg_flags = 0;
323 PL_reg_eval_set = 0;
a0d0e21e 324
a0ed51b3 325 if (prog->reganch & ROPT_UTF8)
326 PL_reg_flags |= RF_utf8;
327
328 /* Mark beginning of line for ^ and lookbehind. */
329 PL_regbol = startpos;
330 PL_bostr = strbeg;
331
332 /* Mark end of line for $ (and such) */
333 PL_regeol = strend;
334
335 /* see how far we have to get to not match where we matched before */
336 PL_regtill = startpos+minend;
337
0f5d15d6 338 /* We start without call_cc context. */
339 PL_reg_call_cc = 0;
340
a0d0e21e 341 /* If there is a "must appear" string, look for it. */
342 s = startpos;
c277df42 343 if (!(flags & REXEC_CHECKED)
344 && prog->check_substr != Nullsv &&
774d564b 345 !(prog->reganch & ROPT_ANCH_GPOS) &&
c277df42 346 (!(prog->reganch & (ROPT_ANCH_BOL | ROPT_ANCH_MBOL))
3280af22 347 || (PL_multiline && prog->check_substr == prog->anchored_substr)) )
a0d0e21e 348 {
a0ed51b3 349 char *t;
350 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
c277df42 351 /* Should be nonnegative! */
a0ed51b3 352 end_shift = minlen - start_shift - CHR_SVLEN(prog->check_substr);
c277df42 353 if (screamer) {
3280af22 354 if (PL_screamfirst[BmRARE(prog->check_substr)] >= 0)
c277df42 355 s = screaminstr(screamer, prog->check_substr,
356 start_shift + (stringarg - strbeg),
357 end_shift, &scream_pos, 0);
a0d0e21e 358 else
359 s = Nullch;
c277df42 360 scream_olds = s;
0a12ae7d 361 }
a0d0e21e 362 else
c277df42 363 s = fbm_instr((unsigned char*)s + start_shift,
364 (unsigned char*)strend - end_shift,
411d5715 365 prog->check_substr, 0);
a0d0e21e 366 if (!s) {
c277df42 367 ++BmUSEFUL(prog->check_substr); /* hooray */
a0d0e21e 368 goto phooey; /* not present */
a0ed51b3 369 }
370 else if (s - stringarg > prog->check_offset_max &&
371 (UTF
dfe13c55 372 ? ((t = reghopmaybe_c(s, -(prog->check_offset_max))) && t >= stringarg)
a0ed51b3 373 : (t = s - prog->check_offset_max) != 0
374 )
375 )
376 {
c277df42 377 ++BmUSEFUL(prog->check_substr); /* hooray/2 */
a0ed51b3 378 s = t;
379 }
380 else if (!(prog->reganch & ROPT_NAUGHTY)
c277df42 381 && --BmUSEFUL(prog->check_substr) < 0
382 && prog->check_substr == prog->float_substr) { /* boo */
383 SvREFCNT_dec(prog->check_substr);
384 prog->check_substr = Nullsv; /* disable */
385 prog->float_substr = Nullsv; /* clear */
a0d0e21e 386 s = startpos;
a0ed51b3 387 }
388 else
389 s = startpos;
a0d0e21e 390 }
a687059c 391
c277df42 392 DEBUG_r(
393 PerlIO_printf(Perl_debug_log,
8d300b32 394 "%sMatching%s `%s%.60s%s%s' against `%s%.*s%s%s'\n",
395 PL_colors[4],PL_colors[5],PL_colors[0],
396 prog->precomp,
397 PL_colors[1],
c277df42 398 (strlen(prog->precomp) > 60 ? "..." : ""),
8d300b32 399 PL_colors[0],
c277df42 400 (strend - startpos > 60 ? 60 : strend - startpos),
8d300b32 401 startpos, PL_colors[1],
c277df42 402 (strend - startpos > 60 ? "..." : ""))
403 );
404
a0d0e21e 405 /* Simplest case: anchored match need be tried only once. */
774d564b 406 /* [unless only anchor is BOL and multiline is set] */
a0d0e21e 407 if (prog->reganch & ROPT_ANCH) {
408 if (regtry(prog, startpos))
409 goto got_it;
774d564b 410 else if (!(prog->reganch & ROPT_ANCH_GPOS) &&
3280af22 411 (PL_multiline || (prog->reganch & ROPT_IMPLICIT)
c277df42 412 || (prog->reganch & ROPT_ANCH_MBOL)))
774d564b 413 {
a0d0e21e 414 if (minlen)
415 dontbother = minlen - 1;
dfe13c55 416 strend = HOPc(strend, -dontbother);
a0d0e21e 417 /* for multiline we only have to try after newlines */
418 if (s > startpos)
419 s--;
420 while (s < strend) {
a0ed51b3 421 if (*s++ == '\n') { /* don't need utf8skip here */
a0d0e21e 422 if (s < strend && regtry(prog, s))
423 goto got_it;
424 }
35c8bce7 425 }
35c8bce7 426 }
a0d0e21e 427 goto phooey;
428 }
35c8bce7 429
a0d0e21e 430 /* Messy cases: unanchored match. */
c277df42 431 if (prog->anchored_substr && prog->reganch & ROPT_SKIP) {
432 /* we have /x+whatever/ */
433 /* it must be a one character string */
434 char ch = SvPVX(prog->anchored_substr)[0];
a0ed51b3 435 if (UTF) {
436 while (s < strend) {
437 if (*s == ch) {
438 if (regtry(prog, s)) goto got_it;
439 s += UTF8SKIP(s);
440 while (s < strend && *s == ch)
441 s += UTF8SKIP(s);
442 }
443 s += UTF8SKIP(s);
444 }
445 }
446 else {
447 while (s < strend) {
448 if (*s == ch) {
449 if (regtry(prog, s)) goto got_it;
c277df42 450 s++;
a0ed51b3 451 while (s < strend && *s == ch)
452 s++;
453 }
454 s++;
a0d0e21e 455 }
a687059c 456 }
c277df42 457 }
458 /*SUPPRESS 560*/
459 else if (prog->anchored_substr != Nullsv
460 || (prog->float_substr != Nullsv
461 && prog->float_max_offset < strend - s)) {
462 SV *must = prog->anchored_substr
463 ? prog->anchored_substr : prog->float_substr;
464 I32 back_max =
465 prog->anchored_substr ? prog->anchored_offset : prog->float_max_offset;
466 I32 back_min =
467 prog->anchored_substr ? prog->anchored_offset : prog->float_min_offset;
468 I32 delta = back_max - back_min;
dfe13c55 469 char *last = HOPc(strend, 0-(CHR_SVLEN(must) + back_min)); /* Cannot start after this */
a0ed51b3 470 char *last1; /* Last position checked before */
471
472 if (s > PL_bostr)
dfe13c55 473 last1 = HOPc(s, -1);
a0ed51b3 474 else
475 last1 = s - 1; /* bogus */
c277df42 476
477 /* XXXX check_substr already used to find `s', can optimize if
478 check_substr==must. */
479 scream_pos = -1;
480 dontbother = end_shift;
dfe13c55 481 strend = HOPc(strend, -dontbother);
c277df42 482 while ( (s <= last) &&
483 (screamer
dfe13c55 484 ? (s = screaminstr(screamer, must, HOPc(s, back_min) - strbeg,
c277df42 485 end_shift, &scream_pos, 0))
a0ed51b3 486 : (s = fbm_instr((unsigned char*)HOP(s, back_min),
411d5715 487 (unsigned char*)strend, must, 0))) ) {
dfe13c55 488 if (HOPc(s, -back_max) > last1) {
489 last1 = HOPc(s, -back_min);
490 s = HOPc(s, -back_max);
a0ed51b3 491 }
492 else {
dfe13c55 493 char *t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
c277df42 494
dfe13c55 495 last1 = HOPc(s, -back_min);
c277df42 496 s = t;
a0d0e21e 497 }
a0ed51b3 498 if (UTF) {
499 while (s <= last1) {
500 if (regtry(prog, s))
501 goto got_it;
502 s += UTF8SKIP(s);
503 }
504 }
505 else {
506 while (s <= last1) {
507 if (regtry(prog, s))
508 goto got_it;
509 s++;
510 }
a0d0e21e 511 }
512 }
513 goto phooey;
a0ed51b3 514 }
515 else if (c = prog->regstclass) {
a0d0e21e 516 I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
a0ed51b3 517 char *cc;
a687059c 518
a0d0e21e 519 if (minlen)
520 dontbother = minlen - 1;
dfe13c55 521 strend = HOPc(strend, -dontbother); /* don't bother with what can't match */
a0d0e21e 522 tmp = 1;
523 /* We know what class it must start with. */
524 switch (OP(c)) {
a0ed51b3 525 case ANYOFUTF8:
526 cc = (char *) OPERAND(c);
527 while (s < strend) {
528 if (REGINCLASSUTF8(c, (U8*)s)) {
529 if (tmp && regtry(prog, s))
530 goto got_it;
531 else
532 tmp = doevery;
533 }
534 else
535 tmp = 1;
536 s += UTF8SKIP(s);
537 }
538 break;
a0d0e21e 539 case ANYOF:
a0ed51b3 540 cc = (char *) OPERAND(c);
a0d0e21e 541 while (s < strend) {
a0ed51b3 542 if (REGINCLASS(cc, *s)) {
a0d0e21e 543 if (tmp && regtry(prog, s))
544 goto got_it;
545 else
546 tmp = doevery;
a687059c 547 }
a0d0e21e 548 else
549 tmp = 1;
550 s++;
551 }
552 break;
bbce6d69 553 case BOUNDL:
3280af22 554 PL_reg_flags |= RF_tainted;
bbce6d69 555 /* FALL THROUGH */
a0d0e21e 556 case BOUND:
a0ed51b3 557 if (minlen) {
558 dontbother++;
559 strend -= 1;
560 }
3280af22 561 tmp = (s != startpos) ? UCHARAT(s - 1) : PL_regprev;
95bac841 562 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
a0d0e21e 563 while (s < strend) {
95bac841 564 if (tmp == !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
a0d0e21e 565 tmp = !tmp;
566 if (regtry(prog, s))
567 goto got_it;
a687059c 568 }
a0d0e21e 569 s++;
570 }
571 if ((minlen || tmp) && regtry(prog,s))
572 goto got_it;
573 break;
a0ed51b3 574 case BOUNDLUTF8:
575 PL_reg_flags |= RF_tainted;
576 /* FALL THROUGH */
577 case BOUNDUTF8:
578 if (minlen) {
579 dontbother++;
dfe13c55 580 strend = reghop_c(strend, -1);
a0ed51b3 581 }
dfe13c55 582 tmp = (I32)(s != startpos) ? utf8_to_uv(reghop((U8*)s, -1), 0) : PL_regprev;
a0ed51b3 583 tmp = ((OP(c) == BOUND ? isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
584 while (s < strend) {
dfe13c55 585 if (tmp == !(OP(c) == BOUND ?
586 swash_fetch(PL_utf8_alnum, (U8*)s) :
587 isALNUM_LC_utf8((U8*)s)))
588 {
a0ed51b3 589 tmp = !tmp;
590 if (regtry(prog, s))
591 goto got_it;
592 }
593 s += UTF8SKIP(s);
594 }
595 if ((minlen || tmp) && regtry(prog,s))
596 goto got_it;
597 break;
bbce6d69 598 case NBOUNDL:
3280af22 599 PL_reg_flags |= RF_tainted;
bbce6d69 600 /* FALL THROUGH */
a0d0e21e 601 case NBOUND:
a0ed51b3 602 if (minlen) {
603 dontbother++;
604 strend -= 1;
605 }
3280af22 606 tmp = (s != startpos) ? UCHARAT(s - 1) : PL_regprev;
95bac841 607 tmp = ((OP(c) == NBOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
a0d0e21e 608 while (s < strend) {
95bac841 609 if (tmp == !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
a0d0e21e 610 tmp = !tmp;
611 else if (regtry(prog, s))
612 goto got_it;
613 s++;
614 }
615 if ((minlen || !tmp) && regtry(prog,s))
616 goto got_it;
617 break;
a0ed51b3 618 case NBOUNDLUTF8:
619 PL_reg_flags |= RF_tainted;
620 /* FALL THROUGH */
621 case NBOUNDUTF8:
622 if (minlen) {
623 dontbother++;
dfe13c55 624 strend = reghop_c(strend, -1);
a0ed51b3 625 }
dfe13c55 626 tmp = (I32)(s != startpos) ? utf8_to_uv(reghop((U8*)s, -1), 0) : PL_regprev;
a0ed51b3 627 tmp = ((OP(c) == NBOUND ? isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
628 while (s < strend) {
dfe13c55 629 if (tmp == !(OP(c) == NBOUND ?
630 swash_fetch(PL_utf8_alnum, (U8*)s) :
631 isALNUM_LC_utf8((U8*)s)))
a0ed51b3 632 tmp = !tmp;
633 else if (regtry(prog, s))
634 goto got_it;
635 s += UTF8SKIP(s);
636 }
637 if ((minlen || !tmp) && regtry(prog,s))
638 goto got_it;
639 break;
a0d0e21e 640 case ALNUM:
641 while (s < strend) {
bbce6d69 642 if (isALNUM(*s)) {
643 if (tmp && regtry(prog, s))
644 goto got_it;
645 else
646 tmp = doevery;
647 }
648 else
649 tmp = 1;
650 s++;
651 }
652 break;
a0ed51b3 653 case ALNUMUTF8:
654 while (s < strend) {
dfe13c55 655 if (swash_fetch(PL_utf8_alnum, (U8*)s)) {
a0ed51b3 656 if (tmp && regtry(prog, s))
657 goto got_it;
658 else
659 tmp = doevery;
660 }
661 else
662 tmp = 1;
663 s += UTF8SKIP(s);
664 }
665 break;
bbce6d69 666 case ALNUML:
3280af22 667 PL_reg_flags |= RF_tainted;
bbce6d69 668 while (s < strend) {
669 if (isALNUM_LC(*s)) {
a0d0e21e 670 if (tmp && regtry(prog, s))
671 goto got_it;
a687059c 672 else
a0d0e21e 673 tmp = doevery;
674 }
675 else
676 tmp = 1;
677 s++;
678 }
679 break;
a0ed51b3 680 case ALNUMLUTF8:
681 PL_reg_flags |= RF_tainted;
682 while (s < strend) {
dfe13c55 683 if (isALNUM_LC_utf8((U8*)s)) {
a0ed51b3 684 if (tmp && regtry(prog, s))
685 goto got_it;
686 else
687 tmp = doevery;
688 }
689 else
690 tmp = 1;
691 s += UTF8SKIP(s);
692 }
693 break;
a0d0e21e 694 case NALNUM:
695 while (s < strend) {
bbce6d69 696 if (!isALNUM(*s)) {
697 if (tmp && regtry(prog, s))
698 goto got_it;
699 else
700 tmp = doevery;
701 }
702 else
703 tmp = 1;
704 s++;
705 }
706 break;
a0ed51b3 707 case NALNUMUTF8:
708 while (s < strend) {
dfe13c55 709 if (!swash_fetch(PL_utf8_alnum, (U8*)s)) {
a0ed51b3 710 if (tmp && regtry(prog, s))
711 goto got_it;
712 else
713 tmp = doevery;
714 }
715 else
716 tmp = 1;
717 s += UTF8SKIP(s);
718 }
719 break;
bbce6d69 720 case NALNUML:
3280af22 721 PL_reg_flags |= RF_tainted;
bbce6d69 722 while (s < strend) {
723 if (!isALNUM_LC(*s)) {
a0d0e21e 724 if (tmp && regtry(prog, s))
725 goto got_it;
a687059c 726 else
a0d0e21e 727 tmp = doevery;
a687059c 728 }
a0d0e21e 729 else
730 tmp = 1;
731 s++;
732 }
733 break;
a0ed51b3 734 case NALNUMLUTF8:
735 PL_reg_flags |= RF_tainted;
736 while (s < strend) {
dfe13c55 737 if (!isALNUM_LC_utf8((U8*)s)) {
a0ed51b3 738 if (tmp && regtry(prog, s))
739 goto got_it;
740 else
741 tmp = doevery;
742 }
743 else
744 tmp = 1;
745 s += UTF8SKIP(s);
746 }
747 break;
a0d0e21e 748 case SPACE:
749 while (s < strend) {
750 if (isSPACE(*s)) {
751 if (tmp && regtry(prog, s))
752 goto got_it;
753 else
754 tmp = doevery;
2304df62 755 }
a0d0e21e 756 else
757 tmp = 1;
758 s++;
759 }
760 break;
a0ed51b3 761 case SPACEUTF8:
762 while (s < strend) {
dfe13c55 763 if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s)) {
a0ed51b3 764 if (tmp && regtry(prog, s))
765 goto got_it;
766 else
767 tmp = doevery;
768 }
769 else
770 tmp = 1;
771 s += UTF8SKIP(s);
772 }
773 break;
bbce6d69 774 case SPACEL:
3280af22 775 PL_reg_flags |= RF_tainted;
bbce6d69 776 while (s < strend) {
777 if (isSPACE_LC(*s)) {
778 if (tmp && regtry(prog, s))
779 goto got_it;
780 else
781 tmp = doevery;
782 }
783 else
784 tmp = 1;
785 s++;
786 }
787 break;
a0ed51b3 788 case SPACELUTF8:
789 PL_reg_flags |= RF_tainted;
790 while (s < strend) {
dfe13c55 791 if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) {
a0ed51b3 792 if (tmp && regtry(prog, s))
793 goto got_it;
794 else
795 tmp = doevery;
796 }
797 else
798 tmp = 1;
799 s += UTF8SKIP(s);
800 }
801 break;
a0d0e21e 802 case NSPACE:
803 while (s < strend) {
804 if (!isSPACE(*s)) {
805 if (tmp && regtry(prog, s))
806 goto got_it;
807 else
808 tmp = doevery;
a687059c 809 }
a0d0e21e 810 else
811 tmp = 1;
812 s++;
813 }
814 break;
a0ed51b3 815 case NSPACEUTF8:
816 while (s < strend) {
dfe13c55 817 if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s))) {
a0ed51b3 818 if (tmp && regtry(prog, s))
819 goto got_it;
820 else
821 tmp = doevery;
822 }
823 else
824 tmp = 1;
825 s += UTF8SKIP(s);
826 }
827 break;
bbce6d69 828 case NSPACEL:
3280af22 829 PL_reg_flags |= RF_tainted;
bbce6d69 830 while (s < strend) {
831 if (!isSPACE_LC(*s)) {
832 if (tmp && regtry(prog, s))
833 goto got_it;
834 else
835 tmp = doevery;
836 }
837 else
838 tmp = 1;
839 s++;
840 }
841 break;
a0ed51b3 842 case NSPACELUTF8:
843 PL_reg_flags |= RF_tainted;
844 while (s < strend) {
dfe13c55 845 if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) {
a0ed51b3 846 if (tmp && regtry(prog, s))
847 goto got_it;
848 else
849 tmp = doevery;
850 }
851 else
852 tmp = 1;
853 s += UTF8SKIP(s);
854 }
855 break;
a0d0e21e 856 case DIGIT:
857 while (s < strend) {
858 if (isDIGIT(*s)) {
859 if (tmp && regtry(prog, s))
860 goto got_it;
861 else
862 tmp = doevery;
2b69d0c2 863 }
a0d0e21e 864 else
865 tmp = 1;
866 s++;
867 }
868 break;
a0ed51b3 869 case DIGITUTF8:
870 while (s < strend) {
dfe13c55 871 if (swash_fetch(PL_utf8_digit,(U8*)s)) {
a0ed51b3 872 if (tmp && regtry(prog, s))
873 goto got_it;
874 else
875 tmp = doevery;
876 }
877 else
878 tmp = 1;
879 s += UTF8SKIP(s);
880 }
881 break;
a0d0e21e 882 case NDIGIT:
883 while (s < strend) {
884 if (!isDIGIT(*s)) {
885 if (tmp && regtry(prog, s))
886 goto got_it;
887 else
888 tmp = doevery;
a687059c 889 }
a0d0e21e 890 else
891 tmp = 1;
892 s++;
893 }
894 break;
a0ed51b3 895 case NDIGITUTF8:
896 while (s < strend) {
dfe13c55 897 if (!swash_fetch(PL_utf8_digit,(U8*)s)) {
a0ed51b3 898 if (tmp && regtry(prog, s))
899 goto got_it;
900 else
901 tmp = doevery;
902 }
903 else
904 tmp = 1;
905 s += UTF8SKIP(s);
906 }
907 break;
a687059c 908 }
a0d0e21e 909 }
910 else {
c277df42 911 dontbother = 0;
912 if (prog->float_substr != Nullsv) { /* Trim the end. */
913 char *last;
914 I32 oldpos = scream_pos;
915
916 if (screamer) {
917 last = screaminstr(screamer, prog->float_substr, s - strbeg,
918 end_shift, &scream_pos, 1); /* last one */
919 if (!last) {
920 last = scream_olds; /* Only one occurence. */
921 }
a0ed51b3 922 }
923 else {
c277df42 924 STRLEN len;
925 char *little = SvPV(prog->float_substr, len);
19b4f81a 926 if (len)
927 last = rninstr(s, strend, little, little + len);
928 else
929 last = strend; /* matching `$' */
c277df42 930 }
931 if (last == NULL) goto phooey; /* Should not happen! */
19b4f81a 932 dontbother = strend - last + prog->float_min_offset;
c277df42 933 }
934 if (minlen && (dontbother < minlen))
a0d0e21e 935 dontbother = minlen - 1;
a0ed51b3 936 strend -= dontbother; /* this one's always in bytes! */
a0d0e21e 937 /* We don't know much -- general case. */
a0ed51b3 938 if (UTF) {
939 for (;;) {
84df6dba 940 if (regtry(prog, s))
a0ed51b3 941 goto got_it;
a0ed51b3 942 if (s >= strend)
943 break;
944 s += UTF8SKIP(s);
945 };
946 }
947 else {
948 do {
949 if (regtry(prog, s))
950 goto got_it;
951 } while (s++ < strend);
952 }
a0d0e21e 953 }
954
955 /* Failure. */
956 goto phooey;
a687059c 957
a0d0e21e 958got_it:
959 prog->subbeg = strbeg;
19b4f81a 960 prog->subend = PL_regeol; /* strend may have been modified */
3280af22 961 RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
5f05dabc 962
963 /* make sure $`, $&, $', and $digit will work later */
c277df42 964 if (strbeg != prog->subbase) { /* second+ //g match. */
965 if (!(flags & REXEC_COPY_STR)) {
137443ea 966 if (prog->subbase) {
967 Safefree(prog->subbase);
968 prog->subbase = Nullch;
969 }
970 }
971 else {
19b4f81a 972 I32 i = PL_regeol - startpos + (stringarg - strbeg);
137443ea 973 s = savepvn(strbeg, i);
974 Safefree(prog->subbase);
975 prog->subbase = s;
976 prog->subbeg = prog->subbase;
977 prog->subend = prog->subbase + i;
978 s = prog->subbase + (stringarg - strbeg);
979 for (i = 0; i <= prog->nparens; i++) {
980 if (prog->endp[i]) {
981 prog->startp[i] = s + (prog->startp[i] - startpos);
982 prog->endp[i] = s + (prog->endp[i] - startpos);
983 }
a0d0e21e 984 }
985 }
a0d0e21e 986 }
ce862d02 987 /* Preserve the current value of $^R */
3280af22 988 if (oreplsv != GvSV(PL_replgv)) {
989 sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
ce862d02 990 restored, the value remains
991 the same. */
992 }
a0d0e21e 993 return 1;
994
995phooey:
a0d0e21e 996 return 0;
a687059c 997}
998
999/*
1000 - regtry - try match at specific point
1001 */
76e3520e 1002STATIC I32 /* 0 failure, 1 success */
8ac85365 1003regtry(regexp *prog, char *startpos)
a687059c 1004{
c277df42 1005 dTHR;
a0d0e21e 1006 register I32 i;
1007 register char **sp;
1008 register char **ep;
c277df42 1009 CHECKPOINT lastcp;
a0d0e21e 1010
3280af22 1011 if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
1012 PL_reg_eval_set = RS_init;
ce862d02 1013 DEBUG_r(DEBUG_s(
c3464db5 1014 PerlIO_printf(Perl_debug_log, " setting stack tmpbase at %i\n",
3280af22 1015 PL_stack_sp - PL_stack_base);
ce862d02 1016 ));
1017 SAVEINT(cxstack[cxstack_ix].blk_oldsp);
3280af22 1018 cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
ce862d02 1019 /* Otherwise OP_NEXTSTATE will free whatever on stack now. */
1020 SAVETMPS;
1021 /* Apparently this is not needed, judging by wantarray. */
1022 /* SAVEINT(cxstack[cxstack_ix].blk_gimme);
1023 cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
1024 }
3280af22 1025 PL_reginput = startpos;
1026 PL_regstartp = prog->startp;
1027 PL_regendp = prog->endp;
1028 PL_reglastparen = &prog->lastparen;
a0d0e21e 1029 prog->lastparen = 0;
3280af22 1030 PL_regsize = 0;
1031 if (PL_reg_start_tmpl <= prog->nparens) {
1032 PL_reg_start_tmpl = prog->nparens*3/2 + 3;
1033 if(PL_reg_start_tmp)
1034 Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
c277df42 1035 else
3280af22 1036 New(22,PL_reg_start_tmp, PL_reg_start_tmpl, char*);
c277df42 1037 }
a0d0e21e 1038
1039 sp = prog->startp;
1040 ep = prog->endp;
1041 if (prog->nparens) {
1042 for (i = prog->nparens; i >= 0; i--) {
1043 *sp++ = NULL;
1044 *ep++ = NULL;
a687059c 1045 }
a0d0e21e 1046 }
c277df42 1047 REGCP_SET;
7e5428c5 1048 if (regmatch(prog->program + 1)) {
a0d0e21e 1049 prog->startp[0] = startpos;
3280af22 1050 prog->endp[0] = PL_reginput;
a0d0e21e 1051 return 1;
1052 }
c277df42 1053 REGCP_UNWIND;
1054 return 0;
a687059c 1055}
1056
1057/*
1058 - regmatch - main matching routine
1059 *
1060 * Conceptually the strategy is simple: check to see whether the current
1061 * node matches, call self recursively to see whether the rest matches,
1062 * and then act accordingly. In practice we make some effort to avoid
1063 * recursion, in particular by going through "ordinary" nodes (that don't
1064 * need to know whether the rest of the match failed) by a loop instead of
1065 * by recursion.
1066 */
1067/* [lwall] I've hoisted the register declarations to the outer block in order to
1068 * maybe save a little bit of pushing and popping on the stack. It also takes
1069 * advantage of machines that use a register save mask on subroutine entry.
1070 */
76e3520e 1071STATIC I32 /* 0 failure, 1 success */
c277df42 1072regmatch(regnode *prog)
a687059c 1073{
c277df42 1074 dTHR;
1075 register regnode *scan; /* Current node. */
1076 regnode *next; /* Next node. */
1077 regnode *inner; /* Next node in internal branch. */
c3464db5 1078 register I32 nextchr; /* renamed nextchr - nextchar colides with
1079 function of same name */
a0d0e21e 1080 register I32 n; /* no or next */
1081 register I32 ln; /* len or last */
1082 register char *s; /* operand or save */
3280af22 1083 register char *locinput = PL_reginput;
c277df42 1084 register I32 c1, c2, paren; /* case fold search, parenth */
1085 int minmod = 0, sw = 0, logical = 0;
4633a7c4 1086#ifdef DEBUGGING
3280af22 1087 PL_regindent++;
4633a7c4 1088#endif
a0d0e21e 1089
a0ed51b3 1090 /* Note that nextchr is a byte even in UTF */
76e3520e 1091 nextchr = UCHARAT(locinput);
a0d0e21e 1092 scan = prog;
1093 while (scan != NULL) {
c277df42 1094#define sayNO_L (logical ? (logical = 0, sw = 0, goto cont) : sayNO)
a687059c 1095#ifdef DEBUGGING
c277df42 1096# define sayYES goto yes
1097# define sayNO goto no
1098# define saySAME(x) if (x) goto yes; else goto no
1099# define REPORT_CODE_OFF 24
4633a7c4 1100#else
c277df42 1101# define sayYES return 1
1102# define sayNO return 0
1103# define saySAME(x) return x
a687059c 1104#endif
c277df42 1105 DEBUG_r( {
1106 SV *prop = sv_newmortal();
3280af22 1107 int docolor = *PL_colors[0];
c277df42 1108 int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
3280af22 1109 int l = (PL_regeol - locinput > taill ? taill : PL_regeol - locinput);
1110 int pref_len = (locinput - PL_bostr > (5 + taill) - l
1111 ? (5 + taill) - l : locinput - PL_bostr);
8d300b32 1112 int pref0_len = pref_len - (locinput - PL_reginput);
c277df42 1113
3280af22 1114 if (l + pref_len < (5 + taill) && l < PL_regeol - locinput)
1115 l = ( PL_regeol - locinput > (5 + taill) - pref_len
1116 ? (5 + taill) - pref_len : PL_regeol - locinput);
8d300b32 1117 if (pref0_len < 0)
1118 pref0_len = 0;
c277df42 1119 regprop(prop, scan);
1120 PerlIO_printf(Perl_debug_log,
8d300b32 1121 "%4i <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|%3d:%*s%s\n",
3280af22 1122 locinput - PL_bostr,
8d300b32 1123 PL_colors[4], pref0_len,
1124 locinput - pref_len, PL_colors[5],
1125 PL_colors[2], pref_len - pref0_len,
1126 locinput - pref_len + pref0_len, PL_colors[3],
c277df42 1127 (docolor ? "" : "> <"),
3280af22 1128 PL_colors[0], l, locinput, PL_colors[1],
c277df42 1129 15 - l - pref_len + 1,
1130 "",
3280af22 1131 scan - PL_regprogram, PL_regindent*2, "",
c277df42 1132 SvPVX(prop));
1133 } );
a687059c 1134
c277df42 1135 next = scan + NEXT_OFF(scan);
a0d0e21e 1136 if (next == scan)
1137 next = NULL;
a687059c 1138
a0d0e21e 1139 switch (OP(scan)) {
1140 case BOL:
3280af22 1141 if (locinput == PL_bostr
1142 ? PL_regprev == '\n'
1143 : (PL_multiline &&
1144 (nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
a0d0e21e 1145 {
a0ed51b3 1146 /* regtill = regbol; */
a0d0e21e 1147 break;
1148 }
4633a7c4 1149 sayNO;
a0d0e21e 1150 case MBOL:
3280af22 1151 if (locinput == PL_bostr
1152 ? PL_regprev == '\n'
1153 : ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
a0d0e21e 1154 {
1155 break;
1156 }
4633a7c4 1157 sayNO;
a0d0e21e 1158 case SBOL:
3280af22 1159 if (locinput == PL_regbol && PL_regprev == '\n')
a0d0e21e 1160 break;
4633a7c4 1161 sayNO;
774d564b 1162 case GPOS:
3280af22 1163 if (locinput == PL_regbol)
a0d0e21e 1164 break;
4633a7c4 1165 sayNO;
a0d0e21e 1166 case EOL:
3280af22 1167 if (PL_multiline)
a0d0e21e 1168 goto meol;
1169 else
1170 goto seol;
1171 case MEOL:
1172 meol:
3280af22 1173 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
4633a7c4 1174 sayNO;
a0d0e21e 1175 break;
1176 case SEOL:
1177 seol:
3280af22 1178 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
4633a7c4 1179 sayNO;
3280af22 1180 if (PL_regeol - locinput > 1)
4633a7c4 1181 sayNO;
a0d0e21e 1182 break;
b85d18e9 1183 case EOS:
3280af22 1184 if (PL_regeol != locinput)
b85d18e9 1185 sayNO;
1186 break;
a0ed51b3 1187 case SANYUTF8:
1188 if (nextchr & 0x80) {
a176fa2a 1189 locinput += utf8skip[nextchr];
a0ed51b3 1190 if (locinput > PL_regeol)
1191 sayNO;
1192 nextchr = UCHARAT(locinput);
1193 break;
1194 }
1195 if (!nextchr && locinput >= PL_regeol)
1196 sayNO;
1197 nextchr = UCHARAT(++locinput);
1198 break;
a0d0e21e 1199 case SANY:
3280af22 1200 if (!nextchr && locinput >= PL_regeol)
4633a7c4 1201 sayNO;
76e3520e 1202 nextchr = UCHARAT(++locinput);
a0d0e21e 1203 break;
a0ed51b3 1204 case ANYUTF8:
1205 if (nextchr & 0x80) {
a176fa2a 1206 locinput += utf8skip[nextchr];
a0ed51b3 1207 if (locinput > PL_regeol)
1208 sayNO;
1209 nextchr = UCHARAT(locinput);
1210 break;
1211 }
1212 if (!nextchr && locinput >= PL_regeol || nextchr == '\n')
1213 sayNO;
1214 nextchr = UCHARAT(++locinput);
1215 break;
a0d0e21e 1216 case ANY:
3280af22 1217 if (!nextchr && locinput >= PL_regeol || nextchr == '\n')
4633a7c4 1218 sayNO;
76e3520e 1219 nextchr = UCHARAT(++locinput);
a0d0e21e 1220 break;
bbce6d69 1221 case EXACT:
161b471a 1222 s = (char *) OPERAND(scan);
c277df42 1223 ln = UCHARAT(s++);
a0d0e21e 1224 /* Inline the first character, for speed. */
76e3520e 1225 if (UCHARAT(s) != nextchr)
4633a7c4 1226 sayNO;
3280af22 1227 if (PL_regeol - locinput < ln)
4633a7c4 1228 sayNO;
36477c24 1229 if (ln > 1 && memNE(s, locinput, ln))
4633a7c4 1230 sayNO;
a0d0e21e 1231 locinput += ln;
76e3520e 1232 nextchr = UCHARAT(locinput);
bbce6d69 1233 break;
1234 case EXACTFL:
3280af22 1235 PL_reg_flags |= RF_tainted;
bbce6d69 1236 /* FALL THROUGH */
1237 case EXACTF:
161b471a 1238 s = (char *) OPERAND(scan);
c277df42 1239 ln = UCHARAT(s++);
a0ed51b3 1240
1241 if (UTF) {
1242 char *l = locinput;
1243 char *e = s + ln;
1244 c1 = OP(scan) == EXACTF;
1245 while (s < e) {
1246 if (l >= PL_regeol)
1247 sayNO;
dfe13c55 1248 if (utf8_to_uv((U8*)s, 0) != (c1 ?
1249 toLOWER_utf8((U8*)l) :
1250 toLOWER_LC_utf8((U8*)l)))
1251 {
a0ed51b3 1252 sayNO;
dfe13c55 1253 }
a0ed51b3 1254 s += UTF8SKIP(s);
1255 l += UTF8SKIP(l);
1256 }
1257 locinput = l;
1258 nextchr = UCHARAT(locinput);
1259 break;
1260 }
1261
bbce6d69 1262 /* Inline the first character, for speed. */
76e3520e 1263 if (UCHARAT(s) != nextchr &&
bbce6d69 1264 UCHARAT(s) != ((OP(scan) == EXACTF)
76e3520e 1265 ? fold : fold_locale)[nextchr])
bbce6d69 1266 sayNO;
3280af22 1267 if (PL_regeol - locinput < ln)
bbce6d69 1268 sayNO;
5f05dabc 1269 if (ln > 1 && (OP(scan) == EXACTF
1270 ? ibcmp(s, locinput, ln)
1271 : ibcmp_locale(s, locinput, ln)))
bbce6d69 1272 sayNO;
1273 locinput += ln;
76e3520e 1274 nextchr = UCHARAT(locinput);
a0d0e21e 1275 break;
a0ed51b3 1276 case ANYOFUTF8:
1277 s = (char *) OPERAND(scan);
1278 if (!REGINCLASSUTF8(scan, (U8*)locinput))
1279 sayNO;
1280 if (locinput >= PL_regeol)
1281 sayNO;
a176fa2a 1282 locinput += utf8skip[nextchr];
a0ed51b3 1283 nextchr = UCHARAT(locinput);
1284 break;
a0d0e21e 1285 case ANYOF:
161b471a 1286 s = (char *) OPERAND(scan);
76e3520e 1287 if (nextchr < 0)
1288 nextchr = UCHARAT(locinput);
873ef191 1289 if (!REGINCLASS(s, nextchr))
4633a7c4 1290 sayNO;
3280af22 1291 if (!nextchr && locinput >= PL_regeol)
4633a7c4 1292 sayNO;
76e3520e 1293 nextchr = UCHARAT(++locinput);
a0d0e21e 1294 break;
bbce6d69 1295 case ALNUML:
3280af22 1296 PL_reg_flags |= RF_tainted;
bbce6d69 1297 /* FALL THROUGH */
a0d0e21e 1298 case ALNUM:
76e3520e 1299 if (!nextchr)
4633a7c4 1300 sayNO;
bbce6d69 1301 if (!(OP(scan) == ALNUM
76e3520e 1302 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
4633a7c4 1303 sayNO;
76e3520e 1304 nextchr = UCHARAT(++locinput);
a0d0e21e 1305 break;
a0ed51b3 1306 case ALNUMLUTF8:
1307 PL_reg_flags |= RF_tainted;
1308 /* FALL THROUGH */
1309 case ALNUMUTF8:
1310 if (!nextchr)
1311 sayNO;
1312 if (nextchr & 0x80) {
1313 if (!(OP(scan) == ALNUMUTF8
dfe13c55 1314 ? swash_fetch(PL_utf8_alnum, (U8*)locinput)
1315 : isALNUM_LC_utf8((U8*)locinput)))
1316 {
a0ed51b3 1317 sayNO;
dfe13c55 1318 }
a176fa2a 1319 locinput += utf8skip[nextchr];
a0ed51b3 1320 nextchr = UCHARAT(locinput);
1321 break;
1322 }
1323 if (!(OP(scan) == ALNUMUTF8
1324 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
1325 sayNO;
1326 nextchr = UCHARAT(++locinput);
1327 break;
bbce6d69 1328 case NALNUML:
3280af22 1329 PL_reg_flags |= RF_tainted;
bbce6d69 1330 /* FALL THROUGH */
a0d0e21e 1331 case NALNUM:
3280af22 1332 if (!nextchr && locinput >= PL_regeol)
4633a7c4 1333 sayNO;
bbce6d69 1334 if (OP(scan) == NALNUM
76e3520e 1335 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
4633a7c4 1336 sayNO;
76e3520e 1337 nextchr = UCHARAT(++locinput);
a0d0e21e 1338 break;
a0ed51b3 1339 case NALNUMLUTF8:
1340 PL_reg_flags |= RF_tainted;
1341 /* FALL THROUGH */
1342 case NALNUMUTF8:
1343 if (!nextchr && locinput >= PL_regeol)
1344 sayNO;
1345 if (nextchr & 0x80) {
1346 if (OP(scan) == NALNUMUTF8
dfe13c55 1347 ? swash_fetch(PL_utf8_alnum, (U8*)locinput)
1348 : isALNUM_LC_utf8((U8*)locinput))
1349 {
a0ed51b3 1350 sayNO;
dfe13c55 1351 }
a176fa2a 1352 locinput += utf8skip[nextchr];
a0ed51b3 1353 nextchr = UCHARAT(locinput);
1354 break;
1355 }
1356 if (OP(scan) == NALNUMUTF8
1357 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
1358 sayNO;
1359 nextchr = UCHARAT(++locinput);
1360 break;
bbce6d69 1361 case BOUNDL:
1362 case NBOUNDL:
3280af22 1363 PL_reg_flags |= RF_tainted;
bbce6d69 1364 /* FALL THROUGH */
a0d0e21e 1365 case BOUND:
bbce6d69 1366 case NBOUND:
1367 /* was last char in word? */
3280af22 1368 ln = (locinput != PL_regbol) ? UCHARAT(locinput - 1) : PL_regprev;
bbce6d69 1369 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
1370 ln = isALNUM(ln);
76e3520e 1371 n = isALNUM(nextchr);
bbce6d69 1372 }
1373 else {
1374 ln = isALNUM_LC(ln);
76e3520e 1375 n = isALNUM_LC(nextchr);
bbce6d69 1376 }
95bac841 1377 if (((!ln) == (!n)) == (OP(scan) == BOUND || OP(scan) == BOUNDL))
4633a7c4 1378 sayNO;
a0d0e21e 1379 break;
a0ed51b3 1380 case BOUNDLUTF8:
1381 case NBOUNDLUTF8:
1382 PL_reg_flags |= RF_tainted;
1383 /* FALL THROUGH */
1384 case BOUNDUTF8:
1385 case NBOUNDUTF8:
1386 /* was last char in word? */
dfe13c55 1387 ln = (locinput != PL_regbol)
1388 ? utf8_to_uv(reghop((U8*)locinput, -1), 0) : PL_regprev;
a0ed51b3 1389 if (OP(scan) == BOUNDUTF8 || OP(scan) == NBOUNDUTF8) {
1390 ln = isALNUM_uni(ln);
dfe13c55 1391 n = swash_fetch(PL_utf8_alnum, (U8*)locinput);
a0ed51b3 1392 }
1393 else {
1394 ln = isALNUM_LC_uni(ln);
dfe13c55 1395 n = isALNUM_LC_utf8((U8*)locinput);
a0ed51b3 1396 }
1397 if (((!ln) == (!n)) == (OP(scan) == BOUNDUTF8 || OP(scan) == BOUNDLUTF8))
1398 sayNO;
1399 break;
bbce6d69 1400 case SPACEL:
3280af22 1401 PL_reg_flags |= RF_tainted;
bbce6d69 1402 /* FALL THROUGH */
a0d0e21e 1403 case SPACE:
3280af22 1404 if (!nextchr && locinput >= PL_regeol)
4633a7c4 1405 sayNO;
bbce6d69 1406 if (!(OP(scan) == SPACE
76e3520e 1407 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
4633a7c4 1408 sayNO;
76e3520e 1409 nextchr = UCHARAT(++locinput);
a0d0e21e 1410 break;
a0ed51b3 1411 case SPACELUTF8:
1412 PL_reg_flags |= RF_tainted;
1413 /* FALL THROUGH */
1414 case SPACEUTF8:
1415 if (!nextchr && locinput >= PL_regeol)
1416 sayNO;
1417 if (nextchr & 0x80) {
1418 if (!(OP(scan) == SPACEUTF8
dfe13c55 1419 ? swash_fetch(PL_utf8_space,(U8*)locinput)
1420 : isSPACE_LC_utf8((U8*)locinput)))
1421 {
a0ed51b3 1422 sayNO;
dfe13c55 1423 }
a176fa2a 1424 locinput += utf8skip[nextchr];
a0ed51b3 1425 nextchr = UCHARAT(locinput);
1426 break;
1427 }
1428 if (!(OP(scan) == SPACEUTF8
1429 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
1430 sayNO;
1431 nextchr = UCHARAT(++locinput);
1432 break;
bbce6d69 1433 case NSPACEL:
3280af22 1434 PL_reg_flags |= RF_tainted;
bbce6d69 1435 /* FALL THROUGH */
a0d0e21e 1436 case NSPACE:
76e3520e 1437 if (!nextchr)
4633a7c4 1438 sayNO;
bbce6d69 1439 if (OP(scan) == SPACE
76e3520e 1440 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
4633a7c4 1441 sayNO;
76e3520e 1442 nextchr = UCHARAT(++locinput);
a0d0e21e 1443 break;
a0ed51b3 1444 case NSPACELUTF8:
1445 PL_reg_flags |= RF_tainted;
1446 /* FALL THROUGH */
1447 case NSPACEUTF8:
1448 if (!nextchr)
1449 sayNO;
1450 if (nextchr & 0x80) {
1451 if (OP(scan) == NSPACEUTF8
dfe13c55 1452 ? swash_fetch(PL_utf8_space,(U8*)locinput)
1453 : isSPACE_LC_utf8((U8*)locinput))
1454 {
a0ed51b3 1455 sayNO;
dfe13c55 1456 }
a176fa2a 1457 locinput += utf8skip[nextchr];
a0ed51b3 1458 nextchr = UCHARAT(locinput);
1459 break;
1460 }
1461 if (OP(scan) == NSPACEUTF8
1462 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
1463 sayNO;
1464 nextchr = UCHARAT(++locinput);
1465 break;
a0d0e21e 1466 case DIGIT:
76e3520e 1467 if (!isDIGIT(nextchr))
4633a7c4 1468 sayNO;
76e3520e 1469 nextchr = UCHARAT(++locinput);
a0d0e21e 1470 break;
a0ed51b3 1471 case DIGITUTF8:
1472 if (nextchr & 0x80) {
dfe13c55 1473 if (!(swash_fetch(PL_utf8_digit,(U8*)locinput)))
a0ed51b3 1474 sayNO;
a176fa2a 1475 locinput += utf8skip[nextchr];
a0ed51b3 1476 nextchr = UCHARAT(locinput);
1477 break;
1478 }
1479 if (!isDIGIT(nextchr))
1480 sayNO;
1481 nextchr = UCHARAT(++locinput);
1482 break;
a0d0e21e 1483 case NDIGIT:
3280af22 1484 if (!nextchr && locinput >= PL_regeol)
4633a7c4 1485 sayNO;
76e3520e 1486 if (isDIGIT(nextchr))
4633a7c4 1487 sayNO;
76e3520e 1488 nextchr = UCHARAT(++locinput);
a0d0e21e 1489 break;
a0ed51b3 1490 case NDIGITUTF8:
1491 if (!nextchr && locinput >= PL_regeol)
1492 sayNO;
1493 if (nextchr & 0x80) {
dfe13c55 1494 if (swash_fetch(PL_utf8_digit,(U8*)locinput))
a0ed51b3 1495 sayNO;
a176fa2a 1496 locinput += utf8skip[nextchr];
a0ed51b3 1497 nextchr = UCHARAT(locinput);
1498 break;
1499 }
1500 if (isDIGIT(nextchr))
1501 sayNO;
1502 nextchr = UCHARAT(++locinput);
1503 break;
1504 case CLUMP:
dfe13c55 1505 if (locinput >= PL_regeol || swash_fetch(PL_utf8_mark,(U8*)locinput))
a0ed51b3 1506 sayNO;
a176fa2a 1507 locinput += utf8skip[nextchr];
dfe13c55 1508 while (locinput < PL_regeol && swash_fetch(PL_utf8_mark,(U8*)locinput))
a0ed51b3 1509 locinput += UTF8SKIP(locinput);
1510 if (locinput > PL_regeol)
1511 sayNO;
1512 nextchr = UCHARAT(locinput);
1513 break;
c8756f30 1514 case REFFL:
3280af22 1515 PL_reg_flags |= RF_tainted;
c8756f30 1516 /* FALL THROUGH */
c277df42 1517 case REF:
c8756f30 1518 case REFF:
c277df42 1519 n = ARG(scan); /* which paren pair */
3280af22 1520 s = PL_regstartp[n];
1521 if (*PL_reglastparen < n || !s)
af3f8c16 1522 sayNO; /* Do not match unless seen CLOSEn. */
3280af22 1523 if (s == PL_regendp[n])
a0d0e21e 1524 break;
a0ed51b3 1525
1526 if (UTF && OP(scan) != REF) { /* REF can do byte comparison */
1527 char *l = locinput;
1528 char *e = PL_regendp[n];
1529 /*
1530 * Note that we can't do the "other character" lookup trick as
1531 * in the 8-bit case (no pun intended) because in Unicode we
1532 * have to map both upper and title case to lower case.
1533 */
1534 if (OP(scan) == REFF) {
1535 while (s < e) {
1536 if (l >= PL_regeol)
1537 sayNO;
dfe13c55 1538 if (toLOWER_utf8((U8*)s) != toLOWER_utf8((U8*)l))
a0ed51b3 1539 sayNO;
1540 s += UTF8SKIP(s);
1541 l += UTF8SKIP(l);
1542 }
1543 }
1544 else {
1545 while (s < e) {
1546 if (l >= PL_regeol)
1547 sayNO;
dfe13c55 1548 if (toLOWER_LC_utf8((U8*)s) != toLOWER_LC_utf8((U8*)l))
a0ed51b3 1549 sayNO;
1550 s += UTF8SKIP(s);
1551 l += UTF8SKIP(l);
1552 }
1553 }
1554 locinput = l;
1555 nextchr = UCHARAT(locinput);
1556 break;
1557 }
1558
a0d0e21e 1559 /* Inline the first character, for speed. */
76e3520e 1560 if (UCHARAT(s) != nextchr &&
c8756f30 1561 (OP(scan) == REF ||
1562 (UCHARAT(s) != ((OP(scan) == REFF
76e3520e 1563 ? fold : fold_locale)[nextchr]))))
4633a7c4 1564 sayNO;
3280af22 1565 ln = PL_regendp[n] - s;
1566 if (locinput + ln > PL_regeol)
4633a7c4 1567 sayNO;
c8756f30 1568 if (ln > 1 && (OP(scan) == REF
1569 ? memNE(s, locinput, ln)
1570 : (OP(scan) == REFF
1571 ? ibcmp(s, locinput, ln)
1572 : ibcmp_locale(s, locinput, ln))))
4633a7c4 1573 sayNO;
a0d0e21e 1574 locinput += ln;
76e3520e 1575 nextchr = UCHARAT(locinput);
a0d0e21e 1576 break;
1577
1578 case NOTHING:
c277df42 1579 case TAIL:
a0d0e21e 1580 break;
1581 case BACK:
1582 break;
c277df42 1583 case EVAL:
1584 {
1585 dSP;
533c011a 1586 OP_4tree *oop = PL_op;
3280af22 1587 COP *ocurcop = PL_curcop;
1588 SV **ocurpad = PL_curpad;
c277df42 1589 SV *ret;
1590
1591 n = ARG(scan);
533c011a 1592 PL_op = (OP_4tree*)PL_regdata->data[n];
1593 DEBUG_r( PerlIO_printf(Perl_debug_log, " re_eval 0x%x\n", PL_op) );
3280af22 1594 PL_curpad = AvARRAY((AV*)PL_regdata->data[n + 1]);
c277df42 1595
76e3520e 1596 CALLRUNOPS(); /* Scalar context. */
c277df42 1597 SPAGAIN;
1598 ret = POPs;
1599 PUTBACK;
1600
0f5d15d6 1601 PL_op = oop;
1602 PL_curpad = ocurpad;
1603 PL_curcop = ocurcop;
c277df42 1604 if (logical) {
0f5d15d6 1605 if (logical == 2) { /* Postponed subexpression. */
1606 regexp *re;
1607 my_magic mg = Null(my_magic);
1608 re_cc_state state;
1609 CURCUR cctmp;
1610 CHECKPOINT cp, lastcp;
1611
1612 if(SvROK(ret) || SvRMAGICAL(ret)) {
1613 SV *sv = SvROK(ret) ? SvRV(ret) : ret;
1614
1615 if(SvMAGICAL(sv))
1616 mg = mg_find(sv, 'r');
1617 }
1618 if (mg) {
1619 re = (regexp *)mg->mg_obj;
1620 ReREFCNT_inc(re);
1621 }
1622 else {
1623 STRLEN len;
1624 char *t = SvPV(ret, len);
1625 PMOP pm;
1626 char *oprecomp = PL_regprecomp;
1627 I32 osize = PL_regsize;
1628 I32 onpar = PL_regnpar;
1629
1630 pm.op_pmflags = 0;
1631 re = CALLREGCOMP(t, t + len, &pm);
1632 if (!(SvFLAGS(ret)
1633 & (SVs_TEMP | SVs_PADTMP | SVf_READONLY)))
1634 sv_magic(ret,(SV*)ReREFCNT_inc(re),'r',0,0);
1635 PL_regprecomp = oprecomp;
1636 PL_regsize = osize;
1637 PL_regnpar = onpar;
1638 }
1639 DEBUG_r(
1640 PerlIO_printf(Perl_debug_log,
1641 "Entering embedded `%s%.60s%s%s'\n",
1642 PL_colors[0],
1643 re->precomp,
1644 PL_colors[1],
1645 (strlen(re->precomp) > 60 ? "..." : ""))
1646 );
1647 state.node = next;
1648 state.prev = PL_reg_call_cc;
1649 state.cc = PL_regcc;
1650 state.re = PL_reg_re;
1651
1652 cctmp.cur = 0;
1653 cctmp.oldcc = 0;
1654 PL_regcc = &cctmp;
1655
1656 cp = regcppush(0); /* Save *all* the positions. */
1657 REGCP_SET;
1658 cache_re(re);
1659 state.ss = PL_savestack_ix;
1660 *PL_reglastparen = 0;
1661 PL_reg_call_cc = &state;
1662 PL_reginput = locinput;
1663 if (regmatch(re->program + 1)) {
1664 ReREFCNT_dec(re);
1665 regcpblow(cp);
1666 sayYES;
1667 }
1668 DEBUG_r(
1669 PerlIO_printf(Perl_debug_log,
1670 "%*s failed...\n",
1671 REPORT_CODE_OFF+PL_regindent*2, "")
1672 );
1673 ReREFCNT_dec(re);
1674 REGCP_UNWIND;
1675 regcppop();
1676 PL_reg_call_cc = state.prev;
1677 PL_regcc = state.cc;
1678 PL_reg_re = state.re;
d3790889 1679 cache_re(PL_reg_re);
0f5d15d6 1680 sayNO;
1681 }
c277df42 1682 sw = SvTRUE(ret);
0f5d15d6 1683 logical = 0;
a0ed51b3 1684 }
1685 else
3280af22 1686 sv_setsv(save_scalar(PL_replgv), ret);
c277df42 1687 break;
1688 }
a0d0e21e 1689 case OPEN:
c277df42 1690 n = ARG(scan); /* which paren pair */
3280af22 1691 PL_reg_start_tmp[n] = locinput;
1692 if (n > PL_regsize)
1693 PL_regsize = n;
a0d0e21e 1694 break;
1695 case CLOSE:
c277df42 1696 n = ARG(scan); /* which paren pair */
3280af22 1697 PL_regstartp[n] = PL_reg_start_tmp[n];
1698 PL_regendp[n] = locinput;
1699 if (n > *PL_reglastparen)
1700 *PL_reglastparen = n;
a0d0e21e 1701 break;
c277df42 1702 case GROUPP:
1703 n = ARG(scan); /* which paren pair */
3280af22 1704 sw = (*PL_reglastparen >= n && PL_regendp[n] != NULL);
c277df42 1705 break;
1706 case IFTHEN:
1707 if (sw)
1708 next = NEXTOPER(NEXTOPER(scan));
1709 else {
1710 next = scan + ARG(scan);
1711 if (OP(next) == IFTHEN) /* Fake one. */
1712 next = NEXTOPER(NEXTOPER(next));
1713 }
1714 break;
1715 case LOGICAL:
0f5d15d6 1716 logical = scan->flags;
c277df42 1717 break;
a0d0e21e 1718 case CURLYX: {
1719 CURCUR cc;
3280af22 1720 CHECKPOINT cp = PL_savestack_ix;
c277df42 1721
1722 if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
1723 next += ARG(next);
3280af22 1724 cc.oldcc = PL_regcc;
1725 PL_regcc = &cc;
1726 cc.parenfloor = *PL_reglastparen;
a0d0e21e 1727 cc.cur = -1;
1728 cc.min = ARG1(scan);
1729 cc.max = ARG2(scan);
c277df42 1730 cc.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
a0d0e21e 1731 cc.next = next;
1732 cc.minmod = minmod;
1733 cc.lastloc = 0;
3280af22 1734 PL_reginput = locinput;
a0d0e21e 1735 n = regmatch(PREVOPER(next)); /* start on the WHILEM */
1736 regcpblow(cp);
3280af22 1737 PL_regcc = cc.oldcc;
4633a7c4 1738 saySAME(n);
a0d0e21e 1739 }
1740 /* NOT REACHED */
1741 case WHILEM: {
1742 /*
1743 * This is really hard to understand, because after we match
1744 * what we're trying to match, we must make sure the rest of
1745 * the RE is going to match for sure, and to do that we have
1746 * to go back UP the parse tree by recursing ever deeper. And
1747 * if it fails, we have to reset our parent's current state
1748 * that we can try again after backing off.
1749 */
1750
c277df42 1751 CHECKPOINT cp, lastcp;
3280af22 1752 CURCUR* cc = PL_regcc;
c277df42 1753 char *lastloc = cc->lastloc; /* Detection of 0-len. */
1754
4633a7c4 1755 n = cc->cur + 1; /* how many we know we matched */
3280af22 1756 PL_reginput = locinput;
a0d0e21e 1757
c277df42 1758 DEBUG_r(
1759 PerlIO_printf(Perl_debug_log,
1760 "%*s %ld out of %ld..%ld cc=%lx\n",
3280af22 1761 REPORT_CODE_OFF+PL_regindent*2, "",
c277df42 1762 (long)n, (long)cc->min,
1763 (long)cc->max, (long)cc)
1764 );
4633a7c4 1765
a0d0e21e 1766 /* If degenerate scan matches "", assume scan done. */
1767
579cf2c3 1768 if (locinput == cc->lastloc && n >= cc->min) {
3280af22 1769 PL_regcc = cc->oldcc;
1770 ln = PL_regcc->cur;
c277df42 1771 DEBUG_r(
c3464db5 1772 PerlIO_printf(Perl_debug_log,
1773 "%*s empty match detected, try continuation...\n",
3280af22 1774 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 1775 );
a0d0e21e 1776 if (regmatch(cc->next))
4633a7c4 1777 sayYES;
c277df42 1778 DEBUG_r(
c3464db5 1779 PerlIO_printf(Perl_debug_log,
1780 "%*s failed...\n",
3280af22 1781 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 1782 );
3280af22 1783 PL_regcc->cur = ln;
1784 PL_regcc = cc;
4633a7c4 1785 sayNO;
a0d0e21e 1786 }
1787
1788 /* First just match a string of min scans. */
1789
1790 if (n < cc->min) {
1791 cc->cur = n;
1792 cc->lastloc = locinput;
4633a7c4 1793 if (regmatch(cc->scan))
1794 sayYES;
1795 cc->cur = n - 1;
c277df42 1796 cc->lastloc = lastloc;
1797 DEBUG_r(
c3464db5 1798 PerlIO_printf(Perl_debug_log,
1799 "%*s failed...\n",
3280af22 1800 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 1801 );
4633a7c4 1802 sayNO;
a0d0e21e 1803 }
1804
1805 /* Prefer next over scan for minimal matching. */
1806
1807 if (cc->minmod) {
3280af22 1808 PL_regcc = cc->oldcc;
1809 ln = PL_regcc->cur;
5f05dabc 1810 cp = regcppush(cc->parenfloor);
c277df42 1811 REGCP_SET;
5f05dabc 1812 if (regmatch(cc->next)) {
c277df42 1813 regcpblow(cp);
4633a7c4 1814 sayYES; /* All done. */
5f05dabc 1815 }
c277df42 1816 REGCP_UNWIND;
5f05dabc 1817 regcppop();
3280af22 1818 PL_regcc->cur = ln;
1819 PL_regcc = cc;
a0d0e21e 1820
c277df42 1821 if (n >= cc->max) { /* Maximum greed exceeded? */
599cee73 1822 if (ckWARN(WARN_UNSAFE) && n >= REG_INFTY
3280af22 1823 && !(PL_reg_flags & RF_warned)) {
1824 PL_reg_flags |= RF_warned;
599cee73 1825 warner(WARN_UNSAFE, "%s limit (%d) exceeded",
2f3ca594 1826 "Complex regular subexpression recursion",
1827 REG_INFTY - 1);
c277df42 1828 }
4633a7c4 1829 sayNO;
c277df42 1830 }
a687059c 1831
c277df42 1832 DEBUG_r(
c3464db5 1833 PerlIO_printf(Perl_debug_log,
1834 "%*s trying longer...\n",
3280af22 1835 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 1836 );
a0d0e21e 1837 /* Try scanning more and see if it helps. */
3280af22 1838 PL_reginput = locinput;
a0d0e21e 1839 cc->cur = n;
1840 cc->lastloc = locinput;
5f05dabc 1841 cp = regcppush(cc->parenfloor);
c277df42 1842 REGCP_SET;
5f05dabc 1843 if (regmatch(cc->scan)) {
c277df42 1844 regcpblow(cp);
4633a7c4 1845 sayYES;
5f05dabc 1846 }
c277df42 1847 DEBUG_r(
c3464db5 1848 PerlIO_printf(Perl_debug_log,
1849 "%*s failed...\n",
3280af22 1850 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 1851 );
1852 REGCP_UNWIND;
5f05dabc 1853 regcppop();
4633a7c4 1854 cc->cur = n - 1;
c277df42 1855 cc->lastloc = lastloc;
4633a7c4 1856 sayNO;
a0d0e21e 1857 }
1858
1859 /* Prefer scan over next for maximal matching. */
1860
1861 if (n < cc->max) { /* More greed allowed? */
5f05dabc 1862 cp = regcppush(cc->parenfloor);
a0d0e21e 1863 cc->cur = n;
1864 cc->lastloc = locinput;
c277df42 1865 REGCP_SET;
5f05dabc 1866 if (regmatch(cc->scan)) {
c277df42 1867 regcpblow(cp);
4633a7c4 1868 sayYES;
5f05dabc 1869 }
c277df42 1870 REGCP_UNWIND;
a0d0e21e 1871 regcppop(); /* Restore some previous $<digit>s? */
3280af22 1872 PL_reginput = locinput;
c277df42 1873 DEBUG_r(
c3464db5 1874 PerlIO_printf(Perl_debug_log,
1875 "%*s failed, try continuation...\n",
3280af22 1876 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 1877 );
1878 }
599cee73 1879 if (ckWARN(WARN_UNSAFE) && n >= REG_INFTY
1880 && !(PL_reg_flags & RF_warned)) {
3280af22 1881 PL_reg_flags |= RF_warned;
599cee73 1882 warner(WARN_UNSAFE, "%s limit (%d) exceeded",
cb5d145d 1883 "Complex regular subexpression recursion",
1884 REG_INFTY - 1);
a0d0e21e 1885 }
1886
1887 /* Failed deeper matches of scan, so see if this one works. */
3280af22 1888 PL_regcc = cc->oldcc;
1889 ln = PL_regcc->cur;
a0d0e21e 1890 if (regmatch(cc->next))
4633a7c4 1891 sayYES;
c277df42 1892 DEBUG_r(
c3464db5 1893 PerlIO_printf(Perl_debug_log, "%*s failed...\n",
3280af22 1894 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 1895 );
3280af22 1896 PL_regcc->cur = ln;
1897 PL_regcc = cc;
4633a7c4 1898 cc->cur = n - 1;
c277df42 1899 cc->lastloc = lastloc;
4633a7c4 1900 sayNO;
a0d0e21e 1901 }
1902 /* NOT REACHED */
c277df42 1903 case BRANCHJ:
1904 next = scan + ARG(scan);
1905 if (next == scan)
1906 next = NULL;
1907 inner = NEXTOPER(NEXTOPER(scan));
1908 goto do_branch;
1909 case BRANCH:
1910 inner = NEXTOPER(scan);
1911 do_branch:
1912 {
1913 CHECKPOINT lastcp;
1914 c1 = OP(scan);
1915 if (OP(next) != c1) /* No choice. */
1916 next = inner; /* Avoid recursion. */
a0d0e21e 1917 else {
3280af22 1918 int lastparen = *PL_reglastparen;
c277df42 1919
1920 REGCP_SET;
a0d0e21e 1921 do {
3280af22 1922 PL_reginput = locinput;
c277df42 1923 if (regmatch(inner))
4633a7c4 1924 sayYES;
c277df42 1925 REGCP_UNWIND;
3280af22 1926 for (n = *PL_reglastparen; n > lastparen; n--)
1927 PL_regendp[n] = 0;
1928 *PL_reglastparen = n;
c277df42 1929 scan = next;
a0d0e21e 1930 /*SUPPRESS 560*/
c277df42 1931 if (n = (c1 == BRANCH ? NEXT_OFF(next) : ARG(next)))
1932 next += n;
a0d0e21e 1933 else
c277df42 1934 next = NULL;
c277df42 1935 inner = NEXTOPER(scan);
1936 if (c1 == BRANCHJ) {
1937 inner = NEXTOPER(inner);
1938 }
1939 } while (scan != NULL && OP(scan) == c1);
4633a7c4 1940 sayNO;
a0d0e21e 1941 /* NOTREACHED */
a687059c 1942 }
a0d0e21e 1943 }
1944 break;
1945 case MINMOD:
1946 minmod = 1;
1947 break;
c277df42 1948 case CURLYM:
1949 {
00db4c45 1950 I32 l = 0;
c277df42 1951 CHECKPOINT lastcp;
1952
1953 /* We suppose that the next guy does not need
1954 backtracking: in particular, it is of constant length,
1955 and has no parenths to influence future backrefs. */
1956 ln = ARG1(scan); /* min to match */
1957 n = ARG2(scan); /* max to match */
c277df42 1958 paren = scan->flags;
1959 if (paren) {
3280af22 1960 if (paren > PL_regsize)
1961 PL_regsize = paren;
1962 if (paren > *PL_reglastparen)
1963 *PL_reglastparen = paren;
c277df42 1964 }
dc45a647 1965 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
c277df42 1966 if (paren)
1967 scan += NEXT_OFF(scan); /* Skip former OPEN. */
3280af22 1968 PL_reginput = locinput;
c277df42 1969 if (minmod) {
1970 minmod = 0;
1971 if (ln && regrepeat_hard(scan, ln, &l) < ln)
1972 sayNO;
5f4b28b2 1973 if (ln && l == 0 && n >= ln
c277df42 1974 /* In fact, this is tricky. If paren, then the
1975 fact that we did/didnot match may influence
1976 future execution. */
1977 && !(paren && ln == 0))
1978 ln = n;
3280af22 1979 locinput = PL_reginput;
c277df42 1980 if (regkind[(U8)OP(next)] == EXACT) {
1981 c1 = UCHARAT(OPERAND(next) + 1);
1982 if (OP(next) == EXACTF)
1983 c2 = fold[c1];
1984 else if (OP(next) == EXACTFL)
1985 c2 = fold_locale[c1];
1986 else
1987 c2 = c1;
a0ed51b3 1988 }
1989 else
c277df42 1990 c1 = c2 = -1000;
1991 REGCP_SET;
5f4b28b2 1992 /* This may be improved if l == 0. */
c277df42 1993 while (n >= ln || (n == REG_INFTY && ln > 0 && l)) { /* ln overflow ? */
1994 /* If it could work, try it. */
1995 if (c1 == -1000 ||
3280af22 1996 UCHARAT(PL_reginput) == c1 ||
1997 UCHARAT(PL_reginput) == c2)
c277df42 1998 {
1999 if (paren) {
2000 if (n) {
dfe13c55 2001 PL_regstartp[paren] = HOPc(PL_reginput, -l);
3280af22 2002 PL_regendp[paren] = PL_reginput;
a0ed51b3 2003 }
2004 else
3280af22 2005 PL_regendp[paren] = NULL;
c277df42 2006 }
2007 if (regmatch(next))
2008 sayYES;
2009 REGCP_UNWIND;
2010 }
2011 /* Couldn't or didn't -- move forward. */
3280af22 2012 PL_reginput = locinput;
c277df42 2013 if (regrepeat_hard(scan, 1, &l)) {
2014 ln++;
3280af22 2015 locinput = PL_reginput;
c277df42 2016 }
2017 else
2018 sayNO;
2019 }
a0ed51b3 2020 }
2021 else {
c277df42 2022 n = regrepeat_hard(scan, n, &l);
2023 if (n != 0 && l == 0
2024 /* In fact, this is tricky. If paren, then the
2025 fact that we did/didnot match may influence
2026 future execution. */
2027 && !(paren && ln == 0))
2028 ln = n;
3280af22 2029 locinput = PL_reginput;
c277df42 2030 DEBUG_r(
5c0ca799 2031 PerlIO_printf(Perl_debug_log,
2032 "%*s matched %ld times, len=%ld...\n",
3280af22 2033 REPORT_CODE_OFF+PL_regindent*2, "", n, l)
c277df42 2034 );
2035 if (n >= ln) {
2036 if (regkind[(U8)OP(next)] == EXACT) {
2037 c1 = UCHARAT(OPERAND(next) + 1);
2038 if (OP(next) == EXACTF)
2039 c2 = fold[c1];
2040 else if (OP(next) == EXACTFL)
2041 c2 = fold_locale[c1];
2042 else
2043 c2 = c1;
a0ed51b3 2044 }
2045 else
c277df42 2046 c1 = c2 = -1000;
2047 }
2048 REGCP_SET;
2049 while (n >= ln) {
2050 /* If it could work, try it. */
2051 if (c1 == -1000 ||
3280af22 2052 UCHARAT(PL_reginput) == c1 ||
2053 UCHARAT(PL_reginput) == c2)
a0ed51b3 2054 {
2055 DEBUG_r(
c3464db5 2056 PerlIO_printf(Perl_debug_log,
2057 "%*s trying tail with n=%ld...\n",
3280af22 2058 REPORT_CODE_OFF+PL_regindent*2, "", n)
a0ed51b3 2059 );
2060 if (paren) {
2061 if (n) {
dfe13c55 2062 PL_regstartp[paren] = HOPc(PL_reginput, -l);
a0ed51b3 2063 PL_regendp[paren] = PL_reginput;
c277df42 2064 }
a0ed51b3 2065 else
2066 PL_regendp[paren] = NULL;
c277df42 2067 }
a0ed51b3 2068 if (regmatch(next))
2069 sayYES;
2070 REGCP_UNWIND;
2071 }
c277df42 2072 /* Couldn't or didn't -- back up. */
2073 n--;
dfe13c55 2074 locinput = HOPc(locinput, -l);
3280af22 2075 PL_reginput = locinput;
c277df42 2076 }
2077 }
2078 sayNO;
2079 break;
2080 }
2081 case CURLYN:
2082 paren = scan->flags; /* Which paren to set */
3280af22 2083 if (paren > PL_regsize)
2084 PL_regsize = paren;
2085 if (paren > *PL_reglastparen)
2086 *PL_reglastparen = paren;
c277df42 2087 ln = ARG1(scan); /* min to match */
2088 n = ARG2(scan); /* max to match */
dc45a647 2089 scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
c277df42 2090 goto repeat;
a0d0e21e 2091 case CURLY:
c277df42 2092 paren = 0;
a0d0e21e 2093 ln = ARG1(scan); /* min to match */
2094 n = ARG2(scan); /* max to match */
dc45a647 2095 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
a0d0e21e 2096 goto repeat;
2097 case STAR:
2098 ln = 0;
c277df42 2099 n = REG_INFTY;
a0d0e21e 2100 scan = NEXTOPER(scan);
c277df42 2101 paren = 0;
a0d0e21e 2102 goto repeat;
2103 case PLUS:
c277df42 2104 ln = 1;
2105 n = REG_INFTY;
2106 scan = NEXTOPER(scan);
2107 paren = 0;
2108 repeat:
a0d0e21e 2109 /*
2110 * Lookahead to avoid useless match attempts
2111 * when we know what character comes next.
2112 */
bbce6d69 2113 if (regkind[(U8)OP(next)] == EXACT) {
2114 c1 = UCHARAT(OPERAND(next) + 1);
2115 if (OP(next) == EXACTF)
2116 c2 = fold[c1];
2117 else if (OP(next) == EXACTFL)
2118 c2 = fold_locale[c1];
2119 else
2120 c2 = c1;
2121 }
a0d0e21e 2122 else
bbce6d69 2123 c1 = c2 = -1000;
3280af22 2124 PL_reginput = locinput;
a0d0e21e 2125 if (minmod) {
c277df42 2126 CHECKPOINT lastcp;
a0d0e21e 2127 minmod = 0;
2128 if (ln && regrepeat(scan, ln) < ln)
4633a7c4 2129 sayNO;
a0ed51b3 2130 locinput = PL_reginput;
c277df42 2131 REGCP_SET;
2132 while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */
a0d0e21e 2133 /* If it could work, try it. */
bbce6d69 2134 if (c1 == -1000 ||
3280af22 2135 UCHARAT(PL_reginput) == c1 ||
2136 UCHARAT(PL_reginput) == c2)
bbce6d69 2137 {
c277df42 2138 if (paren) {
2139 if (n) {
dfe13c55 2140 PL_regstartp[paren] = HOPc(PL_reginput, -1);
3280af22 2141 PL_regendp[paren] = PL_reginput;
a0ed51b3 2142 }
2143 else
3280af22 2144 PL_regendp[paren] = NULL;
c277df42 2145 }
a0d0e21e 2146 if (regmatch(next))
4633a7c4 2147 sayYES;
c277df42 2148 REGCP_UNWIND;
bbce6d69 2149 }
c277df42 2150 /* Couldn't or didn't -- move forward. */
a0ed51b3 2151 PL_reginput = locinput;
a0d0e21e 2152 if (regrepeat(scan, 1)) {
2153 ln++;
a0ed51b3 2154 locinput = PL_reginput;
2155 }
2156 else
4633a7c4 2157 sayNO;
a0d0e21e 2158 }
2159 }
2160 else {
c277df42 2161 CHECKPOINT lastcp;
a0d0e21e 2162 n = regrepeat(scan, n);
a0ed51b3 2163 locinput = PL_reginput;
a0d0e21e 2164 if (ln < n && regkind[(U8)OP(next)] == EOL &&
3280af22 2165 (!PL_multiline || OP(next) == SEOL))
a0d0e21e 2166 ln = n; /* why back off? */
c277df42 2167 REGCP_SET;
2168 if (paren) {
2169 while (n >= ln) {
2170 /* If it could work, try it. */
2171 if (c1 == -1000 ||
3280af22 2172 UCHARAT(PL_reginput) == c1 ||
2173 UCHARAT(PL_reginput) == c2)
c277df42 2174 {
2175 if (paren && n) {
2176 if (n) {
dfe13c55 2177 PL_regstartp[paren] = HOPc(PL_reginput, -1);
3280af22 2178 PL_regendp[paren] = PL_reginput;
a0ed51b3 2179 }
2180 else
3280af22 2181 PL_regendp[paren] = NULL;
c277df42 2182 }
2183 if (regmatch(next))
2184 sayYES;
2185 REGCP_UNWIND;
2186 }
2187 /* Couldn't or didn't -- back up. */
2188 n--;
dfe13c55 2189 PL_reginput = locinput = HOPc(locinput, -1);
c277df42 2190 }
a0ed51b3 2191 }
2192 else {
c277df42 2193 while (n >= ln) {
2194 /* If it could work, try it. */
2195 if (c1 == -1000 ||
3280af22 2196 UCHARAT(PL_reginput) == c1 ||
2197 UCHARAT(PL_reginput) == c2)
c277df42 2198 {
2199 if (regmatch(next))
2200 sayYES;
2201 REGCP_UNWIND;
2202 }
2203 /* Couldn't or didn't -- back up. */
2204 n--;
dfe13c55 2205 PL_reginput = locinput = HOPc(locinput, -1);
bbce6d69 2206 }
a0d0e21e 2207 }
2208 }
4633a7c4 2209 sayNO;
c277df42 2210 break;
a0d0e21e 2211 case END:
0f5d15d6 2212 if (PL_reg_call_cc) {
2213 re_cc_state *cur_call_cc = PL_reg_call_cc;
2214 CURCUR *cctmp = PL_regcc;
2215 regexp *re = PL_reg_re;
2216 CHECKPOINT cp, lastcp;
2217
2218 cp = regcppush(0); /* Save *all* the positions. */
2219 REGCP_SET;
2220 regcp_set_to(PL_reg_call_cc->ss); /* Restore parens of
2221 the caller. */
2222 PL_reginput = locinput; /* Make position available to
2223 the callcc. */
2224 cache_re(PL_reg_call_cc->re);
2225 PL_regcc = PL_reg_call_cc->cc;
2226 PL_reg_call_cc = PL_reg_call_cc->prev;
2227 if (regmatch(cur_call_cc->node)) {
2228 PL_reg_call_cc = cur_call_cc;
2229 regcpblow(cp);
2230 sayYES;
2231 }
2232 REGCP_UNWIND;
2233 regcppop();
2234 PL_reg_call_cc = cur_call_cc;
2235 PL_regcc = cctmp;
2236 PL_reg_re = re;
2237 cache_re(re);
2238
2239 DEBUG_r(
2240 PerlIO_printf(Perl_debug_log,
2241 "%*s continuation failed...\n",
2242 REPORT_CODE_OFF+PL_regindent*2, "")
2243 );
2244 sayNO;
2245 }
3280af22 2246 if (locinput < PL_regtill)
7e5428c5 2247 sayNO; /* Cannot match: too short. */
2248 /* Fall through */
2249 case SUCCEED:
3280af22 2250 PL_reginput = locinput; /* put where regtry can find it */
4633a7c4 2251 sayYES; /* Success! */
c277df42 2252 case SUSPEND:
2253 n = 1;
9fe1d20c 2254 PL_reginput = locinput;
c277df42 2255 goto do_ifmatch;
a0d0e21e 2256 case UNLESSM:
c277df42 2257 n = 0;
a0ed51b3 2258 if (scan->flags) {
dfe13c55 2259 s = HOPMAYBEc(locinput, -scan->flags);
a0ed51b3 2260 if (!s)
2261 goto say_yes;
2262 PL_reginput = s;
2263 }
2264 else
2265 PL_reginput = locinput;
c277df42 2266 goto do_ifmatch;
2267 case IFMATCH:
2268 n = 1;
a0ed51b3 2269 if (scan->flags) {
dfe13c55 2270 s = HOPMAYBEc(locinput, -scan->flags);
a0ed51b3 2271 if (!s)
2272 goto say_no;
2273 PL_reginput = s;
2274 }
2275 else
2276 PL_reginput = locinput;
2277
c277df42 2278 do_ifmatch:
c277df42 2279 inner = NEXTOPER(NEXTOPER(scan));
2280 if (regmatch(inner) != n) {
2281 say_no:
2282 if (logical) {
2283 logical = 0;
2284 sw = 0;
2285 goto do_longjump;
a0ed51b3 2286 }
2287 else
c277df42 2288 sayNO;
2289 }
2290 say_yes:
2291 if (logical) {
2292 logical = 0;
2293 sw = 1;
2294 }
fe44a5e8 2295 if (OP(scan) == SUSPEND) {
3280af22 2296 locinput = PL_reginput;
565764a8 2297 nextchr = UCHARAT(locinput);
fe44a5e8 2298 }
c277df42 2299 /* FALL THROUGH. */
2300 case LONGJMP:
2301 do_longjump:
2302 next = scan + ARG(scan);
2303 if (next == scan)
2304 next = NULL;
a0d0e21e 2305 break;
2306 default:
c030ccd9 2307 PerlIO_printf(PerlIO_stderr(), "%lx %d\n",
c277df42 2308 (unsigned long)scan, OP(scan));
a0d0e21e 2309 FAIL("regexp memory corruption");
a687059c 2310 }
a0d0e21e 2311 scan = next;
2312 }
a687059c 2313
a0d0e21e 2314 /*
2315 * We get here only if there's trouble -- normally "case END" is
2316 * the terminating point.
2317 */
2318 FAIL("corrupted regexp pointers");
2319 /*NOTREACHED*/
4633a7c4 2320 sayNO;
2321
2322yes:
2323#ifdef DEBUGGING
3280af22 2324 PL_regindent--;
4633a7c4 2325#endif
2326 return 1;
2327
2328no:
2329#ifdef DEBUGGING
3280af22 2330 PL_regindent--;
4633a7c4 2331#endif
a0d0e21e 2332 return 0;
a687059c 2333}
2334
2335/*
2336 - regrepeat - repeatedly match something simple, report how many
2337 */
2338/*
2339 * [This routine now assumes that it will only match on things of length 1.
2340 * That was true before, but now we assume scan - reginput is the count,
a0ed51b3 2341 * rather than incrementing count on every character. [Er, except utf8.]]
a687059c 2342 */
76e3520e 2343STATIC I32
c277df42 2344regrepeat(regnode *p, I32 max)
a687059c 2345{
5c0ca799 2346 dTHR;
a0d0e21e 2347 register char *scan;
2348 register char *opnd;
2349 register I32 c;
3280af22 2350 register char *loceol = PL_regeol;
a0ed51b3 2351 register I32 hardcount = 0;
a0d0e21e 2352
3280af22 2353 scan = PL_reginput;
c277df42 2354 if (max != REG_INFTY && max < loceol - scan)
a0d0e21e 2355 loceol = scan + max;
161b471a 2356 opnd = (char *) OPERAND(p);
a0d0e21e 2357 switch (OP(p)) {
2358 case ANY:
2359 while (scan < loceol && *scan != '\n')
2360 scan++;
2361 break;
2362 case SANY:
2363 scan = loceol;
2364 break;
a0ed51b3 2365 case ANYUTF8:
2366 loceol = PL_regeol;
2367 while (scan < loceol && *scan != '\n') {
2368 scan += UTF8SKIP(scan);
2369 hardcount++;
2370 }
2371 break;
2372 case SANYUTF8:
2373 loceol = PL_regeol;
2374 while (scan < loceol) {
2375 scan += UTF8SKIP(scan);
2376 hardcount++;
2377 }
2378 break;
bbce6d69 2379 case EXACT: /* length of string is 1 */
2380 c = UCHARAT(++opnd);
2381 while (scan < loceol && UCHARAT(scan) == c)
2382 scan++;
2383 break;
2384 case EXACTF: /* length of string is 1 */
2385 c = UCHARAT(++opnd);
2386 while (scan < loceol &&
2387 (UCHARAT(scan) == c || UCHARAT(scan) == fold[c]))
2388 scan++;
2389 break;
2390 case EXACTFL: /* length of string is 1 */
3280af22 2391 PL_reg_flags |= RF_tainted;
bbce6d69 2392 c = UCHARAT(++opnd);
2393 while (scan < loceol &&
2394 (UCHARAT(scan) == c || UCHARAT(scan) == fold_locale[c]))
a0d0e21e 2395 scan++;
2396 break;
a0ed51b3 2397 case ANYOFUTF8:
2398 loceol = PL_regeol;
2399 while (scan < loceol && REGINCLASSUTF8(p, (U8*)scan)) {
2400 scan += UTF8SKIP(scan);
2401 hardcount++;
2402 }
2403 break;
a0d0e21e 2404 case ANYOF:
ae5c130c 2405 while (scan < loceol && REGINCLASS(opnd, *scan))
a0d0e21e 2406 scan++;
a0d0e21e 2407 break;
2408 case ALNUM:
2409 while (scan < loceol && isALNUM(*scan))
2410 scan++;
2411 break;
a0ed51b3 2412 case ALNUMUTF8:
2413 loceol = PL_regeol;
dfe13c55 2414 while (scan < loceol && swash_fetch(PL_utf8_alnum, (U8*)scan)) {
a0ed51b3 2415 scan += UTF8SKIP(scan);
2416 hardcount++;
2417 }
2418 break;
bbce6d69 2419 case ALNUML:
3280af22 2420 PL_reg_flags |= RF_tainted;
bbce6d69 2421 while (scan < loceol && isALNUM_LC(*scan))
2422 scan++;
2423 break;
a0ed51b3 2424 case ALNUMLUTF8:
2425 PL_reg_flags |= RF_tainted;
2426 loceol = PL_regeol;
dfe13c55 2427 while (scan < loceol && isALNUM_LC_utf8((U8*)scan)) {
a0ed51b3 2428 scan += UTF8SKIP(scan);
2429 hardcount++;
2430 }
2431 break;
2432 break;
a0d0e21e 2433 case NALNUM:
2434 while (scan < loceol && !isALNUM(*scan))
2435 scan++;
2436 break;
a0ed51b3 2437 case NALNUMUTF8:
2438 loceol = PL_regeol;
dfe13c55 2439 while (scan < loceol && !swash_fetch(PL_utf8_alnum, (U8*)scan)) {
a0ed51b3 2440 scan += UTF8SKIP(scan);
2441 hardcount++;
2442 }
2443 break;
bbce6d69 2444 case NALNUML:
3280af22 2445 PL_reg_flags |= RF_tainted;
bbce6d69 2446 while (scan < loceol && !isALNUM_LC(*scan))
2447 scan++;
2448 break;
a0ed51b3 2449 case NALNUMLUTF8:
2450 PL_reg_flags |= RF_tainted;
2451 loceol = PL_regeol;
dfe13c55 2452 while (scan < loceol && !isALNUM_LC_utf8((U8*)scan)) {
a0ed51b3 2453 scan += UTF8SKIP(scan);
2454 hardcount++;
2455 }
2456 break;
a0d0e21e 2457 case SPACE:
2458 while (scan < loceol && isSPACE(*scan))
2459 scan++;
2460 break;
a0ed51b3 2461 case SPACEUTF8:
2462 loceol = PL_regeol;
dfe13c55 2463 while (scan < loceol && (*scan == ' ' || swash_fetch(PL_utf8_space,(U8*)scan))) {
a0ed51b3 2464 scan += UTF8SKIP(scan);
2465 hardcount++;
2466 }
2467 break;
bbce6d69 2468 case SPACEL:
3280af22 2469 PL_reg_flags |= RF_tainted;
bbce6d69 2470 while (scan < loceol && isSPACE_LC(*scan))
2471 scan++;
2472 break;
a0ed51b3 2473 case SPACELUTF8:
2474 PL_reg_flags |= RF_tainted;
2475 loceol = PL_regeol;
dfe13c55 2476 while (scan < loceol && (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
a0ed51b3 2477 scan += UTF8SKIP(scan);
2478 hardcount++;
2479 }
2480 break;
a0d0e21e 2481 case NSPACE:
2482 while (scan < loceol && !isSPACE(*scan))
2483 scan++;
2484 break;
a0ed51b3 2485 case NSPACEUTF8:
2486 loceol = PL_regeol;
dfe13c55 2487 while (scan < loceol && !(*scan == ' ' || swash_fetch(PL_utf8_space,(U8*)scan))) {
a0ed51b3 2488 scan += UTF8SKIP(scan);
2489 hardcount++;
2490 }
2491 break;
bbce6d69 2492 case NSPACEL:
3280af22 2493 PL_reg_flags |= RF_tainted;
bbce6d69 2494 while (scan < loceol && !isSPACE_LC(*scan))
2495 scan++;
2496 break;
a0ed51b3 2497 case NSPACELUTF8:
2498 PL_reg_flags |= RF_tainted;
2499 loceol = PL_regeol;
dfe13c55 2500 while (scan < loceol && !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
a0ed51b3 2501 scan += UTF8SKIP(scan);
2502 hardcount++;
2503 }
2504 break;
a0d0e21e 2505 case DIGIT:
2506 while (scan < loceol && isDIGIT(*scan))
2507 scan++;
2508 break;
a0ed51b3 2509 case DIGITUTF8:
2510 loceol = PL_regeol;
dfe13c55 2511 while (scan < loceol && swash_fetch(PL_utf8_digit,(U8*)scan)) {
a0ed51b3 2512 scan += UTF8SKIP(scan);
2513 hardcount++;
2514 }
2515 break;
2516 break;
a0d0e21e 2517 case NDIGIT:
2518 while (scan < loceol && !isDIGIT(*scan))
2519 scan++;
2520 break;
a0ed51b3 2521 case NDIGITUTF8:
2522 loceol = PL_regeol;
dfe13c55 2523 while (scan < loceol && !swash_fetch(PL_utf8_digit,(U8*)scan)) {
a0ed51b3 2524 scan += UTF8SKIP(scan);
2525 hardcount++;
2526 }
2527 break;
a0d0e21e 2528 default: /* Called on something of 0 width. */
2529 break; /* So match right here or not at all. */
2530 }
a687059c 2531
a0ed51b3 2532 if (hardcount)
2533 c = hardcount;
2534 else
2535 c = scan - PL_reginput;
3280af22 2536 PL_reginput = scan;
a687059c 2537
c277df42 2538 DEBUG_r(
2539 {
2540 SV *prop = sv_newmortal();
2541
2542 regprop(prop, p);
2543 PerlIO_printf(Perl_debug_log,
2544 "%*s %s can match %ld times out of %ld...\n",
2545 REPORT_CODE_OFF+1, "", SvPVX(prop),c,max);
2546 });
2547
a0d0e21e 2548 return(c);
a687059c 2549}
2550
2551/*
c277df42 2552 - regrepeat_hard - repeatedly match something, report total lenth and length
2553 *
2554 * The repeater is supposed to have constant length.
2555 */
2556
76e3520e 2557STATIC I32
c277df42 2558regrepeat_hard(regnode *p, I32 max, I32 *lp)
2559{
5c0ca799 2560 dTHR;
c277df42 2561 register char *scan;
2562 register char *start;
3280af22 2563 register char *loceol = PL_regeol;
a0ed51b3 2564 I32 l = 0;
708e3b05 2565 I32 count = 0, res = 1;
a0ed51b3 2566
2567 if (!max)
2568 return 0;
c277df42 2569
3280af22 2570 start = PL_reginput;
a0ed51b3 2571 if (UTF) {
708e3b05 2572 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3 2573 if (!count++) {
2574 l = 0;
2575 while (start < PL_reginput) {
2576 l++;
2577 start += UTF8SKIP(start);
2578 }
2579 *lp = l;
2580 if (l == 0)
2581 return max;
2582 }
2583 if (count == max)
2584 return count;
2585 }
2586 }
2587 else {
708e3b05 2588 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3 2589 if (!count++) {
2590 *lp = l = PL_reginput - start;
2591 if (max != REG_INFTY && l*max < loceol - scan)
2592 loceol = scan + l*max;
2593 if (l == 0)
2594 return max;
c277df42 2595 }
2596 }
2597 }
708e3b05 2598 if (!res)
3280af22 2599 PL_reginput = scan;
c277df42 2600
a0ed51b3 2601 return count;
c277df42 2602}
2603
2604/*
bbce6d69 2605 - regclass - determine if a character falls into a character class
2606 */
2607
76e3520e 2608STATIC bool
8ac85365 2609reginclass(register char *p, register I32 c)
bbce6d69 2610{
5c0ca799 2611 dTHR;
bbce6d69 2612 char flags = *p;
2613 bool match = FALSE;
2614
2615 c &= 0xFF;
ae5c130c 2616 if (ANYOF_TEST(p, c))
bbce6d69 2617 match = TRUE;
2618 else if (flags & ANYOF_FOLD) {
2619 I32 cf;
2620 if (flags & ANYOF_LOCALE) {
3280af22 2621 PL_reg_flags |= RF_tainted;
bbce6d69 2622 cf = fold_locale[c];
2623 }
2624 else
2625 cf = fold[c];
ae5c130c 2626 if (ANYOF_TEST(p, cf))
bbce6d69 2627 match = TRUE;
2628 }
2629
2630 if (!match && (flags & ANYOF_ISA)) {
3280af22 2631 PL_reg_flags |= RF_tainted;
bbce6d69 2632
2633 if (((flags & ANYOF_ALNUML) && isALNUM_LC(c)) ||
2634 ((flags & ANYOF_NALNUML) && !isALNUM_LC(c)) ||
2635 ((flags & ANYOF_SPACEL) && isSPACE_LC(c)) ||
2636 ((flags & ANYOF_NSPACEL) && !isSPACE_LC(c)))
2637 {
2638 match = TRUE;
2639 }
2640 }
2641
ae5c130c 2642 return (flags & ANYOF_INVERT) ? !match : match;
bbce6d69 2643}
2644
a0ed51b3 2645STATIC bool
2646reginclassutf8(regnode *f, U8 *p)
c485e607 2647{
2648 dTHR;
a0ed51b3 2649 char flags = ARG1(f);
2650 bool match = FALSE;
2651 SV *sv = (SV*)PL_regdata->data[ARG2(f)];
2652
2653 if (swash_fetch(sv, p))
2654 match = TRUE;
2655 else if (flags & ANYOF_FOLD) {
2656 I32 cf;
dfe13c55 2657 U8 tmpbuf[10];
a0ed51b3 2658 if (flags & ANYOF_LOCALE) {
2659 PL_reg_flags |= RF_tainted;
2660 uv_to_utf8(tmpbuf, toLOWER_LC_utf8(p));
2661 }
2662 else
2663 uv_to_utf8(tmpbuf, toLOWER_utf8(p));
2664 if (swash_fetch(sv, tmpbuf))
2665 match = TRUE;
2666 }
2667
2668 if (!match && (flags & ANYOF_ISA)) {
2669 PL_reg_flags |= RF_tainted;
2670
2671 if (((flags & ANYOF_ALNUML) && isALNUM_LC_utf8(p)) ||
2672 ((flags & ANYOF_NALNUML) && !isALNUM_LC_utf8(p)) ||
2673 ((flags & ANYOF_SPACEL) && isSPACE_LC_utf8(p)) ||
2674 ((flags & ANYOF_NSPACEL) && !isSPACE_LC_utf8(p)))
2675 {
2676 match = TRUE;
2677 }
2678 }
2679
2680 return (flags & ANYOF_INVERT) ? !match : match;
2681}
161b471a 2682
dfe13c55 2683STATIC U8 *
2684reghop(U8 *s, I32 off)
c485e607 2685{
2686 dTHR;
a0ed51b3 2687 if (off >= 0) {
2688 while (off-- && s < (U8*)PL_regeol)
2689 s += UTF8SKIP(s);
2690 }
2691 else {
2692 while (off++) {
2693 if (s > (U8*)PL_bostr) {
2694 s--;
2695 if (*s & 0x80) {
2696 while (s > (U8*)PL_bostr && (*s & 0xc0) == 0x80)
2697 s--;
2698 } /* XXX could check well-formedness here */
2699 }
2700 }
2701 }
2702 return s;
2703}
161b471a 2704
dfe13c55 2705STATIC U8 *
2706reghopmaybe(U8* s, I32 off)
a0ed51b3 2707{
c485e607 2708 dTHR;
a0ed51b3 2709 if (off >= 0) {
2710 while (off-- && s < (U8*)PL_regeol)
2711 s += UTF8SKIP(s);
2712 if (off >= 0)
2713 return 0;
2714 }
2715 else {
2716 while (off++) {
2717 if (s > (U8*)PL_bostr) {
2718 s--;
2719 if (*s & 0x80) {
2720 while (s > (U8*)PL_bostr && (*s & 0xc0) == 0x80)
2721 s--;
2722 } /* XXX could check well-formedness here */
2723 }
2724 else
2725 break;
2726 }
2727 if (off <= 0)
2728 return 0;
2729 }
2730 return s;
2731}