up patchlevel to 5.005_50
[p5sagit/p5-mst-13.2.git] / regexec.c
CommitLineData
a0d0e21e 1/* regexec.c
2 */
3
4/*
5 * "One Ring to rule them all, One Ring to find them..."
6 */
7
a687059c 8/* NOTE: this is derived from Henry Spencer's regexp code, and should not
9 * confused with the original package (see point 3 below). Thanks, Henry!
10 */
11
12/* Additional note: this code is very heavily munged from Henry's version
13 * in places. In some spots I've traded clarity for efficiency, so don't
14 * blame Henry for some of the lack of readability.
15 */
16
e50aee73 17/* The names of the functions have been changed from regcomp and
18 * regexec to pregcomp and pregexec in order to avoid conflicts
19 * with the POSIX routines of the same names.
20*/
21
b9d5759e 22#ifdef PERL_EXT_RE_BUILD
23/* need to replace pregcomp et al, so enable that */
24# ifndef PERL_IN_XSUB_RE
25# define PERL_IN_XSUB_RE
26# endif
27/* need access to debugger hooks */
28# ifndef DEBUGGING
29# define DEBUGGING
30# endif
31#endif
32
33#ifdef PERL_IN_XSUB_RE
d06ea78c 34/* We *really* need to overwrite these symbols: */
56953603 35# define Perl_regexec_flags my_regexec
36# define Perl_regdump my_regdump
37# define Perl_regprop my_regprop
d06ea78c 38/* *These* symbols are masked to allow static link. */
39# define Perl_pregexec my_pregexec
56953603 40#endif
41
f0fcb552 42/*SUPPRESS 112*/
a687059c 43/*
e50aee73 44 * pregcomp and pregexec -- regsub and regerror are not used in perl
a687059c 45 *
46 * Copyright (c) 1986 by University of Toronto.
47 * Written by Henry Spencer. Not derived from licensed software.
48 *
49 * Permission is granted to anyone to use this software for any
50 * purpose on any computer system, and to redistribute it freely,
51 * subject to the following restrictions:
52 *
53 * 1. The author is not responsible for the consequences of use of
54 * this software, no matter how awful, even if they arise
55 * from defects in it.
56 *
57 * 2. The origin of this software must not be misrepresented, either
58 * by explicit claim or by omission.
59 *
60 * 3. Altered versions must be plainly marked as such, and must not
61 * be misrepresented as being the original software.
62 *
63 **** Alterations to Henry's code are...
64 ****
a0ed51b3 65 **** Copyright (c) 1991-1998, Larry Wall
a687059c 66 ****
9ef589d8 67 **** You may distribute under the terms of either the GNU General Public
68 **** License or the Artistic License, as specified in the README file.
a687059c 69 *
70 * Beware that some of this code is subtly aware of the way operator
71 * precedence is structured in regular expressions. Serious changes in
72 * regular-expression syntax might require a total rethink.
73 */
74#include "EXTERN.h"
75#include "perl.h"
76#include "regcomp.h"
77
c277df42 78#define RF_tainted 1 /* tainted information used? */
79#define RF_warned 2 /* warned about big count? */
ce862d02 80#define RF_evaled 4 /* Did an EVAL with setting? */
a0ed51b3 81#define RF_utf8 8 /* String contains multibyte chars? */
82
83#define UTF (PL_reg_flags & RF_utf8)
ce862d02 84
85#define RS_init 1 /* eval environment created */
86#define RS_set 2 /* replsv value is set */
c277df42 87
a687059c 88#ifndef STATIC
89#define STATIC static
90#endif
91
76e3520e 92#ifndef PERL_OBJECT
a0d0e21e 93typedef I32 CHECKPOINT;
94
c277df42 95/*
96 * Forwards.
97 */
98
99static I32 regmatch _((regnode *prog));
100static I32 regrepeat _((regnode *p, I32 max));
101static I32 regrepeat_hard _((regnode *p, I32 max, I32 *lp));
102static I32 regtry _((regexp *prog, char *startpos));
ae5c130c 103
c277df42 104static bool reginclass _((char *p, I32 c));
a0ed51b3 105static bool reginclassutf8 _((regnode *f, U8* p));
55497cff 106static CHECKPOINT regcppush _((I32 parenfloor));
107static char * regcppop _((void));
76e3520e 108#endif
a0ed51b3 109
ae5c130c 110#define REGINCLASS(p,c) (*(p) ? reginclass(p,c) : ANYOF_TEST(p,c))
a0ed51b3 111#define REGINCLASSUTF8(f,p) (ARG1(f) ? reginclassutf8(f,p) : swash_fetch((SV*)PL_regdata->data[ARG2(f)],p))
112
113#define CHR_SVLEN(sv) (UTF ? sv_len_utf8(sv) : SvCUR(sv))
114#define CHR_DIST(a,b) (UTF ? utf8_distance(a,b) : a - b)
115
116static char * reghop _((unsigned char *pos, I32 off));
117static char * reghopmaybe _((unsigned char *pos, I32 off));
118#define HOP(pos,off) (UTF ? reghop(pos, off) : (pos + off))
119#define HOPMAYBE(pos,off) (UTF ? reghopmaybe(pos, off) : (pos + off))
a0d0e21e 120
76e3520e 121STATIC CHECKPOINT
8ac85365 122regcppush(I32 parenfloor)
a0d0e21e 123{
11343788 124 dTHR;
3280af22 125 int retval = PL_savestack_ix;
126 int i = (PL_regsize - parenfloor) * 4;
a0d0e21e 127 int p;
128
129 SSCHECK(i + 5);
3280af22 130 for (p = PL_regsize; p > parenfloor; p--) {
131 SSPUSHPTR(PL_regendp[p]);
132 SSPUSHPTR(PL_regstartp[p]);
133 SSPUSHPTR(PL_reg_start_tmp[p]);
a0d0e21e 134 SSPUSHINT(p);
135 }
3280af22 136 SSPUSHINT(PL_regsize);
137 SSPUSHINT(*PL_reglastparen);
138 SSPUSHPTR(PL_reginput);
a0d0e21e 139 SSPUSHINT(i + 3);
140 SSPUSHINT(SAVEt_REGCONTEXT);
141 return retval;
142}
143
c277df42 144/* These are needed since we do not localize EVAL nodes: */
c3464db5 145# define REGCP_SET DEBUG_r(PerlIO_printf(Perl_debug_log, \
146 " Setting an EVAL scope, savestack=%i\n", \
3280af22 147 PL_savestack_ix)); lastcp = PL_savestack_ix
c3464db5 148
3280af22 149# define REGCP_UNWIND DEBUG_r(lastcp != PL_savestack_ix ? \
c3464db5 150 PerlIO_printf(Perl_debug_log, \
151 " Clearing an EVAL scope, savestack=%i..%i\n", \
3280af22 152 lastcp, PL_savestack_ix) : 0); regcpblow(lastcp)
c277df42 153
76e3520e 154STATIC char *
8ac85365 155regcppop(void)
a0d0e21e 156{
11343788 157 dTHR;
a0d0e21e 158 I32 i = SSPOPINT;
159 U32 paren = 0;
160 char *input;
161 char *tmps;
162 assert(i == SAVEt_REGCONTEXT);
163 i = SSPOPINT;
164 input = (char *) SSPOPPTR;
3280af22 165 *PL_reglastparen = SSPOPINT;
166 PL_regsize = SSPOPINT;
c277df42 167 for (i -= 3; i > 0; i -= 4) {
a0d0e21e 168 paren = (U32)SSPOPINT;
3280af22 169 PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
170 PL_regstartp[paren] = (char *) SSPOPPTR;
a0d0e21e 171 tmps = (char*)SSPOPPTR;
3280af22 172 if (paren <= *PL_reglastparen)
173 PL_regendp[paren] = tmps;
c277df42 174 DEBUG_r(
c3464db5 175 PerlIO_printf(Perl_debug_log,
176 " restoring \\%d to %d(%d)..%d%s\n",
3280af22 177 paren, PL_regstartp[paren] - PL_regbol,
178 PL_reg_start_tmp[paren] - PL_regbol,
179 PL_regendp[paren] - PL_regbol,
180 (paren > *PL_reglastparen ? "(no)" : ""));
c277df42 181 );
a0d0e21e 182 }
c277df42 183 DEBUG_r(
3280af22 184 if (*PL_reglastparen + 1 <= PL_regnpar) {
c3464db5 185 PerlIO_printf(Perl_debug_log,
186 " restoring \\%d..\\%d to undef\n",
3280af22 187 *PL_reglastparen + 1, PL_regnpar);
c277df42 188 }
189 );
3280af22 190 for (paren = *PL_reglastparen + 1; paren <= PL_regnpar; paren++) {
191 if (paren > PL_regsize)
192 PL_regstartp[paren] = Nullch;
193 PL_regendp[paren] = Nullch;
a0d0e21e 194 }
195 return input;
196}
197
c277df42 198#define regcpblow(cp) LEAVE_SCOPE(cp)
a0d0e21e 199
a687059c 200/*
e50aee73 201 * pregexec and friends
a687059c 202 */
203
204/*
c277df42 205 - pregexec - match a regexp against a string
a687059c 206 */
c277df42 207I32
c3464db5 208pregexec(register regexp *prog, char *stringarg, register char *strend,
209 char *strbeg, I32 minend, SV *screamer, U32 nosave)
c277df42 210/* strend: pointer to null at end of string */
211/* strbeg: real beginning of string */
212/* minend: end of match must be >=minend after stringarg. */
213/* nosave: For optimizations. */
214{
215 return
216 regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
217 nosave ? 0 : REXEC_COPY_STR);
218}
219
a687059c 220/*
c277df42 221 - regexec_flags - match a regexp against a string
a687059c 222 */
79072805 223I32
c3464db5 224regexec_flags(register regexp *prog, char *stringarg, register char *strend,
225 char *strbeg, I32 minend, SV *screamer, void *data, U32 flags)
c277df42 226/* strend: pointer to null at end of string */
227/* strbeg: real beginning of string */
228/* minend: end of match must be >=minend after stringarg. */
229/* data: May be used for some additional optimizations. */
230/* nosave: For optimizations. */
a687059c 231{
5c0ca799 232 dTHR;
a0d0e21e 233 register char *s;
c277df42 234 register regnode *c;
a0d0e21e 235 register char *startpos = stringarg;
236 register I32 tmp;
c277df42 237 I32 minlen; /* must match at least this many chars */
a0d0e21e 238 I32 dontbother = 0; /* how many characters not to try at end */
239 CURCUR cc;
c277df42 240 I32 start_shift = 0; /* Offset of the start to find
a0ed51b3 241 constant substr. */ /* CC */
242 I32 end_shift = 0; /* Same for the end. */ /* CC */
c277df42 243 I32 scream_pos = -1; /* Internal iterator of scream. */
244 char *scream_olds;
3280af22 245 SV* oreplsv = GvSV(PL_replgv);
a687059c 246
a0d0e21e 247 cc.cur = 0;
4633a7c4 248 cc.oldcc = 0;
3280af22 249 PL_regcc = &cc;
a0d0e21e 250
3280af22 251 PL_regprecomp = prog->precomp; /* Needed for error messages. */
a0d0e21e 252#ifdef DEBUGGING
3280af22 253 PL_regnarrate = PL_debug & 512;
254 PL_regprogram = prog->program;
a0d0e21e 255#endif
256
257 /* Be paranoid... */
258 if (prog == NULL || startpos == NULL) {
259 croak("NULL regexp parameter");
260 return 0;
261 }
262
c277df42 263 minlen = prog->minlen;
264 if (strend - startpos < minlen) goto phooey;
265
a0d0e21e 266 if (startpos == strbeg) /* is ^ valid at stringarg? */
3280af22 267 PL_regprev = '\n';
a0d0e21e 268 else {
a0ed51b3 269 PL_regprev = (U32)stringarg[-1];
3280af22 270 if (!PL_multiline && PL_regprev == '\n')
271 PL_regprev = '\0'; /* force ^ to NOT match */
a0d0e21e 272 }
bbce6d69 273
a0d0e21e 274 /* Check validity of program. */
275 if (UCHARAT(prog->program) != MAGIC) {
276 FAIL("corrupted regexp program");
277 }
278
3280af22 279 PL_regnpar = prog->nparens;
280 PL_reg_flags = 0;
281 PL_reg_eval_set = 0;
a0d0e21e 282
a0ed51b3 283 if (prog->reganch & ROPT_UTF8)
284 PL_reg_flags |= RF_utf8;
285
286 /* Mark beginning of line for ^ and lookbehind. */
287 PL_regbol = startpos;
288 PL_bostr = strbeg;
289
290 /* Mark end of line for $ (and such) */
291 PL_regeol = strend;
292
293 /* see how far we have to get to not match where we matched before */
294 PL_regtill = startpos+minend;
295
a0d0e21e 296 /* If there is a "must appear" string, look for it. */
297 s = startpos;
c277df42 298 if (!(flags & REXEC_CHECKED)
299 && prog->check_substr != Nullsv &&
774d564b 300 !(prog->reganch & ROPT_ANCH_GPOS) &&
c277df42 301 (!(prog->reganch & (ROPT_ANCH_BOL | ROPT_ANCH_MBOL))
3280af22 302 || (PL_multiline && prog->check_substr == prog->anchored_substr)) )
a0d0e21e 303 {
a0ed51b3 304 char *t;
305 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
c277df42 306 /* Should be nonnegative! */
a0ed51b3 307 end_shift = minlen - start_shift - CHR_SVLEN(prog->check_substr);
c277df42 308 if (screamer) {
3280af22 309 if (PL_screamfirst[BmRARE(prog->check_substr)] >= 0)
c277df42 310 s = screaminstr(screamer, prog->check_substr,
311 start_shift + (stringarg - strbeg),
312 end_shift, &scream_pos, 0);
a0d0e21e 313 else
314 s = Nullch;
c277df42 315 scream_olds = s;
0a12ae7d 316 }
a0d0e21e 317 else
c277df42 318 s = fbm_instr((unsigned char*)s + start_shift,
319 (unsigned char*)strend - end_shift,
411d5715 320 prog->check_substr, 0);
a0d0e21e 321 if (!s) {
c277df42 322 ++BmUSEFUL(prog->check_substr); /* hooray */
a0d0e21e 323 goto phooey; /* not present */
a0ed51b3 324 }
325 else if (s - stringarg > prog->check_offset_max &&
326 (UTF
327 ? ((t = reghopmaybe(s, -(prog->check_offset_max))) && t >= stringarg)
328 : (t = s - prog->check_offset_max) != 0
329 )
330 )
331 {
c277df42 332 ++BmUSEFUL(prog->check_substr); /* hooray/2 */
a0ed51b3 333 s = t;
334 }
335 else if (!(prog->reganch & ROPT_NAUGHTY)
c277df42 336 && --BmUSEFUL(prog->check_substr) < 0
337 && prog->check_substr == prog->float_substr) { /* boo */
338 SvREFCNT_dec(prog->check_substr);
339 prog->check_substr = Nullsv; /* disable */
340 prog->float_substr = Nullsv; /* clear */
a0d0e21e 341 s = startpos;
a0ed51b3 342 }
343 else
344 s = startpos;
a0d0e21e 345 }
a687059c 346
c277df42 347 DEBUG_r(
348 PerlIO_printf(Perl_debug_log,
349 "Matching `%.60s%s' against `%.*s%s'\n",
350 prog->precomp,
351 (strlen(prog->precomp) > 60 ? "..." : ""),
352 (strend - startpos > 60 ? 60 : strend - startpos),
353 startpos,
354 (strend - startpos > 60 ? "..." : ""))
355 );
356
a0ed51b3 357 PL_regdata = prog->data;
358
a0d0e21e 359 /* Simplest case: anchored match need be tried only once. */
774d564b 360 /* [unless only anchor is BOL and multiline is set] */
a0d0e21e 361 if (prog->reganch & ROPT_ANCH) {
362 if (regtry(prog, startpos))
363 goto got_it;
774d564b 364 else if (!(prog->reganch & ROPT_ANCH_GPOS) &&
3280af22 365 (PL_multiline || (prog->reganch & ROPT_IMPLICIT)
c277df42 366 || (prog->reganch & ROPT_ANCH_MBOL)))
774d564b 367 {
a0d0e21e 368 if (minlen)
369 dontbother = minlen - 1;
a0ed51b3 370 strend = HOP(strend, -dontbother);
a0d0e21e 371 /* for multiline we only have to try after newlines */
372 if (s > startpos)
373 s--;
374 while (s < strend) {
a0ed51b3 375 if (*s++ == '\n') { /* don't need utf8skip here */
a0d0e21e 376 if (s < strend && regtry(prog, s))
377 goto got_it;
378 }
35c8bce7 379 }
35c8bce7 380 }
a0d0e21e 381 goto phooey;
382 }
35c8bce7 383
a0d0e21e 384 /* Messy cases: unanchored match. */
c277df42 385 if (prog->anchored_substr && prog->reganch & ROPT_SKIP) {
386 /* we have /x+whatever/ */
387 /* it must be a one character string */
388 char ch = SvPVX(prog->anchored_substr)[0];
a0ed51b3 389 if (UTF) {
390 while (s < strend) {
391 if (*s == ch) {
392 if (regtry(prog, s)) goto got_it;
393 s += UTF8SKIP(s);
394 while (s < strend && *s == ch)
395 s += UTF8SKIP(s);
396 }
397 s += UTF8SKIP(s);
398 }
399 }
400 else {
401 while (s < strend) {
402 if (*s == ch) {
403 if (regtry(prog, s)) goto got_it;
c277df42 404 s++;
a0ed51b3 405 while (s < strend && *s == ch)
406 s++;
407 }
408 s++;
a0d0e21e 409 }
a687059c 410 }
c277df42 411 }
412 /*SUPPRESS 560*/
413 else if (prog->anchored_substr != Nullsv
414 || (prog->float_substr != Nullsv
415 && prog->float_max_offset < strend - s)) {
416 SV *must = prog->anchored_substr
417 ? prog->anchored_substr : prog->float_substr;
418 I32 back_max =
419 prog->anchored_substr ? prog->anchored_offset : prog->float_max_offset;
420 I32 back_min =
421 prog->anchored_substr ? prog->anchored_offset : prog->float_min_offset;
422 I32 delta = back_max - back_min;
a0ed51b3 423 char *last = HOP(strend, -(CHR_SVLEN(must) + back_min)); /* Cannot start after this */
424 char *last1; /* Last position checked before */
425
426 if (s > PL_bostr)
427 last1 = HOP(s, -1);
428 else
429 last1 = s - 1; /* bogus */
c277df42 430
431 /* XXXX check_substr already used to find `s', can optimize if
432 check_substr==must. */
433 scream_pos = -1;
434 dontbother = end_shift;
a0ed51b3 435 strend = HOP(strend, -dontbother);
c277df42 436 while ( (s <= last) &&
437 (screamer
a0ed51b3 438 ? (s = screaminstr(screamer, must, HOP(s, back_min) - strbeg,
c277df42 439 end_shift, &scream_pos, 0))
a0ed51b3 440 : (s = fbm_instr((unsigned char*)HOP(s, back_min),
411d5715 441 (unsigned char*)strend, must, 0))) ) {
a0ed51b3 442 if (HOP(s, -back_max) > last1) {
443 last1 = HOP(s, -back_min);
444 s = HOP(s, -back_max);
445 }
446 else {
447 char *t = (last1 >= PL_bostr) ? HOP(last1, 1) : last + 1;
c277df42 448
a0ed51b3 449 last1 = HOP(s, -back_min);
c277df42 450 s = t;
a0d0e21e 451 }
a0ed51b3 452 if (UTF) {
453 while (s <= last1) {
454 if (regtry(prog, s))
455 goto got_it;
456 s += UTF8SKIP(s);
457 }
458 }
459 else {
460 while (s <= last1) {
461 if (regtry(prog, s))
462 goto got_it;
463 s++;
464 }
a0d0e21e 465 }
466 }
467 goto phooey;
a0ed51b3 468 }
469 else if (c = prog->regstclass) {
a0d0e21e 470 I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
a0ed51b3 471 char *cc;
a687059c 472
a0d0e21e 473 if (minlen)
474 dontbother = minlen - 1;
a0ed51b3 475 strend = HOP(strend, -dontbother); /* don't bother with what can't match */
a0d0e21e 476 tmp = 1;
477 /* We know what class it must start with. */
478 switch (OP(c)) {
a0ed51b3 479 case ANYOFUTF8:
480 cc = (char *) OPERAND(c);
481 while (s < strend) {
482 if (REGINCLASSUTF8(c, (U8*)s)) {
483 if (tmp && regtry(prog, s))
484 goto got_it;
485 else
486 tmp = doevery;
487 }
488 else
489 tmp = 1;
490 s += UTF8SKIP(s);
491 }
492 break;
a0d0e21e 493 case ANYOF:
a0ed51b3 494 cc = (char *) OPERAND(c);
a0d0e21e 495 while (s < strend) {
a0ed51b3 496 if (REGINCLASS(cc, *s)) {
a0d0e21e 497 if (tmp && regtry(prog, s))
498 goto got_it;
499 else
500 tmp = doevery;
a687059c 501 }
a0d0e21e 502 else
503 tmp = 1;
504 s++;
505 }
506 break;
bbce6d69 507 case BOUNDL:
3280af22 508 PL_reg_flags |= RF_tainted;
bbce6d69 509 /* FALL THROUGH */
a0d0e21e 510 case BOUND:
a0ed51b3 511 if (minlen) {
512 dontbother++;
513 strend -= 1;
514 }
3280af22 515 tmp = (s != startpos) ? UCHARAT(s - 1) : PL_regprev;
95bac841 516 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
a0d0e21e 517 while (s < strend) {
95bac841 518 if (tmp == !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
a0d0e21e 519 tmp = !tmp;
520 if (regtry(prog, s))
521 goto got_it;
a687059c 522 }
a0d0e21e 523 s++;
524 }
525 if ((minlen || tmp) && regtry(prog,s))
526 goto got_it;
527 break;
a0ed51b3 528 case BOUNDLUTF8:
529 PL_reg_flags |= RF_tainted;
530 /* FALL THROUGH */
531 case BOUNDUTF8:
532 if (minlen) {
533 dontbother++;
534 strend = reghop(strend, -1);
535 }
536 tmp = (I32)(s != startpos) ? utf8_to_uv(reghop(s, -1), 0) : PL_regprev;
537 tmp = ((OP(c) == BOUND ? isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
538 while (s < strend) {
539 if (tmp == !(OP(c) == BOUND ? swash_fetch(PL_utf8_alnum, s) : isALNUM_LC_utf8(s))) {
540 tmp = !tmp;
541 if (regtry(prog, s))
542 goto got_it;
543 }
544 s += UTF8SKIP(s);
545 }
546 if ((minlen || tmp) && regtry(prog,s))
547 goto got_it;
548 break;
bbce6d69 549 case NBOUNDL:
3280af22 550 PL_reg_flags |= RF_tainted;
bbce6d69 551 /* FALL THROUGH */
a0d0e21e 552 case NBOUND:
a0ed51b3 553 if (minlen) {
554 dontbother++;
555 strend -= 1;
556 }
3280af22 557 tmp = (s != startpos) ? UCHARAT(s - 1) : PL_regprev;
95bac841 558 tmp = ((OP(c) == NBOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
a0d0e21e 559 while (s < strend) {
95bac841 560 if (tmp == !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
a0d0e21e 561 tmp = !tmp;
562 else if (regtry(prog, s))
563 goto got_it;
564 s++;
565 }
566 if ((minlen || !tmp) && regtry(prog,s))
567 goto got_it;
568 break;
a0ed51b3 569 case NBOUNDLUTF8:
570 PL_reg_flags |= RF_tainted;
571 /* FALL THROUGH */
572 case NBOUNDUTF8:
573 if (minlen) {
574 dontbother++;
575 strend = reghop(strend, -1);
576 }
577 tmp = (I32)(s != startpos) ? utf8_to_uv(reghop(s, -1), 0) : PL_regprev;
578 tmp = ((OP(c) == NBOUND ? isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
579 while (s < strend) {
580 if (tmp == !(OP(c) == NBOUND ? swash_fetch(PL_utf8_alnum, s) : isALNUM_LC_utf8(s)))
581 tmp = !tmp;
582 else if (regtry(prog, s))
583 goto got_it;
584 s += UTF8SKIP(s);
585 }
586 if ((minlen || !tmp) && regtry(prog,s))
587 goto got_it;
588 break;
a0d0e21e 589 case ALNUM:
590 while (s < strend) {
bbce6d69 591 if (isALNUM(*s)) {
592 if (tmp && regtry(prog, s))
593 goto got_it;
594 else
595 tmp = doevery;
596 }
597 else
598 tmp = 1;
599 s++;
600 }
601 break;
a0ed51b3 602 case ALNUMUTF8:
603 while (s < strend) {
604 if (swash_fetch(PL_utf8_alnum, s)) {
605 if (tmp && regtry(prog, s))
606 goto got_it;
607 else
608 tmp = doevery;
609 }
610 else
611 tmp = 1;
612 s += UTF8SKIP(s);
613 }
614 break;
bbce6d69 615 case ALNUML:
3280af22 616 PL_reg_flags |= RF_tainted;
bbce6d69 617 while (s < strend) {
618 if (isALNUM_LC(*s)) {
a0d0e21e 619 if (tmp && regtry(prog, s))
620 goto got_it;
a687059c 621 else
a0d0e21e 622 tmp = doevery;
623 }
624 else
625 tmp = 1;
626 s++;
627 }
628 break;
a0ed51b3 629 case ALNUMLUTF8:
630 PL_reg_flags |= RF_tainted;
631 while (s < strend) {
632 if (isALNUM_LC_utf8(s)) {
633 if (tmp && regtry(prog, s))
634 goto got_it;
635 else
636 tmp = doevery;
637 }
638 else
639 tmp = 1;
640 s += UTF8SKIP(s);
641 }
642 break;
a0d0e21e 643 case NALNUM:
644 while (s < strend) {
bbce6d69 645 if (!isALNUM(*s)) {
646 if (tmp && regtry(prog, s))
647 goto got_it;
648 else
649 tmp = doevery;
650 }
651 else
652 tmp = 1;
653 s++;
654 }
655 break;
a0ed51b3 656 case NALNUMUTF8:
657 while (s < strend) {
658 if (!swash_fetch(PL_utf8_alnum, s)) {
659 if (tmp && regtry(prog, s))
660 goto got_it;
661 else
662 tmp = doevery;
663 }
664 else
665 tmp = 1;
666 s += UTF8SKIP(s);
667 }
668 break;
bbce6d69 669 case NALNUML:
3280af22 670 PL_reg_flags |= RF_tainted;
bbce6d69 671 while (s < strend) {
672 if (!isALNUM_LC(*s)) {
a0d0e21e 673 if (tmp && regtry(prog, s))
674 goto got_it;
a687059c 675 else
a0d0e21e 676 tmp = doevery;
a687059c 677 }
a0d0e21e 678 else
679 tmp = 1;
680 s++;
681 }
682 break;
a0ed51b3 683 case NALNUMLUTF8:
684 PL_reg_flags |= RF_tainted;
685 while (s < strend) {
686 if (!isALNUM_LC_utf8(s)) {
687 if (tmp && regtry(prog, s))
688 goto got_it;
689 else
690 tmp = doevery;
691 }
692 else
693 tmp = 1;
694 s += UTF8SKIP(s);
695 }
696 break;
a0d0e21e 697 case SPACE:
698 while (s < strend) {
699 if (isSPACE(*s)) {
700 if (tmp && regtry(prog, s))
701 goto got_it;
702 else
703 tmp = doevery;
2304df62 704 }
a0d0e21e 705 else
706 tmp = 1;
707 s++;
708 }
709 break;
a0ed51b3 710 case SPACEUTF8:
711 while (s < strend) {
712 if (*s == ' ' || swash_fetch(PL_utf8_space,s)) {
713 if (tmp && regtry(prog, s))
714 goto got_it;
715 else
716 tmp = doevery;
717 }
718 else
719 tmp = 1;
720 s += UTF8SKIP(s);
721 }
722 break;
bbce6d69 723 case SPACEL:
3280af22 724 PL_reg_flags |= RF_tainted;
bbce6d69 725 while (s < strend) {
726 if (isSPACE_LC(*s)) {
727 if (tmp && regtry(prog, s))
728 goto got_it;
729 else
730 tmp = doevery;
731 }
732 else
733 tmp = 1;
734 s++;
735 }
736 break;
a0ed51b3 737 case SPACELUTF8:
738 PL_reg_flags |= RF_tainted;
739 while (s < strend) {
740 if (*s == ' ' || isSPACE_LC_utf8(s)) {
741 if (tmp && regtry(prog, s))
742 goto got_it;
743 else
744 tmp = doevery;
745 }
746 else
747 tmp = 1;
748 s += UTF8SKIP(s);
749 }
750 break;
a0d0e21e 751 case NSPACE:
752 while (s < strend) {
753 if (!isSPACE(*s)) {
754 if (tmp && regtry(prog, s))
755 goto got_it;
756 else
757 tmp = doevery;
a687059c 758 }
a0d0e21e 759 else
760 tmp = 1;
761 s++;
762 }
763 break;
a0ed51b3 764 case NSPACEUTF8:
765 while (s < strend) {
766 if (!(*s == ' ' || swash_fetch(PL_utf8_space,s))) {
767 if (tmp && regtry(prog, s))
768 goto got_it;
769 else
770 tmp = doevery;
771 }
772 else
773 tmp = 1;
774 s += UTF8SKIP(s);
775 }
776 break;
bbce6d69 777 case NSPACEL:
3280af22 778 PL_reg_flags |= RF_tainted;
bbce6d69 779 while (s < strend) {
780 if (!isSPACE_LC(*s)) {
781 if (tmp && regtry(prog, s))
782 goto got_it;
783 else
784 tmp = doevery;
785 }
786 else
787 tmp = 1;
788 s++;
789 }
790 break;
a0ed51b3 791 case NSPACELUTF8:
792 PL_reg_flags |= RF_tainted;
793 while (s < strend) {
794 if (!(*s == ' ' || isSPACE_LC_utf8(s))) {
795 if (tmp && regtry(prog, s))
796 goto got_it;
797 else
798 tmp = doevery;
799 }
800 else
801 tmp = 1;
802 s += UTF8SKIP(s);
803 }
804 break;
a0d0e21e 805 case DIGIT:
806 while (s < strend) {
807 if (isDIGIT(*s)) {
808 if (tmp && regtry(prog, s))
809 goto got_it;
810 else
811 tmp = doevery;
2b69d0c2 812 }
a0d0e21e 813 else
814 tmp = 1;
815 s++;
816 }
817 break;
a0ed51b3 818 case DIGITUTF8:
819 while (s < strend) {
820 if (swash_fetch(PL_utf8_digit,s)) {
821 if (tmp && regtry(prog, s))
822 goto got_it;
823 else
824 tmp = doevery;
825 }
826 else
827 tmp = 1;
828 s += UTF8SKIP(s);
829 }
830 break;
a0d0e21e 831 case NDIGIT:
832 while (s < strend) {
833 if (!isDIGIT(*s)) {
834 if (tmp && regtry(prog, s))
835 goto got_it;
836 else
837 tmp = doevery;
a687059c 838 }
a0d0e21e 839 else
840 tmp = 1;
841 s++;
842 }
843 break;
a0ed51b3 844 case NDIGITUTF8:
845 while (s < strend) {
846 if (!swash_fetch(PL_utf8_digit,s)) {
847 if (tmp && regtry(prog, s))
848 goto got_it;
849 else
850 tmp = doevery;
851 }
852 else
853 tmp = 1;
854 s += UTF8SKIP(s);
855 }
856 break;
a687059c 857 }
a0d0e21e 858 }
859 else {
c277df42 860 dontbother = 0;
861 if (prog->float_substr != Nullsv) { /* Trim the end. */
862 char *last;
863 I32 oldpos = scream_pos;
864
865 if (screamer) {
866 last = screaminstr(screamer, prog->float_substr, s - strbeg,
867 end_shift, &scream_pos, 1); /* last one */
868 if (!last) {
869 last = scream_olds; /* Only one occurence. */
870 }
a0ed51b3 871 }
872 else {
c277df42 873 STRLEN len;
874 char *little = SvPV(prog->float_substr, len);
875 last = rninstr(s, strend, little, little + len);
876 }
877 if (last == NULL) goto phooey; /* Should not happen! */
878 dontbother = strend - last - 1;
879 }
880 if (minlen && (dontbother < minlen))
a0d0e21e 881 dontbother = minlen - 1;
a0ed51b3 882 strend -= dontbother; /* this one's always in bytes! */
a0d0e21e 883 /* We don't know much -- general case. */
a0ed51b3 884 if (UTF) {
885 for (;;) {
886 if (regtry(prog, s)) {
887 strend += dontbother; /* this one's always in bytes! */
888 dontbother = 0;
889 goto got_it;
890 }
891 if (s >= strend)
892 break;
893 s += UTF8SKIP(s);
894 };
895 }
896 else {
897 do {
898 if (regtry(prog, s))
899 goto got_it;
900 } while (s++ < strend);
901 }
a0d0e21e 902 }
903
904 /* Failure. */
905 goto phooey;
a687059c 906
a0d0e21e 907got_it:
a0ed51b3 908 strend = HOP(strend, dontbother); /* uncheat */
a0d0e21e 909 prog->subbeg = strbeg;
910 prog->subend = strend;
3280af22 911 RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
5f05dabc 912
913 /* make sure $`, $&, $', and $digit will work later */
c277df42 914 if (strbeg != prog->subbase) { /* second+ //g match. */
915 if (!(flags & REXEC_COPY_STR)) {
137443ea 916 if (prog->subbase) {
917 Safefree(prog->subbase);
918 prog->subbase = Nullch;
919 }
920 }
921 else {
922 I32 i = strend - startpos + (stringarg - strbeg);
923 s = savepvn(strbeg, i);
924 Safefree(prog->subbase);
925 prog->subbase = s;
926 prog->subbeg = prog->subbase;
927 prog->subend = prog->subbase + i;
928 s = prog->subbase + (stringarg - strbeg);
929 for (i = 0; i <= prog->nparens; i++) {
930 if (prog->endp[i]) {
931 prog->startp[i] = s + (prog->startp[i] - startpos);
932 prog->endp[i] = s + (prog->endp[i] - startpos);
933 }
a0d0e21e 934 }
935 }
a0d0e21e 936 }
ce862d02 937 /* Preserve the current value of $^R */
3280af22 938 if (oreplsv != GvSV(PL_replgv)) {
939 sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
ce862d02 940 restored, the value remains
941 the same. */
942 }
a0d0e21e 943 return 1;
944
945phooey:
a0d0e21e 946 return 0;
a687059c 947}
948
949/*
950 - regtry - try match at specific point
951 */
76e3520e 952STATIC I32 /* 0 failure, 1 success */
8ac85365 953regtry(regexp *prog, char *startpos)
a687059c 954{
c277df42 955 dTHR;
a0d0e21e 956 register I32 i;
957 register char **sp;
958 register char **ep;
c277df42 959 CHECKPOINT lastcp;
a0d0e21e 960
3280af22 961 if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
962 PL_reg_eval_set = RS_init;
ce862d02 963 DEBUG_r(DEBUG_s(
c3464db5 964 PerlIO_printf(Perl_debug_log, " setting stack tmpbase at %i\n",
3280af22 965 PL_stack_sp - PL_stack_base);
ce862d02 966 ));
967 SAVEINT(cxstack[cxstack_ix].blk_oldsp);
3280af22 968 cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
ce862d02 969 /* Otherwise OP_NEXTSTATE will free whatever on stack now. */
970 SAVETMPS;
971 /* Apparently this is not needed, judging by wantarray. */
972 /* SAVEINT(cxstack[cxstack_ix].blk_gimme);
973 cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
974 }
3280af22 975 PL_reginput = startpos;
976 PL_regstartp = prog->startp;
977 PL_regendp = prog->endp;
978 PL_reglastparen = &prog->lastparen;
a0d0e21e 979 prog->lastparen = 0;
3280af22 980 PL_regsize = 0;
981 if (PL_reg_start_tmpl <= prog->nparens) {
982 PL_reg_start_tmpl = prog->nparens*3/2 + 3;
983 if(PL_reg_start_tmp)
984 Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
c277df42 985 else
3280af22 986 New(22,PL_reg_start_tmp, PL_reg_start_tmpl, char*);
c277df42 987 }
a0d0e21e 988
989 sp = prog->startp;
990 ep = prog->endp;
991 if (prog->nparens) {
992 for (i = prog->nparens; i >= 0; i--) {
993 *sp++ = NULL;
994 *ep++ = NULL;
a687059c 995 }
a0d0e21e 996 }
c277df42 997 REGCP_SET;
7e5428c5 998 if (regmatch(prog->program + 1)) {
a0d0e21e 999 prog->startp[0] = startpos;
3280af22 1000 prog->endp[0] = PL_reginput;
a0d0e21e 1001 return 1;
1002 }
c277df42 1003 REGCP_UNWIND;
1004 return 0;
a687059c 1005}
1006
1007/*
1008 - regmatch - main matching routine
1009 *
1010 * Conceptually the strategy is simple: check to see whether the current
1011 * node matches, call self recursively to see whether the rest matches,
1012 * and then act accordingly. In practice we make some effort to avoid
1013 * recursion, in particular by going through "ordinary" nodes (that don't
1014 * need to know whether the rest of the match failed) by a loop instead of
1015 * by recursion.
1016 */
1017/* [lwall] I've hoisted the register declarations to the outer block in order to
1018 * maybe save a little bit of pushing and popping on the stack. It also takes
1019 * advantage of machines that use a register save mask on subroutine entry.
1020 */
76e3520e 1021STATIC I32 /* 0 failure, 1 success */
c277df42 1022regmatch(regnode *prog)
a687059c 1023{
c277df42 1024 dTHR;
1025 register regnode *scan; /* Current node. */
1026 regnode *next; /* Next node. */
1027 regnode *inner; /* Next node in internal branch. */
c3464db5 1028 register I32 nextchr; /* renamed nextchr - nextchar colides with
1029 function of same name */
a0d0e21e 1030 register I32 n; /* no or next */
1031 register I32 ln; /* len or last */
1032 register char *s; /* operand or save */
3280af22 1033 register char *locinput = PL_reginput;
c277df42 1034 register I32 c1, c2, paren; /* case fold search, parenth */
1035 int minmod = 0, sw = 0, logical = 0;
4633a7c4 1036#ifdef DEBUGGING
3280af22 1037 PL_regindent++;
4633a7c4 1038#endif
a0d0e21e 1039
a0ed51b3 1040 /* Note that nextchr is a byte even in UTF */
76e3520e 1041 nextchr = UCHARAT(locinput);
a0d0e21e 1042 scan = prog;
1043 while (scan != NULL) {
c277df42 1044#define sayNO_L (logical ? (logical = 0, sw = 0, goto cont) : sayNO)
a687059c 1045#ifdef DEBUGGING
c277df42 1046# define sayYES goto yes
1047# define sayNO goto no
1048# define saySAME(x) if (x) goto yes; else goto no
1049# define REPORT_CODE_OFF 24
4633a7c4 1050#else
c277df42 1051# define sayYES return 1
1052# define sayNO return 0
1053# define saySAME(x) return x
a687059c 1054#endif
c277df42 1055 DEBUG_r( {
1056 SV *prop = sv_newmortal();
3280af22 1057 int docolor = *PL_colors[0];
c277df42 1058 int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
3280af22 1059 int l = (PL_regeol - locinput > taill ? taill : PL_regeol - locinput);
1060 int pref_len = (locinput - PL_bostr > (5 + taill) - l
1061 ? (5 + taill) - l : locinput - PL_bostr);
c277df42 1062
3280af22 1063 if (l + pref_len < (5 + taill) && l < PL_regeol - locinput)
1064 l = ( PL_regeol - locinput > (5 + taill) - pref_len
1065 ? (5 + taill) - pref_len : PL_regeol - locinput);
c277df42 1066 regprop(prop, scan);
1067 PerlIO_printf(Perl_debug_log,
54dc92de 1068 "%4i <%s%.*s%s%s%s%.*s%s>%*s|%3d:%*s%s\n",
3280af22 1069 locinput - PL_bostr,
1070 PL_colors[2], pref_len, locinput - pref_len, PL_colors[3],
c277df42 1071 (docolor ? "" : "> <"),
3280af22 1072 PL_colors[0], l, locinput, PL_colors[1],
c277df42 1073 15 - l - pref_len + 1,
1074 "",
3280af22 1075 scan - PL_regprogram, PL_regindent*2, "",
c277df42 1076 SvPVX(prop));
1077 } );
a687059c 1078
c277df42 1079 next = scan + NEXT_OFF(scan);
a0d0e21e 1080 if (next == scan)
1081 next = NULL;
a687059c 1082
a0d0e21e 1083 switch (OP(scan)) {
1084 case BOL:
3280af22 1085 if (locinput == PL_bostr
1086 ? PL_regprev == '\n'
1087 : (PL_multiline &&
1088 (nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
a0d0e21e 1089 {
a0ed51b3 1090 /* regtill = regbol; */
a0d0e21e 1091 break;
1092 }
4633a7c4 1093 sayNO;
a0d0e21e 1094 case MBOL:
3280af22 1095 if (locinput == PL_bostr
1096 ? PL_regprev == '\n'
1097 : ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
a0d0e21e 1098 {
1099 break;
1100 }
4633a7c4 1101 sayNO;
a0d0e21e 1102 case SBOL:
3280af22 1103 if (locinput == PL_regbol && PL_regprev == '\n')
a0d0e21e 1104 break;
4633a7c4 1105 sayNO;
774d564b 1106 case GPOS:
3280af22 1107 if (locinput == PL_regbol)
a0d0e21e 1108 break;
4633a7c4 1109 sayNO;
a0d0e21e 1110 case EOL:
3280af22 1111 if (PL_multiline)
a0d0e21e 1112 goto meol;
1113 else
1114 goto seol;
1115 case MEOL:
1116 meol:
3280af22 1117 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
4633a7c4 1118 sayNO;
a0d0e21e 1119 break;
1120 case SEOL:
1121 seol:
3280af22 1122 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
4633a7c4 1123 sayNO;
3280af22 1124 if (PL_regeol - locinput > 1)
4633a7c4 1125 sayNO;
a0d0e21e 1126 break;
b85d18e9 1127 case EOS:
3280af22 1128 if (PL_regeol != locinput)
b85d18e9 1129 sayNO;
1130 break;
a0ed51b3 1131 case SANYUTF8:
1132 if (nextchr & 0x80) {
1133 locinput += PL_utf8skip[nextchr];
1134 if (locinput > PL_regeol)
1135 sayNO;
1136 nextchr = UCHARAT(locinput);
1137 break;
1138 }
1139 if (!nextchr && locinput >= PL_regeol)
1140 sayNO;
1141 nextchr = UCHARAT(++locinput);
1142 break;
a0d0e21e 1143 case SANY:
3280af22 1144 if (!nextchr && locinput >= PL_regeol)
4633a7c4 1145 sayNO;
76e3520e 1146 nextchr = UCHARAT(++locinput);
a0d0e21e 1147 break;
a0ed51b3 1148 case ANYUTF8:
1149 if (nextchr & 0x80) {
1150 locinput += PL_utf8skip[nextchr];
1151 if (locinput > PL_regeol)
1152 sayNO;
1153 nextchr = UCHARAT(locinput);
1154 break;
1155 }
1156 if (!nextchr && locinput >= PL_regeol || nextchr == '\n')
1157 sayNO;
1158 nextchr = UCHARAT(++locinput);
1159 break;
a0d0e21e 1160 case ANY:
3280af22 1161 if (!nextchr && locinput >= PL_regeol || nextchr == '\n')
4633a7c4 1162 sayNO;
76e3520e 1163 nextchr = UCHARAT(++locinput);
a0d0e21e 1164 break;
bbce6d69 1165 case EXACT:
161b471a 1166 s = (char *) OPERAND(scan);
c277df42 1167 ln = UCHARAT(s++);
a0d0e21e 1168 /* Inline the first character, for speed. */
76e3520e 1169 if (UCHARAT(s) != nextchr)
4633a7c4 1170 sayNO;
3280af22 1171 if (PL_regeol - locinput < ln)
4633a7c4 1172 sayNO;
36477c24 1173 if (ln > 1 && memNE(s, locinput, ln))
4633a7c4 1174 sayNO;
a0d0e21e 1175 locinput += ln;
76e3520e 1176 nextchr = UCHARAT(locinput);
bbce6d69 1177 break;
1178 case EXACTFL:
3280af22 1179 PL_reg_flags |= RF_tainted;
bbce6d69 1180 /* FALL THROUGH */
1181 case EXACTF:
161b471a 1182 s = (char *) OPERAND(scan);
c277df42 1183 ln = UCHARAT(s++);
a0ed51b3 1184
1185 if (UTF) {
1186 char *l = locinput;
1187 char *e = s + ln;
1188 c1 = OP(scan) == EXACTF;
1189 while (s < e) {
1190 if (l >= PL_regeol)
1191 sayNO;
1192 if (utf8_to_uv(s, 0) != (c1 ? toLOWER_utf8(l) : toLOWER_LC_utf8(l)))
1193 sayNO;
1194 s += UTF8SKIP(s);
1195 l += UTF8SKIP(l);
1196 }
1197 locinput = l;
1198 nextchr = UCHARAT(locinput);
1199 break;
1200 }
1201
bbce6d69 1202 /* Inline the first character, for speed. */
76e3520e 1203 if (UCHARAT(s) != nextchr &&
bbce6d69 1204 UCHARAT(s) != ((OP(scan) == EXACTF)
76e3520e 1205 ? fold : fold_locale)[nextchr])
bbce6d69 1206 sayNO;
3280af22 1207 if (PL_regeol - locinput < ln)
bbce6d69 1208 sayNO;
5f05dabc 1209 if (ln > 1 && (OP(scan) == EXACTF
1210 ? ibcmp(s, locinput, ln)
1211 : ibcmp_locale(s, locinput, ln)))
bbce6d69 1212 sayNO;
1213 locinput += ln;
76e3520e 1214 nextchr = UCHARAT(locinput);
a0d0e21e 1215 break;
a0ed51b3 1216 case ANYOFUTF8:
1217 s = (char *) OPERAND(scan);
1218 if (!REGINCLASSUTF8(scan, (U8*)locinput))
1219 sayNO;
1220 if (locinput >= PL_regeol)
1221 sayNO;
1222 locinput += PL_utf8skip[nextchr];
1223 nextchr = UCHARAT(locinput);
1224 break;
a0d0e21e 1225 case ANYOF:
161b471a 1226 s = (char *) OPERAND(scan);
76e3520e 1227 if (nextchr < 0)
1228 nextchr = UCHARAT(locinput);
873ef191 1229 if (!REGINCLASS(s, nextchr))
4633a7c4 1230 sayNO;
3280af22 1231 if (!nextchr && locinput >= PL_regeol)
4633a7c4 1232 sayNO;
76e3520e 1233 nextchr = UCHARAT(++locinput);
a0d0e21e 1234 break;
bbce6d69 1235 case ALNUML:
3280af22 1236 PL_reg_flags |= RF_tainted;
bbce6d69 1237 /* FALL THROUGH */
a0d0e21e 1238 case ALNUM:
76e3520e 1239 if (!nextchr)
4633a7c4 1240 sayNO;
bbce6d69 1241 if (!(OP(scan) == ALNUM
76e3520e 1242 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
4633a7c4 1243 sayNO;
76e3520e 1244 nextchr = UCHARAT(++locinput);
a0d0e21e 1245 break;
a0ed51b3 1246 case ALNUMLUTF8:
1247 PL_reg_flags |= RF_tainted;
1248 /* FALL THROUGH */
1249 case ALNUMUTF8:
1250 if (!nextchr)
1251 sayNO;
1252 if (nextchr & 0x80) {
1253 if (!(OP(scan) == ALNUMUTF8
1254 ? swash_fetch(PL_utf8_alnum, locinput) : isALNUM_LC_utf8(locinput)))
1255 sayNO;
1256 locinput += PL_utf8skip[nextchr];
1257 nextchr = UCHARAT(locinput);
1258 break;
1259 }
1260 if (!(OP(scan) == ALNUMUTF8
1261 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
1262 sayNO;
1263 nextchr = UCHARAT(++locinput);
1264 break;
bbce6d69 1265 case NALNUML:
3280af22 1266 PL_reg_flags |= RF_tainted;
bbce6d69 1267 /* FALL THROUGH */
a0d0e21e 1268 case NALNUM:
3280af22 1269 if (!nextchr && locinput >= PL_regeol)
4633a7c4 1270 sayNO;
bbce6d69 1271 if (OP(scan) == NALNUM
76e3520e 1272 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
4633a7c4 1273 sayNO;
76e3520e 1274 nextchr = UCHARAT(++locinput);
a0d0e21e 1275 break;
a0ed51b3 1276 case NALNUMLUTF8:
1277 PL_reg_flags |= RF_tainted;
1278 /* FALL THROUGH */
1279 case NALNUMUTF8:
1280 if (!nextchr && locinput >= PL_regeol)
1281 sayNO;
1282 if (nextchr & 0x80) {
1283 if (OP(scan) == NALNUMUTF8
1284 ? swash_fetch(PL_utf8_alnum, locinput) : isALNUM_LC_utf8(locinput))
1285 sayNO;
1286 locinput += PL_utf8skip[nextchr];
1287 nextchr = UCHARAT(locinput);
1288 break;
1289 }
1290 if (OP(scan) == NALNUMUTF8
1291 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
1292 sayNO;
1293 nextchr = UCHARAT(++locinput);
1294 break;
bbce6d69 1295 case BOUNDL:
1296 case NBOUNDL:
3280af22 1297 PL_reg_flags |= RF_tainted;
bbce6d69 1298 /* FALL THROUGH */
a0d0e21e 1299 case BOUND:
bbce6d69 1300 case NBOUND:
1301 /* was last char in word? */
3280af22 1302 ln = (locinput != PL_regbol) ? UCHARAT(locinput - 1) : PL_regprev;
bbce6d69 1303 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
1304 ln = isALNUM(ln);
76e3520e 1305 n = isALNUM(nextchr);
bbce6d69 1306 }
1307 else {
1308 ln = isALNUM_LC(ln);
76e3520e 1309 n = isALNUM_LC(nextchr);
bbce6d69 1310 }
95bac841 1311 if (((!ln) == (!n)) == (OP(scan) == BOUND || OP(scan) == BOUNDL))
4633a7c4 1312 sayNO;
a0d0e21e 1313 break;
a0ed51b3 1314 case BOUNDLUTF8:
1315 case NBOUNDLUTF8:
1316 PL_reg_flags |= RF_tainted;
1317 /* FALL THROUGH */
1318 case BOUNDUTF8:
1319 case NBOUNDUTF8:
1320 /* was last char in word? */
1321 ln = (locinput != PL_regbol) ? utf8_to_uv(reghop(locinput, -1), 0) : PL_regprev;
1322 if (OP(scan) == BOUNDUTF8 || OP(scan) == NBOUNDUTF8) {
1323 ln = isALNUM_uni(ln);
1324 n = swash_fetch(PL_utf8_alnum, locinput);
1325 }
1326 else {
1327 ln = isALNUM_LC_uni(ln);
1328 n = isALNUM_LC_utf8(locinput);
1329 }
1330 if (((!ln) == (!n)) == (OP(scan) == BOUNDUTF8 || OP(scan) == BOUNDLUTF8))
1331 sayNO;
1332 break;
bbce6d69 1333 case SPACEL:
3280af22 1334 PL_reg_flags |= RF_tainted;
bbce6d69 1335 /* FALL THROUGH */
a0d0e21e 1336 case SPACE:
3280af22 1337 if (!nextchr && locinput >= PL_regeol)
4633a7c4 1338 sayNO;
bbce6d69 1339 if (!(OP(scan) == SPACE
76e3520e 1340 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
4633a7c4 1341 sayNO;
76e3520e 1342 nextchr = UCHARAT(++locinput);
a0d0e21e 1343 break;
a0ed51b3 1344 case SPACELUTF8:
1345 PL_reg_flags |= RF_tainted;
1346 /* FALL THROUGH */
1347 case SPACEUTF8:
1348 if (!nextchr && locinput >= PL_regeol)
1349 sayNO;
1350 if (nextchr & 0x80) {
1351 if (!(OP(scan) == SPACEUTF8
1352 ? swash_fetch(PL_utf8_space,locinput) : isSPACE_LC_utf8(locinput)))
1353 sayNO;
1354 locinput += PL_utf8skip[nextchr];
1355 nextchr = UCHARAT(locinput);
1356 break;
1357 }
1358 if (!(OP(scan) == SPACEUTF8
1359 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
1360 sayNO;
1361 nextchr = UCHARAT(++locinput);
1362 break;
bbce6d69 1363 case NSPACEL:
3280af22 1364 PL_reg_flags |= RF_tainted;
bbce6d69 1365 /* FALL THROUGH */
a0d0e21e 1366 case NSPACE:
76e3520e 1367 if (!nextchr)
4633a7c4 1368 sayNO;
bbce6d69 1369 if (OP(scan) == SPACE
76e3520e 1370 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
4633a7c4 1371 sayNO;
76e3520e 1372 nextchr = UCHARAT(++locinput);
a0d0e21e 1373 break;
a0ed51b3 1374 case NSPACELUTF8:
1375 PL_reg_flags |= RF_tainted;
1376 /* FALL THROUGH */
1377 case NSPACEUTF8:
1378 if (!nextchr)
1379 sayNO;
1380 if (nextchr & 0x80) {
1381 if (OP(scan) == NSPACEUTF8
1382 ? swash_fetch(PL_utf8_space,locinput) : isSPACE_LC_utf8(locinput))
1383 sayNO;
1384 locinput += PL_utf8skip[nextchr];
1385 nextchr = UCHARAT(locinput);
1386 break;
1387 }
1388 if (OP(scan) == NSPACEUTF8
1389 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
1390 sayNO;
1391 nextchr = UCHARAT(++locinput);
1392 break;
a0d0e21e 1393 case DIGIT:
76e3520e 1394 if (!isDIGIT(nextchr))
4633a7c4 1395 sayNO;
76e3520e 1396 nextchr = UCHARAT(++locinput);
a0d0e21e 1397 break;
a0ed51b3 1398 case DIGITUTF8:
1399 if (nextchr & 0x80) {
1400 if (!(swash_fetch(PL_utf8_digit,locinput)))
1401 sayNO;
1402 locinput += PL_utf8skip[nextchr];
1403 nextchr = UCHARAT(locinput);
1404 break;
1405 }
1406 if (!isDIGIT(nextchr))
1407 sayNO;
1408 nextchr = UCHARAT(++locinput);
1409 break;
a0d0e21e 1410 case NDIGIT:
3280af22 1411 if (!nextchr && locinput >= PL_regeol)
4633a7c4 1412 sayNO;
76e3520e 1413 if (isDIGIT(nextchr))
4633a7c4 1414 sayNO;
76e3520e 1415 nextchr = UCHARAT(++locinput);
a0d0e21e 1416 break;
a0ed51b3 1417 case NDIGITUTF8:
1418 if (!nextchr && locinput >= PL_regeol)
1419 sayNO;
1420 if (nextchr & 0x80) {
1421 if (swash_fetch(PL_utf8_digit,locinput))
1422 sayNO;
1423 locinput += PL_utf8skip[nextchr];
1424 nextchr = UCHARAT(locinput);
1425 break;
1426 }
1427 if (isDIGIT(nextchr))
1428 sayNO;
1429 nextchr = UCHARAT(++locinput);
1430 break;
1431 case CLUMP:
1432 if (locinput >= PL_regeol || swash_fetch(PL_utf8_mark, locinput))
1433 sayNO;
1434 locinput += PL_utf8skip[nextchr];
1435 while (locinput < PL_regeol && swash_fetch(PL_utf8_mark, locinput))
1436 locinput += UTF8SKIP(locinput);
1437 if (locinput > PL_regeol)
1438 sayNO;
1439 nextchr = UCHARAT(locinput);
1440 break;
c8756f30 1441 case REFFL:
3280af22 1442 PL_reg_flags |= RF_tainted;
c8756f30 1443 /* FALL THROUGH */
c277df42 1444 case REF:
c8756f30 1445 case REFF:
c277df42 1446 n = ARG(scan); /* which paren pair */
3280af22 1447 s = PL_regstartp[n];
1448 if (*PL_reglastparen < n || !s)
af3f8c16 1449 sayNO; /* Do not match unless seen CLOSEn. */
3280af22 1450 if (s == PL_regendp[n])
a0d0e21e 1451 break;
a0ed51b3 1452
1453 if (UTF && OP(scan) != REF) { /* REF can do byte comparison */
1454 char *l = locinput;
1455 char *e = PL_regendp[n];
1456 /*
1457 * Note that we can't do the "other character" lookup trick as
1458 * in the 8-bit case (no pun intended) because in Unicode we
1459 * have to map both upper and title case to lower case.
1460 */
1461 if (OP(scan) == REFF) {
1462 while (s < e) {
1463 if (l >= PL_regeol)
1464 sayNO;
1465 if (toLOWER_utf8(s) != toLOWER_utf8(l))
1466 sayNO;
1467 s += UTF8SKIP(s);
1468 l += UTF8SKIP(l);
1469 }
1470 }
1471 else {
1472 while (s < e) {
1473 if (l >= PL_regeol)
1474 sayNO;
1475 if (toLOWER_LC_utf8(s) != toLOWER_LC_utf8(l))
1476 sayNO;
1477 s += UTF8SKIP(s);
1478 l += UTF8SKIP(l);
1479 }
1480 }
1481 locinput = l;
1482 nextchr = UCHARAT(locinput);
1483 break;
1484 }
1485
a0d0e21e 1486 /* Inline the first character, for speed. */
76e3520e 1487 if (UCHARAT(s) != nextchr &&
c8756f30 1488 (OP(scan) == REF ||
1489 (UCHARAT(s) != ((OP(scan) == REFF
76e3520e 1490 ? fold : fold_locale)[nextchr]))))
4633a7c4 1491 sayNO;
3280af22 1492 ln = PL_regendp[n] - s;
1493 if (locinput + ln > PL_regeol)
4633a7c4 1494 sayNO;
c8756f30 1495 if (ln > 1 && (OP(scan) == REF
1496 ? memNE(s, locinput, ln)
1497 : (OP(scan) == REFF
1498 ? ibcmp(s, locinput, ln)
1499 : ibcmp_locale(s, locinput, ln))))
4633a7c4 1500 sayNO;
a0d0e21e 1501 locinput += ln;
76e3520e 1502 nextchr = UCHARAT(locinput);
a0d0e21e 1503 break;
1504
1505 case NOTHING:
c277df42 1506 case TAIL:
a0d0e21e 1507 break;
1508 case BACK:
1509 break;
c277df42 1510 case EVAL:
1511 {
1512 dSP;
533c011a 1513 OP_4tree *oop = PL_op;
3280af22 1514 COP *ocurcop = PL_curcop;
1515 SV **ocurpad = PL_curpad;
c277df42 1516 SV *ret;
1517
1518 n = ARG(scan);
533c011a 1519 PL_op = (OP_4tree*)PL_regdata->data[n];
1520 DEBUG_r( PerlIO_printf(Perl_debug_log, " re_eval 0x%x\n", PL_op) );
3280af22 1521 PL_curpad = AvARRAY((AV*)PL_regdata->data[n + 1]);
c277df42 1522
76e3520e 1523 CALLRUNOPS(); /* Scalar context. */
c277df42 1524 SPAGAIN;
1525 ret = POPs;
1526 PUTBACK;
1527
1528 if (logical) {
1529 logical = 0;
1530 sw = SvTRUE(ret);
a0ed51b3 1531 }
1532 else
3280af22 1533 sv_setsv(save_scalar(PL_replgv), ret);
533c011a 1534 PL_op = oop;
3280af22 1535 PL_curpad = ocurpad;
1536 PL_curcop = ocurcop;
c277df42 1537 break;
1538 }
a0d0e21e 1539 case OPEN:
c277df42 1540 n = ARG(scan); /* which paren pair */
3280af22 1541 PL_reg_start_tmp[n] = locinput;
1542 if (n > PL_regsize)
1543 PL_regsize = n;
a0d0e21e 1544 break;
1545 case CLOSE:
c277df42 1546 n = ARG(scan); /* which paren pair */
3280af22 1547 PL_regstartp[n] = PL_reg_start_tmp[n];
1548 PL_regendp[n] = locinput;
1549 if (n > *PL_reglastparen)
1550 *PL_reglastparen = n;
a0d0e21e 1551 break;
c277df42 1552 case GROUPP:
1553 n = ARG(scan); /* which paren pair */
3280af22 1554 sw = (*PL_reglastparen >= n && PL_regendp[n] != NULL);
c277df42 1555 break;
1556 case IFTHEN:
1557 if (sw)
1558 next = NEXTOPER(NEXTOPER(scan));
1559 else {
1560 next = scan + ARG(scan);
1561 if (OP(next) == IFTHEN) /* Fake one. */
1562 next = NEXTOPER(NEXTOPER(next));
1563 }
1564 break;
1565 case LOGICAL:
1566 logical = 1;
1567 break;
a0d0e21e 1568 case CURLYX: {
1569 CURCUR cc;
3280af22 1570 CHECKPOINT cp = PL_savestack_ix;
c277df42 1571
1572 if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
1573 next += ARG(next);
3280af22 1574 cc.oldcc = PL_regcc;
1575 PL_regcc = &cc;
1576 cc.parenfloor = *PL_reglastparen;
a0d0e21e 1577 cc.cur = -1;
1578 cc.min = ARG1(scan);
1579 cc.max = ARG2(scan);
c277df42 1580 cc.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
a0d0e21e 1581 cc.next = next;
1582 cc.minmod = minmod;
1583 cc.lastloc = 0;
3280af22 1584 PL_reginput = locinput;
a0d0e21e 1585 n = regmatch(PREVOPER(next)); /* start on the WHILEM */
1586 regcpblow(cp);
3280af22 1587 PL_regcc = cc.oldcc;
4633a7c4 1588 saySAME(n);
a0d0e21e 1589 }
1590 /* NOT REACHED */
1591 case WHILEM: {
1592 /*
1593 * This is really hard to understand, because after we match
1594 * what we're trying to match, we must make sure the rest of
1595 * the RE is going to match for sure, and to do that we have
1596 * to go back UP the parse tree by recursing ever deeper. And
1597 * if it fails, we have to reset our parent's current state
1598 * that we can try again after backing off.
1599 */
1600
c277df42 1601 CHECKPOINT cp, lastcp;
3280af22 1602 CURCUR* cc = PL_regcc;
c277df42 1603 char *lastloc = cc->lastloc; /* Detection of 0-len. */
1604
4633a7c4 1605 n = cc->cur + 1; /* how many we know we matched */
3280af22 1606 PL_reginput = locinput;
a0d0e21e 1607
c277df42 1608 DEBUG_r(
1609 PerlIO_printf(Perl_debug_log,
1610 "%*s %ld out of %ld..%ld cc=%lx\n",
3280af22 1611 REPORT_CODE_OFF+PL_regindent*2, "",
c277df42 1612 (long)n, (long)cc->min,
1613 (long)cc->max, (long)cc)
1614 );
4633a7c4 1615
a0d0e21e 1616 /* If degenerate scan matches "", assume scan done. */
1617
579cf2c3 1618 if (locinput == cc->lastloc && n >= cc->min) {
3280af22 1619 PL_regcc = cc->oldcc;
1620 ln = PL_regcc->cur;
c277df42 1621 DEBUG_r(
c3464db5 1622 PerlIO_printf(Perl_debug_log,
1623 "%*s empty match detected, try continuation...\n",
3280af22 1624 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 1625 );
a0d0e21e 1626 if (regmatch(cc->next))
4633a7c4 1627 sayYES;
c277df42 1628 DEBUG_r(
c3464db5 1629 PerlIO_printf(Perl_debug_log,
1630 "%*s failed...\n",
3280af22 1631 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 1632 );
3280af22 1633 PL_regcc->cur = ln;
1634 PL_regcc = cc;
4633a7c4 1635 sayNO;
a0d0e21e 1636 }
1637
1638 /* First just match a string of min scans. */
1639
1640 if (n < cc->min) {
1641 cc->cur = n;
1642 cc->lastloc = locinput;
4633a7c4 1643 if (regmatch(cc->scan))
1644 sayYES;
1645 cc->cur = n - 1;
c277df42 1646 cc->lastloc = lastloc;
1647 DEBUG_r(
c3464db5 1648 PerlIO_printf(Perl_debug_log,
1649 "%*s failed...\n",
3280af22 1650 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 1651 );
4633a7c4 1652 sayNO;
a0d0e21e 1653 }
1654
1655 /* Prefer next over scan for minimal matching. */
1656
1657 if (cc->minmod) {
3280af22 1658 PL_regcc = cc->oldcc;
1659 ln = PL_regcc->cur;
5f05dabc 1660 cp = regcppush(cc->parenfloor);
c277df42 1661 REGCP_SET;
5f05dabc 1662 if (regmatch(cc->next)) {
c277df42 1663 regcpblow(cp);
4633a7c4 1664 sayYES; /* All done. */
5f05dabc 1665 }
c277df42 1666 REGCP_UNWIND;
5f05dabc 1667 regcppop();
3280af22 1668 PL_regcc->cur = ln;
1669 PL_regcc = cc;
a0d0e21e 1670
c277df42 1671 if (n >= cc->max) { /* Maximum greed exceeded? */
3280af22 1672 if (PL_dowarn && n >= REG_INFTY
1673 && !(PL_reg_flags & RF_warned)) {
1674 PL_reg_flags |= RF_warned;
2f3ca594 1675 warn("%s limit (%d) exceeded",
1676 "Complex regular subexpression recursion",
1677 REG_INFTY - 1);
c277df42 1678 }
4633a7c4 1679 sayNO;
c277df42 1680 }
a687059c 1681
c277df42 1682 DEBUG_r(
c3464db5 1683 PerlIO_printf(Perl_debug_log,
1684 "%*s trying longer...\n",
3280af22 1685 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 1686 );
a0d0e21e 1687 /* Try scanning more and see if it helps. */
3280af22 1688 PL_reginput = locinput;
a0d0e21e 1689 cc->cur = n;
1690 cc->lastloc = locinput;
5f05dabc 1691 cp = regcppush(cc->parenfloor);
c277df42 1692 REGCP_SET;
5f05dabc 1693 if (regmatch(cc->scan)) {
c277df42 1694 regcpblow(cp);
4633a7c4 1695 sayYES;
5f05dabc 1696 }
c277df42 1697 DEBUG_r(
c3464db5 1698 PerlIO_printf(Perl_debug_log,
1699 "%*s failed...\n",
3280af22 1700 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 1701 );
1702 REGCP_UNWIND;
5f05dabc 1703 regcppop();
4633a7c4 1704 cc->cur = n - 1;
c277df42 1705 cc->lastloc = lastloc;
4633a7c4 1706 sayNO;
a0d0e21e 1707 }
1708
1709 /* Prefer scan over next for maximal matching. */
1710
1711 if (n < cc->max) { /* More greed allowed? */
5f05dabc 1712 cp = regcppush(cc->parenfloor);
a0d0e21e 1713 cc->cur = n;
1714 cc->lastloc = locinput;
c277df42 1715 REGCP_SET;
5f05dabc 1716 if (regmatch(cc->scan)) {
c277df42 1717 regcpblow(cp);
4633a7c4 1718 sayYES;
5f05dabc 1719 }
c277df42 1720 REGCP_UNWIND;
a0d0e21e 1721 regcppop(); /* Restore some previous $<digit>s? */
3280af22 1722 PL_reginput = locinput;
c277df42 1723 DEBUG_r(
c3464db5 1724 PerlIO_printf(Perl_debug_log,
1725 "%*s failed, try continuation...\n",
3280af22 1726 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 1727 );
1728 }
3280af22 1729 if (PL_dowarn && n >= REG_INFTY && !(PL_reg_flags & RF_warned)) {
1730 PL_reg_flags |= RF_warned;
c277df42 1731 warn("count exceeded %d", REG_INFTY - 1);
a0d0e21e 1732 }
1733
1734 /* Failed deeper matches of scan, so see if this one works. */
3280af22 1735 PL_regcc = cc->oldcc;
1736 ln = PL_regcc->cur;
a0d0e21e 1737 if (regmatch(cc->next))
4633a7c4 1738 sayYES;
c277df42 1739 DEBUG_r(
c3464db5 1740 PerlIO_printf(Perl_debug_log, "%*s failed...\n",
3280af22 1741 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 1742 );
3280af22 1743 PL_regcc->cur = ln;
1744 PL_regcc = cc;
4633a7c4 1745 cc->cur = n - 1;
c277df42 1746 cc->lastloc = lastloc;
4633a7c4 1747 sayNO;
a0d0e21e 1748 }
1749 /* NOT REACHED */
c277df42 1750 case BRANCHJ:
1751 next = scan + ARG(scan);
1752 if (next == scan)
1753 next = NULL;
1754 inner = NEXTOPER(NEXTOPER(scan));
1755 goto do_branch;
1756 case BRANCH:
1757 inner = NEXTOPER(scan);
1758 do_branch:
1759 {
1760 CHECKPOINT lastcp;
1761 c1 = OP(scan);
1762 if (OP(next) != c1) /* No choice. */
1763 next = inner; /* Avoid recursion. */
a0d0e21e 1764 else {
3280af22 1765 int lastparen = *PL_reglastparen;
c277df42 1766
1767 REGCP_SET;
a0d0e21e 1768 do {
3280af22 1769 PL_reginput = locinput;
c277df42 1770 if (regmatch(inner))
4633a7c4 1771 sayYES;
c277df42 1772 REGCP_UNWIND;
3280af22 1773 for (n = *PL_reglastparen; n > lastparen; n--)
1774 PL_regendp[n] = 0;
1775 *PL_reglastparen = n;
c277df42 1776 scan = next;
a0d0e21e 1777 /*SUPPRESS 560*/
c277df42 1778 if (n = (c1 == BRANCH ? NEXT_OFF(next) : ARG(next)))
1779 next += n;
a0d0e21e 1780 else
c277df42 1781 next = NULL;
c277df42 1782 inner = NEXTOPER(scan);
1783 if (c1 == BRANCHJ) {
1784 inner = NEXTOPER(inner);
1785 }
1786 } while (scan != NULL && OP(scan) == c1);
4633a7c4 1787 sayNO;
a0d0e21e 1788 /* NOTREACHED */
a687059c 1789 }
a0d0e21e 1790 }
1791 break;
1792 case MINMOD:
1793 minmod = 1;
1794 break;
c277df42 1795 case CURLYM:
1796 {
00db4c45 1797 I32 l = 0;
c277df42 1798 CHECKPOINT lastcp;
1799
1800 /* We suppose that the next guy does not need
1801 backtracking: in particular, it is of constant length,
1802 and has no parenths to influence future backrefs. */
1803 ln = ARG1(scan); /* min to match */
1804 n = ARG2(scan); /* max to match */
c277df42 1805 paren = scan->flags;
1806 if (paren) {
3280af22 1807 if (paren > PL_regsize)
1808 PL_regsize = paren;
1809 if (paren > *PL_reglastparen)
1810 *PL_reglastparen = paren;
c277df42 1811 }
dc45a647 1812 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
c277df42 1813 if (paren)
1814 scan += NEXT_OFF(scan); /* Skip former OPEN. */
3280af22 1815 PL_reginput = locinput;
c277df42 1816 if (minmod) {
1817 minmod = 0;
1818 if (ln && regrepeat_hard(scan, ln, &l) < ln)
1819 sayNO;
5f4b28b2 1820 if (ln && l == 0 && n >= ln
c277df42 1821 /* In fact, this is tricky. If paren, then the
1822 fact that we did/didnot match may influence
1823 future execution. */
1824 && !(paren && ln == 0))
1825 ln = n;
3280af22 1826 locinput = PL_reginput;
c277df42 1827 if (regkind[(U8)OP(next)] == EXACT) {
1828 c1 = UCHARAT(OPERAND(next) + 1);
1829 if (OP(next) == EXACTF)
1830 c2 = fold[c1];
1831 else if (OP(next) == EXACTFL)
1832 c2 = fold_locale[c1];
1833 else
1834 c2 = c1;
a0ed51b3 1835 }
1836 else
c277df42 1837 c1 = c2 = -1000;
1838 REGCP_SET;
5f4b28b2 1839 /* This may be improved if l == 0. */
c277df42 1840 while (n >= ln || (n == REG_INFTY && ln > 0 && l)) { /* ln overflow ? */
1841 /* If it could work, try it. */
1842 if (c1 == -1000 ||
3280af22 1843 UCHARAT(PL_reginput) == c1 ||
1844 UCHARAT(PL_reginput) == c2)
c277df42 1845 {
1846 if (paren) {
1847 if (n) {
a0ed51b3 1848 PL_regstartp[paren] = HOP(PL_reginput, -l);
3280af22 1849 PL_regendp[paren] = PL_reginput;
a0ed51b3 1850 }
1851 else
3280af22 1852 PL_regendp[paren] = NULL;
c277df42 1853 }
1854 if (regmatch(next))
1855 sayYES;
1856 REGCP_UNWIND;
1857 }
1858 /* Couldn't or didn't -- move forward. */
3280af22 1859 PL_reginput = locinput;
c277df42 1860 if (regrepeat_hard(scan, 1, &l)) {
1861 ln++;
3280af22 1862 locinput = PL_reginput;
c277df42 1863 }
1864 else
1865 sayNO;
1866 }
a0ed51b3 1867 }
1868 else {
c277df42 1869 n = regrepeat_hard(scan, n, &l);
1870 if (n != 0 && l == 0
1871 /* In fact, this is tricky. If paren, then the
1872 fact that we did/didnot match may influence
1873 future execution. */
1874 && !(paren && ln == 0))
1875 ln = n;
3280af22 1876 locinput = PL_reginput;
c277df42 1877 DEBUG_r(
5c0ca799 1878 PerlIO_printf(Perl_debug_log,
1879 "%*s matched %ld times, len=%ld...\n",
3280af22 1880 REPORT_CODE_OFF+PL_regindent*2, "", n, l)
c277df42 1881 );
1882 if (n >= ln) {
1883 if (regkind[(U8)OP(next)] == EXACT) {
1884 c1 = UCHARAT(OPERAND(next) + 1);
1885 if (OP(next) == EXACTF)
1886 c2 = fold[c1];
1887 else if (OP(next) == EXACTFL)
1888 c2 = fold_locale[c1];
1889 else
1890 c2 = c1;
a0ed51b3 1891 }
1892 else
c277df42 1893 c1 = c2 = -1000;
1894 }
1895 REGCP_SET;
1896 while (n >= ln) {
1897 /* If it could work, try it. */
1898 if (c1 == -1000 ||
3280af22 1899 UCHARAT(PL_reginput) == c1 ||
1900 UCHARAT(PL_reginput) == c2)
a0ed51b3 1901 {
1902 DEBUG_r(
c3464db5 1903 PerlIO_printf(Perl_debug_log,
1904 "%*s trying tail with n=%ld...\n",
3280af22 1905 REPORT_CODE_OFF+PL_regindent*2, "", n)
a0ed51b3 1906 );
1907 if (paren) {
1908 if (n) {
1909 PL_regstartp[paren] = HOP(PL_reginput, -l);
1910 PL_regendp[paren] = PL_reginput;
c277df42 1911 }
a0ed51b3 1912 else
1913 PL_regendp[paren] = NULL;
c277df42 1914 }
a0ed51b3 1915 if (regmatch(next))
1916 sayYES;
1917 REGCP_UNWIND;
1918 }
c277df42 1919 /* Couldn't or didn't -- back up. */
1920 n--;
a0ed51b3 1921 locinput = HOP(locinput, -l);
3280af22 1922 PL_reginput = locinput;
c277df42 1923 }
1924 }
1925 sayNO;
1926 break;
1927 }
1928 case CURLYN:
1929 paren = scan->flags; /* Which paren to set */
3280af22 1930 if (paren > PL_regsize)
1931 PL_regsize = paren;
1932 if (paren > *PL_reglastparen)
1933 *PL_reglastparen = paren;
c277df42 1934 ln = ARG1(scan); /* min to match */
1935 n = ARG2(scan); /* max to match */
dc45a647 1936 scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
c277df42 1937 goto repeat;
a0d0e21e 1938 case CURLY:
c277df42 1939 paren = 0;
a0d0e21e 1940 ln = ARG1(scan); /* min to match */
1941 n = ARG2(scan); /* max to match */
dc45a647 1942 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
a0d0e21e 1943 goto repeat;
1944 case STAR:
1945 ln = 0;
c277df42 1946 n = REG_INFTY;
a0d0e21e 1947 scan = NEXTOPER(scan);
c277df42 1948 paren = 0;
a0d0e21e 1949 goto repeat;
1950 case PLUS:
c277df42 1951 ln = 1;
1952 n = REG_INFTY;
1953 scan = NEXTOPER(scan);
1954 paren = 0;
1955 repeat:
a0d0e21e 1956 /*
1957 * Lookahead to avoid useless match attempts
1958 * when we know what character comes next.
1959 */
bbce6d69 1960 if (regkind[(U8)OP(next)] == EXACT) {
1961 c1 = UCHARAT(OPERAND(next) + 1);
1962 if (OP(next) == EXACTF)
1963 c2 = fold[c1];
1964 else if (OP(next) == EXACTFL)
1965 c2 = fold_locale[c1];
1966 else
1967 c2 = c1;
1968 }
a0d0e21e 1969 else
bbce6d69 1970 c1 = c2 = -1000;
3280af22 1971 PL_reginput = locinput;
a0d0e21e 1972 if (minmod) {
c277df42 1973 CHECKPOINT lastcp;
a0d0e21e 1974 minmod = 0;
1975 if (ln && regrepeat(scan, ln) < ln)
4633a7c4 1976 sayNO;
a0ed51b3 1977 locinput = PL_reginput;
c277df42 1978 REGCP_SET;
1979 while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */
a0d0e21e 1980 /* If it could work, try it. */
bbce6d69 1981 if (c1 == -1000 ||
3280af22 1982 UCHARAT(PL_reginput) == c1 ||
1983 UCHARAT(PL_reginput) == c2)
bbce6d69 1984 {
c277df42 1985 if (paren) {
1986 if (n) {
a0ed51b3 1987 PL_regstartp[paren] = HOP(PL_reginput, -1);
3280af22 1988 PL_regendp[paren] = PL_reginput;
a0ed51b3 1989 }
1990 else
3280af22 1991 PL_regendp[paren] = NULL;
c277df42 1992 }
a0d0e21e 1993 if (regmatch(next))
4633a7c4 1994 sayYES;
c277df42 1995 REGCP_UNWIND;
bbce6d69 1996 }
c277df42 1997 /* Couldn't or didn't -- move forward. */
a0ed51b3 1998 PL_reginput = locinput;
a0d0e21e 1999 if (regrepeat(scan, 1)) {
2000 ln++;
a0ed51b3 2001 locinput = PL_reginput;
2002 }
2003 else
4633a7c4 2004 sayNO;
a0d0e21e 2005 }
2006 }
2007 else {
c277df42 2008 CHECKPOINT lastcp;
a0d0e21e 2009 n = regrepeat(scan, n);
a0ed51b3 2010 locinput = PL_reginput;
a0d0e21e 2011 if (ln < n && regkind[(U8)OP(next)] == EOL &&
3280af22 2012 (!PL_multiline || OP(next) == SEOL))
a0d0e21e 2013 ln = n; /* why back off? */
c277df42 2014 REGCP_SET;
2015 if (paren) {
2016 while (n >= ln) {
2017 /* If it could work, try it. */
2018 if (c1 == -1000 ||
3280af22 2019 UCHARAT(PL_reginput) == c1 ||
2020 UCHARAT(PL_reginput) == c2)
c277df42 2021 {
2022 if (paren && n) {
2023 if (n) {
a0ed51b3 2024 PL_regstartp[paren] = HOP(PL_reginput, -1);
3280af22 2025 PL_regendp[paren] = PL_reginput;
a0ed51b3 2026 }
2027 else
3280af22 2028 PL_regendp[paren] = NULL;
c277df42 2029 }
2030 if (regmatch(next))
2031 sayYES;
2032 REGCP_UNWIND;
2033 }
2034 /* Couldn't or didn't -- back up. */
2035 n--;
a0ed51b3 2036 PL_reginput = locinput = HOP(locinput, -1);
c277df42 2037 }
a0ed51b3 2038 }
2039 else {
c277df42 2040 while (n >= ln) {
2041 /* If it could work, try it. */
2042 if (c1 == -1000 ||
3280af22 2043 UCHARAT(PL_reginput) == c1 ||
2044 UCHARAT(PL_reginput) == c2)
c277df42 2045 {
2046 if (regmatch(next))
2047 sayYES;
2048 REGCP_UNWIND;
2049 }
2050 /* Couldn't or didn't -- back up. */
2051 n--;
a0ed51b3 2052 PL_reginput = locinput = HOP(locinput, -1);
bbce6d69 2053 }
a0d0e21e 2054 }
2055 }
4633a7c4 2056 sayNO;
c277df42 2057 break;
a0d0e21e 2058 case END:
3280af22 2059 if (locinput < PL_regtill)
7e5428c5 2060 sayNO; /* Cannot match: too short. */
2061 /* Fall through */
2062 case SUCCEED:
3280af22 2063 PL_reginput = locinput; /* put where regtry can find it */
4633a7c4 2064 sayYES; /* Success! */
c277df42 2065 case SUSPEND:
2066 n = 1;
2067 goto do_ifmatch;
a0d0e21e 2068 case UNLESSM:
c277df42 2069 n = 0;
a0ed51b3 2070 if (scan->flags) {
2071 s = HOPMAYBE(locinput, -scan->flags);
2072 if (!s)
2073 goto say_yes;
2074 PL_reginput = s;
2075 }
2076 else
2077 PL_reginput = locinput;
c277df42 2078 goto do_ifmatch;
2079 case IFMATCH:
2080 n = 1;
a0ed51b3 2081 if (scan->flags) {
2082 s = HOPMAYBE(locinput, -scan->flags);
2083 if (!s)
2084 goto say_no;
2085 PL_reginput = s;
2086 }
2087 else
2088 PL_reginput = locinput;
2089
c277df42 2090 do_ifmatch:
c277df42 2091 inner = NEXTOPER(NEXTOPER(scan));
2092 if (regmatch(inner) != n) {
2093 say_no:
2094 if (logical) {
2095 logical = 0;
2096 sw = 0;
2097 goto do_longjump;
a0ed51b3 2098 }
2099 else
c277df42 2100 sayNO;
2101 }
2102 say_yes:
2103 if (logical) {
2104 logical = 0;
2105 sw = 1;
2106 }
fe44a5e8 2107 if (OP(scan) == SUSPEND) {
3280af22 2108 locinput = PL_reginput;
565764a8 2109 nextchr = UCHARAT(locinput);
fe44a5e8 2110 }
c277df42 2111 /* FALL THROUGH. */
2112 case LONGJMP:
2113 do_longjump:
2114 next = scan + ARG(scan);
2115 if (next == scan)
2116 next = NULL;
a0d0e21e 2117 break;
2118 default:
c030ccd9 2119 PerlIO_printf(PerlIO_stderr(), "%lx %d\n",
c277df42 2120 (unsigned long)scan, OP(scan));
a0d0e21e 2121 FAIL("regexp memory corruption");
a687059c 2122 }
a0d0e21e 2123 scan = next;
2124 }
a687059c 2125
a0d0e21e 2126 /*
2127 * We get here only if there's trouble -- normally "case END" is
2128 * the terminating point.
2129 */
2130 FAIL("corrupted regexp pointers");
2131 /*NOTREACHED*/
4633a7c4 2132 sayNO;
2133
2134yes:
2135#ifdef DEBUGGING
3280af22 2136 PL_regindent--;
4633a7c4 2137#endif
2138 return 1;
2139
2140no:
2141#ifdef DEBUGGING
3280af22 2142 PL_regindent--;
4633a7c4 2143#endif
a0d0e21e 2144 return 0;
a687059c 2145}
2146
2147/*
2148 - regrepeat - repeatedly match something simple, report how many
2149 */
2150/*
2151 * [This routine now assumes that it will only match on things of length 1.
2152 * That was true before, but now we assume scan - reginput is the count,
a0ed51b3 2153 * rather than incrementing count on every character. [Er, except utf8.]]
a687059c 2154 */
76e3520e 2155STATIC I32
c277df42 2156regrepeat(regnode *p, I32 max)
a687059c 2157{
5c0ca799 2158 dTHR;
a0d0e21e 2159 register char *scan;
2160 register char *opnd;
2161 register I32 c;
3280af22 2162 register char *loceol = PL_regeol;
a0ed51b3 2163 register I32 hardcount = 0;
a0d0e21e 2164
3280af22 2165 scan = PL_reginput;
c277df42 2166 if (max != REG_INFTY && max < loceol - scan)
a0d0e21e 2167 loceol = scan + max;
161b471a 2168 opnd = (char *) OPERAND(p);
a0d0e21e 2169 switch (OP(p)) {
2170 case ANY:
2171 while (scan < loceol && *scan != '\n')
2172 scan++;
2173 break;
2174 case SANY:
2175 scan = loceol;
2176 break;
a0ed51b3 2177 case ANYUTF8:
2178 loceol = PL_regeol;
2179 while (scan < loceol && *scan != '\n') {
2180 scan += UTF8SKIP(scan);
2181 hardcount++;
2182 }
2183 break;
2184 case SANYUTF8:
2185 loceol = PL_regeol;
2186 while (scan < loceol) {
2187 scan += UTF8SKIP(scan);
2188 hardcount++;
2189 }
2190 break;
bbce6d69 2191 case EXACT: /* length of string is 1 */
2192 c = UCHARAT(++opnd);
2193 while (scan < loceol && UCHARAT(scan) == c)
2194 scan++;
2195 break;
2196 case EXACTF: /* length of string is 1 */
2197 c = UCHARAT(++opnd);
2198 while (scan < loceol &&
2199 (UCHARAT(scan) == c || UCHARAT(scan) == fold[c]))
2200 scan++;
2201 break;
2202 case EXACTFL: /* length of string is 1 */
3280af22 2203 PL_reg_flags |= RF_tainted;
bbce6d69 2204 c = UCHARAT(++opnd);
2205 while (scan < loceol &&
2206 (UCHARAT(scan) == c || UCHARAT(scan) == fold_locale[c]))
a0d0e21e 2207 scan++;
2208 break;
a0ed51b3 2209 case ANYOFUTF8:
2210 loceol = PL_regeol;
2211 while (scan < loceol && REGINCLASSUTF8(p, (U8*)scan)) {
2212 scan += UTF8SKIP(scan);
2213 hardcount++;
2214 }
2215 break;
a0d0e21e 2216 case ANYOF:
ae5c130c 2217 while (scan < loceol && REGINCLASS(opnd, *scan))
a0d0e21e 2218 scan++;
a0d0e21e 2219 break;
2220 case ALNUM:
2221 while (scan < loceol && isALNUM(*scan))
2222 scan++;
2223 break;
a0ed51b3 2224 case ALNUMUTF8:
2225 loceol = PL_regeol;
2226 while (scan < loceol && swash_fetch(PL_utf8_alnum, scan)) {
2227 scan += UTF8SKIP(scan);
2228 hardcount++;
2229 }
2230 break;
bbce6d69 2231 case ALNUML:
3280af22 2232 PL_reg_flags |= RF_tainted;
bbce6d69 2233 while (scan < loceol && isALNUM_LC(*scan))
2234 scan++;
2235 break;
a0ed51b3 2236 case ALNUMLUTF8:
2237 PL_reg_flags |= RF_tainted;
2238 loceol = PL_regeol;
2239 while (scan < loceol && isALNUM_LC_utf8(scan)) {
2240 scan += UTF8SKIP(scan);
2241 hardcount++;
2242 }
2243 break;
2244 break;
a0d0e21e 2245 case NALNUM:
2246 while (scan < loceol && !isALNUM(*scan))
2247 scan++;
2248 break;
a0ed51b3 2249 case NALNUMUTF8:
2250 loceol = PL_regeol;
2251 while (scan < loceol && !swash_fetch(PL_utf8_alnum, scan)) {
2252 scan += UTF8SKIP(scan);
2253 hardcount++;
2254 }
2255 break;
bbce6d69 2256 case NALNUML:
3280af22 2257 PL_reg_flags |= RF_tainted;
bbce6d69 2258 while (scan < loceol && !isALNUM_LC(*scan))
2259 scan++;
2260 break;
a0ed51b3 2261 case NALNUMLUTF8:
2262 PL_reg_flags |= RF_tainted;
2263 loceol = PL_regeol;
2264 while (scan < loceol && !isALNUM_LC_utf8(scan)) {
2265 scan += UTF8SKIP(scan);
2266 hardcount++;
2267 }
2268 break;
a0d0e21e 2269 case SPACE:
2270 while (scan < loceol && isSPACE(*scan))
2271 scan++;
2272 break;
a0ed51b3 2273 case SPACEUTF8:
2274 loceol = PL_regeol;
2275 while (scan < loceol && (*scan == ' ' || swash_fetch(PL_utf8_space,scan))) {
2276 scan += UTF8SKIP(scan);
2277 hardcount++;
2278 }
2279 break;
bbce6d69 2280 case SPACEL:
3280af22 2281 PL_reg_flags |= RF_tainted;
bbce6d69 2282 while (scan < loceol && isSPACE_LC(*scan))
2283 scan++;
2284 break;
a0ed51b3 2285 case SPACELUTF8:
2286 PL_reg_flags |= RF_tainted;
2287 loceol = PL_regeol;
2288 while (scan < loceol && (*scan == ' ' || isSPACE_LC_utf8(scan))) {
2289 scan += UTF8SKIP(scan);
2290 hardcount++;
2291 }
2292 break;
a0d0e21e 2293 case NSPACE:
2294 while (scan < loceol && !isSPACE(*scan))
2295 scan++;
2296 break;
a0ed51b3 2297 case NSPACEUTF8:
2298 loceol = PL_regeol;
2299 while (scan < loceol && !(*scan == ' ' || swash_fetch(PL_utf8_space,scan))) {
2300 scan += UTF8SKIP(scan);
2301 hardcount++;
2302 }
2303 break;
bbce6d69 2304 case NSPACEL:
3280af22 2305 PL_reg_flags |= RF_tainted;
bbce6d69 2306 while (scan < loceol && !isSPACE_LC(*scan))
2307 scan++;
2308 break;
a0ed51b3 2309 case NSPACELUTF8:
2310 PL_reg_flags |= RF_tainted;
2311 loceol = PL_regeol;
2312 while (scan < loceol && !(*scan == ' ' || isSPACE_LC_utf8(scan))) {
2313 scan += UTF8SKIP(scan);
2314 hardcount++;
2315 }
2316 break;
a0d0e21e 2317 case DIGIT:
2318 while (scan < loceol && isDIGIT(*scan))
2319 scan++;
2320 break;
a0ed51b3 2321 case DIGITUTF8:
2322 loceol = PL_regeol;
2323 while (scan < loceol && swash_fetch(PL_utf8_digit,scan)) {
2324 scan += UTF8SKIP(scan);
2325 hardcount++;
2326 }
2327 break;
2328 break;
a0d0e21e 2329 case NDIGIT:
2330 while (scan < loceol && !isDIGIT(*scan))
2331 scan++;
2332 break;
a0ed51b3 2333 case NDIGITUTF8:
2334 loceol = PL_regeol;
2335 while (scan < loceol && !swash_fetch(PL_utf8_digit,scan)) {
2336 scan += UTF8SKIP(scan);
2337 hardcount++;
2338 }
2339 break;
a0d0e21e 2340 default: /* Called on something of 0 width. */
2341 break; /* So match right here or not at all. */
2342 }
a687059c 2343
a0ed51b3 2344 if (hardcount)
2345 c = hardcount;
2346 else
2347 c = scan - PL_reginput;
3280af22 2348 PL_reginput = scan;
a687059c 2349
c277df42 2350 DEBUG_r(
2351 {
2352 SV *prop = sv_newmortal();
2353
2354 regprop(prop, p);
2355 PerlIO_printf(Perl_debug_log,
2356 "%*s %s can match %ld times out of %ld...\n",
2357 REPORT_CODE_OFF+1, "", SvPVX(prop),c,max);
2358 });
2359
a0d0e21e 2360 return(c);
a687059c 2361}
2362
2363/*
c277df42 2364 - regrepeat_hard - repeatedly match something, report total lenth and length
2365 *
2366 * The repeater is supposed to have constant length.
2367 */
2368
76e3520e 2369STATIC I32
c277df42 2370regrepeat_hard(regnode *p, I32 max, I32 *lp)
2371{
5c0ca799 2372 dTHR;
c277df42 2373 register char *scan;
2374 register char *start;
3280af22 2375 register char *loceol = PL_regeol;
a0ed51b3 2376 I32 l = 0;
2377 I32 count = 0;
2378
2379 if (!max)
2380 return 0;
c277df42 2381
3280af22 2382 start = PL_reginput;
a0ed51b3 2383 if (UTF) {
2384 while (PL_reginput < loceol && (scan = PL_reginput, regmatch(p))) {
2385 if (!count++) {
2386 l = 0;
2387 while (start < PL_reginput) {
2388 l++;
2389 start += UTF8SKIP(start);
2390 }
2391 *lp = l;
2392 if (l == 0)
2393 return max;
2394 }
2395 if (count == max)
2396 return count;
2397 }
2398 }
2399 else {
2400 while (PL_reginput < loceol && (scan = PL_reginput, regmatch(p))) {
2401 if (!count++) {
2402 *lp = l = PL_reginput - start;
2403 if (max != REG_INFTY && l*max < loceol - scan)
2404 loceol = scan + l*max;
2405 if (l == 0)
2406 return max;
c277df42 2407 }
2408 }
2409 }
3280af22 2410 if (PL_reginput < loceol)
2411 PL_reginput = scan;
c277df42 2412
a0ed51b3 2413 return count;
c277df42 2414}
2415
2416/*
bbce6d69 2417 - regclass - determine if a character falls into a character class
2418 */
2419
76e3520e 2420STATIC bool
8ac85365 2421reginclass(register char *p, register I32 c)
bbce6d69 2422{
5c0ca799 2423 dTHR;
bbce6d69 2424 char flags = *p;
2425 bool match = FALSE;
2426
2427 c &= 0xFF;
ae5c130c 2428 if (ANYOF_TEST(p, c))
bbce6d69 2429 match = TRUE;
2430 else if (flags & ANYOF_FOLD) {
2431 I32 cf;
2432 if (flags & ANYOF_LOCALE) {
3280af22 2433 PL_reg_flags |= RF_tainted;
bbce6d69 2434 cf = fold_locale[c];
2435 }
2436 else
2437 cf = fold[c];
ae5c130c 2438 if (ANYOF_TEST(p, cf))
bbce6d69 2439 match = TRUE;
2440 }
2441
2442 if (!match && (flags & ANYOF_ISA)) {
3280af22 2443 PL_reg_flags |= RF_tainted;
bbce6d69 2444
2445 if (((flags & ANYOF_ALNUML) && isALNUM_LC(c)) ||
2446 ((flags & ANYOF_NALNUML) && !isALNUM_LC(c)) ||
2447 ((flags & ANYOF_SPACEL) && isSPACE_LC(c)) ||
2448 ((flags & ANYOF_NSPACEL) && !isSPACE_LC(c)))
2449 {
2450 match = TRUE;
2451 }
2452 }
2453
ae5c130c 2454 return (flags & ANYOF_INVERT) ? !match : match;
bbce6d69 2455}
2456
a0ed51b3 2457STATIC bool
2458reginclassutf8(regnode *f, U8 *p)
2459{
2460 char flags = ARG1(f);
2461 bool match = FALSE;
2462 SV *sv = (SV*)PL_regdata->data[ARG2(f)];
2463
2464 if (swash_fetch(sv, p))
2465 match = TRUE;
2466 else if (flags & ANYOF_FOLD) {
2467 I32 cf;
2468 char tmpbuf[10];
2469 if (flags & ANYOF_LOCALE) {
2470 PL_reg_flags |= RF_tainted;
2471 uv_to_utf8(tmpbuf, toLOWER_LC_utf8(p));
2472 }
2473 else
2474 uv_to_utf8(tmpbuf, toLOWER_utf8(p));
2475 if (swash_fetch(sv, tmpbuf))
2476 match = TRUE;
2477 }
2478
2479 if (!match && (flags & ANYOF_ISA)) {
2480 PL_reg_flags |= RF_tainted;
2481
2482 if (((flags & ANYOF_ALNUML) && isALNUM_LC_utf8(p)) ||
2483 ((flags & ANYOF_NALNUML) && !isALNUM_LC_utf8(p)) ||
2484 ((flags & ANYOF_SPACEL) && isSPACE_LC_utf8(p)) ||
2485 ((flags & ANYOF_NSPACEL) && !isSPACE_LC_utf8(p)))
2486 {
2487 match = TRUE;
2488 }
2489 }
2490
2491 return (flags & ANYOF_INVERT) ? !match : match;
2492}
161b471a 2493
a0ed51b3 2494STATIC char *
2495reghop(unsigned char *s, I32 off)
2496{
2497 if (off >= 0) {
2498 while (off-- && s < (U8*)PL_regeol)
2499 s += UTF8SKIP(s);
2500 }
2501 else {
2502 while (off++) {
2503 if (s > (U8*)PL_bostr) {
2504 s--;
2505 if (*s & 0x80) {
2506 while (s > (U8*)PL_bostr && (*s & 0xc0) == 0x80)
2507 s--;
2508 } /* XXX could check well-formedness here */
2509 }
2510 }
2511 }
2512 return s;
2513}
161b471a 2514
a0ed51b3 2515STATIC char *
2516reghopmaybe(unsigned char *s, I32 off)
2517{
2518 if (off >= 0) {
2519 while (off-- && s < (U8*)PL_regeol)
2520 s += UTF8SKIP(s);
2521 if (off >= 0)
2522 return 0;
2523 }
2524 else {
2525 while (off++) {
2526 if (s > (U8*)PL_bostr) {
2527 s--;
2528 if (*s & 0x80) {
2529 while (s > (U8*)PL_bostr && (*s & 0xc0) == 0x80)
2530 s--;
2531 } /* XXX could check well-formedness here */
2532 }
2533 else
2534 break;
2535 }
2536 if (off <= 0)
2537 return 0;
2538 }
2539 return s;
2540}