* "One Ring to rule them all, One Ring to find them..."
*/
+/* This file contains functions for executing a regular expression. See
+ * also regcomp.c which funnily enough, contains functions for compiling
+ * a regular expression.
+ */
+
/* NOTE: this is derived from Henry Spencer's regexp code, and should not
* confused with the original package (see point 3 below). Thanks, Henry!
*/
char *m;
STRLEN ln;
STRLEN lnc;
+ register STRLEN uskip;
unsigned int c1;
unsigned int c2;
char *e;
switch (OP(c)) {
case ANYOF:
if (do_utf8) {
- while (s < strend) {
+ while (s + (uskip = UTF8SKIP(s)) <= strend) {
if ((ANYOF_FLAGS(c) & ANYOF_UNICODE) ||
!UTF8_IS_INVARIANT((U8)s[0]) ?
reginclass(c, (U8*)s, 0, do_utf8) :
}
else
tmp = 1;
- s += UTF8SKIP(s);
+ s += uskip;
}
}
else {
tmp = ((OP(c) == BOUND ?
isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
LOAD_UTF8_CHARCLASS(alnum,"a");
- while (s < strend) {
+ while (s + (uskip = UTF8SKIP(s)) <= strend) {
if (tmp == !(OP(c) == BOUND ?
swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
isALNUM_LC_utf8((U8*)s)))
if ((norun || regtry(prog, s)))
goto got_it;
}
- s += UTF8SKIP(s);
+ s += uskip;
}
}
else {
tmp = ((OP(c) == NBOUND ?
isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
LOAD_UTF8_CHARCLASS(alnum,"a");
- while (s < strend) {
+ while (s + (uskip = UTF8SKIP(s)) <= strend) {
if (tmp == !(OP(c) == NBOUND ?
swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
isALNUM_LC_utf8((U8*)s)))
tmp = !tmp;
else if ((norun || regtry(prog, s)))
goto got_it;
- s += UTF8SKIP(s);
+ s += uskip;
}
}
else {
case ALNUM:
if (do_utf8) {
LOAD_UTF8_CHARCLASS(alnum,"a");
- while (s < strend) {
+ while (s + (uskip = UTF8SKIP(s)) <= strend) {
if (swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
if (tmp && (norun || regtry(prog, s)))
goto got_it;
}
else
tmp = 1;
- s += UTF8SKIP(s);
+ s += uskip;
}
}
else {
case ALNUML:
PL_reg_flags |= RF_tainted;
if (do_utf8) {
- while (s < strend) {
+ while (s + (uskip = UTF8SKIP(s)) <= strend) {
if (isALNUM_LC_utf8((U8*)s)) {
if (tmp && (norun || regtry(prog, s)))
goto got_it;
}
else
tmp = 1;
- s += UTF8SKIP(s);
+ s += uskip;
}
}
else {
case NALNUM:
if (do_utf8) {
LOAD_UTF8_CHARCLASS(alnum,"a");
- while (s < strend) {
+ while (s + (uskip = UTF8SKIP(s)) <= strend) {
if (!swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
if (tmp && (norun || regtry(prog, s)))
goto got_it;
}
else
tmp = 1;
- s += UTF8SKIP(s);
+ s += uskip;
}
}
else {
case NALNUML:
PL_reg_flags |= RF_tainted;
if (do_utf8) {
- while (s < strend) {
+ while (s + (uskip = UTF8SKIP(s)) <= strend) {
if (!isALNUM_LC_utf8((U8*)s)) {
if (tmp && (norun || regtry(prog, s)))
goto got_it;
}
else
tmp = 1;
- s += UTF8SKIP(s);
+ s += uskip;
}
}
else {
case SPACE:
if (do_utf8) {
LOAD_UTF8_CHARCLASS(space," ");
- while (s < strend) {
+ while (s + (uskip = UTF8SKIP(s)) <= strend) {
if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8)) {
if (tmp && (norun || regtry(prog, s)))
goto got_it;
}
else
tmp = 1;
- s += UTF8SKIP(s);
+ s += uskip;
}
}
else {
case SPACEL:
PL_reg_flags |= RF_tainted;
if (do_utf8) {
- while (s < strend) {
+ while (s + (uskip = UTF8SKIP(s)) <= strend) {
if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) {
if (tmp && (norun || regtry(prog, s)))
goto got_it;
}
else
tmp = 1;
- s += UTF8SKIP(s);
+ s += uskip;
}
}
else {
case NSPACE:
if (do_utf8) {
LOAD_UTF8_CHARCLASS(space," ");
- while (s < strend) {
+ while (s + (uskip = UTF8SKIP(s)) <= strend) {
if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8))) {
if (tmp && (norun || regtry(prog, s)))
goto got_it;
}
else
tmp = 1;
- s += UTF8SKIP(s);
+ s += uskip;
}
}
else {
case NSPACEL:
PL_reg_flags |= RF_tainted;
if (do_utf8) {
- while (s < strend) {
+ while (s + (uskip = UTF8SKIP(s)) <= strend) {
if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) {
if (tmp && (norun || regtry(prog, s)))
goto got_it;
}
else
tmp = 1;
- s += UTF8SKIP(s);
+ s += uskip;
}
}
else {
case DIGIT:
if (do_utf8) {
LOAD_UTF8_CHARCLASS(digit,"0");
- while (s < strend) {
+ while (s + (uskip = UTF8SKIP(s)) <= strend) {
if (swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
if (tmp && (norun || regtry(prog, s)))
goto got_it;
}
else
tmp = 1;
- s += UTF8SKIP(s);
+ s += uskip;
}
}
else {
case DIGITL:
PL_reg_flags |= RF_tainted;
if (do_utf8) {
- while (s < strend) {
+ while (s + (uskip = UTF8SKIP(s)) <= strend) {
if (isDIGIT_LC_utf8((U8*)s)) {
if (tmp && (norun || regtry(prog, s)))
goto got_it;
}
else
tmp = 1;
- s += UTF8SKIP(s);
+ s += uskip;
}
}
else {
case NDIGIT:
if (do_utf8) {
LOAD_UTF8_CHARCLASS(digit,"0");
- while (s < strend) {
+ while (s + (uskip = UTF8SKIP(s)) <= strend) {
if (!swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
if (tmp && (norun || regtry(prog, s)))
goto got_it;
}
else
tmp = 1;
- s += UTF8SKIP(s);
+ s += uskip;
}
}
else {
case NDIGITL:
PL_reg_flags |= RF_tainted;
if (do_utf8) {
- while (s < strend) {
+ while (s + (uskip = UTF8SKIP(s)) <= strend) {
if (!isDIGIT_LC_utf8((U8*)s)) {
if (tmp && (norun || regtry(prog, s)))
goto got_it;
}
else
tmp = 1;
- s += UTF8SKIP(s);
+ s += uskip;
}
}
else {
CHECKPOINT lastcp;
/* We suppose that the next guy does not need
- backtracking: in particular, it is of constant length,
+ backtracking: in particular, it is of constant non-zero length,
and has no parenths to influence future backrefs. */
ln = ARG1(scan); /* min to match */
n = ARG2(scan); /* max to match */
minmod = 0;
if (ln && regrepeat_hard(scan, ln, &l) < ln)
sayNO;
- /* if we matched something zero-length we don't need to
- backtrack - capturing parens are already defined, so
- the caveat in the maximal case doesn't apply
-
- XXXX if ln == 0, we can redo this check first time
- through the following loop
- */
- if (ln && l == 0)
- n = ln; /* don't backtrack */
locinput = PL_reginput;
if (HAS_TEXT(next) || JUMPABLE(next)) {
regnode *text_node = next;
c1 = c2 = -1000;
assume_ok_MM:
REGCP_SET(lastcp);
- /* This may be improved if l == 0. */
- while (n >= ln || (n == REG_INFTY && ln > 0 && l)) { /* ln overflow ? */
+ while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */
/* If it could work, try it. */
if (c1 == -1000 ||
UCHARAT(PL_reginput) == c1 ||
}
else {
n = regrepeat_hard(scan, n, &l);
- /* if we matched something zero-length we don't need to
- backtrack, unless the minimum count is zero and we
- are capturing the result - in that case the capture
- being defined or not may affect later execution
- */
- if (n != 0 && l == 0 && !(paren && ln == 0))
- ln = n; /* don't backtrack */
locinput = PL_reginput;
DEBUG_r(
PerlIO_printf(Perl_debug_log,
/*
- regrepeat_hard - repeatedly match something, report total lenth and length
*
- * The repeater is supposed to have constant length.
+ * The repeater is supposed to have constant non-zero length.
*/
STATIC I32