* blame Henry for some of the lack of readability.
*/
+/* The names of the functions have been changed from regcomp and
+ * regexec to pregcomp and pregexec in order to avoid conflicts
+ * with the POSIX routines of the same names.
+*/
+
/*SUPPRESS 112*/
/*
- * regcomp and regexec -- regsub and regerror are not used in perl
+ * pregcomp and pregexec -- regsub and regerror are not used in perl
*
* Copyright (c) 1986 by University of Toronto.
* Written by Henry Spencer. Not derived from licensed software.
#define regcpblow(cp) leave_scope(cp)
/*
- * regexec and friends
+ * pregexec and friends
*/
/*
static I32 regtry _((regexp *prog, char *startpos));
/*
- - regexec - match a regexp against a string
+ - pregexec - match a regexp against a string
*/
I32
-regexec(prog, stringarg, strend, strbeg, minend, screamer, safebase)
+pregexec(prog, stringarg, strend, strbeg, minend, screamer, safebase)
register regexp *prog;
char *stringarg;
register char *strend; /* pointer to null at end of string */
CURCUR cc;
cc.cur = 0;
+ cc.oldcc = 0;
regcc = &cc;
#ifdef DEBUGGING
register char *s; /* operand or save */
register char *locinput = reginput;
int minmod = 0;
+#ifdef DEBUGGING
+ static int regindent = 0;
+ regindent++;
+#endif
nextchar = *locinput;
scan = prog;
while (scan != NULL) {
#ifdef DEBUGGING
- if (regnarrate)
- fprintf(stderr, "%2d%-8.8s\t<%.10s>\n",
+#define sayYES goto yes
+#define sayNO goto no
+#define saySAME(x) if (x) goto yes; else goto no
+ if (regnarrate) {
+ fprintf(stderr, "%*s%2d%-8.8s\t<%.10s>\n", regindent*2, "",
scan - regprogram, regprop(scan), locinput);
+ }
+#else
+#define sayYES return 1
+#define sayNO return 0
+#define saySAME(x) return x
#endif
#ifdef REGALIGN
/* regtill = regbol; */
break;
}
- return 0;
+ sayNO;
case MBOL:
if (locinput == regbol
? regprev == '\n'
{
break;
}
- return 0;
+ sayNO;
case SBOL:
if (locinput == regbol && regprev == '\n')
break;
- return 0;
+ sayNO;
case GBOL:
if (locinput == regbol)
break;
- return 0;
+ sayNO;
case EOL:
if (multiline)
goto meol;
case MEOL:
meol:
if ((nextchar || locinput < regeol) && nextchar != '\n')
- return 0;
+ sayNO;
break;
case SEOL:
seol:
if ((nextchar || locinput < regeol) && nextchar != '\n')
- return 0;
+ sayNO;
if (regeol - locinput > 1)
- return 0;
+ sayNO;
break;
case SANY:
if (!nextchar && locinput >= regeol)
- return 0;
+ sayNO;
nextchar = *++locinput;
break;
case ANY:
if (!nextchar && locinput >= regeol || nextchar == '\n')
- return 0;
+ sayNO;
nextchar = *++locinput;
break;
case EXACTLY:
ln = *s++;
/* Inline the first character, for speed. */
if (*s != nextchar)
- return 0;
+ sayNO;
if (regeol - locinput < ln)
- return 0;
+ sayNO;
if (ln > 1 && bcmp(s, locinput, ln) != 0)
- return 0;
+ sayNO;
locinput += ln;
nextchar = *locinput;
break;
if (nextchar < 0)
nextchar = UCHARAT(locinput);
if (s[nextchar >> 3] & (1 << (nextchar&7)))
- return 0;
+ sayNO;
if (!nextchar && locinput >= regeol)
- return 0;
+ sayNO;
nextchar = *++locinput;
break;
case ALNUM:
if (!nextchar)
- return 0;
+ sayNO;
if (!isALNUM(nextchar))
- return 0;
+ sayNO;
nextchar = *++locinput;
break;
case NALNUM:
if (!nextchar && locinput >= regeol)
- return 0;
+ sayNO;
if (isALNUM(nextchar))
- return 0;
+ sayNO;
nextchar = *++locinput;
break;
case NBOUND:
ln = isALNUM(locinput[-1]);
n = isALNUM(nextchar); /* is next char in word? */
if ((ln == n) == (OP(scan) == BOUND))
- return 0;
+ sayNO;
break;
case SPACE:
if (!nextchar && locinput >= regeol)
- return 0;
+ sayNO;
if (!isSPACE(nextchar))
- return 0;
+ sayNO;
nextchar = *++locinput;
break;
case NSPACE:
if (!nextchar)
- return 0;
+ sayNO;
if (isSPACE(nextchar))
- return 0;
+ sayNO;
nextchar = *++locinput;
break;
case DIGIT:
if (!isDIGIT(nextchar))
- return 0;
+ sayNO;
nextchar = *++locinput;
break;
case NDIGIT:
if (!nextchar && locinput >= regeol)
- return 0;
+ sayNO;
if (isDIGIT(nextchar))
- return 0;
+ sayNO;
nextchar = *++locinput;
break;
case REF:
n = ARG1(scan); /* which paren pair */
s = regstartp[n];
if (!s)
- return 0;
+ sayNO;
if (!regendp[n])
- return 0;
+ sayNO;
if (s == regendp[n])
break;
/* Inline the first character, for speed. */
if (*s != nextchar)
- return 0;
+ sayNO;
ln = regendp[n] - s;
if (locinput + ln > regeol)
- return 0;
+ sayNO;
if (ln > 1 && bcmp(s, locinput, ln) != 0)
- return 0;
+ sayNO;
locinput += ln;
nextchar = *locinput;
break;
n = regmatch(PREVOPER(next)); /* start on the WHILEM */
regcpblow(cp);
regcc = cc.oldcc;
- return n;
+ saySAME(n);
}
/* NOT REACHED */
case WHILEM: {
*/
CURCUR* cc = regcc;
- n = cc->cur + 1;
+ n = cc->cur + 1; /* how many we know we matched */
reginput = locinput;
+#ifdef DEBUGGING
+ if (regnarrate)
+ fprintf(stderr, "%*s %d %lx\n", regindent*2, "",
+ n, (long)cc);
+#endif
+
/* If degenerate scan matches "", assume scan done. */
if (locinput == cc->lastloc) {
regcc = cc->oldcc;
ln = regcc->cur;
if (regmatch(cc->next))
- return TRUE;
+ sayYES;
regcc->cur = ln;
regcc = cc;
- return FALSE;
+ sayNO;
}
/* First just match a string of min scans. */
if (n < cc->min) {
cc->cur = n;
cc->lastloc = locinput;
- return regmatch(cc->scan);
+ if (regmatch(cc->scan))
+ sayYES;
+ cc->cur = n - 1;
+ sayNO;
}
/* Prefer next over scan for minimal matching. */
regcc = cc->oldcc;
ln = regcc->cur;
if (regmatch(cc->next))
- return TRUE; /* All done. */
+ sayYES; /* All done. */
regcc->cur = ln;
regcc = cc;
if (n >= cc->max) /* Maximum greed exceeded? */
- return FALSE;
+ sayNO;
/* Try scanning more and see if it helps. */
reginput = locinput;
cc->cur = n;
cc->lastloc = locinput;
- return regmatch(cc->scan);
+ if (regmatch(cc->scan))
+ sayYES;
+ cc->cur = n - 1;
+ sayNO;
}
/* Prefer scan over next for maximal matching. */
cc->cur = n;
cc->lastloc = locinput;
if (regmatch(cc->scan))
- return TRUE;
+ sayYES;
regcppop(); /* Restore some previous $<digit>s? */
reginput = locinput;
}
regcc = cc->oldcc;
ln = regcc->cur;
if (regmatch(cc->next))
- return TRUE;
+ sayYES;
regcc->cur = ln;
regcc = cc;
- return FALSE;
+ cc->cur = n - 1;
+ sayNO;
}
/* NOT REACHED */
case BRANCH: {
if (OP(next) != BRANCH) /* No choice. */
next = NEXTOPER(scan);/* Avoid recursion. */
else {
+ int lastparen = *reglastparen;
do {
reginput = locinput;
if (regmatch(NEXTOPER(scan)))
- return 1;
+ sayYES;
+ for (n = *reglastparen; n > lastparen; n--)
+ regendp[n] = 0;
+ *reglastparen = n;
+
#ifdef REGALIGN
/*SUPPRESS 560*/
if (n = NEXT(scan))
scan = regnext(scan);
#endif
} while (scan != NULL && OP(scan) == BRANCH);
- return 0;
+ sayNO;
/* NOTREACHED */
}
}
if (minmod) {
minmod = 0;
if (ln && regrepeat(scan, ln) < ln)
- return 0;
- while (n >= ln) {
+ sayNO;
+ while (n >= ln || (n == 32767 && ln > 0)) { /* ln overflow ? */
/* If it could work, try it. */
if (nextchar == -1000 || *reginput == nextchar)
if (regmatch(next))
- return 1;
+ sayYES;
/* Couldn't or didn't -- back up. */
+ reginput = locinput + ln;
if (regrepeat(scan, 1)) {
ln++;
reginput = locinput + ln;
}
else
- return 0;
+ sayNO;
}
}
else {
/* If it could work, try it. */
if (nextchar == -1000 || *reginput == nextchar)
if (regmatch(next))
- return 1;
+ sayYES;
/* Couldn't or didn't -- back up. */
n--;
reginput = locinput + n;
}
}
- return 0;
+ sayNO;
case SUCCEED:
case END:
reginput = locinput; /* put where regtry can find it */
- return 1; /* Success! */
+ sayYES; /* Success! */
case IFMATCH:
reginput = locinput;
scan = NEXTOPER(scan);
if (!regmatch(scan))
- return 0;
+ sayNO;
break;
case UNLESSM:
reginput = locinput;
scan = NEXTOPER(scan);
if (regmatch(scan))
- return 0;
+ sayNO;
break;
default:
fprintf(stderr, "%x %d\n",(unsigned)scan,scan[1]);
*/
FAIL("corrupted regexp pointers");
/*NOTREACHED*/
+ sayNO;
+
+yes:
+#ifdef DEBUGGING
+ regindent--;
+#endif
+ return 1;
+
+no:
+#ifdef DEBUGGING
+ regindent--;
+#endif
return 0;
}