* blame Henry for some of the lack of readability.
*/
-/* $Header: regexec.c,v 3.0 89/10/18 15:22:53 lwall Locked $
+/* $Header: regexec.c,v 3.0.1.3 90/02/28 18:14:39 lwall Locked $
*
* $Log: regexec.c,v $
+ * Revision 3.0.1.3 90/02/28 18:14:39 lwall
+ * patch9: /[\200-\377]/ didn't work on machines with signed chars
+ * patch9: \d, \w, and \s could misfire on characters with high bit set
+ * patch9: /\bfoo/i didn't work
+ *
+ * Revision 3.0.1.2 89/12/21 20:16:27 lwall
+ * patch7: certain patterns didn't match correctly at end of string
+ *
+ * Revision 3.0.1.1 89/11/11 04:52:04 lwall
+ * patch2: /\b$foo/ didn't work
+ *
* Revision 3.0 89/10/18 15:22:53 lwall
* 3.0 baseline
*
int regnarrate = 0;
#endif
+#define isALNUM(c) (isascii(c) && (isalpha(c) || isdigit(c) || c == '_'))
+#define isSPACE(c) (isascii(c) && isspace(c))
+#define isDIGIT(c) (isascii(c) && isdigit(c))
+#define isUPPER(c) (isascii(c) && isupper(c))
+
/*
* regexec and friends
*/
*/
static char *regprecomp;
static char *reginput; /* String-input pointer. */
+static char regprev; /* char before regbol, \n if none */
static char *regbol; /* Beginning of input, for ^ check. */
static char *regeol; /* End of input, for $ check. */
static char **regstartp; /* Pointer to startp array. */
register int tmp;
int minlen = 0; /* must match at least this many chars */
int dontbother = 0; /* how many characters not to try at end */
- int beginning = (string == strbeg); /* is ^ valid at stringarg? */
/* Be paranoid... */
if (prog == NULL || string == NULL) {
return(0);
}
+ if (string == strbeg) /* is ^ valid at stringarg? */
+ regprev = '\n';
+ else
+ regprev = stringarg[-1];
regprecomp = prog->precomp;
/* Check validity of program. */
if (UCHARAT(prog->program) != MAGIC) {
string = c;
strend = string + i;
for (s = string; s < strend; s++)
- if (isupper(*s))
+ if (isUPPER(*s))
*s = tolower(*s);
}
/* If there is a "must appear" string, look for it. */
s = string;
if (prog->regmust != Nullstr) {
- if (beginning && screamer) {
+ if (stringarg == strbeg && screamer) {
if (screamfirst[prog->regmust->str_rare] >= 0)
s = screaminstr(screamer,prog->regmust);
else
}
/* Mark beginning of line for ^ . */
- if (beginning)
- regbol = string;
- else
- regbol = NULL;
+ regbol = string;
/* Mark end of line for $ (and such) */
regeol = strend;
case ANYOF: case ANYBUT:
c = OPERAND(c);
while (s < strend) {
- i = *s;
+ i = UCHARAT(s);
if (!(c[i >> 3] & (1 << (i&7))))
if (regtry(prog, s))
goto got_it;
dontbother++,strend--;
if (s != string) {
i = s[-1];
- tmp = (isalpha(i) || isdigit(i) || i == '_');
+ tmp = isALNUM(i);
}
else
- tmp = 0; /* assume not alphanumeric */
+ tmp = isALNUM(regprev); /* assume not alphanumeric */
while (s < strend) {
i = *s;
- if (tmp != (isalpha(i) || isdigit(i) || i == '_')) {
+ if (tmp != isALNUM(i)) {
tmp = !tmp;
if (regtry(prog, s))
goto got_it;
}
s++;
}
- if (tmp && regtry(prog,s))
+ if ((minlen || tmp) && regtry(prog,s))
goto got_it;
break;
case NBOUND:
dontbother++,strend--;
if (s != string) {
i = s[-1];
- tmp = (isalpha(i) || isdigit(i) || i == '_');
+ tmp = isALNUM(i);
}
else
- tmp = 0; /* assume not alphanumeric */
+ tmp = isALNUM(regprev); /* assume not alphanumeric */
while (s < strend) {
i = *s;
- if (tmp != (isalpha(i) || isdigit(i) || i == '_'))
+ if (tmp != isALNUM(i))
tmp = !tmp;
else if (regtry(prog, s))
goto got_it;
s++;
}
- if (!tmp && regtry(prog,s))
+ if ((minlen || !tmp) && regtry(prog,s))
goto got_it;
break;
case ALNUM:
while (s < strend) {
i = *s;
- if (isalpha(i) || isdigit(i) || i == '_')
+ if (isALNUM(i))
if (regtry(prog, s))
goto got_it;
s++;
case NALNUM:
while (s < strend) {
i = *s;
- if (!isalpha(i) && !isdigit(i) && i != '_')
+ if (!isALNUM(i))
if (regtry(prog, s))
goto got_it;
s++;
break;
case SPACE:
while (s < strend) {
- if (isspace(*s))
+ if (isSPACE(*s))
if (regtry(prog, s))
goto got_it;
s++;
break;
case NSPACE:
while (s < strend) {
- if (!isspace(*s))
+ if (!isSPACE(*s))
if (regtry(prog, s))
goto got_it;
s++;
break;
case DIGIT:
while (s < strend) {
- if (isdigit(*s))
+ if (isDIGIT(*s))
if (regtry(prog, s))
goto got_it;
s++;
break;
case NDIGIT:
while (s < strend) {
- if (!isdigit(*s))
+ if (!isDIGIT(*s))
if (regtry(prog, s))
goto got_it;
s++;
}
}
else {
- dontbother = minend;
+ if (minlen)
+ dontbother = minlen - 1;
strend -= dontbother;
/* We don't know much -- general case. */
do {
switch (OP(scan)) {
case BOL:
- if (locinput == regbol ||
+ if (locinput == regbol ? regprev == '\n' :
((nextchar || locinput < regeol) &&
locinput[-1] == '\n') )
{
case ALNUM:
if (!nextchar)
return(0);
- if (!isalpha(nextchar) && !isdigit(nextchar) &&
- nextchar != '_')
+ if (!isALNUM(nextchar))
return(0);
nextchar = *++locinput;
break;
case NALNUM:
if (!nextchar && locinput >= regeol)
return(0);
- if (isalpha(nextchar) || isdigit(nextchar) ||
- nextchar == '_')
+ if (isALNUM(nextchar))
return(0);
nextchar = *++locinput;
break;
case NBOUND:
case BOUND:
if (locinput == regbol) /* was last char in word? */
- ln = 0;
+ ln = isALNUM(regprev);
else
- ln = (isalpha(locinput[-1]) ||
- isdigit(locinput[-1]) ||
- locinput[-1] == '_' );
- n = (isalpha(nextchar) || isdigit(nextchar) ||
- nextchar == '_' ); /* is next char in word? */
+ ln = isALNUM(locinput[-1]);
+ n = isALNUM(nextchar); /* is next char in word? */
if ((ln == n) == (OP(scan) == BOUND))
return(0);
break;
case SPACE:
if (!nextchar && locinput >= regeol)
return(0);
- if (!isspace(nextchar))
+ if (!isSPACE(nextchar))
return(0);
nextchar = *++locinput;
break;
case NSPACE:
if (!nextchar)
return(0);
- if (isspace(nextchar))
+ if (isSPACE(nextchar))
return(0);
nextchar = *++locinput;
break;
case DIGIT:
- if (!isdigit(nextchar))
+ if (!isDIGIT(nextchar))
return(0);
nextchar = *++locinput;
break;
case NDIGIT:
if (!nextchar && locinput >= regeol)
return(0);
- if (isdigit(nextchar))
+ if (isDIGIT(nextchar))
return(0);
nextchar = *++locinput;
break;
}
break;
case ALNUM:
- while (isalpha(*scan) || isdigit(*scan) || *scan == '_')
+ while (isALNUM(*scan))
scan++;
break;
case NALNUM:
- while (scan < loceol && (!isalpha(*scan) && !isdigit(*scan) &&
- *scan != '_'))
+ while (scan < loceol && !isALNUM(*scan))
scan++;
break;
case SPACE:
- while (scan < loceol && isspace(*scan))
+ while (scan < loceol && isSPACE(*scan))
scan++;
break;
case NSPACE:
- while (scan < loceol && !isspace(*scan))
+ while (scan < loceol && !isSPACE(*scan))
scan++;
break;
case DIGIT:
- while (isdigit(*scan))
+ while (isDIGIT(*scan))
scan++;
break;
case NDIGIT:
- while (scan < loceol && !isdigit(*scan))
+ while (scan < loceol && !isDIGIT(*scan))
scan++;
break;
default: /* Oh dear. Called inappropriately. */