* blame Henry for some of the lack of readability.
*/
-/* $Header: regexec.c,v 3.0.1.6 90/11/10 02:00:57 lwall Locked $
+/* $RCSfile: regexec.c,v $$Revision: 4.0.1.2 $$Date: 91/06/07 11:50:33 $
*
* $Log: regexec.c,v $
- * Revision 3.0.1.6 90/11/10 02:00:57 lwall
- * patch38: patterns like /^foo.*bar/ sped up some
- * patch38: /[^whatever]+/ could scan past end of string
+ * Revision 4.0.1.2 91/06/07 11:50:33 lwall
+ * patch4: new copyright notice
+ * patch4: // wouldn't use previous pattern if it started with a null character
*
- * Revision 3.0.1.5 90/10/16 10:25:36 lwall
- * patch29: /^pat/ occasionally matched in middle of string when $* = 0
- * patch29: /.{n,m}$/ could match with fewer than n characters remaining
- * patch29: /\d{9}/ could match more than 9 characters
+ * Revision 4.0.1.1 91/04/12 09:07:39 lwall
+ * patch1: regexec only allocated space for 9 subexpressions
*
- * Revision 3.0.1.4 90/08/09 05:12:03 lwall
- * patch19: sped up /x+y/ patterns greatly by not retrying on every x
- * patch19: inhibited backoff on patterns anchored to the end like /\s+$/
- * patch19: sped up {m,n} on simple items
- * patch19: $' broke on embedded nulls
- * patch19: $ will now only match at end of string if $* == 0
- *
- * Revision 3.0.1.3 90/02/28 18:14:39 lwall
- * patch9: /[\200-\377]/ didn't work on machines with signed chars
- * patch9: \d, \w, and \s could misfire on characters with high bit set
- * patch9: /\bfoo/i didn't work
- *
- * Revision 3.0.1.2 89/12/21 20:16:27 lwall
- * patch7: certain patterns didn't match correctly at end of string
- *
- * Revision 3.0.1.1 89/11/11 04:52:04 lwall
- * patch2: /\b$foo/ didn't work
- *
- * Revision 3.0 89/10/18 15:22:53 lwall
- * 3.0 baseline
+ * Revision 4.0 91/03/20 01:39:16 lwall
+ * 4.0 baseline.
*
*/
*
**** Alterations to Henry's code are...
****
- **** Copyright (c) 1989, Larry Wall
+ **** Copyright (c) 1991, Larry Wall
****
- **** You may distribute under the terms of the GNU General Public License
- **** as specified in the README file that comes with the perl 3.0 kit.
+ **** You may distribute under the terms of either the GNU General Public
+ **** License or the Artistic License, as specified in the README file.
*
* Beware that some of this code is subtly aware of the way operator
* precedence is structured in regular expressions. Serious changes in
static char *reglastparen; /* Similarly for lastparen. */
static char *regtill;
-static char *regmystartp[10]; /* For remembering backreferences. */
-static char *regmyendp[10];
+static int regmyp_size = 0;
+static char **regmystartp = Null(char**);
+static char **regmyendp = Null(char**);
/*
* Forwards.
/* If there is a "must appear" string, look for it. */
s = string;
if (prog->regmust != Nullstr &&
- (!(prog->reganch & 1) || (multiline && prog->regback >= 0)) ) {
+ (!(prog->reganch & ROPT_ANCH)
+ || (multiline && prog->regback >= 0)) ) {
if (stringarg == strbeg && screamer) {
if (screamfirst[prog->regmust->str_rare] >= 0)
s = screaminstr(screamer,prog->regmust);
/* see how far we have to get to not match where we matched before */
regtill = string+minend;
+ /* Allocate our backreference arrays */
+ if ( regmyp_size < prog->nparens + 1 ) {
+ /* Allocate or enlarge the arrays */
+ regmyp_size = prog->nparens + 1;
+ if ( regmyp_size < 10 ) regmyp_size = 10; /* minimum */
+ if ( regmystartp ) {
+ /* reallocate larger */
+ Renew(regmystartp,regmyp_size,char*);
+ Renew(regmyendp, regmyp_size,char*);
+ }
+ else {
+ /* Initial allocation */
+ New(1102,regmystartp,regmyp_size,char*);
+ New(1102,regmyendp, regmyp_size,char*);
+ }
+
+ }
+
/* Simplest case: anchored match need be tried only once. */
/* [unless multiline is set] */
- if (prog->reganch & 1) {
+ if (prog->reganch & ROPT_ANCH) {
if (regtry(prog, string))
goto got_it;
else if (multiline) {
/* Messy cases: unanchored match. */
if (prog->regstart) {
- if (prog->reganch & 2) { /* we have /x+whatever/ */
+ if (prog->reganch & ROPT_SKIP) { /* we have /x+whatever/ */
/* it must be a one character string */
i = prog->regstart->str_ptr[0];
while (s < strend) {
goto phooey;
}
if (c = prog->regstclass) {
- int doevery = (prog->reganch & 2) == 0;
+ int doevery = (prog->reganch & ROPT_SKIP) == 0;
if (minlen)
dontbother = minlen - 1;
s = nsavestr(strbeg,i); /* so $digit will work later */
if (prog->subbase)
Safefree(prog->subbase);
- prog->subbase = s;
+ prog->subbeg = prog->subbase = s;
prog->subend = s+i;
}
else
sp = prog->startp;
ep = prog->endp;
if (prog->nparens) {
- for (i = NSUBEXP; i > 0; i--) {
+ for (i = prog->nparens; i >= 0; i--) {
*sp++ = NULL;
*ep++ = NULL;
}
((nextchar || locinput < regeol) &&
locinput[-1] == '\n') )
{
- regtill = regbol;
+ /* regtill = regbol; */
break;
}
return(0);
return(0);
if (!multiline && regeol - locinput > 1)
return 0;
- regtill = regbol;
+ /* regtill = regbol; */
break;
case ANY:
if ((nextchar == '\0' && locinput >= regeol) ||
nextchar = *++locinput;
break;
case REF:
- case REF+1:
- case REF+2:
- case REF+3:
- case REF+4:
- case REF+5:
- case REF+6:
- case REF+7:
- case REF+8:
- case REF+9:
- n = OP(scan) - REF;
+ n = ARG1(scan); /* which paren pair */
s = regmystartp[n];
if (!s)
return(0);
break;
case BACK:
break;
- case OPEN+1:
- case OPEN+2:
- case OPEN+3:
- case OPEN+4:
- case OPEN+5:
- case OPEN+6:
- case OPEN+7:
- case OPEN+8:
- case OPEN+9:
- n = OP(scan) - OPEN;
+ case OPEN:
+ n = ARG1(scan); /* which paren pair */
reginput = locinput;
regmystartp[n] = locinput; /* for REF */
} else
return(0);
/* NOTREACHED */
- case CLOSE+1:
- case CLOSE+2:
- case CLOSE+3:
- case CLOSE+4:
- case CLOSE+5:
- case CLOSE+6:
- case CLOSE+7:
- case CLOSE+8:
- case CLOSE+9: {
- n = OP(scan) - CLOSE;
+ case CLOSE: {
+ n = ARG1(scan); /* which paren pair */
reginput = locinput;
regmyendp[n] = locinput; /* for REF */