perl 4.0 patch 14: patch #11, continued
[p5sagit/p5-mst-13.2.git] / regexec.c
index 61439ea..bb63eda 100644 (file)
--- a/regexec.c
+++ b/regexec.c
@@ -7,29 +7,18 @@
  * blame Henry for some of the lack of readability.
  */
 
-/* $Header: regexec.c,v 3.0.1.4 90/08/09 05:12:03 lwall Locked $
+/* $RCSfile: regexec.c,v $$Revision: 4.0.1.2 $$Date: 91/06/07 11:50:33 $
  *
  * $Log:       regexec.c,v $
- * Revision 3.0.1.4  90/08/09  05:12:03  lwall
- * patch19: sped up /x+y/ patterns greatly by not retrying on every x
- * patch19: inhibited backoff on patterns anchored to the end like /\s+$/
- * patch19: sped up {m,n} on simple items
- * patch19: $' broke on embedded nulls
- * patch19: $ will now only match at end of string if $* == 0
+ * Revision 4.0.1.2  91/06/07  11:50:33  lwall
+ * patch4: new copyright notice
+ * patch4: // wouldn't use previous pattern if it started with a null character
  * 
- * Revision 3.0.1.3  90/02/28  18:14:39  lwall
- * patch9: /[\200-\377]/ didn't work on machines with signed chars
- * patch9: \d, \w, and \s could misfire on characters with high bit set
- * patch9: /\bfoo/i didn't work
+ * Revision 4.0.1.1  91/04/12  09:07:39  lwall
+ * patch1: regexec only allocated space for 9 subexpressions
  * 
- * Revision 3.0.1.2  89/12/21  20:16:27  lwall
- * patch7: certain patterns didn't match correctly at end of string
- * 
- * Revision 3.0.1.1  89/11/11  04:52:04  lwall
- * patch2: /\b$foo/ didn't work
- * 
- * Revision 3.0  89/10/18  15:22:53  lwall
- * 3.0 baseline
+ * Revision 4.0  91/03/20  01:39:16  lwall
+ * 4.0 baseline.
  * 
  */
 
  *
  ****    Alterations to Henry's code are...
  ****
- ****    Copyright (c) 1989, Larry Wall
+ ****    Copyright (c) 1991, Larry Wall
  ****
- ****    You may distribute under the terms of the GNU General Public License
- ****    as specified in the README file that comes with the perl 3.0 kit.
+ ****    You may distribute under the terms of either the GNU General Public
+ ****    License or the Artistic License, as specified in the README file.
  *
  * Beware that some of this code is subtly aware of the way operator
  * precedence is structured in regular expressions.  Serious changes in
@@ -98,8 +87,9 @@ static char **regendp;                /* Ditto for endp. */
 static char *reglastparen;     /* Similarly for lastparen. */
 static char *regtill;
 
-static char *regmystartp[10];  /* For remembering backreferences. */
-static char *regmyendp[10];
+static int regmyp_size = 0;
+static char **regmystartp = Null(char**);
+static char **regmyendp   = Null(char**);
 
 /*
  * Forwards.
@@ -139,8 +129,11 @@ int safebase;      /* no need to remember string in subbase */
 
        if (string == strbeg)   /* is ^ valid at stringarg? */
            regprev = '\n';
-       else
+       else {
            regprev = stringarg[-1];
+           if (!multiline && regprev == '\n')
+               regprev = '\0';         /* force ^ to NOT match */
+       }
        regprecomp = prog->precomp;
        /* Check validity of program. */
        if (UCHARAT(prog->program) != MAGIC) {
@@ -161,7 +154,9 @@ int safebase;       /* no need to remember string in subbase */
 
        /* If there is a "must appear" string, look for it. */
        s = string;
-       if (prog->regmust != Nullstr) {
+       if (prog->regmust != Nullstr &&
+           (!(prog->reganch & ROPT_ANCH)
+            || (multiline && prog->regback >= 0)) ) {
                if (stringarg == strbeg && screamer) {
                        if (screamfirst[prog->regmust->str_rare] >= 0)
                                s = screaminstr(screamer,prog->regmust);
@@ -203,9 +198,27 @@ int safebase;      /* no need to remember string in subbase */
        /* see how far we have to get to not match where we matched before */
        regtill = string+minend;
 
+       /* Allocate our backreference arrays */
+       if ( regmyp_size < prog->nparens + 1 ) {
+           /* Allocate or enlarge the arrays */
+           regmyp_size = prog->nparens + 1;
+           if ( regmyp_size < 10 ) regmyp_size = 10;   /* minimum */
+           if ( regmystartp ) {
+               /* reallocate larger */
+               Renew(regmystartp,regmyp_size,char*);
+               Renew(regmyendp,  regmyp_size,char*);
+           }
+           else {
+               /* Initial allocation */
+               New(1102,regmystartp,regmyp_size,char*);
+               New(1102,regmyendp,  regmyp_size,char*);
+           }
+       
+       }
+
        /* Simplest case:  anchored match need be tried only once. */
        /*  [unless multiline is set] */
-       if (prog->reganch & 1) {
+       if (prog->reganch & ROPT_ANCH) {
                if (regtry(prog, string))
                        goto got_it;
                else if (multiline) {
@@ -227,7 +240,7 @@ int safebase;       /* no need to remember string in subbase */
 
        /* Messy cases:  unanchored match. */
        if (prog->regstart) {
-               if (prog->reganch & 2) {        /* we have /x+whatever/ */
+               if (prog->reganch & ROPT_SKIP) {  /* we have /x+whatever/ */
                    /* it must be a one character string */
                    i = prog->regstart->str_ptr[0];
                    while (s < strend) {
@@ -267,7 +280,7 @@ int safebase;       /* no need to remember string in subbase */
                goto phooey;
        }
        if (c = prog->regstclass) {
-               int doevery = (prog->reganch & 2) == 0;
+               int doevery = (prog->reganch & ROPT_SKIP) == 0;
 
                if (minlen)
                    dontbother = minlen - 1;
@@ -437,7 +450,7 @@ int safebase;       /* no need to remember string in subbase */
                    s = nsavestr(strbeg,i);     /* so $digit will work later */
                    if (prog->subbase)
                            Safefree(prog->subbase);
-                   prog->subbase = s;
+                   prog->subbeg = prog->subbase = s;
                    prog->subend = s+i;
                }
                else
@@ -481,7 +494,7 @@ char *string;
        sp = prog->startp;
        ep = prog->endp;
        if (prog->nparens) {
-               for (i = NSUBEXP; i > 0; i--) {
+               for (i = prog->nparens; i >= 0; i--) {
                        *sp++ = NULL;
                        *ep++ = NULL;
                }
@@ -546,7 +559,7 @@ char *prog;
                            ((nextchar || locinput < regeol) &&
                              locinput[-1] == '\n') )
                        {
-                               regtill = regbol;
+                               /* regtill = regbol; */
                                break;
                        }
                        return(0);
@@ -555,7 +568,7 @@ char *prog;
                                return(0);
                        if (!multiline && regeol - locinput > 1)
                                return 0;
-                       regtill = regbol;
+                       /* regtill = regbol; */
                        break;
                case ANY:
                        if ((nextchar == '\0' && locinput >= regeol) ||
@@ -582,9 +595,9 @@ char *prog;
                                nextchar = UCHARAT(locinput);
                        if (s[nextchar >> 3] & (1 << (nextchar&7)))
                                return(0);
-                       nextchar = *++locinput;
-                       if (!nextchar && locinput > regeol)
+                       if (!nextchar && locinput >= regeol)
                                return 0;
+                       nextchar = *++locinput;
                        break;
                case ALNUM:
                        if (!nextchar)
@@ -637,16 +650,7 @@ char *prog;
                        nextchar = *++locinput;
                        break;
                case REF:
-               case REF+1:
-               case REF+2:
-               case REF+3:
-               case REF+4:
-               case REF+5:
-               case REF+6:
-               case REF+7:
-               case REF+8:
-               case REF+9:
-                       n = OP(scan) - REF;
+                       n = ARG1(scan);  /* which paren pair */
                        s = regmystartp[n];
                        if (!s)
                            return(0);
@@ -670,16 +674,8 @@ char *prog;
                        break;
                case BACK:
                        break;
-               case OPEN+1:
-               case OPEN+2:
-               case OPEN+3:
-               case OPEN+4:
-               case OPEN+5:
-               case OPEN+6:
-               case OPEN+7:
-               case OPEN+8:
-               case OPEN+9:
-                       n = OP(scan) - OPEN;
+               case OPEN:
+                       n = ARG1(scan);  /* which paren pair */
                        reginput = locinput;
 
                        regmystartp[n] = locinput;      /* for REF */
@@ -695,16 +691,8 @@ char *prog;
                        } else
                                return(0);
                        /* NOTREACHED */
-               case CLOSE+1:
-               case CLOSE+2:
-               case CLOSE+3:
-               case CLOSE+4:
-               case CLOSE+5:
-               case CLOSE+6:
-               case CLOSE+7:
-               case CLOSE+8:
-               case CLOSE+9: {
-                               n = OP(scan) - CLOSE;
+               case CLOSE: {
+                               n = ARG1(scan);  /* which paren pair */
                                reginput = locinput;
 
                                regmyendp[n] = locinput;        /* for REF */
@@ -771,7 +759,7 @@ char *prog;
                                nextchar = -1000;
                        reginput = locinput;
                        n = regrepeat(scan, n);
-                       if (!multiline && OP(next) == EOL)
+                       if (!multiline && OP(next) == EOL && ln < n)
                            ln = n;                     /* why back off? */
                        while (n >= ln) {
                                /* If it could work, try it. */
@@ -845,7 +833,7 @@ int max;
                }
                break;
        case ALNUM:
-               while (isALNUM(*scan))
+               while (scan < loceol && isALNUM(*scan))
                        scan++;
                break;
        case NALNUM:
@@ -861,7 +849,7 @@ int max;
                        scan++;
                break;
        case DIGIT:
-               while (isDIGIT(*scan))
+               while (scan < loceol && isDIGIT(*scan))
                        scan++;
                break;
        case NDIGIT: