[inseparable changes from patch from perl5.003_08 to perl5.003_09]
[p5sagit/p5-mst-13.2.git] / regexec.c
index 1267065..d9a893e 100644 (file)
--- a/regexec.c
+++ b/regexec.c
  * blame Henry for some of the lack of readability.
  */
 
+/* The names of the functions have been changed from regcomp and
+ * regexec to pregcomp and pregexec in order to avoid conflicts
+ * with the POSIX routines of the same names.
+*/
+
 /*SUPPRESS 112*/
 /*
- * regcomp and regexec -- regsub and regerror are not used in perl
+ * pregcomp and pregexec -- regsub and regerror are not used in perl
  *
  *     Copyright (c) 1986 by University of Toronto.
  *     Written by Henry Spencer.  Not derived from licensed software.
@@ -77,10 +82,10 @@ static CURCUR* regcc;
 
 typedef I32 CHECKPOINT;
 
-CHECKPOINT regcppush _((I32 parenfloor));
-char * regcppop _((void));
+static CHECKPOINT regcppush _((I32 parenfloor));
+static char * regcppop _((void));
 
-CHECKPOINT
+static CHECKPOINT
 regcppush(parenfloor)
 I32 parenfloor;
 {
@@ -102,7 +107,7 @@ I32 parenfloor;
     return retval;
 }
 
-char*
+static char *
 regcppop()
 {
     I32 i = SSPOPINT;
@@ -132,7 +137,7 @@ regcppop()
 #define regcpblow(cp) leave_scope(cp)
 
 /*
- * regexec and friends
+ * pregexec and friends
  */
 
 /*
@@ -142,12 +147,15 @@ regcppop()
 static I32 regmatch _((char *prog));
 static I32 regrepeat _((char *p, I32 max));
 static I32 regtry _((regexp *prog, char *startpos));
+static bool reginclass _((char *p, I32 c));
+
+static bool regtainted;                /* tainted information used? */
 
 /*
- - regexec - match a regexp against a string
+ - pregexec - match a regexp against a string
  */
 I32
-regexec(prog, stringarg, strend, strbeg, minend, screamer, safebase)
+pregexec(prog, stringarg, strend, strbeg, minend, screamer, safebase)
 register regexp *prog;
 char *stringarg;
 register char *strend; /* pointer to null at end of string */
@@ -157,7 +165,6 @@ SV *screamer;
 I32 safebase;  /* no need to remember string in subbase */
 {
     register char *s;
-    register I32 i;
     register char *c;
     register char *startpos = stringarg;
     register I32 tmp;
@@ -166,6 +173,7 @@ I32 safebase;       /* no need to remember string in subbase */
     CURCUR cc;
 
     cc.cur = 0;
+    cc.oldcc = 0;
     regcc = &cc;
 
 #ifdef DEBUGGING
@@ -186,23 +194,15 @@ I32 safebase;     /* no need to remember string in subbase */
        if (!multiline && regprev == '\n')
            regprev = '\0';             /* force ^ to NOT match */
     }
+
     regprecomp = prog->precomp;
-    regnpar = prog->nparens;
     /* Check validity of program. */
     if (UCHARAT(prog->program) != MAGIC) {
        FAIL("corrupted regexp program");
     }
 
-    if (prog->do_folding) {
-       i = strend - startpos;
-       New(1101,c,i+1,char);
-       Copy(startpos, c, i+1, char);
-       startpos = c;
-       strend = startpos + i;
-       for (s = startpos; s < strend; s++)
-           if (isUPPER(*s))
-               *s = toLOWER(*s);
-    }
+    regnpar = prog->nparens;
+    regtainted = FALSE;
 
     /* If there is a "must appear" string, look for it. */
     s = startpos;
@@ -275,13 +275,13 @@ I32 safebase;     /* no need to remember string in subbase */
     if (prog->regstart) {
        if (prog->reganch & ROPT_SKIP) {  /* we have /x+whatever/ */
            /* it must be a one character string */
-           i = SvPVX(prog->regstart)[0];
+           char ch = SvPVX(prog->regstart)[0];
            while (s < strend) {
-               if (*s == i) {
+               if (*s == ch) {
                    if (regtry(prog, s))
                        goto got_it;
                    s++;
-                   while (s < strend && *s == i)
+                   while (s < strend && *s == ch)
                        s++;
                }
                s++;
@@ -321,8 +321,7 @@ I32 safebase;       /* no need to remember string in subbase */
        case ANYOF:
            c = OPERAND(c);
            while (s < strend) {
-               i = UCHARAT(s);
-               if (!(c[i >> 3] & (1 << (i&7)))) {
+               if (reginclass(c, *s)) {
                    if (tmp && regtry(prog, s))
                        goto got_it;
                    else
@@ -333,18 +332,16 @@ I32 safebase;     /* no need to remember string in subbase */
                s++;
            }
            break;
+       case BOUNDL:
+           regtainted = TRUE;
+           /* FALL THROUGH */
        case BOUND:
            if (minlen)
                dontbother++,strend--;
-           if (s != startpos) {
-               i = s[-1];
-               tmp = isALNUM(i);
-           }
-           else
-               tmp = isALNUM(regprev); /* assume not alphanumeric */
+           tmp = (s != startpos) ? UCHARAT(s - 1) : regprev;
+           tmp = (OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp));
            while (s < strend) {
-               i = *s;
-               if (tmp != isALNUM(i)) {
+               if (tmp != (OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
                    tmp = !tmp;
                    if (regtry(prog, s))
                        goto got_it;
@@ -354,18 +351,16 @@ I32 safebase;     /* no need to remember string in subbase */
            if ((minlen || tmp) && regtry(prog,s))
                goto got_it;
            break;
+       case NBOUNDL:
+           regtainted = TRUE;
+           /* FALL THROUGH */
        case NBOUND:
            if (minlen)
                dontbother++,strend--;
-           if (s != startpos) {
-               i = s[-1];
-               tmp = isALNUM(i);
-           }
-           else
-               tmp = isALNUM(regprev); /* assume not alphanumeric */
+           tmp = (s != startpos) ? UCHARAT(s - 1) : regprev;
+           tmp = (OP(c) == NBOUND ? isALNUM(tmp) : isALNUM_LC(tmp));
            while (s < strend) {
-               i = *s;
-               if (tmp != isALNUM(i))
+               if (tmp != (OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
                    tmp = !tmp;
                else if (regtry(prog, s))
                    goto got_it;
@@ -376,8 +371,21 @@ I32 safebase;      /* no need to remember string in subbase */
            break;
        case ALNUM:
            while (s < strend) {
-               i = *s;
-               if (isALNUM(i)) {
+               if (isALNUM(*s)) {
+                   if (tmp && regtry(prog, s))
+                       goto got_it;
+                   else
+                       tmp = doevery;
+               }
+               else
+                   tmp = 1;
+               s++;
+           }
+           break;
+       case ALNUML:
+           regtainted = TRUE;
+           while (s < strend) {
+               if (isALNUM_LC(*s)) {
                    if (tmp && regtry(prog, s))
                        goto got_it;
                    else
@@ -390,8 +398,21 @@ I32 safebase;      /* no need to remember string in subbase */
            break;
        case NALNUM:
            while (s < strend) {
-               i = *s;
-               if (!isALNUM(i)) {
+               if (!isALNUM(*s)) {
+                   if (tmp && regtry(prog, s))
+                       goto got_it;
+                   else
+                       tmp = doevery;
+               }
+               else
+                   tmp = 1;
+               s++;
+           }
+           break;
+       case NALNUML:
+           regtainted = TRUE;
+           while (s < strend) {
+               if (!isALNUM_LC(*s)) {
                    if (tmp && regtry(prog, s))
                        goto got_it;
                    else
@@ -415,6 +436,20 @@ I32 safebase;      /* no need to remember string in subbase */
                s++;
            }
            break;
+       case SPACEL:
+           regtainted = TRUE;
+           while (s < strend) {
+               if (isSPACE_LC(*s)) {
+                   if (tmp && regtry(prog, s))
+                       goto got_it;
+                   else
+                       tmp = doevery;
+               }
+               else
+                   tmp = 1;
+               s++;
+           }
+           break;
        case NSPACE:
            while (s < strend) {
                if (!isSPACE(*s)) {
@@ -428,6 +463,20 @@ I32 safebase;      /* no need to remember string in subbase */
                s++;
            }
            break;
+       case NSPACEL:
+           regtainted = TRUE;
+           while (s < strend) {
+               if (!isSPACE_LC(*s)) {
+                   if (tmp && regtry(prog, s))
+                       goto got_it;
+                   else
+                       tmp = doevery;
+               }
+               else
+                   tmp = 1;
+               s++;
+           }
+           break;
        case DIGIT:
            while (s < strend) {
                if (isDIGIT(*s)) {
@@ -471,11 +520,12 @@ I32 safebase;     /* no need to remember string in subbase */
     goto phooey;
 
 got_it:
+    strend += dontbother;      /* uncheat */
     prog->subbeg = strbeg;
     prog->subend = strend;
-    if ((!safebase && (prog->nparens || sawampersand)) || prog->do_folding) {
-       strend += dontbother;   /* uncheat */
-       i = strend - startpos + (stringarg - strbeg);
+    prog->exec_tainted = regtainted;
+    if (!safebase && (prog->nparens || sawampersand)) {
+       I32 i = strend - startpos + (stringarg - strbeg);
        if (safebase) {                 /* no need for $digit later */
            s = strbeg;
            prog->subend = s+i;
@@ -498,14 +548,10 @@ got_it:
                prog->endp[i] = s + (prog->endp[i] - startpos);
            }
        }
-       if (prog->do_folding)
-           Safefree(startpos);
     }
     return 1;
 
 phooey:
-    if (prog->do_folding)
-       Safefree(startpos);
     return 0;
 }
 
@@ -570,15 +616,28 @@ char *prog;
     register I32 ln;           /* len or last */
     register char *s;          /* operand or save */
     register char *locinput = reginput;
+    register I32 c1, c2;       /* case fold search */
     int minmod = 0;
+#ifdef DEBUGGING
+    static int regindent = 0;
+    regindent++;
+#endif
 
-    nextchar = *locinput;
+    nextchar = UCHARAT(locinput);
     scan = prog;
     while (scan != NULL) {
 #ifdef DEBUGGING
-       if (regnarrate)
-           fprintf(stderr, "%2d%-8.8s\t<%.10s>\n",
+#define sayYES goto yes
+#define sayNO goto no
+#define saySAME(x) if (x) goto yes; else goto no
+       if (regnarrate) {
+           PerlIO_printf(Perl_debug_log, "%*s%2d%-8.8s\t<%.10s>\n", regindent*2, "",
                scan - regprogram, regprop(scan), locinput);
+       }
+#else
+#define sayYES return 1
+#define sayNO return 0
+#define saySAME(x) return x
 #endif
 
 #ifdef REGALIGN
@@ -598,7 +657,7 @@ char *prog;
                /* regtill = regbol; */
                break;
            }
-           return 0;
+           sayNO;
        case MBOL:
            if (locinput == regbol
                ? regprev == '\n'
@@ -606,15 +665,15 @@ char *prog;
            {
                break;
            }
-           return 0;
+           sayNO;
        case SBOL:
            if (locinput == regbol && regprev == '\n')
                break;
-           return 0;
+           sayNO;
        case GBOL:
            if (locinput == regbol)
                break;
-           return 0;
+           sayNO;
        case EOL:
            if (multiline)
                goto meol;
@@ -623,117 +682,161 @@ char *prog;
        case MEOL:
          meol:
            if ((nextchar || locinput < regeol) && nextchar != '\n')
-               return 0;
+               sayNO;
            break;
        case SEOL:
          seol:
            if ((nextchar || locinput < regeol) && nextchar != '\n')
-               return 0;
+               sayNO;
            if (regeol - locinput > 1)
-               return 0;
+               sayNO;
            break;
        case SANY:
            if (!nextchar && locinput >= regeol)
-               return 0;
-           nextchar = *++locinput;
+               sayNO;
+           nextchar = UCHARAT(++locinput);
            break;
        case ANY:
            if (!nextchar && locinput >= regeol || nextchar == '\n')
-               return 0;
-           nextchar = *++locinput;
+               sayNO;
+           nextchar = UCHARAT(++locinput);
            break;
-       case EXACTLY:
+       case EXACT:
            s = OPERAND(scan);
            ln = *s++;
            /* Inline the first character, for speed. */
            if (*s != nextchar)
-               return 0;
+               sayNO;
+           if (regeol - locinput < ln)
+               sayNO;
+           if (ln > 1 && memcmp(s, locinput, ln) != 0)
+               sayNO;
+           locinput += ln;
+           nextchar = UCHARAT(locinput);
+           break;
+       case EXACTFL:
+           regtainted = TRUE;
+           /* FALL THROUGH */
+       case EXACTF:
+           s = OPERAND(scan);
+           ln = *s++;
+           /* Inline the first character, for speed. */
+           if (UCHARAT(s) != nextchar &&
+               UCHARAT(s) != ((OP(scan) == EXACTF)
+                              ? fold : fold_locale)[nextchar])
+               sayNO;
            if (regeol - locinput < ln)
-               return 0;
-           if (ln > 1 && bcmp(s, locinput, ln) != 0)
-               return 0;
+               sayNO;
+           if (ln > 1 && ((OP(scan) == EXACTF)
+                          ? ibcmp : ibcmp_locale)(s, locinput, ln) != 0)
+               sayNO;
            locinput += ln;
-           nextchar = *locinput;
+           nextchar = UCHARAT(locinput);
            break;
        case ANYOF:
            s = OPERAND(scan);
            if (nextchar < 0)
                nextchar = UCHARAT(locinput);
-           if (s[nextchar >> 3] & (1 << (nextchar&7)))
-               return 0;
+           if (!reginclass(s, nextchar))
+               sayNO;
            if (!nextchar && locinput >= regeol)
-               return 0;
-           nextchar = *++locinput;
+               sayNO;
+           nextchar = UCHARAT(++locinput);
            break;
+       case ALNUML:
+           regtainted = TRUE;
+           /* FALL THROUGH */
        case ALNUM:
            if (!nextchar)
-               return 0;
-           if (!isALNUM(nextchar))
-               return 0;
-           nextchar = *++locinput;
+               sayNO;
+           if (!(OP(scan) == ALNUM
+                 ? isALNUM(nextchar) : isALNUM_LC(nextchar)))
+               sayNO;
+           nextchar = UCHARAT(++locinput);
            break;
+       case NALNUML:
+           regtainted = TRUE;
+           /* FALL THROUGH */
        case NALNUM:
            if (!nextchar && locinput >= regeol)
-               return 0;
-           if (isALNUM(nextchar))
-               return 0;
-           nextchar = *++locinput;
+               sayNO;
+           if (OP(scan) == NALNUM
+               ? isALNUM(nextchar) : isALNUM_LC(nextchar))
+               sayNO;
+           nextchar = UCHARAT(++locinput);
            break;
-       case NBOUND:
+       case BOUNDL:
+       case NBOUNDL:
+           regtainted = TRUE;
+           /* FALL THROUGH */
        case BOUND:
-           if (locinput == regbol)     /* was last char in word? */
-               ln = isALNUM(regprev);
-           else 
-               ln = isALNUM(locinput[-1]);
-           n = isALNUM(nextchar); /* is next char in word? */
-           if ((ln == n) == (OP(scan) == BOUND))
-               return 0;
+       case NBOUND:
+           /* was last char in word? */
+           ln = (locinput != regbol) ? UCHARAT(locinput - 1) : regprev;
+           if (OP(scan) == BOUND || OP(scan) == NBOUND) {
+               ln = isALNUM(ln);
+               n = isALNUM(nextchar);
+           }
+           else {
+               ln = isALNUM_LC(ln);
+               n = isALNUM_LC(nextchar);
+           }
+           if ((ln == n) == (OP(scan) == BOUND || OP(scan) == BOUNDL))
+               sayNO;
            break;
+       case SPACEL:
+           regtainted = TRUE;
+           /* FALL THROUGH */
        case SPACE:
            if (!nextchar && locinput >= regeol)
-               return 0;
-           if (!isSPACE(nextchar))
-               return 0;
-           nextchar = *++locinput;
+               sayNO;
+           if (!(OP(scan) == SPACE
+                 ? isSPACE(nextchar) : isSPACE_LC(nextchar)))
+               sayNO;
+           nextchar = UCHARAT(++locinput);
            break;
+       case NSPACEL:
+           regtainted = TRUE;
+           /* FALL THROUGH */
        case NSPACE:
            if (!nextchar)
-               return 0;
-           if (isSPACE(nextchar))
-               return 0;
-           nextchar = *++locinput;
+               sayNO;
+           if (OP(scan) == SPACE
+               ? isSPACE(nextchar) : isSPACE_LC(nextchar))
+               sayNO;
+           nextchar = UCHARAT(++locinput);
            break;
        case DIGIT:
            if (!isDIGIT(nextchar))
-               return 0;
-           nextchar = *++locinput;
+               sayNO;
+           nextchar = UCHARAT(++locinput);
            break;
        case NDIGIT:
            if (!nextchar && locinput >= regeol)
-               return 0;
+               sayNO;
            if (isDIGIT(nextchar))
-               return 0;
-           nextchar = *++locinput;
+               sayNO;
+           nextchar = UCHARAT(++locinput);
            break;
        case REF:
            n = ARG1(scan);  /* which paren pair */
            s = regstartp[n];
            if (!s)
-               return 0;
+               sayNO;
            if (!regendp[n])
-               return 0;
+               sayNO;
            if (s == regendp[n])
                break;
            /* Inline the first character, for speed. */
            if (*s != nextchar)
-               return 0;
+               sayNO;
            ln = regendp[n] - s;
            if (locinput + ln > regeol)
-               return 0;
-           if (ln > 1 && bcmp(s, locinput, ln) != 0)
-               return 0;
+               sayNO;
+           if (ln > 1 && memcmp(s, locinput, ln) != 0)
+               sayNO;
            locinput += ln;
-           nextchar = *locinput;
+           nextchar = UCHARAT(locinput);
            break;
 
        case NOTHING:
@@ -769,7 +872,7 @@ char *prog;
                n = regmatch(PREVOPER(next));   /* start on the WHILEM */
                regcpblow(cp);
                regcc = cc.oldcc;
-               return n;
+               saySAME(n);
            }
            /* NOT REACHED */
        case WHILEM: {
@@ -783,19 +886,25 @@ char *prog;
                 */
 
                CURCUR* cc = regcc;
-               n = cc->cur + 1;
+               n = cc->cur + 1;        /* how many we know we matched */
                reginput = locinput;
 
+#ifdef DEBUGGING
+               if (regnarrate)
+                   PerlIO_printf(Perl_debug_log, "%*s  %d  %lx\n", regindent*2, "",
+                       n, (long)cc);
+#endif
+
                /* If degenerate scan matches "", assume scan done. */
 
                if (locinput == cc->lastloc) {
                    regcc = cc->oldcc;
                    ln = regcc->cur;
                    if (regmatch(cc->next))
-                       return TRUE;
+                       sayYES;
                    regcc->cur = ln;
                    regcc = cc;
-                   return FALSE;
+                   sayNO;
                }
 
                /* First just match a string of min scans. */
@@ -803,7 +912,10 @@ char *prog;
                if (n < cc->min) {
                    cc->cur = n;
                    cc->lastloc = locinput;
-                   return regmatch(cc->scan);
+                   if (regmatch(cc->scan))
+                       sayYES;
+                   cc->cur = n - 1;
+                   sayNO;
                }
 
                /* Prefer next over scan for minimal matching. */
@@ -812,18 +924,21 @@ char *prog;
                    regcc = cc->oldcc;
                    ln = regcc->cur;
                    if (regmatch(cc->next))
-                       return TRUE;    /* All done. */
+                       sayYES; /* All done. */
                    regcc->cur = ln;
                    regcc = cc;
 
                    if (n >= cc->max)   /* Maximum greed exceeded? */
-                       return FALSE;
+                       sayNO;
 
                    /* Try scanning more and see if it helps. */
                    reginput = locinput;
                    cc->cur = n;
                    cc->lastloc = locinput;
-                   return regmatch(cc->scan);
+                   if (regmatch(cc->scan))
+                       sayYES;
+                   cc->cur = n - 1;
+                   sayNO;
                }
 
                /* Prefer scan over next for maximal matching. */
@@ -833,7 +948,7 @@ char *prog;
                    cc->cur = n;
                    cc->lastloc = locinput;
                    if (regmatch(cc->scan))
-                       return TRUE;
+                       sayYES;
                    regcppop();         /* Restore some previous $<digit>s? */
                    reginput = locinput;
                }
@@ -842,20 +957,26 @@ char *prog;
                regcc = cc->oldcc;
                ln = regcc->cur;
                if (regmatch(cc->next))
-                   return TRUE;
+                   sayYES;
                regcc->cur = ln;
                regcc = cc;
-               return FALSE;
+               cc->cur = n - 1;
+               sayNO;
            }
            /* NOT REACHED */
        case BRANCH: {
                if (OP(next) != BRANCH)   /* No choice. */
                    next = NEXTOPER(scan);/* Avoid recursion. */
                else {
+                   int lastparen = *reglastparen;
                    do {
                        reginput = locinput;
                        if (regmatch(NEXTOPER(scan)))
-                           return 1;
+                           sayYES;
+                       for (n = *reglastparen; n > lastparen; n--)
+                           regendp[n] = 0;
+                       *reglastparen = n;
+                           
 #ifdef REGALIGN
                        /*SUPPRESS 560*/
                        if (n = NEXT(scan))
@@ -866,7 +987,7 @@ char *prog;
                        scan = regnext(scan);
 #endif
                    } while (scan != NULL && OP(scan) == BRANCH);
-                   return 0;
+                   sayNO;
                    /* NOTREACHED */
                }
            }
@@ -893,27 +1014,39 @@ char *prog;
            n = 32767;
            scan = NEXTOPER(scan);
          repeat:
-           if (OP(next) == EXACTLY)
-               nextchar = *(OPERAND(next)+1);
+           if (regkind[(U8)OP(next)] == EXACT) {
+               c1 = UCHARAT(OPERAND(next) + 1);
+               if (OP(next) == EXACTF)
+                   c2 = fold[c1];
+               else if (OP(next) == EXACTFL)
+                   c2 = fold_locale[c1];
+               else
+                   c2 = c1;
+           }
            else
-               nextchar = -1000;
+               c1 = c2 = -1000;
            reginput = locinput;
            if (minmod) {
                minmod = 0;
                if (ln && regrepeat(scan, ln) < ln)
-                   return 0;
-               while (n >= ln) {
+                   sayNO;
+               while (n >= ln || (n == 32767 && ln > 0)) { /* ln overflow ? */
                    /* If it could work, try it. */
-                   if (nextchar == -1000 || *reginput == nextchar)
+                   if (c1 == -1000 ||
+                       UCHARAT(reginput) == c1 ||
+                       UCHARAT(reginput) == c2)
+                   {
                        if (regmatch(next))
-                           return 1;
+                           sayYES;
+                   }
                    /* Couldn't or didn't -- back up. */
+                   reginput = locinput + ln;
                    if (regrepeat(scan, 1)) {
                        ln++;
                        reginput = locinput + ln;
                    }
                    else
-                       return 0;
+                       sayNO;
                }
            }
            else {
@@ -923,33 +1056,37 @@ char *prog;
                    ln = n;                     /* why back off? */
                while (n >= ln) {
                    /* If it could work, try it. */
-                   if (nextchar == -1000 || *reginput == nextchar)
+                   if (c1 == -1000 ||
+                       UCHARAT(reginput) == c1 ||
+                       UCHARAT(reginput) == c2)
+                   {
                        if (regmatch(next))
-                           return 1;
+                           sayYES;
+                   }
                    /* Couldn't or didn't -- back up. */
                    n--;
                    reginput = locinput + n;
                }
            }
-           return 0;
+           sayNO;
        case SUCCEED:
        case END:
            reginput = locinput;        /* put where regtry can find it */
-           return 1;                   /* Success! */
+           sayYES;                     /* Success! */
        case IFMATCH:
            reginput = locinput;
            scan = NEXTOPER(scan);
            if (!regmatch(scan))
-               return 0;
+               sayNO;
            break;
        case UNLESSM:
            reginput = locinput;
            scan = NEXTOPER(scan);
            if (regmatch(scan))
-               return 0;
+               sayNO;
            break;
        default:
-           fprintf(stderr, "%x %d\n",(unsigned)scan,scan[1]);
+           PerlIO_printf(PerlIO_stderr(), "%x %d\n",(unsigned)scan,scan[1]);
            FAIL("regexp memory corruption");
        }
        scan = next;
@@ -961,6 +1098,18 @@ char *prog;
     */
     FAIL("corrupted regexp pointers");
     /*NOTREACHED*/
+    sayNO;
+
+yes:
+#ifdef DEBUGGING
+    regindent--;
+#endif
+    return 1;
+
+no:
+#ifdef DEBUGGING
+    regindent--;
+#endif
     return 0;
 }
 
@@ -994,34 +1143,64 @@ I32 max;
     case SANY:
        scan = loceol;
        break;
-    case EXACTLY:              /* length of string is 1 */
-       opnd++;
-       while (scan < loceol && *opnd == *scan)
+    case EXACT:                /* length of string is 1 */
+       c = UCHARAT(++opnd);
+       while (scan < loceol && UCHARAT(scan) == c)
+           scan++;
+       break;
+    case EXACTF:       /* length of string is 1 */
+       c = UCHARAT(++opnd);
+       while (scan < loceol &&
+              (UCHARAT(scan) == c || UCHARAT(scan) == fold[c]))
+           scan++;
+       break;
+    case EXACTFL:      /* length of string is 1 */
+       regtainted = TRUE;
+       c = UCHARAT(++opnd);
+       while (scan < loceol &&
+              (UCHARAT(scan) == c || UCHARAT(scan) == fold_locale[c]))
            scan++;
        break;
     case ANYOF:
-       c = UCHARAT(scan);
-       while (scan < loceol && !(opnd[c >> 3] & (1 << (c & 7)))) {
+       while (scan < loceol && reginclass(opnd, *scan))
            scan++;
-           c = UCHARAT(scan);
-       }
        break;
     case ALNUM:
        while (scan < loceol && isALNUM(*scan))
            scan++;
        break;
+    case ALNUML:
+       regtainted = TRUE;
+       while (scan < loceol && isALNUM_LC(*scan))
+           scan++;
+       break;
     case NALNUM:
        while (scan < loceol && !isALNUM(*scan))
            scan++;
        break;
+    case NALNUML:
+       regtainted = TRUE;
+       while (scan < loceol && !isALNUM_LC(*scan))
+           scan++;
+       break;
     case SPACE:
        while (scan < loceol && isSPACE(*scan))
            scan++;
        break;
+    case SPACEL:
+       regtainted = TRUE;
+       while (scan < loceol && isSPACE_LC(*scan))
+           scan++;
+       break;
     case NSPACE:
        while (scan < loceol && !isSPACE(*scan))
            scan++;
        break;
+    case NSPACEL:
+       regtainted = TRUE;
+       while (scan < loceol && !isSPACE_LC(*scan))
+           scan++;
+       break;
     case DIGIT:
        while (scan < loceol && isDIGIT(*scan))
            scan++;
@@ -1041,6 +1220,48 @@ I32 max;
 }
 
 /*
+ - regclass - determine if a character falls into a character class
+ */
+
+static bool
+reginclass(p, c)
+register char *p;
+register I32 c;
+{
+    char flags = *p;
+    bool match = FALSE;
+
+    c &= 0xFF;
+    if (p[1 + (c >> 3)] & (1 << (c & 7)))
+       match = TRUE;
+    else if (flags & ANYOF_FOLD) {
+       I32 cf;
+       if (flags & ANYOF_LOCALE) {
+           regtainted = TRUE;
+           cf = fold_locale[c];
+       }
+       else
+           cf = fold[c];
+       if (p[1 + (cf >> 3)] & (1 << (cf & 7)))
+           match = TRUE;
+    }
+
+    if (!match && (flags & ANYOF_ISA)) {
+       regtainted = TRUE;
+
+       if (((flags & ANYOF_ALNUML)  && isALNUM_LC(c))  ||
+           ((flags & ANYOF_NALNUML) && !isALNUM_LC(c)) ||
+           ((flags & ANYOF_SPACEL)  && isSPACE_LC(c))  ||
+           ((flags & ANYOF_NSPACEL) && !isSPACE_LC(c)))
+       {
+           match = TRUE;
+       }
+    }
+
+    return match ^ ((flags & ANYOF_INVERT) != 0);
+}
+
+/*
  - regnext - dig the "next" pointer out of a node
  *
  * [Note, when REGALIGN is defined there are two places in regmatch()