X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=regexec.c;h=4119dfc97a9437705d4e2751eb1dc811da4fc4a1;hb=4521c7914c9458406ab869e3d05e04c7de0567d5;hp=4621990d6f7b0c874c2ddbaec997b653cfb45f85;hpb=748a93069b3d16374a9859d1456065dd3ae11394;p=p5sagit%2Fp5-mst-13.2.git diff --git a/regexec.c b/regexec.c index 4621990..4119dfc 100644 --- a/regexec.c +++ b/regexec.c @@ -14,9 +14,14 @@ * blame Henry for some of the lack of readability. */ +/* The names of the functions have been changed from regcomp and + * regexec to pregcomp and pregexec in order to avoid conflicts + * with the POSIX routines of the same names. +*/ + /*SUPPRESS 112*/ /* - * regcomp and regexec -- regsub and regerror are not used in perl + * pregcomp and pregexec -- regsub and regerror are not used in perl * * Copyright (c) 1986 by University of Toronto. * Written by Henry Spencer. Not derived from licensed software. @@ -132,7 +137,7 @@ regcppop() #define regcpblow(cp) leave_scope(cp) /* - * regexec and friends + * pregexec and friends */ /* @@ -144,10 +149,10 @@ static I32 regrepeat _((char *p, I32 max)); static I32 regtry _((regexp *prog, char *startpos)); /* - - regexec - match a regexp against a string + - pregexec - match a regexp against a string */ I32 -regexec(prog, stringarg, strend, strbeg, minend, screamer, safebase) +pregexec(prog, stringarg, strend, strbeg, minend, screamer, safebase) register regexp *prog; char *stringarg; register char *strend; /* pointer to null at end of string */ @@ -166,6 +171,7 @@ I32 safebase; /* no need to remember string in subbase */ CURCUR cc; cc.cur = 0; + cc.oldcc = 0; regcc = &cc; #ifdef DEBUGGING @@ -571,14 +577,26 @@ char *prog; register char *s; /* operand or save */ register char *locinput = reginput; int minmod = 0; +#ifdef DEBUGGING + static int regindent = 0; + regindent++; +#endif nextchar = *locinput; scan = prog; while (scan != NULL) { #ifdef DEBUGGING - if (regnarrate) - fprintf(stderr, "%2d%-8.8s\t<%.10s>\n", +#define sayYES goto yes +#define sayNO goto no +#define saySAME(x) if (x) goto yes; else goto no + if (regnarrate) { + PerlIO_printf(Perl_debug_log, "%*s%2d%-8.8s\t<%.10s>\n", regindent*2, "", scan - regprogram, regprop(scan), locinput); + } +#else +#define sayYES return 1 +#define sayNO return 0 +#define saySAME(x) return x #endif #ifdef REGALIGN @@ -598,7 +616,7 @@ char *prog; /* regtill = regbol; */ break; } - return 0; + sayNO; case MBOL: if (locinput == regbol ? regprev == '\n' @@ -606,15 +624,15 @@ char *prog; { break; } - return 0; + sayNO; case SBOL: if (locinput == regbol && regprev == '\n') break; - return 0; + sayNO; case GBOL: if (locinput == regbol) break; - return 0; + sayNO; case EOL: if (multiline) goto meol; @@ -623,23 +641,23 @@ char *prog; case MEOL: meol: if ((nextchar || locinput < regeol) && nextchar != '\n') - return 0; + sayNO; break; case SEOL: seol: if ((nextchar || locinput < regeol) && nextchar != '\n') - return 0; + sayNO; if (regeol - locinput > 1) - return 0; + sayNO; break; case SANY: if (!nextchar && locinput >= regeol) - return 0; + sayNO; nextchar = *++locinput; break; case ANY: if (!nextchar && locinput >= regeol || nextchar == '\n') - return 0; + sayNO; nextchar = *++locinput; break; case EXACTLY: @@ -647,11 +665,11 @@ char *prog; ln = *s++; /* Inline the first character, for speed. */ if (*s != nextchar) - return 0; + sayNO; if (regeol - locinput < ln) - return 0; + sayNO; if (ln > 1 && bcmp(s, locinput, ln) != 0) - return 0; + sayNO; locinput += ln; nextchar = *locinput; break; @@ -660,23 +678,23 @@ char *prog; if (nextchar < 0) nextchar = UCHARAT(locinput); if (s[nextchar >> 3] & (1 << (nextchar&7))) - return 0; + sayNO; if (!nextchar && locinput >= regeol) - return 0; + sayNO; nextchar = *++locinput; break; case ALNUM: if (!nextchar) - return 0; + sayNO; if (!isALNUM(nextchar)) - return 0; + sayNO; nextchar = *++locinput; break; case NALNUM: if (!nextchar && locinput >= regeol) - return 0; + sayNO; if (isALNUM(nextchar)) - return 0; + sayNO; nextchar = *++locinput; break; case NBOUND: @@ -687,51 +705,51 @@ char *prog; ln = isALNUM(locinput[-1]); n = isALNUM(nextchar); /* is next char in word? */ if ((ln == n) == (OP(scan) == BOUND)) - return 0; + sayNO; break; case SPACE: if (!nextchar && locinput >= regeol) - return 0; + sayNO; if (!isSPACE(nextchar)) - return 0; + sayNO; nextchar = *++locinput; break; case NSPACE: if (!nextchar) - return 0; + sayNO; if (isSPACE(nextchar)) - return 0; + sayNO; nextchar = *++locinput; break; case DIGIT: if (!isDIGIT(nextchar)) - return 0; + sayNO; nextchar = *++locinput; break; case NDIGIT: if (!nextchar && locinput >= regeol) - return 0; + sayNO; if (isDIGIT(nextchar)) - return 0; + sayNO; nextchar = *++locinput; break; case REF: n = ARG1(scan); /* which paren pair */ s = regstartp[n]; if (!s) - return 0; + sayNO; if (!regendp[n]) - return 0; + sayNO; if (s == regendp[n]) break; /* Inline the first character, for speed. */ if (*s != nextchar) - return 0; + sayNO; ln = regendp[n] - s; if (locinput + ln > regeol) - return 0; + sayNO; if (ln > 1 && bcmp(s, locinput, ln) != 0) - return 0; + sayNO; locinput += ln; nextchar = *locinput; break; @@ -769,7 +787,7 @@ char *prog; n = regmatch(PREVOPER(next)); /* start on the WHILEM */ regcpblow(cp); regcc = cc.oldcc; - return n; + saySAME(n); } /* NOT REACHED */ case WHILEM: { @@ -783,19 +801,25 @@ char *prog; */ CURCUR* cc = regcc; - n = cc->cur + 1; + n = cc->cur + 1; /* how many we know we matched */ reginput = locinput; +#ifdef DEBUGGING + if (regnarrate) + PerlIO_printf(Perl_debug_log, "%*s %d %lx\n", regindent*2, "", + n, (long)cc); +#endif + /* If degenerate scan matches "", assume scan done. */ if (locinput == cc->lastloc) { regcc = cc->oldcc; ln = regcc->cur; if (regmatch(cc->next)) - return TRUE; + sayYES; regcc->cur = ln; regcc = cc; - return FALSE; + sayNO; } /* First just match a string of min scans. */ @@ -803,7 +827,10 @@ char *prog; if (n < cc->min) { cc->cur = n; cc->lastloc = locinput; - return regmatch(cc->scan); + if (regmatch(cc->scan)) + sayYES; + cc->cur = n - 1; + sayNO; } /* Prefer next over scan for minimal matching. */ @@ -812,18 +839,21 @@ char *prog; regcc = cc->oldcc; ln = regcc->cur; if (regmatch(cc->next)) - return TRUE; /* All done. */ + sayYES; /* All done. */ regcc->cur = ln; regcc = cc; if (n >= cc->max) /* Maximum greed exceeded? */ - return FALSE; + sayNO; /* Try scanning more and see if it helps. */ reginput = locinput; cc->cur = n; cc->lastloc = locinput; - return regmatch(cc->scan); + if (regmatch(cc->scan)) + sayYES; + cc->cur = n - 1; + sayNO; } /* Prefer scan over next for maximal matching. */ @@ -833,7 +863,7 @@ char *prog; cc->cur = n; cc->lastloc = locinput; if (regmatch(cc->scan)) - return TRUE; + sayYES; regcppop(); /* Restore some previous $s? */ reginput = locinput; } @@ -842,10 +872,11 @@ char *prog; regcc = cc->oldcc; ln = regcc->cur; if (regmatch(cc->next)) - return TRUE; + sayYES; regcc->cur = ln; regcc = cc; - return FALSE; + cc->cur = n - 1; + sayNO; } /* NOT REACHED */ case BRANCH: { @@ -856,7 +887,7 @@ char *prog; do { reginput = locinput; if (regmatch(NEXTOPER(scan))) - return 1; + sayYES; for (n = *reglastparen; n > lastparen; n--) regendp[n] = 0; *reglastparen = n; @@ -871,7 +902,7 @@ char *prog; scan = regnext(scan); #endif } while (scan != NULL && OP(scan) == BRANCH); - return 0; + sayNO; /* NOTREACHED */ } } @@ -906,12 +937,12 @@ char *prog; if (minmod) { minmod = 0; if (ln && regrepeat(scan, ln) < ln) - return 0; - while (n >= ln) { + sayNO; + while (n >= ln || (n == 32767 && ln > 0)) { /* ln overflow ? */ /* If it could work, try it. */ if (nextchar == -1000 || *reginput == nextchar) if (regmatch(next)) - return 1; + sayYES; /* Couldn't or didn't -- back up. */ reginput = locinput + ln; if (regrepeat(scan, 1)) { @@ -919,7 +950,7 @@ char *prog; reginput = locinput + ln; } else - return 0; + sayNO; } } else { @@ -931,31 +962,31 @@ char *prog; /* If it could work, try it. */ if (nextchar == -1000 || *reginput == nextchar) if (regmatch(next)) - return 1; + sayYES; /* Couldn't or didn't -- back up. */ n--; reginput = locinput + n; } } - return 0; + sayNO; case SUCCEED: case END: reginput = locinput; /* put where regtry can find it */ - return 1; /* Success! */ + sayYES; /* Success! */ case IFMATCH: reginput = locinput; scan = NEXTOPER(scan); if (!regmatch(scan)) - return 0; + sayNO; break; case UNLESSM: reginput = locinput; scan = NEXTOPER(scan); if (regmatch(scan)) - return 0; + sayNO; break; default: - fprintf(stderr, "%x %d\n",(unsigned)scan,scan[1]); + PerlIO_printf(PerlIO_stderr(), "%x %d\n",(unsigned)scan,scan[1]); FAIL("regexp memory corruption"); } scan = next; @@ -967,6 +998,18 @@ char *prog; */ FAIL("corrupted regexp pointers"); /*NOTREACHED*/ + sayNO; + +yes: +#ifdef DEBUGGING + regindent--; +#endif + return 1; + +no: +#ifdef DEBUGGING + regindent--; +#endif return 0; }