*
**** Alterations to Henry's code are...
****
- **** Copyright (c) 1991-1994, Larry Wall
+ **** Copyright (c) 1991-1997, Larry Wall
****
**** You may distribute under the terms of either the GNU General Public
**** License or the Artistic License, as specified in the README file.
#include "INTERN.h"
#include "regcomp.h"
+#ifdef USE_THREADS
+#undef op
+#endif /* USE_THREADS */
+
#ifdef MSDOS
# if defined(BUGGY_MSC6)
/* MSC 6.00A breaks on op/regexp.t test 85 unless we turn this off */
static void regoptail _((char *, char *));
static void regset _((char *, I32));
static void regtail _((char *, char *));
+static char* regwhite _((char *, char *));
static char* nextchar _((void));
/*
* of the structure of the compiled regexp. [I'll say.]
*/
regexp *
-pregcomp(exp,xend,pm)
-char* exp;
-char* xend;
-PMOP* pm;
+pregcomp(char *exp, char *xend, PMOP *pm)
{
register regexp *r;
register char *scan;
if (sawplus && (!sawopen || !regsawback))
r->reganch |= ROPT_SKIP; /* x+ must match 1st of run */
- DEBUG_r(PerlIO_printf(Perl_debug_log, "first %d next %d offset %d\n",
- OP(first), OP(NEXTOPER(first)), first - scan));
+ DEBUG_r(PerlIO_printf(Perl_debug_log, "first %d next %d offset %ld\n",
+ OP(first), OP(NEXTOPER(first)), (long)(first - scan)));
/*
* If there's something expensive in the r.e., find the
* longest literal string that must appear and make it the
* follows makes it hard to avoid.
*/
static char *
-reg(paren, flagp)
-I32 paren; /* Parenthesized? */
-I32 *flagp;
+reg(I32 paren, I32 *flagp)
+ /* Parenthesized? */
+
{
register char *ret;
register char *br;
nextchar();
*flagp = TRYAGAIN;
return NULL;
+ case 0:
+ croak("Sequence (? incomplete");
+ break;
default:
--regparse;
- while (*regparse && strchr("iogmsx", *regparse))
+ while (*regparse && strchr("iogcmsx", *regparse))
pmflag(®flags, *regparse++);
if (*regparse != ')')
croak("Sequence (?%c...) not recognized", *regparse);
* Implements the concatenation operator.
*/
static char *
-regbranch(flagp)
-I32 *flagp;
+regbranch(I32 *flagp)
{
register char *ret;
register char *chain;
* endmarker role is not redundant.
*/
static char *
-regpiece(flagp)
-I32 *flagp;
+regpiece(I32 *flagp)
{
register char *ret;
register char op;
*flagp = flags;
return(ret);
}
+
+ if (!(flags&HASWIDTH) && op != '?')
+ FAIL("regexp *+ operand could be empty"); /* else may core dump */
+
nextchar();
*flagp = (op != '+') ? (WORST|SPSTART) : (WORST|HASWIDTH);
* [Yes, it is worth fixing, some scripts can run twice the speed.]
*/
static char *
-regatom(flagp)
-I32 *flagp;
+regatom(I32 *flagp)
{
register char *ret = 0;
I32 flags;
goto defchar;
else {
regsawback = 1;
- ret = reganode(REF, num);
+ ret = reganode((regflags & PMf_FOLD)
+ ? ((regflags & PMf_LOCALE) ? REFFL : REFF)
+ : REF, num);
*flagp |= HASWIDTH;
while (isDIGIT(*regparse))
regparse++;
len++)
{
oldp = p;
+
+ if (regflags & PMf_EXTENDED)
+ p = regwhite(p, regxend);
switch (*p) {
case '^':
case '$':
break;
}
break;
- case '#':
- if (regflags & PMf_EXTENDED) {
- while (p < regxend && *p != '\n') p++;
- }
- /* FALL THROUGH */
- case ' ': case '\t': case '\n': case '\r': case '\f': case '\v':
- if (regflags & PMf_EXTENDED) {
- p++;
- len--;
- continue;
- }
- /* FALL THROUGH */
default:
ender = *p++;
break;
}
+ if (regflags & PMf_EXTENDED)
+ p = regwhite(p, regxend);
if (ISMULT2(p)) { /* Back off on ?+*. */
if (len)
p = oldp;
return(ret);
}
+static char *
+regwhite(char *p, char *e)
+{
+ while (p < e) {
+ if (isSPACE(*p))
+ ++p;
+ else if (*p == '#') {
+ do {
+ p++;
+ } while (p < e && *p != '\n');
+ }
+ else
+ break;
+ }
+ return p;
+}
+
static void
-regset(opnd, c)
-char *opnd;
-register I32 c;
+regset(char *opnd, register I32 c)
{
if (opnd == ®dummy)
return;
}
static char *
-regclass()
+regclass(void)
{
register char *opnd;
- register I32 class;
+ register I32 Class;
register I32 lastclass = 1234;
register I32 range = 0;
register char *ret;
ret = regnode(ANYOF);
opnd = regcode;
- for (class = 0; class < 33; class++)
+ for (Class = 0; Class < 33; Class++)
regc(0);
if (*regparse == '^') { /* Complement of range. */
regnaughty++;
goto skipcond; /* allow 1st char to be ] or - */
while (regparse < regxend && *regparse != ']') {
skipcond:
- class = UCHARAT(regparse++);
- if (class == '\\') {
- class = UCHARAT(regparse++);
- switch (class) {
+ Class = UCHARAT(regparse++);
+ if (Class == '\\') {
+ Class = UCHARAT(regparse++);
+ switch (Class) {
case 'w':
if (regflags & PMf_LOCALE) {
if (opnd != ®dummy)
*opnd |= ANYOF_ALNUML;
}
else {
- for (class = 0; class < 256; class++)
- if (isALNUM(class))
- regset(opnd, class);
+ for (Class = 0; Class < 256; Class++)
+ if (isALNUM(Class))
+ regset(opnd, Class);
}
lastclass = 1234;
continue;
*opnd |= ANYOF_NALNUML;
}
else {
- for (class = 0; class < 256; class++)
- if (!isALNUM(class))
- regset(opnd, class);
+ for (Class = 0; Class < 256; Class++)
+ if (!isALNUM(Class))
+ regset(opnd, Class);
}
lastclass = 1234;
continue;
*opnd |= ANYOF_SPACEL;
}
else {
- for (class = 0; class < 256; class++)
- if (isSPACE(class))
- regset(opnd, class);
+ for (Class = 0; Class < 256; Class++)
+ if (isSPACE(Class))
+ regset(opnd, Class);
}
lastclass = 1234;
continue;
*opnd |= ANYOF_NSPACEL;
}
else {
- for (class = 0; class < 256; class++)
- if (!isSPACE(class))
- regset(opnd, class);
+ for (Class = 0; Class < 256; Class++)
+ if (!isSPACE(Class))
+ regset(opnd, Class);
}
lastclass = 1234;
continue;
case 'd':
- for (class = '0'; class <= '9'; class++)
- regset(opnd, class);
+ for (Class = '0'; Class <= '9'; Class++)
+ regset(opnd, Class);
lastclass = 1234;
continue;
case 'D':
- for (class = 0; class < '0'; class++)
- regset(opnd, class);
- for (class = '9' + 1; class < 256; class++)
- regset(opnd, class);
+ for (Class = 0; Class < '0'; Class++)
+ regset(opnd, Class);
+ for (Class = '9' + 1; Class < 256; Class++)
+ regset(opnd, Class);
lastclass = 1234;
continue;
case 'n':
- class = '\n';
+ Class = '\n';
break;
case 'r':
- class = '\r';
+ Class = '\r';
break;
case 't':
- class = '\t';
+ Class = '\t';
break;
case 'f':
- class = '\f';
+ Class = '\f';
break;
case 'b':
- class = '\b';
+ Class = '\b';
break;
case 'e':
- class = '\033';
+ Class = '\033';
break;
case 'a':
- class = '\007';
+ Class = '\007';
break;
case 'x':
- class = scan_hex(regparse, 2, &numlen);
+ Class = scan_hex(regparse, 2, &numlen);
regparse += numlen;
break;
case 'c':
- class = UCHARAT(regparse++);
- class = toCTRL(class);
+ Class = UCHARAT(regparse++);
+ Class = toCTRL(Class);
break;
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
- class = scan_oct(--regparse, 3, &numlen);
+ Class = scan_oct(--regparse, 3, &numlen);
regparse += numlen;
break;
}
}
if (range) {
- if (lastclass > class)
+ if (lastclass > Class)
FAIL("invalid [] range in regexp");
range = 0;
}
else {
- lastclass = class;
+ lastclass = Class;
if (*regparse == '-' && regparse+1 < regxend &&
regparse[1] != ']') {
regparse++;
continue; /* do it next time */
}
}
- for ( ; lastclass <= class; lastclass++)
+ for ( ; lastclass <= Class; lastclass++)
regset(opnd, lastclass);
- lastclass = class;
+ lastclass = Class;
}
if (*regparse != ']')
FAIL("unmatched [] in regexp");
}
static char*
-nextchar()
+nextchar(void)
{
char* retval = regparse++;
- regtail - set the next-pointer at the end of a node chain
*/
static void
-regtail(p, val)
-char *p;
-char *val;
+regtail(char *p, char *val)
{
register char *scan;
register char *temp;
- regoptail - regtail on operand of first argument; nop if operandless
*/
static void
-regoptail(p, val)
-char *p;
-char *val;
+regoptail(char *p, char *val)
{
/* "Operandless" and "op != BRANCH" are synonymous in practice. */
if (p == NULL || p == ®dummy || regkind[(U8)OP(p)] != BRANCH)
- regcurly - a little FSA that accepts {\d+,?\d*}
*/
STATIC I32
-regcurly(s)
-register char *s;
+regcurly(register char *s)
{
if (*s++ != '{')
return FALSE;
- regdump - dump a regexp onto Perl_debug_log in vaguely comprehensible form
*/
void
-regdump(r)
-regexp *r;
+regdump(regexp *r)
{
register char *s;
register char op = EXACT; /* Arbitrary non-END op. */
register char *next;
-
+ SV *sv = sv_newmortal();
s = r->program + 1;
while (op != END) { /* While that wasn't END last time... */
s++;
#endif
op = OP(s);
- PerlIO_printf(Perl_debug_log, "%2d%s", s-r->program, regprop(s)); /* Where, what. */
+ /* where, what */
+ regprop(sv, s);
+ PerlIO_printf(Perl_debug_log, "%2ld%s", (long)(s - r->program), SvPVX(sv));
next = regnext(s);
s += regarglen[(U8)op];
if (next == NULL) /* Next ptr. */
PerlIO_printf(Perl_debug_log, "(0)");
else
- PerlIO_printf(Perl_debug_log, "(%d)", (s-r->program)+(next-s));
+ PerlIO_printf(Perl_debug_log, "(%ld)", (long)(s-r->program)+(next-s));
s += 3;
if (op == ANYOF) {
s += 33;
/* Header fields of interest. */
if (r->regstart)
PerlIO_printf(Perl_debug_log, "start `%s' ", SvPVX(r->regstart));
- if (r->regstclass)
- PerlIO_printf(Perl_debug_log, "stclass `%s' ", regprop(r->regstclass));
+ if (r->regstclass) {
+ regprop(sv, r->regstclass);
+ PerlIO_printf(Perl_debug_log, "stclass `%s' ", SvPVX(sv));
+ }
if (r->reganch & ROPT_ANCH) {
PerlIO_printf(Perl_debug_log, "anchored");
if (r->reganch & ROPT_ANCH_BOL)
/*
- regprop - printable representation of opcode
*/
-char *
-regprop(op)
-char *op;
+void
+regprop(SV *sv, char *o)
{
register char *p = 0;
- (void) strcpy(buf, ":");
-
- switch (OP(op)) {
+ sv_setpv(sv, ":");
+ switch (OP(o)) {
case BOL:
p = "BOL";
break;
p = "NBOUNDL";
break;
case CURLY:
- (void)sprintf(buf+strlen(buf), "CURLY {%d,%d}", ARG1(op),ARG2(op));
- p = NULL;
+ sv_catpvf(sv, "CURLY {%d,%d}", ARG1(o), ARG2(o));
break;
case CURLYX:
- (void)sprintf(buf+strlen(buf), "CURLYX {%d,%d}", ARG1(op),ARG2(op));
- p = NULL;
+ sv_catpvf(sv, "CURLYX {%d,%d}", ARG1(o), ARG2(o));
break;
case REF:
- (void)sprintf(buf+strlen(buf), "REF%d", ARG1(op));
- p = NULL;
+ sv_catpvf(sv, "REF%d", ARG1(o));
+ break;
+ case REFF:
+ sv_catpvf(sv, "REFF%d", ARG1(o));
+ break;
+ case REFFL:
+ sv_catpvf(sv, "REFFL%d", ARG1(o));
break;
case OPEN:
- (void)sprintf(buf+strlen(buf), "OPEN%d", ARG1(op));
- p = NULL;
+ sv_catpvf(sv, "OPEN%d", ARG1(o));
break;
case CLOSE:
- (void)sprintf(buf+strlen(buf), "CLOSE%d", ARG1(op));
+ sv_catpvf(sv, "CLOSE%d", ARG1(o));
p = NULL;
break;
case STAR:
default:
FAIL("corrupted regexp opcode");
}
- if (p != NULL)
- (void) strcat(buf, p);
- return(buf);
+ if (p)
+ sv_catpv(sv, p);
}
#endif /* DEBUGGING */
void
-pregfree(r)
-struct regexp *r;
+pregfree(struct regexp *r)
{
if (!r)
return;