From: Gurusamy Sarathy Date: Wed, 13 May 1998 09:47:11 +0000 (+0000) Subject: [win32] merge change#664 from maint branch X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=ae5c130cf43baa916c1292cd85a40d054824ba20;p=p5sagit%2Fp5-mst-13.2.git [win32] merge change#664 from maint branch p4raw-link: @664 on //depot/maint-5.004/perl: c3ae1fa52acf9130fcc1770ad2fce8519766b744 p4raw-id: //depot/win32/perl@926 --- diff --git a/regcomp.c b/regcomp.c index 0f48976..8d66f38 100644 --- a/regcomp.c +++ b/regcomp.c @@ -127,7 +127,6 @@ static regnode *reg_node _((U8)); static regnode *regpiece _((I32 *)); static void reginsert _((U8, regnode *)); static void regoptail _((regnode *, regnode *)); -static void regset _((char *, I32)); static void regtail _((regnode *, regnode *)); static char* regwhite _((char *, char *)); static char* nextchar _((void)); @@ -1831,15 +1830,6 @@ regwhite(char *p, char *e) return p; } -static void -regset(char *opnd, register I32 c) -{ - if (SIZE_ONLY) - return; - c &= 0xFF; - opnd[1 + (c >> 3)] |= (1 << (c & 7)); -} - static regnode * regclass(void) { @@ -1903,63 +1893,67 @@ regclass(void) Class = UCHARAT(regparse++); switch (Class) { case 'w': - if (regflags & PMf_LOCALE) { - if (!SIZE_ONLY) + if (!SIZE_ONLY) { + if (regflags & PMf_LOCALE) *opnd |= ANYOF_ALNUML; - } - else { - for (Class = 0; Class < 256; Class++) - if (isALNUM(Class)) - regset(opnd, Class); + else { + for (Class = 0; Class < 256; Class++) + if (isALNUM(Class)) + ANYOF_SET(opnd, Class); + } } lastclass = 1234; continue; case 'W': - if (regflags & PMf_LOCALE) { - if (!SIZE_ONLY) + if (!SIZE_ONLY) { + if (regflags & PMf_LOCALE) *opnd |= ANYOF_NALNUML; - } - else { - for (Class = 0; Class < 256; Class++) - if (!isALNUM(Class)) - regset(opnd, Class); + else { + for (Class = 0; Class < 256; Class++) + if (!isALNUM(Class)) + ANYOF_SET(opnd, Class); + } } lastclass = 1234; continue; case 's': - if (regflags & PMf_LOCALE) { - if (!SIZE_ONLY) + if (!SIZE_ONLY) { + if (regflags & PMf_LOCALE) *opnd |= ANYOF_SPACEL; - } - else { - for (Class = 0; Class < 256; Class++) - if (isSPACE(Class)) - regset(opnd, Class); + else { + for (Class = 0; Class < 256; Class++) + if (isSPACE(Class)) + ANYOF_SET(opnd, Class); + } } lastclass = 1234; continue; case 'S': - if (regflags & PMf_LOCALE) { - if (!SIZE_ONLY) + if (!SIZE_ONLY) { + if (regflags & PMf_LOCALE) *opnd |= ANYOF_NSPACEL; - } - else { - for (Class = 0; Class < 256; Class++) - if (!isSPACE(Class)) - regset(opnd, Class); + else { + for (Class = 0; Class < 256; Class++) + if (!isSPACE(Class)) + ANYOF_SET(opnd, Class); + } } lastclass = 1234; continue; case 'd': - for (Class = '0'; Class <= '9'; Class++) - regset(opnd, Class); + if (!SIZE_ONLY) { + for (Class = '0'; Class <= '9'; Class++) + ANYOF_SET(opnd, Class); + } lastclass = 1234; continue; case 'D': - for (Class = 0; Class < '0'; Class++) - regset(opnd, Class); - for (Class = '9' + 1; Class < 256; Class++) - regset(opnd, Class); + if (!SIZE_ONLY) { + for (Class = 0; Class < '0'; Class++) + ANYOF_SET(opnd, Class); + for (Class = '9' + 1; Class < 256; Class++) + ANYOF_SET(opnd, Class); + } lastclass = 1234; continue; case 'n': @@ -2012,13 +2006,31 @@ regclass(void) continue; /* do it next time */ } } - for ( ; lastclass <= Class; lastclass++) - regset(opnd, lastclass); + if (!SIZE_ONLY) { + for ( ; lastclass <= Class; lastclass++) + ANYOF_SET(opnd, lastclass); + } lastclass = Class; } if (*regparse != ']') FAIL("unmatched [] in regexp"); nextchar(); + /* optimize case-insensitive simple patterns (e.g. /[a-z]/i) */ + if (!SIZE_ONLY && (*opnd & (0xFF ^ ANYOF_INVERT)) == ANYOF_FOLD) { + for (Class = 0; Class < 256; ++Class) { + if (ANYOF_TEST(opnd, Class)) { + I32 cf = fold[Class]; + ANYOF_SET(opnd, cf); + } + } + *opnd &= ~ANYOF_FOLD; + } + /* optimize inverted simple patterns (e.g. [^a-z]) */ + if (!SIZE_ONLY && (*opnd & 0xFF) == ANYOF_INVERT) { + for (Class = 0; Class < 32; ++Class) + opnd[1 + Class] ^= 0xFF; + *opnd = 0; + } return ret; } diff --git a/regcomp.h b/regcomp.h index 4b86a8d..0bd00e2 100644 --- a/regcomp.h +++ b/regcomp.h @@ -370,6 +370,13 @@ typedef char* regnode; #define ANYOF_SPACEL 0x02 #define ANYOF_NSPACEL 0x01 +/* Utility macros for bitmap of ANYOF */ +#define ANYOF_BYTE(p,c) (p)[1 + (((c) >> 3) & 31)] +#define ANYOF_BIT(c) (1 << ((c) & 7)) +#define ANYOF_SET(p,c) (ANYOF_BYTE(p,c) |= ANYOF_BIT(c)) +#define ANYOF_CLEAR(p,c) (ANYOF_BYTE(p,c) &= ~ANYOF_BIT(c)) +#define ANYOF_TEST(p,c) (ANYOF_BYTE(p,c) & ANYOF_BIT(c)) + #ifdef REGALIGN_STRUCT #define ANY_SKIP ((33 - 1)/sizeof(regnode) + 1) #else diff --git a/regexec.c b/regexec.c index 250704c..a9f2751 100644 --- a/regexec.c +++ b/regexec.c @@ -114,9 +114,11 @@ static I32 regmatch _((regnode *prog)); static I32 regrepeat _((regnode *p, I32 max)); static I32 regrepeat_hard _((regnode *p, I32 max, I32 *lp)); static I32 regtry _((regexp *prog, char *startpos)); + static bool reginclass _((char *p, I32 c)); static CHECKPOINT regcppush _((I32 parenfloor)); static char * regcppop _((void)); +#define REGINCLASS(p,c) (*(p) ? reginclass(p,c) : ANYOF_TEST(p,c)) static CHECKPOINT regcppush(I32 parenfloor) @@ -422,7 +424,7 @@ regexec_flags(register regexp *prog, char *stringarg, register char *strend, cha case ANYOF: Class = (char *) OPERAND(c); while (s < strend) { - if (reginclass(Class, *s)) { + if (REGINCLASS(Class, *s)) { if (tmp && regtry(prog, s)) goto got_it; else @@ -890,7 +892,7 @@ regmatch(regnode *prog) s = (char *) OPERAND(scan); if (nextchar < 0) nextchar = UCHARAT(locinput); - if (!reginclass(s, nextchar)) + if (!REGINCLASS(s, nextchar)) sayNO; if (!nextchar && locinput >= regeol) sayNO; @@ -1663,7 +1665,7 @@ regrepeat(regnode *p, I32 max) scan++; break; case ANYOF: - while (scan < loceol && reginclass(opnd, *scan)) + while (scan < loceol && REGINCLASS(opnd, *scan)) scan++; break; case ALNUM: @@ -1774,7 +1776,7 @@ reginclass(register char *p, register I32 c) bool match = FALSE; c &= 0xFF; - if (p[1 + (c >> 3)] & (1 << (c & 7))) + if (ANYOF_TEST(p, c)) match = TRUE; else if (flags & ANYOF_FOLD) { I32 cf; @@ -1784,7 +1786,7 @@ reginclass(register char *p, register I32 c) } else cf = fold[c]; - if (p[1 + (cf >> 3)] & (1 << (cf & 7))) + if (ANYOF_TEST(p, cf)) match = TRUE; } @@ -1800,7 +1802,7 @@ reginclass(register char *p, register I32 c) } } - return match ^ ((flags & ANYOF_INVERT) != 0); + return (flags & ANYOF_INVERT) ? !match : match; }