From: Gurusamy Sarathy Date: Wed, 8 Dec 1999 19:09:27 +0000 (+0000) Subject: apply change#4618 again along with Ilya's patch to fix bugs X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=653099ff2c52a6af02b3894d684593dfe31dcc17;p=p5sagit%2Fp5-mst-13.2.git apply change#4618 again along with Ilya's patch to fix bugs in it (see change#4622) p4raw-link: @4622 on //depot/perl: 34baa6c30415f54e9b8c2e622de1e229cf36d781 p4raw-link: @4618 on //depot/perl: f9d9cdce9cbb41baf3d0716ebac8540732d59bf8 p4raw-id: //depot/perl@4669 --- diff --git a/embed.h b/embed.h index 2250cc7..d28e673 100644 --- a/embed.h +++ b/embed.h @@ -938,7 +938,14 @@ #define regwhite S_regwhite #define nextchar S_nextchar #define dumpuntil S_dumpuntil +#define put_byte S_put_byte #define scan_commit S_scan_commit +#define cl_anything S_cl_anything +#define cl_is_anything S_cl_is_anything +#define cl_init S_cl_init +#define cl_init_zero S_cl_init_zero +#define cl_and S_cl_and +#define cl_or S_cl_or #define study_chunk S_study_chunk #define add_data S_add_data #define re_croak2 S_re_croak2 @@ -2327,7 +2334,14 @@ #define regwhite(a,b) S_regwhite(aTHX_ a,b) #define nextchar() S_nextchar(aTHX) #define dumpuntil(a,b,c,d,e) S_dumpuntil(aTHX_ a,b,c,d,e) +#define put_byte(a,b) S_put_byte(aTHX_ a,b) #define scan_commit(a) S_scan_commit(aTHX_ a) +#define cl_anything(a) S_cl_anything(aTHX_ a) +#define cl_is_anything(a) S_cl_is_anything(aTHX_ a) +#define cl_init(a) S_cl_init(aTHX_ a) +#define cl_init_zero(a) S_cl_init_zero(aTHX_ a) +#define cl_and(a,b) S_cl_and(aTHX_ a,b) +#define cl_or(a,b) S_cl_or(aTHX_ a,b) #define study_chunk(a,b,c,d,e) S_study_chunk(aTHX_ a,b,c,d,e) #define add_data(a,b) S_add_data(aTHX_ a,b) #define regpposixcc(a) S_regpposixcc(aTHX_ a) @@ -4535,8 +4549,22 @@ #define nextchar S_nextchar #define S_dumpuntil CPerlObj::S_dumpuntil #define dumpuntil S_dumpuntil +#define S_put_byte CPerlObj::S_put_byte +#define put_byte S_put_byte #define S_scan_commit CPerlObj::S_scan_commit #define scan_commit S_scan_commit +#define S_cl_anything CPerlObj::S_cl_anything +#define cl_anything S_cl_anything +#define S_cl_is_anything CPerlObj::S_cl_is_anything +#define cl_is_anything S_cl_is_anything +#define S_cl_init CPerlObj::S_cl_init +#define cl_init S_cl_init +#define S_cl_init_zero CPerlObj::S_cl_init_zero +#define cl_init_zero S_cl_init_zero +#define S_cl_and CPerlObj::S_cl_and +#define cl_and S_cl_and +#define S_cl_or CPerlObj::S_cl_or +#define cl_or S_cl_or #define S_study_chunk CPerlObj::S_study_chunk #define study_chunk S_study_chunk #define S_add_data CPerlObj::S_add_data diff --git a/embed.pl b/embed.pl index 3466cd1..d3eee2d 100755 --- a/embed.pl +++ b/embed.pl @@ -2017,7 +2017,16 @@ s |char*|regwhite |char *|char * s |char*|nextchar s |regnode*|dumpuntil |regnode *start|regnode *node \ |regnode *last|SV* sv|I32 l +s |void |put_byte |SV* sv|int c s |void |scan_commit |struct scan_data_t *data +s |void |cl_anything |struct regnode_charclass_class *cl +s |int |cl_is_anything |struct regnode_charclass_class *cl +s |void |cl_init |struct regnode_charclass_class *cl +s |void |cl_init_zero |struct regnode_charclass_class *cl +s |void |cl_and |struct regnode_charclass_class *cl \ + |struct regnode_charclass_class *and_with +s |void |cl_or |struct regnode_charclass_class *cl \ + |struct regnode_charclass_class *or_with s |I32 |study_chunk |regnode **scanp|I32 *deltap \ |regnode *last|struct scan_data_t *data \ |U32 flags diff --git a/perl.h b/perl.h index eae26bb..dc72ffe 100644 --- a/perl.h +++ b/perl.h @@ -1748,6 +1748,7 @@ struct _sublex_info { typedef struct magic_state MGS; /* struct magic_state defined in mg.c */ struct scan_data_t; /* Used in S_* functions in regcomp.c */ +struct regnode_charclass_class; /* Used in S_* functions in regcomp.c */ typedef I32 CHECKPOINT; diff --git a/proto.h b/proto.h index c49e606..9c4dd16 100644 --- a/proto.h +++ b/proto.h @@ -951,7 +951,14 @@ STATIC void S_regtail(pTHX_ regnode *, regnode *); STATIC char* S_regwhite(pTHX_ char *, char *); STATIC char* S_nextchar(pTHX); STATIC regnode* S_dumpuntil(pTHX_ regnode *start, regnode *node, regnode *last, SV* sv, I32 l); +STATIC void S_put_byte(pTHX_ SV* sv, int c); STATIC void S_scan_commit(pTHX_ struct scan_data_t *data); +STATIC void S_cl_anything(pTHX_ struct regnode_charclass_class *cl); +STATIC int S_cl_is_anything(pTHX_ struct regnode_charclass_class *cl); +STATIC void S_cl_init(pTHX_ struct regnode_charclass_class *cl); +STATIC void S_cl_init_zero(pTHX_ struct regnode_charclass_class *cl); +STATIC void S_cl_and(pTHX_ struct regnode_charclass_class *cl, struct regnode_charclass_class *and_with); +STATIC void S_cl_or(pTHX_ struct regnode_charclass_class *cl, struct regnode_charclass_class *or_with); STATIC I32 S_study_chunk(pTHX_ regnode **scanp, I32 *deltap, regnode *last, struct scan_data_t *data, U32 flags); STATIC I32 S_add_data(pTHX_ I32 n, char *s); STATIC void S_re_croak2(pTHX_ const char* pat1, const char* pat2, ...) __attribute__((noreturn)); diff --git a/regcomp.c b/regcomp.c index 0a76384..83438e9 100644 --- a/regcomp.c +++ b/regcomp.c @@ -151,6 +151,7 @@ typedef struct scan_data_t { I32 offset_float_max; I32 flags; I32 whilem_c; + struct regnode_charclass_class *start_class; } scan_data_t; /* @@ -158,7 +159,7 @@ typedef struct scan_data_t { */ static scan_data_t zero_scan_data = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0 }; + 0, 0, 0, 0, 0 }; #define SF_BEFORE_EOL (SF_BEFORE_SEOL|SF_BEFORE_MEOL) #define SF_BEFORE_SEOL 0x1 @@ -184,6 +185,9 @@ static scan_data_t zero_scan_data = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, #define SF_IN_PAR 0x100 #define SF_HAS_EVAL 0x200 #define SCF_DO_SUBSTR 0x400 +#define SCF_DO_STCLASS_AND 0x0800 +#define SCF_DO_STCLASS_OR 0x1000 +#define SCF_DO_STCLASS (SCF_DO_STCLASS_AND|SCF_DO_STCLASS_OR) #define RF_utf8 8 #define UTF (PL_reg_flags & RF_utf8) @@ -202,6 +206,10 @@ static scan_data_t zero_scan_data = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, static void clear_re(pTHXo_ void *r); +/* Mark that we cannot extend a found fixed substring at this point. + Updata the longest found anchored substring and the longest found + floating substrings if needed. */ + STATIC void S_scan_commit(pTHX_ scan_data_t *data) { @@ -236,6 +244,135 @@ S_scan_commit(pTHX_ scan_data_t *data) data->flags &= ~SF_BEFORE_EOL; } +/* Can match anything (initialization) */ +STATIC void +S_cl_anything(pTHX_ struct regnode_charclass_class *cl) +{ + int value; + + ANYOF_CLASS_ZERO(cl); + for (value = 0; value < 256; ++value) + ANYOF_BITMAP_SET(cl, value); + cl->flags = ANYOF_EOS; + if (LOC) + cl->flags |= ANYOF_LOCALE; +} + +/* Can match anything (initialization) */ +STATIC int +S_cl_is_anything(pTHX_ struct regnode_charclass_class *cl) +{ + int value; + + for (value = 0; value < ANYOF_MAX; value += 2) + if (ANYOF_CLASS_TEST(cl, value) && ANYOF_CLASS_TEST(cl, value + 1)) + return 1; + for (value = 0; value < 256; ++value) + if (!ANYOF_BITMAP_TEST(cl, value)) + return 0; + return 1; +} + +/* Can match anything (initialization) */ +STATIC void +S_cl_init(pTHX_ struct regnode_charclass_class *cl) +{ + cl->type = ANYOF; + cl_anything(cl); +} + +STATIC void +S_cl_init_zero(pTHX_ struct regnode_charclass_class *cl) +{ + cl->type = ANYOF; + cl_anything(cl); + ANYOF_CLASS_ZERO(cl); + ANYOF_BITMAP_ZERO(cl); + if (LOC) + cl->flags |= ANYOF_LOCALE; +} + +/* 'And' a given class with another one. Can create false positives */ +/* We assume that cl is not inverted */ +STATIC void +S_cl_and(pTHX_ struct regnode_charclass_class *cl, + struct regnode_charclass_class *and_with) +{ + int value; + + if (!(and_with->flags & ANYOF_CLASS) + && !(cl->flags & ANYOF_CLASS) + && (and_with->flags & ANYOF_LOCALE) == (cl->flags & ANYOF_LOCALE) + && !(and_with->flags & ANYOF_FOLD) + && !(cl->flags & ANYOF_FOLD)) { + int i; + + if (and_with->flags & ANYOF_INVERT) + for (i = 0; i < ANYOF_BITMAP_SIZE; i++) + cl->bitmap[i] &= ~and_with->bitmap[i]; + else + for (i = 0; i < ANYOF_BITMAP_SIZE; i++) + cl->bitmap[i] &= and_with->bitmap[i]; + } /* XXXX: logic is complicated otherwise, leave it along for a moment. */ + if (!(and_with->flags & ANYOF_EOS)) + cl->flags &= ~ANYOF_EOS; +} + +/* 'OR' a given class with another one. Can create false positives */ +/* We assume that cl is not inverted */ +STATIC void +S_cl_or(pTHX_ struct regnode_charclass_class *cl, struct regnode_charclass_class *or_with) +{ + int value; + + if (or_with->flags & ANYOF_INVERT) { + /* We do not use + * (B1 | CL1) | (!B2 & !CL2) = (B1 | !B2 & !CL2) | (CL1 | (!B2 & !CL2)) + * <= (B1 | !B2) | (CL1 | !CL2) + * which is wasteful if CL2 is small, but we ignore CL2: + * (B1 | CL1) | (!B2 & !CL2) <= (B1 | CL1) | !B2 = (B1 | !B2) | CL1 + * XXXX Can we handle case-fold? Unclear: + * (OK1(i) | OK1(i')) | !(OK1(i) | OK1(i')) = + * (OK1(i) | OK1(i')) | (!OK1(i) & !OK1(i')) + */ + if ( (or_with->flags & ANYOF_LOCALE) == (cl->flags & ANYOF_LOCALE) + && !(or_with->flags & ANYOF_FOLD) + && !(cl->flags & ANYOF_FOLD) ) { + int i; + + for (i = 0; i < ANYOF_BITMAP_SIZE; i++) + cl->bitmap[i] |= ~or_with->bitmap[i]; + } /* XXXX: logic is complicated otherwise */ + else { + cl_anything(cl); + } + } else { + /* (B1 | CL1) | (B2 | CL2) = (B1 | B2) | (CL1 | CL2)) */ + if ( (or_with->flags & ANYOF_LOCALE) == (cl->flags & ANYOF_LOCALE) + && (!(or_with->flags & ANYOF_FOLD) + || (cl->flags & ANYOF_FOLD)) ) { + int i; + + /* OR char bitmap and class bitmap separately */ + for (i = 0; i < ANYOF_BITMAP_SIZE; i++) + cl->bitmap[i] |= or_with->bitmap[i]; + if (or_with->flags & ANYOF_CLASS) { + for (i = 0; i < ANYOF_CLASSBITMAP_SIZE; i++) + cl->classflags[i] |= or_with->classflags[i]; + cl->flags |= ANYOF_CLASS; + } + } + else { /* XXXX: logic is complicated, leave it along for a moment. */ + cl_anything(cl); + } + } + if (or_with->flags & ANYOF_EOS) + cl->flags |= ANYOF_EOS; +} + +/* REx optimizer. Converts nodes into quickier variants "in place". + Finds fixed substrings. */ + /* Stops at toplevel WHILEM as well as at `last'. At end *scanp is set to the position after last scanned or to NULL. */ @@ -253,11 +390,13 @@ S_study_chunk(pTHX_ regnode **scanp, I32 *deltap, regnode *last, scan_data_t *da int is_inf_internal = 0; /* The studied chunk is infinite */ I32 is_par = OP(scan) == OPEN ? ARG(scan) : 0; scan_data_t data_fake; + struct regnode_charclass_class and_with; /* Valid if flags & SCF_DO_STCLASS_OR */ while (scan && OP(scan) != END && scan < last) { /* Peephole optimizer: */ if (PL_regkind[(U8)OP(scan)] == EXACT) { + /* Merge several consecutive EXACTish nodes into one. */ regnode *n = regnext(scan); U32 stringok = 1; #ifdef DEBUGGING @@ -305,19 +444,16 @@ S_study_chunk(pTHX_ regnode **scanp, I32 *deltap, regnode *last, scan_data_t *da /* Allow dumping */ n = scan + NODE_SZ_STR(scan); while (n <= stop) { - /* Purify reports a benign UMR here sometimes, because we - * don't initialize the OP() slot of a node when that node - * is occupied by just the trailing null of the string in - * an EXACT node */ if (PL_regkind[(U8)OP(n)] != NOTHING || OP(n) == NOTHING) { OP(n) = OPTIMIZED; NEXT_OFF(n) = 0; } n++; } -#endif - +#endif } + /* Follow the next-chain of the current node and optimize + away all the NOTHINGs from it. */ if (OP(scan) != CURLYX) { int max = (reg_off_by_arg[OP(scan)] ? I32_MAX @@ -338,6 +474,8 @@ S_study_chunk(pTHX_ regnode **scanp, I32 *deltap, regnode *last, scan_data_t *da else NEXT_OFF(scan) = off; } + /* The principal pseudo-switch. Cannot be a switch, since we + look into several different things. */ if (OP(scan) == BRANCH || OP(scan) == BRANCHJ || OP(scan) == IFTHEN || OP(scan) == SUSPEND) { next = regnext(scan); @@ -345,11 +483,15 @@ S_study_chunk(pTHX_ regnode **scanp, I32 *deltap, regnode *last, scan_data_t *da if (OP(next) == code || code == IFTHEN || code == SUSPEND) { I32 max1 = 0, min1 = I32_MAX, num = 0; + struct regnode_charclass_class accum; - if (flags & SCF_DO_SUBSTR) - scan_commit(data); + if (flags & SCF_DO_SUBSTR) /* XXXX Add !SUSPEND? */ + scan_commit(data); /* Cannot merge strings after this. */ + if (flags & SCF_DO_STCLASS) + cl_init_zero(&accum); while (OP(scan) == code) { - I32 deltanext, minnext; + I32 deltanext, minnext, f = 0; + struct regnode_charclass_class this_class; num++; data_fake.flags = 0; @@ -359,9 +501,14 @@ S_study_chunk(pTHX_ regnode **scanp, I32 *deltap, regnode *last, scan_data_t *da scan = NEXTOPER(scan); if (code != BRANCH) scan = NEXTOPER(scan); - /* We suppose the run is continuous, last=next...*/ + if (flags & SCF_DO_STCLASS) { + cl_init(&this_class); + data_fake.start_class = &this_class; + f = SCF_DO_STCLASS_AND; + } + /* we suppose the run is continuous, last=next...*/ minnext = study_chunk(&scan, &deltanext, next, - &data_fake, 0); + &data_fake, f); if (min1 > minnext) min1 = minnext; if (max1 < minnext + deltanext) @@ -375,6 +522,8 @@ S_study_chunk(pTHX_ regnode **scanp, I32 *deltap, regnode *last, scan_data_t *da data->flags |= SF_HAS_EVAL; if (data) data->whilem_c = data_fake.whilem_c; + if (flags & SCF_DO_STCLASS) + cl_or(&accum, &this_class); if (code == SUSPEND) break; } @@ -388,6 +537,18 @@ S_study_chunk(pTHX_ regnode **scanp, I32 *deltap, regnode *last, scan_data_t *da } min += min1; delta += max1 - min1; + if (flags & SCF_DO_STCLASS_OR) { + cl_or(data->start_class, &accum); + if (min1) { + cl_and(data->start_class, &and_with); + flags &= ~SCF_DO_STCLASS; + } + } + else if (flags & SCF_DO_STCLASS_AND) { + cl_and(data->start_class, &accum); + if (min1) + flags &= ~SCF_DO_STCLASS; + } } else if (code == BRANCHJ) /* single branch is optimized. */ scan = NEXTOPER(NEXTOPER(scan)); @@ -421,9 +582,34 @@ S_study_chunk(pTHX_ regnode **scanp, I32 *deltap, regnode *last, scan_data_t *da data->pos_min += l; /* As in the first entry. */ data->flags &= ~SF_BEFORE_EOL; } + if (flags & SCF_DO_STCLASS_AND) { + /* Check whether it is compatible with what we know already! */ + int compat = 1; + + if (!(data->start_class->flags & (ANYOF_CLASS | ANYOF_LOCALE)) + && !ANYOF_BITMAP_TEST(data->start_class, *STRING(scan)) + && (!(data->start_class->flags & ANYOF_FOLD) + || !ANYOF_BITMAP_TEST(data->start_class, + PL_fold[*STRING(scan)]))) + compat = 0; + ANYOF_CLASS_ZERO(data->start_class); + ANYOF_BITMAP_ZERO(data->start_class); + if (compat) + ANYOF_BITMAP_SET(data->start_class, *STRING(scan)); + data->start_class->flags &= ~ANYOF_EOS; + } + else if (flags & SCF_DO_STCLASS_OR) { + /* false positive possible if the class is case-folded */ + ANYOF_BITMAP_SET(data->start_class, *STRING(scan)); + data->start_class->flags &= ~ANYOF_EOS; + cl_and(data->start_class, &and_with); + } + flags &= ~SCF_DO_STCLASS; } - else if (PL_regkind[(U8)OP(scan)] == EXACT) { + else if (PL_regkind[(U8)OP(scan)] == EXACT) { /* But OP != EXACT! */ I32 l = STR_LEN(scan); + + /* Search for fixed substrings supports EXACT only. */ if (flags & SCF_DO_SUBSTR) scan_commit(data); if (UTF) { @@ -439,19 +625,51 @@ S_study_chunk(pTHX_ regnode **scanp, I32 *deltap, regnode *last, scan_data_t *da min += l; if (data && (flags & SCF_DO_SUBSTR)) data->pos_min += l; + if (flags & SCF_DO_STCLASS_AND) { + /* Check whether it is compatible with what we know already! */ + int compat = 1; + + if (!(data->start_class->flags & (ANYOF_CLASS | ANYOF_LOCALE)) + && !ANYOF_BITMAP_TEST(data->start_class, *STRING(scan)) + && !ANYOF_BITMAP_TEST(data->start_class, + PL_fold[*STRING(scan)])) + compat = 0; + ANYOF_CLASS_ZERO(data->start_class); + ANYOF_BITMAP_ZERO(data->start_class); + if (compat) { + ANYOF_BITMAP_SET(data->start_class, *STRING(scan)); + data->start_class->flags &= ~ANYOF_EOS; + data->start_class->flags |= ANYOF_FOLD; + if (OP(scan) == EXACTFL) + data->start_class->flags |= ANYOF_LOCALE; + } + } + else if (flags & SCF_DO_STCLASS_OR) { + if (data->start_class->flags & ANYOF_FOLD) { + /* false positive possible if the class is case-folded. + Assume that the locale settings are the same... */ + ANYOF_BITMAP_SET(data->start_class, *STRING(scan)); + data->start_class->flags &= ~ANYOF_EOS; + } + cl_and(data->start_class, &and_with); + } + flags &= ~SCF_DO_STCLASS; } else if (strchr((char*)PL_varies,OP(scan))) { I32 mincount, maxcount, minnext, deltanext, pos_before, fl; + I32 f = flags; regnode *oscan = scan; - + struct regnode_charclass_class this_class; + struct regnode_charclass_class *oclass = NULL; + switch (PL_regkind[(U8)OP(scan)]) { - case WHILEM: + case WHILEM: /* End of (?:...)* . */ scan = NEXTOPER(scan); goto finish; case PLUS: - if (flags & SCF_DO_SUBSTR) { + if (flags & (SCF_DO_SUBSTR | SCF_DO_STCLASS)) { next = NEXTOPER(scan); - if (OP(next) == EXACT) { + if (OP(next) == EXACT || (flags & SCF_DO_STCLASS)) { mincount = 1; maxcount = REG_INFTY; next = regnext(scan); @@ -464,10 +682,17 @@ S_study_chunk(pTHX_ regnode **scanp, I32 *deltap, regnode *last, scan_data_t *da min++; /* Fall through. */ case STAR: + if (flags & SCF_DO_STCLASS) { + mincount = 0; + maxcount = REG_INFTY; + next = regnext(scan); + scan = NEXTOPER(scan); + goto do_curly; + } is_inf = is_inf_internal = 1; scan = regnext(scan); if (flags & SCF_DO_SUBSTR) { - scan_commit(data); + scan_commit(data); /* Cannot extend fixed substrings */ data->longest = &(data->longest_float); } goto optimize_curly_tail; @@ -478,7 +703,7 @@ S_study_chunk(pTHX_ regnode **scanp, I32 *deltap, regnode *last, scan_data_t *da scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS; do_curly: if (flags & SCF_DO_SUBSTR) { - if (mincount == 0) scan_commit(data); + if (mincount == 0) scan_commit(data); /* Cannot extend fixed substrings */ pos_before = data->pos_min; } if (data) { @@ -487,10 +712,45 @@ S_study_chunk(pTHX_ regnode **scanp, I32 *deltap, regnode *last, scan_data_t *da if (is_inf) data->flags |= SF_IS_INF; } + if (flags & SCF_DO_STCLASS) { + cl_init(&this_class); + oclass = data->start_class; + data->start_class = &this_class; + f |= SCF_DO_STCLASS_AND; + f &= ~SCF_DO_STCLASS_OR; + } + /* This will finish on WHILEM, setting scan, or on NULL: */ minnext = study_chunk(&scan, &deltanext, last, data, mincount == 0 - ? (flags & ~SCF_DO_SUBSTR) : flags); + ? (f & ~SCF_DO_SUBSTR) : f); + + if (flags & SCF_DO_STCLASS) + data->start_class = oclass; + if (mincount == 0 || minnext == 0) { + if (flags & SCF_DO_STCLASS_OR) { + cl_or(data->start_class, &this_class); + } + else if (flags & SCF_DO_STCLASS_AND) { + /* Switch to OR mode: cache the old value of + * data->start_class */ + StructCopy(data->start_class, &and_with, + struct regnode_charclass_class); + flags &= ~SCF_DO_STCLASS_AND; + StructCopy(&this_class, data->start_class, + struct regnode_charclass_class); + flags |= SCF_DO_STCLASS_OR; + data->start_class->flags |= ANYOF_EOS; + } + } else { /* Non-zero len */ + if (flags & SCF_DO_STCLASS_OR) { + cl_or(data->start_class, &this_class); + cl_and(data->start_class, &and_with); + } + else if (flags & SCF_DO_STCLASS_AND) + cl_and(data->start_class, &this_class); + flags &= ~SCF_DO_STCLASS; + } if (!scan) /* It was not CURLYX, but CURLY. */ scan = next; if (ckWARN(WARN_UNSAFE) && (minnext + deltanext == 0) @@ -640,6 +900,8 @@ S_study_chunk(pTHX_ regnode **scanp, I32 *deltap, regnode *last, scan_data_t *da data->pos_delta += - counted * deltanext + (minnext + deltanext) * maxcount - minnext * mincount; if (mincount != maxcount) { + /* Cannot extend fixed substrings found inside + the group. */ scan_commit(data); if (mincount && last_str) { sv_setsv(data->last_found, last_str); @@ -664,39 +926,258 @@ S_study_chunk(pTHX_ regnode **scanp, I32 *deltap, regnode *last, scan_data_t *da NEXT_OFF(oscan) += NEXT_OFF(next); } continue; - default: /* REF only? */ + default: /* REF and CLUMP only? */ if (flags & SCF_DO_SUBSTR) { - scan_commit(data); + scan_commit(data); /* Cannot expect anything... */ data->longest = &(data->longest_float); } is_inf = is_inf_internal = 1; + if (flags & SCF_DO_STCLASS_OR) + cl_anything(data->start_class); + flags &= ~SCF_DO_STCLASS; break; } } else if (strchr((char*)PL_simple,OP(scan)) || PL_regkind[(U8)OP(scan)] == ANYUTF8) { + int value; + if (flags & SCF_DO_SUBSTR) { scan_commit(data); data->pos_min++; } min++; + if (flags & SCF_DO_STCLASS) { + data->start_class->flags &= ~ANYOF_EOS; /* No match on empty */ + + /* Some of the logic below assumes that switching + locale on will only add false positives. */ + switch (PL_regkind[(U8)OP(scan)]) { + case ANYUTF8: + case SANY: + case SANYUTF8: + case ALNUMUTF8: + case ANYOFUTF8: + case ALNUMLUTF8: + case NALNUMUTF8: + case NALNUMLUTF8: + case SPACEUTF8: + case NSPACEUTF8: + case SPACELUTF8: + case NSPACELUTF8: + case DIGITUTF8: + case NDIGITUTF8: + default: + do_default: + /* Perl_croak(aTHX_ "panic: unexpected simple REx opcode %d", OP(scan)); */ + if (flags & SCF_DO_STCLASS_OR) /* Allow everything */ + cl_anything(data->start_class); + break; + case REG_ANY: + if (OP(scan) == SANY) + goto do_default; + if (flags & SCF_DO_STCLASS_OR) { /* Everything but \n */ + value = (ANYOF_BITMAP_TEST(data->start_class,'\n') + || (data->start_class->flags & ANYOF_CLASS)); + cl_anything(data->start_class); + } + if (flags & SCF_DO_STCLASS_AND || !value) + ANYOF_BITMAP_CLEAR(data->start_class,'\n'); + break; + case ANYOF: + if (flags & SCF_DO_STCLASS_AND) + cl_and(data->start_class, + (struct regnode_charclass_class*)scan); + else + cl_or(data->start_class, + (struct regnode_charclass_class*)scan); + break; + case ALNUM: + if (flags & SCF_DO_STCLASS_AND) { + if (!(data->start_class->flags & ANYOF_LOCALE)) { + ANYOF_CLASS_CLEAR(data->start_class,ANYOF_NALNUM); + for (value = 0; value < 256; value++) + if (!isALNUM(value)) + ANYOF_BITMAP_CLEAR(data->start_class, value); + } + } + else { + if (data->start_class->flags & ANYOF_LOCALE) + ANYOF_CLASS_SET(data->start_class,ANYOF_ALNUM); + else { + for (value = 0; value < 256; value++) + if (isALNUM(value)) + ANYOF_BITMAP_SET(data->start_class, value); + } + } + break; + case ALNUML: + if (flags & SCF_DO_STCLASS_AND) { + if (data->start_class->flags & ANYOF_LOCALE) + ANYOF_CLASS_CLEAR(data->start_class,ANYOF_NALNUM); + } + else { + ANYOF_CLASS_SET(data->start_class,ANYOF_ALNUM); + data->start_class->flags |= ANYOF_LOCALE; + } + break; + case NALNUM: + if (flags & SCF_DO_STCLASS_AND) { + if (!(data->start_class->flags & ANYOF_LOCALE)) { + ANYOF_CLASS_CLEAR(data->start_class,ANYOF_ALNUM); + for (value = 0; value < 256; value++) + if (isALNUM(value)) + ANYOF_BITMAP_CLEAR(data->start_class, value); + } + } + else { + if (data->start_class->flags & ANYOF_LOCALE) + ANYOF_CLASS_SET(data->start_class,ANYOF_NALNUM); + else { + for (value = 0; value < 256; value++) + if (!isALNUM(value)) + ANYOF_BITMAP_SET(data->start_class, value); + } + } + break; + case NALNUML: + if (flags & SCF_DO_STCLASS_AND) { + if (data->start_class->flags & ANYOF_LOCALE) + ANYOF_CLASS_CLEAR(data->start_class,ANYOF_ALNUM); + } + else { + data->start_class->flags |= ANYOF_LOCALE; + ANYOF_CLASS_SET(data->start_class,ANYOF_NALNUM); + } + break; + case SPACE: + if (flags & SCF_DO_STCLASS_AND) { + if (!(data->start_class->flags & ANYOF_LOCALE)) { + ANYOF_CLASS_CLEAR(data->start_class,ANYOF_NSPACE); + for (value = 0; value < 256; value++) + if (!isSPACE(value)) + ANYOF_BITMAP_CLEAR(data->start_class, value); + } + } + else { + if (data->start_class->flags & ANYOF_LOCALE) + ANYOF_CLASS_SET(data->start_class,ANYOF_SPACE); + else { + for (value = 0; value < 256; value++) + if (isSPACE(value)) + ANYOF_BITMAP_SET(data->start_class, value); + } + } + break; + case SPACEL: + if (flags & SCF_DO_STCLASS_AND) { + if (data->start_class->flags & ANYOF_LOCALE) + ANYOF_CLASS_CLEAR(data->start_class,ANYOF_NSPACE); + } + else { + data->start_class->flags |= ANYOF_LOCALE; + ANYOF_CLASS_SET(data->start_class,ANYOF_SPACE); + } + break; + case NSPACE: + if (flags & SCF_DO_STCLASS_AND) { + if (!(data->start_class->flags & ANYOF_LOCALE)) { + ANYOF_CLASS_CLEAR(data->start_class,ANYOF_SPACE); + for (value = 0; value < 256; value++) + if (isSPACE(value)) + ANYOF_BITMAP_CLEAR(data->start_class, value); + } + } + else { + if (data->start_class->flags & ANYOF_LOCALE) + ANYOF_CLASS_SET(data->start_class,ANYOF_NSPACE); + else { + for (value = 0; value < 256; value++) + if (!isSPACE(value)) + ANYOF_BITMAP_SET(data->start_class, value); + } + } + break; + case NSPACEL: + if (flags & SCF_DO_STCLASS_AND) { + if (data->start_class->flags & ANYOF_LOCALE) { + ANYOF_CLASS_CLEAR(data->start_class,ANYOF_SPACE); + for (value = 0; value < 256; value++) + if (!isSPACE(value)) + ANYOF_BITMAP_CLEAR(data->start_class, value); + } + } + else { + data->start_class->flags |= ANYOF_LOCALE; + ANYOF_CLASS_SET(data->start_class,ANYOF_NSPACE); + } + break; + case DIGIT: + if (flags & SCF_DO_STCLASS_AND) { + ANYOF_CLASS_CLEAR(data->start_class,ANYOF_NDIGIT); + for (value = 0; value < 256; value++) + if (!isDIGIT(value)) + ANYOF_BITMAP_CLEAR(data->start_class, value); + } + else { + if (data->start_class->flags & ANYOF_LOCALE) + ANYOF_CLASS_SET(data->start_class,ANYOF_DIGIT); + else { + for (value = 0; value < 256; value++) + if (isDIGIT(value)) + ANYOF_BITMAP_SET(data->start_class, value); + } + } + break; + case NDIGIT: + if (flags & SCF_DO_STCLASS_AND) { + ANYOF_CLASS_CLEAR(data->start_class,ANYOF_DIGIT); + for (value = 0; value < 256; value++) + if (isDIGIT(value)) + ANYOF_BITMAP_CLEAR(data->start_class, value); + } + else { + if (data->start_class->flags & ANYOF_LOCALE) + ANYOF_CLASS_SET(data->start_class,ANYOF_NDIGIT); + else { + for (value = 0; value < 256; value++) + if (!isDIGIT(value)) + ANYOF_BITMAP_SET(data->start_class, value); + } + } + break; + } + if (flags & SCF_DO_STCLASS_OR) + cl_and(data->start_class, &and_with); + flags &= ~SCF_DO_STCLASS; + } } else if (PL_regkind[(U8)OP(scan)] == EOL && flags & SCF_DO_SUBSTR) { data->flags |= (OP(scan) == MEOL ? SF_BEFORE_MEOL : SF_BEFORE_SEOL); } - else if (PL_regkind[(U8)OP(scan)] == BRANCHJ - && (scan->flags || data) + else if ( PL_regkind[(U8)OP(scan)] == BRANCHJ + /* Lookbehind, or need to calculate parens/evals/stclass: */ + && (scan->flags || data || (flags & SCF_DO_STCLASS)) && (OP(scan) == IFMATCH || OP(scan) == UNLESSM)) { + /* Lookahead/lookbehind */ I32 deltanext, minnext; regnode *nscan; + struct regnode_charclass_class intrnl; + int f = 0; data_fake.flags = 0; if (data) data_fake.whilem_c = data->whilem_c; + if ( flags & SCF_DO_STCLASS && !scan->flags + && OP(scan) == IFMATCH ) { /* Lookahead */ + cl_init(&intrnl); + data_fake.start_class = &intrnl; + f = SCF_DO_STCLASS_AND; + } next = regnext(scan); nscan = NEXTOPER(NEXTOPER(scan)); - minnext = study_chunk(&nscan, &deltanext, last, &data_fake, 0); + minnext = study_chunk(&nscan, &deltanext, last, &data_fake, f); if (scan->flags) { if (deltanext) { FAIL("variable length lookbehind not implemented"); @@ -712,6 +1193,13 @@ S_study_chunk(pTHX_ regnode **scanp, I32 *deltap, regnode *last, scan_data_t *da data->flags |= SF_HAS_EVAL; if (data) data->whilem_c = data_fake.whilem_c; + if (f) { + int was = (data->start_class->flags & ANYOF_EOS); + + cl_and(data->start_class, &intrnl); + if (was) + data->start_class->flags |= ANYOF_EOS; + } } else if (OP(scan) == OPEN) { pars++; @@ -732,6 +1220,8 @@ S_study_chunk(pTHX_ regnode **scanp, I32 *deltap, regnode *last, scan_data_t *da data->longest = &(data->longest_float); } is_inf = is_inf_internal = 1; + if (flags & SCF_DO_STCLASS_OR) /* Allow everything */ + cl_anything(data->start_class); } /* Else: zero-length, ignore. */ scan = regnext(scan); @@ -752,6 +1242,8 @@ S_study_chunk(pTHX_ regnode **scanp, I32 *deltap, regnode *last, scan_data_t *da data->flags |= SF_HAS_PAR; data->flags &= ~SF_IN_PAR; } + if (flags & SCF_DO_STCLASS_OR) + cl_and(data->start_class, &and_with); return min; } @@ -924,16 +1416,21 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm) Newz(1004, r->substrs, 1, struct reg_substr_data); StructCopy(&zero_scan_data, &data, scan_data_t); + /* XXXX Should not we check for something else? Usually it is OPEN1... */ if (OP(scan) != BRANCH) { /* Only one top-level choice. */ I32 fake; STRLEN longest_float_length, longest_fixed_length; + struct regnode_charclass_class ch_class; + int stclass_flag; first = scan; /* Skip introductions and multiplicators >= 1. */ while ((OP(first) == OPEN && (sawopen = 1)) || + /* An OR of *one* alternative - should not happen now. */ (OP(first) == BRANCH && OP(regnext(first)) != BRANCH) || (OP(first) == PLUS) || (OP(first) == MINMOD) || + /* An {n,m} with n>0 */ (PL_regkind[(U8)OP(first)] == CURLY && ARG1(first) > 0) ) { if (OP(first) == PLUS) sawplus = 1; @@ -944,13 +1441,13 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm) /* Starting-point info. */ again: - if (PL_regkind[(U8)OP(first) == EXACT]) { + if (PL_regkind[(U8)OP(first)] == EXACT) { if (OP(first) == EXACT); /* Empty, get anchored substr later. */ else if ((OP(first) == EXACTF || OP(first) == EXACTFL) && !UTF) r->regstclass = first; } - else if (strchr((char*)PL_simple+4,OP(first))) + else if (strchr((char*)PL_simple,OP(first))) r->regstclass = first; else if (PL_regkind[(U8)OP(first)] == BOUND || PL_regkind[(U8)OP(first)] == NBOUND) @@ -1011,9 +1508,15 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm) data.last_found = newSVpvn("",0); data.longest = &(data.longest_fixed); first = scan; - + if (!r->regstclass) { + cl_init(&ch_class); + data.start_class = &ch_class; + stclass_flag = SCF_DO_STCLASS_AND; + } else /* XXXX Check for BOUND? */ + stclass_flag = 0; + minlen = study_chunk(&first, &fake, scan + PL_regsize, /* Up to end */ - &data, SCF_DO_SUBSTR); + &data, SCF_DO_SUBSTR | stclass_flag); if ( PL_regnpar == 1 && data.longest == &(data.longest_fixed) && data.last_start_min == 0 && data.last_end > 0 && !PL_seen_zerolen @@ -1068,6 +1571,28 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm) SvREFCNT_dec(data.longest_fixed); longest_fixed_length = 0; } + if (r->regstclass + && (OP(r->regstclass) == REG_ANY || OP(r->regstclass) == ANYUTF8 + || OP(r->regstclass) == SANYUTF8 || OP(r->regstclass) == SANY)) + r->regstclass = NULL; + if ((!r->anchored_substr || r->anchored_offset) && stclass_flag + && !(data.start_class->flags & ANYOF_EOS) + && !cl_is_anything(data.start_class)) { + SV *sv; + I32 n = add_data(1, "f"); + + New(1006, PL_regcomp_rx->data->data[n], 1, + struct regnode_charclass_class); + StructCopy(data.start_class, + (struct regnode_charclass_class*)PL_regcomp_rx->data->data[n], + struct regnode_charclass_class); + r->regstclass = (regnode*)PL_regcomp_rx->data->data[n]; + r->reganch &= ~ROPT_SKIP; /* Used in find_byclass(). */ + DEBUG_r((sv = sv_newmortal(), + regprop(sv, (regnode*)data.start_class), + PerlIO_printf(Perl_debug_log, "synthetic stclass.\n", + SvPVX(sv)))); + } /* A temporary algorithm prefers floated substr to fixed one to dig more info. */ if (longest_fixed_length > longest_float_length) { @@ -1092,11 +1617,31 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm) else { /* Several toplevels. Best we can is to set minlen. */ I32 fake; + struct regnode_charclass_class ch_class; DEBUG_r(PerlIO_printf(Perl_debug_log, "\n")); scan = r->program + 1; - minlen = study_chunk(&scan, &fake, scan + PL_regsize, &data, 0); + cl_init(&ch_class); + data.start_class = &ch_class; + minlen = study_chunk(&scan, &fake, scan + PL_regsize, &data, SCF_DO_STCLASS_AND); r->check_substr = r->anchored_substr = r->float_substr = Nullsv; + if (!(data.start_class->flags & ANYOF_EOS) + && !cl_is_anything(data.start_class)) { + SV *sv; + I32 n = add_data(1, "f"); + + New(1006, PL_regcomp_rx->data->data[n], 1, + struct regnode_charclass_class); + StructCopy(data.start_class, + (struct regnode_charclass_class*)PL_regcomp_rx->data->data[n], + struct regnode_charclass_class); + r->regstclass = (regnode*)PL_regcomp_rx->data->data[n]; + r->reganch &= ~ROPT_SKIP; /* Used in find_byclass(). */ + DEBUG_r((sv = sv_newmortal(), + regprop(sv, (regnode*)data.start_class), + PerlIO_printf(Perl_debug_log, "synthetic stclass.\n", + SvPVX(sv)))); + } } r->minlen = minlen; @@ -3322,6 +3867,17 @@ Perl_regdump(pTHX_ regexp *r) #endif /* DEBUGGING */ } +STATIC void +S_put_byte(pTHX_ SV *sv, int c) +{ + if (c <= ' ' || c == 127 || c == 255) + Perl_sv_catpvf(aTHX_ sv, "\\%o", c); + else if (c == '-' || c == ']' || c == '\\' || c == '^') + Perl_sv_catpvf(aTHX_ sv, "\\%c", c); + else + Perl_sv_catpvf(aTHX_ sv, "%c", c); +} + /* - regprop - printable representation of opcode */ @@ -3353,6 +3909,67 @@ Perl_regprop(pTHX_ SV *sv, regnode *o) Perl_sv_catpvf(aTHX_ sv, "%d", ARG(o)); /* Parenth number */ else if (k == LOGICAL) Perl_sv_catpvf(aTHX_ sv, "[%d]", o->flags); /* 2: embedded, otherwise 1 */ + else if (k == ANYOF) { + int i, rangestart = -1; + const char * const out[] = { /* Should be syncronized with + a table in regcomp.h */ + "\\w", + "\\W", + "\\s", + "\\S", + "\\d", + "\\D", + "[:alnum:]", + "[:^alnum:]", + "[:alpha:]", + "[:^alpha:]", + "[:ascii:]", + "[:^ascii:]", + "[:ctrl:]", + "[:^ctrl:]", + "[:graph:]", + "[:^graph:]", + "[:lower:]", + "[:^lower:]", + "[:print:]", + "[:^print:]", + "[:punct:]", + "[:^punct:]", + "[:upper:]", + "[:!upper:]", + "[:xdigit:]", + "[:^xdigit:]" + }; + + if (o->flags & ANYOF_LOCALE) + sv_catpv(sv, "{loc}"); + if (o->flags & ANYOF_FOLD) + sv_catpv(sv, "{i}"); + Perl_sv_catpvf(aTHX_ sv, "[%s", PL_colors[0]); + if (o->flags & ANYOF_INVERT) + sv_catpv(sv, "^"); + for (i = 0; i <= 256; i++) { + if (i < 256 && ANYOF_BITMAP_TEST(o,i)) { + if (rangestart == -1) + rangestart = i; + } else if (rangestart != -1) { + if (i <= rangestart + 3) + for (; rangestart < i; rangestart++) + put_byte(sv, rangestart); + else { + put_byte(sv, rangestart); + sv_catpv(sv, "-"); + put_byte(sv, i - 1); + } + rangestart = -1; + } + } + if (o->flags & ANYOF_CLASS) + for (i = 0; i < sizeof(out)/sizeof(char*); i++) + if (ANYOF_CLASS_TEST(o,i)) + sv_catpv(sv, out[i]); + Perl_sv_catpvf(aTHX_ sv, "%s]", PL_colors[1]); + } else if (k == BRANCHJ && (OP(o) == UNLESSM || OP(o) == IFMATCH)) Perl_sv_catpvf(aTHX_ sv, "[-%d]", o->flags); #endif /* DEBUGGING */ @@ -3414,6 +4031,9 @@ Perl_pregfree(pTHX_ struct regexp *r) case 's': SvREFCNT_dec((SV*)r->data->data[n]); break; + case 'f': + Safefree(r->data->data[n]); + break; case 'p': new_comppad = (AV*)r->data->data[n]; break; diff --git a/regcomp.h b/regcomp.h index 2fcf7a9..3624917 100644 --- a/regcomp.h +++ b/regcomp.h @@ -185,7 +185,12 @@ struct regnode_charclass_class { #define ANYOF_FOLD 0x02 #define ANYOF_LOCALE 0x01 +/* Used for regstclass only */ +#define ANYOF_EOS 0x10 /* Can match an empty string too */ + /* Character classes for node->classflags of ANYOF */ +/* Should be synchronized with a table in regprop() */ +/* 2n should pair with 2n+1 */ #define ANYOF_ALNUM 0 /* \w, utf8::IsWord, isALNUM() */ #define ANYOF_NALNUM 1 diff --git a/regexec.c b/regexec.c index cce1166..adde1dd 100644 --- a/regexec.c +++ b/regexec.c @@ -335,6 +335,7 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos, DEBUG_r(PerlIO_printf(Perl_debug_log, "String too short...\n")); goto fail; } + check = prog->check_substr; if (prog->reganch & ROPT_ANCH) { /* Match at beg-of-str or after \n */ ml_anch = !( (prog->reganch & ROPT_ANCH_SINGLE) || ( (prog->reganch & ROPT_ANCH_BOL) @@ -351,8 +352,8 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos, } PL_regeol = strend; /* Used in HOP() */ s = HOPc(strpos, prog->check_offset_min); - if (SvTAIL(prog->check_substr)) { - slen = SvCUR(prog->check_substr); /* >= 1 */ + if (SvTAIL(check)) { + slen = SvCUR(check); /* >= 1 */ if ( strend - s > slen || strend - s < slen - 1 || (strend - s == slen && strend[-1] != '\n')) { @@ -361,29 +362,28 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos, } /* Now should match s[0..slen-2] */ slen--; - if (slen && (*SvPVX(prog->check_substr) != *s + if (slen && (*SvPVX(check) != *s || (slen > 1 - && memNE(SvPVX(prog->check_substr), s, slen)))) { + && memNE(SvPVX(check), s, slen)))) { report_neq: DEBUG_r(PerlIO_printf(Perl_debug_log, "String not equal...\n")); goto fail_finish; } } - else if (*SvPVX(prog->check_substr) != *s - || ((slen = SvCUR(prog->check_substr)) > 1 - && memNE(SvPVX(prog->check_substr), s, slen))) + else if (*SvPVX(check) != *s + || ((slen = SvCUR(check)) > 1 + && memNE(SvPVX(check), s, slen))) goto report_neq; goto success_at_start; } /* Match is anchored, but substr is not anchored wrt beg-of-str. */ s = strpos; start_shift = prog->check_offset_min; /* okay to underestimate on CC */ - /* Should be nonnegative! */ end_shift = prog->minlen - start_shift - - CHR_SVLEN(prog->check_substr) + (SvTAIL(prog->check_substr) != 0); + CHR_SVLEN(check) + (SvTAIL(check) != 0); if (!ml_anch) { - I32 end = prog->check_offset_max + CHR_SVLEN(prog->check_substr) - - (SvTAIL(prog->check_substr) != 0); + I32 end = prog->check_offset_max + CHR_SVLEN(check) + - (SvTAIL(check) != 0); I32 eshift = strend - s - end; if (end_shift < eshift) @@ -396,7 +396,7 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos, start_shift = prog->check_offset_min; /* okay to underestimate on CC */ /* Should be nonnegative! */ end_shift = prog->minlen - start_shift - - CHR_SVLEN(prog->check_substr) + (SvTAIL(prog->check_substr) != 0); + CHR_SVLEN(check) + (SvTAIL(check) != 0); } #ifdef DEBUGGING /* 7/99: reports of failure (with the older version) */ @@ -404,7 +404,6 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos, Perl_croak(aTHX_ "panic: end_shift"); #endif - check = prog->check_substr; restart: /* Find a possible match in the region s..strend by looking for the "check" substring in the region corrected by start/end_shift. */ @@ -701,6 +700,8 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos, } DEBUG_r( PerlIO_printf(Perl_debug_log, "This position contradicts STCLASS...\n") ); + if ((prog->reganch & ROPT_ANCH) && !ml_anch) + goto fail; /* Contradict one of substrings */ if (prog->anchored_substr) { if (prog->anchored_substr == check) { diff --git a/t/op/re_tests b/t/op/re_tests index 20b2d63..357b705 100644 --- a/t/op/re_tests +++ b/t/op/re_tests @@ -747,3 +747,4 @@ tt+$ xxxtt y - - '\.c(pp|xx|c)?$'i Changes n - - '\.c(pp|xx|c)?$'i IO.c y - - '(\.c(pp|xx|c)?$)'i IO.c y $1 .c +^([a-z]:) C:/ n - - diff --git a/t/op/subst.t b/t/op/subst.t index 2d15df4..9757f4c 100755 --- a/t/op/subst.t +++ b/t/op/subst.t @@ -6,7 +6,7 @@ BEGIN { require Config; import Config; } -print "1..83\n"; +print "1..84\n"; $x = 'foo'; $_ = "x"; @@ -375,4 +375,7 @@ $x = $x = 'interp'; eval q% ($_ = "x") =~ s/x(($x)*)/"$1"/e %; print +($_ eq '' and !length $@) ? "ok 83\n" : "not ok 83\n# \$_ eq $_, $@\n"; +$_ = "C:/"; +s/^([a-z]:)/\u$1/ and print "not "; +print "ok 84\n";