From: Yves Orton Date: Thu, 1 Feb 2007 17:06:37 +0000 (+0100) Subject: Re: prerelease checklist for Perl 5.10 X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=fae667d5a60f37538a5761795f7af2165c7d4fb0;p=p5sagit%2Fp5-mst-13.2.git Re: prerelease checklist for Perl 5.10 Date: Thu, 1 Feb 2007 17:06:37 +0100 Message-ID: <9b18b3110702010806n7e095317v77f5dc1eb765f8d@mail.gmail.com> Subject: Re: prerelease checklist for Perl 5.10 From: demerphq Date: Fri, 2 Feb 2007 18:10:14 +0100 Message-ID: <9b18b3110702020910l31c7784fi5e37bf777b6eafb3@mail.gmail.com> Regular expression changes to fix failing tests in XML::Twig and Mail::SpamAssassin. The breakages occurred in changes #28785 and #29279. p4raw-id: //depot/perl@30104 --- diff --git a/embed.fnc b/embed.fnc index 5564a8c..3e601e5 100644 --- a/embed.fnc +++ b/embed.fnc @@ -1400,6 +1400,7 @@ ERsn |U8* |reghop4 |NN U8 *pos|I32 off|NN const U8 *llim|NN const U8 *rlim #endif ERsn |U8* |reghopmaybe3 |NN U8 *pos|I32 off|NN const U8 *lim ERs |char* |find_byclass |NN regexp * prog|NN const regnode *c|NN char *s|NN const char *strend|NULLOK regmatch_info *reginfo +Es |void |swap_match_buff|NN regexp * prog Es |void |to_utf8_substr |NN regexp * prog Es |void |to_byte_substr |NN regexp * prog ERs |I32 |reg_check_named_buff_matched |NN const regexp *rex|NN const regnode *prog diff --git a/embed.h b/embed.h index 64a3c3d..f725992 100644 --- a/embed.h +++ b/embed.h @@ -1382,6 +1382,7 @@ #if defined(PERL_CORE) || defined(PERL_EXT) #define reghopmaybe3 S_reghopmaybe3 #define find_byclass S_find_byclass +#define swap_match_buff S_swap_match_buff #define to_utf8_substr S_to_utf8_substr #define to_byte_substr S_to_byte_substr #define reg_check_named_buff_matched S_reg_check_named_buff_matched @@ -3591,6 +3592,7 @@ #if defined(PERL_CORE) || defined(PERL_EXT) #define reghopmaybe3 S_reghopmaybe3 #define find_byclass(a,b,c,d,e) S_find_byclass(aTHX_ a,b,c,d,e) +#define swap_match_buff(a) S_swap_match_buff(aTHX_ a) #define 
to_utf8_substr(a) S_to_utf8_substr(aTHX_ a) #define to_byte_substr(a) S_to_byte_substr(aTHX_ a) #define reg_check_named_buff_matched(a,b) S_reg_check_named_buff_matched(aTHX_ a,b) diff --git a/proto.h b/proto.h index 8110b29..8199ec2 100644 --- a/proto.h +++ b/proto.h @@ -3791,6 +3791,9 @@ STATIC char* S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, cons __attribute__nonnull__(pTHX_3) __attribute__nonnull__(pTHX_4); +STATIC void S_swap_match_buff(pTHX_ regexp * prog) + __attribute__nonnull__(pTHX_1); + STATIC void S_to_utf8_substr(pTHX_ regexp * prog) __attribute__nonnull__(pTHX_1); diff --git a/regexec.c b/regexec.c index 8697eb6..72b9e87 100644 --- a/regexec.c +++ b/regexec.c @@ -1646,6 +1646,33 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, return s; } +void +S_swap_match_buff (pTHX_ regexp *prog) { + I32 *t; + RXi_GET_DECL(prog,progi); + + if (!progi->swap) { + /* We have to be careful. If the previous successful match + was from this regex we don't want a subsequent paritally + successful match to clobber the old results. + So when we detect this possibility we add a swap buffer + to the re, and switch the buffer each match. If we fail + we switch it back, otherwise we leave it swapped. 
+ */ + Newxz(progi->swap, 1, regexp_paren_ofs); + /* no need to copy these */ + Newxz(progi->swap->startp, prog->nparens + 1, I32); + Newxz(progi->swap->endp, prog->nparens + 1, I32); + } + t = progi->swap->startp; + progi->swap->startp = prog->startp; + prog->startp = t; + t = progi->swap->endp; + progi->swap->endp = prog->endp; + prog->endp = t; +} + + /* - regexec_flags - match a regexp against a string */ @@ -1674,6 +1701,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * I32 multiline; RXi_GET_DECL(prog,progi); regmatch_info reginfo; /* create some info to pass to regtry etc */ + bool swap_on_fail = 0; GET_RE_DEBUG_FLAGS_DECL; @@ -1751,26 +1779,9 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * reginfo.ganch = strbeg; } if (PL_curpm && (PM_GETRE(PL_curpm) == prog)) { - I32 *t; - if (!progi->swap) { - /* We have to be careful. If the previous successful match - was from this regex we don't want a subsequent paritally - successful match to clobber the old results. - So when we detect this possibility we add a swap buffer - to the re, and switch the buffer each match. If we fail - we switch it back, otherwise we leave it swapped. - */ - Newxz(progi->swap, 1, regexp_paren_ofs); - /* no need to copy these */ - Newxz(progi->swap->startp, prog->nparens + 1, I32); - Newxz(progi->swap->endp, prog->nparens + 1, I32); - } - t = progi->swap->startp; - progi->swap->startp = prog->startp; - prog->startp = t; - t = progi->swap->endp; - progi->swap->endp = prog->endp; - prog->endp = t; + swap_on_fail = 1; + swap_match_buff(prog); /* do we need a save destructor here for + eval dies? 
*/ } if (!(flags & REXEC_CHECKED) && (prog->check_substr != NULL || prog->check_utf8 != NULL)) { re_scream_pos_data d; @@ -2120,16 +2131,10 @@ phooey: PL_colors[4], PL_colors[5])); if (PL_reg_eval_set) restore_pos(aTHX_ prog); - if (progi->swap) { + if (swap_on_fail) /* we failed :-( roll it back */ - I32 *t; - t = progi->swap->startp; - progi->swap->startp = prog->startp; - prog->startp = t; - t = progi->swap->endp; - progi->swap->endp = prog->endp; - prog->endp = t; - } + swap_match_buff(prog); + return 0; } @@ -2869,7 +2874,6 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog) ST.B = next; ST.jump = trie->jump; ST.me = scan; - /* traverse the TRIE keeping track of all accepting states we transition through until we get to a failing node. @@ -2967,13 +2971,25 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog) PL_colors[4], (IV)ST.accepted, PL_colors[5] ); ); }} - - /* FALL THROUGH */ + goto trie_first_try; /* jump into the fail handler */ + /* NOTREACHED */ case TRIE_next_fail: /* we failed - try next alterative */ + if ( ST.jump) { + REGCP_UNWIND(ST.cp); + for (n = *PL_reglastparen; n > ST.lastparen; n--) + PL_regendp[n] = -1; + *PL_reglastparen = n; + } + trie_first_try: if (do_cutgroup) { do_cutgroup = 0; no_final = 0; } + + if ( ST.jump) { + ST.lastparen = *PL_reglastparen; + REGCP_SET(ST.cp); + } if ( ST.accepted == 1 ) { /* only one choice left - just continue */ DEBUG_EXECUTE_r({ @@ -3014,8 +3030,8 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog) continue; /* execute rest of RE */ } - - if (!ST.accepted-- ) { + + if ( !ST.accepted-- ) { DEBUG_EXECUTE_r({ PerlIO_printf( Perl_debug_log, "%*s %sTRIE failed...%s\n", @@ -3026,7 +3042,8 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog) FREETMPS; LEAVE; sayNO_SILENT; - } + /*NOTREACHED*/ + } /* There are at least two accepting states left. 
Presumably diff --git a/regexp.h b/regexp.h index 0bf886b..d43f05f 100644 --- a/regexp.h +++ b/regexp.h @@ -310,16 +310,41 @@ typedef struct regmatch_state { struct regmatch_state *prev_yes_state; } yes; - struct { + /* branchlike members */ + /* this is a fake union member that matches the first elements + * of each member that needs to behave like a branch */ + struct { /* this first element must match u.yes */ struct regmatch_state *prev_yes_state; - reg_trie_accepted *accept_buff; + U32 lastparen; + CHECKPOINT cp; + + } branchlike; + + struct { + /* the first elements must match u.branchlike */ + struct regmatch_state *prev_yes_state; + U32 lastparen; + CHECKPOINT cp; + + regnode *next_branch; /* next branch node */ + } branch; + + struct { + /* the first elements must match u.branchlike */ + struct regmatch_state *prev_yes_state; + U32 lastparen; + CHECKPOINT cp; + + reg_trie_accepted *accept_buff; /* accepting states we have seen */ U32 accepted; /* how many accepting states we have seen */ U16 *jump; /* positive offsets from me */ regnode *B; /* node following the trie */ regnode *me; /* Which node am I - needed for jump tries*/ } trie; + /* special types - these members are used to store state for special + regops like eval, if/then, lookaround and the markpoint state */ struct { /* this first element must match u.yes */ struct regmatch_state *prev_yes_state; @@ -338,6 +363,28 @@ typedef struct regmatch_state { struct { /* this first element must match u.yes */ struct regmatch_state *prev_yes_state; + I32 wanted; + I32 logical; /* saved copy of 'logical' var */ + regnode *me; /* the IFMATCH/SUSPEND/UNLESSM node */ + } ifmatch; /* and SUSPEND/UNLESSM */ + + struct { + /* this first element must match u.yes */ + struct regmatch_state *prev_yes_state; + struct regmatch_state *prev_mark; + SV* mark_name; + char *mark_loc; + } mark; + + struct { + int val; + } keeper; + + /* quantifiers - these members are used for storing state for + for the regops used to 
implement quantifiers */ + struct { + /* this first element must match u.yes */ + struct regmatch_state *prev_yes_state; struct regmatch_state *prev_curlyx; /* previous cur_curlyx */ CHECKPOINT cp; /* remember current savestack index */ bool minmod; @@ -365,14 +412,6 @@ typedef struct regmatch_state { struct { /* this first element must match u.yes */ struct regmatch_state *prev_yes_state; - U32 lastparen; - regnode *next_branch; /* next branch node */ - CHECKPOINT cp; - } branch; - - struct { - /* this first element must match u.yes */ - struct regmatch_state *prev_yes_state; I32 c1, c2; /* case fold search */ CHECKPOINT cp; I32 alen; /* length of first-matched A string */ @@ -393,25 +432,6 @@ typedef struct regmatch_state { regnode *A, *B; /* the nodes corresponding to /A*B/ */ } curly; /* and CURLYN/PLUS/STAR */ - struct { - /* this first element must match u.yes */ - struct regmatch_state *prev_yes_state; - I32 wanted; - I32 logical; /* saved copy of 'logical' var */ - regnode *me; /* the IFMATCH/SUSPEND/UNLESSM node */ - } ifmatch; /* and SUSPEND/UNLESSM */ - - struct { - /* this first element must match u.yes */ - struct regmatch_state *prev_yes_state; - struct regmatch_state *prev_mark; - SV* mark_name; - char *mark_loc; - } mark; - - struct { - int val; - } keeper; } u; } regmatch_state; diff --git a/t/op/re_tests b/t/op/re_tests index e2b33fb..c047d3a 100644 --- a/t/op/re_tests +++ b/t/op/re_tests @@ -1268,3 +1268,5 @@ a*(*F) aaaab n - - (?<=abcd(?<=(aaaabcd))) ..aaaabcd.. y $1 aaaabcd (?=xy(?<=(aaxy))) ..aaxy.. y $1 aaxy + +X(\w+)(?=\s)|X(\w+) Xab y [$1-$2] [-ab]