From: Yves Orton <demerphq@gmail.com>
Date: Thu, 1 Feb 2007 17:06:37 +0000 (+0100)
Subject: Re: prerelease checklist for Perl 5.10
X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=fae667d5a60f37538a5761795f7af2165c7d4fb0;p=p5sagit%2Fp5-mst-13.2.git

Re: prerelease checklist for Perl 5.10
Date: Thu, 1 Feb 2007 17:06:37 +0100
Message-ID: <9b18b3110702010806n7e095317v77f5dc1eb765f8d@mail.gmail.com>

Subject: Re: prerelease checklist for Perl 5.10
From: demerphq <demerphq@gmail.com>
Date: Fri, 2 Feb 2007 18:10:14 +0100
Message-ID: <9b18b3110702020910l31c7784fi5e37bf777b6eafb3@mail.gmail.com>

Regular expression changes to fix failing tests in XML::Twig and
Mail::SpamAssassin.  The breakages occurred in changes #28785 and
#29279.

p4raw-id: //depot/perl@30104
---

diff --git a/embed.fnc b/embed.fnc
index 5564a8c..3e601e5 100644
--- a/embed.fnc
+++ b/embed.fnc
@@ -1400,6 +1400,7 @@ ERsn	|U8*	|reghop4	|NN U8 *pos|I32 off|NN const U8 *llim|NN const U8 *rlim
 #endif
 ERsn	|U8*	|reghopmaybe3	|NN U8 *pos|I32 off|NN const U8 *lim
 ERs	|char*	|find_byclass	|NN regexp * prog|NN const regnode *c|NN char *s|NN const char *strend|NULLOK regmatch_info *reginfo
+Es	|void	|swap_match_buff|NN regexp * prog
 Es	|void	|to_utf8_substr	|NN regexp * prog
 Es	|void	|to_byte_substr	|NN regexp * prog
 ERs	|I32	|reg_check_named_buff_matched	|NN const regexp *rex|NN const regnode *prog
diff --git a/embed.h b/embed.h
index 64a3c3d..f725992 100644
--- a/embed.h
+++ b/embed.h
@@ -1382,6 +1382,7 @@
 #if defined(PERL_CORE) || defined(PERL_EXT)
 #define reghopmaybe3		S_reghopmaybe3
 #define find_byclass		S_find_byclass
+#define swap_match_buff		S_swap_match_buff
 #define to_utf8_substr		S_to_utf8_substr
 #define to_byte_substr		S_to_byte_substr
 #define reg_check_named_buff_matched	S_reg_check_named_buff_matched
@@ -3591,6 +3592,7 @@
 #if defined(PERL_CORE) || defined(PERL_EXT)
 #define reghopmaybe3		S_reghopmaybe3
 #define find_byclass(a,b,c,d,e)	S_find_byclass(aTHX_ a,b,c,d,e)
+#define swap_match_buff(a)	S_swap_match_buff(aTHX_ a)
 #define to_utf8_substr(a)	S_to_utf8_substr(aTHX_ a)
 #define to_byte_substr(a)	S_to_byte_substr(aTHX_ a)
 #define reg_check_named_buff_matched(a,b)	S_reg_check_named_buff_matched(aTHX_ a,b)
diff --git a/proto.h b/proto.h
index 8110b29..8199ec2 100644
--- a/proto.h
+++ b/proto.h
@@ -3791,6 +3791,9 @@ STATIC char*	S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, cons
 			__attribute__nonnull__(pTHX_3)
 			__attribute__nonnull__(pTHX_4);
 
+STATIC void	S_swap_match_buff(pTHX_ regexp * prog)
+			__attribute__nonnull__(pTHX_1);
+
 STATIC void	S_to_utf8_substr(pTHX_ regexp * prog)
 			__attribute__nonnull__(pTHX_1);
 
diff --git a/regexec.c b/regexec.c
index 8697eb6..72b9e87 100644
--- a/regexec.c
+++ b/regexec.c
@@ -1646,6 +1646,33 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
 	return s;
 }
 
+void 
+S_swap_match_buff (pTHX_ regexp *prog) {
+    I32 *t;
+    RXi_GET_DECL(prog,progi);
+
+    if (!progi->swap) {
+    /* First call for this regex: lazily allocate its swap buffer.
+       If the previous successful match was from this regex we
+       don't want a subsequent partially successful match to
+       clobber the old results. So the caller exchanges the offset
+       buffers before such a match; if the match fails it calls us
+       again to switch them back, otherwise they stay swapped.
+    */
+        Newxz(progi->swap, 1, regexp_paren_ofs);
+        /* fresh arrays of nparens + 1 entries; no need to copy these */
+        Newxz(progi->swap->startp, prog->nparens + 1, I32);
+        Newxz(progi->swap->endp, prog->nparens + 1, I32);
+    }
+    t = progi->swap->startp; /* exchange startp of prog and its swap buffer */
+    progi->swap->startp = prog->startp;
+    prog->startp = t;
+    t = progi->swap->endp; /* exchange endp of prog and its swap buffer */
+    progi->swap->endp = prog->endp;
+    prog->endp = t;
+}    
+
+
 /*
  - regexec_flags - match a regexp against a string
  */
@@ -1674,6 +1701,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *
     I32 multiline;
     RXi_GET_DECL(prog,progi);
     regmatch_info reginfo;  /* create some info to pass to regtry etc */
+    bool swap_on_fail = 0;
 
     GET_RE_DEBUG_FLAGS_DECL;
 
@@ -1751,26 +1779,9 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *
 	    reginfo.ganch = strbeg;
     }
     if (PL_curpm && (PM_GETRE(PL_curpm) == prog)) {
-        I32 *t;
-        if (!progi->swap) {
-        /* We have to be careful. If the previous successful match
-           was from this regex we don't want a subsequent paritally
-           successful match to clobber the old results. 
-           So when we detect this possibility we add a swap buffer
-           to the re, and switch the buffer each match. If we fail
-           we switch it back, otherwise we leave it swapped.
-        */
-            Newxz(progi->swap, 1, regexp_paren_ofs);
-            /* no need to copy these */
-            Newxz(progi->swap->startp, prog->nparens + 1, I32);
-            Newxz(progi->swap->endp, prog->nparens + 1, I32);
-        }
-        t = progi->swap->startp;
-        progi->swap->startp = prog->startp;
-        prog->startp = t;
-        t = progi->swap->endp;
-        progi->swap->endp = prog->endp;
-        prog->endp = t;
+        swap_on_fail = 1;
+        swap_match_buff(prog); /* do we need a save destructor here for
+                                  eval dies? */
     }
     if (!(flags & REXEC_CHECKED) && (prog->check_substr != NULL || prog->check_utf8 != NULL)) {
 	re_scream_pos_data d;
@@ -2120,16 +2131,10 @@ phooey:
 			  PL_colors[4], PL_colors[5]));
     if (PL_reg_eval_set)
 	restore_pos(aTHX_ prog);
-    if (progi->swap) {
+    if (swap_on_fail) 
         /* we failed :-( roll it back */
-        I32 *t;
-        t = progi->swap->startp;
-        progi->swap->startp = prog->startp;
-        prog->startp = t;
-        t = progi->swap->endp;
-        progi->swap->endp = prog->endp;
-        prog->endp = t;
-    }
+        swap_match_buff(prog);
+    
     return 0;
 }
 
@@ -2869,7 +2874,6 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
 		ST.B = next;
 		ST.jump = trie->jump;
 		ST.me = scan;
-                
 	        /*
         	   traverse the TRIE keeping track of all accepting states
         	   we transition through until we get to a failing node.
@@ -2967,13 +2971,25 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
 			PL_colors[4], (IV)ST.accepted, PL_colors[5] );
 		);
 	    }}
-
-	    /* FALL THROUGH */
+            goto trie_first_try; /* jump into the fail handler */
+	    /* NOTREACHED */
 	case TRIE_next_fail: /* we failed - try next alterative */
+            if ( ST.jump) {
+                REGCP_UNWIND(ST.cp);
+	        for (n = *PL_reglastparen; n > ST.lastparen; n--)
+		    PL_regendp[n] = -1;
+	        *PL_reglastparen = n;
+	    }
+          trie_first_try:
             if (do_cutgroup) {
                 do_cutgroup = 0;
                 no_final = 0;
             }
+
+            if ( ST.jump) {
+                ST.lastparen = *PL_reglastparen;
+	        REGCP_SET(ST.cp);
+            }	        
 	    if ( ST.accepted == 1 ) {
 		/* only one choice left - just continue */
 		DEBUG_EXECUTE_r({
@@ -3014,8 +3030,8 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
 		
 		continue; /* execute rest of RE */
 	    }
-
-	    if (!ST.accepted-- ) {
+	    
+	    if ( !ST.accepted-- ) {
 	        DEBUG_EXECUTE_r({
 		    PerlIO_printf( Perl_debug_log,
 			"%*s  %sTRIE failed...%s\n",
@@ -3026,7 +3042,8 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
 		FREETMPS;
 		LEAVE;
 		sayNO_SILENT;
-	    }
+		/*NOTREACHED*/
+	    } 
 
 	    /*
 	       There are at least two accepting states left.  Presumably
diff --git a/regexp.h b/regexp.h
index 0bf886b..d43f05f 100644
--- a/regexp.h
+++ b/regexp.h
@@ -310,16 +310,41 @@ typedef struct regmatch_state {
 	    struct regmatch_state *prev_yes_state;
 	} yes;
 
-	struct {
+        /* branchlike members */
+        /* this is a fake union member that matches the first elements
+         * of each member that needs to behave like a branch */
+        struct {
 	    /* this first element must match u.yes */
 	    struct regmatch_state *prev_yes_state;
-	    reg_trie_accepted *accept_buff;
+	    U32 lastparen;
+	    CHECKPOINT cp;
+	    
+        } branchlike;
+        	    
+	struct {
+	    /* the first elements must match u.branchlike */
+	    struct regmatch_state *prev_yes_state;
+	    U32 lastparen;
+	    CHECKPOINT cp;
+	    
+	    regnode *next_branch; /* next branch node */
+	} branch;
+
+	struct {
+	    /* the first elements must match u.branchlike */
+	    struct regmatch_state *prev_yes_state;
+	    U32 lastparen;
+	    CHECKPOINT cp;
+
+	    reg_trie_accepted *accept_buff; /* accepting states we have seen */
 	    U32		accepted; /* how many accepting states we have seen */
 	    U16         *jump;  /* positive offsets from me */
 	    regnode	*B;	/* node following the trie */
 	    regnode	*me;	/* Which node am I - needed for jump tries*/
 	} trie;
 
+        /* special types - these members are used to store state for special
+           regops like eval, if/then, lookaround and the markpoint state */
 	struct {
 	    /* this first element must match u.yes */
 	    struct regmatch_state *prev_yes_state;
@@ -338,6 +363,28 @@ typedef struct regmatch_state {
 	struct {
 	    /* this first element must match u.yes */
 	    struct regmatch_state *prev_yes_state;
+	    I32 wanted;
+	    I32 logical;	/* saved copy of 'logical' var */
+	    regnode  *me; /* the IFMATCH/SUSPEND/UNLESSM node  */
+	} ifmatch; /* and SUSPEND/UNLESSM */
+	
+	struct {
+	    /* this first element must match u.yes */
+	    struct regmatch_state *prev_yes_state;
+	    struct regmatch_state *prev_mark;
+	    SV* mark_name;
+	    char *mark_loc;
+	} mark;
+	
+	struct {
+	    int val;
+	} keeper;
+
+        /* quantifiers - these members are used for storing state for
+           the regops used to implement quantifiers */
+	struct {
+	    /* this first element must match u.yes */
+	    struct regmatch_state *prev_yes_state;
 	    struct regmatch_state *prev_curlyx; /* previous cur_curlyx */
 	    CHECKPOINT	cp;	/* remember current savestack index */
 	    bool	minmod;
@@ -365,14 +412,6 @@ typedef struct regmatch_state {
 	struct {
 	    /* this first element must match u.yes */
 	    struct regmatch_state *prev_yes_state;
-	    U32 lastparen;
-	    regnode *next_branch; /* next branch node */
-	    CHECKPOINT cp;
-	} branch;
-
-	struct {
-	    /* this first element must match u.yes */
-	    struct regmatch_state *prev_yes_state;
 	    I32 c1, c2;		/* case fold search */
 	    CHECKPOINT cp;
 	    I32 alen;		/* length of first-matched A string */
@@ -393,25 +432,6 @@ typedef struct regmatch_state {
 	    regnode *A, *B;	/* the nodes corresponding to /A*B/  */
 	} curly; /* and CURLYN/PLUS/STAR */
 
-	struct {
-	    /* this first element must match u.yes */
-	    struct regmatch_state *prev_yes_state;
-	    I32 wanted;
-	    I32 logical;	/* saved copy of 'logical' var */
-	    regnode  *me; /* the IFMATCH/SUSPEND/UNLESSM node  */
-	} ifmatch; /* and SUSPEND/UNLESSM */
-	
-	struct {
-	    /* this first element must match u.yes */
-	    struct regmatch_state *prev_yes_state;
-	    struct regmatch_state *prev_mark;
-	    SV* mark_name;
-	    char *mark_loc;
-	} mark;
-	
-	struct {
-	    int val;
-	} keeper;
     } u;
 } regmatch_state;
 
diff --git a/t/op/re_tests b/t/op/re_tests
index e2b33fb..c047d3a 100644
--- a/t/op/re_tests
+++ b/t/op/re_tests
@@ -1268,3 +1268,5 @@ a*(*F)	aaaab	n	-	-
 
 (?<=abcd(?<=(aaaabcd)))	..aaaabcd..	y	$1	aaaabcd
 (?=xy(?<=(aaxy)))	..aaxy..	y	$1	aaxy
+
+X(\w+)(?=\s)|X(\w+)	Xab	y	[$1-$2]	[-ab]