Re: prerelease checklist for Perl 5.10
Yves Orton [Thu, 1 Feb 2007 17:06:37 +0000 (18:06 +0100)]
Date: Thu, 1 Feb 2007 17:06:37 +0100
Message-ID: <9b18b3110702010806n7e095317v77f5dc1eb765f8d@mail.gmail.com>

Subject: Re: prerelease checklist for Perl 5.10
From: demerphq <demerphq@gmail.com>
Date: Fri, 2 Feb 2007 18:10:14 +0100
Message-ID: <9b18b3110702020910l31c7784fi5e37bf777b6eafb3@mail.gmail.com>

Regular expression changes to fix failing tests in XML::Twig and
Mail::SpamAssassin.  The breakages occurred in changes #28785 and
#29279.

p4raw-id: //depot/perl@30104

embed.fnc
embed.h
proto.h
regexec.c
regexp.h
t/op/re_tests

index 5564a8c..3e601e5 100644 (file)
--- a/embed.fnc
+++ b/embed.fnc
@@ -1400,6 +1400,7 @@ ERsn      |U8*    |reghop4        |NN U8 *pos|I32 off|NN const U8 *llim|NN const U8 *rlim
 #endif
 ERsn   |U8*    |reghopmaybe3   |NN U8 *pos|I32 off|NN const U8 *lim
 ERs    |char*  |find_byclass   |NN regexp * prog|NN const regnode *c|NN char *s|NN const char *strend|NULLOK regmatch_info *reginfo
+Es     |void   |swap_match_buff|NN regexp * prog
 Es     |void   |to_utf8_substr |NN regexp * prog
 Es     |void   |to_byte_substr |NN regexp * prog
 ERs    |I32    |reg_check_named_buff_matched   |NN const regexp *rex|NN const regnode *prog
diff --git a/embed.h b/embed.h
index 64a3c3d..f725992 100644 (file)
--- a/embed.h
+++ b/embed.h
 #if defined(PERL_CORE) || defined(PERL_EXT)
 #define reghopmaybe3           S_reghopmaybe3
 #define find_byclass           S_find_byclass
+#define swap_match_buff                S_swap_match_buff
 #define to_utf8_substr         S_to_utf8_substr
 #define to_byte_substr         S_to_byte_substr
 #define reg_check_named_buff_matched   S_reg_check_named_buff_matched
 #if defined(PERL_CORE) || defined(PERL_EXT)
 #define reghopmaybe3           S_reghopmaybe3
 #define find_byclass(a,b,c,d,e)        S_find_byclass(aTHX_ a,b,c,d,e)
+#define swap_match_buff(a)     S_swap_match_buff(aTHX_ a)
 #define to_utf8_substr(a)      S_to_utf8_substr(aTHX_ a)
 #define to_byte_substr(a)      S_to_byte_substr(aTHX_ a)
 #define reg_check_named_buff_matched(a,b)      S_reg_check_named_buff_matched(aTHX_ a,b)
diff --git a/proto.h b/proto.h
index 8110b29..8199ec2 100644 (file)
--- a/proto.h
+++ b/proto.h
@@ -3791,6 +3791,9 @@ STATIC char*      S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, cons
                        __attribute__nonnull__(pTHX_3)
                        __attribute__nonnull__(pTHX_4);
 
+STATIC void    S_swap_match_buff(pTHX_ regexp * prog)
+                       __attribute__nonnull__(pTHX_1);
+
 STATIC void    S_to_utf8_substr(pTHX_ regexp * prog)
                        __attribute__nonnull__(pTHX_1);
 
index 8697eb6..72b9e87 100644 (file)
--- a/regexec.c
+++ b/regexec.c
@@ -1646,6 +1646,33 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
        return s;
 }
 
+void 
+S_swap_match_buff (pTHX_ regexp *prog) {
+    I32 *t;
+    RXi_GET_DECL(prog,progi);
+
+    if (!progi->swap) {
+    /* First call for this regex: create the spare offset buffer.
+       We have to be careful. If the previous successful match
+       was from this regex we don't want a subsequent partially
+       successful match to clobber the old results. So the caller
+       switches to the spare buffer before each match; if the match
+       fails it calls us again to switch back, otherwise it stays.
+    */
+        Newxz(progi->swap, 1, regexp_paren_ofs);
+        /* nparens + 1 entries each; no need to copy the old values */
+        Newxz(progi->swap->startp, prog->nparens + 1, I32);
+        Newxz(progi->swap->endp, prog->nparens + 1, I32);
+    }
+    t = progi->swap->startp;              /* exchange the startp arrays */
+    progi->swap->startp = prog->startp;
+    prog->startp = t;
+    t = progi->swap->endp;                /* exchange the endp arrays */
+    progi->swap->endp = prog->endp;
+    prog->endp = t;
+}    
+
+
 /*
  - regexec_flags - match a regexp against a string
  */
@@ -1674,6 +1701,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *
     I32 multiline;
     RXi_GET_DECL(prog,progi);
     regmatch_info reginfo;  /* create some info to pass to regtry etc */
+    bool swap_on_fail = 0;
 
     GET_RE_DEBUG_FLAGS_DECL;
 
@@ -1751,26 +1779,9 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *
            reginfo.ganch = strbeg;
     }
     if (PL_curpm && (PM_GETRE(PL_curpm) == prog)) {
-        I32 *t;
-        if (!progi->swap) {
-        /* We have to be careful. If the previous successful match
-           was from this regex we don't want a subsequent paritally
-           successful match to clobber the old results. 
-           So when we detect this possibility we add a swap buffer
-           to the re, and switch the buffer each match. If we fail
-           we switch it back, otherwise we leave it swapped.
-        */
-            Newxz(progi->swap, 1, regexp_paren_ofs);
-            /* no need to copy these */
-            Newxz(progi->swap->startp, prog->nparens + 1, I32);
-            Newxz(progi->swap->endp, prog->nparens + 1, I32);
-        }
-        t = progi->swap->startp;
-        progi->swap->startp = prog->startp;
-        prog->startp = t;
-        t = progi->swap->endp;
-        progi->swap->endp = prog->endp;
-        prog->endp = t;
+        swap_on_fail = 1;
+        swap_match_buff(prog); /* do we need a save destructor here for
+                                  eval dies? */
     }
     if (!(flags & REXEC_CHECKED) && (prog->check_substr != NULL || prog->check_utf8 != NULL)) {
        re_scream_pos_data d;
@@ -2120,16 +2131,10 @@ phooey:
                          PL_colors[4], PL_colors[5]));
     if (PL_reg_eval_set)
        restore_pos(aTHX_ prog);
-    if (progi->swap) {
+    if (swap_on_fail) 
         /* we failed :-( roll it back */
-        I32 *t;
-        t = progi->swap->startp;
-        progi->swap->startp = prog->startp;
-        prog->startp = t;
-        t = progi->swap->endp;
-        progi->swap->endp = prog->endp;
-        prog->endp = t;
-    }
+        swap_match_buff(prog);
+    
     return 0;
 }
 
@@ -2869,7 +2874,6 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
                ST.B = next;
                ST.jump = trie->jump;
                ST.me = scan;
-                
                /*
                   traverse the TRIE keeping track of all accepting states
                   we transition through until we get to a failing node.
@@ -2967,13 +2971,25 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
                        PL_colors[4], (IV)ST.accepted, PL_colors[5] );
                );
            }}
-
-           /* FALL THROUGH */
+            goto trie_first_try; /* jump into the fail handler */
+           /* NOTREACHED */
        case TRIE_next_fail: /* we failed - try next alterative */
+            if ( ST.jump) {
+                REGCP_UNWIND(ST.cp);
+               for (n = *PL_reglastparen; n > ST.lastparen; n--)
+                   PL_regendp[n] = -1;
+               *PL_reglastparen = n;
+           }
+          trie_first_try:
             if (do_cutgroup) {
                 do_cutgroup = 0;
                 no_final = 0;
             }
+
+            if ( ST.jump) {
+                ST.lastparen = *PL_reglastparen;
+               REGCP_SET(ST.cp);
+            }          
            if ( ST.accepted == 1 ) {
                /* only one choice left - just continue */
                DEBUG_EXECUTE_r({
@@ -3014,8 +3030,8 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
                
                continue; /* execute rest of RE */
            }
-
-           if (!ST.accepted-- ) {
+           
+           if ( !ST.accepted-- ) {
                DEBUG_EXECUTE_r({
                    PerlIO_printf( Perl_debug_log,
                        "%*s  %sTRIE failed...%s\n",
@@ -3026,7 +3042,8 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
                FREETMPS;
                LEAVE;
                sayNO_SILENT;
-           }
+               /*NOTREACHED*/
+           } 
 
            /*
               There are at least two accepting states left.  Presumably
index 0bf886b..d43f05f 100644 (file)
--- a/regexp.h
+++ b/regexp.h
@@ -310,16 +310,41 @@ typedef struct regmatch_state {
            struct regmatch_state *prev_yes_state;
        } yes;
 
-       struct {
+        /* branchlike members */
+        /* this is a fake union member that matches the first elements
+         * of each member that needs to behave like a branch */
+        struct {
            /* this first element must match u.yes */
            struct regmatch_state *prev_yes_state;
-           reg_trie_accepted *accept_buff;
+           U32 lastparen;
+           CHECKPOINT cp;
+           
+        } branchlike;
+                   
+       struct {
+           /* the first elements must match u.branchlike */
+           struct regmatch_state *prev_yes_state;
+           U32 lastparen;
+           CHECKPOINT cp;
+           
+           regnode *next_branch; /* next branch node */
+       } branch;
+
+       struct {
+           /* the first elements must match u.branchlike */
+           struct regmatch_state *prev_yes_state;
+           U32 lastparen;
+           CHECKPOINT cp;
+
+           reg_trie_accepted *accept_buff; /* accepting states we have seen */
            U32         accepted; /* how many accepting states we have seen */
            U16         *jump;  /* positive offsets from me */
            regnode     *B;     /* node following the trie */
            regnode     *me;    /* Which node am I - needed for jump tries*/
        } trie;
 
+        /* special types - these members are used to store state for special
+           regops like eval, if/then, lookaround and the markpoint state */
        struct {
            /* this first element must match u.yes */
            struct regmatch_state *prev_yes_state;
@@ -338,6 +363,28 @@ typedef struct regmatch_state {
        struct {
            /* this first element must match u.yes */
            struct regmatch_state *prev_yes_state;
+           I32 wanted;
+           I32 logical;        /* saved copy of 'logical' var */
+           regnode  *me; /* the IFMATCH/SUSPEND/UNLESSM node  */
+       } ifmatch; /* and SUSPEND/UNLESSM */
+       
+       struct {
+           /* this first element must match u.yes */
+           struct regmatch_state *prev_yes_state;
+           struct regmatch_state *prev_mark;
+           SV* mark_name;
+           char *mark_loc;
+       } mark;
+       
+       struct {
+           int val;
+       } keeper;
+
+        /* quantifiers - these members are used for storing state for
+           the regops used to implement quantifiers */
+       struct {
+           /* this first element must match u.yes */
+           struct regmatch_state *prev_yes_state;
            struct regmatch_state *prev_curlyx; /* previous cur_curlyx */
            CHECKPOINT  cp;     /* remember current savestack index */
            bool        minmod;
@@ -365,14 +412,6 @@ typedef struct regmatch_state {
        struct {
            /* this first element must match u.yes */
            struct regmatch_state *prev_yes_state;
-           U32 lastparen;
-           regnode *next_branch; /* next branch node */
-           CHECKPOINT cp;
-       } branch;
-
-       struct {
-           /* this first element must match u.yes */
-           struct regmatch_state *prev_yes_state;
            I32 c1, c2;         /* case fold search */
            CHECKPOINT cp;
            I32 alen;           /* length of first-matched A string */
@@ -393,25 +432,6 @@ typedef struct regmatch_state {
            regnode *A, *B;     /* the nodes corresponding to /A*B/  */
        } curly; /* and CURLYN/PLUS/STAR */
 
-       struct {
-           /* this first element must match u.yes */
-           struct regmatch_state *prev_yes_state;
-           I32 wanted;
-           I32 logical;        /* saved copy of 'logical' var */
-           regnode  *me; /* the IFMATCH/SUSPEND/UNLESSM node  */
-       } ifmatch; /* and SUSPEND/UNLESSM */
-       
-       struct {
-           /* this first element must match u.yes */
-           struct regmatch_state *prev_yes_state;
-           struct regmatch_state *prev_mark;
-           SV* mark_name;
-           char *mark_loc;
-       } mark;
-       
-       struct {
-           int val;
-       } keeper;
     } u;
 } regmatch_state;
 
index e2b33fb..c047d3a 100644 (file)
@@ -1268,3 +1268,5 @@ a*(*F)    aaaab   n       -       -
 
 (?<=abcd(?<=(aaaabcd)))        ..aaaabcd..     y       $1      aaaabcd
 (?=xy(?<=(aaxy)))      ..aaxy..        y       $1      aaxy
+
+X(\w+)(?=\s)|X(\w+)    Xab     y       [$1-$2] [-ab]