From: Yves Orton <demerphq@gmail.com>
Date: Wed, 7 Feb 2007 22:53:25 +0000 (+0100)
Subject: Re: [PATCH - provisional] H. Merijn Brands idea of buffer numbering.
X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=594d70332e6d7552f1cb2180b59e1c78bea05ea1;p=p5sagit%2Fp5-mst-13.2.git

Re: [PATCH - provisional] H. Merijn Brands idea of buffer numbering.
Message-ID: <9b18b3110702071353l250d8a67x188c4e234e8905c7@mail.gmail.com>

p4raw-id: //depot/perl@30169
---

diff --git a/pod/perlre.pod b/pod/perlre.pod
index d913c80..5287965 100644
--- a/pod/perlre.pod
+++ b/pod/perlre.pod
@@ -711,6 +711,29 @@ is equivalent to the more verbose
 
     /(?:(?s-i)more.*than).*million/i
 
+=item C<(?|pattern)>
+X<(?|)> X<Branch reset>
+
+This is the "branch reset" pattern, which has the special property
+that the capture buffers are numbered from the same starting point
+in each branch. 
+
+Normally capture buffers in a pattern are numbered sequentially, left
+to right in the pattern. Inside of this construct this behaviour is
+overridden so that the capture buffers in each branch share the same
+numbers. The numbering in each branch will be as normal, and any 
+buffers following the use of this pattern will be numbered as though
+the construct contained only one branch, that being the one with the
+most capture buffers in it.
+
+Consider the following pattern. The numbers underneath show which
+buffer the captured content will be stored in.
+
+
+    # before  ---------------branch-reset----------- after        
+    / ( a )  (?| x ( y ) z | (p (q) r) | (t) u (v) ) ( z ) /x
+    # 1            2         2  3        2     3     4  
+
 =item Look-Around Assertions
 X<look-around assertion> X<lookaround assertion> X<look-around> X<lookaround>
 
diff --git a/regcomp.c b/regcomp.c
index 7a74cfc..c75b5f3 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -4962,6 +4962,8 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
     const I32 oregflags = RExC_flags;
     bool have_branch = 0;
     bool is_open = 0;
+    I32 freeze_paren = 0;
+    I32 after_freeze = 0;
 
     /* for (?g), (?gc), and (?o) warnings; warning
        about (?c) will warn about (?g) -- japhy    */
@@ -5212,6 +5214,13 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
 	            nextchar(pRExC_state);
 	            return ret;
 	        }
+	        break;
+	    case '|':           /* (?|...) */
+	        /* branch reset, behave like a (?:...) except that
+	           buffers in alternations share the same numbers */
+	        paren = ':'; 
+	        after_freeze = freeze_paren = RExC_npar;
+	        break;
 	    case ':':           /* (?:...) */
 	    case '>':           /* (?>...) */
 		break;
@@ -5668,6 +5677,11 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
 	if (SIZE_ONLY)
 	    RExC_extralen += 2;		/* Account for LONGJMP. */
 	nextchar(pRExC_state);
+	if (freeze_paren) {
+	    if (RExC_npar > after_freeze)
+	        after_freeze = RExC_npar;
+            RExC_npar = freeze_paren;	    
+        }
         br = regbranch(pRExC_state, &flags, 0, depth+1);
 
 	if (br == NULL)
@@ -5769,7 +5783,8 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
 	    FAIL("Junk on end of regexp");	/* "Can't happen". */
 	/* NOTREACHED */
     }
-
+    if (after_freeze)
+        RExC_npar = after_freeze;
     return(ret);
 }
 
diff --git a/t/op/re_tests b/t/op/re_tests
index c047d3a..aa6ec7b 100644
--- a/t/op/re_tests
+++ b/t/op/re_tests
@@ -1270,3 +1270,13 @@ a*(*F)	aaaab	n	-	-
 (?=xy(?<=(aaxy)))	..aaxy..	y	$1	aaxy
 
 X(\w+)(?=\s)|X(\w+)	Xab	y	[$1-$2]	[-ab]
+
+#check that branch reset works ok.
+(?|a(.)b|d(.(o).)d|i(.)(.)j)(.)	d!o!da	y	$1-$2-$3	!o!-o-a
+(?|a(.)b|d(.(o).)d|i(.)(.)j)(.)	aabc	y	$1-$2-$3	a--c
+(?|a(.)b|d(.(o).)d|i(.)(.)j)(.)	ixyjp	y	$1-$2-$3	x-y-p
+(?|(?|(a)|(b))|(?|(c)|(d)))	a	y	$1	a
+(?|(?|(a)|(b))|(?|(c)|(d)))	b	y	$1	b
+(?|(?|(a)|(b))|(?|(c)|(d)))	c	y	$1	c
+(?|(?|(a)|(b))|(?|(c)|(d)))	d	y	$1	d
+(.)(?|(.)(.)x|(.)d)(.)	abcde	y	$1-$2-$3-$4-$5-	b-c--e--