From: Yves Orton Date: Wed, 7 Feb 2007 22:53:25 +0000 (+0100) Subject: Re: [PATCH - provisional] H. Merijn Brands idea of buffer numbering. X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=594d70332e6d7552f1cb2180b59e1c78bea05ea1;p=p5sagit%2Fp5-mst-13.2.git Re: [PATCH - provisional] H. Merijn Brands idea of buffer numbering. Message-ID: <9b18b3110702071353l250d8a67x188c4e234e8905c7@mail.gmail.com> p4raw-id: //depot/perl@30169 --- diff --git a/pod/perlre.pod b/pod/perlre.pod index d913c80..5287965 100644 --- a/pod/perlre.pod +++ b/pod/perlre.pod @@ -711,6 +711,29 @@ is equivalent to the more verbose /(?:(?s-i)more.*than).*million/i +=item C<(?|pattern)> +X<(?|)> X + +This is the "branch reset" pattern, which has the special property +that the capture buffers are numbered from the same starting point +in each branch. + +Normally capture buffers in a pattern are numbered sequentially, left +to right in the pattern. Inside of this construct this behaviour is +overridden so that the capture buffers in each branch share the same +numbers. The numbering in each branch will be as normal, and any +buffers following the use of this pattern will be numbered as though +the construct contained only one branch, that being the one with the +most capture buffers in it. + +Consider the following pattern. The numbers underneath show which +buffer the captured content will be stored in. 
+ + + # before ---------------branch-reset----------- after + / ( a ) (?| x ( y ) z | (p (q) r) | (t) u (v) ) ( z ) /x + # 1 2 2 3 2 3 4 + =item Look-Around Assertions X X X X diff --git a/regcomp.c b/regcomp.c index 7a74cfc..c75b5f3 100644 --- a/regcomp.c +++ b/regcomp.c @@ -4962,6 +4962,8 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth) const I32 oregflags = RExC_flags; bool have_branch = 0; bool is_open = 0; + I32 freeze_paren = 0; + I32 after_freeze = 0; /* for (?g), (?gc), and (?o) warnings; warning about (?c) will warn about (?g) -- japhy */ @@ -5212,6 +5214,13 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth) nextchar(pRExC_state); return ret; } + break; + case '|': /* (?|...) */ + /* branch reset, behave like a (?:...) except that + buffers in alternations share the same numbers */ + paren = ':'; + after_freeze = freeze_paren = RExC_npar; + break; case ':': /* (?:...) */ case '>': /* (?>...) */ break; @@ -5668,6 +5677,11 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth) if (SIZE_ONLY) RExC_extralen += 2; /* Account for LONGJMP. */ nextchar(pRExC_state); + if (freeze_paren) { + if (RExC_npar > after_freeze) + after_freeze = RExC_npar; + RExC_npar = freeze_paren; + } br = regbranch(pRExC_state, &flags, 0, depth+1); if (br == NULL) @@ -5769,7 +5783,8 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth) FAIL("Junk on end of regexp"); /* "Can't happen". */ /* NOTREACHED */ } - + if (after_freeze) + RExC_npar = after_freeze; return(ret); } diff --git a/t/op/re_tests b/t/op/re_tests index c047d3a..aa6ec7b 100644 --- a/t/op/re_tests +++ b/t/op/re_tests @@ -1270,3 +1270,13 @@ a*(*F) aaaab n - - (?=xy(?<=(aaxy))) ..aaxy.. y $1 aaxy X(\w+)(?=\s)|X(\w+) Xab y [$1-$2] [-ab] + +#check that branch reset works ok. +(?|a(.)b|d(.(o).)d|i(.)(.)j)(.) d!o!da y $1-$2-$3 !o!-o-a +(?|a(.)b|d(.(o).)d|i(.)(.)j)(.) aabc y $1-$2-$3 a--c +(?|a(.)b|d(.(o).)d|i(.)(.)j)(.) 
ixyjp y $1-$2-$3 x-y-p +(?|(?|(a)|(b))|(?|(c)|(d))) a y $1 a +(?|(?|(a)|(b))|(?|(c)|(d))) b y $1 b +(?|(?|(a)|(b))|(?|(c)|(d))) c y $1 c +(?|(?|(a)|(b))|(?|(c)|(d))) d y $1 d +(.)(?|(.)(.)x|(.)d)(.) abcde y $1-$2-$3-$4-$5- b-c--e--