Change in handling of \RNNN inside nested patterns
Yves Orton [Wed, 29 Nov 2006 01:07:43 +0000 (02:07 +0100)]
Subject: Re: New development release in sight
Message-ID: <9b18b3110611281607i3d583febtd549989dc3cabc8a@mail.gmail.com>

p4raw-id: //depot/perl@29413

pod/perlre.pod
regcomp.c
t/op/pat.t
t/op/re_tests

index c1cc75d..bff63a6 100644 (file)
@@ -483,15 +483,24 @@ ambiguity by interpreting \10 as a backreference only if at least 10
 left parentheses have opened before it.  Likewise \11 is a
 backreference only if at least 11 left parentheses have opened
 before it.  And so on.  \1 through \9 are always interpreted as
-backreferences. 
+backreferences.
 
 X<relative backreference>
 In Perl 5.10 it is possible to relatively address a capture buffer by
 using the C<\RNNN> notation, where C<NNN> is negative offset to a
-preceding completed capture buffer. Thus C<\R1> refers to the last
-buffer closed, C<\R2> refers to the buffer before that, and so on. Note
-especially that C</(foo)(\R1)/> refers to the capture buffer containing
-C<foo>, not to the buffer containing C<\R1>.
+preceding capture buffer. Thus C<\R1> refers to the last buffer,
+C<\R2> refers to the buffer before that. For example:
+
+        /
+         (Y)            # buffer 1
+         (              # buffer 2
+            (X)         # buffer 3
+            \R1         # backref to buffer 3
+            \R3         # backref to buffer 1
+         )
+        /x
+
+and would match the same as C</(Y) ( (X) $3 $1 )/x>.
 
 Additionally, as of Perl 5.10 you may use named capture buffers and named
 backreferences. The notation is C<< (?<name>...) >> and C<< \k<name> >>
index 77ee7b4..3fe5561 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -156,7 +156,6 @@ typedef struct RExC_state_t {
 #define RExC_seen      (pRExC_state->seen)
 #define RExC_size      (pRExC_state->size)
 #define RExC_npar      (pRExC_state->npar)
-#define RExC_cpar      (pRExC_state->cpar)
 #define RExC_nestroot   (pRExC_state->nestroot)
 #define RExC_extralen  (pRExC_state->extralen)
 #define RExC_seen_zerolen      (pRExC_state->seen_zerolen)
@@ -4031,7 +4030,6 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm)
     RExC_end = xend;
     RExC_naughty = 0;
     RExC_npar = 1;
-    RExC_cpar = 1;
     RExC_nestroot = 0;
     RExC_size = 0L;
     RExC_emit = &PL_regdummy;
@@ -4127,7 +4125,6 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm)
     RExC_end = xend;
     RExC_naughty = 0;
     RExC_npar = 1;
-    RExC_cpar = 1;
     RExC_emit_start = ri->program;
     RExC_emit = ri->program;
 #ifdef DEBUGGING
@@ -5417,7 +5414,6 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
            ender = reg_node(pRExC_state, TAIL);
            break;
        case 1:
-           RExC_cpar++;
            ender = reganode(pRExC_state, CLOSE, parno);
            if (!SIZE_ONLY && RExC_seen & REG_SEEN_RECURSE) {
                DEBUG_OPTIMISE_MORE_r(PerlIO_printf(Perl_debug_log,
@@ -6372,7 +6368,7 @@ tryagain:
                    RExC_parse++;
                num = atoi(RExC_parse);
                 if (isrel) {
-                    num = RExC_cpar - num;
+                    num = RExC_npar - num;
                     if (num < 1)
                         vFAIL("Reference to nonexistent or unclosed group");
                 }
@@ -6386,12 +6382,6 @@ tryagain:
                    if (!SIZE_ONLY) {
                        if (num > (I32)RExC_rx->nparens)
                            vFAIL("Reference to nonexistent group");
-                       /* People make this error all the time apparently.
-                          So we cant fail on it, even though we should 
-                       
-                       else if (num >= RExC_cpar)
-                           vFAIL("Reference to unclosed group will always match");
-                       */
                    }
                    RExC_sawback = 1;
                    ret = reganode(pRExC_state,
index f337a58..2bc5da6 100755 (executable)
@@ -4111,6 +4111,16 @@ for my $c ("z", "\0", "!", chr(254), chr(256)) {
     $v='foo';
     iseq("$1",'bar','$1 is safe after /g - may fail due to specialized config in pp_hot.c')
 }
+{
+    local $Message = "http://nntp.perl.org/group/perl.perl5.porters/118663";
+    my $qr_barR1 = qr/(bar)\R1/;
+    ok("foobarbarxyz" =~ $qr_barR1);
+    ok("foobarbarxyz" =~ qr/foo${qr_barR1}xyz/);
+    ok("foobarbarxyz" =~ qr/(foo)${qr_barR1}xyz/);
+    ok("foobarbarxyz" =~ qr/(foo)(bar)\R1xyz/);
+    ok("foobarbarxyz" =~ qr/(foo${qr_barR1})xyz/);
+    ok("foobarbarxyz" =~ qr/(foo(bar)\R1)xyz/);
+} 
  
 # Test counter is at bottom of file. Put new tests above here.
 #-------------------------------------------------------------------
@@ -4158,7 +4168,7 @@ ok((q(a)x 100) =~ /^(??{'(.)'x 100})/,
 iseq(0+$::test,$::TestCount,"Got the right number of tests!");
 # Don't forget to update this!
 BEGIN {
-    $::TestCount = 1369; 
+    $::TestCount = 1375; 
     print "1..$::TestCount\n";
 }
 
index 4279dd6..cde5ccc 100644 (file)
@@ -1190,5 +1190,6 @@ a*(*F)    aaaab   n       -       -
 (a)(?:(?-1)|(?+1))(b)  abb     y       $1-$2   a-b
 (a)(?:(?-1)|(?+1))(b)  acb     n       -       -
 
-(foo)(\R1)     foofoo  y       $1-$2   foo-foo
-(foo)(\R1)(foo)(\R1)   foofoofoofoo    y       $1-$2-$3-$4     foo-foo-foo-foo
+(foo)(\R2)     foofoo  y       $1-$2   foo-foo
+(foo)(\R2)(foo)(\R2)   foofoofoofoo    y       $1-$2-$3-$4     foo-foo-foo-foo
+(([abc]+) \R1)(([abc]+) \R1)   abc abccba cba  y       $2-$4   abc-cba