=item Relative backreferences
-A new syntax C<\R1> ("1" being any positive decimal integer) allows
-relative backreferencing. This should make it easier to embed patterns
+A new syntax C<\g{N}> or C<\gN> where "N" is a decimal integer allows a
+safer form of back-reference notation as well as allowing relative
+backreferences. This should make it easier to generate and embed patterns
that contain backreferences. (Yves Orton)
=back
=item Reference to nonexistent or unclosed group in regex; marked by <-- HERE in m/%s/
-(F) You used something like C<\R7> in your regular expression, but there are
+(F) You used something like C<\g{-7}> in your regular expression, but there are
not at least seven sets of closed capturing parentheses in the expression before
-where the C<\R7> was located. It's also possible you forgot to escape the
-backslash.
+where the C<\g{-7}> was located.
The <-- HERE shows in the regular expression about where the problem was
discovered.
(F) You used a pattern of the form C<(*VERB:ARG)> but did not terminate
the pattern with a C<)>. Fix the pattern and retry.
+=item Unterminated \g{...} pattern in regex; marked by <-- HERE in m/%s/
+
+(F) You missed a close brace on a \g{..} pattern (group reference) in
+a regular expression. Fix the pattern and retry.
=item Unterminated <> operator
Unsupported in lookbehind.
\1 Backreference to a specific group.
'1' may actually be any positive integer.
- \R1 Relative backreference to a preceding closed group.
- '1' may actually be any positive integer.
+ \g1 Backreference to a specific or previous group,
+ \g{-1} number may be negative indicating a previous buffer and may
+ optionally be wrapped in curly brackets for safer parsing.
\k<name> Named backreference
\N{name} Named unicode character, or unicode escape
\x12 Hexadecimal escape sequence
before it. And so on. \1 through \9 are always interpreted as
backreferences.
-X<relative backreference>
-In Perl 5.10 it is possible to relatively address a capture buffer by
-using the C<\RNNN> notation, where C<NNN> is negative offset to a
-preceding capture buffer. Thus C<\R1> refers to the last buffer,
-C<\R2> refers to the buffer before that. For example:
+X<\g{1}> X<\g{-1}> X<relative backreference>
+In order to provide a safer and easier way to construct patterns using
+backrefs, in Perl 5.10 the C<\g{N}> notation is provided. The curly
+brackets are optional, however omitting them is less safe as the meaning
+of the pattern can be changed by text (such as digits) following it.
+When N is a positive integer the C<\g{N}> notation is exactly equivalent
+to using normal backreferences. When N is a negative integer then it is
+a relative backreference referring to the previous N'th capturing group.
+
+Thus C<\g{-1}> refers to the last buffer, C<\g{-2}> refers to the
+buffer before that. For example:
/
(Y) # buffer 1
( # buffer 2
(X) # buffer 3
- \R1 # backref to buffer 3
- \R3 # backref to buffer 1
+ \g{-1} # backref to buffer 3
+ \g{-3} # backref to buffer 1
)
/x
-and would match the same as C</(Y) ( (X) $3 $1 )/x>.
+and would match the same as C</(Y) ( (X) \3 \1 )/x>.
Additionally, as of Perl 5.10 you may use named capture buffers and named
backreferences. The notation is C<< (?<name>...) >> and C<< \k<name> >>
An example of how this might be used is as follows:
- /(?<NAME>(&NAME_PAT))(?<ADDR>(&ADDRESS_PAT))
+ /(?<NAME>(?&NAME_PAT))(?<ADDR>(?&ADDRESS_PAT))
(?(DEFINE)
- (<NAME_PAT>....)
- (<ADRESS_PAT>....)
+ (?<NAME_PAT>....)
+ (?<ADRESS_PAT>....)
)/x
Note that capture buffers matched inside of recursion are not accessible
if ( RExC_npar == 1 && data.longest == &(data.longest_fixed)
&& data.last_start_min == 0 && data.last_end > 0
&& !RExC_seen_zerolen
+ && !(RExC_seen & REG_SEEN_VERBARG)
&& (!(RExC_seen & REG_SEEN_GPOS) || (r->extflags & RXf_ANCH_GPOS)))
r->extflags |= RXf_CHECK_ALL;
scan_commit(pRExC_state, &data,&minlen,0);
case 'c':
case '0':
goto defchar;
- case 'R':
+ case 'g':
case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
{
I32 num;
- bool isrel=(*RExC_parse=='R');
- if (isrel)
+ bool isg = *RExC_parse == 'g';
+ bool isrel = 0;
+ bool hasbrace = 0;
+ if (isg) {
RExC_parse++;
+ if (*RExC_parse == '{') {
+ RExC_parse++;
+ hasbrace = 1;
+ }
+ if (*RExC_parse == '-') {
+ RExC_parse++;
+ isrel = 1;
+ }
+ }
num = atoi(RExC_parse);
if (isrel) {
num = RExC_npar - num;
if (num < 1)
vFAIL("Reference to nonexistent or unclosed group");
}
- if (num > 9 && num >= RExC_npar)
+ if (!isg && num > 9 && num >= RExC_npar)
goto defchar;
else {
char * const parse_start = RExC_parse - 1; /* MJD */
while (isDIGIT(*RExC_parse))
RExC_parse++;
-
+ if (hasbrace) {
+ if (*RExC_parse != '}')
+ vFAIL("Unterminated \\g{...} pattern");
+ RExC_parse++;
+ }
if (!SIZE_ONLY) {
if (num > (I32)RExC_rx->nparens)
vFAIL("Reference to nonexistent group");
case 'C':
case 'X':
case 'G':
+ case 'g':
case 'Z':
case 'z':
case 'w':
PAD_SAVE_LOCAL(old_comppad, (PAD*)rexi->data->data[n + 2]);
PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
+ if (sv_yes_mark) {
+ SV *sv_mrk = get_sv("REGMARK", 1);
+ sv_setsv(sv_mrk, sv_yes_mark);
+ }
+
CALLRUNOPS(aTHX); /* Scalar context. */
SPAGAIN;
if (SP == before)
case SKIP:
PL_reginput = locinput;
if (scan->flags) {
- /* (*CUT) : if we fail we cut here*/
+ /* (*SKIP) : if we fail we cut here*/
ST.mark_name = NULL;
ST.mark_loc = locinput;
PUSH_STATE_GOTO(SKIP_next,next);
} else {
- /* (*CUT:NAME) : if there is a (*MARK:NAME) fail where it was,
+ /* (*SKIP:NAME) : if there is a (*MARK:NAME) fail where it was,
otherwise do nothing. Meaning we need to scan
*/
regmatch_state *cur = mark_state;
cur = cur->u.mark.prev_mark;
}
}
- /* Didn't find our (*MARK:NAME) so ignore this (*CUT:NAME) */
+ /* Didn't find our (*MARK:NAME) so ignore this (*SKIP:NAME) */
break;
case SKIP_next_fail:
if (ST.mark_name) {
}
{
local $Message = "http://nntp.perl.org/group/perl.perl5.porters/118663";
- my $qr_barR1 = qr/(bar)\R1/;
+ my $qr_barR1 = qr/(bar)\g-1/;
ok("foobarbarxyz" =~ $qr_barR1);
ok("foobarbarxyz" =~ qr/foo${qr_barR1}xyz/);
ok("foobarbarxyz" =~ qr/(foo)${qr_barR1}xyz/);
- ok("foobarbarxyz" =~ qr/(foo)(bar)\R1xyz/);
+ ok("foobarbarxyz" =~ qr/(foo)(bar)\g{-1}xyz/);
ok("foobarbarxyz" =~ qr/(foo${qr_barR1})xyz/);
- ok("foobarbarxyz" =~ qr/(foo(bar)\R1)xyz/);
+ ok("foobarbarxyz" =~ qr/(foo(bar)\g{-1})xyz/);
}
{
local $Message = "RT#41010";
$doit->(\@spats,@sstrs);
$doit->(\@dpats,@dstrs);
}
-
+{
+ local $Message = "\$REGMARK";
+ our @r=();
+ ok('foofoo' =~ /foo (*MARK:foo) (?{push @r,$REGMARK}) /x);
+ iseq("@r","foo");
+ iseq($REGMARK,"foo");
+ ok('foofoo' !~ /foo (*MARK:foo) (*FAIL) /x);
+ ok(!$REGMARK);
+ iseq($REGERROR,'foo');
+}
# Test counter is at bottom of file. Put new tests above here.
#-------------------------------------------------------------------
# Keep the following tests last -- they may crash perl
iseq(0+$::test,$::TestCount,"Got the right number of tests!");
# Don't forget to update this!
BEGIN {
- $::TestCount = 1567;
+ $::TestCount = 1573;
print "1..$::TestCount\n";
}
(a)(?:(?-1)|(?+1))(b) abb y $1-$2 a-b
(a)(?:(?-1)|(?+1))(b) acb n - -
-(foo)(\R2) foofoo y $1-$2 foo-foo
-(foo)(\R2)(foo)(\R2) foofoofoofoo y $1-$2-$3-$4 foo-foo-foo-foo
-(([abc]+) \R1)(([abc]+) \R1) abc abccba cba y $2-$4 abc-cba
+(foo)(\g-2) foofoo y $1-$2 foo-foo
+(foo)(\g-2)(foo)(\g-2) foofoofoofoo y $1-$2-$3-$4 foo-foo-foo-foo
+(([abc]+) \g-1)(([abc]+) \g{-1}) abc abccba cba y $2-$4 abc-cba
+(a)(b)(c)\g1\g2\g3 abcabc y $1$2$3 abc
+
/(?'n'foo) \k<n>/ ..foo foo.. y $1 foo
/(?'n'foo) \k<n>/ ..foo foo.. y $+{n} foo