From: Jarkko Hietaniemi Date: Sat, 30 Jun 2001 12:58:16 +0000 (+0000) Subject: Add support for $^N, the most-recently closed group. X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=a01268b57212e226e8cd71d448590f3e6c10d529;p=p5sagit%2Fp5-mst-13.2.git Add support for $^N, the most-recently closed group. p4raw-id: //depot/perl@11038 --- diff --git a/embedvar.h b/embedvar.h index a77a273..82c965f 100644 --- a/embedvar.h +++ b/embedvar.h @@ -113,6 +113,7 @@ #define PL_regint_start (vTHX->Tregint_start) #define PL_regint_string (vTHX->Tregint_string) #define PL_reginterp_cnt (vTHX->Treginterp_cnt) +#define PL_reglastcloseparen (vTHX->Treglastcloseparen) #define PL_reglastparen (vTHX->Treglastparen) #define PL_regnarrate (vTHX->Tregnarrate) #define PL_regnaughty (vTHX->Tregnaughty) @@ -821,6 +822,7 @@ #define PL_regint_start (aTHXo->interp.Tregint_start) #define PL_regint_string (aTHXo->interp.Tregint_string) #define PL_reginterp_cnt (aTHXo->interp.Treginterp_cnt) +#define PL_reglastcloseparen (aTHXo->interp.Treglastcloseparen) #define PL_reglastparen (aTHXo->interp.Treglastparen) #define PL_regnarrate (aTHXo->interp.Tregnarrate) #define PL_regnaughty (aTHXo->interp.Tregnaughty) @@ -1518,6 +1520,7 @@ #define PL_regint_start (aTHX->Tregint_start) #define PL_regint_string (aTHX->Tregint_string) #define PL_reginterp_cnt (aTHX->Treginterp_cnt) +#define PL_reglastcloseparen (aTHX->Treglastcloseparen) #define PL_reglastparen (aTHX->Treglastparen) #define PL_regnarrate (aTHX->Tregnarrate) #define PL_regnaughty (aTHX->Tregnaughty) @@ -1654,6 +1657,7 @@ #define PL_Tregint_start PL_regint_start #define PL_Tregint_string PL_regint_string #define PL_Treginterp_cnt PL_reginterp_cnt +#define PL_Treglastcloseparen PL_reglastcloseparen #define PL_Treglastparen PL_reglastparen #define PL_Tregnarrate PL_regnarrate #define PL_Tregnaughty PL_regnaughty diff --git a/gv.c b/gv.c index 86f4843..0af054c 100644 --- a/gv.c +++ b/gv.c @@ -895,6 +895,7 @@ Perl_gv_fetchpv(pTHX_ const char *nambeg, I32 add, I32 sv_type) case '\006': /* $^F */ case '\010': /* $^H */ case '\011': /* $^I, NOT \t in EBCDIC */ + case '\016': /* $^N */ case '\020': /* $^P */ case '\024': /* $^T */ if (len > 1) @@ -1764,6 +1765,7 @@ Perl_is_gv_magical(pTHX_ char *name, STRLEN len, U32 flags) case '\010': /* $^H */ case '\011': /* $^I, NOT \t in EBCDIC */ case '\014': /* $^L */ + case '\016': /* $^N */ case '\020': /* $^P */ case '\023': /* $^S */ case '\024': /* $^T */ diff --git a/mg.c b/mg.c index 1f51e5c..30c8cdd 100644 --- a/mg.c +++ b/mg.c @@ -435,6 +435,13 @@ Perl_magic_len(pTHX_ SV *sv, MAGIC *mg) goto getparen; } return 0; + case '\016': /* ^N */ + if (PL_curpm && (rx = PM_GETRE(PL_curpm))) { + paren = rx->lastcloseparen; + if (paren) + goto getparen; + } + return 0; case '`': if (PL_curpm && (rx = PM_GETRE(PL_curpm))) { if (rx->startp[0] != -1) { @@ -660,6 +667,14 @@ Perl_magic_get(pTHX_ SV *sv, MAGIC *mg) } sv_setsv(sv,&PL_sv_undef); break; + case '\016': /* ^N */ + if (PL_curpm && (rx = PM_GETRE(PL_curpm))) { + paren = rx->lastcloseparen; + if (paren) + goto getparen; + } + sv_setsv(sv,&PL_sv_undef); + break; case '`': if (PL_curpm && (rx = PM_GETRE(PL_curpm))) { if ((s = rx->subbeg) && rx->startp[0] != -1) { diff --git a/perlapi.h b/perlapi.h index 7085e74..7a8dcec 100644 --- a/perlapi.h +++ b/perlapi.h @@ -802,6 +802,8 @@ START_EXTERN_C #define PL_regint_string (*Perl_Tregint_string_ptr(aTHXo)) #undef PL_reginterp_cnt #define PL_reginterp_cnt (*Perl_Treginterp_cnt_ptr(aTHXo)) +#undef PL_reglastcloseparen +#define PL_reglastcloseparen (*Perl_Treglastcloseparen_ptr(aTHXo)) #undef PL_reglastparen #define PL_reglastparen (*Perl_Treglastparen_ptr(aTHXo)) #undef PL_regnarrate diff --git a/pod/perlretut.pod b/pod/perlretut.pod index 45f829b..3e83c13 100644 --- a/pod/perlretut.pod +++ b/pod/perlretut.pod @@ -710,9 +710,12 @@ indicated below it: /(ab(cd|ef)((gi)|j))/; 1 2 34 -so that if the regexp matched, e.g., C<$2> would contain 'cd' or 'ef'. -For convenience, perl sets C<$+> to the highest numbered C<$1>, C<$2>, -... that got assigned. +so that if the regexp matched, e.g., C<$2> would contain 'cd' or 'ef'. For +convenience, perl sets C<$+> to the string held by the highest numbered +C<$1>, C<$2>, ... that got assigned (and, somewhat related, C<$^N> to the +value of the C<$1>, C<$2>, ... most-recently assigned; i.e. the C<$1>, +C<$2>, ... associated with the rightmost closing parenthesis used in the +match). Closely associated with the matching variables C<$1>, C<$2>, ... are the B C<\1>, C<\2>, ... . Backreferences are simply diff --git a/pod/perltoc.pod b/pod/perltoc.pod index 502a8f4..98652cc 100644 --- a/pod/perltoc.pod +++ b/pod/perltoc.pod @@ -904,7 +904,7 @@ $CHILD_ERROR, $?, $OS_ERROR, $ERRNO, $!, $EXTENDED_OS_ERROR, $^E, $EVAL_ERROR, $@, $PROCESS_ID, $PID, $$, $REAL_USER_ID, $UID, $<, $EFFECTIVE_USER_ID, $EUID, $>, $REAL_GROUP_ID, $GID, $(, $EFFECTIVE_GROUP_ID, $EGID, $), $PROGRAM_NAME, $0, $[, $], $COMPILING, $^C, -$DEBUGGING, $^D, $SYSTEM_FD_MAX, $^F, $^H, %^H, $INPLACE_EDIT, $^I, $^M, +$DEBUGGING, $^D, $SYSTEM_FD_MAX, $^F, $^H, %^H, $INPLACE_EDIT, $^I, $^M, $^N, $OSNAME, $^O, $PERLDB, $^P, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x100, 0x200, $LAST_REGEXP_CODE_RESULT, $^R, $EXCEPTIONS_BEING_CAUGHT, $^S, $BASETIME, $^T, $PERL_VERSION, $^V, $WARNING, $^W, ${^WARNING_BITS}, diff --git a/pod/perlvar.pod b/pod/perlvar.pod index eae87c7..d70f22d 100644 --- a/pod/perlvar.pod +++ b/pod/perlvar.pod @@ -180,15 +180,30 @@ performance penalty on all regular expression matches. See L. =item $+ -The last bracket matched by the last search pattern. This is useful if -you don't know which one of a set of alternative patterns matched. For -example: +The text matched by the last bracket of the last successful search pattern. +This is useful if you don't know which one of a set of alternative patterns +matched. For example: /Version: (.*)|Revision: (.*)/ && ($rev = $+); (Mnemonic: be positive and forward looking.) This variable is read-only and dynamically scoped to the current BLOCK. +=item $^N + +The text matched by the used group most-recently closed (i.e. the group +with the rightmost closing parenthesis) of the last successful search +pattern. This is primarly used inside C<(?{...})> blocks for examining text +recently matched. For example, to effectively capture text to a variable +(in addition to C<$1>, C<$2>, etc.), replace C<(...)> with + + (?:(...)(?{ $var = $^N })) + +By setting and then using C<$var> in this way relieves you from having to +worry about exactly which numbered set of parentheses they are. + +This variable is dynamically scoped to the current BLOCK. + =item @LAST_MATCH_END =item @+ diff --git a/regexec.c b/regexec.c index 1145b60..b5f8f47 100644 --- a/regexec.c +++ b/regexec.c @@ -147,7 +147,7 @@ S_regcppush(pTHX_ I32 parenfloor) if (paren_elems_to_push < 0) Perl_croak(aTHX_ "panic: paren_elems_to_push < 0"); -#define REGCP_OTHER_ELEMS 5 +#define REGCP_OTHER_ELEMS 6 SSCHECK(paren_elems_to_push + REGCP_OTHER_ELEMS); for (p = PL_regsize; p > parenfloor; p--) { /* REGCP_PARENS_ELEMS are pushed per pairs of parentheses. */ @@ -159,6 +159,7 @@ S_regcppush(pTHX_ I32 parenfloor) /* REGCP_OTHER_ELEMS are pushed in any case, parentheses or no. */ SSPUSHINT(PL_regsize); SSPUSHINT(*PL_reglastparen); + SSPUSHINT(*PL_reglastcloseparen); SSPUSHPTR(PL_reginput); #define REGCP_FRAME_ELEMS 2 /* REGCP_FRAME_ELEMS are part of the REGCP_OTHER_ELEMS and @@ -192,6 +193,7 @@ S_regcppop(pTHX) assert(i == SAVEt_REGCONTEXT); /* Check that the magic cookie is there. */ i = SSPOPINT; /* Parentheses elements to pop. */ input = (char *) SSPOPPTR; + *PL_reglastcloseparen = SSPOPINT; *PL_reglastparen = SSPOPINT; PL_regsize = SSPOPINT; @@ -1871,6 +1873,7 @@ S_regtry(pTHX_ regexp *prog, char *startpos) PL_regstartp = prog->startp; PL_regendp = prog->endp; PL_reglastparen = &prog->lastparen; + PL_reglastcloseparen = &prog->lastcloseparen; prog->lastparen = 0; PL_regsize = 0; DEBUG_r(PL_reg_starttry = startpos); @@ -2562,6 +2565,7 @@ S_regmatch(pTHX_ regnode *prog) cache_re(re); state.ss = PL_savestack_ix; *PL_reglastparen = 0; + *PL_reglastcloseparen = 0; PL_reg_call_cc = &state; PL_reginput = locinput; @@ -2619,6 +2623,7 @@ S_regmatch(pTHX_ regnode *prog) PL_regendp[n] = locinput - PL_bostr; if (n > *PL_reglastparen) *PL_reglastparen = n; + *PL_reglastcloseparen = n; break; case GROUPP: n = ARG(scan); /* which paren pair */ diff --git a/regexp.h b/regexp.h index f21d9d3..89537c2 100644 --- a/regexp.h +++ b/regexp.h @@ -37,6 +37,7 @@ typedef struct regexp { I32 prelen; /* length of precomp */ U32 nparens; /* number of parentheses */ U32 lastparen; /* last paren matched */ + U32 lastcloseparen; /* last paren matched */ U32 reganch; /* Internal use only + Tainted information used by regexec? */ regnode program[1]; /* Unwarranted chumminess with compiler. */ diff --git a/t/op/pat.t b/t/op/pat.t index 9635ad9..57f7cb7 100755 --- a/t/op/pat.t +++ b/t/op/pat.t @@ -6,7 +6,7 @@ $| = 1; -print "1..639\n"; +print "1..660\n"; BEGIN { chdir 't' if -d 't'; @@ -1854,3 +1854,38 @@ print "ok 638\n"; print "not " unless " " =~ /[[:print:]]/; print "ok 639\n"; +## +## Test basic $^N usage outside of a regex +## +$x = "abcdef"; +$T="ok 640\n";if ($x =~ /cde/ and not defined $^N) {print $T} else {print "not $T"}; +$T="ok 641\n";if ($x =~ /(cde)/ and $^N eq "cde") {print $T} else {print "not $T"}; +$T="ok 642\n";if ($x =~ /(c)(d)(e)/ and $^N eq "e") {print $T} else {print "not $T"}; +$T="ok 643\n";if ($x =~ /(c(d)e)/ and $^N eq "cde") {print $T} else {print "not $T"}; +$T="ok 644\n";if ($x =~ /(foo)|(c(d)e)/ and $^N eq "cde") {print $T} else {print "not $T"}; +$T="ok 645\n";if ($x =~ /(c(d)e)|(foo)/ and $^N eq "cde") {print $T} else {print "not $T"}; +$T="ok 646\n";if ($x =~ /(c(d)e)|(abc)/ and $^N eq "abc") {print $T} else {print "not $T"}; +$T="ok 647\n";if ($x =~ /(c(d)e)|(abc)x/ and $^N eq "cde") {print $T} else {print "not $T"}; +$T="ok 648\n";if ($x =~ /(c(d)e)(abc)?/ and $^N eq "cde") {print $T} else {print "not $T"}; +$T="ok 649\n";if ($x =~ /(?:c(d)e)/ and $^N eq "d" ) {print $T} else {print "not $T"}; +$T="ok 650\n";if ($x =~ /(?:c(d)e)(?:f)/ and $^N eq "d" ) {print $T} else {print "not $T"}; +$T="ok 651\n";if ($x =~ /(?:([abc])|([def]))*/ and $^N eq "f" ){print $T} else {print "not $T"}; +$T="ok 652\n";if ($x =~ /(?:([ace])|([bdf]))*/ and $^N eq "f" ){print $T} else {print "not $T"}; +$T="ok 653\n";if ($x =~ /(([ace])|([bd]))*/ and $^N eq "e" ){print $T} else {print "not $T"}; +{ + $T="ok 654\n";if($x =~ /(([ace])|([bdf]))*/ and $^N eq "f" ){print $T} else {print "not $T"}; +} +## test to see if $^N is automatically localized -- it should now +## have the value set in test 653 +$T="ok 655\n";if ($^N eq "e" ){print $T} else {print "not $T"}; + +## +## Now test inside (?{...}) +## +$T="ok 656\n";if ($x =~ /a([abc])(?{$y=$^N})c/ and $y eq "b" ){print $T} else {print "not $T"}; +$T="ok 657\n";if ($x =~ /a([abc]+)(?{$y=$^N})d/ and $y eq "bc"){print $T} else {print "not $T"}; +$T="ok 658\n";if ($x =~ /a([abcdefg]+)(?{$y=$^N})d/ and $y eq "bc"){print $T} else {print "not $T"}; +$T="ok 659\n";if ($x =~ /(a([abcdefg]+)(?{$y=$^N})d)(?{$z=$^N})e/ and $y eq "bc" and $z eq "abcd") + {print $T} else {print "not $T"}; +$T="ok 660\n";if ($x =~ /(a([abcdefg]+)(?{$y=$^N})de)(?{$z=$^N})/ and $y eq "bc" and $z eq "abcde") + {print $T} else {print "not $T"}; diff --git a/thrdvar.h b/thrdvar.h index 2cfbfa2..a739ecd 100644 --- a/thrdvar.h +++ b/thrdvar.h @@ -182,6 +182,7 @@ PERLVAR(Tregeol, char *) /* End of input, for $ check. */ PERLVAR(Tregstartp, I32 *) /* Pointer to startp array. */ PERLVAR(Tregendp, I32 *) /* Ditto for endp. */ PERLVAR(Treglastparen, U32 *) /* Similarly for lastparen. */ +PERLVAR(Treglastcloseparen, U32 *) /* Similarly for lastcloseparen. */ PERLVAR(Tregtill, char *) /* How far we are required to go. */ PERLVAR(Tregcompat1, char) /* used to be regprev1 */ PERLVAR(Treg_start_tmp, char **) /* from regexec.c */