# define VTBL_uvar &PL_vtbl_uvar
# define VTBL_defelem &PL_vtbl_defelem
# define VTBL_regexp &PL_vtbl_regexp
+# define VTBL_regdata_names &PL_vtbl_regdata_names
# define VTBL_regdata &PL_vtbl_regdata
# define VTBL_regdatum &PL_vtbl_regdatum
# ifdef USE_LOCALE_COLLATE
RETURN;
}
- if (! SvTIED_mg((SV*)keys, PERL_MAGIC_tied))
+ if (! SvTIED_mg((SV*)keys, PERL_MAGIC_tied)
+ && ! SvTIED_mg((SV*)keys, PERL_MAGIC_regdata_names))
+ {
i = HvKEYS(keys);
+ }
else {
i = 0;
while (hv_iternext(keys)) i++;
{ PERL_MAGIC_sv, "sv(\\0)" },
{ PERL_MAGIC_arylen, "arylen(#)" },
{ PERL_MAGIC_rhash, "rhash(%)" },
+ { PERL_MAGIC_regdata_names, "regdata_names(+)" },
{ PERL_MAGIC_pos, "pos(.)" },
{ PERL_MAGIC_symtab, "symtab(:)" },
{ PERL_MAGIC_backref, "backref(<)" },
|NN char* strend|NN char* strbeg|I32 minend \
|NN SV* screamer|NULLOK void* data|U32 flags
ApR |regnode*|regnext |NN regnode* p
+Ep |SV*|reg_named_buff_sv |NN SV* namesv
Ep |void |regprop |NULLOK const regexp *prog|NN SV* sv|NN const regnode* o
Ap |void |repeatcpy |NN char* to|NN const char* from|I32 len|I32 count
ApP |char* |rninstr |NN const char* big|NN const char* bigend \
#define regexec_flags Perl_regexec_flags
#define regnext Perl_regnext
#if defined(PERL_CORE) || defined(PERL_EXT)
+#define reg_named_buff_sv Perl_reg_named_buff_sv
#define regprop Perl_regprop
#endif
#define repeatcpy Perl_repeatcpy
#define regexec_flags(a,b,c,d,e,f,g,h) Perl_regexec_flags(aTHX_ a,b,c,d,e,f,g,h)
#define regnext(a) Perl_regnext(aTHX_ a)
#if defined(PERL_CORE) || defined(PERL_EXT)
+#define reg_named_buff_sv(a) Perl_reg_named_buff_sv(aTHX_ a)
#define regprop(a,b,c) Perl_regprop(aTHX_ a,b,c)
#endif
#define repeatcpy(a,b,c,d) Perl_repeatcpy(aTHX_ a,b,c,d)
goto magicalize;
case '+':
+ GvMULTI_on(gv);
{
AV* const av = GvAVn(gv);
+ HV* const hv = GvHVn(gv);
sv_magic((SV*)av, (SV*)av, PERL_MAGIC_regdata, NULL, 0);
SvREADONLY_on(av);
+ hv_magic(hv, NULL, PERL_MAGIC_regdata_names);
+ SvREADONLY_on(hv);
/* FALL THROUGH */
}
case '\023': /* $^S */
xhv = (XPVHV*)SvANY(hv);
if (SvMAGICAL(hv)) {
if (SvRMAGICAL(hv) && !(action & (HV_FETCH_ISSTORE|HV_FETCH_ISEXISTS))) {
- if (mg_find((SV*)hv, PERL_MAGIC_tied) || SvGMAGICAL((SV*)hv)) {
- sv = sv_newmortal();
+ MAGIC *regdata = NULL;
+ if (mg_find((SV*)hv, PERL_MAGIC_tied) || SvGMAGICAL((SV*)hv)
+ || (regdata = mg_find((SV*)hv, PERL_MAGIC_regdata_names))) {
/* XXX should be able to skimp on the HE/HEK here when
HV_FETCH_JUST_SV is true. */
-
if (!keysv) {
keysv = newSVpvn(key, klen);
if (is_utf8) {
} else {
keysv = newSVsv(keysv);
}
- mg_copy((SV*)hv, sv, (char *)keysv, HEf_SVKEY);
+ if (regdata) {
+ sv = Perl_reg_named_buff_sv(aTHX_ keysv);
+ if (!sv) {
+ SvREFCNT_dec(keysv);
+ return 0;
+ }
+ } else {
+ sv = sv_newmortal();
+ mg_copy((SV*)hv, sv, (char *)keysv, HEf_SVKEY);
+ }
/* grab a fake HE/HEK pair from the pool or make a new one */
entry = PL_hv_fetch_ent_mh;
} else {
hv_auxinit(hv);
}
-
+ if (SvMAGICAL(hv) && SvRMAGICAL(hv)) {
+ MAGIC *mg = mg_find((SV*)hv, PERL_MAGIC_regdata_names);
+ if ( mg ) {
+ if (PL_curpm) {
+ const REGEXP * const rx = PM_GETRE(PL_curpm);
+ if (rx && rx->paren_names) {
+ (void)hv_iterinit(rx->paren_names);
+ }
+ }
+ }
+ }
/* used to be xhv->xhv_fill before 5.004_65 */
return HvTOTALKEYS(hv);
}
if (!hv)
Perl_croak(aTHX_ "Bad hash");
+
xhv = (XPVHV*)SvANY(hv);
if (!SvOOK(hv)) {
iter = HvAUX(hv);
oldentry = entry = iter->xhv_eiter; /* HvEITER(hv) */
+ if (SvMAGICAL(hv) && SvRMAGICAL(hv) &&
+ (mg = mg_find((SV*)hv, PERL_MAGIC_regdata_names)))
+ {
+ SV * key;
+ SV *val = NULL;
+ REGEXP * rx;
+ if (!PL_curpm)
+ return NULL;
+ rx = PM_GETRE(PL_curpm);
+ if (rx && rx->paren_names) {
+ hv = rx->paren_names;
+ } else {
+ return NULL;
+ }
- if ((mg = SvTIED_mg((SV*)hv, PERL_MAGIC_tied))) {
+ key = sv_newmortal();
+ if (entry) {
+ sv_setsv(key, HeSVKEY_force(entry));
+ SvREFCNT_dec(HeSVKEY(entry)); /* get rid of previous key */
+ }
+ else {
+ char *k;
+ HEK *hek;
+
+ /* one HE per MAGICAL hash */
+ iter->xhv_eiter = entry = new_HE(); /* HvEITER(hv) = new_HE() */
+ Zero(entry, 1, HE);
+ Newxz(k, HEK_BASESIZE + sizeof(SV*), char);
+ hek = (HEK*)k;
+ HeKEY_hek(entry) = hek;
+ HeKLEN(entry) = HEf_SVKEY;
+ }
+ {
+ while (!val) {
+ HE *temphe = hv_iternext_flags(hv,flags);
+ if (temphe) {
+ IV i;
+ IV parno = 0;
+ SV* sv_dat = HeVAL(temphe);
+ I32 *nums = (I32*)SvPVX(sv_dat);
+ for ( i = 0; i < SvIVX(sv_dat); i++ ) {
+ if ((I32)(rx->lastcloseparen) >= nums[i] &&
+ rx->startp[nums[i]] != -1 &&
+ rx->endp[nums[i]] != -1)
+ {
+ parno = nums[i];
+ break;
+ }
+ }
+ if (parno) {
+ GV *gv_paren;
+ STRLEN len;
+ SV *sv = sv_newmortal();
+ const char* pvkey = HePV(temphe, len);
+
+ Perl_sv_setpvf(aTHX_ sv, "%"IVdf,(IV)parno);
+ gv_paren = Perl_gv_fetchsv(aTHX_ sv, GV_ADD, SVt_PVGV);
+ Perl_sv_setpvn(aTHX_ key, pvkey, len);
+ val = GvSVn(gv_paren);
+ }
+ } else {
+ break;
+ }
+ }
+ }
+ if (val && SvOK(key)) {
+ /* force key to stay around until next time */
+ HeSVKEY_set(entry, SvREFCNT_inc_simple_NN(key));
+ HeVAL(entry) = SvREFCNT_inc_simple_NN(val);
+ return entry; /* beware, hent_val is not set */
+ }
+ if (HeVAL(entry))
+ SvREFCNT_dec(HeVAL(entry));
+ Safefree(HeKEY_hek(entry));
+ del_HE(entry);
+ iter->xhv_eiter = NULL; /* HvEITER(hv) = NULL */
+ return NULL;
+
+ } else if ((mg = SvTIED_mg((SV*)hv, PERL_MAGIC_tied))) {
SV * const key = sv_newmortal();
if (entry) {
sv_setsv(key, HeSVKEY_force(entry));
#define PERL_MAGIC_overload_elem 'a' /* %OVERLOAD hash element */
#define PERL_MAGIC_overload_table 'c' /* Holds overload table (AMT) on stash */
#define PERL_MAGIC_bm 'B' /* Boyer-Moore (fast string search) */
+#define PERL_MAGIC_regdata_names '+' /* Regex named capture buffer hash
+ (%+ support) */
#define PERL_MAGIC_regdata 'D' /* Regex match position data
(@+ and @- vars) */
#define PERL_MAGIC_regdatum 'd' /* Regex match position data element */
);
MGVTBL_SET(
+ PL_vtbl_regdata_names,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL
+);
+
+MGVTBL_SET(
PL_vtbl_regdata,
NULL,
NULL,
X<\w> X<\W> X<\s> X<\S> X<\d> X<\D> X<\X> X<\p> X<\P> X<\C>
X<word> X<whitespace>
- \w Match a "word" character (alphanumeric plus "_")
- \W Match a non-"word" character
- \s Match a whitespace character
- \S Match a non-whitespace character
- \d Match a digit character
- \D Match a non-digit character
- \pP Match P, named property. Use \p{Prop} for longer names.
- \PP Match non-P
- \X Match eXtended Unicode "combining character sequence",
- equivalent to (?:\PM\pM*)
- \C Match a single C char (octet) even under Unicode.
- NOTE: breaks up characters into their UTF-8 bytes,
- so you may end up with malformed pieces of UTF-8.
- Unsupported in lookbehind.
+ \w Match a "word" character (alphanumeric plus "_")
+ \W Match a non-"word" character
+ \s Match a whitespace character
+ \S Match a non-whitespace character
+ \d Match a digit character
+ \D Match a non-digit character
+ \pP Match P, named property. Use \p{Prop} for longer names.
+ \PP Match non-P
+ \X Match eXtended Unicode "combining character sequence",
+ equivalent to (?:\PM\pM*)
+ \C Match a single C char (octet) even under Unicode.
+ NOTE: breaks up characters into their UTF-8 bytes,
+ so you may end up with malformed pieces of UTF-8.
+ Unsupported in lookbehind.
+ \1 Backreference to a a specific group.
+ '1' may actually be any positive integer
+ \k<name> Named backreference
+ \N{name} Named unicode character, or unicode escape.
+ \x12 Hexadecimal escape sequence
+ \x{1234} Long hexadecimal escape sequence
A C<\w> matches a single alphanumeric character (an alphabetic
character, or a decimal digit) or C<_>, not a whole word. Use C<\w+>
The bracketing construct C<( ... )> creates capture buffers. To
refer to the digit'th buffer use \<digit> within the
match. Outside the match use "$" instead of "\". (The
-\<digit> notation works in certain circumstances outside
+\<digit> notation works in certain circumstances outside
the match. See the warning below about \1 vs $1 for details.)
Referring back to another part of the match is called a
I<backreference>.
use. However Perl also uses \10, \11, etc. as aliases for \010,
\011, etc. (Recall that 0 means octal, so \011 is the character at
number 9 in your coded character set; which would be the 10th character,
-a horizontal tab under ASCII.) Perl resolves this
-ambiguity by interpreting \10 as a backreference only if at least 10
-left parentheses have opened before it. Likewise \11 is a
-backreference only if at least 11 left parentheses have opened
-before it. And so on. \1 through \9 are always interpreted as
+a horizontal tab under ASCII.) Perl resolves this
+ambiguity by interpreting \10 as a backreference only if at least 10
+left parentheses have opened before it. Likewise \11 is a
+backreference only if at least 11 left parentheses have opened
+before it. And so on. \1 through \9 are always interpreted as
backreferences.
+Additionally, as of Perl 5.10 you may use named capture buffers and named
+backreferences. The notation is C<< (?<name>...) >> and C<< \k<name> >>
+(you may also use single quotes instead of angle brackets to quote the
+name). The only difference with named capture buffers and unnamed ones is
+that multiple buffers may have the same name and that the contents of
+named capture buffers is available via the C<%+> hash. When multiple
+groups share the same name C<$+{name}> and C<< \k<name> >> refer to the
+leftmost defined group, thus it's possible to do things with named capture
+buffers that would otherwise require C<(??{})> code to accomplish. Named
+capture buffers are numbered just as normal capture buffers are and may be
+referenced via the magic numeric variables or via numeric backreferences
+as well as by name.
+
Examples:
s/^([^ ]*) *([^ ]*)/$2 $1/; # swap first two words
- if (/(.)\1/) { # find first doubled char
- print "'$1' is the first doubled character\n";
- }
+ /(.)\1/ # find first doubled char
+ and print "'$1' is the first doubled character\n";
+
+ /(?<char>.)\k<char>/ # ... a different way
+ and print "'$+{char}' is the first doubled character\n";
+
+ /(?<char>.)\1/ # ... mix and match
+ and print "'$1' is the first doubled character\n";
if (/Time: (..):(..):(..)/) { # parse out values
$hours = $1;
after the matched string. And C<$^N> contains whatever was matched by
the most-recently closed group (submatch). C<$^N> can be used in
extended patterns (see below), for example to assign a submatch to a
-variable.
+variable.
X<$+> X<$^N> X<$&> X<$`> X<$'>
The numbered match variables ($1, $2, $3, etc.) and the related punctuation
matches any occurrence of "foo" that does not follow "bar". Works
only for fixed-width look-behind.
+=item C<(?'NAME'pattern)>
+
+=item C<< (?<NAME>pattern) >>
+X<< (?<NAME>) >> X<(?'NAME')> X<named capture> X<capture>
+
+A named capture buffer. Identical in every respect to normal capturing
+parens C<()> but for the additional fact that C<%+> may be used after
+a succesful match to refer to a named buffer. See C<perlvar> for more
+details on the C<%+> hash.
+
+If multiple distinct capture buffers have the same name then the
+$+{NAME} will refer to the leftmost defined buffer in the match.
+
+The forms C<(?'NAME'pattern)> and C<(?<NAME>pattern)> are equivalent.
+
+B<NOTE:> While the notation of this construct is the same as the similar
+function in .NET regexes, the behavior is not, in Perl the buffers are
+numbered sequentially regardless of being named or not. Thus in the
+pattern
+
+ /(x)(?<foo>y)(z)/
+
+$+{foo} will be the same as $2, and $3 will contain 'z' instead of
+the opposite which is what a .NET regex hacker might expect.
+
+Currently NAME is restricted to word chars only. In other words, it
+must match C</^\w+$/>.
+
+=item C<< \k<name> >>
+
+=item C<< \k'name' >>
+
+Named backreference. Similar to numeric backreferences, except that
+the group is designated by name and not number. If multiple groups
+have the same name then it refers to the leftmost defined group in
+the current match.
+
+It is an error to refer to a name not defined by a C<(?<NAME>)>
+earlier in the pattern.
+
+Both forms are equivalent.
+
=item C<(?{ code })>
X<(?{})> X<regex, code in> X<regexp, code in> X<regular expression, code in>
('a' x 100)=~/(??{'(.)' x 100})/
-B<will> match, it will B<not> set $1.
+B<will> match, it will B<not> set $1.
The C<code> is not interpolated. As before, the rules to determine
where the C<code> ends are currently somewhat convoluted.
B<WARNING>: This extended regular expression feature is considered
highly experimental, and may be changed or deleted without notice.
-Similar to C<(??{ code })> except it does not involve compiling any code,
-instead it treats the contents of a capture buffer as an independent
+Similar to C<(??{ code })> except it does not involve compiling any code,
+instead it treats the contents of a capture buffer as an independent
pattern that must match at the current position. Capture buffers
-contained by the pattern will have the value as determined by the
+contained by the pattern will have the value as determined by the
outermost recursion.
PARNO is a sequence of digits not starting with 0 whose value
-reflects the paren-number of the capture buffer to recurse to.
+reflects the paren-number of the capture buffer to recurse to.
C<(?R)> curses to the beginning of the pattern.
-The following pattern matches a function foo() which may contain
-balanced parenthesis as the argument.
+The following pattern matches a function foo() which may contain
+balanced parenthesis as the argument.
$re = qr{ ( # paren group 1 (full function)
- foo
+ foo
( # paren group 2 (parens)
\(
( # paren group 3 (contents of parens)
$1 = foo(bar(baz)+baz(bop))
$2 = (bar(baz)+baz(bop))
- $3 = bar(baz)+baz(bop)
+ $3 = bar(baz)+baz(bop)
-If there is no corresponding capture buffer defined, then it is a
+If there is no corresponding capture buffer defined, then it is a
fatal error. Recursing deeper than 50 times without consuming any input
-string will also result in a fatal error. The maximum depth is compiled
+string will also result in a fatal error. The maximum depth is compiled
into perl, so changing it requires a custom build.
-B<Note> that this pattern does not behave the same way as the equivalent
+B<Note> that this pattern does not behave the same way as the equivalent
PCRE or Python construct of the same form. In perl you can backtrack into
a recursed group, in PCRE and Python the recursed into group is treated
-as atomic. Also, constructs like (?i:(?1)) or (?:(?i)(?1)) do not affect
-the pattern being recursed into.
+as atomic. Also, constructs like (?i:(?1)) or (?:(?i)(?1)) do not affect
+the pattern being recursed into.
=item C<< (?>pattern) >>
X<backtrack> X<backtracking> X<atomic> X<possessive>
This will allow the use of a regex from inside (?{ }), (??{ }) and
(?(?{ })|) constructs.
-=head2 Add named capture to regexp engine
-
-Named capture is supported by .NET, PCRE and Python. Its embarrassing
-Perl doesn't support it yet.
-
-Jeffrey Friedl notes that "the most glaring omission [in perl's regexp
-engine] offered by other implementations is named capture".
-
-demerphq is working on this.
-
=head2 Add possessive quantifiers to regexp engine
Possessive quantifiers are a syntactic sugar that affords a more
how many subgroups were in the last successful match. See the
examples given for the C<@-> variable.
+=item %+
+X<%+>
+
+Similar to C<@+>, the C<%+> hash allows access to the named capture
+buffers, should they exist, in the last successful match in the
+currently active dynamic scope.
+
+C<$+{foo}> is equivalent to C<$1> after the following match:
+
+ 'foo'=~/(?<foo>foo)/;
+
=item HANDLE->input_line_number(EXPR)
=item $INPUT_LINE_NUMBER
=item $.
X<$.> X<$NR> X<$INPUT_LINE_NUMBER> X<line number>
-Current line number for the last filehandle accessed.
+Current line number for the last filehandle accessed.
Each filehandle in Perl counts the number of lines that have been read
from it. (Depending on the value of C<$/>, Perl's idea of what
{
dVAR;
dSP;
- HV * const hash = (HV*)POPs;
+ HV * hash = (HV*)POPs;
HE *entry;
const I32 gimme = GIMME_V;
__attribute__warn_unused_result__
__attribute__nonnull__(pTHX_1);
+PERL_CALLCONV SV* Perl_reg_named_buff_sv(pTHX_ SV* namesv)
+ __attribute__nonnull__(pTHX_1);
+
PERL_CALLCONV void Perl_regprop(pTHX_ const regexp *prog, SV* sv, const regnode* o)
__attribute__nonnull__(pTHX_2)
__attribute__nonnull__(pTHX_3);
regnode **parens; /* offsets of each paren */
I32 utf8;
HV *charnames; /* cache of named sequences */
+ HV *paren_names; /* Paren names */
#if ADD_TO_REGEXEC
char *starttry; /* -Dr: where regtry was called. */
#define RExC_starttry (pRExC_state->starttry)
#define RExC_utf8 (pRExC_state->utf8)
#define RExC_charnames (pRExC_state->charnames)
#define RExC_parens (pRExC_state->parens)
+#define RExC_paren_names (pRExC_state->paren_names)
#define ISMULT1(c) ((c) == '*' || (c) == '+' || (c) == '?')
#define ISMULT2(s) ((*s) == '*' || (*s) == '+' || (*s) == '?' || \
RExC_emit = &PL_regdummy;
RExC_whilem_seen = 0;
RExC_charnames = NULL;
- RExC_parens= NULL;
-
+ RExC_parens = NULL;
+ RExC_paren_names = NULL;
+
#if 0 /* REGC() is (currently) a NOP at the first pass.
* Clever compilers notice this and complain. --jhi */
REGC((U8)REG_MAGIC, (char*)RExC_emit);
RExC_precomp = NULL;
return(NULL);
}
- DEBUG_PARSE_r(PerlIO_printf(Perl_debug_log, "Required "));
- DEBUG_COMPILE_r(PerlIO_printf(Perl_debug_log, "size %"IVdf" nodes ", (IV)RExC_size));
- DEBUG_PARSE_r(PerlIO_printf(Perl_debug_log, "\nStarting second pass (creation)\n"));
DEBUG_PARSE_r({
+ PerlIO_printf(Perl_debug_log,
+ "Required size %"IVdf" nodes\n"
+ "Starting second pass (creation)\n",
+ (IV)RExC_size);
RExC_lastnum=0;
RExC_lastparse=NULL;
});
-
-
/* Small enough for pointer-storage convention?
If extralen==0, this means that we will not need long jumps. */
if (RExC_size >= 0x10000L && RExC_extralen)
r->substrs = 0; /* Useful during FAIL. */
r->startp = 0; /* Useful during FAIL. */
-
r->endp = 0;
+ r->paren_names = 0;
+
if (RExC_seen & REG_SEEN_RECURSE) {
Newx(RExC_parens, RExC_npar,regnode *);
SAVEFREEPV(RExC_parens);
/* Scan is after the zeroth branch, first is atomic matcher. */
#ifdef TRIE_STUDY_OPT
- DEBUG_COMPILE_r(
+ DEBUG_PARSE_r(
if (!restudied)
PerlIO_printf(Perl_debug_log, "first at %"IVdf"\n",
(IV)(first - scan + 1))
);
#else
- DEBUG_COMPILE_r(
+ DEBUG_PARSE_r(
PerlIO_printf(Perl_debug_log, "first at %"IVdf"\n",
(IV)(first - scan + 1))
);
r->reganch |= ROPT_EVAL_SEEN;
if (RExC_seen & REG_SEEN_CANY)
r->reganch |= ROPT_CANY_SEEN;
+ if (RExC_paren_names)
+ r->paren_names = (HV*)SvREFCNT_inc(RExC_paren_names);
+ else
+ r->paren_names = NULL;
+
Newxz(r->startp, RExC_npar, I32);
Newxz(r->endp, RExC_npar, I32);
#undef END_BLOCK
#undef RE_ENGINE_PTR
+SV*
+Perl_reg_named_buff_sv(pTHX_ SV* namesv)
+{
+ I32 parno = 0; /* no match */
+ if (PL_curpm) {
+ const REGEXP * const rx = PM_GETRE(PL_curpm);
+ if (rx && rx->paren_names) {
+ HE *he_str = hv_fetch_ent( rx->paren_names, namesv, 0, 0 );
+ if (he_str) {
+ IV i;
+ SV* sv_dat=HeVAL(he_str);
+ I32 *nums=(I32*)SvPVX(sv_dat);
+ for ( i=0; i<SvIVX(sv_dat); i++ ) {
+ if ((I32)(rx->lastcloseparen) >= nums[i] &&
+ rx->startp[nums[i]] != -1 &&
+ rx->endp[nums[i]] != -1)
+ {
+ parno = nums[i];
+ break;
+ }
+ }
+ }
+ }
+ }
+ if ( !parno ) {
+ return 0;
+ } else {
+ GV *gv_paren;
+ SV *sv= sv_newmortal();
+ Perl_sv_setpvf(aTHX_ sv, "%"IVdf,(IV)parno);
+ gv_paren= Perl_gv_fetchsv(aTHX_ sv, GV_ADD, SVt_PVGV);
+ return GvSVn(gv_paren);
+ }
+}
+
#define DEBUG_PARSE_MSG(funcname) DEBUG_PARSE_r({ \
int rem=(int)(RExC_end - RExC_parse); \
int cut; \
paren = *RExC_parse++;
ret = NULL; /* For look-ahead/behind. */
switch (paren) {
+
case '<': /* (?<...) */
- RExC_seen |= REG_SEEN_LOOKBEHIND;
if (*RExC_parse == '!')
paren = ',';
- if (*RExC_parse != '=' && *RExC_parse != '!')
- goto unknown;
+ else if (*RExC_parse != '=')
+ { /* (?<...>) */
+ char *name_start;
+ paren= '>';
+ case '\'': /* (?'...') */
+ name_start= RExC_parse;
+ if (UTF) {
+ STRLEN numlen;
+ while(isIDFIRST_uni(utf8n_to_uvchr((U8*)RExC_parse,
+ RExC_end - RExC_parse,
+ &numlen, UTF8_ALLOW_DEFAULT)))
+ RExC_parse += numlen;
+ } else {
+ while(isIDFIRST(*RExC_parse))
+ RExC_parse++;
+ }
+ if (RExC_parse == name_start)
+ goto unknown;
+ if (*RExC_parse != paren)
+ vFAIL2("Sequence (?%c... not terminated",
+ paren=='>' ? '<' : paren);
+ if (SIZE_ONLY) {
+ SV *svname= Perl_newSVpvf(aTHX_ "%.*s",
+ (int)(RExC_parse - name_start), name_start);
+ HE *he_str;
+ SV *sv_dat;
+
+ if (!RExC_paren_names) {
+ RExC_paren_names= newHV();
+ sv_2mortal((SV*)RExC_paren_names);
+ }
+ he_str = hv_fetch_ent( RExC_paren_names, svname, 1, 0 );
+ if ( he_str ) {
+ sv_dat = HeVAL(he_str);
+ } else {
+ /* croak baby croak */
+ }
+ if (SvPOK(sv_dat)) {
+ IV count=SvIV(sv_dat);
+ I32 *pv=(I32*)SvGROW(sv_dat,SvCUR(sv_dat)+sizeof(I32)+1);
+ SvCUR_set(sv_dat,SvCUR(sv_dat)+sizeof(I32));
+ pv[count]=RExC_npar;
+ SvIVX(sv_dat)++;
+ } else {
+ (void)SvUPGRADE(sv_dat,SVt_PVNV);
+ sv_setpvn(sv_dat, (char *)&(RExC_npar), sizeof(I32));
+ SvIOK_on(sv_dat);
+ SvIVX(sv_dat)= 1;
+ }
+ /*sv_dump(sv_dat);*/
+ }
+ nextchar(pRExC_state);
+ paren = 1;
+ goto capturing_parens;
+ }
+ RExC_seen |= REG_SEEN_LOOKBEHIND;
RExC_parse++;
case '=': /* (?=...) */
case '!': /* (?!...) */
nextchar(pRExC_state);
*flagp = TRYAGAIN;
return NULL;
+ case '0' :
case 'R' :
if (*RExC_parse != ')')
FAIL("Sequence (?R) not terminated");
}
}
else { /* (...) */
+ capturing_parens:
parno = RExC_npar;
RExC_npar++;
ret = reganode(pRExC_state, OPEN, parno);
++RExC_parse;
ret= reg_namedseq(pRExC_state, NULL);
break;
+ case 'k':
+ {
+ char ch= RExC_parse[1];
+ if (ch != '<' && ch != '\'') {
+ if (SIZE_ONLY)
+ vWARN( RExC_parse + 1,
+ "Possible broken named back reference treated as literal k");
+ parse_start--;
+ goto defchar;
+ } else {
+ char* name_start = (RExC_parse += 2);
+ I32 num = 0;
+ ch= (ch == '<') ? '>' : '\'';
+
+ if (UTF) {
+ STRLEN numlen;
+ while(isIDFIRST_uni(utf8n_to_uvchr((U8*)RExC_parse,
+ RExC_end - RExC_parse,
+ &numlen, UTF8_ALLOW_DEFAULT)))
+ RExC_parse += numlen;
+ } else {
+ while(isIDFIRST(*RExC_parse))
+ RExC_parse++;
+ }
+ if (RExC_parse == name_start || *RExC_parse != ch)
+ vFAIL2("Sequence \\k%c... not terminated",
+ (ch == '>' ? '<' : ch));
+
+ RExC_sawback = 1;
+ ret = reganode(pRExC_state,
+ (U8)(FOLD ? (LOC ? NREFFL : NREFF) : NREF),
+ num);
+ *flagp |= HASWIDTH;
+
+
+ if (!SIZE_ONLY) {
+ SV *svname = Perl_newSVpvf(aTHX_ "%.*s",
+ (int)(RExC_parse - name_start), name_start);
+ HE *he_str;
+ SV *sv_dat;
+ if (UTF)
+ SvUTF8_on(svname);
+ he_str = hv_fetch_ent( RExC_paren_names, svname, 0, 0 );
+ SvREFCNT_dec(svname);
+ if ( he_str ) {
+ sv_dat = HeVAL(he_str);
+ } else {
+ vFAIL("Reference to nonexistent group");
+ }
+ num = add_data( pRExC_state, 1, "S" );
+ ARG_SET(ret,num);
+ RExC_rx->data->data[num]=(void*)sv_dat;
+ SvREFCNT_inc(sv_dat);
+ }
+ /* override incorrect value set in reganode MJD */
+ Set_Node_Offset(ret, parse_start+1);
+ Set_Node_Cur_Length(ret); /* MJD */
+ nextchar(pRExC_state);
+
+ }
+ break;
+ }
case 'n':
case 'r':
case 't':
SvREFCNT_dec(r->float_utf8);
Safefree(r->substrs);
}
+ if (r->paren_names)
+ SvREFCNT_dec(r->paren_names);
if (r->data) {
int n = r->data->count;
PAD* new_comppad = NULL;
/* If you add a ->what type here, update the comment in regcomp.h */
switch (r->data->what[n]) {
case 's':
+ case 'S':
SvREFCNT_dec((SV*)r->data->data[n]);
break;
case 'f':
#define sv_dup_inc(s,t) SvREFCNT_inc(sv_dup(s,t))
#define av_dup_inc(s,t) (AV*)SvREFCNT_inc(sv_dup((SV*)s,t))
+#define hv_dup_inc(s,t) (HV*)SvREFCNT_inc(sv_dup((SV*)s,t))
#define SAVEPVN(p,n) ((p) ? savepvn(p,n) : NULL)
/*
/* legal options are one of: sfpont
see also regcomp.h and pregfree() */
case 's':
+ case 'S':
d->data[i] = sv_dup_inc((SV *)r->data->data[i], param);
break;
case 'p':
ret->sublen = r->sublen;
ret->engine = r->engine;
+
+ ret->paren_names = hv_dup_inc(r->paren_names, param);
if (RX_MATCH_COPIED(ret))
ret->subbeg = SAVEPVN(r->subbeg, r->sublen);
* in the character class
* t - trie struct
* T - aho-trie struct
+ * S - sv for named capture lookup
* 20010712 mjd@plover.com
* (Remember to update re_dup() and pregfree() if you add any items.)
*/
#*Literals (33..35)
-EXACT EXACT, sv Match this string (preceded by length).
-EXACTF EXACT, sv Match this string, folded (prec. by length).
-EXACTFL EXACT, sv Match this string, folded in locale (w/len).
+EXACT EXACT, str Match this string (preceded by length).
+EXACTF EXACT, str Match this string, folded (prec. by length).
+EXACTFL EXACT, str Match this string, folded in locale (w/len).
#*Do nothing types (36..37)
AHOCORASICK TRIE, trie 1 Aho Corasick stclass. flags==type
AHOCORASICKC TRIE, trie charclass Same as AHOCORASICK, but with embedded charclass data
-#*Recursion (65)
+#*Recursion (65..66)
RECURSE RECURSE, num/ofs 2L recurse to paren arg1 at (signed) ofs arg2
SRECURSE RECURSE, no recurse to start of pattern
+#*Named references (67..69)
+NREF NREF, no-sv 1 Match some already matched string
+NREFF NREF, no-sv 1 Match already matched string, folded
+NREFFL NREF, no-sv 1 Match already matched string, folded in loc.
+
+
# NEW STUFF ABOVE THIS LINE -- Please update counts below.
################################################################################
-#*SPECIAL REGOPS (65, 66)
+#*SPECIAL REGOPS (70, 71)
# This is not really a node, but an optimized away piece of a "long" node.
# To simplify debugging output, we mark it as if it were a node
the same. */
restore_pos(aTHX_ prog);
}
+ if (prog->paren_names)
+ (void)hv_iterinit(prog->paren_names);
/* make sure $`, $&, $', and $digit will work later */
if ( !(flags & REXEC_NOT_FIRST) ) {
locinput++;
nextchr = UCHARAT(locinput);
break;
+
+ case NREFFL:
+ {
+ char *s;
+ char type = OP(scan);
+ PL_reg_flags |= RF_tainted;
+ /* FALL THROUGH */
+ case NREF:
+ case NREFF:
+ {
+ SV *sv_dat=(SV*)rex->data->data[ ARG( scan ) ];
+ I32 *nums=(I32*)SvPVX(sv_dat);
+ for ( n=0; n<SvIVX(sv_dat); n++ ) {
+ if ((I32)*PL_reglastparen >= nums[n] &&
+ PL_regstartp[nums[n]] != -1 &&
+ PL_regendp[nums[n]] != -1)
+ {
+ n = nums[n];
+ type = REF + ( type - NREF );
+ goto do_ref;
+ }
+ }
+ sayNO;
+ /* unreached */
+ }
case REFFL:
PL_reg_flags |= RF_tainted;
/* FALL THROUGH */
case REF:
- case REFF: {
- char *s;
+ case REFF:
n = ARG(scan); /* which paren pair */
+ type = OP(scan);
+ do_ref:
ln = PL_regstartp[n];
PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
if ((I32)*PL_reglastparen < n || ln == -1)
break;
s = PL_bostr + ln;
- if (do_utf8 && OP(scan) != REF) { /* REF can do byte comparison */
+ if (do_utf8 && type != REF) { /* REF can do byte comparison */
char *l = locinput;
const char *e = PL_bostr + PL_regendp[n];
/*
* in the 8-bit case (no pun intended) because in Unicode we
* have to map both upper and title case to lower case.
*/
- if (OP(scan) == REFF) {
+ if (type == REFF) {
while (s < e) {
STRLEN ulen1, ulen2;
U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
/* Inline the first character, for speed. */
if (UCHARAT(s) != nextchr &&
- (OP(scan) == REF ||
- (UCHARAT(s) != ((OP(scan) == REFF
- ? PL_fold : PL_fold_locale)[nextchr]))))
+ (type == REF ||
+ (UCHARAT(s) != (type == REFF
+ ? PL_fold : PL_fold_locale)[nextchr])))
sayNO;
ln = PL_regendp[n] - ln;
if (locinput + ln > PL_regeol)
sayNO;
- if (ln > 1 && (OP(scan) == REF
+ if (ln > 1 && (type == REF
? memNE(s, locinput, ln)
- : (OP(scan) == REFF
+ : (type == REFF
? ibcmp(s, locinput, ln)
: ibcmp_locale(s, locinput, ln))))
sayNO;
locinput += ln;
nextchr = UCHARAT(locinput);
break;
- }
-
+ }
case NOTHING:
case TAIL:
break;
U32 lastcloseparen; /* last paren matched */
U32 reganch; /* Internal use only +
Tainted information used by regexec? */
- const struct regexp_engine* engine;
+ HV *paren_names; /* Paren names */
+ const struct regexp_engine* engine;
regnode program[1]; /* Unwarranted chumminess with compiler. */
} regexp;
/* Regops and State definitions */
-#define REGNODE_MAX 68
-#define REGMATCH_STATE_MAX 98
+#define REGNODE_MAX 71
+#define REGMATCH_STATE_MAX 101
#define END 0 /* 0000 End of program. */
#define SUCCEED 1 /* 0x01 Return from a subroutine, basically. */
#define AHOCORASICKC 64 /* 0x40 Same as AHOCORASICK, but with embedded charclass data */
#define RECURSE 65 /* 0x41 recurse to paren arg1 at (signed) ofs arg2 */
#define SRECURSE 66 /* 0x42 recurse to start of pattern */
-#define OPTIMIZED 67 /* 0x43 Placeholder for dump. */
-#define PSEUDO 68 /* 0x44 Pseudo opcode for internal use. */
+#define NREF 67 /* 0x43 Match some already matched string */
+#define NREFF 68 /* 0x44 Match already matched string, folded */
+#define NREFFL 69 /* 0x45 Match already matched string, folded in loc. */
+#define OPTIMIZED 70 /* 0x46 Placeholder for dump. */
+#define PSEUDO 71 /* 0x47 Pseudo opcode for internal use. */
/* ------------ States ------------- */
-#define TRIE_next 69 /* 0x45 Regmatch state for TRIE */
-#define TRIE_next_fail 70 /* 0x46 Regmatch state for TRIE */
-#define EVAL_AB 71 /* 0x47 Regmatch state for EVAL */
-#define EVAL_AB_fail 72 /* 0x48 Regmatch state for EVAL */
-#define CURLYX_end 73 /* 0x49 Regmatch state for CURLYX */
-#define CURLYX_end_fail 74 /* 0x4a Regmatch state for CURLYX */
-#define WHILEM_A_pre 75 /* 0x4b Regmatch state for WHILEM */
-#define WHILEM_A_pre_fail 76 /* 0x4c Regmatch state for WHILEM */
-#define WHILEM_A_min 77 /* 0x4d Regmatch state for WHILEM */
-#define WHILEM_A_min_fail 78 /* 0x4e Regmatch state for WHILEM */
-#define WHILEM_A_max 79 /* 0x4f Regmatch state for WHILEM */
-#define WHILEM_A_max_fail 80 /* 0x50 Regmatch state for WHILEM */
-#define WHILEM_B_min 81 /* 0x51 Regmatch state for WHILEM */
-#define WHILEM_B_min_fail 82 /* 0x52 Regmatch state for WHILEM */
-#define WHILEM_B_max 83 /* 0x53 Regmatch state for WHILEM */
-#define WHILEM_B_max_fail 84 /* 0x54 Regmatch state for WHILEM */
-#define BRANCH_next 85 /* 0x55 Regmatch state for BRANCH */
-#define BRANCH_next_fail 86 /* 0x56 Regmatch state for BRANCH */
-#define CURLYM_A 87 /* 0x57 Regmatch state for CURLYM */
-#define CURLYM_A_fail 88 /* 0x58 Regmatch state for CURLYM */
-#define CURLYM_B 89 /* 0x59 Regmatch state for CURLYM */
-#define CURLYM_B_fail 90 /* 0x5a Regmatch state for CURLYM */
-#define IFMATCH_A 91 /* 0x5b Regmatch state for IFMATCH */
-#define IFMATCH_A_fail 92 /* 0x5c Regmatch state for IFMATCH */
-#define CURLY_B_min_known 93 /* 0x5d Regmatch state for CURLY */
-#define CURLY_B_min_known_fail 94 /* 0x5e Regmatch state for CURLY */
-#define CURLY_B_min 95 /* 0x5f Regmatch state for CURLY */
-#define CURLY_B_min_fail 96 /* 0x60 Regmatch state for CURLY */
-#define CURLY_B_max 97 /* 0x61 Regmatch state for CURLY */
-#define CURLY_B_max_fail 98 /* 0x62 Regmatch state for CURLY */
+#define TRIE_next 72 /* 0x48 Regmatch state for TRIE */
+#define TRIE_next_fail 73 /* 0x49 Regmatch state for TRIE */
+#define EVAL_AB 74 /* 0x4a Regmatch state for EVAL */
+#define EVAL_AB_fail 75 /* 0x4b Regmatch state for EVAL */
+#define CURLYX_end 76 /* 0x4c Regmatch state for CURLYX */
+#define CURLYX_end_fail 77 /* 0x4d Regmatch state for CURLYX */
+#define WHILEM_A_pre 78 /* 0x4e Regmatch state for WHILEM */
+#define WHILEM_A_pre_fail 79 /* 0x4f Regmatch state for WHILEM */
+#define WHILEM_A_min 80 /* 0x50 Regmatch state for WHILEM */
+#define WHILEM_A_min_fail 81 /* 0x51 Regmatch state for WHILEM */
+#define WHILEM_A_max 82 /* 0x52 Regmatch state for WHILEM */
+#define WHILEM_A_max_fail 83 /* 0x53 Regmatch state for WHILEM */
+#define WHILEM_B_min 84 /* 0x54 Regmatch state for WHILEM */
+#define WHILEM_B_min_fail 85 /* 0x55 Regmatch state for WHILEM */
+#define WHILEM_B_max 86 /* 0x56 Regmatch state for WHILEM */
+#define WHILEM_B_max_fail 87 /* 0x57 Regmatch state for WHILEM */
+#define BRANCH_next 88 /* 0x58 Regmatch state for BRANCH */
+#define BRANCH_next_fail 89 /* 0x59 Regmatch state for BRANCH */
+#define CURLYM_A 90 /* 0x5a Regmatch state for CURLYM */
+#define CURLYM_A_fail 91 /* 0x5b Regmatch state for CURLYM */
+#define CURLYM_B 92 /* 0x5c Regmatch state for CURLYM */
+#define CURLYM_B_fail 93 /* 0x5d Regmatch state for CURLYM */
+#define IFMATCH_A 94 /* 0x5e Regmatch state for IFMATCH */
+#define IFMATCH_A_fail 95 /* 0x5f Regmatch state for IFMATCH */
+#define CURLY_B_min_known 96 /* 0x60 Regmatch state for CURLY */
+#define CURLY_B_min_known_fail 97 /* 0x61 Regmatch state for CURLY */
+#define CURLY_B_min 98 /* 0x62 Regmatch state for CURLY */
+#define CURLY_B_min_fail 99 /* 0x63 Regmatch state for CURLY */
+#define CURLY_B_max 100 /* 0x64 Regmatch state for CURLY */
+#define CURLY_B_max_fail 101 /* 0x65 Regmatch state for CURLY */
/* PL_regkind[] What type of regop or state is this. */
TRIE, /* AHOCORASICKC */
RECURSE, /* RECURSE */
RECURSE, /* SRECURSE */
+ NREF, /* NREF */
+ NREF, /* NREFF */
+ NREF, /* NREFFL */
NOTHING, /* OPTIMIZED */
PSEUDO, /* PSEUDO */
/* ------------ States ------------- */
EXTRA_SIZE(struct regnode_charclass), /* AHOCORASICKC */
EXTRA_SIZE(struct regnode_2L), /* RECURSE */
0, /* SRECURSE */
+ EXTRA_SIZE(struct regnode_1), /* NREF */
+ EXTRA_SIZE(struct regnode_1), /* NREFF */
+ EXTRA_SIZE(struct regnode_1), /* NREFFL */
0, /* OPTIMIZED */
0, /* PSEUDO */
};
0, /* AHOCORASICKC */
0, /* RECURSE */
0, /* SRECURSE */
+ 0, /* NREF */
+ 0, /* NREFF */
+ 0, /* NREFFL */
0, /* OPTIMIZED */
0, /* PSEUDO */
};
"AHOCORASICKC", /* 0x40 */
"RECURSE", /* 0x41 */
"SRECURSE", /* 0x42 */
- "OPTIMIZED", /* 0x43 */
- "PSEUDO", /* 0x44 */
+ "NREF", /* 0x43 */
+ "NREFF", /* 0x44 */
+ "NREFFL", /* 0x45 */
+ "OPTIMIZED", /* 0x46 */
+ "PSEUDO", /* 0x47 */
/* ------------ States ------------- */
- "TRIE_next", /* 0x45 */
- "TRIE_next_fail", /* 0x46 */
- "EVAL_AB", /* 0x47 */
- "EVAL_AB_fail", /* 0x48 */
- "CURLYX_end", /* 0x49 */
- "CURLYX_end_fail", /* 0x4a */
- "WHILEM_A_pre", /* 0x4b */
- "WHILEM_A_pre_fail", /* 0x4c */
- "WHILEM_A_min", /* 0x4d */
- "WHILEM_A_min_fail", /* 0x4e */
- "WHILEM_A_max", /* 0x4f */
- "WHILEM_A_max_fail", /* 0x50 */
- "WHILEM_B_min", /* 0x51 */
- "WHILEM_B_min_fail", /* 0x52 */
- "WHILEM_B_max", /* 0x53 */
- "WHILEM_B_max_fail", /* 0x54 */
- "BRANCH_next", /* 0x55 */
- "BRANCH_next_fail", /* 0x56 */
- "CURLYM_A", /* 0x57 */
- "CURLYM_A_fail", /* 0x58 */
- "CURLYM_B", /* 0x59 */
- "CURLYM_B_fail", /* 0x5a */
- "IFMATCH_A", /* 0x5b */
- "IFMATCH_A_fail", /* 0x5c */
- "CURLY_B_min_known", /* 0x5d */
- "CURLY_B_min_known_fail", /* 0x5e */
- "CURLY_B_min", /* 0x5f */
- "CURLY_B_min_fail", /* 0x60 */
- "CURLY_B_max", /* 0x61 */
- "CURLY_B_max_fail", /* 0x62 */
+ "TRIE_next", /* 0x48 */
+ "TRIE_next_fail", /* 0x49 */
+ "EVAL_AB", /* 0x4a */
+ "EVAL_AB_fail", /* 0x4b */
+ "CURLYX_end", /* 0x4c */
+ "CURLYX_end_fail", /* 0x4d */
+ "WHILEM_A_pre", /* 0x4e */
+ "WHILEM_A_pre_fail", /* 0x4f */
+ "WHILEM_A_min", /* 0x50 */
+ "WHILEM_A_min_fail", /* 0x51 */
+ "WHILEM_A_max", /* 0x52 */
+ "WHILEM_A_max_fail", /* 0x53 */
+ "WHILEM_B_min", /* 0x54 */
+ "WHILEM_B_min_fail", /* 0x55 */
+ "WHILEM_B_max", /* 0x56 */
+ "WHILEM_B_max_fail", /* 0x57 */
+ "BRANCH_next", /* 0x58 */
+ "BRANCH_next_fail", /* 0x59 */
+ "CURLYM_A", /* 0x5a */
+ "CURLYM_A_fail", /* 0x5b */
+ "CURLYM_B", /* 0x5c */
+ "CURLYM_B_fail", /* 0x5d */
+ "IFMATCH_A", /* 0x5e */
+ "IFMATCH_A_fail", /* 0x5f */
+ "CURLY_B_min_known", /* 0x60 */
+ "CURLY_B_min_known_fail", /* 0x61 */
+ "CURLY_B_min", /* 0x62 */
+ "CURLY_B_min_fail", /* 0x63 */
+ "CURLY_B_max", /* 0x64 */
+ "CURLY_B_max_fail", /* 0x65 */
};
#endif /* DEBUGGING */
#else
case PERL_MAGIC_regdata:
vtable = &PL_vtbl_regdata;
break;
+ case PERL_MAGIC_regdata_names:
+ vtable = &PL_vtbl_regdata_names;
+ break;
case PERL_MAGIC_regdatum:
vtable = &PL_vtbl_regdatum;
break;
}
}
+{
+ my $s='123453456';
+ $s=~s/(?<digits>\d+)\k<digits>/$+{digits}/;
+ ok($s eq '123456','Named capture (angle brackets) s///');
+ $s='123453456';
+ $s=~s/(?'digits'\d+)\k'digits'/$+{digits}/;
+ ok($s eq '123456','Named capture (single quotes) s///');
+}
+{
+ my $s='foo bar baz';
+ my (@k,@v,$count);
+ if ($s=~/(?<A>foo)\s+(?<B>bar)?\s+(?<C>baz)/) {
+ while (my ($k,$v)=each(%+)) {
+ $count++;
+ }
+ @k=sort keys(%+);
+ @v=sort values(%+);
+ }
+ ok($count==3,"Got 3 keys in %+ via each ($count)");
+ ok(@k == 3, 'Got 3 keys in %+ via keys');
+ ok("@k" eq "A B C", "Got expected keys");
+ ok("@v" eq "bar baz foo", "Got expected values");
+}
+
+
# stress test CURLYX/WHILEM.
#
# This test includes varying levels of nesting, and according to
or print "# Unexpected outcome: should pass or crash perl\n";
# Don't forget to update this!
-BEGIN{print "1..1264\n"};
+BEGIN{print "1..1270\n"};
^(<(?:[^<>]+|(?1))*>)$ <<><<<><>>>> y $1 <<><<<><>>>>
((?2)*)([fF]o+) fooFoFoo y $1-$2 fooFo-Foo
(<(?:[^<>]+|(?R))*>) <<><<<><>>>> y $1 <<><<<><>>>>
+(?<n>foo|bar|baz) snofooewa y $1 foo
+(?<n>foo|bar|baz) snofooewa y $+{n} foo
+(?<n>foo|bar|baz)(?<m>[ew]+) snofooewa y $+{n} foo
+(?<n>foo|bar|baz)(?<m>[ew]+) snofooewa y $+{m} ew
+(?<n>foo)|(?<n>bar)|(?<n>baz) snofooewa y $+{n} foo
+(?<n>foo)(??{ $+{n} }) snofooefoofoowaa y $+{n} foo
+/(?'n'foo|bar|baz)/ snofooewa y $1 foo
+/(?'n'foo|bar|baz)/ snofooewa y $+{n} foo
+/(?'n'foo|bar|baz)(?'m'[ew]+)/ snofooewa y $+{n} foo
+/(?'n'foo|bar|baz)(?'m'[ew]+)/ snofooewa y $+{m} ew
+/(?'n'foo)|(?'n'bar)|(?<n>baz)/ snobazewa y $+{n} baz
+/(?'n'foo)(??{ $+{n} })/ snofooefoofoowaa y $+{n} foo
+/(?'n'foo)\k<n>/ ..foofoo.. y $1 foo
+/(?'n'foo)\k<n>/ ..foofoo.. y $+{n} foo
+/(?<n>foo)\k'n'/ ..foofoo.. y $1 foo
+/(?<n>foo)\k'n'/ ..foofoo.. y $+{n} foo
+/(?:(?<n>foo)|(?<n>bar))\k<n>/ ..barbar.. y $+{n} bar
$input = join(':',$pat,$subject,$result,$repl,$expect);
infty_subst(\$pat);
infty_subst(\$expect);
- $pat = "'$pat'" unless $pat =~ /^[:']/;
+ $pat = "'$pat'" unless $pat =~ /^[:'\/]/;
$pat =~ s/(\$\{\w+\})/$1/eeg;
$pat =~ s/\\n/\n/g;
$subject = eval qq("$subject");
const char * const leaveit = /* set of acceptably-backslashed characters */
(const char *)
(PL_lex_inpat
- ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-Nnrtfeaxcz0123456789[{]} \t\n\r\f\v#"
+ ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-Nnrktfeaxcz0123456789[{]} \t\n\r\f\v#"
: "");
if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {