Es |regnode*|reg_namedseq |NN struct RExC_state_t *state|NULLOK UV *valuep
Es |void |reginsert |NN struct RExC_state_t *state|U8 op|NN regnode *opnd|U32 depth
Es |void |regtail |NN struct RExC_state_t *state|NN regnode *p|NN const regnode *val|U32 depth
+Es |SV * |reg_scan_name |NN struct RExC_state_t *state|U32 flags
Es |U32 |join_exact |NN struct RExC_state_t *state|NN regnode *scan|NN I32 *min|U32 flags|NULLOK regnode *val|U32 depth
EsRn |char* |regwhite |NN char *p|NN const char *e
Es |char* |nextchar |NN struct RExC_state_t *state
#define reg_namedseq S_reg_namedseq
#define reginsert S_reginsert
#define regtail S_regtail
+#define reg_scan_name S_reg_scan_name
#define join_exact S_join_exact
#define regwhite S_regwhite
#define nextchar S_nextchar
#define reg_namedseq(a,b) S_reg_namedseq(aTHX_ a,b)
#define reginsert(a,b,c,d) S_reginsert(aTHX_ a,b,c,d)
#define regtail(a,b,c,d) S_regtail(aTHX_ a,b,c,d)
+#define reg_scan_name(a,b) S_reg_scan_name(aTHX_ a,b)
#define join_exact(a,b,c,d,e,f) S_join_exact(aTHX_ a,b,c,d,e,f)
#define regwhite S_regwhite
#define nextchar(a) S_nextchar(aTHX_ a)
if ($@) {
$ENV{PERL_RE_COLORS}||=qq'\t\t> <\t> <\t\t'
}
-
+
}
my %flags = (
);
$flags{ALL} = -1;
$flags{All} = $flags{all} = $flags{DUMP} | $flags{EXECUTE};
+$flags{Extra} = $flags{EXECUTE} | $flags{COMPILE};
$flags{More} = $flags{MORE} = $flags{All} | $flags{TRIEC} | $flags{TRIEM} | $flags{STATE};
$flags{State} = $flags{DUMP} | $flags{EXECUTE} | $flags{STATE};
$flags{TRIE} = $flags{DUMP} | $flags{EXECUTE} | $flags{TRIEC};
-my $installed =eval {
- require XSLoader;
- XSLoader::load('re');
- install();
-};
+my $installed;
sub _load_unload {
my ($on)= @_;
if ($on) {
- die "'re' not installed!?" unless $installed;
- #warn "installed: $installed\n";
- install(); # allow for changes in colors
- $^H{regcomp}= $installed;
+ if ( ! defined($installed) ) {
+ require XSLoader;
+ XSLoader::load('re');
+ $installed = install() || 0;
+ }
+ if ( ! $installed ) {
+ die "'re' not installed!?";
+ } else {
+ # We could just say = $installed; but then we wouldn't
+ # "see" any changes to the color environment var.
+
+ # install() returns an integer, which if casted properly
+ # in C resolves to a structure containing the regex
+ # hooks. Setting it to a random integer will guarantee
+ # segfaults.
+ $^H{regcomp} = install();
+ }
} else {
delete $^H{regcomp};
}
result in a fatal error. The maximum depth is compiled into perl, so
changing it requires a custom build.
-=item C<(?PARNO)> C<(?R)>
-
-X<(?PARNO)> X<(?1)>
+=item C<(?PARNO)> C<(?R)> C<(?0)>
+X<(?PARNO)> X<(?1)> X<(?R)> X<(?0)>
X<regex, recursive> X<regexp, recursive> X<regular expression, recursive>
B<WARNING>: This extended regular expression feature is considered
contained by the pattern will have the value as determined by the
outermost recursion.
-PARNO is a sequence of digits not starting with 0 whose value
-reflects the paren-number of the capture buffer to recurse to.
-C<(?R)> curses to the beginning of the pattern.
+PARNO is a sequence of digits (not starting with 0) whose value reflects
+the paren-number of the capture buffer to recurse to. C<(?R)> recurses to
+the beginning of the whole pattern. C<(?0)> is an alternate syntax for
+C<(?R)>.
The following pattern matches a function foo() which may contain
balanced parenthesis as the argument.
as atomic. Also, constructs like (?i:(?1)) or (?:(?i)(?1)) do not affect
the pattern being recursed into.
+=item C<(?&NAME)>
+X<(?&NAME)>
+
+Recurse to a named subpattern. Identical to (?PARNO) except that the
+parenthesis to recurse to is determined by name. If multiple parens have
+the same name, then it recurses to the leftmost.
+
+It is an error to refer to a name that is not declared somewhere in the
+pattern.
+
=item C<< (?>pattern) >>
X<backtrack> X<backtracking> X<atomic> X<possessive>
__attribute__nonnull__(pTHX_2)
__attribute__nonnull__(pTHX_3);
+STATIC SV * S_reg_scan_name(pTHX_ struct RExC_state_t *state, U32 flags)
+ __attribute__nonnull__(pTHX_1);
+
STATIC U32 S_join_exact(pTHX_ struct RExC_state_t *state, regnode *scan, I32 *min, U32 flags, regnode *val, U32 depth)
__attribute__nonnull__(pTHX_1)
__attribute__nonnull__(pTHX_2)
}
}
+/* Scans the name of a named buffer from the pattern.
+ * If flags is true then returns an SV containing the name.
+ */
+STATIC SV*
+S_reg_scan_name(pTHX_ RExC_state_t *pRExC_state, U32 flags) {
+ char *name_start = RExC_parse;
+ if (UTF) {
+ STRLEN numlen;
+ while (isIDFIRST_uni(utf8n_to_uvchr((U8*)RExC_parse,
+ RExC_end - RExC_parse,
+ &numlen, UTF8_ALLOW_DEFAULT)))
+ RExC_parse += numlen;
+ }
+ else {
+ while (isIDFIRST(*RExC_parse))
+ RExC_parse++;
+ }
+ if (flags) {
+ SV* svname = sv_2mortal(Perl_newSVpvn(aTHX_ name_start,
+ (int)(RExC_parse - name_start)));
+ if (UTF)
+ SvUTF8_on(svname);
+ return svname;
+ }
+ else {
+ return NULL;
+ }
+}
+
#define DEBUG_PARSE_MSG(funcname) DEBUG_PARSE_r({ \
int rem=(int)(RExC_end - RExC_parse); \
int cut; \
paren = *RExC_parse++;
ret = NULL; /* For look-ahead/behind. */
switch (paren) {
-
+
case '<': /* (?<...) */
if (*RExC_parse == '!')
paren = ',';
- else if (*RExC_parse != '=')
- { /* (?<...>) */
+ else if (*RExC_parse != '=') { /* (?<...>) */
char *name_start;
+ SV *svname;
paren= '>';
case '\'': /* (?'...') */
name_start= RExC_parse;
- if (UTF) {
- STRLEN numlen;
- while(isIDFIRST_uni(utf8n_to_uvchr((U8*)RExC_parse,
- RExC_end - RExC_parse,
- &numlen, UTF8_ALLOW_DEFAULT)))
- RExC_parse += numlen;
- } else {
- while(isIDFIRST(*RExC_parse))
- RExC_parse++;
- }
+ svname = reg_scan_name(pRExC_state,SIZE_ONLY);
if (RExC_parse == name_start)
goto unknown;
if (*RExC_parse != paren)
vFAIL2("Sequence (?%c... not terminated",
paren=='>' ? '<' : paren);
if (SIZE_ONLY) {
- SV *svname= Perl_newSVpvf(aTHX_ "%.*s",
- (int)(RExC_parse - name_start), name_start);
HE *he_str;
SV *sv_dat = NULL;
-
+ if (!svname) /* shouldnt happen */
+ Perl_croak(aTHX_
+ "panic: reg_scan_name returned NULL");
if (!RExC_paren_names) {
RExC_paren_names= newHV();
sv_2mortal((SV*)RExC_paren_names);
nextchar(pRExC_state);
*flagp = TRYAGAIN;
return NULL;
- case '0' :
- case 'R' :
- if (*RExC_parse != ')')
+ case '0' : /* (?0) */
+ case 'R' : /* (?R) */
+ if (*RExC_parse != ')')
FAIL("Sequence (?R) not terminated");
reg_node(pRExC_state, SRECURSE);
- break;
+ break; /* (?PARNO) */
+ { /* named and numeric backreferences */
+ I32 num;
+ char * parse_start;
+ case '&': /* (?&NAME) */
+ parse_start = RExC_parse - 1;
+ {
+ char *name_start = RExC_parse;
+ SV *svname = reg_scan_name(pRExC_state, !SIZE_ONLY);
+ if (RExC_parse == name_start)
+ goto unknown;
+ if (*RExC_parse != ')')
+ vFAIL("Expecting close bracket");
+ if (!SIZE_ONLY) {
+ HE *he_str = NULL;
+ SV *sv_dat;
+ if (!svname) /* shouldn't happen*/
+ Perl_croak(aTHX_ "panic: reg_scan_name returned NULL");
+ if (RExC_paren_names)
+ he_str = hv_fetch_ent( RExC_paren_names, svname, 0, 0 );
+ if (he_str)
+ sv_dat = HeVAL(he_str);
+ else
+ vFAIL("Reference to nonexistent group");
+ num = *((I32 *)SvPVX(sv_dat));
+ } else {
+ num = 0;
+ }
+ }
+ goto gen_recurse_regop;
+ /* NOT REACHED */
case '1': case '2': case '3': case '4': /* (?1) */
case '5': case '6': case '7': case '8': case '9':
RExC_parse--;
- {
- const I32 num = atoi(RExC_parse);
- char * const parse_start = RExC_parse - 1; /* MJD */
+ num = atoi(RExC_parse);
+ parse_start = RExC_parse - 1; /* MJD */
while (isDIGIT(*RExC_parse))
RExC_parse++;
if (*RExC_parse!=')')
vFAIL("Expecting close bracket");
+
+ gen_recurse_regop:
ret = reganode(pRExC_state, RECURSE, num);
if (!SIZE_ONLY) {
if (num > (I32)RExC_rx->nparens) {
RExC_emit++;
DEBUG_OPTIMISE_MORE_r(PerlIO_printf(Perl_debug_log,
"Recurse #%"UVuf" to %"IVdf"\n", ARG(ret), ARG2L(ret)));
- } else{
+ } else {
RExC_size++;
RExC_seen|=REG_SEEN_RECURSE;
}
nextchar(pRExC_state);
return ret;
- }
+ } /* named and numeric backreferences */
+ /* NOT REACHED */
+
case 'p': /* (?p...) */
if (SIZE_ONLY && ckWARN2(WARN_DEPRECATED, WARN_REGEXP))
vWARNdep(RExC_parse, "(?p{}) is deprecated - use (??{})");
} else {
char* name_start = (RExC_parse += 2);
I32 num = 0;
- ch= (ch == '<') ? '>' : '\'';
-
- if (UTF) {
- STRLEN numlen;
- while(isIDFIRST_uni(utf8n_to_uvchr((U8*)RExC_parse,
- RExC_end - RExC_parse,
- &numlen, UTF8_ALLOW_DEFAULT)))
- RExC_parse += numlen;
- } else {
- while(isIDFIRST(*RExC_parse))
- RExC_parse++;
- }
+ SV *svname = reg_scan_name(pRExC_state,!SIZE_ONLY);
+ ch= (ch == '<') ? '>' : '\'';
+
if (RExC_parse == name_start || *RExC_parse != ch)
vFAIL2("Sequence \\k%c... not terminated",
(ch == '>' ? '<' : ch));
if (!SIZE_ONLY) {
- SV *svname = Perl_newSVpvf(aTHX_ "%.*s",
- (int)(RExC_parse - name_start), name_start);
- HE *he_str;
+ HE *he_str = NULL;
SV *sv_dat;
- if (UTF)
- SvUTF8_on(svname);
- he_str = hv_fetch_ent( RExC_paren_names, svname, 0, 0 );
- SvREFCNT_dec(svname);
+ if (!svname)
+ Perl_croak(aTHX_
+ "panic: reg_scan_name returned NULL");
+ if (RExC_paren_names)
+ he_str = hv_fetch_ent( RExC_paren_names, svname, 0, 0 );
if ( he_str ) {
sv_dat = HeVAL(he_str);
} else {
/(?<n>foo)\k'n'/ ..foofoo.. y $1 foo
/(?<n>foo)\k'n'/ ..foofoo.. y $+{n} foo
/(?:(?<n>foo)|(?<n>bar))\k<n>/ ..barbar.. y $+{n} bar
+/^(?'main'<(?:[^<>]+|(?&crap)|(?&main))*>)(?'empty')(?'crap'!>!>!>)$/ <<!>!>!>><>>!>!>!> y $+{main} <<!>!>!>><>>
+/^(?'main'<(?:[^<>]+|(?&main))*>)$/ <<><<<><>>>> y $1 <<><<<><>>>>
+/(?'first'(?&second)*)(?'second'[fF]o+)/ fooFoFoo y $+{first}-$+{second} fooFo-Foo