#define PL_ofslen (PL_curinterp->Tofslen)
#define PL_op (PL_curinterp->Top)
#define PL_opsave (PL_curinterp->Topsave)
+#define PL_reg_call_cc (PL_curinterp->Treg_call_cc)
#define PL_reg_eval_set (PL_curinterp->Treg_eval_set)
#define PL_reg_flags (PL_curinterp->Treg_flags)
+#define PL_reg_re (PL_curinterp->Treg_re)
#define PL_reg_start_tmp (PL_curinterp->Treg_start_tmp)
#define PL_reg_start_tmpl (PL_curinterp->Treg_start_tmpl)
#define PL_regbol (PL_curinterp->Tregbol)
#define PL_Tofslen PL_ofslen
#define PL_Top PL_op
#define PL_Topsave PL_opsave
+#define PL_Treg_call_cc PL_reg_call_cc
#define PL_Treg_eval_set PL_reg_eval_set
#define PL_Treg_flags PL_reg_flags
+#define PL_Treg_re PL_reg_re
#define PL_Treg_start_tmp PL_reg_start_tmp
#define PL_Treg_start_tmpl PL_reg_start_tmpl
#define PL_Tregbol PL_regbol
#define PL_ofslen (thr->Tofslen)
#define PL_op (thr->Top)
#define PL_opsave (thr->Topsave)
+#define PL_reg_call_cc (thr->Treg_call_cc)
#define PL_reg_eval_set (thr->Treg_eval_set)
#define PL_reg_flags (thr->Treg_flags)
+#define PL_reg_re (thr->Treg_re)
#define PL_reg_start_tmp (thr->Treg_start_tmp)
#define PL_reg_start_tmpl (thr->Treg_start_tmpl)
#define PL_regbol (thr->Tregbol)
#define PL_regbol pPerl->PL_regbol
#undef PL_regcc
#define PL_regcc pPerl->PL_regcc
+#undef PL_reg_call_cc
+#define PL_reg_call_cc pPerl->PL_reg_call_cc
+#undef PL_reg_re
+#define PL_reg_re pPerl->PL_reg_re
#undef PL_regcode
#define PL_regcode pPerl->PL_regcode
#undef PL_regcomp_parse
I<with tainting enabled>, one needs both C<use re 'eval'> and untaint
the $re.
+=item C<(?p{ code })>
+
+I<Very experimental> "postponed" regular subexpression. C<code> is evaluated
+at runtime, at the moment this subexpression may match. The result of
+evaluation is considered as a regular expression, and matched as if it
+were inserted instead of this construct.
+
+C<code> is not interpolated. Currently the rules to
+determine where the C<code> ends are somewhat convoluted.
+
+The following regular expression matches matching parenthesized group:
+
+ $re = qr{
+ \(
+ (?:
+ (?> [^()]+ ) # Non-parens without backtracking
+ |
+ (?p{ $re }) # Group with matching parens
+ )*
+ \)
+ }x;
+
=item C<(?E<gt>pattern)>
An "independent" subexpression. Matches the substring that a
if (data)
data->flags |= SF_HAS_EVAL;
}
+ else if (OP(scan) == LOGICAL && scan->flags == 2) { /* Embedded */
+ if (flags & SCF_DO_SUBSTR) {
+ scan_commit(data);
+ data->longest = &(data->longest_float);
+ }
+ is_inf = is_inf_internal = 1;
+ }
/* Else: zero-length, ignore. */
scan = regnext(scan);
}
if (*PL_regcomp_parse == '?') {
U16 posflags = 0, negflags = 0;
U16 *flagsp = &posflags;
+ int logical = 0;
PL_regcomp_parse++;
paren = *PL_regcomp_parse++;
nextchar();
*flagp = TRYAGAIN;
return NULL;
+ case 'p':
+ logical = 1;
+ paren = *PL_regcomp_parse++;
+ /* FALL THROUGH */
case '{':
{
dTHR;
}
nextchar();
+ if (logical) {
+ ret = reg_node(LOGICAL);
+ if (!SIZE_ONLY)
+ ret->flags = 2;
+ regtail(ret, reganode(EVAL, n));
+ return ret;
+ }
return reganode(EVAL, n);
}
case '(':
I32 flag;
ret = reg_node(LOGICAL);
+ if (!SIZE_ONLY)
+ ret->flags = 1;
regtail(ret, reg(1, &flag));
goto insert_if;
}
sv_catpvf(sv, "GROUPP%d", ARG(o));
break;
case LOGICAL:
- p = "LOGICAL";
+ sv_catpvf(sv, "LOGICAL[%d]", o->flags);
break;
case SUSPEND:
p = "SUSPEND";
*/
#include "EXTERN.h"
#include "perl.h"
+typedef MAGIC *my_magic;
+
#include "regcomp.h"
#define RF_tainted 1 /* tainted information used? */
return input;
}
+STATIC char *
+regcp_set_to(I32 ss)
+{
+ I32 tmp = PL_savestack_ix;
+
+ PL_savestack_ix = ss;
+ regcppop();
+ PL_savestack_ix = tmp;
+}
+
+typedef struct re_cc_state
+{
+ I32 ss;
+ regnode *node;
+ struct re_cc_state *prev;
+ CURCUR *cc;
+ regexp *re;
+} re_cc_state;
+
#define regcpblow(cp) LEAVE_SCOPE(cp)
/*
regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
nosave ? 0 : REXEC_COPY_STR);
}
+
+STATIC void
+cache_re(regexp *prog)
+{
+ PL_regprecomp = prog->precomp; /* Needed for FAIL. */
+#ifdef DEBUGGING
+ PL_regprogram = prog->program;
+#endif
+ PL_regnpar = prog->nparens;
+ PL_regdata = prog->data;
+ PL_reg_re = prog;
+}
/*
- regexec_flags - match a regexp against a string
cc.oldcc = 0;
PL_regcc = &cc;
- PL_regprecomp = prog->precomp; /* Needed for error messages. */
+ cache_re(prog);
#ifdef DEBUGGING
PL_regnarrate = PL_debug & 512;
- PL_regprogram = prog->program;
#endif
/* Be paranoid... */
FAIL("corrupted regexp program");
}
- PL_regnpar = prog->nparens;
PL_reg_flags = 0;
PL_reg_eval_set = 0;
/* see how far we have to get to not match where we matched before */
PL_regtill = startpos+minend;
+ /* We start without call_cc context. */
+ PL_reg_call_cc = 0;
+
/* If there is a "must appear" string, look for it. */
s = startpos;
if (!(flags & REXEC_CHECKED)
(strend - startpos > 60 ? "..." : ""))
);
- PL_regdata = prog->data;
-
/* Simplest case: anchored match need be tried only once. */
/* [unless only anchor is BOL and multiline is set] */
if (prog->reganch & ROPT_ANCH) {
ret = POPs;
PUTBACK;
+ PL_op = oop;
+ PL_curpad = ocurpad;
+ PL_curcop = ocurcop;
if (logical) {
- logical = 0;
+ if (logical == 2) { /* Postponed subexpression. */
+ regexp *re;
+ my_magic mg = Null(my_magic);
+ re_cc_state state;
+ CURCUR cctmp;
+ CHECKPOINT cp, lastcp;
+
+ if(SvROK(ret) || SvRMAGICAL(ret)) {
+ SV *sv = SvROK(ret) ? SvRV(ret) : ret;
+
+ if(SvMAGICAL(sv))
+ mg = mg_find(sv, 'r');
+ }
+ if (mg) {
+ re = (regexp *)mg->mg_obj;
+ ReREFCNT_inc(re);
+ }
+ else {
+ STRLEN len;
+ char *t = SvPV(ret, len);
+ PMOP pm;
+ char *oprecomp = PL_regprecomp;
+ I32 osize = PL_regsize;
+ I32 onpar = PL_regnpar;
+
+ pm.op_pmflags = 0;
+ re = CALLREGCOMP(t, t + len, &pm);
+ if (!(SvFLAGS(ret)
+ & (SVs_TEMP | SVs_PADTMP | SVf_READONLY)))
+ sv_magic(ret,(SV*)ReREFCNT_inc(re),'r',0,0);
+ PL_regprecomp = oprecomp;
+ PL_regsize = osize;
+ PL_regnpar = onpar;
+ }
+ DEBUG_r(
+ PerlIO_printf(Perl_debug_log,
+ "Entering embedded `%s%.60s%s%s'\n",
+ PL_colors[0],
+ re->precomp,
+ PL_colors[1],
+ (strlen(re->precomp) > 60 ? "..." : ""))
+ );
+ state.node = next;
+ state.prev = PL_reg_call_cc;
+ state.cc = PL_regcc;
+ state.re = PL_reg_re;
+
+ cctmp.cur = 0;
+ cctmp.oldcc = 0;
+ PL_regcc = &cctmp;
+
+ cp = regcppush(0); /* Save *all* the positions. */
+ REGCP_SET;
+ cache_re(re);
+ state.ss = PL_savestack_ix;
+ *PL_reglastparen = 0;
+ PL_reg_call_cc = &state;
+ PL_reginput = locinput;
+ if (regmatch(re->program + 1)) {
+ ReREFCNT_dec(re);
+ regcpblow(cp);
+ sayYES;
+ }
+ DEBUG_r(
+ PerlIO_printf(Perl_debug_log,
+ "%*s failed...\n",
+ REPORT_CODE_OFF+PL_regindent*2, "")
+ );
+ ReREFCNT_dec(re);
+ REGCP_UNWIND;
+ regcppop();
+ PL_reg_call_cc = state.prev;
+ PL_regcc = state.cc;
+ PL_reg_re = state.re;
+ sayNO;
+ }
sw = SvTRUE(ret);
+ logical = 0;
}
else
sv_setsv(save_scalar(PL_replgv), ret);
- PL_op = oop;
- PL_curpad = ocurpad;
- PL_curcop = ocurcop;
break;
}
case OPEN:
}
break;
case LOGICAL:
- logical = 1;
+ logical = scan->flags;
break;
case CURLYX: {
CURCUR cc;
sayNO;
break;
case END:
+ if (PL_reg_call_cc) {
+ re_cc_state *cur_call_cc = PL_reg_call_cc;
+ CURCUR *cctmp = PL_regcc;
+ regexp *re = PL_reg_re;
+ CHECKPOINT cp, lastcp;
+
+ cp = regcppush(0); /* Save *all* the positions. */
+ REGCP_SET;
+ regcp_set_to(PL_reg_call_cc->ss); /* Restore parens of
+ the caller. */
+ PL_reginput = locinput; /* Make position available to
+ the callcc. */
+ cache_re(PL_reg_call_cc->re);
+ PL_regcc = PL_reg_call_cc->cc;
+ PL_reg_call_cc = PL_reg_call_cc->prev;
+ if (regmatch(cur_call_cc->node)) {
+ PL_reg_call_cc = cur_call_cc;
+ regcpblow(cp);
+ sayYES;
+ }
+ REGCP_UNWIND;
+ regcppop();
+ PL_reg_call_cc = cur_call_cc;
+ PL_regcc = cctmp;
+ PL_reg_re = re;
+ cache_re(re);
+
+ DEBUG_r(
+ PerlIO_printf(Perl_debug_log,
+ "%*s continuation failed...\n",
+ REPORT_CODE_OFF+PL_regindent*2, "")
+ );
+ sayNO;
+ }
if (locinput < PL_regtill)
sayNO; /* Cannot match: too short. */
/* Fall through */
# the format supported by op/regexp.t. If you want to add a test
# that does fit that format, add it to op/re_tests, not here.
-print "1..158\n";
+print "1..161\n";
BEGIN {
chdir 't' if -d 't';
/xg;
}
+@ans = ();
push @ans, $res while $res = matchit;
print "# ans='@ans'\n# expect='$expect'\nnot " if "@ans" ne "1 1 1";
print "ok $test\n";
$test++;
+my $matched;
+$matched = qr/\((?:(?>[^()]+)|(?p{$matched}))*\)/;
+
+@ans = @ans1 = ();
+push(@ans, $res), push(@ans1, $&) while $res = m/$matched/g;
+
+print "# ans='@ans'\n# expect='$expect'\nnot " if "@ans" ne "1 1 1";
+print "ok $test\n";
+$test++;
+
+print "# ans1='@ans1'\n# expect='$expect'\nnot " if "@ans1" ne $expect;
+print "ok $test\n";
+$test++;
+
+@ans = m/$matched/g;
+
+print "# ans='@ans'\n# expect='$expect'\nnot " if "@ans" ne $expect;
+print "ok $test\n";
+$test++;
+
@ans = ('a/b' =~ m%(.*/)?(.*)%); # Stack may be bad
print "not " if "@ans" ne 'a/ b';
print "ok $test\n";
b\z a\nb y - -
(^|x)(c) ca y $2 c
a*abc?xyz+pqr{3}ab{2,}xy{4,5}pq{0,6}AB{0,}zz x n - -
+a(?{$a=2;$b=3;($b)=$a})b yabz y $b 2
PERLVAR(Tregprogram, regnode *) /* from regexec.c */
PERLVARI(Tregindent, int, 0) /* from regexec.c */
PERLVAR(Tregcc, CURCUR *) /* from regexec.c */
+PERLVAR(Treg_call_cc, struct re_cc_state *) /* from regexec.c */
+PERLVAR(Treg_re, regexp *) /* from regexec.c */
PERLVARI(Tregcompp, regcomp_t, FUNC_NAME_TO_PTR(pregcomp))
/* Pointer to RE compiler */
/* if we get here, we're not doing a transliteration */
- /* skip for regexp comments /(?#comment)/ */
+ /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
+ except for the last char, which will be done separately. */
else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
if (s[2] == '#') {
while (s < send && *s != ')')
*d++ = *s++;
- } else if (s[2] == '{') { /* This should march regcomp.c */
+ } else if (s[2] == '{'
+ || s[2] == 'p' && s[3] == '{') { /* This should march regcomp.c */
I32 count = 1;
- char *regparse = s + 3;
+ char *regparse = s + (s[2] == '{' ? 3 : 4);
char c;
while (count && (c = *regparse)) {
count--;
regparse++;
}
- if (*regparse == ')')
- regparse++;
- else
+ if (*regparse != ')')
yyerror("Sequence (?{...}) not terminated or not {}-balanced");
- while (s < regparse && *s != ')')
+ while (s < regparse)
*d++ = *s++;
}
}