if (paren_elems_to_push < 0)
Perl_croak(aTHX_ "panic: paren_elems_to_push < 0");
-#define REGCP_OTHER_ELEMS 8
+#define REGCP_OTHER_ELEMS 7
SSGROW(paren_elems_to_push + REGCP_OTHER_ELEMS);
for (p = PL_regsize; p > parenfloor; p--) {
/* REGCP_PARENS_ELEMS are pushed for each pair of parentheses. */
- SSPUSHINT(PL_regendp[p]);
- SSPUSHINT(PL_regstartp[p]);
+ SSPUSHINT(PL_regoffs[p].end);
+ SSPUSHINT(PL_regoffs[p].start);
SSPUSHPTR(PL_reg_start_tmp[p]);
SSPUSHINT(p);
DEBUG_BUFFERS_r(PerlIO_printf(Perl_debug_log,
" saving \\%"UVuf" %"IVdf"(%"IVdf")..%"IVdf"\n",
- (UV)p, (IV)PL_regstartp[p],
+ (UV)p, (IV)PL_regoffs[p].start,
(IV)(PL_reg_start_tmp[p] - PL_bostr),
- (IV)PL_regendp[p]
+ (IV)PL_regoffs[p].end
));
}
/* REGCP_OTHER_ELEMS are pushed in any case, parentheses or no. */
- SSPUSHPTR(PL_regstartp);
- SSPUSHPTR(PL_regendp);
+ SSPUSHPTR(PL_regoffs);
SSPUSHINT(PL_regsize);
SSPUSHINT(*PL_reglastparen);
SSPUSHINT(*PL_reglastcloseparen);
dVAR;
U32 i;
char *input;
-
GET_RE_DEBUG_FLAGS_DECL;
+ PERL_ARGS_ASSERT_REGCPPOP;
+
/* Pop REGCP_OTHER_ELEMS before the parentheses loop starts. */
i = SSPOPINT;
assert(i == SAVEt_REGCONTEXT); /* Check that the magic cookie is there. */
*PL_reglastcloseparen = SSPOPINT;
*PL_reglastparen = SSPOPINT;
PL_regsize = SSPOPINT;
- PL_regendp=(I32 *) SSPOPPTR;
- PL_regstartp=(I32 *) SSPOPPTR;
+ PL_regoffs=(regexp_paren_pair *) SSPOPPTR;
/* Now restore the parentheses context. */
I32 tmps;
U32 paren = (U32)SSPOPINT;
PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
- PL_regstartp[paren] = SSPOPINT;
+ PL_regoffs[paren].start = SSPOPINT;
tmps = SSPOPINT;
if (paren <= *PL_reglastparen)
- PL_regendp[paren] = tmps;
+ PL_regoffs[paren].end = tmps;
DEBUG_BUFFERS_r(
PerlIO_printf(Perl_debug_log,
" restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
- (UV)paren, (IV)PL_regstartp[paren],
+ (UV)paren, (IV)PL_regoffs[paren].start,
(IV)(PL_reg_start_tmp[paren] - PL_bostr),
- (IV)PL_regendp[paren],
+ (IV)PL_regoffs[paren].end,
(paren > *PL_reglastparen ? "(no)" : ""));
);
}
* requiring null fields (pat.t#187 and split.t#{13,14}
* (as of patchlevel 7877) will fail. Then again,
* this code seems to be necessary or otherwise
- * building DynaLoader will fail:
- * "Error: '*' not in typemap in DynaLoader.xs, line 164"
- * --jhi */
+ * this erroneously leaves $1 defined: "1" =~ /^(?:(\d)x)?\d$/
+ * --jhi updated by dapm */
for (i = *PL_reglastparen + 1; i <= rex->nparens; i++) {
if (i > PL_regsize)
- PL_regstartp[i] = -1;
- PL_regendp[i] = -1;
+ PL_regoffs[i].start = -1;
+ PL_regoffs[i].end = -1;
}
#endif
return input;
- pregexec - match a regexp against a string
*/
I32
-Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend,
+Perl_pregexec(pTHX_ REGEXP * const prog, char* stringarg, register char *strend,
char *strbeg, I32 minend, SV *screamer, U32 nosave)
/* strend: pointer to null at end of string */
/* strbeg: real beginning of string */
/* minend: end of match must be >=minend after stringarg. */
/* nosave: For optimizations. */
{
+ PERL_ARGS_ASSERT_PREGEXEC;
+
return
regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
nosave ? 0 : REXEC_COPY_STR);
deleted from the finite automaton. */
char *
-Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
- char *strend, U32 flags, re_scream_pos_data *data)
+Perl_re_intuit_start(pTHX_ REGEXP * const rx, SV *sv, char *strpos,
+ char *strend, const U32 flags, re_scream_pos_data *data)
{
dVAR;
+ struct regexp *const prog = (struct regexp *)SvANY(rx);
register I32 start_shift = 0;
/* Should be nonnegative! */
register I32 end_shift = 0;
#ifdef DEBUGGING
const char * const i_strpos = strpos;
#endif
-
GET_RE_DEBUG_FLAGS_DECL;
- RX_MATCH_UTF8_set(prog,do_utf8);
+ PERL_ARGS_ASSERT_RE_INTUIT_START;
+
+ RX_MATCH_UTF8_set(rx,do_utf8);
- if (prog->extflags & RXf_UTF8) {
+ if (RX_UTF8(rx)) {
PL_reg_flags |= RF_utf8;
}
DEBUG_EXECUTE_r(
- debug_start_match(prog, do_utf8, strpos, strend,
+ debug_start_match(rx, do_utf8, strpos, strend,
sv ? "Guessing start of match in sv for"
: "Guessing start of match in string for");
);
#ifdef QDEBUGGING /* 7/99: reports of failure (with the older version) */
if (end_shift < 0)
Perl_croak(aTHX_ "panic: end_shift: %"IVdf" pattern:\n%s\n ",
- (IV)end_shift, prog->precomp);
+ (IV)end_shift, RX_PRECOMP(prog));
#endif
restart:
else
goto fail_finish;
/* we may be pointing at the wrong string */
- if (s && RX_MATCH_COPIED(prog))
+ if (s && RXp_MATCH_COPIED(prog))
s = strbeg + (s - SvPVX_const(sv));
if (data)
*data->scream_olds = s;
return NULL;
}
-
+#define DECL_TRIE_TYPE(scan) /* declares a const local named trie_type; expects do_utf8 and UTF at the expansion site */ \
+ const enum { trie_plain, trie_utf8, trie_utf8_fold, trie_latin_utf8_fold } \
+ trie_type = (scan->flags != EXACT) /* flags other than EXACT pick a fold variant or trie_plain */ \
+ ? (do_utf8 ? trie_utf8_fold : (UTF ? trie_latin_utf8_fold : trie_plain)) \
+ : (do_utf8 ? trie_utf8 : trie_plain) /* flags == EXACT: trie_utf8 or trie_plain; no trailing ';', the invocation supplies it */
#define REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc, uscan, len, \
uvc, charid, foldlen, foldbuf, uniflags) STMT_START { \
uscan = foldbuf + UNISKIP( uvc ); \
} \
break; \
+ case trie_latin_utf8_fold: \
+ if ( foldlen>0 ) { \
+ uvc = utf8n_to_uvuni( uscan, UTF8_MAXLEN, &len, uniflags ); \
+ foldlen -= len; \
+ uscan += len; \
+ len=0; \
+ } else { \
+ len = 1; \
+ uvc = to_uni_fold( *(U8*)uc, foldbuf, &foldlen ); \
+ foldlen -= UNISKIP( uvc ); \
+ uscan = foldbuf + UNISKIP( uvc ); \
+ } \
+ break; \
case trie_utf8: \
uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN, &len, uniflags ); \
break; \
} \
} STMT_END
-#define REXEC_FBC_EXACTISH_CHECK(CoNd) \
+#define REXEC_FBC_EXACTISH_CHECK(CoNd) \
+{ \
+ char *my_strend= (char *)strend; \
if ( (CoNd) \
&& (ln == len || \
- ibcmp_utf8(s, NULL, 0, do_utf8, \
+ !ibcmp_utf8(s, &my_strend, 0, do_utf8, \
m, NULL, ln, (bool)UTF)) \
- && (!reginfo || regtry(reginfo, &s)) ) \
+ && (!reginfo || regtry(reginfo, &s)) ) \
goto got_it; \
else { \
U8 foldbuf[UTF8_MAXBYTES_CASE+1]; \
f = to_utf8_fold(tmpbuf, foldbuf, &foldlen); \
if ( f != c \
&& (f == c1 || f == c2) \
- && (ln == foldlen || \
- !ibcmp_utf8((char *) foldbuf, \
- NULL, foldlen, do_utf8, \
- m, \
- NULL, ln, (bool)UTF)) \
- && (!reginfo || regtry(reginfo, &s)) ) \
+ && (ln == len || \
+ !ibcmp_utf8(s, &my_strend, 0, do_utf8,\
+ m, NULL, ln, (bool)UTF)) \
+ && (!reginfo || regtry(reginfo, &s)) ) \
goto got_it; \
} \
- s += len
+} \
+s += len
#define REXEC_FBC_EXACTISH_SCAN(CoNd) \
STMT_START { \
if ((!reginfo || regtry(reginfo, &s))) \
goto got_it
+#define REXEC_FBC_CSCAN(CoNdUtF8,CoNd) /* body of one switch case: do_utf8 selects which condition/scan macro is used */ \
+ if (do_utf8) { \
+ REXEC_FBC_UTF8_CLASS_SCAN(CoNdUtF8); \
+ } \
+ else { \
+ REXEC_FBC_CLASS_SCAN(CoNd); \
+ } \
+ break /* leaves the enclosing switch; the invocation's ';' completes this statement */
+
#define REXEC_FBC_CSCAN_PRELOAD(UtFpReLoAd,CoNdUtF8,CoNd) \
if (do_utf8) { \
UtFpReLoAd; \
register I32 tmp = 1; /* Scratch variable? */
register const bool do_utf8 = PL_reg_match_utf8;
RXi_GET_DECL(prog,progi);
+
+ PERL_ARGS_ASSERT_FIND_BYCLASS;
/* We know what class it must start with. */
switch (OP(c)) {
U8 *sm = (U8 *) m;
U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
- const U32 uniflags = UTF8_ALLOW_DEFAULT;
-
- to_utf8_lower((U8*)m, tmpbuf1, &ulen1);
- to_utf8_upper((U8*)m, tmpbuf2, &ulen2);
-
+ /* used by commented-out code below */
+ /*const U32 uniflags = UTF8_ALLOW_DEFAULT;*/
+
+ /* XXX: Since the node will be case folded at compile
+ time this logic is a little odd, although I'm not
+ sure that it's actually wrong. --dmq */
+
+ c1 = to_utf8_lower((U8*)m, tmpbuf1, &ulen1);
+ c2 = to_utf8_upper((U8*)m, tmpbuf2, &ulen2);
+
+ /* XXX: This is kinda strange. to_utf8_XYZ returns the
+ codepoint of the first character in the converted
+ form, yet originally we did the extra step.
+ No tests fail by commenting this code out however
+ so I've left it out. -- dmq.
+
c1 = utf8n_to_uvchr(tmpbuf1, UTF8_MAXBYTES_CASE,
0, uniflags);
c2 = utf8n_to_uvchr(tmpbuf2, UTF8_MAXBYTES_CASE,
0, uniflags);
+ */
+
lnc = 0;
while (sm < ((U8 *) m + ln)) {
lnc++;
* matching (called "loose matching" in Unicode).
* ibcmp_utf8() will do just that. */
- if (do_utf8) {
+ if (do_utf8 || UTF) {
UV c, f;
U8 tmpbuf [UTF8_MAXBYTES+1];
- STRLEN len, foldlen;
+ STRLEN len = 1;
+ STRLEN foldlen;
const U32 uniflags = UTF8_ALLOW_DEFAULT;
if (c1 == c2) {
/* Upper and lower of 1st char are equal -
* probably not a "letter". */
while (s <= e) {
- c = utf8n_to_uvchr((U8*)s, UTF8_MAXBYTES, &len,
+ if (do_utf8) {
+ c = utf8n_to_uvchr((U8*)s, UTF8_MAXBYTES, &len,
uniflags);
+ } else {
+ c = *((U8*)s);
+ }
REXEC_FBC_EXACTISH_CHECK(c == c1);
}
}
else {
while (s <= e) {
- c = utf8n_to_uvchr((U8*)s, UTF8_MAXBYTES, &len,
+ if (do_utf8) {
+ c = utf8n_to_uvchr((U8*)s, UTF8_MAXBYTES, &len,
uniflags);
+ } else {
+ c = *((U8*)s);
+ }
/* Handle some of the three Greek sigmas cases.
* Note that not all the possible combinations
}
}
else {
+ /* Neither pattern nor string are UTF8 */
if (c1 == c2)
REXEC_FBC_EXACTISH_SCAN(*(U8*)s == c1);
else
!isDIGIT_LC_utf8((U8*)s),
!isDIGIT_LC(*s)
);
+ case LNBREAK:
+ REXEC_FBC_CSCAN(
+ is_LNBREAK_utf8(s),
+ is_LNBREAK_latin1(s)
+ );
+ case VERTWS:
+ REXEC_FBC_CSCAN(
+ is_VERTWS_utf8(s),
+ is_VERTWS_latin1(s)
+ );
+ case NVERTWS:
+ REXEC_FBC_CSCAN(
+ !is_VERTWS_utf8(s),
+ !is_VERTWS_latin1(s)
+ );
+ case HORIZWS:
+ REXEC_FBC_CSCAN(
+ is_HORIZWS_utf8(s),
+ is_HORIZWS_latin1(s)
+ );
+ case NHORIZWS:
+ REXEC_FBC_CSCAN(
+ !is_HORIZWS_utf8(s),
+ !is_HORIZWS_latin1(s)
+ );
case AHOCORASICKC:
case AHOCORASICK:
{
- const enum { trie_plain, trie_utf8, trie_utf8_fold }
- trie_type = do_utf8 ?
- (c->flags == EXACT ? trie_utf8 : trie_utf8_fold)
- : trie_plain;
+ DECL_TRIE_TYPE(c);
/* what trie are we using right now */
reg_ac_data *aho
= (reg_ac_data*)progi->data->data[ ARG( c ) ];
U8 **points; /* map of where we were in the input string
when reading a given char. For ASCII this
is unnecessary overhead as the relationship
- is always 1:1, but for unicode, especially
- case folded unicode this is not true. */
+ is always 1:1, but for Unicode, especially
+ case folded Unicode this is not true. */
U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ];
U8 *bitmap=NULL;
}
static void
-S_swap_match_buff (pTHX_ regexp *prog) {
- I32 *t;
+S_swap_match_buff (pTHX_ regexp *prog)
+{
+ regexp_paren_pair *t;
+
+ PERL_ARGS_ASSERT_SWAP_MATCH_BUFF;
if (!prog->swap) {
/* We have to be careful. If the previous successful match
to the re, and switch the buffer each match. If we fail
we switch it back, otherwise we leave it swapped.
*/
- Newxz(prog->swap, 1, regexp_paren_ofs);
- /* no need to copy these */
- Newxz(prog->swap->startp, prog->nparens + 1, I32);
- Newxz(prog->swap->endp, prog->nparens + 1, I32);
+ Newxz(prog->swap, (prog->nparens + 1), regexp_paren_pair);
}
- t = prog->swap->startp;
- prog->swap->startp = prog->startp;
- prog->startp = t;
- t = prog->swap->endp;
- prog->swap->endp = prog->endp;
- prog->endp = t;
+ t = prog->swap;
+ prog->swap = prog->offs;
+ prog->offs = t;
}
- regexec_flags - match a regexp against a string
*/
I32
-Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend,
+Perl_regexec_flags(pTHX_ REGEXP * const rx, char *stringarg, register char *strend,
char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
/* strend: pointer to null at end of string */
/* strbeg: real beginning of string */
/* nosave: For optimizations. */
{
dVAR;
+ struct regexp *const prog = (struct regexp *)SvANY(rx);
/*register*/ char *s;
register regnode *c;
/*register*/ char *startpos = stringarg;
I32 end_shift = 0; /* Same for the end. */ /* CC */
I32 scream_pos = -1; /* Internal iterator of scream. */
char *scream_olds = NULL;
- SV* const oreplsv = GvSV(PL_replgv);
const bool do_utf8 = (bool)DO_UTF8(sv);
I32 multiline;
RXi_GET_DECL(prog,progi);
regmatch_info reginfo; /* create some info to pass to regtry etc */
bool swap_on_fail = 0;
-
GET_RE_DEBUG_FLAGS_DECL;
+ PERL_ARGS_ASSERT_REGEXEC_FLAGS;
PERL_UNUSED_ARG(data);
/* Be paranoid... */
}
multiline = prog->extflags & RXf_PMf_MULTILINE;
- reginfo.prog = prog;
+ reginfo.prog = rx; /* Yes, sorry that this is confusing. */
- RX_MATCH_UTF8_set(prog, do_utf8);
+ RX_MATCH_UTF8_set(rx, do_utf8);
DEBUG_EXECUTE_r(
- debug_start_match(prog, do_utf8, startpos, strend,
+ debug_start_match(rx, do_utf8, startpos, strend,
"Matching");
);
PL_reg_eval_set = 0;
PL_reg_maxiter = 0;
- if (prog->extflags & RXf_UTF8)
+ if (RX_UTF8(rx))
PL_reg_flags |= RF_utf8;
/* Mark beginning of line for ^ and lookbehind. */
} else /* pos() not defined */
reginfo.ganch = strbeg;
}
- if (PL_curpm && (PM_GETRE(PL_curpm) == prog)) {
+ if (PL_curpm && (PM_GETRE(PL_curpm) == rx)) {
swap_on_fail = 1;
swap_match_buff(prog); /* do we need a save destructor here for
eval dies? */
d.scream_olds = &scream_olds;
d.scream_pos = &scream_pos;
- s = re_intuit_start(prog, sv, s, strend, flags, &d);
+ s = re_intuit_start(rx, sv, s, strend, flags, &d);
if (!s) {
DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Not present...\n"));
goto phooey; /* not present */
if (regtry(®info, &s))
goto got_it;
after_try:
- if (s >= end)
+ if (s > end)
goto phooey;
if (prog->extflags & RXf_USE_INTUIT) {
- s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
+ s = re_intuit_start(rx, sv, s + 1, strend, flags, NULL);
if (!s)
goto phooey;
}
(unsigned char*)strend, must,
multiline ? FBMrf_MULTILINE : 0))) ) {
/* we may be pointing at the wrong string */
- if ((flags & REXEC_SCREAM) && RX_MATCH_COPIED(prog))
+ if ((flags & REXEC_SCREAM) && RXp_MATCH_COPIED(prog))
s = strbeg + (s - SvPVX_const(sv));
DEBUG_EXECUTE_r( did_match = 1 );
if (HOPc(s, -back_max) > last1) {
if (!last)
last = scream_olds; /* Only one occurrence. */
/* we may be pointing at the wrong string */
- else if (RX_MATCH_COPIED(prog))
+ else if (RXp_MATCH_COPIED(prog))
s = strbeg + (s - SvPVX_const(sv));
}
else {
goto phooey;
got_it:
- RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
+ RX_MATCH_TAINTED_set(rx, PL_reg_flags & RF_tainted);
- if (PL_reg_eval_set) {
- /* Preserve the current value of $^R */
- if (oreplsv != GvSV(PL_replgv))
- sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
- restored, the value remains
- the same. */
+ if (PL_reg_eval_set)
restore_pos(aTHX_ prog);
- }
- if (prog->paren_names)
- (void)hv_iterinit(prog->paren_names);
+ if (RXp_PAREN_NAMES(prog))
+ (void)hv_iterinit(RXp_PAREN_NAMES(prog));
/* make sure $`, $&, $', and $digit will work later */
if ( !(flags & REXEC_NOT_FIRST) ) {
- RX_MATCH_COPY_FREE(prog);
+ RX_MATCH_COPY_FREE(rx);
if (flags & REXEC_COPY_STR) {
const I32 i = PL_regeol - startpos + (stringarg - strbeg);
#ifdef PERL_OLD_COPY_ON_WRITE
} else
#endif
{
- RX_MATCH_COPIED_on(prog);
+ RX_MATCH_COPIED_on(rx);
s = savepvn(strbeg, i);
prog->subbeg = s;
}
S_regtry(pTHX_ regmatch_info *reginfo, char **startpos)
{
dVAR;
- register I32 *sp;
- register I32 *ep;
CHECKPOINT lastcp;
- regexp *prog = reginfo->prog;
+ REGEXP *const rx = reginfo->prog;
+ regexp *const prog = (struct regexp *)SvANY(rx);
RXi_GET_DECL(prog,progi);
GET_RE_DEBUG_FLAGS_DECL;
+
+ PERL_ARGS_ASSERT_REGTRY;
+
reginfo->cutpoint=NULL;
if ((prog->extflags & RXf_EVAL_SEEN) && !PL_reg_eval_set) {
Newxz(PL_reg_curpm, 1, PMOP);
#ifdef USE_ITHREADS
{
- SV* const repointer = newSViv(0);
- /* so we know which PL_regex_padav element is PL_reg_curpm */
- SvFLAGS(repointer) |= SVf_BREAK;
- av_push(PL_regex_padav,repointer);
+ SV* const repointer = &PL_sv_undef;
+ /* this regexp is also owned by the new PL_reg_curpm, which
+ will try to free it. */
+ av_push(PL_regex_padav, repointer);
PL_reg_curpm->op_pmoffset = av_len(PL_regex_padav);
PL_regex_pad = AvARRAY(PL_regex_padav);
}
#endif
}
- PM_SETRE(PL_reg_curpm, prog);
+#ifdef USE_ITHREADS
+ /* It seems that non-ithreads works both with and without this code.
+ So for efficiency reasons it seems best not to have the code
+ compiled when it is not needed. */
+ /* This is safe against NULLs: */
+ ReREFCNT_dec(PM_GETRE(PL_reg_curpm));
+ /* PL_reg_curpm owns a reference to this regexp. */
+ ReREFCNT_inc(rx);
+#endif
+ PM_SETRE(PL_reg_curpm, rx);
PL_reg_oldcurpm = PL_curpm;
PL_curpm = PL_reg_curpm;
- if (RX_MATCH_COPIED(prog)) {
+ if (RXp_MATCH_COPIED(prog)) {
/* Here is a serious problem: we cannot rewrite subbeg,
since it may be needed if this match fails. Thus
$` inside (?{}) could fail... */
#ifdef PERL_OLD_COPY_ON_WRITE
PL_nrs = prog->saved_copy;
#endif
- RX_MATCH_COPIED_off(prog);
+ RXp_MATCH_COPIED_off(prog);
}
else
PL_reg_oldsaved = NULL;
prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
}
DEBUG_EXECUTE_r(PL_reg_starttry = *startpos);
- prog->startp[0] = *startpos - PL_bostr;
+ prog->offs[0].start = *startpos - PL_bostr;
PL_reginput = *startpos;
PL_reglastparen = &prog->lastparen;
PL_reglastcloseparen = &prog->lastcloseparen;
prog->lastparen = 0;
prog->lastcloseparen = 0;
PL_regsize = 0;
- PL_regstartp = prog->startp;
- PL_regendp = prog->endp;
+ PL_regoffs = prog->offs;
if (PL_reg_start_tmpl <= prog->nparens) {
PL_reg_start_tmpl = prog->nparens*3/2 + 3;
if(PL_reg_start_tmp)
/* Tests pat.t#187 and split.t#{13,14} seem to depend on this code.
* Actually, the code in regcppop() (which Ilya may be meaning by
* PL_reglastparen), is not needed at all by the test suite
- * (op/regexp, op/pat, op/split), but that code is needed, oddly
- * enough, for building DynaLoader, or otherwise this
- * "Error: '*' not in typemap in DynaLoader.xs, line 164"
- * will happen. Meanwhile, this code *is* needed for the
+ * (op/regexp, op/pat, op/split), but that code is needed otherwise
+ * this erroneously leaves $1 defined: "1" =~ /^(?:(\d)x)?\d$/
+ * Meanwhile, this code *is* needed for the
* above-mentioned test suite tests to succeed. The common theme
* on those tests seems to be returning null fields from matches.
- * --jhi */
+ * --jhi updated by dapm */
#if 1
- sp = PL_regstartp;
- ep = PL_regendp;
if (prog->nparens) {
+ regexp_paren_pair *pp = PL_regoffs;
register I32 i;
for (i = prog->nparens; i > (I32)*PL_reglastparen; i--) {
- *++sp = -1;
- *++ep = -1;
+ ++pp;
+ pp->start = -1;
+ pp->end = -1;
}
}
#endif
REGCP_SET(lastcp);
if (regmatch(reginfo, progi->program + 1)) {
- PL_regendp[0] = PL_reginput - PL_bostr;
+ PL_regoffs[0].end = PL_reginput - PL_bostr;
return 1;
}
if (reginfo->cutpoint)
#ifdef DEBUGGING
STATIC void
-S_debug_start_match(pTHX_ const regexp *prog, const bool do_utf8,
+S_debug_start_match(pTHX_ const REGEXP *prog, const bool do_utf8,
const char *start, const char *end, const char *blurb)
{
- const bool utf8_pat= prog->extflags & RXf_UTF8 ? 1 : 0;
+ const bool utf8_pat = RX_UTF8(prog) ? 1 : 0;
+
+ PERL_ARGS_ASSERT_DEBUG_START_MATCH;
+
if (!PL_colorset)
reginitcolors();
{
RE_PV_QUOTED_DECL(s0, utf8_pat, PERL_DEBUG_PAD_ZERO(0),
- prog->precomp, prog->prelen, 60);
+ RX_PRECOMP(prog), RX_PRELEN(prog), 60);
RE_PV_QUOTED_DECL(s1, do_utf8, PERL_DEBUG_PAD_ZERO(1),
start, end - start, 60);
? (5 + taill) - l : locinput - loc_bostr;
int pref0_len;
+ PERL_ARGS_ASSERT_DUMP_EXEC_POS;
+
while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput - pref_len)))
pref_len++;
pref0_len = pref_len - (locinput - loc_reg_starttry);
* or 0 if none of the buffers matched.
*/
STATIC I32
-S_reg_check_named_buff_matched(pTHX_ const regexp *rex, const regnode *scan) {
+S_reg_check_named_buff_matched(pTHX_ const regexp *rex, const regnode *scan)
+{
I32 n;
RXi_GET_DECL(rex,rexi);
SV *sv_dat=(SV*)rexi->data->data[ ARG( scan ) ];
I32 *nums=(I32*)SvPVX(sv_dat);
+
+ PERL_ARGS_ASSERT_REG_CHECK_NAMED_BUFF_MATCHED;
+
for ( n=0; n<SvIVX(sv_dat); n++ ) {
if ((I32)*PL_reglastparen >= nums[n] &&
- PL_regendp[nums[n]] != -1)
+ PL_regoffs[nums[n]].end != -1)
{
return nums[n];
}
return 0;
}
+
+/* free all slabs above current one - called during LEAVE_SCOPE
+ * (it is registered with SAVEDESTRUCTOR_X(S_clear_backtrack_stack, NULL)).
+ *
+ * Detaches the chain that hangs off PL_regmatch_slab->next and releases
+ * every slab in it with Safefree(); PL_regmatch_slab itself is kept.
+ * The argument p is unused.
+ */
+
+STATIC void
+S_clear_backtrack_stack(pTHX_ void *p)
+{
+ regmatch_slab *s = PL_regmatch_slab->next; /* first slab to be freed */
+ PERL_UNUSED_ARG(p);
+
+ if (!s) /* no slab follows the current one: nothing to do */
+ return;
+ PL_regmatch_slab->next = NULL; /* unlink the chain before freeing it */
+ while (s) {
+ regmatch_slab * const osl = s;
+ s = s->next; /* fetch the link before osl is released */
+ Safefree(osl);
+ }
+}
+
+
/* SETREX(Re1,Re2): make Re2 the regexp held in Re1.
 *
 * When PL_reg_eval_set is true, Re2 is first stored into PL_reg_curpm via
 * PM_SETRE(); Re1 is then assigned Re2 in every case.
 *
 * The body is wrapped in STMT_START/STMT_END so the macro expands to exactly
 * one statement: without the wrapper, using it under an unbraced if/else
 * would leave the final assignment outside the condition.  Invoke it as a
 * statement with a trailing ';'.  Note that Re2 is evaluated more than once.
 */
#define SETREX(Re1,Re2) \
STMT_START { \
    if (PL_reg_eval_set) PM_SETRE((PL_reg_curpm), (Re2)); \
    (Re1) = (Re2); \
} STMT_END
dVAR;
register const bool do_utf8 = PL_reg_match_utf8;
const U32 uniflags = UTF8_ALLOW_DEFAULT;
-
- regexp *rex = reginfo->prog;
+ REGEXP *rex_sv = reginfo->prog;
+ regexp *rex = (struct regexp *)SvANY(rex_sv);
RXi_GET_DECL(rex,rexi);
-
- regmatch_slab *orig_slab;
- regmatch_state *orig_state;
-
+ I32 oldsave;
/* the current state. This is a cached copy of PL_regmatch_state */
register regmatch_state *st;
-
/* cache heavy used fields of st in registers */
register regnode *scan;
register regnode *next;
const U32 max_nochange_depth =
(3 * rex->nparens > MAX_RECURSE_EVAL_NOCHANGE_DEPTH) ?
3 * rex->nparens : MAX_RECURSE_EVAL_NOCHANGE_DEPTH;
-
regmatch_state *yes_state = NULL; /* state to pop to on success of
subpattern */
/* mark_state piggy backs on the yes_state logic so that when we unwind
the stack on success we can update the mark_state as we go */
regmatch_state *mark_state = NULL; /* last mark state we have seen */
-
regmatch_state *cur_eval = NULL; /* most recent EVAL_AB state */
struct regmatch_state *cur_curlyx = NULL; /* most recent curlyx */
U32 state_num;
during a successful match */
U32 lastopen = 0; /* last open we saw */
bool has_cutgroup = RX_HAS_CUTGROUP(rex) ? 1 : 0;
-
-
+ SV* const oreplsv = GvSV(PL_replgv);
/* these three flags are set by various ops to signal information to
* the very next op. They have a useful lifetime of exactly one loop
* iteration, and are not preserved or restored by state pushes/pops
false: plain (?=foo)
true: used as a condition: (?(?=foo))
*/
-
#ifdef DEBUGGING
GET_RE_DEBUG_FLAGS_DECL;
#endif
+ PERL_ARGS_ASSERT_REGMATCH;
+
DEBUG_OPTIMISE_r( DEBUG_EXECUTE_r({
PerlIO_printf(Perl_debug_log,"regmatch start\n");
}));
PL_regmatch_state = SLAB_FIRST(PL_regmatch_slab);
}
- /* remember current high-water mark for exit */
- /* XXX this should be done with SAVE* instead */
- orig_slab = PL_regmatch_slab;
- orig_state = PL_regmatch_state;
+ oldsave = PL_savestack_ix;
+ SAVEDESTRUCTOR_X(S_clear_backtrack_stack, NULL);
+ SAVEVPTR(PL_regmatch_slab);
+ SAVEVPTR(PL_regmatch_state);
/* grab next free state slot */
st = ++PL_regmatch_state;
case KEEPS:
/* update the startpoint */
- st->u.keeper.val = PL_regstartp[0];
+ st->u.keeper.val = PL_regoffs[0].start;
PL_reginput = locinput;
- PL_regstartp[0] = locinput - PL_bostr;
+ PL_regoffs[0].start = locinput - PL_bostr;
PUSH_STATE_GOTO(KEEPS_next, next);
/*NOT-REACHED*/
case KEEPS_next_fail:
/* rollback the start point change */
- PL_regstartp[0] = st->u.keeper.val;
+ PL_regoffs[0].start = st->u.keeper.val;
sayNO_SILENT;
/*NOT-REACHED*/
case EOL:
case TRIE:
{
/* what type of TRIE am I? (utf8 makes this contextual) */
- const enum { trie_plain, trie_utf8, trie_utf8_fold }
- trie_type = do_utf8 ?
- (scan->flags == EXACT ? trie_utf8 : trie_utf8_fold)
- : trie_plain;
+ DECL_TRIE_TYPE(scan);
/* what trie are we using right now */
reg_trie_data * const trie
if ( got_wordnum ) {
if ( ! ST.accepted ) {
ENTER;
- SAVETMPS;
+ /* SAVETMPS; */ /* XXX is this necessary? dmq */
bufflen = TRIE_INITAL_ACCEPT_BUFFLEN;
sv_accept_buff=newSV(bufflen *
sizeof(reg_trie_accepted) - 1);
if ( ST.jump) {
REGCP_UNWIND(ST.cp);
for (n = *PL_reglastparen; n > ST.lastparen; n--)
- PL_regendp[n] = -1;
+ PL_regoffs[n].end = -1;
*PL_reglastparen = n;
}
trie_first_try:
PL_reginput = (char *)ST.accept_buff[ best ].endpos;
if ( !ST.jump || !ST.jump[ST.accept_buff[best].wordnum]) {
scan = ST.B;
- /* NOTREACHED */
} else {
scan = ST.me + ST.jump[ST.accept_buff[best].wordnum];
- /* NOTREACHED */
- }
- if (has_cutgroup) {
- PUSH_YES_STATE_GOTO(TRIE_next, scan);
- /* NOTREACHED */
- } else {
- PUSH_STATE_GOTO(TRIE_next, scan);
- /* NOTREACHED */
}
+ PUSH_YES_STATE_GOTO(TRIE_next, scan);
/* NOTREACHED */
}
/* NOTREACHED */
* pack("U0U*", 0xDF) =~ /ss/i,
* the 0xC3 0x9F are the UTF-8
* byte sequence for the U+00DF. */
+
if (!(do_utf8 &&
- toLOWER(s[0]) == 's' &&
+ toLOWER(s[0]) == 's' &&
ln >= 2 &&
toLOWER(s[1]) == 's' &&
(U8)l[0] == 0xC3 &&
n = ARG(scan); /* which paren pair */
type = OP(scan);
do_ref:
- ln = PL_regstartp[n];
+ ln = PL_regoffs[n].start;
PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
if (*PL_reglastparen < n || ln == -1)
sayNO; /* Do not match unless seen CLOSEn. */
- if (ln == PL_regendp[n])
+ if (ln == PL_regoffs[n].end)
break;
s = PL_bostr + ln;
if (do_utf8 && type != REF) { /* REF can do byte comparison */
char *l = locinput;
- const char *e = PL_bostr + PL_regendp[n];
+ const char *e = PL_bostr + PL_regoffs[n].end;
/*
* Note that we can't do the "other character" lookup trick as
* in the 8-bit case (no pun intended) because in Unicode we
(UCHARAT(s) != (type == REFF
? PL_fold : PL_fold_locale)[nextchr])))
sayNO;
- ln = PL_regendp[n] - ln;
+ ln = PL_regoffs[n].end - ln;
if (locinput + ln > PL_regeol)
sayNO;
if (ln > 1 && (type == REF
#define ST st->u.eval
{
SV *ret;
+ REGEXP *re_sv;
regexp *re;
regexp_internal *rei;
regnode *startpoint;
} else {
nochange_depth = 0;
}
+ re_sv = rex_sv;
re = rex;
rei = rexi;
- (void)ReREFCNT_inc(rex);
+ (void)ReREFCNT_inc(rex_sv);
if (OP(scan)==GOSUB) {
startpoint = scan + ARG2L(scan);
ST.close_paren = ARG(scan);
DEBUG_STATE_r( PerlIO_printf(Perl_debug_log,
" re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
PAD_SAVE_LOCAL(old_comppad, (PAD*)rexi->data->data[n + 2]);
- PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
+ PL_regoffs[0].end = PL_reg_magic->mg_len = locinput - PL_bostr;
if (sv_yes_mark) {
SV *sv_mrk = get_sv("REGMARK", 1);
{
/* extract RE object from returned value; compiling if
* necessary */
-
MAGIC *mg = NULL;
- const SV *sv;
- if(SvROK(ret) && SvSMAGICAL(sv = SvRV(ret)))
- mg = mg_find(sv, PERL_MAGIC_qr);
- else if (SvSMAGICAL(ret)) {
- if (SvGMAGICAL(ret))
+ REGEXP *rx = NULL;
+
+ if (SvROK(ret)) {
+ SV *const sv = SvRV(ret);
+
+ if (SvTYPE(sv) == SVt_REGEXP) {
+ rx = (REGEXP*) sv;
+ } else if (SvSMAGICAL(sv)) {
+ mg = mg_find(sv, PERL_MAGIC_qr);
+ assert(mg);
+ }
+ } else if (SvTYPE(ret) == SVt_REGEXP) {
+ rx = (REGEXP*) ret;
+ } else if (SvSMAGICAL(ret)) {
+ if (SvGMAGICAL(ret)) {
+ /* I don't believe that there is ever qr magic
+ here. */
+ assert(!mg_find(ret, PERL_MAGIC_qr));
sv_unmagic(ret, PERL_MAGIC_qr);
- else
+ }
+ else {
mg = mg_find(ret, PERL_MAGIC_qr);
+ /* testing suggests mg only ends up non-NULL for
+ scalars who were upgraded and compiled in the
+ else block below. In turn, this is only
+ triggered in the "postponed utf8 string" tests
+ in t/op/pat.t */
+ }
}
if (mg) {
- re = reg_temp_copy((regexp *)mg->mg_obj); /*XXX:dmq*/
+ rx = (REGEXP *) mg->mg_obj; /*XXX:dmq*/
+ assert(rx);
+ }
+ if (rx) {
+ rx = reg_temp_copy(rx);
}
else {
- STRLEN len;
- const char * const t = SvPV_const(ret, len);
- PMOP pm;
+ U32 pm_flags = 0;
const I32 osize = PL_regsize;
- Zero(&pm, 1, PMOP);
- if (DO_UTF8(ret)) pm.op_pmdynflags |= PMdf_DYN_UTF8;
- re = CALLREGCOMP((char*)t, (char*)t + len, &pm);
+ if (DO_UTF8(ret)) {
+ assert (SvUTF8(ret));
+ } else if (SvUTF8(ret)) {
+ /* Not doing UTF-8, despite what the SV says. Is
+ this only if we're trapped in use 'bytes'? */
+ /* Make a copy of the octet sequence, but without
+ the flag on, as the compiler now honours the
+ SvUTF8 flag on ret. */
+ STRLEN len;
+ const char *const p = SvPV(ret, len);
+ ret = newSVpvn_flags(p, len, SVs_TEMP);
+ }
+ rx = CALLREGCOMP(ret, pm_flags);
if (!(SvFLAGS(ret)
& (SVs_TEMP | SVs_PADTMP | SVf_READONLY
- | SVs_GMG)))
- sv_magic(ret,(SV*)ReREFCNT_inc(re),
- PERL_MAGIC_qr,0,0);
+ | SVs_GMG))) {
+ /* This isn't a first class regexp. Instead, it's
+ caching a regexp onto an existing, Perl visible
+ scalar. */
+ sv_magic(ret, (SV*) rx, PERL_MAGIC_qr, 0, 0);
+ }
PL_regsize = osize;
}
+ re_sv = rx;
+ re = (struct regexp *)SvANY(rx);
}
- RX_MATCH_COPIED_off(re);
+ RXp_MATCH_COPIED_off(re);
re->subbeg = rex->subbeg;
re->sublen = rex->sublen;
rei = RXi_GET(re);
DEBUG_EXECUTE_r(
- debug_start_match(re, do_utf8, locinput, PL_regeol,
+ debug_start_match(re_sv, do_utf8, locinput, PL_regeol,
"Matching embedded");
);
startpoint = rei->program + 1;
ST.cp = regcppush(0); /* Save *all* the positions. */
REGCP_SET(ST.lastcp);
- PL_regstartp = re->startp; /* essentially NOOP on GOSUB */
- PL_regendp = re->endp; /* essentially NOOP on GOSUB */
+ PL_regoffs = re->offs; /* essentially NOOP on GOSUB */
- *PL_reglastparen = 0;
- *PL_reglastcloseparen = 0;
+ /* see regtry, specifically PL_reglast(?:close)?paren is a pointer! (I don't know why) :dmq */
+ PL_reglastparen = &re->lastparen;
+ PL_reglastcloseparen = &re->lastcloseparen;
+ re->lastparen = 0;
+ re->lastcloseparen = 0;
+
PL_reginput = locinput;
PL_regsize = 0;
PL_reg_maxiter = 0;
ST.toggle_reg_flags = PL_reg_flags;
- if (re->extflags & RXf_UTF8)
+ if (RX_UTF8(re_sv))
PL_reg_flags |= RF_utf8;
else
PL_reg_flags &= ~RF_utf8;
ST.toggle_reg_flags ^= PL_reg_flags; /* diff of old and new */
- ST.prev_rex = rex;
+ ST.prev_rex = rex_sv;
ST.prev_curlyx = cur_curlyx;
- SETREX(rex,re);
+ SETREX(rex_sv,re_sv);
+ rex = re;
rexi = rei;
cur_curlyx = NULL;
ST.B = next;
case EVAL_AB: /* cleanup after a successful (??{A})B */
/* note: this is called twice; first after popping B, then A */
PL_reg_flags ^= ST.toggle_reg_flags;
- ReREFCNT_dec(rex);
- SETREX(rex,ST.prev_rex);
+ ReREFCNT_dec(rex_sv);
+ SETREX(rex_sv,ST.prev_rex);
+ rex = (struct regexp *)SvANY(rex_sv);
rexi = RXi_GET(rex);
regcpblow(ST.cp);
cur_eval = ST.prev_eval;
cur_curlyx = ST.prev_curlyx;
+
+ PL_reglastparen = &rex->lastparen;
+ PL_reglastcloseparen = &rex->lastcloseparen;
+
/* XXXX This is too dramatic a measure... */
PL_reg_maxiter = 0;
if ( nochange_depth )
case EVAL_AB_fail: /* unsuccessfully ran A or B in (??{A})B */
/* note: this is called twice; first after popping B, then A */
PL_reg_flags ^= ST.toggle_reg_flags;
- ReREFCNT_dec(rex);
- SETREX(rex,ST.prev_rex);
+ ReREFCNT_dec(rex_sv);
+ SETREX(rex_sv,ST.prev_rex);
+ rex = (struct regexp *)SvANY(rex_sv);
rexi = RXi_GET(rex);
+ PL_reglastparen = &rex->lastparen;
+ PL_reglastcloseparen = &rex->lastcloseparen;
+
PL_reginput = locinput;
REGCP_UNWIND(ST.lastcp);
regcppop(rex);
break;
case CLOSE:
n = ARG(scan); /* which paren pair */
- PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
- PL_regendp[n] = locinput - PL_bostr;
+ PL_regoffs[n].start = PL_reg_start_tmp[n] - PL_bostr;
+ PL_regoffs[n].end = locinput - PL_bostr;
/*if (n > PL_regsize)
PL_regsize = n;*/
if (n > *PL_reglastparen)
if ( OP(cursor)==CLOSE ){
n = ARG(cursor);
if ( n <= lastopen ) {
- PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
- PL_regendp[n] = locinput - PL_bostr;
+ PL_regoffs[n].start
+ = PL_reg_start_tmp[n] - PL_bostr;
+ PL_regoffs[n].end = locinput - PL_bostr;
/*if (n > PL_regsize)
PL_regsize = n;*/
if (n > *PL_reglastparen)
/*NOTREACHED*/
case GROUPP:
n = ARG(scan); /* which paren pair */
- sw = (bool)(*PL_reglastparen >= n && PL_regendp[n] != -1);
+ sw = (bool)(*PL_reglastparen >= n && PL_regoffs[n].end != -1);
break;
case NGROUPP:
/* reg_check_named_buff_matched returns 0 for no match */
}
case CURLYX_end: /* just finished matching all of A*B */
- if (PL_reg_eval_set){
- SV *pres= GvSV(PL_replgv);
- SvREFCNT_inc(pres);
- regcpblow(ST.cp);
- sv_setsv(GvSV(PL_replgv), pres);
- SvREFCNT_dec(pres);
- } else {
- regcpblow(ST.cp);
- }
cur_curlyx = ST.prev_curlyx;
sayYES;
/* NOTREACHED */
case BRANCH: /* /(...|A|...)/ */
scan = NEXTOPER(scan); /* scan now points to inner node */
- if ((!next || (OP(next) != BRANCH && OP(next) != BRANCHJ))
- && !has_cutgroup)
- {
- /* last branch; skip state push and jump direct to node */
- continue;
- }
ST.lastparen = *PL_reglastparen;
ST.next_branch = next;
REGCP_SET(ST.cp);
}
REGCP_UNWIND(ST.cp);
for (n = *PL_reglastparen; n > ST.lastparen; n--)
- PL_regendp[n] = -1;
+ PL_regoffs[n].end = -1;
*PL_reglastparen = n;
/*dmq: *PL_reglastcloseparen = n; */
scan = ST.next_branch;
/* mark current A as captured */
I32 paren = ST.me->flags;
if (ST.count) {
- PL_regstartp[paren]
+ PL_regoffs[paren].start
= HOPc(PL_reginput, -ST.alen) - PL_bostr;
- PL_regendp[paren] = PL_reginput - PL_bostr;
+ PL_regoffs[paren].end = PL_reginput - PL_bostr;
/*dmq: *PL_reglastcloseparen = paren; */
}
else
- PL_regendp[paren] = -1;
+ PL_regoffs[paren].end = -1;
if (cur_eval && cur_eval->u.eval.close_paren &&
cur_eval->u.eval.close_paren == (U32)ST.me->flags)
{
#define CURLY_SETPAREN(paren, success) \
if (paren) { \
if (success) { \
- PL_regstartp[paren] = HOPc(locinput, -1) - PL_bostr; \
- PL_regendp[paren] = locinput - PL_bostr; \
+ PL_regoffs[paren].start = HOPc(locinput, -1) - PL_bostr; \
+ PL_regoffs[paren].end = locinput - PL_bostr; \
*PL_reglastcloseparen = paren; \
} \
else \
- PL_regendp[paren] = -1; \
+ PL_regoffs[paren].end = -1; \
}
case STAR: /* /A*B/ where A is width 1 */
case CURLY_B_min_known_fail:
/* failed to find B in a non-greedy match where c1,c2 valid */
if (ST.paren && ST.count)
- PL_regendp[ST.paren] = -1;
+ PL_regoffs[ST.paren].end = -1;
PL_reginput = locinput; /* Could be reset... */
REGCP_UNWIND(ST.cp);
case CURLY_B_min_fail:
/* failed to find B in a non-greedy match where c1,c2 invalid */
if (ST.paren && ST.count)
- PL_regendp[ST.paren] = -1;
+ PL_regoffs[ST.paren].end = -1;
REGCP_UNWIND(ST.cp);
/* failed -- move forward one */
case CURLY_B_max_fail:
/* failed to find B in a greedy match */
if (ST.paren && ST.count)
- PL_regendp[ST.paren] = -1;
+ PL_regoffs[ST.paren].end = -1;
REGCP_UNWIND(ST.cp);
/* back up. */
= cur_eval->u.eval.toggle_reg_flags;
PL_reg_flags ^= st->u.eval.toggle_reg_flags;
- st->u.eval.prev_rex = rex; /* inner */
- SETREX(rex,cur_eval->u.eval.prev_rex);
+ st->u.eval.prev_rex = rex_sv; /* inner */
+ SETREX(rex_sv,cur_eval->u.eval.prev_rex);
+ rex = (struct regexp *)SvANY(rex_sv);
rexi = RXi_GET(rex);
cur_curlyx = cur_eval->u.eval.prev_curlyx;
- ReREFCNT_inc(rex);
+ ReREFCNT_inc(rex_sv);
st->u.eval.cp = regcppush(0); /* Save *all* the positions. */
REGCP_SET(st->u.eval.lastcp);
PL_reginput = locinput;
sayNO;
/* NOTREACHED */
#undef ST
+ case FOLDCHAR:
+ n = ARG(scan);
+ if ( n == (U32)what_len_TRICKYFOLD(locinput,do_utf8,ln) ) {
+ locinput += ln;
+ } else if ( 0xDF == n && !do_utf8 && !UTF ) {
+ sayNO;
+ } else {
+ U8 folded[UTF8_MAXBYTES_CASE+1];
+ STRLEN foldlen;
+ const char * const l = locinput;
+ char *e = PL_regeol;
+ to_uni_fold(n, folded, &foldlen);
+
+ if (ibcmp_utf8((const char*) folded, 0, foldlen, 1,
+ l, &e, 0, do_utf8)) {
+ sayNO;
+ }
+ locinput = e;
+ }
+ nextchr = UCHARAT(locinput);
+ break;
+ case LNBREAK:
+ if ((n=is_LNBREAK(locinput,do_utf8))) {
+ locinput += n;
+ nextchr = UCHARAT(locinput);
+ } else
+ sayNO;
+ break;
+
+/* CASE_CLASS(nAmE) expands to two switch arms: "case nAmE" for the class
+ * itself and "case N##nAmE" for its negation.  is_##nAmE(locinput,do_utf8)
+ * yields 0 for no match, otherwise the value used as the number of bytes
+ * to advance locinput.
+ *
+ * The negated arm must step over exactly one character: UTF8SKIP() is only
+ * meaningful when the target is UTF-8, so a byte string advances by one
+ * byte (this mirrors the scan++ used by the non-UTF-8 NHORIZWS/NVERTWS
+ * repeat loops).  It also refuses to match at or beyond PL_regeol so the
+ * terminating NUL is never consumed as a "non-member" character.
+ * The final "break" deliberately has no semicolon; the invocation
+ * supplies it. */
+#define CASE_CLASS(nAmE) \
+    case nAmE: \
+        if ((n=is_##nAmE(locinput,do_utf8))) { \
+            locinput += n; \
+            nextchr = UCHARAT(locinput); \
+        } else \
+            sayNO; \
+        break; \
+    case N##nAmE: \
+        if (locinput >= PL_regeol /* nothing left to consume */ \
+            || (n=is_##nAmE(locinput,do_utf8))) { \
+            sayNO; \
+        } else { \
+            locinput += do_utf8 ? UTF8SKIP(locinput) : 1; \
+            nextchr = UCHARAT(locinput); \
+        } \
+        break
+
+ CASE_CLASS(VERTWS);
+ CASE_CLASS(HORIZWS);
+#undef CASE_CLASS
default:
PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
PL_colors[4], PL_colors[5]));
+ if (PL_reg_eval_set) {
+ /* each successfully executed (?{...}) block does the equivalent of
+ * local $^R = do {...}
+ * When popping the save stack, all these locals would be undone;
+ * bypass this by setting the outermost saved $^R to the latest
+ * value */
+ if (oreplsv != GvSV(PL_replgv))
+ sv_setsv(oreplsv, GvSV(PL_replgv));
+ }
result = 1;
goto final_exit;
sv_setsv(sv_err, sv_commit);
sv_setsv(sv_mrk, sv_yes_mark);
}
- /* restore original high-water mark */
- PL_regmatch_slab = orig_slab;
- PL_regmatch_state = orig_state;
-
- /* free all slabs above current one */
- if (orig_slab->next) {
- regmatch_slab *sl = orig_slab->next;
- orig_slab->next = NULL;
- while (sl) {
- regmatch_slab * const osl = sl;
- sl = sl->next;
- Safefree(osl);
- }
- }
+
+ /* clean up; in particular, free all slabs above current one */
+ LEAVE_SCOPE(oldsave);
return result;
}
PERL_UNUSED_ARG(depth);
#endif
+ PERL_ARGS_ASSERT_REGREPEAT;
+
scan = PL_reginput;
if (max == REG_INFTY)
max = I32_MAX;
} else {
while (scan < loceol && !isSPACE(*scan))
scan++;
- break;
}
+ break;
case NSPACEL:
PL_reg_flags |= RF_tainted;
if (do_utf8) {
while (scan < loceol && !isDIGIT(*scan))
scan++;
}
+ case LNBREAK:
+ if (do_utf8) {
+ loceol = PL_regeol;
+ while (hardcount < max && scan < loceol && (c=is_LNBREAK_utf8(scan))) {
+ scan += c;
+ hardcount++;
+ }
+ } else {
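+ /*
+ A single LNBREAK match can consume two Latin-1 bytes, so the
+ number of bytes scanned no longer equals the number of matches
+ and hardcount has to be used to count them. Looking at the
+ second byte is safe because the string is NUL-terminated.
+ */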
+ while (scan < loceol && (c=is_LNBREAK_latin1(scan))) {
+ scan+=c;
+ hardcount++;
+ }
+ }
break;
+ case HORIZWS:
+ if (do_utf8) {
+ loceol = PL_regeol;
+ while (hardcount < max && scan < loceol && (c=is_HORIZWS_utf8(scan))) {
+ scan += c;
+ hardcount++;
+ }
+ } else {
+ while (scan < loceol && is_HORIZWS_latin1(scan))
+ scan++;
+ }
+ break;
+ case NHORIZWS:
+ if (do_utf8) {
+ loceol = PL_regeol;
+ while (hardcount < max && scan < loceol && !is_HORIZWS_utf8(scan)) {
+ scan += UTF8SKIP(scan);
+ hardcount++;
+ }
+ } else {
+ while (scan < loceol && !is_HORIZWS_latin1(scan))
+ scan++;
+
+ }
+ break;
+ case VERTWS:
+ if (do_utf8) {
+ loceol = PL_regeol;
+ while (hardcount < max && scan < loceol && (c=is_VERTWS_utf8(scan))) {
+ scan += c;
+ hardcount++;
+ }
+ } else {
+ while (scan < loceol && is_VERTWS_latin1(scan))
+ scan++;
+
+ }
+ break;
+ case NVERTWS:
+ if (do_utf8) {
+ loceol = PL_regeol;
+ while (hardcount < max && scan < loceol && !is_VERTWS_utf8(scan)) {
+ scan += UTF8SKIP(scan);
+ hardcount++;
+ }
+ } else {
+ while (scan < loceol && !is_VERTWS_latin1(scan))
+ scan++;
+
+ }
+ break;
+
default: /* Called on something of 0 width. */
break; /* So match right here or not at all. */
}
RXi_GET_DECL(prog,progi);
const struct reg_data * const data = prog ? progi->data : NULL;
+ PERL_ARGS_ASSERT_REGCLASS_SWASH;
+
if (data && data->count) {
const U32 n = ARG(node);
* documentation of these array elements. */
si = *ary;
- a = SvROK(ary[1]) ? &ary[1] : 0;
- b = SvTYPE(ary[2]) == SVt_PVAV ? &ary[2] : 0;
+ a = SvROK(ary[1]) ? &ary[1] : NULL;
+ b = SvTYPE(ary[2]) == SVt_PVAV ? &ary[2] : NULL;
if (a)
sw = *a;
STRLEN len = 0;
STRLEN plen;
+ PERL_ARGS_ASSERT_REGINCLASS;
+
if (do_utf8 && !UTF8_IS_INVARIANT(c)) {
c = utf8n_to_uvchr(p, UTF8_MAXBYTES, &len,
(UTF8_ALLOW_DEFAULT & UTF8_ALLOW_ANYUV) | UTF8_CHECK_ONLY);
S_reghop3(U8 *s, I32 off, const U8* lim)
{
dVAR;
+
+ PERL_ARGS_ASSERT_REGHOP3;
+
if (off >= 0) {
while (off-- && s < lim) {
/* XXX could check well-formedness here */
S_reghop4(U8 *s, I32 off, const U8* llim, const U8* rlim)
{
dVAR;
+
+ PERL_ARGS_ASSERT_REGHOP4;
+
if (off >= 0) {
while (off-- && s < rlim) {
/* XXX could check well-formedness here */
S_reghopmaybe3(U8* s, I32 off, const U8* lim)
{
dVAR;
+
+ PERL_ARGS_ASSERT_REGHOPMAYBE3;
+
if (off >= 0) {
while (off-- && s < lim) {
/* XXX could check well-formedness here */
#ifdef PERL_OLD_COPY_ON_WRITE
rex->saved_copy = PL_nrs;
#endif
- RX_MATCH_COPIED_on(rex);
+ RXp_MATCH_COPIED_on(rex);
}
PL_reg_magic->mg_len = PL_reg_oldpos;
PL_reg_eval_set = 0;
S_to_utf8_substr(pTHX_ register regexp *prog)
{
int i = 1;
+
+ PERL_ARGS_ASSERT_TO_UTF8_SUBSTR;
+
do {
if (prog->substrs->data[i].substr
&& !prog->substrs->data[i].utf8_substr) {
{
dVAR;
int i = 1;
+
+ PERL_ARGS_ASSERT_TO_BYTE_SUBSTR;
+
do {
if (prog->substrs->data[i].utf8_substr
&& !prog->substrs->data[i].substr) {