#define REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc, uscan, len, \
uvc, charid, foldlen, foldbuf, uniflags) STMT_START { \
+ UV uvc_unfolded = 0; \
switch (trie_type) { \
case trie_utf8_fold: \
if ( foldlen>0 ) { \
- uvc = utf8n_to_uvuni( uscan, UTF8_MAXLEN, &len, uniflags ); \
+ uvc_unfolded = uvc = utf8n_to_uvuni( uscan, UTF8_MAXLEN, &len, uniflags ); \
foldlen -= len; \
uscan += len; \
len=0; \
} else { \
- uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN, &len, uniflags ); \
+ uvc_unfolded = uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN, &len, uniflags ); \
uvc = to_uni_fold( uvc, foldbuf, &foldlen ); \
foldlen -= UNISKIP( uvc ); \
uscan = foldbuf + UNISKIP( uvc ); \
charid = (U16)SvIV(*svpp); \
} \
} \
+ if (!charid && trie_type == trie_utf8_fold && !UTF) { \
+ charid = trie->charmap[uvc_unfolded]; \
+ } \
} STMT_END
#define REXEC_FBC_EXACTISH_CHECK(CoNd) \
return s;
}
-static void
-S_swap_match_buff (pTHX_ regexp *prog)
-{
- regexp_paren_pair *t;
-
- PERL_ARGS_ASSERT_SWAP_MATCH_BUFF;
-
- if (!prog->swap) {
- /* We have to be careful. If the previous successful match
- was from this regex we don't want a subsequent paritally
- successful match to clobber the old results.
- So when we detect this possibility we add a swap buffer
- to the re, and switch the buffer each match. If we fail
- we switch it back, otherwise we leave it swapped.
- */
- Newxz(prog->swap, (prog->nparens + 1), regexp_paren_pair);
- }
- t = prog->swap;
- prog->swap = prog->offs;
- prog->offs = t;
-}
-
/*
- regexec_flags - match a regexp against a string
I32 multiline;
RXi_GET_DECL(prog,progi);
regmatch_info reginfo; /* create some info to pass to regtry etc */
- bool swap_on_fail = 0;
+ regexp_paren_pair *swap = NULL;
GET_RE_DEBUG_FLAGS_DECL;
PERL_ARGS_ASSERT_REGEXEC_FLAGS;
reginfo.ganch = strbeg;
}
if (PL_curpm && (PM_GETRE(PL_curpm) == rx)) {
- swap_on_fail = 1;
- swap_match_buff(prog); /* do we need a save destructor here for
- eval dies? */
+ /* We have to be careful. If the previous successful match
+ was from this regex we don't want a subsequent partially
+ successful match to clobber the old results.
+ So when we detect this possibility we add a swap buffer
+ to the re, and switch the buffer each match. If we fail
+ we switch it back, otherwise we leave it swapped.
+ */
+ swap = prog->offs;
+ /* do we need a save destructor here for eval dies? */
+ Newxz(prog->offs, (prog->nparens + 1), regexp_paren_pair);
}
if (!(flags & REXEC_CHECKED) && (prog->check_substr != NULL || prog->check_utf8 != NULL)) {
re_scream_pos_data d;
goto phooey;
got_it:
+ Safefree(swap);
RX_MATCH_TAINTED_set(rx, PL_reg_flags & RF_tainted);
if (PL_reg_eval_set)
PL_colors[4], PL_colors[5]));
if (PL_reg_eval_set)
restore_pos(aTHX_ prog);
- if (swap_on_fail)
+ if (swap) {
/* we failed :-( roll it back */
- swap_match_buff(prog);
-
+ Safefree(prog->offs);
+ prog->offs = swap;
+ }
+
return 0;
}
/* Make $_ available to executed code. */
if (reginfo->sv != DEFSV) {
SAVE_DEFSV;
- DEFSV = reginfo->sv;
+ DEFSV_set(reginfo->sv);
}
if (!(SvTYPE(reginfo->sv) >= SVt_PVMG && SvMAGIC(reginfo->sv)
reginitcolors();
{
RE_PV_QUOTED_DECL(s0, utf8_pat, PERL_DEBUG_PAD_ZERO(0),
- RX_PRECOMP(prog), RX_PRELEN(prog), 60);
+ RX_PRECOMP_const(prog), RX_PRELEN(prog), 60);
RE_PV_QUOTED_DECL(s1, do_utf8, PERL_DEBUG_PAD_ZERO(1),
start, end - start, 60);
state_num = OP(scan);
reenter_switch:
+
+ assert(PL_reglastparen == &rex->lastparen);
+ assert(PL_reglastcloseparen == &rex->lastcloseparen);
+ assert(PL_regoffs == rex->offs);
+
switch (state_num) {
case BOL:
if (locinput == PL_bostr)
if ( got_wordnum ) {
if ( ! ST.accepted ) {
ENTER;
- /* SAVETMPS; */ /* XXX is this necessary? dmq */
+ SAVETMPS; /* XXX is this necessary? dmq */
bufflen = TRIE_INITAL_ACCEPT_BUFFLEN;
sv_accept_buff=newSV(bufflen *
sizeof(reg_trie_accepted) - 1);
}
/* NOTREACHED */
case TRIE_next:
+ /* we dont want to throw this away, see bug 57042*/
+ if (oreplsv != GvSV(PL_replgv))
+ sv_setsv(oreplsv, GvSV(PL_replgv));
FREETMPS;
LEAVE;
sayYES;
OP_4tree * const oop = PL_op;
COP * const ocurcop = PL_curcop;
PAD *old_comppad;
+ char *saved_regeol = PL_regeol;
n = ARG(scan);
PL_op = (OP_4tree*)rexi->data->data[n];
PL_op = oop;
PAD_RESTORE_LOCAL(old_comppad);
PL_curcop = ocurcop;
+ PL_regeol = saved_regeol;
if (!logical) {
/* /(?{...})/ */
sv_setsv(save_scalar(PL_replgv), ret);
regcpblow(ST.cp);
cur_eval = ST.prev_eval;
cur_curlyx = ST.prev_curlyx;
-
+
+ /* rex was changed so update the pointer in PL_reglastparen and PL_reglastcloseparen */
PL_reglastparen = &rex->lastparen;
PL_reglastcloseparen = &rex->lastcloseparen;
+ /* also update PL_regoffs */
+ PL_regoffs = rex->offs;
/* XXXX This is too dramatic a measure... */
PL_reg_maxiter = 0;
SETREX(rex_sv,ST.prev_rex);
rex = (struct regexp *)SvANY(rex_sv);
rexi = RXi_GET(rex);
+ /* rex was changed so update the pointer in PL_reglastparen and PL_reglastcloseparen */
PL_reglastparen = &rex->lastparen;
PL_reglastcloseparen = &rex->lastcloseparen;
case CURLYM: /* /A{m,n}B/ where A is fixed-length */
/* This is an optimisation of CURLYX that enables us to push
- * only a single backtracking state, no matter now many matches
+ * only a single backtracking state, no matter how many matches
* there are in {m,n}. It relies on the pattern being constant
* length, with no parens to influence future backrefs
*/
cur_eval->u.eval.close_paren == (U32)ST.me->flags)
goto fake_end;
- if ( ST.count < (ST.minmod ? ARG1(ST.me) : ARG2(ST.me)) )
- goto curlym_do_A; /* try to match another A */
+ {
+ I32 max = (ST.minmod ? ARG1(ST.me) : ARG2(ST.me));
+ if ( max == REG_INFTY || ST.count < max )
+ goto curlym_do_A; /* try to match another A */
+ }
goto curlym_do_B; /* try to match B */
case CURLYM_A_fail: /* just failed to match an A */
case CURLYM_B_fail: /* just failed to match a B */
REGCP_UNWIND(ST.cp);
if (ST.minmod) {
- if (ST.count == ARG2(ST.me) /* max */)
+ I32 max = ARG2(ST.me);
+ if (max != REG_INFTY && ST.count == max)
sayNO;
goto curlym_do_A; /* try to match a further A */
}
cur_curlyx = cur_eval->u.eval.prev_curlyx;
ReREFCNT_inc(rex_sv);
st->u.eval.cp = regcppush(0); /* Save *all* the positions. */
+
+ /* rex was changed so update the pointer in PL_reglastparen and PL_reglastcloseparen */
+ PL_reglastparen = &rex->lastparen;
+ PL_reglastcloseparen = &rex->lastcloseparen;
+
REGCP_SET(st->u.eval.lastcp);
PL_reginput = locinput;
do_ifmatch:
ST.me = scan;
ST.logical = logical;
+ logical = 0; /* XXX: reset state of logical once it has been saved into ST */
+
/* execute body of (?...A) */
PUSH_YES_STATE_GOTO(IFMATCH_A, NEXTOPER(NEXTOPER(scan)));
/* NOTREACHED */
SV * const sw = regclass_swash(prog, n, TRUE, 0, (SV**)&av);
if (sw) {
- if (swash_fetch(sw, p, do_utf8))
+ U8 * utf8_p;
+ if (do_utf8) {
+ utf8_p = (U8 *) p;
+ } else {
+ STRLEN len = 1;
+ utf8_p = bytes_to_utf8(p, &len);
+ }
+ if (swash_fetch(sw, utf8_p, 1))
match = TRUE;
else if (flags & ANYOF_FOLD) {
if (!match && lenp && av) {
SV* const sv = *av_fetch(av, i, FALSE);
STRLEN len;
const char * const s = SvPV_const(sv, len);
-
- if (len <= plen && memEQ(s, (char*)p, len)) {
+ if (len <= plen && memEQ(s, (char*)utf8_p, len)) {
*lenp = len;
match = TRUE;
break;
}
if (!match) {
U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
- STRLEN tmplen;
- to_utf8_fold(p, tmpbuf, &tmplen);
- if (swash_fetch(sw, tmpbuf, do_utf8))
+ STRLEN tmplen;
+ to_utf8_fold(utf8_p, tmpbuf, &tmplen);
+ if (swash_fetch(sw, tmpbuf, 1))
match = TRUE;
}
}
+
+ /* If we allocated a string above, free it */
+ if (! do_utf8) Safefree(utf8_p);
}
}
if (match && lenp && *lenp == 0)