- pregexec - match a regexp against a string
*/
I32
-Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend,
+Perl_pregexec(pTHX_ REGEXP * const prog, char* stringarg, register char *strend,
char *strbeg, I32 minend, SV *screamer, U32 nosave)
/* strend: pointer to null at end of string */
/* strbeg: real beginning of string */
deleted from the finite automaton. */
char *
-Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
- char *strend, U32 flags, re_scream_pos_data *data)
+Perl_re_intuit_start(pTHX_ REGEXP * const prog, SV *sv, char *strpos,
+ char *strend, const U32 flags, re_scream_pos_data *data)
{
dVAR;
register I32 start_shift = 0;
if ((!reginfo || regtry(reginfo, &s))) \
goto got_it
+#define REXEC_FBC_CSCAN(CoNdUtF8,CoNd) \
+ if (do_utf8) { \
+ REXEC_FBC_UTF8_CLASS_SCAN(CoNdUtF8); \
+ } \
+ else { \
+ REXEC_FBC_CLASS_SCAN(CoNd); \
+ } \
+ break
+
#define REXEC_FBC_CSCAN_PRELOAD(UtFpReLoAd,CoNdUtF8,CoNd) \
if (do_utf8) { \
UtFpReLoAd; \
!isDIGIT_LC_utf8((U8*)s),
!isDIGIT_LC(*s)
);
+ case LNBREAK:
+ REXEC_FBC_CSCAN(
+ is_LNBREAK_utf8(s),
+ is_LNBREAK_latin1(s)
+ );
+ case VERTWS:
+ REXEC_FBC_CSCAN(
+ is_VERTWS_utf8(s),
+ is_VERTWS_latin1(s)
+ );
+ case NVERTWS:
+ REXEC_FBC_CSCAN(
+ !is_VERTWS_utf8(s),
+ !is_VERTWS_latin1(s)
+ );
+ case HORIZWS:
+ REXEC_FBC_CSCAN(
+ is_HORIZWS_utf8(s),
+ is_HORIZWS_latin1(s)
+ );
+ case NHORIZWS:
+ REXEC_FBC_CSCAN(
+ !is_HORIZWS_utf8(s),
+ !is_HORIZWS_latin1(s)
+ );
case AHOCORASICKC:
case AHOCORASICK:
{
- regexec_flags - match a regexp against a string
*/
I32
-Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend,
+Perl_regexec_flags(pTHX_ REGEXP * const prog, char *stringarg, register char *strend,
char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
/* strend: pointer to null at end of string */
/* strbeg: real beginning of string */
if (regtry(®info, &s))
goto got_it;
after_try:
- if (s >= end)
+ if (s > end)
goto phooey;
if (prog->extflags & RXf_USE_INTUIT) {
s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
return 0;
}
+
+/* free all slabs above current one - called during LEAVE_SCOPE */
+
+STATIC void
+S_clear_backtrack_stack(pTHX_ void *p)
+{
+ regmatch_slab *s = PL_regmatch_slab->next;
+ PERL_UNUSED_ARG(p);
+
+ if (!s)
+ return;
+ PL_regmatch_slab->next = NULL;
+ while (s) {
+ regmatch_slab * const osl = s;
+ s = s->next;
+ Safefree(osl);
+ }
+}
+
+
#define SETREX(Re1,Re2) \
if (PL_reg_eval_set) PM_SETRE((PL_reg_curpm), (Re2)); \
Re1 = (Re2)
regexp *rex = reginfo->prog;
RXi_GET_DECL(rex,rexi);
- regmatch_slab *orig_slab;
- regmatch_state *orig_state;
+ I32 oldsave;
/* the current state. This is a cached copy of PL_regmatch_state */
register regmatch_state *st;
PL_regmatch_state = SLAB_FIRST(PL_regmatch_slab);
}
- /* remember current high-water mark for exit */
- /* XXX this should be done with SAVE* instead */
- orig_slab = PL_regmatch_slab;
- orig_state = PL_regmatch_state;
+ oldsave = PL_savestack_ix;
+ SAVEDESTRUCTOR_X(S_clear_backtrack_stack, NULL);
+ SAVEVPTR(PL_regmatch_slab);
+ SAVEVPTR(PL_regmatch_state);
/* grab next free state slot */
st = ++PL_regmatch_state;
* pack("U0U*", 0xDF) =~ /ss/i,
* the 0xC3 0x9F are the UTF-8
* byte sequence for the U+00DF. */
+
if (!(do_utf8 &&
- toLOWER(s[0]) == 's' &&
+ toLOWER(s[0]) == 's' &&
ln >= 2 &&
toLOWER(s[1]) == 's' &&
(U8)l[0] == 0xC3 &&
re = reg_temp_copy((regexp *)mg->mg_obj); /*XXX:dmq*/
}
else {
- STRLEN len;
- const char * const t = SvPV_const(ret, len);
- PMOP pm;
+ U32 pm_flags = 0;
const I32 osize = PL_regsize;
- Zero(&pm, 1, PMOP);
- if (DO_UTF8(ret)) pm.op_pmdynflags |= PMdf_DYN_UTF8;
- re = CALLREGCOMP((char*)t, (char*)t + len, &pm);
+ if (DO_UTF8(ret)) pm_flags |= RXf_UTF8;
+ re = CALLREGCOMP(ret, pm_flags);
if (!(SvFLAGS(ret)
& (SVs_TEMP | SVs_PADTMP | SVf_READONLY
| SVs_GMG)))
sayNO;
/* NOTREACHED */
#undef ST
+ case FOLDCHAR:
+ n = ARG(scan);
+ if ( n == (U32)what_len_TRICKYFOLD(locinput,do_utf8,ln) ) {
+ locinput += ln;
+ } else if ( 0xDF == n && !do_utf8 && !UTF ) {
+ sayNO;
+ } else {
+ U8 folded[UTF8_MAXBYTES_CASE+1];
+ STRLEN foldlen;
+ const char * const l = locinput;
+ char *e = PL_regeol;
+ to_uni_fold(n, folded, &foldlen);
+
+ if (ibcmp_utf8((const char*) folded, 0, foldlen, 1,
+ l, &e, 0, do_utf8)) {
+ sayNO;
+ }
+ locinput = e;
+ }
+ nextchr = UCHARAT(locinput);
+ break;
+ case LNBREAK:
+ if ((n=is_LNBREAK(locinput,do_utf8))) {
+ locinput += n;
+ nextchr = UCHARAT(locinput);
+ } else
+ sayNO;
+ break;
+
+#define CASE_CLASS(nAmE) \
+ case nAmE: \
+ if ((n=is_##nAmE(locinput,do_utf8))) { \
+ locinput += n; \
+ nextchr = UCHARAT(locinput); \
+ } else \
+ sayNO; \
+ break; \
+ case N##nAmE: \
+ if ((n=is_##nAmE(locinput,do_utf8))) { \
+ sayNO; \
+ } else { \
+ locinput += UTF8SKIP(locinput); \
+ nextchr = UCHARAT(locinput); \
+ } \
+ break
+
+ CASE_CLASS(VERTWS);
+ CASE_CLASS(HORIZWS);
+#undef CASE_CLASS
default:
PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
sv_setsv(sv_mrk, sv_yes_mark);
}
- /* restore original high-water mark */
- PL_regmatch_slab = orig_slab;
- PL_regmatch_state = orig_state;
-
- /* free all slabs above current one */
- if (orig_slab->next) {
- regmatch_slab *sl = orig_slab->next;
- orig_slab->next = NULL;
- while (sl) {
- regmatch_slab * const osl = sl;
- sl = sl->next;
- Safefree(osl);
- }
- }
+ /* clean up; in particular, free all slabs above current one */
+ LEAVE_SCOPE(oldsave);
return result;
}
while (scan < loceol && !isDIGIT(*scan))
scan++;
}
+ case LNBREAK:
+ if (do_utf8) {
+ loceol = PL_regeol;
+ while (hardcount < max && scan < loceol && (c=is_LNBREAK_utf8(scan))) {
+ scan += c;
+ hardcount++;
+ }
+ } else {
+ /*
+ LNBREAK can match two latin chars, which is ok,
+ because we have a null terminated string, but we
+ have to use hardcount in this situation
+ */
+ while (scan < loceol && (c=is_LNBREAK_latin1(scan))) {
+ scan+=c;
+ hardcount++;
+ }
+ }
+ break;
+ case HORIZWS:
+ if (do_utf8) {
+ loceol = PL_regeol;
+ while (hardcount < max && scan < loceol && (c=is_HORIZWS_utf8(scan))) {
+ scan += c;
+ hardcount++;
+ }
+ } else {
+ while (scan < loceol && is_HORIZWS_latin1(scan))
+ scan++;
+ }
+ break;
+ case NHORIZWS:
+ if (do_utf8) {
+ loceol = PL_regeol;
+ while (hardcount < max && scan < loceol && !is_HORIZWS_utf8(scan)) {
+ scan += UTF8SKIP(scan);
+ hardcount++;
+ }
+ } else {
+ while (scan < loceol && !is_HORIZWS_latin1(scan))
+ scan++;
+
+ }
+ break;
+ case VERTWS:
+ if (do_utf8) {
+ loceol = PL_regeol;
+ while (hardcount < max && scan < loceol && (c=is_VERTWS_utf8(scan))) {
+ scan += c;
+ hardcount++;
+ }
+ } else {
+ while (scan < loceol && is_VERTWS_latin1(scan))
+ scan++;
+
+ }
break;
+ case NVERTWS:
+ if (do_utf8) {
+ loceol = PL_regeol;
+ while (hardcount < max && scan < loceol && !is_VERTWS_utf8(scan)) {
+ scan += UTF8SKIP(scan);
+ hardcount++;
+ }
+ } else {
+ while (scan < loceol && !is_VERTWS_latin1(scan))
+ scan++;
+
+ }
+ break;
+
default: /* Called on something of 0 width. */
break; /* So match right here or not at all. */
}