if (pmflags & PMf_ONCE)
sv_catpv(desc, ",ONCE");
if (regex && regex->check_substr) {
- if (!(regex->reganch & ROPT_NOSCAN))
+ if (!(regex->extflags & RXf_NOSCAN))
sv_catpv(desc, ",SCANFIRST");
- if (regex->reganch & ROPT_CHECK_ALL)
+ if (regex->extflags & RXf_CHECK_ALL)
sv_catpv(desc, ",ALL");
}
if (pmflags & PMf_SKIPWHITE)
char *fptr = "msix";
char ch;
- U16 reganch = (U16)((re->reganch & PMf_COMPILETIME) >> 12);
+ U16 match_flags = (U16)((re->extflags & PMf_COMPILETIME) >> 12);
while((ch = *fptr++)) {
- if(reganch & 1) {
+ if(match_flags & 1) {
reflags[left++] = ch;
}
- reganch >>= 1;
+ match_flags >>= 1;
}
pattern = sv_2mortal(newSVpvn(re->precomp,re->prelen));
- if (re->reganch & ROPT_UTF8) SvUTF8_on(pattern);
+ if (re->extflags & RXf_UTF8) SvUTF8_on(pattern);
/* return the pattern and the modifiers */
XPUSHs(pattern);
/* return the pattern in (?msix:..) format */
pattern = sv_2mortal(newSVpvn(mg->mg_ptr,mg->mg_len));
- if (re->reganch & ROPT_UTF8)
+ if (re->extflags & RXf_UTF8)
SvUTF8_on(pattern);
XPUSHs(pattern);
XSRETURN(1);
TAINT_NOT;
sv_setpvn(sv, s, i);
PL_tainted = oldtainted;
- if ( (rx->reganch & ROPT_CANY_SEEN)
+ if ( (rx->extflags & RXf_CANY_SEEN)
? (RX_MATCH_UTF8(rx)
&& (!i || is_utf8_string((U8*)s, i)))
: (RX_MATCH_UTF8(rx)) )
if (curop == repl
&& !(repl_has_vars
&& (!PM_GETRE(pm)
- || PM_GETRE(pm)->reganch & ROPT_EVAL_SEEN))) {
+ || PM_GETRE(pm)->extflags & RXf_EVAL_SEEN))) {
pm->op_pmflags |= PMf_CONST; /* const for long enough */
pm->op_pmpermflags |= PMf_CONST; /* const for long enough */
prepend_elem(o->op_type, scalar(repl), o);
#define PMf_FOLD 0x4000 /* case insensitivity */
#define PMf_EXTENDED 0x8000 /* chuck embedded whitespace */
-/* mask of bits stored in regexp->reganch */
+/* mask of bits stored in regexp->extflags;
+ each of these bits is also available under an RXf_PMf_xyz name
+ */
#define PMf_COMPILETIME (PMf_MULTILINE|PMf_SINGLELINE|PMf_LOCALE|PMf_FOLD|PMf_EXTENDED)
#ifdef USE_ITHREADS
s = m;
}
}
- else if (do_utf8 == ((rx->reganch & ROPT_UTF8) != 0) &&
- (rx->reganch & RE_USE_INTUIT) && !rx->nparens
- && (rx->reganch & ROPT_CHECK_ALL)
- && !(rx->reganch & ROPT_ANCH)) {
- const int tail = (rx->reganch & RE_INTUIT_TAIL);
+ else if (do_utf8 == ((rx->extflags & RXf_UTF8) != 0) &&
+ (rx->extflags & RXf_USE_INTUIT) && !rx->nparens
+ && (rx->extflags & RXf_CHECK_ALL)
+ && !(rx->extflags & RXf_ANCH)) {
+ const int tail = (rx->extflags & RXf_INTUIT_TAIL);
SV * const csv = CALLREG_INTUIT_STRING(rx);
len = rx->minlenret;
- if (len == 1 && !(rx->reganch & ROPT_UTF8) && !tail) {
+ if (len == 1 && !(rx->extflags & RXf_UTF8) && !tail) {
const char c = *SvPV_nolen_const(csv);
while (--limit) {
for (m = s; m < strend && *m != c; m++)
if (SvTYPE(TARG) >= SVt_PVMG && SvMAGIC(TARG)) {
MAGIC* const mg = mg_find(TARG, PERL_MAGIC_regex_global);
if (mg && mg->mg_len >= 0) {
- if (!(rx->reganch & ROPT_GPOS_SEEN))
+ if (!(rx->extflags & RXf_GPOS_SEEN))
rx->endp[0] = rx->startp[0] = mg->mg_len;
- else if (rx->reganch & ROPT_ANCH_GPOS) {
+ else if (rx->extflags & RXf_ANCH_GPOS) {
r_flags |= REXEC_IGNOREPOS;
rx->endp[0] = rx->startp[0] = mg->mg_len;
- } else if (rx->reganch & ROPT_GPOS_FLOAT)
+ } else if (rx->extflags & RXf_GPOS_FLOAT)
gpos = mg->mg_len;
else
rx->endp[0] = rx->startp[0] = mg->mg_len;
if (update_minmatch++)
minmatch = had_zerolen;
}
- if (rx->reganch & RE_USE_INTUIT &&
- DO_UTF8(TARG) == ((rx->reganch & ROPT_UTF8) != 0)) {
+ if (rx->extflags & RXf_USE_INTUIT &&
+ DO_UTF8(TARG) == ((rx->extflags & RXf_UTF8) != 0)) {
/* FIXME - can PL_bostr be made const char *? */
PL_bostr = (char *)truebase;
s = CALLREG_INTUIT_START(rx, TARG, (char *)s, (char *)strend, r_flags, NULL);
if (!s)
goto nope;
- if ( (rx->reganch & ROPT_CHECK_ALL)
+ if ( (rx->extflags & RXf_CHECK_ALL)
&& !PL_sawampersand
- && ((rx->reganch & ROPT_NOSCAN)
- || !((rx->reganch & RE_INTUIT_TAIL)
+ && ((rx->extflags & RXf_NOSCAN)
+ || !((rx->extflags & RXf_INTUIT_TAIL)
&& (r_flags & REXEC_SCREAM)))
&& !SvROK(TARG)) /* Cannot trust since INTUIT cannot guess ^ */
goto yup;
r_flags |= REXEC_SCREAM;
orig = m = s;
- if (rx->reganch & RE_USE_INTUIT) {
+ if (rx->extflags & RXf_USE_INTUIT) {
PL_bostr = orig;
s = CALLREG_INTUIT_START(rx, TARG, s, strend, r_flags, NULL);
if (!s)
goto nope;
/* How to do it in subst? */
-/* if ( (rx->reganch & ROPT_CHECK_ALL)
+/* if ( (rx->extflags & RXf_CHECK_ALL)
&& !PL_sawampersand
- && ((rx->reganch & ROPT_NOSCAN)
- || !((rx->reganch & RE_INTUIT_TAIL)
+ && ((rx->extflags & RXf_NOSCAN)
+ || !((rx->extflags & RXf_INTUIT_TAIL)
&& (r_flags & REXEC_SCREAM))))
goto yup;
*/
&& !is_cow
#endif
&& (I32)clen <= rx->minlenret && (once || !(r_flags & REXEC_COPY_STR))
- && !(rx->reganch & ROPT_LOOKBEHIND_SEEN)
+ && !(rx->extflags & RXf_LOOKBEHIND_SEEN)
&& (!doutf8 || SvUTF8(TARG))) {
if (!CALLREGEXEC(rx, s, strend, orig, 0, TARG, NULL,
r_flags | REXEC_CHECKED))
#define SCF_SEEN_ACCEPT 0x8000
#define UTF (RExC_utf8 != 0)
-#define LOC ((RExC_flags & PMf_LOCALE) != 0)
-#define FOLD ((RExC_flags & PMf_FOLD) != 0)
+#define LOC ((RExC_flags & RXf_PMf_LOCALE) != 0)
+#define FOLD ((RExC_flags & RXf_PMf_FOLD) != 0)
#define OOB_UNICODE 12345678
#define OOB_NAMEDCLASS -1
flags &= ~SCF_DO_STCLASS;
}
else if (OP(scan) == GPOS) {
- if (!(RExC_rx->reganch & ROPT_GPOS_FLOAT) &&
+ if (!(RExC_rx->extflags & RXf_GPOS_FLOAT) &&
!(delta || is_inf || (data && data->pos_delta)))
{
- if (!(RExC_rx->reganch & ROPT_ANCH) && (flags & SCF_DO_SUBSTR))
- RExC_rx->reganch |= ROPT_ANCH_GPOS;
+ if (!(RExC_rx->extflags & RXf_ANCH) && (flags & SCF_DO_SUBSTR))
+ RExC_rx->extflags |= RXf_ANCH_GPOS;
if (RExC_rx->gofs < (U32)min)
RExC_rx->gofs = min;
} else {
- RExC_rx->reganch |= ROPT_GPOS_FLOAT;
+ RExC_rx->extflags |= RXf_GPOS_FLOAT;
RExC_rx->gofs = 0;
}
}
r->refcnt = 1;
r->prelen = xend - exp;
r->precomp = savepvn(RExC_precomp, r->prelen);
- r->reganch = pm->op_pmflags & PMf_COMPILETIME;
+ r->extflags = pm->op_pmflags & RXf_PMf_COMPILETIME;
+ r->intflags = 0;
r->nparens = RExC_npar - 1; /* set early to validate backrefs */
if (RExC_seen & REG_SEEN_RECURSE) {
#endif
/* Dig out information for optimizations. */
- r->reganch = pm->op_pmflags & PMf_COMPILETIME; /* Again? */
+ r->extflags = pm->op_pmflags & RXf_PMf_COMPILETIME; /* Again? */
pm->op_pmflags = RExC_flags;
if (UTF)
- r->reganch |= ROPT_UTF8; /* Unicode in it? */
+ r->extflags |= RXf_UTF8; /* Unicode in it? */
r->regstclass = NULL;
if (RExC_naughty >= 10) /* Probably an expensive pattern. */
- r->reganch |= ROPT_NAUGHTY;
+ r->intflags |= PREGf_NAUGHTY;
scan = r->program + 1; /* First BRANCH. */
/* testing for BRANCH here tells us whether there is "must appear"
PL_regkind[OP(first)] == NBOUND)
r->regstclass = first;
else if (PL_regkind[OP(first)] == BOL) {
- r->reganch |= (OP(first) == MBOL
- ? ROPT_ANCH_MBOL
+ r->extflags |= (OP(first) == MBOL
+ ? RXf_ANCH_MBOL
: (OP(first) == SBOL
- ? ROPT_ANCH_SBOL
- : ROPT_ANCH_BOL));
+ ? RXf_ANCH_SBOL
+ : RXf_ANCH_BOL));
first = NEXTOPER(first);
goto again;
}
else if (OP(first) == GPOS) {
- r->reganch |= ROPT_ANCH_GPOS;
+ r->extflags |= RXf_ANCH_GPOS;
first = NEXTOPER(first);
goto again;
}
else if ((!sawopen || !RExC_sawback) &&
(OP(first) == STAR &&
PL_regkind[OP(NEXTOPER(first))] == REG_ANY) &&
- !(r->reganch & ROPT_ANCH) && !(RExC_seen & REG_SEEN_EVAL))
+ !(r->extflags & RXf_ANCH) && !(RExC_seen & REG_SEEN_EVAL))
{
/* turn .* into ^.* with an implied $*=1 */
const int type =
(OP(NEXTOPER(first)) == REG_ANY)
- ? ROPT_ANCH_MBOL
- : ROPT_ANCH_SBOL;
- r->reganch |= type | ROPT_IMPLICIT;
+ ? RXf_ANCH_MBOL
+ : RXf_ANCH_SBOL;
+ r->extflags |= type;
+ r->intflags |= PREGf_IMPLICIT;
first = NEXTOPER(first);
goto again;
}
if (sawplus && (!sawopen || !RExC_sawback)
&& !(RExC_seen & REG_SEEN_EVAL)) /* May examine pos and $& */
/* x+ must match at the 1st pos of run of x's */
- r->reganch |= ROPT_SKIP;
+ r->intflags |= PREGf_SKIP;
/* Scan is after the zeroth branch, first is atomic matcher. */
#ifdef TRIE_STUDY_OPT
if ( RExC_npar == 1 && data.longest == &(data.longest_fixed)
&& data.last_start_min == 0 && data.last_end > 0
&& !RExC_seen_zerolen
- && (!(RExC_seen & REG_SEEN_GPOS) || (r->reganch & ROPT_ANCH_GPOS)))
- r->reganch |= ROPT_CHECK_ALL;
+ && (!(RExC_seen & REG_SEEN_GPOS) || (r->extflags & RXf_ANCH_GPOS)))
+ r->extflags |= RXf_CHECK_ALL;
scan_commit(pRExC_state, &data,&minlen);
SvREFCNT_dec(data.last_found);
if (longest_float_length
|| (data.flags & SF_FL_BEFORE_EOL
&& (!(data.flags & SF_FL_BEFORE_MEOL)
- || (RExC_flags & PMf_MULTILINE))))
+ || (RExC_flags & RXf_PMf_MULTILINE))))
{
I32 t,ml;
t = (data.flags & SF_FL_BEFORE_EOL /* Can't have SEOL and MULTI */
&& (!(data.flags & SF_FL_BEFORE_MEOL)
- || (RExC_flags & PMf_MULTILINE)));
+ || (RExC_flags & RXf_PMf_MULTILINE)));
fbm_compile(data.longest_float, t ? FBMcf_TAIL : 0);
}
else {
if (longest_fixed_length
|| (data.flags & SF_FIX_BEFORE_EOL /* Cannot have SEOL and MULTI */
&& (!(data.flags & SF_FIX_BEFORE_MEOL)
- || (RExC_flags & PMf_MULTILINE))))
+ || (RExC_flags & RXf_PMf_MULTILINE))))
{
I32 t,ml;
t = (data.flags & SF_FIX_BEFORE_EOL /* Can't have SEOL and MULTI */
&& (!(data.flags & SF_FIX_BEFORE_MEOL)
- || (RExC_flags & PMf_MULTILINE)));
+ || (RExC_flags & RXf_PMf_MULTILINE)));
fbm_compile(data.longest_fixed, t ? FBMcf_TAIL : 0);
}
else {
(struct regnode_charclass_class*)RExC_rx->data->data[n],
struct regnode_charclass_class);
r->regstclass = (regnode*)RExC_rx->data->data[n];
- r->reganch &= ~ROPT_SKIP; /* Used in find_byclass(). */
+ r->intflags &= ~PREGf_SKIP; /* Used in find_byclass(). */
DEBUG_COMPILE_r({ SV *sv = sv_newmortal();
regprop(r, sv, (regnode*)data.start_class);
PerlIO_printf(Perl_debug_log,
r->check_substr = r->anchored_substr;
r->check_utf8 = r->anchored_utf8;
r->check_offset_min = r->check_offset_max = r->anchored_offset;
- if (r->reganch & ROPT_ANCH_SINGLE)
- r->reganch |= ROPT_NOSCAN;
+ if (r->extflags & RXf_ANCH_SINGLE)
+ r->extflags |= RXf_NOSCAN;
}
else {
r->check_end_shift = r->float_end_shift;
}
/* XXXX Currently intuiting is not compatible with ANCH_GPOS.
This should be changed ASAP! */
- if ((r->check_substr || r->check_utf8) && !(r->reganch & ROPT_ANCH_GPOS)) {
- r->reganch |= RE_USE_INTUIT;
+ if ((r->check_substr || r->check_utf8) && !(r->extflags & RXf_ANCH_GPOS)) {
+ r->extflags |= RXf_USE_INTUIT;
if (SvTAIL(r->check_substr ? r->check_substr : r->check_utf8))
- r->reganch |= RE_INTUIT_TAIL;
+ r->extflags |= RXf_INTUIT_TAIL;
}
/* XXX Unneeded? dmq (shouldn't as this is handled elsewhere)
if ( (STRLEN)minlen < longest_float_length )
(struct regnode_charclass_class*)RExC_rx->data->data[n],
struct regnode_charclass_class);
r->regstclass = (regnode*)RExC_rx->data->data[n];
- r->reganch &= ~ROPT_SKIP; /* Used in find_byclass(). */
+ r->intflags &= ~PREGf_SKIP; /* Used in find_byclass(). */
DEBUG_COMPILE_r({ SV* sv = sv_newmortal();
regprop(r, sv, (regnode*)data.start_class);
PerlIO_printf(Perl_debug_log,
r->minlen = minlen;
if (RExC_seen & REG_SEEN_GPOS)
- r->reganch |= ROPT_GPOS_SEEN;
+ r->extflags |= RXf_GPOS_SEEN;
if (RExC_seen & REG_SEEN_LOOKBEHIND)
- r->reganch |= ROPT_LOOKBEHIND_SEEN;
+ r->extflags |= RXf_LOOKBEHIND_SEEN;
if (RExC_seen & REG_SEEN_EVAL)
- r->reganch |= ROPT_EVAL_SEEN;
+ r->extflags |= RXf_EVAL_SEEN;
if (RExC_seen & REG_SEEN_CANY)
- r->reganch |= ROPT_CANY_SEEN;
+ r->extflags |= RXf_CANY_SEEN;
if (RExC_seen & REG_SEEN_VERBARG)
- r->reganch |= ROPT_VERBARG_SEEN;
+ r->intflags |= PREGf_VERBARG_SEEN;
if (RExC_seen & REG_SEEN_CUTGROUP)
- r->reganch |= ROPT_CUTGROUP_SEEN;
+ r->intflags |= PREGf_CUTGROUP_SEEN;
if (RExC_paren_names)
r->paren_names = (HV*)SvREFCNT_inc(RExC_paren_names);
else
case '^':
RExC_seen_zerolen++;
nextchar(pRExC_state);
- if (RExC_flags & PMf_MULTILINE)
+ if (RExC_flags & RXf_PMf_MULTILINE)
ret = reg_node(pRExC_state, MBOL);
- else if (RExC_flags & PMf_SINGLELINE)
+ else if (RExC_flags & RXf_PMf_SINGLELINE)
ret = reg_node(pRExC_state, SBOL);
else
ret = reg_node(pRExC_state, BOL);
nextchar(pRExC_state);
if (*RExC_parse)
RExC_seen_zerolen++;
- if (RExC_flags & PMf_MULTILINE)
+ if (RExC_flags & RXf_PMf_MULTILINE)
ret = reg_node(pRExC_state, MEOL);
- else if (RExC_flags & PMf_SINGLELINE)
+ else if (RExC_flags & RXf_PMf_SINGLELINE)
ret = reg_node(pRExC_state, SEOL);
else
ret = reg_node(pRExC_state, EOL);
break;
case '.':
nextchar(pRExC_state);
- if (RExC_flags & PMf_SINGLELINE)
+ if (RExC_flags & RXf_PMf_SINGLELINE)
ret = reg_node(pRExC_state, SANY);
else
ret = reg_node(pRExC_state, REG_ANY);
break;
case '#':
- if (RExC_flags & PMf_EXTENDED) {
+ if (RExC_flags & RXf_PMf_EXTENDED) {
while (RExC_parse < RExC_end && *RExC_parse != '\n')
RExC_parse++;
if (RExC_parse < RExC_end)
{
char * const oldp = p;
- if (RExC_flags & PMf_EXTENDED)
+ if (RExC_flags & RXf_PMf_EXTENDED)
p = regwhite(p, RExC_end);
switch (*p) {
case '^':
ender = *p++;
break;
}
- if (RExC_flags & PMf_EXTENDED)
+ if (RExC_flags & RXf_PMf_EXTENDED)
p = regwhite(p, RExC_end);
if (UTF && FOLD) {
/* Prime the casefolded buffer. */
RExC_parse++;
continue;
}
- if (RExC_flags & PMf_EXTENDED) {
+ if (RExC_flags & RXf_PMf_EXTENDED) {
if (isSPACE(*RExC_parse)) {
RExC_parse++;
continue;
(r->check_substr == r->float_substr
&& r->check_utf8 == r->float_utf8
? "(checking floating" : "(checking anchored"));
- if (r->reganch & ROPT_NOSCAN)
+ if (r->extflags & RXf_NOSCAN)
PerlIO_printf(Perl_debug_log, " noscan");
- if (r->reganch & ROPT_CHECK_ALL)
+ if (r->extflags & RXf_CHECK_ALL)
PerlIO_printf(Perl_debug_log, " isall");
if (r->check_substr || r->check_utf8)
PerlIO_printf(Perl_debug_log, ") ");
regprop(r, sv, r->regstclass);
PerlIO_printf(Perl_debug_log, "stclass %s ", SvPVX_const(sv));
}
- if (r->reganch & ROPT_ANCH) {
+ if (r->extflags & RXf_ANCH) {
PerlIO_printf(Perl_debug_log, "anchored");
- if (r->reganch & ROPT_ANCH_BOL)
+ if (r->extflags & RXf_ANCH_BOL)
PerlIO_printf(Perl_debug_log, "(BOL)");
- if (r->reganch & ROPT_ANCH_MBOL)
+ if (r->extflags & RXf_ANCH_MBOL)
PerlIO_printf(Perl_debug_log, "(MBOL)");
- if (r->reganch & ROPT_ANCH_SBOL)
+ if (r->extflags & RXf_ANCH_SBOL)
PerlIO_printf(Perl_debug_log, "(SBOL)");
- if (r->reganch & ROPT_ANCH_GPOS)
+ if (r->extflags & RXf_ANCH_GPOS)
PerlIO_printf(Perl_debug_log, "(GPOS)");
PerlIO_putc(Perl_debug_log, ' ');
}
- if (r->reganch & ROPT_GPOS_SEEN)
+ if (r->extflags & RXf_GPOS_SEEN)
PerlIO_printf(Perl_debug_log, "GPOS:%"UVuf" ", r->gofs);
- if (r->reganch & ROPT_SKIP)
+ if (r->intflags & PREGf_SKIP)
PerlIO_printf(Perl_debug_log, "plus ");
- if (r->reganch & ROPT_IMPLICIT)
+ if (r->intflags & PREGf_IMPLICIT)
PerlIO_printf(Perl_debug_log, "implicit ");
PerlIO_printf(Perl_debug_log, "minlen %ld ", (long) r->minlen);
- if (r->reganch & ROPT_EVAL_SEEN)
+ if (r->extflags & RXf_EVAL_SEEN)
PerlIO_printf(Perl_debug_log, "with eval ");
PerlIO_printf(Perl_debug_log, "\n");
#else
reginitcolors();
{
SV *dsv= sv_newmortal();
- RE_PV_QUOTED_DECL(s, (r->reganch & ROPT_UTF8),
+ RE_PV_QUOTED_DECL(s, (r->extflags & RXf_UTF8),
dsv, r->precomp, r->prelen, 60);
PerlIO_printf(Perl_debug_log,"%sFreeing REx:%s %s\n",
PL_colors[4],PL_colors[5],s);
for (i = 0; i < count; i++) {
d->what[i] = r->data->what[i];
switch (d->what[i]) {
- /* legal options are one of: sSfpont
+ /* legal options are one of: sSfpontT
see also regcomp.h and pregfree() */
case 's':
case 'S':
ret->regstclass = (regnode*)d->data[i];
break;
case 'o':
- /* Compiled op trees are readonly, and can thus be
- shared without duplication. */
+ /* Compiled op trees are readonly and in shared memory,
+ and can thus be shared without duplication. */
OP_REFCNT_LOCK;
d->data[i] = (void*)OpREFCNT_inc((OP*)r->data->data[i]);
OP_REFCNT_UNLOCK;
ret->nparens = r->nparens;
ret->lastparen = r->lastparen;
ret->lastcloseparen = r->lastcloseparen;
- ret->reganch = r->reganch;
+ ret->intflags = r->intflags;
+ ret->extflags = r->extflags;
ret->sublen = r->sublen;
int left = 0;
int right = 4;
bool need_newline = 0;
- U16 reganch = (U16)((re->reganch & PMf_COMPILETIME) >> 12);
+ U16 reganch = (U16)((re->extflags & RXf_PMf_COMPILETIME) >> 12);
while((ch = *fptr++)) {
if(reganch & 1) {
* ourself. If we find a '\n' first (or if we don't find '#' or '\n'),
* we don't need to add anything. -jfriedl
*/
- if (PMf_EXTENDED & re->reganch) {
+ if (PMf_EXTENDED & re->extflags) {
const char *endptr = re->precomp + re->prelen;
while (endptr >= re->precomp) {
const char c = *(endptr--);
if (haseval)
*haseval = re->program[0].next_off;
if (flags)
- *flags = ((re->reganch & ROPT_UTF8) ? 1 : 0);
+ *flags = ((re->extflags & RXf_UTF8) ? 1 : 0);
if (lp)
*lp = mg->mg_len;
I32 ml_anch;
register char *other_last = NULL; /* other substr checked before this */
char *check_at = NULL; /* check substr found at this pos */
- const I32 multiline = prog->reganch & PMf_MULTILINE;
+ const I32 multiline = prog->extflags & RXf_PMf_MULTILINE;
#ifdef DEBUGGING
const char * const i_strpos = strpos;
#endif
RX_MATCH_UTF8_set(prog,do_utf8);
- if (prog->reganch & ROPT_UTF8) {
+ if (prog->extflags & RXf_UTF8) {
PL_reg_flags |= RF_utf8;
}
DEBUG_EXECUTE_r(
"Non-utf8 string cannot match utf8 check string\n"));
goto fail;
}
- if (prog->reganch & ROPT_ANCH) { /* Match at beg-of-str or after \n */
- ml_anch = !( (prog->reganch & ROPT_ANCH_SINGLE)
- || ( (prog->reganch & ROPT_ANCH_BOL)
+ if (prog->extflags & RXf_ANCH) { /* Match at beg-of-str or after \n */
+ ml_anch = !( (prog->extflags & RXf_ANCH_SINGLE)
+ || ( (prog->extflags & RXf_ANCH_BOL)
&& !multiline ) ); /* Check after \n? */
if (!ml_anch) {
- if ( !(prog->reganch & (ROPT_ANCH_GPOS /* Checked by the caller */
- | ROPT_IMPLICIT)) /* not a real BOL */
+ if ( !(prog->extflags & RXf_ANCH_GPOS) /* Checked by the caller */
+ && !(prog->intflags & PREGf_IMPLICIT) /* not a real BOL */
/* SvCUR is not set on references: SvRV and SvPVX_const overlap */
&& sv && !SvROK(sv)
&& (strpos != strbeg)) {
goto fail;
}
if (prog->check_offset_min == prog->check_offset_max &&
- !(prog->reganch & ROPT_CANY_SEEN)) {
+ !(prog->extflags & RXf_CANY_SEEN)) {
/* Substring at constant offset from beg-of-str... */
I32 slen;
else {
U8* start_point;
U8* end_point;
- if (prog->reganch & ROPT_CANY_SEEN) {
+ if (prog->extflags & RXf_CANY_SEEN) {
start_point= (U8*)(s + srch_start_shift);
end_point= (U8*)(strend - srch_end_shift);
} else {
if (ml_anch && sv && !SvROK(sv) /* See prev comment on SvROK */
&& (strpos != strbeg) && strpos[-1] != '\n'
/* May be due to an implicit anchor of m{.*foo} */
- && !(prog->reganch & ROPT_IMPLICIT))
+ && !(prog->intflags & PREGf_IMPLICIT))
{
t = strpos;
goto find_anchor;
(long)(strpos - i_strpos), PL_colors[0], PL_colors[1]);
);
success_at_start:
- if (!(prog->reganch & ROPT_NAUGHTY) /* XXXX If strpos moved? */
+ if (!(prog->intflags & PREGf_NAUGHTY) /* XXXX If strpos moved? */
&& (do_utf8 ? (
prog->check_utf8 /* Could be deleted already */
&& --BmUSEFUL(prog->check_utf8) < 0
/* XXXX This is a remnant of the old implementation. It
looks wasteful, since now INTUIT can use many
other heuristics. */
- prog->reganch &= ~RE_USE_INTUIT;
+ prog->extflags &= ~RXf_USE_INTUIT;
}
else
s = strpos;
}
DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
"This position contradicts STCLASS...\n") );
- if ((prog->reganch & ROPT_ANCH) && !ml_anch)
+ if ((prog->extflags & RXf_ANCH) && !ml_anch)
goto fail;
/* Contradict one of substrings */
if (prog->anchored_substr || prog->anchored_utf8) {
const char *strend, regmatch_info *reginfo)
{
dVAR;
- const I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
+ const I32 doevery = (prog->intflags & PREGf_SKIP) == 0;
char *m;
STRLEN ln;
STRLEN lnc;
return 0;
}
- multiline = prog->reganch & PMf_MULTILINE;
+ multiline = prog->extflags & RXf_PMf_MULTILINE;
reginfo.prog = prog;
RX_MATCH_UTF8_set(prog, do_utf8);
PL_reg_eval_set = 0;
PL_reg_maxiter = 0;
- if (prog->reganch & ROPT_UTF8)
+ if (prog->extflags & RXf_UTF8)
PL_reg_flags |= RF_utf8;
/* Mark beginning of line for ^ and lookbehind. */
/* If there is a "must appear" string, look for it. */
s = startpos;
- if (prog->reganch & ROPT_GPOS_SEEN) { /* Need to set reginfo->ganch */
+ if (prog->extflags & RXf_GPOS_SEEN) { /* Need to set reginfo->ganch */
MAGIC *mg;
if (flags & REXEC_IGNOREPOS) /* Means: check only at start */
&& (mg = mg_find(sv, PERL_MAGIC_regex_global))
&& mg->mg_len >= 0) {
reginfo.ganch = strbeg + mg->mg_len; /* Defined pos() */
- if (prog->reganch & ROPT_ANCH_GPOS) {
+ if (prog->extflags & RXf_ANCH_GPOS) {
if (s > reginfo.ganch)
goto phooey;
s = reginfo.ganch - prog->gofs;
/* Simplest case: anchored match need be tried only once. */
/* [unless only anchor is BOL and multiline is set] */
- if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) {
+ if (prog->extflags & (RXf_ANCH & ~RXf_ANCH_GPOS)) {
if (s == startpos && regtry(&reginfo, &startpos))
goto got_it;
- else if (multiline || (prog->reganch & ROPT_IMPLICIT)
- || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */
+ else if (multiline || (prog->intflags & PREGf_IMPLICIT)
+ || (prog->extflags & RXf_ANCH_MBOL)) /* XXXX SBOL? */
{
char *end;
after_try:
if (s >= end)
goto phooey;
- if (prog->reganch & RE_USE_INTUIT) {
+ if (prog->extflags & RXf_USE_INTUIT) {
s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
if (!s)
goto phooey;
}
}
goto phooey;
- } else if (ROPT_GPOS_CHECK == (prog->reganch & ROPT_GPOS_CHECK))
+ } else if (RXf_GPOS_CHECK == (prog->extflags & RXf_GPOS_CHECK))
{
/* the warning about reginfo.ganch being used without intialization
- is bogus -- we set it above, when prog->reganch & ROPT_GPOS_SEEN
+ is bogus -- we set it above, when prog->extflags & RXf_GPOS_SEEN
and we only enter this block when the same bit is set. */
char *tmp_s = reginfo.ganch - prog->gofs;
if (regtry(&reginfo, &tmp_s))
}
/* Messy cases: unanchored match. */
- if ((prog->anchored_substr || prog->anchored_utf8) && prog->reganch & ROPT_SKIP) {
+ if ((prog->anchored_substr || prog->anchored_utf8) && prog->intflags & PREGf_SKIP) {
/* we have /x+whatever/ */
/* it must be a one character string (XXXX Except UTF?) */
char ch;
GET_RE_DEBUG_FLAGS_DECL;
reginfo->cutpoint=NULL;
- if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
+ if ((prog->extflags & RXf_EVAL_SEEN) && !PL_reg_eval_set) {
MAGIC *mg;
PL_reg_eval_set = RS_init;
S_debug_start_match(pTHX_ const regexp *prog, const bool do_utf8,
const char *start, const char *end, const char *blurb)
{
- const bool utf8_pat= prog->reganch & ROPT_UTF8 ? 1 : 0;
+ const bool utf8_pat= prog->extflags & RXf_UTF8 ? 1 : 0;
if (!PL_colorset)
reginitcolors();
{
PL_reg_maxiter = 0;
ST.toggle_reg_flags = PL_reg_flags;
- if (re->reganch & ROPT_UTF8)
+ if (re->extflags & RXf_UTF8)
PL_reg_flags |= RF_utf8;
else
PL_reg_flags &= ~RF_utf8;
result = 0;
final_exit:
- if (rex->reganch & ROPT_VERBARG_SEEN) {
+ if (rex->intflags & PREGf_VERBARG_SEEN) {
SV *sv_err = get_sv("REGERROR", 1);
SV *sv_mrk = get_sv("REGMARK", 1);
if (result) {
struct reg_data;
struct regexp_engine;
+
typedef struct regexp_paren_ofs {
I32 *startp;
I32 *endp;
} regexp_paren_ofs;
-typedef struct regexp {
- I32 *startp;
- I32 *endp;
- regexp_paren_ofs *swap;
- regnode *regstclass;
- struct reg_substr_data *substrs;
- char *precomp; /* pre-compilation regular expression */
- struct reg_data *data; /* Additional data. */
- char *subbeg; /* saved or original string
- so \digit works forever. */
#ifdef PERL_OLD_COPY_ON_WRITE
- SV *saved_copy; /* If non-NULL, SV which is COW from original */
+#define SV_SAVED_COPY SV *saved_copy; /* If non-NULL, SV which is COW from original */
+#else
+#define SV_SAVED_COPY
#endif
- U32 *offsets; /* offset annotations 20001228 MJD */
- I32 sublen; /* Length of string pointed by subbeg */
- I32 refcnt;
+
+typedef struct regexp {
+ /* Generic details */
+ const struct regexp_engine* engine; /* what created this regexp? */
+ I32 refcnt; /* Refcount of this regexp */
+
+ /* The original string as passed to the compilation routine */
+ char *precomp; /* pre-compilation regular expression */
+ I32 prelen; /* length of precomp */
+
+ /* Used for generic optimisations by the perl core.
+ All engines are expected to provide this information. */
+ U32 extflags; /* Flags used both externally and internally */
I32 minlen; /* mininum possible length of string to match */
I32 minlenret; /* mininum possible length of $& */
U32 gofs; /* chars left of pos that we search from */
- I32 prelen; /* length of precomp */
- U32 nparens; /* number of parentheses */
- U32 lastparen; /* last paren matched */
- U32 lastcloseparen; /* last paren matched */
- U32 reganch; /* Internal use only +
- Tainted information used by regexec? */
- HV *paren_names; /* Paren names */
- const struct regexp_engine* engine;
+ U32 nparens; /* number of capture buffers */
+ HV *paren_names; /* Optional hash of paren names */
+ struct reg_substr_data *substrs; /* substring data about strings that must appear
+ in the final match, used for optimisations */
+
+ /* Data about the last/current match. Used by the core and therefore
+ must be populated by all engines. */
+ char *subbeg; /* saved or original string
+ so \digit works forever. */
+ I32 sublen; /* Length of string pointed by subbeg */
+ I32 *startp; /* Array of offsets from start of string (@-) */
+ I32 *endp; /* Array of offsets from start of string (@+) */
+
+ SV_SAVED_COPY /* If non-NULL, SV which is COW from original */
+ U32 lastparen; /* last open paren matched */
+ U32 lastcloseparen; /* last close paren matched */
+
+ /* Perl Regex Engine specific data. Other engines shouldn't need
+ to touch this. Should be refactored out into a different structure
+ and accessed via the *pprivate field. (except intflags) */
+ U32 intflags; /* Internal flags */
+ void *pprivate; /* Data private to the regex engine which
+ created this object. Perl will never mess with
+ this member at all. */
+ regexp_paren_ofs *swap; /* Swap copy of *startp / *endp */
+ U32 *offsets; /* offset annotations 20001228 MJD
+ data about mapping the program to the
+ string*/
+ regnode *regstclass; /* Optional startclass as identified or constructed
+ by the optimiser */
+ struct reg_data *data; /* Additional miscellaneous data used by the program.
+ Used to make it easier to clone and free arbitrary
+ data that the regops need. Often the ARG field of
+ a regop is an index into this structure */
regnode program[1]; /* Unwarranted chumminess with compiler. */
} regexp;
#endif
} regexp_engine;
-#define ROPT_ANCH (ROPT_ANCH_BOL|ROPT_ANCH_MBOL|ROPT_ANCH_GPOS|ROPT_ANCH_SBOL)
-#define ROPT_ANCH_SINGLE (ROPT_ANCH_SBOL|ROPT_ANCH_GPOS)
-#define ROPT_ANCH_BOL 0x00000001
-#define ROPT_ANCH_MBOL 0x00000002
-#define ROPT_ANCH_SBOL 0x00000004
-#define ROPT_ANCH_GPOS 0x00000008
-#define ROPT_SKIP 0x00000010
-#define ROPT_IMPLICIT 0x00000020 /* Converted .* to ^.* */
-#define ROPT_NOSCAN 0x00000040 /* Check-string always at start. */
-#define ROPT_GPOS_SEEN 0x00000080
-#define ROPT_CHECK_ALL 0x00000100
-#define ROPT_LOOKBEHIND_SEEN 0x00000200
-#define ROPT_EVAL_SEEN 0x00000400
-#define ROPT_CANY_SEEN 0x00000800
-#define ROPT_SANY_SEEN ROPT_CANY_SEEN /* src bckwrd cmpt */
-#define ROPT_GPOS_CHECK (ROPT_GPOS_SEEN|ROPT_ANCH_GPOS)
-
-/* 0xF800 of reganch is used by PMf_COMPILETIME */
-
-#define ROPT_UTF8 0x00010000
-#define ROPT_NAUGHTY 0x00020000 /* how exponential is this pattern? */
-#define ROPT_COPY_DONE 0x00040000 /* subbeg is a copy of the string */
-#define ROPT_TAINTED_SEEN 0x00080000
-#define ROPT_MATCH_UTF8 0x10000000 /* subbeg is utf-8 */
-#define ROPT_VERBARG_SEEN 0x20000000
-#define ROPT_CUTGROUP_SEEN 0x40000000
-#define ROPT_GPOS_FLOAT 0x80000000
-
-#define RE_USE_INTUIT_NOML 0x00100000 /* Best to intuit before matching */
-#define RE_USE_INTUIT_ML 0x00200000
-#define REINT_AUTORITATIVE_NOML 0x00400000 /* Can trust a positive answer */
-#define REINT_AUTORITATIVE_ML 0x00800000
-#define REINT_ONCE_NOML 0x01000000 /* Intuit can succed once only. */
-#define REINT_ONCE_ML 0x02000000
-#define RE_INTUIT_ONECHAR 0x04000000
-#define RE_INTUIT_TAIL 0x08000000
-
-
-#define RE_USE_INTUIT (RE_USE_INTUIT_NOML|RE_USE_INTUIT_ML)
-#define REINT_AUTORITATIVE (REINT_AUTORITATIVE_NOML|REINT_AUTORITATIVE_ML)
-#define REINT_ONCE (REINT_ONCE_NOML|REINT_ONCE_ML)
-
-#define RX_HAS_CUTGROUP(prog) ((prog)->reganch & ROPT_CUTGROUP_SEEN)
-#define RX_MATCH_TAINTED(prog) ((prog)->reganch & ROPT_TAINTED_SEEN)
-#define RX_MATCH_TAINTED_on(prog) ((prog)->reganch |= ROPT_TAINTED_SEEN)
-#define RX_MATCH_TAINTED_off(prog) ((prog)->reganch &= ~ROPT_TAINTED_SEEN)
+/*
+ * Flags stored in regexp->intflags
+ * These are used only internally to the regexp engine
+ */
+#define PREGf_SKIP 0x00000001
+#define PREGf_IMPLICIT 0x00000002 /* Converted .* to ^.* */
+#define PREGf_NAUGHTY 0x00000004 /* how exponential is this pattern? */
+#define PREGf_VERBARG_SEEN 0x00000008
+#define PREGf_CUTGROUP_SEEN 0x00000010
+
+
+/* Flags stored in regexp->extflags
+ * These are used by code external to the regexp engine
+ */
+
+/* Anchor and GPOS related stuff */
+#define RXf_ANCH_BOL 0x00000001
+#define RXf_ANCH_MBOL 0x00000002
+#define RXf_ANCH_SBOL 0x00000004
+#define RXf_ANCH_GPOS 0x00000008
+#define RXf_GPOS_SEEN 0x00000010
+#define RXf_GPOS_FLOAT 0x00000020
+/* five bits here */
+#define RXf_ANCH (RXf_ANCH_BOL|RXf_ANCH_MBOL|RXf_ANCH_GPOS|RXf_ANCH_SBOL)
+#define RXf_GPOS_CHECK (RXf_GPOS_SEEN|RXf_ANCH_GPOS)
+#define RXf_ANCH_SINGLE (RXf_ANCH_SBOL|RXf_ANCH_GPOS)
+/*
+ * 0xF800 of extflags is used by PMf_COMPILETIME
+ * These are the regex equivalents of the PMf_xyz flags defined
+ * in op.h
+ */
+#define RXf_PMf_LOCALE 0x00000800
+#define RXf_PMf_MULTILINE 0x00001000
+#define RXf_PMf_SINGLELINE 0x00002000
+#define RXf_PMf_FOLD 0x00004000
+#define RXf_PMf_EXTENDED 0x00008000
+#define RXf_PMf_COMPILETIME (RXf_PMf_MULTILINE|RXf_PMf_SINGLELINE|RXf_PMf_LOCALE|RXf_PMf_FOLD|RXf_PMf_EXTENDED)
+
+/* What we have seen */
+/* one bit here */
+#define RXf_LOOKBEHIND_SEEN 0x00020000
+#define RXf_EVAL_SEEN 0x00040000
+#define RXf_CANY_SEEN 0x00080000
+
+/* Special */
+#define RXf_NOSCAN 0x00100000
+#define RXf_CHECK_ALL 0x00200000
+
+/* UTF8 related */
+#define RXf_UTF8 0x00400000
+#define RXf_MATCH_UTF8 0x00800000
+
+/* Intuit related */
+#define RXf_USE_INTUIT_NOML 0x01000000
+#define RXf_USE_INTUIT_ML 0x02000000
+#define RXf_INTUIT_TAIL 0x04000000
+/* one bit here */
+#define RXf_USE_INTUIT (RXf_USE_INTUIT_NOML|RXf_USE_INTUIT_ML)
+
+/* Copy and tainted info */
+#define RXf_COPY_DONE 0x10000000
+#define RXf_TAINTED_SEEN 0x20000000
+/* two bits here */
+
+
+#define RX_HAS_CUTGROUP(prog) ((prog)->intflags & PREGf_CUTGROUP_SEEN)
+#define RX_MATCH_TAINTED(prog) ((prog)->extflags & RXf_TAINTED_SEEN)
+#define RX_MATCH_TAINTED_on(prog) ((prog)->extflags |= RXf_TAINTED_SEEN)
+#define RX_MATCH_TAINTED_off(prog) ((prog)->extflags &= ~RXf_TAINTED_SEEN)
#define RX_MATCH_TAINTED_set(prog, t) ((t) \
? RX_MATCH_TAINTED_on(prog) \
: RX_MATCH_TAINTED_off(prog))
-#define RX_MATCH_COPIED(prog) ((prog)->reganch & ROPT_COPY_DONE)
-#define RX_MATCH_COPIED_on(prog) ((prog)->reganch |= ROPT_COPY_DONE)
-#define RX_MATCH_COPIED_off(prog) ((prog)->reganch &= ~ROPT_COPY_DONE)
+#define RX_MATCH_COPIED(prog) ((prog)->extflags & RXf_COPY_DONE)
+#define RX_MATCH_COPIED_on(prog) ((prog)->extflags |= RXf_COPY_DONE)
+#define RX_MATCH_COPIED_off(prog) ((prog)->extflags &= ~RXf_COPY_DONE)
#define RX_MATCH_COPIED_set(prog,t) ((t) \
? RX_MATCH_COPIED_on(prog) \
: RX_MATCH_COPIED_off(prog))
}} STMT_END
#endif
-#define RX_MATCH_UTF8(prog) ((prog)->reganch & ROPT_MATCH_UTF8)
-#define RX_MATCH_UTF8_on(prog) ((prog)->reganch |= ROPT_MATCH_UTF8)
-#define RX_MATCH_UTF8_off(prog) ((prog)->reganch &= ~ROPT_MATCH_UTF8)
+#define RX_MATCH_UTF8(prog) ((prog)->extflags & RXf_MATCH_UTF8)
+#define RX_MATCH_UTF8_on(prog) ((prog)->extflags |= RXf_MATCH_UTF8)
+#define RX_MATCH_UTF8_off(prog) ((prog)->extflags &= ~RXf_MATCH_UTF8)
#define RX_MATCH_UTF8_set(prog, t) ((t) \
? (RX_MATCH_UTF8_on(prog), (PL_reg_match_utf8 = 1)) \
: (RX_MATCH_UTF8_off(prog), (PL_reg_match_utf8 = 0)))