*/
#ifdef PERL_EXT_RE_BUILD
-/* need to replace pregcomp et al, so enable that */
-# ifndef PERL_IN_XSUB_RE
-# define PERL_IN_XSUB_RE
-# endif
-/* need access to debugger hooks */
-# if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING)
-# define DEBUGGING
-# endif
-#endif
-
-#ifdef PERL_IN_XSUB_RE
-/* We *really* need to overwrite these symbols: */
-# define Perl_pregcomp my_regcomp
-# define Perl_regdump my_regdump
-# define Perl_regprop my_regprop
-# define Perl_pregfree my_regfree
-# define Perl_re_intuit_string my_re_intuit_string
-/* *These* symbols are masked to allow static link. */
-# define Perl_regnext my_regnext
-# define Perl_save_re_context my_save_re_context
-# define Perl_reginitcolors my_reginitcolors
-
-# define PERL_NO_GET_CONTEXT
+#include "re_top.h"
#endif
/*
#endif
#define REG_COMP_C
-#include "regcomp.h"
+#ifdef PERL_IN_XSUB_RE
+# include "re_comp.h"
+#else
+# include "regcomp.h"
+#endif
#ifdef op
#undef op
floating substrings if needed. */
STATIC void
-S_scan_commit(pTHX_ RExC_state_t *pRExC_state, scan_data_t *data)
+S_scan_commit(pTHX_ const RExC_state_t *pRExC_state, scan_data_t *data)
{
const STRLEN l = CHR_SVLEN(data->last_found);
const STRLEN old_l = CHR_SVLEN(*data->longest);
SvCUR_set(data->last_found, 0);
{
SV * const sv = data->last_found;
- MAGIC * const mg =
- SvUTF8(sv) && SvMAGICAL(sv) ? mg_find(sv, PERL_MAGIC_utf8) : NULL;
- if (mg)
- mg->mg_len = 0;
+ if (SvUTF8(sv) && SvMAGICAL(sv)) {
+ MAGIC * const mg = mg_find(sv, PERL_MAGIC_utf8);
+ if (mg)
+ mg->mg_len = 0;
+ }
}
data->last_end = -1;
data->flags &= ~SF_BEFORE_EOL;
/* Can match anything (initialization) */
STATIC void
-S_cl_anything(RExC_state_t *pRExC_state, struct regnode_charclass_class *cl)
+S_cl_anything(const RExC_state_t *pRExC_state, struct regnode_charclass_class *cl)
{
ANYOF_CLASS_ZERO(cl);
ANYOF_BITMAP_SETALL(cl);
/* Can match anything (initialization) */
STATIC void
-S_cl_init(RExC_state_t *pRExC_state, struct regnode_charclass_class *cl)
+S_cl_init(const RExC_state_t *pRExC_state, struct regnode_charclass_class *cl)
{
Zero(cl, 1, struct regnode_charclass_class);
cl->type = ANYOF;
}
STATIC void
-S_cl_init_zero(RExC_state_t *pRExC_state, struct regnode_charclass_class *cl)
+S_cl_init_zero(const RExC_state_t *pRExC_state, struct regnode_charclass_class *cl)
{
Zero(cl, 1, struct regnode_charclass_class);
cl->type = ANYOF;
/* 'OR' a given class with another one. Can create false positives */
/* We assume that cl is not inverted */
STATIC void
-S_cl_or(RExC_state_t *pRExC_state, struct regnode_charclass_class *cl, const struct regnode_charclass_class *or_with)
+S_cl_or(const RExC_state_t *pRExC_state, struct regnode_charclass_class *cl, const struct regnode_charclass_class *or_with)
{
if (or_with->flags & ANYOF_INVERT) {
/* We do not use
});
} else {
- /*EMPTY*/; /* It's a dupe. So ignore it. */
+ NOOP; /* It's a dupe. So ignore it. */
}
} /* end second pass */
});
} else {
- /*EMPTY*/; /* Its a dupe. So ignore it. */
+ NOOP; /* Its a dupe. So ignore it. */
}
} /* end second pass */
/* Peephole optimizer: */
DEBUG_OPTIMISE_r({
SV * const mysv=sv_newmortal();
- regprop( mysv, scan);
+ regprop(RExC_rx, mysv, scan);
PerlIO_printf(Perl_debug_log, "%*speep: %s (0x%08"UVXf")\n",
(int)depth*2, "", SvPV_nolen_const(mysv), PTR2UV(scan));
});
}
DEBUG_OPTIMISE_r({
- regprop( mysv, tail );
+ regprop(RExC_rx, mysv, tail );
PerlIO_printf( Perl_debug_log, "%*s%s%s%s\n",
(int)depth * 2 + 2, "", "Tail node is:", SvPV_nolen_const( mysv ),
(RExC_seen_evals) ? "[EVAL]" : ""
regnode * const noper_next = regnext( noper );
DEBUG_OPTIMISE_r({
- regprop( mysv, cur);
+ regprop(RExC_rx, mysv, cur);
PerlIO_printf( Perl_debug_log, "%*s%s",
(int)depth * 2 + 2," ", SvPV_nolen_const( mysv ) );
- regprop( mysv, noper);
+ regprop(RExC_rx, mysv, noper);
PerlIO_printf( Perl_debug_log, " -> %s",
SvPV_nolen_const(mysv));
if ( noper_next ) {
- regprop( mysv, noper_next );
+ regprop(RExC_rx, mysv, noper_next );
PerlIO_printf( Perl_debug_log,"\t=> %s\t",
SvPV_nolen_const(mysv));
}
PerlIO_printf( Perl_debug_log, "0x%p,0x%p,0x%p)\n",
- first, last, cur );
+ (void*)first, (void*)last, (void*)cur );
});
if ( ( first ? OP( noper ) == optype
: PL_regkind[ (U8)OP( noper ) ] == EXACT )
} else {
DEBUG_OPTIMISE_r(
if (!last ) {
- regprop( mysv, first);
+ regprop(RExC_rx, mysv, first);
PerlIO_printf( Perl_debug_log, "%*s%s",
(int)depth * 2 + 2, "F:", SvPV_nolen_const( mysv ) );
- regprop( mysv, NEXTOPER(first) );
+ regprop(RExC_rx, mysv, NEXTOPER(first) );
PerlIO_printf( Perl_debug_log, " -> %s\n",
SvPV_nolen_const( mysv ) );
}
);
last = cur;
DEBUG_OPTIMISE_r({
- regprop( mysv, cur);
+ regprop(RExC_rx, mysv, cur);
PerlIO_printf( Perl_debug_log, "%*s%s",
(int)depth * 2 + 2, "N:", SvPV_nolen_const( mysv ) );
- regprop( mysv, noper );
+ regprop(RExC_rx, mysv, noper );
PerlIO_printf( Perl_debug_log, " -> %s\n",
SvPV_nolen_const( mysv ) );
});
}
}
DEBUG_OPTIMISE_r({
- regprop( mysv, cur);
+ regprop(RExC_rx, mysv, cur);
PerlIO_printf( Perl_debug_log,
"%*s%s\t(0x%p,0x%p,0x%p)\n", (int)depth * 2 + 2,
- " ", SvPV_nolen_const( mysv ), first, last, cur);
+ " ", SvPV_nolen_const( mysv ), (void*)first, (void*)last, (void*)cur);
});
if ( last ) {
UV uc = *((U8*)STRING(scan));
/* Search for fixed substrings supports EXACT only. */
- if (flags & SCF_DO_SUBSTR)
+ if (flags & SCF_DO_SUBSTR) {
+ assert(data);
scan_commit(pRExC_state, data);
+ }
if (UTF) {
const U8 * const s = (U8 *)STRING(scan);
l = utf8_length(s, s + l);
uc = utf8_to_uvchr(s, NULL);
}
min += l;
- if (data && (flags & SCF_DO_SUBSTR))
+ if (flags & SCF_DO_SUBSTR)
data->pos_min += l;
if (flags & SCF_DO_STCLASS_AND) {
/* Check whether it is compatible with what we know already! */
return RExC_rx->data->count - n;
}
+#ifndef PERL_IN_XSUB_RE
void
Perl_reginitcolors(pTHX)
{
}
PL_colorset = 1;
}
-
+#endif
/*
- pregcomp - compile a regular expression into internal code
again:
if (PL_regkind[(U8)OP(first)] == EXACT) {
if (OP(first) == EXACT)
- /*EMPTY*/; /* Empty, get anchored substr later. */
+ NOOP; /* Empty, get anchored substr later. */
else if ((OP(first) == EXACTF || OP(first) == EXACTFL))
r->regstclass = first;
}
struct regnode_charclass_class);
r->regstclass = (regnode*)RExC_rx->data->data[n];
r->reganch &= ~ROPT_SKIP; /* Used in find_byclass(). */
- PL_regdata = r->data; /* for regprop() */
DEBUG_COMPILE_r({ SV *sv = sv_newmortal();
- regprop(sv, (regnode*)data.start_class);
+ regprop(r, sv, (regnode*)data.start_class);
PerlIO_printf(Perl_debug_log,
"synthetic stclass \"%s\".\n",
SvPVX_const(sv));});
r->regstclass = (regnode*)RExC_rx->data->data[n];
r->reganch &= ~ROPT_SKIP; /* Used in find_byclass(). */
DEBUG_COMPILE_r({ SV* sv = sv_newmortal();
- regprop(sv, (regnode*)data.start_class);
+ regprop(r, sv, (regnode*)data.start_class);
PerlIO_printf(Perl_debug_log,
"synthetic stclass \"%s\".\n",
SvPVX_const(sv));});
r->reganch |= ROPT_CANY_SEEN;
Newxz(r->startp, RExC_npar, I32);
Newxz(r->endp, RExC_npar, I32);
- PL_regdata = r->data; /* for regprop() */
DEBUG_COMPILE_r(regdump(r));
return(r);
}
FAIL("Eval-group not allowed at runtime, use re 'eval'");
if (PL_tainting && PL_tainted)
FAIL("Eval-group in insecure regular expression");
+#if PERL_VERSION > 8
if (IN_PERL_COMPILETIME)
PL_cv_has_eval = 1;
+#endif
}
nextchar(pRExC_state);
/* I smell either [: or [= or [. -- POSIX has been here, right? */
POSIXCC(UCHARAT(RExC_parse))) {
const char c = UCHARAT(RExC_parse);
- char* s = RExC_parse++;
+ char* const s = RExC_parse++;
while (RExC_parse < RExC_end && UCHARAT(RExC_parse) != c)
RExC_parse++;
ANYOF_FLAGS(ret) |= ANYOF_INVERT;
}
- if (SIZE_ONLY)
+ if (SIZE_ONLY) {
RExC_size += ANYOF_SKIP;
+ listsv = &PL_sv_undef; /* For code scanners: listsv always non-NULL. */
+ }
else {
RExC_emit += ANYOF_SKIP;
if (FOLD)
n--;
}
}
- if (value == 'p')
- Perl_sv_catpvf(aTHX_ listsv,
- "+utf8::%.*s\n", (int)n, RExC_parse);
- else
- Perl_sv_catpvf(aTHX_ listsv,
- "!utf8::%.*s\n", (int)n, RExC_parse);
+ Perl_sv_catpvf(aTHX_ listsv, "%cutf8::%.*s\n",
+ (value=='p' ? '+' : '!'), (int)n, RExC_parse);
}
RExC_parse = e + 1;
ANYOF_FLAGS(ret) |= ANYOF_UNICODE;
if (range) {
if (!SIZE_ONLY) {
if (ckWARN(WARN_REGEXP)) {
- int w =
+ const int w =
RExC_parse >= rangebegin ?
RExC_parse - rangebegin : 0;
vWARN4(RExC_parse,
"False [] range \"%*.*s\"",
- w,
- w,
- rangebegin);
+ w, w, rangebegin);
}
if (prevvalue < 256) {
ANYOF_BITMAP_SET(ret, prevvalue);
RExC_parse - rangebegin : 0;
vWARN4(RExC_parse,
"False [] range \"%*.*s\"",
- w,
- w,
- rangebegin);
+ w, w, rangebegin);
}
if (!SIZE_ONLY)
ANYOF_BITMAP_SET(ret, '-');
}
if (!SIZE_ONLY) {
- AV *av = newAV();
+ AV * const av = newAV();
SV *rv;
/* The 0th element stores the character class description
STATIC char*
S_nextchar(pTHX_ RExC_state_t *pRExC_state)
{
- char* retval = RExC_parse++;
+ char* const retval = RExC_parse++;
for (;;) {
if (*RExC_parse == '(' && RExC_parse[1] == '?' &&
/*
- regtail - set the next-pointer at the end of a node chain of p to val.
*/
+/* TODO: All three parms should be const */
STATIC void
-S_regtail(pTHX_ RExC_state_t *pRExC_state, regnode *p, regnode *val)
+S_regtail(pTHX_ const RExC_state_t *pRExC_state, regnode *p, const regnode *val)
{
dVAR;
register regnode *scan;
/*
- regoptail - regtail on operand of first argument; nop if operandless
*/
+/* TODO: All three parms should be const */
STATIC void
-S_regoptail(pTHX_ RExC_state_t *pRExC_state, regnode *p, regnode *val)
+S_regoptail(pTHX_ const RExC_state_t *pRExC_state, regnode *p, const regnode *val)
{
dVAR;
/* "Operandless" and "op != BRANCH" are synonymous in practice. */
- regdump - dump a regexp onto Perl_debug_log in vaguely comprehensible form
*/
void
-Perl_regdump(pTHX_ regexp *r)
+Perl_regdump(pTHX_ const regexp *r)
{
#ifdef DEBUGGING
dVAR;
SV * const sv = sv_newmortal();
- (void)dumpuntil(r->program, r->program + 1, NULL, sv, 0);
+ (void)dumpuntil(r, r->program, r->program + 1, NULL, sv, 0);
/* Header fields of interest. */
if (r->anchored_substr)
PerlIO_printf(Perl_debug_log, ") ");
if (r->regstclass) {
- regprop(sv, r->regstclass);
+ regprop(r, sv, r->regstclass);
PerlIO_printf(Perl_debug_log, "stclass \"%s\" ", SvPVX_const(sv));
}
if (r->reganch & ROPT_ANCH) {
- regprop - printable representation of opcode
*/
void
-Perl_regprop(pTHX_ SV *sv, const regnode *o)
+Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o)
{
#ifdef DEBUGGING
dVAR;
len, s,
PL_colors[1]);
} else if (k == TRIE) {
- /*EMPTY*/;
- /*
- this isn't always safe, as Pl_regdata may not be for this regex yet
- (depending on where its called from) so its being moved to dumpuntil
- I32 n = ARG(o);
- reg_trie_data *trie=(reg_trie_data*)PL_regdata->data[n];
- Perl_sv_catpvf(aTHX_ sv, " (W:%d L:%d C:%d S:%d)",
- trie->wordcount,
- trie->charcount,
- trie->uniquecharcount,
- trie->laststate);
- */
+ NOOP;
+ /* print the details od the trie in dumpuntil instead, as
+ * prog->data isn't available here */
} else if (k == CURLY) {
if (OP(o) == CURLYM || OP(o) == CURLYN || OP(o) == CURLYX)
Perl_sv_catpvf(aTHX_ sv, "[%d]", o->flags); /* Parenth number */
}
if (o->flags & ANYOF_CLASS)
- for (i = 0; i < sizeof(anyofs)/sizeof(char*); i++)
+ for (i = 0; i < (int)(sizeof(anyofs)/sizeof(char*)); i++)
if (ANYOF_CLASS_TEST(o,i))
sv_catpv(sv, anyofs[i]);
{
SV *lv;
- SV * const sw = regclass_swash(o, FALSE, &lv, 0);
+ SV * const sw = regclass_swash(prog, o, FALSE, &lv, 0);
if (lv) {
if (sw) {
dVAR;
#ifdef DEBUGGING
SV * const dsv = PERL_DEBUG_PAD_ZERO(0);
- SV * const re_debug_flags=get_sv(RE_DEBUG_FLAGS,0);
#endif
-
+ GET_RE_DEBUG_FLAGS_DECL;
if (!r || (--r->refcnt > 0))
return;
Safefree(r);
}
+#ifndef PERL_IN_XSUB_RE
/*
- regnext - dig the "next" pointer out of a node
*/
return(p+offset);
}
+#endif
STATIC void
S_re_croak2(pTHX_ const char* pat1,const char* pat2,...)
/* XXX Here's a total kludge. But we need to re-enter for swash routines. */
+#ifndef PERL_IN_XSUB_RE
void
Perl_save_re_context(pTHX)
{
dVAR;
- SAVEI32(PL_reg_flags); /* from regexec.c */
- SAVEPPTR(PL_bostr);
- SAVEPPTR(PL_reginput); /* String-input pointer. */
- SAVEPPTR(PL_regbol); /* Beginning of input, for ^ check. */
- SAVEPPTR(PL_regeol); /* End of input, for $ check. */
- SAVEVPTR(PL_regstartp); /* Pointer to startp array. */
- SAVEVPTR(PL_regendp); /* Ditto for endp. */
- SAVEVPTR(PL_reglastparen); /* Similarly for lastparen. */
- SAVEVPTR(PL_reglastcloseparen); /* Similarly for lastcloseparen. */
- SAVEPPTR(PL_regtill); /* How far we are required to go. */
- SAVEGENERICPV(PL_reg_start_tmp); /* from regexec.c */
+
+ struct re_save_state *state;
+
+ SAVEVPTR(PL_curcop);
+ SSGROW(SAVESTACK_ALLOC_FOR_RE_SAVE_STATE + 1);
+
+ state = (struct re_save_state *)(PL_savestack + PL_savestack_ix);
+ PL_savestack_ix += SAVESTACK_ALLOC_FOR_RE_SAVE_STATE;
+ SSPUSHINT(SAVEt_RE_STATE);
+
+ Copy(&PL_reg_state, state, 1, struct re_save_state);
+
PL_reg_start_tmp = 0;
- SAVEI32(PL_reg_start_tmpl); /* from regexec.c */
PL_reg_start_tmpl = 0;
- SAVEVPTR(PL_regdata);
- SAVEI32(PL_reg_eval_set); /* from regexec.c */
- SAVEI32(PL_regnarrate); /* from regexec.c */
- SAVEVPTR(PL_regprogram); /* from regexec.c */
- SAVEINT(PL_regindent); /* from regexec.c */
- SAVEVPTR(PL_curcop);
- SAVEVPTR(PL_reg_call_cc); /* from regexec.c */
- SAVEVPTR(PL_reg_re); /* from regexec.c */
- SAVEPPTR(PL_reg_ganch); /* from regexec.c */
- SAVESPTR(PL_reg_sv); /* from regexec.c */
- SAVEBOOL(PL_reg_match_utf8); /* from regexec.c */
- SAVEVPTR(PL_reg_magic); /* from regexec.c */
- SAVEI32(PL_reg_oldpos); /* from regexec.c */
- SAVEVPTR(PL_reg_oldcurpm); /* from regexec.c */
- SAVEVPTR(PL_reg_curpm); /* from regexec.c */
- SAVEPPTR(PL_reg_oldsaved); /* old saved substr during match */
PL_reg_oldsaved = NULL;
- SAVEI32(PL_reg_oldsavedlen); /* old length of saved substr during match */
PL_reg_oldsavedlen = 0;
-#ifdef PERL_OLD_COPY_ON_WRITE
- SAVESPTR(PL_nrs);
- PL_nrs = NULL;
-#endif
- SAVEI32(PL_reg_maxiter); /* max wait until caching pos */
PL_reg_maxiter = 0;
- SAVEI32(PL_reg_leftiter); /* wait until caching pos */
PL_reg_leftiter = 0;
- SAVEGENERICPV(PL_reg_poscache); /* cache of pos of WHILEM */
PL_reg_poscache = NULL;
- SAVEI32(PL_reg_poscache_size); /* size of pos cache of WHILEM */
PL_reg_poscache_size = 0;
- SAVEPPTR(PL_regprecomp); /* uncompiled string. */
- SAVEI32(PL_regnpar); /* () count. */
- SAVEI32(PL_regsize); /* from regexec.c */
+#ifdef PERL_OLD_COPY_ON_WRITE
+ PL_nrs = NULL;
+#endif
/* Save $1..$n (#18107: UTF-8 s/(\w+)/uc($1)/e); AMS 20021106. */
if (PL_curpm) {
U32 i;
for (i = 1; i <= rx->nparens; i++) {
char digits[TYPE_CHARS(long)];
+#ifdef USE_SNPRINTF
+ const STRLEN len = snprintf(digits, sizeof(digits), "%lu", (long)i);
+#else
const STRLEN len = my_sprintf(digits, "%lu", (long)i);
+#endif /* #ifdef USE_SNPRINTF */
GV *const *const gvp
= (GV**)hv_fetch(PL_defstash, digits, len, 0);
}
}
}
-
-#ifdef DEBUGGING
- SAVEPPTR(PL_reg_starttry); /* from regexec.c */
-#endif
}
+#endif
static void
clear_re(pTHX_ void *r)
}
-STATIC regnode *
-S_dumpuntil(pTHX_ regnode *start, regnode *node, regnode *last, SV* sv, I32 l)
+STATIC const regnode *
+S_dumpuntil(pTHX_ const regexp *r, const regnode *start, const regnode *node,
+ const regnode *last, SV* sv, I32 l)
{
dVAR;
register U8 op = EXACT; /* Arbitrary non-END op. */
- register regnode *next;
+ register const regnode *next;
while (op != END && (!last || node < last)) {
/* While that wasn't END last time... */
op = OP(node);
if (op == CLOSE)
l--;
- next = regnext(node);
+ next = regnext((regnode *)node);
/* Where, what. */
if (OP(node) == OPTIMIZED)
goto after_print;
- regprop(sv, node);
+ regprop(r, sv, node);
PerlIO_printf(Perl_debug_log, "%4"IVdf":%*s%s", (IV)(node - start),
(int)(2*l + 1), "", SvPVX_const(sv));
if (next == NULL) /* Next ptr. */
(void)PerlIO_putc(Perl_debug_log, '\n');
after_print:
if (PL_regkind[(U8)op] == BRANCHJ) {
- register regnode *nnode = (OP(next) == LONGJMP
- ? regnext(next)
- : next);
+ register const regnode *nnode = (OP(next) == LONGJMP
+ ? regnext((regnode *)next)
+ : next);
if (last && nnode > last)
nnode = last;
- node = dumpuntil(start, NEXTOPER(NEXTOPER(node)), nnode, sv, l + 1);
+ node = dumpuntil(r, start, NEXTOPER(NEXTOPER(node)), nnode, sv, l + 1);
}
else if (PL_regkind[(U8)op] == BRANCH) {
- node = dumpuntil(start, NEXTOPER(node), next, sv, l + 1);
+ node = dumpuntil(r, start, NEXTOPER(node), next, sv, l + 1);
}
else if ( PL_regkind[(U8)op] == TRIE ) {
const I32 n = ARG(node);
- const reg_trie_data * const trie = (reg_trie_data*)PL_regdata->data[n];
+ const reg_trie_data * const trie = (reg_trie_data*)r->data->data[n];
const I32 arry_len = av_len(trie->words)+1;
I32 word_idx;
PerlIO_printf(Perl_debug_log,
node->flags ? " EVAL mode" : "");
for (word_idx=0; word_idx < arry_len; word_idx++) {
- SV **elem_ptr=av_fetch(trie->words,word_idx,0);
+ SV ** const elem_ptr = av_fetch(trie->words,word_idx,0);
if (elem_ptr) {
PerlIO_printf(Perl_debug_log, "%*s<%s%s%s>\n",
(int)(2*(l+4)), "",
}
else if ( op == CURLY) { /* "next" might be very big: optimizer */
- node = dumpuntil(start, NEXTOPER(node) + EXTRA_STEP_2ARGS,
+ node = dumpuntil(r, start, NEXTOPER(node) + EXTRA_STEP_2ARGS,
NEXTOPER(node) + EXTRA_STEP_2ARGS + 1, sv, l + 1);
}
else if (PL_regkind[(U8)op] == CURLY && op != CURLYX) {
- node = dumpuntil(start, NEXTOPER(node) + EXTRA_STEP_2ARGS,
+ node = dumpuntil(r, start, NEXTOPER(node) + EXTRA_STEP_2ARGS,
next, sv, l + 1);
}
else if ( op == PLUS || op == STAR) {
- node = dumpuntil(start, NEXTOPER(node), NEXTOPER(node) + 1, sv, l + 1);
+ node = dumpuntil(r, start, NEXTOPER(node), NEXTOPER(node) + 1, sv, l + 1);
}
else if (op == ANYOF) {
/* arglen 1 + class block */