floating substrings if needed. */
STATIC void
-S_scan_commit(pTHX_ RExC_state_t *pRExC_state, scan_data_t *data)
+S_scan_commit(pTHX_ const RExC_state_t *pRExC_state, scan_data_t *data)
{
const STRLEN l = CHR_SVLEN(data->last_found);
const STRLEN old_l = CHR_SVLEN(*data->longest);
SvCUR_set(data->last_found, 0);
{
SV * const sv = data->last_found;
- MAGIC * const mg =
- SvUTF8(sv) && SvMAGICAL(sv) ? mg_find(sv, PERL_MAGIC_utf8) : NULL;
- if (mg)
- mg->mg_len = 0;
+ if (SvUTF8(sv) && SvMAGICAL(sv)) {
+ MAGIC * const mg = mg_find(sv, PERL_MAGIC_utf8);
+ if (mg)
+ mg->mg_len = 0;
+ }
}
data->last_end = -1;
data->flags &= ~SF_BEFORE_EOL;
/* Can match anything (initialization) */
STATIC void
-S_cl_anything(RExC_state_t *pRExC_state, struct regnode_charclass_class *cl)
+S_cl_anything(const RExC_state_t *pRExC_state, struct regnode_charclass_class *cl)
{
ANYOF_CLASS_ZERO(cl);
ANYOF_BITMAP_SETALL(cl);
/* Can match anything (initialization) */
STATIC void
-S_cl_init(RExC_state_t *pRExC_state, struct regnode_charclass_class *cl)
+S_cl_init(const RExC_state_t *pRExC_state, struct regnode_charclass_class *cl)
{
Zero(cl, 1, struct regnode_charclass_class);
cl->type = ANYOF;
}
STATIC void
-S_cl_init_zero(RExC_state_t *pRExC_state, struct regnode_charclass_class *cl)
+S_cl_init_zero(const RExC_state_t *pRExC_state, struct regnode_charclass_class *cl)
{
Zero(cl, 1, struct regnode_charclass_class);
cl->type = ANYOF;
/* 'OR' a given class with another one. Can create false positives */
/* We assume that cl is not inverted */
STATIC void
-S_cl_or(RExC_state_t *pRExC_state, struct regnode_charclass_class *cl, const struct regnode_charclass_class *or_with)
+S_cl_or(const RExC_state_t *pRExC_state, struct regnode_charclass_class *cl, const struct regnode_charclass_class *or_with)
{
if (or_with->flags & ANYOF_INVERT) {
/* We do not use
/* first pass, loop through and scan words */
reg_trie_data *trie;
regnode *cur;
- const U32 uniflags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
+ const U32 uniflags = UTF8_ALLOW_DEFAULT;
STRLEN len = 0;
UV uvc = 0;
U16 curword = 0;
char * const s0 = STRING(scan), *s, *t;
char * const s1 = s0 + STR_LEN(scan) - 1;
char * const s2 = s1 - 4;
- const char * const t0 = "\xcc\x88\xcc\x81";
+ const char t0[] = "\xcc\x88\xcc\x81";
const char * const t1 = t0 + 3;
for (s = s0 + 2;
if (OP(next) == code || code == IFTHEN || code == SUSPEND) {
I32 max1 = 0, min1 = I32_MAX, num = 0;
struct regnode_charclass_class accum;
- regnode *startbranch=scan;
+ regnode * const startbranch=scan;
if (flags & SCF_DO_SUBSTR) /* XXXX Add !SUSPEND? */
scan_commit(pRExC_state, data); /* Cannot merge strings after this. */
else if (strchr((const char*)PL_varies,OP(scan))) {
I32 mincount, maxcount, minnext, deltanext, fl = 0;
I32 f = flags, pos_before = 0;
- regnode *oscan = scan;
+ regnode * const oscan = scan;
struct regnode_charclass_class this_class;
struct regnode_charclass_class *oclass = NULL;
I32 next_is_eval = 0;
&& !deltanext && minnext == 1 ) {
/* Try to optimize to CURLYN. */
regnode *nxt = NEXTOPER(oscan) + EXTRA_STEP_2ARGS;
- regnode *nxt1 = nxt;
+ regnode * const nxt1 = nxt;
#ifdef DEBUGGING
regnode *nxt2;
#endif
I32 b = pos_before >= data->last_start_min
? pos_before : data->last_start_min;
STRLEN l;
- const char *s = SvPV_const(data->last_found, l);
+ const char * const s = SvPV_const(data->last_found, l);
I32 old = b - data->last_start_min;
#endif
the group. */
scan_commit(pRExC_state,data);
if (mincount && last_str) {
- SV *sv = data->last_found;
- MAGIC *mg = SvUTF8(sv) && SvMAGICAL(sv) ?
+ SV * const sv = data->last_found;
+ MAGIC * const mg = SvUTF8(sv) && SvMAGICAL(sv) ?
mg_find(sv, PERL_MAGIC_utf8) : NULL;
if (mg)
struct regnode_charclass_class);
r->regstclass = (regnode*)RExC_rx->data->data[n];
r->reganch &= ~ROPT_SKIP; /* Used in find_byclass(). */
- PL_regdata = r->data; /* for regprop() */
DEBUG_COMPILE_r({ SV *sv = sv_newmortal();
regprop(sv, (regnode*)data.start_class);
PerlIO_printf(Perl_debug_log,
r->reganch |= ROPT_CANY_SEEN;
Newxz(r->startp, RExC_npar, I32);
Newxz(r->endp, RExC_npar, I32);
- PL_regdata = r->data; /* for regprop() */
DEBUG_COMPILE_r(regdump(r));
return(r);
}
*flagp |= flags&SIMPLE;
}
- return(ret);
+ return ret;
}
/*
if (UTF8_IS_START(*p) && UTF) {
STRLEN numlen;
ender = utf8n_to_uvchr((U8*)p, RExC_end - p,
- &numlen, 0);
+ &numlen, UTF8_ALLOW_DEFAULT);
p += numlen;
}
else
/* I smell either [: or [= or [. -- POSIX has been here, right? */
POSIXCC(UCHARAT(RExC_parse))) {
const char c = UCHARAT(RExC_parse);
- char* s = RExC_parse++;
+ char* const s = RExC_parse++;
while (RExC_parse < RExC_end && UCHARAT(RExC_parse) != c)
RExC_parse++;
ANYOF_FLAGS(ret) |= ANYOF_INVERT;
}
- if (SIZE_ONLY)
+ if (SIZE_ONLY) {
RExC_size += ANYOF_SKIP;
+ listsv = &PL_sv_undef; /* For code scanners: listsv always non-NULL. */
+ }
else {
RExC_emit += ANYOF_SKIP;
if (FOLD)
if (UTF) {
value = utf8n_to_uvchr((U8*)RExC_parse,
RExC_end - RExC_parse,
- &numlen, 0);
+ &numlen, UTF8_ALLOW_DEFAULT);
RExC_parse += numlen;
}
else
if (UTF) {
value = utf8n_to_uvchr((U8*)RExC_parse,
RExC_end - RExC_parse,
- &numlen, 0);
+ &numlen, UTF8_ALLOW_DEFAULT);
RExC_parse += numlen;
}
else
n--;
}
}
- if (value == 'p')
- Perl_sv_catpvf(aTHX_ listsv,
- "+utf8::%.*s\n", (int)n, RExC_parse);
- else
- Perl_sv_catpvf(aTHX_ listsv,
- "!utf8::%.*s\n", (int)n, RExC_parse);
+ Perl_sv_catpvf(aTHX_ listsv, "%cutf8::%.*s\n",
+ (value=='p' ? '+' : '!'), (int)n, RExC_parse);
}
RExC_parse = e + 1;
ANYOF_FLAGS(ret) |= ANYOF_UNICODE;
if (range) {
if (!SIZE_ONLY) {
if (ckWARN(WARN_REGEXP)) {
- int w =
+ const int w =
RExC_parse >= rangebegin ?
RExC_parse - rangebegin : 0;
vWARN4(RExC_parse,
"False [] range \"%*.*s\"",
- w,
- w,
- rangebegin);
+ w, w, rangebegin);
}
if (prevvalue < 256) {
ANYOF_BITMAP_SET(ret, prevvalue);
if (range) {
if (prevvalue > (IV)value) /* b-a */ {
- Simple_vFAIL4("Invalid [] range \"%*.*s\"",
- RExC_parse - rangebegin,
- RExC_parse - rangebegin,
- rangebegin);
+ const int w = RExC_parse - rangebegin;
+ Simple_vFAIL4("Invalid [] range \"%*.*s\"", w, w, rangebegin);
range = 0; /* not a valid range */
}
}
/* a bad range like \w-, [:word:]- ? */
if (namedclass > OOB_NAMEDCLASS) {
if (ckWARN(WARN_REGEXP)) {
- int w =
+ const int w =
RExC_parse >= rangebegin ?
RExC_parse - rangebegin : 0;
vWARN4(RExC_parse,
"False [] range \"%*.*s\"",
- w,
- w,
- rangebegin);
+ w, w, rangebegin);
}
if (!SIZE_ONLY)
ANYOF_BITMAP_SET(ret, '-');
}
if (!SIZE_ONLY) {
- AV *av = newAV();
+ AV * const av = newAV();
SV *rv;
/* The 0th element stores the character class description
STATIC char*
S_nextchar(pTHX_ RExC_state_t *pRExC_state)
{
- char* retval = RExC_parse++;
+ char* const retval = RExC_parse++;
for (;;) {
if (*RExC_parse == '(' && RExC_parse[1] == '?' &&
/*
- regtail - set the next-pointer at the end of a node chain of p to val.
*/
+/* TODO: All three parms should be const */
STATIC void
-S_regtail(pTHX_ RExC_state_t *pRExC_state, regnode *p, regnode *val)
+S_regtail(pTHX_ const RExC_state_t *pRExC_state, regnode *p, const regnode *val)
{
dVAR;
register regnode *scan;
/*
- regoptail - regtail on operand of first argument; nop if operandless
*/
+/* TODO: All three parms should be const */
STATIC void
-S_regoptail(pTHX_ RExC_state_t *pRExC_state, regnode *p, regnode *val)
+S_regoptail(pTHX_ const RExC_state_t *pRExC_state, regnode *p, const regnode *val)
{
dVAR;
/* "Operandless" and "op != BRANCH" are synonymous in practice. */
- regdump - dump a regexp onto Perl_debug_log in vaguely comprehensible form
*/
void
-Perl_regdump(pTHX_ regexp *r)
+Perl_regdump(pTHX_ const regexp *r)
{
#ifdef DEBUGGING
dVAR;
SV * const sv = sv_newmortal();
- (void)dumpuntil(r->program, r->program + 1, NULL, sv, 0);
+ (void)dumpuntil(r, r->program, r->program + 1, NULL, sv, 0);
/* Header fields of interest. */
if (r->anchored_substr)
PL_colors[1]);
} else if (k == TRIE) {
/*EMPTY*/;
- /*
- this isn't always safe, as Pl_regdata may not be for this regex yet
- (depending on where its called from) so its being moved to dumpuntil
- I32 n = ARG(o);
- reg_trie_data *trie=(reg_trie_data*)PL_regdata->data[n];
- Perl_sv_catpvf(aTHX_ sv, " (W:%d L:%d C:%d S:%d)",
- trie->wordcount,
- trie->charcount,
- trie->uniquecharcount,
- trie->laststate);
- */
+ /* print the details od the trie in dumpuntil instead, as
+ * prog->data isn't available here */
} else if (k == CURLY) {
if (OP(o) == CURLYM || OP(o) == CURLYN || OP(o) == CURLYX)
Perl_sv_catpvf(aTHX_ sv, "[%d]", o->flags); /* Parenth number */
Perl_save_re_context(pTHX)
{
dVAR;
- SAVEI32(PL_reg_flags); /* from regexec.c */
- SAVEPPTR(PL_bostr);
- SAVEPPTR(PL_reginput); /* String-input pointer. */
- SAVEPPTR(PL_regbol); /* Beginning of input, for ^ check. */
- SAVEPPTR(PL_regeol); /* End of input, for $ check. */
- SAVEVPTR(PL_regstartp); /* Pointer to startp array. */
- SAVEVPTR(PL_regendp); /* Ditto for endp. */
- SAVEVPTR(PL_reglastparen); /* Similarly for lastparen. */
- SAVEVPTR(PL_reglastcloseparen); /* Similarly for lastcloseparen. */
- SAVEPPTR(PL_regtill); /* How far we are required to go. */
- SAVEGENERICPV(PL_reg_start_tmp); /* from regexec.c */
+
+ struct re_save_state *state;
+
+ SAVEVPTR(PL_curcop);
+ SSGROW(SAVESTACK_ALLOC_FOR_RE_SAVE_STATE + 1);
+
+ state = (struct re_save_state *)(PL_savestack + PL_savestack_ix);
+ PL_savestack_ix += SAVESTACK_ALLOC_FOR_RE_SAVE_STATE;
+ SSPUSHINT(SAVEt_RE_STATE);
+
+ Copy(&PL_reg_state, state, 1, struct re_save_state);
+
PL_reg_start_tmp = 0;
- SAVEI32(PL_reg_start_tmpl); /* from regexec.c */
PL_reg_start_tmpl = 0;
- SAVEVPTR(PL_regdata);
- SAVEI32(PL_reg_eval_set); /* from regexec.c */
- SAVEI32(PL_regnarrate); /* from regexec.c */
- SAVEVPTR(PL_regprogram); /* from regexec.c */
- SAVEINT(PL_regindent); /* from regexec.c */
- SAVEVPTR(PL_curcop);
- SAVEVPTR(PL_reg_call_cc); /* from regexec.c */
- SAVEVPTR(PL_reg_re); /* from regexec.c */
- SAVEPPTR(PL_reg_ganch); /* from regexec.c */
- SAVESPTR(PL_reg_sv); /* from regexec.c */
- SAVEBOOL(PL_reg_match_utf8); /* from regexec.c */
- SAVEVPTR(PL_reg_magic); /* from regexec.c */
- SAVEI32(PL_reg_oldpos); /* from regexec.c */
- SAVEVPTR(PL_reg_oldcurpm); /* from regexec.c */
- SAVEVPTR(PL_reg_curpm); /* from regexec.c */
- SAVEPPTR(PL_reg_oldsaved); /* old saved substr during match */
PL_reg_oldsaved = NULL;
- SAVEI32(PL_reg_oldsavedlen); /* old length of saved substr during match */
PL_reg_oldsavedlen = 0;
-#ifdef PERL_OLD_COPY_ON_WRITE
- SAVESPTR(PL_nrs);
- PL_nrs = NULL;
-#endif
- SAVEI32(PL_reg_maxiter); /* max wait until caching pos */
PL_reg_maxiter = 0;
- SAVEI32(PL_reg_leftiter); /* wait until caching pos */
PL_reg_leftiter = 0;
- SAVEGENERICPV(PL_reg_poscache); /* cache of pos of WHILEM */
PL_reg_poscache = NULL;
- SAVEI32(PL_reg_poscache_size); /* size of pos cache of WHILEM */
PL_reg_poscache_size = 0;
- SAVEPPTR(PL_regprecomp); /* uncompiled string. */
- SAVEI32(PL_regnpar); /* () count. */
- SAVEI32(PL_regsize); /* from regexec.c */
+#ifdef PERL_OLD_COPY_ON_WRITE
+ PL_nrs = NULL;
+#endif
/* Save $1..$n (#18107: UTF-8 s/(\w+)/uc($1)/e); AMS 20021106. */
if (PL_curpm) {
}
}
}
-
-#ifdef DEBUGGING
- SAVEPPTR(PL_reg_starttry); /* from regexec.c */
-#endif
}
static void
}
-STATIC regnode *
-S_dumpuntil(pTHX_ regnode *start, regnode *node, regnode *last, SV* sv, I32 l)
+STATIC const regnode *
+S_dumpuntil(pTHX_ const regexp *r, const regnode *start, const regnode *node,
+ const regnode *last, SV* sv, I32 l)
{
dVAR;
register U8 op = EXACT; /* Arbitrary non-END op. */
- register regnode *next;
+ register const regnode *next;
while (op != END && (!last || node < last)) {
/* While that wasn't END last time... */
op = OP(node);
if (op == CLOSE)
l--;
- next = regnext(node);
+ next = regnext((regnode *)node);
/* Where, what. */
if (OP(node) == OPTIMIZED)
goto after_print;
(void)PerlIO_putc(Perl_debug_log, '\n');
after_print:
if (PL_regkind[(U8)op] == BRANCHJ) {
- register regnode *nnode = (OP(next) == LONGJMP
- ? regnext(next)
- : next);
+ register const regnode *nnode = (OP(next) == LONGJMP
+ ? regnext((regnode *)next)
+ : next);
if (last && nnode > last)
nnode = last;
- node = dumpuntil(start, NEXTOPER(NEXTOPER(node)), nnode, sv, l + 1);
+ node = dumpuntil(r, start, NEXTOPER(NEXTOPER(node)), nnode, sv, l + 1);
}
else if (PL_regkind[(U8)op] == BRANCH) {
- node = dumpuntil(start, NEXTOPER(node), next, sv, l + 1);
+ node = dumpuntil(r, start, NEXTOPER(node), next, sv, l + 1);
}
else if ( PL_regkind[(U8)op] == TRIE ) {
const I32 n = ARG(node);
- const reg_trie_data * const trie = (reg_trie_data*)PL_regdata->data[n];
+ const reg_trie_data * const trie = (reg_trie_data*)r->data->data[n];
const I32 arry_len = av_len(trie->words)+1;
I32 word_idx;
PerlIO_printf(Perl_debug_log,
node->flags ? " EVAL mode" : "");
for (word_idx=0; word_idx < arry_len; word_idx++) {
- SV **elem_ptr=av_fetch(trie->words,word_idx,0);
+ SV ** const elem_ptr = av_fetch(trie->words,word_idx,0);
if (elem_ptr) {
PerlIO_printf(Perl_debug_log, "%*s<%s%s%s>\n",
(int)(2*(l+4)), "",
}
else if ( op == CURLY) { /* "next" might be very big: optimizer */
- node = dumpuntil(start, NEXTOPER(node) + EXTRA_STEP_2ARGS,
+ node = dumpuntil(r, start, NEXTOPER(node) + EXTRA_STEP_2ARGS,
NEXTOPER(node) + EXTRA_STEP_2ARGS + 1, sv, l + 1);
}
else if (PL_regkind[(U8)op] == CURLY && op != CURLYX) {
- node = dumpuntil(start, NEXTOPER(node) + EXTRA_STEP_2ARGS,
+ node = dumpuntil(r, start, NEXTOPER(node) + EXTRA_STEP_2ARGS,
next, sv, l + 1);
}
else if ( op == PLUS || op == STAR) {
- node = dumpuntil(start, NEXTOPER(node), NEXTOPER(node) + 1, sv, l + 1);
+ node = dumpuntil(r, start, NEXTOPER(node), NEXTOPER(node) + 1, sv, l + 1);
}
else if (op == ANYOF) {
/* arglen 1 + class block */