#define RExC_end (pRExC_state->end)
#define RExC_parse (pRExC_state->parse)
#define RExC_whilem_seen (pRExC_state->whilem_seen)
-#define RExC_offsets (pRExC_state->rxi->offsets) /* I am not like the others */
+#ifdef RE_TRACK_PATTERN_OFFSETS
+#define RExC_offsets (pRExC_state->rxi->u.offsets) /* unlike the others: reached via rxi->u, and only defined when offsets are tracked */
+#endif
#define RExC_emit (pRExC_state->emit)
#define RExC_emit_start (pRExC_state->emit_start)
#define RExC_naughty (pRExC_state->naughty)
#define RExC_recurse (pRExC_state->recurse)
#define RExC_recurse_count (pRExC_state->recurse_count)
+
#define ISMULT1(c) ((c) == '*' || (c) == '+' || (c) == '?')
#define ISMULT2(s) ((*s) == '*' || (*s) == '+' || (*s) == '?' || \
((*s) == '{' && regcurly(s)))
* Element 0 holds the number n.
* Position is 1 indexed.
*/
-
+#ifndef RE_TRACK_PATTERN_OFFSETS
+/* Offset tracking is compiled out: the annotation macros below expand to
+ * nothing, and the program length is kept in u.proglen. */
+#define Set_Node_Offset_To_R(node,byte)
+#define Set_Node_Offset(node,byte)
+#define Set_Cur_Node_Offset
+#define Set_Node_Length_To_R(node,len)
+#define Set_Node_Length(node,len)
+#define Set_Node_Cur_Length(node)
+/* Node_Offset/Node_Length yield no value in this configuration, so they may
+ * only appear in code that is itself under #ifdef RE_TRACK_PATTERN_OFFSETS. */
+#define Node_Offset(n)
+#define Node_Length(n)
+#define Set_Node_Offset_Length(node,offset,len)
+/* Arguments and expansions are parenthesised so the macros stay correct
+ * when handed non-trivial expressions or used inside larger expressions. */
+#define ProgLen(ri) ((ri)->u.proglen)
+#define SetProgLen(ri,x) ((ri)->u.proglen = (x))
+#else
+/* Offset tracking is enabled: element 0 of the offsets array doubles as the
+ * program length. */
+#define ProgLen(ri) ((ri)->u.offsets[0])
+#define SetProgLen(ri,x) ((ri)->u.offsets[0] = (x))
#define Set_Node_Offset_To_R(node,byte) STMT_START { \
if (! SIZE_ONLY) { \
MJD_OFFSET_DEBUG(("** (%d) offset of node %d is %d.\n", \
Set_Node_Offset_To_R((node)-RExC_emit_start, (offset)); \
Set_Node_Length_To_R((node)-RExC_emit_start, (len)); \
} STMT_END
-
+#endif /* RE_TRACK_PATTERN_OFFSETS */
#if PERL_ENABLE_EXPERIMENTAL_REGEX_OPTIMISATIONS
#define EXPERIMENTAL_INPLACESCAN
-#endif
+#endif /* PERL_ENABLE_EXPERIMENTAL_REGEX_OPTIMISATIONS */
#define DEBUG_STUDYDATA(str,data,depth) \
DEBUG_OPTIMISE_MORE_r(if(data){ \
#ifdef DEBUGGING
regnode *optimize = NULL;
+#ifdef RE_TRACK_PATTERN_OFFSETS
+
U32 mjd_offset = 0;
U32 mjd_nodelen = 0;
-#endif
+#endif /* RE_TRACK_PATTERN_OFFSETS */
+#endif /* DEBUGGING */
/*
This means we convert either the first branch or the first Exact,
depending on whether the thing following (in 'last') is a branch
if ( first != startbranch || OP( last ) == BRANCH ) {
/* branch sub-chain */
NEXT_OFF( first ) = (U16)(last - first);
+#ifdef RE_TRACK_PATTERN_OFFSETS
DEBUG_r({
mjd_offset= Node_Offset((convert));
mjd_nodelen= Node_Length((convert));
});
+#endif
/* whole branch chain */
- } else {
+ }
+#ifdef RE_TRACK_PATTERN_OFFSETS
+ else {
DEBUG_r({
const regnode *nop = NEXTOPER( convert );
mjd_offset= Node_Offset((nop));
mjd_nodelen= Node_Length((nop));
});
}
-
DEBUG_OPTIMISE_r(
PerlIO_printf(Perl_debug_log, "%*sMJD offset:%"UVuf" MJD length:%"UVuf"\n",
(int)depth * 2 + 2, "",
(UV)mjd_offset, (UV)mjd_nodelen)
);
-
+#endif
/* But first we check to see if there is a common prefix we can
split out as an EXACT and put in front of the TRIE node. */
trie->startstate= 1;
}
/* Useful during FAIL. */
- Newxz(ri->offsets, 2*RExC_size+1, U32); /* MJD 20001228 */
- if (ri->offsets) {
- ri->offsets[0] = RExC_size;
- }
+#ifdef RE_TRACK_PATTERN_OFFSETS
+ Newxz(ri->u.offsets, 2*RExC_size+1, U32); /* MJD 20001228 */
DEBUG_OFFSETS_r(PerlIO_printf(Perl_debug_log,
"%s %"UVuf" bytes for offset annotations.\n",
- ri->offsets ? "Got" : "Couldn't get",
+ ri->u.offsets ? "Got" : "Couldn't get",
(UV)((2*RExC_size+1) * sizeof(U32))));
-
+#endif
+ SetProgLen(ri,RExC_size);
RExC_rx = r;
RExC_rxi = ri;
ri->name_list_idx = add_data( pRExC_state, 1, "p" );
ri->data->data[ri->name_list_idx] = (void*)SvREFCNT_inc(RExC_paren_name_list);
} else
- ri->name_list_idx = 0;
#endif
+ ri->name_list_idx = 0;
if (RExC_recurse_count) {
for ( ; RExC_recurse_count ; RExC_recurse_count-- ) {
PerlIO_printf(Perl_debug_log,"Final program:\n");
regdump(r);
});
- DEBUG_OFFSETS_r(if (ri->offsets) {
- const U32 len = ri->offsets[0];
+#ifdef RE_TRACK_PATTERN_OFFSETS
+ DEBUG_OFFSETS_r(if (ri->u.offsets) {
+ const U32 len = ri->u.offsets[0];
U32 i;
GET_RE_DEBUG_FLAGS_DECL;
- PerlIO_printf(Perl_debug_log, "Offsets: [%"UVuf"]\n\t", (UV)ri->offsets[0]);
+ PerlIO_printf(Perl_debug_log, "Offsets: [%"UVuf"]\n\t", (UV)ri->u.offsets[0]);
for (i = 1; i <= len; i++) {
- if (ri->offsets[i*2-1] || ri->offsets[i*2])
+ if (ri->u.offsets[i*2-1] || ri->u.offsets[i*2])
PerlIO_printf(Perl_debug_log, "%"UVuf":%"UVuf"[%"UVuf"] ",
- (UV)i, (UV)ri->offsets[i*2-1], (UV)ri->offsets[i*2]);
+ (UV)i, (UV)ri->u.offsets[i*2-1], (UV)ri->u.offsets[i*2]);
}
PerlIO_printf(Perl_debug_log, "\n");
});
+#endif
return(r);
}
SV *sv = usesv ? usesv : newSVpvs("");
PERL_UNUSED_ARG(flags);
- if (paren == -2 && (s = rx->subbeg) && rx->startp[0] != -1) {
+ if (!rx->subbeg) {
+ sv_setsv(sv,&PL_sv_undef);
+ return sv;
+ }
+ else
+ if (paren == -2 && rx->startp[0] != -1) {
/* $` */
i = rx->startp[0];
+ s = rx->subbeg;
}
else
- if (paren == -1 && rx->subbeg && rx->endp[0] != -1) {
+ if (paren == -1 && rx->endp[0] != -1) {
/* $' */
s = rx->subbeg + rx->endp[0];
i = rx->sublen - rx->endp[0];
/* $& $1 ... */
i = t1 - s1;
s = rx->subbeg + s1;
- }
-
- if (s) {
- assert(rx->subbeg);
- assert(rx->sublen >= (s - rx->subbeg) + i );
-
- if (i >= 0) {
- const int oldtainted = PL_tainted;
- TAINT_NOT;
- sv_setpvn(sv, s, i);
- PL_tainted = oldtainted;
- if ( (rx->extflags & RXf_CANY_SEEN)
- ? (RX_MATCH_UTF8(rx)
- && (!i || is_utf8_string((U8*)s, i)))
- : (RX_MATCH_UTF8(rx)) )
- {
- SvUTF8_on(sv);
- }
- else
- SvUTF8_off(sv);
- if (PL_tainting) {
- if (RX_MATCH_TAINTED(rx)) {
- if (SvTYPE(sv) >= SVt_PVMG) {
- MAGIC* const mg = SvMAGIC(sv);
- MAGIC* mgt;
- PL_tainted = 1;
- SvMAGIC_set(sv, mg->mg_moremagic);
- SvTAINT(sv);
- if ((mgt = SvMAGIC(sv))) {
- mg->mg_moremagic = mgt;
- SvMAGIC_set(sv, mg);
- }
- } else {
- PL_tainted = 1;
- SvTAINT(sv);
+ } else {
+ sv_setsv(sv,&PL_sv_undef);
+ return sv;
+ }
+ assert(rx->sublen >= (s - rx->subbeg) + i );
+ if (i >= 0) {
+ const int oldtainted = PL_tainted;
+ TAINT_NOT;
+ sv_setpvn(sv, s, i);
+ PL_tainted = oldtainted;
+ if ( (rx->extflags & RXf_CANY_SEEN)
+ ? (RX_MATCH_UTF8(rx)
+ && (!i || is_utf8_string((U8*)s, i)))
+ : (RX_MATCH_UTF8(rx)) )
+ {
+ SvUTF8_on(sv);
+ }
+ else
+ SvUTF8_off(sv);
+ if (PL_tainting) {
+ if (RX_MATCH_TAINTED(rx)) {
+ if (SvTYPE(sv) >= SVt_PVMG) {
+ MAGIC* const mg = SvMAGIC(sv);
+ MAGIC* mgt;
+ PL_tainted = 1;
+ SvMAGIC_set(sv, mg->mg_moremagic);
+ SvTAINT(sv);
+ if ((mgt = SvMAGIC(sv))) {
+ mg->mg_moremagic = mgt;
+ SvMAGIC_set(sv, mg);
}
- } else
- SvTAINTED_off(sv);
- }
- } else {
- sv_setsv(sv,&PL_sv_undef);
+ } else {
+ PL_tainted = 1;
+ SvTAINT(sv);
+ }
+ } else
+ SvTAINTED_off(sv);
}
} else {
sv_setsv(sv,&PL_sv_undef);
return ret;
} else
if (*RExC_parse == '?') { /* (?...) */
- U32 posflags = 0, negflags = 0;
- U32 *flagsp = &posflags;
bool is_logical = 0;
const char * const seqstart = RExC_parse;
vFAIL("Sequence (? incomplete");
break;
default:
- --RExC_parse;
- parse_flags: /* (?i) */
- while (*RExC_parse && strchr("iogcmsx", *RExC_parse)) {
+ --RExC_parse;
+ parse_flags: /* (?i) */
+ {
+ U32 posflags = 0, negflags = 0;
+ U32 *flagsp = &posflags;
+
+ while (*RExC_parse) {
+ /* && strchr("iogcmsx", *RExC_parse) */
/* (?g), (?gc) and (?o) are useless here
and must be globally applied -- japhy */
-
- if (*RExC_parse == 'o' || *RExC_parse == 'g') {
+ switch (*RExC_parse) {
+ CASE_STD_PMMOD_FLAGS_PARSE_SET(flagsp);
+ case 'o':
+ case 'g':
if (SIZE_ONLY && ckWARN(WARN_REGEXP)) {
const I32 wflagbit = *RExC_parse == 'o' ? WASTED_O : WASTED_G;
if (! (wastedflags & wflagbit) ) {
);
}
}
- }
- else if (*RExC_parse == 'c') {
+ break;
+
+ case 'c':
if (SIZE_ONLY && ckWARN(WARN_REGEXP)) {
if (! (wastedflags & WASTED_C) ) {
wastedflags |= WASTED_GC;
);
}
}
- }
- else { pmflag(flagsp, *RExC_parse); }
-
- ++RExC_parse;
- }
- if (*RExC_parse == '-') {
- flagsp = &negflags;
- wastedflags = 0; /* reset so (?g-c) warns twice */
+ break;
+ case 'k':
+ if (flagsp == &negflags) {
+ if (SIZE_ONLY && ckWARN(WARN_REGEXP))
+ vWARN(RExC_parse + 1,"Useless use of (?-k)");
+ } else {
+ *flagsp |= RXf_PMf_KEEPCOPY;
+ }
+ break;
+ case '-':
+ if (flagsp == &negflags)
+ goto unknown;
+ flagsp = &negflags;
+ wastedflags = 0; /* reset so (?g-c) warns twice */
+ break;
+ case ':':
+ paren = ':';
+ /*FALLTHROUGH*/
+ case ')':
+ RExC_flags |= posflags;
+ RExC_flags &= ~negflags;
+ nextchar(pRExC_state);
+ if (paren != ':') {
+ *flagp = TRYAGAIN;
+ return NULL;
+ } else {
+ ret = NULL;
+ goto parse_rest;
+ }
+ /*NOTREACHED*/
+ default:
+ unknown:
+ RExC_parse++;
+ vFAIL3("Sequence (%.*s...) not recognized", RExC_parse-seqstart, seqstart);
+ /*NOTREACHED*/
+ }
++RExC_parse;
- goto parse_flags;
- }
- RExC_flags |= posflags;
- RExC_flags &= ~negflags;
- if (*RExC_parse == ':') {
- RExC_parse++;
- paren = ':';
- break;
- }
- unknown:
- if (*RExC_parse != ')') {
- RExC_parse++;
- vFAIL3("Sequence (%.*s...) not recognized", RExC_parse-seqstart, seqstart);
}
- nextchar(pRExC_state);
- *flagp = TRYAGAIN;
- return NULL;
- }
+ }} /* one for the default block, one for the switch */
}
else { /* (...) */
capturing_parens:
}
else /* ! paren */
ret = NULL;
-
+
+ parse_rest:
/* Pick up the branches, linking them together. */
parse_start = RExC_parse; /* MJD */
br = regbranch(pRExC_state, &flags, 1,depth+1);
NODE_ALIGN_FILL(ret);
ptr = ret;
FILL_ADVANCE_NODE(ptr, op);
+#ifdef RE_TRACK_PATTERN_OFFSETS
if (RExC_offsets) { /* MJD */
MJD_OFFSET_DEBUG(("%s:%d: (op %s) %s %"UVuf" (len %"UVuf") (max %"UVuf").\n",
"reg_node", __LINE__,
(UV)RExC_offsets[0]));
Set_Node_Offset(RExC_emit, RExC_parse + (op == END));
}
-
+#endif
RExC_emit = ptr;
return(ret);
}
NODE_ALIGN_FILL(ret);
ptr = ret;
FILL_ADVANCE_NODE_ARG(ptr, op, arg);
+#ifdef RE_TRACK_PATTERN_OFFSETS
if (RExC_offsets) { /* MJD */
MJD_OFFSET_DEBUG(("%s(%d): (op %s) %s %"UVuf" <- %"UVuf" (max %"UVuf").\n",
"reganode",
(UV)RExC_offsets[0]));
Set_Cur_Node_Offset;
}
-
+#endif
RExC_emit = ptr;
return(ret);
}
while (src > opnd) {
StructCopy(--src, --dst, regnode);
+#ifdef RE_TRACK_PATTERN_OFFSETS
if (RExC_offsets) { /* MJD 20010112 */
MJD_OFFSET_DEBUG(("%s(%d): (op %s) %s copy %"UVuf" -> %"UVuf" (max %"UVuf").\n",
"reg_insert",
Set_Node_Offset_To_R(dst-RExC_emit_start, Node_Offset(src));
Set_Node_Length_To_R(dst-RExC_emit_start, Node_Length(src));
}
+#endif
}
place = opnd; /* Op node, where operand used to be. */
+#ifdef RE_TRACK_PATTERN_OFFSETS
if (RExC_offsets) { /* MJD */
MJD_OFFSET_DEBUG(("%s(%d): (op %s) %s %"UVuf" <- %"UVuf" (max %"UVuf").\n",
"reginsert",
Set_Node_Offset(place, RExC_parse);
Set_Node_Length(place, 1);
}
+#endif
src = NEXTOPER(place);
FILL_ADVANCE_NODE(place, op);
Zero(src, offset, regnode);
PL_colors[4],PL_colors[5],s);
}
});
-
- Safefree(ri->offsets); /* 20010421 MJD */
+#ifdef RE_TRACK_PATTERN_OFFSETS
+ if (ri->u.offsets)
+ Safefree(ri->u.offsets); /* 20010421 MJD */
+#endif
if (ri->data) {
int n = ri->data->count;
PAD* new_comppad = NULL;
RXi_GET_DECL(r,ri);
npar = r->nparens+1;
- len = ri->offsets[0];
+ len = ProgLen(ri);
Newxc(reti, sizeof(regexp_internal) + (len+1)*sizeof(regnode), char, regexp_internal);
Copy(ri->program, reti->program, len+1, regnode);
else
reti->data = NULL;
- Newx(reti->offsets, 2*len+1, U32);
- Copy(ri->offsets, reti->offsets, 2*len+1, U32);
-
+ reti->name_list_idx = ri->name_list_idx;
+
+#ifdef RE_TRACK_PATTERN_OFFSETS
+ if (ri->u.offsets) {
+ Newx(reti->u.offsets, 2*len+1, U32);
+ Copy(ri->u.offsets, reti->u.offsets, 2*len+1, U32);
+ }
+#else
+ SetProgLen(reti,len);
+#endif
+
return (void*)reti;
}
const regexp * const re = (regexp *)mg->mg_obj;
if (!mg->mg_ptr) {
- const char *fptr = "msix";
- char reflags[6];
+ const char *fptr = STD_PAT_MODS; /*"msix"*/
+ char reflags[7];
char ch;
- int left = 0;
- int right = 4;
- bool need_newline = 0;
- U16 reganch = (U16)((re->extflags & RXf_PMf_COMPILETIME) >> 12);
-
+ bool hask = ((re->extflags & RXf_PMf_KEEPCOPY) == RXf_PMf_KEEPCOPY);
+ bool hasm = ((re->extflags & RXf_PMf_STD_PMMOD) != RXf_PMf_STD_PMMOD);
+ U16 reganch = (U16)((re->extflags & RXf_PMf_STD_PMMOD) >> 12);
+ bool need_newline = 0;
+ int left = 0;
+ int right = 4 + hask;
+ if (hask)
+ reflags[left++] = KEEPCOPY_PAT_MOD; /*'k'*/
while((ch = *fptr++)) {
if(reganch & 1) {
reflags[left++] = ch;
}
reganch >>= 1;
}
- if(left != 4) {
+ if(hasm) {
reflags[left] = '-';
- left = 5;
+ left = 5 + hask;
}
-
+ /* printf("[%*.7s]\n",left,reflags); */
mg->mg_len = re->prelen + 4 + left;
/*
* If /x was used, we have to worry about a regex ending with a