Es |void |regtail |NN struct RExC_state_t *state|NN regnode *p|NN const regnode *val|U32 depth
Es |SV * |reg_scan_name |NN struct RExC_state_t *state|U32 flags
Es |U32 |join_exact |NN struct RExC_state_t *state|NN regnode *scan|NN I32 *min|U32 flags|NULLOK regnode *val|U32 depth
-EsRn |char* |regwhite |NN char *p|NN const char *e
-Es |char* |nextchar |NN struct RExC_state_t *state
+EsRn |char * |regwhite |NN struct RExC_state_t *state|NN char *p
+Es |char * |nextchar |NN struct RExC_state_t *state
+Es |bool |reg_skipcomment|NN struct RExC_state_t *state
Es |void |scan_commit |NN const struct RExC_state_t* state|NN struct scan_data_t *data|NN I32 *minlenp|int is_inf
Esn |void |cl_anything |NN const struct RExC_state_t* state|NN struct regnode_charclass_class *cl
EsRn |int |cl_is_anything |NN const struct regnode_charclass_class *cl
#define join_exact S_join_exact
#define regwhite S_regwhite
#define nextchar S_nextchar
+#define reg_skipcomment S_reg_skipcomment
#define scan_commit S_scan_commit
#define cl_anything S_cl_anything
#define cl_is_anything S_cl_is_anything
#define join_exact(a,b,c,d,e,f) S_join_exact(aTHX_ a,b,c,d,e,f)
#define regwhite S_regwhite
#define nextchar(a) S_nextchar(aTHX_ a)
+#define reg_skipcomment(a) S_reg_skipcomment(aTHX_ a)
#define scan_commit(a,b,c,d) S_scan_commit(aTHX_ a,b,c,d)
#define cl_anything S_cl_anything
#define cl_is_anything S_cl_is_anything
modifiers" in this scenario.
*/
- char *fptr = "msix";
+ char *fptr = INT_PAT_MODS;
char ch;
U16 match_flags = (U16)((re->extflags & PMf_COMPILETIME) >> 12);
XSRETURN(2);
} else {
/* Scalar, so use the string that Perl would return */
- if (!mg->mg_ptr)
- CALLREG_STRINGIFY(mg,0,0);
-
/* return the pattern in (?msix:..) format */
- pattern = sv_2mortal(newSVpvn(mg->mg_ptr,mg->mg_len));
+ pattern = sv_2mortal(newSVpvn(re->wrapped,re->wraplen));
if (re->extflags & RXf_UTF8)
SvUTF8_on(pattern);
XPUSHs(pattern);
use re qw(is_regexp regexp_pattern regmust
regname regnames regnames_count
regnames_iterinit regnames_iternext);
-my $qr=qr/foo/i;
-
-ok(is_regexp($qr),'is_regexp($qr)');
-ok(!is_regexp(''),'is_regexp("")');
-is((regexp_pattern($qr))[0],'foo','regexp_pattern[0]');
-is((regexp_pattern($qr))[1],'i','regexp_pattern[1]');
-is(regexp_pattern($qr),'(?i-xsm:foo)','scalar regexp_pattern');
-ok(!regexp_pattern(''),'!regexp_pattern("")');
+{
+ my $qr=qr/foo/ki;
+ ok(is_regexp($qr),'is_regexp($qr)');
+ ok(!is_regexp(''),'is_regexp("")');
+ is((regexp_pattern($qr))[0],'foo','regexp_pattern[0]');
+ is((regexp_pattern($qr))[1],'ik','regexp_pattern[1]');
+ is(regexp_pattern($qr),'(?ki-xsm:foo)','scalar regexp_pattern');
+ ok(!regexp_pattern(''),'!regexp_pattern("")');
+}
{
my $qr=qr/here .* there/x;
my ($anchored,$floating)=regmust($qr);
if (ckWARN(WARN_SYNTAX)) {
const REGEXP *re = PM_GETRE(kPMOP);
const char *pmstr = re ? re->precomp : "STRING";
+ const STRLEN len = re ? re->prelen : 6;
Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
- "/%s/ should probably be written as \"%s\"",
- pmstr, pmstr);
+ "/%.*s/ should probably be written as \"%.*s\"",
+ len, pmstr, len, pmstr);
}
}
return ck_fun(o);
#endif
-/* chars and strings used as regex pattern modifiers
- * Singlular is a 'c'har, plural is a "string"
- */
-#define EXEC_PAT_MOD 'e'
-#define KEEPCOPY_PAT_MOD 'k'
-#define ONCE_PAT_MOD 'o'
-#define GLOBAL_PAT_MOD 'g'
-#define CONTINUE_PAT_MOD 'c'
-#define MULTILINE_PAT_MOD 'm'
-#define SINGLE_PAT_MOD 's'
-#define IGNORE_PAT_MOD 'i'
-#define XTENDED_PAT_MOD 'x'
-
-#define ONCE_PAT_MODS "o"
-#define KEEPCOPY_PAT_MODS "k"
-#define EXEC_PAT_MODS "e"
-#define LOOP_PAT_MODS "gc"
-
-#define STD_PAT_MODS "msix"
-
-#define EXT_PAT_MODS ONCE_PAT_MODS KEEPCOPY_PAT_MODS
-#define QR_PAT_MODS STD_PAT_MODS EXT_PAT_MODS
-#define M_PAT_MODS QR_PAT_MODS LOOP_PAT_MODS
-#define S_PAT_MODS M_PAT_MODS EXEC_PAT_MODS
+
/*
=for hackers
Found in file av.c
+=item av_create_and_push
+X<av_create_and_push>
+
+Push an SV onto the end of the array, creating the array if necessary.
+A small internal helper function to remove a commonly duplicated idiom.
+
+NOTE: this function is experimental and may change or be
+removed without notice.
+
+ void av_create_and_push(AV **const avp, SV *const val)
+
+=for hackers
+Found in file av.c
+
+=item av_create_and_unshift_one
+X<av_create_and_unshift_one>
+
+Unshifts an SV onto the beginning of the array, creating the array if
+necessary.
+A small internal helper function to remove a commonly duplicated idiom.
+
+NOTE: this function is experimental and may change or be
+removed without notice.
+
+ SV** av_create_and_unshift_one(AV **const avp, SV *const val)
+
+=for hackers
+Found in file av.c
+
=item av_delete
X<av_delete>
__attribute__nonnull__(pTHX_2)
__attribute__nonnull__(pTHX_3);
-STATIC char* S_regwhite(char *p, const char *e)
+STATIC char * S_regwhite(struct RExC_state_t *state, char *p)
__attribute__warn_unused_result__
__attribute__nonnull__(1)
__attribute__nonnull__(2);
-STATIC char* S_nextchar(pTHX_ struct RExC_state_t *state)
+STATIC char * S_nextchar(pTHX_ struct RExC_state_t *state)
+ __attribute__nonnull__(pTHX_1);
+
+STATIC bool S_reg_skipcomment(pTHX_ struct RExC_state_t *state)
__attribute__nonnull__(pTHX_1);
STATIC void S_scan_commit(pTHX_ const struct RExC_state_t* state, struct scan_data_t *data, I32 *minlenp, int is_inf)
}
data->last_end = -1;
data->flags &= ~SF_BEFORE_EOL;
- DEBUG_STUDYDATA("cl_anything: ",data,0);
+ DEBUG_STUDYDATA("commit: ",data,0);
}
/* Can match anything (initialization) */
/* needed for dumping*/
DEBUG_r(if (optimize) {
regnode *opt = convert;
+
while ( ++opt < optimize) {
Set_Node_Offset_Length(opt,0,0);
}
r->engine= RE_ENGINE_PTR;
r->refcnt = 1;
r->prelen = xend - exp;
- r->precomp = savepvn(RExC_precomp, r->prelen);
r->extflags = pm->op_pmflags & RXf_PMf_COMPILETIME;
+ {
+ bool has_k = ((r->extflags & RXf_PMf_KEEPCOPY) == RXf_PMf_KEEPCOPY);
+ bool has_minus = ((r->extflags & RXf_PMf_STD_PMMOD) != RXf_PMf_STD_PMMOD);
+ bool has_runon = ((RExC_seen & REG_SEEN_RUN_ON_COMMENT)==REG_SEEN_RUN_ON_COMMENT);
+ U16 reganch = (U16)((r->extflags & RXf_PMf_STD_PMMOD) >> 12);
+ const char *fptr = STD_PAT_MODS; /*"msix"*/
+ char *p;
+ r->wraplen = r->prelen + has_minus + has_k + has_runon
+ + (sizeof(STD_PAT_MODS) - 1)
+ + (sizeof("(?:)") - 1);
+
+ Newx(r->wrapped, r->wraplen, char );
+ p = r->wrapped;
+ *p++='('; *p++='?';
+ if (has_k)
+ *p++ = KEEPCOPY_PAT_MOD; /*'k'*/
+ {
+ char *r = p + (sizeof(STD_PAT_MODS) - 1) + has_minus - 1;
+ char *colon = r + 1;
+ char ch;
+
+ while((ch = *fptr++)) {
+ if(reganch & 1)
+ *p++ = ch;
+ else
+ *r-- = ch;
+ reganch >>= 1;
+ }
+ if(has_minus) {
+ *r = '-';
+ p = colon;
+ }
+ }
+
+ *p++=':';
+ Copy(RExC_precomp, p, r->prelen, char);
+ r->precomp = p;
+ p += r->prelen;
+ if (has_runon)
+ *p++='\n';
+ *p=')';
+
+
+ if (0)
+ PerlIO_printf(Perl_debug_log,
+ "RExC_precomp: %.*s\nr->precomp: %.*s\nr->wrapped:%.*s\n",
+ r->prelen,
+ RExC_precomp,
+ r->prelen,
+ r->precomp,
+ r->wraplen,
+ r->wrapped
+ );
+
+
+ }
+
r->intflags = 0;
r->nparens = RExC_npar - 1; /* set early to validate backrefs */
case '#':
if (RExC_flags & RXf_PMf_EXTENDED) {
- while (RExC_parse < RExC_end && *RExC_parse != '\n')
- RExC_parse++;
- if (RExC_parse < RExC_end)
+ if ( reg_skipcomment( pRExC_state ) )
goto tryagain;
}
/* FALL THROUGH */
char * const oldp = p;
if (RExC_flags & RXf_PMf_EXTENDED)
- p = regwhite(p, RExC_end);
+ p = regwhite( pRExC_state, p );
switch (*p) {
case '^':
case '$':
ender = *p++;
break;
}
- if (RExC_flags & RXf_PMf_EXTENDED)
- p = regwhite(p, RExC_end);
+ if ( RExC_flags & RXf_PMf_EXTENDED)
+ p = regwhite( pRExC_state, p );
if (UTF && FOLD) {
/* Prime the casefolded buffer. */
ender = toFOLD_uni(ender, tmpbuf, &foldlen);
}
- if (ISMULT2(p)) { /* Back off on ?+*. */
+ if (p < RExC_end && ISMULT2(p)) { /* Back off on ?+*. */
if (len)
p = oldp;
else if (UTF) {
}
STATIC char *
-S_regwhite(char *p, const char *e)
+S_regwhite( RExC_state_t *pRExC_state, char *p )
{
+ const char *e = RExC_end;
while (p < e) {
if (isSPACE(*p))
++p;
else if (*p == '#') {
+ bool ended = 0;
do {
- p++;
- } while (p < e && *p != '\n');
+ if (*p++ == '\n') {
+ ended = 1;
+ break;
+ }
+ } while (p < e);
+ if (!ended)
+ RExC_seen |= REG_SEEN_RUN_ON_COMMENT;
}
else
break;
#undef _C_C_T_
+/* reg_skipcomment()
+
+ Absorbs an /x style # comments from the input stream.
+ Returns true if there is more text remaining in the stream.
+ Will set the REG_SEEN_RUN_ON_COMMENT flag if the comment
+ terminates the pattern without including a newline.
+
+ Note its the callers responsibility to ensure that we are
+ actually in /x mode
+
+*/
+
+STATIC bool
+S_reg_skipcomment(pTHX_ RExC_state_t *pRExC_state)
+{
+ bool ended = 0;
+ while (RExC_parse < RExC_end)
+ if (*RExC_parse++ == '\n') {
+ ended = 1;
+ break;
+ }
+ if (!ended) {
+ /* we ran off the end of the pattern without ending
+ the comment, so we have to add an \n when wrapping */
+ RExC_seen |= REG_SEEN_RUN_ON_COMMENT;
+ return 0;
+ } else
+ return 1;
+}
+
+/* nextchar()
+
+ Advance that parse position, and optionall absorbs
+ "whitespace" from the inputstream.
+
+ Without /x "whitespace" means (?#...) style comments only,
+ with /x this means (?#...) and # comments and whitespace proper.
+
+ Returns the RExC_parse point from BEFORE the scan occurs.
+
+ This is the /x friendly way of saying RExC_parse++.
+*/
+
STATIC char*
S_nextchar(pTHX_ RExC_state_t *pRExC_state)
{
continue;
}
else if (*RExC_parse == '#') {
- while (RExC_parse < RExC_end)
- if (*RExC_parse++ == '\n') break;
- continue;
+ if ( reg_skipcomment( pRExC_state ) )
+ continue;
}
}
return retval;
return;
CALLREGFREE_PVT(r); /* free the private data */
-
- /* gcov results gave these as non-null 100% of the time, so there's no
- optimisation in checking them before calling Safefree */
- Safefree(r->precomp);
RX_MATCH_COPY_FREE(r);
#ifdef PERL_OLD_COPY_ON_WRITE
if (r->saved_copy)
Safefree(r->substrs);
}
if (r->paren_names)
- SvREFCNT_dec(r->paren_names);
-
+ SvREFCNT_dec(r->paren_names);
+ Safefree(r->wrapped);
Safefree(r->startp);
Safefree(r->endp);
Safefree(r);
} else
ret->substrs = NULL;
- ret->precomp = SAVEPVN(r->precomp, r->prelen);
+ ret->wrapped = SAVEPVN(r->wrapped, r->wraplen);
+ ret->precomp = ret->wrapped + (r->precomp - r->wrapped);
+ ret->prelen = r->prelen;
+ ret->wraplen = r->wraplen;
+
ret->refcnt = r->refcnt;
ret->minlen = r->minlen;
ret->minlenret = r->minlenret;
- ret->prelen = r->prelen;
ret->nparens = r->nparens;
ret->lastparen = r->lastparen;
ret->lastcloseparen = r->lastcloseparen;
reti->swap = NULL;
}
-
reti->regstclass = NULL;
+
if (ri->data) {
struct reg_data *d;
const int count = ri->data->count;
*/
#ifndef PERL_IN_XSUB_RE
+
char *
Perl_reg_stringify(pTHX_ MAGIC *mg, STRLEN *lp, U32 *flags, I32 *haseval ) {
dVAR;
const regexp * const re = (regexp *)mg->mg_obj;
-
- if (!mg->mg_ptr) {
- const char *fptr = STD_PAT_MODS; /*"msix"*/
- char reflags[7];
- char ch;
- bool hask = ((re->extflags & RXf_PMf_KEEPCOPY) == RXf_PMf_KEEPCOPY);
- bool hasm = ((re->extflags & RXf_PMf_STD_PMMOD) != RXf_PMf_STD_PMMOD);
- U16 reganch = (U16)((re->extflags & RXf_PMf_STD_PMMOD) >> 12);
- bool need_newline = 0;
- int left = 0;
- int right = 4 + hask;
- if (hask)
- reflags[left++] = KEEPCOPY_PAT_MOD; /*'k'*/
- while((ch = *fptr++)) {
- if(reganch & 1) {
- reflags[left++] = ch;
- }
- else {
- reflags[right--] = ch;
- }
- reganch >>= 1;
- }
- if(hasm) {
- reflags[left] = '-';
- left = 5 + hask;
- }
- /* printf("[%*.7s]\n",left,reflags); */
- mg->mg_len = re->prelen + 4 + left;
- /*
- * If /x was used, we have to worry about a regex ending with a
- * comment later being embedded within another regex. If so, we don't
- * want this regex's "commentization" to leak out to the right part of
- * the enclosing regex, we must cap it with a newline.
- *
- * So, if /x was used, we scan backwards from the end of the regex. If
- * we find a '#' before we find a newline, we need to add a newline
- * ourself. If we find a '\n' first (or if we don't find '#' or '\n'),
- * we don't need to add anything. -jfriedl
- */
- if (PMf_EXTENDED & re->extflags) {
- const char *endptr = re->precomp + re->prelen;
- while (endptr >= re->precomp) {
- const char c = *(endptr--);
- if (c == '\n')
- break; /* don't need another */
- if (c == '#') {
- /* we end while in a comment, so we need a newline */
- mg->mg_len++; /* save space for it */
- need_newline = 1; /* note to add it */
- break;
- }
- }
- }
-
- Newx(mg->mg_ptr, mg->mg_len + 1 + left, char);
- mg->mg_ptr[0] = '(';
- mg->mg_ptr[1] = '?';
- Copy(reflags, mg->mg_ptr+2, left, char);
- *(mg->mg_ptr+left+2) = ':';
- Copy(re->precomp, mg->mg_ptr+3+left, re->prelen, char);
- if (need_newline)
- mg->mg_ptr[mg->mg_len - 2] = '\n';
- mg->mg_ptr[mg->mg_len - 1] = ')';
- mg->mg_ptr[mg->mg_len] = 0;
- }
if (haseval)
*haseval = re->seen_evals;
if (flags)
*flags = ((re->extflags & RXf_UTF8) ? 1 : 0);
-
if (lp)
- *lp = mg->mg_len;
- return mg->mg_ptr;
+ *lp = re->wraplen;
+ return re->wrapped;
}
/*
#define REG_TOP_LEVEL_BRANCHES 0x00000040
#define REG_SEEN_VERBARG 0x00000080
#define REG_SEEN_CUTGROUP 0x00000100
+#define REG_SEEN_RUN_ON_COMMENT 0x00000200
START_EXTERN_C
/* end shift should be non negative here */
}
-#ifdef DEBUGGING /* 7/99: reports of failure (with the older version) */
+#ifdef QDEBUGGING /* 7/99: reports of failure (with the older version) */
if (end_shift < 0)
Perl_croak(aTHX_ "panic: end_shift: %"IVdf" pattern:\n%s\n ",
(IV)end_shift, prog->precomp);
/* Information about the match that isn't often used */
char *precomp; /* pre-compilation regular expression */
I32 prelen; /* length of precomp */
+ char *wrapped; /* wrapped version of the pattern */
+ I32 wraplen; /* length of wrapped */
I32 seen_evals; /* number of eval groups in the pattern - for security checks */
HV *paren_names; /* Optional hash of paren names */
#define RXf_START_ONLY 0x00000200 /* Pattern is /^/ */
#define RXf_WHITE 0x00000400 /* Pattern is /\s+/ */
-/* 0xF800 of extflags is used by (RXf_)PMf_COMPILETIME */
+/* 0x1F800 of extflags is used by (RXf_)PMf_COMPILETIME */
#define RXf_PMf_LOCALE 0x00000800 /* use locale */
#define RXf_PMf_MULTILINE 0x00001000 /* /m */
#define RXf_PMf_SINGLELINE 0x00002000 /* /s */
case SINGLE_PAT_MOD: *(pmfl) |= RXf_PMf_SINGLELINE; break; \
case XTENDED_PAT_MOD: *(pmfl) |= RXf_PMf_EXTENDED; break
+/* chars and strings used as regex pattern modifiers
+ * Singlular is a 'c'har, plural is a "string"
+ */
+#define EXEC_PAT_MOD 'e'
+#define KEEPCOPY_PAT_MOD 'k'
+#define ONCE_PAT_MOD 'o'
+#define GLOBAL_PAT_MOD 'g'
+#define CONTINUE_PAT_MOD 'c'
+#define MULTILINE_PAT_MOD 'm'
+#define SINGLE_PAT_MOD 's'
+#define IGNORE_PAT_MOD 'i'
+#define XTENDED_PAT_MOD 'x'
+
+#define ONCE_PAT_MODS "o"
+#define KEEPCOPY_PAT_MODS "k"
+#define EXEC_PAT_MODS "e"
+#define LOOP_PAT_MODS "gc"
+
+#define STD_PAT_MODS "msix"
+
+#define INT_PAT_MODS STD_PAT_MODS KEEPCOPY_PAT_MODS
+
+#define EXT_PAT_MODS ONCE_PAT_MODS KEEPCOPY_PAT_MODS
+#define QR_PAT_MODS STD_PAT_MODS EXT_PAT_MODS
+#define M_PAT_MODS QR_PAT_MODS LOOP_PAT_MODS
+#define S_PAT_MODS M_PAT_MODS EXEC_PAT_MODS
+
+
/* What we have seen */
#define RXf_LOOKBEHIND_SEEN 0x00020000
#define RXf_EVAL_SEEN 0x00040000