*/
/*
- * "A fair jaw-cracker dwarf-language must be." --Samwise Gamgee
+ * 'A fair jaw-cracker dwarf-language must be.' --Samwise Gamgee
+ *
+ * [p.285 of _The Lord of the Rings_, II/iii: "The Ring Goes South"]
*/
/* This file contains functions for compiling a regular expression. See
**** Alterations to Henry's code are...
****
**** Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
- **** 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007 by Larry Wall and others
+ **** 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008
+ **** by Larry Wall and others
****
**** You may distribute under the terms of either the GNU General Public
**** License or the Artistic License, as specified in the README file.
const STRLEN old_l = CHR_SVLEN(*data->longest);
GET_RE_DEBUG_FLAGS_DECL;
+ PERL_ARGS_ASSERT_SCAN_COMMIT;
+
if ((l >= old_l) && ((l > old_l) || (data->flags & SF_BEFORE_EOL))) {
SvSetMagicSV(*data->longest, data->last_found);
if (*data->longest == data->longest_fixed) {
STATIC void
S_cl_anything(const RExC_state_t *pRExC_state, struct regnode_charclass_class *cl)
{
+ PERL_ARGS_ASSERT_CL_ANYTHING;
+
ANYOF_CLASS_ZERO(cl);
ANYOF_BITMAP_SETALL(cl);
cl->flags = ANYOF_EOS|ANYOF_UNICODE_ALL;
{
int value;
+ PERL_ARGS_ASSERT_CL_IS_ANYTHING;
+
for (value = 0; value <= ANYOF_MAX; value += 2)
if (ANYOF_CLASS_TEST(cl, value) && ANYOF_CLASS_TEST(cl, value + 1))
return 1;
STATIC void
S_cl_init(const RExC_state_t *pRExC_state, struct regnode_charclass_class *cl)
{
+ PERL_ARGS_ASSERT_CL_INIT;
+
Zero(cl, 1, struct regnode_charclass_class);
cl->type = ANYOF;
cl_anything(pRExC_state, cl);
STATIC void
S_cl_init_zero(const RExC_state_t *pRExC_state, struct regnode_charclass_class *cl)
{
+ PERL_ARGS_ASSERT_CL_INIT_ZERO;
+
Zero(cl, 1, struct regnode_charclass_class);
cl->type = ANYOF;
cl_anything(pRExC_state, cl);
S_cl_and(struct regnode_charclass_class *cl,
const struct regnode_charclass_class *and_with)
{
+ PERL_ARGS_ASSERT_CL_AND;
assert(and_with->type == ANYOF);
if (!(and_with->flags & ANYOF_CLASS)
STATIC void
S_cl_or(const RExC_state_t *pRExC_state, struct regnode_charclass_class *cl, const struct regnode_charclass_class *or_with)
{
+ PERL_ARGS_ASSERT_CL_OR;
+
if (or_with->flags & ANYOF_INVERT) {
/* We do not use
* (B1 | CL1) | (!B2 & !CL2) = (B1 | !B2 & !CL2) | (CL1 | (!B2 & !CL2))
int colwidth= widecharmap ? 6 : 4;
GET_RE_DEBUG_FLAGS_DECL;
+ PERL_ARGS_ASSERT_DUMP_TRIE;
PerlIO_printf( Perl_debug_log, "%*sChar : %-6s%-6s%-4s ",
(int)depth * 2 + 2,"",
SV *sv=sv_newmortal();
int colwidth= widecharmap ? 6 : 4;
GET_RE_DEBUG_FLAGS_DECL;
+
+ PERL_ARGS_ASSERT_DUMP_TRIE_INTERIM_LIST;
+
/* print out the table precompression. */
PerlIO_printf( Perl_debug_log, "%*sState :Word | Transition Data\n%*s%s",
(int)depth * 2 + 2,"", (int)depth * 2 + 2,"",
SV *sv=sv_newmortal();
int colwidth= widecharmap ? 6 : 4;
GET_RE_DEBUG_FLAGS_DECL;
+
+ PERL_ARGS_ASSERT_DUMP_TRIE_INTERIM_TABLE;
/*
print out the table precompression so that we can do a visual check
#endif
SV *re_trie_maxbuff;
GET_RE_DEBUG_FLAGS_DECL;
+
+ PERL_ARGS_ASSERT_MAKE_TRIE;
#ifndef DEBUGGING
PERL_UNUSED_ARG(depth);
#endif
if ( folder )
TRIE_BITMAP_SET(trie, folder[ *ch ]);
DEBUG_OPTIMISE_r(
- PerlIO_printf(Perl_debug_log, (char*)ch)
+ PerlIO_printf(Perl_debug_log, "%s", (char*)ch)
);
}
}
reg_ac_data *aho;
const U32 data_slot = add_data( pRExC_state, 1, "T" );
GET_RE_DEBUG_FLAGS_DECL;
+
+ PERL_ARGS_ASSERT_MAKE_TRIE_FAILTABLE;
#ifndef DEBUGGING
PERL_UNUSED_ARG(depth);
#endif
#else
PERL_UNUSED_ARG(depth);
#endif
+
+ PERL_ARGS_ASSERT_JOIN_EXACT;
#ifndef EXPERIMENTAL_INPLACESCAN
PERL_UNUSED_ARG(flags);
PERL_UNUSED_ARG(val);
regnode *first_non_open = scan;
I32 stopmin = I32_MAX;
scan_frame *frame = NULL;
-
GET_RE_DEBUG_FLAGS_DECL;
+ PERL_ARGS_ASSERT_STUDY_CHUNK;
+
#ifdef DEBUGGING
StructCopy(&zero_scan_data, &data_fake, scan_data_t);
#endif
data->whilem_c = data_fake.whilem_c;
}
if (f & SCF_DO_STCLASS_AND) {
- const int was = (data->start_class->flags & ANYOF_EOS);
-
- cl_and(data->start_class, &intrnl);
- if (was)
- data->start_class->flags |= ANYOF_EOS;
+ if (flags & SCF_DO_STCLASS_OR) {
+ /* OR before, AND after: ideally we would recurse with
+ * data_fake to get the AND applied by study of the
+ * remainder of the pattern, and then derecurse;
+ * *** HACK *** for now just treat as "no information".
+ * See [perl #56690].
+ */
+ cl_init(pRExC_state, data->start_class);
+ } else {
+ /* AND before and after: combine and continue */
+ const int was = (data->start_class->flags & ANYOF_EOS);
+
+ cl_and(data->start_class, &intrnl);
+ if (was)
+ data->start_class->flags |= ANYOF_EOS;
+ }
}
}
#if PERL_ENABLE_POSITIVE_ASSERTION_STUDY
{
U32 count = RExC_rxi->data ? RExC_rxi->data->count : 0;
+ PERL_ARGS_ASSERT_ADD_DATA;
+
Renewc(RExC_rxi->data,
sizeof(*RExC_rxi->data) + sizeof(void*) * (count + n - 1),
char, struct reg_data);
#ifndef PERL_IN_XSUB_RE
REGEXP *
-Perl_pregcomp(pTHX_ const SV * const pattern, const U32 flags)
+Perl_pregcomp(pTHX_ SV * const pattern, const U32 flags)
{
dVAR;
HV * const table = GvHV(PL_hintgv);
+
+ PERL_ARGS_ASSERT_PREGCOMP;
+
/* Dispatch a request to compile a regexp to correct
regexp engine. */
if (table) {
#endif
REGEXP *
-Perl_re_compile(pTHX_ const SV * const pattern, const U32 pm_flags)
+Perl_re_compile(pTHX_ SV * const pattern, U32 pm_flags)
{
dVAR;
REGEXP *rx;
struct regexp *r;
register regexp_internal *ri;
STRLEN plen;
- char* exp = SvPV((SV*)pattern, plen);
+ char *exp = SvPV(pattern, plen);
char* xend = exp + plen;
regnode *scan;
I32 flags;
RExC_state_t copyRExC_state;
#endif
GET_RE_DEBUG_FLAGS_DECL;
+
+ PERL_ARGS_ASSERT_RE_COMPILE;
+
DEBUG_r(if (!PL_colorset) reginitcolors());
- RExC_utf8 = RExC_orig_utf8 = pm_flags & RXf_UTF8;
+ RExC_utf8 = RExC_orig_utf8 = SvUTF8(pattern);
DEBUG_COMPILE_r({
SV *dsv= sv_newmortal();
/* Allocate space and zero-initialize. Note, the two step process
of zeroing when in debug mode, thus anything assigned has to
happen after that */
- rx = newSV_type(SVt_REGEXP);
+ rx = (REGEXP*) newSV_type(SVt_REGEXP);
r = (struct regexp*)SvANY(rx);
Newxc(ri, sizeof(regexp_internal) + (unsigned)RExC_size * sizeof(regnode),
char, regexp_internal);
+ (sizeof(STD_PAT_MODS) - 1)
+ (sizeof("(?:)") - 1);
- p = sv_grow(rx, wraplen + 1);
+ p = sv_grow(MUTABLE_SV(rx), wraplen + 1);
SvCUR_set(rx, wraplen);
SvPOK_on(rx);
+ SvFLAGS(rx) |= SvUTF8(pattern);
*p++='('; *p++='?';
if (has_p)
*p++ = KEEPCOPY_PAT_MOD; /*'p'*/
Zero(r->substrs, 1, struct reg_substr_data);
#ifdef TRIE_STUDY_OPT
- if ( restudied ) {
+ if (!restudied) {
+ StructCopy(&zero_scan_data, &data, scan_data_t);
+ copyRExC_state = RExC_state;
+ } else {
U32 seen=RExC_seen;
DEBUG_OPTIMISE_r(PerlIO_printf(Perl_debug_log,"Restudying\n"));
SvREFCNT_dec(data.last_found);
}
StructCopy(&zero_scan_data, &data, scan_data_t);
- } else {
- StructCopy(&zero_scan_data, &data, scan_data_t);
- copyRExC_state = RExC_state;
}
#else
StructCopy(&zero_scan_data, &data, scan_data_t);
/*dmq: removed as part of de-PMOP: pm->op_pmflags = RExC_flags; */
if (UTF)
- r->extflags |= RXf_UTF8; /* Unicode in it? */
+ SvUTF8_on(rx); /* Unicode in it? */
ri->regstclass = NULL;
if (RExC_naughty >= 10) /* Probably an expensive pattern. */
r->intflags |= PREGf_NAUGHTY;
regnode *first= scan;
regnode *first_next= regnext(first);
- /* Skip introductions and multiplicators >= 1. */
+ /*
+ * Skip introductions and multiplicators >= 1
+ * so that we can extract the 'meat' of the pattern that must
+ * match in the large if() sequence following.
+ * NOTE that EXACT is NOT covered here, as it is normally
+ * picked up by the optimiser separately.
+ *
+ * This is unfortunate as the optimiser isn't handling lookahead
+ * properly currently.
+ *
+ */
while ((OP(first) == OPEN && (sawopen = 1)) ||
/* An OR of *one* alternative - should not happen now. */
(OP(first) == BRANCH && OP(first_next) != BRANCH) ||
(PL_regkind[OP(first)] == CURLY && ARG1(first) > 0) ||
(OP(first) == NOTHING && PL_regkind[OP(first_next)] != END ))
{
-
+ /*
+ * the only op that could be a regnode is PLUS, all the rest
+ * will be regnode_1 or regnode_2.
+ *
+ */
if (OP(first) == PLUS)
sawplus = 1;
else
first += regarglen[OP(first)];
- if (OP(first) == IFMATCH) {
- first = NEXTOPER(first);
- first += EXTRA_STEP_2ARGS;
- } else /* XXX possible optimisation for /(?=)/ */
- first = NEXTOPER(first);
+
+ first = NEXTOPER(first);
first_next= regnext(first);
}
if (RExC_seen & REG_SEEN_CUTGROUP)
r->intflags |= PREGf_CUTGROUP_SEEN;
if (RExC_paren_names)
- r->paren_names = (HV*)SvREFCNT_inc(RExC_paren_names);
+ RXp_PAREN_NAMES(r) = MUTABLE_HV(SvREFCNT_inc(RExC_paren_names));
else
- r->paren_names = NULL;
+ RXp_PAREN_NAMES(r) = NULL;
#ifdef STUPID_PATTERN_CHECKS
if (RX_PRELEN(rx) == 0)
Perl_reg_named_buff(pTHX_ REGEXP * const rx, SV * const key, SV * const value,
const U32 flags)
{
+ PERL_ARGS_ASSERT_REG_NAMED_BUFF;
+
PERL_UNUSED_ARG(value);
if (flags & RXapif_FETCH) {
return reg_named_buff_fetch(rx, key, flags);
} else if (flags & (RXapif_STORE | RXapif_DELETE | RXapif_CLEAR)) {
- Perl_croak(aTHX_ PL_no_modify);
+ Perl_croak(aTHX_ "%s", PL_no_modify);
return NULL;
} else if (flags & RXapif_EXISTS) {
return reg_named_buff_exists(rx, key, flags)
Perl_reg_named_buff_iter(pTHX_ REGEXP * const rx, const SV * const lastkey,
const U32 flags)
{
+ PERL_ARGS_ASSERT_REG_NAMED_BUFF_ITER;
PERL_UNUSED_ARG(lastkey);
if (flags & RXapif_FIRSTKEY)
AV *retarray = NULL;
SV *ret;
struct regexp *const rx = (struct regexp *)SvANY(r);
+
+ PERL_ARGS_ASSERT_REG_NAMED_BUFF_FETCH;
+
if (flags & RXapif_ALL)
retarray=newAV();
- if (rx && rx->paren_names) {
- HE *he_str = hv_fetch_ent( rx->paren_names, namesv, 0, 0 );
+ if (rx && RXp_PAREN_NAMES(rx)) {
+ HE *he_str = hv_fetch_ent( RXp_PAREN_NAMES(rx), namesv, 0, 0 );
if (he_str) {
IV i;
SV* sv_dat=HeVAL(he_str);
} else {
ret = newSVsv(&PL_sv_undef);
}
- if (retarray) {
- SvREFCNT_inc_simple_void(ret);
+ if (retarray)
av_push(retarray, ret);
- }
}
if (retarray)
- return newRV((SV*)retarray);
+ return newRV_noinc(MUTABLE_SV(retarray));
}
}
return NULL;
const U32 flags)
{
struct regexp *const rx = (struct regexp *)SvANY(r);
- if (rx && rx->paren_names) {
+
+ PERL_ARGS_ASSERT_REG_NAMED_BUFF_EXISTS;
+
+ if (rx && RXp_PAREN_NAMES(rx)) {
if (flags & RXapif_ALL) {
- return hv_exists_ent(rx->paren_names, key, 0);
+ return hv_exists_ent(RXp_PAREN_NAMES(rx), key, 0);
} else {
SV *sv = CALLREG_NAMED_BUFF_FETCH(r, key, flags);
if (sv) {
Perl_reg_named_buff_firstkey(pTHX_ REGEXP * const r, const U32 flags)
{
struct regexp *const rx = (struct regexp *)SvANY(r);
- if ( rx && rx->paren_names ) {
- (void)hv_iterinit(rx->paren_names);
+
+ PERL_ARGS_ASSERT_REG_NAMED_BUFF_FIRSTKEY;
+
+ if ( rx && RXp_PAREN_NAMES(rx) ) {
+ (void)hv_iterinit(RXp_PAREN_NAMES(rx));
return CALLREG_NAMED_BUFF_NEXTKEY(r, NULL, flags & ~RXapif_FIRSTKEY);
} else {
Perl_reg_named_buff_nextkey(pTHX_ REGEXP * const r, const U32 flags)
{
struct regexp *const rx = (struct regexp *)SvANY(r);
- if (rx && rx->paren_names) {
- HV *hv = rx->paren_names;
+ GET_RE_DEBUG_FLAGS_DECL;
+
+ PERL_ARGS_ASSERT_REG_NAMED_BUFF_NEXTKEY;
+
+ if (rx && RXp_PAREN_NAMES(rx)) {
+ HV *hv = RXp_PAREN_NAMES(rx);
HE *temphe;
while ( (temphe = hv_iternext_flags(hv,0)) ) {
IV i;
SV* sv_dat = HeVAL(temphe);
I32 *nums = (I32*)SvPVX(sv_dat);
for ( i = 0; i < SvIVX(sv_dat); i++ ) {
- if ((I32)(rx->lastcloseparen) >= nums[i] &&
+ if ((I32)(rx->lastparen) >= nums[i] &&
rx->offs[nums[i]].start != -1 &&
rx->offs[nums[i]].end != -1)
{
I32 length;
struct regexp *const rx = (struct regexp *)SvANY(r);
- if (rx && rx->paren_names) {
+ PERL_ARGS_ASSERT_REG_NAMED_BUFF_SCALAR;
+
+ if (rx && RXp_PAREN_NAMES(rx)) {
if (flags & (RXapif_ALL | RXapif_REGNAMES_COUNT)) {
- return newSViv(HvTOTALKEYS(rx->paren_names));
+ return newSViv(HvTOTALKEYS(RXp_PAREN_NAMES(rx)));
} else if (flags & RXapif_ONE) {
ret = CALLREG_NAMED_BUFF_ALL(r, (flags | RXapif_REGNAMES));
- av = (AV*)SvRV(ret);
+ av = MUTABLE_AV(SvRV(ret));
length = av_len(av);
+ SvREFCNT_dec(ret);
return newSViv(length + 1);
} else {
Perl_croak(aTHX_ "panic: Unknown flags %d in named_buff_scalar", (int)flags);
struct regexp *const rx = (struct regexp *)SvANY(r);
AV *av = newAV();
- if (rx && rx->paren_names) {
- HV *hv= rx->paren_names;
+ PERL_ARGS_ASSERT_REG_NAMED_BUFF_ALL;
+
+ if (rx && RXp_PAREN_NAMES(rx)) {
+ HV *hv= RXp_PAREN_NAMES(rx);
HE *temphe;
(void)hv_iterinit(hv);
while ( (temphe = hv_iternext_flags(hv,0)) ) {
SV* sv_dat = HeVAL(temphe);
I32 *nums = (I32*)SvPVX(sv_dat);
for ( i = 0; i < SvIVX(sv_dat); i++ ) {
- if ((I32)(rx->lastcloseparen) >= nums[i] &&
+ if ((I32)(rx->lastparen) >= nums[i] &&
rx->offs[nums[i]].start != -1 &&
rx->offs[nums[i]].end != -1)
{
}
}
- return newRV((SV*)av);
+ return newRV_noinc(MUTABLE_SV(av));
}
void
char *s = NULL;
I32 i = 0;
I32 s1, t1;
+
+ PERL_ARGS_ASSERT_REG_NUMBERED_BUFF_FETCH;
if (!rx->subbeg) {
sv_setsv(sv,&PL_sv_undef);
Perl_reg_numbered_buff_store(pTHX_ REGEXP * const rx, const I32 paren,
SV const * const value)
{
+ PERL_ARGS_ASSERT_REG_NUMBERED_BUFF_STORE;
+
PERL_UNUSED_ARG(rx);
PERL_UNUSED_ARG(paren);
PERL_UNUSED_ARG(value);
if (!PL_localizing)
- Perl_croak(aTHX_ PL_no_modify);
+ Perl_croak(aTHX_ "%s", PL_no_modify);
}
I32
I32 i;
I32 s1, t1;
+ PERL_ARGS_ASSERT_REG_NUMBERED_BUFF_LENGTH;
+
/* Some of this code was originally in C<Perl_magic_len> in F<mg.c> */
switch (paren) {
/* $` / ${^PREMATCH} */
goto getlen;
} else {
if (ckWARN(WARN_UNINITIALIZED))
- report_uninit((SV*)sv);
+ report_uninit((const SV *)sv);
return 0;
}
}
SV*
Perl_reg_qr_package(pTHX_ REGEXP * const rx)
{
+ PERL_ARGS_ASSERT_REG_QR_PACKAGE;
PERL_UNUSED_ARG(rx);
- return NULL;
+ if (0) /* NOTE(review): constant-false test, so the NULL return below is never taken -- presumably deliberate; confirm before simplifying */
+ return NULL;
+ else
+ return newSVpvs("Regexp"); /* rx is unused: the result is always a new SV built from the literal "Regexp" */
}
/* Scans the name of a named buffer from the pattern.
#define REG_RSN_RETURN_DATA 2
STATIC SV*
-S_reg_scan_name(pTHX_ RExC_state_t *pRExC_state, U32 flags) {
+S_reg_scan_name(pTHX_ RExC_state_t *pRExC_state, U32 flags)
+{
char *name_start = RExC_parse;
+ PERL_ARGS_ASSERT_REG_SCAN_NAME;
+
if (isIDFIRST_lazy_if(RExC_parse, UTF)) {
/* skip IDFIRST by using do...while */
if (UTF)
char * const oregcomp_parse = RExC_parse;
GET_RE_DEBUG_FLAGS_DECL;
+
+ PERL_ARGS_ASSERT_REG;
DEBUG_PARSE("reg ");
*flagp = 0; /* Tentatively. */
"panic: reg_scan_name returned NULL");
if (!RExC_paren_names) {
RExC_paren_names= newHV();
- sv_2mortal((SV*)RExC_paren_names);
+ sv_2mortal(MUTABLE_SV(RExC_paren_names));
#ifdef DEBUGGING
RExC_paren_name_list= newAV();
- sv_2mortal((SV*)RExC_paren_name_list);
+ sv_2mortal(MUTABLE_SV(RExC_paren_name_list));
#endif
}
he_str = hv_fetch_ent( RExC_paren_names, svname, 1, 0 );
pv = (I32*)SvGROW(sv_dat, SvCUR(sv_dat) + sizeof(I32)+1);
SvCUR_set(sv_dat, SvCUR(sv_dat) + sizeof(I32));
pv[count] = RExC_npar;
- SvIVX(sv_dat)++;
+ SvIV_set(sv_dat, SvIVX(sv_dat) + 1);
}
} else {
(void)SvUPGRADE(sv_dat,SVt_PVNV);
sv_setpvn(sv_dat, (char *)&(RExC_npar), sizeof(I32));
SvIOK_on(sv_dat);
- SvIVX(sv_dat)= 1;
+ SvIV_set(sv_dat, 1);
}
#ifdef DEBUGGING
if (!av_store(RExC_paren_name_list, RExC_npar, SvREFCNT_inc(svname)))
RExC_seen |= REG_SEEN_LOOKBEHIND;
RExC_parse++;
case '=': /* (?=...) */
+ RExC_seen_zerolen++;
+ break;
case '!': /* (?!...) */
RExC_seen_zerolen++;
if (*RExC_parse == ')') {
/* Pick up the branches, linking them together. */
parse_start = RExC_parse; /* MJD */
br = regbranch(pRExC_state, &flags, 1,depth+1);
+
+ if (freeze_paren) {
+ if (RExC_npar > after_freeze)
+ after_freeze = RExC_npar;
+ RExC_npar = freeze_paren;
+ }
+
/* branch_len = (paren != 0); */
if (br == NULL)
register regnode *latest;
I32 flags = 0, c = 0;
GET_RE_DEBUG_FLAGS_DECL;
+
+ PERL_ARGS_ASSERT_REGBRANCH;
+
DEBUG_PARSE("brnc");
if (first)
char *parse_start;
const char *maxpos = NULL;
GET_RE_DEBUG_FLAGS_DECL;
+
+ PERL_ARGS_ASSERT_REGPIECE;
+
DEBUG_PARSE("piec");
ret = regatom(pRExC_state, &flags,depth+1);
*flagp = WORST;
if (max > 0)
*flagp |= HASWIDTH;
- if (max && max < min)
+ if (max < min)
vFAIL("Can't do {n,m} with n > m");
if (!SIZE_ONLY) {
ARG1_SET(ret, (U16)min);
/* reg_namedseq(pRExC_state,UVp)
This is expected to be called by a parser routine that has
- recognized'\N' and needs to handle the rest. RExC_parse is
+ recognized '\N' and needs to handle the rest. RExC_parse is
expected to point at the first char following the N at the time
of the call.
be returned to indicate failure. (This will NOT be a valid pointer
to a regnode.)
- If value is null then it is assumed that we are parsing normal text
+ If valuep is null then it is assumed that we are parsing normal text
and inserts a new EXACT node into the program containing the resolved
string and returns a pointer to the new node. If the string is
zerolength a NOTHING node is emitted.
-
+
On success RExC_parse is set to the char following the endbrace.
Parsing failures will generate a fatal errorvia vFAIL(...)
*/
STATIC regnode *
-S_reg_namedseq(pTHX_ RExC_state_t *pRExC_state, UV *valuep)
+S_reg_namedseq(pTHX_ RExC_state_t *pRExC_state, UV *valuep, I32 *flagp)
{
char * name; /* start of the content of the name */
char * endbrace; /* endbrace following the name */
STRLEN len; /* this has various purposes throughout the code */
bool cached = 0; /* if this is true then we shouldn't refcount dev sv_str */
regnode *ret = NULL;
-
- if (*RExC_parse != '{') {
- vFAIL("Missing braces on \\N{}");
+
+ PERL_ARGS_ASSERT_REG_NAMEDSEQ;
+
+ if (*RExC_parse != '{' ||
+ (*RExC_parse == '{' && RExC_parse[1]
+ && strchr("0123456789", RExC_parse[1])))
+ {
+ GET_RE_DEBUG_FLAGS_DECL;
+ if (valuep)
+ /* no bare \N in a charclass */
+ vFAIL("Missing braces on \\N{}");
+ GET_RE_DEBUG_FLAGS;
+ nextchar(pRExC_state);
+ ret = reg_node(pRExC_state, REG_ANY);
+ *flagp |= HASWIDTH|SIMPLE;
+ RExC_naughty++;
+ RExC_parse--;
+ Set_Node_Length(ret, 1); /* MJD */
+ return ret;
}
name = RExC_parse+1;
endbrace = strchr(RExC_parse, '}');
| PERL_SCAN_DISALLOW_PREFIX
| (SIZE_ONLY ? PERL_SCAN_SILENT_ILLDIGIT : 0);
UV cp;
- char string;
len = (STRLEN)(endbrace - name - 2);
cp = grok_hex(name + 2, &len, &fl, NULL);
if ( len != (STRLEN)(endbrace - name - 2) ) {
cp = 0xFFFD;
}
- if (cp > 0xff)
- RExC_utf8 = 1;
if ( valuep ) {
+ if (cp > 0xff) RExC_utf8 = 1;
*valuep = cp;
return NULL;
}
- string = (char)cp;
- sv_str= newSVpvn(&string, 1);
+
+ /* Need to convert to utf8 if either: won't fit into a byte, or the re
+ * is going to be in utf8 and the representation changes under utf8. */
+ if (cp > 0xff || (RExC_utf8 && ! UNI_IS_INVARIANT(cp))) {
+ U8 string[UTF8_MAXBYTES+1];
+ U8 *tmps;
+ RExC_utf8 = 1;
+ tmps = uvuni_to_utf8(string, cp);
+ sv_str = newSVpvn_utf8((char*)string, tmps - string, TRUE);
+ } else { /* Otherwise, no need for utf8, can skip that step */
+ char string;
+ string = (char)cp;
+ sv_str= newSVpvn(&string, 1);
+ }
} else {
/* fetch the charnames handler for this scope */
HV * const table = GvHV(PL_hintgv);
if (!RExC_charnames) {
/* make sure our cache is allocated */
RExC_charnames = newHV();
- sv_2mortal((SV*)RExC_charnames);
+ sv_2mortal(MUTABLE_SV(RExC_charnames));
}
/* see if we have looked this one up before */
he_str = hv_fetch_ent( RExC_charnames, sv_name, 0, 0 );
Set_Node_Cur_Length(ret); /* MJD */
RExC_parse--;
nextchar(pRExC_state);
- } else {
+ } else { /* zero length */
ret = reg_node(pRExC_state,NOTHING);
}
if (!cached) {
const STRLEN newlen = SvCUR(sv);
UV uv = UNICODE_REPLACEMENT;
+ PERL_ARGS_ASSERT_REG_RECODE;
+
if (newlen)
uv = SvUTF8(sv)
? utf8n_to_uvchr((U8*)s, newlen, &numlen, UTF8_ALLOW_DEFAULT)
DEBUG_PARSE("atom");
*flagp = WORST; /* Tentatively. */
+ PERL_ARGS_ASSERT_REGATOM;
tryagain:
switch ((U8)*RExC_parse) {
}
break;
case 'N':
- /* Handle \N{NAME} here and not below because it can be
+ /* Handle \N and \N{NAME} here and not below because it can be
multicharacter. join_exact() will join them up later on.
Also this makes sure that things like /\N{BLAH}+/ and
\N{BLAH} being multi char Just Happen. dmq*/
++RExC_parse;
- ret= reg_namedseq(pRExC_state, NULL);
+ ret= reg_namedseq(pRExC_state, NULL, flagp);
break;
case 'k': /* Handle \k<NAME> and \k'NAME' */
parse_named_seq:
I32 flags = 0;
STRLEN numlen = 3;
ender = grok_oct(p, &numlen, &flags, NULL);
+
+ /* An octal above 0xff is interpreted differently
+ * depending on if the re is in utf8 or not. If it
+ * is in utf8, the value will be itself, otherwise
+ * it is interpreted as modulo 0x100. It has been
+ * decided to discourage the use of octal above the
+ * single-byte range. For now, warn only when
+ * it ends up modulo */
+ if (SIZE_ONLY && ender >= 0x100
+ && ! UTF && ! PL_encoding
+ && ckWARN2(WARN_DEPRECATED, WARN_REGEXP)) {
+ vWARNdep(p, "Use of octal value above 377 is deprecated");
+ }
p += numlen;
}
else {
S_regwhite( RExC_state_t *pRExC_state, char *p )
{
const char *e = RExC_end;
+
+ PERL_ARGS_ASSERT_REGWHITE;
+
while (p < e) {
if (isSPACE(*p))
++p;
dVAR;
I32 namedclass = OOB_NAMEDCLASS;
+ PERL_ARGS_ASSERT_REGPPOSIXCC;
+
if (value == '[' && RExC_parse + 1 < RExC_end &&
/* I smell either [: or [= or [. -- POSIX has been here, right? */
POSIXCC(UCHARAT(RExC_parse))) {
S_checkposixcc(pTHX_ RExC_state_t *pRExC_state)
{
dVAR;
+
+ PERL_ARGS_ASSERT_CHECKPOSIXCC;
+
if (POSIXCC(UCHARAT(RExC_parse))) {
const char *s = RExC_parse;
const char c = *s++;
what = WORD; \
break
+/*
+ We don't use PERL_LEGACY_UNICODE_CHARCLASS_MAPPINGS as the direct test
+ so that it is possible to override the option here without having to
+ rebuild the entire core, as we are required to do if we change regcomp.h,
+ which is where PERL_LEGACY_UNICODE_CHARCLASS_MAPPINGS is defined.
+*/
+#if PERL_LEGACY_UNICODE_CHARCLASS_MAPPINGS
+#define BROKEN_UNICODE_CHARCLASS_MAPPINGS
+#endif /* PERL_LEGACY_UNICODE_CHARCLASS_MAPPINGS */
+
+#ifdef BROKEN_UNICODE_CHARCLASS_MAPPINGS
+#define POSIX_CC_UNI_NAME(CCNAME) CCNAME /* legacy mappings: the class name is used unchanged */
+#else
+#define POSIX_CC_UNI_NAME(CCNAME) "Posix" CCNAME /* adjacent string literals concatenate, e.g. "Posix" "Alnum" */
+#endif /* BROKEN_UNICODE_CHARCLASS_MAPPINGS */
+
/*
parse a class specification and produce either an ANYOF node that
matches the pattern or if the pattern matches a single char only and
case we need to change the emitted regop to an EXACT. */
const char * orig_parse = RExC_parse;
GET_RE_DEBUG_FLAGS_DECL;
+
+ PERL_ARGS_ASSERT_REGCLASS;
#ifndef DEBUGGING
PERL_UNUSED_ARG(depth);
#endif
from earlier versions, OTOH that behaviour was broken
as well. */
UV v; /* value is register so we cant & it /grrr */
- if (reg_namedseq(pRExC_state, &v)) {
+ if (reg_namedseq(pRExC_state, &v, NULL)) {
goto parseit;
}
value= v;
* A similar issue a little earlier when switching on value.
* --jhi */
switch ((I32)namedclass) {
+
+ case _C_C_T_(ALNUMC, isALNUMC(value), POSIX_CC_UNI_NAME("Alnum"));
+ case _C_C_T_(ALPHA, isALPHA(value), POSIX_CC_UNI_NAME("Alpha"));
+ case _C_C_T_(BLANK, isBLANK(value), POSIX_CC_UNI_NAME("Blank"));
+ case _C_C_T_(CNTRL, isCNTRL(value), POSIX_CC_UNI_NAME("Cntrl"));
+ case _C_C_T_(GRAPH, isGRAPH(value), POSIX_CC_UNI_NAME("Graph"));
+ case _C_C_T_(LOWER, isLOWER(value), POSIX_CC_UNI_NAME("Lower"));
+ case _C_C_T_(PRINT, isPRINT(value), POSIX_CC_UNI_NAME("Print"));
+ case _C_C_T_(PSXSPC, isPSXSPC(value), POSIX_CC_UNI_NAME("Space"));
+ case _C_C_T_(PUNCT, isPUNCT(value), POSIX_CC_UNI_NAME("Punct"));
+ case _C_C_T_(UPPER, isUPPER(value), POSIX_CC_UNI_NAME("Upper"));
+#ifdef BROKEN_UNICODE_CHARCLASS_MAPPINGS
case _C_C_T_(ALNUM, isALNUM(value), "Word");
- case _C_C_T_(ALNUMC, isALNUMC(value), "Alnum");
- case _C_C_T_(ALPHA, isALPHA(value), "Alpha");
- case _C_C_T_(BLANK, isBLANK(value), "Blank");
- case _C_C_T_(CNTRL, isCNTRL(value), "Cntrl");
- case _C_C_T_(GRAPH, isGRAPH(value), "Graph");
- case _C_C_T_(LOWER, isLOWER(value), "Lower");
- case _C_C_T_(PRINT, isPRINT(value), "Print");
- case _C_C_T_(PSXSPC, isPSXSPC(value), "Space");
- case _C_C_T_(PUNCT, isPUNCT(value), "Punct");
case _C_C_T_(SPACE, isSPACE(value), "SpacePerl");
- case _C_C_T_(UPPER, isUPPER(value), "Upper");
+#else
+ case _C_C_T_(SPACE, isSPACE(value), "PerlSpace");
+ case _C_C_T_(ALNUM, isALNUM(value), "PerlWord");
+#endif
case _C_C_T_(XDIGIT, isXDIGIT(value), "XDigit");
case _C_C_T_NOLOC_(VERTWS, is_VERTWS_latin1(&value), "VertSpace");
case _C_C_T_NOLOC_(HORIZWS, is_HORIZWS_latin1(&value), "HorizSpace");
ANYOF_BITMAP_SET(ret, value);
}
yesno = '+';
- what = "Digit";
+ what = POSIX_CC_UNI_NAME("Digit");
break;
case ANYOF_NDIGIT:
if (LOC)
ANYOF_BITMAP_SET(ret, value);
}
yesno = '!';
- what = "Digit";
+ what = POSIX_CC_UNI_NAME("Digit");
break;
case ANYOF_MAX:
/* this is to handle \p and \P */
*STRING(ret)= (char)value;
STR_LEN(ret)= 1;
RExC_emit += STR_SZ(1);
+ if (listsv) {
+ SvREFCNT_dec(listsv);
+ }
return ret;
}
/* optimize case-insensitive simple patterns (e.g. /[a-z]/i) */
* used later (regexec.c:S_reginclass()). */
av_store(av, 0, listsv);
av_store(av, 1, NULL);
- av_store(av, 2, (SV*)unicode_alternate);
- rv = newRV_noinc((SV*)av);
+ av_store(av, 2, MUTABLE_SV(unicode_alternate));
+ rv = newRV_noinc(MUTABLE_SV(av));
n = add_data(pRExC_state, 1, "s");
RExC_rxi->data->data[n] = (void*)rv;
ARG_SET(ret, n);
S_reg_skipcomment(pTHX_ RExC_state_t *pRExC_state)
{
bool ended = 0;
+
+ PERL_ARGS_ASSERT_REG_SKIPCOMMENT;
+
while (RExC_parse < RExC_end)
if (*RExC_parse++ == '\n') {
ended = 1;
{
char* const retval = RExC_parse++;
+ PERL_ARGS_ASSERT_NEXTCHAR;
+
for (;;) {
if (*RExC_parse == '(' && RExC_parse[1] == '?' &&
RExC_parse[2] == '#') {
regnode * const ret = RExC_emit;
GET_RE_DEBUG_FLAGS_DECL;
+ PERL_ARGS_ASSERT_REG_NODE;
+
if (SIZE_ONLY) {
SIZE_ALIGN(RExC_size);
RExC_size += 1;
regnode * const ret = RExC_emit;
GET_RE_DEBUG_FLAGS_DECL;
+ PERL_ARGS_ASSERT_REGANODE;
+
if (SIZE_ONLY) {
SIZE_ALIGN(RExC_size);
RExC_size += 2;
S_reguni(pTHX_ const RExC_state_t *pRExC_state, UV uv, char* s)
{
dVAR;
+
+ PERL_ARGS_ASSERT_REGUNI;
+
return SIZE_ONLY ? UNISKIP(uv) : (uvchr_to_utf8((U8*)s, uv) - (U8*)s);
}
const int offset = regarglen[(U8)op];
const int size = NODE_STEP_REGNODE + offset;
GET_RE_DEBUG_FLAGS_DECL;
+
+ PERL_ARGS_ASSERT_REGINSERT;
PERL_UNUSED_ARG(depth);
/* (PL_regkind[(U8)op] == CURLY ? EXTRA_STEP_2ARGS : 0); */
DEBUG_PARSE_FMT("inst"," - %s",PL_reg_name[op]);
dVAR;
register regnode *scan;
GET_RE_DEBUG_FLAGS_DECL;
+
+ PERL_ARGS_ASSERT_REGTAIL;
#ifndef DEBUGGING
PERL_UNUSED_ARG(depth);
#endif
#ifdef EXPERIMENTAL_INPLACESCAN
I32 min = 0;
#endif
-
GET_RE_DEBUG_FLAGS_DECL;
+ PERL_ARGS_ASSERT_REGTAIL_STUDY;
+
if (SIZE_ONLY)
return exact;
STATIC I32
S_regcurly(register const char *s)
{
+ PERL_ARGS_ASSERT_REGCURLY;
+
if (*s++ != '{')
return FALSE;
if (!isDIGIT(*s))
- regdump - dump a regexp onto Perl_debug_log in vaguely comprehensible form
*/
#ifdef DEBUGGING
-void
-S_regdump_extflags(pTHX_ const char *lead, const U32 flags) {
+static void
+S_regdump_extflags(pTHX_ const char *lead, const U32 flags)
+{
int bit;
int set=0;
+
for (bit=0; bit<32; bit++) {
if (flags & (1<<bit)) {
if (!set++ && lead)
RXi_GET_DECL(r,ri);
GET_RE_DEBUG_FLAGS_DECL;
+ PERL_ARGS_ASSERT_REGDUMP;
+
(void)dumpuntil(r, ri->program, ri->program + 1, NULL, NULL, sv, 0, 0);
/* Header fields of interest. */
PerlIO_printf(Perl_debug_log, "\n");
DEBUG_FLAGS_r(regdump_extflags("r->extflags: ",r->extflags));
#else
+ PERL_ARGS_ASSERT_REGDUMP;
PERL_UNUSED_CONTEXT;
PERL_UNUSED_ARG(r);
#endif /* DEBUGGING */
/*
- regprop - printable representation of opcode
*/
+/* Used by the ANYOF printing code below: when do_sep is true, append
+ * "][" (wrapped in PL_colors[1] / PL_colors[0]) to sv, re-emit "^" if
+ * ANYOF_INVERT is set in flags, and reset do_sep to 0.  do_sep must be
+ * a modifiable lvalue; flags is parenthesized so any expression may be
+ * passed for it. */
+#define EMIT_ANYOF_TEST_SEPARATOR(do_sep,sv,flags) \
+STMT_START { \
+ if (do_sep) { \
+ Perl_sv_catpvf(aTHX_ sv,"%s][%s",PL_colors[1],PL_colors[0]); \
+ if ((flags) & ANYOF_INVERT) \
+ /* make sure the invert info is in each section */ \
+ sv_catpvs(sv, "^"); \
+ do_sep = 0; \
+ } \
+} STMT_END
+
void
Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o)
{
RXi_GET_DECL(prog,progi);
GET_RE_DEBUG_FLAGS_DECL;
+ PERL_ARGS_ASSERT_REGPROP;
- sv_setpvn(sv, "", 0);
+ sv_setpvs(sv, "");
if (OP(o) > REGNODE_MAX) /* regnode.type is unsigned */
/* It would be nice to FAIL() here, but this may be called from
Perl_sv_catpvf(aTHX_ sv, "[%d/%d]", o->flags & 0xf, o->flags>>4);
else if (k == REF || k == OPEN || k == CLOSE || k == GROUPP || OP(o)==ACCEPT) {
Perl_sv_catpvf(aTHX_ sv, "%d", (int)ARG(o)); /* Parenth number */
- if ( prog->paren_names ) {
+ if ( RXp_PAREN_NAMES(prog) ) {
if ( k != REF || OP(o) < NREF) {
- AV *list= (AV *)progi->data->data[progi->name_list_idx];
+ AV *list= MUTABLE_AV(progi->data->data[progi->name_list_idx]);
SV **name= av_fetch(list, ARG(o), 0 );
if (name)
Perl_sv_catpvf(aTHX_ sv, " '%"SVf"'", SVfARG(*name));
}
else {
- AV *list= (AV *)progi->data->data[ progi->name_list_idx ];
- SV *sv_dat=(SV*)progi->data->data[ ARG( o ) ];
+ AV *list= MUTABLE_AV(progi->data->data[ progi->name_list_idx ]);
+ SV *sv_dat= MUTABLE_SV(progi->data->data[ ARG( o ) ]);
I32 *nums=(I32*)SvPVX(sv_dat);
SV **name= av_fetch(list, nums[0], 0 );
I32 n;
else if (k == VERB) {
if (!o->flags)
Perl_sv_catpvf(aTHX_ sv, ":%"SVf,
- SVfARG((SV*)progi->data->data[ ARG( o ) ]));
+ SVfARG((MUTABLE_SV(progi->data->data[ ARG( o ) ]))));
} else if (k == LOGICAL)
Perl_sv_catpvf(aTHX_ sv, "[%d]", o->flags); /* 2: embedded, otherwise 1 */
else if (k == FOLDCHAR)
else if (k == ANYOF) {
int i, rangestart = -1;
const U8 flags = ANYOF_FLAGS(o);
+ int do_sep = 0;
/* Should be synchronized with * ANYOF_ #xdefines in regcomp.h */
static const char * const anyofs[] = {
"[:^alpha:]",
"[:ascii:]",
"[:^ascii:]",
- "[:ctrl:]",
- "[:^ctrl:]",
+ "[:cntrl:]",
+ "[:^cntrl:]",
"[:graph:]",
"[:^graph:]",
"[:lower:]",
Perl_sv_catpvf(aTHX_ sv, "[%s", PL_colors[0]);
if (flags & ANYOF_INVERT)
sv_catpvs(sv, "^");
+
+ /* output what the standard cp 0-255 bitmap matches */
for (i = 0; i <= 256; i++) {
if (i < 256 && ANYOF_BITMAP_TEST(o,i)) {
if (rangestart == -1)
sv_catpvs(sv, "-");
put_byte(sv, i - 1);
}
+ do_sep = 1;
rangestart = -1;
}
}
-
+
+ EMIT_ANYOF_TEST_SEPARATOR(do_sep,sv,flags);
+ /* output any special charclass tests (used mostly under use locale) */
if (o->flags & ANYOF_CLASS)
for (i = 0; i < (int)(sizeof(anyofs)/sizeof(char*)); i++)
- if (ANYOF_CLASS_TEST(o,i))
+ if (ANYOF_CLASS_TEST(o,i)) {
sv_catpv(sv, anyofs[i]);
-
+ do_sep = 1;
+ }
+
+ EMIT_ANYOF_TEST_SEPARATOR(do_sep,sv,flags);
+
+ /* output information about the unicode matching */
if (flags & ANYOF_UNICODE)
sv_catpvs(sv, "{unicode}");
else if (flags & ANYOF_UNICODE_ALL)
if (lv) {
if (sw) {
U8 s[UTF8_MAXBYTES_CASE+1];
-
+
for (i = 0; i <= 256; i++) { /* just the first 256 */
uvchr_to_utf8(s, i);
dVAR;
struct regexp *const prog = (struct regexp *)SvANY(r);
GET_RE_DEBUG_FLAGS_DECL;
+
+ PERL_ARGS_ASSERT_RE_INTUIT_STRING;
PERL_UNUSED_CONTEXT;
DEBUG_COMPILE_r(
struct regexp *const r = (struct regexp *)SvANY(rx);
GET_RE_DEBUG_FLAGS_DECL;
+ PERL_ARGS_ASSERT_PREGFREE2;
+
if (r->mother_re) {
ReREFCNT_dec(r->mother_re);
} else {
CALLREGFREE_PVT(rx); /* free the private data */
- if (r->paren_names)
- SvREFCNT_dec(r->paren_names);
+ if (RXp_PAREN_NAMES(r))
+ SvREFCNT_dec(RXp_PAREN_NAMES(r));
}
if (r->substrs) {
if (r->anchored_substr)
if (r->saved_copy)
SvREFCNT_dec(r->saved_copy);
#endif
- Safefree(r->swap);
Safefree(r->offs);
}
REGEXP *
-Perl_reg_temp_copy (pTHX_ REGEXP *rx) {
- REGEXP *ret_x = newSV_type(SVt_REGEXP);
+Perl_reg_temp_copy (pTHX_ REGEXP *rx)
+{
+ REGEXP *ret_x = (REGEXP*) newSV_type(SVt_REGEXP);
struct regexp *ret = (struct regexp *)SvANY(ret_x);
struct regexp *const r = (struct regexp *)SvANY(rx);
register const I32 npar = r->nparens+1;
+
+ PERL_ARGS_ASSERT_REG_TEMP_COPY;
+
(void)ReREFCNT_inc(rx);
- /* FIXME ORANGE (once we start actually using the regular SV fields.) */
/* We can take advantage of the existing "copied buffer" mechanism in SVs
by pointing directly at the buffer, but flagging that the allocated
space in the copy is zero. As we've just done a struct copy, it's now
a case of zero-ing that, rather than copying the current length. */
SvPV_set(ret_x, RX_WRAPPED(rx));
- StructCopy(r, ret, regexp);
+ SvFLAGS(ret_x) |= SvFLAGS(rx) & (SVf_POK|SVp_POK|SVf_UTF8);
+ memcpy(&(ret->xpv_cur), &(r->xpv_cur),
+ sizeof(regexp) - STRUCT_OFFSET(regexp, xpv_cur));
SvLEN_set(ret_x, 0);
Newx(ret->offs, npar, regexp_paren_pair);
Copy(r->offs, ret->offs, npar, regexp_paren_pair);
ret->saved_copy = NULL;
#endif
ret->mother_re = rx;
- ret->swap = NULL;
return ret_x;
}
struct regexp *const r = (struct regexp *)SvANY(rx);
RXi_GET_DECL(r,ri);
GET_RE_DEBUG_FLAGS_DECL;
-
+
+ PERL_ARGS_ASSERT_REGFREE_INTERNAL;
+
DEBUG_COMPILE_r({
if (!PL_colorset)
reginitcolors();
case 's':
case 'S':
case 'u':
- SvREFCNT_dec((SV*)ri->data->data[n]);
+ SvREFCNT_dec(MUTABLE_SV(ri->data->data[n]));
break;
case 'f':
Safefree(ri->data->data[n]);
break;
case 'p':
- new_comppad = (AV*)ri->data->data[n];
+ new_comppad = MUTABLE_AV(ri->data->data[n]);
break;
case 'o':
if (new_comppad == NULL)
op_free((OP_4tree*)ri->data->data[n]);
PAD_RESTORE_LOCAL(old_comppad);
- SvREFCNT_dec((SV*)new_comppad);
+ SvREFCNT_dec(MUTABLE_SV(new_comppad));
new_comppad = NULL;
break;
case 'n':
}
#define sv_dup_inc(s,t) SvREFCNT_inc(sv_dup(s,t))
-#define av_dup_inc(s,t) (AV*)SvREFCNT_inc(sv_dup((SV*)s,t))
-#define hv_dup_inc(s,t) (HV*)SvREFCNT_inc(sv_dup((SV*)s,t))
+#define av_dup_inc(s,t) MUTABLE_AV(SvREFCNT_inc(sv_dup((const SV *)s,t)))
+#define hv_dup_inc(s,t) MUTABLE_HV(SvREFCNT_inc(sv_dup((const SV *)s,t)))
#define SAVEPVN(p,n) ((p) ? savepvn(p,n) : NULL)
/*
re_dup - duplicate a regexp.
- This routine is expected to clone a given regexp structure. It is not
- compiler under USE_ITHREADS.
+ This routine is expected to clone a given regexp structure. It is only
+ compiled under USE_ITHREADS.
After all of the core data stored in struct regexp is duplicated
the regexp_engine.dupe method is used to copy any private data
const struct regexp *r = (const struct regexp *)SvANY(sstr);
struct regexp *ret = (struct regexp *)SvANY(dstr);
+ PERL_ARGS_ASSERT_RE_DUP_GUTS;
+
npar = r->nparens+1;
Newx(ret->offs, npar, regexp_paren_pair);
Copy(r->offs, ret->offs, npar, regexp_paren_pair);
/* Do it this way to avoid reading from *r after the StructCopy().
That way, if any of the sv_dup_inc()s dislodge *r from the L1
cache, it doesn't matter. */
- const bool anchored = r->check_substr == r->anchored_substr;
+ const bool anchored = r->check_substr
+ ? r->check_substr == r->anchored_substr
+ : r->check_utf8 == r->anchored_utf8;
Newx(ret->substrs, 1, struct reg_substr_data);
StructCopy(r->substrs, ret->substrs, struct reg_substr_data);
ret->check_substr = ret->float_substr;
ret->check_utf8 = ret->float_utf8;
}
+ } else if (ret->check_utf8) {
+ if (anchored) {
+ ret->check_utf8 = ret->anchored_utf8;
+ } else {
+ ret->check_utf8 = ret->float_utf8;
+ }
}
}
- ret->paren_names = hv_dup_inc(ret->paren_names, param);
+ RXp_PAREN_NAMES(ret) = hv_dup_inc(RXp_PAREN_NAMES(ret), param);
if (ret->pprivate)
RXi_SET(ret,CALLREGDUPE_PVT(dstr,param));
ret->mother_re = NULL;
ret->gofs = 0;
- ret->seen_evals = 0;
}
#endif /* PERL_IN_XSUB_RE */
regexp_internal *reti;
int len, npar;
RXi_GET_DECL(r,ri);
+
+ PERL_ARGS_ASSERT_REGDUPE_INTERNAL;
npar = r->nparens+1;
len = ProgLen(ri);
- Newxc(reti, sizeof(regexp_internal) + (len+1)*sizeof(regnode), char, regexp_internal);
+ Newxc(reti, sizeof(regexp_internal) + len*sizeof(regnode), char, regexp_internal);
Copy(ri->program, reti->program, len+1, regnode);
case 'S':
case 'p': /* actually an AV, but the dup function is identical. */
case 'u': /* actually an HV, but the dup function is identical. */
- d->data[i] = sv_dup_inc((SV *)ri->data->data[i], param);
+ d->data[i] = sv_dup_inc((const SV *)ri->data->data[i], param);
break;
case 'f':
/* This is cheating. */
#endif /* USE_ITHREADS */
-/*
- reg_stringify()
-
- converts a regexp embedded in a MAGIC struct to its stringified form,
- caching the converted form in the struct and returns the cached
- string.
-
- If lp is nonnull then it is used to return the length of the
- resulting string
-
- If flags is nonnull and the returned string contains UTF8 then
- (*flags & 1) will be true.
-
- If haseval is nonnull then it is used to return whether the pattern
- contains evals.
-
- Normally called via macro:
-
- CALLREG_STRINGIFY(mg,&len,&utf8);
-
- And internally with
-
- CALLREG_AS_STR(mg,&lp,&flags,&haseval)
-
- See sv_2pv_flags() in sv.c for an example of internal usage.
-
- */
#ifndef PERL_IN_XSUB_RE
-char *
-Perl_reg_stringify(pTHX_ MAGIC *mg, STRLEN *lp, U32 *flags, I32 *haseval ) {
- dVAR;
- const REGEXP * const re = (REGEXP *)mg->mg_obj;
- if (haseval)
- *haseval = RX_SEEN_EVALS(re);
- if (flags)
- *flags = RX_UTF8(re) ? 1 : 0;
- if (lp)
- *lp = RX_WRAPLEN(re);
- return RX_WRAPPED(re);
-}
-
/*
- regnext - dig the "next" pointer out of a node
*/
SV *msv;
const char *message;
+ PERL_ARGS_ASSERT_RE_CROAK2;
+
if (l1 > 510)
l1 = 510;
if (l1 + l2 > 510)
STATIC void
S_put_byte(pTHX_ SV *sv, int c)
{
+ PERL_ARGS_ASSERT_PUT_BYTE;
+
/* Our definition of isPRINT() ignores locales, so only bytes that are
not part of UTF-8 are considered printable. I assume that the same
holds for UTF-EBCDIC.
RXi_GET_DECL(r,ri);
GET_RE_DEBUG_FLAGS_DECL;
-
+
+ PERL_ARGS_ASSERT_DUMPUNTIL;
+
#ifdef DEBUG_DUMPUNTIL
PerlIO_printf(Perl_debug_log, "--- %d : %d - %d - %d\n",indent,node-start,
last ? last-start : 0,plast ? plast-start : 0);
const reg_trie_data * const trie =
(reg_trie_data*)ri->data->data[op<AHOCORASICK ? n : ac->trie];
#ifdef DEBUGGING
- AV *const trie_words = (AV *) ri->data->data[n + TRIE_WORDS_OFFSET];
+ AV *const trie_words = MUTABLE_AV(ri->data->data[n + TRIE_WORDS_OFFSET]);
#endif
const regnode *nextbranch= NULL;
I32 word_idx;
- sv_setpvn(sv, "", 0);
+ sv_setpvs(sv, "");
for (word_idx= 0; word_idx < (I32)trie->wordcount; word_idx++) {
SV ** const elem_ptr = av_fetch(trie_words,word_idx,0);