From: Yves Orton Date: Thu, 23 Nov 2006 19:21:38 +0000 (+0100) Subject: Re: [PATCH] Cleanup regexp flags and structure X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=f8fc2ecf2fa95aa27fcef856e4853807c5dec00e;p=p5sagit%2Fp5-mst-13.2.git Re: [PATCH] Cleanup regexp flags and structure Message-ID: <9b18b3110611231021l561a9cb4te985db3f0648e097@mail.gmail.com> Attached patch completes the splitting out of the core from the regexp internal data. p4raw-id: //depot/perl@29368 --- diff --git a/regcomp.c b/regcomp.c index e3f7ee0..8af528b 100644 --- a/regcomp.c +++ b/regcomp.c @@ -102,7 +102,8 @@ typedef struct RExC_state_t { U32 flags; /* are we folding, multilining? */ char *precomp; /* uncompiled string. */ - regexp *rx; + regexp *rx; /* perl core regexp structure */ + regexp_internal *rxi; /* internal data for regexp object pprivate field */ char *start; /* Start of input for compile */ char *end; /* End of input for compile */ char *parse; /* Input-scan pointer. */ @@ -142,11 +143,12 @@ typedef struct RExC_state_t { #define RExC_flags (pRExC_state->flags) #define RExC_precomp (pRExC_state->precomp) #define RExC_rx (pRExC_state->rx) +#define RExC_rxi (pRExC_state->rxi) #define RExC_start (pRExC_state->start) #define RExC_end (pRExC_state->end) #define RExC_parse (pRExC_state->parse) #define RExC_whilem_seen (pRExC_state->whilem_seen) -#define RExC_offsets (pRExC_state->rx->offsets) /* I am not like the others */ +#define RExC_offsets (pRExC_state->rxi->offsets) /* I am not like the others */ #define RExC_emit (pRExC_state->emit) #define RExC_emit_start (pRExC_state->emit_start) #define RExC_naughty (pRExC_state->naughty) @@ -1282,7 +1284,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs trie->refcount = 1; trie->startstate = 1; trie->wordcount = word_count; - RExC_rx->data->data[ data_slot ] = (void*)trie; + RExC_rxi->data->data[ data_slot ] = (void*)trie; Newxz( trie->charmap, 256, U16 ); if (!(UTF && folder)) Newxz( trie->bitmap, ANYOF_BITMAP_SIZE, char ); @@ -2047,7 +2049,7 @@ S_make_trie_failtable(pTHX_ RExC_state_t *pRExC_state, regnode *source, regnode try 'g' and succeed, prodceding to match 'cdgu'. */ /* add a fail transition */ - reg_trie_data *trie=(reg_trie_data *)RExC_rx->data->data[ARG(source)]; + reg_trie_data *trie=(reg_trie_data *)RExC_rxi->data->data[ARG(source)]; U32 *q; const U32 ucharcount = trie->uniquecharcount; const U32 numstates = trie->statecount; @@ -2067,7 +2069,7 @@ S_make_trie_failtable(pTHX_ RExC_state_t *pRExC_state, regnode *source, regnode ARG_SET( stclass, data_slot ); Newxz( aho, 1, reg_ac_data ); - RExC_rx->data->data[ data_slot ] = (void*)aho; + RExC_rxi->data->data[ data_slot ] = (void*)aho; aho->trie=trie; aho->states=(reg_trie_state *)savepvn((const char*)trie->states, numstates * sizeof(reg_trie_state)); @@ -2727,7 +2729,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, end = RExC_close_parens[paren-1]; } else { paren = 0; - start = RExC_rx->program + 1; + start = RExC_rxi->program + 1; end = RExC_opend; } if (!recursed) { @@ -3687,7 +3689,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, check there too. */ regnode *trie_node= scan; regnode *tail= regnext(scan); - reg_trie_data *trie = (reg_trie_data*)RExC_rx->data->data[ ARG(scan) ]; + reg_trie_data *trie = (reg_trie_data*)RExC_rxi->data->data[ ARG(scan) ]; I32 max1 = 0, min1 = I32_MAX; struct regnode_charclass_class accum; @@ -3802,7 +3804,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, } #else else if (PL_regkind[OP(scan)] == TRIE) { - reg_trie_data *trie = (reg_trie_data*)RExC_rx->data->data[ ARG(scan) ]; + reg_trie_data *trie = (reg_trie_data*)RExC_rxi->data->data[ ARG(scan) ]; U8*bang=NULL; min += trie->minlen; @@ -3861,22 +3863,22 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, STATIC I32 S_add_data(RExC_state_t *pRExC_state, I32 n, const char *s) { - if (RExC_rx->data) { - const U32 count = RExC_rx->data->count; - Renewc(RExC_rx->data, - sizeof(*RExC_rx->data) + sizeof(void*) * (count + n - 1), + if (RExC_rxi->data) { + const U32 count = RExC_rxi->data->count; + Renewc(RExC_rxi->data, + sizeof(*RExC_rxi->data) + sizeof(void*) * (count + n - 1), char, struct reg_data); - Renew(RExC_rx->data->what, count + n, U8); - RExC_rx->data->count += n; + Renew(RExC_rxi->data->what, count + n, U8); + RExC_rxi->data->count += n; } else { - Newxc(RExC_rx->data, sizeof(*RExC_rx->data) + sizeof(void*) * (n - 1), + Newxc(RExC_rxi->data, sizeof(*RExC_rxi->data) + sizeof(void*) * (n - 1), char, struct reg_data); - Newx(RExC_rx->data->what, n, U8); - RExC_rx->data->count = n; + Newx(RExC_rxi->data->what, n, U8); + RExC_rxi->data->count = n; } - Copy(s, RExC_rx->data->what + RExC_rx->data->count - n, n, U8); - return RExC_rx->data->count - n; + Copy(s, RExC_rxi->data->what + RExC_rxi->data->count - n, n, U8); + return RExC_rxi->data->count - n; } #ifndef PERL_IN_XSUB_RE @@ -3972,6 +3974,7 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm) #endif BEGIN_BLOCK register regexp *r; + register regexp_internal *ri; regnode *scan; regnode *first; I32 flags; @@ -4059,19 +4062,21 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm) /* Allocate space and zero-initialize. Note, the two step process of zeroing when in debug mode, thus anything assigned has to happen after that */ - Newxc(r, sizeof(regexp) + (unsigned)RExC_size * sizeof(regnode), - char, regexp); - if (r == NULL) + Newxz(r, 1, regexp); + Newxc(ri, sizeof(regexp_internal) + (unsigned)RExC_size * sizeof(regnode), + char, regexp_internal); + if ( r == NULL || ri == NULL ) FAIL("Regexp out of space"); #ifdef DEBUGGING /* avoid reading uninitialized memory in DEBUGGING code in study_chunk() */ - Zero(r, sizeof(regexp) + (unsigned)RExC_size * sizeof(regnode), char); + Zero(ri, sizeof(regexp_internal) + (unsigned)RExC_size * sizeof(regnode), char); #else - /* bulk initialize fields with 0. */ - Zero(r, sizeof(regexp), char); + /* bulk initialize base fields with 0. */ + Zero(ri, sizeof(regexp_internal), char); #endif /* non-zero initialization begins here */ + RXi_SET( r, ri ); r->engine= RE_ENGINE_PTR; r->refcnt = 1; r->prelen = xend - exp; @@ -4088,16 +4093,17 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm) } /* Useful during FAIL. */ - Newxz(r->offsets, 2*RExC_size+1, U32); /* MJD 20001228 */ - if (r->offsets) { - r->offsets[0] = RExC_size; + Newxz(ri->offsets, 2*RExC_size+1, U32); /* MJD 20001228 */ + if (ri->offsets) { + ri->offsets[0] = RExC_size; } DEBUG_OFFSETS_r(PerlIO_printf(Perl_debug_log, "%s %"UVuf" bytes for offset annotations.\n", - r->offsets ? "Got" : "Couldn't get", + ri->offsets ? "Got" : "Couldn't get", (UV)((2*RExC_size+1) * sizeof(U32)))); RExC_rx = r; + RExC_rxi = ri; /* Second pass: emit code. */ RExC_flags = pm->op_pmflags; /* don't let top level (?i) bleed */ @@ -4106,17 +4112,16 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm) RExC_naughty = 0; RExC_npar = 1; RExC_cpar = 1; - RExC_emit_start = r->program; - RExC_emit = r->program; + RExC_emit_start = ri->program; + RExC_emit = ri->program; #ifdef DEBUGGING /* put a sentinal on the end of the program so we can check for overwrites */ - r->program[RExC_size].type = 255; + ri->program[RExC_size].type = 255; #endif /* Store the count of eval-groups for security checks: */ RExC_emit->next_off = (RExC_seen_evals > (I32)U16_MAX) ? U16_MAX : (U16)RExC_seen_evals; REGC((U8)REG_MAGIC, (char*) RExC_emit++); - r->data = 0; if (reg(pRExC_state, 0, &flags,1) == NULL) return(NULL); @@ -4161,10 +4166,10 @@ reStudy: pm->op_pmflags = RExC_flags; if (UTF) r->extflags |= RXf_UTF8; /* Unicode in it? */ - r->regstclass = NULL; + ri->regstclass = NULL; if (RExC_naughty >= 10) /* Probably an expensive pattern. */ r->intflags |= PREGf_NAUGHTY; - scan = r->program + 1; /* First BRANCH. */ + scan = ri->program + 1; /* First BRANCH. */ /* testing for BRANCH here tells us whether there is "must appear" data in the pattern. If there is then we can use it for optimisations */ @@ -4207,11 +4212,11 @@ reStudy: if (OP(first) == EXACT) NOOP; /* Empty, get anchored substr later. */ else if ((OP(first) == EXACTF || OP(first) == EXACTFL)) - r->regstclass = first; + ri->regstclass = first; } #ifdef TRIE_STCLASS else if (PL_regkind[OP(first)] == TRIE && - ((reg_trie_data *)r->data->data[ ARG(first) ])->minlen>0) + ((reg_trie_data *)ri->data->data[ ARG(first) ])->minlen>0) { regnode *trie_op; /* this can happen only on restudy */ @@ -4228,14 +4233,14 @@ reStudy: } OP(trie_op)+=2; make_trie_failtable(pRExC_state, (regnode *)first, trie_op, 0); - r->regstclass = trie_op; + ri->regstclass = trie_op; } #endif else if (strchr((const char*)PL_simple,OP(first))) - r->regstclass = first; + ri->regstclass = first; else if (PL_regkind[OP(first)] == BOUND || PL_regkind[OP(first)] == NBOUND) - r->regstclass = first; + ri->regstclass = first; else if (PL_regkind[OP(first)] == BOL) { r->extflags |= (OP(first) == MBOL ? RXf_ANCH_MBOL @@ -4302,7 +4307,7 @@ reStudy: data.last_found = newSVpvs(""); data.longest = &(data.longest_fixed); first = scan; - if (!r->regstclass) { + if (!ri->regstclass) { cl_init(pRExC_state, &ch_class); data.start_class = &ch_class; stclass_flag = SCF_DO_STCLASS_AND; @@ -4420,9 +4425,9 @@ reStudy: SvREFCNT_dec(data.longest_fixed); longest_fixed_length = 0; } - if (r->regstclass - && (OP(r->regstclass) == REG_ANY || OP(r->regstclass) == SANY)) - r->regstclass = NULL; + if (ri->regstclass + && (OP(ri->regstclass) == REG_ANY || OP(ri->regstclass) == SANY)) + ri->regstclass = NULL; if ((!(r->anchored_substr || r->anchored_utf8) || r->anchored_offset) && stclass_flag && !(data.start_class->flags & ANYOF_EOS) @@ -4430,12 +4435,12 @@ reStudy: { const I32 n = add_data(pRExC_state, 1, "f"); - Newx(RExC_rx->data->data[n], 1, + Newx(RExC_rxi->data->data[n], 1, struct regnode_charclass_class); StructCopy(data.start_class, - (struct regnode_charclass_class*)RExC_rx->data->data[n], + (struct regnode_charclass_class*)RExC_rxi->data->data[n], struct regnode_charclass_class); - r->regstclass = (regnode*)RExC_rx->data->data[n]; + ri->regstclass = (regnode*)RExC_rxi->data->data[n]; r->intflags &= ~PREGf_SKIP; /* Used in find_byclass(). */ DEBUG_COMPILE_r({ SV *sv = sv_newmortal(); regprop(r, sv, (regnode*)data.start_class); @@ -4482,7 +4487,7 @@ reStudy: DEBUG_PARSE_r(PerlIO_printf(Perl_debug_log, "\nMulti Top Level\n")); - scan = r->program + 1; + scan = ri->program + 1; cl_init(pRExC_state, &ch_class); data.start_class = &ch_class; data.last_closep = &last_close; @@ -4500,12 +4505,12 @@ reStudy: { const I32 n = add_data(pRExC_state, 1, "f"); - Newx(RExC_rx->data->data[n], 1, + Newx(RExC_rxi->data->data[n], 1, struct regnode_charclass_class); StructCopy(data.start_class, - (struct regnode_charclass_class*)RExC_rx->data->data[n], + (struct regnode_charclass_class*)RExC_rxi->data->data[n], struct regnode_charclass_class); - r->regstclass = (regnode*)RExC_rx->data->data[n]; + ri->regstclass = (regnode*)RExC_rxi->data->data[n]; r->intflags &= ~PREGf_SKIP; /* Used in find_byclass(). */ DEBUG_COMPILE_r({ SV* sv = sv_newmortal(); regprop(r, sv, (regnode*)data.start_class); @@ -4556,15 +4561,15 @@ reStudy: PerlIO_printf(Perl_debug_log,"Final program:\n"); regdump(r); }); - DEBUG_OFFSETS_r(if (r->offsets) { - const U32 len = r->offsets[0]; + DEBUG_OFFSETS_r(if (ri->offsets) { + const U32 len = ri->offsets[0]; U32 i; GET_RE_DEBUG_FLAGS_DECL; - PerlIO_printf(Perl_debug_log, "Offsets: [%"UVuf"]\n\t", (UV)r->offsets[0]); + PerlIO_printf(Perl_debug_log, "Offsets: [%"UVuf"]\n\t", (UV)ri->offsets[0]); for (i = 1; i <= len; i++) { - if (r->offsets[i*2-1] || r->offsets[i*2]) + if (ri->offsets[i*2-1] || ri->offsets[i*2]) PerlIO_printf(Perl_debug_log, "%"UVuf":%"UVuf"[%"UVuf"] ", - (UV)i, (UV)r->offsets[i*2-1], (UV)r->offsets[i*2]); + (UV)i, (UV)ri->offsets[i*2-1], (UV)ri->offsets[i*2]); } PerlIO_printf(Perl_debug_log, "\n"); }); @@ -4854,7 +4859,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth) if (start_arg) { SV *sv = newSVpvn( start_arg, RExC_parse - start_arg); ARG(ret) = add_data( pRExC_state, 1, "S" ); - RExC_rx->data->data[ARG(ret)]=(void*)sv; + RExC_rxi->data->data[ARG(ret)]=(void*)sv; ret->flags = 0; } else { ret->flags = 1; @@ -5100,9 +5105,9 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth) LEAVE; n = add_data(pRExC_state, 3, "nop"); - RExC_rx->data->data[n] = (void*)rop; - RExC_rx->data->data[n+1] = (void*)sop; - RExC_rx->data->data[n+2] = (void*)pad; + RExC_rxi->data->data[n] = (void*)rop; + RExC_rxi->data->data[n+1] = (void*)sop; + RExC_rxi->data->data[n+2] = (void*)pad; SvREFCNT_dec(sv); } else { /* First pass */ @@ -5163,7 +5168,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth) RExC_parse++; if (!SIZE_ONLY) { num = add_data( pRExC_state, 1, "S" ); - RExC_rx->data->data[num]=(void*)sv_dat; + RExC_rxi->data->data[num]=(void*)sv_dat; SvREFCNT_inc(sv_dat); } ret = reganode(pRExC_state,NGROUPP,num); @@ -6319,7 +6324,7 @@ tryagain: if (!SIZE_ONLY) { num = add_data( pRExC_state, 1, "S" ); ARG_SET(ret,num); - RExC_rx->data->data[num]=(void*)sv_dat; + RExC_rxi->data->data[num]=(void*)sv_dat; SvREFCNT_inc(sv_dat); } /* override incorrect value set in reganode MJD */ @@ -7699,7 +7704,7 @@ parseit: av_store(av, 2, (SV*)unicode_alternate); rv = newRV_noinc((SV*)av); n = add_data(pRExC_state, 1, "s"); - RExC_rx->data->data[n] = (void*)rv; + RExC_rxi->data->data[n] = (void*)rv; ARG_SET(ret, n); } return ret; @@ -8090,8 +8095,9 @@ Perl_regdump(pTHX_ const regexp *r) dVAR; SV * const sv = sv_newmortal(); SV *dsv= sv_newmortal(); + RXi_GET_DECL(r,ri); - (void)dumpuntil(r, r->program, r->program + 1, NULL, NULL, sv, 0, 0); + (void)dumpuntil(r, ri->program, ri->program + 1, NULL, NULL, sv, 0, 0); /* Header fields of interest. */ if (r->anchored_substr) { @@ -8137,8 +8143,8 @@ Perl_regdump(pTHX_ const regexp *r) if (r->check_substr || r->check_utf8) PerlIO_printf(Perl_debug_log, ") "); - if (r->regstclass) { - regprop(r, sv, r->regstclass); + if (ri->regstclass) { + regprop(r, sv, ri->regstclass); PerlIO_printf(Perl_debug_log, "stclass %s ", SvPVX_const(sv)); } if (r->extflags & RXf_ANCH) { @@ -8178,7 +8184,9 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o) #ifdef DEBUGGING dVAR; register int k; + RXi_GET_DECL(prog,progi); GET_RE_DEBUG_FLAGS_DECL; + sv_setpvn(sv, "", 0); @@ -8206,14 +8214,14 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o) Perl_sv_catpvf(aTHX_ sv, " %s", s ); } else if (k == TRIE) { /* print the details of the trie in dumpuntil instead, as - * prog->data isn't available here */ + * progi->data isn't available here */ const char op = OP(o); const I32 n = ARG(o); const reg_ac_data * const ac = IS_TRIE_AC(op) ? - (reg_ac_data *)prog->data->data[n] : + (reg_ac_data *)progi->data->data[n] : NULL; const reg_trie_data * const trie = !IS_TRIE_AC(op) ? - (reg_trie_data*)prog->data->data[n] : + (reg_trie_data*)progi->data->data[n] : ac->trie; Perl_sv_catpvf(aTHX_ sv, "-%s",reg_name[o->flags]); @@ -8267,7 +8275,7 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o) else if (k == VERB) { if (!o->flags) Perl_sv_catpvf(aTHX_ sv, ":%"SVf, - (SV*)prog->data->data[ ARG( o ) ]); + (SV*)progi->data->data[ ARG( o ) ]); } else if (k == LOGICAL) Perl_sv_catpvf(aTHX_ sv, "[%d]", o->flags); /* 2: embedded, otherwise 1 */ else if (k == ANYOF) { @@ -8455,7 +8463,7 @@ void Perl_pregfree(pTHX_ struct regexp *r) { dVAR; - + RXi_GET_DECL(r,ri); GET_RE_DEBUG_FLAGS_DECL; if (!r || (--r->refcnt > 0)) @@ -8475,7 +8483,7 @@ Perl_pregfree(pTHX_ struct regexp *r) /* gcov results gave these as non-null 100% of the time, so there's no optimisation in checking them before calling Safefree */ Safefree(r->precomp); - Safefree(r->offsets); /* 20010421 MJD */ + Safefree(ri->offsets); /* 20010421 MJD */ RX_MATCH_COPY_FREE(r); #ifdef PERL_OLD_COPY_ON_WRITE if (r->saved_copy) @@ -8494,24 +8502,24 @@ Perl_pregfree(pTHX_ struct regexp *r) } if (r->paren_names) SvREFCNT_dec(r->paren_names); - if (r->data) { - int n = r->data->count; + if (ri->data) { + int n = ri->data->count; PAD* new_comppad = NULL; PAD* old_comppad; PADOFFSET refcnt; while (--n >= 0) { /* If you add a ->what type here, update the comment in regcomp.h */ - switch (r->data->what[n]) { + switch (ri->data->what[n]) { case 's': case 'S': - SvREFCNT_dec((SV*)r->data->data[n]); + SvREFCNT_dec((SV*)ri->data->data[n]); break; case 'f': - Safefree(r->data->data[n]); + Safefree(ri->data->data[n]); break; case 'p': - new_comppad = (AV*)r->data->data[n]; + new_comppad = (AV*)ri->data->data[n]; break; case 'o': if (new_comppad == NULL) @@ -8521,10 +8529,10 @@ Perl_pregfree(pTHX_ struct regexp *r) (SvTYPE(new_comppad) == SVt_PVAV) ? new_comppad : NULL ); OP_REFCNT_LOCK; - refcnt = OpREFCNT_dec((OP_4tree*)r->data->data[n]); + refcnt = OpREFCNT_dec((OP_4tree*)ri->data->data[n]); OP_REFCNT_UNLOCK; if (!refcnt) - op_free((OP_4tree*)r->data->data[n]); + op_free((OP_4tree*)ri->data->data[n]); PAD_RESTORE_LOCAL(old_comppad); SvREFCNT_dec((SV*)new_comppad); @@ -8536,7 +8544,7 @@ Perl_pregfree(pTHX_ struct regexp *r) { /* Aho Corasick add-on structure for a trie node. Used in stclass optimization only */ U32 refcount; - reg_ac_data *aho=(reg_ac_data*)r->data->data[n]; + reg_ac_data *aho=(reg_ac_data*)ri->data->data[n]; OP_REFCNT_LOCK; refcount = --aho->refcount; OP_REFCNT_UNLOCK; @@ -8545,8 +8553,8 @@ Perl_pregfree(pTHX_ struct regexp *r) Safefree(aho->fail); aho->trie=NULL; /* not necessary to free this as it is handled by the 't' case */ - Safefree(r->data->data[n]); /* do this last!!!! */ - Safefree(r->regstclass); + Safefree(ri->data->data[n]); /* do this last!!!! */ + Safefree(ri->regstclass); } } break; @@ -8554,7 +8562,7 @@ Perl_pregfree(pTHX_ struct regexp *r) { /* trie structure. */ U32 refcount; - reg_trie_data *trie=(reg_trie_data*)r->data->data[n]; + reg_trie_data *trie=(reg_trie_data*)ri->data->data[n]; OP_REFCNT_LOCK; refcount = --trie->refcount; OP_REFCNT_UNLOCK; @@ -8578,24 +8586,25 @@ Perl_pregfree(pTHX_ struct regexp *r) if (trie->revcharmap) SvREFCNT_dec((SV*)trie->revcharmap); #endif - Safefree(r->data->data[n]); /* do this last!!!! */ + Safefree(ri->data->data[n]); /* do this last!!!! */ } } break; default: - Perl_croak(aTHX_ "panic: regfree data code '%c'", r->data->what[n]); + Perl_croak(aTHX_ "panic: regfree data code '%c'", ri->data->what[n]); } } - Safefree(r->data->what); - Safefree(r->data); + Safefree(ri->data->what); + Safefree(ri->data); } Safefree(r->startp); Safefree(r->endp); - if (r->swap) { - Safefree(r->swap->startp); - Safefree(r->swap->endp); - Safefree(r->swap); + if (ri->swap) { + Safefree(ri->swap->startp); + Safefree(ri->swap->endp); + Safefree(ri->swap); } + Safefree(ri); Safefree(r); } @@ -8618,33 +8627,37 @@ regexp * Perl_regdupe(pTHX_ const regexp *r, CLONE_PARAMS *param) { dVAR; - REGEXP *ret; + regexp *ret; + regexp_internal *reti; int i, len, npar; struct reg_substr_datum *s; - + RXi_GET_DECL(r,ri); + if (!r) return (REGEXP *)NULL; if ((ret = (REGEXP *)ptr_table_fetch(PL_ptr_table, r))) return ret; - len = r->offsets[0]; + len = ri->offsets[0]; npar = r->nparens+1; - Newxc(ret, sizeof(regexp) + (len+1)*sizeof(regnode), char, regexp); - Copy(r->program, ret->program, len+1, regnode); + Newxz(ret, 1, regexp); + Newxc(reti, sizeof(regexp_internal) + (len+1)*sizeof(regnode), char, regexp_internal); + RXi_SET(ret,reti); + Copy(ri->program, reti->program, len+1, regnode); Newx(ret->startp, npar, I32); Copy(r->startp, ret->startp, npar, I32); Newx(ret->endp, npar, I32); Copy(r->startp, ret->startp, npar, I32); - if(r->swap) { - Newx(ret->swap, 1, regexp_paren_ofs); + if(ri->swap) { + Newx(reti->swap, 1, regexp_paren_ofs); /* no need to copy these */ - Newx(ret->swap->startp, npar, I32); - Newx(ret->swap->endp, npar, I32); + Newx(reti->swap->startp, npar, I32); + Newx(reti->swap->endp, npar, I32); } else { - ret->swap = NULL; + reti->swap = NULL; } Newx(ret->substrs, 1, struct reg_substr_data); @@ -8656,10 +8669,10 @@ Perl_regdupe(pTHX_ const regexp *r, CLONE_PARAMS *param) s->utf8_substr = sv_dup_inc(r->substrs->data[i].utf8_substr, param); } - ret->regstclass = NULL; - if (r->data) { + reti->regstclass = NULL; + if (ri->data) { struct reg_data *d; - const int count = r->data->count; + const int count = ri->data->count; int i; Newxc(d, sizeof(struct reg_data) + count*sizeof(void *), @@ -8668,42 +8681,42 @@ Perl_regdupe(pTHX_ const regexp *r, CLONE_PARAMS *param) d->count = count; for (i = 0; i < count; i++) { - d->what[i] = r->data->what[i]; + d->what[i] = ri->data->what[i]; switch (d->what[i]) { /* legal options are one of: sSfpontT see also regcomp.h and pregfree() */ case 's': case 'S': - d->data[i] = sv_dup_inc((SV *)r->data->data[i], param); + d->data[i] = sv_dup_inc((SV *)ri->data->data[i], param); break; case 'p': - d->data[i] = av_dup_inc((AV *)r->data->data[i], param); + d->data[i] = av_dup_inc((AV *)ri->data->data[i], param); break; case 'f': /* This is cheating. */ Newx(d->data[i], 1, struct regnode_charclass_class); - StructCopy(r->data->data[i], d->data[i], + StructCopy(ri->data->data[i], d->data[i], struct regnode_charclass_class); - ret->regstclass = (regnode*)d->data[i]; + reti->regstclass = (regnode*)d->data[i]; break; case 'o': /* Compiled op trees are readonly and in shared memory, and can thus be shared without duplication. */ OP_REFCNT_LOCK; - d->data[i] = (void*)OpREFCNT_inc((OP*)r->data->data[i]); + d->data[i] = (void*)OpREFCNT_inc((OP*)ri->data->data[i]); OP_REFCNT_UNLOCK; break; case 'n': - d->data[i] = r->data->data[i]; + d->data[i] = ri->data->data[i]; break; case 't': - d->data[i] = r->data->data[i]; + d->data[i] = ri->data->data[i]; OP_REFCNT_LOCK; ((reg_trie_data*)d->data[i])->refcount++; OP_REFCNT_UNLOCK; break; case 'T': - d->data[i] = r->data->data[i]; + d->data[i] = ri->data->data[i]; OP_REFCNT_LOCK; ((reg_ac_data*)d->data[i])->refcount++; OP_REFCNT_UNLOCK; @@ -8711,20 +8724,20 @@ Perl_regdupe(pTHX_ const regexp *r, CLONE_PARAMS *param) * without duplication. We free the stclass in pregfree * when the corresponding reg_ac_data struct is freed. */ - ret->regstclass= r->regstclass; + reti->regstclass= ri->regstclass; break; default: - Perl_croak(aTHX_ "panic: re_dup unknown data code '%c'", r->data->what[i]); + Perl_croak(aTHX_ "panic: re_dup unknown data code '%c'", ri->data->what[i]); } } - ret->data = d; + reti->data = d; } else - ret->data = NULL; + reti->data = NULL; - Newx(ret->offsets, 2*len+1, U32); - Copy(r->offsets, ret->offsets, 2*len+1, U32); + Newx(reti->offsets, 2*len+1, U32); + Copy(ri->offsets, reti->offsets, 2*len+1, U32); ret->precomp = SAVEPVN(r->precomp, r->prelen); ret->refcnt = r->refcnt; @@ -8788,7 +8801,8 @@ char * Perl_reg_stringify(pTHX_ MAGIC *mg, STRLEN *lp, U32 *flags, I32 *haseval ) { dVAR; const regexp * const re = (regexp *)mg->mg_obj; - + RXi_GET_DECL(re,ri); + if (!mg->mg_ptr) { const char *fptr = "msix"; char reflags[6]; @@ -8851,7 +8865,7 @@ Perl_reg_stringify(pTHX_ MAGIC *mg, STRLEN *lp, U32 *flags, I32 *haseval ) { mg->mg_ptr[mg->mg_len] = 0; } if (haseval) - *haseval = re->program[0].next_off; + *haseval = ri->program[0].next_off; if (flags) *flags = ((re->extflags & RXf_UTF8) ? 1 : 0); @@ -8871,7 +8885,7 @@ Perl_regnext(pTHX_ register regnode *p) dVAR; register I32 offset; - if (p == &PL_regdummy) + if (!p) return(NULL); offset = (reg_off_by_arg[OP(p)] ? ARG(p) : NEXT_OFF(p)); @@ -9007,6 +9021,7 @@ S_dumpuntil(pTHX_ const regexp *r, const regnode *start, const regnode *node, register U8 op = PSEUDO; /* Arbitrary non-END op. */ register const regnode *next; const regnode *optstart= NULL; + RXi_GET_DECL(r,ri); GET_RE_DEBUG_FLAGS_DECL; #ifdef DEBUG_DUMPUNTIL @@ -9072,10 +9087,10 @@ S_dumpuntil(pTHX_ const regexp *r, const regnode *start, const regnode *node, const char op = OP(node); const I32 n = ARG(node); const reg_ac_data * const ac = op>=AHOCORASICK ? - (reg_ac_data *)r->data->data[n] : + (reg_ac_data *)ri->data->data[n] : NULL; const reg_trie_data * const trie = opdata->data[n] : + (reg_trie_data*)ri->data->data[n] : ac->trie; const regnode *nextbranch= NULL; I32 word_idx; diff --git a/regexec.c b/regexec.c index e34af4d..ecbebac 100644 --- a/regexec.c +++ b/regexec.c @@ -372,6 +372,7 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos, register char *other_last = NULL; /* other substr checked before this */ char *check_at = NULL; /* check substr found at this pos */ const I32 multiline = prog->extflags & RXf_PMf_MULTILINE; + RXi_GET_DECL(prog,progi); #ifdef DEBUGGING const char * const i_strpos = strpos; #endif @@ -857,7 +858,7 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos, /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */ /* trie stclasses are too expensive to use here, we are better off to leave it to regmatch itself */ - if (prog->regstclass && PL_regkind[OP(prog->regstclass)]!=TRIE) { + if (progi->regstclass && PL_regkind[OP(progi->regstclass)]!=TRIE) { /* minlen == 0 is possible if regstclass is \b or \B, and the fixed substr is ''$. Since minlen is already taken into account, s+1 is before strend; @@ -866,9 +867,9 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos, regstclass does not come from lookahead... */ /* If regstclass takes bytelength more than 1: If charlength==1, OK. This leaves EXACTF only, which is dealt with in find_byclass(). */ - const U8* const str = (U8*)STRING(prog->regstclass); - const int cl_l = (PL_regkind[OP(prog->regstclass)] == EXACT - ? CHR_DIST(str+STR_LEN(prog->regstclass), str) + const U8* const str = (U8*)STRING(progi->regstclass); + const int cl_l = (PL_regkind[OP(progi->regstclass)] == EXACT + ? CHR_DIST(str+STR_LEN(progi->regstclass), str) : 1); char * endpos; if (prog->anchored_substr || prog->anchored_utf8 || ml_anch) @@ -882,7 +883,7 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos, (IV)start_shift, check_at - strbeg, s - strbeg, endpos - strbeg)); t = s; - s = find_byclass(prog, prog->regstclass, s, endpos, NULL); + s = find_byclass(prog, progi->regstclass, s, endpos, NULL); if (!s) { #ifdef DEBUGGING const char *what = NULL; @@ -1136,7 +1137,8 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, char *e; register I32 tmp = 1; /* Scratch variable? */ register const bool do_utf8 = PL_reg_match_utf8; - + RXi_GET_DECL(prog,progi); + /* We know what class it must start with. */ switch (OP(c)) { case ANYOF: @@ -1416,7 +1418,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, : trie_plain; /* what trie are we using right now */ reg_ac_data *aho - = (reg_ac_data*)prog->data->data[ ARG( c ) ]; + = (reg_ac_data*)progi->data->data[ ARG( c ) ]; reg_trie_data *trie=aho->trie; const char *last_start = strend - trie->minlen; @@ -1652,7 +1654,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * SV* const oreplsv = GvSV(PL_replgv); const bool do_utf8 = (bool)DO_UTF8(sv); I32 multiline; - + RXi_GET_DECL(prog,progi); regmatch_info reginfo; /* create some info to pass to regtry etc */ GET_RE_DEBUG_FLAGS_DECL; @@ -1684,7 +1686,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * /* Check validity of program. */ - if (UCHARAT(prog->program) != REG_MAGIC) { + if (UCHARAT(progi->program) != REG_MAGIC) { Perl_croak(aTHX_ "corrupted regexp program"); } @@ -1732,7 +1734,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * } if (PL_curpm && (PM_GETRE(PL_curpm) == prog)) { I32 *t; - if (!prog->swap) { + if (!progi->swap) { /* We have to be careful. If the previous successful match was from this regex we don't want a subsequent paritally successful match to clobber the old results. @@ -1740,16 +1742,16 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * to the re, and switch the buffer each match. If we fail we switch it back, otherwise we leave it swapped. */ - Newxz(prog->swap, 1, regexp_paren_ofs); + Newxz(progi->swap, 1, regexp_paren_ofs); /* no need to copy these */ - Newxz(prog->swap->startp, prog->nparens + 1, I32); - Newxz(prog->swap->endp, prog->nparens + 1, I32); + Newxz(progi->swap->startp, prog->nparens + 1, I32); + Newxz(progi->swap->endp, prog->nparens + 1, I32); } - t = prog->swap->startp; - prog->swap->startp = prog->startp; + t = progi->swap->startp; + progi->swap->startp = prog->startp; prog->startp = t; - t = prog->swap->endp; - prog->swap->endp = prog->endp; + t = progi->swap->endp; + progi->swap->endp = prog->endp; prog->endp = t; } if (!(flags & REXEC_CHECKED) && (prog->check_substr != NULL || prog->check_utf8 != NULL)) { @@ -1952,9 +1954,9 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * }); goto phooey; } - else if ( (c = prog->regstclass) ) { + else if ( (c = progi->regstclass) ) { if (minlen) { - const OPCODE op = OP(prog->regstclass); + const OPCODE op = OP(progi->regstclass); /* don't bother with what can't match */ if (PL_regkind[op] != EXACT && op != CANY && PL_regkind[op] != TRIE) strend = HOPc(strend, -(minlen - 1)); @@ -2100,14 +2102,14 @@ phooey: PL_colors[4], PL_colors[5])); if (PL_reg_eval_set) restore_pos(aTHX_ prog); - if (prog->swap) { + if (progi->swap) { /* we failed :-( roll it back */ I32 *t; - t = prog->swap->startp; - prog->swap->startp = prog->startp; + t = progi->swap->startp; + progi->swap->startp = prog->startp; prog->startp = t; - t = prog->swap->endp; - prog->swap->endp = prog->endp; + t = progi->swap->endp; + progi->swap->endp = prog->endp; prog->endp = t; } return 0; @@ -2125,6 +2127,7 @@ S_regtry(pTHX_ regmatch_info *reginfo, char **startpos) register I32 *ep; CHECKPOINT lastcp; regexp *prog = reginfo->prog; + RXi_GET_DECL(prog,progi); GET_RE_DEBUG_FLAGS_DECL; reginfo->cutpoint=NULL; @@ -2242,7 +2245,7 @@ S_regtry(pTHX_ regmatch_info *reginfo, char **startpos) } #endif REGCP_SET(lastcp); - if (regmatch(reginfo, prog->program + 1)) { + if (regmatch(reginfo, progi->program + 1)) { PL_regendp[0] = PL_reginput - PL_bostr; return 1; } @@ -2569,7 +2572,8 @@ S_dump_exec_pos(pTHX_ const char *locinput, STATIC I32 S_reg_check_named_buff_matched(pTHX_ const regexp *rex, const regnode *scan) { I32 n; - SV *sv_dat=(SV*)rex->data->data[ ARG( scan ) ]; + RXi_GET_DECL(rex,rexi); + SV *sv_dat=(SV*)rexi->data->data[ ARG( scan ) ]; I32 *nums=(I32*)SvPVX(sv_dat); for ( n=0; n= nums[n] && @@ -2592,7 +2596,8 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog) const U32 uniflags = UTF8_ALLOW_DEFAULT; regexp *rex = reginfo->prog; - + RXi_GET_DECL(rex,rexi); + regmatch_slab *orig_slab; regmatch_state *orig_state; @@ -2683,10 +2688,10 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog) PerlIO_printf(Perl_debug_log, "%3"IVdf":%*s%s(%"IVdf")\n", - (IV)(scan - rex->program), depth*2, "", + (IV)(scan - rexi->program), depth*2, "", SvPVX_const(prop), (PL_regkind[OP(scan)] == END || !rnext) ? - 0 : (IV)(rnext - rex->program)); + 0 : (IV)(rnext - rexi->program)); }); next = scan + NEXT_OFF(scan); @@ -2793,7 +2798,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog) /* what trie are we using right now */ reg_trie_data * const trie - = (reg_trie_data*)rex->data->data[ ARG( scan ) ]; + = (reg_trie_data*)rexi->data->data[ ARG( scan ) ]; U32 state = trie->startstate; if (trie->bitmap && trie_type != trie_utf8_fold && @@ -2938,7 +2943,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog) /* only one choice left - just continue */ DEBUG_EXECUTE_r({ reg_trie_data * const trie - = (reg_trie_data*)rex->data->data[ ARG(ST.me) ]; + = (reg_trie_data*)rexi->data->data[ ARG(ST.me) ]; SV ** const tmp = av_fetch( trie->words, ST.accept_buff[ 0 ].wordnum-1, 0 ); SV *sv= tmp ? sv_newmortal() : NULL; @@ -3019,7 +3024,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog) DEBUG_EXECUTE_r({ reg_trie_data * const trie - = (reg_trie_data*)rex->data->data[ ARG(ST.me) ]; + = (reg_trie_data*)rexi->data->data[ ARG(ST.me) ]; SV ** const tmp = av_fetch( trie->words, ST.accept_buff[ best ].wordnum - 1, 0 ); regnode *nextop=(!ST.jump || !ST.jump[ST.accept_buff[best].wordnum]) ? @@ -3502,6 +3507,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog) { SV *ret; regexp *re; + regexp_internal *rei; regnode *startpoint; case GOSTART: @@ -3517,12 +3523,13 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog) nochange_depth = 0; } re = rex; + rei = rexi; (void)ReREFCNT_inc(rex); if (OP(scan)==GOSUB) { startpoint = scan + ARG2L(scan); ST.close_paren = ARG(scan); } else { - startpoint = re->program+1; + startpoint = rei->program+1; ST.close_paren = 0; } goto eval_recurse_doit; @@ -3543,10 +3550,10 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog) PAD *old_comppad; n = ARG(scan); - PL_op = (OP_4tree*)rex->data->data[n]; + PL_op = (OP_4tree*)rexi->data->data[n]; DEBUG_STATE_r( PerlIO_printf(Perl_debug_log, " re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) ); - PAD_SAVE_LOCAL(old_comppad, (PAD*)rex->data->data[n + 2]); + PAD_SAVE_LOCAL(old_comppad, (PAD*)rexi->data->data[n + 2]); PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr; CALLRUNOPS(aTHX); /* Scalar context. */ @@ -3605,11 +3612,12 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog) PL_regsize = osize; } } + rei = RXi_GET(re); DEBUG_EXECUTE_r( debug_start_match(re, do_utf8, locinput, PL_regeol, "Matching embedded"); ); - startpoint = re->program + 1; + startpoint = rei->program + 1; ST.close_paren = 0; /* only used for GOSUB */ /* borrowed from regtry */ if (PL_reg_start_tmpl <= re->nparens) { @@ -3646,6 +3654,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog) ST.prev_rex = rex; ST.prev_curlyx = cur_curlyx; rex = re; + rexi = rei; cur_curlyx = NULL; ST.B = next; ST.prev_eval = cur_eval; @@ -3665,6 +3674,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog) PL_reg_flags ^= ST.toggle_reg_flags; ReREFCNT_dec(rex); rex = ST.prev_rex; + rexi = RXi_GET(rex); regcpblow(ST.cp); cur_eval = ST.prev_eval; cur_curlyx = ST.prev_curlyx; @@ -3678,6 +3688,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog) PL_reg_flags ^= ST.toggle_reg_flags; ReREFCNT_dec(rex); rex = ST.prev_rex; + rexi = RXi_GET(rex); PL_reginput = locinput; REGCP_UNWIND(ST.lastcp); regcppop(rex); @@ -4134,7 +4145,7 @@ NULL case CUTGROUP: PL_reginput = locinput; sv_yes_mark = st->u.mark.mark_name = scan->flags ? NULL : - (SV*)rex->data->data[ ARG( scan ) ]; + (SV*)rexi->data->data[ ARG( scan ) ]; PUSH_STATE_GOTO(CUTGROUP_next,next); /* NOTREACHED */ case CUTGROUP_next_fail: @@ -4664,7 +4675,8 @@ NULL PL_reg_flags ^= st->u.eval.toggle_reg_flags; st->u.eval.prev_rex = rex; /* inner */ - rex = cur_eval->u.eval.prev_rex; /* outer */ + rex = cur_eval->u.eval.prev_rex; /* outer */ + rexi = RXi_GET(rex); cur_curlyx = cur_eval->u.eval.prev_curlyx; ReREFCNT_inc(rex); st->u.eval.cp = regcppush(0); /* Save *all* the positions. */ @@ -4785,7 +4797,7 @@ NULL case PRUNE: PL_reginput = locinput; if (!scan->flags) - sv_yes_mark = sv_commit = (SV*)rex->data->data[ ARG( scan ) ]; + sv_yes_mark = sv_commit = (SV*)rexi->data->data[ ARG( scan ) ]; PUSH_STATE_GOTO(COMMIT_next,next); /* NOTREACHED */ case COMMIT_next_fail: @@ -4799,7 +4811,7 @@ NULL case MARKPOINT: ST.prev_mark = mark_state; ST.mark_name = sv_commit = sv_yes_mark - = (SV*)rex->data->data[ ARG( scan ) ]; + = (SV*)rexi->data->data[ ARG( scan ) ]; mark_state = st; ST.mark_loc = PL_reginput = locinput; PUSH_YES_STATE_GOTO(MARKPOINT_next,next); @@ -4840,7 +4852,7 @@ NULL otherwise do nothing. Meaning we need to scan */ regmatch_state *cur = mark_state; - SV *find = (SV*)rex->data->data[ ARG( scan ) ]; + SV *find = (SV*)rexi->data->data[ ARG( scan ) ]; while (cur) { if ( sv_eq( cur->u.mark.mark_name, @@ -5321,7 +5333,8 @@ Perl_regclass_swash(pTHX_ const regexp *prog, register const regnode* node, bool SV *sw = NULL; SV *si = NULL; SV *alt = NULL; - const struct reg_data * const data = prog ? prog->data : NULL; + RXi_GET_DECL(prog,progi); + const struct reg_data * const data = prog ? progi->data : NULL; if (data && data->count) { const U32 n = ARG(node); diff --git a/regexp.h b/regexp.h index 1355e1e..f2c9705 100644 --- a/regexp.h +++ b/regexp.h @@ -82,6 +82,10 @@ typedef struct regexp { void *pprivate; /* Data private to the regex engine which created this object. Perl will never mess with this member at all. */ +} regexp; + + +typedef struct regexp_internal { regexp_paren_ofs *swap; /* Swap copy of *startp / *endp */ U32 *offsets; /* offset annotations 20001228 MJD data about mapping the program to the @@ -93,8 +97,11 @@ typedef struct regexp { data that the regops need. Often the ARG field of a regop is an index into this structure */ regnode program[1]; /* Unwarranted chumminess with compiler. */ -} regexp; +} regexp_internal; +#define RXi_SET(x,y) (x)->pprivate = (void*)(y) +#define RXi_GET(x) ((regexp_internal *)((x)->pprivate)) +#define RXi_GET_DECL(r,ri) regexp_internal *ri = RXi_GET(r) typedef struct re_scream_pos_data_s {