X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=regcomp.c;h=28c12d18aa55b0f9b4dfe3f29e9ade9f6dee7627;hb=b3c649451aa23903a1f1aa5b0d54e8244611b239;hp=b7fd317248c06c53b029a7bb20cf31d302b00a85;hpb=59cd0e26eb6c10499b25d783562357dd68cc16f2;p=p5sagit%2Fp5-mst-13.2.git diff --git a/regcomp.c b/regcomp.c index b7fd317..28c12d1 100644 --- a/regcomp.c +++ b/regcomp.c @@ -4151,7 +4151,7 @@ Perl_pregcomp(pTHX_ const SV * const pattern, const U32 flags) #endif REGEXP * -Perl_re_compile(pTHX_ const SV * const pattern, const U32 pm_flags) +Perl_re_compile(pTHX_ const SV * const pattern, U32 pm_flags) { dVAR; REGEXP *rx; @@ -4175,7 +4175,7 @@ Perl_re_compile(pTHX_ const SV * const pattern, const U32 pm_flags) GET_RE_DEBUG_FLAGS_DECL; DEBUG_r(if (!PL_colorset) reginitcolors()); - RExC_utf8 = RExC_orig_utf8 = pm_flags & RXf_UTF8; + RExC_utf8 = RExC_orig_utf8 = SvUTF8(pattern); DEBUG_COMPILE_r({ SV *dsv= sv_newmortal(); @@ -4264,7 +4264,7 @@ redo_first_pass: /* Allocate space and zero-initialize. Note, the two step process of zeroing when in debug mode, thus anything assigned has to happen after that */ - rx = newSV_type(SVt_REGEXP); + rx = (REGEXP*) newSV_type(SVt_REGEXP); r = (struct regexp*)SvANY(rx); Newxc(ri, sizeof(regexp_internal) + (unsigned)RExC_size * sizeof(regnode), char, regexp_internal); @@ -4290,12 +4290,14 @@ redo_first_pass: >> RXf_PMf_STD_PMMOD_SHIFT); const char *fptr = STD_PAT_MODS; /*"msix"*/ char *p; - RXp_WRAPLEN(r) = plen + has_minus + has_p + has_runon + const STRLEN wraplen = plen + has_minus + has_p + has_runon + (sizeof(STD_PAT_MODS) - 1) + (sizeof("(?:)") - 1); - Newx(RXp_WRAPPED(r), RXp_WRAPLEN(r) + 1, char ); - p = RXp_WRAPPED(r); + p = sv_grow((SV *)rx, wraplen + 1); + SvCUR_set(rx, wraplen); + SvPOK_on(rx); + SvFLAGS(rx) |= SvUTF8(pattern); *p++='('; *p++='?'; if (has_p) *p++ = KEEPCOPY_PAT_MOD; /*'p'*/ @@ -4319,8 +4321,8 @@ redo_first_pass: *p++ = ':'; Copy(RExC_precomp, p, plen, char); - assert ((RXp_WRAPPED(r) - p) < 16); - r->pre_prefix = p - RXp_WRAPPED(r); + assert ((RX_WRAPPED(rx) - p) < 16); + r->pre_prefix = p - RX_WRAPPED(rx); p += plen; if (has_runon) *p++ = '\n'; @@ -4409,7 +4411,7 @@ reStudy: /*dmq: removed as part of de-PMOP: pm->op_pmflags = RExC_flags; */ if (UTF) - r->extflags |= RXf_UTF8; /* Unicode in it? */ + SvUTF8_on(rx); /* Unicode in it? */ ri->regstclass = NULL; if (RExC_naughty >= 10) /* Probably an expensive pattern. */ r->intflags |= PREGf_NAUGHTY; @@ -4791,22 +4793,22 @@ reStudy: if (RExC_seen & REG_SEEN_CUTGROUP) r->intflags |= PREGf_CUTGROUP_SEEN; if (RExC_paren_names) - r->paren_names = (HV*)SvREFCNT_inc(RExC_paren_names); + RXp_PAREN_NAMES(r) = (HV*)SvREFCNT_inc(RExC_paren_names); else - r->paren_names = NULL; + RXp_PAREN_NAMES(r) = NULL; #ifdef STUPID_PATTERN_CHECKS - if (RX_PRELEN(r) == 0) + if (RX_PRELEN(rx) == 0) r->extflags |= RXf_NULL; - if (r->extflags & RXf_SPLIT && RX_PRELEN(r) == 1 && RXp_PRECOMP(r)[0] == ' ') + if (r->extflags & RXf_SPLIT && RX_PRELEN(rx) == 1 && RX_PRECOMP(rx)[0] == ' ') /* XXX: this should happen BEFORE we compile */ r->extflags |= (RXf_SKIPWHITE|RXf_WHITE); - else if (RX_PRELEN(r) == 3 && memEQ("\\s+", RXp_PRECOMP(r), 3)) + else if (RX_PRELEN(rx) == 3 && memEQ("\\s+", RX_PRECOMP(rx), 3)) r->extflags |= RXf_WHITE; - else if (RX_PRELEN(r) == 1 && RXp_PRECOMP(r)[0] == '^') + else if (RX_PRELEN(rx) == 1 && RXp_PRECOMP(rx)[0] == '^') r->extflags |= RXf_START_ONLY; #else - if (r->extflags & RXf_SPLIT && RXp_PRELEN(r) == 1 && RXp_PRECOMP(r)[0] == ' ') + if (r->extflags & RXf_SPLIT && RX_PRELEN(rx) == 1 && RX_PRECOMP(rx)[0] == ' ') /* XXX: this should happen BEFORE we compile */ r->extflags |= (RXf_SKIPWHITE|RXf_WHITE); else { @@ -4914,8 +4916,8 @@ Perl_reg_named_buff_fetch(pTHX_ REGEXP * const r, SV * const namesv, if (flags & RXapif_ALL) retarray=newAV(); - if (rx && rx->paren_names) { - HE *he_str = hv_fetch_ent( rx->paren_names, namesv, 0, 0 ); + if (rx && RXp_PAREN_NAMES(rx)) { + HE *he_str = hv_fetch_ent( RXp_PAREN_NAMES(rx), namesv, 0, 0 ); if (he_str) { IV i; SV* sv_dat=HeVAL(he_str); @@ -4949,9 +4951,9 @@ Perl_reg_named_buff_exists(pTHX_ REGEXP * const r, SV * const key, const U32 flags) { struct regexp *const rx = (struct regexp *)SvANY(r); - if (rx && rx->paren_names) { + if (rx && RXp_PAREN_NAMES(rx)) { if (flags & RXapif_ALL) { - return hv_exists_ent(rx->paren_names, key, 0); + return hv_exists_ent(RXp_PAREN_NAMES(rx), key, 0); } else { SV *sv = CALLREG_NAMED_BUFF_FETCH(r, key, flags); if (sv) { @@ -4970,8 +4972,8 @@ SV* Perl_reg_named_buff_firstkey(pTHX_ REGEXP * const r, const U32 flags) { struct regexp *const rx = (struct regexp *)SvANY(r); - if ( rx && rx->paren_names ) { - (void)hv_iterinit(rx->paren_names); + if ( rx && RXp_PAREN_NAMES(rx) ) { + (void)hv_iterinit(RXp_PAREN_NAMES(rx)); return CALLREG_NAMED_BUFF_NEXTKEY(r, NULL, flags & ~RXapif_FIRSTKEY); } else { @@ -4983,8 +4985,8 @@ SV* Perl_reg_named_buff_nextkey(pTHX_ REGEXP * const r, const U32 flags) { struct regexp *const rx = (struct regexp *)SvANY(r); - if (rx && rx->paren_names) { - HV *hv = rx->paren_names; + if (rx && RXp_PAREN_NAMES(rx)) { + HV *hv = RXp_PAREN_NAMES(rx); HE *temphe; while ( (temphe = hv_iternext_flags(hv,0)) ) { IV i; @@ -5016,9 +5018,9 @@ Perl_reg_named_buff_scalar(pTHX_ REGEXP * const r, const U32 flags) I32 length; struct regexp *const rx = (struct regexp *)SvANY(r); - if (rx && rx->paren_names) { + if (rx && RXp_PAREN_NAMES(rx)) { if (flags & (RXapif_ALL | RXapif_REGNAMES_COUNT)) { - return newSViv(HvTOTALKEYS(rx->paren_names)); + return newSViv(HvTOTALKEYS(RXp_PAREN_NAMES(rx))); } else if (flags & RXapif_ONE) { ret = CALLREG_NAMED_BUFF_ALL(r, (flags | RXapif_REGNAMES)); av = (AV*)SvRV(ret); @@ -5038,8 +5040,8 @@ Perl_reg_named_buff_all(pTHX_ REGEXP * const r, const U32 flags) struct regexp *const rx = (struct regexp *)SvANY(r); AV *av = newAV(); - if (rx && rx->paren_names) { - HV *hv= rx->paren_names; + if (rx && RXp_PAREN_NAMES(rx)) { + HV *hv= RXp_PAREN_NAMES(rx); HE *temphe; (void)hv_iterinit(hv); while ( (temphe = hv_iternext_flags(hv,0)) ) { @@ -5217,7 +5219,10 @@ SV* Perl_reg_qr_package(pTHX_ REGEXP * const rx) { PERL_UNUSED_ARG(rx); - return NULL; + if (0) + return NULL; + else + return newSVpvs("Regexp"); } /* Scans the name of a named buffer from the pattern. @@ -8919,7 +8924,7 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o) Perl_sv_catpvf(aTHX_ sv, "[%d/%d]", o->flags & 0xf, o->flags>>4); else if (k == REF || k == OPEN || k == CLOSE || k == GROUPP || OP(o)==ACCEPT) { Perl_sv_catpvf(aTHX_ sv, "%d", (int)ARG(o)); /* Parenth number */ - if ( prog->paren_names ) { + if ( RXp_PAREN_NAMES(prog) ) { if ( k != REF || OP(o) < NREF) { AV *list= (AV *)progi->data->data[progi->name_list_idx]; SV **name= av_fetch(list, ARG(o), 0 ); @@ -9157,9 +9162,8 @@ Perl_pregfree2(pTHX_ REGEXP *rx) ReREFCNT_dec(r->mother_re); } else { CALLREGFREE_PVT(rx); /* free the private data */ - if (r->paren_names) - SvREFCNT_dec(r->paren_names); - Safefree(RXp_WRAPPED(r)); + if (RXp_PAREN_NAMES(r)) + SvREFCNT_dec(RXp_PAREN_NAMES(r)); } if (r->substrs) { if (r->anchored_substr) @@ -9200,13 +9204,19 @@ Perl_pregfree2(pTHX_ REGEXP *rx) REGEXP * Perl_reg_temp_copy (pTHX_ REGEXP *rx) { - REGEXP *ret_x = newSV_type(SVt_REGEXP); + REGEXP *ret_x = (REGEXP*) newSV_type(SVt_REGEXP); struct regexp *ret = (struct regexp *)SvANY(ret_x); struct regexp *const r = (struct regexp *)SvANY(rx); register const I32 npar = r->nparens+1; (void)ReREFCNT_inc(rx); - /* FIXME ORANGE (once we start actually using the regular SV fields.) */ - StructCopy(r, ret, regexp); + /* We can take advantage of the existing "copied buffer" mechanism in SVs + by pointing directly at the buffer, but flagging that the allocated + space in the copy is zero. As we've just done a struct copy, it's now + a case of zero-ing that, rather than copying the current length. */ + SvPV_set(ret_x, RX_WRAPPED(rx)); + SvFLAGS(ret_x) |= SvFLAGS(rx) & (SVf_POK|SVp_POK|SVf_UTF8); + StructCopy(&(r->xpv_cur), &(ret->xpv_cur), struct regexp_allocated); + SvLEN_set(ret_x, 0); Newx(ret->offs, npar, regexp_paren_pair); Copy(r->offs, ret->offs, npar, regexp_paren_pair); if (r->substrs) { @@ -9257,8 +9267,8 @@ Perl_regfree_internal(pTHX_ REGEXP * const rx) reginitcolors(); { SV *dsv= sv_newmortal(); - RE_PV_QUOTED_DECL(s, (r->extflags & RXf_UTF8), - dsv, RXp_PRECOMP(r), RXp_PRELEN(r), 60); + RE_PV_QUOTED_DECL(s, RX_UTF8(rx), + dsv, RX_PRECOMP(rx), RX_PRELEN(rx), 60); PerlIO_printf(Perl_debug_log,"%sFreeing REx:%s %s\n", PL_colors[4],PL_colors[5],s); } @@ -9399,7 +9409,9 @@ Perl_re_dup_guts(pTHX_ const REGEXP *sstr, REGEXP *dstr, CLONE_PARAMS *param) /* Do it this way to avoid reading from *r after the StructCopy(). That way, if any of the sv_dup_inc()s dislodge *r from the L1 cache, it doesn't matter. */ - const bool anchored = r->check_substr == r->anchored_substr; + const bool anchored = r->check_substr + ? r->check_substr == r->anchored_substr + : r->check_utf8 == r->anchored_utf8; Newx(ret->substrs, 1, struct reg_substr_data); StructCopy(r->substrs, ret->substrs, struct reg_substr_data); @@ -9422,11 +9434,16 @@ Perl_re_dup_guts(pTHX_ const REGEXP *sstr, REGEXP *dstr, CLONE_PARAMS *param) ret->check_substr = ret->float_substr; ret->check_utf8 = ret->float_utf8; } + } else if (ret->check_utf8) { + if (anchored) { + ret->check_utf8 = ret->anchored_utf8; + } else { + ret->check_utf8 = ret->float_utf8; + } } } - RXp_WRAPPED(ret) = SAVEPVN(RXp_WRAPPED(ret), RXp_WRAPLEN(ret)+1); - ret->paren_names = hv_dup_inc(ret->paren_names, param); + RXp_PAREN_NAMES(ret) = hv_dup_inc(RXp_PAREN_NAMES(ret), param); if (ret->pprivate) RXi_SET(ret,CALLREGDUPE_PVT(dstr,param)); @@ -9441,7 +9458,6 @@ Perl_re_dup_guts(pTHX_ const REGEXP *sstr, REGEXP *dstr, CLONE_PARAMS *param) ret->mother_re = NULL; ret->gofs = 0; - ret->seen_evals = 0; } #endif /* PERL_IN_XSUB_RE */ @@ -9553,48 +9569,8 @@ Perl_regdupe_internal(pTHX_ REGEXP * const rx, CLONE_PARAMS *param) #endif /* USE_ITHREADS */ -/* - reg_stringify() - - converts a regexp embedded in a MAGIC struct to its stringified form, - caching the converted form in the struct and returns the cached - string. - - If lp is nonnull then it is used to return the length of the - resulting string - - If flags is nonnull and the returned string contains UTF8 then - (*flags & 1) will be true. - - If haseval is nonnull then it is used to return whether the pattern - contains evals. - - Normally called via macro: - - CALLREG_STRINGIFY(mg,&len,&utf8); - - And internally with - - CALLREG_AS_STR(mg,&lp,&flags,&haseval) - - See sv_2pv_flags() in sv.c for an example of internal usage. - - */ #ifndef PERL_IN_XSUB_RE -char * -Perl_reg_stringify(pTHX_ MAGIC *mg, STRLEN *lp, U32 *flags, I32 *haseval ) { - dVAR; - const REGEXP * const re = (REGEXP *)mg->mg_obj; - if (haseval) - *haseval = RX_SEEN_EVALS(re); - if (flags) - *flags = ((RX_EXTFLAGS(re) & RXf_UTF8) ? 1 : 0); - if (lp) - *lp = RX_WRAPLEN(re); - return RX_WRAPPED(re); -} - /* - regnext - dig the "next" pointer out of a node */