X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=regcomp.c;h=dedcd0079d16b3ab0c87140827982f3595b0e96b;hb=9d1ce744c6b1f6545853185bcc1688e9343cccff;hp=20b68762f4db17942b3c59aba01101da15f5945f;hpb=9febdf04c602e91a389f75497c9add388bc632ec;p=p5sagit%2Fp5-mst-13.2.git diff --git a/regcomp.c b/regcomp.c index 20b6876..dedcd00 100644 --- a/regcomp.c +++ b/regcomp.c @@ -69,7 +69,8 @@ * **** Alterations to Henry's code are... **** - **** Copyright (c) 1991-2002, Larry Wall + **** Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, + **** 2000, 2001, 2002, 2003, by Larry Wall and others **** **** You may distribute under the terms of either the GNU General Public **** License or the Artistic License, as specified in the README file. @@ -471,7 +472,7 @@ S_scan_commit(pTHX_ RExC_state_t *pRExC_state, scan_data_t *data) STRLEN old_l = CHR_SVLEN(*data->longest); if ((l >= old_l) && ((l > old_l) || (data->flags & SF_BEFORE_EOL))) { - sv_setsv(*data->longest, data->last_found); + SvSetMagicSV(*data->longest, data->last_found); if (*data->longest == data->longest_fixed) { data->offset_fixed = l ? data->last_start_min : data->pos_min; if (data->flags & SF_BEFORE_EOL) @@ -495,6 +496,13 @@ S_scan_commit(pTHX_ RExC_state_t *pRExC_state, scan_data_t *data) } } SvCUR_set(data->last_found, 0); + { + SV * sv = data->last_found; + MAGIC *mg = + SvUTF8(sv) && SvMAGICAL(sv) ? mg_find(sv, PERL_MAGIC_utf8) : NULL; + if (mg && mg->mg_len > 0) + mg->mg_len = 0; + } data->last_end = -1; data->flags &= ~SF_BEFORE_EOL; } @@ -568,14 +576,17 @@ S_cl_and(pTHX_ struct regnode_charclass_class *cl, if (!(and_with->flags & ANYOF_EOS)) cl->flags &= ~ANYOF_EOS; - if (cl->flags & ANYOF_UNICODE_ALL && and_with->flags & ANYOF_UNICODE) { + if (cl->flags & ANYOF_UNICODE_ALL && and_with->flags & ANYOF_UNICODE && + !(and_with->flags & ANYOF_INVERT)) { cl->flags &= ~ANYOF_UNICODE_ALL; cl->flags |= ANYOF_UNICODE; ARG_SET(cl, ARG(and_with)); } - if (!(and_with->flags & ANYOF_UNICODE_ALL)) + if (!(and_with->flags & ANYOF_UNICODE_ALL) && + !(and_with->flags & ANYOF_INVERT)) cl->flags &= ~ANYOF_UNICODE_ALL; - if (!(and_with->flags & (ANYOF_UNICODE|ANYOF_UNICODE_ALL))) + if (!(and_with->flags & (ANYOF_UNICODE|ANYOF_UNICODE_ALL)) && + !(and_with->flags & ANYOF_INVERT)) cl->flags &= ~ANYOF_UNICODE; } @@ -913,6 +924,14 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap, reg ? I32_MAX : data->pos_min + data->pos_delta; } sv_catpvn(data->last_found, STRING(scan), STR_LEN(scan)); + { + SV * sv = data->last_found; + MAGIC *mg = SvUTF8(sv) && SvMAGICAL(sv) ? + mg_find(sv, PERL_MAGIC_utf8) : NULL; + if (mg && mg->mg_len >= 0) + mg->mg_len += utf8_length((U8*)STRING(scan), + (U8*)STRING(scan)+STR_LEN(scan)); + } if (UTF) SvUTF8_on(data->last_found); data->last_end = data->pos_min + l; @@ -1283,6 +1302,14 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap, reg SvCUR_set(data->last_found, SvCUR(data->last_found) - l); sv_catsv(data->last_found, last_str); + { + SV * sv = data->last_found; + MAGIC *mg = + SvUTF8(sv) && SvMAGICAL(sv) ? + mg_find(sv, PERL_MAGIC_utf8) : NULL; + if (mg && mg->mg_len >= 0) + mg->mg_len += CHR_SVLEN(last_str); + } data->last_end += l * (mincount - 1); } } else { @@ -1778,6 +1805,9 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm) r->prelen = xend - exp; r->precomp = savepvn(RExC_precomp, r->prelen); r->subbeg = NULL; +#ifdef PERL_COPY_ON_WRITE + r->saved_copy = Nullsv; +#endif r->reganch = pm->op_pmflags & PMf_COMPILETIME; r->nparens = RExC_npar - 1; /* set early to validate backrefs */ @@ -2229,8 +2259,10 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp) FAIL("Eval-group not allowed at runtime, use re 'eval'"); if (PL_tainting && PL_tainted) FAIL("Eval-group in insecure regular expression"); + if (PL_curcop == &PL_compiling) + PL_cv_has_eval = 1; } - + nextchar(pRExC_state); if (logical) { ret = reg_node(pRExC_state, LOGICAL); @@ -4711,7 +4743,7 @@ Perl_regprop(pTHX_ SV *sv, regnode *o) else if (k == ANYOF) { int i, rangestart = -1; U8 flags = ANYOF_FLAGS(o); - const char * const anyofs[] = { /* Should be syncronized with + const char * const anyofs[] = { /* Should be synchronized with * ANYOF_ #xdefines in regcomp.h */ "\\w", "\\W", @@ -4900,8 +4932,11 @@ Perl_pregfree(pTHX_ struct regexp *r) Safefree(r->precomp); if (r->offsets) /* 20010421 MJD */ Safefree(r->offsets); - if (RX_MATCH_COPIED(r)) - Safefree(r->subbeg); + RX_MATCH_COPY_FREE(r); +#ifdef PERL_COPY_ON_WRITE + if (r->saved_copy) + SvREFCNT_dec(r->saved_copy); +#endif if (r->substrs) { if (r->anchored_substr) SvREFCNT_dec(r->anchored_substr); @@ -5020,20 +5055,6 @@ S_re_croak2(pTHX_ const char* pat1,const char* pat2,...) void Perl_save_re_context(pTHX) { -#if 0 - SAVEPPTR(RExC_precomp); /* uncompiled string. */ - SAVEI32(RExC_npar); /* () count. */ - SAVEI32(RExC_size); /* Code size. */ - SAVEI32(RExC_flags); /* are we folding, multilining? */ - SAVEVPTR(RExC_rx); /* from regcomp.c */ - SAVEI32(RExC_seen); /* from regcomp.c */ - SAVEI32(RExC_sawback); /* Did we see \1, ...? */ - SAVEI32(RExC_naughty); /* How bad is this pattern? */ - SAVEVPTR(RExC_emit); /* Code-emit pointer; ®dummy = don't */ - SAVEPPTR(RExC_end); /* End of input for compile */ - SAVEPPTR(RExC_parse); /* Input-scan pointer. */ -#endif - SAVEI32(PL_reg_flags); /* from regexec.c */ SAVEPPTR(PL_bostr); SAVEPPTR(PL_reginput); /* String-input pointer. */ @@ -5042,6 +5063,7 @@ Perl_save_re_context(pTHX) SAVEVPTR(PL_regstartp); /* Pointer to startp array. */ SAVEVPTR(PL_regendp); /* Ditto for endp. */ SAVEVPTR(PL_reglastparen); /* Similarly for lastparen. */ + SAVEVPTR(PL_reglastcloseparen); /* Similarly for lastcloseparen. */ SAVEPPTR(PL_regtill); /* How far we are required to go. */ SAVEGENERICPV(PL_reg_start_tmp); /* from regexec.c */ PL_reg_start_tmp = 0; @@ -5063,12 +5085,29 @@ Perl_save_re_context(pTHX) SAVEI32(PL_reg_oldpos); /* from regexec.c */ SAVEVPTR(PL_reg_oldcurpm); /* from regexec.c */ SAVEVPTR(PL_reg_curpm); /* from regexec.c */ + SAVEPPTR(PL_reg_oldsaved); /* old saved substr during match */ + PL_reg_oldsaved = Nullch; + SAVEI32(PL_reg_oldsavedlen); /* old length of saved substr during match */ + PL_reg_oldsavedlen = 0; +#ifdef PERL_COPY_ON_WRITE + SAVESPTR(PL_nrs); + PL_nrs = Nullsv; +#endif + SAVEI32(PL_reg_maxiter); /* max wait until caching pos */ + PL_reg_maxiter = 0; + SAVEI32(PL_reg_leftiter); /* wait until caching pos */ + PL_reg_leftiter = 0; + SAVEGENERICPV(PL_reg_poscache); /* cache of pos of WHILEM */ + PL_reg_poscache = Nullch; + SAVEI32(PL_reg_poscache_size); /* size of pos cache of WHILEM */ + PL_reg_poscache_size = 0; + SAVEPPTR(PL_regprecomp); /* uncompiled string. */ SAVEI32(PL_regnpar); /* () count. */ SAVEI32(PL_regsize); /* from regexec.c */ { /* Save $1..$n (#18107: UTF-8 s/(\w+)/uc($1)/e); AMS 20021106. */ - int i; + U32 i; GV *mgv; REGEXP *rx; char digits[16];