X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=regcomp.c;h=c61e54853181968e8bf1145101e04c4f3d542368;hb=a09a0aa27722154853871015ad1f231653d101dc;hp=20b68762f4db17942b3c59aba01101da15f5945f;hpb=9febdf04c602e91a389f75497c9add388bc632ec;p=p5sagit%2Fp5-mst-13.2.git diff --git a/regcomp.c b/regcomp.c index 20b6876..c61e548 100644 --- a/regcomp.c +++ b/regcomp.c @@ -69,7 +69,8 @@ * **** Alterations to Henry's code are... **** - **** Copyright (c) 1991-2002, Larry Wall + **** Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, + **** 2000, 2001, 2002, 2003, by Larry Wall and others **** **** You may distribute under the terms of either the GNU General Public **** License or the Artistic License, as specified in the README file. @@ -471,7 +472,7 @@ S_scan_commit(pTHX_ RExC_state_t *pRExC_state, scan_data_t *data) STRLEN old_l = CHR_SVLEN(*data->longest); if ((l >= old_l) && ((l > old_l) || (data->flags & SF_BEFORE_EOL))) { - sv_setsv(*data->longest, data->last_found); + SvSetMagicSV(*data->longest, data->last_found); if (*data->longest == data->longest_fixed) { data->offset_fixed = l ? data->last_start_min : data->pos_min; if (data->flags & SF_BEFORE_EOL) @@ -495,6 +496,13 @@ S_scan_commit(pTHX_ RExC_state_t *pRExC_state, scan_data_t *data) } } SvCUR_set(data->last_found, 0); + { + SV * sv = data->last_found; + MAGIC *mg = + SvUTF8(sv) && SvMAGICAL(sv) ? mg_find(sv, PERL_MAGIC_utf8) : NULL; + if (mg && mg->mg_len > 0) + mg->mg_len = 0; + } data->last_end = -1; data->flags &= ~SF_BEFORE_EOL; } @@ -568,14 +576,17 @@ S_cl_and(pTHX_ struct regnode_charclass_class *cl, if (!(and_with->flags & ANYOF_EOS)) cl->flags &= ~ANYOF_EOS; - if (cl->flags & ANYOF_UNICODE_ALL && and_with->flags & ANYOF_UNICODE) { + if (cl->flags & ANYOF_UNICODE_ALL && and_with->flags & ANYOF_UNICODE && + !(and_with->flags & ANYOF_INVERT)) { cl->flags &= ~ANYOF_UNICODE_ALL; cl->flags |= ANYOF_UNICODE; ARG_SET(cl, ARG(and_with)); } - if (!(and_with->flags & ANYOF_UNICODE_ALL)) + if (!(and_with->flags & ANYOF_UNICODE_ALL) && + !(and_with->flags & ANYOF_INVERT)) cl->flags &= ~ANYOF_UNICODE_ALL; - if (!(and_with->flags & (ANYOF_UNICODE|ANYOF_UNICODE_ALL))) + if (!(and_with->flags & (ANYOF_UNICODE|ANYOF_UNICODE_ALL)) && + !(and_with->flags & ANYOF_INVERT)) cl->flags &= ~ANYOF_UNICODE; } @@ -913,6 +924,14 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap, reg ? I32_MAX : data->pos_min + data->pos_delta; } sv_catpvn(data->last_found, STRING(scan), STR_LEN(scan)); + { + SV * sv = data->last_found; + MAGIC *mg = SvUTF8(sv) && SvMAGICAL(sv) ? + mg_find(sv, PERL_MAGIC_utf8) : NULL; + if (mg && mg->mg_len >= 0) + mg->mg_len += utf8_length((U8*)STRING(scan), + (U8*)STRING(scan)+STR_LEN(scan)); + } if (UTF) SvUTF8_on(data->last_found); data->last_end = data->pos_min + l; @@ -1283,6 +1302,14 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap, reg SvCUR_set(data->last_found, SvCUR(data->last_found) - l); sv_catsv(data->last_found, last_str); + { + SV * sv = data->last_found; + MAGIC *mg = + SvUTF8(sv) && SvMAGICAL(sv) ? + mg_find(sv, PERL_MAGIC_utf8) : NULL; + if (mg && mg->mg_len >= 0) + mg->mg_len += CHR_SVLEN(last_str); + } data->last_end += l * (mincount - 1); } } else { @@ -1778,6 +1805,9 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm) r->prelen = xend - exp; r->precomp = savepvn(RExC_precomp, r->prelen); r->subbeg = NULL; +#ifdef PERL_COPY_ON_WRITE + r->saved_copy = Nullsv; +#endif r->reganch = pm->op_pmflags & PMf_COMPILETIME; r->nparens = RExC_npar - 1; /* set early to validate backrefs */ @@ -2229,8 +2259,10 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp) FAIL("Eval-group not allowed at runtime, use re 'eval'"); if (PL_tainting && PL_tainted) FAIL("Eval-group in insecure regular expression"); + if (PL_curcop == &PL_compiling) + PL_cv_has_eval = 1; } - + nextchar(pRExC_state); if (logical) { ret = reg_node(pRExC_state, LOGICAL); @@ -3130,11 +3162,6 @@ tryagain: ender = grok_hex(p + 1, &numlen, &flags, NULL); if (ender > 0xff) RExC_utf8 = 1; - /* numlen is generous */ - if (numlen + len >= 127) { - p--; - goto loopdone; - } p = e + 1; } } @@ -3278,7 +3305,7 @@ tryagain: } if (len > 0) *flagp |= HASWIDTH; - if (len == 1) + if (len == 1 && UNI_IS_INVARIANT(ender)) *flagp |= SIMPLE; if (!SIZE_ONLY) STR_LEN(ret) = len; @@ -4240,8 +4267,11 @@ S_nextchar(pTHX_ RExC_state_t *pRExC_state) for (;;) { if (*RExC_parse == '(' && RExC_parse[1] == '?' && RExC_parse[2] == '#') { - while (*RExC_parse && *RExC_parse != ')') + while (*RExC_parse != ')') { + if (RExC_parse == RExC_end) + FAIL("Sequence (?#... not terminated"); RExC_parse++; + } RExC_parse++; continue; } @@ -4251,9 +4281,8 @@ S_nextchar(pTHX_ RExC_state_t *pRExC_state) continue; } else if (*RExC_parse == '#') { - while (*RExC_parse && *RExC_parse != '\n') - RExC_parse++; - RExC_parse++; + while (RExC_parse < RExC_end) + if (*RExC_parse++ == '\n') break; continue; } } @@ -4711,7 +4740,7 @@ Perl_regprop(pTHX_ SV *sv, regnode *o) else if (k == ANYOF) { int i, rangestart = -1; U8 flags = ANYOF_FLAGS(o); - const char * const anyofs[] = { /* Should be syncronized with + const char * const anyofs[] = { /* Should be synchronized with * ANYOF_ #xdefines in regcomp.h */ "\\w", "\\W", @@ -4900,8 +4929,11 @@ Perl_pregfree(pTHX_ struct regexp *r) Safefree(r->precomp); if (r->offsets) /* 20010421 MJD */ Safefree(r->offsets); - if (RX_MATCH_COPIED(r)) - Safefree(r->subbeg); + RX_MATCH_COPY_FREE(r); +#ifdef PERL_COPY_ON_WRITE + if (r->saved_copy) + SvREFCNT_dec(r->saved_copy); +#endif if (r->substrs) { if (r->anchored_substr) SvREFCNT_dec(r->anchored_substr); @@ -5020,20 +5052,6 @@ S_re_croak2(pTHX_ const char* pat1,const char* pat2,...) void Perl_save_re_context(pTHX) { -#if 0 - SAVEPPTR(RExC_precomp); /* uncompiled string. */ - SAVEI32(RExC_npar); /* () count. */ - SAVEI32(RExC_size); /* Code size. */ - SAVEI32(RExC_flags); /* are we folding, multilining? */ - SAVEVPTR(RExC_rx); /* from regcomp.c */ - SAVEI32(RExC_seen); /* from regcomp.c */ - SAVEI32(RExC_sawback); /* Did we see \1, ...? */ - SAVEI32(RExC_naughty); /* How bad is this pattern? */ - SAVEVPTR(RExC_emit); /* Code-emit pointer; ®dummy = don't */ - SAVEPPTR(RExC_end); /* End of input for compile */ - SAVEPPTR(RExC_parse); /* Input-scan pointer. */ -#endif - SAVEI32(PL_reg_flags); /* from regexec.c */ SAVEPPTR(PL_bostr); SAVEPPTR(PL_reginput); /* String-input pointer. */ @@ -5042,6 +5060,7 @@ Perl_save_re_context(pTHX) SAVEVPTR(PL_regstartp); /* Pointer to startp array. */ SAVEVPTR(PL_regendp); /* Ditto for endp. */ SAVEVPTR(PL_reglastparen); /* Similarly for lastparen. */ + SAVEVPTR(PL_reglastcloseparen); /* Similarly for lastcloseparen. */ SAVEPPTR(PL_regtill); /* How far we are required to go. */ SAVEGENERICPV(PL_reg_start_tmp); /* from regexec.c */ PL_reg_start_tmp = 0; @@ -5063,12 +5082,29 @@ Perl_save_re_context(pTHX) SAVEI32(PL_reg_oldpos); /* from regexec.c */ SAVEVPTR(PL_reg_oldcurpm); /* from regexec.c */ SAVEVPTR(PL_reg_curpm); /* from regexec.c */ + SAVEPPTR(PL_reg_oldsaved); /* old saved substr during match */ + PL_reg_oldsaved = Nullch; + SAVEI32(PL_reg_oldsavedlen); /* old length of saved substr during match */ + PL_reg_oldsavedlen = 0; +#ifdef PERL_COPY_ON_WRITE + SAVESPTR(PL_nrs); + PL_nrs = Nullsv; +#endif + SAVEI32(PL_reg_maxiter); /* max wait until caching pos */ + PL_reg_maxiter = 0; + SAVEI32(PL_reg_leftiter); /* wait until caching pos */ + PL_reg_leftiter = 0; + SAVEGENERICPV(PL_reg_poscache); /* cache of pos of WHILEM */ + PL_reg_poscache = Nullch; + SAVEI32(PL_reg_poscache_size); /* size of pos cache of WHILEM */ + PL_reg_poscache_size = 0; + SAVEPPTR(PL_regprecomp); /* uncompiled string. */ SAVEI32(PL_regnpar); /* () count. */ SAVEI32(PL_regsize); /* from regexec.c */ { /* Save $1..$n (#18107: UTF-8 s/(\w+)/uc($1)/e); AMS 20021106. */ - int i; + U32 i; GV *mgv; REGEXP *rx; char digits[16];