X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=regcomp.c;h=6293ade0e2c83020269140fc052d914c224ca087;hb=7c04078ea21d9035bc412dbd7b8edf35773a2c43;hp=c738714acf8a40fa2e682f9d2250c674b3d7bf79;hpb=0eda9292bc85e7f943d7a8738310d47e93f67325;p=p5sagit%2Fp5-mst-13.2.git diff --git a/regcomp.c b/regcomp.c index c738714..6293ade 100644 --- a/regcomp.c +++ b/regcomp.c @@ -69,7 +69,8 @@ * **** Alterations to Henry's code are... **** - **** Copyright (c) 1991-2003, Larry Wall + **** Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, + **** 2000, 2001, 2002, 2003, by Larry Wall and others **** **** You may distribute under the terms of either the GNU General Public **** License or the Artistic License, as specified in the README file. @@ -95,12 +96,12 @@ #endif /* op */ #ifdef MSDOS -# if defined(BUGGY_MSC6) +# if defined(BUGGY_MSC6) /* MSC 6.00A breaks on op/regexp.t test 85 unless we turn this off */ - # pragma optimize("a",off) +# pragma optimize("a",off) /* But MSC 6.00A is happy with 'w', for aliases only across function calls*/ - # pragma optimize("w",on ) -# endif /* BUGGY_MSC6 */ +# pragma optimize("w",on ) +# endif /* BUGGY_MSC6 */ #endif /* MSDOS */ #ifndef STATIC @@ -471,7 +472,7 @@ S_scan_commit(pTHX_ RExC_state_t *pRExC_state, scan_data_t *data) STRLEN old_l = CHR_SVLEN(*data->longest); if ((l >= old_l) && ((l > old_l) || (data->flags & SF_BEFORE_EOL))) { - sv_setsv(*data->longest, data->last_found); + SvSetMagicSV(*data->longest, data->last_found); if (*data->longest == data->longest_fixed) { data->offset_fixed = l ? data->last_start_min : data->pos_min; if (data->flags & SF_BEFORE_EOL) @@ -575,14 +576,17 @@ S_cl_and(pTHX_ struct regnode_charclass_class *cl, if (!(and_with->flags & ANYOF_EOS)) cl->flags &= ~ANYOF_EOS; - if (cl->flags & ANYOF_UNICODE_ALL && and_with->flags & ANYOF_UNICODE) { + if (cl->flags & ANYOF_UNICODE_ALL && and_with->flags & ANYOF_UNICODE && + !(and_with->flags & ANYOF_INVERT)) { cl->flags &= ~ANYOF_UNICODE_ALL; cl->flags |= ANYOF_UNICODE; ARG_SET(cl, ARG(and_with)); } - if (!(and_with->flags & ANYOF_UNICODE_ALL)) + if (!(and_with->flags & ANYOF_UNICODE_ALL) && + !(and_with->flags & ANYOF_INVERT)) cl->flags &= ~ANYOF_UNICODE_ALL; - if (!(and_with->flags & (ANYOF_UNICODE|ANYOF_UNICODE_ALL))) + if (!(and_with->flags & (ANYOF_UNICODE|ANYOF_UNICODE_ALL)) && + !(and_with->flags & ANYOF_INVERT)) cl->flags &= ~ANYOF_UNICODE; } @@ -925,8 +929,8 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap, reg MAGIC *mg = SvUTF8(sv) && SvMAGICAL(sv) ? mg_find(sv, PERL_MAGIC_utf8) : NULL; if (mg && mg->mg_len >= 0) - mg->mg_len += utf8_length(STRING(scan), - STRING(scan)+STR_LEN(scan)); + mg->mg_len += utf8_length((U8*)STRING(scan), + (U8*)STRING(scan)+STR_LEN(scan)); } if (UTF) SvUTF8_on(data->last_found); @@ -2249,14 +2253,16 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp) } else { /* First pass */ if (PL_reginterp_cnt < ++RExC_seen_evals - && PL_curcop != &PL_compiling) + && IN_PERL_RUNTIME) /* No compiled RE interpolated, has runtime components ===> unsafe. */ FAIL("Eval-group not allowed at runtime, use re 'eval'"); if (PL_tainting && PL_tainted) FAIL("Eval-group in insecure regular expression"); + if (IN_PERL_COMPILETIME) + PL_cv_has_eval = 1; } - + nextchar(pRExC_state); if (logical) { ret = reg_node(pRExC_state, LOGICAL); @@ -2505,8 +2511,8 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp) if (paren == '>') node = SUSPEND, flag = 0; reginsert(pRExC_state, node,ret); - Set_Node_Offset(ret, oregcomp_parse); - Set_Node_Length(ret, RExC_parse - oregcomp_parse + 2); + Set_Node_Cur_Length(ret); + Set_Node_Offset(ret, parse_start + 1); ret->flags = flag; regtail(pRExC_state, ret, reg_node(pRExC_state, TAIL)); } @@ -2787,7 +2793,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp) { register regnode *ret = 0; I32 flags; - char *parse_start = 0; + char *parse_start = RExC_parse; *flagp = WORST; /* Tentatively. */ @@ -3050,6 +3056,7 @@ tryagain: default: /* Do not generate `unrecognized' warnings here, we fall back into the quick-grab loop below */ + parse_start--; goto defchar; } break; @@ -3156,11 +3163,6 @@ tryagain: ender = grok_hex(p + 1, &numlen, &flags, NULL); if (ender > 0xff) RExC_utf8 = 1; - /* numlen is generous */ - if (numlen + len >= 127) { - p--; - goto loopdone; - } p = e + 1; } } @@ -3304,7 +3306,7 @@ tryagain: } if (len > 0) *flagp |= HASWIDTH; - if (len == 1) + if (len == 1 && UNI_IS_INVARIANT(ender)) *flagp |= SIMPLE; if (!SIZE_ONLY) STR_LEN(ret) = len; @@ -4266,8 +4268,11 @@ S_nextchar(pTHX_ RExC_state_t *pRExC_state) for (;;) { if (*RExC_parse == '(' && RExC_parse[1] == '?' && RExC_parse[2] == '#') { - while (*RExC_parse && *RExC_parse != ')') + while (*RExC_parse != ')') { + if (RExC_parse == RExC_end) + FAIL("Sequence (?#... not terminated"); RExC_parse++; + } RExC_parse++; continue; } @@ -4277,9 +4282,8 @@ S_nextchar(pTHX_ RExC_state_t *pRExC_state) continue; } else if (*RExC_parse == '#') { - while (*RExC_parse && *RExC_parse != '\n') - RExC_parse++; - RExC_parse++; + while (RExC_parse < RExC_end) + if (*RExC_parse++ == '\n') break; continue; } } @@ -4422,6 +4426,7 @@ S_reginsert(pTHX_ RExC_state_t *pRExC_state, U8 op, regnode *opnd) RExC_parse - RExC_start, RExC_offsets[0])); Set_Node_Offset(place, RExC_parse); + Set_Node_Length(place, 1); } src = NEXTOPER(place); FILL_ADVANCE_NODE(place, op); @@ -5040,7 +5045,7 @@ S_re_croak2(pTHX_ const char* pat1,const char* pat2,...) if (l1 > 512) l1 = 512; Copy(message, buf, l1 , char); - buf[l1] = '\0'; /* Overwrite \n */ + buf[l1-1] = '\0'; /* Overwrite \n */ Perl_croak(aTHX_ "%s", buf); }