X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=regcomp.c;h=538849558819a2d120b71a0b3f4614e55995a33f;hb=6137113dfd943e6f896f8b9c1a4df2b7d4e355ae;hp=c1ec5f905e3c7ea672e273d1aff84e0688aa6398;hpb=4bb101f2758f169969171dfe6b70f68a406dcc1e;p=p5sagit%2Fp5-mst-13.2.git diff --git a/regcomp.c b/regcomp.c index c1ec5f9..5388495 100644 --- a/regcomp.c +++ b/regcomp.c @@ -96,12 +96,12 @@ #endif /* op */ #ifdef MSDOS -# if defined(BUGGY_MSC6) +# if defined(BUGGY_MSC6) /* MSC 6.00A breaks on op/regexp.t test 85 unless we turn this off */ - # pragma optimize("a",off) +# pragma optimize("a",off) /* But MSC 6.00A is happy with 'w', for aliases only across function calls*/ - # pragma optimize("w",on ) -# endif /* BUGGY_MSC6 */ +# pragma optimize("w",on ) +# endif /* BUGGY_MSC6 */ #endif /* MSDOS */ #ifndef STATIC @@ -472,7 +472,7 @@ S_scan_commit(pTHX_ RExC_state_t *pRExC_state, scan_data_t *data) STRLEN old_l = CHR_SVLEN(*data->longest); if ((l >= old_l) && ((l > old_l) || (data->flags & SF_BEFORE_EOL))) { - sv_setsv(*data->longest, data->last_found); + SvSetMagicSV(*data->longest, data->last_found); if (*data->longest == data->longest_fixed) { data->offset_fixed = l ? data->last_start_min : data->pos_min; if (data->flags & SF_BEFORE_EOL) @@ -576,14 +576,17 @@ S_cl_and(pTHX_ struct regnode_charclass_class *cl, if (!(and_with->flags & ANYOF_EOS)) cl->flags &= ~ANYOF_EOS; - if (cl->flags & ANYOF_UNICODE_ALL && and_with->flags & ANYOF_UNICODE) { + if (cl->flags & ANYOF_UNICODE_ALL && and_with->flags & ANYOF_UNICODE && + !(and_with->flags & ANYOF_INVERT)) { cl->flags &= ~ANYOF_UNICODE_ALL; cl->flags |= ANYOF_UNICODE; ARG_SET(cl, ARG(and_with)); } - if (!(and_with->flags & ANYOF_UNICODE_ALL)) + if (!(and_with->flags & ANYOF_UNICODE_ALL) && + !(and_with->flags & ANYOF_INVERT)) cl->flags &= ~ANYOF_UNICODE_ALL; - if (!(and_with->flags & (ANYOF_UNICODE|ANYOF_UNICODE_ALL))) + if (!(and_with->flags & (ANYOF_UNICODE|ANYOF_UNICODE_ALL)) && + !(and_with->flags & ANYOF_INVERT)) cl->flags &= ~ANYOF_UNICODE; } @@ -1185,7 +1188,9 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap, reg if ( OP(oscan) == CURLYX && data && !(data->flags & SF_HAS_PAR) && !(data->flags & SF_HAS_EVAL) - && !deltanext ) { + && !deltanext /* atom is fixed width */ + && minnext != 0 /* CURLYM can't handle zero width */ + ) { /* XXXX How to optimize if data == 0? */ /* Optimize to a simpler form. */ regnode *nxt = NEXTOPER(oscan) + EXTRA_STEP_2ARGS; /* OPEN */ @@ -2250,14 +2255,16 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp) } else { /* First pass */ if (PL_reginterp_cnt < ++RExC_seen_evals - && PL_curcop != &PL_compiling) + && IN_PERL_RUNTIME) /* No compiled RE interpolated, has runtime components ===> unsafe. */ FAIL("Eval-group not allowed at runtime, use re 'eval'"); if (PL_tainting && PL_tainted) FAIL("Eval-group in insecure regular expression"); + if (IN_PERL_COMPILETIME) + PL_cv_has_eval = 1; } - + nextchar(pRExC_state); if (logical) { ret = reg_node(pRExC_state, LOGICAL); @@ -2506,8 +2513,8 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp) if (paren == '>') node = SUSPEND, flag = 0; reginsert(pRExC_state, node,ret); - Set_Node_Offset(ret, oregcomp_parse); - Set_Node_Length(ret, RExC_parse - oregcomp_parse + 2); + Set_Node_Cur_Length(ret); + Set_Node_Offset(ret, parse_start + 1); ret->flags = flag; regtail(pRExC_state, ret, reg_node(pRExC_state, TAIL)); } @@ -2788,7 +2795,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp) { register regnode *ret = 0; I32 flags; - char *parse_start = 0; + char *parse_start = RExC_parse; *flagp = WORST; /* Tentatively. */ @@ -3051,6 +3058,7 @@ tryagain: default: /* Do not generate `unrecognized' warnings here, we fall back into the quick-grab loop below */ + parse_start--; goto defchar; } break; @@ -3157,11 +3165,6 @@ tryagain: ender = grok_hex(p + 1, &numlen, &flags, NULL); if (ender > 0xff) RExC_utf8 = 1; - /* numlen is generous */ - if (numlen + len >= 127) { - p--; - goto loopdone; - } p = e + 1; } } @@ -3305,7 +3308,7 @@ tryagain: } if (len > 0) *flagp |= HASWIDTH; - if (len == 1) + if (len == 1 && UNI_IS_INVARIANT(ender)) *flagp |= SIMPLE; if (!SIZE_ONLY) STR_LEN(ret) = len; @@ -3658,7 +3661,8 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state) } RExC_parse = e + 1; ANYOF_FLAGS(ret) |= ANYOF_UNICODE; - continue; + namedclass = ANYOF_MAX; /* no official name, but it's named */ + break; case 'n': value = '\n'; break; case 'r': value = '\r'; break; case 't': value = '\t'; break; @@ -4063,6 +4067,9 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state) } Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsXDigit\n"); break; + case ANYOF_MAX: + /* this is to handle \p and \P */ + break; default: vFAIL("Invalid [::] class"); break; @@ -4267,8 +4274,11 @@ S_nextchar(pTHX_ RExC_state_t *pRExC_state) for (;;) { if (*RExC_parse == '(' && RExC_parse[1] == '?' && RExC_parse[2] == '#') { - while (*RExC_parse && *RExC_parse != ')') + while (*RExC_parse != ')') { + if (RExC_parse == RExC_end) + FAIL("Sequence (?#... not terminated"); RExC_parse++; + } RExC_parse++; continue; } @@ -4278,9 +4288,8 @@ S_nextchar(pTHX_ RExC_state_t *pRExC_state) continue; } else if (*RExC_parse == '#') { - while (*RExC_parse && *RExC_parse != '\n') - RExC_parse++; - RExC_parse++; + while (RExC_parse < RExC_end) + if (*RExC_parse++ == '\n') break; continue; } } @@ -4423,6 +4432,7 @@ S_reginsert(pTHX_ RExC_state_t *pRExC_state, U8 op, regnode *opnd) RExC_parse - RExC_start, RExC_offsets[0])); Set_Node_Offset(place, RExC_parse); + Set_Node_Length(place, 1); } src = NEXTOPER(place); FILL_ADVANCE_NODE(place, op); @@ -5041,7 +5051,7 @@ S_re_croak2(pTHX_ const char* pat1,const char* pat2,...) if (l1 > 512) l1 = 512; Copy(message, buf, l1 , char); - buf[l1] = '\0'; /* Overwrite \n */ + buf[l1-1] = '\0'; /* Overwrite \n */ Perl_croak(aTHX_ "%s", buf); }