X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=regcomp.c;h=696796b7962715941caadf0008b26c6409f93083;hb=bc8f2ddd12bfb4ed7885096cdab471dc8d1188aa;hp=3ad1f3b07d614b90fd387146fbcb10fa441de089;hpb=da7fcca4b8d6fb4dc88e0305bf9830bf24912ebd;p=p5sagit%2Fp5-mst-13.2.git diff --git a/regcomp.c b/regcomp.c index 3ad1f3b..696796b 100644 --- a/regcomp.c +++ b/regcomp.c @@ -3727,11 +3727,22 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, data->whilem_c = data_fake.whilem_c; } if (f & SCF_DO_STCLASS_AND) { - const int was = (data->start_class->flags & ANYOF_EOS); - - cl_and(data->start_class, &intrnl); - if (was) - data->start_class->flags |= ANYOF_EOS; + if (flags & SCF_DO_STCLASS_OR) { + /* OR before, AND after: ideally we would recurse with + * data_fake to get the AND applied by study of the + * remainder of the pattern, and then derecurse; + * *** HACK *** for now just treat as "no information". + * See [perl #56690]. + */ + cl_init(pRExC_state, data->start_class); + } else { + /* AND before and after: combine and continue */ + const int was = (data->start_class->flags & ANYOF_EOS); + + cl_and(data->start_class, &intrnl); + if (was) + data->start_class->flags |= ANYOF_EOS; + } } } #if PERL_ENABLE_POSITIVE_ASSERTION_STUDY @@ -6138,6 +6149,13 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth) /* Pick up the branches, linking them together. */ parse_start = RExC_parse; /* MJD */ br = regbranch(pRExC_state, &flags, 1,depth+1); + + if (freeze_paren) { + if (RExC_npar > after_freeze) + after_freeze = RExC_npar; + RExC_npar = freeze_paren; + } + /* branch_len = (paren != 0); */ if (br == NULL) @@ -6553,7 +6571,7 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth) /* reg_namedseq(pRExC_state,UVp) This is expected to be called by a parser routine that has - recognized'\N' and needs to handle the rest. RExC_parse is + recognized '\N' and needs to handle the rest. RExC_parse is expected to point at the first char following the N at the time of the call. @@ -6567,11 +6585,11 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth) be returned to indicate failure. (This will NOT be a valid pointer to a regnode.) - If value is null then it is assumed that we are parsing normal text + If valuep is null then it is assumed that we are parsing normal text and inserts a new EXACT node into the program containing the resolved string and returns a pointer to the new node. If the string is zerolength a NOTHING node is emitted. - + On success RExC_parse is set to the char following the endbrace. Parsing failures will generate a fatal errorvia vFAIL(...) @@ -6585,7 +6603,7 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth) */ STATIC regnode * -S_reg_namedseq(pTHX_ RExC_state_t *pRExC_state, UV *valuep) +S_reg_namedseq(pTHX_ RExC_state_t *pRExC_state, UV *valuep, I32 *flagp) { char * name; /* start of the content of the name */ char * endbrace; /* endbrace following the name */ @@ -6597,8 +6615,22 @@ S_reg_namedseq(pTHX_ RExC_state_t *pRExC_state, UV *valuep) PERL_ARGS_ASSERT_REG_NAMEDSEQ; - if (*RExC_parse != '{') { - vFAIL("Missing braces on \\N{}"); + if (*RExC_parse != '{' || + (*RExC_parse == '{' && RExC_parse[1] + && strchr("0123456789", RExC_parse[1]))) + { + GET_RE_DEBUG_FLAGS_DECL; + if (valuep) + /* no bare \N in a charclass */ + vFAIL("Missing braces on \\N{}"); + GET_RE_DEBUG_FLAGS; + nextchar(pRExC_state); + ret = reg_node(pRExC_state, REG_ANY); + *flagp |= HASWIDTH|SIMPLE; + RExC_naughty++; + RExC_parse--; + Set_Node_Length(ret, 1); /* MJD */ + return ret; } name = RExC_parse+1; endbrace = strchr(RExC_parse, '}'); @@ -7159,12 +7191,12 @@ tryagain: } break; case 'N': - /* Handle \N{NAME} here and not below because it can be + /* Handle \N and \N{NAME} here and not below because it can be multicharacter. join_exact() will join them up later on. Also this makes sure that things like /\N{BLAH}+/ and \N{BLAH} being multi char Just Happen. dmq*/ ++RExC_parse; - ret= reg_namedseq(pRExC_state, NULL); + ret= reg_namedseq(pRExC_state, NULL, flagp); break; case 'k': /* Handle \k and \k'NAME' */ parse_named_seq: @@ -7430,6 +7462,19 @@ tryagain: I32 flags = 0; STRLEN numlen = 3; ender = grok_oct(p, &numlen, &flags, NULL); + + /* An octal above 0xff is interpreted differently + * depending on if the re is in utf8 or not. If it + * is in utf8, the value will be itself, otherwise + * it is interpreted as modulo 0x100. It has been + * decided to discourage the use of octal above the + * single-byte range. For now, warn only when + * it ends up modulo */ + if (SIZE_ONLY && ender >= 0x100 + && ! UTF && ! PL_encoding + && ckWARN2(WARN_DEPRECATED, WARN_REGEXP)) { + vWARNdep(p, "Use of octal value above 377 is deprecated"); + } p += numlen; } else { @@ -7951,7 +7996,7 @@ parseit: from earlier versions, OTOH that behaviour was broken as well. */ UV v; /* value is register so we cant & it /grrr */ - if (reg_namedseq(pRExC_state, &v)) { + if (reg_namedseq(pRExC_state, &v, NULL)) { goto parseit; } value= v; @@ -9369,7 +9414,6 @@ Perl_pregfree2(pTHX_ REGEXP *rx) if (r->saved_copy) SvREFCNT_dec(r->saved_copy); #endif - Safefree(r->swap); Safefree(r->offs); } @@ -9407,7 +9451,8 @@ Perl_reg_temp_copy (pTHX_ REGEXP *rx) a case of zero-ing that, rather than copying the current length. */ SvPV_set(ret_x, RX_WRAPPED(rx)); SvFLAGS(ret_x) |= SvFLAGS(rx) & (SVf_POK|SVp_POK|SVf_UTF8); - StructCopy(&(r->xpv_cur), &(ret->xpv_cur), struct regexp_allocated); + memcpy(&(ret->xpv_cur), &(r->xpv_cur), + sizeof(regexp) - STRUCT_OFFSET(regexp, xpv_cur)); SvLEN_set(ret_x, 0); Newx(ret->offs, npar, regexp_paren_pair); Copy(r->offs, ret->offs, npar, regexp_paren_pair); @@ -9428,7 +9473,6 @@ Perl_reg_temp_copy (pTHX_ REGEXP *rx) ret->saved_copy = NULL; #endif ret->mother_re = rx; - ret->swap = NULL; return ret_x; }