From: Ilya Zakharevich Date: Wed, 25 Nov 1998 23:33:45 +0000 (-0500) Subject: Fix \G in REx without //g X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=22e551b9ed23de1d5af9977d67389142f4d41cc5;p=p5sagit%2Fp5-mst-13.2.git Fix \G in REx without //g Message-Id: <199811260433.XAA29281@monk.mps.ohio-state.edu> p4raw-id: //depot/perl@2365 --- diff --git a/cop.h b/cop.h index 043ea8d..6bdb594 100644 --- a/cop.h +++ b/cop.h @@ -213,7 +213,7 @@ struct block { struct subst { I32 sbu_iters; I32 sbu_maxiters; - I32 sbu_safebase; + I32 sbu_rflags; I32 sbu_oldsave; bool sbu_once; bool sbu_rxtainted; @@ -228,7 +228,7 @@ struct subst { }; #define sb_iters cx_u.cx_subst.sbu_iters #define sb_maxiters cx_u.cx_subst.sbu_maxiters -#define sb_safebase cx_u.cx_subst.sbu_safebase +#define sb_rflags cx_u.cx_subst.sbu_rflags #define sb_oldsave cx_u.cx_subst.sbu_oldsave #define sb_once cx_u.cx_subst.sbu_once #define sb_rxtainted cx_u.cx_subst.sbu_rxtainted @@ -244,7 +244,7 @@ struct subst { #define PUSHSUBST(cx) CXINC, cx = &cxstack[cxstack_ix], \ cx->sb_iters = iters, \ cx->sb_maxiters = maxiters, \ - cx->sb_safebase = safebase, \ + cx->sb_rflags = r_flags, \ cx->sb_oldsave = oldsave, \ cx->sb_once = once, \ cx->sb_rxtainted = rxtainted, \ diff --git a/embedvar.h b/embedvar.h index 7225618..733347d 100644 --- a/embedvar.h +++ b/embedvar.h @@ -55,9 +55,11 @@ #define PL_reg_call_cc (PL_curinterp->Treg_call_cc) #define PL_reg_eval_set (PL_curinterp->Treg_eval_set) #define PL_reg_flags (PL_curinterp->Treg_flags) +#define PL_reg_ganch (PL_curinterp->Treg_ganch) #define PL_reg_re (PL_curinterp->Treg_re) #define PL_reg_start_tmp (PL_curinterp->Treg_start_tmp) #define PL_reg_start_tmpl (PL_curinterp->Treg_start_tmpl) +#define PL_reg_sv (PL_curinterp->Treg_sv) #define PL_regbol (PL_curinterp->Tregbol) #define PL_regcc (PL_curinterp->Tregcc) #define PL_regcode (PL_curinterp->Tregcode) @@ -439,9 +441,11 @@ #define PL_Treg_call_cc PL_reg_call_cc #define PL_Treg_eval_set PL_reg_eval_set #define PL_Treg_flags PL_reg_flags +#define PL_Treg_ganch PL_reg_ganch #define PL_Treg_re PL_reg_re #define PL_Treg_start_tmp PL_reg_start_tmp #define PL_Treg_start_tmpl PL_reg_start_tmpl +#define PL_Treg_sv PL_reg_sv #define PL_Tregbol PL_regbol #define PL_Tregcc PL_regcc #define PL_Tregcode PL_regcode @@ -566,9 +570,11 @@ #define PL_reg_call_cc (thr->Treg_call_cc) #define PL_reg_eval_set (thr->Treg_eval_set) #define PL_reg_flags (thr->Treg_flags) +#define PL_reg_ganch (thr->Treg_ganch) #define PL_reg_re (thr->Treg_re) #define PL_reg_start_tmp (thr->Treg_start_tmp) #define PL_reg_start_tmpl (thr->Treg_start_tmpl) +#define PL_reg_sv (thr->Treg_sv) #define PL_regbol (thr->Tregbol) #define PL_regcc (thr->Tregcc) #define PL_regcode (thr->Tregcode) diff --git a/objXSUB.h b/objXSUB.h index a9820dd..d4d101d 100644 --- a/objXSUB.h +++ b/objXSUB.h @@ -498,12 +498,16 @@ #define PL_reg_eval_set pPerl->PL_reg_eval_set #undef PL_reg_flags #define PL_reg_flags pPerl->PL_reg_flags +#undef PL_reg_ganch +#define PL_reg_ganch pPerl->PL_reg_ganch #undef PL_reg_re #define PL_reg_re pPerl->PL_reg_re #undef PL_reg_start_tmp #define PL_reg_start_tmp pPerl->PL_reg_start_tmp #undef PL_reg_start_tmpl #define PL_reg_start_tmpl pPerl->PL_reg_start_tmpl +#undef PL_reg_sv +#define PL_reg_sv pPerl->PL_reg_sv #undef PL_regbol #define PL_regbol pPerl->PL_regbol #undef PL_regcc diff --git a/pp.c b/pp.c index 0bd4842..21a5dd3 100644 --- a/pp.c +++ b/pp.c @@ -4672,7 +4672,7 @@ PP(pp_split) else { maxiters += (strend - s) * rx->nparens; while (s < strend && --limit && - CALLREGEXEC(rx, s, strend, orig, 1, Nullsv, NULL, 0)) + CALLREGEXEC(rx, s, strend, orig, 1, sv, NULL, 0)) { TAINT_IF(RX_MATCH_TAINTED(rx)); if (rx->subbase diff --git a/pp_ctl.c b/pp_ctl.c index e488749..f2cee37 100644 --- a/pp_ctl.c +++ b/pp_ctl.c @@ -164,8 +164,9 @@ PP(pp_substcont) /* Are we done */ if (cx->sb_once || !CALLREGEXEC(rx, s, cx->sb_strend, orig, - s == m, Nullsv, NULL, - cx->sb_safebase ? 0 : REXEC_COPY_STR)) + s == m, Nullsv, cx->sb_targ, + ((cx->sb_rflags & REXEC_COPY_STR) + ? 0 : REXEC_COPY_STR))) { SV *targ = cx->sb_targ; sv_catpvn(dstr, s, cx->sb_strend - s); diff --git a/pp_hot.c b/pp_hot.c index 713b1d1..f9ff09d 100644 --- a/pp_hot.c +++ b/pp_hot.c @@ -832,7 +832,7 @@ PP(pp_match) register char *s; char *strend; I32 global; - I32 safebase; + I32 r_flags; char *truebase; register REGEXP *rx = pm->op_pmregexp; bool rxtainted; @@ -841,7 +841,6 @@ PP(pp_match) I32 minmatch = 0; I32 oldsave = PL_savestack_ix; I32 update_minmatch = 1; - SV *screamer; if (PL_op->op_flags & OPf_STACKED) TARG = POPs; @@ -871,10 +870,6 @@ PP(pp_match) } if (rx->minlen > len) goto failure; - screamer = ( (SvSCREAM(TARG) && rx->check_substr - && SvTYPE(rx->check_substr) == SVt_PVBM - && SvVALID(rx->check_substr)) - ? TARG : Nullsv); truebase = t = s; if (global = pm->op_pmflags & PMf_GLOBAL) { rx->startp[0] = 0; @@ -887,9 +882,14 @@ PP(pp_match) } } } - safebase = ((gimme != G_ARRAY && !global && rx->nparens) + r_flags = ((gimme != G_ARRAY && !global && rx->nparens) || SvTEMP(TARG) || PL_sawampersand) ? REXEC_COPY_STR : 0; + if (SvSCREAM(TARG) && rx->check_substr + && SvTYPE(rx->check_substr) == SVt_PVBM + && SvVALID(rx->check_substr)) + r_flags |= REXEC_SCREAM; + if (pm->op_pmflags & (PMf_MULTILINE|PMf_SINGLELINE)) { SAVEINT(PL_multiline); PL_multiline = pm->op_pmflags & PMf_MULTILINE; @@ -905,7 +905,7 @@ play_it_again: } if (rx->check_substr) { if (!(rx->reganch & ROPT_NOSCAN)) { /* Floating checkstring. */ - if ( screamer ) { + if (r_flags & REXEC_SCREAM) { I32 p = -1; char *b; @@ -950,8 +950,7 @@ play_it_again: rx->float_substr = Nullsv; } } - if (CALLREGEXEC(rx, s, strend, truebase, minmatch, - screamer, NULL, safebase)) + if (CALLREGEXEC(rx, s, strend, truebase, minmatch, TARG, NULL, r_flags)) { PL_curpm = pm; if (pm->op_pmflags & PMf_ONCE) @@ -1602,13 +1601,12 @@ PP(pp_subst) bool once; bool rxtainted; char *orig; - I32 safebase; + I32 r_flags; register REGEXP *rx = pm->op_pmregexp; STRLEN len; int force_on_match = 0; I32 oldsave = PL_savestack_ix; I32 update_minmatch = 1; - SV *screamer; /* known replacement string? */ dstr = (pm->op_pmflags & PMf_CONST) ? POPs : Nullsv; @@ -1646,12 +1644,12 @@ PP(pp_subst) pm = PL_curpm; rx = pm->op_pmregexp; } - screamer = ( (SvSCREAM(TARG) && rx->check_substr - && SvTYPE(rx->check_substr) == SVt_PVBM - && SvVALID(rx->check_substr)) - ? TARG : Nullsv); - safebase = (rx->nparens || SvTEMP(TARG) || PL_sawampersand) + r_flags = (rx->nparens || SvTEMP(TARG) || PL_sawampersand) ? REXEC_COPY_STR : 0; + if (SvSCREAM(TARG) && rx->check_substr + && SvTYPE(rx->check_substr) == SVt_PVBM + && SvVALID(rx->check_substr)) + r_flags |= REXEC_SCREAM; if (pm->op_pmflags & (PMf_MULTILINE|PMf_SINGLELINE)) { SAVEINT(PL_multiline); PL_multiline = pm->op_pmflags & PMf_MULTILINE; @@ -1659,7 +1657,7 @@ PP(pp_subst) orig = m = s; if (rx->check_substr) { if (!(rx->reganch & ROPT_NOSCAN)) { /* It floats. */ - if (screamer) { + if (r_flags & REXEC_SCREAM) { I32 p = -1; char *b; @@ -1706,9 +1704,9 @@ PP(pp_subst) c = dstr ? SvPV(dstr, clen) : Nullch; /* can do inplace substitution? */ - if (c && clen <= rx->minlen && (once || !(safebase & REXEC_COPY_STR)) + if (c && clen <= rx->minlen && (once || !(r_flags & REXEC_COPY_STR)) && !(rx->reganch & ROPT_LOOKBEHIND_SEEN)) { - if (!CALLREGEXEC(rx, s, strend, orig, 0, screamer, NULL, safebase)) { + if (!CALLREGEXEC(rx, s, strend, orig, 0, TARG, NULL, r_flags)) { SPAGAIN; PUSHs(&PL_sv_no); LEAVE_SCOPE(oldsave); @@ -1808,7 +1806,7 @@ PP(pp_subst) RETURN; } - if (CALLREGEXEC(rx, s, strend, orig, 0, screamer, NULL, safebase)) { + if (CALLREGEXEC(rx, s, strend, orig, 0, TARG, NULL, r_flags)) { if (force_on_match) { force_on_match = 0; s = SvPV_force(TARG, len); @@ -1842,7 +1840,7 @@ PP(pp_subst) sv_catpvn(dstr, c, clen); if (once) break; - } while (CALLREGEXEC(rx, s, strend, orig, s == m, Nullsv, NULL, safebase)); + } while (CALLREGEXEC(rx, s, strend, orig, s == m, Nullsv, NULL, r_flags)); sv_catpvn(dstr, s, strend - s); (void)SvOOK_off(TARG); diff --git a/regexec.c b/regexec.c index 841b900..46833c2 100644 --- a/regexec.c +++ b/regexec.c @@ -259,13 +259,13 @@ cache_re(regexp *prog) PL_regdata = prog->data; PL_reg_re = prog; } - + /* - regexec_flags - match a regexp against a string */ I32 regexec_flags(register regexp *prog, char *stringarg, register char *strend, - char *strbeg, I32 minend, SV *screamer, void *data, U32 flags) + char *strbeg, I32 minend, SV *sv, void *data, U32 flags) /* strend: pointer to null at end of string */ /* strbeg: real beginning of string */ /* minend: end of match must be >=minend after stringarg. */ @@ -349,9 +349,9 @@ regexec_flags(register regexp *prog, char *stringarg, register char *strend, start_shift = prog->check_offset_min; /* okay to underestimate on CC */ /* Should be nonnegative! */ end_shift = minlen - start_shift - CHR_SVLEN(prog->check_substr); - if (screamer) { + if (flags & REXEC_SCREAM) { if (PL_screamfirst[BmRARE(prog->check_substr)] >= 0) - s = screaminstr(screamer, prog->check_substr, + s = screaminstr(sv, prog->check_substr, start_shift + (stringarg - strbeg), end_shift, &scream_pos, 0); else @@ -401,14 +401,23 @@ regexec_flags(register regexp *prog, char *stringarg, register char *strend, (strend - startpos > 60 ? "..." : "")) ); + if (prog->reganch & ROPT_GPOS_SEEN) { + MAGIC *mg; + int pos = 0; + + if (SvTYPE(sv) >= SVt_PVMG && SvMAGIC(sv) + && (mg = mg_find(sv, 'g')) && mg->mg_len >= 0) + pos = mg->mg_len; + PL_reg_ganch = startpos + pos; + } + /* Simplest case: anchored match need be tried only once. */ /* [unless only anchor is BOL and multiline is set] */ - if (prog->reganch & ROPT_ANCH) { + if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) { if (regtry(prog, startpos)) goto got_it; - else if (!(prog->reganch & ROPT_ANCH_GPOS) && - (PL_multiline || (prog->reganch & ROPT_IMPLICIT) - || (prog->reganch & ROPT_ANCH_MBOL))) + else if (PL_multiline || (prog->reganch & ROPT_IMPLICIT) + || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */ { if (minlen) dontbother = minlen - 1; @@ -424,6 +433,10 @@ regexec_flags(register regexp *prog, char *stringarg, register char *strend, } } goto phooey; + } else if (prog->reganch & ROPT_ANCH_GPOS) { + if (regtry(prog, PL_reg_ganch)) + goto got_it; + goto phooey; } /* Messy cases: unanchored match. */ @@ -479,8 +492,8 @@ regexec_flags(register regexp *prog, char *stringarg, register char *strend, dontbother = end_shift; strend = HOPc(strend, -dontbother); while ( (s <= last) && - (screamer - ? (s = screaminstr(screamer, must, HOPc(s, back_min) - strbeg, + ((flags & REXEC_SCREAM) + ? (s = screaminstr(sv, must, HOPc(s, back_min) - strbeg, end_shift, &scream_pos, 0)) : (s = fbm_instr((unsigned char*)HOP(s, back_min), (unsigned char*)strend, must, 0))) ) { @@ -912,8 +925,8 @@ regexec_flags(register regexp *prog, char *stringarg, register char *strend, char *last; I32 oldpos = scream_pos; - if (screamer) { - last = screaminstr(screamer, prog->float_substr, s - strbeg, + if (flags & REXEC_SCREAM) { + last = screaminstr(sv, prog->float_substr, s - strbeg, end_shift, &scream_pos, 1); /* last one */ if (!last) { last = scream_olds; /* Only one occurence. */ @@ -1159,7 +1172,7 @@ regmatch(regnode *prog) break; sayNO; case GPOS: - if (locinput == PL_regbol) + if (locinput == PL_reg_ganch) break; sayNO; case EOL: diff --git a/regexp.h b/regexp.h index 5082610..67410a5 100644 --- a/regexp.h +++ b/regexp.h @@ -86,6 +86,7 @@ typedef struct regexp { #define ROPT_LOOKBEHIND_SEEN 0x00100 #define ROPT_EVAL_SEEN 0x00200 #define ROPT_TAINTED_SEEN 0x00400 +#define ROPT_ANCH_SBOL 0x00800 /* 0xf800 of reganch is used by PMf_COMPILETIME */ @@ -101,6 +102,7 @@ typedef struct regexp { #define REXEC_COPY_STR 1 /* Need to copy the string. */ #define REXEC_CHECKED 2 /* check_substr already checked. */ +#define REXEC_SCREAM 4 /* use scream table. */ #define ReREFCNT_inc(re) ((re && re->refcnt++), re) #define ReREFCNT_dec(re) pregfree(re) diff --git a/t/op/pat.t b/t/op/pat.t index cea2267..12b9397 100755 --- a/t/op/pat.t +++ b/t/op/pat.t @@ -4,7 +4,7 @@ # the format supported by op/regexp.t. If you want to add a test # that does fit that format, add it to op/re_tests, not here. -print "1..162\n"; +print "1..168\n"; BEGIN { chdir 't' if -d 't'; @@ -692,6 +692,33 @@ print "not " print "ok $test\n"; $test++; +$str = 'abcde'; +pos $str = 2; + +print "not " if $str =~ /^\G/; +print "ok $test\n"; +$test++; + +print "not " if $str =~ /^.\G/; +print "ok $test\n"; +$test++; + +print "not " unless $str =~ /^..\G/; +print "ok $test\n"; +$test++; + +print "not " if $str =~ /^...\G/; +print "ok $test\n"; +$test++; + +print "not " unless $str =~ /.\G./ and $& eq 'bc'; +print "ok $test\n"; +$test++; + +print "not " unless $str =~ /\G../ and $& eq 'cd'; +print "ok $test\n"; +$test++; + # see if matching against temporaries (created via pp_helem()) is safe { foo => "ok $test\n".$^X }->{foo} =~ /^(.*)\n/g; print "$1\n"; diff --git a/thrdvar.h b/thrdvar.h index 93c4546..3e71fb5 100644 --- a/thrdvar.h +++ b/thrdvar.h @@ -156,6 +156,8 @@ PERLVARI(Tregindent, int, 0) /* from regexec.c */ PERLVAR(Tregcc, CURCUR *) /* from regexec.c */ PERLVAR(Treg_call_cc, struct re_cc_state *) /* from regexec.c */ PERLVAR(Treg_re, regexp *) /* from regexec.c */ +PERLVAR(Treg_ganch, char *) /* position of \G */ +PERLVAR(Treg_sv, SV *) /* what we match against */ PERLVARI(Tregcompp, regcomp_t, FUNC_NAME_TO_PTR(pregcomp)) /* Pointer to RE compiler */