From: Hugo van der Sanden Date: Sun, 29 Apr 2001 17:09:30 +0000 (+0100) Subject: Re: [PATCH bleadperl] [ID 20010426.002] Word boundary regex [...] X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=12d33761ed16a63d0b1b4afd6443ea446d40b38e;p=p5sagit%2Fp5-mst-13.2.git Re: [PATCH bleadperl] [ID 20010426.002] Word boundary regex [...] Message-Id: <200104291609.RAA17790@crypt.compulink.co.uk> p4raw-id: //depot/perl@9911 --- diff --git a/regcomp.c b/regcomp.c index 20388f1..34d5b37 100644 --- a/regcomp.c +++ b/regcomp.c @@ -4660,7 +4660,6 @@ Perl_save_re_context(pTHX) SAVEVPTR(PL_regendp); /* Ditto for endp. */ SAVEVPTR(PL_reglastparen); /* Similarly for lastparen. */ SAVEPPTR(PL_regtill); /* How far we are required to go. */ - SAVEI8(PL_regprev); /* char before regbol, \n if none */ SAVEGENERICPV(PL_reg_start_tmp); /* from regexec.c */ PL_reg_start_tmp = 0; SAVEI32(PL_reg_start_tmpl); /* from regexec.c */ diff --git a/regexec.c b/regexec.c index c9096f0..d3e347e 100644 --- a/regexec.c +++ b/regexec.c @@ -946,7 +946,7 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta /* FALL THROUGH */ case BOUND: if (do_utf8) { - if (s == startpos) + if (s == PL_bostr) tmp = '\n'; else { U8 *r = reghop3((U8*)s, -1, (U8*)startpos); @@ -969,7 +969,7 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta } } else { - tmp = (s != startpos) ? UCHARAT(s - 1) : '\n'; + tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n'; tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0); while (s < strend) { if (tmp == @@ -989,7 +989,7 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta /* FALL THROUGH */ case NBOUND: if (do_utf8) { - if (s == startpos) + if (s == PL_bostr) tmp = '\n'; else { U8 *r = reghop3((U8*)s, -1, (U8*)startpos); @@ -1010,7 +1010,7 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta } } else { - tmp = (s != startpos) ? 
UCHARAT(s - 1) : '\n'; + tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n'; tmp = ((OP(c) == NBOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0); while (s < strend) { @@ -1429,19 +1429,6 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * if (strend - startpos < minlen) goto phooey; } - if (startpos == strbeg) /* is ^ valid at stringarg? */ - PL_regprev = '\n'; - else { - if (prog->reganch & ROPT_UTF8 && do_utf8) { - U8 *s = reghop3((U8*)stringarg, -1, (U8*)strbeg); - PL_regprev = utf8n_to_uvchr(s, (U8*)stringarg - s, NULL, 0); - } - else - PL_regprev = (U32)stringarg[-1]; - if (!PL_multiline && PL_regprev == '\n') - PL_regprev = '\0'; /* force ^ to NOT match */ - } - /* Check validity of program. */ if (UCHARAT(prog->program) != REG_MAGIC) { Perl_croak(aTHX_ "corrupted regexp program"); @@ -2044,19 +2031,16 @@ S_regmatch(pTHX_ regnode *prog) switch (OP(scan)) { case BOL: - if (locinput == PL_bostr - ? PL_regprev == '\n' - : (PL_multiline && - (nextchr || locinput < PL_regeol) && locinput[-1] == '\n') ) + if (locinput == PL_bostr || (PL_multiline && + (nextchr || locinput < PL_regeol) && locinput[-1] == '\n') ) { /* regtill = regbol; */ break; } sayNO; case MBOL: - if (locinput == PL_bostr - ? PL_regprev == '\n' - : ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n') ) + if (locinput == PL_bostr || + ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n')) { break; } @@ -2259,8 +2243,8 @@ S_regmatch(pTHX_ regnode *prog) case NBOUND: /* was last char in word? */ if (do_utf8) { - if (locinput == PL_regbol) - ln = PL_regprev; + if (locinput == PL_bostr) + ln = '\n'; else { U8 *r = reghop((U8*)locinput, -1); @@ -2277,8 +2261,8 @@ S_regmatch(pTHX_ regnode *prog) } } else { - ln = (locinput != PL_regbol) ? - UCHARAT(locinput - 1) : PL_regprev; + ln = (locinput != PL_bostr) ? 
+ UCHARAT(locinput - 1) : '\n'; if (OP(scan) == BOUND || OP(scan) == NBOUND) { ln = isALNUM(ln); n = isALNUM(nextchr); diff --git a/sv.c b/sv.c index 7f62a78..65a3279 100644 --- a/sv.c +++ b/sv.c @@ -9295,7 +9295,6 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags, PL_regendp = (I32*)NULL; PL_reglastparen = (U32*)NULL; PL_regtill = Nullch; - PL_regprev = '\n'; PL_reg_start_tmp = (char**)NULL; PL_reg_start_tmpl = 0; PL_regdata = (struct reg_data*)NULL; diff --git a/t/op/re_tests b/t/op/re_tests index 6406fcd..3989c06 100644 --- a/t/op/re_tests +++ b/t/op/re_tests @@ -787,3 +787,4 @@ tt+$ xxxtt y - - (a)?(a)+ a y $1:$2 :a - (ab)?(ab)+ ab y $1:$2 :ab - (abc)?(abc)+ abc y $1:$2 :abc - +'b\s^'m a\nb\n n - - diff --git a/t/op/subst.t b/t/op/subst.t index 7dd7a1c..907d0da 100755 --- a/t/op/subst.t +++ b/t/op/subst.t @@ -6,7 +6,7 @@ BEGIN { require Config; import Config; } -print "1..84\n"; +print "1..85\n"; $x = 'foo'; $_ = "x"; @@ -379,3 +379,7 @@ $_ = "C:/"; s/^([a-z]:)/\u$1/ and print "not "; print "ok 84\n"; +$_ = "Charles Bronson"; +s/\B\w//g; +print $_ eq "C B" ? "ok 85\n" : "not ok 85\n# \$_ eq '$_'\n"; + diff --git a/thrdvar.h b/thrdvar.h index d35c1d9..6c48da9 100644 --- a/thrdvar.h +++ b/thrdvar.h @@ -183,7 +183,6 @@ PERLVAR(Tregstartp, I32 *) /* Pointer to startp array. */ PERLVAR(Tregendp, I32 *) /* Ditto for endp. */ PERLVAR(Treglastparen, U32 *) /* Similarly for lastparen. */ PERLVAR(Tregtill, char *) /* How far we are required to go. */ -PERLVAR(Tregprev, char) /* char before regbol, \n if none */ PERLVAR(Treg_start_tmp, char **) /* from regexec.c */ PERLVAR(Treg_start_tmpl,U32) /* from regexec.c */ PERLVAR(Tregdata, struct reg_data *)