From: Hugo van der Sanden <hv@crypt.org>
Date: Sun, 29 Apr 2001 17:09:30 +0000 (+0100)
Subject: Re: [PATCH bleadperl] [ID 20010426.002] Word boundary regex [...]
X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=12d33761ed16a63d0b1b4afd6443ea446d40b38e;p=p5sagit%2Fp5-mst-13.2.git

Re: [PATCH bleadperl] [ID 20010426.002] Word boundary regex [...]
Message-Id: <200104291609.RAA17790@crypt.compulink.co.uk>

p4raw-id: //depot/perl@9911
---

diff --git a/regcomp.c b/regcomp.c
index 20388f1..34d5b37 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -4660,7 +4660,6 @@ Perl_save_re_context(pTHX)
     SAVEVPTR(PL_regendp);		/* Ditto for endp. */
     SAVEVPTR(PL_reglastparen);		/* Similarly for lastparen. */
     SAVEPPTR(PL_regtill);		/* How far we are required to go. */
-    SAVEI8(PL_regprev);			/* char before regbol, \n if none */
     SAVEGENERICPV(PL_reg_start_tmp);		/* from regexec.c */
     PL_reg_start_tmp = 0;
     SAVEI32(PL_reg_start_tmpl);		/* from regexec.c */
diff --git a/regexec.c b/regexec.c
index c9096f0..d3e347e 100644
--- a/regexec.c
+++ b/regexec.c
@@ -946,7 +946,7 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta
 	    /* FALL THROUGH */
 	case BOUND:
 	    if (do_utf8) {
-		if (s == startpos)
+		if (s == PL_bostr)
 		    tmp = '\n';
 		else {
 		    U8 *r = reghop3((U8*)s, -1, (U8*)startpos);
@@ -969,7 +969,7 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta
 		}
 	    }
 	    else {
-		tmp = (s != startpos) ? UCHARAT(s - 1) : '\n';
+		tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
 		tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
 		while (s < strend) {
 		    if (tmp ==
@@ -989,7 +989,7 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta
 	    /* FALL THROUGH */
 	case NBOUND:
 	    if (do_utf8) {
-		if (s == startpos)
+		if (s == PL_bostr)
 		    tmp = '\n';
 		else {
 		    U8 *r = reghop3((U8*)s, -1, (U8*)startpos);
@@ -1010,7 +1010,7 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta
 		}
 	    }
 	    else {
-		tmp = (s != startpos) ? UCHARAT(s - 1) : '\n';
+		tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
 		tmp = ((OP(c) == NBOUND ?
 			isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
 		while (s < strend) {
@@ -1429,19 +1429,6 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *
       if (strend - startpos < minlen) goto phooey;
     }
 
-    if (startpos == strbeg)	/* is ^ valid at stringarg? */
-	PL_regprev = '\n';
-    else {
-        if (prog->reganch & ROPT_UTF8 && do_utf8) {
-	    U8 *s = reghop3((U8*)stringarg, -1, (U8*)strbeg);
-	    PL_regprev = utf8n_to_uvchr(s, (U8*)stringarg - s, NULL, 0);
-	}
-	else
-	    PL_regprev = (U32)stringarg[-1];
-	if (!PL_multiline && PL_regprev == '\n')
-	    PL_regprev = '\0';		/* force ^ to NOT match */
-    }
-
     /* Check validity of program. */
     if (UCHARAT(prog->program) != REG_MAGIC) {
 	Perl_croak(aTHX_ "corrupted regexp program");
@@ -2044,19 +2031,16 @@ S_regmatch(pTHX_ regnode *prog)
 
 	switch (OP(scan)) {
 	case BOL:
-	    if (locinput == PL_bostr
-		? PL_regprev == '\n'
-		: (PL_multiline &&
-		   (nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
+	    if (locinput == PL_bostr || (PL_multiline &&
+		(nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
 	    {
 		/* regtill = regbol; */
 		break;
 	    }
 	    sayNO;
 	case MBOL:
-	    if (locinput == PL_bostr
-		? PL_regprev == '\n'
-		: ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
+	    if (locinput == PL_bostr ||
+		((nextchr || locinput < PL_regeol) && locinput[-1] == '\n'))
 	    {
 		break;
 	    }
@@ -2259,8 +2243,8 @@ S_regmatch(pTHX_ regnode *prog)
 	case NBOUND:
 	    /* was last char in word? */
 	    if (do_utf8) {
-		if (locinput == PL_regbol)
-		    ln = PL_regprev;
+		if (locinput == PL_bostr)
+		    ln = '\n';
 		else {
 		    U8 *r = reghop((U8*)locinput, -1);
 		
@@ -2277,8 +2261,8 @@ S_regmatch(pTHX_ regnode *prog)
 		}
 	    }
 	    else {
-		ln = (locinput != PL_regbol) ?
-		    UCHARAT(locinput - 1) : PL_regprev;
+		ln = (locinput != PL_bostr) ?
+		    UCHARAT(locinput - 1) : '\n';
 		if (OP(scan) == BOUND || OP(scan) == NBOUND) {
 		    ln = isALNUM(ln);
 		    n = isALNUM(nextchr);
diff --git a/sv.c b/sv.c
index 7f62a78..65a3279 100644
--- a/sv.c
+++ b/sv.c
@@ -9295,7 +9295,6 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags,
     PL_regendp		= (I32*)NULL;
     PL_reglastparen	= (U32*)NULL;
     PL_regtill		= Nullch;
-    PL_regprev		= '\n';
     PL_reg_start_tmp	= (char**)NULL;
     PL_reg_start_tmpl	= 0;
     PL_regdata		= (struct reg_data*)NULL;
diff --git a/t/op/re_tests b/t/op/re_tests
index 6406fcd..3989c06 100644
--- a/t/op/re_tests
+++ b/t/op/re_tests
@@ -787,3 +787,4 @@ tt+$	xxxtt	y	-	-
 (a)?(a)+	a	y	$1:$2	:a	-
 (ab)?(ab)+	ab	y	$1:$2	:ab	-
 (abc)?(abc)+	abc	y	$1:$2	:abc	-
+'b\s^'m	a\nb\n	n	-	-
diff --git a/t/op/subst.t b/t/op/subst.t
index 7dd7a1c..907d0da 100755
--- a/t/op/subst.t
+++ b/t/op/subst.t
@@ -6,7 +6,7 @@ BEGIN {
     require Config; import Config;
 }
 
-print "1..84\n";
+print "1..85\n";
 
 $x = 'foo';
 $_ = "x";
@@ -379,3 +379,7 @@ $_ = "C:/";
 s/^([a-z]:)/\u$1/ and print "not ";
 print "ok 84\n";
 
+$_ = "Charles Bronson";
+s/\B\w//g;
+print $_ eq "C B" ? "ok 85\n" : "not ok 85\n# \$_ eq '$_'\n";
+
diff --git a/thrdvar.h b/thrdvar.h
index d35c1d9..6c48da9 100644
--- a/thrdvar.h
+++ b/thrdvar.h
@@ -183,7 +183,6 @@ PERLVAR(Tregstartp,	I32 *)		/* Pointer to startp array. */
 PERLVAR(Tregendp,	I32 *)		/* Ditto for endp. */
 PERLVAR(Treglastparen,	U32 *)		/* Similarly for lastparen. */
 PERLVAR(Tregtill,	char *)		/* How far we are required to go. */
-PERLVAR(Tregprev,	char)		/* char before regbol, \n if none */
 PERLVAR(Treg_start_tmp,	char **)	/* from regexec.c */
 PERLVAR(Treg_start_tmpl,U32)		/* from regexec.c */
 PERLVAR(Tregdata,	struct reg_data *)