From: Yves Orton <demerphq@gmail.com>
Date: Fri, 12 Jan 2007 02:31:12 +0000 (+0100)
Subject: Add support for /k modfier for matching along with ${^PREMATCH}, ${^MATCH}, ${^POSTMATCH}
X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=cde0cee5716418bb58782f073048ee9685ed2368;p=p5sagit%2Fp5-mst-13.2.git

Add support for /k modfier for matching along with ${^PREMATCH}, ${^MATCH}, ${^POSTMATCH}
Message-ID: <9b18b3110701111731x29b1c63i57b1698f769b3bbc@mail.gmail.com>
(with tweaks)

p4raw-id: //depot/perl@29831
---

diff --git a/MANIFEST b/MANIFEST
index 9b8753a..7a8aa79 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -3569,6 +3569,7 @@ t/op/regexp_notrie.t		See if regular expressions work without trie optimisation
 t/op/regexp_qr_embed.t		See if regular expressions work with embedded qr//
 t/op/regexp_qr.t		See if regular expressions work as qr//
 t/op/regexp.t			See if regular expressions work
+t/op/regexp_kmod.t		See if regexp /k modifier works as expected
 t/op/regexp_trielist.t		See if regular expressions work with trie optimisation
 t/op/regmesg.t			See if one can get regular expression errors
 t/op/repeat.t			See if x operator works
diff --git a/ext/B/t/concise-xs.t b/ext/B/t/concise-xs.t
index b4f053f..d3711cc 100644
--- a/ext/B/t/concise-xs.t
+++ b/ext/B/t/concise-xs.t
@@ -117,7 +117,7 @@ use Getopt::Std;
 use Carp;
 use Test::More tests => ( # per-pkg tests (function ct + require_ok)
 			  40 + 16	# Data::Dumper, Digest::MD5
-			  + 517 + 238	# B::Deparse, B
+			  + 517 + 239	# B::Deparse, B
 			  + 595 + 190	# POSIX, IO::Socket
 			  + 3 * ($] > 5.009)
 			  + 16 * ($] >= 5.009003)
diff --git a/gv.c b/gv.c
index 8630c1b..4332df9 100644
--- a/gv.c
+++ b/gv.c
@@ -1109,10 +1109,16 @@ Perl_gv_fetchpvn_flags(pTHX_ const char *nambeg, STRLEN full_len, I32 flags,
 		if (strEQ(name2, "NCODING"))
 		    goto magicalize;
 		break;
+            case '\015':        /* $^MATCH */
+                if (strEQ(name2, "ATCH"))
+		    goto ro_magicalize;
 	    case '\017':	/* $^OPEN */
 		if (strEQ(name2, "PEN"))
 		    goto magicalize;
 		break;
+	    case '\020':        /* $^PREMATCH  $^POSTMATCH */
+	        if (strEQ(name2, "REMATCH") || strEQ(name2, "OSTMATCH"))
+		    goto ro_magicalize;  
 	    case '\024':	/* ${^TAINT} */
 		if (strEQ(name2, "AINT"))
 		    goto ro_magicalize;
diff --git a/mg.c b/mg.c
index 2bb9b66..86f0e12 100644
--- a/mg.c
+++ b/mg.c
@@ -779,8 +779,14 @@ Perl_magic_get(pTHX_ SV *sv, MAGIC *mg)
 	    }
 	}
 	break;
-    case '\020':		/* ^P */
-	sv_setiv(sv, (IV)PL_perldb);
+    case '\020':		
+	if (nextchar == '\0') {       /* ^P */
+	    sv_setiv(sv, (IV)PL_perldb);
+	} else if (strEQ(remaining, "REMATCH")) { /* $^PREMATCH */
+	    goto do_prematch_fetch;
+	} else if (strEQ(remaining, "OSTMATCH")) { /* $^POSTMATCH */
+	    goto do_postmatch_fetch;
+	}
 	break;
     case '\023':		/* ^S */
 	if (nextchar == '\0') {
@@ -847,18 +853,21 @@ Perl_magic_get(pTHX_ SV *sv, MAGIC *mg)
 	    SvPOK_only(sv);
 	}
 	break;
+    case '\015': /* $^MATCH */
+	if (strEQ(remaining, "ATCH")) {
     case '1': case '2': case '3': case '4':
     case '5': case '6': case '7': case '8': case '9': case '&':
-	if (PL_curpm && (rx = PM_GETRE(PL_curpm))) {
-	    /*
-	     * Pre-threads, this was paren = atoi(GvENAME((GV*)mg->mg_obj));
-	     * XXX Does the new way break anything?
-	     */
-	    paren = atoi(mg->mg_ptr); /* $& is in [0] */
-	    reg_numbered_buff_get( paren, rx, sv, 0);
-	    break;
+	    if (PL_curpm && (rx = PM_GETRE(PL_curpm))) {
+		/*
+		 * Pre-threads, this was paren = atoi(GvENAME((GV*)mg->mg_obj));
+		 * XXX Does the new way break anything?
+		 */
+		paren = atoi(mg->mg_ptr); /* $& is in [0] */
+		reg_numbered_buff_get( paren, rx, sv, 0);
+		break;
+	    }
+	    sv_setsv(sv,&PL_sv_undef);
 	}
-	sv_setsv(sv,&PL_sv_undef);
 	break;
     case '+':
 	if (PL_curpm && (rx = PM_GETRE(PL_curpm))) {
@@ -880,6 +889,7 @@ Perl_magic_get(pTHX_ SV *sv, MAGIC *mg)
 	sv_setsv(sv,&PL_sv_undef);
 	break;
     case '`':
+      do_prematch_fetch:
 	if (PL_curpm && (rx = PM_GETRE(PL_curpm))) {
 	  reg_numbered_buff_get( -2, rx, sv, 0);
 	  break;
@@ -887,6 +897,7 @@ Perl_magic_get(pTHX_ SV *sv, MAGIC *mg)
 	sv_setsv(sv,&PL_sv_undef);
 	break;
     case '\'':
+      do_postmatch_fetch:
 	if (PL_curpm && (rx = PM_GETRE(PL_curpm))) {
 	  reg_numbered_buff_get( -1, rx, sv, 0);
 	  break;
diff --git a/op.h b/op.h
index 1ac4aa0..d8c5478 100644
--- a/op.h
+++ b/op.h
@@ -371,14 +371,15 @@ struct pmop {
 /* The following flags have exact equivalents in regcomp.h with the prefix RXf_
  * which are stored in the regexp->extflags member.
  */
-#define PMf_LOCALE	0x0800		/* use locale for character types */
-#define PMf_MULTILINE	0x1000		/* assume multiple lines */
-#define PMf_SINGLELINE	0x2000		/* assume single line */
-#define PMf_FOLD	0x4000		/* case insensitivity */
-#define PMf_EXTENDED	0x8000		/* chuck embedded whitespace */
+#define PMf_LOCALE	0x00800		/* use locale for character types */
+#define PMf_MULTILINE	0x01000		/* assume multiple lines */
+#define PMf_SINGLELINE	0x02000		/* assume single line */
+#define PMf_FOLD	0x04000		/* case insensitivity */
+#define PMf_EXTENDED	0x08000		/* chuck embedded whitespace */
+#define PMf_KEEPCOPY	0x10000		/* copy the string when matching */
 
 /* mask of bits that need to be transfered to re->extflags */
-#define PMf_COMPILETIME	(PMf_MULTILINE|PMf_SINGLELINE|PMf_LOCALE|PMf_FOLD|PMf_EXTENDED)
+#define PMf_COMPILETIME	(PMf_MULTILINE|PMf_SINGLELINE|PMf_LOCALE|PMf_FOLD|PMf_EXTENDED|PMf_KEEPCOPY)
 
 #ifdef USE_ITHREADS
 
diff --git a/pod/perlop.pod b/pod/perlop.pod
index 46af19b..7b84a68 100644
--- a/pod/perlop.pod
+++ b/pod/perlop.pod
@@ -1067,7 +1067,7 @@ X<m> X<operator, match>
 X<regexp, options> X<regexp> X<regex, options> X<regex>
 X</c> X</i> X</m> X</o> X</s> X</x>
 
-=item /PATTERN/cgimosx
+=item /PATTERN/cgimosxk
 
 Searches a string for a pattern match, and in scalar context returns
 true if it succeeds, false if it fails.  If no string is specified
@@ -1080,13 +1080,15 @@ is in effect.
 
 Options are:
 
-    c	Do not reset search position on a failed match when /g is in effect.
-    g	Match globally, i.e., find all occurrences.
     i	Do case-insensitive pattern matching.
     m	Treat string as multiple lines.
-    o	Compile pattern only once.
     s	Treat string as single line.
     x	Use extended regular expressions.
+    g	Match globally, i.e., find all occurrences.
+    c	Do not reset search position on a failed match when /g is in effect.
+    o	Compile pattern only once.
+    k	Keep a copy of the matched string so that ${^MATCH} and friends
+	will be defined.
 
 If "/" is the delimiter then the initial C<m> is optional.  With the C<m>
 you can use any pair of non-alphanumeric, non-whitespace characters
@@ -1449,7 +1451,7 @@ put comments into a multi-line C<qw>-string.  For this reason, the
 C<use warnings> pragma and the B<-w> switch (that is, the C<$^W> variable)
 produces warnings if the STRING contains the "," or the "#" character.
 
-=item s/PATTERN/REPLACEMENT/egimosx
+=item s/PATTERN/REPLACEMENT/egimosxk
 X<substitute> X<substitution> X<replace> X<regexp, replace>
 X<regexp, substitute> X</e> X</g> X</i> X</m> X</o> X</s> X</x>
 
@@ -1475,13 +1477,16 @@ when C<use locale> is in effect.
 
 Options are:
 
-    e	Evaluate the right side as an expression.
-    g	Replace globally, i.e., all occurrences.
     i	Do case-insensitive pattern matching.
     m	Treat string as multiple lines.
-    o	Compile pattern only once.
     s	Treat string as single line.
     x	Use extended regular expressions.
+    g	Replace globally, i.e., all occurrences.
+    o	Compile pattern only once.
+    k	Keep a copy of the original string so ${^MATCH} and friends
+	will be defined.
+    e	Evaluate the right side as an expression.
+
 
 Any non-alphanumeric, non-whitespace delimiter may replace the
 slashes.  If single quotes are used, no interpretation is done on the
diff --git a/pod/perlre.pod b/pod/perlre.pod
index 7133a02..d886d09 100644
--- a/pod/perlre.pod
+++ b/pod/perlre.pod
@@ -586,6 +586,15 @@ already paid the price.  As of 5.005, C<$&> is not so costly as the
 other two.
 X<$&> X<$`> X<$'>
 
+As a workaround for this problem, Perl 5.10 introduces C<${^PREMATCH}>,
+C<${^MATCH}> and C<${^POSTMATCH}>, which are equivalent to C<$`>, C<$&>
+and C<$'>, B<except> that they are only guaranteed to be defined after a
+successful match that was executed with the C</k> (keep-copy) modifier.
+The use of these variables incurs no global performance penalty, unlike
+their punctuation char equivalents, however at the trade-off that you
+have to tell perl when you want to use them.
+X</k> X<k modifier>
+
 Backslashed metacharacters in Perl are alphanumeric, such as C<\b>,
 C<\w>, C<\n>.  Unlike some other regular expression languages, there
 are no backslashed symbols that aren't alphanumeric.  So anything
@@ -639,7 +648,7 @@ whitespace formatting, a simple C<#> will suffice.  Note that Perl closes
 the comment as soon as it sees a C<)>, so there is no way to put a literal
 C<)> in the comment.
 
-=item C<(?imsx-imsx)>
+=item C<(?kimsx-imsx)>
 X<(?)>
 
 One or more embedded pattern-match modifiers, to be turned on (or
@@ -667,6 +676,11 @@ will match a repeated (I<including the case>!) word C<blah> in any
 case, assuming C<x> modifier, and no C<i> modifier outside this
 group.
 
+Note that the C<k> modifier is special in that it can only be enabled,
+not disabled, and that its presence anywhere in a pattern has a global
+effect. Thus C<(?-k)> and C<(?-k:...)> are meaningless and will warn
+when executed under C<use warnings>.
+
 =item C<(?:pattern)>
 X<(?:)>
 
diff --git a/pod/perlvar.pod b/pod/perlvar.pod
index a211c37..b4db654 100644
--- a/pod/perlvar.pod
+++ b/pod/perlvar.pod
@@ -228,6 +228,14 @@ performance penalty on all regular expression matches.  See L</BUGS>.
 
 See L</@-> for a replacement.
 
+=item ${^MATCH}
+X<${^MATCH}>
+
+This is similar to C<$&> (C<$POSTMATCH>) except that it does not incur the
+performance penalty associated with that variable, and is only guaranteed
+to return a defined value when the pattern was compiled or executed with
+the C</k> modifier.
+
 =item $PREMATCH
 
 =item $`
@@ -243,6 +251,14 @@ performance penalty on all regular expression matches.  See L</BUGS>.
 
 See L</@-> for a replacement.
 
+=item ${^PREMATCH}
+X<${^PREMATCH}>
+
+This is similar to C<$`> ($PREMATCH) except that it does not incur the
+performance penalty associated with that variable, and is only guaranteed
+to return a defined value when the pattern was compiled or executed with
+the C</k> modifier.
+
 =item $POSTMATCH
 
 =item $'
@@ -264,6 +280,14 @@ performance penalty on all regular expression matches.  See L</BUGS>.
 
 See L</@-> for a replacement.
 
+=item ${^POSTMATCH}
+X<${^POSTMATCH}>
+
+This is similar to C<$'> (C<$POSTMATCH>) except that it does not incur the
+performance penalty associated with that variable, and is only guaranteed
+to return a defined value when the pattern was compiled or executed with
+the C</k> modifier.
+
 =item $LAST_PAREN_MATCH
 
 =item $+
diff --git a/pp_hot.c b/pp_hot.c
index f1ad3ed..7167311 100644
--- a/pp_hot.c
+++ b/pp_hot.c
@@ -1368,7 +1368,8 @@ PP(pp_match)
     /* remove comment to get faster /g but possibly unsafe $1 vars after a
        match. Test for the unsafe vars will fail as well*/
     if (( /* !global &&  */ rx->nparens) 
-	    || SvTEMP(TARG) || PL_sawampersand || (pm->op_pmflags & PMf_EVAL))
+	    || SvTEMP(TARG) || PL_sawampersand ||
+	    (pm->op_pmflags & (PMf_EVAL|PMf_KEEPCOPY)))
 	r_flags |= REXEC_COPY_STR;
     if (SvSCREAM(TARG))
 	r_flags |= REXEC_SCREAM;
@@ -1391,6 +1392,7 @@ play_it_again:
 	    goto nope;
 	if ( (rx->extflags & RXf_CHECK_ALL)
 	     && !PL_sawampersand
+	     && !(pm->op_pmflags & PMf_KEEPCOPY)
 	     && ((rx->extflags & RXf_NOSCAN)
 		 || !((rx->extflags & RXf_INTUIT_TAIL)
 		      && (r_flags & REXEC_SCREAM)))
@@ -1516,7 +1518,7 @@ yup:					/* Confirmed by INTUIT */
 	rx->sublen = strend - truebase;
 	goto gotcha;
     }
-    if (PL_sawampersand) {
+    if (PL_sawampersand || pm->op_pmflags & PMf_KEEPCOPY) {
 	I32 off;
 #ifdef PERL_OLD_COPY_ON_WRITE
 	if (SvIsCOW(TARG) || (SvFLAGS(TARG) & CAN_COW_MASK) == CAN_COW_FLAGS) {
@@ -1547,6 +1549,8 @@ yup:					/* Confirmed by INTUIT */
 	rx->startp[0] = s - truebase;
 	rx->endp[0] = s - truebase + rx->minlenret;
     }
+    /* including rx->nparens in the below code seems highly suspicious.
+       -dmq */
     rx->nparens = rx->lastparen = rx->lastcloseparen = 0;	/* used by @-, @+, and $^N */
     LEAVE_SCOPE(oldsave);
     RETPUSHYES;
@@ -2152,7 +2156,7 @@ PP(pp_subst)
 	rx = PM_GETRE(pm);
     }
     r_flags = (rx->nparens || SvTEMP(TARG) || PL_sawampersand
-	    || (pm->op_pmflags & PMf_EVAL))
+	    || (pm->op_pmflags & (PMf_EVAL|PMf_KEEPCOPY)) )
 	       ? REXEC_COPY_STR : 0;
     if (SvSCREAM(TARG))
 	r_flags |= REXEC_SCREAM;
@@ -2167,6 +2171,7 @@ PP(pp_subst)
 	/* How to do it in subst? */
 /*	if ( (rx->extflags & RXf_CHECK_ALL)
 	     && !PL_sawampersand
+	     && !(pm->op_pmflags & PMf_KEEPCOPY)
 	     && ((rx->extflags & RXf_NOSCAN)
 		 || !((rx->extflags & RXf_INTUIT_TAIL)
 		      && (r_flags & REXEC_SCREAM))))
diff --git a/regcomp.c b/regcomp.c
index 4d139f2..d07f177 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -172,6 +172,7 @@ typedef struct RExC_state_t {
 #define RExC_recurse	(pRExC_state->recurse)
 #define RExC_recurse_count	(pRExC_state->recurse_count)
 
+
 #define	ISMULT1(c)	((c) == '*' || (c) == '+' || (c) == '?')
 #define	ISMULT2(s)	((*s) == '*' || (*s) == '+' || (*s) == '?' || \
 	((*s) == '{' && regcurly(s)))
@@ -4592,8 +4593,8 @@ reStudy:
         ri->name_list_idx = add_data( pRExC_state, 1, "p" );
         ri->data->data[ri->name_list_idx] = (void*)SvREFCNT_inc(RExC_paren_name_list);
     } else
-        ri->name_list_idx = 0;
 #endif
+        ri->name_list_idx = 0;
 
     if (RExC_recurse_count) {
         for ( ; RExC_recurse_count ; RExC_recurse_count-- ) {
@@ -4676,12 +4677,18 @@ Perl_reg_numbered_buff_get(pTHX_ I32 paren, const REGEXP * const rx, SV* usesv,
     SV *sv = usesv ? usesv : newSVpvs("");
     PERL_UNUSED_ARG(flags);
         
-    if (paren == -2 && (s = rx->subbeg) && rx->startp[0] != -1) {
+    if (!rx->subbeg) {
+        sv_setsv(sv,&PL_sv_undef);
+        return sv;
+    } 
+    else               
+    if (paren == -2 && rx->startp[0] != -1) {
         /* $` */
 	i = rx->startp[0];
+	s = rx->subbeg;
     }
     else 
-    if (paren == -1 && rx->subbeg && rx->endp[0] != -1) {
+    if (paren == -1 && rx->endp[0] != -1) {
         /* $' */
 	s = rx->subbeg + rx->endp[0];
 	i = rx->sublen - rx->endp[0];
@@ -4694,47 +4701,43 @@ Perl_reg_numbered_buff_get(pTHX_ I32 paren, const REGEXP * const rx, SV* usesv,
         /* $& $1 ... */
         i = t1 - s1;
         s = rx->subbeg + s1;
-    }
-      
-    if (s) {        
-        assert(rx->subbeg);
-        assert(rx->sublen >= (s - rx->subbeg) + i );
-            
-        if (i >= 0) {
-            const int oldtainted = PL_tainted;
-            TAINT_NOT;
-            sv_setpvn(sv, s, i);
-            PL_tainted = oldtainted;
-            if ( (rx->extflags & RXf_CANY_SEEN)
-                ? (RX_MATCH_UTF8(rx)
-                            && (!i || is_utf8_string((U8*)s, i)))
-                : (RX_MATCH_UTF8(rx)) )
-            {
-                SvUTF8_on(sv);
-            }
-            else
-                SvUTF8_off(sv);
-            if (PL_tainting) {
-                if (RX_MATCH_TAINTED(rx)) {
-                    if (SvTYPE(sv) >= SVt_PVMG) {
-                        MAGIC* const mg = SvMAGIC(sv);
-                        MAGIC* mgt;
-                        PL_tainted = 1;
-                        SvMAGIC_set(sv, mg->mg_moremagic);
-                        SvTAINT(sv);
-                        if ((mgt = SvMAGIC(sv))) {
-                            mg->mg_moremagic = mgt;
-                            SvMAGIC_set(sv, mg);
-                        }
-                    } else {
-                        PL_tainted = 1;
-                        SvTAINT(sv);
+    } else {
+        sv_setsv(sv,&PL_sv_undef);
+        return sv;
+    }          
+    assert(rx->sublen >= (s - rx->subbeg) + i );
+    if (i >= 0) {
+        const int oldtainted = PL_tainted;
+        TAINT_NOT;
+        sv_setpvn(sv, s, i);
+        PL_tainted = oldtainted;
+        if ( (rx->extflags & RXf_CANY_SEEN)
+            ? (RX_MATCH_UTF8(rx)
+                        && (!i || is_utf8_string((U8*)s, i)))
+            : (RX_MATCH_UTF8(rx)) )
+        {
+            SvUTF8_on(sv);
+        }
+        else
+            SvUTF8_off(sv);
+        if (PL_tainting) {
+            if (RX_MATCH_TAINTED(rx)) {
+                if (SvTYPE(sv) >= SVt_PVMG) {
+                    MAGIC* const mg = SvMAGIC(sv);
+                    MAGIC* mgt;
+                    PL_tainted = 1;
+                    SvMAGIC_set(sv, mg->mg_moremagic);
+                    SvTAINT(sv);
+                    if ((mgt = SvMAGIC(sv))) {
+                        mg->mg_moremagic = mgt;
+                        SvMAGIC_set(sv, mg);
                     }
-                } else 
-                    SvTAINTED_off(sv);
-            }
-        } else {
-            sv_setsv(sv,&PL_sv_undef);
+                } else {
+                    PL_tainted = 1;
+                    SvTAINT(sv);
+                }
+            } else 
+                SvTAINTED_off(sv);
         }
     } else {
         sv_setsv(sv,&PL_sv_undef);
@@ -5006,8 +5009,6 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
 	    return ret;
         } else 
 	if (*RExC_parse == '?') { /* (?...) */
-	    U32 posflags = 0, negflags = 0;
-	    U32 *flagsp = &posflags;
 	    bool is_logical = 0;
 	    const char * const seqstart = RExC_parse;
 
@@ -5431,13 +5432,20 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                 vFAIL("Sequence (? incomplete");
                 break;
 	    default:
-		--RExC_parse;
-	      parse_flags:      /* (?i) */
-		while (*RExC_parse && strchr("iogcmsx", *RExC_parse)) {
+	        --RExC_parse;
+	        parse_flags:      /* (?i) */  
+	    {
+                U32 posflags = 0, negflags = 0;
+	        U32 *flagsp = &posflags;
+
+		while (*RExC_parse) {
+		    /* && strchr("iogcmsx", *RExC_parse) */
 		    /* (?g), (?gc) and (?o) are useless here
 		       and must be globally applied -- japhy */
-
-		    if (*RExC_parse == 'o' || *RExC_parse == 'g') {
+                    switch (*RExC_parse) {
+	            CASE_STD_PMMOD_FLAGS_PARSE_SET(flagsp);
+                    case 'o':
+                    case 'g':
 			if (SIZE_ONLY && ckWARN(WARN_REGEXP)) {
 			    const I32 wflagbit = *RExC_parse == 'o' ? WASTED_O : WASTED_G;
 			    if (! (wastedflags & wflagbit) ) {
@@ -5452,8 +5460,9 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
 				);
 			    }
 			}
-		    }
-		    else if (*RExC_parse == 'c') {
+			break;
+		        
+		    case 'c':
 			if (SIZE_ONLY && ckWARN(WARN_REGEXP)) {
 			    if (! (wastedflags & WASTED_C) ) {
 				wastedflags |= WASTED_GC;
@@ -5465,33 +5474,45 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
 				);
 			    }
 			}
-		    }
-		    else { pmflag(flagsp, *RExC_parse); }
-
-		    ++RExC_parse;
-		}
-		if (*RExC_parse == '-') {
-		    flagsp = &negflags;
-		    wastedflags = 0;  /* reset so (?g-c) warns twice */
+			break;
+	            case 'k':
+                        if (flagsp == &negflags) {
+                            if (SIZE_ONLY && ckWARN(WARN_REGEXP))
+                                vWARN(RExC_parse + 1,"Useless use of (?-k)");
+                        } else {
+                            *flagsp |= RXf_PMf_KEEPCOPY;
+                        }
+	                break;
+                    case '-':
+                        if (flagsp == &negflags)
+                            goto unknown;
+			flagsp = &negflags;
+		        wastedflags = 0;  /* reset so (?g-c) warns twice */
+		        break;
+                    case ':':
+		        paren = ':';
+		        /*FALLTHROUGH*/
+                    case ')':
+                        RExC_flags |= posflags;
+                        RExC_flags &= ~negflags;
+                        nextchar(pRExC_state);
+		        if (paren != ':') {
+		            *flagp = TRYAGAIN;
+		            return NULL;
+		        } else {
+                            ret = NULL;
+		            goto parse_rest;
+		        }
+		        /*NOTREACHED*/
+                    default:
+                    unknown:
+		        RExC_parse++;
+		        vFAIL3("Sequence (%.*s...) not recognized", RExC_parse-seqstart, seqstart);
+		        /*NOTREACHED*/
+                    }                           
 		    ++RExC_parse;
-		    goto parse_flags;
 		}
-		RExC_flags |= posflags;
-		RExC_flags &= ~negflags;
-		if (*RExC_parse == ':') {
-		    RExC_parse++;
-		    paren = ':';
-		    break;
-		}		
-	      unknown:
-		if (*RExC_parse != ')') {
-		    RExC_parse++;
-		    vFAIL3("Sequence (%.*s...) not recognized", RExC_parse-seqstart, seqstart);
-		}
-		nextchar(pRExC_state);
-		*flagp = TRYAGAIN;
-		return NULL;
-	    }
+	    }} /* one for the default block, one for the switch */
 	}
 	else {                  /* (...) */
 	  capturing_parens:
@@ -5516,7 +5537,8 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
     }
     else                        /* ! paren */
 	ret = NULL;
-
+   
+   parse_rest:
     /* Pick up the branches, linking them together. */
     parse_start = RExC_parse;   /* MJD */
     br = regbranch(pRExC_state, &flags, 1,depth+1);
@@ -8803,6 +8825,8 @@ Perl_regdupe_internal(pTHX_ const regexp *r, CLONE_PARAMS *param)
     else
 	reti->data = NULL;
 
+    reti->name_list_idx = ri->name_list_idx;
+
     Newx(reti->offsets, 2*len+1, U32);
     Copy(ri->offsets, reti->offsets, 2*len+1, U32);
     
@@ -8846,13 +8870,16 @@ Perl_reg_stringify(pTHX_ MAGIC *mg, STRLEN *lp, U32 *flags, I32 *haseval ) {
 
     if (!mg->mg_ptr) {
 	const char *fptr = "msix";
-	char reflags[6];
+	char reflags[7];
 	char ch;
-	int left = 0;
-	int right = 4;
-	bool need_newline = 0;
-	U16 reganch = (U16)((re->extflags & RXf_PMf_COMPILETIME) >> 12);
-
+	bool hask = ((re->extflags & RXf_PMf_KEEPCOPY) == RXf_PMf_KEEPCOPY);
+	bool hasm = ((re->extflags & RXf_PMf_STD_PMMOD) != RXf_PMf_STD_PMMOD);
+        U16 reganch = (U16)((re->extflags & RXf_PMf_STD_PMMOD) >> 12);
+        bool need_newline = 0;
+        int left = 0;
+	int right = 4 + hask;
+        if (hask) 
+            reflags[left++]='k';
 	while((ch = *fptr++)) {
 	    if(reganch & 1) {
 		reflags[left++] = ch;
@@ -8862,11 +8889,11 @@ Perl_reg_stringify(pTHX_ MAGIC *mg, STRLEN *lp, U32 *flags, I32 *haseval ) {
 	    }
 	    reganch >>= 1;
 	}
-	if(left != 4) {
+	if(hasm) {
 	    reflags[left] = '-';
-	    left = 5;
+	    left = 5 + hask;
 	}
-
+        /* printf("[%*.7s]\n",left,reflags); */
 	mg->mg_len = re->prelen + 4 + left;
 	/*
 	 * If /x was used, we have to worry about a regex ending with a
diff --git a/regcomp.h b/regcomp.h
index ad7400f..d4a5001 100644
--- a/regcomp.h
+++ b/regcomp.h
@@ -103,10 +103,7 @@ typedef struct regexp_paren_ofs {
 } regexp_paren_ofs;
 
  typedef struct regexp_internal {
-#ifdef DEBUGGING
         int name_list_idx;	/* Optional data index of an array of paren names */
-#endif
-
 	U32 *offsets;           /* offset annotations 20001228 MJD 
                                    data about mapping the program to the 
                                    string*/
diff --git a/regexp.h b/regexp.h
index c28c78e..4045fbd 100644
--- a/regexp.h
+++ b/regexp.h
@@ -144,11 +144,18 @@ typedef struct regexp_engine {
 #define RXf_PMf_SINGLELINE	0x00002000 /* /s         */
 #define RXf_PMf_FOLD    	0x00004000 /* /i         */
 #define RXf_PMf_EXTENDED	0x00008000 /* /x         */
+#define RXf_PMf_KEEPCOPY	0x00010000 /* /k         */
 /* these flags are transfered from the PMOP->op_pmflags member during compilation */
-#define RXf_PMf_COMPILETIME	(RXf_PMf_MULTILINE|RXf_PMf_SINGLELINE|RXf_PMf_LOCALE|RXf_PMf_FOLD|RXf_PMf_EXTENDED)
+#define RXf_PMf_STD_PMMOD	(RXf_PMf_MULTILINE|RXf_PMf_SINGLELINE|RXf_PMf_FOLD|RXf_PMf_EXTENDED)
+#define RXf_PMf_COMPILETIME	(RXf_PMf_MULTILINE|RXf_PMf_SINGLELINE|RXf_PMf_LOCALE|RXf_PMf_FOLD|RXf_PMf_EXTENDED|RXf_PMf_KEEPCOPY)
+
+#define CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl)              \
+        case 'i': *(pmfl) |= RXf_PMf_FOLD;       break;   \
+        case 'm': *(pmfl) |= RXf_PMf_MULTILINE;  break;   \
+        case 's': *(pmfl) |= RXf_PMf_SINGLELINE; break;   \
+        case 'x': *(pmfl) |= RXf_PMf_EXTENDED;   break
 
 /* What we have seen */
-/* one bit here */
 #define RXf_LOOKBEHIND_SEEN	0x00020000
 #define RXf_EVAL_SEEN   	0x00040000
 #define RXf_CANY_SEEN   	0x00080000
@@ -448,6 +455,7 @@ struct re_save_state {
 
 #define SAVESTACK_ALLOC_FOR_RE_SAVE_STATE \
 	(1 + ((sizeof(struct re_save_state) - 1) / sizeof(*PL_savestack)))
+
 /*
  * Local variables:
  * c-indentation-style: bsd
diff --git a/t/op/regexp.t b/t/op/regexp.t
index cce19fc..a7cd5fc 100755
--- a/t/op/regexp.t
+++ b/t/op/regexp.t
@@ -125,7 +125,15 @@ EOFCODE
 	}
 	else {
 	    if (!$match || $got ne $expect) {
- 		print "not ok $. ($study) $input => `$got', match=$match\n$code\n";
+	        eval { require Data::Dumper };
+		if ($@) {
+		    print "not ok $. ($study) $input => `$got', match=$match\n$code\n";
+		}
+		else { # better diagnostics
+		    my $s = Data::Dumper->new([$subject],['subject'])->Useqq(1)->Dump;
+		    my $g = Data::Dumper->new([$got],['got'])->Useqq(1)->Dump;
+		    print "not ok $. ($study) $input => `$got', match=$match\n$s\n$g\n$code\n";
+		}
 		next TEST;
 	    }
 	}
diff --git a/t/op/regexp_kmod.t b/t/op/regexp_kmod.t
new file mode 100644
index 0000000..84efd83
--- /dev/null
+++ b/t/op/regexp_kmod.t
@@ -0,0 +1,39 @@
+#!./perl
+
+BEGIN {
+    chdir 't' if -d 't';
+    @INC = '../lib';
+    require './test.pl';
+}
+
+use strict;
+use warnings;
+
+our @tests = (
+    # /k     Pattern   PRE     MATCH   POST
+    [ 'k',   "456",    "123-", "456",  "-789"],
+    [ '',    "(456)",  "123-", "456",  "-789"],
+    [ '',    "456",    undef,  undef,  undef ],
+);
+
+plan tests => 4 * @tests + 2;
+my $W = "";
+
+$SIG{__WARN__} = sub { $W.=join("",@_); };
+sub _u($$) { "$_[0] is ".(defined $_[1] ? "'$_[1]'" : "undef") }
+
+$_ = '123-456-789';
+foreach my $test (@tests) {
+    my ($k, $pat,$l,$m,$r) = @$test;
+    my $test_name = "/$pat/$k";
+    my $ok = ok($k ? /$pat/k : /$pat/, $test_name);
+    SKIP: {
+        skip "/$pat/$k failed to match", 3
+            unless $ok;
+        is(${^PREMATCH},  $l,_u "$test_name: ^PREMATCH",$l);
+        is(${^MATCH},     $m,_u "$test_name: ^MATCH",$m );
+        is(${^POSTMATCH}, $r,_u "$test_name: ^POSTMATCH",$r );
+    }
+}
+is($W,"","No warnings should be produced");
+ok(!defined ${^MATCH}, "No /k in scope so ^MATCH is undef");
diff --git a/toke.c b/toke.c
index f9f0627..dcbf3d6 100644
--- a/toke.c
+++ b/toke.c
@@ -10763,20 +10763,16 @@ void
 Perl_pmflag(pTHX_ U32* pmfl, int ch)
 {
     PERL_UNUSED_CONTEXT;
-    if (ch == 'i')
-	*pmfl |= PMf_FOLD;
-    else if (ch == 'g')
-	*pmfl |= PMf_GLOBAL;
-    else if (ch == 'c')
-	*pmfl |= PMf_CONTINUE;
-    else if (ch == 'o')
-	*pmfl |= PMf_KEEP;
-    else if (ch == 'm')
-	*pmfl |= PMf_MULTILINE;
-    else if (ch == 's')
-	*pmfl |= PMf_SINGLELINE;
-    else if (ch == 'x')
-	*pmfl |= PMf_EXTENDED;
+    if (ch<256) {
+        char c = (char)ch;
+        switch (c) {
+            CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl);
+            case 'g': *pmfl |= PMf_GLOBAL; break;
+            case 'c': *pmfl |= PMf_CONTINUE; break;
+            case 'o': *pmfl |= PMf_KEEP; break;
+            case 'k': *pmfl |= PMf_KEEPCOPY; break;
+        }
+    }
 }
 
 STATIC char *
@@ -10786,7 +10782,7 @@ S_scan_pat(pTHX_ char *start, I32 type)
     PMOP *pm;
     char *s = scan_str(start,!!PL_madskills,FALSE);
     const char * const valid_flags =
-	(const char *)((type == OP_QR) ? "iomsx" : "iogcmsx");
+	(const char *)((type == OP_QR) ? "iomsxk" : "iogcmsxk");
 #ifdef PERL_MAD
     char *modstart;
 #endif
@@ -10887,7 +10883,7 @@ S_scan_subst(pTHX_ char *start)
 	    s++;
 	    es++;
 	}
-	else if (strchr("iogcmsx", *s))
+	else if (strchr("iogcmsxk", *s))
 	    pmflag(&pm->op_pmflags,*s++);
 	else
 	    break;