[perl #18232] [PATCH] store PL_reg_match_utf8 in reganch
Rafael Garcia-Suarez [Wed, 22 Jan 2003 22:35:56 +0000 (23:35 +0100)]
Message-Id: <20030122223556.57d597a3.rgarciasuarez@free.fr>

p4raw-id: //depot/perl@18633

mg.c
pp.c
pp_ctl.c
pp_hot.c
regexec.c
regexp.h
t/op/pat.t

diff --git a/mg.c b/mg.c
index fae5cda..0edd711 100644 (file)
--- a/mg.c
+++ b/mg.c
@@ -418,7 +418,7 @@ Perl_magic_regdatum_get(pTHX_ SV *sv, MAGIC *mg)
                else                    /* @- */
                    i = s;
 
-               if (i > 0 && PL_reg_match_utf8) {
+               if (i > 0 && RX_MATCH_UTF8(rx)) {
                    char *b = rx->subbeg;
                    if (b)
                        i = Perl_utf8_length(aTHX_ (U8*)b, (U8*)(b+i));
@@ -459,7 +459,7 @@ Perl_magic_len(pTHX_ SV *sv, MAGIC *mg)
            {
                i = t1 - s1;
              getlen:
-               if (i > 0 && PL_reg_match_utf8) {
+               if (i > 0 && RX_MATCH_UTF8(rx)) {
                    char *s    = rx->subbeg + s1;
                    char *send = rx->subbeg + t1;
 
@@ -707,7 +707,7 @@ Perl_magic_get(pTHX_ SV *sv, MAGIC *mg)
              getrx:
                if (i >= 0) {
                    sv_setpvn(sv, s, i);
-                   if (PL_reg_match_utf8 && is_utf8_string((U8*)s, i))
+                   if (RX_MATCH_UTF8(rx) && is_utf8_string((U8*)s, i))
                        SvUTF8_on(sv);
                    else
                        SvUTF8_off(sv);
diff --git a/pp.c b/pp.c
index c9d1dc6..67fe7f6 100644 (file)
--- a/pp.c
+++ b/pp.c
@@ -4391,7 +4391,7 @@ PP(pp_split)
     TAINT_IF((pm->op_pmflags & PMf_LOCALE) &&
             (pm->op_pmflags & (PMf_WHITE | PMf_SKIPWHITE)));
 
-    PL_reg_match_utf8 = do_utf8;
+    RX_MATCH_UTF8_set(rx, do_utf8);
 
     if (pm->op_pmreplroot) {
 #ifdef USE_ITHREADS
diff --git a/pp_ctl.c b/pp_ctl.c
index 7c0f8ba..a08e2b6 100644 (file)
--- a/pp_ctl.c
+++ b/pp_ctl.c
@@ -158,7 +158,7 @@ PP(pp_substcont)
     register REGEXP *rx = cx->sb_rx;
 
     rxres_restore(&cx->sb_rxres, rx);
-    PL_reg_match_utf8 = SvUTF8(cx->sb_targ) ? 1 : 0;
+    RX_MATCH_UTF8_set(rx, SvUTF8(cx->sb_targ));
 
     if (cx->sb_iters++) {
        I32 saviters = cx->sb_iters;
diff --git a/pp_hot.c b/pp_hot.c
index 1a40441..8d9625b 100644 (file)
--- a/pp_hot.c
+++ b/pp_hot.c
@@ -1180,7 +1180,7 @@ PP(pp_match)
                 (PL_tainted && (pm->op_pmflags & PMf_RETAINT)));
     TAINT_NOT;
 
-    PL_reg_match_utf8 = DO_UTF8(TARG);
+    RX_MATCH_UTF8_set(rx, DO_UTF8(TARG));
 
     /* PMdf_USED is set after a ?? matches once */
     if (pm->op_pmdynflags & PMdf_USED) {
@@ -1355,7 +1355,7 @@ yup:                                      /* Confirmed by INTUIT */
     if (global) {
        rx->subbeg = truebase;
        rx->startp[0] = s - truebase;
-       if (PL_reg_match_utf8) {
+       if (RX_MATCH_UTF8(rx)) {
            char *t = (char*)utf8_hop((U8*)s, rx->minlen);
            rx->endp[0] = t - truebase;
        }
@@ -1907,14 +1907,14 @@ PP(pp_subst)
        rxtainted |= 2;
     TAINT_NOT;
 
-    PL_reg_match_utf8 = DO_UTF8(TARG);
+    RX_MATCH_UTF8_set(rx, DO_UTF8(TARG));
 
   force_it:
     if (!pm || !s)
        DIE(aTHX_ "panic: pp_subst");
 
     strend = s + len;
-    slen = PL_reg_match_utf8 ? utf8_length((U8*)s, (U8*)strend) : len;
+    slen = RX_MATCH_UTF8(rx) ? utf8_length((U8*)s, (U8*)strend) : len;
     maxiters = 2 * slen + 10;  /* We can match twice at each
                                   position, once with zero-length,
                                   second time with non-zero. */
diff --git a/regexec.c b/regexec.c
index 157f426..59111a0 100644 (file)
--- a/regexec.c
+++ b/regexec.c
@@ -400,6 +400,7 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
     char *i_strpos = strpos;
     SV *dsv = PERL_DEBUG_PAD_ZERO(0);
 #endif
+    RX_MATCH_UTF8_set(prog,do_utf8);
 
     if (prog->reganch & ROPT_UTF8) {
        DEBUG_r(PerlIO_printf(Perl_debug_log,
@@ -1615,6 +1616,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *
     SV *dsv0 = PERL_DEBUG_PAD_ZERO(0);
     SV *dsv1 = PERL_DEBUG_PAD_ZERO(1);
 #endif
+    RX_MATCH_UTF8_set(prog,do_utf8);
 
     PL_regcc = 0;
 
diff --git a/regexp.h b/regexp.h
index 81552ba..0564054 100644 (file)
--- a/regexp.h
+++ b/regexp.h
@@ -71,6 +71,7 @@ typedef struct regexp {
 #define ROPT_NAUGHTY           0x20000 /* how exponential is this pattern? */
 #define ROPT_COPY_DONE         0x40000 /* subbeg is a copy of the string */
 #define ROPT_TAINTED_SEEN      0x80000
+#define ROPT_MATCH_UTF8                0x10000000 /* subbeg is utf-8 */
 
 #define RE_USE_INTUIT_NOML     0x0100000 /* Best to intuit before matching */
 #define RE_USE_INTUIT_ML       0x0200000
@@ -99,6 +100,13 @@ typedef struct regexp {
                                         ? RX_MATCH_COPIED_on(prog) \
                                         : RX_MATCH_COPIED_off(prog))
 
+#define RX_MATCH_UTF8(prog)            ((prog)->reganch & ROPT_MATCH_UTF8)
+#define RX_MATCH_UTF8_on(prog)         ((prog)->reganch |= ROPT_MATCH_UTF8)
+#define RX_MATCH_UTF8_off(prog)                ((prog)->reganch &= ~ROPT_MATCH_UTF8)
+#define RX_MATCH_UTF8_set(prog, t)     ((t) \
+                       ? (RX_MATCH_UTF8_on(prog), (PL_reg_match_utf8 = 1)) \
+                       : (RX_MATCH_UTF8_off(prog), (PL_reg_match_utf8 = 0)))
+    
 #define REXEC_COPY_STR 0x01            /* Need to copy the string. */
 #define REXEC_CHECKED  0x02            /* check_substr already checked. */
 #define REXEC_SCREAM   0x04            /* use scream table. */
diff --git a/t/op/pat.t b/t/op/pat.t
index 0868546..360892b 100755 (executable)
--- a/t/op/pat.t
+++ b/t/op/pat.t
@@ -6,7 +6,7 @@
 
 $| = 1;
 
-print "1..983\n";
+print "1..986\n";
 
 BEGIN {
     chdir 't' if -d 't';
@@ -3081,5 +3081,15 @@ ok("bbac"   =~ /$pattern/ && $1 eq 'a', "[perl #3547]");
 ok("bbbac"  =~ /$pattern/ && $1 eq 'a', "[perl #3547]");
 ok("bbbbac" =~ /$pattern/ && $1 eq 'a', "[perl #3547]");
 
-# last test 983
+{
+    # [perl #18232]
+    "\x{100}" =~ /(.)/;
+    ok( $1 eq "\x{100}", '$1 is utf-8 [perl #18232]' );
+    { 'a' =~ /./; }
+    ok( $1 eq "\x{100}", '$1 is still utf-8' );
+    ok( $1 ne "\xC4\x80", '$1 is not non-utf-8' );
+}
+
+# last test 986
+