Add a new function newSVpvn_flags(), which takes a third parameter of

diff --git a/embed.fnc b/embed.fnc

index c041296..d36e2fd 100644 (file)
--- a/embed.fnc
+++ b/embed.fnc
@@ -590,6 +590,7 @@ Apda        |SV*    |newSVuv        |UV u
 Apda   |SV*    |newSVnv        |NV n
 Apda   |SV*    |newSVpv        |NULLOK const char* s|STRLEN len
 Apda   |SV*    |newSVpvn       |NULLOK const char* s|STRLEN len
+Apda   |SV*    |newSVpvn_flags |NULLOK const char* s|STRLEN len|U32 flags
 Apda   |SV*    |newSVhek       |NULLOK const HEK *hek
 Apda   |SV*    |newSVpvn_share |NULLOK const char* s|I32 len|U32 hash
 Afpda  |SV*    |newSVpvf       |NN const char* pat|...
diff --git a/embed.h b/embed.h

index ba24871..d99785c 100644 (file)
--- a/embed.h
+++ b/embed.h
@@ -572,7 +572,10 @@
 #define newSVnv                        Perl_newSVnv
 #define newSVpv                        Perl_newSVpv
 #define newSVpvn               Perl_newSVpvn
+#define newSVpvn_flags         Perl_newSVpvn_flags
+#ifdef PERL_CORE
 #define newSVhek               Perl_newSVhek
+#endif
 #define newSVpvn_share         Perl_newSVpvn_share
 #define newSVpvf               Perl_newSVpvf
 #define vnewSVpvf              Perl_vnewSVpvf
@@ -2865,7 +2868,10 @@
 #define newSVnv(a)             Perl_newSVnv(aTHX_ a)
 #define newSVpv(a,b)           Perl_newSVpv(aTHX_ a,b)
 #define newSVpvn(a,b)          Perl_newSVpvn(aTHX_ a,b)
+#define newSVpvn_flags(a,b,c)  Perl_newSVpvn_flags(aTHX_ a,b,c)
+#ifdef PERL_CORE
 #define newSVhek(a)            Perl_newSVhek(aTHX_ a)
+#endif
 #define newSVpvn_share(a,b,c)  Perl_newSVpvn_share(aTHX_ a,b,c)
 #define vnewSVpvf(a,b)         Perl_vnewSVpvf(aTHX_ a,b)
 #define newSVrv(a,b)           Perl_newSVrv(aTHX_ a,b)
diff --git a/hv.c b/hv.c

index 9523fb0..63e1049 100644 (file)
--- a/hv.c
+++ b/hv.c
@@ -350,9 +350,8 @@ Perl_hv_common(pTHX_ HV *hv, SV *keysv, const char *key, STRLEN klen,
                SV* obj = mg->mg_obj;
 
                if (!keysv) {
-                   keysv = sv_2mortal(newSVpvn(key, klen));
-                   if (flags & HVhek_UTF8)
-                       SvUTF8_on(keysv);
+                   keysv = sv_2mortal(newSVpvn_utf8(key, klen,
+                                                    flags & HVhek_UTF8));
                }
                
                mg->mg_obj = keysv;         /* pass key */
@@ -391,11 +390,8 @@ Perl_hv_common(pTHX_ HV *hv, SV *keysv, const char *key, STRLEN klen,
                /* FIXME should be able to skimp on the HE/HEK here when
                   HV_FETCH_JUST_SV is true.  */
                if (!keysv) {
-                   keysv = newSVpvn(key, klen);
-                   if (is_utf8) {
-                       SvUTF8_on(keysv);
-                   }
-               } else {
+                   keysv = newSVpvn_utf8(key, klen, is_utf8);
+               } else {
                    keysv = newSVsv(keysv);
                }
                 sv = sv_newmortal();
@@ -472,8 +468,7 @@ Perl_hv_common(pTHX_ HV *hv, SV *keysv, const char *key, STRLEN klen,
 
                if (keysv || is_utf8) {
                    if (!keysv) {
-                       keysv = newSVpvn(key, klen);
-                       SvUTF8_on(keysv);
+                       keysv = newSVpvn_utf8(key, klen, TRUE);
                    } else {
                        keysv = newSVsv(keysv);
                    }
@@ -515,8 +510,7 @@ Perl_hv_common(pTHX_ HV *hv, SV *keysv, const char *key, STRLEN klen,
                const bool save_taint = PL_tainted;
                if (keysv || is_utf8) {
                    if (!keysv) {
-                       keysv = newSVpvn(key, klen);
-                       SvUTF8_on(keysv);
+                       keysv = newSVpvn_utf8(key, klen, TRUE);
                    }
                    if (PL_tainting)
                        PL_tainted = SvTAINTED(keysv);
diff --git a/perl.c b/perl.c

index e0bc0e7..82412d2 100644 (file)
--- a/perl.c
+++ b/perl.c
@@ -2481,8 +2481,7 @@ Perl_get_cvn_flags(pTHX_ const char *name, STRLEN len, I32 flags)
      * It has the same effect as "sub name;", i.e. just a forward
      * declaration! */
     if ((flags & ~GV_NOADD_MASK) && !GvCVu(gv)) {
-       SV *const sv = newSVpvn(name,len);
-       SvFLAGS(sv) |= flags & SVf_UTF8;
+       SV *const sv = newSVpvn_flags(name, len, flags & SVf_UTF8);
        return newSUB(start_subparse(FALSE, 0),
                      newSVOP(OP_CONST, 0, sv),
                      NULL, NULL);
diff --git a/pod/perlapi.pod b/pod/perlapi.pod

index df9572a..b7cb7b3 100644 (file)
--- a/pod/perlapi.pod
+++ b/pod/perlapi.pod
@@ -3918,6 +3918,17 @@ incremented.
 =for hackers
 Found in file sv.h
 
+=item newSVpvn_utf8
+X<newSVpvn_utf8>
+
+Creates a new SV and copies a string into it.  If utf8 is true, calls
+C<SvUTF8_on> on the new SV.  Implemented as a wrapper around C<newSVpvn_flags>.
+
+       SV*     newSVpvn_utf8(NULLOK const char* s, STRLEN len, U32 utf8)
+
+=for hackers
+Found in file sv.h
+
 =item SvCUR
 X<SvCUR>
 
@@ -5137,6 +5148,25 @@ C<len> bytes long.  If the C<s> argument is NULL the new SV will be undefined.
 =for hackers
 Found in file sv.c
 
+=item newSVpvn_flags
+X<newSVpvn_flags>
+
+Creates a new SV and copies a string into it.  The reference count for the
+SV is set to 1.  Note that if C<len> is zero, Perl will create a zero length
+string.  You are responsible for ensuring that the source string is at least
+C<len> bytes long.  If the C<s> argument is NULL the new SV will be undefined.
+Currently the only flag bit accepted is SVf_UTF8. If this is set, then it
+will be set on the new SV. C<newSVpvn_utf8()> is a convenience wrapper for
+this function, defined as
+
+    #define newSVpvn_utf8(s, len, u)                   \
+       newSVpvn_flags((s), (len), (u) ? SVf_UTF8 : 0)
+
+       SV*     newSVpvn_flags(const char* s, STRLEN len, U32 flags)
+
+=for hackers
+Found in file sv.c
+
 =item newSVpvn_share
 X<newSVpvn_share>
 
diff --git a/pp.c b/pp.c

index 78e0e36..3ac702d 100644 (file)
--- a/pp.c
+++ b/pp.c
@@ -3312,9 +3312,7 @@ PP(pp_index)
           Otherwise I need to avoid calls to sv_pos_u2b(), which (dangerously)
           will trigger magic and overloading again, as will fbm_instr()
        */
-       big = sv_2mortal(newSVpvn(big_p, biglen));
-       if (big_utf8)
-           SvUTF8_on(big);
+       big = sv_2mortal(newSVpvn_utf8(big_p, biglen, big_utf8));
        big_p = SvPVX(big);
     }
     if (SvGAMAGIC(little) || (is_index && !SvOK(little))) {
@@ -3326,9 +3324,7 @@ PP(pp_index)
           This is all getting to messy. The API isn't quite clean enough,
           because data access has side effects.
        */
-       little = sv_2mortal(newSVpvn(little_p, llen));
-       if (little_utf8)
-           SvUTF8_on(little);
+       little = sv_2mortal(newSVpvn_utf8(little_p, llen, little_utf8));
        little_p = SvPVX(little);
     }
 
@@ -4755,11 +4751,9 @@ PP(pp_split)
            if (m >= strend)
                break;
 
-           dstr = newSVpvn(s, m-s);
+           dstr = newSVpvn_utf8(s, m-s, do_utf8);
            if (make_mortal)
                sv_2mortal(dstr);
-           if (do_utf8)
-               (void)SvUTF8_on(dstr);
            XPUSHs(dstr);
 
            /* skip the whitespace found last */
@@ -4788,11 +4782,9 @@ PP(pp_split)
            m++;
            if (m >= strend)
                break;
-           dstr = newSVpvn(s, m-s);
+           dstr = newSVpvn_utf8(s, m-s, do_utf8);
            if (make_mortal)
                sv_2mortal(dstr);
-           if (do_utf8)
-               (void)SvUTF8_on(dstr);
            XPUSHs(dstr);
            s = m;
        }
@@ -4817,12 +4809,11 @@ PP(pp_split)
                 /* keep track of how many bytes we skip over */
                 m = s;
                 s += UTF8SKIP(s);
-                dstr = newSVpvn(m, s-m);
+                dstr = newSVpvn_utf8(m, s-m, TRUE);
 
                 if (make_mortal)
                     sv_2mortal(dstr);
 
-                (void)SvUTF8_on(dstr);
                 PUSHs(dstr);
 
                 if (s >= strend)
@@ -4859,11 +4850,9 @@ PP(pp_split)
                    ;
                if (m >= strend)
                    break;
-               dstr = newSVpvn(s, m-s);
+               dstr = newSVpvn_utf8(s, m-s, do_utf8);
                if (make_mortal)
                    sv_2mortal(dstr);
-               if (do_utf8)
-                   (void)SvUTF8_on(dstr);
                XPUSHs(dstr);
                /* The rx->minlen is in characters but we want to step
                 * s ahead by bytes. */
@@ -4878,11 +4867,9 @@ PP(pp_split)
              (m = fbm_instr((unsigned char*)s, (unsigned char*)strend,
                             csv, multiline ? FBMrf_MULTILINE : 0)) )
            {
-               dstr = newSVpvn(s, m-s);
+               dstr = newSVpvn_utf8(s, m-s, do_utf8);
                if (make_mortal)
                    sv_2mortal(dstr);
-               if (do_utf8)
-                   (void)SvUTF8_on(dstr);
                XPUSHs(dstr);
                /* The rx->minlen is in characters but we want to step
                 * s ahead by bytes. */
@@ -4913,11 +4900,9 @@ PP(pp_split)
                strend = s + (strend - m);
            }
            m = RX_OFFS(rx)[0].start + orig;
-           dstr = newSVpvn(s, m-s);
+           dstr = newSVpvn_utf8(s, m-s, do_utf8);
            if (make_mortal)
                sv_2mortal(dstr);
-           if (do_utf8)
-               (void)SvUTF8_on(dstr);
            XPUSHs(dstr);
            if (RX_NPARENS(rx)) {
                I32 i;
@@ -4929,14 +4914,12 @@ PP(pp_split)
                       parens that didn't match -- they should be set to
                       undef, not the empty string */
                    if (m >= orig && s >= orig) {
-                       dstr = newSVpvn(s, m-s);
+                       dstr = newSVpvn_utf8(s, m-s, do_utf8);
                    }
                    else
                        dstr = &PL_sv_undef;  /* undef, not "" */
                    if (make_mortal)
                        sv_2mortal(dstr);
-                   if (do_utf8)
-                       (void)SvUTF8_on(dstr);
                    XPUSHs(dstr);
                }
            }
@@ -4951,11 +4934,9 @@ PP(pp_split)
     /* keep field after final delim? */
     if (s < strend || (iters && origlimit)) {
         const STRLEN l = strend - s;
-       dstr = newSVpvn(s, l);
+       dstr = newSVpvn_utf8(s, l, do_utf8);
        if (make_mortal)
            sv_2mortal(dstr);
-       if (do_utf8)
-           (void)SvUTF8_on(dstr);
        XPUSHs(dstr);
        iters++;
     }
diff --git a/pp_hot.c b/pp_hot.c

index 9099c88..bf8f2fb 100644 (file)
--- a/pp_hot.c
+++ b/pp_hot.c
@@ -2278,10 +2278,8 @@ PP(pp_subst)
       have_a_cow:
 #endif
        rxtainted |= RX_MATCH_TAINTED(rx);
-       dstr = newSVpvn(m, s-m);
+       dstr = newSVpvn_utf8(m, s-m, DO_UTF8(TARG));
        SAVEFREESV(dstr);
-       if (DO_UTF8(TARG))
-           SvUTF8_on(dstr);
        PL_curpm = pm;
        if (!c) {
            register PERL_CONTEXT *cx;
diff --git a/pp_pack.c b/pp_pack.c

index db8a94e..0d456bd 100644 (file)
--- a/pp_pack.c
+++ b/pp_pack.c
@@ -2510,9 +2510,9 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
                               by copying it to a temporary.  */
                            STRLEN len;
                            const char *const pv = SvPV_const(*beglist, len);
-                           SV *const temp = sv_2mortal(newSVpvn(pv, len));
-                           if (SvUTF8(*beglist))
-                               SvUTF8_on(temp);
+                           SV *const temp
+                               = sv_2mortal(newSVpvn_flags(pv, len,
+                                                           SvUTF8(*beglist)));
                            *beglist = temp;
                        }
                        count = DO_UTF8(*beglist) ?
diff --git a/proto.h b/proto.h

index 668aea1..5bbb593 100644 (file)
--- a/proto.h
+++ b/proto.h
@@ -1607,6 +1607,10 @@ PERL_CALLCONV SV*        Perl_newSVpvn(pTHX_ const char* s, STRLEN len)
                        __attribute__malloc__
                        __attribute__warn_unused_result__;
 
+PERL_CALLCONV SV*      Perl_newSVpvn_flags(pTHX_ const char* s, STRLEN len, U32 flags)
+                       __attribute__malloc__
+                       __attribute__warn_unused_result__;
+
 PERL_CALLCONV SV*      Perl_newSVhek(pTHX_ const HEK *hek)
                        __attribute__malloc__
                        __attribute__warn_unused_result__;
diff --git a/regcomp.c b/regcomp.c

index 8bd1894..e384ff5 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -1232,10 +1232,9 @@ is the recommended Unicode-aware way of saying
         /* store the word for dumping */                        \
         SV* tmp;                                                \
         if (OP(noper) != NOTHING)                               \
-            tmp = newSVpvn(STRING(noper), STR_LEN(noper));      \
+            tmp = newSVpvn_utf8(STRING(noper), STR_LEN(noper), UTF);   \
         else                                                    \
-            tmp = newSVpvn( "", 0 );                            \
-        if ( UTF ) SvUTF8_on( tmp );                            \
+            tmp = newSVpvn_utf8( "", 0, UTF );                 \
         av_push( trie_words, tmp );                             \
     });                                                         \
                                                                 \
@@ -3320,9 +3319,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                        
                        l -= old;
                        /* Get the added string: */
-                       last_str = newSVpvn(s  + old, l);
-                       if (UTF)
-                           SvUTF8_on(last_str);
+                       last_str = newSVpvn_utf8(s  + old, l, UTF);
                        if (deltanext == 0 && pos_before == b) {
                            /* What was added is a constant string */
                            if (mincount > 1) {
@@ -5256,10 +5253,8 @@ S_reg_scan_name(pTHX_ RExC_state_t *pRExC_state, U32 flags) {
     }
 
     if ( flags ) {
-        SV* sv_name = sv_2mortal(Perl_newSVpvn(aTHX_ name_start,
-            (int)(RExC_parse - name_start)));
-       if (UTF)
-            SvUTF8_on(sv_name);
+        SV* sv_name = sv_2mortal(newSVpvn_utf8(name_start,
+            (int)(RExC_parse - name_start), UTF));
         if ( flags == REG_RSN_RETURN_NAME)
             return sv_name;
         else if (flags==REG_RSN_RETURN_DATA) {
@@ -8193,8 +8188,8 @@ parseit:
 
                                  if (!unicode_alternate)
                                      unicode_alternate = newAV();
-                                 sv = newSVpvn((char*)foldbuf, foldlen);
-                                 SvUTF8_on(sv);
+                                 sv = newSVpvn_utf8((char*)foldbuf, foldlen,
+                                                    TRUE);
                                  av_push(unicode_alternate, sv);
                              }
                         }
diff --git a/sv.c b/sv.c

index 551d458..2d73c7f 100644 (file)
--- a/sv.c
+++ b/sv.c
@@ -6042,8 +6042,7 @@ Perl_sv_eq(pTHX_ register SV *sv1, register SV *sv2)
         * invalidate pv1, so we may need to make a copy */
        if (sv1 == sv2 && (SvTHINKFIRST(sv1) || SvGMAGICAL(sv1))) {
            pv1 = SvPV_const(sv1, cur1);
-           sv1 = sv_2mortal(newSVpvn(pv1, cur1));
-           if (SvUTF8(sv2)) SvUTF8_on(sv1);
+           sv1 = sv_2mortal(newSVpvn_flags(pv1, cur1, SvUTF8(sv2)));
        }
        pv1 = SvPV_const(sv1, cur1);
     }
@@ -7068,6 +7067,37 @@ Perl_newSVpvn(pTHX_ const char *s, STRLEN len)
     return sv;
 }
 
+/*
+=for apidoc newSVpvn_flags
+
+Creates a new SV and copies a string into it.  The reference count for the
+SV is set to 1.  Note that if C<len> is zero, Perl will create a zero length
+string.  You are responsible for ensuring that the source string is at least
+C<len> bytes long.  If the C<s> argument is NULL the new SV will be undefined.
+Currently the only flag bit accepted is SVf_UTF8. If this is set, then it
+will be set on the new SV. C<newSVpvn_utf8()> is a convenience wrapper for
+this function, defined as
+
+    #define newSVpvn_utf8(s, len, u)                   \
+       newSVpvn_flags((s), (len), (u) ? SVf_UTF8 : 0)
+
+=cut
+*/
+
+SV *
+Perl_newSVpvn_flags(pTHX_ const char *s, STRLEN len, U32 flags)
+{
+    dVAR;
+    register SV *sv;
+
+    /* All the flags we don't support must be zero.
+       And we're new code so I'm going to assert this from the start.  */
+    assert(!(flags & ~SVf_UTF8));
+    new_SV(sv);
+    sv_setpvn(sv,s,len);
+    SvFLAGS(sv) |= flags;
+    return sv;
+}
 
 /*
 =for apidoc newSVhek
diff --git a/sv.h b/sv.h

index df42dcf..9d0851c 100644 (file)
--- a/sv.h
+++ b/sv.h
@@ -1911,6 +1911,17 @@ struct clone_params {
 };
 
 /*
+=for apidoc Am|SV*|newSVpvn_utf8|NULLOK const char* s|STRLEN len|U32 utf8
+
+Creates a new SV and copies a string into it.  If utf8 is true, calls
+C<SvUTF8_on> on the new SV.  Implemented as a wrapper around C<newSVpvn_flags>.
+
+=cut
+*/
+
+#define newSVpvn_utf8(s, len, u) newSVpvn_flags((s), (len), (u) ? SVf_UTF8 : 0)
+
+/*
  * Local variables:
  * c-indentation-style: bsd
  * c-basic-offset: 4
diff --git a/toke.c b/toke.c

index 04190c3..410e4d6 100644 (file)
--- a/toke.c
+++ b/toke.c
@@ -1347,9 +1347,9 @@ STATIC SV *
 S_newSV_maybe_utf8(pTHX_ const char *start, STRLEN len)
 {
     dVAR;
-    SV * const sv = newSVpvn(start,len);
-    if (UTF && !IN_BYTES && is_utf8_string((const U8*)start, len))
-       SvUTF8_on(sv);
+    SV * const sv = newSVpvn_utf8(start, len,
+                                 UTF && !IN_BYTES
+                                 && is_utf8_string((const U8*)start, len));
     return sv;
 }
 
@@ -1570,9 +1570,7 @@ S_tokeq(pTHX_ SV *sv)
        goto finish;
     d = s;
     if ( PL_hints & HINT_NEW_STRING ) {
-       pv = sv_2mortal(newSVpvn(SvPVX_const(pv), len));
-       if (SvUTF8(sv))
-           SvUTF8_on(pv);
+       pv = sv_2mortal(newSVpvn_flags(SvPVX_const(pv), len, SvUTF8(sv)));
     }
     while (s < send) {
        if (*s == '\\') {
@@ -1639,9 +1637,7 @@ S_sublex_start(pTHX)
            /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
            STRLEN len;
            const char * const p = SvPV_const(sv, len);
-           SV * const nsv = newSVpvn(p, len);
-           if (SvUTF8(sv))
-               SvUTF8_on(nsv);
+           SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
            SvREFCNT_dec(sv);
            sv = nsv;
        }
@@ -6347,9 +6343,7 @@ Perl_yylex(pTHX)
                            for (; !isSPACE(*d) && len; --len, ++d)
                                /**/;
                        }
-                       sv = newSVpvn(b, d-b);
-                       if (DO_UTF8(PL_lex_stuff))
-                           SvUTF8_on(sv);
+                       sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
                        words = append_elem(OP_LIST, words,
                                            newSVOP(OP_CONST, 0, tokeq(sv)));
                    }
diff --git a/util.c b/util.c

index 1710e6f..6c7e338 100644 (file)
--- a/util.c
+++ b/util.c
@@ -1270,8 +1270,7 @@ S_vdie_common(pTHX_ const char *message, STRLEN msglen, I32 utf8, bool warn)
            *hook = NULL;
        }
        if (warn || message) {
-           msg = newSVpvn(message, msglen);
-           SvFLAGS(msg) |= utf8;
+           msg = newSVpvn_flags(message, msglen, utf8);
            SvREADONLY_on(msg);
            SAVEFREESV(msg);
        }
embed.fnc		patch \| blob \| blame \| history
embed.h		patch \| blob \| blame \| history
hv.c		patch \| blob \| blame \| history
perl.c		patch \| blob \| blame \| history
pod/perlapi.pod		patch \| blob \| blame \| history
pp.c		patch \| blob \| blame \| history
pp_hot.c		patch \| blob \| blame \| history
pp_pack.c		patch \| blob \| blame \| history
proto.h		patch \| blob \| blame \| history
regcomp.c		patch \| blob \| blame \| history
sv.c		patch \| blob \| blame \| history
sv.h		patch \| blob \| blame \| history
toke.c		patch \| blob \| blame \| history
util.c		patch \| blob \| blame \| history