I was mistaken. Borland and gcc do have snprintf() and vsnprintf()

[p5sagit/p5-mst-13.2.git] / pp.c
diff --git a/pp.c b/pp.c

index 86299ac..65e1d50 100644 (file)
--- a/pp.c
+++ b/pp.c
@@ -3183,8 +3183,8 @@ PP(pp_index)
     STRLEN llen = 0;
     I32 offset;
     I32 retval;
-    const char *tmps;
-    const char *tmps2;
+    const char *big_p;
+    const char *little_p;
     const I32 arybase = CopARYBASE_get(PL_curcop);
     bool big_utf8;
     bool little_utf8;
@@ -3197,6 +3197,9 @@ PP(pp_index)
     }
     little = POPs;
     big = POPs;
+    big_p = SvPV_const(big, biglen);
+    little_p = SvPV_const(little, llen);
+
     big_utf8 = DO_UTF8(big);
     little_utf8 = DO_UTF8(little);
     if (big_utf8 ^ little_utf8) {
@@ -3204,9 +3207,7 @@ PP(pp_index)
        if (little_utf8 && !PL_encoding) {
            /* Well, maybe instead we might be able to downgrade the small
               string?  */
-           STRLEN little_len;
-           const U8 * const little_pv = (U8*) SvPV_const(little, little_len);
-           char * const pv = (char*)bytes_from_utf8(little_pv, &little_len,
+           char * const pv = (char*)bytes_from_utf8(little_p, &llen,
                                                     &little_utf8);
            if (little_utf8) {
                /* If the large string is ISO-8859-1, and it's not possible to
@@ -3219,13 +3220,11 @@ PP(pp_index)
            /* At this point, pv is a malloc()ed string. So donate it to temp
               to ensure it will get free()d  */
            little = temp = newSV(0);
-           sv_usepvn(temp, pv, little_len);
+           sv_usepvn(temp, pv, llen);
+           little_p = SvPVX(little);
        } else {
-           SV * const bytes = little_utf8 ? big : little;
-           STRLEN len;
-           const char * const p = SvPV_const(bytes, len);
-
-           temp = newSVpvn(p, len);
+           temp = little_utf8
+               ? newSVpvn(big_p, biglen) : newSVpvn(little_p, llen);
 
            if (PL_encoding) {
                sv_recode_to_utf8(temp, PL_encoding);
@@ -3235,34 +3234,58 @@ PP(pp_index)
            if (little_utf8) {
                big = temp;
                big_utf8 = TRUE;
+               big_p = SvPV_const(big, biglen);
            } else {
                little = temp;
+               little_p = SvPV_const(little, llen);
            }
        }
     }
-    /* Don't actually need the NULL initialisation, but it keeps gcc quiet.  */
-    tmps2 = is_index ? NULL : SvPV_const(little, llen);
-    tmps = SvPV_const(big, biglen);
+    if (SvGAMAGIC(big)) {
+       /* Life just becomes a lot easier if I use a temporary here.
+          Otherwise I need to avoid calls to sv_pos_u2b(), which (dangerously)
+          will trigger magic and overloading again, as will fbm_instr()
+       */
+       big = sv_2mortal(newSVpvn(big_p, biglen));
+       if (big_utf8)
+           SvUTF8_on(big);
+       big_p = SvPVX(big);
+    }
+    if (SvGAMAGIC(little) || index && !SvOK(little)) {
+       /* index && !SvOK() is a hack. fbm_instr() calls SvPV_const, which will
+          warn on undef, and we've already triggered a warning with the
+          SvPV_const some lines above. We can't remove that, as we need to
+          call some SvPV to trigger overloading early and find out if the
+          string is UTF-8.
+          This is all getting too messy. The API isn't quite clean enough,
+          because data access has side effects.
+       */
+       little = sv_2mortal(newSVpvn(little_p, llen));
+       if (little_utf8)
+           SvUTF8_on(little);
+       little_p = SvPVX(little);
+    }
 
     if (MAXARG < 3)
        offset = is_index ? 0 : biglen;
     else {
        if (big_utf8 && offset > 0)
            sv_pos_u2b(big, &offset, 0);
-       offset += llen;
+       if (!is_index)
+           offset += llen;
     }
     if (offset < 0)
        offset = 0;
     else if (offset > (I32)biglen)
        offset = biglen;
-    if (!(tmps2 = is_index
-         ? fbm_instr((unsigned char*)tmps + offset,
-                     (unsigned char*)tmps + biglen, little, 0)
-         : rninstr(tmps,  tmps  + offset,
-                   tmps2, tmps2 + llen)))
+    if (!(little_p = is_index
+         ? fbm_instr((unsigned char*)big_p + offset,
+                     (unsigned char*)big_p + biglen, little, 0)
+         : rninstr(big_p,  big_p  + offset,
+                   little_p, little_p + llen)))
        retval = -1;
     else {
-       retval = tmps2 - tmps;
+       retval = little_p - big_p;
        if (retval > 0 && big_utf8)
            sv_pos_b2u(big, &retval);
     }
@@ -3413,28 +3436,64 @@ PP(pp_ucfirst)
 {
     dVAR;
     dSP;
-    SV *sv = TOPs;
-    const U8 *s;
+    SV *source = TOPs;
     STRLEN slen;
+    STRLEN need;
+    SV *dest;
+    bool inplace = TRUE;
+    bool doing_utf8;
     const int op_type = PL_op->op_type;
+    const U8 *s;
+    U8 *d;
+    U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
+    STRLEN ulen;
+    STRLEN tculen;
 
-    SvGETMAGIC(sv);
-    if (DO_UTF8(sv) &&
-       (s = (const U8*)SvPV_nomg_const(sv, slen)) && slen &&
-       UTF8_IS_START(*s)) {
-       U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
-       STRLEN ulen;
-       STRLEN tculen;
+    SvGETMAGIC(source);
+    if (SvOK(source)) {
+       s = (const U8*)SvPV_nomg_const(source, slen);
+    } else {
+       s = "";
+       slen = 0;
+    }
 
+    if (slen && DO_UTF8(source) && UTF8_IS_START(*s)) {
+       doing_utf8 = TRUE;
        utf8_to_uvchr(s, &ulen);
        if (op_type == OP_UCFIRST) {
            toTITLE_utf8(s, tmpbuf, &tculen);
        } else {
            toLOWER_utf8(s, tmpbuf, &tculen);
        }
+       /* If the two differ, we definitely cannot do inplace.  */
+       inplace = ulen == tculen;
+       need = slen + 1 - ulen + tculen;
+    } else {
+       doing_utf8 = FALSE;
+       need = slen + 1;
+    }
+
+    if (SvPADTMP(source) && !SvREADONLY(source) && inplace) {
+       /* We can convert in place.  */
 
-       if (!SvPADTMP(sv) || SvREADONLY(sv) || ulen != tculen) {
-           dTARGET;
+       dest = source;
+       s = d = (U8*)SvPV_force_nomg(source, slen);
+    } else {
+       dTARGET;
+
+       dest = TARG;
+
+       SvUPGRADE(dest, SVt_PV);
+       d = SvGROW(dest, need);
+       (void)SvPOK_only(dest);
+
+       SETs(dest);
+
+       inplace = FALSE;
+    }
+
+    if (doing_utf8) {
+       if(!inplace) {
            /* slen is the byte length of the whole SV.
             * ulen is the byte length of the original Unicode character
             * stored as UTF-8 at s.
@@ -3442,40 +3501,41 @@ PP(pp_ucfirst)
             * lowercased) Unicode character stored as UTF-8 at tmpbuf.
             * We first set the result to be the titlecased (/lowercased)
             * character, and then append the rest of the SV data. */
-           sv_setpvn(TARG, (char*)tmpbuf, tculen);
+           sv_setpvn(dest, (char*)tmpbuf, tculen);
            if (slen > ulen)
-               sv_catpvn(TARG, (char*)(s + ulen), slen - ulen);
-           SvUTF8_on(TARG);
-           sv = TARG;
-           SETs(sv);
+               sv_catpvn(dest, (char*)(s + ulen), slen - ulen);
+           SvUTF8_on(dest);
        }
        else {
-           s = (U8*)SvPV_force_nomg(sv, slen);
-           Copy(tmpbuf, s, tculen, U8);
+           Copy(tmpbuf, d, tculen, U8);
+           SvCUR_set(dest, need - 1);
        }
     }
     else {
-       U8 *s1;
-       if (!SvPADTMP(sv) || SvREADONLY(sv)) {
-           dTARGET;
-           SvUTF8_off(TARG);                           /* decontaminate */
-           sv_setsv_nomg(TARG, sv);
-           sv = TARG;
-           SETs(sv);
-       }
-       s1 = (U8*)SvPV_force_nomg(sv, slen);
-       if (*s1) {
+       if (*s) {
            if (IN_LOCALE_RUNTIME) {
                TAINT;
-               SvTAINTED_on(sv);
-               *s1 = (op_type == OP_UCFIRST)
-                   ? toUPPER_LC(*s1) : toLOWER_LC(*s1);
+               SvTAINTED_on(dest);
+               *d = (op_type == OP_UCFIRST)
+                   ? toUPPER_LC(*s) : toLOWER_LC(*s);
            }
            else
-               *s1 = (op_type == OP_UCFIRST) ? toUPPER(*s1) : toLOWER(*s1);
+               *d = (op_type == OP_UCFIRST) ? toUPPER(*s) : toLOWER(*s);
+       } else {
+           /* See bug #39028  */
+           *d = *s;
+       }
+
+       if (SvUTF8(source))
+           SvUTF8_on(dest);
+
+       if (!inplace) {
+           /* This will copy the trailing NUL  */
+           Copy(s + 1, d + 1, slen, U8);
+           SvCUR_set(dest, need - 1);
        }
     }
-    SvSETMAGIC(sv);
+    SvSETMAGIC(dest);
     RETURN;
 }