Make chr() for values >127 to create utf8 when under utf8.
[p5sagit/p5-mst-13.2.git] / doop.c
diff --git a/doop.c b/doop.c
index 0c6e690..0139911 100644 (file)
--- a/doop.c
+++ b/doop.c
 #endif
 #endif
 
-
-#define HALF_UPGRADE(start,end) {                                    \
-                                U8* newstr;                          \
-                                STRLEN len;                          \
-                                len = end-start;                     \
-                                newstr = bytes_to_utf8(start, &len); \
-                                Copy(newstr,start,len,U8*);          \
-                                end = start + len;                   \
-                                }
-
+#define HALF_UTF8_UPGRADE(start,end) \
+    STMT_START {                               \
+       U8* NeWsTr;                             \
+       STRLEN LeN = (end) - (start);           \
+       NeWsTr = bytes_to_utf8(start, &LeN);    \
+       Safefree(start);                        \
+       (start) = NeWsTr;                       \
+       (end) = (start) + LeN;                  \
+    } STMT_END
 
 STATIC I32
 S_do_trans_simple(pTHX_ SV *sv)
@@ -55,14 +54,15 @@ S_do_trans_simple(pTHX_ SV *sv)
 
     /* First, take care of non-UTF8 input strings, because they're easy */
     if (!sutf) {
-    while (s < send) {
+       while (s < send) {
            if ((ch = tbl[*s]) >= 0) {
-               matches++;
-                *s++ = ch;
-            } else
-       s++;
-        }
-    SvSETMAGIC(sv);
+               matches++;
+               *s++ = ch;
+           }
+           else
+               s++;
+       }
+       SvSETMAGIC(sv);
         return matches;
     }
 
@@ -83,12 +83,13 @@ S_do_trans_simple(pTHX_ SV *sv)
             else         
                 d = uv_to_utf8(d,ch);
             s += ulen;
-        } else { /* No match -> copy */
+        }
+       else { /* No match -> copy */
             while (ulen--)
                 *d++ = *s++;
         }
     }
-    *d='\0';
+    *d = '\0';
     sv_setpvn(sv, (const char*)dstart, d - dstart);
     SvUTF8_on(sv);
     SvLEN_set(sv, 2*len+1);
@@ -116,7 +117,7 @@ S_do_trans_count(pTHX_ SV *sv)/* SPC - OK */
 
     while (s < send) {
         if (hasutf && *s & 0x80)
-            s+=UTF8SKIP(s);
+            s += UTF8SKIP(s);
         else {
             UV c;
             I32 ulen;
@@ -127,7 +128,7 @@ S_do_trans_count(pTHX_ SV *sv)/* SPC - OK */
                 c = *s;
             if (c < 0x100 && tbl[c] >= 0)
                 matches++;
-            s+=ulen;
+            s += ulen;
         }
     }
 
@@ -160,7 +161,7 @@ S_do_trans_complex(pTHX_ SV *sv)/* SPC - NOT OK */
 
        while (s < send) {
             if (hasutf && *s & 0x80)
-                s+=UTF8SKIP(s);
+                s += UTF8SKIP(s);
             else {
                if ((ch = tbl[*s]) >= 0) {
                    *d = ch;
@@ -170,7 +171,7 @@ S_do_trans_complex(pTHX_ SV *sv)/* SPC - NOT OK */
                    else
                        p = d++;
                }
-               else if (ch == -1)              /* -1 is unmapped character */
+               else if (ch == -1)      /* -1 is unmapped character */
                    *d++ = *s;          /* -2 is delete character */
                s++;
             }
@@ -179,20 +180,20 @@ S_do_trans_complex(pTHX_ SV *sv)/* SPC - NOT OK */
     else {
        while (s < send) {
             if (hasutf && *s & 0x80)
-                s+=UTF8SKIP(s);
+                s += UTF8SKIP(s);
             else {
                if ((ch = tbl[*s]) >= 0) {
                    *d = ch;
                    matches++;
                    d++;
                }
-               else if (ch == -1)              /* -1 is unmapped character */
+               else if (ch == -1)      /* -1 is unmapped character */
                    *d++ = *s;          /* -2 is delete character */
                s++;
             }
        }
     }
-    matches += send - d;       /* account for disappeared chars */
+    matches += send - d;               /* account for disappeared chars */
     *d = '\0';
     SvCUR_set(sv, d - (U8*)SvPVX(sv));
     SvSETMAGIC(sv);
@@ -238,25 +239,25 @@ S_do_trans_simple_utf8(pTHX_ SV *sv)/* SPC - OK */
        if ((uv = swash_fetch(rv, s)) < none) {
            s += UTF8SKIP(s);
            matches++;
-            if (uv & 0x80 && !isutf++)
-                HALF_UPGRADE(dstart,d);
+            if ((uv & 0x80) && !isutf++)
+                HALF_UTF8_UPGRADE(dstart,d);
            d = uv_to_utf8(d, uv);
        }
        else if (uv == none) {
            int i;
-        i = UTF8SKIP(s);
+           i = UTF8SKIP(s);
             if (i > 1 && !isutf++)
-                HALF_UPGRADE(dstart,d);
+                HALF_UTF8_UPGRADE(dstart,d);
            while(i--)
-            *d++ = *s++;
+               *d++ = *s++;
        }
        else if (uv == extra) {
            int i;
-        i = UTF8SKIP(s);
+           i = UTF8SKIP(s);
            s += i;
            matches++;
             if (i > 1 && !isutf++) 
-                HALF_UPGRADE(dstart,d);
+                HALF_UTF8_UPGRADE(dstart,d);
            d = uv_to_utf8(d, final);
        }
        else
@@ -351,32 +352,32 @@ S_do_trans_complex_utf8(pTHX_ SV *sv) /* SPC - NOT OK */
            if (uv < none) {
                matches++;
                if (uv != puv) {
-                    if (uv & 0x80 && !isutf++) 
-                        HALF_UPGRADE(dst,d);
-                       d = uv_to_utf8(d, uv);
+                    if ((uv & 0x80) && !isutf++) 
+                        HALF_UTF8_UPGRADE(dst,d);
+                   d = uv_to_utf8(d, uv);
                    puv = uv;
                }
-                   s += UTF8SKIP(s);
+               s += UTF8SKIP(s);
                continue;
            }
            else if (uv == none) {      /* "none" is unmapped character */
-                       I32 ulen;
-                       *d++ = (U8)utf8_to_uv(s, &ulen);
-                       s += ulen;
+               I32 ulen;
+               *d++ = (U8)utf8_to_uv(s, &ulen);
+               s += ulen;
                puv = 0xfeedface;
                continue;
            }
            else if (uv == extra && !del) {
                matches++;
                if (uv != puv) {
-                       d = uv_to_utf8(d, final);
+                   d = uv_to_utf8(d, final);
                    puv = final;
                }
-                   s += UTF8SKIP(s);
+               s += UTF8SKIP(s);
                continue;
            }
-           matches++;          /* "none+1" is delete character */
-               s += UTF8SKIP(s);
+           matches++;                  /* "none+1" is delete character */
+           s += UTF8SKIP(s);
        }
     }
     else {
@@ -396,24 +397,24 @@ S_do_trans_complex_utf8(pTHX_ SV *sv) /* SPC - NOT OK */
            }
            if (uv < none) {
                matches++;
-                   d = uv_to_utf8(d, uv);
-                   s += UTF8SKIP(s);
+               d = uv_to_utf8(d, uv);
+               s += UTF8SKIP(s);
                continue;
            }
            else if (uv == none) {      /* "none" is unmapped character */
-                       I32 ulen;
-                       *d++ = (U8)utf8_to_uv(s, &ulen);
-                       s += ulen;
+               I32 ulen;
+               *d++ = (U8)utf8_to_uv(s, &ulen);
+               s += ulen;
                continue;
            }
            else if (uv == extra && !del) {
                matches++;
-                   d = uv_to_utf8(d, final);
-                   s += UTF8SKIP(s);
+               d = uv_to_utf8(d, final);
+               s += UTF8SKIP(s);
                continue;
            }
-           matches++;          /* "none+1" is delete character */
-               s += UTF8SKIP(s);
+           matches++;                  /* "none+1" is delete character */
+           s += UTF8SKIP(s);
        }
     }
     if (dst)
@@ -450,19 +451,19 @@ Perl_do_trans(pTHX_ SV *sv)
 
     switch (PL_op->op_private & ~hasutf & 63) {
     case 0:
-    if (hasutf)
-        return do_trans_simple_utf8(sv);
-    else
-        return do_trans_simple(sv);
+       if (hasutf)
+           return do_trans_simple_utf8(sv);
+       else
+           return do_trans_simple(sv);
 
     case OPpTRANS_IDENTICAL:
-    if (hasutf)
-        return do_trans_count_utf8(sv);
-    else
-        return do_trans_count(sv);
+       if (hasutf)
+           return do_trans_count_utf8(sv);
+       else
+           return do_trans_count(sv);
 
     default:
-    if (hasutf)
+       if (hasutf)
            return do_trans_complex_utf8(sv);
        else
            return do_trans_complex(sv);
@@ -902,6 +903,7 @@ Perl_do_vop(pTHX_ I32 optype, SV *sv, SV *left, SV *right)
     char *rsave;
     bool left_utf = DO_UTF8(left);
     bool right_utf = DO_UTF8(right);
+    I32 needlen;
 
     if (left_utf && !right_utf)
        sv_utf8_upgrade(right);
@@ -914,17 +916,23 @@ Perl_do_vop(pTHX_ I32 optype, SV *sv, SV *left, SV *right)
     rsave = rc = SvPV(right, rightlen);
     len = leftlen < rightlen ? leftlen : rightlen;
     lensave = len;
-    if (SvOK(sv) || SvTYPE(sv) > SVt_PVMG) {
+    if ((left_utf || right_utf) && (sv == left || sv == right)) {
+       needlen = optype == OP_BIT_AND ? len : leftlen + rightlen;
+       Newz(801, dc, needlen + 1, char);
+    }
+    else if (SvOK(sv) || SvTYPE(sv) > SVt_PVMG) {
        STRLEN n_a;
        dc = SvPV_force(sv, n_a);
        if (SvCUR(sv) < len) {
            dc = SvGROW(sv, len + 1);
            (void)memzero(dc + SvCUR(sv), len - SvCUR(sv) + 1);
        }
+       if (optype != OP_BIT_AND && (left_utf || right_utf))
+           dc = SvGROW(sv, leftlen + rightlen + 1);
     }
     else {
-       I32 needlen = ((optype == OP_BIT_AND)
-                       ? len : (leftlen > rightlen ? leftlen : rightlen));
+       needlen = ((optype == OP_BIT_AND)
+                   ? len : (leftlen > rightlen ? leftlen : rightlen));
        Newz(801, dc, needlen + 1, char);
        (void)sv_usepvn(sv, dc, needlen);
        dc = SvPVX(sv);         /* sv_usepvn() calls Renew() */
@@ -933,14 +941,11 @@ Perl_do_vop(pTHX_ I32 optype, SV *sv, SV *left, SV *right)
     (void)SvPOK_only(sv);
     if (left_utf || right_utf) {
        UV duc, luc, ruc;
+       char *dcsave = dc;
        STRLEN lulen = leftlen;
        STRLEN rulen = rightlen;
-       STRLEN dulen = 0;
        I32 ulen;
 
-       if (optype != OP_BIT_AND)
-           dc = SvGROW(sv, leftlen+rightlen+1);
-
        switch (optype) {
        case OP_BIT_AND:
            while (lulen && rulen) {
@@ -953,8 +958,9 @@ Perl_do_vop(pTHX_ I32 optype, SV *sv, SV *left, SV *right)
                duc = luc & ruc;
                dc = (char*)uv_to_utf8((U8*)dc, duc);
            }
-           dulen = dc - SvPVX(sv);
-           SvCUR_set(sv, dulen);
+           if (sv == left || sv == right)
+               (void)sv_usepvn(sv, dcsave, needlen);
+           SvCUR_set(sv, dc - dcsave);
            break;
        case OP_BIT_XOR:
            while (lulen && rulen) {
@@ -980,8 +986,9 @@ Perl_do_vop(pTHX_ I32 optype, SV *sv, SV *left, SV *right)
                dc = (char*)uv_to_utf8((U8*)dc, duc);
            }
          mop_up_utf:
-           dulen = dc - SvPVX(sv);
-           SvCUR_set(sv, dulen);
+           if (sv == left || sv == right)
+               (void)sv_usepvn(sv, dcsave, needlen);
+           SvCUR_set(sv, dc - dcsave);
            if (rulen)
                sv_catpvn(sv, rc, rulen);
            else if (lulen)