Comment on comment.
[p5sagit/p5-mst-13.2.git] / doop.c
diff --git a/doop.c b/doop.c
index b75ffaa..3c34425 100644 (file)
--- a/doop.c
+++ b/doop.c
@@ -72,12 +72,12 @@ S_do_trans_simple(pTHX_ SV *sv)
     Newz(0, d, len*2+1, U8);
     dstart = d;
     while (s < send) {
-        I32 ulen;
+        STRLEN ulen;
         short c;
 
         ulen = 1;
         /* Need to check this, otherwise 128..255 won't match */
-       c = utf8_to_uv_chk(s, &ulen, 0);
+       c = utf8_to_uv(s, send - s, &ulen, 0);
         if (c < 0x100 && (ch = tbl[(short)c]) >= 0) {
             matches++;
             if (ch < 0x80)
@@ -122,10 +122,10 @@ S_do_trans_count(pTHX_ SV *sv)/* SPC - OK */
             s += UTF8SKIP(s);
         else {
             UV c;
-            I32 ulen;
+            STRLEN ulen;
             ulen = 1;
             if (hasutf)
-                c = utf8_to_uv_chk(s,&ulen, 0);
+                c = utf8_to_uv(s, send - s, &ulen, 0);
             else
                 c = *s;
             if (c < 0x100 && tbl[c] >= 0)
@@ -144,6 +144,7 @@ S_do_trans_complex(pTHX_ SV *sv)/* SPC - NOT OK */
     U8 *s;
     U8 *send;
     U8 *d;
+    U8 *dstart;
     I32 hasutf = SvUTF8(sv);
     I32 matches = 0;
     STRLEN len;
@@ -157,7 +158,9 @@ S_do_trans_complex(pTHX_ SV *sv)/* SPC - NOT OK */
     s = (U8*)SvPV(sv, len);
     send = s + len;
 
-    d = s;
+    Newz(0, d, len*2+1, U8);
+    dstart = d;
+
     if (PL_op->op_private & OPpTRANS_SQUASH) {
        U8* p = send;
 
@@ -168,9 +171,7 @@ S_do_trans_complex(pTHX_ SV *sv)/* SPC - NOT OK */
                if ((ch = tbl[*s]) >= 0) {
                    *d = ch;
                    matches++;
-                   if (p == d - 1 && *p == *d)
-                       matches--;
-                   else
+           if (p != d - 1 || *p != *d)
                        p = d++;
                }
                else if (ch == -1)      /* -1 is unmapped character */
@@ -181,26 +182,41 @@ S_do_trans_complex(pTHX_ SV *sv)/* SPC - NOT OK */
     }
     else {
        while (s < send) {
+           UV comp;
             if (hasutf && *s & 0x80)
-                s += UTF8SKIP(s);
-            else {
-               if ((ch = tbl[*s]) >= 0) {
-                   *d = ch;
-                   matches++;
-                   d++;
-               }
-               else if (ch == -1)      /* -1 is unmapped character */
-                   *d++ = *s;          /* -2 is delete character */
-               s++;
-            }
+                comp = utf8_to_uv_simple(s, NULL);
+           else
+                comp = *s;
+           
+           ch = tbl[comp];
+           
+           if (ch == -1) { /* -1 is unmapped character */
+                ch = comp;
+               matches--;
+           }
+
+           if (ch >= 0) {
+               if (hasutf)
+                 d = uv_to_utf8(d, ch);
+               else 
+                 *d++ = ch;
+           }
+           matches++;
+
+           s += hasutf && *s & 0x80 ? UNISKIP(*s) : 1;
+            
        }
     }
-    matches += send - d;               /* account for disappeared chars */
+
     *d = '\0';
-    SvCUR_set(sv, d - (U8*)SvPVX(sv));
-    SvSETMAGIC(sv);
 
+    sv_setpvn(sv, (const char*)dstart, d - dstart);
+    Safefree(dstart);
+    if (hasutf)
+        SvUTF8_on(sv);
+    SvSETMAGIC(sv);
     return matches;
+
 }
 
 STATIC I32
@@ -363,8 +379,8 @@ S_do_trans_complex_utf8(pTHX_ SV *sv) /* SPC - NOT OK */
                continue;
            }
            else if (uv == none) {      /* "none" is unmapped character */
-               I32 ulen;
-               *d++ = (U8)utf8_to_uv_chk(s, &ulen, 0);
+               STRLEN ulen;
+               *d++ = (U8)utf8_to_uv(s, send - s, &ulen, 0);
                s += ulen;
                puv = 0xfeedface;
                continue;
@@ -404,8 +420,8 @@ S_do_trans_complex_utf8(pTHX_ SV *sv) /* SPC - NOT OK */
                continue;
            }
            else if (uv == none) {      /* "none" is unmapped character */
-               I32 ulen;
-               *d++ = (U8)utf8_to_uv_chk(s, &ulen, 0);
+               STRLEN ulen;
+               *d++ = (U8)utf8_to_uv(s, send - s, &ulen, 0);
                s += ulen;
                continue;
            }
@@ -550,9 +566,8 @@ Perl_do_vecget(pTHX_ SV *sv, I32 offset, I32 size)
     if (size < 1 || (size & (size-1))) /* size < 1 or not a power of two */
        Perl_croak(aTHX_ "Illegal number of bits in vec");
 
-    if (SvUTF8(sv)) {
+    if (SvUTF8(sv))
        (void) Perl_sv_utf8_downgrade(aTHX_ sv, TRUE);
-    }
 
     offset *= size;    /* turn into bit offset */
     len = (offset + size + 7) / 8;     /* required number of bytes */
@@ -927,7 +942,7 @@ Perl_do_vop(pTHX_ I32 optype, SV *sv, SV *left, SV *right)
 
     if (left_utf && !right_utf)
        sv_utf8_upgrade(right);
-    if (!left_utf && right_utf)
+    else if (!left_utf && right_utf)
        sv_utf8_upgrade(left);
 
     if (sv != left || (optype != OP_BIT_AND && !SvOK(sv) && !SvGMAGICAL(sv)))
@@ -964,15 +979,15 @@ Perl_do_vop(pTHX_ I32 optype, SV *sv, SV *left, SV *right)
        char *dcsave = dc;
        STRLEN lulen = leftlen;
        STRLEN rulen = rightlen;
-       I32 ulen;
+       STRLEN ulen;
 
        switch (optype) {
        case OP_BIT_AND:
            while (lulen && rulen) {
-               luc = utf8_to_uv_chk((U8*)lc, &ulen, 0);
+               luc = utf8_to_uv((U8*)lc, lulen, &ulen, UTF8_ALLOW_ANYUV);
                lc += ulen;
                lulen -= ulen;
-               ruc = utf8_to_uv_chk((U8*)rc, &ulen, 0);
+               ruc = utf8_to_uv((U8*)rc, rulen, &ulen, UTF8_ALLOW_ANYUV);
                rc += ulen;
                rulen -= ulen;
                duc = luc & ruc;
@@ -984,10 +999,10 @@ Perl_do_vop(pTHX_ I32 optype, SV *sv, SV *left, SV *right)
            break;
        case OP_BIT_XOR:
            while (lulen && rulen) {
-               luc = utf8_to_uv_chk((U8*)lc, &ulen, 0);
+               luc = utf8_to_uv((U8*)lc, lulen, &ulen, UTF8_ALLOW_ANYUV);
                lc += ulen;
                lulen -= ulen;
-               ruc = utf8_to_uv_chk((U8*)rc, &ulen, 0);
+               ruc = utf8_to_uv((U8*)rc, rulen, &ulen, UTF8_ALLOW_ANYUV);
                rc += ulen;
                rulen -= ulen;
                duc = luc ^ ruc;
@@ -996,10 +1011,10 @@ Perl_do_vop(pTHX_ I32 optype, SV *sv, SV *left, SV *right)
            goto mop_up_utf;
        case OP_BIT_OR:
            while (lulen && rulen) {
-               luc = utf8_to_uv_chk((U8*)lc, &ulen, 0);
+               luc = utf8_to_uv((U8*)lc, lulen, &ulen, UTF8_ALLOW_ANYUV);
                lc += ulen;
                lulen -= ulen;
-               ruc = utf8_to_uv_chk((U8*)rc, &ulen, 0);
+               ruc = utf8_to_uv((U8*)rc, rulen, &ulen, UTF8_ALLOW_ANYUV);
                rc += ulen;
                rulen -= ulen;
                duc = luc | ruc;