add new files to MANIFEST; add missing prototypes to proto.h;
[p5sagit/p5-mst-13.2.git] / doop.c
diff --git a/doop.c b/doop.c
index 67082db..9dc9b04 100644 (file)
--- a/doop.c
+++ b/doop.c
@@ -22,72 +22,320 @@ I32
 do_trans(SV *sv, OP *arg)
 {
     dTHR;
-    register short *tbl;
     register U8 *s;
     register U8 *send;
     register U8 *d;
-    register I32 ch;
     register I32 matches = 0;
-    register I32 squash = op->op_private & OPpTRANS_SQUASH;
-    register U8 *p;
+    register I32 squash = PL_op->op_private & OPpTRANS_SQUASH;
     STRLEN len;
 
-    if (SvREADONLY(sv) && !(op->op_private & OPpTRANS_COUNTONLY))
+    if (SvREADONLY(sv) && !(PL_op->op_private & OPpTRANS_COUNTONLY))
        croak(no_modify);
-    tbl = (short*)cPVOP->op_pv;
-    s = (U8*)SvPV(sv, len);
-    if (!len)
-       return 0;
-    if (!SvPOKp(sv))
-       s = (U8*)SvPV_force(sv, len);
-    (void)SvPOK_only(sv);
-    send = s + len;
-    if (!tbl || !s)
-       croak("panic: do_trans");
-    DEBUG_t( deb("2.TBL\n"));
-    if (!op->op_private) {
-       while (s < send) {
-           if ((ch = tbl[*s]) >= 0) {
-               matches++;
-               *s = ch;
+
+    if (PL_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF)) {
+       SV* rv = (SV*)cSVOP->op_sv;
+       HV* hv = (HV*)SvRV(rv);
+       SV** svp = hv_fetch(hv, "NONE", 4, FALSE);
+       UV none = svp ? SvUV(*svp) : 0x7fffffff;
+       UV extra = none + 1;
+       I32 del = PL_op->op_private & OPpTRANS_DELETE;
+       UV final;
+       register UV uv;
+       UV puv;
+       char *dst;
+       register I32 from_utf = PL_op->op_private & OPpTRANS_FROM_UTF;
+       register I32 to_utf = PL_op->op_private & OPpTRANS_TO_UTF;
+
+       s = (U8*)SvPV(sv, len);
+       if (!len)
+           return 0;
+       if (!SvPOKp(sv))
+           s = (U8*)SvPV_force(sv, len);
+       (void)SvPOK_only(sv);
+       send = s + len;
+       DEBUG_t( deb("2.TBL\n"));
+       if (PL_op->op_private == (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF)) { /* no other flags */
+           svp = hv_fetch(hv, "FINAL", 5, FALSE);
+           if (svp)
+               final = SvUV(*svp);
+
+           d = s;
+           while (s < send) {
+               if ((uv = swash_fetch(rv, s)) < none) {
+                   s += UTF8SKIP(s);
+                   matches++;
+                   d = uv_to_utf8(d, uv);
+               }
+               else if (uv == none) {
+                   int i;
+                   for (i = UTF8SKIP(s); i; i--)
+                       *d++ = *s++;
+               }
+               else if (uv == extra) {
+                   s += UTF8SKIP(s);
+                   matches++;
+                   d = uv_to_utf8(d, final);
+               }
+               else
+                   s += UTF8SKIP(s);
            }
-           s++;
+           *d = '\0';
+           SvCUR_set(sv, d - (U8*)SvPVX(sv));
+           SvSETMAGIC(sv);
        }
-       SvSETMAGIC(sv);
-    }
-    else if (op->op_private & OPpTRANS_COUNTONLY) {
-       while (s < send) {
-           if (tbl[*s] >= 0)
-               matches++;
-           s++;
+       else if (PL_op->op_private == OPpTRANS_FROM_UTF) {      /* no other flags */
+           svp = hv_fetch(hv, "FINAL", 5, FALSE);
+           if (svp)
+               final = SvUV(*svp);
+
+           d = s;
+           while (s < send) {
+               if ((uv = swash_fetch(rv, s)) < none) {
+                   s += UTF8SKIP(s);
+                   matches++;
+                   *d++ = (U8)uv;
+               }
+               else if (uv == none) {
+                   I32 ulen;
+                   uv = utf8_to_uv(s, &ulen);
+                   s += ulen;
+                   *d++ = (U8)uv;
+               }
+               else if (uv == extra) {
+                   s += UTF8SKIP(s);
+                   matches++;
+                   *d++ = (U8)final;
+               }
+               else
+                   s += UTF8SKIP(s);
+           }
+           *d = '\0';
+           SvCUR_set(sv, d - (U8*)SvPVX(sv));
+           SvSETMAGIC(sv);
+       }
+       else if (PL_op->op_private == OPpTRANS_TO_UTF) {        /* no other flags */
+           svp = hv_fetch(hv, "FINAL", 5, FALSE);
+           if (svp)
+               final = SvUV(*svp);
+
+           d = s;
+           while (s < send) {
+               U8 tmpbuf[10];
+               uv_to_utf8(tmpbuf, *s);         /* XXX suboptimal */
+               if ((uv = swash_fetch(rv, tmpbuf)) < none) {
+                   s += UTF8SKIP(s);
+                   matches++;
+                   d = uv_to_utf8(d, uv);
+               }
+               else if (uv == none) {
+                   I32 ulen;
+                   uv = utf8_to_uv(s, &ulen);
+                   s += ulen;
+                   d = uv_to_utf8(d, uv);
+               }
+               else if (uv == extra) {
+                   s += UTF8SKIP(s);
+                   matches++;
+                   d = uv_to_utf8(d, final);
+               }
+               else
+                   s += UTF8SKIP(s);
+           }
+           *d = '\0';
+           SvCUR_set(sv, d - (U8*)SvPVX(sv));
+           SvSETMAGIC(sv);
+       }
+       else if (PL_op->op_private & OPpTRANS_COUNTONLY) {
+           if (from_utf) {
+               while (s < send) {
+                   if (swash_fetch(rv, s) < none)
+                       matches++;
+                   s += UTF8SKIP(s);
+               }
+           }
+           else {
+               while (s < send) {
+                   char tmpbuf[10];
+                   uv_to_utf8(tmpbuf, *s);     /* XXX suboptimal */
+                   if (swash_fetch(rv, tmpbuf) < none)
+                       matches++;
+                   s += UTF8SKIP(s);
+               }
+           }
        }
+       else {
+           I32 bits = 16;
+           U8 *dst;
+
+           svp = hv_fetch(hv, "BITS", 4, FALSE);
+           if (svp)
+               bits = (I32)SvIV(*svp);
+
+           svp = hv_fetch(hv, "FINAL", 5, FALSE);
+           if (svp)
+               final = SvUV(*svp);
+
+           Newz(801, d, len * (bits >> 3) + 1, char);
+           dst = d;
+
+           puv = 0xfeedface;
+           if (squash) {
+               while (s < send) {
+                   if (from_utf)
+                       uv = swash_fetch(rv, s);
+                   else {
+                       char tmpbuf[10];
+                       uv_to_utf8(tmpbuf, *s); /* XXX suboptimal */
+                       uv = swash_fetch(rv, tmpbuf);
+                   }
+                   if (uv < none) {
+                       matches++;
+                       if (uv != puv) {
+                           if (to_utf)
+                               d = uv_to_utf8(d, uv);
+                           else
+                               *d++ = (U8)uv;
+                       }
+                       puv = uv;
+                       s += UTF8SKIP(s);
+                       continue;
+                   }
+                   else if (uv == none) {      /* "none" is unmapped character */
+                       int i;
+                       if (to_utf) {
+                           for (i = UTF8SKIP(s); i; --i)
+                               *d++ = *s++;
+                       }
+                       else {
+                           I32 ulen;
+                           *d++ = (U8)utf8_to_uv(s, &ulen);
+                           s += ulen;
+                       }
+                       puv = 0xfeedface;
+                       continue;
+                   }
+                   else if (uv == extra && !del) {
+                       matches++;
+                       if (to_utf)
+                           d = uv_to_utf8(d, final);
+                       else
+                           *d++ = (U8)final;
+                       s += UTF8SKIP(s);
+                       puv = 0xfeedface;
+                       continue;
+                   }
+                   matches++;          /* "none+1" is delete character */
+                   s += UTF8SKIP(s);
+               }
+           }
+           else {
+               while (s < send) {
+                   if (from_utf)
+                       uv = swash_fetch(rv, s);
+                   else {
+                       char tmpbuf[10];
+                       uv_to_utf8(tmpbuf, *s); /* XXX suboptimal */
+                       uv = swash_fetch(rv, tmpbuf);
+                   }
+                   if (uv < none) {
+                       if (to_utf)
+                           d = uv_to_utf8(d, uv);
+                       else
+                           *d++ = (U8)uv;
+                       matches++;
+                       s += UTF8SKIP(s);
+                       continue;
+                   }
+                   else if (uv == none) {      /* "none" is unmapped character */
+                       int i;
+                       if (to_utf) {
+                           for (i = UTF8SKIP(s); i; --i)
+                               *d++ = *s++;
+                       }
+                       else {
+                           I32 ulen;
+                           *d++ = (U8)utf8_to_uv(s, &ulen);
+                           s += ulen;
+                       }
+                       continue;
+                   }
+                   else if (uv == extra && !del) {
+                       matches++;
+                       if (to_utf)
+                           d = uv_to_utf8(d, final);
+                       else
+                           *d++ = (U8)final;
+                       s += UTF8SKIP(s);
+                       continue;
+                   }
+                   matches++;          /* "none+1" is delete character */
+                   s += UTF8SKIP(s);
+               }
+           }
+           sv_usepvn_mg(sv, dst, d - dst);
+       }
+       return matches;
     }
     else {
-       d = s;
-       p = send;
-       while (s < send) {
-           if ((ch = tbl[*s]) >= 0) {
-               *d = ch;
-               matches++;
-               if (squash) {
-                   if (p == d - 1 && *p == *d)
-                       matches--;
+       register short *tbl;
+       register I32 ch;
+       register U8 *p;
+
+       tbl = (short*)cPVOP->op_pv;
+       s = (U8*)SvPV(sv, len);
+       if (!len)
+           return 0;
+       if (!SvPOKp(sv))
+           s = (U8*)SvPV_force(sv, len);
+       (void)SvPOK_only(sv);
+       send = s + len;
+       if (!tbl || !s)
+           croak("panic: do_trans");
+       DEBUG_t( deb("2.TBL\n"));
+       if (!PL_op->op_private) {
+           while (s < send) {
+               if ((ch = tbl[*s]) >= 0) {
+                   matches++;
+                   *s = ch;
+               }
+               s++;
+           }
+           SvSETMAGIC(sv);
+       }
+       else if (PL_op->op_private & OPpTRANS_COUNTONLY) {
+           while (s < send) {
+               if (tbl[*s] >= 0)
+                   matches++;
+               s++;
+           }
+       }
+       else {
+           d = s;
+           p = send;
+           while (s < send) {
+               if ((ch = tbl[*s]) >= 0) {
+                   *d = ch;
+                   matches++;
+                   if (squash) {
+                       if (p == d - 1 && *p == *d)
+                           matches--;
+                       else
+                           p = d++;
+                   }
                    else
-                       p = d++;
+                       d++;
                }
-               else
-                   d++;
+               else if (ch == -1)              /* -1 is unmapped character */
+                   *d++ = *s;          /* -2 is delete character */
+               s++;
            }
-           else if (ch == -1)          /* -1 is unmapped character */
-               *d++ = *s;              /* -2 is delete character */
-           s++;
+           matches += send - d;        /* account for disappeared chars */
+           *d = '\0';
+           SvCUR_set(sv, d - (U8*)SvPVX(sv));
+           SvSETMAGIC(sv);
        }
-       matches += send - d;    /* account for disappeared chars */
-       *d = '\0';
-       SvCUR_set(sv, d - (U8*)SvPVX(sv));
-       SvSETMAGIC(sv);
+       return matches;
     }
-    return matches;
 }
 
 void
@@ -240,6 +488,24 @@ do_chop(register SV *astr, register SV *sv)
     s = SvPV(sv, len);
     if (len && !SvPOK(sv))
        s = SvPV_force(sv, len);
+    if (IN_UTF8) {
+       if (s && len) {
+           char *send = s + len;
+           char *start = s;
+           s = send - 1;
+           while ((*s & 0xc0) == 0x80)
+               --s;
+           if (UTF8SKIP(s) != send - s)
+               warn("Malformed UTF-8 character");
+           sv_setpvn(astr, s, send - s);
+           *s = '\0';
+           SvCUR_set(sv, s - start);
+           SvNIOK_off(sv);
+       }
+       else
+           sv_setpvn(astr, "", 0);
+    }
+    else
     if (s && len) {
        s += --len;
        sv_setpvn(astr, s, 1);
@@ -449,15 +715,15 @@ do_kv(ARGSproto)
     register HE *entry;
     SV *tmpstr;
     I32 gimme = GIMME_V;
-    I32 dokeys =   (op->op_type == OP_KEYS);
-    I32 dovalues = (op->op_type == OP_VALUES);
+    I32 dokeys =   (PL_op->op_type == OP_KEYS);
+    I32 dovalues = (PL_op->op_type == OP_VALUES);
     I32 realhv = (SvTYPE(hv) == SVt_PVHV);
     
-    if (op->op_type == OP_RV2HV || op->op_type == OP_PADHV) 
+    if (PL_op->op_type == OP_RV2HV || PL_op->op_type == OP_PADHV) 
        dokeys = dovalues = TRUE;
 
     if (!hv) {
-       if (op->op_flags & OPf_MOD) {   /* lvalue */
+       if (PL_op->op_flags & OPf_MOD) {        /* lvalue */
            dTARGET;            /* make sure to clear its target here */
            if (SvTYPE(TARG) == SVt_PVLV)
                LvTARG(TARG) = Nullsv;
@@ -476,7 +742,7 @@ do_kv(ARGSproto)
        IV i;
        dTARGET;
 
-       if (op->op_flags & OPf_MOD) {   /* lvalue */
+       if (PL_op->op_flags & OPf_MOD) {        /* lvalue */
            if (SvTYPE(TARG) < SVt_PVLV) {
                sv_upgrade(TARG, SVt_PVLV);
                sv_magic(TARG, Nullsv, 'k', Nullch, 0);