From: Larry Wall <larry@wall.org>
Date: Sat, 5 Sep 1998 23:48:24 +0000 (+0000)
Subject: tr/// logic was hosed under utf8
X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=4757a2438b123364ad98fc0cb4698e56331f713b;p=p5sagit%2Fp5-mst-13.2.git

tr/// logic was hosed under utf8

p4raw-id: //depot/perl@1781
---

diff --git a/doop.c b/doop.c
index 8ebbd83..c6270e4 100644
--- a/doop.c
+++ b/doop.c
@@ -18,322 +18,613 @@
 #include <signal.h>
 #endif
 
-I32
-do_trans(SV *sv, OP *arg)
+static I32
+do_trans_CC_simple(SV *sv)
 {
     dTHR;
-    register U8 *s;
-    register U8 *send;
-    register U8 *d;
-    register I32 matches = 0;
-    register I32 squash = PL_op->op_private & OPpTRANS_SQUASH;
+    U8 *s;
+    U8 *send;
+    I32 matches = 0;
     STRLEN len;
+    short *tbl;
+    I32 ch;
 
-    if (SvREADONLY(sv) && !(PL_op->op_private & OPpTRANS_COUNTONLY))
-	croak(no_modify);
+    tbl = (short*)cPVOP->op_pv;
+    if (!tbl)
+	croak("panic: do_trans");
 
-    if (PL_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF)) {
-	SV* rv = (SV*)cSVOP->op_sv;
-	HV* hv = (HV*)SvRV(rv);
-	SV** svp = hv_fetch(hv, "NONE", 4, FALSE);
-	UV none = svp ? SvUV(*svp) : 0x7fffffff;
-	UV extra = none + 1;
-	I32 del = PL_op->op_private & OPpTRANS_DELETE;
-	UV final;
-	register UV uv;
-	UV puv;
-	register I32 from_utf = PL_op->op_private & OPpTRANS_FROM_UTF;
-	register I32 to_utf = PL_op->op_private & OPpTRANS_TO_UTF;
-
-	s = (U8*)SvPV(sv, len);
-	if (!len)
-	    return 0;
-	if (!SvPOKp(sv))
-	    s = (U8*)SvPV_force(sv, len);
-	(void)SvPOK_only(sv);
-	send = s + len;
-	DEBUG_t( deb("2.TBL\n"));
-	if (PL_op->op_private == (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF)) {	/* no other flags */
-	    svp = hv_fetch(hv, "FINAL", 5, FALSE);
-	    if (svp)
-		final = SvUV(*svp);
-
-	    d = s;
-	    while (s < send) {
-		if ((uv = swash_fetch(rv, s)) < none) {
-		    s += UTF8SKIP(s);
-		    matches++;
-		    d = uv_to_utf8(d, uv);
-		}
-		else if (uv == none) {
-		    int i;
-		    for (i = UTF8SKIP(s); i; i--)
-			*d++ = *s++;
-		}
-		else if (uv == extra) {
-		    s += UTF8SKIP(s);
-		    matches++;
-		    d = uv_to_utf8(d, final);
-		}
-		else
-		    s += UTF8SKIP(s);
-	    }
-	    *d = '\0';
-	    SvCUR_set(sv, d - (U8*)SvPVX(sv));
-	    SvSETMAGIC(sv);
-	}
-	else if (PL_op->op_private == OPpTRANS_FROM_UTF) {	/* no other flags */
-	    svp = hv_fetch(hv, "FINAL", 5, FALSE);
-	    if (svp)
-		final = SvUV(*svp);
-
-	    d = s;
-	    while (s < send) {
-		if ((uv = swash_fetch(rv, s)) < none) {
-		    s += UTF8SKIP(s);
-		    matches++;
-		    *d++ = (U8)uv;
-		}
-		else if (uv == none) {
-		    I32 ulen;
-		    uv = utf8_to_uv(s, &ulen);
-		    s += ulen;
-		    *d++ = (U8)uv;
-		}
-		else if (uv == extra) {
-		    s += UTF8SKIP(s);
-		    matches++;
-		    *d++ = (U8)final;
-		}
-		else
-		    s += UTF8SKIP(s);
-	    }
-	    *d = '\0';
-	    SvCUR_set(sv, d - (U8*)SvPVX(sv));
-	    SvSETMAGIC(sv);
-	}
-	else if (PL_op->op_private == OPpTRANS_TO_UTF) {	/* no other flags */
-	    svp = hv_fetch(hv, "FINAL", 5, FALSE);
-	    if (svp)
-		final = SvUV(*svp);
-
-	    d = s;
-	    while (s < send) {
-		U8 tmpbuf[10];
-		uv_to_utf8(tmpbuf, *s);		/* XXX suboptimal */
-		if ((uv = swash_fetch(rv, tmpbuf)) < none) {
-		    s += UTF8SKIP(s);
-		    matches++;
-		    d = uv_to_utf8(d, uv);
-		}
-		else if (uv == none) {
-		    I32 ulen;
-		    uv = utf8_to_uv(s, &ulen);
-		    s += ulen;
-		    d = uv_to_utf8(d, uv);
-		}
-		else if (uv == extra) {
-		    s += UTF8SKIP(s);
-		    matches++;
-		    d = uv_to_utf8(d, final);
-		}
+    s = (U8*)SvPV(sv, len);
+    send = s + len;
+
+    while (s < send) {
+	if ((ch = tbl[*s]) >= 0) {
+	    matches++;
+	    *s = ch;
+	}
+	s++;
+    }
+    SvSETMAGIC(sv);
+
+    return matches;
+}
+
+static I32
+do_trans_CC_count(SV *sv)
+{
+    dTHR;
+    U8 *s;			/* read cursor */
+    U8 *send;			/* one past the last byte of the string */
+    I32 matches = 0;
+    STRLEN len;
+    short *tbl;			/* table indexed by byte value */
+    /* Count-only variant: tallies table hits and never writes to the string. */
+    tbl = (short*)cPVOP->op_pv;
+    if (!tbl)
+	croak("panic: do_trans");
+
+    s = (U8*)SvPV(sv, len);
+    send = s + len;
+
+    while (s < send) {
+	if (tbl[*s] >= 0)	/* a non-negative entry means the byte is in the search list */
+	    matches++;
+	s++;
+    }
+
+    return matches;		/* number of bytes with a non-negative table entry */
+}
+
+static I32
+do_trans_CC_complex(SV *sv)
+{
+    dTHR;
+    U8 *s;
+    U8 *send;
+    U8 *d;
+    I32 matches = 0;
+    STRLEN len;
+    short *tbl;
+    I32 ch;
+
+    tbl = (short*)cPVOP->op_pv;
+    if (!tbl)
+	croak("panic: do_trans");
+
+    s = (U8*)SvPV(sv, len);
+    send = s + len;
+
+    d = s;
+    if (PL_op->op_private & OPpTRANS_SQUASH) {
+	U8* p = send;
+
+	while (s < send) {
+	    if ((ch = tbl[*s]) >= 0) {
+		*d = ch;
+		matches++;
+		if (p == d - 1 && *p == *d)
+		    matches--;
 		else
-		    s += UTF8SKIP(s);
+		    p = d++;
 	    }
-	    *d = '\0';
-	    SvCUR_set(sv, d - (U8*)SvPVX(sv));
-	    SvSETMAGIC(sv);
+	    else if (ch == -1)		/* -1 is unmapped character */
+		*d++ = *s;		/* -2 is delete character */
+	    s++;
 	}
-	else if (PL_op->op_private & OPpTRANS_COUNTONLY) {
-	    if (from_utf) {
-		while (s < send) {
-		    if (swash_fetch(rv, s) < none)
-			matches++;
-		    s += UTF8SKIP(s);
-		}
-	    }
-	    else {
-		while (s < send) {
-		    U8 tmpbuf[10];
-		    uv_to_utf8(tmpbuf, *s);	/* XXX suboptimal */
-		    if (swash_fetch(rv, tmpbuf) < none)
-			matches++;
-		    s += UTF8SKIP(s);
-		}
+    }
+    else {
+	while (s < send) {
+	    if ((ch = tbl[*s]) >= 0) {
+		*d = ch;
+		matches++;
+		d++;
 	    }
+	    else if (ch == -1)		/* -1 is unmapped character */
+		*d++ = *s;		/* -2 is delete character */
+	    s++;
+	}
+    }
+    matches += send - d;	/* account for disappeared chars */
+    *d = '\0';
+    SvCUR_set(sv, d - (U8*)SvPVX(sv));
+    SvSETMAGIC(sv);
+
+    return matches;
+}
+
+static I32
+do_trans_UU_simple(SV *sv)
+{
+    dTHR;
+    U8 *s;			/* read cursor */
+    U8 *send;			/* one past the last input byte */
+    U8 *d;			/* write cursor into the same buffer */
+    I32 matches = 0;
+    STRLEN len;
+    /* UTF-8 to UTF-8 translation done in place, driven by the swash in op_sv. */
+    SV* rv = (SV*)cSVOP->op_sv;
+    HV* hv = (HV*)SvRV(rv);
+    SV** svp = hv_fetch(hv, "NONE", 4, FALSE);
+    UV none = svp ? SvUV(*svp) : 0x7fffffff;	/* fallback when the hash has no NONE key */
+    UV extra = none + 1;
+    UV final;	/* NOTE(review): left unset when the hash has no FINAL key, yet read in the `extra' branch */
+    UV uv;
+
+    s = (U8*)SvPV(sv, len);
+    send = s + len;
+
+    svp = hv_fetch(hv, "FINAL", 5, FALSE);
+    if (svp)
+	final = SvUV(*svp);
+
+    d = s;	/* output overwrites the input; assumes it never outgrows it -- TODO confirm */
+    while (s < send) {
+	if ((uv = swash_fetch(rv, s)) < none) {	/* below NONE: uv is the replacement */
+	    s += UTF8SKIP(s);
+	    matches++;
+	    d = uv_to_utf8(d, uv);
+	}
+	else if (uv == none) {			/* NONE: copy the character through unchanged */
+	    int i;
+	    for (i = UTF8SKIP(s); i; i--)
+		*d++ = *s++;
+	}
+	else if (uv == extra) {			/* NONE+1: emit FINAL instead */
+	    s += UTF8SKIP(s);
+	    matches++;
+	    d = uv_to_utf8(d, final);
+	}
+	else					/* anything larger: skip without output or count */
+	    s += UTF8SKIP(s);
+    }
+    *d = '\0';
+    SvCUR_set(sv, d - (U8*)SvPVX(sv));	/* new length is however far the write cursor got */
+    SvSETMAGIC(sv);
+
+    return matches;
+}
+
+/* Count-only tr/// over a UTF-8 string: returns how many characters of sv
+ * the swash in op_sv maps to a value below NONE; the string is not written. */
+static I32
+do_trans_UU_count(SV *sv)
+{
+    dTHR;
+    U8 *s;			/* read cursor */
+    U8 *send;			/* one past the last byte */
+    I32 matches = 0;
+    STRLEN len;
+
+    SV* rv = (SV*)cSVOP->op_sv;
+    HV* hv = (HV*)SvRV(rv);
+    SV** svp = hv_fetch(hv, "NONE", 4, FALSE);
+    UV none = svp ? SvUV(*svp) : 0x7fffffff;	/* fallback when the hash has no NONE key */
+
+    s = (U8*)SvPV(sv, len);
+    send = s + len;
+
+    while (s < send) {
+	if (swash_fetch(rv, s) < none)
+	    matches++;
+	s += UTF8SKIP(s);	/* advance on every character, or an unmapped one loops forever */
+    }
+
+    return matches;
+}
+
+static I32
+do_trans_UC_simple(SV *sv)
+{
+    dTHR;
+    U8 *s;			/* read cursor over the UTF-8 input */
+    U8 *send;			/* one past the last input byte */
+    U8 *d;			/* write cursor; one output byte per input character */
+    I32 matches = 0;
+    STRLEN len;
+    /* UTF-8 input translated in place to single-byte output via the swash in op_sv. */
+    SV* rv = (SV*)cSVOP->op_sv;
+    HV* hv = (HV*)SvRV(rv);
+    SV** svp = hv_fetch(hv, "NONE", 4, FALSE);
+    UV none = svp ? SvUV(*svp) : 0x7fffffff;	/* fallback when the hash has no NONE key */
+    UV extra = none + 1;
+    UV final;	/* NOTE(review): left unset when the hash has no FINAL key, yet read in the `extra' branch */
+    UV uv;
+
+    s = (U8*)SvPV(sv, len);
+    send = s + len;
+
+    svp = hv_fetch(hv, "FINAL", 5, FALSE);
+    if (svp)
+	final = SvUV(*svp);
+
+    d = s;	/* each character yields at most one byte, so d never overtakes s */
+    while (s < send) {
+	if ((uv = swash_fetch(rv, s)) < none) {	/* below NONE: uv is the replacement */
+	    s += UTF8SKIP(s);
+	    matches++;
+	    *d++ = (U8)uv;			/* truncated to one byte */
+	}
+	else if (uv == none) {			/* NONE: decode and store the character itself */
+	    I32 ulen;
+	    uv = utf8_to_uv(s, &ulen);
+	    s += ulen;
+	    *d++ = (U8)uv;			/* values above 0xff lose their high bits */
 	}
+	else if (uv == extra) {			/* NONE+1: emit FINAL instead */
+	    s += UTF8SKIP(s);
+	    matches++;
+	    *d++ = (U8)final;
+	}
+	else					/* anything larger: skip without output or count */
+	    s += UTF8SKIP(s);
+    }
+    *d = '\0';
+    SvCUR_set(sv, d - (U8*)SvPVX(sv));	/* new length is however far the write cursor got */
+    SvSETMAGIC(sv);
+
+    return matches;
+}
+
+static I32
+do_trans_CU_simple(SV *sv)
+{
+    dTHR;
+    U8 *s;
+    U8 *send;
+    U8 *d;
+    U8 *dst;
+    I32 matches = 0;
+    STRLEN len;
+
+    SV* rv = (SV*)cSVOP->op_sv;
+    HV* hv = (HV*)SvRV(rv);
+    SV** svp = hv_fetch(hv, "NONE", 4, FALSE);
+    UV none = svp ? SvUV(*svp) : 0x7fffffff;
+    UV extra = none + 1;
+    UV final;
+    UV uv;
+    U8 tmpbuf[10];
+    I32 bits = 16;
+
+    s = (U8*)SvPV(sv, len);
+    send = s + len;
+
+    svp = hv_fetch(hv, "BITS", 4, FALSE);
+    if (svp)
+	bits = (I32)SvIV(*svp);
+
+    svp = hv_fetch(hv, "FINAL", 5, FALSE);
+    if (svp)
+	final = SvUV(*svp);
+
+    Newz(801, d, len * (bits >> 3) + 1, U8);
+    dst = d;
+
+    while (s < send) {
+	uv = *s++;
+	if (uv < 0x80)
+	    tmpbuf[0] = uv;
 	else {
-	    I32 bits = 16;
-	    U8 *dst;
+	    tmpbuf[0] = (( uv >>  6)         | 0xc0);
+	    tmpbuf[1] = (( uv        & 0x3f) | 0x80);
+	}
 
-	    svp = hv_fetch(hv, "BITS", 4, FALSE);
-	    if (svp)
-		bits = (I32)SvIV(*svp);
+	if ((uv = swash_fetch(rv, tmpbuf)) < none) {
+	    matches++;
+	    d = uv_to_utf8(d, uv);
+	}
+	else if (uv == none)
+	    d = uv_to_utf8(d, s[-1]);
+	else if (uv == extra) {
+	    matches++;
+	    d = uv_to_utf8(d, final);
+	}
+    }
+    *d = '\0';
+    sv_usepvn_mg(sv, (char*)dst, d - dst);
 
-	    svp = hv_fetch(hv, "FINAL", 5, FALSE);
-	    if (svp)
-		final = SvUV(*svp);
+    return matches;
+}
 
-	    Newz(801, d, len * (bits >> 3) + 1, U8);
-	    dst = d;
+/* utf-8 to latin-1 */
 
-	    puv = 0xfeedface;
-	    if (squash) {
-		while (s < send) {
-		    if (from_utf)
-			uv = swash_fetch(rv, s);
-		    else {
-			U8 tmpbuf[10];
-			uv_to_utf8(tmpbuf, *s);	/* XXX suboptimal */
-			uv = swash_fetch(rv, tmpbuf);
-		    }
-		    if (uv < none) {
-			matches++;
-			if (uv != puv) {
-			    if (to_utf)
-				d = uv_to_utf8(d, uv);
-			    else
-				*d++ = (U8)uv;
-			}
-			puv = uv;
-			s += UTF8SKIP(s);
-			continue;
-		    }
-		    else if (uv == none) {	/* "none" is unmapped character */
-			int i;
-			if (to_utf) {
-			    for (i = UTF8SKIP(s); i; --i)
-				*d++ = *s++;
-			}
-			else {
-			    I32 ulen;
-			    *d++ = (U8)utf8_to_uv(s, &ulen);
-			    s += ulen;
-			}
-			puv = 0xfeedface;
-			continue;
-		    }
-		    else if (uv == extra && !del) {
-			matches++;
-			if (to_utf)
-			    d = uv_to_utf8(d, final);
-			else
-			    *d++ = (U8)final;
-			s += UTF8SKIP(s);
-			puv = 0xfeedface;
-			continue;
-		    }
-		    matches++;		/* "none+1" is delete character */
-		    s += UTF8SKIP(s);
-		}
+static I32
+do_trans_UC_trivial(SV *sv)
+{
+    dTHR;
+    U8 *s;			/* read cursor over the UTF-8 input */
+    U8 *send;			/* one past the last input byte */
+    U8 *d;			/* write cursor into the same buffer */
+    STRLEN len;
+    /* Table-free conversion: each UTF-8 character becomes one byte, in place. */
+    s = (U8*)SvPV(sv, len);
+    send = s + len;
+
+    d = s;
+    while (s < send) {
+	if (*s < 0x80)			/* bytes below 0x80 are copied unchanged */
+	    *d++ = *s++;
+	else {
+	    I32 ulen;
+	    UV uv = utf8_to_uv(s, &ulen);	/* decode one character; ulen is its byte length */
+	    s += ulen;
+	    *d++ = (U8)uv;		/* values above 0xff lose their high bits */
+	}
+    }
+    *d = '\0';
+    SvCUR_set(sv, d - (U8*)SvPVX(sv));
+    SvSETMAGIC(sv);
+
+    return SvCUR(sv);		/* returns the length of the converted string */
+}
+
+/* latin-1 to utf-8 */
+
+static I32
+do_trans_CU_trivial(SV *sv)
+{
+    dTHR;
+    U8 *s;			/* read cursor over the single-byte input */
+    U8 *send;			/* one past the last input byte */
+    U8 *d;			/* write cursor into the new buffer */
+    U8 *dst;			/* start of the new buffer */
+    I32 matches;
+    STRLEN len;
+    /* Table-free conversion: every input byte is re-encoded as UTF-8 in a fresh buffer. */
+    s = (U8*)SvPV(sv, len);
+    send = s + len;
+
+    Newz(801, d, len * 2 + 1, U8);	/* each byte becomes at most two, plus the trailing NUL */
+    dst = d;
+
+    matches = send - s;		/* the input byte count is what gets returned */
+
+    while (s < send) {
+	if (*s < 0x80)			/* bytes below 0x80 are copied unchanged */
+	    *d++ = *s++;
+	else {
+	    UV uv = *s++;
+	    *d++ = (( uv >>  6)         | 0xc0);	/* lead byte: top two bits of the value */
+	    *d++ = (( uv        & 0x3f) | 0x80);	/* continuation byte: low six bits */
+	}
+    }
+    *d = '\0';
+    sv_usepvn_mg(sv, (char*)dst, d - dst);	/* presumably sv takes ownership of dst -- confirm in perlapi */
+
+    return matches;
+}
+
+static I32
+do_trans_UU_complex(SV *sv)
+{
+    dTHR;
+    U8 *s;
+    U8 *send;
+    U8 *d;
+    I32 matches = 0;
+    I32 squash   = PL_op->op_private & OPpTRANS_SQUASH;
+    I32 from_utf = PL_op->op_private & OPpTRANS_FROM_UTF;
+    I32 to_utf   = PL_op->op_private & OPpTRANS_TO_UTF;
+    I32 del      = PL_op->op_private & OPpTRANS_DELETE;
+    SV* rv = (SV*)cSVOP->op_sv;
+    HV* hv = (HV*)SvRV(rv);
+    SV** svp = hv_fetch(hv, "NONE", 4, FALSE);
+    UV none = svp ? SvUV(*svp) : 0x7fffffff;
+    UV extra = none + 1;
+    UV final;
+    UV uv;
+    STRLEN len;
+    U8 *dst;
+
+    s = (U8*)SvPV(sv, len);
+    send = s + len;
+
+    svp = hv_fetch(hv, "FINAL", 5, FALSE);
+    if (svp)
+	final = SvUV(*svp);
+
+    if (PL_op->op_private & OPpTRANS_GROWS) {
+	I32 bits = 16;
+
+	svp = hv_fetch(hv, "BITS", 4, FALSE);
+	if (svp)
+	    bits = (I32)SvIV(*svp);
+
+	Newz(801, d, len * (bits >> 3) + 1, U8);
+	dst = d;
+    }
+    else {
+	d = s;
+	dst = 0;
+    }
+
+    if (squash) {
+	UV puv = 0xfeedface;
+	while (s < send) {
+	    if (from_utf) {
+		uv = swash_fetch(rv, s);
 	    }
 	    else {
-		while (s < send) {
-		    if (from_utf)
-			uv = swash_fetch(rv, s);
-		    else {
-			U8 tmpbuf[10];
-			uv_to_utf8(tmpbuf, *s);	/* XXX suboptimal */
-			uv = swash_fetch(rv, tmpbuf);
-		    }
-		    if (uv < none) {
-			if (to_utf)
-			    d = uv_to_utf8(d, uv);
-			else
-			    *d++ = (U8)uv;
-			matches++;
-			s += UTF8SKIP(s);
-			continue;
-		    }
-		    else if (uv == none) {	/* "none" is unmapped character */
+		U8 tmpbuf[2];
+		uv = *s++;
+		if (uv < 0x80)
+		    tmpbuf[0] = uv;
+		else {
+		    tmpbuf[0] = (( uv >>  6)         | 0xc0);
+		    tmpbuf[1] = (( uv        & 0x3f) | 0x80);
+		}
+		uv = swash_fetch(rv, tmpbuf);
+	    }
+	    if (uv < none) {
+		matches++;
+		if (uv != puv) {
+		    if (uv >= 0x80 && to_utf)
+			d = uv_to_utf8(d, uv);
+		    else
+			*d++ = (U8)uv;
+		    puv = uv;
+		}
+		if (from_utf)
+		    s += UTF8SKIP(s);
+		continue;
+	    }
+	    else if (uv == none) {	/* "none" is unmapped character */
+		if (from_utf) {
+		    if (*s < 0x80)
+			*d++ = *s++;
+		    else if (to_utf) {
 			int i;
-			if (to_utf) {
-			    for (i = UTF8SKIP(s); i; --i)
-				*d++ = *s++;
-			}
-			else {
-			    I32 ulen;
-			    *d++ = (U8)utf8_to_uv(s, &ulen);
-			    s += ulen;
-			}
-			continue;
+			for (i = UTF8SKIP(s); i; --i)
+			    *d++ = *s++;
 		    }
-		    else if (uv == extra && !del) {
-			matches++;
-			if (to_utf)
-			    d = uv_to_utf8(d, final);
-			else
-			    *d++ = (U8)final;
-			s += UTF8SKIP(s);
-			continue;
+		    else {
+			I32 ulen;
+			*d++ = (U8)utf8_to_uv(s, &ulen);
+			s += ulen;
 		    }
-		    matches++;		/* "none+1" is delete character */
-		    s += UTF8SKIP(s);
 		}
+		else {	/* must be to_utf only */
+		    d = uv_to_utf8(d, s[-1]);
+		}
+		puv = 0xfeedface;
+		continue;
 	    }
-	    sv_usepvn_mg(sv, (char*)dst, d - dst);
+	    else if (uv == extra && !del) {
+		matches++;
+		if (uv != puv) {
+		    if (final >= 0x80 && to_utf)
+			d = uv_to_utf8(d, final);
+		    else
+			*d++ = (U8)final;
+		    puv = final;
+		}
+		if (from_utf)
+		    s += UTF8SKIP(s);
+		continue;
+	    }
+	    matches++;		/* "none+1" is delete character */
+	    if (from_utf)
+		s += UTF8SKIP(s);
 	}
-	return matches;
     }
     else {
-	register short *tbl;
-	register I32 ch;
-	register U8 *p;
-
-	tbl = (short*)cPVOP->op_pv;
-	s = (U8*)SvPV(sv, len);
-	if (!len)
-	    return 0;
-	if (!SvPOKp(sv))
-	    s = (U8*)SvPV_force(sv, len);
-	(void)SvPOK_only(sv);
-	send = s + len;
-	if (!tbl || !s)
-	    croak("panic: do_trans");
-	DEBUG_t( deb("2.TBL\n"));
-	if (!PL_op->op_private) {
-	    while (s < send) {
-		if ((ch = tbl[*s]) >= 0) {
-		    matches++;
-		    *s = ch;
+	while (s < send) {
+	    if (from_utf) {
+		uv = swash_fetch(rv, s);
+	    }
+	    else {
+		U8 tmpbuf[2];
+		uv = *s++;
+		if (uv < 0x80)
+		    tmpbuf[0] = uv;
+		else {
+		    tmpbuf[0] = (( uv >>  6)         | 0xc0);
+		    tmpbuf[1] = (( uv        & 0x3f) | 0x80);
 		}
-		s++;
+		uv = swash_fetch(rv, tmpbuf);
 	    }
-	    SvSETMAGIC(sv);
-	}
-	else if (PL_op->op_private & OPpTRANS_COUNTONLY) {
-	    while (s < send) {
-		if (tbl[*s] >= 0)
-		    matches++;
-		s++;
+	    if (uv < none) {
+		matches++;
+		if (uv >= 0x80 && to_utf)
+		    d = uv_to_utf8(d, uv);
+		else
+		    *d++ = (U8)uv;
+		if (from_utf)
+		    s += UTF8SKIP(s);
+		continue;
 	    }
-	}
-	else {
-	    d = s;
-	    p = send;
-	    while (s < send) {
-		if ((ch = tbl[*s]) >= 0) {
-		    *d = ch;
-		    matches++;
-		    if (squash) {
-			if (p == d - 1 && *p == *d)
-			    matches--;
-			else
-			    p = d++;
+	    else if (uv == none) {	/* "none" is unmapped character */
+		if (from_utf) {
+		    if (*s < 0x80)
+			*d++ = *s++;
+		    else if (to_utf) {
+			int i;
+			for (i = UTF8SKIP(s); i; --i)
+			    *d++ = *s++;
+		    }
+		    else {
+			I32 ulen;
+			*d++ = (U8)utf8_to_uv(s, &ulen);
+			s += ulen;
 		    }
-		    else
-			d++;
 		}
-		else if (ch == -1)		/* -1 is unmapped character */
-		    *d++ = *s;		/* -2 is delete character */
-		s++;
+		else {	/* must be to_utf only */
+		    d = uv_to_utf8(d, s[-1]);
+		}
+		continue;
 	    }
-	    matches += send - d;	/* account for disappeared chars */
-	    *d = '\0';
-	    SvCUR_set(sv, d - (U8*)SvPVX(sv));
-	    SvSETMAGIC(sv);
+	    else if (uv == extra && !del) {
+		matches++;
+		if (final >= 0x80 && to_utf)
+		    d = uv_to_utf8(d, final);
+		else
+		    *d++ = (U8)final;
+		if (from_utf)
+		    s += UTF8SKIP(s);
+		continue;
+	    }
+	    matches++;		/* "none+1" is delete character */
+	    if (from_utf)
+		s += UTF8SKIP(s);
 	}
-	return matches;
+    }
+    if (dst)
+	sv_usepvn(sv, (char*)dst, d - dst);
+    else {
+	*d = '\0';
+	SvCUR_set(sv, d - (U8*)SvPVX(sv));
+    }
+    SvSETMAGIC(sv);
+
+    return matches;
+}
+
+I32
+do_trans(SV *sv)
+{
+    STRLEN len;
+    /* Entry point for tr///: validates sv, then dispatches on PL_op->op_private. */
+    if (SvREADONLY(sv) && !(PL_op->op_private & OPpTRANS_IDENTICAL))
+	croak(no_modify);	/* NOTE(review): IDENTICAL also selects the *_trivial converters below, which rewrite sv -- confirm read-only values cannot reach them */
+
+    (void)SvPV(sv, len);
+    if (!len)
+	return 0;		/* empty string: nothing to translate or count */
+    if (!SvPOKp(sv))
+	(void)SvPV_force(sv, len);
+    (void)SvPOK_only(sv);
+
+    DEBUG_t( deb("2.TBL\n"));
+
+    switch (PL_op->op_private & 63) {	/* low six bits: every OPpTRANS flag except GROWS (64) */
+    case 0:
+	return do_trans_CC_simple(sv);
+
+    case OPpTRANS_FROM_UTF:
+	return do_trans_UC_simple(sv);
+
+    case OPpTRANS_TO_UTF:
+	return do_trans_CU_simple(sv);
+
+    case OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF:
+	return do_trans_UU_simple(sv);
+
+    case OPpTRANS_IDENTICAL:
+	return do_trans_CC_count(sv);
+
+    case OPpTRANS_FROM_UTF|OPpTRANS_IDENTICAL:	/* with one UTF flag, IDENTICAL selects a table-free conversion */
+	return do_trans_UC_trivial(sv);
+
+    case OPpTRANS_TO_UTF|OPpTRANS_IDENTICAL:
+	return do_trans_CU_trivial(sv);
+
+    case OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF|OPpTRANS_IDENTICAL:
+	return do_trans_UU_count(sv);
+
+    default:	/* SQUASH, DELETE or COMPLEMENT is set */
+	if (PL_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF))
+	    return do_trans_UU_complex(sv); /* could be UC or CU too */
+	else
+	    return do_trans_CC_complex(sv);
     }
 }
 
diff --git a/op.c b/op.c
index 53fb8c1..ca89229 100644
--- a/op.c
+++ b/op.c
@@ -2156,8 +2156,17 @@ pmtrans(OP *o, OP *expr, OP *repl)
 	}
 	else if (!rlen && !del) {
 	    r = t; rlen = tlen; rend = tend;
-	    if (!squash && to_utf && from_utf)
-		o->op_private |= OPpTRANS_COUNTONLY;
+	}
+	if (!squash) {
+	    if (to_utf && from_utf) {	/* only counting characters */
+		if (t == r || (tlen == rlen && memEQ(t, r, tlen)))
+		    o->op_private |= OPpTRANS_IDENTICAL;
+	    }
+	    else {	/* straight latin-1 translation */
+		if (tlen == 4 && memEQ(t, "\0\377\303\277", 4) &&
+		    rlen == 4 && memEQ(r, "\0\377\303\277", 4))
+		    o->op_private |= OPpTRANS_IDENTICAL;
+	    }
 	}
 
 	while (t < tend || tfirst <= tlast) {
@@ -2286,7 +2295,7 @@ pmtrans(OP *o, OP *expr, OP *repl)
 	if (!rlen && !del) {
 	    r = t; rlen = tlen;
 	    if (!squash)
-		o->op_private |= OPpTRANS_COUNTONLY;
+		o->op_private |= OPpTRANS_IDENTICAL;
 	}
 	for (i = 0; i < 256; i++)
 	    tbl[i] = -1;
diff --git a/op.h b/op.h
index cbb2ac3..0b186a8 100644
--- a/op.h
+++ b/op.h
@@ -103,13 +103,15 @@ typedef U32 PADOFFSET;
 #define OPpRUNTIME		64	/* Pattern coming in on the stack */
 
 /* Private for OP_TRANS */
-#define OPpTRANS_GROWS		1
-#define OPpTRANS_FROM_UTF	2
-#define OPpTRANS_TO_UTF		4
-#define OPpTRANS_COUNTONLY	8
-#define OPpTRANS_SQUASH		16
-#define OPpTRANS_DELETE		32
-#define OPpTRANS_COMPLEMENT	64
+#define OPpTRANS_FROM_UTF	1
+#define OPpTRANS_TO_UTF		2
+#define OPpTRANS_IDENTICAL	4
+	/* When CU or UC, means straight latin-1 to utf-8 or vice versa */
+	/* Otherwise, IDENTICAL means the right side is the same as the left */
+#define OPpTRANS_SQUASH		8
+#define OPpTRANS_DELETE		16
+#define OPpTRANS_COMPLEMENT	32
+#define OPpTRANS_GROWS		64
 
 /* Private for OP_REPEAT */
 #define OPpREPEAT_DOLIST	64	/* List replication. */
diff --git a/pp.c b/pp.c
index 9c08e2e..a4f7828 100644
--- a/pp.c
+++ b/pp.c
@@ -669,7 +669,7 @@ PP(pp_trans)
 	EXTEND(SP,1);
     }
     TARG = sv_newmortal();
-    PUSHi(do_trans(sv, PL_op));
+    PUSHi(do_trans(sv));
     RETURN;
 }
 
diff --git a/proto.h b/proto.h
index 5b71f63..96bb15c 100644
--- a/proto.h
+++ b/proto.h
@@ -126,7 +126,7 @@ I32	do_shmio _((I32 optype, SV** mark, SV** sp));
 VIRTUAL void	do_sprintf _((SV* sv, I32 len, SV** sarg));
 VIRTUAL long	do_sysseek _((GV* gv, long pos, int whence));
 VIRTUAL long	do_tell _((GV* gv));
-VIRTUAL I32	do_trans _((SV* sv, OP* arg));
+VIRTUAL I32	do_trans _((SV* sv));
 VIRTUAL void	do_vecset _((SV* sv));
 VIRTUAL void	do_vop _((I32 optype, SV* sv, SV* left, SV* right));
 VIRTUAL I32	dowantarray _((void));