From: Jarkko Hietaniemi <jhi@iki.fi>
Date: Thu, 7 Mar 2002 20:54:02 +0000 (+0000)
Subject: Retract #14985, #14899, and #14990, following the principles
X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=cf48d248eb62e81239204ca4ca6b33029875e0bd;p=p5sagit%2Fp5-mst-13.2.git

Retract #14985, #14899, and #14990, following the principles
"Do no harm." and "If it ain't broke, don't fix it."

Firstly, #14985 broke badly on UTF-EBCDIC; #14990 fixed some
of it, but it is still broken, and I do not have the extra brain
cells for the EBCDIC backport.  Secondly, the old version
worked both in EBCDIC and non-EBCDIC.  Thirdly, the old version
may be more amenable to the behaviour suggested by Anton
Tagunov regarding the encoding pragma.

p4raw-id: //depot/perl@15084
---

diff --git a/embed.fnc b/embed.fnc
index d448387..52472a0 100644
--- a/embed.fnc
+++ b/embed.fnc
@@ -427,7 +427,6 @@ Ap	|void	|markstack_grow
 #if defined(USE_LOCALE_COLLATE)
 p	|char*	|mem_collxfrm	|const char* s|STRLEN len|STRLEN* xlen
 #endif
-Apd	|int	|memcmp_byte_utf8	|char *sbyte|STRLEN lbyte|char *sutf|STRLEN lutf
 Afp	|SV*	|mess		|const char* pat|...
 Ap	|SV*	|vmess		|const char* pat|va_list* args
 p	|void	|qerror		|SV* err
diff --git a/embed.h b/embed.h
index 2c0256f..9d66677 100644
--- a/embed.h
+++ b/embed.h
@@ -411,7 +411,6 @@
 #if defined(USE_LOCALE_COLLATE)
 #define mem_collxfrm		Perl_mem_collxfrm
 #endif
-#define memcmp_byte_utf8	Perl_memcmp_byte_utf8
 #define mess			Perl_mess
 #define vmess			Perl_vmess
 #define qerror			Perl_qerror
@@ -1970,7 +1969,6 @@
 #if defined(USE_LOCALE_COLLATE)
 #define mem_collxfrm(a,b,c)	Perl_mem_collxfrm(aTHX_ a,b,c)
 #endif
-#define memcmp_byte_utf8(a,b,c,d)	Perl_memcmp_byte_utf8(aTHX_ a,b,c,d)
 #define vmess(a,b)		Perl_vmess(aTHX_ a,b)
 #define qerror(a)		Perl_qerror(aTHX_ a)
 #define sortsv(a,b,c)		Perl_sortsv(aTHX_ a,b,c)
diff --git a/global.sym b/global.sym
index a477a0b..9b709ec 100644
--- a/global.sym
+++ b/global.sym
@@ -229,7 +229,6 @@ Perl_grok_number
 Perl_grok_numeric_radix
 Perl_grok_oct
 Perl_markstack_grow
-Perl_memcmp_byte_utf8
 Perl_mess
 Perl_vmess
 Perl_sortsv
diff --git a/proto.h b/proto.h
index 87ca95f..159d968 100644
--- a/proto.h
+++ b/proto.h
@@ -463,7 +463,6 @@ PERL_CALLCONV void	Perl_markstack_grow(pTHX);
 #if defined(USE_LOCALE_COLLATE)
 PERL_CALLCONV char*	Perl_mem_collxfrm(pTHX_ const char* s, STRLEN len, STRLEN* xlen);
 #endif
-PERL_CALLCONV int	Perl_memcmp_byte_utf8(pTHX_ char *sbyte, STRLEN lbyte, char *sutf, STRLEN lutf);
 PERL_CALLCONV SV*	Perl_mess(pTHX_ const char* pat, ...)
 #ifdef CHECK_FORMAT
  __attribute__((format(printf,pTHX_1,pTHX_2)))
diff --git a/sv.c b/sv.c
index f893fa6..27150d6 100644
--- a/sv.c
+++ b/sv.c
@@ -5349,6 +5349,7 @@ Perl_sv_eq(pTHX_ register SV *sv1, register SV *sv2)
     char *pv2;
     STRLEN cur2;
     I32  eq     = 0;
+    char *tpv   = Nullch;
 
     if (!sv1) {
 	pv1 = "";
@@ -5364,13 +5365,35 @@ Perl_sv_eq(pTHX_ register SV *sv1, register SV *sv2)
     else
 	pv2 = SvPV(sv2, cur2);
 
-    if (SvUTF8(sv1) == SvUTF8(sv2) || IN_BYTES)
-	eq = (cur1 == cur2) && memEQ(pv1, pv2, cur1);
-    else if (SvUTF8(sv1)) /* do not utf8ize the comparands as a side-effect */
-	eq = !memcmp_byte_utf8(pv2, cur2, pv1, cur1);
-    else
-	eq = !memcmp_byte_utf8(pv1, cur1, pv2, cur2);
+    /* do not utf8ize the comparands as a side-effect */
+    if (cur1 && cur2 && SvUTF8(sv1) != SvUTF8(sv2) && !IN_BYTES) {
+	bool is_utf8 = TRUE;
+        /* UTF-8ness differs */
+
+	if (SvUTF8(sv1)) {
+	    /* sv1 is the UTF-8 one , If is equal it must be downgrade-able */
+	    char *pv = (char*)bytes_from_utf8((U8*)pv1, &cur1, &is_utf8);
+	    if (pv != pv1)
+		pv1 = tpv = pv;
+	}
+	else {
+	    /* sv2 is the UTF-8 one , If is equal it must be downgrade-able */
+	    char *pv = (char *)bytes_from_utf8((U8*)pv2, &cur2, &is_utf8);
+	    if (pv != pv2)
+		pv2 = tpv = pv;
+	}
+	if (is_utf8) {
+	    /* Downgrade not possible - cannot be eq */
+	    return FALSE;
+	}
+    }
+
+    if (cur1 == cur2)
+	eq = memEQ(pv1, pv2, cur1);
 	
+    if (tpv != Nullch)
+	Safefree(tpv);
+
     return eq;
 }
 
@@ -5390,7 +5413,9 @@ Perl_sv_cmp(pTHX_ register SV *sv1, register SV *sv2)
 {
     STRLEN cur1, cur2;
     char *pv1, *pv2;
-    I32  retval;
+    I32  cmp;
+    bool pv1tmp = FALSE;
+    bool pv2tmp = FALSE;
 
     if (!sv1) {
 	pv1 = "";
@@ -5406,28 +5431,40 @@ Perl_sv_cmp(pTHX_ register SV *sv1, register SV *sv2)
     else
 	pv2 = SvPV(sv2, cur2);
 
+    /* do not utf8ize the comparands as a side-effect */
+    if (cur1 && cur2 && SvUTF8(sv1) != SvUTF8(sv2) && !IN_BYTES) {
+	if (SvUTF8(sv1)) {
+	    pv2 = (char*)bytes_to_utf8((U8*)pv2, &cur2);
+	    pv2tmp = TRUE;
+	}
+	else {
+	    pv1 = (char*)bytes_to_utf8((U8*)pv1, &cur1);
+	    pv1tmp = TRUE;
+	}
+    }
+
     if (!cur1) {
-	return cur2 ? -1 : 0;
+	cmp = cur2 ? -1 : 0;
     } else if (!cur2) {
-	return 1;
-    } else if (SvUTF8(sv1) == SvUTF8(sv2) || IN_BYTES) {
-	retval = memcmp((void*)pv1, (void*)pv2, cur1 < cur2 ? cur1 : cur2);
+	cmp = 1;
+    } else {
+	I32 retval = memcmp((void*)pv1, (void*)pv2, cur1 < cur2 ? cur1 : cur2);
 
 	if (retval) {
-	    return retval < 0 ? -1 : 1;
+	    cmp = retval < 0 ? -1 : 1;
 	} else if (cur1 == cur2) {
-	    return 0;
-	} else {
-	    return cur1 < cur2 ? -1 : 1;
+	    cmp = 0;
+        } else {
+	    cmp = cur1 < cur2 ? -1 : 1;
 	}
-    } else if (SvUTF8(sv1)) /* do not utf8ize the comparands as a side-effect */
-	retval = -memcmp_byte_utf8(pv2, cur2, pv1, cur1);
-    else
-	retval = memcmp_byte_utf8(pv1, cur1, pv2, cur2);
+    }
 
-    if (retval)				/* CURs taken into account already */
-	return retval < 0 ? -1 : 1;
-    return 0;
+    if (pv1tmp)
+	Safefree(pv1);
+    if (pv2tmp)
+	Safefree(pv2);
+
+    return cmp;
 }
 
 /*
diff --git a/util.c b/util.c
index 0e5c519..303bfa4 100644
--- a/util.c
+++ b/util.c
@@ -4348,42 +4348,5 @@ Perl_sv_nounlocking(pTHX_ SV *sv)
 {
 }
 
-/*
-=for apidoc memcmp_byte_utf8
-
-Similar to memcmp(), but the first string is with bytes, the second
-with utf8.  Takes into account that the lengths may be different.
 
-=cut
-*/
 
-int
-Perl_memcmp_byte_utf8(pTHX_ char *sb, STRLEN lbyte, char *su, STRLEN lutf)
-{
-    U8 *sbyte = (U8*)sb;
-    U8 *sutf  = (U8*)su;
-    U8 *ebyte = sbyte + lbyte;
-    U8 *eutf  = sutf  + lutf;
-
-    while (sbyte < ebyte) {
-	if (sutf >= eutf)
-	    return 1;			/* utf one shorter */
-	if (NATIVE_IS_INVARIANT(*sbyte)) {
-	    if (*sbyte != *sutf)
-		return *sbyte - *sutf;
-	    sbyte++; sutf++;	/* CONTINUE */
-	} else if ((*sutf & UTF_CONTINUATION_MASK) ==
-                   (*sbyte >> UTF_ACCUMULATION_SHIFT)) {
-	    if ((sutf[1] & UTF_CONTINUATION_MASK) !=
-                (*sbyte & UTF_CONTINUATION_MASK))
-		return (*sbyte & UTF_CONTINUATION_MASK) -
-                       (*sutf & UTF_CONTINUATION_MASK);
-	    sbyte++, sutf += 2;	/* CONTINUE */
-	} else
-	    return (*sbyte >> UTF_ACCUMULATION_SHIFT) -
-                   (*sutf & UTF_CONTINUATION_MASK);
-    }
-    if (sutf >= eutf)
-	return 0;
-    return -1;				/* byte one shorter */
-}