From: Anton Tagunov <tagunov@motor.ru>
Date: Fri, 8 Mar 2002 04:31:44 +0000 (+0300)
Subject: Re[2]: [ID 20020307.006] [BUG][use encoding] use encoding 'greek'; print "not" unless... 
X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=553e1bccd86c250b3dbeaa468c16f0244c6db836;p=p5sagit%2Fp5-mst-13.2.git

Re[2]: [ID 20020307.006] [BUG][use encoding] use encoding 'greek'; print "not" unless pack("U*", 0x3af) eq pack("C*", 0xdf) prints "not"!
Message-ID: <13946506773.20020308043144@newmail.ru>

(one test changed)

p4raw-id: //depot/perl@15088
---

diff --git a/lib/encoding.t b/lib/encoding.t
index 6a50c03..aaec973 100644
--- a/lib/encoding.t
+++ b/lib/encoding.t
@@ -5,7 +5,7 @@ BEGIN {
     }
 }
 
-print "1..23\n";
+print "1..29\n";
 
 use encoding "latin1"; # ignored (overwritten by the next line)
 use encoding "greek";  # iso 8859-7 (no "latin" alias, surprise...)
@@ -91,19 +91,91 @@ print "ok 19\n";
 
 # eq, cmp
 
+my ($byte,$bytes,$U,$Ub,$g1,$g2,$l) = ( 
+    pack("C*", 0xDF ),       # byte
+    pack("C*", 0xDF, 0x20),  # ($bytes2 cmp $U) > 0
+    pack("U*", 0x3AF),       # $U eq $byte
+    pack("U*", 0xDF ),       # $Ub would eq $bytev w/o use encoding
+    pack("U*", 0x3B1),       # ($g1 cmp $byte) > 0; === chr(0xe1)
+    pack("U*", 0x3AF, 0x20), # ($g2 cmp $byte) > 0;
+    pack("U*", 0x3AB),       # ($l  cmp $byte) < 0; === chr(0xdb)
+);
+
+# all the tests in this section that compare a byte encoded string 
+# ato UTF-8 encoded are run in all possible vairants 
+# all of the eq, ne, cmp operations tested,
+# $v z $u tested as well as $u z $v
+
+sub alleq($$){
+    my ($a,$b)    =    (shift, shift);
+     $a  eq  $b        &&     $b  eq  $a         && 
+  !( $a  ne  $b )      &&  !( $b  ne  $a )       &&
+   ( $a  cmp $b ) == 0 &&   ( $b  cmp $a ) == 0;
+}
+   
+sub anyeq($$){
+    my ($a,$b)    =    (shift, shift);
+     $a  eq  $b        ||     $b  eq  $a         ||
+  !( $a  ne  $b )      ||  !( $b  ne  $a )       ||
+   ( $a  cmp $b ) == 0 ||   ( $b  cmp $a ) == 0;
+}
+
+sub allgt($$){
+    my ($a,$b)    =    (shift, shift);
+    ( $a cmp $b ) == 1 && ( $b cmp $a ) == -1;
+}
+#match the correct UTF-8 string
+print "not " unless  alleq($byte, $U);
+print "ok 20\n";
+
+#do not match a wrong UTF-8 string
+print "not " if anyeq($byte, $Ub);
+print "ok 21\n";
+
+#string ordering
+print "not " unless allgt ( $g1,    $byte  )  &&
+                    allgt ( $g2,    $byte  )  &&
+                    allgt ( $byte,  $l     )  &&
+                    allgt ( $bytes, $U     );
+print "ok 22\n";
+
+# upgrade, downgrade
+
+my ($u,$v,$v2);
+$u = $v = $v2 = pack("C*", 0xDF);
+utf8::upgrade($v);                   #explicit upgrade
+$v2 = substr( $v2."\x{410}", 0, -1); #implicit upgrade
+
+# implicit upgrade === explicit upgrade
+print "not "  if do{{use bytes; $v ne $v2}} || $v ne $v2;
+print "ok 23\n";
+
+# utf8::upgrade is transparent and does not break equality
+print "not " unless alleq( $u, $v );
+print "ok 24\n";
+
+$u = $v = pack("C*", 0xDF);
+utf8::upgrade($v);
+#test for a roundtrip, we should get back from where we left
+eval {utf8::downgrade( $v )};
+print "not " if $@ !~ /^Wide / || do{{use bytes; $u eq $v}} || $u ne $v;
+print "ok 25\n";
+
+# some more eq, cmp
+
 my $byte=pack("C*", 0xDF);
 
 print "not " unless pack("U*", 0x3AF) eq $byte;
-print "ok 20\n";
+print "ok 26\n";
 
 print "not " if chr(0xDF) cmp $byte;
-print "ok 21\n";
+print "ok 27\n";
 
 print "not " unless ((pack("U*", 0x3B0)       cmp $byte) ==  1) &&
                     ((pack("U*", 0x3AE)       cmp $byte) == -1) &&
                     ((pack("U*", 0x3AF, 0x20) cmp $byte) ==  1) &&
 	            ((pack("U*", 0x3AF) cmp pack("C*",0xDF,0x20))==-1);
-print "ok 22\n";
+print "ok 28\n";
 
 # Used to core dump in 5.7.3
-print ord undef == 0 ? "ok 23\n" : "not ok 23\n";
+print ord undef == 0 ? "ok 29\n" : "not ok 29\n";
diff --git a/sv.c b/sv.c
index 2dfc8d4..799ffab 100644
--- a/sv.c
+++ b/sv.c
@@ -5349,10 +5349,8 @@ Perl_sv_eq(pTHX_ register SV *sv1, register SV *sv2)
     char *pv2;
     STRLEN cur2;
     I32  eq     = 0;
-    char *tpv1  = Nullch;
-    char *tpv2  = Nullch;
-    SV* sv1recode = Nullsv;
-    SV* sv2recode = Nullsv;
+    char *tpv   = Nullch;
+    SV* svrecode = Nullsv;
 
     if (!sv1) {
 	pv1 = "";
@@ -5373,14 +5371,14 @@ Perl_sv_eq(pTHX_ register SV *sv1, register SV *sv2)
 	 * Do not UTF8size the comparands as a side-effect. */
 	 if (PL_encoding) {
 	      if (SvUTF8(sv1)) {
-		   sv2recode = newSVpvn(pv2, cur2);
-		   sv_recode_to_utf8(sv2recode, PL_encoding);
-		   pv2 = SvPV(sv2recode, cur2);
+		   svrecode = newSVpvn(pv2, cur2);
+		   sv_recode_to_utf8(svrecode, PL_encoding);
+		   pv2 = SvPV(svrecode, cur2);
 	      }
 	      else {
-		   sv1recode = newSVpvn(pv1, cur1);
-		   sv_recode_to_utf8(sv1recode, PL_encoding);
-		   pv2 = SvPV(sv1recode, cur1);
+		   svrecode = newSVpvn(pv1, cur1);
+		   sv_recode_to_utf8(svrecode, PL_encoding);
+		   pv1 = SvPV(svrecode, cur1);
 	      }
 	      /* Now both are in UTF-8. */
 	      if (cur1 != cur2)
@@ -5395,7 +5393,7 @@ Perl_sv_eq(pTHX_ register SV *sv1, register SV *sv2)
 		   char *pv = (char*)bytes_from_utf8((U8*)pv1,
 						     &cur1, &is_utf8);
 		   if (pv != pv1)
-			pv1 = tpv1 = pv;
+			pv1 = tpv = pv;
 	      }
 	      else {
 		   /* sv2 is the UTF-8 one,
@@ -5403,7 +5401,7 @@ Perl_sv_eq(pTHX_ register SV *sv1, register SV *sv2)
 		   char *pv = (char *)bytes_from_utf8((U8*)pv2,
 						      &cur2, &is_utf8);
 		   if (pv != pv2)
-			pv2 = tpv2 = pv;
+			pv2 = tpv = pv;
 	      }
 	      if (is_utf8) {
 		   /* Downgrade not possible - cannot be eq */
@@ -5415,15 +5413,11 @@ Perl_sv_eq(pTHX_ register SV *sv1, register SV *sv2)
     if (cur1 == cur2)
 	eq = memEQ(pv1, pv2, cur1);
 	
-    if (sv1recode)
-	 SvREFCNT_dec(sv1recode);
-    if (sv2recode)
-	 SvREFCNT_dec(sv2recode);
+    if (svrecode)
+	 SvREFCNT_dec(svrecode);
 
-    if (tpv1)
-	Safefree(tpv1);
-    if (tpv2)
-	Safefree(tpv2);
+    if (tpv)
+	Safefree(tpv);
 
     return eq;
 }
@@ -5443,12 +5437,9 @@ I32
 Perl_sv_cmp(pTHX_ register SV *sv1, register SV *sv2)
 {
     STRLEN cur1, cur2;
-    char *pv1, *pv2;
+    char *pv1, *pv2, *tpv = Nullch;
     I32  cmp;
-    bool pv1tmp = FALSE;
-    bool pv2tmp = FALSE;
-    SV *sv1recode = Nullsv;
-    SV *sv2recode = Nullsv;
+    SV *svrecode = Nullsv;
 
     if (!sv1) {
 	pv1 = "";
@@ -5457,7 +5448,7 @@ Perl_sv_cmp(pTHX_ register SV *sv1, register SV *sv2)
     else
 	pv1 = SvPV(sv1, cur1);
 
-    if (!sv2){
+    if (!sv2) {
 	pv2 = "";
 	cur2 = 0;
     }
@@ -5469,24 +5460,22 @@ Perl_sv_cmp(pTHX_ register SV *sv1, register SV *sv2)
 	 * Do not UTF8size the comparands as a side-effect. */
 	if (SvUTF8(sv1)) {
 	    if (PL_encoding) {
-		 sv2recode = newSVpvn(pv2, cur2);
-		 sv_recode_to_utf8(sv2recode, PL_encoding);
-		 pv2 = SvPV(sv2recode, cur2);
+		 svrecode = newSVpvn(pv2, cur2);
+		 sv_recode_to_utf8(svrecode, PL_encoding);
+		 pv2 = SvPV(svrecode, cur2);
 	    }
 	    else {
-		 pv2 = (char*)bytes_to_utf8((U8*)pv2, &cur2);
-		 pv2tmp = TRUE;
+		 pv2 = tpv = (char*)bytes_to_utf8((U8*)pv2, &cur2);
 	    }
 	}
 	else {
 	    if (PL_encoding) {
-		 sv1recode = newSVpvn(pv1, cur1);
-		 sv_recode_to_utf8(sv1recode, PL_encoding);
-		 pv1 = SvPV(sv1recode, cur1);
+		 svrecode = newSVpvn(pv1, cur1);
+		 sv_recode_to_utf8(svrecode, PL_encoding);
+		 pv1 = SvPV(svrecode, cur1);
 	    }
 	    else {
-		 pv1 = (char*)bytes_to_utf8((U8*)pv1, &cur1);
-		 pv1tmp = TRUE;
+		 pv1 = tpv = (char*)bytes_to_utf8((U8*)pv1, &cur1);
 	    }
 	}
     }
@@ -5507,15 +5496,11 @@ Perl_sv_cmp(pTHX_ register SV *sv1, register SV *sv2)
 	}
     }
 
-    if (sv1recode)
-	 SvREFCNT_dec(sv1recode);
-    if (sv2recode)
-	 SvREFCNT_dec(sv2recode);
+    if (svrecode)
+	 SvREFCNT_dec(svrecode);
 
-    if (pv1tmp)
-	Safefree(pv1);
-    if (pv2tmp)
-	Safefree(pv2);
+    if (tpv)
+	Safefree(tpv);
 
     return cmp;
 }