From: Anton Tagunov Date: Fri, 8 Mar 2002 04:31:44 +0000 (+0300) Subject: Re[2]: [ID 20020307.006] [BUG][use encoding] use encoding 'greek'; print "not" unless... X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=553e1bccd86c250b3dbeaa468c16f0244c6db836;p=p5sagit%2Fp5-mst-13.2.git Re[2]: [ID 20020307.006] [BUG][use encoding] use encoding 'greek'; print "not" unless pack("U*", 0x3af) eq pack("C*", 0xdf) prints "not"! Message-ID: <13946506773.20020308043144@newmail.ru> (one test changed) p4raw-id: //depot/perl@15088 --- diff --git a/lib/encoding.t b/lib/encoding.t index 6a50c03..aaec973 100644 --- a/lib/encoding.t +++ b/lib/encoding.t @@ -5,7 +5,7 @@ BEGIN { } } -print "1..23\n"; +print "1..29\n"; use encoding "latin1"; # ignored (overwritten by the next line) use encoding "greek"; # iso 8859-7 (no "latin" alias, surprise...) @@ -91,19 +91,91 @@ print "ok 19\n"; # eq, cmp +my ($byte,$bytes,$U,$Ub,$g1,$g2,$l) = ( + pack("C*", 0xDF ), # byte + pack("C*", 0xDF, 0x20), # ($bytes2 cmp $U) > 0 + pack("U*", 0x3AF), # $U eq $byte + pack("U*", 0xDF ), # $Ub would eq $bytev w/o use encoding + pack("U*", 0x3B1), # ($g1 cmp $byte) > 0; === chr(0xe1) + pack("U*", 0x3AF, 0x20), # ($g2 cmp $byte) > 0; + pack("U*", 0x3AB), # ($l cmp $byte) < 0; === chr(0xdb) +); + +# all the tests in this section that compare a byte encoded string +# ato UTF-8 encoded are run in all possible vairants +# all of the eq, ne, cmp operations tested, +# $v z $u tested as well as $u z $v + +sub alleq($$){ + my ($a,$b) = (shift, shift); + $a eq $b && $b eq $a && + !( $a ne $b ) && !( $b ne $a ) && + ( $a cmp $b ) == 0 && ( $b cmp $a ) == 0; +} + +sub anyeq($$){ + my ($a,$b) = (shift, shift); + $a eq $b || $b eq $a || + !( $a ne $b ) || !( $b ne $a ) || + ( $a cmp $b ) == 0 || ( $b cmp $a ) == 0; +} + +sub allgt($$){ + my ($a,$b) = (shift, shift); + ( $a cmp $b ) == 1 && ( $b cmp $a ) == -1; +} +#match the correct UTF-8 string +print "not " unless alleq($byte, $U); +print "ok 20\n"; + +#do not match a wrong UTF-8 string +print "not " if anyeq($byte, $Ub); +print "ok 21\n"; + +#string ordering +print "not " unless allgt ( $g1, $byte ) && + allgt ( $g2, $byte ) && + allgt ( $byte, $l ) && + allgt ( $bytes, $U ); +print "ok 22\n"; + +# upgrade, downgrade + +my ($u,$v,$v2); +$u = $v = $v2 = pack("C*", 0xDF); +utf8::upgrade($v); #explicit upgrade +$v2 = substr( $v2."\x{410}", 0, -1); #implicit upgrade + +# implicit upgrade === explicit upgrade +print "not " if do{{use bytes; $v ne $v2}} || $v ne $v2; +print "ok 23\n"; + +# utf8::upgrade is transparent and does not break equality +print "not " unless alleq( $u, $v ); +print "ok 24\n"; + +$u = $v = pack("C*", 0xDF); +utf8::upgrade($v); +#test for a roundtrip, we should get back from where we left +eval {utf8::downgrade( $v )}; +print "not " if $@ !~ /^Wide / || do{{use bytes; $u eq $v}} || $u ne $v; +print "ok 25\n"; + +# some more eq, cmp + my $byte=pack("C*", 0xDF); print "not " unless pack("U*", 0x3AF) eq $byte; -print "ok 20\n"; +print "ok 26\n"; print "not " if chr(0xDF) cmp $byte; -print "ok 21\n"; +print "ok 27\n"; print "not " unless ((pack("U*", 0x3B0) cmp $byte) == 1) && ((pack("U*", 0x3AE) cmp $byte) == -1) && ((pack("U*", 0x3AF, 0x20) cmp $byte) == 1) && ((pack("U*", 0x3AF) cmp pack("C*",0xDF,0x20))==-1); -print "ok 22\n"; +print "ok 28\n"; # Used to core dump in 5.7.3 -print ord undef == 0 ? "ok 23\n" : "not ok 23\n"; +print ord undef == 0 ? "ok 29\n" : "not ok 29\n"; diff --git a/sv.c b/sv.c index 2dfc8d4..799ffab 100644 --- a/sv.c +++ b/sv.c @@ -5349,10 +5349,8 @@ Perl_sv_eq(pTHX_ register SV *sv1, register SV *sv2) char *pv2; STRLEN cur2; I32 eq = 0; - char *tpv1 = Nullch; - char *tpv2 = Nullch; - SV* sv1recode = Nullsv; - SV* sv2recode = Nullsv; + char *tpv = Nullch; + SV* svrecode = Nullsv; if (!sv1) { pv1 = ""; @@ -5373,14 +5371,14 @@ Perl_sv_eq(pTHX_ register SV *sv1, register SV *sv2) * Do not UTF8size the comparands as a side-effect. */ if (PL_encoding) { if (SvUTF8(sv1)) { - sv2recode = newSVpvn(pv2, cur2); - sv_recode_to_utf8(sv2recode, PL_encoding); - pv2 = SvPV(sv2recode, cur2); + svrecode = newSVpvn(pv2, cur2); + sv_recode_to_utf8(svrecode, PL_encoding); + pv2 = SvPV(svrecode, cur2); } else { - sv1recode = newSVpvn(pv1, cur1); - sv_recode_to_utf8(sv1recode, PL_encoding); - pv2 = SvPV(sv1recode, cur1); + svrecode = newSVpvn(pv1, cur1); + sv_recode_to_utf8(svrecode, PL_encoding); + pv1 = SvPV(svrecode, cur1); } /* Now both are in UTF-8. */ if (cur1 != cur2) @@ -5395,7 +5393,7 @@ Perl_sv_eq(pTHX_ register SV *sv1, register SV *sv2) char *pv = (char*)bytes_from_utf8((U8*)pv1, &cur1, &is_utf8); if (pv != pv1) - pv1 = tpv1 = pv; + pv1 = tpv = pv; } else { /* sv2 is the UTF-8 one, @@ -5403,7 +5401,7 @@ Perl_sv_eq(pTHX_ register SV *sv1, register SV *sv2) char *pv = (char *)bytes_from_utf8((U8*)pv2, &cur2, &is_utf8); if (pv != pv2) - pv2 = tpv2 = pv; + pv2 = tpv = pv; } if (is_utf8) { /* Downgrade not possible - cannot be eq */ @@ -5415,15 +5413,11 @@ Perl_sv_eq(pTHX_ register SV *sv1, register SV *sv2) if (cur1 == cur2) eq = memEQ(pv1, pv2, cur1); - if (sv1recode) - SvREFCNT_dec(sv1recode); - if (sv2recode) - SvREFCNT_dec(sv2recode); + if (svrecode) + SvREFCNT_dec(svrecode); - if (tpv1) - Safefree(tpv1); - if (tpv2) - Safefree(tpv2); + if (tpv) + Safefree(tpv); return eq; } @@ -5443,12 +5437,9 @@ I32 Perl_sv_cmp(pTHX_ register SV *sv1, register SV *sv2) { STRLEN cur1, cur2; - char *pv1, *pv2; + char *pv1, *pv2, *tpv = Nullch; I32 cmp; - bool pv1tmp = FALSE; - bool pv2tmp = FALSE; - SV *sv1recode = Nullsv; - SV *sv2recode = Nullsv; + SV *svrecode = Nullsv; if (!sv1) { pv1 = ""; @@ -5457,7 +5448,7 @@ Perl_sv_cmp(pTHX_ register SV *sv1, register SV *sv2) else pv1 = SvPV(sv1, cur1); - if (!sv2){ + if (!sv2) { pv2 = ""; cur2 = 0; } @@ -5469,24 +5460,22 @@ Perl_sv_cmp(pTHX_ register SV *sv1, register SV *sv2) * Do not UTF8size the comparands as a side-effect. */ if (SvUTF8(sv1)) { if (PL_encoding) { - sv2recode = newSVpvn(pv2, cur2); - sv_recode_to_utf8(sv2recode, PL_encoding); - pv2 = SvPV(sv2recode, cur2); + svrecode = newSVpvn(pv2, cur2); + sv_recode_to_utf8(svrecode, PL_encoding); + pv2 = SvPV(svrecode, cur2); } else { - pv2 = (char*)bytes_to_utf8((U8*)pv2, &cur2); - pv2tmp = TRUE; + pv2 = tpv = (char*)bytes_to_utf8((U8*)pv2, &cur2); } } else { if (PL_encoding) { - sv1recode = newSVpvn(pv1, cur1); - sv_recode_to_utf8(sv1recode, PL_encoding); - pv1 = SvPV(sv1recode, cur1); + svrecode = newSVpvn(pv1, cur1); + sv_recode_to_utf8(svrecode, PL_encoding); + pv1 = SvPV(svrecode, cur1); } else { - pv1 = (char*)bytes_to_utf8((U8*)pv1, &cur1); - pv1tmp = TRUE; + pv1 = tpv = (char*)bytes_to_utf8((U8*)pv1, &cur1); } } } @@ -5507,15 +5496,11 @@ Perl_sv_cmp(pTHX_ register SV *sv1, register SV *sv2) } } - if (sv1recode) - SvREFCNT_dec(sv1recode); - if (sv2recode) - SvREFCNT_dec(sv2recode); + if (svrecode) + SvREFCNT_dec(svrecode); - if (pv1tmp) - Safefree(pv1); - if (pv2tmp) - Safefree(pv2); + if (tpv) + Safefree(tpv); return cmp; }