}
}
-print "1..23\n";
+print "1..29\n";
use encoding "latin1"; # ignored (overwritten by the next line)
use encoding "greek"; # iso 8859-7 (no "latin" alias, surprise...)
# eq, cmp
+my ($byte,$bytes,$U,$Ub,$g1,$g2,$l) = (
+ pack("C*", 0xDF ), # byte
+ pack("C*", 0xDF, 0x20), # ($bytes2 cmp $U) > 0
+ pack("U*", 0x3AF), # $U eq $byte
+ pack("U*", 0xDF ), # $Ub would eq $bytev w/o use encoding
+ pack("U*", 0x3B1), # ($g1 cmp $byte) > 0; === chr(0xe1)
+ pack("U*", 0x3AF, 0x20), # ($g2 cmp $byte) > 0;
+ pack("U*", 0x3AB), # ($l cmp $byte) < 0; === chr(0xdb)
+);
+
+# all the tests in this section that compare a byte encoded string
+# ato UTF-8 encoded are run in all possible vairants
+# all of the eq, ne, cmp operations tested,
+# $v z $u tested as well as $u z $v
+
+sub alleq($$){
+ my ($a,$b) = (shift, shift);
+ $a eq $b && $b eq $a &&
+ !( $a ne $b ) && !( $b ne $a ) &&
+ ( $a cmp $b ) == 0 && ( $b cmp $a ) == 0;
+}
+
+sub anyeq($$){
+ my ($a,$b) = (shift, shift);
+ $a eq $b || $b eq $a ||
+ !( $a ne $b ) || !( $b ne $a ) ||
+ ( $a cmp $b ) == 0 || ( $b cmp $a ) == 0;
+}
+
+sub allgt($$){
+ my ($a,$b) = (shift, shift);
+ ( $a cmp $b ) == 1 && ( $b cmp $a ) == -1;
+}
+#match the correct UTF-8 string
+print "not " unless alleq($byte, $U);
+print "ok 20\n";
+
+#do not match a wrong UTF-8 string
+print "not " if anyeq($byte, $Ub);
+print "ok 21\n";
+
+#string ordering
+print "not " unless allgt ( $g1, $byte ) &&
+ allgt ( $g2, $byte ) &&
+ allgt ( $byte, $l ) &&
+ allgt ( $bytes, $U );
+print "ok 22\n";
+
+# upgrade, downgrade
+
+my ($u,$v,$v2);
+$u = $v = $v2 = pack("C*", 0xDF);
+utf8::upgrade($v); #explicit upgrade
+$v2 = substr( $v2."\x{410}", 0, -1); #implicit upgrade
+
+# implicit upgrade === explicit upgrade
+print "not " if do{{use bytes; $v ne $v2}} || $v ne $v2;
+print "ok 23\n";
+
+# utf8::upgrade is transparent and does not break equality
+print "not " unless alleq( $u, $v );
+print "ok 24\n";
+
+$u = $v = pack("C*", 0xDF);
+utf8::upgrade($v);
+#test for a roundtrip, we should get back from where we left
+eval {utf8::downgrade( $v )};
+print "not " if $@ !~ /^Wide / || do{{use bytes; $u eq $v}} || $u ne $v;
+print "ok 25\n";
+
+# some more eq, cmp
+
my $byte=pack("C*", 0xDF);
print "not " unless pack("U*", 0x3AF) eq $byte;
-print "ok 20\n";
+print "ok 26\n";
print "not " if chr(0xDF) cmp $byte;
-print "ok 21\n";
+print "ok 27\n";
print "not " unless ((pack("U*", 0x3B0) cmp $byte) == 1) &&
((pack("U*", 0x3AE) cmp $byte) == -1) &&
((pack("U*", 0x3AF, 0x20) cmp $byte) == 1) &&
((pack("U*", 0x3AF) cmp pack("C*",0xDF,0x20))==-1);
-print "ok 22\n";
+print "ok 28\n";
# Used to core dump in 5.7.3
-print ord undef == 0 ? "ok 23\n" : "not ok 23\n";
+print ord undef == 0 ? "ok 29\n" : "not ok 29\n";
char *pv2;
STRLEN cur2;
I32 eq = 0;
- char *tpv1 = Nullch;
- char *tpv2 = Nullch;
- SV* sv1recode = Nullsv;
- SV* sv2recode = Nullsv;
+ char *tpv = Nullch;
+ SV* svrecode = Nullsv;
if (!sv1) {
pv1 = "";
* Do not UTF8size the comparands as a side-effect. */
if (PL_encoding) {
if (SvUTF8(sv1)) {
- sv2recode = newSVpvn(pv2, cur2);
- sv_recode_to_utf8(sv2recode, PL_encoding);
- pv2 = SvPV(sv2recode, cur2);
+ svrecode = newSVpvn(pv2, cur2);
+ sv_recode_to_utf8(svrecode, PL_encoding);
+ pv2 = SvPV(svrecode, cur2);
}
else {
- sv1recode = newSVpvn(pv1, cur1);
- sv_recode_to_utf8(sv1recode, PL_encoding);
- pv2 = SvPV(sv1recode, cur1);
+ svrecode = newSVpvn(pv1, cur1);
+ sv_recode_to_utf8(svrecode, PL_encoding);
+ pv1 = SvPV(svrecode, cur1);
}
/* Now both are in UTF-8. */
if (cur1 != cur2)
char *pv = (char*)bytes_from_utf8((U8*)pv1,
&cur1, &is_utf8);
if (pv != pv1)
- pv1 = tpv1 = pv;
+ pv1 = tpv = pv;
}
else {
/* sv2 is the UTF-8 one,
char *pv = (char *)bytes_from_utf8((U8*)pv2,
&cur2, &is_utf8);
if (pv != pv2)
- pv2 = tpv2 = pv;
+ pv2 = tpv = pv;
}
if (is_utf8) {
/* Downgrade not possible - cannot be eq */
if (cur1 == cur2)
eq = memEQ(pv1, pv2, cur1);
- if (sv1recode)
- SvREFCNT_dec(sv1recode);
- if (sv2recode)
- SvREFCNT_dec(sv2recode);
+ if (svrecode)
+ SvREFCNT_dec(svrecode);
- if (tpv1)
- Safefree(tpv1);
- if (tpv2)
- Safefree(tpv2);
+ if (tpv)
+ Safefree(tpv);
return eq;
}
Perl_sv_cmp(pTHX_ register SV *sv1, register SV *sv2)
{
STRLEN cur1, cur2;
- char *pv1, *pv2;
+ char *pv1, *pv2, *tpv = Nullch;
I32 cmp;
- bool pv1tmp = FALSE;
- bool pv2tmp = FALSE;
- SV *sv1recode = Nullsv;
- SV *sv2recode = Nullsv;
+ SV *svrecode = Nullsv;
if (!sv1) {
pv1 = "";
else
pv1 = SvPV(sv1, cur1);
- if (!sv2){
+ if (!sv2) {
pv2 = "";
cur2 = 0;
}
* Do not UTF8size the comparands as a side-effect. */
if (SvUTF8(sv1)) {
if (PL_encoding) {
- sv2recode = newSVpvn(pv2, cur2);
- sv_recode_to_utf8(sv2recode, PL_encoding);
- pv2 = SvPV(sv2recode, cur2);
+ svrecode = newSVpvn(pv2, cur2);
+ sv_recode_to_utf8(svrecode, PL_encoding);
+ pv2 = SvPV(svrecode, cur2);
}
else {
- pv2 = (char*)bytes_to_utf8((U8*)pv2, &cur2);
- pv2tmp = TRUE;
+ pv2 = tpv = (char*)bytes_to_utf8((U8*)pv2, &cur2);
}
}
else {
if (PL_encoding) {
- sv1recode = newSVpvn(pv1, cur1);
- sv_recode_to_utf8(sv1recode, PL_encoding);
- pv1 = SvPV(sv1recode, cur1);
+ svrecode = newSVpvn(pv1, cur1);
+ sv_recode_to_utf8(svrecode, PL_encoding);
+ pv1 = SvPV(svrecode, cur1);
}
else {
- pv1 = (char*)bytes_to_utf8((U8*)pv1, &cur1);
- pv1tmp = TRUE;
+ pv1 = tpv = (char*)bytes_to_utf8((U8*)pv1, &cur1);
}
}
}
}
}
- if (sv1recode)
- SvREFCNT_dec(sv1recode);
- if (sv2recode)
- SvREFCNT_dec(sv2recode);
+ if (svrecode)
+ SvREFCNT_dec(svrecode);
- if (pv1tmp)
- Safefree(pv1);
- if (pv2tmp)
- Safefree(pv2);
+ if (tpv)
+ Safefree(tpv);
return cmp;
}