print "tera\n" if ord(pack("C", 0xdf)) == 0x3af;
- # but pack/unpack are not affected, in case you still
+ # ... as are eq and cmp ...
+
+ print "peta\n" if "\x{3af}" eq pack("C", 0xdf);
+ print "exa\n" if "\x{3af}" cmp pack("C", 0xdf) == 0;
+
+ # ... but pack/unpack C are not affected, in case you still
# want back to your native encoding
- print "peta\n" if unpack("C", (pack("C", 0xdf))) == 0xdf;
+ print "zetta\n" if unpack("C", (pack("C", 0xdf))) == 0xdf;
=head1 DESCRIPTION
-print "1..19\n";
-
BEGIN {
if (ord("A") == 193) {
print "1..0 # encoding pragma does not support EBCDIC platforms\n";
}
}
+print "1..23\n";
+
use encoding "latin1"; # ignored (overwritten by the next line)
use encoding "greek"; # iso 8859-7 (no "latin" alias, surprise...)
print "not " unless "\x{3AF}" =~ /\x{3AF}/;
print "ok 19\n";
+# eq, cmp
+
+my $byte=pack("C*", 0xDF);
+
+print "not " unless pack("U*", 0x3AF) eq $byte;
+print "ok 20\n";
+
+print "not " if chr(0xDF) cmp $byte;
+print "ok 21\n";
+
+print "not " unless ((pack("U*", 0x3B0) cmp $byte) == 1) &&
+ ((pack("U*", 0x3AE) cmp $byte) == -1) &&
+ ((pack("U*", 0x3AF, 0x20) cmp $byte) == 1) &&
+ ((pack("U*", 0x3AF) cmp pack("C*",0xDF,0x20))==-1);
+print "ok 22\n";
+
+# Used to core dump in 5.7.3
+print ord undef == 0 ? "ok 23\n" : "not ok 23\n";
}
if (PL_encoding)
- Perl_sv_recode_to_utf8(aTHX_ sv, PL_encoding);
+ sv_recode_to_utf8(sv, PL_encoding);
else { /* Assume Latin-1/EBCDIC */
/* This function could be much more efficient if we
* had a FLAG in SVs to signal if there are any hibit
char *pv2;
STRLEN cur2;
I32 eq = 0;
- char *tpv = Nullch;
+ char *tpv1 = Nullch;
+ char *tpv2 = Nullch;
+ SV* sv1recode = Nullsv;
+ SV* sv2recode = Nullsv;
if (!sv1) {
pv1 = "";
else
pv2 = SvPV(sv2, cur2);
- /* do not utf8ize the comparands as a side-effect */
if (cur1 && cur2 && SvUTF8(sv1) != SvUTF8(sv2) && !IN_BYTES) {
- bool is_utf8 = TRUE;
- /* UTF-8ness differs */
-
- if (SvUTF8(sv1)) {
- /* sv1 is the UTF-8 one , If is equal it must be downgrade-able */
- char *pv = (char*)bytes_from_utf8((U8*)pv1, &cur1, &is_utf8);
- if (pv != pv1)
- pv1 = tpv = pv;
- }
- else {
- /* sv2 is the UTF-8 one , If is equal it must be downgrade-able */
- char *pv = (char *)bytes_from_utf8((U8*)pv2, &cur2, &is_utf8);
- if (pv != pv2)
- pv2 = tpv = pv;
- }
- if (is_utf8) {
- /* Downgrade not possible - cannot be eq */
- return FALSE;
- }
+ /* Differing utf8ness.
+ * Do not UTF8size the comparands as a side-effect. */
+ if (PL_encoding) {
+ if (SvUTF8(sv1)) {
+ sv2recode = newSVpvn(pv2, cur2);
+ sv_recode_to_utf8(sv2recode, PL_encoding);
+ pv2 = SvPV(sv2recode, cur2);
+ }
+ else {
+ sv1recode = newSVpvn(pv1, cur1);
+ sv_recode_to_utf8(sv1recode, PL_encoding);
+ pv2 = SvPV(sv1recode, cur1);
+ }
+ /* Now both are in UTF-8. */
+ if (cur1 != cur2)
+ return FALSE;
+ }
+ else {
+ bool is_utf8 = TRUE;
+
+ if (SvUTF8(sv1)) {
+ /* sv1 is the UTF-8 one,
+ * if is equal it must be downgrade-able */
+ char *pv = (char*)bytes_from_utf8((U8*)pv1,
+ &cur1, &is_utf8);
+ if (pv != pv1)
+ pv1 = tpv1 = pv;
+ }
+ else {
+ /* sv2 is the UTF-8 one,
+ * if is equal it must be downgrade-able */
+ char *pv = (char *)bytes_from_utf8((U8*)pv2,
+ &cur2, &is_utf8);
+ if (pv != pv2)
+ pv2 = tpv2 = pv;
+ }
+ if (is_utf8) {
+ /* Downgrade not possible - cannot be eq */
+ return FALSE;
+ }
+ }
}
if (cur1 == cur2)
eq = memEQ(pv1, pv2, cur1);
- if (tpv != Nullch)
- Safefree(tpv);
+ if (sv1recode)
+ SvREFCNT_dec(sv1recode);
+ if (sv2recode)
+ SvREFCNT_dec(sv2recode);
+
+ if (tpv1)
+ Safefree(tpv1);
+ if (tpv2)
+ Safefree(tpv2);
return eq;
}
I32 cmp;
bool pv1tmp = FALSE;
bool pv2tmp = FALSE;
+ SV *sv1recode = Nullsv;
+ SV *sv2recode = Nullsv;
if (!sv1) {
pv1 = "";
else
pv2 = SvPV(sv2, cur2);
- /* do not utf8ize the comparands as a side-effect */
if (cur1 && cur2 && SvUTF8(sv1) != SvUTF8(sv2) && !IN_BYTES) {
+ /* Differing utf8ness.
+ * Do not UTF8size the comparands as a side-effect. */
if (SvUTF8(sv1)) {
- pv2 = (char*)bytes_to_utf8((U8*)pv2, &cur2);
- pv2tmp = TRUE;
+ if (PL_encoding) {
+ sv2recode = newSVpvn(pv2, cur2);
+ sv_recode_to_utf8(sv2recode, PL_encoding);
+ pv2 = SvPV(sv2recode, cur2);
+ }
+ else {
+ pv2 = (char*)bytes_to_utf8((U8*)pv2, &cur2);
+ pv2tmp = TRUE;
+ }
}
else {
- pv1 = (char*)bytes_to_utf8((U8*)pv1, &cur1);
- pv1tmp = TRUE;
+ if (PL_encoding) {
+ sv1recode = newSVpvn(pv1, cur1);
+ sv_recode_to_utf8(sv1recode, PL_encoding);
+ pv1 = SvPV(sv1recode, cur1);
+ }
+ else {
+ pv1 = (char*)bytes_to_utf8((U8*)pv1, &cur1);
+ pv1tmp = TRUE;
+ }
}
}
}
}
+ if (sv1recode)
+ SvREFCNT_dec(sv1recode);
+ if (sv2recode)
+ SvREFCNT_dec(sv2recode);
+
if (pv1tmp)
Safefree(pv1);
if (pv2tmp)