From: Ilya Zakharevich Date: Mon, 4 Mar 2002 02:31:04 +0000 (-0500) Subject: sv_cmp and friends X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=0d3b7757875e39a336d967574233c80ebdc2f8b6;p=p5sagit%2Fp5-mst-13.2.git sv_cmp and friends Message-Id: <20020304023103.A14140@math.ohio-state.edu> p4raw-link: @14577 on //depot/perl: 0ad5258ff3f3328f321188cbb4fcd6a74b365431 p4raw-id: //depot/perl@14985 --- diff --git a/embed.fnc b/embed.fnc index a94654f..2853071 100644 --- a/embed.fnc +++ b/embed.fnc @@ -426,6 +426,7 @@ Ap |void |markstack_grow #if defined(USE_LOCALE_COLLATE) p |char* |mem_collxfrm |const char* s|STRLEN len|STRLEN* xlen #endif +Apd |int |memcmp_byte_utf8 |char *sbyte|STRLEN lbyte|char *sutf|STRLEN lutf Afp |SV* |mess |const char* pat|... Ap |SV* |vmess |const char* pat|va_list* args p |void |qerror |SV* err diff --git a/sv.c b/sv.c index 27150d6..f893fa6 100644 --- a/sv.c +++ b/sv.c @@ -5349,7 +5349,6 @@ Perl_sv_eq(pTHX_ register SV *sv1, register SV *sv2) char *pv2; STRLEN cur2; I32 eq = 0; - char *tpv = Nullch; if (!sv1) { pv1 = ""; @@ -5365,35 +5364,13 @@ Perl_sv_eq(pTHX_ register SV *sv1, register SV *sv2) else pv2 = SvPV(sv2, cur2); - /* do not utf8ize the comparands as a side-effect */ - if (cur1 && cur2 && SvUTF8(sv1) != SvUTF8(sv2) && !IN_BYTES) { - bool is_utf8 = TRUE; - /* UTF-8ness differs */ - - if (SvUTF8(sv1)) { - /* sv1 is the UTF-8 one , If is equal it must be downgrade-able */ - char *pv = (char*)bytes_from_utf8((U8*)pv1, &cur1, &is_utf8); - if (pv != pv1) - pv1 = tpv = pv; - } - else { - /* sv2 is the UTF-8 one , If is equal it must be downgrade-able */ - char *pv = (char *)bytes_from_utf8((U8*)pv2, &cur2, &is_utf8); - if (pv != pv2) - pv2 = tpv = pv; - } - if (is_utf8) { - /* Downgrade not possible - cannot be eq */ - return FALSE; - } - } - - if (cur1 == cur2) - eq = memEQ(pv1, pv2, cur1); + if (SvUTF8(sv1) == SvUTF8(sv2) || IN_BYTES) + eq = (cur1 == cur2) && memEQ(pv1, pv2, cur1); + else if (SvUTF8(sv1)) /* do 
not utf8ize the comparands as a side-effect */ + eq = !memcmp_byte_utf8(pv2, cur2, pv1, cur1); + else + eq = !memcmp_byte_utf8(pv1, cur1, pv2, cur2); - if (tpv != Nullch) - Safefree(tpv); - return eq; } @@ -5413,9 +5390,7 @@ Perl_sv_cmp(pTHX_ register SV *sv1, register SV *sv2) { STRLEN cur1, cur2; char *pv1, *pv2; - I32 cmp; - bool pv1tmp = FALSE; - bool pv2tmp = FALSE; + I32 retval; if (!sv1) { pv1 = ""; @@ -5431,40 +5406,28 @@ Perl_sv_cmp(pTHX_ register SV *sv1, register SV *sv2) else pv2 = SvPV(sv2, cur2); - /* do not utf8ize the comparands as a side-effect */ - if (cur1 && cur2 && SvUTF8(sv1) != SvUTF8(sv2) && !IN_BYTES) { - if (SvUTF8(sv1)) { - pv2 = (char*)bytes_to_utf8((U8*)pv2, &cur2); - pv2tmp = TRUE; - } - else { - pv1 = (char*)bytes_to_utf8((U8*)pv1, &cur1); - pv1tmp = TRUE; - } - } - if (!cur1) { - cmp = cur2 ? -1 : 0; + return cur2 ? -1 : 0; } else if (!cur2) { - cmp = 1; - } else { - I32 retval = memcmp((void*)pv1, (void*)pv2, cur1 < cur2 ? cur1 : cur2); + return 1; + } else if (SvUTF8(sv1) == SvUTF8(sv2) || IN_BYTES) { + retval = memcmp((void*)pv1, (void*)pv2, cur1 < cur2 ? cur1 : cur2); if (retval) { - cmp = retval < 0 ? -1 : 1; + return retval < 0 ? -1 : 1; } else if (cur1 == cur2) { - cmp = 0; - } else { - cmp = cur1 < cur2 ? -1 : 1; + return 0; + } else { + return cur1 < cur2 ? -1 : 1; } - } - - if (pv1tmp) - Safefree(pv1); - if (pv2tmp) - Safefree(pv2); + } else if (SvUTF8(sv1)) /* do not utf8ize the comparands as a side-effect */ + retval = -memcmp_byte_utf8(pv2, cur2, pv1, cur1); + else + retval = memcmp_byte_utf8(pv1, cur1, pv2, cur2); - return cmp; + if (retval) /* CURs taken into account already */ + return retval < 0 ? -1 : 1; + return 0; } /* diff --git a/util.c b/util.c index a9f9ade..4dc8676 100644 --- a/util.c +++ b/util.c @@ -4346,5 +4346,38 @@ Perl_sv_nounlocking(pTHX_ SV *sv) { } +/* +=for apidoc memcmp_byte_utf8 + +Similar to memcmp(), but the first string is with bytes, the second +with utf8. 
Takes into account that the lengths may be different.
+
+The byte string is compared as if each of its bytes had first been
+encoded as UTF-8 (one byte for values below 0x80, two bytes otherwise).
+Nothing is allocated and neither string is modified.  Returns zero if
+the strings are equal, a negative value if the byte string sorts first
+and a positive value if the UTF-8 string sorts first.  A string that is
+a proper prefix of the other sorts first.  A UTF-8 string that ends in
+the middle of a two-byte sequence is treated as the shorter one; no byte
+at or beyond C<sutf + lutf> is ever read.
+
+=cut
+*/
+int
+Perl_memcmp_byte_utf8(pTHX_ char *sb, STRLEN lbyte, char *su, STRLEN lutf)
+{
+    U8 *sbyte = (U8*)sb;
+    U8 *sutf = (U8*)su;
+    U8 *ebyte = sbyte + lbyte;
+    U8 *eutf = sutf + lutf;
+
+    /* NOTE(review): the bit arithmetic below assumes the ASCII/Latin-1
+     * flavour of UTF-8, exactly as the previous implementation did;
+     * confirm whether EBCDIC builds need the platform mapping instead. */
+    while (sbyte < ebyte) {
+        U8 enc[2];      /* UTF-8 encoding of the current byte */
+        int enclen;
+        int i;
+
+        if (*sbyte < 0x80) {
+            enc[0] = *sbyte;
+            enclen = 1;
+        }
+        else {
+            /* e.g. byte 0xFF becomes 0xC3 0xBF */
+            enc[0] = (U8)(0xC0 | (*sbyte >> 6));
+            enc[1] = (U8)(0x80 | (*sbyte & 0x3F));
+            enclen = 2;
+        }
+
+        /* Compare whole bytes, never masked ones: UTF-8 byte order is
+         * code point order, and masking would let e.g. 'C' (0x43) pass
+         * for the lead byte 0xC3. */
+        for (i = 0; i < enclen; i++) {
+            if (sutf >= eutf)
+                return 1;       /* utf one shorter */
+            if (enc[i] != *sutf)
+                return (int)enc[i] - (int)*sutf;
+            sutf++;
+        }
+        sbyte++;
+    }
+    return sutf < eutf ? -1 : 0;        /* byte one shorter, or equal */
+}