#if defined(USE_LOCALE_COLLATE)
p |char* |mem_collxfrm |const char* s|STRLEN len|STRLEN* xlen
#endif
-Apd |int |memcmp_byte_utf8 |char *sbyte|STRLEN lbyte|char *sutf|STRLEN lutf
Afp |SV* |mess |const char* pat|...
Ap |SV* |vmess |const char* pat|va_list* args
p |void |qerror |SV* err
#if defined(USE_LOCALE_COLLATE)
#define mem_collxfrm Perl_mem_collxfrm
#endif
-#define memcmp_byte_utf8 Perl_memcmp_byte_utf8
#define mess Perl_mess
#define vmess Perl_vmess
#define qerror Perl_qerror
#if defined(USE_LOCALE_COLLATE)
#define mem_collxfrm(a,b,c) Perl_mem_collxfrm(aTHX_ a,b,c)
#endif
-#define memcmp_byte_utf8(a,b,c,d) Perl_memcmp_byte_utf8(aTHX_ a,b,c,d)
#define vmess(a,b) Perl_vmess(aTHX_ a,b)
#define qerror(a) Perl_qerror(aTHX_ a)
#define sortsv(a,b,c) Perl_sortsv(aTHX_ a,b,c)
Perl_grok_numeric_radix
Perl_grok_oct
Perl_markstack_grow
-Perl_memcmp_byte_utf8
Perl_mess
Perl_vmess
Perl_sortsv
#if defined(USE_LOCALE_COLLATE)
PERL_CALLCONV char* Perl_mem_collxfrm(pTHX_ const char* s, STRLEN len, STRLEN* xlen);
#endif
-PERL_CALLCONV int Perl_memcmp_byte_utf8(pTHX_ char *sbyte, STRLEN lbyte, char *sutf, STRLEN lutf);
PERL_CALLCONV SV* Perl_mess(pTHX_ const char* pat, ...)
#ifdef CHECK_FORMAT
__attribute__((format(printf,pTHX_1,pTHX_2)))
char *pv2;
STRLEN cur2;
I32 eq = 0;
+ char *tpv = Nullch;
if (!sv1) {
pv1 = "";
else
pv2 = SvPV(sv2, cur2);
- if (SvUTF8(sv1) == SvUTF8(sv2) || IN_BYTES)
- eq = (cur1 == cur2) && memEQ(pv1, pv2, cur1);
- else if (SvUTF8(sv1)) /* do not utf8ize the comparands as a side-effect */
- eq = !memcmp_byte_utf8(pv2, cur2, pv1, cur1);
- else
- eq = !memcmp_byte_utf8(pv1, cur1, pv2, cur2);
+ /* do not utf8ize the comparands as a side-effect */
+ if (cur1 && cur2 && SvUTF8(sv1) != SvUTF8(sv2) && !IN_BYTES) {
+ bool is_utf8 = TRUE;
+ /* UTF-8ness differs */
+
+ if (SvUTF8(sv1)) {
+ /* sv1 is the UTF-8 one; if the strings are equal it must be downgradable */
+ char *pv = (char*)bytes_from_utf8((U8*)pv1, &cur1, &is_utf8);
+ if (pv != pv1)
+ pv1 = tpv = pv;
+ }
+ else {
+ /* sv2 is the UTF-8 one; if the strings are equal it must be downgradable */
+ char *pv = (char *)bytes_from_utf8((U8*)pv2, &cur2, &is_utf8);
+ if (pv != pv2)
+ pv2 = tpv = pv;
+ }
+ if (is_utf8) {
+ /* Downgrade not possible - cannot be eq */
+ return FALSE;
+ }
+ }
+
+ if (cur1 == cur2)
+ eq = memEQ(pv1, pv2, cur1);
+ if (tpv != Nullch)
+ Safefree(tpv);
+
return eq;
}
{
STRLEN cur1, cur2;
char *pv1, *pv2;
- I32 retval;
+ I32 cmp;
+ bool pv1tmp = FALSE;
+ bool pv2tmp = FALSE;
if (!sv1) {
pv1 = "";
else
pv2 = SvPV(sv2, cur2);
+ /* do not utf8ize the comparands as a side-effect */
+ if (cur1 && cur2 && SvUTF8(sv1) != SvUTF8(sv2) && !IN_BYTES) {
+ if (SvUTF8(sv1)) {
+ pv2 = (char*)bytes_to_utf8((U8*)pv2, &cur2);
+ pv2tmp = TRUE;
+ }
+ else {
+ pv1 = (char*)bytes_to_utf8((U8*)pv1, &cur1);
+ pv1tmp = TRUE;
+ }
+ }
+
if (!cur1) {
- return cur2 ? -1 : 0;
+ cmp = cur2 ? -1 : 0;
} else if (!cur2) {
- return 1;
- } else if (SvUTF8(sv1) == SvUTF8(sv2) || IN_BYTES) {
- retval = memcmp((void*)pv1, (void*)pv2, cur1 < cur2 ? cur1 : cur2);
+ cmp = 1;
+ } else {
+ I32 retval = memcmp((void*)pv1, (void*)pv2, cur1 < cur2 ? cur1 : cur2);
if (retval) {
- return retval < 0 ? -1 : 1;
+ cmp = retval < 0 ? -1 : 1;
} else if (cur1 == cur2) {
- return 0;
- } else {
- return cur1 < cur2 ? -1 : 1;
+ cmp = 0;
+ } else {
+ cmp = cur1 < cur2 ? -1 : 1;
}
- } else if (SvUTF8(sv1)) /* do not utf8ize the comparands as a side-effect */
- retval = -memcmp_byte_utf8(pv2, cur2, pv1, cur1);
- else
- retval = memcmp_byte_utf8(pv1, cur1, pv2, cur2);
+ }
- if (retval) /* CURs taken into account already */
- return retval < 0 ? -1 : 1;
- return 0;
+ if (pv1tmp)
+ Safefree(pv1);
+ if (pv2tmp)
+ Safefree(pv2);
+
+ return cmp;
}
/*
{
}
-/*
-=for apidoc memcmp_byte_utf8
-
-Similar to memcmp(), but the first string is with bytes, the second
-with utf8. Takes into account that the lengths may be different.
-=cut
-*/
-int
-Perl_memcmp_byte_utf8(pTHX_ char *sb, STRLEN lbyte, char *su, STRLEN lutf)
-{
- U8 *sbyte = (U8*)sb;
- U8 *sutf = (U8*)su;
- U8 *ebyte = sbyte + lbyte;
- U8 *eutf = sutf + lutf;
-
- while (sbyte < ebyte) {
- if (sutf >= eutf)
- return 1; /* utf one shorter */
- if (NATIVE_IS_INVARIANT(*sbyte)) {
- if (*sbyte != *sutf)
- return *sbyte - *sutf;
- sbyte++; sutf++; /* CONTINUE */
- } else if ((*sutf & UTF_CONTINUATION_MASK) ==
- (*sbyte >> UTF_ACCUMULATION_SHIFT)) {
- if ((sutf[1] & UTF_CONTINUATION_MASK) !=
- (*sbyte & UTF_CONTINUATION_MASK))
- return (*sbyte & UTF_CONTINUATION_MASK) -
- (*sutf & UTF_CONTINUATION_MASK);
- sbyte++, sutf += 2; /* CONTINUE */
- } else
- return (*sbyte >> UTF_ACCUMULATION_SHIFT) -
- (*sutf & UTF_CONTINUATION_MASK);
- }
- if (sutf >= eutf)
- return 0;
- return -1; /* byte one shorter */
-}