From: Jarkko Hietaniemi Date: Thu, 7 Mar 2002 20:54:02 +0000 (+0000) Subject: Retract #14985, #14899, and #14990, following the principles X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=cf48d248eb62e81239204ca4ca6b33029875e0bd;p=p5sagit%2Fp5-mst-13.2.git Retract #14985, #14899, and #14990, following the principles "Do no harm." and "If it ain't broke, don't fix it." Firstly, the #14985 broke badly on UTF-EBCDIC, #14990 fixed some, but still broken, and I do not have the extra brain cells for the EBCDIC backport. Secondly, the old version worked both in EBCDIC and non-. Thirdly, the old version may be more amenable for the behaviour suggsted by Anton Tagunov regarding the encoding pragma. p4raw-id: //depot/perl@15084 --- diff --git a/embed.fnc b/embed.fnc index d448387..52472a0 100644 --- a/embed.fnc +++ b/embed.fnc @@ -427,7 +427,6 @@ Ap |void |markstack_grow #if defined(USE_LOCALE_COLLATE) p |char* |mem_collxfrm |const char* s|STRLEN len|STRLEN* xlen #endif -Apd |int |memcmp_byte_utf8 |char *sbyte|STRLEN lbyte|char *sutf|STRLEN lutf Afp |SV* |mess |const char* pat|... Ap |SV* |vmess |const char* pat|va_list* args p |void |qerror |SV* err diff --git a/embed.h b/embed.h index 2c0256f..9d66677 100644 --- a/embed.h +++ b/embed.h @@ -411,7 +411,6 @@ #if defined(USE_LOCALE_COLLATE) #define mem_collxfrm Perl_mem_collxfrm #endif -#define memcmp_byte_utf8 Perl_memcmp_byte_utf8 #define mess Perl_mess #define vmess Perl_vmess #define qerror Perl_qerror @@ -1970,7 +1969,6 @@ #if defined(USE_LOCALE_COLLATE) #define mem_collxfrm(a,b,c) Perl_mem_collxfrm(aTHX_ a,b,c) #endif -#define memcmp_byte_utf8(a,b,c,d) Perl_memcmp_byte_utf8(aTHX_ a,b,c,d) #define vmess(a,b) Perl_vmess(aTHX_ a,b) #define qerror(a) Perl_qerror(aTHX_ a) #define sortsv(a,b,c) Perl_sortsv(aTHX_ a,b,c) diff --git a/global.sym b/global.sym index a477a0b..9b709ec 100644 --- a/global.sym +++ b/global.sym @@ -229,7 +229,6 @@ Perl_grok_number Perl_grok_numeric_radix Perl_grok_oct Perl_markstack_grow -Perl_memcmp_byte_utf8 Perl_mess Perl_vmess Perl_sortsv diff --git a/proto.h b/proto.h index 87ca95f..159d968 100644 --- a/proto.h +++ b/proto.h @@ -463,7 +463,6 @@ PERL_CALLCONV void Perl_markstack_grow(pTHX); #if defined(USE_LOCALE_COLLATE) PERL_CALLCONV char* Perl_mem_collxfrm(pTHX_ const char* s, STRLEN len, STRLEN* xlen); #endif -PERL_CALLCONV int Perl_memcmp_byte_utf8(pTHX_ char *sbyte, STRLEN lbyte, char *sutf, STRLEN lutf); PERL_CALLCONV SV* Perl_mess(pTHX_ const char* pat, ...) #ifdef CHECK_FORMAT __attribute__((format(printf,pTHX_1,pTHX_2))) diff --git a/sv.c b/sv.c index f893fa6..27150d6 100644 --- a/sv.c +++ b/sv.c @@ -5349,6 +5349,7 @@ Perl_sv_eq(pTHX_ register SV *sv1, register SV *sv2) char *pv2; STRLEN cur2; I32 eq = 0; + char *tpv = Nullch; if (!sv1) { pv1 = ""; @@ -5364,13 +5365,35 @@ Perl_sv_eq(pTHX_ register SV *sv1, register SV *sv2) else pv2 = SvPV(sv2, cur2); - if (SvUTF8(sv1) == SvUTF8(sv2) || IN_BYTES) - eq = (cur1 == cur2) && memEQ(pv1, pv2, cur1); - else if (SvUTF8(sv1)) /* do not utf8ize the comparands as a side-effect */ - eq = !memcmp_byte_utf8(pv2, cur2, pv1, cur1); - else - eq = !memcmp_byte_utf8(pv1, cur1, pv2, cur2); + /* do not utf8ize the comparands as a side-effect */ + if (cur1 && cur2 && SvUTF8(sv1) != SvUTF8(sv2) && !IN_BYTES) { + bool is_utf8 = TRUE; + /* UTF-8ness differs */ + + if (SvUTF8(sv1)) { + /* sv1 is the UTF-8 one , If is equal it must be downgrade-able */ + char *pv = (char*)bytes_from_utf8((U8*)pv1, &cur1, &is_utf8); + if (pv != pv1) + pv1 = tpv = pv; + } + else { + /* sv2 is the UTF-8 one , If is equal it must be downgrade-able */ + char *pv = (char *)bytes_from_utf8((U8*)pv2, &cur2, &is_utf8); + if (pv != pv2) + pv2 = tpv = pv; + } + if (is_utf8) { + /* Downgrade not possible - cannot be eq */ + return FALSE; + } + } + + if (cur1 == cur2) + eq = memEQ(pv1, pv2, cur1); + if (tpv != Nullch) + Safefree(tpv); + return eq; } @@ -5390,7 +5413,9 @@ Perl_sv_cmp(pTHX_ register SV *sv1, register SV *sv2) { STRLEN cur1, cur2; char *pv1, *pv2; - I32 retval; + I32 cmp; + bool pv1tmp = FALSE; + bool pv2tmp = FALSE; if (!sv1) { pv1 = ""; @@ -5406,28 +5431,40 @@ Perl_sv_cmp(pTHX_ register SV *sv1, register SV *sv2) else pv2 = SvPV(sv2, cur2); + /* do not utf8ize the comparands as a side-effect */ + if (cur1 && cur2 && SvUTF8(sv1) != SvUTF8(sv2) && !IN_BYTES) { + if (SvUTF8(sv1)) { + pv2 = (char*)bytes_to_utf8((U8*)pv2, &cur2); + pv2tmp = TRUE; + } + else { + pv1 = (char*)bytes_to_utf8((U8*)pv1, &cur1); + pv1tmp = TRUE; + } + } + if (!cur1) { - return cur2 ? -1 : 0; + cmp = cur2 ? -1 : 0; } else if (!cur2) { - return 1; - } else if (SvUTF8(sv1) == SvUTF8(sv2) || IN_BYTES) { - retval = memcmp((void*)pv1, (void*)pv2, cur1 < cur2 ? cur1 : cur2); + cmp = 1; + } else { + I32 retval = memcmp((void*)pv1, (void*)pv2, cur1 < cur2 ? cur1 : cur2); if (retval) { - return retval < 0 ? -1 : 1; + cmp = retval < 0 ? -1 : 1; } else if (cur1 == cur2) { - return 0; - } else { - return cur1 < cur2 ? -1 : 1; + cmp = 0; + } else { + cmp = cur1 < cur2 ? -1 : 1; } - } else if (SvUTF8(sv1)) /* do not utf8ize the comparands as a side-effect */ - retval = -memcmp_byte_utf8(pv2, cur2, pv1, cur1); - else - retval = memcmp_byte_utf8(pv1, cur1, pv2, cur2); + } - if (retval) /* CURs taken into account already */ - return retval < 0 ? -1 : 1; - return 0; + if (pv1tmp) + Safefree(pv1); + if (pv2tmp) + Safefree(pv2); + + return cmp; } /* diff --git a/util.c b/util.c index 0e5c519..303bfa4 100644 --- a/util.c +++ b/util.c @@ -4348,42 +4348,5 @@ Perl_sv_nounlocking(pTHX_ SV *sv) { } -/* -=for apidoc memcmp_byte_utf8 - -Similar to memcmp(), but the first string is with bytes, the second -with utf8. Takes into account that the lengths may be different. -=cut -*/ -int -Perl_memcmp_byte_utf8(pTHX_ char *sb, STRLEN lbyte, char *su, STRLEN lutf) -{ - U8 *sbyte = (U8*)sb; - U8 *sutf = (U8*)su; - U8 *ebyte = sbyte + lbyte; - U8 *eutf = sutf + lutf; - - while (sbyte < ebyte) { - if (sutf >= eutf) - return 1; /* utf one shorter */ - if (NATIVE_IS_INVARIANT(*sbyte)) { - if (*sbyte != *sutf) - return *sbyte - *sutf; - sbyte++; sutf++; /* CONTINUE */ - } else if ((*sutf & UTF_CONTINUATION_MASK) == - (*sbyte >> UTF_ACCUMULATION_SHIFT)) { - if ((sutf[1] & UTF_CONTINUATION_MASK) != - (*sbyte & UTF_CONTINUATION_MASK)) - return (*sbyte & UTF_CONTINUATION_MASK) - - (*sutf & UTF_CONTINUATION_MASK); - sbyte++, sutf += 2; /* CONTINUE */ - } else - return (*sbyte >> UTF_ACCUMULATION_SHIFT) - - (*sutf & UTF_CONTINUATION_MASK); - } - if (sutf >= eutf) - return 0; - return -1; /* byte one shorter */ -}