From: Karl Williamson Date: Sat, 5 Jun 2010 17:12:47 +0000 (-0600) Subject: Change name of ibcmp to foldEQ X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=e6226b18246ce7d24213c41123114ac7967ed04f;p=p5sagit%2Fp5-mst-13.2.git Change name of ibcmp to foldEQ As discussed on p5p, ibcmp has different semantics from other cmp functions in that it returns a two-valued (true/false) result rather than a three-valued (less/equal/greater) one. It is therefore less confusing to have a name that implies true/false. There are three functions affected: ibcmp, ibcmp_locale and ibcmp_utf8. ibcmp is actually equivalent to foldNE, but for the same reason that things like 'unless' and 'until' are cautioned against, I changed the functions to foldEQ; the existing names, such as ibcmp_utf8, are now defined as macros that return the complement of the corresponding foldEQ function. This patch also changes the one file where turning ibcmp into a macro causes problems, so that it uses the new name. It also documents, for the first time, ibcmp, ibcmp_locale and their new names. 
--- diff --git a/embed.fnc b/embed.fnc index 01605a9..36d8c1a 100644 --- a/embed.fnc +++ b/embed.fnc @@ -499,9 +499,14 @@ Abmd |HE* |hv_store_ent |NULLOK HV *hv|NULLOK SV *key|NULLOK SV *val\ AbmM |SV** |hv_store_flags |NULLOK HV *hv|NULLOK const char *key \ |I32 klen|NULLOK SV *val|U32 hash|int flags Apd |void |hv_undef |NULLOK HV *hv -AnpP |I32 |ibcmp |NN const char* a|NN const char* b|I32 len -AnpP |I32 |ibcmp_locale |NN const char* a|NN const char* b|I32 len -Apd |I32 |ibcmp_utf8 |NN const char *s1|NULLOK char **pe1|UV l1 \ +Am |I32 |ibcmp |NN const char* a|NN const char* b|I32 len +AnpP |I32 |foldEQ |NN const char* a|NN const char* b|I32 len +Am |I32 |ibcmp_locale |NN const char* a|NN const char* b|I32 len +AnpP |I32 |foldEQ_locale |NN const char* a|NN const char* b|I32 len +Am |I32 |ibcmp_utf8 |NN const char *s1|NULLOK char **pe1|UV l1 \ + |bool u1|NN const char *s2|NULLOK char **pe2 \ + |UV l2|bool u2 +Apd |I32 |foldEQ_utf8 |NN const char *s1|NULLOK char **pe1|UV l1 \ |bool u1|NN const char *s2|NULLOK char **pe2 \ |UV l2|bool u2 #if defined(PERL_IN_DOIO_C) || defined(PERL_DECL_PROT) diff --git a/embed.h b/embed.h index 588c50a..80457a2 100644 --- a/embed.h +++ b/embed.h @@ -315,9 +315,9 @@ #endif #endif #define hv_undef Perl_hv_undef -#define ibcmp Perl_ibcmp -#define ibcmp_locale Perl_ibcmp_locale -#define ibcmp_utf8 Perl_ibcmp_utf8 +#define foldEQ Perl_foldEQ +#define foldEQ_locale Perl_foldEQ_locale +#define foldEQ_utf8 Perl_foldEQ_utf8 #if defined(PERL_IN_DOIO_C) || defined(PERL_DECL_PROT) #ifdef PERL_CORE #define ingroup S_ingroup @@ -2752,9 +2752,9 @@ #endif #endif #define hv_undef(a) Perl_hv_undef(aTHX_ a) -#define ibcmp Perl_ibcmp -#define ibcmp_locale Perl_ibcmp_locale -#define ibcmp_utf8(a,b,c,d,e,f,g,h) Perl_ibcmp_utf8(aTHX_ a,b,c,d,e,f,g,h) +#define foldEQ Perl_foldEQ +#define foldEQ_locale Perl_foldEQ_locale +#define foldEQ_utf8(a,b,c,d,e,f,g,h) Perl_foldEQ_utf8(aTHX_ a,b,c,d,e,f,g,h) #if defined(PERL_IN_DOIO_C) || defined(PERL_DECL_PROT) #ifdef 
PERL_CORE #define ingroup(a,b) S_ingroup(aTHX_ a,b) diff --git a/global.sym b/global.sym index 5ab0090..459f796 100644 --- a/global.sym +++ b/global.sym @@ -185,9 +185,9 @@ Perl_hv_store Perl_hv_store_ent Perl_hv_store_flags Perl_hv_undef -Perl_ibcmp -Perl_ibcmp_locale -Perl_ibcmp_utf8 +Perl_foldEQ +Perl_foldEQ_locale +Perl_foldEQ_utf8 Perl_init_stacks Perl_init_tm Perl_instr diff --git a/locale.c b/locale.c index 16ccce8..bd93cd6 100644 --- a/locale.c +++ b/locale.c @@ -522,23 +522,23 @@ Perl_init_i18nl10n(pTHX_ int printwarn) codeset = nl_langinfo(CODESET); #endif if (codeset) - utf8locale = (ibcmp(codeset, STR_WITH_LEN("UTF-8")) == 0 || - ibcmp(codeset, STR_WITH_LEN("UTF8") ) == 0); + utf8locale = (foldEQ(codeset, STR_WITH_LEN("UTF-8")) + || foldEQ(codeset, STR_WITH_LEN("UTF8") )); #if defined(USE_LOCALE) else { /* nl_langinfo(CODESET) is supposed to correctly * interpret the locale environment variables, * but just in case it fails, let's do this manually. */ if (lang) - utf8locale = (ibcmp(lang, STR_WITH_LEN("UTF-8")) == 0 || - ibcmp(lang, STR_WITH_LEN("UTF8") ) == 0); + utf8locale = (foldEQ(lang, STR_WITH_LEN("UTF-8")) + || foldEQ(lang, STR_WITH_LEN("UTF8") )); #ifdef USE_LOCALE_CTYPE if (curctype) - utf8locale = (ibcmp(curctype, STR_WITH_LEN("UTF-8")) == 0 || - ibcmp(curctype, STR_WITH_LEN("UTF8") ) == 0); + utf8locale = (foldEQ(curctype, STR_WITH_LEN("UTF-8")) + || foldEQ(curctype, STR_WITH_LEN("UTF8") )); #endif if (lc_all) - utf8locale = (ibcmp(lc_all, STR_WITH_LEN("UTF-8")) == 0 || - ibcmp(lc_all, STR_WITH_LEN("UTF8") ) == 0); + utf8locale = (foldEQ(lc_all, STR_WITH_LEN("UTF-8")) + || foldEQ(lc_all, STR_WITH_LEN("UTF8") )); } #endif /* USE_LOCALE */ if (utf8locale) diff --git a/proto.h b/proto.h index 9ce57d5..271107f 100644 --- a/proto.h +++ b/proto.h @@ -1109,24 +1109,36 @@ STATIC struct refcounted_he * S_refcounted_he_new_common(pTHX_ struct refcounted /* PERL_CALLCONV HE* Perl_hv_store_ent(pTHX_ HV *hv, SV *key, SV *val, U32 hash); */ /* 
PERL_CALLCONV SV** Perl_hv_store_flags(pTHX_ HV *hv, const char *key, I32 klen, SV *val, U32 hash, int flags); */ PERL_CALLCONV void Perl_hv_undef(pTHX_ HV *hv); -PERL_CALLCONV I32 Perl_ibcmp(const char* a, const char* b, I32 len) +/* PERL_CALLCONV I32 ibcmp(pTHX_ const char* a, const char* b, I32 len) + __attribute__nonnull__(pTHX_1) + __attribute__nonnull__(pTHX_2); */ + +PERL_CALLCONV I32 Perl_foldEQ(const char* a, const char* b, I32 len) __attribute__pure__ __attribute__nonnull__(1) __attribute__nonnull__(2); -#define PERL_ARGS_ASSERT_IBCMP \ +#define PERL_ARGS_ASSERT_FOLDEQ \ assert(a); assert(b) -PERL_CALLCONV I32 Perl_ibcmp_locale(const char* a, const char* b, I32 len) +/* PERL_CALLCONV I32 ibcmp_locale(pTHX_ const char* a, const char* b, I32 len) + __attribute__nonnull__(pTHX_1) + __attribute__nonnull__(pTHX_2); */ + +PERL_CALLCONV I32 Perl_foldEQ_locale(const char* a, const char* b, I32 len) __attribute__pure__ __attribute__nonnull__(1) __attribute__nonnull__(2); -#define PERL_ARGS_ASSERT_IBCMP_LOCALE \ +#define PERL_ARGS_ASSERT_FOLDEQ_LOCALE \ assert(a); assert(b) -PERL_CALLCONV I32 Perl_ibcmp_utf8(pTHX_ const char *s1, char **pe1, UV l1, bool u1, const char *s2, char **pe2, UV l2, bool u2) +/* PERL_CALLCONV I32 ibcmp_utf8(pTHX_ const char *s1, char **pe1, UV l1, bool u1, const char *s2, char **pe2, UV l2, bool u2) + __attribute__nonnull__(pTHX_1) + __attribute__nonnull__(pTHX_5); */ + +PERL_CALLCONV I32 Perl_foldEQ_utf8(pTHX_ const char *s1, char **pe1, UV l1, bool u1, const char *s2, char **pe2, UV l2, bool u2) __attribute__nonnull__(pTHX_1) __attribute__nonnull__(pTHX_5); -#define PERL_ARGS_ASSERT_IBCMP_UTF8 \ +#define PERL_ARGS_ASSERT_FOLDEQ_UTF8 \ assert(s1); assert(s2) #if defined(PERL_IN_DOIO_C) || defined(PERL_DECL_PROT) diff --git a/utf8.c b/utf8.c index d0be794..1f4192f 100644 --- a/utf8.c +++ b/utf8.c @@ -2502,10 +2502,10 @@ Perl_sv_uni_display(pTHX_ SV *dsv, SV *ssv, STRLEN pvlim, UV flags) } /* -=for apidoc ibcmp_utf8 +=for apidoc foldEQ_utf8 
Returns true if the leading portions of the strings s1 and s2 (either or both -of which may be in UTF-8) differ case-insensitively; false otherwise. +of which may be in UTF-8) are the same case-insensitively; false otherwise. How far into the strings to compare is determined by other input parameters. If u1 is true, the string s1 is assumed to be in UTF-8-encoded Unicode; @@ -2531,7 +2531,7 @@ reached for a successful match. Also, if the fold of a character is multiple characters, all of them must be matched (see tr21 reference below for 'folding'). -Upon a successful match (when the routine returns false), if pe1 is non-NULL, +Upon a successful match, if pe1 is non-NULL, it will be set to point to the beginning of the I<next> character of s1 beyond what was matched. Correspondingly for pe2 and s2. @@ -2541,7 +2541,7 @@ http://www.unicode.org/unicode/reports/tr21/ (Case Mappings). =cut */ I32 -Perl_ibcmp_utf8(pTHX_ const char *s1, char **pe1, register UV l1, bool u1, const char *s2, char **pe2, register UV l2, bool u2) +Perl_foldEQ_utf8(pTHX_ const char *s1, char **pe1, register UV l1, bool u1, const char *s2, char **pe2, register UV l2, bool u2) { dVAR; register const U8 *p1 = (const U8*)s1; /* Point to current char */ @@ -2558,7 +2558,7 @@ Perl_ibcmp_utf8(pTHX_ const char *s1, char **pe1, register UV l1, bool u1, const U8 natbuf[2]; /* Holds native 8-bit char converted to utf8; these always fit in 2 bytes */ - PERL_ARGS_ASSERT_IBCMP_UTF8; + PERL_ARGS_ASSERT_FOLDEQ_UTF8; if (pe1) { e1 = *(U8**)pe1; @@ -2634,7 +2634,7 @@ Perl_ibcmp_utf8(pTHX_ const char *s1, char **pe1, register UV l1, bool u1, const character */ || memNE((char*)f1, (char*)f2, fold_length)) { - return 1; /* mismatch */ + return 0; /* mismatch */ } /* Here, they matched, advance past them */ @@ -2658,7 +2658,7 @@ Perl_ibcmp_utf8(pTHX_ const char *s1, char **pe1, register UV l1, bool u1, const * character (which can happen when the fold of a character is more than one * character). */ if (! 
((g1 == 0 || p1 == g1) && (g2 == 0 || p2 == g2)) || n1 || n2) { - return 1; + return 0; } /* Successful match. Set output pointers */ @@ -2668,7 +2668,7 @@ Perl_ibcmp_utf8(pTHX_ const char *s1, char **pe1, register UV l1, bool u1, const if (pe2) { *pe2 = (char*)p2; } - return 0; + return 1; } /* diff --git a/utf8.h b/utf8.h index b0cfedf..8e6d4e0 100644 --- a/utf8.h +++ b/utf8.h @@ -20,6 +20,16 @@ #define uvuni_to_utf8(d, uv) uvuni_to_utf8_flags(d, uv, 0) #define is_utf8_string_loc(s, len, ep) is_utf8_string_loclen(s, len, ep, 0) +/* +=for apidoc ibcmp_utf8 + +This is a synonym for (! foldEQ_utf8()) + +=cut +*/ +#define ibcmp_utf8(s1, pe1, l1, u1, s2, pe2, l2, u2) \ + cBOOL(! foldEQ_utf8(s1, pe1, l1, u1, s2, pe2, l2, u2)) + #ifdef EBCDIC /* The equivalent of these macros but implementing UTF-EBCDIC are in the following header file: diff --git a/util.c b/util.c index 75c4808..d5f0f1f 100644 --- a/util.c +++ b/util.c @@ -878,37 +878,58 @@ Perl_screaminstr(pTHX_ SV *bigstr, SV *littlestr, I32 start_shift, I32 end_shift return NULL; } +/* +=for apidoc foldEQ + +Returns true if the leading len bytes of the strings s1 and s2 are the same +case-insensitively; false otherwise. Uppercase and lowercase ASCII range bytes +match themselves and their opposite case counterparts. Non-cased and non-ASCII +range bytes match only themselves. + +=cut +*/ + + I32 -Perl_ibcmp(const char *s1, const char *s2, register I32 len) +Perl_foldEQ(const char *s1, const char *s2, register I32 len) { register const U8 *a = (const U8 *)s1; register const U8 *b = (const U8 *)s2; - PERL_ARGS_ASSERT_IBCMP; + PERL_ARGS_ASSERT_FOLDEQ; while (len--) { if (*a != *b && *a != PL_fold[*b]) - return 1; + return 0; a++,b++; } - return 0; + return 1; } +/* +=for apidoc foldEQ_locale + +Returns true if the leading len bytes of the strings s1 and s2 are the same +case-insensitively in the current locale; false otherwise. 
+ +=cut +*/ + I32 -Perl_ibcmp_locale(const char *s1, const char *s2, register I32 len) +Perl_foldEQ_locale(const char *s1, const char *s2, register I32 len) { dVAR; register const U8 *a = (const U8 *)s1; register const U8 *b = (const U8 *)s2; - PERL_ARGS_ASSERT_IBCMP_LOCALE; + PERL_ARGS_ASSERT_FOLDEQ_LOCALE; while (len--) { if (*a != *b && *a != PL_fold_locale[*b]) - return 1; + return 0; a++,b++; } - return 0; + return 1; } /* copy a string to a safe spot */ diff --git a/util.h b/util.h index 6eab055..3981656 100644 --- a/util.h +++ b/util.h @@ -39,6 +39,21 @@ #endif /* VMS */ /* +=for apidoc ibcmp + +This is a synonym for (! foldEQ()) + +=for apidoc ibcmp_locale + +This is a synonym for (! foldEQ_locale()) + +=cut +*/ +#define ibcmp(s1, s2, len) cBOOL(! foldEQ(s1, s2, len)) +#define ibcmp_locale(s1, s2, len) cBOOL(! foldEQ_locale(s1, s2, len)) + + +/* * Local variables: * c-indentation-style: bsd * c-basic-offset: 4