From: Jarkko Hietaniemi Date: Fri, 2 Nov 2001 05:18:45 +0000 (+0000) Subject: Unicode: add ToFold mapping. Not used yet; but basically X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=b4e400f9f0f3998e635cdce0c2d2e790cbe42caa;p=p5sagit%2Fp5-mst-13.2.git Unicode: add ToFold mapping. Not used yet; but basically a more useful mapping for caseless aka case-ignoring than doing either lc($a) eq lc($b) or uc($a) eq uc($b); the full algorithm for creating the foldings uses equivalence classes, see http://www.unicode.org/unicode/reports/tr21/ Hopefully this feature will be used in //i. (The folding tables were introduced by #12689.) p4raw-id: //depot/perl@12807 --- diff --git a/embed.h b/embed.h index 44ce7a6..bef032d 100644 --- a/embed.h +++ b/embed.h @@ -725,6 +725,7 @@ #define to_utf8_lower Perl_to_utf8_lower #define to_utf8_upper Perl_to_utf8_upper #define to_utf8_title Perl_to_utf8_title +#define to_utf8_fold Perl_to_utf8_fold #if defined(UNLINK_ALL_VERSIONS) #define unlnk Perl_unlnk #endif @@ -2238,6 +2239,7 @@ #define to_utf8_lower(a,b,c) Perl_to_utf8_lower(aTHX_ a,b,c) #define to_utf8_upper(a,b,c) Perl_to_utf8_upper(aTHX_ a,b,c) #define to_utf8_title(a,b,c) Perl_to_utf8_title(aTHX_ a,b,c) +#define to_utf8_fold(a,b,c) Perl_to_utf8_fold(aTHX_ a,b,c) #if defined(UNLINK_ALL_VERSIONS) #define unlnk(a) Perl_unlnk(aTHX_ a) #endif diff --git a/embed.pl b/embed.pl index f772608..42b714b 100755 --- a/embed.pl +++ b/embed.pl @@ -1824,6 +1824,7 @@ Ap |UV |to_utf8_case |U8 *p|U8* ustrp|STRLEN *lenp \ Ap |UV |to_utf8_lower |U8 *p|U8* ustrp|STRLEN *lenp Ap |UV |to_utf8_upper |U8 *p|U8* ustrp|STRLEN *lenp Ap |UV |to_utf8_title |U8 *p|U8* ustrp|STRLEN *lenp +Ap |UV |to_utf8_fold |U8 *p|U8* ustrp|STRLEN *lenp #if defined(UNLINK_ALL_VERSIONS) Ap |I32 |unlnk |char* f #endif diff --git a/embedvar.h b/embedvar.h index 95550e6..7748218 100644 --- a/embedvar.h +++ b/embedvar.h @@ -418,6 +418,7 @@ #define PL_utf8_print (PERL_GET_INTERP->Iutf8_print) #define PL_utf8_punct (PERL_GET_INTERP->Iutf8_punct) #define PL_utf8_space (PERL_GET_INTERP->Iutf8_space) +#define PL_utf8_tofold (PERL_GET_INTERP->Iutf8_tofold) #define PL_utf8_tolower (PERL_GET_INTERP->Iutf8_tolower) #define PL_utf8_totitle (PERL_GET_INTERP->Iutf8_totitle) #define PL_utf8_toupper (PERL_GET_INTERP->Iutf8_toupper) @@ -710,6 +711,7 @@ #define PL_utf8_print (vTHX->Iutf8_print) #define PL_utf8_punct (vTHX->Iutf8_punct) #define PL_utf8_space (vTHX->Iutf8_space) +#define PL_utf8_tofold (vTHX->Iutf8_tofold) #define PL_utf8_tolower (vTHX->Iutf8_tolower) #define PL_utf8_totitle (vTHX->Iutf8_totitle) #define PL_utf8_toupper (vTHX->Iutf8_toupper) @@ -1005,6 +1007,7 @@ #define PL_Iutf8_print PL_utf8_print #define PL_Iutf8_punct PL_utf8_punct #define PL_Iutf8_space PL_utf8_space +#define PL_Iutf8_tofold PL_utf8_tofold #define PL_Iutf8_tolower PL_utf8_tolower #define PL_Iutf8_totitle PL_utf8_totitle #define PL_Iutf8_toupper PL_utf8_toupper diff --git a/intrpvar.h b/intrpvar.h index 63c9397..2d47c8b 100644 --- a/intrpvar.h +++ b/intrpvar.h @@ -385,6 +385,7 @@ PERLVAR(Iutf8_mark, SV *) PERLVAR(Iutf8_toupper, SV *) PERLVAR(Iutf8_totitle, SV *) PERLVAR(Iutf8_tolower, SV *) +PERLVAR(Iutf8_tofold, SV *) PERLVAR(Ilast_swash_hv, HV *) PERLVAR(Ilast_swash_klen, U32) PERLVARA(Ilast_swash_key,10, U8) diff --git a/perl.c b/perl.c index 7192122..73212c0 100644 --- a/perl.c +++ b/perl.c @@ -666,6 +666,7 @@ perl_destruct(pTHXx) SvREFCNT_dec(PL_utf8_toupper); SvREFCNT_dec(PL_utf8_totitle); SvREFCNT_dec(PL_utf8_tolower); + SvREFCNT_dec(PL_utf8_tofold); PL_utf8_alnum = Nullsv; PL_utf8_alnumc = Nullsv; PL_utf8_ascii = Nullsv; @@ -683,6 +684,7 @@ perl_destruct(pTHXx) PL_utf8_toupper = Nullsv; PL_utf8_totitle = Nullsv; PL_utf8_tolower = Nullsv; + PL_utf8_tofold = Nullsv; if (!specialWARN(PL_compiling.cop_warnings)) SvREFCNT_dec(PL_compiling.cop_warnings); diff --git a/perlapi.h b/perlapi.h index 0592374..c247aae 100644 --- a/perlapi.h +++ b/perlapi.h @@ -573,6 +573,8 @@ END_EXTERN_C #define PL_utf8_punct (*Perl_Iutf8_punct_ptr(aTHX)) #undef PL_utf8_space #define PL_utf8_space (*Perl_Iutf8_space_ptr(aTHX)) +#undef PL_utf8_tofold +#define PL_utf8_tofold (*Perl_Iutf8_tofold_ptr(aTHX)) #undef PL_utf8_tolower #define PL_utf8_tolower (*Perl_Iutf8_tolower_ptr(aTHX)) #undef PL_utf8_totitle diff --git a/sv.c b/sv.c index afd2aad..2da1291 100644 --- a/sv.c +++ b/sv.c @@ -10096,6 +10096,7 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags, PL_utf8_toupper = sv_dup_inc(proto_perl->Iutf8_toupper, param); PL_utf8_totitle = sv_dup_inc(proto_perl->Iutf8_totitle, param); PL_utf8_tolower = sv_dup_inc(proto_perl->Iutf8_tolower, param); + PL_utf8_tofold = sv_dup_inc(proto_perl->Iutf8_tofold, param); /* swatch cache */ PL_last_swash_hv = Nullhv; /* reinits on demand */ diff --git a/utf8.c b/utf8.c index ac90a38..768db07 100644 --- a/utf8.c +++ b/utf8.c @@ -1223,21 +1223,28 @@ UV Perl_to_utf8_upper(pTHX_ U8 *p, U8* ustrp, STRLEN *lenp) { return Perl_to_utf8_case(aTHX_ p, ustrp, lenp, - &PL_utf8_toupper, "ToUpper", "utf8::ToSpecUpper"); + &PL_utf8_toupper, "ToUpper", "utf8::ToSpecUpper"); } UV Perl_to_utf8_title(pTHX_ U8 *p, U8* ustrp, STRLEN *lenp) { return Perl_to_utf8_case(aTHX_ p, ustrp, lenp, - &PL_utf8_totitle, "ToTitle", "utf8::ToSpecTitle"); + &PL_utf8_totitle, "ToTitle", "utf8::ToSpecTitle"); } UV Perl_to_utf8_lower(pTHX_ U8 *p, U8* ustrp, STRLEN *lenp) { return Perl_to_utf8_case(aTHX_ p, ustrp, lenp, - &PL_utf8_tolower, "ToLower", "utf8::ToSpecLower"); + &PL_utf8_tolower, "ToLower", "utf8::ToSpecLower"); +} + +UV +Perl_to_utf8_fold(pTHX_ U8 *p, U8* ustrp, STRLEN *lenp) +{ + return Perl_to_utf8_case(aTHX_ p, ustrp, lenp, + &PL_utf8_tofold, "ToFold", "utf8::ToSpecFold"); } /* a "swash" is a swatch hash */