From: Jarkko Hietaniemi Date: Fri, 30 Nov 2001 00:49:35 +0000 (+0000) Subject: Make to Unicode character functions to use UVs X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=84afefe65e567dcd9633a77e26cb0a95826679dd;p=p5sagit%2Fp5-mst-13.2.git Make to Unicode character functions to use UVs instead of U32s and add to_uni_fold(). p4raw-id: //depot/perl@13374 --- diff --git a/embed.pl b/embed.pl index 02327d9..8992809 100755 --- a/embed.pl +++ b/embed.pl @@ -1345,37 +1345,37 @@ p |bool |io_close |IO* io|bool not_implicit p |OP* |invert |OP* cmd dp |bool |is_gv_magical |char *name|STRLEN len|U32 flags p |I32 |is_lvalue_sub -Ap |bool |is_uni_alnum |U32 c -Ap |bool |is_uni_alnumc |U32 c -Ap |bool |is_uni_idfirst |U32 c -Ap |bool |is_uni_alpha |U32 c -Ap |bool |is_uni_ascii |U32 c -Ap |bool |is_uni_space |U32 c -Ap |bool |is_uni_cntrl |U32 c -Ap |bool |is_uni_graph |U32 c -Ap |bool |is_uni_digit |U32 c -Ap |bool |is_uni_upper |U32 c -Ap |bool |is_uni_lower |U32 c -Ap |bool |is_uni_print |U32 c -Ap |bool |is_uni_punct |U32 c -Ap |bool |is_uni_xdigit |U32 c -Ap |U32 |to_uni_upper |U32 c|U8 *p|STRLEN *lenp -Ap |U32 |to_uni_title |U32 c|U8 *p|STRLEN *lenp -Ap |U32 |to_uni_lower |U32 c|U8 *p|STRLEN *lenp -Ap |bool |is_uni_alnum_lc|U32 c -Ap |bool |is_uni_alnumc_lc|U32 c -Ap |bool |is_uni_idfirst_lc|U32 c -Ap |bool |is_uni_alpha_lc|U32 c -Ap |bool |is_uni_ascii_lc|U32 c -Ap |bool |is_uni_space_lc|U32 c -Ap |bool |is_uni_cntrl_lc|U32 c -Ap |bool |is_uni_graph_lc|U32 c -Ap |bool |is_uni_digit_lc|U32 c -Ap |bool |is_uni_upper_lc|U32 c -Ap |bool |is_uni_lower_lc|U32 c -Ap |bool |is_uni_print_lc|U32 c -Ap |bool |is_uni_punct_lc|U32 c -Ap |bool |is_uni_xdigit_lc|U32 c +Ap |bool |is_uni_alnum |UV c +Ap |bool |is_uni_alnumc |UV c +Ap |bool |is_uni_idfirst |UV c +Ap |bool |is_uni_alpha |UV c +Ap |bool |is_uni_ascii |UV c +Ap |bool |is_uni_space |UV c +Ap |bool |is_uni_cntrl |UV c +Ap |bool |is_uni_graph |UV c +Ap |bool |is_uni_digit |UV c +Ap |bool |is_uni_upper |UV c +Ap |bool |is_uni_lower |UV c +Ap |bool |is_uni_print |UV c +Ap |bool |is_uni_punct |UV c +Ap |bool |is_uni_xdigit |UV c +Ap |UV |to_uni_upper |UV c|U8 *p|STRLEN *lenp +Ap |UV |to_uni_title |UV c|U8 *p|STRLEN *lenp +Ap |UV |to_uni_lower |UV c|U8 *p|STRLEN *lenp +Ap |bool |is_uni_alnum_lc|UV c +Ap |bool |is_uni_alnumc_lc|UV c +Ap |bool |is_uni_idfirst_lc|UV c +Ap |bool |is_uni_alpha_lc|UV c +Ap |bool |is_uni_ascii_lc|UV c +Ap |bool |is_uni_space_lc|UV c +Ap |bool |is_uni_cntrl_lc|UV c +Ap |bool |is_uni_graph_lc|UV c +Ap |bool |is_uni_digit_lc|UV c +Ap |bool |is_uni_upper_lc|UV c +Ap |bool |is_uni_lower_lc|UV c +Ap |bool |is_uni_print_lc|UV c +Ap |bool |is_uni_punct_lc|UV c +Ap |bool |is_uni_xdigit_lc|UV c Apd |STRLEN |is_utf8_char |U8 *p Apd |bool |is_utf8_string |U8 *s|STRLEN len Ap |bool |is_utf8_alnum |U8 *p diff --git a/proto.h b/proto.h index 1073831..9bf7b3f 100644 --- a/proto.h +++ b/proto.h @@ -327,37 +327,37 @@ PERL_CALLCONV bool Perl_io_close(pTHX_ IO* io, bool not_implicit); PERL_CALLCONV OP* Perl_invert(pTHX_ OP* cmd); PERL_CALLCONV bool Perl_is_gv_magical(pTHX_ char *name, STRLEN len, U32 flags); PERL_CALLCONV I32 Perl_is_lvalue_sub(pTHX); -PERL_CALLCONV bool Perl_is_uni_alnum(pTHX_ U32 c); -PERL_CALLCONV bool Perl_is_uni_alnumc(pTHX_ U32 c); -PERL_CALLCONV bool Perl_is_uni_idfirst(pTHX_ U32 c); -PERL_CALLCONV bool Perl_is_uni_alpha(pTHX_ U32 c); -PERL_CALLCONV bool Perl_is_uni_ascii(pTHX_ U32 c); -PERL_CALLCONV bool Perl_is_uni_space(pTHX_ U32 c); -PERL_CALLCONV bool Perl_is_uni_cntrl(pTHX_ U32 c); -PERL_CALLCONV bool Perl_is_uni_graph(pTHX_ U32 c); -PERL_CALLCONV bool Perl_is_uni_digit(pTHX_ U32 c); -PERL_CALLCONV bool Perl_is_uni_upper(pTHX_ U32 c); -PERL_CALLCONV bool Perl_is_uni_lower(pTHX_ U32 c); -PERL_CALLCONV bool Perl_is_uni_print(pTHX_ U32 c); -PERL_CALLCONV bool Perl_is_uni_punct(pTHX_ U32 c); -PERL_CALLCONV bool Perl_is_uni_xdigit(pTHX_ U32 c); -PERL_CALLCONV U32 Perl_to_uni_upper(pTHX_ U32 c, U8 *p, STRLEN *lenp); -PERL_CALLCONV U32 Perl_to_uni_title(pTHX_ U32 c, U8 *p, STRLEN *lenp); -PERL_CALLCONV U32 Perl_to_uni_lower(pTHX_ U32 c, U8 *p, STRLEN *lenp); -PERL_CALLCONV bool Perl_is_uni_alnum_lc(pTHX_ U32 c); -PERL_CALLCONV bool Perl_is_uni_alnumc_lc(pTHX_ U32 c); -PERL_CALLCONV bool Perl_is_uni_idfirst_lc(pTHX_ U32 c); -PERL_CALLCONV bool Perl_is_uni_alpha_lc(pTHX_ U32 c); -PERL_CALLCONV bool Perl_is_uni_ascii_lc(pTHX_ U32 c); -PERL_CALLCONV bool Perl_is_uni_space_lc(pTHX_ U32 c); -PERL_CALLCONV bool Perl_is_uni_cntrl_lc(pTHX_ U32 c); -PERL_CALLCONV bool Perl_is_uni_graph_lc(pTHX_ U32 c); -PERL_CALLCONV bool Perl_is_uni_digit_lc(pTHX_ U32 c); -PERL_CALLCONV bool Perl_is_uni_upper_lc(pTHX_ U32 c); -PERL_CALLCONV bool Perl_is_uni_lower_lc(pTHX_ U32 c); -PERL_CALLCONV bool Perl_is_uni_print_lc(pTHX_ U32 c); -PERL_CALLCONV bool Perl_is_uni_punct_lc(pTHX_ U32 c); -PERL_CALLCONV bool Perl_is_uni_xdigit_lc(pTHX_ U32 c); +PERL_CALLCONV bool Perl_is_uni_alnum(pTHX_ UV c); +PERL_CALLCONV bool Perl_is_uni_alnumc(pTHX_ UV c); +PERL_CALLCONV bool Perl_is_uni_idfirst(pTHX_ UV c); +PERL_CALLCONV bool Perl_is_uni_alpha(pTHX_ UV c); +PERL_CALLCONV bool Perl_is_uni_ascii(pTHX_ UV c); +PERL_CALLCONV bool Perl_is_uni_space(pTHX_ UV c); +PERL_CALLCONV bool Perl_is_uni_cntrl(pTHX_ UV c); +PERL_CALLCONV bool Perl_is_uni_graph(pTHX_ UV c); +PERL_CALLCONV bool Perl_is_uni_digit(pTHX_ UV c); +PERL_CALLCONV bool Perl_is_uni_upper(pTHX_ UV c); +PERL_CALLCONV bool Perl_is_uni_lower(pTHX_ UV c); +PERL_CALLCONV bool Perl_is_uni_print(pTHX_ UV c); +PERL_CALLCONV bool Perl_is_uni_punct(pTHX_ UV c); +PERL_CALLCONV bool Perl_is_uni_xdigit(pTHX_ UV c); +PERL_CALLCONV UV Perl_to_uni_upper(pTHX_ UV c, U8 *p, STRLEN *lenp); +PERL_CALLCONV UV Perl_to_uni_title(pTHX_ UV c, U8 *p, STRLEN *lenp); +PERL_CALLCONV UV Perl_to_uni_lower(pTHX_ UV c, U8 *p, STRLEN *lenp); +PERL_CALLCONV bool Perl_is_uni_alnum_lc(pTHX_ UV c); +PERL_CALLCONV bool Perl_is_uni_alnumc_lc(pTHX_ UV c); +PERL_CALLCONV bool Perl_is_uni_idfirst_lc(pTHX_ UV c); +PERL_CALLCONV bool Perl_is_uni_alpha_lc(pTHX_ UV c); +PERL_CALLCONV bool Perl_is_uni_ascii_lc(pTHX_ UV c); +PERL_CALLCONV bool Perl_is_uni_space_lc(pTHX_ UV c); +PERL_CALLCONV bool Perl_is_uni_cntrl_lc(pTHX_ UV c); +PERL_CALLCONV bool Perl_is_uni_graph_lc(pTHX_ UV c); +PERL_CALLCONV bool Perl_is_uni_digit_lc(pTHX_ UV c); +PERL_CALLCONV bool Perl_is_uni_upper_lc(pTHX_ UV c); +PERL_CALLCONV bool Perl_is_uni_lower_lc(pTHX_ UV c); +PERL_CALLCONV bool Perl_is_uni_print_lc(pTHX_ UV c); +PERL_CALLCONV bool Perl_is_uni_punct_lc(pTHX_ UV c); +PERL_CALLCONV bool Perl_is_uni_xdigit_lc(pTHX_ UV c); PERL_CALLCONV STRLEN Perl_is_utf8_char(pTHX_ U8 *p); PERL_CALLCONV bool Perl_is_utf8_string(pTHX_ U8 *s, STRLEN len); PERL_CALLCONV bool Perl_is_utf8_alnum(pTHX_ U8 *p); diff --git a/utf8.c b/utf8.c index d7b0784..1a92962 100644 --- a/utf8.c +++ b/utf8.c @@ -796,7 +796,7 @@ Perl_utf16_to_utf8_reversed(pTHX_ U8* p, U8* d, I32 bytelen, I32 *newlen) /* for now these are all defined (inefficiently) in terms of the utf8 versions */ bool -Perl_is_uni_alnum(pTHX_ U32 c) +Perl_is_uni_alnum(pTHX_ UV c) { U8 tmpbuf[UTF8_MAXLEN+1]; uvchr_to_utf8(tmpbuf, (UV)c); @@ -804,7 +804,7 @@ Perl_is_uni_alnum(pTHX_ U32 c) } bool -Perl_is_uni_alnumc(pTHX_ U32 c) +Perl_is_uni_alnumc(pTHX_ UV c) { U8 tmpbuf[UTF8_MAXLEN+1]; uvchr_to_utf8(tmpbuf, (UV)c); @@ -812,7 +812,7 @@ Perl_is_uni_alnumc(pTHX_ U32 c) } bool -Perl_is_uni_idfirst(pTHX_ U32 c) +Perl_is_uni_idfirst(pTHX_ UV c) { U8 tmpbuf[UTF8_MAXLEN+1]; uvchr_to_utf8(tmpbuf, (UV)c); @@ -820,7 +820,7 @@ Perl_is_uni_idfirst(pTHX_ U32 c) } bool -Perl_is_uni_alpha(pTHX_ U32 c) +Perl_is_uni_alpha(pTHX_ UV c) { U8 tmpbuf[UTF8_MAXLEN+1]; uvchr_to_utf8(tmpbuf, (UV)c); @@ -828,7 +828,7 @@ Perl_is_uni_alpha(pTHX_ U32 c) } bool -Perl_is_uni_ascii(pTHX_ U32 c) +Perl_is_uni_ascii(pTHX_ UV c) { U8 tmpbuf[UTF8_MAXLEN+1]; uvchr_to_utf8(tmpbuf, (UV)c); @@ -836,7 +836,7 @@ Perl_is_uni_ascii(pTHX_ U32 c) } bool -Perl_is_uni_space(pTHX_ U32 c) +Perl_is_uni_space(pTHX_ UV c) { U8 tmpbuf[UTF8_MAXLEN+1]; uvchr_to_utf8(tmpbuf, (UV)c); @@ -844,7 +844,7 @@ Perl_is_uni_space(pTHX_ U32 c) } bool -Perl_is_uni_digit(pTHX_ U32 c) +Perl_is_uni_digit(pTHX_ UV c) { U8 tmpbuf[UTF8_MAXLEN+1]; uvchr_to_utf8(tmpbuf, (UV)c); @@ -852,7 +852,7 @@ Perl_is_uni_digit(pTHX_ U32 c) } bool -Perl_is_uni_upper(pTHX_ U32 c) +Perl_is_uni_upper(pTHX_ UV c) { U8 tmpbuf[UTF8_MAXLEN+1]; uvchr_to_utf8(tmpbuf, (UV)c); @@ -860,7 +860,7 @@ Perl_is_uni_upper(pTHX_ U32 c) } bool -Perl_is_uni_lower(pTHX_ U32 c) +Perl_is_uni_lower(pTHX_ UV c) { U8 tmpbuf[UTF8_MAXLEN+1]; uvchr_to_utf8(tmpbuf, (UV)c); @@ -868,7 +868,7 @@ Perl_is_uni_lower(pTHX_ U32 c) } bool -Perl_is_uni_cntrl(pTHX_ U32 c) +Perl_is_uni_cntrl(pTHX_ UV c) { U8 tmpbuf[UTF8_MAXLEN+1]; uvchr_to_utf8(tmpbuf, (UV)c); @@ -876,7 +876,7 @@ Perl_is_uni_cntrl(pTHX_ U32 c) } bool -Perl_is_uni_graph(pTHX_ U32 c) +Perl_is_uni_graph(pTHX_ UV c) { U8 tmpbuf[UTF8_MAXLEN+1]; uvchr_to_utf8(tmpbuf, (UV)c); @@ -884,7 +884,7 @@ Perl_is_uni_graph(pTHX_ U32 c) } bool -Perl_is_uni_print(pTHX_ U32 c) +Perl_is_uni_print(pTHX_ UV c) { U8 tmpbuf[UTF8_MAXLEN+1]; uvchr_to_utf8(tmpbuf, (UV)c); @@ -892,7 +892,7 @@ Perl_is_uni_print(pTHX_ U32 c) } bool -Perl_is_uni_punct(pTHX_ U32 c) +Perl_is_uni_punct(pTHX_ UV c) { U8 tmpbuf[UTF8_MAXLEN+1]; uvchr_to_utf8(tmpbuf, (UV)c); @@ -900,119 +900,127 @@ Perl_is_uni_punct(pTHX_ U32 c) } bool -Perl_is_uni_xdigit(pTHX_ U32 c) +Perl_is_uni_xdigit(pTHX_ UV c) { U8 tmpbuf[UTF8_MAXLEN*2+1]; uvchr_to_utf8(tmpbuf, (UV)c); return is_utf8_xdigit(tmpbuf); } -U32 -Perl_to_uni_upper(pTHX_ U32 c, U8* p, STRLEN *lenp) +UV +Perl_to_uni_upper(pTHX_ UV c, U8* p, STRLEN *lenp) { U8 tmpbuf[UTF8_MAXLEN*2+1]; uvchr_to_utf8(tmpbuf, (UV)c); return to_utf8_upper(tmpbuf, p, lenp); } -U32 -Perl_to_uni_title(pTHX_ U32 c, U8* p, STRLEN *lenp) +UV +Perl_to_uni_title(pTHX_ UV c, U8* p, STRLEN *lenp) { U8 tmpbuf[UTF8_MAXLEN*2+1]; uvchr_to_utf8(tmpbuf, (UV)c); return to_utf8_title(tmpbuf, p, lenp); } -U32 -Perl_to_uni_lower(pTHX_ U32 c, U8* p, STRLEN *lenp) +UV +Perl_to_uni_lower(pTHX_ UV c, U8* p, STRLEN *lenp) { U8 tmpbuf[UTF8_MAXLEN+1]; uvchr_to_utf8(tmpbuf, (UV)c); return to_utf8_lower(tmpbuf, p, lenp); } +UV +Perl_to_uni_fold(pTHX_ UV c, U8* p, STRLEN *lenp) +{ + U8 tmpbuf[UTF8_MAXLEN+1]; + uvchr_to_utf8(tmpbuf, (UV)c); + return to_utf8_fold(tmpbuf, p, lenp); +} + /* for now these all assume no locale info available for Unicode > 255 */ bool -Perl_is_uni_alnum_lc(pTHX_ U32 c) +Perl_is_uni_alnum_lc(pTHX_ UV c) { return is_uni_alnum(c); /* XXX no locale support yet */ } bool -Perl_is_uni_alnumc_lc(pTHX_ U32 c) +Perl_is_uni_alnumc_lc(pTHX_ UV c) { return is_uni_alnumc(c); /* XXX no locale support yet */ } bool -Perl_is_uni_idfirst_lc(pTHX_ U32 c) +Perl_is_uni_idfirst_lc(pTHX_ UV c) { return is_uni_idfirst(c); /* XXX no locale support yet */ } bool -Perl_is_uni_alpha_lc(pTHX_ U32 c) +Perl_is_uni_alpha_lc(pTHX_ UV c) { return is_uni_alpha(c); /* XXX no locale support yet */ } bool -Perl_is_uni_ascii_lc(pTHX_ U32 c) +Perl_is_uni_ascii_lc(pTHX_ UV c) { return is_uni_ascii(c); /* XXX no locale support yet */ } bool -Perl_is_uni_space_lc(pTHX_ U32 c) +Perl_is_uni_space_lc(pTHX_ UV c) { return is_uni_space(c); /* XXX no locale support yet */ } bool -Perl_is_uni_digit_lc(pTHX_ U32 c) +Perl_is_uni_digit_lc(pTHX_ UV c) { return is_uni_digit(c); /* XXX no locale support yet */ } bool -Perl_is_uni_upper_lc(pTHX_ U32 c) +Perl_is_uni_upper_lc(pTHX_ UV c) { return is_uni_upper(c); /* XXX no locale support yet */ } bool -Perl_is_uni_lower_lc(pTHX_ U32 c) +Perl_is_uni_lower_lc(pTHX_ UV c) { return is_uni_lower(c); /* XXX no locale support yet */ } bool -Perl_is_uni_cntrl_lc(pTHX_ U32 c) +Perl_is_uni_cntrl_lc(pTHX_ UV c) { return is_uni_cntrl(c); /* XXX no locale support yet */ } bool -Perl_is_uni_graph_lc(pTHX_ U32 c) +Perl_is_uni_graph_lc(pTHX_ UV c) { return is_uni_graph(c); /* XXX no locale support yet */ } bool -Perl_is_uni_print_lc(pTHX_ U32 c) +Perl_is_uni_print_lc(pTHX_ UV c) { return is_uni_print(c); /* XXX no locale support yet */ } bool -Perl_is_uni_punct_lc(pTHX_ U32 c) +Perl_is_uni_punct_lc(pTHX_ UV c) { return is_uni_punct(c); /* XXX no locale support yet */ } bool -Perl_is_uni_xdigit_lc(pTHX_ U32 c) +Perl_is_uni_xdigit_lc(pTHX_ UV c) { return is_uni_xdigit(c); /* XXX no locale support yet */ }