From: Jarkko Hietaniemi Date: Tue, 1 Jan 2002 17:53:44 +0000 (+0000) Subject: Document the to_utf8_*() functions. X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=d3e7953240b22e0c70f8c91e5ca8c47138b3ac50;p=p5sagit%2Fp5-mst-13.2.git Document the to_utf8_*() functions. p4raw-id: //depot/perl@14002 --- diff --git a/embed.pl b/embed.pl index 32e7925..67d7d0c 100755 --- a/embed.pl +++ b/embed.pl @@ -1827,10 +1827,10 @@ Ap |void |taint_env Ap |void |taint_proper |const char* f|const char* s Apd |UV |to_utf8_case |U8 *p|U8* ustrp|STRLEN *lenp \ |SV **swash|char *normal|char *special -Ap |UV |to_utf8_lower |U8 *p|U8* ustrp|STRLEN *lenp -Ap |UV |to_utf8_upper |U8 *p|U8* ustrp|STRLEN *lenp -Ap |UV |to_utf8_title |U8 *p|U8* ustrp|STRLEN *lenp -Ap |UV |to_utf8_fold |U8 *p|U8* ustrp|STRLEN *lenp +Apd |UV |to_utf8_lower |U8 *p|U8* ustrp|STRLEN *lenp +Apd |UV |to_utf8_upper |U8 *p|U8* ustrp|STRLEN *lenp +Apd |UV |to_utf8_title |U8 *p|U8* ustrp|STRLEN *lenp +Apd |UV |to_utf8_fold |U8 *p|U8* ustrp|STRLEN *lenp #if defined(UNLINK_ALL_VERSIONS) Ap |I32 |unlnk |char* f #endif diff --git a/pod/perlapi.pod b/pod/perlapi.pod index 847bc77..6228c75 100644 --- a/pod/perlapi.pod +++ b/pod/perlapi.pod @@ -4333,6 +4333,70 @@ to the hash is by Perl_to_utf8_case(). =for hackers Found in file utf8.c +=item to_utf8_fold + +Convert the UTF-8 encoded character at p to its foldcase version and +store that in UTF-8 in ustrp and its length in bytes in lenp. Note +that the ustrp buffer needs to be at least UTF8_MAXLEN_FOLD+1 bytes since the +foldcase version may be longer than the original character (up to +three characters). + +The first character of the foldcased version is returned +(but note, as explained above, that there may be more).
+ + UV to_utf8_fold(U8 *p, U8* ustrp, STRLEN *lenp) + +=for hackers +Found in file utf8.c + +=item to_utf8_lower + +Convert the UTF-8 encoded character at p to its lowercase version and +store that in UTF-8 in ustrp and its length in bytes in lenp. Note +that the ustrp buffer needs to be at least UTF8_MAXLEN_UCLC+1 bytes since the +lowercase version may be longer than the original character (up to two +characters). + +The first character of the lowercased version is returned +(but note, as explained above, that there may be more). + + UV to_utf8_lower(U8 *p, U8* ustrp, STRLEN *lenp) + +=for hackers +Found in file utf8.c + +=item to_utf8_title + +Convert the UTF-8 encoded character at p to its titlecase version and +store that in UTF-8 in ustrp and its length in bytes in lenp. Note +that the ustrp buffer needs to be at least UTF8_MAXLEN_UCLC+1 bytes since the +titlecase version may be longer than the original character (up to two +characters). + +The first character of the titlecased version is returned +(but note, as explained above, that there may be more). + + UV to_utf8_title(U8 *p, U8* ustrp, STRLEN *lenp) + +=for hackers +Found in file utf8.c + +=item to_utf8_upper + +Convert the UTF-8 encoded character at p to its uppercase version and +store that in UTF-8 in ustrp and its length in bytes in lenp. Note +that the ustrp buffer needs to be at least UTF8_MAXLEN_UCLC+1 bytes since the +uppercase version may be longer than the original character (up to two +characters). + +The first character of the uppercased version is returned +(but note, as explained above, that there may be more).
+ + UV to_utf8_upper(U8 *p, U8* ustrp, STRLEN *lenp) + +=for hackers +Found in file utf8.c + =item utf8n_to_uvchr Returns the native character value of the first character in the string C diff --git a/utf8.c b/utf8.c index 296cb6a..500ac4b 100644 --- a/utf8.c +++ b/utf8.c @@ -1315,7 +1315,7 @@ Perl_to_utf8_case(pTHX_ U8 *p, U8* ustrp, STRLEN *lenp, SV **swashp,char *normal ustrp[1] = UTF8_EIGHT_BIT_LO(c); *lenp = 2; } - return 0; + return utf8_to_uvchr(ustrp, 0); } } if (lenp) @@ -1324,6 +1324,20 @@ Perl_to_utf8_case(pTHX_ U8 *p, U8* ustrp, STRLEN *lenp, SV **swashp,char *normal return uv; } +/* +=for apidoc A|UV|to_utf8_upper|U8 *p|U8 *ustrp|STRLEN *lenp + +Convert the UTF-8 encoded character at p to its uppercase version and +store that in UTF-8 in ustrp and its length in bytes in lenp. Note +that the ustrp buffer needs to be at least UTF8_MAXLEN_UCLC+1 bytes since the +uppercase version may be longer than the original character (up to two +characters). + +The first character of the uppercased version is returned +(but note, as explained above, that there may be more). + +=cut */ + UV Perl_to_utf8_upper(pTHX_ U8 *p, U8* ustrp, STRLEN *lenp) { @@ -1331,6 +1345,20 @@ Perl_to_utf8_upper(pTHX_ U8 *p, U8* ustrp, STRLEN *lenp) &PL_utf8_toupper, "ToUpper", "utf8::ToSpecUpper"); } +/* +=for apidoc A|UV|to_utf8_title|U8 *p|U8 *ustrp|STRLEN *lenp + +Convert the UTF-8 encoded character at p to its titlecase version and +store that in UTF-8 in ustrp and its length in bytes in lenp. Note +that the ustrp buffer needs to be at least UTF8_MAXLEN_UCLC+1 bytes since the +titlecase version may be longer than the original character (up to two +characters). + +The first character of the titlecased version is returned +(but note, as explained above, that there may be more).
+ +=cut */ + UV Perl_to_utf8_title(pTHX_ U8 *p, U8* ustrp, STRLEN *lenp) { @@ -1338,6 +1366,20 @@ Perl_to_utf8_title(pTHX_ U8 *p, U8* ustrp, STRLEN *lenp) &PL_utf8_totitle, "ToTitle", "utf8::ToSpecTitle"); } +/* +=for apidoc A|UV|to_utf8_lower|U8 *p|U8 *ustrp|STRLEN *lenp + +Convert the UTF-8 encoded character at p to its lowercase version and +store that in UTF-8 in ustrp and its length in bytes in lenp. Note +that the ustrp buffer needs to be at least UTF8_MAXLEN_UCLC+1 bytes since the +lowercase version may be longer than the original character (up to two +characters). + +The first character of the lowercased version is returned +(but note, as explained above, that there may be more). + +=cut */ + UV Perl_to_utf8_lower(pTHX_ U8 *p, U8* ustrp, STRLEN *lenp) { @@ -1345,6 +1387,20 @@ Perl_to_utf8_lower(pTHX_ U8 *p, U8* ustrp, STRLEN *lenp) &PL_utf8_tolower, "ToLower", "utf8::ToSpecLower"); } +/* +=for apidoc A|UV|to_utf8_fold|U8 *p|U8 *ustrp|STRLEN *lenp + +Convert the UTF-8 encoded character at p to its foldcase version and +store that in UTF-8 in ustrp and its length in bytes in lenp. Note +that the ustrp buffer needs to be at least UTF8_MAXLEN_FOLD+1 bytes since the +foldcase version may be longer than the original character (up to +three characters). + +The first character of the foldcased version is returned +(but note, as explained above, that there may be more). + +=cut */ + UV Perl_to_utf8_fold(pTHX_ U8 *p, U8* ustrp, STRLEN *lenp) {