Document utf8_length(), utf8_distance(), and utf8_hop().
Jarkko Hietaniemi [Thu, 7 Dec 2000 18:23:47 +0000 (18:23 +0000)]
p4raw-id: //depot/perl@8023

embed.pl
pod/perlapi.pod
utf8.c

index ac43b07..35b714f 100755 (executable)
--- a/embed.pl
+++ b/embed.pl
@@ -2079,7 +2079,7 @@ ApM       |U8*    |utf8_to_bytes  |U8 *s|STRLEN *len
 ApM    |U8*    |bytes_to_utf8  |U8 *s|STRLEN *len
 Ap     |UV     |utf8_to_uv_simple|U8 *s|STRLEN* retlen
 Ap     |UV     |utf8_to_uv     |U8 *s|STRLEN curlen|STRLEN* retlen|U32 flags
-Ap     |U8*    |uv_to_utf8|U8 *d|UV uv
+Ap     |U8*    |uv_to_utf8     |U8 *d|UV uv
 p      |void   |vivify_defelem |SV* sv
 p      |void   |vivify_ref     |SV* sv|U32 to_what
 p      |I32    |wait4pid       |Pid_t pid|int* statusp|int flags
index f5b237f..8bcece2 100644 (file)
@@ -3063,8 +3063,8 @@ Found in file sv.c
 
 Unsets the RV status of the SV, and decrements the reference count of
 whatever was being referenced by the RV.  This can almost be thought of
-as a reversal of C<newSVrv>.  This is C<sv_unref_flags> with C<flag>
-of zero.  See C<SvROK_off>.  
+as a reversal of C<newSVrv>.  This is C<sv_unref_flags> with the C<flag>
+being zero.  See C<SvROK_off>.  
 
        void    sv_unref(SV* sv)
 
@@ -3219,6 +3219,43 @@ string, false otherwise.
 =for hackers
 Found in file utf8.c
 
+=item utf8_distance
+
+Returns the number of UTF-8 characters between the UTF-8 pointers C<a>
+and C<b>.
+
+WARNING: use only if you *know* that the pointers point inside the
+same UTF-8 buffer.
+
+       IV      utf8_distance(U8 *a, U8 *b)
+
+=for hackers
+Found in file utf8.c
+
+=item utf8_hop
+
+Move the pointer C<s>, which points into UTF-8 data, by C<off> characters,
+either forward or backward.
+
+WARNING: do not use this function unless you *know* that the position C<off>
+characters away from C<s> is within the UTF-8 buffer pointed to by C<s>.
+
+       U8*     utf8_hop(U8 *s, I32 off)
+
+=for hackers
+Found in file utf8.c
+
+=item utf8_length
+
+Return the length of the UTF-8 char encoded string C<s> in characters.
+Stops at C<e> (inclusive).  If C<e E<lt> s> or if the scan would end
+up past C<e>, croaks.
+
+       STRLEN  utf8_length(U8* s, U8 *e)
+
+=for hackers
+Found in file utf8.c
+
 =item utf8_to_bytes
 
 Converts a string C<s> of length C<len> from UTF8 into byte encoding.
diff --git a/utf8.c b/utf8.c
index bc0a521..244bb63 100644 (file)
--- a/utf8.c
+++ b/utf8.c
@@ -362,7 +362,7 @@ Perl_utf8_to_uv_simple(pTHX_ U8* s, STRLEN* retlen)
 }
 
 /*
-=for apidoc|utf8_length|U8 *s|U8 *e
+=for apidoc Am|STRLEN|utf8_length|U8* s|U8 *e
 
 Return the length of the UTF-8 char encoded string C<s> in characters.
 Stops at C<e> (inclusive).  If C<e E<lt> s> or if the scan would end
@@ -390,8 +390,16 @@ Perl_utf8_length(pTHX_ U8* s, U8* e)
     return len;
 }
 
-/* utf8_distance(a,b) returns the number of UTF8 characters between
-   the pointers a and b                                                        */
+/*
+=for apidoc Am|IV|utf8_distance|U8 *a|U8 *b
+
+Returns the number of UTF-8 characters between the UTF-8 pointers C<a>
+and C<b>.
+
+WARNING: use only if you *know* that the pointers point inside the
+same UTF-8 buffer.
+
+=cut */
 
 IV
 Perl_utf8_distance(pTHX_ U8 *a, U8 *b)
@@ -422,7 +430,16 @@ Perl_utf8_distance(pTHX_ U8 *a, U8 *b)
     return off;
 }
 
-/* WARNING: do not use the following unless you *know* off is within bounds */
+/*
+=for apidoc Am|U8*|utf8_hop|U8 *s|I32 off
+
+Move the pointer C<s>, which points into UTF-8 data, by C<off> characters,
+either forward or backward.
+
+WARNING: do not use this function unless you *know* that the position C<off>
+characters away from C<s> is within the UTF-8 buffer pointed to by C<s>.
+
+=cut */
 
 U8 *
 Perl_utf8_hop(pTHX_ U8 *s, I32 off)