Introduce Perl_utf8_length(). Use it.
Jarkko Hietaniemi [Sat, 18 Nov 2000 22:50:28 +0000 (22:50 +0000)]
p4raw-id: //depot/perl@7744

embed.h
embed.pl
objXSUB.h
perlapi.c
proto.h
sv.c
utf8.c

diff --git a/embed.h b/embed.h
index 7bb132d..1301e3e 100644 (file)
--- a/embed.h
+++ b/embed.h
 #define utilize                        Perl_utilize
 #define utf16_to_utf8          Perl_utf16_to_utf8
 #define utf16_to_utf8_reversed Perl_utf16_to_utf8_reversed
+#define utf8_length            Perl_utf8_length
 #define utf8_distance          Perl_utf8_distance
 #define utf8_hop               Perl_utf8_hop
 #define utf8_to_bytes          Perl_utf8_to_bytes
 #define utilize(a,b,c,d,e)     Perl_utilize(aTHX_ a,b,c,d,e)
 #define utf16_to_utf8(a,b,c,d) Perl_utf16_to_utf8(aTHX_ a,b,c,d)
 #define utf16_to_utf8_reversed(a,b,c,d)        Perl_utf16_to_utf8_reversed(aTHX_ a,b,c,d)
+#define utf8_length(a,b)       Perl_utf8_length(aTHX_ a,b)
 #define utf8_distance(a,b)     Perl_utf8_distance(aTHX_ a,b)
 #define utf8_hop(a,b)          Perl_utf8_hop(aTHX_ a,b)
 #define utf8_to_bytes(a,b)     Perl_utf8_to_bytes(aTHX_ a,b)
 #define utf16_to_utf8          Perl_utf16_to_utf8
 #define Perl_utf16_to_utf8_reversed    CPerlObj::Perl_utf16_to_utf8_reversed
 #define utf16_to_utf8_reversed Perl_utf16_to_utf8_reversed
+#define Perl_utf8_length       CPerlObj::Perl_utf8_length
+#define utf8_length            Perl_utf8_length
 #define Perl_utf8_distance     CPerlObj::Perl_utf8_distance
 #define utf8_distance          Perl_utf8_distance
 #define Perl_utf8_hop          CPerlObj::Perl_utf8_hop
index a19c439..b8abef3 100755 (executable)
--- a/embed.pl
+++ b/embed.pl
@@ -2070,6 +2070,7 @@ p |void   |unshare_hek    |HEK* hek
 p      |void   |utilize        |int aver|I32 floor|OP* version|OP* id|OP* arg
 Ap     |U8*    |utf16_to_utf8  |U8* p|U8 *d|I32 bytelen|I32 *newlen
 Ap     |U8*    |utf16_to_utf8_reversed|U8* p|U8 *d|I32 bytelen|I32 *newlen
+Ap     |STRLEN |utf8_length    |U8* s|U8 *e
 Ap     |I32    |utf8_distance  |U8 *a|U8 *b
 Ap     |U8*    |utf8_hop       |U8 *s|I32 off
 ApM    |U8*    |utf8_to_bytes  |U8 *s|STRLEN *len
index 5827b72..88eb400 100644 (file)
--- a/objXSUB.h
+++ b/objXSUB.h
 #define Perl_utf16_to_utf8_reversed    pPerl->Perl_utf16_to_utf8_reversed
 #undef  utf16_to_utf8_reversed
 #define utf16_to_utf8_reversed Perl_utf16_to_utf8_reversed
+#undef  Perl_utf8_length
+#define Perl_utf8_length       pPerl->Perl_utf8_length
+#undef  utf8_length
+#define utf8_length            Perl_utf8_length
 #undef  Perl_utf8_distance
 #define Perl_utf8_distance     pPerl->Perl_utf8_distance
 #undef  utf8_distance
index a9dd2f0..a2e73e4 100644 (file)
--- a/perlapi.c
+++ b/perlapi.c
@@ -3350,6 +3350,13 @@ Perl_utf16_to_utf8_reversed(pTHXo_ U8* p, U8 *d, I32 bytelen, I32 *newlen)
     return ((CPerlObj*)pPerl)->Perl_utf16_to_utf8_reversed(p, d, bytelen, newlen);
 }
 
+#undef  Perl_utf8_length       /* drop any macro of this name so the function below gets the real symbol */
+STRLEN
+Perl_utf8_length(pTHXo_ U8* s, U8 *e)
+{
+    return ((CPerlObj*)pPerl)->Perl_utf8_length(s, e); /* forward unchanged to the CPerlObj member */
+}
+
 #undef  Perl_utf8_distance
 I32
 Perl_utf8_distance(pTHXo_ U8 *a, U8 *b)
diff --git a/proto.h b/proto.h
index 052346d..91b7f86 100644 (file)
--- a/proto.h
+++ b/proto.h
@@ -805,6 +805,7 @@ PERL_CALLCONV void  Perl_unshare_hek(pTHX_ HEK* hek);
 PERL_CALLCONV void     Perl_utilize(pTHX_ int aver, I32 floor, OP* version, OP* id, OP* arg);
 PERL_CALLCONV U8*      Perl_utf16_to_utf8(pTHX_ U8* p, U8 *d, I32 bytelen, I32 *newlen);
 PERL_CALLCONV U8*      Perl_utf16_to_utf8_reversed(pTHX_ U8* p, U8 *d, I32 bytelen, I32 *newlen);
+PERL_CALLCONV STRLEN   Perl_utf8_length(pTHX_ U8* s, U8 *e);
 PERL_CALLCONV I32      Perl_utf8_distance(pTHX_ U8 *a, U8 *b);
 PERL_CALLCONV U8*      Perl_utf8_hop(pTHX_ U8 *s, I32 off);
 PERL_CALLCONV U8*      Perl_utf8_to_bytes(pTHX_ U8 *s, STRLEN *len);
diff --git a/sv.c b/sv.c
index 375b956..e193bc5 100644 (file)
--- a/sv.c
+++ b/sv.c
@@ -3994,26 +3994,20 @@ UTF8 bytes as a single character.
 STRLEN
 Perl_sv_len_utf8(pTHX_ register SV *sv)
 {
-    U8 *s;
-    U8 *send;
-    STRLEN len;
-
     if (!sv)
        return 0;
 
 #ifdef NOTYET
     if (SvGMAGICAL(sv))
-       len = mg_length(sv);
+       return mg_length(sv);
     else
 #endif
-       s = (U8*)SvPV(sv, len);
-    send = s + len;
-    len = 0;
-    while (s < send) {
-       s += UTF8SKIP(s);
-       len++;
+    {
+       STRLEN len;
+       U8 *s = (U8*)SvPV(sv, len);
+
+       return Perl_utf8_length(s, s + len);
     }
-    return len;
 }
 
 void
diff --git a/utf8.c b/utf8.c
index f1b80a4..fc625dc 100644 (file)
--- a/utf8.c
+++ b/utf8.c
@@ -353,6 +353,35 @@ Perl_utf8_to_uv_simple(pTHX_ U8* s, STRLEN* retlen)
     return Perl_utf8_to_uv(aTHX_ s, (STRLEN)-1, retlen, 0);
 }
 
+/*
+=for apidoc A|STRLEN|utf8_length|U8 *s|U8 *e
+
+Return the length, in characters, of the UTF-8 encoded string that starts
+at C<s> and ends just before C<e>.  If C<e E<lt> s>, or if the last
+character would extend past C<e>, return -1 converted to C<STRLEN>.
+
+=cut
+*/
+
+STRLEN
+Perl_utf8_length(pTHX_ U8* s, U8* e)
+{
+    STRLEN len = 0;
+
+    if (e < s)
+       return -1;              /* end pointer precedes start pointer */
+    while (s < e) {
+       STRLEN t = UTF8SKIP(s); /* number of bytes to step over from s */
+
+       if (e - s < t)
+           return -1;          /* fewer than t bytes remain before e */
+       s += t;
+       len++;                  /* one step counted as one character */
+    }
+
+    return len;
+}
+
 /* utf8_distance(a,b) returns the number of UTF8 characters between
    the pointers a and b                                                        */