From: Jarkko Hietaniemi <jhi@iki.fi>
Date: Sat, 18 Nov 2000 22:50:28 +0000 (+0000)
Subject: Introduce Perl_utf8_length().  Use it.
X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=b76347f2eb34c85a0a38543b2f57ca474fedab4d;p=p5sagit%2Fp5-mst-13.2.git

Introduce Perl_utf8_length().  Use it.

p4raw-id: //depot/perl@7744
---

diff --git a/embed.h b/embed.h
index 7bb132d..1301e3e 100644
--- a/embed.h
+++ b/embed.h
@@ -725,6 +725,7 @@
 #define utilize			Perl_utilize
 #define utf16_to_utf8		Perl_utf16_to_utf8
 #define utf16_to_utf8_reversed	Perl_utf16_to_utf8_reversed
+#define utf8_length		Perl_utf8_length
 #define utf8_distance		Perl_utf8_distance
 #define utf8_hop		Perl_utf8_hop
 #define utf8_to_bytes		Perl_utf8_to_bytes
@@ -2186,6 +2187,7 @@
 #define utilize(a,b,c,d,e)	Perl_utilize(aTHX_ a,b,c,d,e)
 #define utf16_to_utf8(a,b,c,d)	Perl_utf16_to_utf8(aTHX_ a,b,c,d)
 #define utf16_to_utf8_reversed(a,b,c,d)	Perl_utf16_to_utf8_reversed(aTHX_ a,b,c,d)
+#define utf8_length(a,b)	Perl_utf8_length(aTHX_ a,b)
 #define utf8_distance(a,b)	Perl_utf8_distance(aTHX_ a,b)
 #define utf8_hop(a,b)		Perl_utf8_hop(aTHX_ a,b)
 #define utf8_to_bytes(a,b)	Perl_utf8_to_bytes(aTHX_ a,b)
@@ -4284,6 +4286,8 @@
 #define utf16_to_utf8		Perl_utf16_to_utf8
 #define Perl_utf16_to_utf8_reversed	CPerlObj::Perl_utf16_to_utf8_reversed
 #define utf16_to_utf8_reversed	Perl_utf16_to_utf8_reversed
+#define Perl_utf8_length	CPerlObj::Perl_utf8_length
+#define utf8_length		Perl_utf8_length
 #define Perl_utf8_distance	CPerlObj::Perl_utf8_distance
 #define utf8_distance		Perl_utf8_distance
 #define Perl_utf8_hop		CPerlObj::Perl_utf8_hop
diff --git a/embed.pl b/embed.pl
index a19c439..b8abef3 100755
--- a/embed.pl
+++ b/embed.pl
@@ -2070,6 +2070,7 @@ p	|void	|unshare_hek	|HEK* hek
 p	|void	|utilize	|int aver|I32 floor|OP* version|OP* id|OP* arg
 Ap	|U8*	|utf16_to_utf8	|U8* p|U8 *d|I32 bytelen|I32 *newlen
 Ap	|U8*	|utf16_to_utf8_reversed|U8* p|U8 *d|I32 bytelen|I32 *newlen
+Ap	|STRLEN	|utf8_length	|U8* s|U8 *e
 Ap	|I32	|utf8_distance	|U8 *a|U8 *b
 Ap	|U8*	|utf8_hop	|U8 *s|I32 off
 ApM	|U8*	|utf8_to_bytes	|U8 *s|STRLEN *len
diff --git a/objXSUB.h b/objXSUB.h
index 5827b72..88eb400 100644
--- a/objXSUB.h
+++ b/objXSUB.h
@@ -1853,6 +1853,10 @@
 #define Perl_utf16_to_utf8_reversed	pPerl->Perl_utf16_to_utf8_reversed
 #undef  utf16_to_utf8_reversed
 #define utf16_to_utf8_reversed	Perl_utf16_to_utf8_reversed
+#undef  Perl_utf8_length
+#define Perl_utf8_length	pPerl->Perl_utf8_length
+#undef  utf8_length
+#define utf8_length		Perl_utf8_length
 #undef  Perl_utf8_distance
 #define Perl_utf8_distance	pPerl->Perl_utf8_distance
 #undef  utf8_distance
diff --git a/perlapi.c b/perlapi.c
index a9dd2f0..a2e73e4 100644
--- a/perlapi.c
+++ b/perlapi.c
@@ -3350,6 +3350,13 @@ Perl_utf16_to_utf8_reversed(pTHXo_ U8* p, U8 *d, I32 bytelen, I32 *newlen)
     return ((CPerlObj*)pPerl)->Perl_utf16_to_utf8_reversed(p, d, bytelen, newlen);
 }
 
+#undef  Perl_utf8_length	/* drop any macro alias of this name first */
+STRLEN
+Perl_utf8_length(pTHXo_ U8* s, U8 *e)
+{
+    return ((CPerlObj*)pPerl)->Perl_utf8_length(s, e);	/* forward to the CPerlObj member */
+}
+
 #undef  Perl_utf8_distance
 I32
 Perl_utf8_distance(pTHXo_ U8 *a, U8 *b)
diff --git a/proto.h b/proto.h
index 052346d..91b7f86 100644
--- a/proto.h
+++ b/proto.h
@@ -805,6 +805,7 @@ PERL_CALLCONV void	Perl_unshare_hek(pTHX_ HEK* hek);
 PERL_CALLCONV void	Perl_utilize(pTHX_ int aver, I32 floor, OP* version, OP* id, OP* arg);
 PERL_CALLCONV U8*	Perl_utf16_to_utf8(pTHX_ U8* p, U8 *d, I32 bytelen, I32 *newlen);
 PERL_CALLCONV U8*	Perl_utf16_to_utf8_reversed(pTHX_ U8* p, U8 *d, I32 bytelen, I32 *newlen);
+PERL_CALLCONV STRLEN	Perl_utf8_length(pTHX_ U8* s, U8 *e);
 PERL_CALLCONV I32	Perl_utf8_distance(pTHX_ U8 *a, U8 *b);
 PERL_CALLCONV U8*	Perl_utf8_hop(pTHX_ U8 *s, I32 off);
 PERL_CALLCONV U8*	Perl_utf8_to_bytes(pTHX_ U8 *s, STRLEN *len);
diff --git a/sv.c b/sv.c
index 375b956..e193bc5 100644
--- a/sv.c
+++ b/sv.c
@@ -3994,26 +3994,20 @@ UTF8 bytes as a single character.
 STRLEN
 Perl_sv_len_utf8(pTHX_ register SV *sv)
 {
-    U8 *s;
-    U8 *send;
-    STRLEN len;
-
     if (!sv)
 	return 0;
 
 #ifdef NOTYET
     if (SvGMAGICAL(sv))
-	len = mg_length(sv);
+	return mg_length(sv);
     else
 #endif
-	s = (U8*)SvPV(sv, len);
-    send = s + len;
-    len = 0;
-    while (s < send) {
-	s += UTF8SKIP(s);
-	len++;
+    {
+	STRLEN len;			/* byte length, set by SvPV below */
+	U8 *s = (U8*)SvPV(sv, len);
+
+	return Perl_utf8_length(aTHX_ s, s + len);	/* aTHX_ pairs with the pTHX_ prototype */
     }
-    return len;
 }
 
 void
diff --git a/utf8.c b/utf8.c
index f1b80a4..fc625dc 100644
--- a/utf8.c
+++ b/utf8.c
@@ -353,6 +353,35 @@ Perl_utf8_to_uv_simple(pTHX_ U8* s, STRLEN* retlen)
     return Perl_utf8_to_uv(aTHX_ s, (STRLEN)-1, retlen, 0);
 }
 
+/*
+=for apidoc A|STRLEN|utf8_length|U8 *s|U8 *e
+
+Return the length, in characters, of the UTF-8 encoded string starting at C<s>.
+Stops at C<e> (exclusive).  If C<e E<lt> s>, or if the last character would
+extend past C<e>, returns C<(STRLEN)-1>, since the return type is unsigned.
+
+=cut
+*/
+
+STRLEN
+Perl_utf8_length(pTHX_ U8* s, U8* e)
+{
+    STRLEN nchars;		/* characters counted so far */
+
+    if (e < s)			/* end precedes start: error value */
+	return (STRLEN)-1;
+    for (nchars = 0; s < e; nchars++) {
+	STRLEN skip = UTF8SKIP(s);	/* byte width of the character at s */
+
+	/* fewer bytes remain before e than this character needs */
+	if (e - s < skip)
+	    return (STRLEN)-1;
+	s += skip;
+    }
+
+    return nchars;
+}
+
 /* utf8_distance(a,b) returns the number of UTF8 characters between
    the pointers a and b							*/