From: Simon Cozens Date: Mon, 26 Jun 2000 02:25:59 +0000 (+0000) Subject: is_utf8_string X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=fe04eb4c51be9971488ff9efe0143edc29eec58a;hp=1e72252ad7b8e23d1a1142285b8aa82986bd2491;p=p5sagit%2Fp5-mst-13.2.git is_utf8_string Message-ID: p4raw-id: //depot/cfgperl@6243 --- diff --git a/embed.h b/embed.h index 4c95dc2..fa199fb 100644 --- a/embed.h +++ b/embed.h @@ -305,6 +305,7 @@ #define to_uni_title_lc Perl_to_uni_title_lc #define to_uni_lower_lc Perl_to_uni_lower_lc #define is_utf8_char Perl_is_utf8_char +#define is_utf8_string Perl_is_utf8_string #define is_utf8_alnum Perl_is_utf8_alnum #define is_utf8_alnumc Perl_is_utf8_alnumc #define is_utf8_idfirst Perl_is_utf8_idfirst @@ -1758,6 +1759,7 @@ #define to_uni_title_lc(a) Perl_to_uni_title_lc(aTHX_ a) #define to_uni_lower_lc(a) Perl_to_uni_lower_lc(aTHX_ a) #define is_utf8_char(a) Perl_is_utf8_char(aTHX_ a) +#define is_utf8_string(a,b) Perl_is_utf8_string(aTHX_ a,b) #define is_utf8_alnum(a) Perl_is_utf8_alnum(aTHX_ a) #define is_utf8_alnumc(a) Perl_is_utf8_alnumc(aTHX_ a) #define is_utf8_idfirst(a) Perl_is_utf8_idfirst(aTHX_ a) @@ -3445,6 +3447,8 @@ #define to_uni_lower_lc Perl_to_uni_lower_lc #define Perl_is_utf8_char CPerlObj::Perl_is_utf8_char #define is_utf8_char Perl_is_utf8_char +#define Perl_is_utf8_string CPerlObj::Perl_is_utf8_string +#define is_utf8_string Perl_is_utf8_string #define Perl_is_utf8_alnum CPerlObj::Perl_is_utf8_alnum #define is_utf8_alnum Perl_is_utf8_alnum #define Perl_is_utf8_alnumc CPerlObj::Perl_is_utf8_alnumc diff --git a/embed.pl b/embed.pl index 5b63a35..96603be 100755 --- a/embed.pl +++ b/embed.pl @@ -1620,6 +1620,7 @@ Ap |U32 |to_uni_upper_lc|U32 c Ap |U32 |to_uni_title_lc|U32 c Ap |U32 |to_uni_lower_lc|U32 c Ap |int |is_utf8_char |U8 *p +Ap |bool |is_utf8_string |U8 *s|STRLEN len Ap |bool |is_utf8_alnum |U8 *p Ap |bool |is_utf8_alnumc |U8 *p Ap |bool |is_utf8_idfirst|U8 *p diff --git a/global.sym b/global.sym 
index 8a9b008..1f03877 100644 --- a/global.sym +++ b/global.sym @@ -185,6 +185,7 @@ Perl_to_uni_upper_lc Perl_to_uni_title_lc Perl_to_uni_lower_lc Perl_is_utf8_char +Perl_is_utf8_string Perl_is_utf8_alnum Perl_is_utf8_alnumc Perl_is_utf8_idfirst diff --git a/objXSUB.h b/objXSUB.h index ad5aa09..e30258b 100644 --- a/objXSUB.h +++ b/objXSUB.h @@ -707,6 +707,10 @@ #define Perl_is_utf8_char pPerl->Perl_is_utf8_char #undef is_utf8_char #define is_utf8_char Perl_is_utf8_char +#undef Perl_is_utf8_string +#define Perl_is_utf8_string pPerl->Perl_is_utf8_string +#undef is_utf8_string +#define is_utf8_string Perl_is_utf8_string #undef Perl_is_utf8_alnum #define Perl_is_utf8_alnum pPerl->Perl_is_utf8_alnum #undef is_utf8_alnum diff --git a/perlapi.c b/perlapi.c index ccb7c8f..29428d5 100755 --- a/perlapi.c +++ b/perlapi.c @@ -1326,6 +1326,13 @@ Perl_is_utf8_char(pTHXo_ U8 *p) return ((CPerlObj*)pPerl)->Perl_is_utf8_char(p); } +#undef Perl_is_utf8_string +bool +Perl_is_utf8_string(pTHXo_ U8 *s, STRLEN len) +{ + return ((CPerlObj*)pPerl)->Perl_is_utf8_string(s, len); +} + #undef Perl_is_utf8_alnum bool Perl_is_utf8_alnum(pTHXo_ U8 *p) diff --git a/pod/perlapi.pod b/pod/perlapi.pod index 1e48809..86ad5bd 100644 --- a/pod/perlapi.pod +++ b/pod/perlapi.pod @@ -2282,19 +2282,19 @@ false, defined or undefined. Does not handle 'get' magic. =for hackers Found in file sv.h -=item svtype +=item SvTYPE -An enum of flags for Perl types. These are found in the file B<sv.h> -in the C<svtype> enum. Test these flags with the C<SvTYPE> macro. +Returns the type of the SV. See C<svtype>. + + svtype SvTYPE(SV* sv) =for hackers Found in file sv.h -=item SvTYPE - -Returns the type of the SV. See C<svtype>. +=item svtype - svtype SvTYPE(SV* sv) +An enum of flags for Perl types. These are found in the file B<sv.h> +in the C<svtype> enum. Test these flags with the C<SvTYPE> macro. =for hackers Found in file sv.h @@ -2939,6 +2939,16 @@ Converts the specified character to uppercase. 
=for hackers Found in file handy.h +=item is_utf8_string + +Returns true if the first C<len> bytes of the given string form a valid UTF8 +string, false otherwise. + + bool is_utf8_string(U8 *s, STRLEN len) + +=for hackers +Found in file utf8.c + =item utf8_to_bytes Converts a string C<s> of length C<len> from UTF8 into ASCII encoding. diff --git a/proto.h b/proto.h index c1d9a66..fc34840 100644 --- a/proto.h +++ b/proto.h @@ -367,6 +367,7 @@ PERL_CALLCONV U32 Perl_to_uni_upper_lc(pTHX_ U32 c); PERL_CALLCONV U32 Perl_to_uni_title_lc(pTHX_ U32 c); PERL_CALLCONV U32 Perl_to_uni_lower_lc(pTHX_ U32 c); PERL_CALLCONV int Perl_is_utf8_char(pTHX_ U8 *p); +PERL_CALLCONV bool Perl_is_utf8_string(pTHX_ U8 *s, STRLEN len); PERL_CALLCONV bool Perl_is_utf8_alnum(pTHX_ U8 *p); PERL_CALLCONV bool Perl_is_utf8_alnumc(pTHX_ U8 *p); PERL_CALLCONV bool Perl_is_utf8_idfirst(pTHX_ U8 *p); diff --git a/utf8.c b/utf8.c index 9bb89a4..666ec34 100644 --- a/utf8.c +++ b/utf8.c @@ -134,6 +134,30 @@ Perl_is_utf8_char(pTHX_ U8 *s) return len; } +/* +=for apidoc Am|bool|is_utf8_string|U8 *s|STRLEN len + +Returns true if the first C<len> bytes of the given string form a valid UTF8 +string, false otherwise. + +=cut +*/ + +bool +Perl_is_utf8_string(pTHX_ U8 *s, STRLEN len) +{ + U8* x=s; + U8* send=s+len; + int c; + while (x < send) { + c = is_utf8_char(x); + x += c; + if (!c || x > send) + return 0; + } + return 1; +} + UV Perl_utf8_to_uv(pTHX_ U8* s, I32* retlen) {