From: Jarkko Hietaniemi Date: Sat, 14 Jun 2003 09:05:07 +0000 (+0000) Subject: Having to pull in the whole Encode just to get SvUTF8() X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=8800c35a0a1b4f206c0a9ba7f62bf82cc177d98e;p=p5sagit%2Fp5-mst-13.2.git Having to pull in the whole Encode just to get SvUTF8() is a bit too much: introduce utf8::is_utf8(). p4raw-id: //depot/perl@19777 --- diff --git a/lib/utf8.pm b/lib/utf8.pm index 5a37aec..0727c4d 100644 --- a/lib/utf8.pm +++ b/lib/utf8.pm @@ -72,7 +72,9 @@ utf8 until the end the block (or file, if at top level) by C<no utf8;>. =head2 Utility functions -The following functions are defined in the C<utf8::> package by the perl core. +The following functions are defined in the C<utf8::> package by the +Perl core. You do not need to say C<use utf8> to use these and in fact +you should not unless you really want to have UTF-8 source code. =over 4 @@ -112,13 +114,18 @@ into logical characters. Same as Encode::decode_utf8(). Note that this should not be used to convert Unicode back to a legacy byte encoding: use Encode for that. +=item * $flag = utf8::is_utf8(STRING) + +Test whether STRING is in UTF-8. + =item * $flag = utf8::valid(STRING) -[INTERNAL] Test whether STRING is in a consistent state. Will return -true if string is held as bytes, or is well-formed UTF-8 and has the -UTF-8 flag on. Main reason for this routine is to allow Perl's -testsuite to check that operations have left strings in a consistent -state. +[INTERNAL] Test whether STRING is in a consistent state regarding +UTF-8. Will return true if it is well-formed UTF-8 and has the UTF-8 flag +on B<or> if string is held as bytes (both these states are 'consistent'). +Main reason for this routine is to allow Perl's testsuite to check +that operations have left strings in a consistent state. You most +probably want to use utf8::is_utf8() instead. 
=back diff --git a/pod/perluniintro.pod b/pod/perluniintro.pod index feee902..c20e05c 100644 --- a/pod/perluniintro.pod +++ b/pod/perluniintro.pod @@ -504,7 +504,7 @@ Yet another way would be to use the Devel::Peek module: That shows the UTF8 flag in FLAGS and both the UTF-8 bytes and Unicode characters in C<PV>. See also later in this document -the discussion about the C<is_utf8> function of the C<Encode> module. +the discussion about the C<utf8::is_utf8()> function. =back @@ -625,8 +625,7 @@ didn't get the transparency of Unicode quite right. Okay, if you insist: - use Encode 'is_utf8'; - print is_utf8($string) ? 1 : 0, "\n"; + print utf8::is_utf8($string) ? 1 : 0, "\n"; But note that this doesn't mean that any of the characters in the string are necessary UTF-8 encoded, or that any of the characters have diff --git a/universal.c b/universal.c index e7889fb..0fe94d2 100644 --- a/universal.c +++ b/universal.c @@ -171,6 +171,7 @@ XS(XS_version_numify); XS(XS_version_vcmp); XS(XS_version_boolean); XS(XS_version_noop); +XS(XS_utf8_is_utf8); XS(XS_utf8_valid); XS(XS_utf8_encode); XS(XS_utf8_decode); @@ -210,6 +211,7 @@ Perl_boot_core_UNIVERSAL(pTHX) newXS("version::(nomethod", XS_version_noop, file); newXS("version::noop", XS_version_noop, file); } + newXS("utf8::is_utf8", XS_utf8_is_utf8, file); newXS("utf8::valid", XS_utf8_valid, file); newXS("utf8::encode", XS_utf8_encode, file); newXS("utf8::decode", XS_utf8_decode, file); @@ -526,6 +528,24 @@ XS(XS_version_noop) XSRETURN_EMPTY; } +XS(XS_utf8_is_utf8) +{ + dXSARGS; + if (items != 1) + Perl_croak(aTHX_ "Usage: utf8::is_utf8(sv)"); + { + SV * sv = ST(0); + { + STRLEN len; + if (SvUTF8(sv)) + XSRETURN_YES; + else + XSRETURN_NO; + } + } + XSRETURN_EMPTY; +} + XS(XS_utf8_valid) { dXSARGS;