ApPR |bool |is_uni_print_lc|UV c
ApPR |bool |is_uni_punct_lc|UV c
ApPR |bool |is_uni_xdigit_lc|UV c
+Apd |bool |is_ascii_string|NN const U8 *s|STRLEN len
Apd |STRLEN |is_utf8_char |NN const U8 *s
Apd |bool |is_utf8_string |NN const U8 *s|STRLEN len
Apdmb |bool |is_utf8_string_loc|NN const U8 *s|STRLEN len|NULLOK const U8 **p
#define is_uni_print_lc Perl_is_uni_print_lc
#define is_uni_punct_lc Perl_is_uni_punct_lc
#define is_uni_xdigit_lc Perl_is_uni_xdigit_lc
+#define is_ascii_string Perl_is_ascii_string
#define is_utf8_char Perl_is_utf8_char
#define is_utf8_string Perl_is_utf8_string
#define is_utf8_string_loclen Perl_is_utf8_string_loclen
#define is_uni_print_lc(a) Perl_is_uni_print_lc(aTHX_ a)
#define is_uni_punct_lc(a) Perl_is_uni_punct_lc(aTHX_ a)
#define is_uni_xdigit_lc(a) Perl_is_uni_xdigit_lc(aTHX_ a)
+#define is_ascii_string(a,b) Perl_is_ascii_string(aTHX_ a,b)
#define is_utf8_char(a) Perl_is_utf8_char(aTHX_ a)
#define is_utf8_string(a,b) Perl_is_utf8_string(aTHX_ a,b)
#define is_utf8_string_loclen(a,b,c,d) Perl_is_utf8_string_loclen(aTHX_ a,b,c,d)
Perl_is_uni_print_lc
Perl_is_uni_punct_lc
Perl_is_uni_xdigit_lc
+Perl_is_ascii_string
Perl_is_utf8_char
Perl_is_utf8_string
Perl_is_utf8_string_loc
__attribute__warn_unused_result__
__attribute__pure__;
+PERL_CALLCONV bool Perl_is_ascii_string(pTHX_ const U8 *s, STRLEN len)
+ __attribute__nonnull__(pTHX_1);
+#define PERL_ARGS_ASSERT_IS_ASCII_STRING \
+ assert(s)
+
PERL_CALLCONV STRLEN Perl_is_utf8_char(pTHX_ const U8 *s)
__attribute__nonnull__(pTHX_1);
#define PERL_ARGS_ASSERT_IS_UTF8_CHAR \
@INC = '../lib';
require './test.pl';
- plan(tests => 97);
+ plan(tests => 99);
}
use strict;
is($hash{тест}, $hash{'тест'});
is($hash{тест}, 123);
is($hash{'тест'}, 123);
+
+ # See if plain ASCII strings quoted with '=>' erroneously get utf8 flag [perl #68812]
+ my %foo = (a => 'b', 'c' => 'd');
+ for my $key (keys %foo) {
+ ok !utf8::is_utf8($key), "'$key' shouldn't have utf8 flag";
+ }
}
__END__
{
is($hash{½ää½âÀ½äâ½ää}, $hash{'½ää½âÀ½äâ½ää'});
is($hash{½ää½âÀ½äâ½ää}, 123);
is($hash{'½ää½âÀ½äâ½ää'}, 123);
+
+ # See if plain ASCII strings quoted with '=>' erroneously get utf8 flag [perl #68812]
+ my %foo = (a => 'b', 'c' => 'd');
+ for my $key (keys %foo) {
+ ok !utf8::is_utf8($key), "'$key' shouldn't have utf8 flag";
+ }
}
{
dVAR;
SV * const sv = newSVpvn_utf8(start, len,
- UTF && !IN_BYTES
+ !IN_BYTES
+ && UTF
+ && !is_ascii_string((const U8*)start, len)
&& is_utf8_string((const U8*)start, len));
return sv;
}
characters in the ASCII range are unmodified, and a zero byte never appears
within non-zero characters.
+=cut
+*/
+
+/*
+=for apidoc is_ascii_string
+
+Returns true if first C<len> bytes of the given string are ASCII (i.e. none
+of them even raise the question of UTF-8-ness).
+
+See also is_utf8_string(), is_utf8_string_loclen(), and is_utf8_string_loc().
+
+=cut
+*/
+
+bool
+Perl_is_ascii_string(pTHX_ const U8 *s, STRLEN len)
+{
+ const U8* const send = s + (len ? len : strlen((const char *)s));
+ const U8* x = s;
+
+ PERL_ARGS_ASSERT_IS_ASCII_STRING;
+ PERL_UNUSED_CONTEXT;
+
+ for (; x < send; ++x) {
+ if (!UTF8_IS_INVARIANT(*x))
+ break;
+ }
+
+ return x == send;
+}
+
+/*
=for apidoc uvuni_to_utf8_flags
Adds the UTF-8 representation of the Unicode codepoint C<uv> to the end
return is_utf8_char_slow(s, len);
}
+
/*
=for apidoc is_utf8_string
not mean 'a string that contains code points above 0x7F encoded in UTF-8'
because a valid ASCII string is a valid UTF-8 string.
-See also is_utf8_string_loclen() and is_utf8_string_loc().
+See also is_ascii_string(), is_utf8_string_loclen(), and is_utf8_string_loc().
=cut
*/