From: Jarkko Hietaniemi Date: Sun, 18 Feb 2001 20:12:02 +0000 (+0000) Subject: Fix for "[ID 20010213.005] utf8 + localized hash elems + 64 bits?" X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=5da9da9e9f46681684e0c487fd55df8db6f9de67;p=p5sagit%2Fp5-mst-13.2.git Fix for "[ID 20010213.005] utf8 + localized hash elems + 64 bits?" The hash key got wrongly UTF8fied. p4raw-id: //depot/perl@8835 --- diff --git a/pod/perlapi.pod b/pod/perlapi.pod index 40d40fe..ef3a260 100644 --- a/pod/perlapi.pod +++ b/pod/perlapi.pod @@ -1035,9 +1035,10 @@ Found in file handy.h =item is_utf8_char -Tests if some arbitrary number of bytes begins in a valid UTF-8 character. -The actual number of bytes in the UTF-8 character will be returned if it -is valid, otherwise 0. +Tests if some arbitrary number of bytes begins in a valid UTF-8 +character. Note that an ASCII character is a valid UTF-8 character. +The actual number of bytes in the UTF-8 character will be returned if +it is valid, otherwise 0. STRLEN is_utf8_char(U8 *p) @@ -1046,8 +1047,10 @@ Found in file utf8.c =item is_utf8_string -Returns true if first C<len> bytes of the given string form valid a UTF8 -string, false otherwise. +Returns true if first C<len> bytes of the given string form a valid UTF8 +string, false otherwise. Note that 'a valid UTF8 string' does not mean +'a string that contains UTF8' because a valid ASCII string is a valid +UTF8 string. 
bool is_utf8_string(U8 *s, STRLEN len) diff --git a/toke.c b/toke.c index 5243fea..f8d7145 100644 --- a/toke.c +++ b/toke.c @@ -3185,9 +3185,6 @@ Perl_yylex(pTHX) if (*d == '}') { char minus = (PL_tokenbuf[0] == '-'); s = force_word(s + minus, WORD, FALSE, TRUE, FALSE); - if (UTF && !IN_BYTE && is_utf8_string((U8*)PL_tokenbuf, 0) && - PL_nextval[PL_nexttoke-1].opval) - SvUTF8_on(((SVOP*)PL_nextval[PL_nexttoke-1].opval)->op_sv); if (minus) force_next('-'); } diff --git a/utf8.c b/utf8.c index 918b669..13b953a 100644 --- a/utf8.c +++ b/utf8.c @@ -121,12 +121,12 @@ Perl_uv_to_utf8(pTHX_ U8 *d, UV uv) /* =for apidoc A|STRLEN|is_utf8_char|U8 *s -Tests if some arbitrary number of bytes begins in a valid UTF-8 character. -The actual number of bytes in the UTF-8 character will be returned if it -is valid, otherwise 0. +Tests if some arbitrary number of bytes begins in a valid UTF-8 +character. Note that an ASCII character is a valid UTF-8 character. +The actual number of bytes in the UTF-8 character will be returned if +it is valid, otherwise 0. -=cut -*/ +=cut */ STRLEN Perl_is_utf8_char(pTHX_ U8 *s) { @@ -168,8 +168,10 @@ Perl_is_utf8_char(pTHX_ U8 *s) /* =for apidoc A|bool|is_utf8_string|U8 *s|STRLEN len -Returns true if first C<len> bytes of the given string form valid a UTF8 -string, false otherwise. +Returns true if first C<len> bytes of the given string form a valid UTF8 +string, false otherwise. Note that 'a valid UTF8 string' does not mean +'a string that contains UTF8' because a valid ASCII string is a valid +UTF8 string. =cut */