Allow poking holes at the UTF-8 decoding strictness.
Jarkko Hietaniemi [Wed, 25 Oct 2000 18:52:30 +0000 (18:52 +0000)]
p4raw-id: //depot/perl@7438

embed.pl
pp.c
proto.h
t/pragma/utf8.t
toke.c
utf8.c
utf8.h

diff --git a/embed.pl b/embed.pl
index 1148ad1..8f80bbf 100755 (executable)
--- a/embed.pl
+++ b/embed.pl
@@ -2075,7 +2075,7 @@ Ap        |U8*    |utf8_hop       |U8 *s|I32 off
 ApM    |U8*    |utf8_to_bytes  |U8 *s|STRLEN *len
 ApM    |U8*    |bytes_to_utf8  |U8 *s|STRLEN *len
 Ap     |UV     |utf8_to_uv     |U8 *s|STRLEN* retlen
-Ap     |UV     |utf8_to_uv_chk |U8 *s|STRLEN curlen|STRLEN* retlen|bool checking
+Ap     |UV     |utf8_to_uv_chk |U8 *s|STRLEN curlen|STRLEN* retlen|U32 flags
 Ap     |U8*    |uv_to_utf8     |U8 *d|UV uv
 p      |void   |vivify_defelem |SV* sv
 p      |void   |vivify_ref     |SV* sv|U32 to_what
diff --git a/pp.c b/pp.c
index 73b6a12..ba50627 100644 (file)
--- a/pp.c
+++ b/pp.c
@@ -1484,7 +1484,7 @@ PP(pp_complement)
 
          send = tmps + len;
          while (tmps < send) {
-           UV c = utf8_to_uv(tmps, &l);
+           UV c = utf8_to_uv_chk(tmps, 0, &l, UTF8_ALLOW_ANY);
            tmps += UTF8SKIP(tmps);
            targlen += UNISKIP(~c);
          }
@@ -1493,7 +1493,7 @@ PP(pp_complement)
          tmps -= len;
          Newz(0, result, targlen + 1, U8);
          while (tmps < send) {
-           UV c = utf8_to_uv(tmps, &l);
+           UV c = utf8_to_uv_chk(tmps, 0, &l, UTF8_ALLOW_ANY);
            tmps += UTF8SKIP(tmps);
            result = uv_to_utf8(result,(UV)~c);
          }
diff --git a/proto.h b/proto.h
index 6886e27..14a6e47 100644 (file)
--- a/proto.h
+++ b/proto.h
@@ -810,7 +810,7 @@ PERL_CALLCONV U8*   Perl_utf8_hop(pTHX_ U8 *s, I32 off);
 PERL_CALLCONV U8*      Perl_utf8_to_bytes(pTHX_ U8 *s, STRLEN *len);
 PERL_CALLCONV U8*      Perl_bytes_to_utf8(pTHX_ U8 *s, STRLEN *len);
 PERL_CALLCONV UV       Perl_utf8_to_uv(pTHX_ U8 *s, STRLEN* retlen);
-PERL_CALLCONV UV       Perl_utf8_to_uv_chk(pTHX_ U8 *s, STRLEN curlen, STRLEN* retlen, bool checking);
+PERL_CALLCONV UV       Perl_utf8_to_uv_chk(pTHX_ U8 *s, STRLEN curlen, STRLEN* retlen, U32 flags);
 PERL_CALLCONV U8*      Perl_uv_to_utf8(pTHX_ U8 *d, UV uv);
 PERL_CALLCONV void     Perl_vivify_defelem(pTHX_ SV* sv);
 PERL_CALLCONV void     Perl_vivify_ref(pTHX_ SV* sv, U32 to_what);
diff --git a/t/pragma/utf8.t b/t/pragma/utf8.t
index e61baad..768da05 100755 (executable)
--- a/t/pragma/utf8.t
+++ b/t/pragma/utf8.t
@@ -578,8 +578,8 @@ my @MK = split(/\n/, <<__EOMK__);
 2.2    Last possible sequence of certain length
 2.2.1 y "\7f"                    7f              1       7f      1
 2.2.2 y "߿"                   7ff             2       df:bf   1
-# The ffff is legal unless under use utf8

Software error:

Malformed UTF-8 character (fatal) at /var/www/git.shadowcat.co.uk/docroot/gitweb/gitweb.cgi line 1024, <$fd> line 66.

For help, please send mail to the webmaster (chrisj@shadowcatsystems.co.uk), giving this error message and the time and date of the error.