Make the UTF-8 decoding stricter and more verbose when
[p5sagit/p5-mst-13.2.git] / doop.c
diff --git a/doop.c b/doop.c
index b75ffaa..3cd8f07 100644 (file)
--- a/doop.c
+++ b/doop.c
@@ -72,12 +72,12 @@ S_do_trans_simple(pTHX_ SV *sv)
     Newz(0, d, len*2+1, U8);
     dstart = d;
     while (s < send) {
-        I32 ulen;
+        STRLEN ulen;
         short c;
 
         ulen = 1;
         /* Need to check this, otherwise 128..255 won't match */
-       c = utf8_to_uv_chk(s, &ulen, 0);
+       c = utf8_to_uv_chk(s, send - s, &ulen, 0);
         if (c < 0x100 && (ch = tbl[(short)c]) >= 0) {
             matches++;
             if (ch < 0x80)
@@ -122,10 +122,10 @@ S_do_trans_count(pTHX_ SV *sv)/* SPC - OK */
             s += UTF8SKIP(s);
         else {
             UV c;
-            I32 ulen;
+            STRLEN ulen;
             ulen = 1;
             if (hasutf)
-                c = utf8_to_uv_chk(s,&ulen, 0);
+                c = utf8_to_uv_chk(s, send - s, &ulen, 0);
             else
                 c = *s;
             if (c < 0x100 && tbl[c] >= 0)
@@ -363,8 +363,8 @@ S_do_trans_complex_utf8(pTHX_ SV *sv) /* SPC - NOT OK */
                continue;
            }
            else if (uv == none) {      /* "none" is unmapped character */
-               I32 ulen;
-               *d++ = (U8)utf8_to_uv_chk(s, &ulen, 0);
+               STRLEN ulen;
+               *d++ = (U8)utf8_to_uv_chk(s, send - s, &ulen, 0);
                s += ulen;
                puv = 0xfeedface;
                continue;
@@ -404,8 +404,8 @@ S_do_trans_complex_utf8(pTHX_ SV *sv) /* SPC - NOT OK */
                continue;
            }
            else if (uv == none) {      /* "none" is unmapped character */
-               I32 ulen;
-               *d++ = (U8)utf8_to_uv_chk(s, &ulen, 0);
+               STRLEN ulen;
+               *d++ = (U8)utf8_to_uv_chk(s, send - s, &ulen, 0);
                s += ulen;
                continue;
            }
@@ -964,15 +964,15 @@ Perl_do_vop(pTHX_ I32 optype, SV *sv, SV *left, SV *right)
        char *dcsave = dc;
        STRLEN lulen = leftlen;
        STRLEN rulen = rightlen;
-       I32 ulen;
+       STRLEN ulen;
 
        switch (optype) {
        case OP_BIT_AND:
            while (lulen && rulen) {
-               luc = utf8_to_uv_chk((U8*)lc, &ulen, 0);
+               luc = utf8_to_uv_chk((U8*)lc, lulen, &ulen, 0);
                lc += ulen;
                lulen -= ulen;
-               ruc = utf8_to_uv_chk((U8*)rc, &ulen, 0);
+               ruc = utf8_to_uv_chk((U8*)rc, rulen, &ulen, 0);
                rc += ulen;
                rulen -= ulen;
                duc = luc & ruc;
@@ -984,10 +984,10 @@ Perl_do_vop(pTHX_ I32 optype, SV *sv, SV *left, SV *right)
            break;
        case OP_BIT_XOR:
            while (lulen && rulen) {
-               luc = utf8_to_uv_chk((U8*)lc, &ulen, 0);
+               luc = utf8_to_uv_chk((U8*)lc, lulen, &ulen, 0);
                lc += ulen;
                lulen -= ulen;
-               ruc = utf8_to_uv_chk((U8*)rc, &ulen, 0);
+               ruc = utf8_to_uv_chk((U8*)rc, rulen, &ulen, 0);
                rc += ulen;
                rulen -= ulen;
                duc = luc ^ ruc;
@@ -996,10 +996,10 @@ Perl_do_vop(pTHX_ I32 optype, SV *sv, SV *left, SV *right)
            goto mop_up_utf;
        case OP_BIT_OR:
            while (lulen && rulen) {
-               luc = utf8_to_uv_chk((U8*)lc, &ulen, 0);
+               luc = utf8_to_uv_chk((U8*)lc, lulen, &ulen, 0);
                lc += ulen;
                lulen -= ulen;
-               ruc = utf8_to_uv_chk((U8*)rc, &ulen, 0);
+               ruc = utf8_to_uv_chk((U8*)rc, rulen, &ulen, 0);
                rc += ulen;
                rulen -= ulen;
                duc = luc | ruc;