Add a hook for running the tests under the Third Degree
[p5sagit/p5-mst-13.2.git] / pp.c
diff --git a/pp.c b/pp.c
index 1bbb108..37ada8b 100644 (file)
--- a/pp.c
+++ b/pp.c
@@ -745,9 +745,10 @@ PP(pp_schop)
 
 PP(pp_chop)
 {
-    dSP; dMARK; dTARGET;
-    while (SP > MARK)
-       do_chop(TARG, POPs);
+    dSP; dMARK; dTARGET; dORIGMARK;
+    while (MARK < SP)
+       do_chop(TARG, *++MARK);
+    SP = ORIGMARK;
     PUSHTARG;
     RETURN;
 }
@@ -1280,7 +1281,7 @@ PP(pp_subtract)
            UV result;
            register UV buv;
            bool buvok = SvUOK(TOPs);
-           
+       
            if (buvok)
                buv = SvUVX(TOPs);
            else {
@@ -2137,7 +2138,7 @@ PP(pp_complement)
 
          send = tmps + len;
          while (tmps < send) {
-           UV c = utf8_to_uv(tmps, send-tmps, &l, UTF8_ALLOW_ANYUV);
+           UV c = utf8n_to_uvchr(tmps, send-tmps, &l, UTF8_ALLOW_ANYUV);
            tmps += UTF8SKIP(tmps);
            targlen += UNISKIP(~c);
            nchar++;
@@ -2151,9 +2152,9 @@ PP(pp_complement)
          if (nwide) {
              Newz(0, result, targlen + 1, U8);
              while (tmps < send) {
-                 UV c = utf8_to_uv(tmps, send-tmps, &l, UTF8_ALLOW_ANYUV);
+                 UV c = utf8n_to_uvchr(tmps, send-tmps, &l, UTF8_ALLOW_ANYUV);
                  tmps += UTF8SKIP(tmps);
-                 result = uv_to_utf8(result, ~c);
+                 result = uvchr_to_utf8(result, ~c);
              }
              *result = '\0';
              result -= targlen;
@@ -2163,7 +2164,7 @@ PP(pp_complement)
          else {
              Newz(0, result, nchar + 1, U8);
              while (tmps < send) {
-                 U8 c = (U8)utf8_to_uv(tmps, 0, &l, UTF8_ALLOW_ANY);
+                 U8 c = (U8)utf8n_to_uvchr(tmps, 0, &l, UTF8_ALLOW_ANY);
                  tmps += UTF8SKIP(tmps);
                  *result++ = ~c;
              }
@@ -2933,7 +2934,7 @@ PP(pp_ord)
     STRLEN len;
     U8 *s = (U8*)SvPVx(argsv, len);
 
-    XPUSHu(DO_UTF8(argsv) ? utf8_to_uv_simple(s, 0) : (*s & 0xff));
+    XPUSHu(DO_UTF8(argsv) ? utf8_to_uvchr(s, 0) : (*s & 0xff));
     RETURN;
 }
 
@@ -2947,7 +2948,7 @@ PP(pp_chr)
 
     if (value > 255 && !IN_BYTE) {
        SvGROW(TARG, UNISKIP(value)+1);
-       tmps = (char*)uv_to_utf8((U8*)SvPVX(TARG), value);
+       tmps = (char*)uvchr_to_utf8((U8*)SvPVX(TARG), value);
        SvCUR_set(TARG, tmps - SvPVX(TARG));
        *tmps = '\0';
        (void)SvPOK_only(TARG);
@@ -2996,17 +2997,17 @@ PP(pp_ucfirst)
        STRLEN ulen;
        U8 tmpbuf[UTF8_MAXLEN+1];
        U8 *tend;
-       UV uv = utf8_to_uv(s, slen, &ulen, 0);
+       UV uv;
 
        if (PL_op->op_private & OPpLOCALE) {
            TAINT;
            SvTAINTED_on(sv);
-           uv = toTITLE_LC_uni(uv);
+           uv = toTITLE_LC_uvchr(utf8n_to_uvchr(s, slen, &ulen, 0));
        }
        else
            uv = toTITLE_utf8(s);
        
-       tend = uv_to_utf8(tmpbuf, uv);
+       tend = uvchr_to_utf8(tmpbuf, uv);
 
        if (!SvPADTMP(sv) || tend - tmpbuf != ulen || SvREADONLY(sv)) {
            dTARGET;
@@ -3055,17 +3056,17 @@ PP(pp_lcfirst)
        STRLEN ulen;
        U8 tmpbuf[UTF8_MAXLEN+1];
        U8 *tend;
-       UV uv = utf8_to_uv(s, slen, &ulen, 0);
+       UV uv;
 
        if (PL_op->op_private & OPpLOCALE) {
            TAINT;
            SvTAINTED_on(sv);
-           uv = toLOWER_LC_uni(uv);
+           uv = toLOWER_LC_uvchr(utf8n_to_uvchr(s, slen, &ulen, 0));
        }
        else
            uv = toLOWER_utf8(s);
        
-       tend = uv_to_utf8(tmpbuf, uv);
+       tend = uvchr_to_utf8(tmpbuf, uv);
 
        if (!SvPADTMP(sv) || tend - tmpbuf != ulen || SvREADONLY(sv)) {
            dTARGET;
@@ -3132,13 +3133,13 @@ PP(pp_uc)
                TAINT;
                SvTAINTED_on(TARG);
                while (s < send) {
-                   d = uv_to_utf8(d, toUPPER_LC_uni( utf8_to_uv(s, len, &ulen, 0)));
+                   d = uvchr_to_utf8(d, toUPPER_LC_uvchr( utf8n_to_uvchr(s, len, &ulen, 0)));
                    s += ulen;
                }
            }
            else {
                while (s < send) {
-                   d = uv_to_utf8(d, toUPPER_utf8( s ));
+                   d = uvchr_to_utf8(d, toUPPER_utf8( s ));
                    s += UTF8SKIP(s);
                }
            }
@@ -3206,13 +3207,13 @@ PP(pp_lc)
                TAINT;
                SvTAINTED_on(TARG);
                while (s < send) {
-                   d = uv_to_utf8(d, toLOWER_LC_uni( utf8_to_uv(s, len, &ulen, 0)));
+                   d = uvchr_to_utf8(d, toLOWER_LC_uvchr( utf8n_to_uvchr(s, len, &ulen, 0)));
                    s += ulen;
                }
            }
            else {
                while (s < send) {
-                   d = uv_to_utf8(d, toLOWER_utf8(s));
+                   d = uvchr_to_utf8(d, toLOWER_utf8(s));
                    s += UTF8SKIP(s);
                }
            }
@@ -3961,12 +3962,12 @@ PP(pp_reverse)
                U8* s = (U8*)SvPVX(TARG);
                U8* send = (U8*)(s + len);
                while (s < send) {
-                   if (UTF8_IS_ASCII(*s)) {
+                   if (UTF8_IS_INVARIANT(*s)) {
                        s++;
                        continue;
                    }
                    else {
-                       if (!utf8_to_uv_simple(s, 0))
+                       if (!utf8_to_uvchr(s, 0))
                            break;
                        up = (char*)s;
                        s += UTF8SKIP(s);
@@ -4035,6 +4036,7 @@ S_mul128(pTHX_ SV *sv, U8 m)
 #define ISUUCHAR(ch)    (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) || (ch) == ' ')
 #endif
 
+
 PP(pp_unpack)
 {
     dSP;
@@ -4045,7 +4047,14 @@ PP(pp_unpack)
     STRLEN llen;
     STRLEN rlen;
     register char *pat = SvPV(left, llen);
+#ifdef PACKED_IS_OCTETS
+    /* The packed side is assumed to be octets, so force a downgrade
+       if it has been UTF-8 encoded by accident.
+     */
+    register char *s = SvPVbyte(right, rlen);
+#else
     register char *s = SvPV(right, rlen);
+#endif
     char *strend = s + rlen;
     char *strbeg = s;
     register char *patend = pat + llen;
@@ -4354,7 +4363,7 @@ PP(pp_unpack)
            if (checksum) {
                while (len-- > 0 && s < strend) {
                    STRLEN alen;
-                   auint = utf8_to_uv((U8*)s, strend - s, &alen, 0);
+                   auint = utf8n_to_uvchr((U8*)s, strend - s, &alen, 0);
                    along = alen;
                    s += along;
                    if (checksum > 32)
@@ -4368,7 +4377,7 @@ PP(pp_unpack)
                EXTEND_MORTAL(len);
                while (len-- > 0 && s < strend) {
                    STRLEN alen;
-                   auint = utf8_to_uv((U8*)s, strend - s, &alen, 0);
+                   auint = utf8n_to_uvchr((U8*)s, strend - s, &alen, 0);
                    along = alen;
                    s += along;
                    sv = NEWSV(37, 0);
@@ -4780,7 +4789,8 @@ PP(pp_unpack)
                
                while ((len > 0) && (s < strend)) {
                    auv = (auv << 7) | (*s & 0x7f);
-                   if (UTF8_IS_ASCII(*s++)) {
+                   /* UTF8_IS_* macros are not right here: this tests the raw high bit of a 7-bit group, so use the constant 0x80 */
+                   if ((U8)(*s++) < 0x80) {
                        bytes = 0;
                        sv = NEWSV(40, 0);
                        sv_setuv(sv, auv);
@@ -5172,8 +5182,10 @@ PP(pp_pack)
            patcopy++;
            continue;
         }
+#ifndef PACKED_IS_OCTETS
        if (datumtype == 'U' && pat == patcopy+1)
            SvUTF8_on(cat);
+#endif
        if (datumtype == '#') {
            while (pat < patend && *pat != '\n')
                pat++;
@@ -5406,7 +5418,7 @@ PP(pp_pack)
                fromstr = NEXTFROM;
                auint = SvUV(fromstr);
                SvGROW(cat, SvCUR(cat) + UTF8_MAXLEN + 1);
-               SvCUR_set(cat, (char*)uv_to_utf8((U8*)SvEND(cat),auint)
+               SvCUR_set(cat, (char*)uvchr_to_utf8((U8*)SvEND(cat),auint)
                               - SvPVX(cat));
            }
            *SvEND(cat) = '\0';