X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=pp.c;h=37ada8ba0c017d69c7c97abe38513faaa102e24a;hb=211f317fca609f5ac54a899e10ccf2cdbfe2496c;hp=1bbb1086add1ea65ce81a33ce3b9505fee89d948;hpb=494f3023e6cb99f0f26ded70a0869fe24d04973e;p=p5sagit%2Fp5-mst-13.2.git diff --git a/pp.c b/pp.c index 1bbb108..37ada8b 100644 --- a/pp.c +++ b/pp.c @@ -745,9 +745,10 @@ PP(pp_schop) PP(pp_chop) { - dSP; dMARK; dTARGET; - while (SP > MARK) - do_chop(TARG, POPs); + dSP; dMARK; dTARGET; dORIGMARK; + while (MARK < SP) + do_chop(TARG, *++MARK); + SP = ORIGMARK; PUSHTARG; RETURN; } @@ -1280,7 +1281,7 @@ PP(pp_subtract) UV result; register UV buv; bool buvok = SvUOK(TOPs); - + if (buvok) buv = SvUVX(TOPs); else { @@ -2137,7 +2138,7 @@ PP(pp_complement) send = tmps + len; while (tmps < send) { - UV c = utf8_to_uv(tmps, send-tmps, &l, UTF8_ALLOW_ANYUV); + UV c = utf8n_to_uvchr(tmps, send-tmps, &l, UTF8_ALLOW_ANYUV); tmps += UTF8SKIP(tmps); targlen += UNISKIP(~c); nchar++; @@ -2151,9 +2152,9 @@ PP(pp_complement) if (nwide) { Newz(0, result, targlen + 1, U8); while (tmps < send) { - UV c = utf8_to_uv(tmps, send-tmps, &l, UTF8_ALLOW_ANYUV); + UV c = utf8n_to_uvchr(tmps, send-tmps, &l, UTF8_ALLOW_ANYUV); tmps += UTF8SKIP(tmps); - result = uv_to_utf8(result, ~c); + result = uvchr_to_utf8(result, ~c); } *result = '\0'; result -= targlen; @@ -2163,7 +2164,7 @@ PP(pp_complement) else { Newz(0, result, nchar + 1, U8); while (tmps < send) { - U8 c = (U8)utf8_to_uv(tmps, 0, &l, UTF8_ALLOW_ANY); + U8 c = (U8)utf8n_to_uvchr(tmps, 0, &l, UTF8_ALLOW_ANY); tmps += UTF8SKIP(tmps); *result++ = ~c; } @@ -2933,7 +2934,7 @@ PP(pp_ord) STRLEN len; U8 *s = (U8*)SvPVx(argsv, len); - XPUSHu(DO_UTF8(argsv) ? utf8_to_uv_simple(s, 0) : (*s & 0xff)); + XPUSHu(DO_UTF8(argsv) ? utf8_to_uvchr(s, 0) : (*s & 0xff)); RETURN; } @@ -2947,7 +2948,7 @@ PP(pp_chr) if (value > 255 && !IN_BYTE) { SvGROW(TARG, UNISKIP(value)+1); - tmps = (char*)uv_to_utf8((U8*)SvPVX(TARG), value); + tmps = (char*)uvchr_to_utf8((U8*)SvPVX(TARG), value); SvCUR_set(TARG, tmps - SvPVX(TARG)); *tmps = '\0'; (void)SvPOK_only(TARG); @@ -2996,17 +2997,17 @@ PP(pp_ucfirst) STRLEN ulen; U8 tmpbuf[UTF8_MAXLEN+1]; U8 *tend; - UV uv = utf8_to_uv(s, slen, &ulen, 0); + UV uv; if (PL_op->op_private & OPpLOCALE) { TAINT; SvTAINTED_on(sv); - uv = toTITLE_LC_uni(uv); + uv = toTITLE_LC_uvchr(utf8n_to_uvchr(s, slen, &ulen, 0)); } else uv = toTITLE_utf8(s); - tend = uv_to_utf8(tmpbuf, uv); + tend = uvchr_to_utf8(tmpbuf, uv); if (!SvPADTMP(sv) || tend - tmpbuf != ulen || SvREADONLY(sv)) { dTARGET; @@ -3055,17 +3056,17 @@ PP(pp_lcfirst) STRLEN ulen; U8 tmpbuf[UTF8_MAXLEN+1]; U8 *tend; - UV uv = utf8_to_uv(s, slen, &ulen, 0); + UV uv; if (PL_op->op_private & OPpLOCALE) { TAINT; SvTAINTED_on(sv); - uv = toLOWER_LC_uni(uv); + uv = toLOWER_LC_uvchr(utf8n_to_uvchr(s, slen, &ulen, 0)); } else uv = toLOWER_utf8(s); - tend = uv_to_utf8(tmpbuf, uv); + tend = uvchr_to_utf8(tmpbuf, uv); if (!SvPADTMP(sv) || tend - tmpbuf != ulen || SvREADONLY(sv)) { dTARGET; @@ -3132,13 +3133,13 @@ PP(pp_uc) TAINT; SvTAINTED_on(TARG); while (s < send) { - d = uv_to_utf8(d, toUPPER_LC_uni( utf8_to_uv(s, len, &ulen, 0))); + d = uvchr_to_utf8(d, toUPPER_LC_uvchr( utf8n_to_uvchr(s, len, &ulen, 0))); s += ulen; } } else { while (s < send) { - d = uv_to_utf8(d, toUPPER_utf8( s )); + d = uvchr_to_utf8(d, toUPPER_utf8( s )); s += UTF8SKIP(s); } } @@ -3206,13 +3207,13 @@ PP(pp_lc) TAINT; SvTAINTED_on(TARG); while (s < send) { - d = uv_to_utf8(d, toLOWER_LC_uni( utf8_to_uv(s, len, &ulen, 0))); + d = uvchr_to_utf8(d, toLOWER_LC_uvchr( utf8n_to_uvchr(s, len, &ulen, 0))); s += ulen; } } else { while (s < send) { - d = uv_to_utf8(d, toLOWER_utf8(s)); + d = uvchr_to_utf8(d, toLOWER_utf8(s)); s += UTF8SKIP(s); } } @@ -3961,12 +3962,12 @@ PP(pp_reverse) U8* s = (U8*)SvPVX(TARG); U8* send = (U8*)(s + len); while (s < send) { - if (UTF8_IS_ASCII(*s)) { + if (UTF8_IS_INVARIANT(*s)) { s++; continue; } else { - if (!utf8_to_uv_simple(s, 0)) + if (!utf8_to_uvchr(s, 0)) break; up = (char*)s; s += UTF8SKIP(s); @@ -4035,6 +4036,7 @@ S_mul128(pTHX_ SV *sv, U8 m) #define ISUUCHAR(ch) (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) || (ch) == ' ') #endif + PP(pp_unpack) { dSP; @@ -4045,7 +4047,14 @@ PP(pp_unpack) STRLEN llen; STRLEN rlen; register char *pat = SvPV(left, llen); +#ifdef PACKED_IS_OCTETS + /* Packed side is assumed to be octets - so force downgrade if it + has been UTF-8 encoded by accident + */ + register char *s = SvPVbyte(right, rlen); +#else register char *s = SvPV(right, rlen); +#endif char *strend = s + rlen; char *strbeg = s; register char *patend = pat + llen; @@ -4354,7 +4363,7 @@ PP(pp_unpack) if (checksum) { while (len-- > 0 && s < strend) { STRLEN alen; - auint = utf8_to_uv((U8*)s, strend - s, &alen, 0); + auint = utf8n_to_uvchr((U8*)s, strend - s, &alen, 0); along = alen; s += along; if (checksum > 32) @@ -4368,7 +4377,7 @@ PP(pp_unpack) EXTEND_MORTAL(len); while (len-- > 0 && s < strend) { STRLEN alen; - auint = utf8_to_uv((U8*)s, strend - s, &alen, 0); + auint = utf8n_to_uvchr((U8*)s, strend - s, &alen, 0); along = alen; s += along; sv = NEWSV(37, 0); @@ -4780,7 +4789,8 @@ PP(pp_unpack) while ((len > 0) && (s < strend)) { auv = (auv << 7) | (*s & 0x7f); - if (UTF8_IS_ASCII(*s++)) { + /* UTF8_IS_XXXXX not right here - using constant 0x80 */ + if ((U8)(*s++) < 0x80) { bytes = 0; sv = NEWSV(40, 0); sv_setuv(sv, auv); @@ -5172,8 +5182,10 @@ PP(pp_pack) patcopy++; continue; } +#ifndef PACKED_IS_OCTETS if (datumtype == 'U' && pat == patcopy+1) SvUTF8_on(cat); +#endif if (datumtype == '#') { while (pat < patend && *pat != '\n') pat++; @@ -5406,7 +5418,7 @@ PP(pp_pack) fromstr = NEXTFROM; auint = SvUV(fromstr); SvGROW(cat, SvCUR(cat) + UTF8_MAXLEN + 1); - SvCUR_set(cat, (char*)uv_to_utf8((U8*)SvEND(cat),auint) + SvCUR_set(cat, (char*)uvchr_to_utf8((U8*)SvEND(cat),auint) - SvPVX(cat)); } *SvEND(cat) = '\0';