X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=pp.c;h=d5e4dd7fb03d14271d85a4a0b1d29d1d822de3d7;hb=2e117952781c322d29321f4d0b7193f45488d1cb;hp=6ff39fa82c948879b767b43728d4231120116d2b;hpb=39644a267dae6dfa935b1c1d39151eb399850949;p=p5sagit%2Fp5-mst-13.2.git diff --git a/pp.c b/pp.c index 6ff39fa..d5e4dd7 100644 --- a/pp.c +++ b/pp.c @@ -448,10 +448,12 @@ PP(pp_prototype) else if (n && str[0] == ';' && seen_question) goto set; /* XXXX system, exec */ if ((oa & (OA_OPTIONAL - 1)) >= OA_AVREF - && (oa & (OA_OPTIONAL - 1)) <= OA_HVREF) { + && (oa & (OA_OPTIONAL - 1)) <= OA_SCALARREF + /* But globs are already references (kinda) */ + && (oa & (OA_OPTIONAL - 1)) != OA_FILEREF + ) { str[n++] = '\\'; } - /* What to do with R ((un)tie, tied, (sys)read, recv)? */ str[n++] = ("?$@@%&*$")[oa & (OA_OPTIONAL - 1)]; oa = oa >> 4; } @@ -745,9 +747,10 @@ PP(pp_schop) PP(pp_chop) { - dSP; dMARK; dTARGET; - while (SP > MARK) - do_chop(TARG, POPs); + dSP; dMARK; dTARGET; dORIGMARK; + while (MARK < SP) + do_chop(TARG, *++MARK); + SP = ORIGMARK; PUSHTARG; RETURN; } @@ -1231,6 +1234,16 @@ PP(pp_repeat) (void)SvPOK_only_UTF8(TARG); else (void)SvPOK_only(TARG); + + if (PL_op->op_private & OPpREPEAT_DOLIST) { + /* The parser saw this as a list repeat, and there + are probably several items on the stack. But we're + in scalar context, and there's no pp_list to save us + now. So drop the rest of the items -- robin@kitsite.com + */ + dMARK; + SP = MARK; + } PUSHTARG; } RETURN; @@ -1280,7 +1293,7 @@ PP(pp_subtract) UV result; register UV buv; bool buvok = SvUOK(TOPs); - + if (buvok) buv = SvUVX(TOPs); else { @@ -1703,6 +1716,12 @@ PP(pp_ge) PP(pp_ne) { dSP; tryAMAGICbinSET(ne,0); +#ifndef NV_PRESERVES_UV + if (SvROK(TOPs) && SvROK(TOPm1s)) { + SETs(boolSV(SvRV(TOPs) != SvRV(TOPm1s))); + RETURN; + } +#endif #ifdef PERL_PRESERVE_IVUV SvIV_please(TOPs); if (SvIOK(TOPs)) { @@ -1773,6 +1792,12 @@ PP(pp_ne) PP(pp_ncmp) { dSP; dTARGET; tryAMAGICbin(ncmp,0); +#ifndef NV_PRESERVES_UV + if (SvROK(TOPs) && SvROK(TOPm1s)) { + SETi(PTR2UV(SvRV(TOPs)) - PTR2UV(SvRV(TOPm1s))); + RETURN; + } +#endif #ifdef PERL_PRESERVE_IVUV /* Fortunately it seems NaN isn't IOK */ SvIV_please(TOPs); @@ -1953,6 +1978,12 @@ PP(pp_sne) PP(pp_scmp) { dSP; dTARGET; tryAMAGICbin(scmp,0); +#ifndef NV_PRESERVES_UV + if (SvROK(TOPs) && SvROK(TOPm1s)) { + SETi(PTR2UV(SvRV(TOPs)) - PTR2UV(SvRV(TOPm1s))); + RETURN; + } +#endif { dPOPTOPssrl; int cmp = ((PL_op->op_private & OPpLOCALE) @@ -2137,7 +2168,7 @@ PP(pp_complement) send = tmps + len; while (tmps < send) { - UV c = utf8_to_uv(tmps, send-tmps, &l, UTF8_ALLOW_ANYUV); + UV c = utf8n_to_uvchr(tmps, send-tmps, &l, UTF8_ALLOW_ANYUV); tmps += UTF8SKIP(tmps); targlen += UNISKIP(~c); nchar++; @@ -2151,9 +2182,9 @@ PP(pp_complement) if (nwide) { Newz(0, result, targlen + 1, U8); while (tmps < send) { - UV c = utf8_to_uv(tmps, send-tmps, &l, UTF8_ALLOW_ANYUV); + UV c = utf8n_to_uvchr(tmps, send-tmps, &l, UTF8_ALLOW_ANYUV); tmps += UTF8SKIP(tmps); - result = uv_to_utf8(result, ~c); + result = uvchr_to_utf8(result, ~c); } *result = '\0'; result -= targlen; @@ -2163,7 +2194,7 @@ PP(pp_complement) else { Newz(0, result, nchar + 1, U8); while (tmps < send) { - U8 c = (U8)utf8_to_uv(tmps, 0, &l, UTF8_ALLOW_ANY); + U8 c = (U8)utf8n_to_uvchr(tmps, 0, &l, UTF8_ALLOW_ANY); tmps += UTF8SKIP(tmps); *result++ = ~c; } @@ -2638,11 +2669,11 @@ PP(pp_hex) dSP; dTARGET; char *tmps; STRLEN argtype; - STRLEN n_a; + STRLEN len; - tmps = POPpx; + tmps = (SvPVx(POPs, len)); argtype = 1; /* allow underscores */ - XPUSHn(scan_hex(tmps, 99, &argtype)); + XPUSHn(scan_hex(tmps, len, &argtype)); RETURN; } @@ -2652,20 +2683,20 @@ PP(pp_oct) NV value; STRLEN argtype; char *tmps; - STRLEN n_a; + STRLEN len; - tmps = POPpx; - while (*tmps && isSPACE(*tmps)) - tmps++; + tmps = (SvPVx(POPs, len)); + while (*tmps && len && isSPACE(*tmps)) + tmps++, len--; if (*tmps == '0') - tmps++; + tmps++, len--; argtype = 1; /* allow underscores */ if (*tmps == 'x') - value = scan_hex(++tmps, 99, &argtype); + value = scan_hex(++tmps, --len, &argtype); else if (*tmps == 'b') - value = scan_bin(++tmps, 99, &argtype); + value = scan_bin(++tmps, --len, &argtype); else - value = scan_oct(tmps, 99, &argtype); + value = scan_oct(tmps, len, &argtype); XPUSHn(value); RETURN; } @@ -2690,39 +2721,51 @@ PP(pp_substr) SV *sv; I32 len; STRLEN curlen; - STRLEN utfcurlen; + STRLEN utf8_curlen; I32 pos; I32 rem; I32 fail; I32 lvalue = PL_op->op_flags & OPf_MOD || LVRET; char *tmps; I32 arybase = PL_curcop->cop_arybase; + SV *repl_sv = NULL; char *repl = 0; STRLEN repl_len; int num_args = PL_op->op_private & 7; + bool repl_need_utf8_upgrade = FALSE; + bool repl_is_utf8 = FALSE; SvTAINTED_off(TARG); /* decontaminate */ SvUTF8_off(TARG); /* decontaminate */ if (num_args > 2) { if (num_args > 3) { - sv = POPs; - repl = SvPV(sv, repl_len); + repl_sv = POPs; + repl = SvPV(repl_sv, repl_len); + repl_is_utf8 = DO_UTF8(repl_sv) && SvCUR(repl_sv); } len = POPi; } pos = POPi; sv = POPs; PUTBACK; + if (repl_sv) { + if (repl_is_utf8) { + if (!DO_UTF8(sv)) + sv_utf8_upgrade(sv); + } + else if (DO_UTF8(sv)) + repl_need_utf8_upgrade = TRUE; + } tmps = SvPV(sv, curlen); if (DO_UTF8(sv)) { - utfcurlen = sv_len_utf8(sv); - if (utfcurlen == curlen) - utfcurlen = 0; + utf8_curlen = sv_len_utf8(sv); + if (utf8_curlen == curlen) + utf8_curlen = 0; else - curlen = utfcurlen; + curlen = utf8_curlen; } else - utfcurlen = 0; + utf8_curlen = 0; if (pos >= arybase) { pos -= arybase; @@ -2767,14 +2810,27 @@ PP(pp_substr) else { I32 upos = pos; I32 urem = rem; - if (utfcurlen) + if (utf8_curlen) sv_pos_u2b(sv, &pos, &rem); tmps += pos; sv_setpvn(TARG, tmps, rem); - if (utfcurlen) + if (utf8_curlen) SvUTF8_on(TARG); - if (repl) + if (repl) { + SV* repl_sv_copy = NULL; + + if (repl_need_utf8_upgrade) { + repl_sv_copy = newSVsv(repl_sv); + sv_utf8_upgrade(repl_sv_copy); + repl = SvPV(repl_sv_copy, repl_len); + repl_is_utf8 = DO_UTF8(repl_sv_copy) && SvCUR(sv); + } sv_insert(sv, pos, rem, repl, repl_len); + if (repl_is_utf8) + SvUTF8_on(sv); + if (repl_sv_copy) + SvREFCNT_dec(repl_sv_copy); + } else if (lvalue) { /* it's an lvalue! */ if (!SvGMAGICAL(sv)) { if (SvROK(sv)) { @@ -2933,7 +2989,7 @@ PP(pp_ord) STRLEN len; U8 *s = (U8*)SvPVx(argsv, len); - XPUSHu(DO_UTF8(argsv) ? utf8_to_uv_simple(s, 0) : (*s & 0xff)); + XPUSHu(DO_UTF8(argsv) ? utf8_to_uvchr(s, 0) : (*s & 0xff)); RETURN; } @@ -2947,7 +3003,7 @@ PP(pp_chr) if (value > 255 && !IN_BYTE) { SvGROW(TARG, UNISKIP(value)+1); - tmps = (char*)uv_to_utf8((U8*)SvPVX(TARG), value); + tmps = (char*)uvchr_to_utf8((U8*)SvPVX(TARG), value); SvCUR_set(TARG, tmps - SvPVX(TARG)); *tmps = '\0'; (void)SvPOK_only(TARG); @@ -2996,17 +3052,17 @@ PP(pp_ucfirst) STRLEN ulen; U8 tmpbuf[UTF8_MAXLEN+1]; U8 *tend; - UV uv = utf8_to_uv(s, slen, &ulen, 0); + UV uv; if (PL_op->op_private & OPpLOCALE) { TAINT; SvTAINTED_on(sv); - uv = toTITLE_LC_uni(uv); + uv = toTITLE_LC_uvchr(utf8n_to_uvchr(s, slen, &ulen, 0)); } else uv = toTITLE_utf8(s); - tend = uv_to_utf8(tmpbuf, uv); + tend = uvchr_to_utf8(tmpbuf, uv); if (!SvPADTMP(sv) || tend - tmpbuf != ulen || SvREADONLY(sv)) { dTARGET; @@ -3055,17 +3111,17 @@ PP(pp_lcfirst) STRLEN ulen; U8 tmpbuf[UTF8_MAXLEN+1]; U8 *tend; - UV uv = utf8_to_uv(s, slen, &ulen, 0); + UV uv; if (PL_op->op_private & OPpLOCALE) { TAINT; SvTAINTED_on(sv); - uv = toLOWER_LC_uni(uv); + uv = toLOWER_LC_uvchr(utf8n_to_uvchr(s, slen, &ulen, 0)); } else uv = toLOWER_utf8(s); - tend = uv_to_utf8(tmpbuf, uv); + tend = uvchr_to_utf8(tmpbuf, uv); if (!SvPADTMP(sv) || tend - tmpbuf != ulen || SvREADONLY(sv)) { dTARGET; @@ -3132,13 +3188,13 @@ PP(pp_uc) TAINT; SvTAINTED_on(TARG); while (s < send) { - d = uv_to_utf8(d, toUPPER_LC_uni( utf8_to_uv(s, len, &ulen, 0))); + d = uvchr_to_utf8(d, toUPPER_LC_uvchr( utf8n_to_uvchr(s, len, &ulen, 0))); s += ulen; } } else { while (s < send) { - d = uv_to_utf8(d, toUPPER_utf8( s )); + d = uvchr_to_utf8(d, toUPPER_utf8( s )); s += UTF8SKIP(s); } } @@ -3206,13 +3262,13 @@ PP(pp_lc) TAINT; SvTAINTED_on(TARG); while (s < send) { - d = uv_to_utf8(d, toLOWER_LC_uni( utf8_to_uv(s, len, &ulen, 0))); + d = uvchr_to_utf8(d, toLOWER_LC_uvchr( utf8n_to_uvchr(s, len, &ulen, 0))); s += ulen; } } else { while (s < send) { - d = uv_to_utf8(d, toLOWER_utf8(s)); + d = uvchr_to_utf8(d, toLOWER_utf8(s)); s += UTF8SKIP(s); } } @@ -3507,7 +3563,9 @@ PP(pp_hslice) while (++MARK <= SP) { SV *keysv = *MARK; SV **svp; - I32 preeminent = SvRMAGICAL(hv) ? 1 : hv_exists_ent(hv, keysv, 0); + I32 preeminent = SvRMAGICAL(hv) ? 1 : + realhv ? hv_exists_ent(hv, keysv, 0) + : avhv_exists_ent((AV*)hv, keysv, 0); if (realhv) { HE *he = hv_fetch_ent(hv, keysv, lval, 0); svp = he ? &HeVAL(he) : 0; @@ -3961,12 +4019,12 @@ PP(pp_reverse) U8* s = (U8*)SvPVX(TARG); U8* send = (U8*)(s + len); while (s < send) { - if (UTF8_IS_ASCII(*s)) { + if (UTF8_IS_INVARIANT(*s)) { s++; continue; } else { - if (!utf8_to_uv_simple(s, 0)) + if (!utf8_to_uvchr(s, 0)) break; up = (char*)s; s += UTF8SKIP(s); @@ -4035,6 +4093,7 @@ S_mul128(pTHX_ SV *sv, U8 m) #define ISUUCHAR(ch) (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) || (ch) == ' ') #endif + PP(pp_unpack) { dSP; @@ -4045,7 +4104,14 @@ PP(pp_unpack) STRLEN llen; STRLEN rlen; register char *pat = SvPV(left, llen); +#ifdef PACKED_IS_OCTETS + /* Packed side is assumed to be octets - so force downgrade if it + has been UTF-8 encoded by accident + */ + register char *s = SvPVbyte(right, rlen); +#else register char *s = SvPV(right, rlen); +#endif char *strend = s + rlen; char *strbeg = s; register char *patend = pat + llen; @@ -4064,7 +4130,6 @@ PP(pp_unpack) U16 aushort; unsigned int auint; U32 aulong; - UV auv; #ifdef HAS_QUAD Uquad_t auquad; #endif @@ -4332,46 +4397,20 @@ PP(pp_unpack) if (len > strend - s) len = strend - s; if (checksum) { - if (DO_UTF8(right)) { - while (len > 0) { - STRLEN l; - auv = utf8_to_uv((U8*)s, strend - s, - &l, UTF8_ALLOW_ANYUV); - culong += auv; - s += l; - len -= l; - } - } - else { - uchar_checksum: - while (len-- > 0) { - auint = *s++ & 0xFF; - culong += auint; - } + uchar_checksum: + while (len-- > 0) { + auint = *s++ & 255; + culong += auint; } } else { EXTEND(SP, len); EXTEND_MORTAL(len); - if (DO_UTF8(right)) { - while (len > 0) { - STRLEN l; - auv = utf8_to_uv((U8*)s, strend - s, - &l, UTF8_ALLOW_ANYUV); - sv = NEWSV(37, 0); - sv_setuv(sv, auv); - PUSHs(sv_2mortal(sv)); - s += l; - len -= l; - } - } - else { - while (len-- > 0) { - auint = *s++ & 0xFF; - sv = NEWSV(37, 0); - sv_setuv(sv, auint); - PUSHs(sv_2mortal(sv)); - } + while (len-- > 0) { + auint = *s++ & 255; + sv = NEWSV(37, 0); + sv_setiv(sv, (IV)auint); + PUSHs(sv_2mortal(sv)); } } break; @@ -4381,7 +4420,7 @@ PP(pp_unpack) if (checksum) { while (len-- > 0 && s < strend) { STRLEN alen; - auint = utf8_to_uv((U8*)s, strend - s, &alen, 0); + auint = utf8n_to_uvchr((U8*)s, strend - s, &alen, 0); along = alen; s += along; if (checksum > 32) @@ -4395,7 +4434,7 @@ PP(pp_unpack) EXTEND_MORTAL(len); while (len-- > 0 && s < strend) { STRLEN alen; - auint = utf8_to_uv((U8*)s, strend - s, &alen, 0); + auint = utf8n_to_uvchr((U8*)s, strend - s, &alen, 0); along = alen; s += along; sv = NEWSV(37, 0); @@ -4807,7 +4846,8 @@ PP(pp_unpack) while ((len > 0) && (s < strend)) { auv = (auv << 7) | (*s & 0x7f); - if (UTF8_IS_ASCII(*s++)) { + /* UTF8_IS_XXXXX not right here - using constant 0x80 */ + if ((U8)(*s++) < 0x80) { bytes = 0; sv = NEWSV(40, 0); sv_setuv(sv, auv); @@ -5172,7 +5212,6 @@ PP(pp_pack) unsigned int auint; I32 along; U32 aulong; - UV auv; #ifdef HAS_QUAD Quad_t aquad; Uquad_t auquad; @@ -5184,7 +5223,6 @@ PP(pp_pack) #ifdef PERL_NATINT_PACK int natint; /* native integer */ #endif - bool has_utf8; items = SP - MARK; MARK++; @@ -5201,8 +5239,10 @@ PP(pp_pack) patcopy++; continue; } +#ifndef PACKED_IS_OCTETS if (datumtype == 'U' && pat == patcopy+1) SvUTF8_on(cat); +#endif if (datumtype == '#') { while (pat < patend && *pat != '\n') pat++; @@ -5421,6 +5461,7 @@ PP(pp_pack) items = saveitems; } break; + case 'C': case 'c': while (len-- > 0) { fromstr = NEXTFROM; @@ -5429,41 +5470,12 @@ PP(pp_pack) sv_catpvn(cat, &achar, sizeof(char)); } break; - case 'C': - has_utf8 = SvUTF8(cat); - while (len-- > 0) { - fromstr = NEXTFROM; - auv = SvUV(fromstr); - if (!has_utf8 && auv > 0xFF && !IN_BYTE) { - has_utf8 = TRUE; - if (SvCUR(cat)) - sv_utf8_upgrade(cat); - else - SvUTF8_on(cat); /* There will be UTF8. */ - } - if (has_utf8) { - SvGROW(cat, SvCUR(cat) + UNISKIP(auv) + 1); - SvCUR_set(cat, (char*)uv_to_utf8((U8*)SvEND(cat),auv) - - SvPVX(cat)); - } - else { - achar = auv; - sv_catpvn(cat, &achar, sizeof(char)); - } - } - *SvEND(cat) = '\0'; - break; case 'U': - has_utf8 = SvUTF8(cat); while (len-- > 0) { fromstr = NEXTFROM; - auv = SvUV(fromstr); - if (!has_utf8 && auv > 0x80) { - has_utf8 = TRUE; - sv_utf8_upgrade(cat); - } - SvGROW(cat, SvCUR(cat) + UNISKIP(auv) + 1); - SvCUR_set(cat, (char*)uv_to_utf8((U8*)SvEND(cat),auv) + auint = SvUV(fromstr); + SvGROW(cat, SvCUR(cat) + UTF8_MAXLEN + 1); + SvCUR_set(cat, (char*)uvchr_to_utf8((U8*)SvEND(cat),auint) - SvPVX(cat)); } *SvEND(cat) = '\0';