X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=pp.c;h=d5e4dd7fb03d14271d85a4a0b1d29d1d822de3d7;hb=2e117952781c322d29321f4d0b7193f45488d1cb;hp=4c21f1b5b4aa18afc56273aa64ee2dc28082b0b6;hpb=2ec6af5f978df7271f6d544a10f1ed025070c53f;p=p5sagit%2Fp5-mst-13.2.git diff --git a/pp.c b/pp.c index 4c21f1b..d5e4dd7 100644 --- a/pp.c +++ b/pp.c @@ -448,10 +448,12 @@ PP(pp_prototype) else if (n && str[0] == ';' && seen_question) goto set; /* XXXX system, exec */ if ((oa & (OA_OPTIONAL - 1)) >= OA_AVREF - && (oa & (OA_OPTIONAL - 1)) <= OA_HVREF) { + && (oa & (OA_OPTIONAL - 1)) <= OA_SCALARREF + /* But globs are already references (kinda) */ + && (oa & (OA_OPTIONAL - 1)) != OA_FILEREF + ) { str[n++] = '\\'; } - /* What to do with R ((un)tie, tied, (sys)read, recv)? */ str[n++] = ("?$@@%&*$")[oa & (OA_OPTIONAL - 1)]; oa = oa >> 4; } @@ -1232,6 +1234,16 @@ PP(pp_repeat) (void)SvPOK_only_UTF8(TARG); else (void)SvPOK_only(TARG); + + if (PL_op->op_private & OPpREPEAT_DOLIST) { + /* The parser saw this as a list repeat, and there + are probably several items on the stack. But we're + in scalar context, and there's no pp_list to save us + now. So drop the rest of the items -- robin@kitsite.com + */ + dMARK; + SP = MARK; + } PUSHTARG; } RETURN; @@ -1281,7 +1293,7 @@ PP(pp_subtract) UV result; register UV buv; bool buvok = SvUOK(TOPs); - + if (buvok) buv = SvUVX(TOPs); else { @@ -1704,6 +1716,12 @@ PP(pp_ge) PP(pp_ne) { dSP; tryAMAGICbinSET(ne,0); +#ifndef NV_PRESERVES_UV + if (SvROK(TOPs) && SvROK(TOPm1s)) { + SETs(boolSV(SvRV(TOPs) != SvRV(TOPm1s))); + RETURN; + } +#endif #ifdef PERL_PRESERVE_IVUV SvIV_please(TOPs); if (SvIOK(TOPs)) { @@ -1774,6 +1792,12 @@ PP(pp_ne) PP(pp_ncmp) { dSP; dTARGET; tryAMAGICbin(ncmp,0); +#ifndef NV_PRESERVES_UV + if (SvROK(TOPs) && SvROK(TOPm1s)) { + SETi(PTR2UV(SvRV(TOPs)) - PTR2UV(SvRV(TOPm1s))); + RETURN; + } +#endif #ifdef PERL_PRESERVE_IVUV /* Fortunately it seems NaN isn't IOK */ SvIV_please(TOPs); @@ -1954,6 +1978,12 @@ PP(pp_sne) PP(pp_scmp) { dSP; dTARGET; tryAMAGICbin(scmp,0); +#ifndef NV_PRESERVES_UV + if (SvROK(TOPs) && SvROK(TOPm1s)) { + SETi(PTR2UV(SvRV(TOPs)) - PTR2UV(SvRV(TOPm1s))); + RETURN; + } +#endif { dPOPTOPssrl; int cmp = ((PL_op->op_private & OPpLOCALE) @@ -2138,7 +2168,7 @@ PP(pp_complement) send = tmps + len; while (tmps < send) { - UV c = utf8_to_uv(tmps, send-tmps, &l, UTF8_ALLOW_ANYUV); + UV c = utf8n_to_uvchr(tmps, send-tmps, &l, UTF8_ALLOW_ANYUV); tmps += UTF8SKIP(tmps); targlen += UNISKIP(~c); nchar++; @@ -2152,9 +2182,9 @@ PP(pp_complement) if (nwide) { Newz(0, result, targlen + 1, U8); while (tmps < send) { - UV c = utf8_to_uv(tmps, send-tmps, &l, UTF8_ALLOW_ANYUV); + UV c = utf8n_to_uvchr(tmps, send-tmps, &l, UTF8_ALLOW_ANYUV); tmps += UTF8SKIP(tmps); - result = uv_to_utf8(result, ~c); + result = uvchr_to_utf8(result, ~c); } *result = '\0'; result -= targlen; @@ -2164,7 +2194,7 @@ PP(pp_complement) else { Newz(0, result, nchar + 1, U8); while (tmps < send) { - U8 c = (U8)utf8_to_uv(tmps, 0, &l, UTF8_ALLOW_ANY); + U8 c = (U8)utf8n_to_uvchr(tmps, 0, &l, UTF8_ALLOW_ANY); tmps += UTF8SKIP(tmps); *result++ = ~c; } @@ -2639,11 +2669,11 @@ PP(pp_hex) dSP; dTARGET; char *tmps; STRLEN argtype; - STRLEN n_a; + STRLEN len; - tmps = POPpx; + tmps = (SvPVx(POPs, len)); argtype = 1; /* allow underscores */ - XPUSHn(scan_hex(tmps, 99, &argtype)); + XPUSHn(scan_hex(tmps, len, &argtype)); RETURN; } @@ -2653,20 +2683,20 @@ PP(pp_oct) NV value; STRLEN argtype; char *tmps; - STRLEN n_a; + STRLEN len; - tmps = POPpx; - while (*tmps && isSPACE(*tmps)) - tmps++; + tmps = (SvPVx(POPs, len)); + while (*tmps && len && isSPACE(*tmps)) + tmps++, len--; if (*tmps == '0') - tmps++; + tmps++, len--; argtype = 1; /* allow underscores */ if (*tmps == 'x') - value = scan_hex(++tmps, 99, &argtype); + value = scan_hex(++tmps, --len, &argtype); else if (*tmps == 'b') - value = scan_bin(++tmps, 99, &argtype); + value = scan_bin(++tmps, --len, &argtype); else - value = scan_oct(tmps, 99, &argtype); + value = scan_oct(tmps, len, &argtype); XPUSHn(value); RETURN; } @@ -2691,39 +2721,51 @@ PP(pp_substr) SV *sv; I32 len; STRLEN curlen; - STRLEN utfcurlen; + STRLEN utf8_curlen; I32 pos; I32 rem; I32 fail; I32 lvalue = PL_op->op_flags & OPf_MOD || LVRET; char *tmps; I32 arybase = PL_curcop->cop_arybase; + SV *repl_sv = NULL; char *repl = 0; STRLEN repl_len; int num_args = PL_op->op_private & 7; + bool repl_need_utf8_upgrade = FALSE; + bool repl_is_utf8 = FALSE; SvTAINTED_off(TARG); /* decontaminate */ SvUTF8_off(TARG); /* decontaminate */ if (num_args > 2) { if (num_args > 3) { - sv = POPs; - repl = SvPV(sv, repl_len); + repl_sv = POPs; + repl = SvPV(repl_sv, repl_len); + repl_is_utf8 = DO_UTF8(repl_sv) && SvCUR(repl_sv); } len = POPi; } pos = POPi; sv = POPs; PUTBACK; + if (repl_sv) { + if (repl_is_utf8) { + if (!DO_UTF8(sv)) + sv_utf8_upgrade(sv); + } + else if (DO_UTF8(sv)) + repl_need_utf8_upgrade = TRUE; + } tmps = SvPV(sv, curlen); if (DO_UTF8(sv)) { - utfcurlen = sv_len_utf8(sv); - if (utfcurlen == curlen) - utfcurlen = 0; + utf8_curlen = sv_len_utf8(sv); + if (utf8_curlen == curlen) + utf8_curlen = 0; else - curlen = utfcurlen; + curlen = utf8_curlen; } else - utfcurlen = 0; + utf8_curlen = 0; if (pos >= arybase) { pos -= arybase; @@ -2768,14 +2810,27 @@ PP(pp_substr) else { I32 upos = pos; I32 urem = rem; - if (utfcurlen) + if (utf8_curlen) sv_pos_u2b(sv, &pos, &rem); tmps += pos; sv_setpvn(TARG, tmps, rem); - if (utfcurlen) + if (utf8_curlen) SvUTF8_on(TARG); - if (repl) + if (repl) { + SV* repl_sv_copy = NULL; + + if (repl_need_utf8_upgrade) { + repl_sv_copy = newSVsv(repl_sv); + sv_utf8_upgrade(repl_sv_copy); + repl = SvPV(repl_sv_copy, repl_len); + repl_is_utf8 = DO_UTF8(repl_sv_copy) && SvCUR(sv); + } sv_insert(sv, pos, rem, repl, repl_len); + if (repl_is_utf8) + SvUTF8_on(sv); + if (repl_sv_copy) + SvREFCNT_dec(repl_sv_copy); + } else if (lvalue) { /* it's an lvalue! */ if (!SvGMAGICAL(sv)) { if (SvROK(sv)) { @@ -2934,7 +2989,7 @@ PP(pp_ord) STRLEN len; U8 *s = (U8*)SvPVx(argsv, len); - XPUSHu(DO_UTF8(argsv) ? utf8_to_uv_simple(s, 0) : (*s & 0xff)); + XPUSHu(DO_UTF8(argsv) ? utf8_to_uvchr(s, 0) : (*s & 0xff)); RETURN; } @@ -2948,7 +3003,7 @@ PP(pp_chr) if (value > 255 && !IN_BYTE) { SvGROW(TARG, UNISKIP(value)+1); - tmps = (char*)uv_to_utf8((U8*)SvPVX(TARG), value); + tmps = (char*)uvchr_to_utf8((U8*)SvPVX(TARG), value); SvCUR_set(TARG, tmps - SvPVX(TARG)); *tmps = '\0'; (void)SvPOK_only(TARG); @@ -2997,17 +3052,17 @@ PP(pp_ucfirst) STRLEN ulen; U8 tmpbuf[UTF8_MAXLEN+1]; U8 *tend; - UV uv = utf8_to_uv(s, slen, &ulen, 0); + UV uv; if (PL_op->op_private & OPpLOCALE) { TAINT; SvTAINTED_on(sv); - uv = toTITLE_LC_uni(uv); + uv = toTITLE_LC_uvchr(utf8n_to_uvchr(s, slen, &ulen, 0)); } else uv = toTITLE_utf8(s); - tend = uv_to_utf8(tmpbuf, uv); + tend = uvchr_to_utf8(tmpbuf, uv); if (!SvPADTMP(sv) || tend - tmpbuf != ulen || SvREADONLY(sv)) { dTARGET; @@ -3056,17 +3111,17 @@ PP(pp_lcfirst) STRLEN ulen; U8 tmpbuf[UTF8_MAXLEN+1]; U8 *tend; - UV uv = utf8_to_uv(s, slen, &ulen, 0); + UV uv; if (PL_op->op_private & OPpLOCALE) { TAINT; SvTAINTED_on(sv); - uv = toLOWER_LC_uni(uv); + uv = toLOWER_LC_uvchr(utf8n_to_uvchr(s, slen, &ulen, 0)); } else uv = toLOWER_utf8(s); - tend = uv_to_utf8(tmpbuf, uv); + tend = uvchr_to_utf8(tmpbuf, uv); if (!SvPADTMP(sv) || tend - tmpbuf != ulen || SvREADONLY(sv)) { dTARGET; @@ -3133,13 +3188,13 @@ PP(pp_uc) TAINT; SvTAINTED_on(TARG); while (s < send) { - d = uv_to_utf8(d, toUPPER_LC_uni( utf8_to_uv(s, len, &ulen, 0))); + d = uvchr_to_utf8(d, toUPPER_LC_uvchr( utf8n_to_uvchr(s, len, &ulen, 0))); s += ulen; } } else { while (s < send) { - d = uv_to_utf8(d, toUPPER_utf8( s )); + d = uvchr_to_utf8(d, toUPPER_utf8( s )); s += UTF8SKIP(s); } } @@ -3207,13 +3262,13 @@ PP(pp_lc) TAINT; SvTAINTED_on(TARG); while (s < send) { - d = uv_to_utf8(d, toLOWER_LC_uni( utf8_to_uv(s, len, &ulen, 0))); + d = uvchr_to_utf8(d, toLOWER_LC_uvchr( utf8n_to_uvchr(s, len, &ulen, 0))); s += ulen; } } else { while (s < send) { - d = uv_to_utf8(d, toLOWER_utf8(s)); + d = uvchr_to_utf8(d, toLOWER_utf8(s)); s += UTF8SKIP(s); } } @@ -3508,7 +3563,9 @@ PP(pp_hslice) while (++MARK <= SP) { SV *keysv = *MARK; SV **svp; - I32 preeminent = SvRMAGICAL(hv) ? 1 : hv_exists_ent(hv, keysv, 0); + I32 preeminent = SvRMAGICAL(hv) ? 1 : + realhv ? hv_exists_ent(hv, keysv, 0) + : avhv_exists_ent((AV*)hv, keysv, 0); if (realhv) { HE *he = hv_fetch_ent(hv, keysv, lval, 0); svp = he ? &HeVAL(he) : 0; @@ -3962,12 +4019,12 @@ PP(pp_reverse) U8* s = (U8*)SvPVX(TARG); U8* send = (U8*)(s + len); while (s < send) { - if (UTF8_IS_ASCII(*s)) { + if (UTF8_IS_INVARIANT(*s)) { s++; continue; } else { - if (!utf8_to_uv_simple(s, 0)) + if (!utf8_to_uvchr(s, 0)) break; up = (char*)s; s += UTF8SKIP(s); @@ -4036,6 +4093,7 @@ S_mul128(pTHX_ SV *sv, U8 m) #define ISUUCHAR(ch) (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) || (ch) == ' ') #endif + PP(pp_unpack) { dSP; @@ -4046,7 +4104,14 @@ PP(pp_unpack) STRLEN llen; STRLEN rlen; register char *pat = SvPV(left, llen); +#ifdef PACKED_IS_OCTETS + /* Packed side is assumed to be octets - so force downgrade if it + has been UTF-8 encoded by accident + */ + register char *s = SvPVbyte(right, rlen); +#else register char *s = SvPV(right, rlen); +#endif char *strend = s + rlen; char *strbeg = s; register char *patend = pat + llen; @@ -4355,7 +4420,7 @@ PP(pp_unpack) if (checksum) { while (len-- > 0 && s < strend) { STRLEN alen; - auint = utf8_to_uv((U8*)s, strend - s, &alen, 0); + auint = utf8n_to_uvchr((U8*)s, strend - s, &alen, 0); along = alen; s += along; if (checksum > 32) @@ -4369,7 +4434,7 @@ PP(pp_unpack) EXTEND_MORTAL(len); while (len-- > 0 && s < strend) { STRLEN alen; - auint = utf8_to_uv((U8*)s, strend - s, &alen, 0); + auint = utf8n_to_uvchr((U8*)s, strend - s, &alen, 0); along = alen; s += along; sv = NEWSV(37, 0); @@ -4781,7 +4846,8 @@ PP(pp_unpack) while ((len > 0) && (s < strend)) { auv = (auv << 7) | (*s & 0x7f); - if (UTF8_IS_ASCII(*s++)) { + /* UTF8_IS_XXXXX not right here - using constant 0x80 */ + if ((U8)(*s++) < 0x80) { bytes = 0; sv = NEWSV(40, 0); sv_setuv(sv, auv); @@ -5173,8 +5239,10 @@ PP(pp_pack) patcopy++; continue; } +#ifndef PACKED_IS_OCTETS if (datumtype == 'U' && pat == patcopy+1) SvUTF8_on(cat); +#endif if (datumtype == '#') { while (pat < patend && *pat != '\n') pat++; @@ -5407,7 +5475,7 @@ PP(pp_pack) fromstr = NEXTFROM; auint = SvUV(fromstr); SvGROW(cat, SvCUR(cat) + UTF8_MAXLEN + 1); - SvCUR_set(cat, (char*)uv_to_utf8((U8*)SvEND(cat),auint) + SvCUR_set(cat, (char*)uvchr_to_utf8((U8*)SvEND(cat),auint) - SvPVX(cat)); } *SvEND(cat) = '\0';