/*
 * pp_chop: calls do_chop(TARG, item) for every stack item between MARK
 * and SP, then pushes TARG and returns.
 *
 * In this diff the removed loop consumed the items from the top of the
 * stack downward with POPs; the added loop instead advances MARK upward
 * (*++MARK), so the items are visited from the lowest stack slot to the
 * highest, and SP is then reset from the saved ORIGMARK before TARG is
 * pushed.
 */
PP(pp_chop)
{
- dSP; dMARK; dTARGET;
- while (SP > MARK)
- do_chop(TARG, POPs);
+ dSP; dMARK; dTARGET; dORIGMARK;
+ while (MARK < SP)
+ do_chop(TARG, *++MARK);
+ SP = ORIGMARK;
PUSHTARG;
RETURN;
}
(void)SvPOK_only_UTF8(TARG);
else
(void)SvPOK_only(TARG);
+
+ if (PL_op->op_private & OPpREPEAT_DOLIST) {
+ /* The parser saw this as a list repeat, and there
+ are probably several items on the stack. But we're
+ in scalar context, and there's no pp_list to save us
+ now. So drop the rest of the items -- robin@kitsite.com
+ */
+ dMARK;
+ SP = MARK;
+ }
PUSHTARG;
}
RETURN;
UV result;
register UV buv;
bool buvok = SvUOK(TOPs);
-
+
if (buvok)
buv = SvUVX(TOPs);
else {
send = tmps + len;
while (tmps < send) {
- UV c = utf8_to_uv(tmps, send-tmps, &l, UTF8_ALLOW_ANYUV);
+ UV c = utf8n_to_uvchr(tmps, send-tmps, &l, UTF8_ALLOW_ANYUV);
tmps += UTF8SKIP(tmps);
targlen += UNISKIP(~c);
nchar++;
if (nwide) {
Newz(0, result, targlen + 1, U8);
while (tmps < send) {
- UV c = utf8_to_uv(tmps, send-tmps, &l, UTF8_ALLOW_ANYUV);
+ UV c = utf8n_to_uvchr(tmps, send-tmps, &l, UTF8_ALLOW_ANYUV);
tmps += UTF8SKIP(tmps);
- result = uv_to_utf8(result, ~c);
+ result = uvchr_to_utf8(result, ~c);
}
*result = '\0';
result -= targlen;
else {
Newz(0, result, nchar + 1, U8);
while (tmps < send) {
- U8 c = (U8)utf8_to_uv(tmps, 0, &l, UTF8_ALLOW_ANY);
+ U8 c = (U8)utf8n_to_uvchr(tmps, 0, &l, UTF8_ALLOW_ANY);
tmps += UTF8SKIP(tmps);
*result++ = ~c;
}
SV *sv;
I32 len;
STRLEN curlen;
- STRLEN utfcurlen;
+ STRLEN utf8_curlen;
I32 pos;
I32 rem;
I32 fail;
I32 lvalue = PL_op->op_flags & OPf_MOD || LVRET;
char *tmps;
I32 arybase = PL_curcop->cop_arybase;
+ SV *repl_sv = NULL;
char *repl = 0;
STRLEN repl_len;
int num_args = PL_op->op_private & 7;
+ bool repl_need_utf8_upgrade = FALSE;
+ bool repl_is_utf8 = FALSE;
SvTAINTED_off(TARG); /* decontaminate */
SvUTF8_off(TARG); /* decontaminate */
if (num_args > 2) {
if (num_args > 3) {
- sv = POPs;
- repl = SvPV(sv, repl_len);
+ repl_sv = POPs;
+ repl = SvPV(repl_sv, repl_len);
+ repl_is_utf8 = DO_UTF8(repl_sv) && SvCUR(repl_sv);
}
len = POPi;
}
pos = POPi;
sv = POPs;
PUTBACK;
+ if (repl_sv) {
+ if (repl_is_utf8) {
+ if (!DO_UTF8(sv))
+ sv_utf8_upgrade(sv);
+ }
+ else if (DO_UTF8(sv))
+ repl_need_utf8_upgrade = TRUE;
+ }
tmps = SvPV(sv, curlen);
if (DO_UTF8(sv)) {
- utfcurlen = sv_len_utf8(sv);
- if (utfcurlen == curlen)
- utfcurlen = 0;
+ utf8_curlen = sv_len_utf8(sv);
+ if (utf8_curlen == curlen)
+ utf8_curlen = 0;
else
- curlen = utfcurlen;
+ curlen = utf8_curlen;
}
else
- utfcurlen = 0;
+ utf8_curlen = 0;
if (pos >= arybase) {
pos -= arybase;
else {
I32 upos = pos;
I32 urem = rem;
- if (utfcurlen)
+ if (utf8_curlen)
sv_pos_u2b(sv, &pos, &rem);
tmps += pos;
sv_setpvn(TARG, tmps, rem);
- if (utfcurlen)
+ if (utf8_curlen)
SvUTF8_on(TARG);
- if (repl)
+ if (repl) {
+ SV* repl_sv_copy = NULL;
+
+ if (repl_need_utf8_upgrade) {
+ repl_sv_copy = newSVsv(repl_sv);
+ sv_utf8_upgrade(repl_sv_copy);
+ repl = SvPV(repl_sv_copy, repl_len);
+ repl_is_utf8 = DO_UTF8(repl_sv_copy) && SvCUR(sv);
+ }
sv_insert(sv, pos, rem, repl, repl_len);
+ if (repl_is_utf8)
+ SvUTF8_on(sv);
+ if (repl_sv_copy)
+ SvREFCNT_dec(repl_sv_copy);
+ }
else if (lvalue) { /* it's an lvalue! */
if (!SvGMAGICAL(sv)) {
if (SvROK(sv)) {
STRLEN len;
U8 *s = (U8*)SvPVx(argsv, len);
- XPUSHu(DO_UTF8(argsv) ? utf8_to_uv_simple(s, 0) : (*s & 0xff));
+ XPUSHu(DO_UTF8(argsv) ? utf8_to_uvchr(s, 0) : (*s & 0xff));
RETURN;
}
if (value > 255 && !IN_BYTE) {
SvGROW(TARG, UNISKIP(value)+1);
- tmps = (char*)uv_to_utf8((U8*)SvPVX(TARG), value);
+ tmps = (char*)uvchr_to_utf8((U8*)SvPVX(TARG), value);
SvCUR_set(TARG, tmps - SvPVX(TARG));
*tmps = '\0';
(void)SvPOK_only(TARG);
STRLEN ulen;
U8 tmpbuf[UTF8_MAXLEN+1];
U8 *tend;
- UV uv = utf8_to_uv(s, slen, &ulen, 0);
+ UV uv;
if (PL_op->op_private & OPpLOCALE) {
TAINT;
SvTAINTED_on(sv);
- uv = toTITLE_LC_uni(uv);
+ uv = toTITLE_LC_uvchr(utf8n_to_uvchr(s, slen, &ulen, 0));
}
else
uv = toTITLE_utf8(s);
- tend = uv_to_utf8(tmpbuf, uv);
+ tend = uvchr_to_utf8(tmpbuf, uv);
if (!SvPADTMP(sv) || tend - tmpbuf != ulen || SvREADONLY(sv)) {
dTARGET;
STRLEN ulen;
U8 tmpbuf[UTF8_MAXLEN+1];
U8 *tend;
- UV uv = utf8_to_uv(s, slen, &ulen, 0);
+ UV uv;
if (PL_op->op_private & OPpLOCALE) {
TAINT;
SvTAINTED_on(sv);
- uv = toLOWER_LC_uni(uv);
+ uv = toLOWER_LC_uvchr(utf8n_to_uvchr(s, slen, &ulen, 0));
}
else
uv = toLOWER_utf8(s);
- tend = uv_to_utf8(tmpbuf, uv);
+ tend = uvchr_to_utf8(tmpbuf, uv);
if (!SvPADTMP(sv) || tend - tmpbuf != ulen || SvREADONLY(sv)) {
dTARGET;
TAINT;
SvTAINTED_on(TARG);
while (s < send) {
- d = uv_to_utf8(d, toUPPER_LC_uni( utf8_to_uv(s, len, &ulen, 0)));
+ d = uvchr_to_utf8(d, toUPPER_LC_uvchr( utf8n_to_uvchr(s, len, &ulen, 0)));
s += ulen;
}
}
else {
while (s < send) {
- d = uv_to_utf8(d, toUPPER_utf8( s ));
+ d = uvchr_to_utf8(d, toUPPER_utf8( s ));
s += UTF8SKIP(s);
}
}
TAINT;
SvTAINTED_on(TARG);
while (s < send) {
- d = uv_to_utf8(d, toLOWER_LC_uni( utf8_to_uv(s, len, &ulen, 0)));
+ d = uvchr_to_utf8(d, toLOWER_LC_uvchr( utf8n_to_uvchr(s, len, &ulen, 0)));
s += ulen;
}
}
else {
while (s < send) {
- d = uv_to_utf8(d, toLOWER_utf8(s));
+ d = uvchr_to_utf8(d, toLOWER_utf8(s));
s += UTF8SKIP(s);
}
}
U8* s = (U8*)SvPVX(TARG);
U8* send = (U8*)(s + len);
while (s < send) {
- if (UTF8_IS_ASCII(*s)) {
+ if (UTF8_IS_INVARIANT(*s)) {
s++;
continue;
}
else {
- if (!utf8_to_uv_simple(s, 0))
+ if (!utf8_to_uvchr(s, 0))
break;
up = (char*)s;
s += UTF8SKIP(s);
#define ISUUCHAR(ch) (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) || (ch) == ' ')
#endif
+
PP(pp_unpack)
{
dSP;
STRLEN llen;
STRLEN rlen;
register char *pat = SvPV(left, llen);
+#ifdef PACKED_IS_OCTETS
+ /* Packed side is assumed to be octets - so force downgrade if it
+ has been UTF-8 encoded by accident
+ */
+ register char *s = SvPVbyte(right, rlen);
+#else
register char *s = SvPV(right, rlen);
+#endif
char *strend = s + rlen;
char *strbeg = s;
register char *patend = pat + llen;
if (checksum) {
while (len-- > 0 && s < strend) {
STRLEN alen;
- auint = utf8_to_uv((U8*)s, strend - s, &alen, 0);
+ auint = utf8n_to_uvchr((U8*)s, strend - s, &alen, 0);
along = alen;
s += along;
if (checksum > 32)
EXTEND_MORTAL(len);
while (len-- > 0 && s < strend) {
STRLEN alen;
- auint = utf8_to_uv((U8*)s, strend - s, &alen, 0);
+ auint = utf8n_to_uvchr((U8*)s, strend - s, &alen, 0);
along = alen;
s += along;
sv = NEWSV(37, 0);
while ((len > 0) && (s < strend)) {
auv = (auv << 7) | (*s & 0x7f);
- if (UTF8_IS_ASCII(*s++)) {
+ /* UTF8_IS_XXXXX not right here - using constant 0x80 */
+ if ((U8)(*s++) < 0x80) {
bytes = 0;
sv = NEWSV(40, 0);
sv_setuv(sv, auv);
patcopy++;
continue;
}
+#ifndef PACKED_IS_OCTETS
if (datumtype == 'U' && pat == patcopy+1)
SvUTF8_on(cat);
+#endif
if (datumtype == '#') {
while (pat < patend && *pat != '\n')
pat++;
fromstr = NEXTFROM;
auint = SvUV(fromstr);
SvGROW(cat, SvCUR(cat) + UTF8_MAXLEN + 1);
- SvCUR_set(cat, (char*)uv_to_utf8((U8*)SvEND(cat),auint)
+ SvCUR_set(cat, (char*)uvchr_to_utf8((U8*)SvEND(cat),auint)
- SvPVX(cat));
}
*SvEND(cat) = '\0';