Newz(0, d, len*2+1, U8);
dstart = d;
while (s < send) {
- I32 ulen;
+ STRLEN ulen;
short c;
ulen = 1;
/* Need to check this, otherwise 128..255 won't match */
- c = utf8_to_uv_chk(s, &ulen, 0);
+ c = utf8_to_uv(s, send - s, &ulen, 0);
if (c < 0x100 && (ch = tbl[(short)c]) >= 0) {
matches++;
if (ch < 0x80)
s += UTF8SKIP(s);
else {
UV c;
- I32 ulen;
+ STRLEN ulen;
ulen = 1;
if (hasutf)
- c = utf8_to_uv_chk(s,&ulen, 0);
+ c = utf8_to_uv(s, send - s, &ulen, 0);
else
c = *s;
if (c < 0x100 && tbl[c] >= 0)
U8 *s;
U8 *send;
U8 *d;
+ U8 *dstart;
I32 hasutf = SvUTF8(sv);
I32 matches = 0;
STRLEN len;
s = (U8*)SvPV(sv, len);
send = s + len;
- d = s;
+ Newz(0, d, len*2+1, U8);
+ dstart = d;
+
if (PL_op->op_private & OPpTRANS_SQUASH) {
U8* p = send;
if ((ch = tbl[*s]) >= 0) {
*d = ch;
matches++;
- if (p == d - 1 && *p == *d)
- matches--;
- else
+ if (p != d - 1 || *p != *d)
p = d++;
}
else if (ch == -1) /* -1 is unmapped character */
}
else {
while (s < send) {
+ UV comp;
if (hasutf && *s & 0x80)
- s += UTF8SKIP(s);
- else {
- if ((ch = tbl[*s]) >= 0) {
- *d = ch;
- matches++;
- d++;
- }
- else if (ch == -1) /* -1 is unmapped character */
- *d++ = *s; /* -2 is delete character */
- s++;
- }
+ comp = utf8_to_uv_simple(s, NULL);
+ else
+ comp = *s;
+
+ ch = tbl[comp];
+
+ if (ch == -1) { /* -1 is unmapped character */
+ ch = comp;
+ matches--;
+ }
+
+ if (ch >= 0) {
+ if (hasutf)
+ d = uv_to_utf8(d, ch);
+ else
+ *d++ = ch;
+ }
+ matches++;
+
+ s += hasutf && *s & 0x80 ? UNISKIP(*s) : 1;
+
}
}
- matches += send - d; /* account for disappeared chars */
+
*d = '\0';
- SvCUR_set(sv, d - (U8*)SvPVX(sv));
- SvSETMAGIC(sv);
+ sv_setpvn(sv, (const char*)dstart, d - dstart);
+ Safefree(dstart);
+ if (hasutf)
+ SvUTF8_on(sv);
+ SvSETMAGIC(sv);
return matches;
+
}
STATIC I32
continue;
}
else if (uv == none) { /* "none" is unmapped character */
- I32 ulen;
- *d++ = (U8)utf8_to_uv_chk(s, &ulen, 0);
+ STRLEN ulen;
+ *d++ = (U8)utf8_to_uv(s, send - s, &ulen, 0);
s += ulen;
puv = 0xfeedface;
continue;
continue;
}
else if (uv == none) { /* "none" is unmapped character */
- I32 ulen;
- *d++ = (U8)utf8_to_uv_chk(s, &ulen, 0);
+ STRLEN ulen;
+ *d++ = (U8)utf8_to_uv(s, send - s, &ulen, 0);
s += ulen;
continue;
}
if (size < 1 || (size & (size-1))) /* size < 1 or not a power of two */
Perl_croak(aTHX_ "Illegal number of bits in vec");
- if (SvUTF8(sv)) {
+ if (SvUTF8(sv))
(void) Perl_sv_utf8_downgrade(aTHX_ sv, TRUE);
- }
offset *= size; /* turn into bit offset */
len = (offset + size + 7) / 8; /* required number of bytes */
if (left_utf && !right_utf)
sv_utf8_upgrade(right);
- if (!left_utf && right_utf)
+ else if (!left_utf && right_utf)
sv_utf8_upgrade(left);
if (sv != left || (optype != OP_BIT_AND && !SvOK(sv) && !SvGMAGICAL(sv)))
char *dcsave = dc;
STRLEN lulen = leftlen;
STRLEN rulen = rightlen;
- I32 ulen;
+ STRLEN ulen;
switch (optype) {
case OP_BIT_AND:
while (lulen && rulen) {
- luc = utf8_to_uv_chk((U8*)lc, &ulen, 0);
+ luc = utf8_to_uv((U8*)lc, lulen, &ulen, UTF8_ALLOW_ANYUV);
lc += ulen;
lulen -= ulen;
- ruc = utf8_to_uv_chk((U8*)rc, &ulen, 0);
+ ruc = utf8_to_uv((U8*)rc, rulen, &ulen, UTF8_ALLOW_ANYUV);
rc += ulen;
rulen -= ulen;
duc = luc & ruc;
break;
case OP_BIT_XOR:
while (lulen && rulen) {
- luc = utf8_to_uv_chk((U8*)lc, &ulen, 0);
+ luc = utf8_to_uv((U8*)lc, lulen, &ulen, UTF8_ALLOW_ANYUV);
lc += ulen;
lulen -= ulen;
- ruc = utf8_to_uv_chk((U8*)rc, &ulen, 0);
+ ruc = utf8_to_uv((U8*)rc, rulen, &ulen, UTF8_ALLOW_ANYUV);
rc += ulen;
rulen -= ulen;
duc = luc ^ ruc;
goto mop_up_utf;
case OP_BIT_OR:
while (lulen && rulen) {
- luc = utf8_to_uv_chk((U8*)lc, &ulen, 0);
+ luc = utf8_to_uv((U8*)lc, lulen, &ulen, UTF8_ALLOW_ANYUV);
lc += ulen;
lulen -= ulen;
- ruc = utf8_to_uv_chk((U8*)rc, &ulen, 0);
+ ruc = utf8_to_uv((U8*)rc, rulen, &ulen, UTF8_ALLOW_ANYUV);
rc += ulen;
rulen -= ulen;
duc = luc | ruc;