switch (optype) {
case OP_BIT_AND:
while (lulen && rulen) {
- luc = utf8_to_uv((U8*)lc, lulen, &ulen, UTF8_ALLOW_ANY);
+ luc = utf8_to_uv((U8*)lc, lulen, &ulen, UTF8_ALLOW_ANYUV);
lc += ulen;
lulen -= ulen;
- ruc = utf8_to_uv((U8*)rc, rulen, &ulen, UTF8_ALLOW_ANY);
+ ruc = utf8_to_uv((U8*)rc, rulen, &ulen, UTF8_ALLOW_ANYUV);
rc += ulen;
rulen -= ulen;
duc = luc & ruc;
break;
case OP_BIT_XOR:
while (lulen && rulen) {
- luc = utf8_to_uv((U8*)lc, lulen, &ulen, UTF8_ALLOW_ANY);
+ luc = utf8_to_uv((U8*)lc, lulen, &ulen, UTF8_ALLOW_ANYUV);
lc += ulen;
lulen -= ulen;
- ruc = utf8_to_uv((U8*)rc, rulen, &ulen, UTF8_ALLOW_ANY);
+ ruc = utf8_to_uv((U8*)rc, rulen, &ulen, UTF8_ALLOW_ANYUV);
rc += ulen;
rulen -= ulen;
duc = luc ^ ruc;
goto mop_up_utf;
case OP_BIT_OR:
while (lulen && rulen) {
- luc = utf8_to_uv((U8*)lc, lulen, &ulen, UTF8_ALLOW_ANY);
+ luc = utf8_to_uv((U8*)lc, lulen, &ulen, UTF8_ALLOW_ANYUV);
lc += ulen;
lulen -= ulen;
- ruc = utf8_to_uv((U8*)rc, rulen, &ulen, UTF8_ALLOW_ANY);
+ ruc = utf8_to_uv((U8*)rc, rulen, &ulen, UTF8_ALLOW_ANYUV);
rc += ulen;
rulen -= ulen;
duc = luc | ruc;
send = tmps + len;
while (tmps < send) {
- UV c = utf8_to_uv(tmps, 0, &l, UTF8_ALLOW_ANY);
+ UV c = utf8_to_uv(tmps, send-tmps, &l, UTF8_ALLOW_ANYUV);
tmps += UTF8SKIP(tmps);
targlen += UNISKIP(~c);
nchar++;
if (nwide) {
Newz(0, result, targlen + 1, U8);
while (tmps < send) {
- UV c = utf8_to_uv(tmps, 0, &l, UTF8_ALLOW_ANY);
+ UV c = utf8_to_uv(tmps, send-tmps, &l, UTF8_ALLOW_ANYUV);
tmps += UTF8SKIP(tmps);
result = uv_to_utf8(result, ~c);
}
UV uv;
uv = utf8_to_uv((U8*)s, send - s, &len, UTF8_CHECK_ONLY);
- if (len == 1) {
+ if (len == (STRLEN)-1) {
/* Illegal UTF8 (a high-bit byte), make it valid. */
char *old_pvx = SvPVX(sv);
/* need space for one extra char (NOTE: SvCUR() not set here) */
if (flags & UTF8_CHECK_ONLY) {
if (retlen)
- *retlen = len;
+ *retlen = -1;
return 0;
}
if (retlen)
- *retlen = -1;
+ *retlen = expectlen ? expectlen : len;
return UNICODE_REPLACEMENT_CHARACTER;
}
#define UTF8_ALLOW_BOM 0x0020
#define UTF8_ALLOW_FFFF 0x0040
#define UTF8_ALLOW_LONG 0x0080
+#define UTF8_ALLOW_ANYUV (UTF8_ALLOW_FE_FF|UTF8_ALLOW_FFFF \
+ |UTF8_ALLOW_BOM|UTF8_ALLOW_SURROGATE)
#define UTF8_ALLOW_ANY 0x00ff
#define UTF8_CHECK_ONLY 0x0100