#endif
#endif
-#define HALF_UPGRADE(start,end) \
+#define HALF_UTF8_UPGRADE(start,end) /* swap the [start,end) buffer for the result of bytes_to_utf8() */ \
STMT_START { \
+ if ((start)<(end)) { /* an empty range is left untouched */ \
U8* NeWsTr; \
STRLEN LeN = (end) - (start); /* byte count of the current range */ \
NeWsTr = bytes_to_utf8(start, &LeN); /* LeN is passed by address; presumably updated to the converted length -- confirm */ \
- Copy(NeWsTr,start,LeN,U8*); \
- end = (start) + LeN; \
+ Safefree(start); /* the old buffer is released here, so start must be a pointer that may be passed to Safefree */ \
+ (start) = NeWsTr; /* both macro arguments are assigned to: they must be lvalues */ \
+ (end) = (start) + LeN; /* end is re-based onto the new buffer */ \
+ } \
} STMT_END
-
STATIC I32
S_do_trans_simple(pTHX_ SV *sv)
{
}
*d = '\0';
sv_setpvn(sv, (const char*)dstart, d - dstart);
+ Safefree(dstart);
SvUTF8_on(sv);
- SvLEN_set(sv, 2*len+1);
SvSETMAGIC(sv);
return matches;
}
s += UTF8SKIP(s);
matches++;
if ((uv & 0x80) && !isutf++)
- HALF_UPGRADE(dstart,d);
+ HALF_UTF8_UPGRADE(dstart,d);
d = uv_to_utf8(d, uv);
}
else if (uv == none) {
int i;
i = UTF8SKIP(s);
if (i > 1 && !isutf++)
- HALF_UPGRADE(dstart,d);
+ HALF_UTF8_UPGRADE(dstart,d);
while(i--)
- *d++ = *s++;
+ *d++ = *s++;
}
else if (uv == extra) {
int i;
s += i;
matches++;
if (i > 1 && !isutf++)
- HALF_UPGRADE(dstart,d);
+ HALF_UTF8_UPGRADE(dstart,d);
d = uv_to_utf8(d, final);
}
else
matches++;
if (uv != puv) {
if ((uv & 0x80) && !isutf++)
- HALF_UPGRADE(dst,d);
+ HALF_UTF8_UPGRADE(dst,d);
d = uv_to_utf8(d, uv);
puv = uv;
}
- s += UTF8SKIP(s);
+ s += UTF8SKIP(s);
continue;
}
else if (uv == none) { /* "none" is unmapped character */
(void)SvUPGRADE(sv, SVt_PV);
if (SvLEN(sv) < len + items) { /* current length is way too short */
while (items-- > 0) {
- if (*mark && !SvGMAGICAL(*mark) && SvOK(*mark)) {
+ if (*mark && !SvGAMAGIC(*mark) && SvOK(*mark)) {
SvPV(*mark, tmplen);
len += tmplen;
}
SvTAINTED_on(sv);
}
-/* XXX SvUTF8 support missing! */
+/* Currently downgrades UTF-8 input to bytes when possible; a failed downgrade is ignored. */
UV
Perl_do_vecget(pTHX_ SV *sv, I32 offset, I32 size)
{
return retnum;
if (size < 1 || (size & (size-1))) /* size < 1 or not a power of two */
Perl_croak(aTHX_ "Illegal number of bits in vec");
+
+ if (SvUTF8(sv)) {
+ (void) Perl_sv_utf8_downgrade(aTHX_ sv, TRUE);
+ }
+
offset *= size; /* turn into bit offset */
len = (offset + size + 7) / 8; /* required number of bytes */
if (len > srclen) {
return retnum;
}
-/* XXX SvUTF8 support missing! */
+/* Currently downgrades UTF-8 input to bytes when possible and ignores a failed
+ * downgrade, although it does ensure that the string it overwrites is no
+ * longer flagged as UTF-8.
+ */
void
Perl_do_vecset(pTHX_ SV *sv)
{
if (!targ)
return;
s = (unsigned char*)SvPV_force(targ, targlen);
+ if (SvUTF8(targ)) {
+ /* This is handled by the SvPOK_only below...
+ if (!Perl_sv_utf8_downgrade(aTHX_ targ, TRUE))
+ SvUTF8_off(targ);
+ */
+ (void) Perl_sv_utf8_downgrade(aTHX_ targ, TRUE);
+ }
+
(void)SvPOK_only(targ);
lval = SvUV(sv);
offset = LvTARGOFF(sv);
+ if (offset < 0)
+ Perl_croak(aTHX_ "Assigning to negative offset in vec");
size = LvTARGLEN(sv);
if (size < 1 || (size & (size-1))) /* size < 1 or not a power of two */
Perl_croak(aTHX_ "Illegal number of bits in vec");
char *rsave;
bool left_utf = DO_UTF8(left);
bool right_utf = DO_UTF8(right);
+ I32 needlen;
if (left_utf && !right_utf)
sv_utf8_upgrade(right);
rsave = rc = SvPV(right, rightlen);
len = leftlen < rightlen ? leftlen : rightlen;
lensave = len;
- if (SvOK(sv) || SvTYPE(sv) > SVt_PVMG) {
+ if ((left_utf || right_utf) && (sv == left || sv == right)) {
+ needlen = optype == OP_BIT_AND ? len : leftlen + rightlen;
+ Newz(801, dc, needlen + 1, char);
+ }
+ else if (SvOK(sv) || SvTYPE(sv) > SVt_PVMG) {
STRLEN n_a;
dc = SvPV_force(sv, n_a);
if (SvCUR(sv) < len) {
dc = SvGROW(sv, len + 1);
(void)memzero(dc + SvCUR(sv), len - SvCUR(sv) + 1);
}
+ if (optype != OP_BIT_AND && (left_utf || right_utf))
+ dc = SvGROW(sv, leftlen + rightlen + 1);
}
else {
- I32 needlen = ((optype == OP_BIT_AND)
- ? len : (leftlen > rightlen ? leftlen : rightlen));
+ needlen = ((optype == OP_BIT_AND)
+ ? len : (leftlen > rightlen ? leftlen : rightlen));
Newz(801, dc, needlen + 1, char);
(void)sv_usepvn(sv, dc, needlen);
dc = SvPVX(sv); /* sv_usepvn() calls Renew() */
(void)SvPOK_only(sv);
if (left_utf || right_utf) {
UV duc, luc, ruc;
+ char *dcsave = dc;
STRLEN lulen = leftlen;
STRLEN rulen = rightlen;
- STRLEN dulen = 0;
I32 ulen;
- if (optype != OP_BIT_AND)
- dc = SvGROW(sv, leftlen+rightlen+1);
-
switch (optype) {
case OP_BIT_AND:
while (lulen && rulen) {
duc = luc & ruc;
dc = (char*)uv_to_utf8((U8*)dc, duc);
}
- dulen = dc - SvPVX(sv);
- SvCUR_set(sv, dulen);
+ if (sv == left || sv == right)
+ (void)sv_usepvn(sv, dcsave, needlen);
+ SvCUR_set(sv, dc - dcsave);
break;
case OP_BIT_XOR:
while (lulen && rulen) {
dc = (char*)uv_to_utf8((U8*)dc, duc);
}
mop_up_utf:
- dulen = dc - SvPVX(sv);
- SvCUR_set(sv, dulen);
+ if (sv == left || sv == right)
+ (void)sv_usepvn(sv, dcsave, needlen);
+ SvCUR_set(sv, dc - dcsave);
if (rulen)
sv_catpvn(sv, rc, rulen);
else if (lulen)