/* doop.c
*
- * Copyright (c) 1991-2000, Larry Wall
+ * Copyright (c) 1991-2001, Larry Wall
*
* You may distribute under the terms of either the GNU General Public
* License or the Artistic License, as specified in the README file.
#endif
#endif
-#define HALF_UTF8_UPGRADE(start,end) \
- STMT_START { \
- if ((start)<(end)) { \
- U8* NeWsTr; \
- STRLEN LeN = (end) - (start); \
- NeWsTr = bytes_to_utf8(start, &LeN); \
- Safefree(start); \
- (start) = NeWsTr; \
- (end) = (start) + LeN; \
- } \
- } STMT_END
-
STATIC I32
S_do_trans_simple(pTHX_ SV *sv)
{
- dTHR;
U8 *s;
U8 *d;
U8 *send;
U8 *dstart;
I32 matches = 0;
- I32 sutf = SvUTF8(sv);
STRLEN len;
short *tbl;
I32 ch;
tbl = (short*)cPVOP->op_pv;
if (!tbl)
- Perl_croak(aTHX_ "panic: do_trans");
+ Perl_croak(aTHX_ "panic: do_trans_simple");
s = (U8*)SvPV(sv, len);
send = s + len;
/* First, take care of non-UTF8 input strings, because they're easy */
- if (!sutf) {
+ if (!SvUTF8(sv)) {
while (s < send) {
if ((ch = tbl[*s]) >= 0) {
matches++;
}
}
*d = '\0';
- sv_setpvn(sv, (const char*)dstart, d - dstart);
- Safefree(dstart);
+ sv_setpvn(sv, (char*)dstart, d - dstart);
SvUTF8_on(sv);
SvSETMAGIC(sv);
return matches;
STATIC I32
S_do_trans_count(pTHX_ SV *sv)/* SPC - OK */
{
- dTHR;
U8 *s;
U8 *send;
I32 matches = 0;
- I32 hasutf = SvUTF8(sv);
STRLEN len;
short *tbl;
tbl = (short*)cPVOP->op_pv;
if (!tbl)
- Perl_croak(aTHX_ "panic: do_trans");
+ Perl_croak(aTHX_ "panic: do_trans_count");
s = (U8*)SvPV(sv, len);
send = s + len;
- while (s < send) {
- if (hasutf && *s & 0x80)
- s += UTF8SKIP(s);
- else {
- UV c;
- STRLEN ulen;
- ulen = 1;
- if (hasutf)
- c = utf8_to_uv(s, send - s, &ulen, 0);
- else
- c = *s;
- if (c < 0x100 && tbl[c] >= 0)
+ if (!SvUTF8(sv))
+ while (s < send) {
+ if (tbl[*s++] >= 0)
matches++;
- s += ulen;
- }
- }
+ }
+ else
+ while (s < send) {
+ UV c;
+ STRLEN ulen;
+ c = utf8_to_uv(s, send - s, &ulen, 0);
+ if (c < 0x100 && tbl[c] >= 0)
+ matches++;
+ s += ulen;
+ }
return matches;
}
STATIC I32
S_do_trans_complex(pTHX_ SV *sv)/* SPC - NOT OK */
{
- dTHR;
U8 *s;
U8 *send;
U8 *d;
U8 *dstart;
- I32 hasutf = SvUTF8(sv);
+ I32 isutf8;
I32 matches = 0;
STRLEN len;
short *tbl;
tbl = (short*)cPVOP->op_pv;
if (!tbl)
- Perl_croak(aTHX_ "panic: do_trans");
+ Perl_croak(aTHX_ "panic: do_trans_complex");
s = (U8*)SvPV(sv, len);
+ isutf8 = SvUTF8(sv);
send = s + len;
- Newz(0, d, len*2+1, U8);
- dstart = d;
-
- if (PL_op->op_private & OPpTRANS_SQUASH) {
- U8* p = send;
-
- while (s < send) {
- if (hasutf && *s & 0x80)
- s += UTF8SKIP(s);
- else {
- if ((ch = tbl[*s]) >= 0) {
+ if (!isutf8) {
+ dstart = d = s;
+ if (PL_op->op_private & OPpTRANS_SQUASH) {
+ U8* p = send;
+ while (s < send) {
+ if ((ch = tbl[*s]) >= 0) {
*d = ch;
matches++;
- if (p != d - 1 || *p != *d)
- p = d++;
- }
- else if (ch == -1) /* -1 is unmapped character */
- *d++ = *s; /* -2 is delete character */
- s++;
- }
+ if (p != d - 1 || *p != *d)
+ p = d++;
+ }
+ else if (ch == -1) /* -1 is unmapped character */
+ *d++ = *s;
+ else if (ch == -2) /* -2 is delete character */
+ matches++;
+ s++;
+ }
}
+ else {
+ while (s < send) {
+ if ((ch = tbl[*s]) >= 0) {
+ matches++;
+ *d++ = ch;
+ }
+ else if (ch == -1) /* -1 is unmapped character */
+ *d++ = *s;
+ else if (ch == -2) /* -2 is delete character */
+ matches++;
+ s++;
+ }
+ }
+ SvCUR_set(sv, d - dstart);
}
- else {
- while (s < send) {
- UV comp;
- if (hasutf && *s & 0x80)
- comp = utf8_to_uv_simple(s, NULL);
- else
- comp = *s;
-
- ch = tbl[comp];
-
- if (ch == -1) { /* -1 is unmapped character */
- ch = comp;
- matches--;
- }
-
- if (ch >= 0) {
- if (hasutf)
- d = uv_to_utf8(d, ch);
- else
- *d++ = ch;
- }
- matches++;
-
- s += hasutf && *s & 0x80 ? UNISKIP(*s) : 1;
-
+ else { /* isutf8 */
+ Newz(0, d, len*2+1, U8);
+ dstart = d;
+
+ if (PL_op->op_private & OPpTRANS_SQUASH) {
+ U8* p = send;
+ UV pch = 0xfeedface;
+ while (s < send) {
+ STRLEN len;
+ UV comp = utf8_to_uv_simple(s, &len);
+
+ if (comp > 0xff)
+ d = uv_to_utf8(d, comp); /* always unmapped */
+ else if ((ch = tbl[comp]) >= 0) {
+ matches++;
+ if (ch != pch) {
+ d = uv_to_utf8(d, ch);
+ pch = ch;
+ }
+ s += len;
+ continue;
+ }
+ else if (ch == -1) /* -1 is unmapped character */
+ d = uv_to_utf8(d, comp);
+ else if (ch == -2) /* -2 is delete character */
+ matches++;
+ s += len;
+ pch = 0xfeedface;
+ }
+ }
+ else {
+ while (s < send) {
+ STRLEN len;
+ UV comp = utf8_to_uv_simple(s, &len);
+ if (comp > 0xff)
+ d = uv_to_utf8(d, comp); /* always unmapped */
+ else if ((ch = tbl[comp]) >= 0) {
+ d = uv_to_utf8(d, ch);
+ matches++;
+ }
+ else if (ch == -1) { /* -1 is unmapped character */
+ d = uv_to_utf8(d, comp);
+ }
+ else if (ch == -2) /* -2 is delete character */
+ matches++;
+ s += len;
+ }
}
+ *d = '\0';
+ sv_setpvn(sv, (char*)dstart, d - dstart);
+ SvUTF8_on(sv);
}
-
- *d = '\0';
-
- sv_setpvn(sv, (const char*)dstart, d - dstart);
- Safefree(dstart);
- if (hasutf)
- SvUTF8_on(sv);
SvSETMAGIC(sv);
return matches;
-
}
STATIC I32
S_do_trans_simple_utf8(pTHX_ SV *sv)/* SPC - OK */
{
- dTHR;
U8 *s;
U8 *send;
U8 *d;
U8 *start;
- U8 *dstart;
+ U8 *dstart, *dend;
I32 matches = 0;
STRLEN len;
UV extra = none + 1;
UV final;
UV uv;
- I32 isutf;
- I32 howmany;
+ I32 isutf8;
+ U8 hibit = 0;
- isutf = SvUTF8(sv);
s = (U8*)SvPV(sv, len);
+ isutf8 = SvUTF8(sv);
+ if (!isutf8) {
+ U8 *t = s, *e = s + len;
+ while (t < e)
+ if ((hibit = *t++ & 0x80))
+ break;
+ if (hibit)
+ s = bytes_to_utf8(s, &len);
+ }
send = s + len;
start = s;
final = SvUV(*svp);
/* d needs to be bigger than s, in case e.g. upgrading is required */
- Newz(0, d, len*2+1, U8);
+ New(0, d, len*3+UTF8_MAXLEN, U8);
+ dend = d + len * 3;
dstart = d;
+
while (s < send) {
if ((uv = swash_fetch(rv, s)) < none) {
s += UTF8SKIP(s);
matches++;
- if ((uv & 0x80) && !isutf++)
- HALF_UTF8_UPGRADE(dstart,d);
d = uv_to_utf8(d, uv);
}
else if (uv == none) {
- int i;
- i = UTF8SKIP(s);
- if (i > 1 && !isutf++)
- HALF_UTF8_UPGRADE(dstart,d);
+ int i = UTF8SKIP(s);
while(i--)
*d++ = *s++;
}
else if (uv == extra) {
- int i;
- i = UTF8SKIP(s);
+ int i = UTF8SKIP(s);
s += i;
matches++;
- if (i > 1 && !isutf++)
- HALF_UTF8_UPGRADE(dstart,d);
d = uv_to_utf8(d, final);
}
else
s += UTF8SKIP(s);
+
+ if (d >= dend) {
+ STRLEN clen = d - dstart;
+ STRLEN nlen = dend - dstart + len + UTF8_MAXLEN;
+ Renew(dstart, nlen+UTF8_MAXLEN, U8);
+ d = dstart + clen;
+ dend = dstart + nlen;
+ }
}
*d = '\0';
- sv_setpvn(sv, (const char*)dstart, d - dstart);
+ sv_setpvn(sv, (char*)dstart, d - dstart);
SvSETMAGIC(sv);
- if (isutf)
- SvUTF8_on(sv);
+ SvUTF8_on(sv);
+ if (hibit)
+ Safefree(start);
+ if (!isutf8 && !(PL_hints & HINT_UTF8))
+ sv_utf8_downgrade(sv, TRUE);
return matches;
}
STATIC I32
S_do_trans_count_utf8(pTHX_ SV *sv)/* SPC - OK */
{
- dTHR;
U8 *s;
- U8 *send;
+ U8 *start, *send;
I32 matches = 0;
STRLEN len;
SV** svp = hv_fetch(hv, "NONE", 4, FALSE);
UV none = svp ? SvUV(*svp) : 0x7fffffff;
UV uv;
+ U8 hibit = 0;
s = (U8*)SvPV(sv, len);
- if (!SvUTF8(sv))
- s = bytes_to_utf8(s, &len);
+ if (!SvUTF8(sv)) {
+ U8 *t = s, *e = s + len;
+ while (t < e)
+ if ((hibit = *t++ & 0x80))
+ break;
+ if (hibit)
+ start = s = bytes_to_utf8(s, &len);
+ }
send = s + len;
while (s < send) {
matches++;
s += UTF8SKIP(s);
}
+ if (hibit)
+ Safefree(start);
return matches;
}
STATIC I32
S_do_trans_complex_utf8(pTHX_ SV *sv) /* SPC - NOT OK */
{
- dTHR;
U8 *s;
- U8 *send;
+ U8 *start, *send;
U8 *d;
I32 matches = 0;
I32 squash = PL_op->op_private & OPpTRANS_SQUASH;
UV final;
UV uv;
STRLEN len;
- U8 *dst;
- I32 isutf = SvUTF8(sv);
+ U8 *dstart, *dend;
+ I32 isutf8;
+ U8 hibit = 0;
s = (U8*)SvPV(sv, len);
+ isutf8 = SvUTF8(sv);
+ if (!isutf8) {
+ U8 *t = s, *e = s + len;
+ while (t < e)
+ if ((hibit = *t++ & 0x80))
+ break;
+ if (hibit)
+ s = bytes_to_utf8(s, &len);
+ }
send = s + len;
+ start = s;
svp = hv_fetch(hv, "FINAL", 5, FALSE);
if (svp)
final = SvUV(*svp);
- Newz(0, d, len*2+1, U8);
- dst = d;
+ New(0, d, len*3+UTF8_MAXLEN, U8);
+ dend = d + len * 3;
+ dstart = d;
if (squash) {
UV puv = 0xfeedface;
while (s < send) {
- if (SvUTF8(sv))
- uv = swash_fetch(rv, s);
- else {
- U8 tmpbuf[2];
- uv = *s++;
- if (uv < 0x80)
- tmpbuf[0] = uv;
- else {
- tmpbuf[0] = (( uv >> 6) | 0xc0);
- tmpbuf[1] = (( uv & 0x3f) | 0x80);
- }
- uv = swash_fetch(rv, tmpbuf);
+ uv = swash_fetch(rv, s);
+
+ if (d >= dend) {
+ STRLEN clen = d - dstart, nlen = dend - dstart + len;
+ Renew(dstart, nlen+UTF8_MAXLEN, U8);
+ d = dstart + clen;
+ dend = dstart + nlen;
}
-
if (uv < none) {
matches++;
if (uv != puv) {
- if ((uv & 0x80) && !isutf++)
- HALF_UTF8_UPGRADE(dst,d);
d = uv_to_utf8(d, uv);
puv = uv;
}
continue;
}
else if (uv == none) { /* "none" is unmapped character */
- STRLEN ulen;
- *d++ = (U8)utf8_to_uv(s, send - s, &ulen, 0);
- s += ulen;
+ int i = UTF8SKIP(s);
+ while(i--)
+ *d++ = *s++;
puv = 0xfeedface;
continue;
}
}
else {
while (s < send) {
- if (SvUTF8(sv))
- uv = swash_fetch(rv, s);
- else {
- U8 tmpbuf[2];
- uv = *s++;
- if (uv < 0x80)
- tmpbuf[0] = uv;
- else {
- tmpbuf[0] = (( uv >> 6) | 0xc0);
- tmpbuf[1] = (( uv & 0x3f) | 0x80);
- }
- uv = swash_fetch(rv, tmpbuf);
+ uv = swash_fetch(rv, s);
+ if (d >= dend) {
+ STRLEN clen = d - dstart, nlen = dend - dstart + len;
+ Renew(dstart, nlen+UTF8_MAXLEN, U8);
+ d = dstart + clen;
+ dend = dstart + nlen;
}
if (uv < none) {
matches++;
continue;
}
else if (uv == none) { /* "none" is unmapped character */
- STRLEN ulen;
- *d++ = (U8)utf8_to_uv(s, send - s, &ulen, 0);
- s += ulen;
+ int i = UTF8SKIP(s);
+ while(i--)
+ *d++ = *s++;
continue;
}
else if (uv == extra && !del) {
s += UTF8SKIP(s);
}
}
- if (dst)
- sv_usepvn(sv, (char*)dst, d - dst);
- else {
- *d = '\0';
- SvCUR_set(sv, d - (U8*)SvPVX(sv));
- }
+ *d = '\0';
+ sv_setpvn(sv, (char*)dstart, d - dstart);
+ SvUTF8_on(sv);
+ if (hibit)
+ Safefree(start);
+ if (!isutf8 && !(PL_hints & HINT_UTF8))
+ sv_utf8_downgrade(sv, TRUE);
SvSETMAGIC(sv);
return matches;
I32
Perl_do_trans(pTHX_ SV *sv)
{
- dTHR;
STRLEN len;
I32 hasutf = (PL_op->op_private &
(OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF));
}
if (items-- > 0) {
- char *s;
-
sv_setpv(sv, "");
if (*mark)
sv_catsv(sv, *mark);
}
else
sv_setpv(sv,"");
- len = delimlen;
- if (len) {
+ if (delimlen) {
for (; items > 0; items--,mark++) {
- sv_catpvn(sv,delim,len);
+ sv_catsv(sv,del);
sv_catsv(sv,*mark);
}
}
}
#ifdef UV_IS_QUAD
else if (size == 64) {
- dTHR;
if (ckWARN(WARN_PORTABLE))
Perl_warner(aTHX_ WARN_PORTABLE,
"Bit vector size > 32 non-portable");
s[offset + 3];
#ifdef UV_IS_QUAD
else if (size == 64) {
- dTHR;
if (ckWARN(WARN_PORTABLE))
Perl_warner(aTHX_ WARN_PORTABLE,
"Bit vector size > 32 non-portable");
}
#ifdef UV_IS_QUAD
else if (size == 64) {
- dTHR;
if (ckWARN(WARN_PORTABLE))
Perl_warner(aTHX_ WARN_PORTABLE,
"Bit vector size > 32 non-portable");
{
STRLEN len;
char *s;
- dTHR;
if (SvTYPE(sv) == SVt_PVAV) {
register I32 i;
I32
Perl_do_chomp(pTHX_ register SV *sv)
{
- dTHR;
register I32 count;
STRLEN len;
char *s;
void
Perl_do_vop(pTHX_ I32 optype, SV *sv, SV *left, SV *right)
{
- dTHR; /* just for taint */
#ifdef LIBERAL
register long *dl;
register long *ll;
PUTBACK; /* hv_iternext and hv_iterval might clobber stack_sp */
while ((entry = hv_iternext(keys))) {
SPAGAIN;
- if (dokeys) {
+ if (dokeys)
XPUSHs(hv_iterkeysv(entry)); /* won't clobber stack_sp */
- if (SvUTF8((SV*)keys))
- SvUTF8_on(TOPs); /* Yuck */
- }
if (dovalues) {
PUTBACK;
tmpstr = realhv ?