U8 *send;
U8 *dstart;
I32 matches = 0;
+ I32 grows = PL_op->op_private & OPpTRANS_GROWS;
STRLEN len;
short *tbl;
I32 ch;
}
/* Allow for expansion: $_="a".chr(400); tr/a/\xFE/, FE needs encoding */
- Newz(0, d, len*2+1, U8);
+ if (grows)
+ New(0, d, len*2+1, U8);
+ else
+ d = s;
dstart = d;
while (s < send) {
STRLEN ulen;
- short c;
+ UV c;
- ulen = 1;
/* Need to check this, otherwise 128..255 won't match */
c = utf8_to_uv(s, send - s, &ulen, 0);
- if (c < 0x100 && (ch = tbl[(short)c]) >= 0) {
+ if (c < 0x100 && (ch = tbl[c]) >= 0) {
matches++;
d = uv_to_utf8(d, ch);
s += ulen;
}
else { /* No match -> copy */
- while (ulen--)
- *d++ = *s++;
+ Copy(s, d, ulen, U8);
+ d += ulen;
+ s += ulen;
}
}
- *d = '\0';
- sv_setpvn(sv, (char*)dstart, d - dstart);
+ if (grows) {
+ sv_setpvn(sv, (char*)dstart, d - dstart);
+ Safefree(dstart);
+ }
+ else {
+ *d = '\0';
+ SvCUR_set(sv, d - dstart);
+ }
SvUTF8_on(sv);
SvSETMAGIC(sv);
return matches;
U8 *dstart;
I32 isutf8;
I32 matches = 0;
+ I32 grows = PL_op->op_private & OPpTRANS_GROWS;
STRLEN len;
short *tbl;
I32 ch;
SvCUR_set(sv, d - dstart);
}
else { /* isutf8 */
- Newz(0, d, len*2+1, U8);
+ if (grows)
+ New(0, d, len*2+1, U8);
+ else
+ d = s;
dstart = d;
if (PL_op->op_private & OPpTRANS_SQUASH) {
STRLEN len;
UV comp = utf8_to_uv_simple(s, &len);
- if (comp > 0xff)
- d = uv_to_utf8(d, comp); /* always unmapped */
+ if (comp > 0xff) { /* always unmapped */
+ Copy(s, d, len, U8);
+ d += len;
+ }
else if ((ch = tbl[comp]) >= 0) {
matches++;
if (ch != pch) {
s += len;
continue;
}
- else if (ch == -1) /* -1 is unmapped character */
- d = uv_to_utf8(d, comp);
+ else if (ch == -1) { /* -1 is unmapped character */
+ Copy(s, d, len, U8);
+ d += len;
+ }
else if (ch == -2) /* -2 is delete character */
matches++;
s += len;
while (s < send) {
STRLEN len;
UV comp = utf8_to_uv_simple(s, &len);
- if (comp > 0xff)
- d = uv_to_utf8(d, comp); /* always unmapped */
+ if (comp > 0xff) { /* always unmapped */
+ Copy(s, d, len, U8);
+ d += len;
+ }
else if ((ch = tbl[comp]) >= 0) {
d = uv_to_utf8(d, ch);
matches++;
}
else if (ch == -1) { /* -1 is unmapped character */
- d = uv_to_utf8(d, comp);
+ Copy(s, d, len, U8);
+ d += len;
}
else if (ch == -2) /* -2 is delete character */
matches++;
s += len;
}
}
- *d = '\0';
- sv_setpvn(sv, (char*)dstart, d - dstart);
+ if (grows) {
+ sv_setpvn(sv, (char*)dstart, d - dstart);
+ Safefree(dstart);
+ }
+ else {
+ *d = '\0';
+ SvCUR_set(sv, d - dstart);
+ }
SvUTF8_on(sv);
}
SvSETMAGIC(sv);
U8 *start;
U8 *dstart, *dend;
I32 matches = 0;
+ I32 grows = PL_op->op_private & OPpTRANS_GROWS;
STRLEN len;
SV* rv = (SV*)cSVOP->op_sv;
if (svp)
final = SvUV(*svp);
- /* d needs to be bigger than s, in case e.g. upgrading is required */
- New(0, d, len*3+UTF8_MAXLEN, U8);
- dend = d + len * 3;
- dstart = d;
+ if (grows) {
+ /* d needs to be bigger than s, in case e.g. upgrading is required */
+ New(0, d, len*3+UTF8_MAXLEN, U8);
+ dend = d + len * 3;
+ dstart = d;
+ }
+ else {
+ dstart = d = s;
+ dend = d + len;
+ }
while (s < send) {
if ((uv = swash_fetch(rv, s)) < none) {
}
else if (uv == none) {
int i = UTF8SKIP(s);
- while(i--)
- *d++ = *s++;
+ Copy(s, d, i, U8);
+ d += i;
+ s += i;
}
else if (uv == extra) {
int i = UTF8SKIP(s);
else
s += UTF8SKIP(s);
- if (d >= dend) {
+ if (d > dend) {
STRLEN clen = d - dstart;
STRLEN nlen = dend - dstart + len + UTF8_MAXLEN;
+ if (!grows)
+ Perl_croak(aTHX_ "panic: do_trans_complex_utf8");
Renew(dstart, nlen+UTF8_MAXLEN, U8);
d = dstart + clen;
dend = dstart + nlen;
}
}
- *d = '\0';
- sv_setpvn(sv, (char*)dstart, d - dstart);
+ if (grows) {
+ sv_setpvn(sv, (char*)dstart, d - dstart);
+ Safefree(dstart);
+ }
+ else {
+ *d = '\0';
+ SvCUR_set(sv, d - dstart);
+ }
SvSETMAGIC(sv);
SvUTF8_on(sv);
if (hibit)
I32 matches = 0;
I32 squash = PL_op->op_private & OPpTRANS_SQUASH;
I32 del = PL_op->op_private & OPpTRANS_DELETE;
+ I32 grows = PL_op->op_private & OPpTRANS_GROWS;
SV* rv = (SV*)cSVOP->op_sv;
HV* hv = (HV*)SvRV(rv);
SV** svp = hv_fetch(hv, "NONE", 4, FALSE);
if (svp)
final = SvUV(*svp);
- New(0, d, len*3+UTF8_MAXLEN, U8);
- dend = d + len * 3;
- dstart = d;
+ if (grows) {
+ /* d needs to be bigger than s, in case e.g. upgrading is required */
+ New(0, d, len*3+UTF8_MAXLEN, U8);
+ dend = d + len * 3;
+ dstart = d;
+ }
+ else {
+ dstart = d = s;
+ dend = d + len;
+ }
if (squash) {
UV puv = 0xfeedface;
while (s < send) {
uv = swash_fetch(rv, s);
- if (d >= dend) {
- STRLEN clen = d - dstart, nlen = dend - dstart + len;
+ if (d > dend) {
+ STRLEN clen = d - dstart;
+ STRLEN nlen = dend - dstart + len + UTF8_MAXLEN;
+ if (!grows)
+ Perl_croak(aTHX_ "panic: do_trans_complex_utf8");
Renew(dstart, nlen+UTF8_MAXLEN, U8);
d = dstart + clen;
dend = dstart + nlen;
}
else if (uv == none) { /* "none" is unmapped character */
int i = UTF8SKIP(s);
- while(i--)
- *d++ = *s++;
+ Copy(s, d, i, U8);
+ d += i;
+ s += i;
puv = 0xfeedface;
continue;
}
else {
while (s < send) {
uv = swash_fetch(rv, s);
- if (d >= dend) {
- STRLEN clen = d - dstart, nlen = dend - dstart + len;
+ if (d > dend) {
+ STRLEN clen = d - dstart;
+ STRLEN nlen = dend - dstart + len + UTF8_MAXLEN;
+ if (!grows)
+ Perl_croak(aTHX_ "panic: do_trans_complex_utf8");
Renew(dstart, nlen+UTF8_MAXLEN, U8);
d = dstart + clen;
dend = dstart + nlen;
}
else if (uv == none) { /* "none" is unmapped character */
int i = UTF8SKIP(s);
- while(i--)
- *d++ = *s++;
+ Copy(s, d, i, U8);
+ d += i;
+ s += i;
continue;
}
else if (uv == extra && !del) {
s += UTF8SKIP(s);
}
}
- *d = '\0';
- sv_setpvn(sv, (char*)dstart, d - dstart);
+ if (grows) {
+ sv_setpvn(sv, (char*)dstart, d - dstart);
+ Safefree(dstart);
+ }
+ else {
+ *d = '\0';
+ SvCUR_set(sv, d - dstart);
+ }
SvUTF8_on(sv);
if (hibit)
Safefree(start);
#define scalarboolean S_scalarboolean
#define too_few_arguments S_too_few_arguments
#define too_many_arguments S_too_many_arguments
+#define trlist_upgrade S_trlist_upgrade
#define op_clear S_op_clear
#define null S_null
#define pad_addlex S_pad_addlex
#define scalarboolean(a) S_scalarboolean(aTHX_ a)
#define too_few_arguments(a,b) S_too_few_arguments(aTHX_ a,b)
#define too_many_arguments(a,b) S_too_many_arguments(aTHX_ a,b)
+#define trlist_upgrade(a,b) S_trlist_upgrade(aTHX_ a,b)
#define op_clear(a) S_op_clear(aTHX_ a)
#define null(a) S_null(aTHX_ a)
#define pad_addlex(a) S_pad_addlex(aTHX_ a)
#define too_few_arguments S_too_few_arguments
#define S_too_many_arguments CPerlObj::S_too_many_arguments
#define too_many_arguments S_too_many_arguments
+#define S_trlist_upgrade CPerlObj::S_trlist_upgrade
+#define trlist_upgrade S_trlist_upgrade
#define S_op_clear CPerlObj::S_op_clear
#define op_clear S_op_clear
#define S_null CPerlObj::S_null
s |OP* |scalarboolean |OP *o
s |OP* |too_few_arguments|OP *o|char* name
s |OP* |too_many_arguments|OP *o|char* name
+s |U8* |trlist_upgrade |U8** sp|U8** ep
s |void |op_clear |OP* o
s |void |null |OP* o
s |PADOFFSET|pad_addlex |SV* name
SvPV_nolen(cSVOPo_sv)));
}
+STATIC U8*
+S_trlist_upgrade(pTHX_ U8** sp, U8** ep)
+{ /* Build an expanded copy of the byte range [*sp, *ep) in a new buffer,
+ * re-encoding every byte in 0x80..0xfe as a two-byte UTF-8 sequence.
+ *
+ * sp - in: start of the source bytes; out: start of the new buffer.
+ * ep - in: one past the last source byte; out: one past the last byte
+ * written to the new buffer.
+ *
+ * Returns the new buffer (the same value stored back into *sp). The
+ * source bytes are only read here, never modified or freed. The callers
+ * visible in this patch keep the returned pointer (tsave/rsave) and
+ * Safefree() it once they have finished with the list.
+ */
+ U8 *s = *sp;
+ U8 *e = *ep;
+ U8 *d;
+ /* Each source byte yields at most two output bytes, hence the 2x size.
+ * NOTE(review): no byte is reserved for a trailing NUL, and an empty
+ * range asks for a zero-sized allocation -- confirm both are acceptable
+ * to the allocator and to every reader of the result. */
+ Newz(801, d, (e - s) * 2, U8);
+ *sp = d; /* publish the buffer start before d is advanced by the loop */
+ /* Walk the source once; d always points at the next free output byte. */
+ while (s < e) {
+ if (*s < 0x80 || *s == 0xff) /* bytes below 0x80 are copied unchanged;
+ * 0xff is also passed through unencoded --
+ * presumably it is a reserved marker byte in
+ * the list rather than character data
+ * (TODO confirm against the list parser) */
+ *d++ = *s++;
+ else {
+ U8 c = *s++;
+ *d++ = ((c >> 6) | 0xc0); /* lead byte: 110xxxxx carrying the top two bits of c */
+ *d++ = ((c & 0x3f) | 0x80); /* continuation byte: 10xxxxxx carrying the low six bits */
+ }
+ }
+ *ep = d; /* new end: one past the last byte written */
+ return *sp;
+}
+
+
/* "register" allocation */
PADOFFSET
SV *rstr = ((SVOP*)repl)->op_sv;
STRLEN tlen;
STRLEN rlen;
- register U8 *t = (U8*)SvPV(tstr, tlen);
- register U8 *r = (U8*)SvPV(rstr, rlen);
+ U8 *t = (U8*)SvPV(tstr, tlen);
+ U8 *r = (U8*)SvPV(rstr, rlen);
register I32 i;
register I32 j;
I32 del;
I32 complement;
I32 squash;
+ I32 grows = 0;
register short *tbl;
complement = o->op_private & OPpTRANS_COMPLEMENT;
I32 none = 0;
U32 max = 0;
I32 bits;
- I32 grows = 0;
I32 havefinal = 0;
U32 final;
I32 from_utf = o->op_private & OPpTRANS_FROM_UTF;
I32 to_utf = o->op_private & OPpTRANS_TO_UTF;
+ U8* tsave = from_utf ? NULL : trlist_upgrade(&t, &tend);
+ U8* rsave = to_utf ? NULL : trlist_upgrade(&r, &rend);
if (complement) {
U8 tmpbuf[UTF8_MAXLEN+1];
if (rfirst + diff > max)
max = rfirst + diff;
rfirst += diff + 1;
- if (!grows) {
- if (rfirst <= 0x80)
- ;
- else if (rfirst <= 0x800)
- grows |= (tfirst < 0x80);
- else if (rfirst <= 0x10000)
- grows |= (tfirst < 0x800);
- else if (rfirst <= 0x200000)
- grows |= (tfirst < 0x10000);
- else if (rfirst <= 0x4000000)
- grows |= (tfirst < 0x200000);
- else if (rfirst <= 0x80000000)
- grows |= (tfirst < 0x4000000);
- }
+ if (!grows)
+ grows = (UNISKIP(tfirst) < UNISKIP(rfirst));
}
tfirst += diff + 1;
}
(void)hv_store((HV*)SvRV((cSVOPo->op_sv)), "FINAL", 5,
newSVuv((UV)final), 0);
- if (grows && to_utf)
+ if (grows)
o->op_private |= OPpTRANS_GROWS;
+ if (tsave)
+ Safefree(tsave);
+ if (rsave)
+ Safefree(rsave);
+
op_free(expr);
op_free(repl);
return o;
else
tbl[i] = i;
}
- else
+ else {
+ if (i < 128 && r[j] >= 128)
+ grows = 1;
tbl[i] = r[j++];
+ }
}
}
}
}
--j;
}
- if (tbl[t[i]] == -1)
+ if (tbl[t[i]] == -1) {
+ if (t[i] < 128 && r[j] >= 128)
+ grows = 1;
tbl[t[i]] = r[j];
+ }
}
}
+ if (grows)
+ o->op_private |= OPpTRANS_GROWS;
op_free(expr);
op_free(repl);
else { /* Note: mark already snarfed by pp_list */
SV *tmpstr = POPs;
STRLEN len;
- bool isutf = DO_UTF8(tmpstr);
+ bool isutf;
SvSetSV(TARG, tmpstr);
SvPV_force(TARG, len);
+ isutf = DO_UTF8(TARG);
if (count != 1) {
if (count < 1)
SvCUR_set(TARG, 0);
STATIC OP* S_scalarboolean(pTHX_ OP *o);
STATIC OP* S_too_few_arguments(pTHX_ OP *o, char* name);
STATIC OP* S_too_many_arguments(pTHX_ OP *o, char* name);
+STATIC U8* S_trlist_upgrade(pTHX_ U8** sp, U8** ep);
STATIC void S_op_clear(pTHX_ OP* o);
STATIC void S_null(pTHX_ OP* o);
STATIC PADOFFSET S_pad_addlex(pTHX_ SV* name);
if (compat)
ANYOF_BITMAP_SET(data->start_class, uc);
data->start_class->flags &= ~ANYOF_EOS;
+ if (uc < 0x100)
+ data->start_class->flags &= ~ANYOF_UNICODE_ALL;
}
else if (flags & SCF_DO_STCLASS_OR) {
/* false positive possible if the class is case-folded */
if (uc < 0x100)
- ANYOF_BITMAP_SET(data->start_class, uc);
+ ANYOF_BITMAP_SET(data->start_class, uc);
+ else
+ data->start_class->flags |= ANYOF_UNICODE_ALL;
data->start_class->flags &= ~ANYOF_EOS;
cl_and(data->start_class, &and_with);
}
#endif
restart:
+ other_last = Nullch;
+
/* Find a possible match in the region s..strend by looking for
the "check" substring in the region corrected by start/end_shift. */
if (flags & REXEC_SCREAM) {
else {
STRLEN len = 1; /* allow underscores */
uv = (UV)scan_hex(s + 1, e - s - 1, &len);
- to_be_utf8 = TRUE;
+ if (PL_hints & HINT_UTF8)
+ to_be_utf8 = TRUE;
}
s = e + 1;
}
if (hicount) {
char *old_pvx = SvPVX(sv);
char *src, *dst;
- U8 tmpbuf[UTF8_MAXLEN+1];
- U8 *tmpend;
d = SvGROW(sv,
SvCUR(sv) + hicount + 1) +
while (src < dst) {
if (UTF8_IS_CONTINUED(*src)) {
- tmpend = uv_to_utf8(tmpbuf, (U8)*src--);
- dst -= tmpend - tmpbuf;
- Copy((char *)tmpbuf, dst+1,
- tmpend - tmpbuf, char);
+ *dst-- = UTF8_EIGHT_BIT_LO(*src);
+ *dst-- = UTF8_EIGHT_BIT_HI(*src--);
}
else {
*dst-- = *src--;
}
}
- if (to_be_utf8 || (has_utf8 && uv > 127) || uv > 255) {
+ if (to_be_utf8 || has_utf8 || uv > 255) {
d = (char*)uv_to_utf8((U8*)d, uv);
has_utf8 = TRUE;
}
{
if (uv < 0x80) {
*d++ = uv;
- *d = 0;
return d;
}
if (uv < 0x800) {
*d++ = (( uv >> 6) | 0xc0);
*d++ = (( uv & 0x3f) | 0x80);
- *d = 0;
return d;
}
if (uv < 0x10000) {
*d++ = (( uv >> 12) | 0xe0);
*d++ = (((uv >> 6) & 0x3f) | 0x80);
*d++ = (( uv & 0x3f) | 0x80);
- *d = 0;
return d;
}
if (uv < 0x200000) {
*d++ = (((uv >> 12) & 0x3f) | 0x80);
*d++ = (((uv >> 6) & 0x3f) | 0x80);
*d++ = (( uv & 0x3f) | 0x80);
- *d = 0;
return d;
}
if (uv < 0x4000000) {
*d++ = (((uv >> 12) & 0x3f) | 0x80);
*d++ = (((uv >> 6) & 0x3f) | 0x80);
*d++ = (( uv & 0x3f) | 0x80);
- *d = 0;
return d;
}
if (uv < 0x80000000) {
*d++ = (((uv >> 12) & 0x3f) | 0x80);
*d++ = (((uv >> 6) & 0x3f) | 0x80);
*d++ = (( uv & 0x3f) | 0x80);
- *d = 0;
return d;
}
#ifdef HAS_QUAD
*d++ = (((uv >> 12) & 0x3f) | 0x80);
*d++ = (((uv >> 6) & 0x3f) | 0x80);
*d++ = (( uv & 0x3f) | 0x80);
- *d = 0;
return d;
}
#ifdef HAS_QUAD
*d++ = (((uv >> 12) & 0x3f) | 0x80);
*d++ = (((uv >> 6) & 0x3f) | 0x80);
*d++ = (( uv & 0x3f) | 0x80);
- *d = 0;
return d;
}
#endif