SV* sva = (SV*)ptr;
register SV* sv;
register SV* svend;
- Zero(sva, size, char);
+ Zero(ptr, size, char);
/* The first SV in an arena isn't an SV. */
SvANY(sva) = (void *) PL_sv_arenaroot; /* ptr to next arena */
char *
Perl_sv_2pvbyte_nolen(pTHX_ register SV *sv)
{
- return sv_2pv_nolen(sv);
+ STRLEN n_a; /* receives the length from sv_2pvbyte(); not used afterwards */
+ return sv_2pvbyte(sv, &n_a); /* delegate to the byte variant instead of plain sv_2pv_nolen() */
}
char *
char *
Perl_sv_2pvutf8_nolen(pTHX_ register SV *sv)
{
- return sv_2pv_nolen(sv);
+ STRLEN n_a; /* scratch length required by sv_2pvutf8(); the value is discarded */
+ return sv_2pvutf8(sv, &n_a); /* route through the UTF-8 variant rather than plain sv_2pv_nolen() */
}
char *
Perl_sv_2pvutf8(pTHX_ register SV *sv, STRLEN *lp)
{
+ sv_utf8_upgrade(sv); /* NOTE(review): sv_utf8_upgrade() returns early when sv is not SvPOK, so a value that is only stringified by sv_2pv() below is returned without being upgraded -- confirm this is intended */
return sv_2pv(sv,lp);
}
}
}
+/*
+ * Perl_sv_utf8_upgrade
+ *
+ * Convert the string held by sv, in place, from one byte per character to
+ * UTF-8 and turn the SvUTF8 flag on.
+ *
+ * Nothing is done (and the flag is left alone) when sv is NULL, is not
+ * SvPOK, already has SvUTF8 set, or contains no byte with the high bit set.
+ * Otherwise the buffer is grown by one byte per high-bit byte, the string
+ * is rewritten back to front, and the result is NUL-terminated at its new
+ * end.
+ */
+void
+Perl_sv_utf8_upgrade(pTHX_ register SV *sv)
+{
+    int hicount;
+    char *c;
+
+    if (!sv || !SvPOK(sv) || SvUTF8(sv))
+        return;
+
+    /* This function could be much more efficient if we had a FLAG
+     * to signal if there are any hibit chars in the string
+     */
+    hicount = 0;
+    for (c = SvPVX(sv); c < SvEND(sv); c++) {
+        if (*c & 0x80)
+            hicount++;
+    }
+
+    if (hicount) {
+        char *src, *dst;
+        /* one extra byte per high-bit byte, plus room for the trailing NUL */
+        SvGROW(sv, SvCUR(sv) + hicount + 1);
+
+        src = SvEND(sv) - 1;                /* last byte of the old string */
+        SvCUR_set(sv, SvCUR(sv) + hicount);
+        dst = SvEND(sv) - 1;                /* last byte of the new string */
+
+        /* The expansion below overwrites whatever followed the old string,
+         * so terminate at the new end; the "+ 1" above reserved this byte.
+         */
+        *SvEND(sv) = '\0';
+
+        /* Walk backwards so no source byte is overwritten before it has
+         * been read; src catches up with dst once every high-bit byte has
+         * been expanded.
+         */
+        while (src < dst) {
+            if (*src & 0x80) {
+                /* The bookkeeping assumes uv_to_utf8() emits exactly two
+                 * bytes for a value in 0x80..0xFF: step back one so the
+                 * pair ends where dst pointed, then one more to pass it.
+                 */
+                dst--;
+                uv_to_utf8((U8*)dst, (U8)*src--);
+                dst--;
+            }
+            else {
+                *dst-- = *src--;
+            }
+        }
+
+        SvUTF8_on(sv);
+    }
+}
+
+/*
+ * Perl_sv_utf8_downgrade
+ *
+ * Try to convert the string held by sv, in place, from UTF-8 back to one
+ * byte per character, and turn the SvUTF8 flag off.
+ *
+ * sv is dereferenced without a NULL check.  When sv is not SvPOK or does
+ * not have SvUTF8 set, nothing is changed and TRUE is returned.
+ *
+ * If the string contains a character whose value is 256 or more, it cannot
+ * be represented: with fail_ok true the function returns FALSE and leaves
+ * sv untouched; with fail_ok false it croaks with "Big byte".
+ *
+ * Returns TRUE in every other case.
+ */
+bool
+Perl_sv_utf8_downgrade(pTHX_ register SV* sv, bool fail_ok)
+{
+    if (SvPOK(sv) && SvUTF8(sv)) {
+        char *c = SvPVX(sv);
+        char *first_hi = 0;     /* first high-bit byte; rewriting starts here */
+        /* need to figure out if this is possible at all first */
+        while (c < SvEND(sv)) {
+            if (*c & 0x80) {
+                I32 len;
+                UV uv = utf8_to_uv((U8*)c, &len);
+                if (uv >= 256) {
+                    if (fail_ok)
+                        return FALSE;
+                    else {
+                        /* XXX might want to make a callback here instead */
+                        Perl_croak(aTHX_ "Big byte");
+                    }
+                }
+                if (!first_hi)
+                    first_hi = c;
+                c += len;
+            }
+            else {
+                c++;
+            }
+        }
+
+        /* Everything before first_hi is plain 7-bit data and already in its
+         * final position, so compaction only has to start from there.
+         */
+        if (first_hi) {
+            char *src = first_hi;
+            char *dst = first_hi;
+            while (src < SvEND(sv)) {
+                if (*src & 0x80) {
+                    I32 len;
+                    U8 u = (U8)utf8_to_uv((U8*)src, &len);
+                    *dst++ = u;
+                    src += len;
+                }
+                else {
+                    *dst++ = *src++;
+                }
+            }
+            /* The string got shorter: terminate it at the new end so stale
+             * bytes from the old tail are not left directly after it.
+             */
+            *dst = '\0';
+            SvCUR_set(sv, dst - SvPVX(sv));
+        }
+        SvUTF8_off(sv);
+    }
+    return TRUE;
+}
+
+void
+Perl_sv_utf8_encode(pTHX_ register SV *sv)
+{
+ sv_utf8_upgrade(sv); /* expand high-bit bytes into UTF-8 sequences in place (no-op if already SvUTF8) */
+ SvUTF8_off(sv); /* then clear the flag, so the encoded bytes are left flagged as non-UTF-8 */
+}
+
+bool
+Perl_sv_utf8_decode(pTHX_ register SV *sv)
+{
+ if (SvPOK(sv)) {
+ char *c;
+ bool has_utf = FALSE; /* becomes TRUE once a multi-byte sequence has been seen */
+ if (!sv_utf8_downgrade(sv, TRUE)) /* fail_ok is TRUE: get FALSE back instead of a croak */
+ return FALSE;
+
+ /* it is actually just a matter of turning the utf8 flag on, but
+ * we want to make sure everything inside is valid utf8 first.
+ * Each byte with the high bit set is handed to utf8_to_uv(); a
+ * reported length of 1 for such a byte is treated as malformed
+ * input and makes the function return FALSE with the flag off.
+ */
+ c = SvPVX(sv);
+ while (c < SvEND(sv)) {
+ if (*c & 0x80) {
+ I32 len;
+ (void)utf8_to_uv((U8*)c, &len); /* only the length is wanted here, not the value */
+ if (len == 1) {
+ /* bad utf8 */
+ return FALSE;
+ }
+ c += len;
+ has_utf = TRUE;
+ }
+ else {
+ c++;
+ }
+ }
+
+ if (has_utf) /* strings with no high-bit bytes keep the flag off */
+ SvUTF8_on(sv);
+ }
+ return TRUE; /* also reached when sv is not SvPOK, in which case nothing was changed */
+}
+
+
/* Note: sv_setsv() should not be called with a source string that needs
* to be reused, since it may destroy the source string if it is marked
* as temporary.
Perl_croak(aTHX_
"Can't redefine active sort subroutine %s",
GvENAME((GV*)dstr));
- if (ckWARN(WARN_REDEFINE) || (const_changed && const_sv)) {
- if (!(CvGV(cv) && GvSTASH(CvGV(cv))
- && HvNAME(GvSTASH(CvGV(cv)))
- && strEQ(HvNAME(GvSTASH(CvGV(cv))),
- "autouse")))
- Perl_warner(aTHX_ WARN_REDEFINE, const_sv ?
+ if ((const_changed || const_sv) && ckWARN(WARN_REDEFINE))
+ Perl_warner(aTHX_ WARN_REDEFINE, const_sv ?
"Constant subroutine %s redefined"
: "Subroutine %s redefined",
GvENAME((GV*)dstr));
- }
}
cv_ckproto(cv, (GV*)dstr,
SvPOK(sref) ? SvPVX(sref) : Nullch);
}
else {
if (dtype == SVt_PVGV) {
- if (ckWARN(WARN_UNSAFE))
- Perl_warner(aTHX_ WARN_UNSAFE, "Undefined value assigned to typeglob");
+ if (ckWARN(WARN_MISC))
+ Perl_warner(aTHX_ WARN_MISC, "Undefined value assigned to typeglob");
}
else
(void)SvOK_off(dstr);
STRLEN len;
if (!sstr)
return;
- if (s = SvPV(sstr, len))
+ if (s = SvPV(sstr, len)) {
+ if (SvUTF8(sstr))
+ sv_utf8_upgrade(dstr);
sv_catpvn(dstr,s,len);
- if (SvUTF8(sstr))
- SvUTF8_on(dstr);
+ if (SvUTF8(sstr))
+ SvUTF8_on(dstr);
+ }
}
/*
I32
Perl_sv_cmp(pTHX_ register SV *str1, register SV *str2)
{
- STRLEN cur1 = 0;
- char *pv1 = str1 ? SvPV(str1, cur1) : (char *) NULL;
- STRLEN cur2 = 0;
- char *pv2 = str2 ? SvPV(str2, cur2) : (char *) NULL;
+ STRLEN cur1, cur2;
+ char *pv1, *pv2;
I32 retval;
+ bool utf1;
+
+ if (str1) {
+ pv1 = SvPV(str1, cur1);
+ }
+ else {
+ cur1 = 0;
+ }
+
+ if (str2) {
+ if (SvPOK(str2)) {
+ if (SvPOK(str1) && SvUTF8(str1) != SvUTF8(str2) && !IN_BYTE) {
+ /* must upgrade other to UTF8 first */
+ if (SvUTF8(str1)) {
+ sv_utf8_upgrade(str2);
+ }
+ else {
+ sv_utf8_upgrade(str1);
+ /* refresh pointer and length */
+ pv1 = SvPVX(str1);
+ cur1 = SvCUR(str1);
+ }
+ }
+ pv2 = SvPVX(str2);
+ cur2 = SvCUR(str2);
+ }
+ else {
+ pv2 = sv_2pv(str2, &cur2);
+ }
+ }
+ else {
+ cur2 = 0;
+ }
if (!cur1)
return cur2 ? -1 : 0;
char *
Perl_sv_pvutf8(pTHX_ SV *sv)
{
+ sv_utf8_upgrade(sv); /* upgrade in place first; NOTE(review): this is a no-op when sv is not yet SvPOK -- confirm that is acceptable before sv_pv() stringifies it */
return sv_pv(sv);
}
char *
Perl_sv_pvutf8n(pTHX_ SV *sv, STRLEN *lp)
{
+ sv_utf8_upgrade(sv); /* upgrade before fetching so the length stored through lp is taken after any expansion; NOTE(review): skipped when sv is not SvPOK */
return sv_pvn(sv,lp);
}
char *
Perl_sv_pvutf8n_force(pTHX_ SV *sv, STRLEN *lp)
{
+ sv_utf8_upgrade(sv); /* NOTE(review): runs before sv_pvn_force(); sv_utf8_upgrade() returns early unless sv is already SvPOK, so a value that only becomes a string in the call below is not upgraded -- confirm ordering */
return sv_pvn_force(sv,lp);
}
for (p = (char*)pat; p < patend; p = q) {
bool alt = FALSE;
bool left = FALSE;
+ bool vectorize = FALSE;
+ bool utf = FALSE;
char fill = ' ';
char plus = 0;
char intsize = 0;
bool is_utf = FALSE;
char esignbuf[4];
- U8 utf8buf[10];
+ U8 utf8buf[UTF8_MAXLEN];
STRLEN esignlen = 0;
char *eptr = Nullch;
char ebuf[IV_DIG * 4 + NV_DIG + 32];
/* large enough for "%#.#f" --chip */
/* what about long double NVs? --jhi */
+
+ SV *vecsv;
+ U8 *vecstr = Null(U8*);
+ STRLEN veclen = 0;
char c;
int i;
unsigned base;
STRLEN have;
STRLEN need;
STRLEN gap;
+ char *dotstr = ".";
+ STRLEN dotstrlen = 1;
for (q = p; q < patend && *q != '%'; ++q) ;
if (q > p) {
q++;
continue;
+ case '*': /* printf("%*vX",":",$ipv6addr) */
+ if (q[1] != 'v')
+ break;
+ q++;
+ if (args)
+ vecsv = va_arg(*args, SV*);
+ else if (svix < svmax)
+ vecsv = svargs[svix++];
+ dotstr = SvPVx(vecsv,dotstrlen);
+ if (DO_UTF8(vecsv))
+ is_utf = TRUE;
+ /* FALL THROUGH */
+
+ case 'v':
+ vectorize = TRUE;
+ q++;
+ if (args)
+ vecsv = va_arg(*args, SV*);
+ else if (svix < svmax)
+ vecsv = svargs[svix++];
+ vecstr = (U8*)SvPVx(vecsv,veclen);
+ utf = DO_UTF8(vecsv);
+ continue;
+
default:
break;
}
}
goto string;
- case 'v':
- if (args)
- argsv = va_arg(*args, SV*);
- else if (svix < svmax)
- argsv = svargs[svix++];
- {
- STRLEN len;
- U8 *str = (U8*)SvPVx(argsv,len);
- I32 vlen = len*3+1;
- SV *vsv = NEWSV(73,vlen);
- I32 ulen;
- I32 vfree = vlen;
- U8 *vptr = (U8*)SvPVX(vsv);
- STRLEN vcur = 0;
- bool utf = DO_UTF8(argsv);
-
- if (utf)
- is_utf = TRUE;
- while (len) {
- UV uv;
-
- if (utf)
- uv = utf8_to_uv(str, &ulen);
- else {
- uv = *str;
- ulen = 1;
- }
- str += ulen;
- len -= ulen;
- eptr = ebuf + sizeof ebuf;
- do {
- *--eptr = '0' + uv % 10;
- } while (uv /= 10);
- elen = (ebuf + sizeof ebuf) - eptr;
- while (elen >= vfree-1) {
- STRLEN off = vptr - (U8*)SvPVX(vsv);
- vfree += vlen;
- vlen *= 2;
- SvGROW(vsv, vlen);
- vptr = (U8*)SvPVX(vsv) + off;
- }
- memcpy(vptr, eptr, elen);
- vptr += elen;
- *vptr++ = '.';
- vfree -= elen + 1;
- vcur += elen + 1;
- }
- if (vcur) {
- vcur--;
- vptr[-1] = '\0';
- }
- SvCUR_set(vsv,vcur);
- eptr = SvPVX(vsv);
- elen = vcur;
- }
- goto string;
-
case '_':
/*
* The "%_" hack might have to be changed someday,
is_utf = TRUE;
string:
+ vectorize = FALSE;
if (has_precis && elen > precis)
elen = precis;
break;
/* FALL THROUGH */
case 'd':
case 'i':
- if (args) {
+ if (vectorize) {
+ I32 ulen;
+ if (!veclen) {
+ vectorize = FALSE;
+ break;
+ }
+ if (utf)
+ iv = (IV)utf8_to_uv(vecstr, &ulen);
+ else {
+ iv = *vecstr;
+ ulen = 1;
+ }
+ vecstr += ulen;
+ veclen -= ulen;
+ }
+ else if (args) {
switch (intsize) {
case 'h': iv = (short)va_arg(*args, int); break;
default: iv = va_arg(*args, int); break;
base = 16;
uns_integer:
- if (args) {
+ if (vectorize) {
+ I32 ulen;
+ vector:
+ if (!veclen) {
+ vectorize = FALSE;
+ break;
+ }
+ if (utf)
+ uv = utf8_to_uv(vecstr, &ulen);
+ else {
+ uv = *vecstr;
+ ulen = 1;
+ }
+ vecstr += ulen;
+ veclen -= ulen;
+ }
+ else if (args) {
switch (intsize) {
case 'h': uv = (unsigned short)va_arg(*args, unsigned); break;
default: uv = va_arg(*args, unsigned); break;
break;
default: /* it had better be ten or less */
#if defined(PERL_Y2KWARN)
- if (ckWARN(WARN_MISC)) {
+ if (ckWARN(WARN_Y2K)) {
STRLEN n;
char *s = SvPV(sv,n);
if (n >= 2 && s[n-2] == '1' && s[n-1] == '9'
&& (n == 2 || !isDIGIT(s[n-3])))
{
- Perl_warner(aTHX_ WARN_MISC,
+ Perl_warner(aTHX_ WARN_Y2K,
"Possible Y2K bug: %%%c %s",
c, "format string following '19'");
}
/* This is evil, but floating point is even more evil */
+ vectorize = FALSE;
if (args)
nv = va_arg(*args, NV);
else
/* SPECIAL */
case 'n':
+ vectorize = FALSE;
i = SvCUR(sv) - origlen;
if (args) {
switch (intsize) {
default:
unknown:
+ vectorize = FALSE;
if (!args && ckWARN(WARN_PRINTF) &&
(PL_op->op_type == OP_PRTF || PL_op->op_type == OP_SPRINTF)) {
SV *msg = sv_newmortal();
need = (have > width ? have : width);
gap = need - have;
- SvGROW(sv, SvCUR(sv) + need + 1);
+ SvGROW(sv, SvCUR(sv) + need + dotstrlen + 1);
p = SvEND(sv);
if (esignlen && fill == '0') {
for (i = 0; i < esignlen; i++)
memset(p, ' ', gap);
p += gap;
}
+ if (vectorize) {
+ if (veclen) {
+ memcpy(p, dotstr, dotstrlen);
+ p += dotstrlen;
+ }
+ else
+ vectorize = FALSE; /* done iterating over vecstr */
+ }
if (is_utf)
SvUTF8_on(sv);
*p = '\0';
SvCUR(sv) = p - SvPVX(sv);
+ if (vectorize) {
+ esignlen = 0;
+ goto vector;
+ }
}
}
PL_dirty = proto_perl->Tdirty;
PL_localizing = proto_perl->Tlocalizing;
+#ifdef PERL_FLEXIBLE_EXCEPTIONS
PL_protect = proto_perl->Tprotect;
+#endif
PL_errors = sv_dup_inc(proto_perl->Terrors);
PL_av_fetch_sv = Nullsv;
PL_hv_fetch_sv = Nullsv;