X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=sv.c;h=2312eb043334aec9b93fc8ea1963d8b1a01dbfcf;hb=658bbd66c2d2a7c48f06bb97a9c36f4970ca2011;hp=9376540a8a3eb5044e57e42bb824f18cb9d2e799;hpb=d33b2eba4a0e814b78ec910034b3f6943f4805ac;p=p5sagit%2Fp5-mst-13.2.git diff --git a/sv.c b/sv.c index 9376540..2312eb0 100644 --- a/sv.c +++ b/sv.c @@ -107,7 +107,7 @@ Perl_sv_add_arena(pTHX_ char *ptr, U32 size, U32 flags) SV* sva = (SV*)ptr; register SV* sv; register SV* svend; - Zero(sva, size, char); + Zero(ptr, size, char); /* The first SV in an arena isn't an SV. */ SvANY(sva) = (void *) PL_sv_arenaroot; /* ptr to next arena */ @@ -2214,7 +2214,8 @@ Perl_sv_2pv(pTHX_ register SV *sv, STRLEN *lp) char * Perl_sv_2pvbyte_nolen(pTHX_ register SV *sv) { - return sv_2pv_nolen(sv); + STRLEN n_a; + return sv_2pvbyte(sv, &n_a); } char * @@ -2226,12 +2227,14 @@ Perl_sv_2pvbyte(pTHX_ register SV *sv, STRLEN *lp) char * Perl_sv_2pvutf8_nolen(pTHX_ register SV *sv) { - return sv_2pv_nolen(sv); + STRLEN n_a; + return sv_2pvutf8(sv, &n_a); } char * Perl_sv_2pvutf8(pTHX_ register SV *sv, STRLEN *lp) { + sv_utf8_upgrade(sv); return sv_2pv(sv,lp); } @@ -2273,6 +2276,139 @@ Perl_sv_2bool(pTHX_ register SV *sv) } } +void +Perl_sv_utf8_upgrade(pTHX_ register SV *sv) +{ + int hicount; + char *c; + + if (!sv || !SvPOK(sv) || SvUTF8(sv)) + return; + + /* This function could be much more efficient if we had a FLAG + * to signal if there are any hibit chars in the string + */ + hicount = 0; + for (c = SvPVX(sv); c < SvEND(sv); c++) { + if (*c & 0x80) + hicount++; + } + + if (hicount) { + char *src, *dst; + SvGROW(sv, SvCUR(sv) + hicount + 1); + + src = SvEND(sv) - 1; + SvCUR_set(sv, SvCUR(sv) + hicount); + dst = SvEND(sv) - 1; + + while (src < dst) { + if (*src & 0x80) { + dst--; + uv_to_utf8((U8*)dst, (U8)*src--); + dst--; + } + else { + *dst-- = *src--; + } + } + + SvUTF8_on(sv); + } +} + +bool +Perl_sv_utf8_downgrade(pTHX_ register SV* sv, bool fail_ok) +{ + if (SvPOK(sv) && SvUTF8(sv)) { + char *c = SvPVX(sv); + char *first_hi = 0; + /* need to figure out if this is possible at all first */ + while (c < SvEND(sv)) { + if (*c & 0x80) { + I32 len; + UV uv = utf8_to_uv((U8*)c, &len); + if (uv >= 256) { + if (fail_ok) + return FALSE; + else { + /* XXX might want to make a callback here instead */ + Perl_croak(aTHX_ "Big byte"); + } + } + if (!first_hi) + first_hi = c; + c += len; + } + else { + c++; + } + } + + if (first_hi) { + char *src = first_hi; + char *dst = first_hi; + while (src < SvEND(sv)) { + if (*src & 0x80) { + I32 len; + U8 u = (U8)utf8_to_uv((U8*)src, &len); + *dst++ = u; + src += len; + } + else { + *dst++ = *src++; + } + } + SvCUR_set(sv, dst - SvPVX(sv)); + } + SvUTF8_off(sv); + } + return TRUE; +} + +void +Perl_sv_utf8_encode(pTHX_ register SV *sv) +{ + sv_utf8_upgrade(sv); + SvUTF8_off(sv); +} + +bool +Perl_sv_utf8_decode(pTHX_ register SV *sv) +{ + if (SvPOK(sv)) { + char *c; + bool has_utf = FALSE; + if (!sv_utf8_downgrade(sv, TRUE)) + return FALSE; + + /* it is actually just a matter of turning the utf8 flag on, but + * we want to make sure everything inside is valid utf8 first. + */ + c = SvPVX(sv); + while (c < SvEND(sv)) { + if (*c & 0x80) { + I32 len; + (void)utf8_to_uv((U8*)c, &len); + if (len == 1) { + /* bad utf8 */ + return FALSE; + } + c += len; + has_utf = TRUE; + } + else { + c++; + } + } + + if (has_utf) + SvUTF8_on(sv); + } + return TRUE; +} + + /* Note: sv_setsv() should not be called with a source string that needs * to be reused, since it may destroy the source string if it is marked * as temporary. @@ -2526,16 +2662,11 @@ Perl_sv_setsv(pTHX_ SV *dstr, register SV *sstr) Perl_croak(aTHX_ "Can't redefine active sort subroutine %s", GvENAME((GV*)dstr)); - if (ckWARN(WARN_REDEFINE) || (const_changed && const_sv)) { - if (!(CvGV(cv) && GvSTASH(CvGV(cv)) - && HvNAME(GvSTASH(CvGV(cv))) - && strEQ(HvNAME(GvSTASH(CvGV(cv))), - "autouse"))) - Perl_warner(aTHX_ WARN_REDEFINE, const_sv ? + if ((const_changed || const_sv) && ckWARN(WARN_REDEFINE)) + Perl_warner(aTHX_ WARN_REDEFINE, const_sv ? "Constant subroutine %s redefined" : "Subroutine %s redefined", GvENAME((GV*)dstr)); - } } cv_ckproto(cv, (GV*)dstr, SvPOK(sref) ? SvPVX(sref) : Nullch); @@ -2627,6 +2758,11 @@ Perl_sv_setsv(pTHX_ SV *dstr, register SV *sstr) SvPV_set(dstr, SvPVX(sstr)); SvLEN_set(dstr, SvLEN(sstr)); SvCUR_set(dstr, SvCUR(sstr)); + if (SvUTF8(sstr)) + SvUTF8_on(dstr); + else + SvUTF8_off(dstr); + SvTEMP_off(dstr); (void)SvOK_off(sstr); SvPV_set(sstr, Nullch); @@ -2676,8 +2812,8 @@ Perl_sv_setsv(pTHX_ SV *dstr, register SV *sstr) } else { if (dtype == SVt_PVGV) { - if (ckWARN(WARN_UNSAFE)) - Perl_warner(aTHX_ WARN_UNSAFE, "Undefined value assigned to typeglob"); + if (ckWARN(WARN_MISC)) + Perl_warner(aTHX_ WARN_MISC, "Undefined value assigned to typeglob"); } else (void)SvOK_off(dstr); @@ -2955,10 +3091,13 @@ Perl_sv_catsv(pTHX_ SV *dstr, register SV *sstr) STRLEN len; if (!sstr) return; - if (s = SvPV(sstr, len)) + if (s = SvPV(sstr, len)) { + if (SvUTF8(sstr)) + sv_utf8_upgrade(dstr); sv_catpvn(dstr,s,len); - if (SvUTF8(sstr)) - SvUTF8_on(dstr); + if (SvUTF8(sstr)) + SvUTF8_on(dstr); + } } /* @@ -3807,11 +3946,42 @@ C. I32 Perl_sv_cmp(pTHX_ register SV *str1, register SV *str2) { - STRLEN cur1 = 0; - char *pv1 = str1 ? SvPV(str1, cur1) : (char *) NULL; - STRLEN cur2 = 0; - char *pv2 = str2 ? SvPV(str2, cur2) : (char *) NULL; + STRLEN cur1, cur2; + char *pv1, *pv2; I32 retval; + bool utf1; + + if (str1) { + pv1 = SvPV(str1, cur1); + } + else { + cur1 = 0; + } + + if (str2) { + if (SvPOK(str2)) { + if (SvPOK(str1) && SvUTF8(str1) != SvUTF8(str2) && !IN_BYTE) { + /* must upgrade other to UTF8 first */ + if (SvUTF8(str1)) { + sv_utf8_upgrade(str2); + } + else { + sv_utf8_upgrade(str1); + /* refresh pointer and length */ + pv1 = SvPVX(str1); + cur1 = SvCUR(str1); + } + } + pv2 = SvPVX(str2); + cur2 = SvCUR(str2); + } + else { + pv2 = sv_2pv(str2, &cur2); + } + } + else { + cur2 = 0; + } if (!cur1) return cur2 ? -1 : 0; @@ -4957,18 +5127,21 @@ Perl_sv_pvbyten_force(pTHX_ SV *sv, STRLEN *lp) char * Perl_sv_pvutf8(pTHX_ SV *sv) { + sv_utf8_upgrade(sv); return sv_pv(sv); } char * Perl_sv_pvutf8n(pTHX_ SV *sv, STRLEN *lp) { + sv_utf8_upgrade(sv); return sv_pvn(sv,lp); } char * Perl_sv_pvutf8n_force(pTHX_ SV *sv, STRLEN *lp) { + sv_utf8_upgrade(sv); return sv_pvn_force(sv,lp); } @@ -5224,6 +5397,8 @@ Perl_sv_bless(pTHX_ SV *sv, HV *stash) STATIC void S_sv_unglob(pTHX_ SV *sv) { + void *xpvmg; + assert(SvTYPE(sv) == SVt_PVGV); SvFAKE_off(sv); if (GvGP(sv)) @@ -5235,6 +5410,13 @@ S_sv_unglob(pTHX_ SV *sv) sv_unmagic(sv, '*'); Safefree(GvNAME(sv)); GvMULTI_off(sv); + + /* need to keep SvANY(sv) in the right arena */ + xpvmg = new_XPVMG(); + StructCopy(SvANY(sv), xpvmg, XPVMG); + del_XPVGV(SvANY(sv)); + SvANY(sv) = xpvmg; + SvFLAGS(sv) &= ~SVTYPEMASK; SvFLAGS(sv) |= SVt_PVMG; } @@ -5552,6 +5734,8 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV for (p = (char*)pat; p < patend; p = q) { bool alt = FALSE; bool left = FALSE; + bool vectorize = FALSE; + bool utf = FALSE; char fill = ' '; char plus = 0; char intsize = 0; @@ -5562,7 +5746,7 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV bool is_utf = FALSE; char esignbuf[4]; - U8 utf8buf[10]; + U8 utf8buf[UTF8_MAXLEN]; STRLEN esignlen = 0; char *eptr = Nullch; @@ -5573,6 +5757,10 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV char ebuf[IV_DIG * 4 + NV_DIG + 32]; /* large enough for "%#.#f" --chip */ /* what about long double NVs? --jhi */ + + SV *vecsv; + U8 *vecstr = Null(U8*); + STRLEN veclen = 0; char c; int i; unsigned base; @@ -5582,6 +5770,8 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV STRLEN have; STRLEN need; STRLEN gap; + char *dotstr = "."; + STRLEN dotstrlen = 1; for (q = p; q < patend && *q != '%'; ++q) ; if (q > p) { @@ -5614,6 +5804,30 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV q++; continue; + case '*': /* printf("%*vX",":",$ipv6addr) */ + if (q[1] != 'v') + break; + q++; + if (args) + vecsv = va_arg(*args, SV*); + else if (svix < svmax) + vecsv = svargs[svix++]; + dotstr = SvPVx(vecsv,dotstrlen); + if (DO_UTF8(vecsv)) + is_utf = TRUE; + /* FALL THROUGH */ + + case 'v': + vectorize = TRUE; + q++; + if (args) + vecsv = va_arg(*args, SV*); + else if (svix < svmax) + vecsv = svargs[svix++]; + vecstr = (U8*)SvPVx(vecsv,veclen); + utf = DO_UTF8(vecsv); + continue; + default: break; } @@ -5749,63 +5963,6 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV } goto string; - case 'v': - if (args) - argsv = va_arg(*args, SV*); - else if (svix < svmax) - argsv = svargs[svix++]; - { - STRLEN len; - U8 *str = (U8*)SvPVx(argsv,len); - I32 vlen = len*3+1; - SV *vsv = NEWSV(73,vlen); - I32 ulen; - I32 vfree = vlen; - U8 *vptr = (U8*)SvPVX(vsv); - STRLEN vcur = 0; - bool utf = DO_UTF8(argsv); - - if (utf) - is_utf = TRUE; - while (len) { - UV uv; - - if (utf) - uv = utf8_to_uv(str, &ulen); - else { - uv = *str; - ulen = 1; - } - str += ulen; - len -= ulen; - eptr = ebuf + sizeof ebuf; - do { - *--eptr = '0' + uv % 10; - } while (uv /= 10); - elen = (ebuf + sizeof ebuf) - eptr; - while (elen >= vfree-1) { - STRLEN off = vptr - (U8*)SvPVX(vsv); - vfree += vlen; - vlen *= 2; - SvGROW(vsv, vlen); - vptr = (U8*)SvPVX(vsv) + off; - } - memcpy(vptr, eptr, elen); - vptr += elen; - *vptr++ = '.'; - vfree -= elen + 1; - vcur += elen + 1; - } - if (vcur) { - vcur--; - vptr[-1] = '\0'; - } - SvCUR_set(vsv,vcur); - eptr = SvPVX(vsv); - elen = vcur; - } - goto string; - case '_': /* * The "%_" hack might have to be changed someday, @@ -5820,6 +5977,7 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV is_utf = TRUE; string: + vectorize = FALSE; if (has_precis && elen > precis) elen = precis; break; @@ -5843,7 +6001,22 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV /* FALL THROUGH */ case 'd': case 'i': - if (args) { + if (vectorize) { + I32 ulen; + if (!veclen) { + vectorize = FALSE; + break; + } + if (utf) + iv = (IV)utf8_to_uv(vecstr, &ulen); + else { + iv = *vecstr; + ulen = 1; + } + vecstr += ulen; + veclen -= ulen; + } + else if (args) { switch (intsize) { case 'h': iv = (short)va_arg(*args, int); break; default: iv = va_arg(*args, int); break; @@ -5909,7 +6082,23 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV base = 16; uns_integer: - if (args) { + if (vectorize) { + I32 ulen; + vector: + if (!veclen) { + vectorize = FALSE; + break; + } + if (utf) + uv = utf8_to_uv(vecstr, &ulen); + else { + uv = *vecstr; + ulen = 1; + } + vecstr += ulen; + veclen -= ulen; + } + else if (args) { switch (intsize) { case 'h': uv = (unsigned short)va_arg(*args, unsigned); break; default: uv = va_arg(*args, unsigned); break; @@ -5971,13 +6160,13 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV break; default: /* it had better be ten or less */ #if defined(PERL_Y2KWARN) - if (ckWARN(WARN_MISC)) { + if (ckWARN(WARN_Y2K)) { STRLEN n; char *s = SvPV(sv,n); if (n >= 2 && s[n-2] == '1' && s[n-1] == '9' && (n == 2 || !isDIGIT(s[n-3]))) { - Perl_warner(aTHX_ WARN_MISC, + Perl_warner(aTHX_ WARN_Y2K, "Possible Y2K bug: %%%c %s", c, "format string following '19'"); } @@ -6009,6 +6198,7 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV /* This is evil, but floating point is even more evil */ + vectorize = FALSE; if (args) nv = va_arg(*args, NV); else @@ -6017,7 +6207,7 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV need = 0; if (c != 'e' && c != 'E') { i = PERL_INT_MIN; - (void)frexp(nv, &i); + (void)Perl_frexp(nv, &i); if (i == PERL_INT_MIN) Perl_die(aTHX_ "panic: frexp"); if (i > 0) @@ -6040,8 +6230,9 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV *--eptr = c; #ifdef USE_LONG_DOUBLE { - char* p = PERL_PRIfldbl + sizeof(PERL_PRIfldbl) - 3; - while (p >= PERL_PRIfldbl) { *--eptr = *p--; } + static char const my_prifldbl[] = PERL_PRIfldbl; + char const *p = my_prifldbl + sizeof my_prifldbl - 3; + while (p >= my_prifldbl) { *--eptr = *p--; } } #endif if (has_precis) { @@ -6076,6 +6267,7 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV /* SPECIAL */ case 'n': + vectorize = FALSE; i = SvCUR(sv) - origlen; if (args) { switch (intsize) { @@ -6096,6 +6288,7 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV default: unknown: + vectorize = FALSE; if (!args && ckWARN(WARN_PRINTF) && (PL_op->op_type == OP_PRTF || PL_op->op_type == OP_SPRINTF)) { SV *msg = sv_newmortal(); @@ -6134,7 +6327,7 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV need = (have > width ? have : width); gap = need - have; - SvGROW(sv, SvCUR(sv) + need + 1); + SvGROW(sv, SvCUR(sv) + need + dotstrlen + 1); p = SvEND(sv); if (esignlen && fill == '0') { for (i = 0; i < esignlen; i++) @@ -6160,10 +6353,22 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV memset(p, ' ', gap); p += gap; } + if (vectorize) { + if (veclen) { + memcpy(p, dotstr, dotstrlen); + p += dotstrlen; + } + else + vectorize = FALSE; /* done iterating over vecstr */ + } if (is_utf) SvUTF8_on(sv); *p = '\0'; SvCUR(sv) = p - SvPVX(sv); + if (vectorize) { + esignlen = 0; + goto vector; + } } } @@ -7038,13 +7243,13 @@ Perl_ss_dup(pTHX_ PerlInterpreter *proto_perl) ptr = POPPTR(ss,ix); TOPPTR(nss,ix) = any_dup(ptr, proto_perl); /* XXX quite arbitrary */ dptr = POPDPTR(ss,ix); - TOPDPTR(nss,ix) = (void (*)(void*))any_dup(dptr, proto_perl); + TOPDPTR(nss,ix) = (void (*)(void*))any_dup((void *)dptr, proto_perl); break; case SAVEt_DESTRUCTOR_X: ptr = POPPTR(ss,ix); TOPPTR(nss,ix) = any_dup(ptr, proto_perl); /* XXX quite arbitrary */ dxptr = POPDXPTR(ss,ix); - TOPDXPTR(nss,ix) = (void (*)(pTHXo_ void*))any_dup(dxptr, proto_perl); + TOPDXPTR(nss,ix) = (void (*)(pTHXo_ void*))any_dup((void *)dxptr, proto_perl); break; case SAVEt_REGCONTEXT: case SAVEt_ALLOC: @@ -7134,10 +7339,10 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags, # ifdef PERL_OBJECT CPerlObj *pPerl = new(ipM) CPerlObj(ipM, ipMS, ipMP, ipE, ipStd, ipLIO, ipD, ipS, ipP); - PERL_SET_INTERP(pPerl); + PERL_SET_THX(pPerl); # else /* !PERL_OBJECT */ PerlInterpreter *my_perl = (PerlInterpreter*)(*ipM->pMalloc)(ipM, sizeof(PerlInterpreter)); - PERL_SET_INTERP(my_perl); + PERL_SET_THX(my_perl); # ifdef DEBUGGING memset(my_perl, 0xab, sizeof(PerlInterpreter)); @@ -7165,7 +7370,7 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags, SV *sv; SV **svp; PerlInterpreter *my_perl = (PerlInterpreter*)PerlMem_malloc(sizeof(PerlInterpreter)); - PERL_SET_INTERP(my_perl); + PERL_SET_THX(my_perl); # ifdef DEBUGGING memset(my_perl, 0xab, sizeof(PerlInterpreter)); @@ -7671,7 +7876,9 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags, PL_dirty = proto_perl->Tdirty; PL_localizing = proto_perl->Tlocalizing; +#ifdef PERL_FLEXIBLE_EXCEPTIONS PL_protect = proto_perl->Tprotect; +#endif PL_errors = sv_dup_inc(proto_perl->Terrors); PL_av_fetch_sv = Nullsv; PL_hv_fetch_sv = Nullsv;