X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=sv.c;h=59d0b123056f8fb5313a6d2a4330a5aeb5e0739c;hb=82d4537cdeff77e7ba16a40dc7517efc8eb83ce2;hp=d82e354341db1415bc03834f7cf84763568a16b8;hpb=e27ad1f20b87bf08f3461d0be498f8d4da22a576;p=p5sagit%2Fp5-mst-13.2.git diff --git a/sv.c b/sv.c index d82e354..59d0b12 100644 --- a/sv.c +++ b/sv.c @@ -24,6 +24,24 @@ #define FCALL *f +#ifdef PERL_UTF8_CACHE_ASSERT +/* The cache element 0 is the Unicode offset; + * the cache element 1 is the byte offset of the element 0; + * the cache element 2 is the Unicode length of the substring; + * the cache element 3 is the byte length of the substring; + * The checking of the substring side would be good + * but substr() has enough code paths to make my head spin; + * if adding more checks watch out for the following tests: + * t/op/index.t t/op/length.t t/op/pat.t t/op/substr.t + * lib/utf8.t lib/Unicode/Collate/t/index.t + * --jhi + */ +#define ASSERT_UTF8_CACHE(cache) \ + STMT_START { if (cache) { assert((cache)[0] <= (cache)[1]); } } STMT_END +#else +#define ASSERT_UTF8_CACHE(cache) NOOP +#endif + #ifdef PERL_COPY_ON_WRITE #define SV_COW_NEXT_SV(sv) INT2PTR(SV *,SvUVX(sv)) #define SV_COW_NEXT_SV_SET(current,next) SvUVX(current) = PTR2UV(next) @@ -3673,7 +3691,7 @@ Perl_sv_setsv_flags(pTHX_ SV *dstr, register SV *sstr, I32 flags) if (dtype < SVt_RV) sv_upgrade(dstr, SVt_RV); else if (dtype == SVt_PVGV && - SvTYPE(SvRV(sstr)) == SVt_PVGV) { + SvROK(sstr) && SvTYPE(SvRV(sstr)) == SVt_PVGV) { sstr = SvRV(sstr); if (sstr == dstr) { if (GvIMPORTED(dstr) != GVf_IMPORTED @@ -4501,6 +4519,8 @@ Efficient removal of characters from the beginning of the string buffer. SvPOK(sv) must be true and the C must be a pointer to somewhere inside the string buffer. The C becomes the first character of the adjusted string. Uses the "OOK hack". +Beware: after this function returns, C and SvPVX(sv) may no longer +refer to the same chunk of data. =cut */ @@ -4509,9 +4529,9 @@ void Perl_sv_chop(pTHX_ register SV *sv, register char *ptr) { register STRLEN delta; - if (!ptr || !SvPOKp(sv)) return; + delta = ptr - SvPVX(sv); SV_CHECK_THINKFIRST(sv); if (SvTYPE(sv) < SVt_PVIV) sv_upgrade(sv,SVt_PVIV); @@ -4531,7 +4551,6 @@ Perl_sv_chop(pTHX_ register SV *sv, register char *ptr) SvFLAGS(sv) |= SVf_OOK; } SvNIOK_off(sv); - delta = ptr - SvPVX(sv); SvLEN(sv) -= delta; SvCUR(sv) -= delta; SvPVX(sv) += delta; @@ -5090,7 +5109,9 @@ S_sv_add_backref(pTHX_ SV *tsv, SV *sv) else { av = newAV(); sv_magic(tsv, (SV*)av, PERL_MAGIC_backref, NULL, 0); - SvREFCNT_dec(av); /* for sv_magic */ + /* av now has a refcnt of 2, which avoids it getting freed + * before us during global cleanup. The extra ref is removed + * by magic_killbackrefs() when tsv is being freed */ } if (AvFILLp(av) >= AvMAX(av)) { SV **svp = AvARRAY(av); @@ -5652,8 +5673,12 @@ Perl_sv_len_utf8(pTHX_ register SV *sv) U8 *s = (U8*)SvPV(sv, len); MAGIC *mg = SvMAGICAL(sv) ? mg_find(sv, PERL_MAGIC_utf8) : 0; - if (mg && mg->mg_len != -1 && (mg->mg_len > 0 || len == 0)) + if (mg && mg->mg_len != -1 && (mg->mg_len > 0 || len == 0)) { ulen = mg->mg_len; +#ifdef PERL_UTF8_CACHE_ASSERT + assert(ulen == Perl_utf8_length(aTHX_ s, s + len)); +#endif + } else { ulen = Perl_utf8_length(aTHX_ s, s + len); if (!mg && !SvREADONLY(sv)) { @@ -5723,8 +5748,9 @@ S_utf8_mg_pos(pTHX_ SV *sv, MAGIC **mgp, STRLEN **cachep, I32 i, I32 *offsetp, I *mgp = mg_find(sv, PERL_MAGIC_utf8); if (*mgp && (*mgp)->mg_ptr) { *cachep = (STRLEN *) (*mgp)->mg_ptr; + ASSERT_UTF8_CACHE(*cachep); if ((*cachep)[i] == (STRLEN)uoff) /* An exact match. */ - found = TRUE; + found = TRUE; else { /* We will skip to the right spot. */ STRLEN forw = 0; STRLEN backw = 0; @@ -5796,7 +5822,24 @@ S_utf8_mg_pos(pTHX_ SV *sv, MAGIC **mgp, STRLEN **cachep, I32 i, I32 *offsetp, I } } } +#ifdef PERL_UTF8_CACHE_ASSERT + if (found) { + U8 *s = start; + I32 n = uoff; + + while (n-- && s < send) + s += UTF8SKIP(s); + + if (i == 0) { + assert(*offsetp == s - start); + assert((*cachep)[0] == (STRLEN)uoff); + assert((*cachep)[1] == *offsetp); + } + ASSERT_UTF8_CACHE(*cachep); + } +#endif } + return found; } @@ -5868,12 +5911,14 @@ Perl_sv_pos_u2b(pTHX_ register SV *sv, I32* offsetp, I32* lenp) } *lenp = s - start; } + ASSERT_UTF8_CACHE(cache); } else { *offsetp = 0; if (lenp) *lenp = 0; } + return; } @@ -5917,13 +5962,13 @@ Perl_sv_pos_b2u(pTHX_ register SV* sv, I32* offsetp) mg = mg_find(sv, PERL_MAGIC_utf8); if (mg && mg->mg_ptr) { cache = (STRLEN *) mg->mg_ptr; - if (cache[1] == *offsetp) { + if (cache[1] == (STRLEN)*offsetp) { /* An exact match. */ *offsetp = cache[0]; return; } - else if (cache[1] < *offsetp) { + else if (cache[1] < (STRLEN)*offsetp) { /* We already know part of the way. */ len = cache[0]; s += cache[1]; @@ -5946,17 +5991,20 @@ Perl_sv_pos_b2u(pTHX_ register SV* sv, I32* offsetp) while (backw--) { p--; - while (UTF8_IS_CONTINUATION(*p)) + while (UTF8_IS_CONTINUATION(*p)) { p--; + backw--; + } ubackw++; } cache[0] -= ubackw; - + *offsetp = cache[0]; return; } } } + ASSERT_UTF8_CACHE(cache); } while (s < send) { @@ -8665,7 +8713,7 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV vecsv = va_arg(*args, SV*); else vecsv = (evix ? evix <= svmax : svix < svmax) ? - svargs[ewix ? ewix-1 : svix++] : &PL_sv_undef; + svargs[evix ? evix-1 : svix++] : &PL_sv_undef; dotstr = SvPVx(vecsv, dotstrlen); if (DO_UTF8(vecsv)) is_utf8 = TRUE; @@ -9326,6 +9374,13 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV p = SvEND(sv); *p = '\0'; } + /* Use memchr() instead of strchr(), as eptr is not guaranteed */ + /* to point to a null-terminated string. */ + if (left && ckWARN(WARN_PRINTF) && memchr(eptr, '\n', elen) && + (PL_op->op_type == OP_PRTF || PL_op->op_type == OP_SPRINTF)) + Perl_warner(aTHX_ packWARN(WARN_PRINTF), + "Newline in left-justified string for %sprintf", + (PL_op->op_type == OP_PRTF) ? "" : "s"); have = esignlen + zeros + elen; need = (have > width ? have : width); @@ -10737,6 +10792,8 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags, PL_markstack = 0; PL_scopestack = 0; PL_savestack = 0; + PL_savestack_ix = 0; + PL_savestack_max = -1; PL_retstack = 0; PL_sig_pending = 0; Zero(&PL_debug_pad, 1, struct perl_debug_pad); @@ -10768,6 +10825,8 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags, PL_markstack = 0; PL_scopestack = 0; PL_savestack = 0; + PL_savestack_ix = 0; + PL_savestack_max = -1; PL_retstack = 0; PL_sig_pending = 0; Zero(&PL_debug_pad, 1, struct perl_debug_pad); @@ -11255,6 +11314,7 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags, PL_glob_index = proto_perl->Iglob_index; PL_srand_called = proto_perl->Isrand_called; + PL_hash_seed = proto_perl->Ihash_seed; PL_uudmap['M'] = 0; /* reinits on demand */ PL_bitcount = Nullch; /* reinits on demand */