From: Nicholas Clark Date: Sat, 5 Jan 2008 11:30:31 +0000 (+0000) Subject: BER is all very well, but it turns out that it's better to store the X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=69240efd70fee399a5232ed995c383b154000a08;p=p5sagit%2Fp5-mst-13.2.git BER is all very well, but it turns out that it's better to store the offset as either a byte (if <256), or a 0 byte with a STRLEN before. "better" in that the reading can be inlined, and even then the object code is smaller (function calls have space overhead). So goodbye Perl_sv_read_offset() and hello SvOOK_offset(). p4raw-id: //depot/perl@32838 --- diff --git a/dump.c b/dump.c index 9312bf4..b4eaed8 100644 --- a/dump.c +++ b/dump.c @@ -1576,10 +1576,13 @@ Perl_do_sv_dump(pTHX_ I32 level, PerlIO *file, SV *sv, I32 nest, I32 maxnest, bo } if (type <= SVt_PVLV && !isGV_with_GP(sv)) { if (SvPVX_const(sv)) { - UV delta = SvOOK(sv) ? sv_read_offset(sv) : 0; + STRLEN delta; if (SvOOK(sv)) { + SvOOK_offset(sv, delta); Perl_dump_indent(aTHX_ level, file," OFFSET = %"UVuf"\n", delta); + } else { + delta = 0; } Perl_dump_indent(aTHX_ level, file," PV = 0x%"UVxf" ", PTR2UV(SvPVX_const(sv))); if (SvOOK(sv)) { diff --git a/embed.fnc b/embed.fnc index 2ae0c3b..d36e2fd 100644 --- a/embed.fnc +++ b/embed.fnc @@ -881,7 +881,6 @@ Apd |void |sv_pos_b2u |NULLOK SV* sv|NN I32* offsetp Amdb |char* |sv_pvn_force |NN SV* sv|NULLOK STRLEN* lp Apd |char* |sv_pvutf8n_force|NN SV* sv|NULLOK STRLEN* lp Apd |char* |sv_pvbyten_force|NN SV* sv|NULLOK STRLEN* lp -Ap |UV |sv_read_offset |NN const SV *const sv Apd |char* |sv_recode_to_utf8 |NN SV* sv|NN SV *encoding Apd |bool |sv_cat_decode |NN SV* dsv|NN SV *encoding|NN SV *ssv|NN int *offset \ |NN char* tstr|int tlen diff --git a/embed.h b/embed.h index 72b4640..377266a 100644 --- a/embed.h +++ b/embed.h @@ -899,7 +899,6 @@ #define sv_pos_b2u Perl_sv_pos_b2u #define sv_pvutf8n_force Perl_sv_pvutf8n_force #define sv_pvbyten_force Perl_sv_pvbyten_force -#define sv_read_offset Perl_sv_read_offset #define sv_recode_to_utf8 Perl_sv_recode_to_utf8 #define sv_cat_decode Perl_sv_cat_decode #define sv_reftype Perl_sv_reftype @@ -3191,7 +3190,6 @@ #define sv_pos_b2u(a,b) Perl_sv_pos_b2u(aTHX_ a,b) #define sv_pvutf8n_force(a,b) Perl_sv_pvutf8n_force(aTHX_ a,b) #define sv_pvbyten_force(a,b) Perl_sv_pvbyten_force(aTHX_ a,b) -#define sv_read_offset(a) Perl_sv_read_offset(aTHX_ a) #define sv_recode_to_utf8(a,b) Perl_sv_recode_to_utf8(aTHX_ a,b) #define sv_cat_decode(a,b,c,d,e,f) Perl_sv_cat_decode(aTHX_ a,b,c,d,e,f) #define sv_reftype(a,b) Perl_sv_reftype(aTHX_ a,b) diff --git a/global.sym b/global.sym index e5f9d66..021d86b 100644 --- a/global.sym +++ b/global.sym @@ -539,7 +539,6 @@ Perl_sv_pos_b2u Perl_sv_pvn_force Perl_sv_pvutf8n_force Perl_sv_pvbyten_force -Perl_sv_read_offset Perl_sv_recode_to_utf8 Perl_sv_cat_decode Perl_sv_reftype diff --git a/pod/perlapi.pod b/pod/perlapi.pod index c46f697..d139026 100644 --- a/pod/perlapi.pod +++ b/pod/perlapi.pod @@ -4353,16 +4353,32 @@ Found in file sv.h =item SvOOK X -Returns a U32 indicating whether the SvIVX is a valid offset value for -the SvPVX. This hack is used internally to speed up removal of characters -from the beginning of a SvPV. When SvOOK is true, then the start of the -allocated string buffer is really (SvPVX - SvIVX). +Returns a U32 indicating whether the pointer to the string buffer is offset. +This hack is used internally to speed up removal of characters from the +beginning of a SvPV. When SvOOK is true, then the start of the +allocated string buffer is actually C bytes before SvPVX. +This offset used to be stored in SvIVX, but is now stored within the spare +part of the buffer. U32 SvOOK(SV* sv) =for hackers Found in file sv.h +=item SvOOK_offset +X + +Reads into I the offset from SvPVX back to the true start of the +allocated buffer, which will be non-zero if C has been used to +efficiently remove characters from start of the buffer. Implemented as a +macro, which takes the address of I, which must be of type C. +Evaluates I more than once. Sets I to 0 if C is false. + + void SvOOK_offset(NN SV*sv, STRLEN len) + +=for hackers +Found in file sv.h + =item SvPOK X diff --git a/proto.h b/proto.h index d276e3a..5bbb593 100644 --- a/proto.h +++ b/proto.h @@ -2355,9 +2355,6 @@ PERL_CALLCONV char* Perl_sv_pvutf8n_force(pTHX_ SV* sv, STRLEN* lp) PERL_CALLCONV char* Perl_sv_pvbyten_force(pTHX_ SV* sv, STRLEN* lp) __attribute__nonnull__(pTHX_1); -PERL_CALLCONV UV Perl_sv_read_offset(pTHX_ const SV *const sv) - __attribute__nonnull__(pTHX_1); - PERL_CALLCONV char* Perl_sv_recode_to_utf8(pTHX_ SV* sv, SV *encoding) __attribute__nonnull__(pTHX_1) __attribute__nonnull__(pTHX_2); diff --git a/sv.c b/sv.c index 883b97f..a9bdfd2 100644 --- a/sv.c +++ b/sv.c @@ -1397,13 +1397,15 @@ wrapper instead. int Perl_sv_backoff(pTHX_ register SV *sv) { - UV delta = sv_read_offset(sv); + STRLEN delta; const char * const s = SvPVX_const(sv); PERL_UNUSED_CONTEXT; assert(SvOOK(sv)); assert(SvTYPE(sv) != SVt_PVHV); assert(SvTYPE(sv) != SVt_PVAV); + SvOOK_offset(sv, delta); + SvLEN_set(sv, SvLEN(sv) + delta); SvPV_set(sv, SvPVX(sv) - delta); Move(s, SvPVX(sv), SvCUR(sv)+1, char); @@ -4190,43 +4192,6 @@ Perl_sv_force_normal_flags(pTHX_ register SV *sv, U32 flags) sv_unglob(sv); } -UV -Perl_sv_read_offset(pTHX_ const SV *const sv) { - U8 *p; - UV delta = 0; - U8 c; - - if (!SvOOK(sv)) - return 0; - p = (U8*)SvPVX_const(sv); - if (!p) - return 0; - - c = *--p; - delta = c & 0x7F; - while ((c & 0x80)) { - UV const last_delta = delta; - delta <<= 7; - if (delta < last_delta) - Perl_croak(aTHX_ "panic: overflow in sv_read_offset from %"UVuf - " to %"UVuf, last_delta, delta); - c = *--p; - delta |= c & 0x7F; - } -#ifdef DEBUGGING - { - /* Validate the preceding buffer's sentinels to verify that no-one is - using it. */ - const U8 *const real_start = (U8 *) SvPVX_const(sv) - delta; - while (p > real_start) { - --p; - assert (*p == (U8)PTR2UV(p)); - } - } -#endif - return delta; -} - /* =for apidoc sv_chop @@ -4243,8 +4208,8 @@ refer to the same chunk of data. void Perl_sv_chop(pTHX_ register SV *sv, register const char *ptr) { - register STRLEN delta; - UV old_delta; + STRLEN delta; + STRLEN old_delta; U8 *p; #ifdef DEBUGGING const U8 *real_start; @@ -4271,7 +4236,7 @@ Perl_sv_chop(pTHX_ register SV *sv, register const char *ptr) SvFLAGS(sv) |= SVf_OOK; old_delta = 0; } else { - old_delta = sv_read_offset(sv); + SvOOK_offset(sv, old_delta); } SvLEN_set(sv, SvLEN(sv) - delta); SvCUR_set(sv, SvCUR(sv) - delta); @@ -4285,22 +4250,13 @@ Perl_sv_chop(pTHX_ register SV *sv, register const char *ptr) real_start = p - delta; #endif - if (delta < 0x80) { + assert(delta); + if (delta < 0x100) { *--p = (U8) delta; } else { - /* Code lovingly ripped from pp_pack.c: */ - U8 buf[(sizeof(UV)*CHAR_BIT)/7+1]; - U8 *in = buf; - STRLEN len; - do { - *in++ = (U8)((delta & 0x7f) | 0x80); - delta >>= 7; - } while (delta); - buf[0] &= 0x7f; /* clear continue bit */ - - len = in - buf; - p -= len; - Copy(buf, p, len, U8); + *--p = 0; + p -= sizeof(STRLEN); + Copy((U8*)&delta, p, sizeof(STRLEN), U8); } #ifdef DEBUGGING @@ -5331,7 +5287,9 @@ Perl_sv_clear(pTHX_ register SV *sv) freescalar: /* Don't bother with SvOOK_off(sv); as we're only going to free it. */ if (SvOOK(sv)) { - SvPV_set(sv, SvPVX_mutable(sv) - sv_read_offset(sv)); + STRLEN offset; + SvOOK_offset(sv, offset); + SvPV_set(sv, SvPVX_mutable(sv) - offset); /* Don't even bother with turning off the OOK flag. */ } if (SvROK(sv)) { diff --git a/sv.h b/sv.h index afa18dc..e13b5ba 100644 --- a/sv.h +++ b/sv.h @@ -653,10 +653,12 @@ Will also turn off the UTF-8 status. Returns a boolean indicating whether the SV contains a v-string. =for apidoc Am|U32|SvOOK|SV* sv -Returns a U32 indicating whether the SvIVX is a valid offset value for -the SvPVX. This hack is used internally to speed up removal of characters -from the beginning of a SvPV. When SvOOK is true, then the start of the -allocated string buffer is really (SvPVX - SvIVX). +Returns a U32 indicating whether the pointer to the string buffer is offset. +This hack is used internally to speed up removal of characters from the +beginning of a SvPV. When SvOOK is true, then the start of the +allocated string buffer is actually C bytes before SvPVX. +This offset used to be stored in SvIVX, but is now stored within the spare +part of the buffer. =for apidoc Am|U32|SvROK|SV* sv Tests if the SV is an RV. @@ -1240,8 +1242,9 @@ the scalar's value cannot change unless written to. if (SvLEN(sv)) { \ assert(!SvROK(sv)); \ if(SvOOK(sv)) { \ - SvPV_set(sv, SvPVX_mutable(sv) \ - - sv_read_offset(sv)); \ + STRLEN zok; \ + SvOOK_offset(sv, zok); \ + SvPV_set(sv, SvPVX_mutable(sv) - zok); \ SvFLAGS(sv) &= ~SVf_OOK; \ } \ Safefree(SvPVX(sv)); \ @@ -1929,6 +1932,60 @@ C on the new SV. Implemented as a wrapper around C. #define newSVpvn_utf8(s, len, u) newSVpvn_flags((s), (len), (u) ? SVf_UTF8 : 0) /* +=for apidoc Am|void|SvOOK_offset|NN SV*sv|STRLEN len + +Reads into I the offset from SvPVX back to the true start of the +allocated buffer, which will be non-zero if C has been used to +efficiently remove characters from start of the buffer. Implemented as a +macro, which takes the address of I, which must be of type C. +Evaluates I more than once. Sets I to 0 if C is false. + +=cut +*/ + +#ifdef DEBUGGING +/* Does the bot know something I don't? +10:28 <@Nicholas> metabatman +10:28 <+meta> Nicholas: crash +*/ +# define SvOOK_offset(sv, offset) STMT_START { \ + assert(sizeof(offset) == sizeof(STRLEN)); \ + if (SvOOK(sv)) { \ + const U8 *crash = (U8*)SvPVX_const(sv); \ + offset = *--crash; \ + if (!offset) { \ + crash -= sizeof(STRLEN); \ + Copy(crash, (U8 *)&offset, sizeof(STRLEN), U8); \ + } \ + { \ + /* Validate the preceding buffer's sentinels to \ + verify that no-one is using it. */ \ + const U8 *const bonk = (U8 *) SvPVX_const(sv) - offset; \ + while (crash > bonk) { \ + --crash; \ + assert (*crash == (U8)PTR2UV(crash)); \ + } \ + } \ + } else { \ + offset = 0; \ + } \ + } STMT_END +#else + /* This is the same code, but avoids using any temporary variables: */ +# define SvOOK_offset(sv, offset) STMT_START { \ + assert(sizeof(offset) == sizeof(STRLEN)); \ + if (SvOOK(sv)) { \ + offset = ((U8*)SvPVX_const(sv))[-1]; \ + if (!offset) { \ + Copy(SvPVX_const(sv) - 1 - sizeof(STRLEN), \ + (U8 *)&offset, sizeof(STRLEN), U8); \ + } \ + } else { \ + offset = 0; \ + } \ + } STMT_END +#endif +/* * Local variables: * c-indentation-style: bsd * c-basic-offset: 4