From: Vincent Pit Date: Thu, 27 Aug 2009 09:13:09 +0000 (+0200) Subject: Speed up repeatcpy() by at least 40% for 1-char or numerous repeats X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=16fa5c119c4bda5c0396a5f81296bd1ccc128a9c;p=p5sagit%2Fp5-mst-13.2.git Speed up repeatcpy() by at least 40% for 1-char or numerous repeats And don't make it receive the interpreter anymore. For 1-char repeats, use memset(). Otherwise, use the old byte-by-byte implementation for the first few (small number of) repeats, and then use memcpy() in a binary manner, doubling the region we previously copied on each step. Note that we use memcpy() so both strings shouldn't overlap. The previous implementation didn't allow overlap either. This would be a good place to use the restrict keyword from C99. I'm not sure if Configure has a probe for it. --- diff --git a/embed.fnc b/embed.fnc index f2c7050..74cc604 100644 --- a/embed.fnc +++ b/embed.fnc @@ -860,7 +860,7 @@ EXp |SV*|reg_qr_package|NN REGEXP * const rx : FIXME - why the E? Ep |void |regprop |NULLOK const regexp *prog|NN SV* sv|NN const regnode* o -Ap |void |repeatcpy |NN char* to|NN const char* from|I32 len|I32 count +Anp |void |repeatcpy |NN char* to|NN const char* from|I32 len|I32 count AnpP |char* |rninstr |NN const char* big|NN const char* bigend \ |NN const char* little|NN const char* lend Ap |Sighandler_t|rsignal |int i|Sighandler_t t diff --git a/embed.h b/embed.h index 921c63d..fa2561f 100644 --- a/embed.h +++ b/embed.h @@ -3076,7 +3076,7 @@ #if defined(PERL_CORE) || defined(PERL_EXT) #define regprop(a,b,c) Perl_regprop(aTHX_ a,b,c) #endif -#define repeatcpy(a,b,c,d) Perl_repeatcpy(aTHX_ a,b,c,d) +#define repeatcpy Perl_repeatcpy #define rninstr Perl_rninstr #define rsignal(a,b) Perl_rsignal(aTHX_ a,b) #ifdef PERL_CORE diff --git a/proto.h b/proto.h index cb04ab8..9734b14 100644 --- a/proto.h +++ b/proto.h @@ -2665,9 +2665,9 @@ PERL_CALLCONV void Perl_regprop(pTHX_ const regexp *prog, SV* sv, const regnode* #define PERL_ARGS_ASSERT_REGPROP \ 
assert(sv); assert(o) -PERL_CALLCONV void Perl_repeatcpy(pTHX_ char* to, const char* from, I32 len, I32 count) - __attribute__nonnull__(pTHX_1) - __attribute__nonnull__(pTHX_2); +PERL_CALLCONV void Perl_repeatcpy(char* to, const char* from, I32 len, I32 count) + __attribute__nonnull__(1) + __attribute__nonnull__(2); #define PERL_ARGS_ASSERT_REPEATCPY \ assert(to); assert(from) diff --git a/util.c b/util.c index 3f43393..ae8c688 100644 --- a/util.c +++ b/util.c @@ -3026,26 +3026,36 @@ Perl_my_pclose(pTHX_ PerlIO *ptr) } #endif +#define PERL_REPEATCPY_LINEAR 4 void -Perl_repeatcpy(pTHX_ register char *to, register const char *from, I32 len, register I32 count) +Perl_repeatcpy(register char *to, register const char *from, I32 len, register I32 count) { - register I32 todo; - register const char * const frombase = from; - PERL_UNUSED_CONTEXT; - PERL_ARGS_ASSERT_REPEATCPY; - if (len == 1) { - register const char c = *from; - while (count-- > 0) - *to++ = c; - return; - } - while (count-- > 0) { - for (todo = len; todo > 0; todo--) { - *to++ = *from++; + if (len == 1) + memset(to, *from, count); + else if (count) { + register char *p = to; + I32 items, linear, half; + + linear = count < PERL_REPEATCPY_LINEAR ? count : PERL_REPEATCPY_LINEAR; + for (items = 0; items < linear; ++items) { + register const char *q = from; + I32 todo; + for (todo = len; todo > 0; todo--) + *p++ = *q++; + } + + half = count / 2; + while (items <= half) { + I32 size = items * len; + memcpy(p, to, size); + p += size; + items *= 2; } - from = frombase; + + if (count > items) + memcpy(p, to, (count - items) * len); } }