Mithing.
[p5sagit/p5-mst-13.2.git] / lib / Benchmark.pm
CommitLineData
a0d0e21e 1package Benchmark;
2
f06db76b 3=head1 NAME
4
8a4f6ac2 5Benchmark - benchmark running times of Perl code
431d98c2 6
f06db76b 7=head1 SYNOPSIS
8
f36484b0 9 use Benchmark qw(:all) ;
10
f06db76b 11 timethis ($count, "code");
12
523cc92b 13 # Use Perl code in strings...
f06db76b 14 timethese($count, {
15 'Name1' => '...code1...',
16 'Name2' => '...code2...',
17 });
18
523cc92b 19 # ... or use subroutine references.
20 timethese($count, {
21 'Name1' => sub { ...code1... },
22 'Name2' => sub { ...code2... },
23 });
24
431d98c2 25 # cmpthese can be used both ways as well
26 cmpthese($count, {
27 'Name1' => '...code1...',
28 'Name2' => '...code2...',
29 });
30
31 cmpthese($count, {
32 'Name1' => sub { ...code1... },
33 'Name2' => sub { ...code2... },
34 });
35
36 # ...or in two stages
37 $results = timethese($count,
38 {
39 'Name1' => sub { ...code1... },
40 'Name2' => sub { ...code2... },
41 },
42 'none'
43 );
44 cmpthese( $results ) ;
45
f06db76b 46 $t = timeit($count, '...other code...')
47 print "$count loops of other code took:",timestr($t),"\n";
48
431d98c2 49 $t = countit($time, '...other code...')
50 $count = $t->iters ;
51 print "$count loops of other code took:",timestr($t),"\n";
52
f06db76b 53=head1 DESCRIPTION
54
55The Benchmark module encapsulates a number of routines to help you
56figure out how long it takes to execute some code.
57
8a4f6ac2 58timethis - run a chunk of code several times
59
60timethese - run several chunks of code several times
61
62cmpthese - print results of timethese as a comparison chart
63
64timeit - run a chunk of code and see how long it goes
65
66countit - see how many times a chunk of code runs in a given time
67
68
f06db76b 69=head2 Methods
70
71=over 10
72
73=item new
74
75Returns the current time. Example:
76
77 use Benchmark;
78 $t0 = new Benchmark;
79 # ... your code here ...
80 $t1 = new Benchmark;
81 $td = timediff($t1, $t0);
a24a9dfe 82 print "the code took:",timestr($td),"\n";
f06db76b 83
84=item debug
85
86Enables or disable debugging by setting the C<$Benchmark::Debug> flag:
87
523cc92b 88 debug Benchmark 1;
f06db76b 89 $t = timeit(10, ' 5 ** $Global ');
523cc92b 90 debug Benchmark 0;
f06db76b 91
431d98c2 92=item iters
93
94Returns the number of iterations.
95
f06db76b 96=back
97
98=head2 Standard Exports
99
523cc92b 100The following routines will be exported into your namespace
f06db76b 101if you use the Benchmark module:
102
103=over 10
104
105=item timeit(COUNT, CODE)
106
523cc92b 107Arguments: COUNT is the number of times to run the loop, and CODE is
108the code to run. CODE may be either a code reference or a string to
109be eval'd; either way it will be run in the caller's package.
110
111Returns: a Benchmark object.
112
113=item timethis ( COUNT, CODE, [ TITLE, [ STYLE ]] )
114
115Time COUNT iterations of CODE. CODE may be a string to eval or a
116code reference; either way the CODE will run in the caller's package.
117Results will be printed to STDOUT as TITLE followed by the times.
118TITLE defaults to "timethis COUNT" if none is provided. STYLE
119determines the format of the output, as described for timestr() below.
120
6ee623d5 121The COUNT can be zero or negative: this means the I<minimum number of
122CPU seconds> to run. A zero signifies the default of 3 seconds. For
123example to run at least for 10 seconds:
124
125 timethis(-10, $code)
126
127or to run two pieces of code tests for at least 3 seconds:
128
129 timethese(0, { test1 => '...', test2 => '...'})
130
131CPU seconds is, in UNIX terms, the user time plus the system time of
132the process itself, as opposed to the real (wallclock) time and the
133time spent by the child processes. Less than 0.1 seconds is not
134accepted (-0.01 as the count, for example, will cause a fatal runtime
135exception).
136
137Note that the CPU seconds is the B<minimum> time: CPU scheduling and
138other operating system factors may complicate the attempt so that a
139little bit more time is spent. The benchmark output will, however,
140also tell the number of C<$code> runs/second, which should be a more
141interesting number than the actually spent seconds.
142
143Returns a Benchmark object.
144
523cc92b 145=item timethese ( COUNT, CODEHASHREF, [ STYLE ] )
f06db76b 146
523cc92b 147The CODEHASHREF is a reference to a hash containing names as keys
148and either a string to eval or a code reference for each value.
149For each (KEY, VALUE) pair in the CODEHASHREF, this routine will
150call
f06db76b 151
523cc92b 152 timethis(COUNT, VALUE, KEY, STYLE)
f06db76b 153
1d2dff63 154The routines are called in string comparison order of KEY.
155
156The COUNT can be zero or negative, see timethis().
6ee623d5 157
3c6312e9 158Returns a hash of Benchmark objects, keyed by name.
159
523cc92b 160=item timediff ( T1, T2 )
f06db76b 161
523cc92b 162Returns the difference between two Benchmark times as a Benchmark
163object suitable for passing to timestr().
f06db76b 164
6ee623d5 165=item timestr ( TIMEDIFF, [ STYLE, [ FORMAT ] ] )
f06db76b 166
523cc92b 167Returns a string that formats the times in the TIMEDIFF object in
168the requested STYLE. TIMEDIFF is expected to be a Benchmark object
169similar to that returned by timediff().
170
3c6312e9 171STYLE can be any of 'all', 'none', 'noc', 'nop' or 'auto'. 'all' shows
172each of the 5 times available ('wallclock' time, user time, system time,
523cc92b 173user time of children, and system time of children). 'noc' shows all
174except the two children times. 'nop' shows only wallclock and the
175two children times. 'auto' (the default) will act as 'all' unless
176the children times are both zero, in which case it acts as 'noc'.
3c6312e9 177'none' prevents output.
523cc92b 178
179FORMAT is the L<printf(3)>-style format specifier (without the
180leading '%') to use to print the times. It defaults to '5.2f'.
f06db76b 181
182=back
183
184=head2 Optional Exports
185
186The following routines will be exported into your namespace
187if you specifically ask that they be imported:
188
189=over 10
190
523cc92b 191=item clearcache ( COUNT )
192
193Clear the cached time for COUNT rounds of the null loop.
194
195=item clearallcache ( )
f06db76b 196
523cc92b 197Clear all cached times.
f06db76b 198
ac8eabc1 199=item cmpthese ( COUT, CODEHASHREF, [ STYLE ] )
200
d1083c7a 201=item cmpthese ( RESULTSHASHREF, [ STYLE ] )
ac8eabc1 202
d1083c7a 203Optionally calls timethese(), then outputs comparison chart. This:
ac8eabc1 204
d1083c7a 205 cmpthese( -1, { a => "++\$i", b => "\$i *= 2" } ) ;
206
207outputs a chart like:
208
209 Rate b a
210 b 2831802/s -- -61%
211 a 7208959/s 155% --
212
213This chart is sorted from slowest to fastest, and shows the percent speed
214difference between each pair of tests.
215
216c<cmpthese> can also be passed the data structure that timethese() returns:
217
218 $results = timethese( -1, { a => "++\$i", b => "\$i *= 2" } ) ;
ac8eabc1 219 cmpthese( $results );
220
d1083c7a 221in case you want to see both sets of results.
222
223Returns a reference to an ARRAY of rows, each row is an ARRAY of cells from the
224above chart, including labels. This:
225
226 my $rows = cmpthese( -1, { a => '++$i', b => '$i *= 2' }, "none" );
227
228returns a data structure like:
229
230 [
231 [ '', 'Rate', 'b', 'a' ],
232 [ 'b', '2885232/s', '--', '-59%' ],
233 [ 'a', '7099126/s', '146%', '--' ],
234 ]
235
236B<NOTE>: This result value differs from previous versions, which returned
237the C<timethese()> result structure. If you want that, just use the two
238statement C<timethese>...C<cmpthese> idiom shown above.
239
240Incidently, note the variance in the result values between the two examples;
241this is typical of benchmarking. If this were a real benchmark, you would
242probably want to run a lot more iterations.
ac8eabc1 243
244=item countit(TIME, CODE)
245
246Arguments: TIME is the minimum length of time to run CODE for, and CODE is
247the code to run. CODE may be either a code reference or a string to
248be eval'd; either way it will be run in the caller's package.
249
250TIME is I<not> negative. countit() will run the loop many times to
251calculate the speed of CODE before running it for TIME. The actual
252time run for will usually be greater than TIME due to system clock
253resolution, so it's best to look at the number of iterations divided
254by the times that you are concerned with, not just the iterations.
255
256Returns: a Benchmark object.
257
523cc92b 258=item disablecache ( )
f06db76b 259
523cc92b 260Disable caching of timings for the null loop. This will force Benchmark
261to recalculate these timings for each new piece of code timed.
262
263=item enablecache ( )
264
265Enable caching of timings for the null loop. The time taken for COUNT
266rounds of the null loop will be calculated only once for each
267different COUNT used.
f06db76b 268
ac8eabc1 269=item timesum ( T1, T2 )
270
271Returns the sum of two Benchmark times as a Benchmark object suitable
272for passing to timestr().
273
f06db76b 274=back
275
276=head1 NOTES
277
278The data is stored as a list of values from the time and times
523cc92b 279functions:
f06db76b 280
431d98c2 281 ($real, $user, $system, $children_user, $children_system, $iters)
f06db76b 282
283in seconds for the whole loop (not divided by the number of rounds).
284
285The timing is done using time(3) and times(3).
286
287Code is executed in the caller's package.
288
f06db76b 289The time of the null loop (a loop with the same
290number of rounds but empty loop body) is subtracted
291from the time of the real loop.
292
3c6312e9 293The null loop times can be cached, the key being the
f06db76b 294number of rounds. The caching can be controlled using
295calls like these:
296
523cc92b 297 clearcache($key);
f06db76b 298 clearallcache();
299
523cc92b 300 disablecache();
f06db76b 301 enablecache();
302
3c6312e9 303Caching is off by default, as it can (usually slightly) decrease
304accuracy and does not usually noticably affect runtimes.
305
54e82ce5 306=head1 EXAMPLES
307
308For example,
309
14393033 310 use Benchmark qw( cmpthese ) ;
311 $x = 3;
312 cmpthese( -5, {
313 a => sub{$x*$x},
314 b => sub{$x**2},
315 } );
54e82ce5 316
317outputs something like this:
318
319 Benchmark: running a, b, each for at least 5 CPU seconds...
14393033 320 Rate b a
321 b 1559428/s -- -62%
322 a 4152037/s 166% --
323
54e82ce5 324
325while
326
14393033 327 use Benchmark qw( timethese cmpthese ) ;
328 $x = 3;
329 $r = timethese( -5, {
330 a => sub{$x*$x},
331 b => sub{$x**2},
332 } );
333 cmpthese $r;
54e82ce5 334
335outputs something like this:
336
14393033 337 Benchmark: running a, b, each for at least 5 CPU seconds...
338 a: 10 wallclock secs ( 5.14 usr + 0.13 sys = 5.27 CPU) @ 3835055.60/s (n=20210743)
339 b: 5 wallclock secs ( 5.41 usr + 0.00 sys = 5.41 CPU) @ 1574944.92/s (n=8520452)
340 Rate b a
341 b 1574945/s -- -59%
342 a 3835056/s 144% --
54e82ce5 343
344
f06db76b 345=head1 INHERITANCE
346
347Benchmark inherits from no other class, except of course
348for Exporter.
349
350=head1 CAVEATS
351
80eab818 352Comparing eval'd strings with code references will give you
431d98c2 353inaccurate results: a code reference will show a slightly slower
80eab818 354execution time than the equivalent eval'd string.
355
f06db76b 356The real time timing is done using time(2) and
357the granularity is therefore only one second.
358
359Short tests may produce negative figures because perl
523cc92b 360can appear to take longer to execute the empty loop
361than a short test; try:
f06db76b 362
363 timethis(100,'1');
364
365The system time of the null loop might be slightly
366more than the system time of the loop with the actual
a24a9dfe 367code and therefore the difference might end up being E<lt> 0.
f06db76b 368
8a4f6ac2 369=head1 SEE ALSO
370
371L<Devel::DProf> - a Perl code profiler
372
f06db76b 373=head1 AUTHORS
374
5aabfad6 375Jarkko Hietaniemi <F<jhi@iki.fi>>, Tim Bunce <F<Tim.Bunce@ig.co.uk>>
f06db76b 376
377=head1 MODIFICATION HISTORY
378
379September 8th, 1994; by Tim Bunce.
380
523cc92b 381March 28th, 1997; by Hugo van der Sanden: added support for code
382references and the already documented 'debug' method; revamped
383documentation.
f06db76b 384
6ee623d5 385April 04-07th, 1997: by Jarkko Hietaniemi, added the run-for-some-time
386functionality.
387
3c6312e9 388September, 1999; by Barrie Slaymaker: math fixes and accuracy and
389efficiency tweaks. Added cmpthese(). A result is now returned from
431d98c2 390timethese(). Exposed countit() (was runfor()).
3c6312e9 391
523cc92b 392=cut
a0d0e21e 393
3f943bd9 394# evaluate something in a clean lexical environment
395sub _doeval { eval shift }
396
397#
398# put any lexicals at file scope AFTER here
399#
400
4aa0a1f7 401use Carp;
a0d0e21e 402use Exporter;
403@ISA=(Exporter);
ac8eabc1 404@EXPORT=qw(timeit timethis timethese timediff timestr);
405@EXPORT_OK=qw(timesum cmpthese countit
406 clearcache clearallcache disablecache enablecache);
f36484b0 407%EXPORT_TAGS=( all => [ @EXPORT, @EXPORT_OK ] ) ;
a0d0e21e 408
f36484b0 409$VERSION = 1.03;
8a4f6ac2 410
a0d0e21e 411&init;
412
413sub init {
414 $debug = 0;
415 $min_count = 4;
416 $min_cpu = 0.4;
417 $defaultfmt = '5.2f';
418 $defaultstyle = 'auto';
419 # The cache can cause a slight loss of sys time accuracy. If a
420 # user does many tests (>10) with *very* large counts (>10000)
421 # or works on a very slow machine the cache may be useful.
422 &disablecache;
423 &clearallcache;
424}
425
523cc92b 426sub debug { $debug = ($_[1] != 0); }
427
bba8fca5 428# The cache needs two branches: 's' for strings and 'c' for code. The
429# emtpy loop is different in these two cases.
430sub clearcache { delete $cache{"$_[0]c"}; delete $cache{"$_[0]s"}; }
a0d0e21e 431sub clearallcache { %cache = (); }
432sub enablecache { $cache = 1; }
433sub disablecache { $cache = 0; }
434
a0d0e21e 435# --- Functions to process the 'time' data type
436
6ee623d5 437sub new { my @t = (time, times, @_ == 2 ? $_[1] : 0);
438 print "new=@t\n" if $debug;
439 bless \@t; }
a0d0e21e 440
441sub cpu_p { my($r,$pu,$ps,$cu,$cs) = @{$_[0]}; $pu+$ps ; }
442sub cpu_c { my($r,$pu,$ps,$cu,$cs) = @{$_[0]}; $cu+$cs ; }
443sub cpu_a { my($r,$pu,$ps,$cu,$cs) = @{$_[0]}; $pu+$ps+$cu+$cs ; }
444sub real { my($r,$pu,$ps,$cu,$cs) = @{$_[0]}; $r ; }
431d98c2 445sub iters { $_[0]->[5] ; }
a0d0e21e 446
523cc92b 447sub timediff {
a0d0e21e 448 my($a, $b) = @_;
523cc92b 449 my @r;
3f943bd9 450 for (my $i=0; $i < @$a; ++$i) {
a0d0e21e 451 push(@r, $a->[$i] - $b->[$i]);
452 }
453 bless \@r;
454}
455
705cc255 456sub timesum {
457 my($a, $b) = @_;
458 my @r;
459 for (my $i=0; $i < @$a; ++$i) {
460 push(@r, $a->[$i] + $b->[$i]);
461 }
462 bless \@r;
463}
464
523cc92b 465sub timestr {
a0d0e21e 466 my($tr, $style, $f) = @_;
523cc92b 467 my @t = @$tr;
6ee623d5 468 warn "bad time value (@t)" unless @t==6;
469 my($r, $pu, $ps, $cu, $cs, $n) = @t;
ce9550df 470 my($pt, $ct, $tt) = ($tr->cpu_p, $tr->cpu_c, $tr->cpu_a);
523cc92b 471 $f = $defaultfmt unless defined $f;
a0d0e21e 472 # format a time in the required style, other formats may be added here
80eab818 473 $style ||= $defaultstyle;
523cc92b 474 $style = ($ct>0) ? 'all' : 'noc' if $style eq 'auto';
475 my $s = "@t $style"; # default for unknown style
7be077a2 476 $s=sprintf("%2d wallclock secs (%$f usr %$f sys + %$f cusr %$f csys = %$f CPU)",
ce9550df 477 $r,$pu,$ps,$cu,$cs,$tt) if $style eq 'all';
7be077a2 478 $s=sprintf("%2d wallclock secs (%$f usr + %$f sys = %$f CPU)",
479 $r,$pu,$ps,$pt) if $style eq 'noc';
480 $s=sprintf("%2d wallclock secs (%$f cusr + %$f csys = %$f CPU)",
481 $r,$cu,$cs,$ct) if $style eq 'nop';
cc31225e 482 $s .= sprintf(" @ %$f/s (n=$n)", $n / ( $pu + $ps )) if $n && $pu+$ps;
a0d0e21e 483 $s;
484}
523cc92b 485
486sub timedebug {
a0d0e21e 487 my($msg, $t) = @_;
523cc92b 488 print STDERR "$msg",timestr($t),"\n" if $debug;
a0d0e21e 489}
490
a0d0e21e 491# --- Functions implementing low-level support for timing loops
492
493sub runloop {
494 my($n, $c) = @_;
4aa0a1f7 495
496 $n+=0; # force numeric now, so garbage won't creep into the eval
523cc92b 497 croak "negative loopcount $n" if $n<0;
498 confess "Usage: runloop(number, [string | coderef])" unless defined $c;
a0d0e21e 499 my($t0, $t1, $td); # before, after, difference
500
501 # find package of caller so we can execute code there
523cc92b 502 my($curpack) = caller(0);
503 my($i, $pack)= 0;
a0d0e21e 504 while (($pack) = caller(++$i)) {
505 last if $pack ne $curpack;
506 }
507
3f943bd9 508 my ($subcode, $subref);
509 if (ref $c eq 'CODE') {
510 $subcode = "sub { for (1 .. $n) { local \$_; package $pack; &\$c; } }";
511 $subref = eval $subcode;
512 }
513 else {
514 $subcode = "sub { for (1 .. $n) { local \$_; package $pack; $c;} }";
515 $subref = _doeval($subcode);
516 }
4aa0a1f7 517 croak "runloop unable to compile '$c': $@\ncode: $subcode\n" if $@;
523cc92b 518 print STDERR "runloop $n '$subcode'\n" if $debug;
a0d0e21e 519
3c6312e9 520 # Wait for the user timer to tick. This makes the error range more like
521 # -0.01, +0. If we don't wait, then it's more like -0.01, +0.01. This
522 # may not seem important, but it significantly reduces the chances of
523 # getting a too low initial $n in the initial, 'find the minimum' loop
431d98c2 524 # in &countit. This, in turn, can reduce the number of calls to
bba8fca5 525 # &runloop a lot, and thus reduce additive errors.
526 my $tbase = Benchmark->new(0)->[1];
277427cf 527 while ( ( $t0 = Benchmark->new(0) )->[1] == $tbase ) {} ;
a0d0e21e 528 &$subref;
6ee623d5 529 $t1 = Benchmark->new($n);
a0d0e21e 530 $td = &timediff($t1, $t0);
a0d0e21e 531 timedebug("runloop:",$td);
532 $td;
533}
534
535
536sub timeit {
537 my($n, $code) = @_;
538 my($wn, $wc, $wd);
539
540 printf STDERR "timeit $n $code\n" if $debug;
3c6312e9 541 my $cache_key = $n . ( ref( $code ) ? 'c' : 's' );
bba8fca5 542 if ($cache && exists $cache{$cache_key} ) {
543 $wn = $cache{$cache_key};
523cc92b 544 } else {
bba8fca5 545 $wn = &runloop($n, ref( $code ) ? sub { undef } : '' );
3c6312e9 546 # Can't let our baseline have any iterations, or they get subtracted
547 # out of the result.
548 $wn->[5] = 0;
bba8fca5 549 $cache{$cache_key} = $wn;
a0d0e21e 550 }
551
552 $wc = &runloop($n, $code);
553
554 $wd = timediff($wc, $wn);
a0d0e21e 555 timedebug("timeit: ",$wc);
556 timedebug(" - ",$wn);
557 timedebug(" = ",$wd);
558
559 $wd;
560}
561
6ee623d5 562
563my $default_for = 3;
564my $min_for = 0.1;
565
3c6312e9 566
431d98c2 567sub countit {
568 my ( $tmax, $code ) = @_;
6ee623d5 569
570 if ( not defined $tmax or $tmax == 0 ) {
571 $tmax = $default_for;
572 } elsif ( $tmax < 0 ) {
573 $tmax = -$tmax;
574 }
575
431d98c2 576 die "countit($tmax, ...): timelimit cannot be less than $min_for.\n"
6ee623d5 577 if $tmax < $min_for;
578
3c6312e9 579 my ($n, $tc);
6ee623d5 580
bba8fca5 581 # First find the minimum $n that gives a significant timing.
3c6312e9 582 for ($n = 1; ; $n *= 2 ) {
583 my $td = timeit($n, $code);
584 $tc = $td->[1] + $td->[2];
585 last if $tc > 0.1;
586 }
6ee623d5 587
3c6312e9 588 my $nmin = $n;
589
590 # Get $n high enough that we can guess the final $n with some accuracy.
591 my $tpra = 0.1 * $tmax; # Target/time practice.
592 while ( $tc < $tpra ) {
593 # The 5% fudge is to keep us from iterating again all
594 # that often (this speeds overall responsiveness when $tmax is big
595 # and we guess a little low). This does not noticably affect
596 # accuracy since we're not couting these times.
597 $n = int( $tpra * 1.05 * $n / $tc ); # Linear approximation.
598 my $td = timeit($n, $code);
c5d57293 599 my $new_tc = $td->[1] + $td->[2];
600 # Make sure we are making progress.
601 $tc = $new_tc > 1.2 * $tc ? $new_tc : 1.2 * $tc;
6ee623d5 602 }
603
3c6312e9 604 # Now, do the 'for real' timing(s), repeating until we exceed
605 # the max.
606 my $ntot = 0;
607 my $rtot = 0;
608 my $utot = 0.0;
609 my $stot = 0.0;
610 my $cutot = 0.0;
611 my $cstot = 0.0;
612 my $ttot = 0.0;
613
614 # The 5% fudge is because $n is often a few % low even for routines
615 # with stable times and avoiding extra timeit()s is nice for
616 # accuracy's sake.
617 $n = int( $n * ( 1.05 * $tmax / $tc ) );
618
619 while () {
620 my $td = timeit($n, $code);
621 $ntot += $n;
622 $rtot += $td->[0];
623 $utot += $td->[1];
624 $stot += $td->[2];
6ee623d5 625 $cutot += $td->[3];
626 $cstot += $td->[4];
3c6312e9 627 $ttot = $utot + $stot;
628 last if $ttot >= $tmax;
6ee623d5 629
c5d57293 630 $ttot = 0.01 if $ttot < 0.01;
3c6312e9 631 my $r = $tmax / $ttot - 1; # Linear approximation.
bba8fca5 632 $n = int( $r * $ntot );
6ee623d5 633 $n = $nmin if $n < $nmin;
6ee623d5 634 }
635
636 return bless [ $rtot, $utot, $stot, $cutot, $cstot, $ntot ];
637}
638
a0d0e21e 639# --- Functions implementing high-level time-then-print utilities
640
6ee623d5 641sub n_to_for {
642 my $n = shift;
643 return $n == 0 ? $default_for : $n < 0 ? -$n : undef;
644}
645
a0d0e21e 646sub timethis{
647 my($n, $code, $title, $style) = @_;
6ee623d5 648 my($t, $for, $forn);
649
650 if ( $n > 0 ) {
651 croak "non-integer loopcount $n, stopped" if int($n)<$n;
652 $t = timeit($n, $code);
653 $title = "timethis $n" unless defined $title;
654 } else {
655 $fort = n_to_for( $n );
431d98c2 656 $t = countit( $fort, $code );
6ee623d5 657 $title = "timethis for $fort" unless defined $title;
658 $forn = $t->[-1];
659 }
523cc92b 660 local $| = 1;
523cc92b 661 $style = "" unless defined $style;
3c6312e9 662 printf("%10s: ", $title) unless $style eq 'none';
663 print timestr($t, $style, $defaultfmt),"\n" unless $style eq 'none';
6ee623d5 664
665 $n = $forn if defined $forn;
523cc92b 666
a0d0e21e 667 # A conservative warning to spot very silly tests.
668 # Don't assume that your benchmark is ok simply because
669 # you don't get this warning!
670 print " (warning: too few iterations for a reliable count)\n"
523cc92b 671 if $n < $min_count
a0d0e21e 672 || ($t->real < 1 && $n < 1000)
523cc92b 673 || $t->cpu_a < $min_cpu;
a0d0e21e 674 $t;
675}
676
a0d0e21e 677sub timethese{
678 my($n, $alt, $style) = @_;
679 die "usage: timethese(count, { 'Name1'=>'code1', ... }\n"
680 unless ref $alt eq HASH;
523cc92b 681 my @names = sort keys %$alt;
682 $style = "" unless defined $style;
3c6312e9 683 print "Benchmark: " unless $style eq 'none';
6ee623d5 684 if ( $n > 0 ) {
685 croak "non-integer loopcount $n, stopped" if int($n)<$n;
3c6312e9 686 print "timing $n iterations of" unless $style eq 'none';
6ee623d5 687 } else {
3c6312e9 688 print "running" unless $style eq 'none';
6ee623d5 689 }
3c6312e9 690 print " ", join(', ',@names) unless $style eq 'none';
6ee623d5 691 unless ( $n > 0 ) {
692 my $for = n_to_for( $n );
df7779cf 693 print ", each" if $n > 1 && $style ne 'none';
694 print " for at least $for CPU seconds" unless $style eq 'none';
6ee623d5 695 }
3c6312e9 696 print "...\n" unless $style eq 'none';
523cc92b 697
698 # we could save the results in an array and produce a summary here
a0d0e21e 699 # sum, min, max, avg etc etc
3c6312e9 700 my %results;
4dbb2df9 701 foreach my $name (@names) {
3c6312e9 702 $results{$name} = timethis ($n, $alt -> {$name}, $name, $style);
4dbb2df9 703 }
3c6312e9 704
705 return \%results;
a0d0e21e 706}
707
3c6312e9 708sub cmpthese{
d1083c7a 709 my ($results, $style) = ref $_[0] ? @_ : ( timethese( @_[0,1] ), $_[2] ) ;
3c6312e9 710
d1083c7a 711 $style = "" unless defined $style;
3c6312e9 712
713 # Flatten in to an array of arrays with the name as the first field
714 my @vals = map{ [ $_, @{$results->{$_}} ] } keys %$results;
715
716 for (@vals) {
717 # The epsilon fudge here is to prevent div by 0. Since clock
718 # resolutions are much larger, it's below the noise floor.
719 my $rate = $_->[6] / ( $_->[2] + $_->[3] + 0.000000000000001 );
720 $_->[7] = $rate;
721 }
722
723 # Sort by rate
724 @vals = sort { $a->[7] <=> $b->[7] } @vals;
725
726 # If more than half of the rates are greater than one...
727 my $display_as_rate = $vals[$#vals>>1]->[7] > 1;
728
729 my @rows;
730 my @col_widths;
731
732 my @top_row = (
733 '',
734 $display_as_rate ? 'Rate' : 's/iter',
735 map { $_->[0] } @vals
736 );
737
738 push @rows, \@top_row;
739 @col_widths = map { length( $_ ) } @top_row;
740
741 # Build the data rows
742 # We leave the last column in even though it never has any data. Perhaps
743 # it should go away. Also, perhaps a style for a single column of
744 # percentages might be nice.
745 for my $row_val ( @vals ) {
746 my @row;
747
748 # Column 0 = test name
749 push @row, $row_val->[0];
750 $col_widths[0] = length( $row_val->[0] )
751 if length( $row_val->[0] ) > $col_widths[0];
752
753 # Column 1 = performance
754 my $row_rate = $row_val->[7];
755
756 # We assume that we'll never get a 0 rate.
757 my $a = $display_as_rate ? $row_rate : 1 / $row_rate;
758
759 # Only give a few decimal places before switching to sci. notation,
760 # since the results aren't usually that accurate anyway.
761 my $format =
762 $a >= 100 ?
763 "%0.0f" :
764 $a >= 10 ?
765 "%0.1f" :
766 $a >= 1 ?
767 "%0.2f" :
768 $a >= 0.1 ?
769 "%0.3f" :
770 "%0.2e";
771
772 $format .= "/s"
773 if $display_as_rate;
774 # Using $b here due to optimizing bug in _58 through _61
775 my $b = sprintf( $format, $a );
776 push @row, $b;
777 $col_widths[1] = length( $b )
778 if length( $b ) > $col_widths[1];
779
780 # Columns 2..N = performance ratios
781 my $skip_rest = 0;
782 for ( my $col_num = 0 ; $col_num < @vals ; ++$col_num ) {
783 my $col_val = $vals[$col_num];
784 my $out;
785 if ( $skip_rest ) {
786 $out = '';
787 }
788 elsif ( $col_val->[0] eq $row_val->[0] ) {
789 $out = "--";
790 # $skip_rest = 1;
791 }
792 else {
793 my $col_rate = $col_val->[7];
794 $out = sprintf( "%.0f%%", 100*$row_rate/$col_rate - 100 );
795 }
796 push @row, $out;
797 $col_widths[$col_num+2] = length( $out )
798 if length( $out ) > $col_widths[$col_num+2];
799
800 # A little wierdness to set the first column width properly
801 $col_widths[$col_num+2] = length( $col_val->[0] )
802 if length( $col_val->[0] ) > $col_widths[$col_num+2];
803 }
804 push @rows, \@row;
805 }
806
d1083c7a 807 return \@rows if $style eq "none";
808
3c6312e9 809 # Equalize column widths in the chart as much as possible without
810 # exceeding 80 characters. This does not use or affect cols 0 or 1.
811 my @sorted_width_refs =
812 sort { $$a <=> $$b } map { \$_ } @col_widths[2..$#col_widths];
813 my $max_width = ${$sorted_width_refs[-1]};
814
277427cf 815 my $total = @col_widths - 1 ;
3c6312e9 816 for ( @col_widths ) { $total += $_ }
817
818 STRETCHER:
819 while ( $total < 80 ) {
820 my $min_width = ${$sorted_width_refs[0]};
821 last
822 if $min_width == $max_width;
823 for ( @sorted_width_refs ) {
824 last
825 if $$_ > $min_width;
826 ++$$_;
827 ++$total;
828 last STRETCHER
829 if $total >= 80;
830 }
831 }
832
833 # Dump the output
834 my $format = join( ' ', map { "%${_}s" } @col_widths ) . "\n";
835 substr( $format, 1, 0 ) = '-';
836 for ( @rows ) {
837 printf $format, @$_;
838 }
839
d1083c7a 840 return \@rows ;
3c6312e9 841}
842
843
a0d0e21e 8441;