I don't think trying to bracket the hires time with lores
[p5sagit/p5-mst-13.2.git] / lib / Benchmark.pm
CommitLineData
a0d0e21e 1package Benchmark;
2
f06db76b 3=head1 NAME
4
8a4f6ac2 5Benchmark - benchmark running times of Perl code
431d98c2 6
f06db76b 7=head1 SYNOPSIS
8
9 timethis ($count, "code");
10
523cc92b 11 # Use Perl code in strings...
f06db76b 12 timethese($count, {
13 'Name1' => '...code1...',
14 'Name2' => '...code2...',
15 });
16
523cc92b 17 # ... or use subroutine references.
18 timethese($count, {
19 'Name1' => sub { ...code1... },
20 'Name2' => sub { ...code2... },
21 });
22
431d98c2 23 # cmpthese can be used both ways as well
24 cmpthese($count, {
25 'Name1' => '...code1...',
26 'Name2' => '...code2...',
27 });
28
29 cmpthese($count, {
30 'Name1' => sub { ...code1... },
31 'Name2' => sub { ...code2... },
32 });
33
34 # ...or in two stages
35 $results = timethese($count,
36 {
37 'Name1' => sub { ...code1... },
38 'Name2' => sub { ...code2... },
39 },
40 'none'
41 );
42 cmpthese( $results ) ;
43
f06db76b 44 $t = timeit($count, '...other code...')
45 print "$count loops of other code took:",timestr($t),"\n";
46
431d98c2 47 $t = countit($time, '...other code...')
48 $count = $t->iters ;
49 print "$count loops of other code took:",timestr($t),"\n";
50
f06db76b 51=head1 DESCRIPTION
52
53The Benchmark module encapsulates a number of routines to help you
54figure out how long it takes to execute some code.
55
8a4f6ac2 56timethis - run a chunk of code several times
57
58timethese - run several chunks of code several times
59
60cmpthese - print results of timethese as a comparison chart
61
62timeit - run a chunk of code and see how long it goes
63
64countit - see how many times a chunk of code runs in a given time
65
66
f06db76b 67=head2 Methods
68
69=over 10
70
71=item new
72
73Returns the current time. Example:
74
75 use Benchmark;
76 $t0 = new Benchmark;
77 # ... your code here ...
78 $t1 = new Benchmark;
79 $td = timediff($t1, $t0);
a24a9dfe 80 print "the code took:",timestr($td),"\n";
f06db76b 81
82=item debug
83
84Enables or disable debugging by setting the C<$Benchmark::Debug> flag:
85
523cc92b 86 debug Benchmark 1;
f06db76b 87 $t = timeit(10, ' 5 ** $Global ');
523cc92b 88 debug Benchmark 0;
f06db76b 89
431d98c2 90=item iters
91
92Returns the number of iterations.
93
f06db76b 94=back
95
96=head2 Standard Exports
97
523cc92b 98The following routines will be exported into your namespace
f06db76b 99if you use the Benchmark module:
100
101=over 10
102
103=item timeit(COUNT, CODE)
104
523cc92b 105Arguments: COUNT is the number of times to run the loop, and CODE is
106the code to run. CODE may be either a code reference or a string to
107be eval'd; either way it will be run in the caller's package.
108
109Returns: a Benchmark object.
110
111=item timethis ( COUNT, CODE, [ TITLE, [ STYLE ]] )
112
113Time COUNT iterations of CODE. CODE may be a string to eval or a
114code reference; either way the CODE will run in the caller's package.
115Results will be printed to STDOUT as TITLE followed by the times.
116TITLE defaults to "timethis COUNT" if none is provided. STYLE
117determines the format of the output, as described for timestr() below.
118
6ee623d5 119The COUNT can be zero or negative: this means the I<minimum number of
120CPU seconds> to run. A zero signifies the default of 3 seconds. For
121example to run at least for 10 seconds:
122
123 timethis(-10, $code)
124
125or to run two pieces of code tests for at least 3 seconds:
126
127 timethese(0, { test1 => '...', test2 => '...'})
128
129CPU seconds is, in UNIX terms, the user time plus the system time of
130the process itself, as opposed to the real (wallclock) time and the
131time spent by the child processes. Less than 0.1 seconds is not
132accepted (-0.01 as the count, for example, will cause a fatal runtime
133exception).
134
135Note that the CPU seconds is the B<minimum> time: CPU scheduling and
136other operating system factors may complicate the attempt so that a
137little bit more time is spent. The benchmark output will, however,
138also tell the number of C<$code> runs/second, which should be a more
139interesting number than the actually spent seconds.
140
141Returns a Benchmark object.
142
523cc92b 143=item timethese ( COUNT, CODEHASHREF, [ STYLE ] )
f06db76b 144
523cc92b 145The CODEHASHREF is a reference to a hash containing names as keys
146and either a string to eval or a code reference for each value.
147For each (KEY, VALUE) pair in the CODEHASHREF, this routine will
148call
f06db76b 149
523cc92b 150 timethis(COUNT, VALUE, KEY, STYLE)
f06db76b 151
1d2dff63 152The routines are called in string comparison order of KEY.
153
154The COUNT can be zero or negative, see timethis().
6ee623d5 155
3c6312e9 156Returns a hash of Benchmark objects, keyed by name.
157
523cc92b 158=item timediff ( T1, T2 )
f06db76b 159
523cc92b 160Returns the difference between two Benchmark times as a Benchmark
161object suitable for passing to timestr().
f06db76b 162
6ee623d5 163=item timestr ( TIMEDIFF, [ STYLE, [ FORMAT ] ] )
f06db76b 164
523cc92b 165Returns a string that formats the times in the TIMEDIFF object in
166the requested STYLE. TIMEDIFF is expected to be a Benchmark object
167similar to that returned by timediff().
168
3c6312e9 169STYLE can be any of 'all', 'none', 'noc', 'nop' or 'auto'. 'all' shows
170each of the 5 times available ('wallclock' time, user time, system time,
523cc92b 171user time of children, and system time of children). 'noc' shows all
172except the two children times. 'nop' shows only wallclock and the
173two children times. 'auto' (the default) will act as 'all' unless
174the children times are both zero, in which case it acts as 'noc'.
3c6312e9 175'none' prevents output.
523cc92b 176
177FORMAT is the L<printf(3)>-style format specifier (without the
178leading '%') to use to print the times. It defaults to '5.2f'.
f06db76b 179
180=back
181
182=head2 Optional Exports
183
184The following routines will be exported into your namespace
185if you specifically ask that they be imported:
186
187=over 10
188
523cc92b 189=item clearcache ( COUNT )
190
191Clear the cached time for COUNT rounds of the null loop.
192
193=item clearallcache ( )
f06db76b 194
523cc92b 195Clear all cached times.
f06db76b 196
ac8eabc1 197=item cmpthese ( COUT, CODEHASHREF, [ STYLE ] )
198
d1083c7a 199=item cmpthese ( RESULTSHASHREF, [ STYLE ] )
ac8eabc1 200
d1083c7a 201Optionally calls timethese(), then outputs comparison chart. This:
ac8eabc1 202
d1083c7a 203 cmpthese( -1, { a => "++\$i", b => "\$i *= 2" } ) ;
204
205outputs a chart like:
206
207 Rate b a
208 b 2831802/s -- -61%
209 a 7208959/s 155% --
210
211This chart is sorted from slowest to fastest, and shows the percent speed
212difference between each pair of tests.
213
214c<cmpthese> can also be passed the data structure that timethese() returns:
215
216 $results = timethese( -1, { a => "++\$i", b => "\$i *= 2" } ) ;
ac8eabc1 217 cmpthese( $results );
218
d1083c7a 219in case you want to see both sets of results.
220
221Returns a reference to an ARRAY of rows, each row is an ARRAY of cells from the
222above chart, including labels. This:
223
224 my $rows = cmpthese( -1, { a => '++$i', b => '$i *= 2' }, "none" );
225
226returns a data structure like:
227
228 [
229 [ '', 'Rate', 'b', 'a' ],
230 [ 'b', '2885232/s', '--', '-59%' ],
231 [ 'a', '7099126/s', '146%', '--' ],
232 ]
233
234B<NOTE>: This result value differs from previous versions, which returned
235the C<timethese()> result structure. If you want that, just use the two
236statement C<timethese>...C<cmpthese> idiom shown above.
237
238Incidently, note the variance in the result values between the two examples;
239this is typical of benchmarking. If this were a real benchmark, you would
240probably want to run a lot more iterations.
ac8eabc1 241
242=item countit(TIME, CODE)
243
244Arguments: TIME is the minimum length of time to run CODE for, and CODE is
245the code to run. CODE may be either a code reference or a string to
246be eval'd; either way it will be run in the caller's package.
247
248TIME is I<not> negative. countit() will run the loop many times to
249calculate the speed of CODE before running it for TIME. The actual
250time run for will usually be greater than TIME due to system clock
251resolution, so it's best to look at the number of iterations divided
252by the times that you are concerned with, not just the iterations.
253
254Returns: a Benchmark object.
255
523cc92b 256=item disablecache ( )
f06db76b 257
523cc92b 258Disable caching of timings for the null loop. This will force Benchmark
259to recalculate these timings for each new piece of code timed.
260
261=item enablecache ( )
262
263Enable caching of timings for the null loop. The time taken for COUNT
264rounds of the null loop will be calculated only once for each
265different COUNT used.
f06db76b 266
ac8eabc1 267=item timesum ( T1, T2 )
268
269Returns the sum of two Benchmark times as a Benchmark object suitable
270for passing to timestr().
271
f06db76b 272=back
273
274=head1 NOTES
275
276The data is stored as a list of values from the time and times
523cc92b 277functions:
f06db76b 278
431d98c2 279 ($real, $user, $system, $children_user, $children_system, $iters)
f06db76b 280
281in seconds for the whole loop (not divided by the number of rounds).
282
283The timing is done using time(3) and times(3).
284
285Code is executed in the caller's package.
286
f06db76b 287The time of the null loop (a loop with the same
288number of rounds but empty loop body) is subtracted
289from the time of the real loop.
290
3c6312e9 291The null loop times can be cached, the key being the
f06db76b 292number of rounds. The caching can be controlled using
293calls like these:
294
523cc92b 295 clearcache($key);
f06db76b 296 clearallcache();
297
523cc92b 298 disablecache();
f06db76b 299 enablecache();
300
3c6312e9 301Caching is off by default, as it can (usually slightly) decrease
302accuracy and does not usually noticably affect runtimes.
303
54e82ce5 304=head1 EXAMPLES
305
306For example,
307
14393033 308 use Benchmark qw( cmpthese ) ;
309 $x = 3;
310 cmpthese( -5, {
311 a => sub{$x*$x},
312 b => sub{$x**2},
313 } );
54e82ce5 314
315outputs something like this:
316
317 Benchmark: running a, b, each for at least 5 CPU seconds...
14393033 318 Rate b a
319 b 1559428/s -- -62%
320 a 4152037/s 166% --
321
54e82ce5 322
323while
324
14393033 325 use Benchmark qw( timethese cmpthese ) ;
326 $x = 3;
327 $r = timethese( -5, {
328 a => sub{$x*$x},
329 b => sub{$x**2},
330 } );
331 cmpthese $r;
54e82ce5 332
333outputs something like this:
334
14393033 335 Benchmark: running a, b, each for at least 5 CPU seconds...
336 a: 10 wallclock secs ( 5.14 usr + 0.13 sys = 5.27 CPU) @ 3835055.60/s (n=20210743)
337 b: 5 wallclock secs ( 5.41 usr + 0.00 sys = 5.41 CPU) @ 1574944.92/s (n=8520452)
338 Rate b a
339 b 1574945/s -- -59%
340 a 3835056/s 144% --
54e82ce5 341
342
f06db76b 343=head1 INHERITANCE
344
345Benchmark inherits from no other class, except of course
346for Exporter.
347
348=head1 CAVEATS
349
80eab818 350Comparing eval'd strings with code references will give you
431d98c2 351inaccurate results: a code reference will show a slightly slower
80eab818 352execution time than the equivalent eval'd string.
353
f06db76b 354The real time timing is done using time(2) and
355the granularity is therefore only one second.
356
357Short tests may produce negative figures because perl
523cc92b 358can appear to take longer to execute the empty loop
359than a short test; try:
f06db76b 360
361 timethis(100,'1');
362
363The system time of the null loop might be slightly
364more than the system time of the loop with the actual
a24a9dfe 365code and therefore the difference might end up being E<lt> 0.
f06db76b 366
8a4f6ac2 367=head1 SEE ALSO
368
369L<Devel::DProf> - a Perl code profiler
370
f06db76b 371=head1 AUTHORS
372
5aabfad6 373Jarkko Hietaniemi <F<jhi@iki.fi>>, Tim Bunce <F<Tim.Bunce@ig.co.uk>>
f06db76b 374
375=head1 MODIFICATION HISTORY
376
377September 8th, 1994; by Tim Bunce.
378
523cc92b 379March 28th, 1997; by Hugo van der Sanden: added support for code
380references and the already documented 'debug' method; revamped
381documentation.
f06db76b 382
6ee623d5 383April 04-07th, 1997: by Jarkko Hietaniemi, added the run-for-some-time
384functionality.
385
3c6312e9 386September, 1999; by Barrie Slaymaker: math fixes and accuracy and
387efficiency tweaks. Added cmpthese(). A result is now returned from
431d98c2 388timethese(). Exposed countit() (was runfor()).
3c6312e9 389
523cc92b 390=cut
a0d0e21e 391
3f943bd9 392# evaluate something in a clean lexical environment
393sub _doeval { eval shift }
394
395#
396# put any lexicals at file scope AFTER here
397#
398
4aa0a1f7 399use Carp;
a0d0e21e 400use Exporter;
401@ISA=(Exporter);
ac8eabc1 402@EXPORT=qw(timeit timethis timethese timediff timestr);
403@EXPORT_OK=qw(timesum cmpthese countit
404 clearcache clearallcache disablecache enablecache);
a0d0e21e 405
df7779cf 406$VERSION = 1.02;
8a4f6ac2 407
a0d0e21e 408&init;
409
410sub init {
411 $debug = 0;
412 $min_count = 4;
413 $min_cpu = 0.4;
414 $defaultfmt = '5.2f';
415 $defaultstyle = 'auto';
416 # The cache can cause a slight loss of sys time accuracy. If a
417 # user does many tests (>10) with *very* large counts (>10000)
418 # or works on a very slow machine the cache may be useful.
419 &disablecache;
420 &clearallcache;
421}
422
523cc92b 423sub debug { $debug = ($_[1] != 0); }
424
bba8fca5 425# The cache needs two branches: 's' for strings and 'c' for code. The
426# emtpy loop is different in these two cases.
427sub clearcache { delete $cache{"$_[0]c"}; delete $cache{"$_[0]s"}; }
a0d0e21e 428sub clearallcache { %cache = (); }
429sub enablecache { $cache = 1; }
430sub disablecache { $cache = 0; }
431
a0d0e21e 432# --- Functions to process the 'time' data type
433
6ee623d5 434sub new { my @t = (time, times, @_ == 2 ? $_[1] : 0);
435 print "new=@t\n" if $debug;
436 bless \@t; }
a0d0e21e 437
438sub cpu_p { my($r,$pu,$ps,$cu,$cs) = @{$_[0]}; $pu+$ps ; }
439sub cpu_c { my($r,$pu,$ps,$cu,$cs) = @{$_[0]}; $cu+$cs ; }
440sub cpu_a { my($r,$pu,$ps,$cu,$cs) = @{$_[0]}; $pu+$ps+$cu+$cs ; }
441sub real { my($r,$pu,$ps,$cu,$cs) = @{$_[0]}; $r ; }
431d98c2 442sub iters { $_[0]->[5] ; }
a0d0e21e 443
523cc92b 444sub timediff {
a0d0e21e 445 my($a, $b) = @_;
523cc92b 446 my @r;
3f943bd9 447 for (my $i=0; $i < @$a; ++$i) {
a0d0e21e 448 push(@r, $a->[$i] - $b->[$i]);
449 }
450 bless \@r;
451}
452
705cc255 453sub timesum {
454 my($a, $b) = @_;
455 my @r;
456 for (my $i=0; $i < @$a; ++$i) {
457 push(@r, $a->[$i] + $b->[$i]);
458 }
459 bless \@r;
460}
461
523cc92b 462sub timestr {
a0d0e21e 463 my($tr, $style, $f) = @_;
523cc92b 464 my @t = @$tr;
6ee623d5 465 warn "bad time value (@t)" unless @t==6;
466 my($r, $pu, $ps, $cu, $cs, $n) = @t;
ce9550df 467 my($pt, $ct, $tt) = ($tr->cpu_p, $tr->cpu_c, $tr->cpu_a);
523cc92b 468 $f = $defaultfmt unless defined $f;
a0d0e21e 469 # format a time in the required style, other formats may be added here
80eab818 470 $style ||= $defaultstyle;
523cc92b 471 $style = ($ct>0) ? 'all' : 'noc' if $style eq 'auto';
472 my $s = "@t $style"; # default for unknown style
7be077a2 473 $s=sprintf("%2d wallclock secs (%$f usr %$f sys + %$f cusr %$f csys = %$f CPU)",
ce9550df 474 $r,$pu,$ps,$cu,$cs,$tt) if $style eq 'all';
7be077a2 475 $s=sprintf("%2d wallclock secs (%$f usr + %$f sys = %$f CPU)",
476 $r,$pu,$ps,$pt) if $style eq 'noc';
477 $s=sprintf("%2d wallclock secs (%$f cusr + %$f csys = %$f CPU)",
478 $r,$cu,$cs,$ct) if $style eq 'nop';
cc31225e 479 $s .= sprintf(" @ %$f/s (n=$n)", $n / ( $pu + $ps )) if $n && $pu+$ps;
a0d0e21e 480 $s;
481}
523cc92b 482
483sub timedebug {
a0d0e21e 484 my($msg, $t) = @_;
523cc92b 485 print STDERR "$msg",timestr($t),"\n" if $debug;
a0d0e21e 486}
487
a0d0e21e 488# --- Functions implementing low-level support for timing loops
489
490sub runloop {
491 my($n, $c) = @_;
4aa0a1f7 492
493 $n+=0; # force numeric now, so garbage won't creep into the eval
523cc92b 494 croak "negative loopcount $n" if $n<0;
495 confess "Usage: runloop(number, [string | coderef])" unless defined $c;
a0d0e21e 496 my($t0, $t1, $td); # before, after, difference
497
498 # find package of caller so we can execute code there
523cc92b 499 my($curpack) = caller(0);
500 my($i, $pack)= 0;
a0d0e21e 501 while (($pack) = caller(++$i)) {
502 last if $pack ne $curpack;
503 }
504
3f943bd9 505 my ($subcode, $subref);
506 if (ref $c eq 'CODE') {
507 $subcode = "sub { for (1 .. $n) { local \$_; package $pack; &\$c; } }";
508 $subref = eval $subcode;
509 }
510 else {
511 $subcode = "sub { for (1 .. $n) { local \$_; package $pack; $c;} }";
512 $subref = _doeval($subcode);
513 }
4aa0a1f7 514 croak "runloop unable to compile '$c': $@\ncode: $subcode\n" if $@;
523cc92b 515 print STDERR "runloop $n '$subcode'\n" if $debug;
a0d0e21e 516
3c6312e9 517 # Wait for the user timer to tick. This makes the error range more like
518 # -0.01, +0. If we don't wait, then it's more like -0.01, +0.01. This
519 # may not seem important, but it significantly reduces the chances of
520 # getting a too low initial $n in the initial, 'find the minimum' loop
431d98c2 521 # in &countit. This, in turn, can reduce the number of calls to
bba8fca5 522 # &runloop a lot, and thus reduce additive errors.
523 my $tbase = Benchmark->new(0)->[1];
277427cf 524 while ( ( $t0 = Benchmark->new(0) )->[1] == $tbase ) {} ;
a0d0e21e 525 &$subref;
6ee623d5 526 $t1 = Benchmark->new($n);
a0d0e21e 527 $td = &timediff($t1, $t0);
a0d0e21e 528 timedebug("runloop:",$td);
529 $td;
530}
531
532
533sub timeit {
534 my($n, $code) = @_;
535 my($wn, $wc, $wd);
536
537 printf STDERR "timeit $n $code\n" if $debug;
3c6312e9 538 my $cache_key = $n . ( ref( $code ) ? 'c' : 's' );
bba8fca5 539 if ($cache && exists $cache{$cache_key} ) {
540 $wn = $cache{$cache_key};
523cc92b 541 } else {
bba8fca5 542 $wn = &runloop($n, ref( $code ) ? sub { undef } : '' );
3c6312e9 543 # Can't let our baseline have any iterations, or they get subtracted
544 # out of the result.
545 $wn->[5] = 0;
bba8fca5 546 $cache{$cache_key} = $wn;
a0d0e21e 547 }
548
549 $wc = &runloop($n, $code);
550
551 $wd = timediff($wc, $wn);
a0d0e21e 552 timedebug("timeit: ",$wc);
553 timedebug(" - ",$wn);
554 timedebug(" = ",$wd);
555
556 $wd;
557}
558
6ee623d5 559
560my $default_for = 3;
561my $min_for = 0.1;
562
3c6312e9 563
431d98c2 564sub countit {
565 my ( $tmax, $code ) = @_;
6ee623d5 566
567 if ( not defined $tmax or $tmax == 0 ) {
568 $tmax = $default_for;
569 } elsif ( $tmax < 0 ) {
570 $tmax = -$tmax;
571 }
572
431d98c2 573 die "countit($tmax, ...): timelimit cannot be less than $min_for.\n"
6ee623d5 574 if $tmax < $min_for;
575
3c6312e9 576 my ($n, $tc);
6ee623d5 577
bba8fca5 578 # First find the minimum $n that gives a significant timing.
3c6312e9 579 for ($n = 1; ; $n *= 2 ) {
580 my $td = timeit($n, $code);
581 $tc = $td->[1] + $td->[2];
582 last if $tc > 0.1;
583 }
6ee623d5 584
3c6312e9 585 my $nmin = $n;
586
587 # Get $n high enough that we can guess the final $n with some accuracy.
588 my $tpra = 0.1 * $tmax; # Target/time practice.
589 while ( $tc < $tpra ) {
590 # The 5% fudge is to keep us from iterating again all
591 # that often (this speeds overall responsiveness when $tmax is big
592 # and we guess a little low). This does not noticably affect
593 # accuracy since we're not couting these times.
594 $n = int( $tpra * 1.05 * $n / $tc ); # Linear approximation.
595 my $td = timeit($n, $code);
c5d57293 596 my $new_tc = $td->[1] + $td->[2];
597 # Make sure we are making progress.
598 $tc = $new_tc > 1.2 * $tc ? $new_tc : 1.2 * $tc;
6ee623d5 599 }
600
3c6312e9 601 # Now, do the 'for real' timing(s), repeating until we exceed
602 # the max.
603 my $ntot = 0;
604 my $rtot = 0;
605 my $utot = 0.0;
606 my $stot = 0.0;
607 my $cutot = 0.0;
608 my $cstot = 0.0;
609 my $ttot = 0.0;
610
611 # The 5% fudge is because $n is often a few % low even for routines
612 # with stable times and avoiding extra timeit()s is nice for
613 # accuracy's sake.
614 $n = int( $n * ( 1.05 * $tmax / $tc ) );
615
616 while () {
617 my $td = timeit($n, $code);
618 $ntot += $n;
619 $rtot += $td->[0];
620 $utot += $td->[1];
621 $stot += $td->[2];
6ee623d5 622 $cutot += $td->[3];
623 $cstot += $td->[4];
3c6312e9 624 $ttot = $utot + $stot;
625 last if $ttot >= $tmax;
6ee623d5 626
c5d57293 627 $ttot = 0.01 if $ttot < 0.01;
3c6312e9 628 my $r = $tmax / $ttot - 1; # Linear approximation.
bba8fca5 629 $n = int( $r * $ntot );
6ee623d5 630 $n = $nmin if $n < $nmin;
6ee623d5 631 }
632
633 return bless [ $rtot, $utot, $stot, $cutot, $cstot, $ntot ];
634}
635
a0d0e21e 636# --- Functions implementing high-level time-then-print utilities
637
6ee623d5 638sub n_to_for {
639 my $n = shift;
640 return $n == 0 ? $default_for : $n < 0 ? -$n : undef;
641}
642
a0d0e21e 643sub timethis{
644 my($n, $code, $title, $style) = @_;
6ee623d5 645 my($t, $for, $forn);
646
647 if ( $n > 0 ) {
648 croak "non-integer loopcount $n, stopped" if int($n)<$n;
649 $t = timeit($n, $code);
650 $title = "timethis $n" unless defined $title;
651 } else {
652 $fort = n_to_for( $n );
431d98c2 653 $t = countit( $fort, $code );
6ee623d5 654 $title = "timethis for $fort" unless defined $title;
655 $forn = $t->[-1];
656 }
523cc92b 657 local $| = 1;
523cc92b 658 $style = "" unless defined $style;
3c6312e9 659 printf("%10s: ", $title) unless $style eq 'none';
660 print timestr($t, $style, $defaultfmt),"\n" unless $style eq 'none';
6ee623d5 661
662 $n = $forn if defined $forn;
523cc92b 663
a0d0e21e 664 # A conservative warning to spot very silly tests.
665 # Don't assume that your benchmark is ok simply because
666 # you don't get this warning!
667 print " (warning: too few iterations for a reliable count)\n"
523cc92b 668 if $n < $min_count
a0d0e21e 669 || ($t->real < 1 && $n < 1000)
523cc92b 670 || $t->cpu_a < $min_cpu;
a0d0e21e 671 $t;
672}
673
a0d0e21e 674sub timethese{
675 my($n, $alt, $style) = @_;
676 die "usage: timethese(count, { 'Name1'=>'code1', ... }\n"
677 unless ref $alt eq HASH;
523cc92b 678 my @names = sort keys %$alt;
679 $style = "" unless defined $style;
3c6312e9 680 print "Benchmark: " unless $style eq 'none';
6ee623d5 681 if ( $n > 0 ) {
682 croak "non-integer loopcount $n, stopped" if int($n)<$n;
3c6312e9 683 print "timing $n iterations of" unless $style eq 'none';
6ee623d5 684 } else {
3c6312e9 685 print "running" unless $style eq 'none';
6ee623d5 686 }
3c6312e9 687 print " ", join(', ',@names) unless $style eq 'none';
6ee623d5 688 unless ( $n > 0 ) {
689 my $for = n_to_for( $n );
df7779cf 690 print ", each" if $n > 1 && $style ne 'none';
691 print " for at least $for CPU seconds" unless $style eq 'none';
6ee623d5 692 }
3c6312e9 693 print "...\n" unless $style eq 'none';
523cc92b 694
695 # we could save the results in an array and produce a summary here
a0d0e21e 696 # sum, min, max, avg etc etc
3c6312e9 697 my %results;
4dbb2df9 698 foreach my $name (@names) {
3c6312e9 699 $results{$name} = timethis ($n, $alt -> {$name}, $name, $style);
4dbb2df9 700 }
3c6312e9 701
702 return \%results;
a0d0e21e 703}
704
3c6312e9 705sub cmpthese{
d1083c7a 706 my ($results, $style) = ref $_[0] ? @_ : ( timethese( @_[0,1] ), $_[2] ) ;
3c6312e9 707
d1083c7a 708 $style = "" unless defined $style;
3c6312e9 709
710 # Flatten in to an array of arrays with the name as the first field
711 my @vals = map{ [ $_, @{$results->{$_}} ] } keys %$results;
712
713 for (@vals) {
714 # The epsilon fudge here is to prevent div by 0. Since clock
715 # resolutions are much larger, it's below the noise floor.
716 my $rate = $_->[6] / ( $_->[2] + $_->[3] + 0.000000000000001 );
717 $_->[7] = $rate;
718 }
719
720 # Sort by rate
721 @vals = sort { $a->[7] <=> $b->[7] } @vals;
722
723 # If more than half of the rates are greater than one...
724 my $display_as_rate = $vals[$#vals>>1]->[7] > 1;
725
726 my @rows;
727 my @col_widths;
728
729 my @top_row = (
730 '',
731 $display_as_rate ? 'Rate' : 's/iter',
732 map { $_->[0] } @vals
733 );
734
735 push @rows, \@top_row;
736 @col_widths = map { length( $_ ) } @top_row;
737
738 # Build the data rows
739 # We leave the last column in even though it never has any data. Perhaps
740 # it should go away. Also, perhaps a style for a single column of
741 # percentages might be nice.
742 for my $row_val ( @vals ) {
743 my @row;
744
745 # Column 0 = test name
746 push @row, $row_val->[0];
747 $col_widths[0] = length( $row_val->[0] )
748 if length( $row_val->[0] ) > $col_widths[0];
749
750 # Column 1 = performance
751 my $row_rate = $row_val->[7];
752
753 # We assume that we'll never get a 0 rate.
754 my $a = $display_as_rate ? $row_rate : 1 / $row_rate;
755
756 # Only give a few decimal places before switching to sci. notation,
757 # since the results aren't usually that accurate anyway.
758 my $format =
759 $a >= 100 ?
760 "%0.0f" :
761 $a >= 10 ?
762 "%0.1f" :
763 $a >= 1 ?
764 "%0.2f" :
765 $a >= 0.1 ?
766 "%0.3f" :
767 "%0.2e";
768
769 $format .= "/s"
770 if $display_as_rate;
771 # Using $b here due to optimizing bug in _58 through _61
772 my $b = sprintf( $format, $a );
773 push @row, $b;
774 $col_widths[1] = length( $b )
775 if length( $b ) > $col_widths[1];
776
777 # Columns 2..N = performance ratios
778 my $skip_rest = 0;
779 for ( my $col_num = 0 ; $col_num < @vals ; ++$col_num ) {
780 my $col_val = $vals[$col_num];
781 my $out;
782 if ( $skip_rest ) {
783 $out = '';
784 }
785 elsif ( $col_val->[0] eq $row_val->[0] ) {
786 $out = "--";
787 # $skip_rest = 1;
788 }
789 else {
790 my $col_rate = $col_val->[7];
791 $out = sprintf( "%.0f%%", 100*$row_rate/$col_rate - 100 );
792 }
793 push @row, $out;
794 $col_widths[$col_num+2] = length( $out )
795 if length( $out ) > $col_widths[$col_num+2];
796
797 # A little wierdness to set the first column width properly
798 $col_widths[$col_num+2] = length( $col_val->[0] )
799 if length( $col_val->[0] ) > $col_widths[$col_num+2];
800 }
801 push @rows, \@row;
802 }
803
d1083c7a 804 return \@rows if $style eq "none";
805
3c6312e9 806 # Equalize column widths in the chart as much as possible without
807 # exceeding 80 characters. This does not use or affect cols 0 or 1.
808 my @sorted_width_refs =
809 sort { $$a <=> $$b } map { \$_ } @col_widths[2..$#col_widths];
810 my $max_width = ${$sorted_width_refs[-1]};
811
277427cf 812 my $total = @col_widths - 1 ;
3c6312e9 813 for ( @col_widths ) { $total += $_ }
814
815 STRETCHER:
816 while ( $total < 80 ) {
817 my $min_width = ${$sorted_width_refs[0]};
818 last
819 if $min_width == $max_width;
820 for ( @sorted_width_refs ) {
821 last
822 if $$_ > $min_width;
823 ++$$_;
824 ++$total;
825 last STRETCHER
826 if $total >= 80;
827 }
828 }
829
830 # Dump the output
831 my $format = join( ' ', map { "%${_}s" } @col_widths ) . "\n";
832 substr( $format, 1, 0 ) = '-';
833 for ( @rows ) {
834 printf $format, @$_;
835 }
836
d1083c7a 837 return \@rows ;
3c6312e9 838}
839
840
a0d0e21e 8411;