lib/Benchmark.pm

   1 package Benchmark;
   2
   3 =head1 NAME
   4
   5 Benchmark - benchmark running times of Perl code
   6
   7 =head1 SYNOPSIS
   8
   9     use Benchmark qw(:all) ;
  10
  11     timethis ($count, "code");
  12
  13     # Use Perl code in strings...
  14     timethese($count, {
  15         'Name1' => '...code1...',
  16         'Name2' => '...code2...',
  17     });
  18
  19     # ... or use subroutine references.
  20     timethese($count, {
  21         'Name1' => sub { ...code1... },
  22         'Name2' => sub { ...code2... },
  23     });
  24
  25     # cmpthese can be used both ways as well
  26     cmpthese($count, {
  27         'Name1' => '...code1...',
  28         'Name2' => '...code2...',
  29     });
  30
  31     cmpthese($count, {
  32         'Name1' => sub { ...code1... },
  33         'Name2' => sub { ...code2... },
  34     });
  35
  36     # ...or in two stages
  37     $results = timethese($count,
  38         {
  39             'Name1' => sub { ...code1... },
  40             'Name2' => sub { ...code2... },
  41         },
  42         'none'
  43     );
  44     cmpthese( $results ) ;
  45
  46     $t = timeit($count, '...other code...')
  47     print "$count loops of other code took:",timestr($t),"\n";
  48
  49     $t = countit($time, '...other code...')
  50     $count = $t->iters ;
  51     print "$count loops of other code took:",timestr($t),"\n";
  52
  53     # enable hires wallclock timing if possible
  54     use Benchmark ':hireswallclock';
  55
  56 =head1 DESCRIPTION
  57
  58 The Benchmark module encapsulates a number of routines to help you
  59 figure out how long it takes to execute some code.
  60
  61 timethis - run a chunk of code several times
  62
  63 timethese - run several chunks of code several times
  64
  65 cmpthese - print results of timethese as a comparison chart
  66
  67 timeit - run a chunk of code and see how long it goes
  68
  69 countit - see how many times a chunk of code runs in a given time
  70
  71
  72 =head2 Methods
  73
  74 =over 10
  75
  76 =item new
  77
  78 Returns the current time.   Example:
  79
  80     use Benchmark;
  81     $t0 = new Benchmark;
  82     # ... your code here ...
  83     $t1 = new Benchmark;
  84     $td = timediff($t1, $t0);
  85     print "the code took:",timestr($td),"\n";
  86
  87 =item debug
  88
  89 Enables or disable debugging by setting the C<$Benchmark::Debug> flag:
  90
  91     debug Benchmark 1;
  92     $t = timeit(10, ' 5 ** $Global ');
  93     debug Benchmark 0;
  94
  95 =item iters
  96
  97 Returns the number of iterations.
  98
  99 =back
 100
 101 =head2 Standard Exports
 102
 103 The following routines will be exported into your namespace
 104 if you use the Benchmark module:
 105
 106 =over 10
 107
 108 =item timeit(COUNT, CODE)
 109
 110 Arguments: COUNT is the number of times to run the loop, and CODE is
 111 the code to run.  CODE may be either a code reference or a string to
 112 be eval'd; either way it will be run in the caller's package.
 113
 114 Returns: a Benchmark object.
 115
 116 =item timethis ( COUNT, CODE, [ TITLE, [ STYLE ]] )
 117
 118 Time COUNT iterations of CODE. CODE may be a string to eval or a
 119 code reference; either way the CODE will run in the caller's package.
 120 Results will be printed to STDOUT as TITLE followed by the times.
 121 TITLE defaults to "timethis COUNT" if none is provided. STYLE
 122 determines the format of the output, as described for timestr() below.
 123
 124 The COUNT can be zero or negative: this means the I<minimum number of
 125 CPU seconds> to run.  A zero signifies the default of 3 seconds.  For
 126 example to run at least for 10 seconds:
 127
 128         timethis(-10, $code)
 129
 130 or to run two pieces of code tests for at least 3 seconds:
 131
 132         timethese(0, { test1 => '...', test2 => '...'})
 133
 134 CPU seconds is, in UNIX terms, the user time plus the system time of
 135 the process itself, as opposed to the real (wallclock) time and the
 136 time spent by the child processes.  Less than 0.1 seconds is not
 137 accepted (-0.01 as the count, for example, will cause a fatal runtime
 138 exception).
 139
 140 Note that the CPU seconds is the B<minimum> time: CPU scheduling and
 141 other operating system factors may complicate the attempt so that a
 142 little bit more time is spent.  The benchmark output will, however,
 143 also tell the number of C<$code> runs/second, which should be a more
 144 interesting number than the actually spent seconds.
 145
 146 Returns a Benchmark object.
 147
 148 =item timethese ( COUNT, CODEHASHREF, [ STYLE ] )
 149
 150 The CODEHASHREF is a reference to a hash containing names as keys
 151 and either a string to eval or a code reference for each value.
 152 For each (KEY, VALUE) pair in the CODEHASHREF, this routine will
 153 call
 154
 155         timethis(COUNT, VALUE, KEY, STYLE)
 156
 157 The routines are called in string comparison order of KEY.
 158
 159 The COUNT can be zero or negative, see timethis().
 160
 161 Returns a hash of Benchmark objects, keyed by name.
 162
 163 =item timediff ( T1, T2 )
 164
 165 Returns the difference between two Benchmark times as a Benchmark
 166 object suitable for passing to timestr().
 167
 168 =item timestr ( TIMEDIFF, [ STYLE, [ FORMAT ] ] )
 169
 170 Returns a string that formats the times in the TIMEDIFF object in
 171 the requested STYLE. TIMEDIFF is expected to be a Benchmark object
 172 similar to that returned by timediff().
 173
 174 STYLE can be any of 'all', 'none', 'noc', 'nop' or 'auto'. 'all' shows
 175 each of the 5 times available ('wallclock' time, user time, system time,
 176 user time of children, and system time of children). 'noc' shows all
 177 except the two children times. 'nop' shows only wallclock and the
 178 two children times. 'auto' (the default) will act as 'all' unless
 179 the children times are both zero, in which case it acts as 'noc'.
 180 'none' prevents output.
 181
 182 FORMAT is the L<printf(3)>-style format specifier (without the
 183 leading '%') to use to print the times. It defaults to '5.2f'.
 184
 185 =back
 186
 187 =head2 Optional Exports
 188
 189 The following routines will be exported into your namespace
 190 if you specifically ask that they be imported:
 191
 192 =over 10
 193
 194 =item clearcache ( COUNT )
 195
 196 Clear the cached time for COUNT rounds of the null loop.
 197
 198 =item clearallcache ( )
 199
 200 Clear all cached times.
 201
 202 =item cmpthese ( COUNT, CODEHASHREF, [ STYLE ] )
 203
 204 =item cmpthese ( RESULTSHASHREF, [ STYLE ] )
 205
 206 Optionally calls timethese(), then outputs comparison chart.  This:
 207
 208     cmpthese( -1, { a => "++\$i", b => "\$i *= 2" } ) ;
 209
 210 outputs a chart like:
 211
 212            Rate    b    a
 213     b 2831802/s   -- -61%
 214     a 7208959/s 155%   --
 215
 216 This chart is sorted from slowest to fastest, and shows the percent speed
 217 difference between each pair of tests.
 218
 219 c<cmpthese> can also be passed the data structure that timethese() returns:
 220
 221     $results = timethese( -1, { a => "++\$i", b => "\$i *= 2" } ) ;
 222     cmpthese( $results );
 223
 224 in case you want to see both sets of results.
 225
 226 Returns a reference to an ARRAY of rows, each row is an ARRAY of cells from the
 227 above chart, including labels. This:
 228
 229     my $rows = cmpthese( -1, { a => '++$i', b => '$i *= 2' }, "none" );
 230
 231 returns a data structure like:
 232
 233     [
 234         [ '',       'Rate',   'b',    'a' ],
 235         [ 'b', '2885232/s',  '--', '-59%' ],
 236         [ 'a', '7099126/s', '146%',  '--' ],
 237     ]
 238
 239 B<NOTE>: This result value differs from previous versions, which returned
 240 the C<timethese()> result structure.  If you want that, just use the two
 241 statement C<timethese>...C<cmpthese> idiom shown above.
 242
 243 Incidently, note the variance in the result values between the two examples;
 244 this is typical of benchmarking.  If this were a real benchmark, you would
 245 probably want to run a lot more iterations.
 246
 247 =item countit(TIME, CODE)
 248
 249 Arguments: TIME is the minimum length of time to run CODE for, and CODE is
 250 the code to run.  CODE may be either a code reference or a string to
 251 be eval'd; either way it will be run in the caller's package.
 252
 253 TIME is I<not> negative.  countit() will run the loop many times to
 254 calculate the speed of CODE before running it for TIME.  The actual
 255 time run for will usually be greater than TIME due to system clock
 256 resolution, so it's best to look at the number of iterations divided
 257 by the times that you are concerned with, not just the iterations.
 258
 259 Returns: a Benchmark object.
 260
 261 =item disablecache ( )
 262
 263 Disable caching of timings for the null loop. This will force Benchmark
 264 to recalculate these timings for each new piece of code timed.
 265
 266 =item enablecache ( )
 267
 268 Enable caching of timings for the null loop. The time taken for COUNT
 269 rounds of the null loop will be calculated only once for each
 270 different COUNT used.
 271
 272 =item timesum ( T1, T2 )
 273
 274 Returns the sum of two Benchmark times as a Benchmark object suitable
 275 for passing to timestr().
 276
 277 =back
 278
 279 =head2 :hireswallclock
 280
 281 If the Time::HiRes module has been installed, you can specify the
 282 special tag C<:hireswallclock> for Benchmark (if Time::HiRes is not
 283 available, the tag will be silently ignored).  This tag will cause the
 284 wallclock time to be measured in microseconds, instead of integer
 285 seconds.  Note though that the speed computations are still conducted
 286 in CPU time, not wallclock time.
 287
 288 =head1 NOTES
 289
 290 The data is stored as a list of values from the time and times
 291 functions:
 292
 293       ($real, $user, $system, $children_user, $children_system, $iters)
 294
 295 in seconds for the whole loop (not divided by the number of rounds).
 296
 297 The timing is done using time(3) and times(3).
 298
 299 Code is executed in the caller's package.
 300
 301 The time of the null loop (a loop with the same
 302 number of rounds but empty loop body) is subtracted
 303 from the time of the real loop.
 304
 305 The null loop times can be cached, the key being the
 306 number of rounds. The caching can be controlled using
 307 calls like these:
 308
 309     clearcache($key);
 310     clearallcache();
 311
 312     disablecache();
 313     enablecache();
 314
 315 Caching is off by default, as it can (usually slightly) decrease
 316 accuracy and does not usually noticably affect runtimes.
 317
 318 =head1 EXAMPLES
 319
 320 For example,
 321
 322     use Benchmark qw( cmpthese ) ;
 323     $x = 3;
 324     cmpthese( -5, {
 325         a => sub{$x*$x},
 326         b => sub{$x**2},
 327     } );
 328
 329 outputs something like this:
 330
 331    Benchmark: running a, b, each for at least 5 CPU seconds...
 332           Rate    b    a
 333    b 1559428/s   -- -62%
 334    a 4152037/s 166%   --
 335
 336
 337 while
 338
 339     use Benchmark qw( timethese cmpthese ) ;
 340     $x = 3;
 341     $r = timethese( -5, {
 342         a => sub{$x*$x},
 343         b => sub{$x**2},
 344     } );
 345     cmpthese $r;
 346
 347 outputs something like this:
 348
 349     Benchmark: running a, b, each for at least 5 CPU seconds...
 350              a: 10 wallclock secs ( 5.14 usr +  0.13 sys =  5.27 CPU) @ 3835055.60/s (n=20210743)
 351              b:  5 wallclock secs ( 5.41 usr +  0.00 sys =  5.41 CPU) @ 1574944.92/s (n=8520452)
 352            Rate    b    a
 353     b 1574945/s   -- -59%
 354     a 3835056/s 144%   --
 355
 356
 357 =head1 INHERITANCE
 358
 359 Benchmark inherits from no other class, except of course
 360 for Exporter.
 361
 362 =head1 CAVEATS
 363
 364 Comparing eval'd strings with code references will give you
 365 inaccurate results: a code reference will show a slightly slower
 366 execution time than the equivalent eval'd string.
 367
 368 The real time timing is done using time(2) and
 369 the granularity is therefore only one second.
 370
 371 Short tests may produce negative figures because perl
 372 can appear to take longer to execute the empty loop
 373 than a short test; try:
 374
 375     timethis(100,'1');
 376
 377 The system time of the null loop might be slightly
 378 more than the system time of the loop with the actual
 379 code and therefore the difference might end up being E<lt> 0.
 380
 381 =head1 SEE ALSO
 382
 383 L<Devel::DProf> - a Perl code profiler
 384
 385 =head1 AUTHORS
 386
 387 Jarkko Hietaniemi <F<jhi@iki.fi>>, Tim Bunce <F<Tim.Bunce@ig.co.uk>>
 388
 389 =head1 MODIFICATION HISTORY
 390
 391 September 8th, 1994; by Tim Bunce.
 392
 393 March 28th, 1997; by Hugo van der Sanden: added support for code
 394 references and the already documented 'debug' method; revamped
 395 documentation.
 396
 397 April 04-07th, 1997: by Jarkko Hietaniemi, added the run-for-some-time
 398 functionality.
 399
 400 September, 1999; by Barrie Slaymaker: math fixes and accuracy and
 401 efficiency tweaks.  Added cmpthese().  A result is now returned from
 402 timethese().  Exposed countit() (was runfor()).
 403
 404 December, 2001; by Nicholas Clark: make timestr() recognise the style 'none'
 405 and return an empty string. If cmpthese is calling timethese, make it pass the
 406 style in. (so that 'none' will suppress output). Make sub new dump its
 407 debugging output to STDERR, to be consistent with everything else.
 408 All bugs found while writing a regression test.
 409
 410 September, 2002; by Jarkko Hietaniemi: add ':hireswallclock' special tag.
 411
 412 =cut
 413
 414 # evaluate something in a clean lexical environment
 415 sub _doeval { eval shift }
 416
 417 #
 418 # put any lexicals at file scope AFTER here
 419 #
 420
 421 use Carp;
 422 use Exporter;
 423 @ISA=(Exporter);
 424 @EXPORT=qw(timeit timethis timethese timediff timestr);
 425 @EXPORT_OK=qw(timesum cmpthese countit
 426               clearcache clearallcache disablecache enablecache);
 427 %EXPORT_TAGS=( all => [ @EXPORT, @EXPORT_OK ] ) ;
 428
 429 $VERSION = 1.0501;
 430
 431 # --- ':hireswallclock' special handling
 432
 433 my $hirestime;
 434
 435 sub mytime () { time }
 436
 437 &init;
 438
 439 sub BEGIN {
 440     if (eval 'require Time::HiRes') {
 441         import Time::HiRes qw(time);
 442         $hirestime = \&Time::HiRes::time;
 443     }
 444 }
 445
 446 sub import {
 447     my $class = shift;
 448     if (grep { $_ eq ":hireswallclock" } @_) {
 449         @_ = grep { $_ ne ":hireswallclock" } @_;
 450         *mytime = $hirestime if defined $hirestime;
 451     }
 452     Benchmark->export_to_level(1, $class, @_);
 453 }
 454
 455 sub init {
 456     $debug = 0;
 457     $min_count = 4;
 458     $min_cpu   = 0.4;
 459     $defaultfmt = '5.2f';
 460     $defaultstyle = 'auto';
 461     # The cache can cause a slight loss of sys time accuracy. If a
 462     # user does many tests (>10) with *very* large counts (>10000)
 463     # or works on a very slow machine the cache may be useful.
 464     &disablecache;
 465     &clearallcache;
 466 }
 467
 468 sub debug { $debug = ($_[1] != 0); }
 469
 470 # The cache needs two branches: 's' for strings and 'c' for code.  The
 471 # emtpy loop is different in these two cases.
 472 sub clearcache    { delete $cache{"$_[0]c"}; delete $cache{"$_[0]s"}; }
 473 sub clearallcache { %cache = (); }
 474 sub enablecache   { $cache = 1; }
 475 sub disablecache  { $cache = 0; }
 476
 477 # --- Functions to process the 'time' data type
 478
 479 sub new { my @t = (mytime, times, @_ == 2 ? $_[1] : 0);
 480           print STDERR "new=@t\n" if $debug;
 481           bless \@t; }
 482
 483 sub cpu_p { my($r,$pu,$ps,$cu,$cs) = @{$_[0]}; $pu+$ps         ; }
 484 sub cpu_c { my($r,$pu,$ps,$cu,$cs) = @{$_[0]};         $cu+$cs ; }
 485 sub cpu_a { my($r,$pu,$ps,$cu,$cs) = @{$_[0]}; $pu+$ps+$cu+$cs ; }
 486 sub real  { my($r,$pu,$ps,$cu,$cs) = @{$_[0]}; $r              ; }
 487 sub iters { $_[0]->[5] ; }
 488
 489 sub timediff {
 490     my($a, $b) = @_;
 491     my @r;
 492     for (my $i=0; $i < @$a; ++$i) {
 493         push(@r, $a->[$i] - $b->[$i]);
 494     }
 495     bless \@r;
 496 }
 497
 498 sub timesum {
 499      my($a, $b) = @_;
 500      my @r;
 501      for (my $i=0; $i < @$a; ++$i) {
 502         push(@r, $a->[$i] + $b->[$i]);
 503      }
 504      bless \@r;
 505 }
 506
 507 sub timestr {
 508     my($tr, $style, $f) = @_;
 509     my @t = @$tr;
 510     warn "bad time value (@t)" unless @t==6;
 511     my($r, $pu, $ps, $cu, $cs, $n) = @t;
 512     my($pt, $ct, $tt) = ($tr->cpu_p, $tr->cpu_c, $tr->cpu_a);
 513     $f = $defaultfmt unless defined $f;
 514     # format a time in the required style, other formats may be added here
 515     $style ||= $defaultstyle;
 516     return '' if $style eq 'none';
 517     $style = ($ct>0) ? 'all' : 'noc' if $style eq 'auto';
 518     my $s = "@t $style"; # default for unknown style
 519     my $w = $hirestime ? "%2g" : "%2d";
 520     $s=sprintf("$w wallclock secs (%$f usr %$f sys + %$f cusr %$f csys = %$f CPU)",
 521                             $r,$pu,$ps,$cu,$cs,$tt) if $style eq 'all';
 522     $s=sprintf("$w wallclock secs (%$f usr + %$f sys = %$f CPU)",
 523                             $r,$pu,$ps,$pt) if $style eq 'noc';
 524     $s=sprintf("$w wallclock secs (%$f cusr + %$f csys = %$f CPU)",
 525                             $r,$cu,$cs,$ct) if $style eq 'nop';
 526     $s .= sprintf(" @ %$f/s (n=$n)", $n / ( $pu + $ps )) if $n && $pu+$ps;
 527     $s;
 528 }
 529
 530 sub timedebug {
 531     my($msg, $t) = @_;
 532     print STDERR "$msg",timestr($t),"\n" if $debug;
 533 }
 534
 535 # --- Functions implementing low-level support for timing loops
 536
 537 sub runloop {
 538     my($n, $c) = @_;
 539
 540     $n+=0; # force numeric now, so garbage won't creep into the eval
 541     croak "negative loopcount $n" if $n<0;
 542     confess "Usage: runloop(number, [string | coderef])" unless defined $c;
 543     my($t0, $t1, $td); # before, after, difference
 544
 545     # find package of caller so we can execute code there
 546     my($curpack) = caller(0);
 547     my($i, $pack)= 0;
 548     while (($pack) = caller(++$i)) {
 549         last if $pack ne $curpack;
 550     }
 551
 552     my ($subcode, $subref);
 553     if (ref $c eq 'CODE') {
 554         $subcode = "sub { for (1 .. $n) { local \$_; package $pack; &\$c; } }";
 555         $subref  = eval $subcode;
 556     }
 557     else {
 558         $subcode = "sub { for (1 .. $n) { local \$_; package $pack; $c;} }";
 559         $subref  = _doeval($subcode);
 560     }
 561     croak "runloop unable to compile '$c': $@\ncode: $subcode\n" if $@;
 562     print STDERR "runloop $n '$subcode'\n" if $debug;
 563
 564     # Wait for the user timer to tick.  This makes the error range more like
 565     # -0.01, +0.  If we don't wait, then it's more like -0.01, +0.01.  This
 566     # may not seem important, but it significantly reduces the chances of
 567     # getting a too low initial $n in the initial, 'find the minimum' loop
 568     # in &countit.  This, in turn, can reduce the number of calls to
 569     # &runloop a lot, and thus reduce additive errors.
 570     my $tbase = Benchmark->new(0)->[1];
 571     while ( ( $t0 = Benchmark->new(0) )->[1] == $tbase ) {} ;
 572     &$subref;
 573     $t1 = Benchmark->new($n);
 574     $td = &timediff($t1, $t0);
 575     timedebug("runloop:",$td);
 576     $td;
 577 }
 578
 579
 580 sub timeit {
 581     my($n, $code) = @_;
 582     my($wn, $wc, $wd);
 583
 584     printf STDERR "timeit $n $code\n" if $debug;
 585     my $cache_key = $n . ( ref( $code ) ? 'c' : 's' );
 586     if ($cache && exists $cache{$cache_key} ) {
 587         $wn = $cache{$cache_key};
 588     } else {
 589         $wn = &runloop($n, ref( $code ) ? sub { } : '' );
 590         # Can't let our baseline have any iterations, or they get subtracted
 591         # out of the result.
 592         $wn->[5] = 0;
 593         $cache{$cache_key} = $wn;
 594     }
 595
 596     $wc = &runloop($n, $code);
 597
 598     $wd = timediff($wc, $wn);
 599     timedebug("timeit: ",$wc);
 600     timedebug("      - ",$wn);
 601     timedebug("      = ",$wd);
 602
 603     $wd;
 604 }
 605
 606
 607 my $default_for = 3;
 608 my $min_for     = 0.1;
 609
 610
 611 sub countit {
 612     my ( $tmax, $code ) = @_;
 613
 614     if ( not defined $tmax or $tmax == 0 ) {
 615         $tmax = $default_for;
 616     } elsif ( $tmax < 0 ) {
 617         $tmax = -$tmax;
 618     }
 619
 620     die "countit($tmax, ...): timelimit cannot be less than $min_for.\n"
 621         if $tmax < $min_for;
 622
 623     my ($n, $tc);
 624
 625     # First find the minimum $n that gives a significant timing.
 626     for ($n = 1; ; $n *= 2 ) {
 627         my $td = timeit($n, $code);
 628         $tc = $td->[1] + $td->[2];
 629         last if $tc > 0.1;
 630     }
 631
 632     my $nmin = $n;
 633
 634     # Get $n high enough that we can guess the final $n with some accuracy.
 635     my $tpra = 0.1 * $tmax; # Target/time practice.
 636     while ( $tc < $tpra ) {
 637         # The 5% fudge is to keep us from iterating again all
 638         # that often (this speeds overall responsiveness when $tmax is big
 639         # and we guess a little low).  This does not noticably affect
 640         # accuracy since we're not couting these times.
 641         $n = int( $tpra * 1.05 * $n / $tc ); # Linear approximation.
 642         my $td = timeit($n, $code);
 643         my $new_tc = $td->[1] + $td->[2];
 644         # Make sure we are making progress.
 645         $tc = $new_tc > 1.2 * $tc ? $new_tc : 1.2 * $tc;
 646     }
 647
 648     # Now, do the 'for real' timing(s), repeating until we exceed
 649     # the max.
 650     my $ntot  = 0;
 651     my $rtot  = 0;
 652     my $utot  = 0.0;
 653     my $stot  = 0.0;
 654     my $cutot = 0.0;
 655     my $cstot = 0.0;
 656     my $ttot  = 0.0;
 657
 658     # The 5% fudge is because $n is often a few % low even for routines
 659     # with stable times and avoiding extra timeit()s is nice for
 660     # accuracy's sake.
 661     $n = int( $n * ( 1.05 * $tmax / $tc ) );
 662
 663     while () {
 664         my $td = timeit($n, $code);
 665         $ntot  += $n;
 666         $rtot  += $td->[0];
 667         $utot  += $td->[1];
 668         $stot  += $td->[2];
 669         $cutot += $td->[3];
 670         $cstot += $td->[4];
 671         $ttot = $utot + $stot;
 672         last if $ttot >= $tmax;
 673
 674         $ttot = 0.01 if $ttot < 0.01;
 675         my $r = $tmax / $ttot - 1; # Linear approximation.
 676         $n = int( $r * $ntot );
 677         $n = $nmin if $n < $nmin;
 678     }
 679
 680     return bless [ $rtot, $utot, $stot, $cutot, $cstot, $ntot ];
 681 }
 682
 683 # --- Functions implementing high-level time-then-print utilities
 684
 685 sub n_to_for {
 686     my $n = shift;
 687     return $n == 0 ? $default_for : $n < 0 ? -$n : undef;
 688 }
 689
 690 sub timethis{
 691     my($n, $code, $title, $style) = @_;
 692     my($t, $for, $forn);
 693
 694     if ( $n > 0 ) {
 695         croak "non-integer loopcount $n, stopped" if int($n)<$n;
 696         $t = timeit($n, $code);
 697         $title = "timethis $n" unless defined $title;
 698     } else {
 699         $fort  = n_to_for( $n );
 700         $t     = countit( $fort, $code );
 701         $title = "timethis for $fort" unless defined $title;
 702         $forn  = $t->[-1];
 703     }
 704     local $| = 1;
 705     $style = "" unless defined $style;
 706     printf("%10s: ", $title) unless $style eq 'none';
 707     print timestr($t, $style, $defaultfmt),"\n" unless $style eq 'none';
 708
 709     $n = $forn if defined $forn;
 710
 711     # A conservative warning to spot very silly tests.
 712     # Don't assume that your benchmark is ok simply because
 713     # you don't get this warning!
 714     print "            (warning: too few iterations for a reliable count)\n"
 715         if     $n < $min_count
 716             || ($t->real < 1 && $n < 1000)
 717             || $t->cpu_a < $min_cpu;
 718     $t;
 719 }
 720
 721 sub timethese{
 722     my($n, $alt, $style) = @_;
 723     die "usage: timethese(count, { 'Name1'=>'code1', ... }\n"
 724                 unless ref $alt eq HASH;
 725     my @names = sort keys %$alt;
 726     $style = "" unless defined $style;
 727     print "Benchmark: " unless $style eq 'none';
 728     if ( $n > 0 ) {
 729         croak "non-integer loopcount $n, stopped" if int($n)<$n;
 730         print "timing $n iterations of" unless $style eq 'none';
 731     } else {
 732         print "running" unless $style eq 'none';
 733     }
 734     print " ", join(', ',@names) unless $style eq 'none';
 735     unless ( $n > 0 ) {
 736         my $for = n_to_for( $n );
 737         print ", each" if $n > 1 && $style ne 'none';
 738         print " for at least $for CPU seconds" unless $style eq 'none';
 739     }
 740     print "...\n" unless $style eq 'none';
 741
 742     # we could save the results in an array and produce a summary here
 743     # sum, min, max, avg etc etc
 744     my %results;
 745     foreach my $name (@names) {
 746         $results{$name} = timethis ($n, $alt -> {$name}, $name, $style);
 747     }
 748
 749     return \%results;
 750 }
 751
 752 sub cmpthese{
 753     my ($results, $style) =
 754          ref $_ [0] ? @_
 755                     : (timethese (@_ [0, 1], @_ > 2 ? $_ [2] : "none"), $_ [2]);
 756
 757     $style = "" unless defined $style;
 758
 759     # Flatten in to an array of arrays with the name as the first field
 760     my @vals = map{ [ $_, @{$results->{$_}} ] } keys %$results;
 761
 762     for (@vals) {
 763         # The epsilon fudge here is to prevent div by 0.  Since clock
 764         # resolutions are much larger, it's below the noise floor.
 765         my $rate = $_->[6] / ( $_->[2] + $_->[3] + 0.000000000000001 );
 766         $_->[7] = $rate;
 767     }
 768
 769     # Sort by rate
 770     @vals = sort { $a->[7] <=> $b->[7] } @vals;
 771
 772     # If more than half of the rates are greater than one...
 773     my $display_as_rate = $vals[$#vals>>1]->[7] > 1;
 774
 775     my @rows;
 776     my @col_widths;
 777
 778     my @top_row = (
 779         '',
 780         $display_as_rate ? 'Rate' : 's/iter',
 781         map { $_->[0] } @vals
 782     );
 783
 784     push @rows, \@top_row;
 785     @col_widths = map { length( $_ ) } @top_row;
 786
 787     # Build the data rows
 788     # We leave the last column in even though it never has any data.  Perhaps
 789     # it should go away.  Also, perhaps a style for a single column of
 790     # percentages might be nice.
 791     for my $row_val ( @vals ) {
 792         my @row;
 793
 794         # Column 0 = test name
 795         push @row, $row_val->[0];
 796         $col_widths[0] = length( $row_val->[0] )
 797             if length( $row_val->[0] ) > $col_widths[0];
 798
 799         # Column 1 = performance
 800         my $row_rate = $row_val->[7];
 801
 802         # We assume that we'll never get a 0 rate.
 803         my $a = $display_as_rate ? $row_rate : 1 / $row_rate;
 804
 805         # Only give a few decimal places before switching to sci. notation,
 806         # since the results aren't usually that accurate anyway.
 807         my $format =
 808            $a >= 100 ?
 809                "%0.0f" :
 810            $a >= 10 ?
 811                "%0.1f" :
 812            $a >= 1 ?
 813                "%0.2f" :
 814            $a >= 0.1 ?
 815                "%0.3f" :
 816                "%0.2e";
 817
 818         $format .= "/s"
 819             if $display_as_rate;
 820         # Using $b here due to optimizing bug in _58 through _61
 821         my $b = sprintf( $format, $a );
 822         push @row, $b;
 823         $col_widths[1] = length( $b )
 824             if length( $b ) > $col_widths[1];
 825
 826         # Columns 2..N = performance ratios
 827         my $skip_rest = 0;
 828         for ( my $col_num = 0 ; $col_num < @vals ; ++$col_num ) {
 829             my $col_val = $vals[$col_num];
 830             my $out;
 831             if ( $skip_rest ) {
 832                 $out = '';
 833             }
 834             elsif ( $col_val->[0] eq $row_val->[0] ) {
 835                 $out = "--";
 836                 # $skip_rest = 1;
 837             }
 838             else {
 839                 my $col_rate = $col_val->[7];
 840                 $out = sprintf( "%.0f%%", 100*$row_rate/$col_rate - 100 );
 841             }
 842             push @row, $out;
 843             $col_widths[$col_num+2] = length( $out )
 844                 if length( $out ) > $col_widths[$col_num+2];
 845
 846             # A little wierdness to set the first column width properly
 847             $col_widths[$col_num+2] = length( $col_val->[0] )
 848                 if length( $col_val->[0] ) > $col_widths[$col_num+2];
 849         }
 850         push @rows, \@row;
 851     }
 852
 853     return \@rows if $style eq "none";
 854
 855     # Equalize column widths in the chart as much as possible without
 856     # exceeding 80 characters.  This does not use or affect cols 0 or 1.
 857     my @sorted_width_refs =
 858        sort { $$a <=> $$b } map { \$_ } @col_widths[2..$#col_widths];
 859     my $max_width = ${$sorted_width_refs[-1]};
 860
 861     my $total = @col_widths - 1 ;
 862     for ( @col_widths ) { $total += $_ }
 863
 864     STRETCHER:
 865     while ( $total < 80 ) {
 866         my $min_width = ${$sorted_width_refs[0]};
 867         last
 868            if $min_width == $max_width;
 869         for ( @sorted_width_refs ) {
 870             last
 871                 if $$_ > $min_width;
 872             ++$$_;
 873             ++$total;
 874             last STRETCHER
 875                 if $total >= 80;
 876         }
 877     }
 878
 879     # Dump the output
 880     my $format = join( ' ', map { "%${_}s" } @col_widths ) . "\n";
 881     substr( $format, 1, 0 ) = '-';
 882     for ( @rows ) {
 883         printf $format, @$_;
 884     }
 885
 886     return \@rows ;
 887 }
 888
 889
 890 1;