From: Peter Rabbitson Date: Mon, 19 Dec 2011 05:55:18 +0000 (+0100) Subject: Rewrite the HRI bench to produce consistent numbers with less handholding X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=9839286bfcc2f0b7ff5848f509486afed5b9daad;p=dbsrgits%2FDBIx-Class-Historic.git Rewrite the HRI bench to produce consistent numbers with less handholding - Now we auto-pull from git what we want to benchmark (by default last 2 commits + uncommitted changes if any) - Run a very tight benchmarking loop, without requerying SQLite or have any other unrelated distractions - use DumbBench (much saner implementation and results, though slower) --- diff --git a/lib/DBIx/Class/ResultClass/HashRefInflator.pm b/lib/DBIx/Class/ResultClass/HashRefInflator.pm index ca5e0ea..a8861bd 100644 --- a/lib/DBIx/Class/ResultClass/HashRefInflator.pm +++ b/lib/DBIx/Class/ResultClass/HashRefInflator.pm @@ -58,9 +58,7 @@ recommended. # # Generally people use this to gain as much speed as possible. If a new &mk_hash is # implemented, it should be benchmarked using the maint/benchmark_hashrefinflator.pl -# script (in addition to passing all tests of course :). Additional instructions are -# provided in the script itself. -# +# script (in addition to passing all tests of course :) # This coderef is a simple recursive function # Arguments: ($me, $prefetch, $is_root) from inflate_result() below diff --git a/maint/benchmark_hashrefinflator.pl b/maint/benchmark_hashrefinflator.pl index 194e53a..6d6a081 100755 --- a/maint/benchmark_hashrefinflator.pl +++ b/maint/benchmark_hashrefinflator.pl @@ -1,118 +1,208 @@ #!/usr/bin/env perl # -# So you wrote a new mk_hash implementation which passed all tests (particularly -# t/68inflate_resultclass_hashrefinflator) and would like to see how it holds up -# against older versions of the same. Just add your coderef to the HRI::Bench -# namespace and add a name/ref pair to the %bench_list hash. Happy testing. - -package DBIx::Class::ResultClass::HashRefInflator::Bench; +# So you wrote a new mk_hash implementation which passed all tests +# (particularly t/inflate/hri.t) and would like to see how it holds +# up against older (and often buggy) versions of the same. Just run +# this script and wait (no editing necessary) use warnings; use strict; -my $current_mk_hash; -$current_mk_hash = sub { - if (ref $_[0] eq 'ARRAY') { # multi relationship - return [ map { $current_mk_hash->(@$_) || () } (@_) ]; - } - else { - my $hash = { - # the main hash could be an undef if we are processing a skipped-over join - $_[0] ? %{$_[0]} : (), - - # the second arg is a hash of arrays for each prefetched relation - map - { $_ => $current_mk_hash->( @{$_[1]->{$_}} ) } - ( $_[1] ? (keys %{$_[1]}) : () ) - }; - - # if there is at least one defined column consider the resultset real - # (and not an emtpy has_many rel containing one empty hashref) - for (values %$hash) { - return $hash if defined $_; - } - - return undef; - } -}; +use FindBin; +use lib ("$FindBin::Bin/../lib", "$FindBin::Bin/../t/lib"); -# the (incomplete, fails a test) implementation before svn:4760 -my $old_mk_hash; -$old_mk_hash = sub { - my ($me, $rest) = @_; - - # $me is the hashref of cols/data from the immediate resultsource - # $rest is a deep hashref of all the data from the prefetched - # related sources. - - # to avoid emtpy has_many rels contain one empty hashref - return undef if (not keys %$me); - - my $def; - - foreach (values %$me) { - if (defined $_) { - $def = 1; - last; - } - } - return undef unless $def; - - return { %$me, - map { - ( $_ => - ref($rest->{$_}[0]) eq 'ARRAY' - ? [ grep defined, map $old_mk_hash->(@$_), @{$rest->{$_}} ] - : $old_mk_hash->( @{$rest->{$_}} ) - ) - } keys %$rest - }; -}; +use Class::Unload '0.07'; +use Benchmark (); +use Dumbbench; +use Benchmark::Dumb ':all'; +use DBICTest; +# for git reporting to work, and to use it as INC key directly +chdir ("$FindBin::Bin/../lib"); +my $hri_fn = 'DBIx/Class/ResultClass/HashRefInflator.pm'; -our %bench_list = ( - current_implementation => $current_mk_hash, - old_implementation => $old_mk_hash, +require Getopt::Long; +my $getopt = Getopt::Long::Parser->new( + config => [qw/gnu_getopt bundling_override no_ignore_case pass_through/] ); +my $args = { + 'bench-commits' => 2, + 'no-cpufreq-checks' => undef, +}; +$getopt->getoptions($args, qw/ + bench-commits + no-cpufreq-checks +/); + +if ( + !$args->{'no-cpufreq-checks'} + and + $^O eq 'linux' + and + -r '/sys/devices/system/cpu/cpu0/cpufreq/scaling_cur_freq' +) { + my ($min_freq, $max_freq, $governor) = map { local @ARGV = $_; my $s = <>; chomp $s; $s } qw| + /sys/devices/system/cpu/cpu0/cpufreq/scaling_min_freq + /sys/devices/system/cpu/cpu0/cpufreq/scaling_max_freq + /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor + |; + + if ($min_freq != $max_freq) { + die "Your OS seems to have an active CPU governor '$governor' -" + . ' this will render benchmark results meaningless. Disable it' + . ' by setting /sys/devices/system/cpu/cpu*/cpufreq/scaling_max_freq' + . ' to the same value as /sys/devices/system/cpu/cpu*/cpufreq/scaling_min_freq' + . " ($min_freq). Alternatively skip this check with --no-cpufreq-checks.\n"; + } +} -1; +my %skip_commits = map { $_ => 1 } qw/ + e1540ee + a5b2936 + 4613ee1 + 419ff18 +/; +my (@to_bench, $not_latest); +for my $commit (`git log --format=%h HEAD ^8330454^ $hri_fn `) { + chomp $commit; + next if $skip_commits{$commit}; + my $diff = `git show -w -U0 --format=%ar%n%b $commit $hri_fn`; + if ($diff =~ /^ (?: \@\@ \s .+? | [+-] sub \s) \$? mk_hash /xm ) { + my ($age) = $diff =~ /\A(.+?)\n/; + + push @to_bench, { + commit => $commit, + title => $not_latest ? $commit : 'LATEST', + desc => sprintf ("commit %s (%smade %s)...\t\t", + $commit, + $not_latest ? '' : 'LATEST, ', + $age, + ), + code => scalar `git show $commit:lib/DBIx/Class/ResultClass/HashRefInflator.pm`, + }; -package benchmark_hashrefinflator; + last if @to_bench == $args->{'bench-commits'}; + $not_latest = 1; + } +} +die "Can't find any commits... something is wrong\n" unless @to_bench; -use warnings; -use strict; +unshift @to_bench, { + desc => "the current uncommitted HRI...\t\t\t\t", + title => 'CURRENT', + code => do { local (@ARGV, $/) = ($hri_fn); <> }, +} if `git status --porcelain $hri_fn`; -use FindBin; -use lib ("$FindBin::Bin/../lib", "$FindBin::Bin/../t/lib"); - -use Benchmark qw/timethis cmpthese/; -use DBICTest; +printf "\nAbout to benchmark %d HRI variants (%s)\n", + scalar @to_bench, + (join ', ', map { $_->{title} } @to_bench), +; -chdir ("$FindBin::Bin/.."); my $schema = DBICTest->init_schema(); -my $test_sub = sub { - my $rs_hashrefinf = $schema->resultset ('Artist')->search ({}, { - prefetch => { cds => 'tracks' }, +# add some extra data for the complex test +$schema->resultset ('Artist')->create({ + name => 'largggge', + cds => [ + { + genre => { name => 'massive' }, + title => 'largesse', + year => 2011, + tracks => [ + { + title => 'larguitto', + cd_single => { + title => 'mongo', + year => 2012, + artist => 1, + genre => { name => 'massive' }, + tracks => [ + { title => 'yo momma' }, + { title => 'so much momma' }, + ], + }, + }, + ], + }, + ], +}); + +# get what data to feed during benchmarks +{ + package _BENCH_::DBIC::InflateResult::Trap; + sub inflate_result { shift; return \@_ } +} +my %bench_dataset = ( + simple => do { + my $rs = $schema->resultset ('Artist')->search ({}, { + prefetch => { cds => 'tracks' }, + result_class => '_BENCH_::DBIC::InflateResult::Trap', }); - $rs_hashrefinf->result_class('DBIx::Class::ResultClass::HashRefInflator::Bench'); - my @stuff = $rs_hashrefinf->all; -}; - - -my $results; -for my $b (keys %DBIx::Class::ResultClass::HashRefInflator::Bench::bench_list) { - - print "Timing $b... "; + [ $rs->all ]; + }, + complex => do { + my $rs = $schema->resultset ('Artist')->search ({}, { + prefetch => { cds => [ { tracks => { cd_single => [qw/artist genre tracks/] } }, 'genre' ] }, + result_class => '_BENCH_::DBIC::InflateResult::Trap', + }); + [ $rs->all ]; + }, +); - # switch the inflator - no warnings qw/redefine once/; - no strict qw/refs/; - local *DBIx::Class::ResultClass::HashRefInflator::Bench::inflate_result = sub { - return $DBIx::Class::ResultClass::HashRefInflator::Bench::bench_list{$b}->(@_[2,3]); - }; +# benchmark coderefs (num iters is set below) +my %num_iters; +my %bench = ( map { $_ => eval "sub { + for (1 .. (\$num_iters{$_}||1) ) { + DBIx::Class::ResultClass::HashRefInflator->inflate_result(\$bench_dataset{$_}) + } +}" } qw/simple complex/ ); + +$|++; +print "\nPre-timing current HRI to determine iteration counts..."; +# crude unreliable and quick test how many to run in the loop +# designed to return a value so that there ~ 1/$div runs in a second +# (based on the current @INC implementation) +my $div = 1; +require DBIx::Class::ResultClass::HashRefInflator; +for (qw/simple complex/) { + local $SIG{__WARN__} = sub {}; + my $tst = Benchmark::timethis(-1, $bench{$_}, '', 'none'); + $num_iters{$_} ||= int( $tst->[5] / $tst->[1] / $div ); + $num_iters{$_} ||= 1; +} +print " done\n\nBenchmarking - this can taka a LOOOOOONG time\n\n"; + +my %results; + +for my $bch (@to_bench) { + Class::Unload->unload('DBIx::Class::ResultClass::HashRefInflator'); + eval $bch->{code} or die $@; + $INC{'DBIx/Class/ResultClass/HashRefInflator.pm'} = $bch->{title}; + + for my $t (qw/simple complex/) { + my $label = "Timing $num_iters{$t} $t iterations of $bch->{desc}"; + + my $bench = Dumbbench->new( + initial_runs => 30, + target_rel_precision => 0.0005, + ); + $bench->add_instances( Dumbbench::Instance::PerlSub->new ( + name => $label, code => $bench{$t}, + )); + + print $label; + $bench->run; + + print( + ($results{ (substr $t, 0, 1) . "_$bch->{title}" } + = Benchmark::Dumb->_new( instance => ($bench->instances)[0] ) ) + ->timestr('') + ); + print "\n"; + } +} - $results->{$b} = timethis (-2, $test_sub); +for my $t (qw/s c/) { + cmpthese ({ map { $_ =~ /^${t}_/ ? ( $_ => $results{$_}) : () } keys %results }, '', ''); } -cmpthese ($results);