From: Peter Rabbitson Date: Mon, 2 Nov 2015 21:05:04 +0000 (+0100) Subject: FINALLY find and fix the elusive parallel test deadlock X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?p=dbsrgits%2FDBIx-Class.git;a=commitdiff_plain;h=630e2ea8a42ab04c12effadf75e11b5081334899 FINALLY find and fix the elusive parallel test deadlock For more details see https://rt.cpan.org/Ticket/Display.html?id=108390 --- diff --git a/Changes b/Changes index f5b552f..9bb59ff 100644 --- a/Changes +++ b/Changes @@ -39,6 +39,7 @@ Revision history for DBIx::Class Optional::Dependencies::req_group_list (no known users in the wild) - Protect tests and codebase from incomplete caller() overrides, like e.g. RT#32640 + - Work around rare test deadlock under heavy parallelism (RT#108390) - Stop using bare $] throughout - protects the codebase from issues similar (but likely not limited to) P5#72210 diff --git a/t/lib/DBICTest.pm b/t/lib/DBICTest.pm index 6da2f7e..ff046a7 100644 --- a/t/lib/DBICTest.pm +++ b/t/lib/DBICTest.pm @@ -4,7 +4,7 @@ package # hide from PAUSE use strict; use warnings; -use DBICTest::Util qw( local_umask dbg DEBUG_TEST_CONCURRENCY_LOCKS ); +use DBICTest::Util qw( local_umask await_flock dbg DEBUG_TEST_CONCURRENCY_LOCKS ); use DBICTest::Schema; use DBICTest::Util::LeakTracer qw/populate_weakregistry assert_empty_weakregistry/; use DBIx::Class::_Util 'detected_reinvoked_destructor'; @@ -94,7 +94,7 @@ sub import { DEBUG_TEST_CONCURRENCY_LOCKS > 1 and dbg "Waiting for EXCLUSIVE global lock..."; - flock ($global_lock_fh, LOCK_EX) or die "Unable to lock $lockpath: $!"; + await_flock ($global_lock_fh, LOCK_EX) or die "Unable to lock $lockpath: $!"; DEBUG_TEST_CONCURRENCY_LOCKS > 1 and dbg "Got EXCLUSIVE global lock"; @@ -118,7 +118,7 @@ sub import { DEBUG_TEST_CONCURRENCY_LOCKS > 1 and dbg "Waiting for SHARED global lock..."; - flock ($global_lock_fh, LOCK_SH) or die "Unable to lock $lockpath: $!"; + await_flock ($global_lock_fh, LOCK_SH) or die "Unable to lock $lockpath: $!"; DEBUG_TEST_CONCURRENCY_LOCKS > 1 and dbg "Got SHARED global lock"; diff --git a/t/lib/DBICTest/BaseSchema.pm b/t/lib/DBICTest/BaseSchema.pm index 80d3fb1..27cdcd7 100644 --- a/t/lib/DBICTest/BaseSchema.pm +++ b/t/lib/DBICTest/BaseSchema.pm @@ -9,7 +9,7 @@ use Fcntl qw(:DEFAULT :seek :flock); use Time::HiRes 'sleep'; use Scope::Guard (); use DBICTest::Util::LeakTracer qw(populate_weakregistry assert_empty_weakregistry); -use DBICTest::Util qw( local_umask dbg DEBUG_TEST_CONCURRENCY_LOCKS ); +use DBICTest::Util qw( local_umask await_flock dbg DEBUG_TEST_CONCURRENCY_LOCKS ); use namespace::clean; sub capture_executed_sql_bind { @@ -151,13 +151,6 @@ sub connection { # an envvar, we can not detect when a user invokes prove -jN. Hence # perform the locking at all times, it shouldn't hurt. # the lock fh *should* inherit across forks/subprocesses - # - # File locking is hard. Really hard. By far the best lock implementation - # I've seen is part of the guts of File::Temp. However it is sadly not - # reusable. Since I am not aware of folks doing NFS parallel testing, - # nor are we known to work on VMS, I am just going to punt this and - # use the portable-ish flock() provided by perl itself. If this does - # not work for you - patches more than welcome. if ( ! $DBICTest::global_exclusive_lock and @@ -215,7 +208,7 @@ sub connection { sysopen ($lock_fh, $lockpath, O_RDWR|O_CREAT) or die "Unable to open $lockpath: $!"; } - flock ($lock_fh, LOCK_EX) or die "Unable to lock $lockpath: $!"; + await_flock ($lock_fh, LOCK_EX) or die "Unable to lock $lockpath: $!"; DEBUG_TEST_CONCURRENCY_LOCKS and dbg "Got $locktype LOCK: $lockpath"; diff --git a/t/lib/DBICTest/Util.pm b/t/lib/DBICTest/Util.pm index 98f05c0..74ba068 100644 --- a/t/lib/DBICTest/Util.pm +++ b/t/lib/DBICTest/Util.pm @@ -25,6 +25,7 @@ use constant DEBUG_TEST_CONCURRENCY_LOCKS => use Config; use Carp 'confess'; +use Fcntl ':flock'; use Scalar::Util qw(blessed refaddr); use DBIx::Class::_Util; @@ -34,7 +35,7 @@ our @EXPORT_OK = qw( local_umask visit_namespaces check_customcond_args - DEBUG_TEST_CONCURRENCY_LOCKS + await_flock DEBUG_TEST_CONCURRENCY_LOCKS ); if (DEBUG_TEST_CONCURRENCY_LOCKS) { @@ -57,6 +58,38 @@ sub dbg ($) { ; } +# File locking is hard. Really hard. By far the best lock implementation +# I've seen is part of the guts of File::Temp. However it is sadly not +# reusable. Since I am not aware of folks doing NFS parallel testing, +# nor are we known to work on VMS, I am just going to punt this and +# use the portable-ish flock() provided by perl itself. If this does +# not work for you - patches more than welcome. +# +# This figure esentially means "how long can a single test hold a +# resource before everyone else gives up waiting and aborts" or +# in other words "how long does the longest test-group legitimally run?" +my $lock_timeout_minutes = 15; # yes, that's long, I know +my $wait_step_seconds = 0.25; + +sub await_flock ($$) { + my ($fh, $locktype) = @_; + + my ($res, $tries); + while( + ! ( $res = flock( $fh, $locktype | LOCK_NB ) ) + and + ++$tries <= $lock_timeout_minutes * 60 / $wait_step_seconds + ) { + select( undef, undef, undef, $wait_step_seconds ); + + # "say something" every 10 cycles to work around RT#108390 + # jesus christ our tooling is such a crock of shit :( + print "#\n" if not $tries % 10; + } + + return $res; +} + sub local_umask { return unless defined $Config{d_umask};