Commit | Line | Data |
55c0d020 |
1 | package Object::Remote::WatchDog; |
c824fdf3 |
2 | |
55c0d020 |
3 | use Object::Remote::MiniLoop; |
5add5e29 |
4 | use Object::Remote::Logging qw (:log :dlog router); |
55c0d020 |
5 | use Moo; |
c824fdf3 |
6 | |
c824fdf3 |
#number of seconds passed to alarm(); has to be supplied to the constructor
has timeout => (
  is       => 'ro',
  required => 1,
);

#NOTE(review): presumably this keeps log messages generated by this package
#from being forwarded by the logging router - confirm against
#Object::Remote::Logging
BEGIN { router()->exclude_forwarding; }
10 | |
e42ea8c8 |
#return the global watchdog, constructing it from @args the first time
#through; once a watchdog exists it is returned and @args is not used
sub instance {
  my ($class, @args) = @_;
  our $WATCHDOG;

  unless ($WATCHDOG) {
    log_trace { "Constructing new instance of global watchdog" };
    $WATCHDOG = $class->new(@args);
  }

  return $WATCHDOG;
}
19 | |
#start the watchdog: install the ALRM handler and arm the alarm with the
#configured timeout
sub BUILD {
  my ($self) = @_;
  my $timeout = $self->timeout;

  #the handler exits instead of dying: when the watchdog fires the process
  #has to go away, and a die could be caught by an eval which would leave
  #the process running
  my $on_expire = sub {
    log_fatal { "Watchdog has expired, terminating the process" };
    exit(1);
  };
  $SIG{ALRM} = $on_expire;

  Dlog_debug { "Initializing watchdog with timeout of $_ seconds" } $timeout;
  alarm($timeout);
}
35 | |
#re-arm the alarm with the timeout of the global watchdog; this has to be
#invoked at least once per timeout or the watchdog kills the process
#dies if the global watchdog has not been constructed yet
sub reset {
  our $WATCHDOG;

  unless (defined $WATCHDOG) {
    die "Attempt to reset the watchdog before it was constructed";
  }

  log_debug { "Watchdog has been reset" };
  return alarm($WATCHDOG->timeout);
}
45 | |
#cancel the pending alarm so the watchdog no longer kills the process
#this method has to be called explicitly - a watchdog that is merely lost
#by going out of scope is still expected to terminate the process
sub shutdown {
  log_debug { "Watchdog is shutting down" };
  return alarm(0);
}
55 | |
56 | 1; |
57 | |
8dbf62a5 |
58 | =head1 NAME |
59 | |
60 | Object::Remote::WatchDog - alarm-based event loop timeout singleton |
61 | |
62 | =head1 DESCRIPTION |
63 | |
64 | This is a singleton class intended to be used in remote nodes to kill the |
65 | process if the event loop seems to have stalled for longer than the timeout |
66 | specified. |
67 | |
68 | =head1 METHODS |
69 | |
70 | The following are all class methods. |
71 | |
72 | =head2 instance |
73 | |
74 | my $d = Object::Remote::WatchDog->instance(timeout => 10); |
75 | |
76 | Creates a new watchdog if there wasn't one yet, with the timeout set to the |
77 | specified value. The timeout argument is required. The timeout is immediately |
78 | started by calling C<alarm> with the timeout specified. The C<ALRM> signal is |
79 | replaced with a handler that, when triggered, quits the process with an error. |
80 | |
81 | If a watchdog already exists it is returned unchanged; in that case the |
82 | arguments, including the timeout, are ignored and the alarm is not restarted. |
83 | |
84 | =head2 reset |
85 | |
86 | Object::Remote::WatchDog->reset; |
87 | |
88 | Calls C<alarm> with the timeout value of the current watchdog singleton to |
89 | reset it. Throws an exception if there is no current singleton. Intended to be |
90 | called repeatedly by the event loop to signal it's still running and not |
91 | stalled. |
92 | |
93 | =head2 shutdown |
94 | |
95 | Object::Remote::WatchDog->shutdown; |
96 | |
97 | Sets C<alarm> back to 0, thus preventing the C<ALRM> handler from quitting the |
98 | process. |
99 | |
100 | =cut |
101 | |
c824fdf3 |
102 | |