Revert Scheduler plugin back to 0.07 release state
[catagits/Catalyst-Plugin-Scheduler.git] / lib / Catalyst / Plugin / Scheduler.pm
CommitLineData
74e31b02 1package Catalyst::Plugin::Scheduler;
2
3use strict;
4use warnings;
bec4be8c 5use base qw/Class::Accessor::Fast Class::Data::Inheritable/;
6use DateTime;
7use DateTime::Event::Cron;
8use DateTime::TimeZone;
9use File::stat;
74e31b02 10use NEXT;
bec4be8c 11use Set::Scalar;
12use Storable qw/lock_store lock_retrieve/;
74e31b02 13
bec4be8c 14our $VERSION = '0.07';
74e31b02 15
bec4be8c 16__PACKAGE__->mk_classdata( '_events' => [] );
17__PACKAGE__->mk_accessors('_event_state');
18
19sub schedule {
20 my ( $class, %args ) = @_;
21
22 unless ( $args{event} ) {
23 Catalyst::Exception->throw(
24 message => 'The schedule method requires an event parameter' );
25 }
26
27 my $conf = $class->config->{scheduler};
28
29 my $event = {
30 trigger => $args{trigger},
31 event => $args{event},
32 auto_run => ( defined $args{auto_run} ) ? $args{auto_run} : 1,
33 };
34
35 if ( $args{at} ) {
36
37 # replace keywords that Set::Crontab doesn't support
38 $args{at} = _prepare_cron( $args{at} );
39
40 # parse the cron entry into a DateTime::Set
41 my $set;
42 eval { $set = DateTime::Event::Cron->from_cron( $args{at} ) };
43 if ($@) {
44 Catalyst::Exception->throw(
45 "Scheduler: Unable to parse 'at' value "
46 . $args{at} . ': '
47 . $@ );
48 }
49 else {
50 $event->{at} = $args{at};
51 $event->{set} = $set;
52 }
53 }
54
55 push @{ $class->_events }, $event;
56}
57
58sub dispatch {
59 my $c = shift;
60
61 $c->NEXT::dispatch(@_);
62
63 $c->_get_event_state();
64
65 $c->_check_yaml();
66
67 # check if a minute has passed since our last check
68 # This check is not run if the user is manually triggering an event
69 if ( time - $c->_event_state->{last_check} < 60 ) {
70 return unless $c->req->params->{schedule_trigger};
71 }
72 my $last_check = $c->_event_state->{last_check};
73 $c->_event_state->{last_check} = time;
74 $c->_save_event_state();
75
76 my $conf = $c->config->{scheduler};
77 my $last_check_dt = DateTime->from_epoch(
78 epoch => $last_check,
79 time_zone => $conf->{time_zone}
80 );
81 my $now = DateTime->now( time_zone => $conf->{time_zone} );
82
83 EVENT:
84 for my $event ( @{ $c->_events } ) {
85 my $next_run;
86
87 if ( $event->{trigger} && $c->req->params->{schedule_trigger}
88 && $event->{trigger} eq $c->req->params->{schedule_trigger} )
89 {
90
91 # manual trigger, run it now
92 next EVENT unless $c->_event_authorized;
93 $next_run = $now;
94 }
95 else {
96 next EVENT unless $event->{set};
97 $next_run = $event->{set}->next($last_check_dt);
98 }
99
100 if ( $next_run <= $now ) {
101
102 # do some security checking for non-auto-run events
103 if ( !$event->{auto_run} ) {
104 next EVENT unless $c->_event_authorized;
105 }
106
107 # make sure we're the only process running this event
108 next EVENT unless $c->_mark_running($event);
109
110 my $event_name = $event->{trigger} || $event->{event};
111 $c->log->debug("Scheduler: Executing $event_name")
112 if $c->config->{scheduler}->{logging};
113
114 # trap errors
115 local $c->{error} = [];
116
117 # return value/output from the event, if any
118 my $output;
119
120 # run event
121 eval {
122
123 # do not allow the event to modify the response
124 local $c->res->{body};
125 local $c->res->{cookies};
126 local $c->res->{headers};
127 local $c->res->{location};
128 local $c->res->{status};
129
130 if ( ref $event->{event} eq 'CODE' ) {
131 $output = $event->{event}->($c);
132 }
133 else {
134 $output = $c->forward( $event->{event} );
135 }
136 };
137 my @errors = @{ $c->{error} };
138 push @errors, $@ if $@;
139 if (@errors) {
140 $c->log->error(
141 'Scheduler: Error executing ' . "$event_name: $_" )
142 for @errors;
143 $output = join '; ', @errors;
144 }
145
146 $c->_mark_finished( $event, $output );
147 }
148 }
149}
150
151sub setup {
152 my $c = shift;
153
154 # initial configuration
155 $c->config->{scheduler}->{logging} ||= ( $c->debug ) ? 1 : 0;
156 $c->config->{scheduler}->{time_zone} ||= $c->_detect_timezone();
157 $c->config->{scheduler}->{state_file} ||= $c->path_to('scheduler.state');
158 $c->config->{scheduler}->{hosts_allow} ||= '127.0.0.1';
159 $c->config->{scheduler}->{yaml_file} ||= $c->path_to('scheduler.yml');
160
161 $c->NEXT::setup(@_);
162}
163
164sub dump_these {
165 my $c = shift;
166
167 return ( $c->NEXT::dump_these(@_) ) unless @{ $c->_events };
168
169 # for debugging, we dump out a list of all events with their next
170 # scheduled run time
171 return (
172 $c->NEXT::dump_these(@_),
173 [ 'Scheduled Events', $c->scheduler_state ],
174 );
175}
176
177sub scheduler_state {
178 my $c = shift;
179
180 $c->_get_event_state();
181
182 my $conf = $c->config->{scheduler};
183 my $now = DateTime->now( time_zone => $conf->{time_zone} );
184
185 my $last_check = $c->_event_state->{last_check};
186 my $last_check_dt = DateTime->from_epoch(
187 epoch => $last_check,
188 time_zone => $conf->{time_zone},
189 );
190
191 my $event_dump = [];
192 for my $event ( @{ $c->_events } ) {
193 my $dump = {};
194 for my $key ( qw/at trigger event auto_run/ ) {
195 $dump->{$key} = $event->{$key} if $event->{$key};
196 }
197
198 # display the next run time
199 if ( $event->{set} ) {
200 my $next_run = $event->{set}->next($last_check_dt);
201 $dump->{next_run}
202 = $next_run->ymd
203 . q{ } . $next_run->hms
204 . q{ } . $next_run->time_zone_short_name;
205 }
206
207 # display the last run time
208 my $last_run
209 = $c->_event_state->{events}->{ $event->{event} }->{last_run};
210 if ( $last_run ) {
211 $last_run = DateTime->from_epoch(
212 epoch => $last_run,
213 time_zone => $conf->{time_zone},
214 );
215 $dump->{last_run}
216 = $last_run->ymd
217 . q{ } . $last_run->hms
218 . q{ } . $last_run->time_zone_short_name;
219 }
220
221 # display the result of the last run
222 my $output
223 = $c->_event_state->{events}->{ $event->{event} }->{last_output};
224 if ( $output ) {
225 $dump->{last_output} = $output;
226 }
227
228 push @{$event_dump}, $dump;
229 }
230
231 return $event_dump;
232}
233
234# check and reload the YAML file with schedule data
235sub _check_yaml {
236 my ($c) = @_;
237
238 # each process needs to load the YAML file independently
239 if ( $c->_event_state->{yaml_mtime}->{$$} ||= 0 ) {
240 return if ( time - $c->_event_state->{last_check} < 60 );
241 }
242
243 return unless -e $c->config->{scheduler}->{yaml_file};
244
245 eval {
246 my $mtime = ( stat $c->config->{scheduler}->{yaml_file} )->mtime;
247 if ( $mtime > $c->_event_state->{yaml_mtime}->{$$} ) {
248 $c->_event_state->{yaml_mtime}->{$$} = $mtime;
249
250 # clean up old PIDs listed in yaml_mtime
251 foreach my $pid ( keys %{ $c->_event_state->{yaml_mtime} } ) {
252 if ( $c->_event_state->{yaml_mtime}->{$pid} < $mtime ) {
253 delete $c->_event_state->{yaml_mtime}->{$pid};
254 }
255 }
256 $c->_save_event_state();
257
258 # wipe out all current events and reload from YAML
259 $c->_events( [] );
260
261 my $file = $c->config->{scheduler}->{yaml_file};
262 my $yaml;
263
264 eval { require YAML::Syck; };
265 if( $@ ) {
266 require YAML;
267 $yaml = YAML::LoadFile( "$file" );
268 }
269 else {
270 open( my $fh, $file ) or die $!;
271 my $content = do { local $/; <$fh> };
272 close $fh;
273 $yaml = YAML::Syck::Load( $content );
274 }
275
276 foreach my $event ( @{$yaml} ) {
277 $c->schedule( %{$event} );
278 }
279
280 $c->log->info( "Scheduler: PID $$ loaded "
281 . scalar @{$yaml}
282 . ' events from YAML file' )
283 if $c->config->{scheduler}->{logging};
284 }
285 };
286 if ($@) {
287 $c->log->error("Scheduler: Error reading YAML file: $@");
288 }
289}
290
291# Detect the current time zone
292sub _detect_timezone {
293 my $c = shift;
294
295 my $tz;
296 eval { $tz = DateTime::TimeZone->new( name => 'local' ) };
297 if ($@) {
298 $c->log->warn(
299 'Scheduler: Unable to autodetect local time zone, using UTC')
300 if $c->config->{scheduler}->{logging};
301 return 'UTC';
302 }
303 else {
304 $c->log->debug(
305 'Scheduler: Using autodetected time zone: ' . $tz->name )
306 if $c->config->{scheduler}->{logging};
307 return $tz->name;
308 }
309}
310
311# Check for authorized users on non-auto events
312sub _event_authorized {
313 my $c = shift;
314
315 # this should never happen, but just in case...
316 return unless $c->req->address;
317
318 my $hosts_allow = $c->config->{scheduler}->{hosts_allow};
319 $hosts_allow = [$hosts_allow] unless ref($hosts_allow) eq 'ARRAY';
320 my $allowed = Set::Scalar->new( @{$hosts_allow} );
321 return $allowed->contains( $c->req->address );
322}
323
324# get the state from the state file
325sub _get_event_state {
326 my $c = shift;
327
328 if ( -e $c->config->{scheduler}->{state_file} ) {
329 $c->_event_state(
330 lock_retrieve $c->config->{scheduler}->{state_file} );
331 }
332 else {
333
334 # initialize the state file
335 $c->_event_state(
336 { last_check => time,
337 events => {},
338 yaml_mtime => {},
339 }
340 );
341 $c->_save_event_state();
342 }
343}
344
345# Check the state file to ensure we are the only process running an event
346sub _mark_running {
347 my ( $c, $event ) = @_;
348
349 $c->_get_event_state();
350
351 return if
352 $c->_event_state->{events}->{ $event->{event} }->{running};
353
354 # this is a 2-step process to prevent race conditions
355 # 1. write the state file with our PID
356 $c->_event_state->{events}->{ $event->{event} }->{running} = $$;
357 $c->_save_event_state();
358
359 # 2. re-read the state file and make sure it's got the same PID
360 $c->_get_event_state();
361 if ( $c->_event_state->{events}->{ $event->{event} }->{running} == $$ ) {
362 return 1;
363 }
364
365 return;
366}
367
368# Mark an event as finished
369sub _mark_finished {
370 my ( $c, $event, $output ) = @_;
371
372 $c->_event_state->{events}->{ $event->{event} }->{running} = 0;
373 $c->_event_state->{events}->{ $event->{event} }->{last_run} = time;
374 $c->_event_state->{events}->{ $event->{event} }->{last_output} = $output;
375 $c->_save_event_state();
376}
377
378# update the state file on disk
379sub _save_event_state {
380 my $c = shift;
381
382 lock_store $c->_event_state, $c->config->{scheduler}->{state_file};
383}
384
385# Set::Crontab does not support day names, or '@' shortcuts
386sub _prepare_cron {
387 my $cron = shift;
388
389 return $cron unless $cron =~ /\w/;
390
391 my %replace = (
392 jan => 1,
393 feb => 2,
394 mar => 3,
395 apr => 4,
396 may => 5,
397 jun => 6,
398 jul => 7,
399 aug => 8,
400 sep => 9,
401 'oct' => 10,
402 nov => 11,
403 dec => 12,
404
405 sun => 0,
406 mon => 1,
407 tue => 2,
408 wed => 3,
409 thu => 4,
410 fri => 5,
411 sat => 6,
412 );
413
414 my %replace_at = (
415 'yearly' => '0 0 1 1 *',
416 'annually' => '0 0 1 1 *',
417 'monthly' => '0 0 1 * *',
418 'weekly' => '0 0 * * 0',
419 'daily' => '0 0 * * *',
420 'midnight' => '0 0 * * *',
421 'hourly' => '0 * * * *',
422 );
423
424 if ( $cron =~ /^\@/ ) {
425 $cron =~ s/^\@//;
426 return $replace_at{ $cron };
427 }
428
429 for my $name ( keys %replace ) {
430 my $value = $replace{$name};
431 $cron =~ s/$name/$value/i;
432 last unless $cron =~ /\w/;
433 }
434 return $cron;
435}
436
4371;
438__END__
74e31b02 439
440=pod
441
442=head1 NAME
443
444Catalyst::Plugin::Scheduler - Schedule events to run in a cron-like fashion
445
446=head1 SYNOPSIS
447
448 use Catalyst qw/Scheduler/;
449
450 # run remove_sessions in the Cron controller every hour
451 __PACKAGE__->schedule(
452 at => '0 * * * *',
453 event => '/cron/remove_sessions'
454 );
455
456 # Run a subroutine at 4:05am every Sunday
457 __PACKAGE__->schedule(
458 at => '5 4 * * sun',
459 event => \&do_stuff,
460 );
461
68f800bd 462 # A long-running scheduled event that must be triggered
463 # manually by an authorized user
464 __PACKAGE__->schedule(
465 trigger => 'rebuild_search_index',
466 event => '/cron/rebuild_search_index',
467 );
468 $ wget -q http://www.myapp.com/?schedule_trigger=rebuild_search_index
f9d8e3cf 469
74e31b02 470=head1 DESCRIPTION
471
472This plugin allows you to schedule events to run at recurring intervals.
473Events will run during the first request which meets or exceeds the specified
474time. Depending on the level of traffic to the application, events may or may
475not run at exactly the correct time, but it should be enough to satisfy many
476basic scheduling needs.
477
478=head1 CONFIGURATION
479
480Configuration is optional and is specified in MyApp->config->{scheduler}.
481
482=head2 logging
483
484Set to 1 to enable logging of events as they are executed. This option is
485enabled by default when running under -Debug mode. Errors are always logged
486regardless of the value of this option.
487
cbf1ecfe 488=head2 time_zone
489
490The time zone of your system. This will be autodetected where possible, or
491will default to UTC (GMT). You can override the detection by providing a
492valid L<DateTime> time zone string, such as 'America/New_York'.
493
74e31b02 494=head2 state_file
495
496The current state of every event is stored in a file. By default this is
f9d8e3cf 497$APP_HOME/scheduler.state. This file is created on the first request if it
498does not already exist.
74e31b02 499
68f800bd 500=head2 yaml_file
501
502The location of the optional YAML event configuration file. By default this
503is $APP_HOME/scheduler.yml.
504
74e31b02 505=head2 hosts_allow
506
507This option specifies IP addresses for trusted users. This option defaults
508to 127.0.0.1. Multiple addresses can be specified by using an array
509reference. This option is used for both events where auto_run is set to 0
510and for manually-triggered events.
511
512 __PACKAGE__->config->{scheduler}->{hosts_allow} = '192.168.1.1';
513 __PACKAGE__->config->{scheduler}->{hosts_allow} = [
514 '127.0.0.1',
515 '192.168.1.1'
516 ];
517
bec4be8c 518=head1 SCHEDULING
74e31b02 519
bec4be8c 520=head2 AUTOMATED EVENTS
74e31b02 521
522Events are scheduled by calling the class method C<schedule>.
523
524 MyApp->schedule(
525 at => '0 * * * *',
526 event => '/cron/remove_sessions',
527 );
528
529 package MyApp::Controller::Cron;
530
531 sub remove_sessions : Private {
532 my ( $self, $c ) = @_;
533
534 $c->delete_expired_sessions;
535 }
536
bec4be8c 537=head3 at
74e31b02 538
539The time to run an event is specified using L<crontab(5)>-style syntax.
540
541 5 0 * * * # 5 minutes after midnight, every day
542 15 14 1 * * # run at 2:15pm on the first of every month
543 0 22 * * 1-5 # run at 10 pm on weekdays
544 5 4 * * sun # run at 4:05am every Sunday
545
546From crontab(5):
547
548 field allowed values
549 ----- --------------
550 minute 0-59
551 hour 0-23
552 day of month 1-31
553 month 0-12 (or names, see below)
554 day of week 0-7 (0 or 7 is Sun, or use names)
555
bec4be8c 556Instead of the first five fields, one of seven special strings may appear:
74e31b02 557
558 string meaning
559 ------ -------
560 @yearly Run once a year, "0 0 1 1 *".
561 @annually (same as @yearly)
562 @monthly Run once a month, "0 0 1 * *".
563 @weekly Run once a week, "0 0 * * 0".
564 @daily Run once a day, "0 0 * * *".
565 @midnight (same as @daily)
566 @hourly Run once an hour, "0 * * * *".
567
bec4be8c 568=head3 event
74e31b02 569
570The event to run at the specified time can be either a Catalyst private
571action path or a coderef. Both types of event methods will receive the $c
572object from the current request, but you must not rely on any request-specific
573information present in $c as it will be from a random user request at or near
574the event's specified run time.
575
576Important: Methods used for events should be marked C<Private> so that
577they can not be executed via the browser.
578
bec4be8c 579=head3 auto_run
74e31b02 580
581The auto_run parameter specifies when the event is allowed to be executed.
582By default this option is set to 1, so the event will be executed during the
583first request that matches the specified time in C<at>.
584
585If set to 0, the event will only run when a request is made by a user from
586an authorized address. The purpose of this option is to allow long-running
587tasks to execute only for certain users.
588
589 MyApp->schedule(
590 at => '0 0 * * *',
591 event => '/cron/rebuild_search_index',
592 auto_run => 0,
593 );
594
595 package MyApp::Controller::Cron;
596
597 sub rebuild_search_index : Private {
598 my ( $self, $c ) = @_;
599
600 # rebuild the search index, this may take a long time
601 }
602
603Now, the search index will only be rebuilt when a request is made from a user
604whose IP address matches the list in the C<hosts_allow> config option. To
605run this event, you probably want to ping the app from a cron job.
606
f9d8e3cf 607 0 0 * * * wget -q http://www.myapp.com/
74e31b02 608
bec4be8c 609=head2 MANUAL EVENTS
74e31b02 610
611To create an event that does not run on a set schedule and must be manually
612triggered, you can specify the C<trigger> option instead of C<at>.
613
614 __PACKAGE__->schedule(
615 trigger => 'send_email',
616 event => '/events/send_email',
617 );
618
619The event may then be triggered by a standard web request from an authorized
620user. The trigger to run is specified by using a special GET parameter,
621'schedule_trigger'; the path requested does not matter.
622
623 http://www.myapp.com/?schedule_trigger=send_email
624
625By default, manual events may only be triggered by requests made from
626localhost (127.0.0.1). To allow other addresses to run events, use the
68f800bd 627configuration option L</hosts_allow>.
628
629=head1 SCHEDULING USING A YAML FILE
630
631As an alternative to using the schedule() method, you may define scheduled
632events in an external YAML file. By default, the plugin looks for the
633existence of a file called C<schedule.yml> in your application's home
634directory. You can change the filename using the configuration option
635L</yaml_file>.
636
bec4be8c 637Modifications to this file will be re-read once per minute during the normal
638event checking process.
68f800bd 639
640Here's an example YAML configuration file with 4 events. Each event is
641denoted with a '-' character, followed by the same parameters used by the
642C<schedule> method. Note that coderef events are not supported by the YAML
643file.
644
645 ---
646 - at: '* * * * *'
647 event: /cron/delete_sessions
648 - event: /cron/send_email
649 trigger: send_email
650 - at: '@hourly'
651 event: /cron/hourly
652 - at: 0 0 * * *
653 auto_run: 0
654 event: /cron/rebuild_search_index
74e31b02 655
656=head1 SECURITY
657
658All events are run inside of an eval container. This protects the user from
659receiving any error messages or page crashes if an event fails to run
660properly. All event errors are logged, even if logging is disabled.
661
74e31b02 662=head1 PLUGIN SUPPORT
663
664Other plugins may register scheduled events if they need to perform periodic
665maintenance. Plugin authors, B<be sure to inform your users> if you do this!
666Events should be registered from a plugin's C<setup> method.
667
668 sub setup {
669 my $c = shift;
670 $c->NEXT::setup(@_);
671
672 if ( $c->can('schedule') ) {
673 $c->schedule(
674 at => '0 * * * *',
675 event => \&cleanup,
676 );
677 }
678 }
bec4be8c 679
f9d8e3cf 680=head1 CAVEATS
681
682The time at which an event will run is determined completely by the requests
683made to the application. Apps with heavy traffic may have events run at very
684close to the correct time, whereas apps with low levels of traffic may see
685events running much later than scheduled. If this is a problem, you can use
686a real cron entry that simply hits your application at the desired time.
687
688 0 * * * * wget -q http://www.myapp.com/
689
690Events which consume a lot of time will slow the request processing for the
691user who triggers the event. For these types of events, you should use
692auto_run => 0 or manual event triggering.
693
694=head1 PERFORMANCE
695
bec4be8c 696The plugin only checks once per minute if any events need to be run, so the
697overhead on each request is minimal. On my test server, the difference
698between running with Scheduler and without was only around 0.02% (0.004
699seconds).
f9d8e3cf 700
68f800bd 701Of course, when a scheduled event runs, performance will depend on what's
702being run in the event.
bec4be8c 703
704=head1 METHODS
705
706=head2 schedule
707
708Schedule is a class method for adding scheduled events. See the
709L<"/SCHEDULING"> section for more information.
710
711=head2 scheduler_state
712
713The current state of all scheduled events is available in an easy-to-use
714format by calling $c->scheduler_state. You can use this data to build an
715admin view into the scheduling engine, for example. This same data is also
716displayed on the Catalyst debug screen.
717
718This method returns an array reference containing a hash reference for each
719event.
720
721 [
722 {
723 'last_run' => '2005-12-29 16:29:33 EST',
724 'auto_run' => 1,
725 'last_output' => 1,
726 'at' => '0 0 * * *',
727 'next_run' => '2005-12-30 00:00:00 EST',
728 'event' => '/cron/session_cleanup'
729 },
730 {
731 'auto_run' => 1,
732 'at' => '0 0 * * *',
733 'next_run' => '2005-12-30 00:00:00 EST',
734 'event' => '/cron/build_rss'
735 },
736 ]
737
738=head1 INTERNAL METHODS
739
740The following methods are extended by this plugin.
741
742=over 4
743
744=item dispatch
745
746The main scheduling logic takes place during the dispatch phase.
747
748=item dump_these
749
750On the Catalyst debug screen, all scheduled events are displayed along with
751the next time they will be executed.
752
753=item setup
754
755=back
74e31b02 756
757=head1 SEE ALSO
758
bec4be8c 759L<crontab(5)>
74e31b02 760
bec4be8c 761=head1 AUTHOR
74e31b02 762
763Andy Grundman, <andy@hybridized.org>
764
765=head1 COPYRIGHT
766
767This program is free software, you can redistribute it and/or modify it
768under the same terms as Perl itself.
769
770=cut