Scheduler 0.03
[catagits/Catalyst-Plugin-Scheduler.git] / lib / Catalyst / Plugin / Scheduler.pm
CommitLineData
74e31b02 1package Catalyst::Plugin::Scheduler;
2
3use strict;
4use warnings;
f9d8e3cf 5use base qw/Class::Accessor::Fast Class::Data::Inheritable/;
cbf1ecfe 6use DateTime;
7use DateTime::Event::Cron;
8use DateTime::TimeZone;
68f800bd 9use File::stat;
74e31b02 10use NEXT;
d2c7c91a 11use Set::Scalar;
f9d8e3cf 12use Storable qw/lock_store lock_retrieve/;
13use YAML;
74e31b02 14
our $VERSION = '0.03';

# The list of registered events is class data (schedule() pushes onto it);
# the contents of the state file are held in a per-object accessor.
__PACKAGE__->mk_classdata( _events => [] );
__PACKAGE__->mk_accessors( qw/_event_state/ );
cbf1ecfe 19
# Register an event with the scheduler.
#
# Arguments (named):
#   event    - required; a private action path or a coderef
#   at       - optional crontab-style schedule (names and '@' shortcuts are
#              translated by _prepare_cron before parsing)
#   trigger  - optional name used to run the event manually
#   auto_run - optional; defaults to 1 when not defined
#
# Throws a Catalyst::Exception when 'event' is missing or when the 'at'
# value cannot be parsed.
sub schedule {
    my ( $class, %args ) = @_;

    unless ( $args{event} ) {
        Catalyst::Exception->throw(
            message => 'The schedule method requires an event parameter' );
    }

    my $event = {
        trigger  => $args{trigger},
        event    => $args{event},
        auto_run => ( defined $args{auto_run} ) ? $args{auto_run} : 1,
    };

    if ( $args{at} ) {

        # replace keywords that Set::Crontab doesn't support
        $args{at} = _prepare_cron( $args{at} );

        # parse the cron entry into a DateTime::Set
        my $set;
        eval { $set = DateTime::Event::Cron->from_cron( $args{at} ) };
        if ($@) {
            Catalyst::Exception->throw(
                "Scheduler: Unable to parse 'at' value "
                  . $args{at} . ': '
                  . $@ );
        }
        else {
            $event->{set} = $set;
        }
    }

    push @{ $class->_events }, $event;
}
57
# Extends the dispatch phase: after the normal dispatch has run, check
# (at most once per minute, unless a manual trigger was requested) whether
# any registered event is due, and run each due event inside an eval so
# that failures are logged instead of reaching the user.
sub dispatch {
    my $c = shift;

    $c->NEXT::dispatch(@_);

    $c->_get_event_state();

    $c->_check_yaml();

    # check if a minute has passed since our last check
    # This check is not run if the user is manually triggering an event
    if ( time - $c->_event_state->{last_check} < 60 ) {
        return unless $c->req->params->{schedule_trigger};
    }
    my $last_check = $c->_event_state->{last_check};
    $c->_event_state->{last_check} = time;
    $c->_save_event_state();

    my $conf          = $c->config->{scheduler};
    my $last_check_dt = DateTime->from_epoch(
        epoch     => $last_check,
        time_zone => $conf->{time_zone}
    );
    my $now = DateTime->now( time_zone => $conf->{time_zone} );

  EVENT:
    for my $event ( @{ $c->_events } ) {
        my $next_run;

        # Most requests carry no schedule_trigger parameter; test for
        # definedness first so the string comparison below never sees undef
        # (which would emit an "uninitialized value" warning per event).
        my $requested = $c->req->params->{schedule_trigger};

        if (   $event->{trigger}
            && defined $requested
            && $event->{trigger} eq $requested )
        {

            # manual trigger, run it now
            next EVENT unless $c->_event_authorized;
            $next_run = $now;
        }
        else {
            next EVENT unless $event->{set};
            $next_run = $event->{set}->next($last_check_dt);
        }

        if ( $next_run <= $now ) {

            # do some security checking for non-auto-run events
            if ( !$event->{auto_run} ) {
                next EVENT unless $c->_event_authorized;
            }

            # make sure we're the only process running this event
            next EVENT unless $c->_mark_running($event);

            my $event_name = $event->{trigger} || $event->{event};
            $c->log->debug("Scheduler: Executing $event_name")
              if $c->config->{scheduler}->{logging};

            # trap errors
            local $c->{error} = [];

            # run event
            eval {

                # do not allow the event to modify the response
                local $c->res->{body};
                local $c->res->{cookies};
                local $c->res->{headers};
                local $c->res->{location};
                local $c->res->{status};

                if ( ref $event->{event} eq 'CODE' ) {
                    $event->{event}->($c);
                }
                else {
                    $c->forward( $event->{event} );
                }
            };
            my @errors = @{ $c->{error} };
            push @errors, $@ if $@;
            if (@errors) {
                $c->log->error(
                    'Scheduler: Error executing ' . "$event_name: $_" )
                  for @errors;
            }

            # always release the event, whether or not it failed
            $c->_mark_finished($event);
        }
    }
}
146
# Fill in default scheduler configuration, then continue the setup chain.
sub setup {
    my $c = shift;

    # Defaults are applied in this order, and each one is only computed when
    # the corresponding option is unset or false.
    my @defaults = (
        logging     => sub { ( $c->debug ) ? 1 : 0 },
        time_zone   => sub { $c->_detect_timezone() },
        state_file  => sub { $c->path_to('scheduler.state') },
        hosts_allow => sub { '127.0.0.1' },
        yaml_file   => sub { $c->path_to('scheduler.yml') },
    );

    while ( my ( $option, $default ) = splice @defaults, 0, 2 ) {
        $c->config->{scheduler}->{$option} ||= $default->();
    }

    $c->NEXT::setup(@_);
}
159
# Reload the scheduled events from the YAML file when it has changed
sub _check_yaml {
    my ($c) = @_;

    my $state = $c->_event_state;

    # each process records the YAML mtime it last loaded under its own PID;
    # once it has loaded the file, it looks again at most once per minute
    my $loaded_mtime = ( $state->{yaml_mtime}->{$$} ||= 0 );
    return if $loaded_mtime && time - $state->{last_check} < 60;

    my $yaml_file = $c->config->{scheduler}->{yaml_file};
    return unless -e $yaml_file;

    eval {
        my $mtime = ( stat $yaml_file )->mtime;

        # nothing to do unless the file is newer than what we loaded
        return unless $mtime > $state->{yaml_mtime}->{$$};

        $state->{yaml_mtime}->{$$} = $mtime;
        $c->_save_event_state();

        # wipe out all current events and reload from YAML
        $c->_events( [] );

        my $yaml = YAML::LoadFile($yaml_file);

        for my $entry ( @{$yaml} ) {
            $c->schedule( %{$entry} );
        }

        my $count = scalar @{$yaml};
        $c->log->info(
            "Scheduler: PID $$ loaded " . $count . ' events from YAML file' )
          if $c->config->{scheduler}->{logging};
    };

    $c->log->error("Error reading YAML file: $@") if $@;
}
196
# Work out the name of the local time zone, falling back to UTC
sub _detect_timezone {
    my $c = shift;

    my $local   = eval { DateTime::TimeZone->new( name => 'local' ) };
    my $failed  = $@;
    my $logging = $c->config->{scheduler}->{logging};

    unless ($failed) {
        my $name = $local->name;
        $c->log->debug( 'Scheduler: Using autodetected time zone: ' . $name )
          if $logging;
        return $name;
    }

    $c->log->warn(
        'Scheduler: Unable to autodetect local time zone, using UTC')
      if $logging;
    return 'UTC';
}
216
# Decide whether the requesting address may run restricted events
# (manual triggers and events registered with auto_run => 0)
sub _event_authorized {
    my $c = shift;

    # this should never happen, but just in case...
    my $address = $c->req->address;
    return unless $address;

    # hosts_allow may be a single address or an array reference of addresses
    my $hosts_allow = $c->config->{scheduler}->{hosts_allow};
    my @hosts =
      ref($hosts_allow) eq 'ARRAY' ? @{$hosts_allow} : ($hosts_allow);

    return Set::Scalar->new(@hosts)->contains($address);
}
229
# Load the event state from the state file, creating the file if needed
sub _get_event_state {
    my $c = shift;

    my $state_file = $c->config->{scheduler}->{state_file};

    # existing state file: read it under a lock
    return $c->_event_state( lock_retrieve($state_file) )
      if -e $state_file;

    # no state file yet: start with a fresh state and write it out
    my %fresh = (
        last_check => time,
        yaml_mtime => {},
    );
    $c->_event_state( \%fresh );
    return $c->_save_event_state();
}
249
# Claim an event for this process via the state file.
# Returns 1 when this process holds the claim, nothing otherwise.
sub _mark_running {
    my ( $c, $event ) = @_;

    my $key = $event->{event};

    # somebody already holds this event
    $c->_get_event_state();
    return if $c->_event_state->{$key};

    # this is a 2-step process to prevent race conditions
    # step 1: record our PID against the event and write the state file
    $c->_event_state->{$key} = $$;
    $c->_save_event_state();

    # step 2: read the state file back; the claim only counts if the PID
    # stored there is still ours
    $c->_get_event_state();
    return unless $c->_event_state->{$key} == $$;

    return 1;
}
271
# Release an event: clear its entry in the state and write the state out
sub _mark_finished {
    my ( $c, $event ) = @_;

    my $key = $event->{event};
    $c->_event_state->{$key} = 0;

    $c->_save_event_state();
}
279
# Write the current event state to the state file, under a lock
sub _save_event_state {
    my $c = shift;

    my $state      = $c->_event_state;
    my $state_file = $c->config->{scheduler}->{state_file};

    lock_store( $state, $state_file );
}
286
# Set::Crontab does not support month/day names or '@' shortcuts, so
# translate them into plain numeric crontab syntax.
#
# Takes the raw 'at' string and returns the translated string.  Entries
# that contain no letters are returned unchanged.
sub _prepare_cron {
    my $cron = shift;

    # only entries containing letters can hold a name or a shortcut
    return $cron unless $cron =~ /[a-z]/i;

    my %shortcut_for = (
        'yearly'   => '0 0 1 1 *',
        'annually' => '0 0 1 1 *',
        'monthly'  => '0 0 1 * *',
        'weekly'   => '0 0 * * 0',
        'daily'    => '0 0 * * *',
        'midnight' => '0 0 * * *',
        'hourly'   => '0 * * * *',
    );

    # Shortcuts are expanded before any name is replaced; otherwise the
    # 'mon' inside '@monthly' could be rewritten to '@1thly' first.
    if ( $cron
        =~ /^\@(yearly|annually|monthly|weekly|daily|midnight|hourly)/ )
    {
        return $shortcut_for{$1};
    }

    my %number_for = (
        jan   => 1,
        feb   => 2,
        mar   => 3,
        apr   => 4,
        may   => 5,
        jun   => 6,
        jul   => 7,
        aug   => 8,
        sep   => 9,
        'oct' => 10,
        nov   => 11,
        dec   => 12,

        sun => 0,
        mon => 1,
        tue => 2,
        wed => 3,
        thu => 4,
        fri => 5,
        sat => 6,
    );

    # replace every occurrence of every name in one pass, ignoring case
    my $names = join '|', keys %number_for;
    $cron =~ s/($names)/$number_for{ lc $1 }/gi;

    return $cron;
}
339
74e31b02 3401;
341__END__
342
343=pod
344
345=head1 NAME
346
347Catalyst::Plugin::Scheduler - Schedule events to run in a cron-like fashion
348
349=head1 SYNOPSIS
350
351 use Catalyst qw/Scheduler/;
352
353 # run remove_sessions in the Cron controller every hour
354 __PACKAGE__->schedule(
355 at => '0 * * * *',
356 event => '/cron/remove_sessions'
357 );
358
359 # Run a subroutine at 4:05am every Sunday
360 __PACKAGE__->schedule(
361 at => '5 4 * * sun',
362 event => \&do_stuff,
363 );
364
68f800bd 365 # A long-running scheduled event that must be triggered
366 # manually by an authorized user
367 __PACKAGE__->schedule(
368 trigger => 'rebuild_search_index',
369 event => '/cron/rebuild_search_index',
370 );
371 $ wget -q http://www.myapp.com/?schedule_trigger=rebuild_search_index
f9d8e3cf 372
74e31b02 373=head1 DESCRIPTION
374
375This plugin allows you to schedule events to run at recurring intervals.
376Events will run during the first request which meets or exceeds the specified
377time. Depending on the level of traffic to the application, events may or may
378not run at exactly the correct time, but it should be enough to satisfy many
379basic scheduling needs.
380
381=head1 CONFIGURATION
382
383Configuration is optional and is specified in MyApp->config->{scheduler}.
384
385=head2 logging
386
387Set to 1 to enable logging of events as they are executed. This option is
388enabled by default when running under -Debug mode. Errors are always logged
389regardless of the value of this option.
390
cbf1ecfe 391=head2 time_zone
392
393The time zone of your system. This will be autodetected where possible, or
394will default to UTC (GMT). You can override the detection by providing a
395valid L<DateTime> time zone string, such as 'America/New_York'.
396
74e31b02 397=head2 state_file
398
399The current state of every event is stored in a file. By default this is
f9d8e3cf 400$APP_HOME/scheduler.state. This file is created on the first request if it
401does not already exist.
74e31b02 402
68f800bd 403=head2 yaml_file
404
405The location of the optional YAML event configuration file. By default this
406is $APP_HOME/scheduler.yml.
407
74e31b02 408=head2 hosts_allow
409
410This option specifies IP addresses for trusted users. This option defaults
411to 127.0.0.1. Multiple addresses can be specified by using an array
412reference. This option is used for both events where auto_run is set to 0
413and for manually-triggered events.
414
415 __PACKAGE__->config->{scheduler}->{hosts_allow} = '192.168.1.1';
416 __PACKAGE__->config->{scheduler}->{hosts_allow} = [
417 '127.0.0.1',
418 '192.168.1.1'
419 ];
420
421=head1 SCHEDULING
422
423=head2 AUTOMATED EVENTS
424
425Events are scheduled by calling the class method C<schedule>.
426
427 MyApp->schedule(
428 at => '0 * * * *',
429 event => '/cron/remove_sessions',
430 );
431
432 package MyApp::Controller::Cron;
433
434 sub remove_sessions : Private {
435 my ( $self, $c ) = @_;
436
437 $c->delete_expired_sessions;
438 }
439
440=head3 at
441
442The time to run an event is specified using L<crontab(5)>-style syntax.
443
444 5 0 * * * # 5 minutes after midnight, every day
445 15 14 1 * * # run at 2:15pm on the first of every month
446 0 22 * * 1-5 # run at 10 pm on weekdays
447 5 4 * * sun # run at 4:05am every Sunday
448
449From crontab(5):
450
    field          allowed values
    -----          --------------
    minute         0-59
    hour           0-23
    day of month   1-31
    month          1-12 (or names, see below)
    day of week    0-7 (0 or 7 is Sun, or use names)
458
459Instead of the first five fields, one of seven special strings may appear:
460
461 string meaning
462 ------ -------
463 @yearly Run once a year, "0 0 1 1 *".
464 @annually (same as @yearly)
465 @monthly Run once a month, "0 0 1 * *".
466 @weekly Run once a week, "0 0 * * 0".
467 @daily Run once a day, "0 0 * * *".
468 @midnight (same as @daily)
469 @hourly Run once an hour, "0 * * * *".
470
471=head3 event
472
473The event to run at the specified time can be either a Catalyst private
474action path or a coderef. Both types of event methods will receive the $c
475object from the current request, but you must not rely on any request-specific
476information present in $c as it will be from a random user request at or near
477the event's specified run time.
478
479Important: Methods used for events should be marked C<Private> so that
480they can not be executed via the browser.
481
482=head3 auto_run
483
484The auto_run parameter specifies when the event is allowed to be executed.
485By default this option is set to 1, so the event will be executed during the
486first request that matches the specified time in C<at>.
487
488If set to 0, the event will only run when a request is made by a user from
489an authorized address. The purpose of this option is to allow long-running
490tasks to execute only for certain users.
491
492 MyApp->schedule(
493 at => '0 0 * * *',
494 event => '/cron/rebuild_search_index',
495 auto_run => 0,
496 );
497
498 package MyApp::Controller::Cron;
499
500 sub rebuild_search_index : Private {
501 my ( $self, $c ) = @_;
502
503 # rebuild the search index, this may take a long time
504 }
505
506Now, the search index will only be rebuilt when a request is made from a user
507whose IP address matches the list in the C<hosts_allow> config option. To
508run this event, you probably want to ping the app from a cron job.
509
f9d8e3cf 510 0 0 * * * wget -q http://www.myapp.com/
74e31b02 511
512=head2 MANUAL EVENTS
513
514To create an event that does not run on a set schedule and must be manually
515triggered, you can specify the C<trigger> option instead of C<at>.
516
517 __PACKAGE__->schedule(
518 trigger => 'send_email',
519 event => '/events/send_email',
520 );
521
522The event may then be triggered by a standard web request from an authorized
523user. The trigger to run is specified by using a special GET parameter,
524'schedule_trigger'; the path requested does not matter.
525
526 http://www.myapp.com/?schedule_trigger=send_email
527
528By default, manual events may only be triggered by requests made from
529localhost (127.0.0.1). To allow other addresses to run events, use the
68f800bd 530configuration option L</hosts_allow>.
531
532=head1 SCHEDULING USING A YAML FILE
533
As an alternative to using the schedule() method, you may define scheduled
events in an external YAML file. By default, the plugin looks for the
existence of a file called C<scheduler.yml> in your application's home
directory. You can change the filename using the configuration option
L</yaml_file>.
539
540Modifications to this file will be re-read once per minute during the normal
541event checking process.
542
543Here's an example YAML configuration file with 4 events. Each event is
544denoted with a '-' character, followed by the same parameters used by the
545C<schedule> method. Note that coderef events are not supported by the YAML
546file.
547
548 ---
549 - at: '* * * * *'
550 event: /cron/delete_sessions
551 - event: /cron/send_email
552 trigger: send_email
553 - at: '@hourly'
554 event: /cron/hourly
555 - at: 0 0 * * *
556 auto_run: 0
557 event: /cron/rebuild_search_index
74e31b02 558
559=head1 SECURITY
560
561All events are run inside of an eval container. This protects the user from
562receiving any error messages or page crashes if an event fails to run
563properly. All event errors are logged, even if logging is disabled.
564
74e31b02 565=head1 PLUGIN SUPPORT
566
567Other plugins may register scheduled events if they need to perform periodic
568maintenance. Plugin authors, B<be sure to inform your users> if you do this!
569Events should be registered from a plugin's C<setup> method.
570
571 sub setup {
572 my $c = shift;
573 $c->NEXT::setup(@_);
574
575 if ( $c->can('schedule') ) {
576 $c->schedule(
577 at => '0 * * * *',
578 event => \&cleanup,
579 );
580 }
581 }
f9d8e3cf 582
583=head1 CAVEATS
584
585The time at which an event will run is determined completely by the requests
586made to the application. Apps with heavy traffic may have events run at very
587close to the correct time, whereas apps with low levels of traffic may see
588events running much later than scheduled. If this is a problem, you can use
589a real cron entry that simply hits your application at the desired time.
590
591 0 * * * * wget -q http://www.myapp.com/
592
593Events which consume a lot of time will slow the request processing for the
594user who triggers the event. For these types of events, you should use
595auto_run => 0 or manual event triggering.
596
597=head1 PERFORMANCE
598
599The plugin only checks once per minute if any events need to be run, so the
600overhead on each request is minimal. On my test server, the difference
601between running with Scheduler and without was only around 0.02% (0.004
602seconds).
603
68f800bd 604Of course, when a scheduled event runs, performance will depend on what's
605being run in the event.
07305803 606
607=head1 METHODS
608
609=head2 schedule
610
Schedule is a class method for adding scheduled events. See the
L</SCHEDULING> section for more information.
07305803 613
614=head1 INTERNAL METHODS
615
616The following methods are extended by this plugin.
617
618=over 4
619
620=item dispatch
621
622The main scheduling logic takes place during the dispatch phase.
623
624=item setup
625
626=back
74e31b02 627
628=head1 SEE ALSO
629
630L<crontab(5)>
631
632=head1 AUTHOR
633
634Andy Grundman, <andy@hybridized.org>
635
636=head1 COPYRIGHT
637
638This program is free software, you can redistribute it and/or modify it
639under the same terms as Perl itself.
640
641=cut