Scheduler, switch to Set::Scalar
[catagits/Catalyst-Plugin-Scheduler.git] / lib / Catalyst / Plugin / Scheduler.pm
CommitLineData
74e31b02 1package Catalyst::Plugin::Scheduler;
2
3use strict;
4use warnings;
f9d8e3cf 5use base qw/Class::Accessor::Fast Class::Data::Inheritable/;
cbf1ecfe 6use DateTime;
7use DateTime::Event::Cron;
8use DateTime::TimeZone;
68f800bd 9use File::stat;
74e31b02 10use NEXT;
d2c7c91a 11use Set::Scalar;
f9d8e3cf 12use Storable qw/lock_store lock_retrieve/;
13use YAML;
74e31b02 14
15our $VERSION = '0.01';
16
cbf1ecfe 17__PACKAGE__->mk_classdata( '_events' => [] );
68f800bd 18__PACKAGE__->mk_accessors('_event_state');
cbf1ecfe 19
20sub schedule {
21 my ( $class, %args ) = @_;
68f800bd 22
f9d8e3cf 23 unless ( $args{event} ) {
68f800bd 24 Catalyst::Exception->throw(
25 message => 'The schedule method requires an event parameter' );
f9d8e3cf 26 }
68f800bd 27
f9d8e3cf 28 my $conf = $class->config->{scheduler};
68f800bd 29
cbf1ecfe 30 my $event = {
cbf1ecfe 31 trigger => $args{trigger},
f9d8e3cf 32 event => $args{event},
cbf1ecfe 33 auto_run => ( defined $args{auto_run} ) ? $args{auto_run} : 1,
34 };
68f800bd 35
cbf1ecfe 36 if ( $args{at} ) {
68f800bd 37
cbf1ecfe 38 # replace keywords that Set::Crontab doesn't support
39 $args{at} = _prepare_cron( $args{at} );
68f800bd 40
cbf1ecfe 41 # parse the cron entry into a DateTime::Set
42 my $set;
43 eval { $set = DateTime::Event::Cron->from_cron( $args{at} ) };
68f800bd 44 if ($@) {
45 Catalyst::Exception->throw(
46 "Scheduler: Unable to parse 'at' value "
47 . $args{at} . ': '
48 . $@ );
cbf1ecfe 49 }
50 else {
51 $event->{set} = $set;
52 }
53 }
68f800bd 54
cbf1ecfe 55 push @{ $class->_events }, $event;
56}
57
58sub dispatch {
59 my $c = shift;
68f800bd 60
cbf1ecfe 61 $c->NEXT::dispatch(@_);
68f800bd 62
f9d8e3cf 63 $c->_get_event_state();
68f800bd 64
65 $c->_check_yaml();
66
cbf1ecfe 67 # check if a minute has passed since our last check
f9d8e3cf 68 # This check is not run if the user is manually triggering an event
69 if ( time - $c->_event_state->{last_check} < 60 ) {
70 return unless $c->req->params->{schedule_trigger};
cbf1ecfe 71 }
f9d8e3cf 72 my $last_check = $c->_event_state->{last_check};
73 $c->_event_state->{last_check} = time;
74 $c->_save_event_state();
68f800bd 75
76 my $conf = $c->config->{scheduler};
f9d8e3cf 77 my $last_check_dt = DateTime->from_epoch(
78 epoch => $last_check,
cbf1ecfe 79 time_zone => $conf->{time_zone}
80 );
81 my $now = DateTime->now( time_zone => $conf->{time_zone} );
68f800bd 82
48390e8e 83 EVENT:
cbf1ecfe 84 for my $event ( @{ $c->_events } ) {
f9d8e3cf 85 my $next_run;
68f800bd 86
87 if ( $event->{trigger}
88 && $event->{trigger} eq $c->req->params->{schedule_trigger} )
89 {
90
f9d8e3cf 91 # manual trigger, run it now
92 next EVENT unless $c->_event_authorized;
93 $next_run = $now;
94 }
95 else {
96 next EVENT unless $event->{set};
68f800bd 97 $next_run = $event->{set}->next($last_check_dt);
f9d8e3cf 98 }
68f800bd 99
cbf1ecfe 100 if ( $next_run <= $now ) {
68f800bd 101
cbf1ecfe 102 # do some security checking for non-auto-run events
103 if ( !$event->{auto_run} ) {
104 next EVENT unless $c->_event_authorized;
105 }
68f800bd 106
f9d8e3cf 107 # make sure we're the only process running this event
68f800bd 108 next EVENT unless $c->_mark_running($event);
109
cbf1ecfe 110 my $event_name = $event->{trigger} || $event->{event};
68f800bd 111 $c->log->debug("Scheduler: Executing $event_name")
f9d8e3cf 112 if $c->config->{scheduler}->{logging};
68f800bd 113
cbf1ecfe 114 # trap errors
115 local $c->{error} = [];
68f800bd 116
cbf1ecfe 117 # run event
118 eval {
68f800bd 119
cbf1ecfe 120 # do not allow the event to modify the response
121 local $c->res->{body};
122 local $c->res->{cookies};
123 local $c->res->{headers};
124 local $c->res->{location};
125 local $c->res->{status};
68f800bd 126
cbf1ecfe 127 if ( ref $event->{event} eq 'CODE' ) {
68f800bd 128 $event->{event}->($c);
cbf1ecfe 129 }
130 else {
131 $c->forward( $event->{event} );
132 }
133 };
134 my @errors = @{ $c->{error} };
135 push @errors, $@ if $@;
68f800bd 136 if (@errors) {
137 $c->log->error(
138 'Scheduler: Error executing ' . "$event_name: $_" )
139 for @errors;
cbf1ecfe 140 }
68f800bd 141
142 $c->_mark_finished($event);
cbf1ecfe 143 }
144 }
145}
146
147sub setup {
148 my $c = shift;
68f800bd 149
cbf1ecfe 150 # initial configuration
48390e8e 151 $c->config->{scheduler}->{logging} ||= ( $c->debug ) ? 1 : 0;
68f800bd 152 $c->config->{scheduler}->{time_zone} ||= $c->_detect_timezone();
cbf1ecfe 153 $c->config->{scheduler}->{state_file} ||= $c->path_to('scheduler.state');
154 $c->config->{scheduler}->{hosts_allow} ||= '127.0.0.1';
68f800bd 155 $c->config->{scheduler}->{yaml} ||= $c->path_to('scheduler.yml');
156
f9d8e3cf 157 $c->NEXT::setup(@_);
cbf1ecfe 158}
159
68f800bd 160# check and reload the YAML file with schedule data
161sub _check_yaml {
162 my ($c) = @_;
163
164 # each process needs to load the YAML file independently
165 if ( $c->_event_state->{yaml_mtime}->{$$} ||= 0 ) {
166 return if ( time - $c->_event_state->{last_check} < 60 );
167 }
168
169 return unless -e $c->config->{scheduler}->{yaml};
170
171 eval {
172 my $mtime = ( stat $c->config->{scheduler}->{yaml} )->mtime;
173 if ( $mtime > $c->_event_state->{yaml_mtime}->{$$} ) {
174 $c->_event_state->{yaml_mtime}->{$$} = $mtime;
175 $c->_save_event_state();
176
177 # wipe out all current events and reload from YAML
178 $c->_events( [] );
179
180 my $yaml = YAML::LoadFile( $c->config->{scheduler}->{yaml} );
181
182 foreach my $event ( @{$yaml} ) {
183 $c->schedule( %{$event} );
184 }
185
186 $c->log->info( "Scheduler: PID $$ loaded "
187 . scalar @{$yaml}
188 . ' events from YAML file' )
189 if $c->config->{scheduler}->{logging};
190 }
191 };
192 if ($@) {
193 $c->log->error("Error reading YAML file: $@");
194 }
195}
196
cbf1ecfe 197# Detect the current time zone
198sub _detect_timezone {
199 my $c = shift;
68f800bd 200
cbf1ecfe 201 my $tz;
202 eval { $tz = DateTime::TimeZone->new( name => 'local' ) };
203 if ($@) {
68f800bd 204 $c->log->warn(
205 'Scheduler: Unable to autodetect local time zone, using UTC');
cbf1ecfe 206 return 'UTC';
207 }
208 else {
f9d8e3cf 209 $c->log->debug(
68f800bd 210 'Scheduler: Using autodetected time zone: ' . $tz->name )
211 if $c->config->{scheduler}->{logging};
cbf1ecfe 212 return $tz->name;
213 }
214}
215
216# Check for authorized users on non-auto events
cbf1ecfe 217sub _event_authorized {
218 my $c = shift;
68f800bd 219
f9d8e3cf 220 # this should never happen, but just in case...
68f800bd 221 return unless $c->req->address;
222
f9d8e3cf 223 my $hosts_allow = $c->config->{scheduler}->{hosts_allow};
68f800bd 224 $hosts_allow = [$hosts_allow] unless ref($hosts_allow) eq 'ARRAY';
d2c7c91a 225 my $allowed = Set::Scalar->new( @{$hosts_allow} );
226 return $allowed->contains( $c->req->address );
f9d8e3cf 227}
228
229# get the state from the state file
230sub _get_event_state {
231 my $c = shift;
68f800bd 232
f9d8e3cf 233 if ( -e $c->config->{scheduler}->{state_file} ) {
68f800bd 234 $c->_event_state(
235 lock_retrieve $c->config->{scheduler}->{state_file} );
f9d8e3cf 236 }
237 else {
68f800bd 238
f9d8e3cf 239 # initialize the state file
68f800bd 240 $c->_event_state(
241 { last_check => time,
242 yaml_mtime => {},
243 }
244 );
f9d8e3cf 245 $c->_save_event_state();
246 }
247}
248
249# Check the state file to ensure we are the only process running an event
250sub _mark_running {
251 my ( $c, $event ) = @_;
68f800bd 252
f9d8e3cf 253 $c->_get_event_state();
68f800bd 254
f9d8e3cf 255 return if $c->_event_state->{ $event->{event} };
68f800bd 256
f9d8e3cf 257 # this is a 2-step process to prevent race conditions
258 # 1. write the state file with our PID
259 $c->_event_state->{ $event->{event} } = $$;
260 $c->_save_event_state();
68f800bd 261
f9d8e3cf 262 # 2. re-read the state file and make sure it's got the same PID
263 $c->_get_event_state();
264 if ( $c->_event_state->{ $event->{event} } == $$ ) {
265 return 1;
266 }
68f800bd 267
f9d8e3cf 268 return;
269}
270
271# Mark an event as finished
272sub _mark_finished {
273 my ( $c, $event ) = @_;
68f800bd 274
f9d8e3cf 275 $c->_event_state->{ $event->{event} } = 0;
276 $c->_save_event_state();
cbf1ecfe 277}
278
f9d8e3cf 279# update the state file on disk
280sub _save_event_state {
281 my $c = shift;
68f800bd 282
f9d8e3cf 283 lock_store $c->_event_state, $c->config->{scheduler}->{state_file};
cbf1ecfe 284}
285
286# Set::Crontab does not support day names, or '@' shortcuts
287sub _prepare_cron {
288 my $cron = shift;
68f800bd 289
cbf1ecfe 290 return $cron unless $cron =~ /\w/;
68f800bd 291
cbf1ecfe 292 my %replace = (
293 jan => 1,
294 feb => 2,
295 mar => 3,
296 apr => 4,
297 may => 5,
298 jun => 6,
299 jul => 7,
300 aug => 8,
301 sep => 9,
302 'oct' => 10,
303 nov => 11,
304 dec => 12,
68f800bd 305
cbf1ecfe 306 sun => 0,
307 mon => 1,
308 tue => 2,
309 wed => 3,
310 thu => 4,
311 fri => 5,
312 sat => 6,
68f800bd 313
cbf1ecfe 314 'yearly' => '0 0 1 1 *',
315 'annually' => '0 0 1 1 *',
316 'monthly' => '0 0 1 * *',
317 'weekly' => '0 0 * * 0',
318 'daily' => '0 0 * * *',
319 'midnight' => '0 0 * * *',
320 'hourly' => '0 * * * *',
321 );
68f800bd 322
cbf1ecfe 323 for my $name ( keys %replace ) {
324 my $value = $replace{$name};
68f800bd 325
cbf1ecfe 326 if ( $cron =~ /^\@$name/ ) {
327 $cron = $value;
328 last;
329 }
330 else {
331 $cron =~ s/$name/$value/i;
332 last unless $cron =~ /\w/;
333 }
334 }
335
336 return $cron;
337}
338
74e31b02 3391;
340__END__
341
342=pod
343
344=head1 NAME
345
346Catalyst::Plugin::Scheduler - Schedule events to run in a cron-like fashion
347
348=head1 SYNOPSIS
349
350 use Catalyst qw/Scheduler/;
351
352 # run remove_sessions in the Cron controller every hour
353 __PACKAGE__->schedule(
354 at => '0 * * * *',
355 event => '/cron/remove_sessions'
356 );
357
358 # Run a subroutine at 4:05am every Sunday
359 __PACKAGE__->schedule(
360 at => '5 4 * * sun',
361 event => \&do_stuff,
362 );
363
68f800bd 364 # A long-running scheduled event that must be triggered
365 # manually by an authorized user
366 __PACKAGE__->schedule(
367 trigger => 'rebuild_search_index',
368 event => '/cron/rebuild_search_index',
369 );
370 $ wget -q http://www.myapp.com/?schedule_trigger=rebuild_search_index
f9d8e3cf 371
74e31b02 372=head1 DESCRIPTION
373
374This plugin allows you to schedule events to run at recurring intervals.
375Events will run during the first request which meets or exceeds the specified
376time. Depending on the level of traffic to the application, events may or may
377not run at exactly the correct time, but it should be enough to satisfy many
378basic scheduling needs.
379
380=head1 CONFIGURATION
381
382Configuration is optional and is specified in MyApp->config->{scheduler}.
383
384=head2 logging
385
386Set to 1 to enable logging of events as they are executed. This option is
387enabled by default when running under -Debug mode. Errors are always logged
388regardless of the value of this option.
389
cbf1ecfe 390=head2 time_zone
391
392The time zone of your system. This will be autodetected where possible, or
393will default to UTC (GMT). You can override the detection by providing a
394valid L<DateTime> time zone string, such as 'America/New_York'.
395
74e31b02 396=head2 state_file
397
398The current state of every event is stored in a file. By default this is
f9d8e3cf 399$APP_HOME/scheduler.state. This file is created on the first request if it
400does not already exist.
74e31b02 401
68f800bd 402=head2 yaml_file
403
404The location of the optional YAML event configuration file. By default this
405is $APP_HOME/scheduler.yml.
406
74e31b02 407=head2 hosts_allow
408
409This option specifies IP addresses for trusted users. This option defaults
410to 127.0.0.1. Multiple addresses can be specified by using an array
411reference. This option is used for both events where auto_run is set to 0
412and for manually-triggered events.
413
414 __PACKAGE__->config->{scheduler}->{hosts_allow} = '192.168.1.1';
415 __PACKAGE__->config->{scheduler}->{hosts_allow} = [
416 '127.0.0.1',
417 '192.168.1.1'
418 ];
419
420=head1 SCHEDULING
421
422=head2 AUTOMATED EVENTS
423
424Events are scheduled by calling the class method C<schedule>.
425
426 MyApp->schedule(
427 at => '0 * * * *',
428 event => '/cron/remove_sessions',
429 );
430
431 package MyApp::Controller::Cron;
432
433 sub remove_sessions : Private {
434 my ( $self, $c ) = @_;
435
436 $c->delete_expired_sessions;
437 }
438
439=head3 at
440
441The time to run an event is specified using L<crontab(5)>-style syntax.
442
443 5 0 * * * # 5 minutes after midnight, every day
444 15 14 1 * * # run at 2:15pm on the first of every month
445 0 22 * * 1-5 # run at 10 pm on weekdays
446 5 4 * * sun # run at 4:05am every Sunday
447
448From crontab(5):
449
450 field allowed values
451 ----- --------------
452 minute 0-59
453 hour 0-23
454 day of month 1-31
455 month 0-12 (or names, see below)
456 day of week 0-7 (0 or 7 is Sun, or use names)
457
458Instead of the first five fields, one of seven special strings may appear:
459
460 string meaning
461 ------ -------
462 @yearly Run once a year, "0 0 1 1 *".
463 @annually (same as @yearly)
464 @monthly Run once a month, "0 0 1 * *".
465 @weekly Run once a week, "0 0 * * 0".
466 @daily Run once a day, "0 0 * * *".
467 @midnight (same as @daily)
468 @hourly Run once an hour, "0 * * * *".
469
470=head3 event
471
472The event to run at the specified time can be either a Catalyst private
473action path or a coderef. Both types of event methods will receive the $c
474object from the current request, but you must not rely on any request-specific
475information present in $c as it will be from a random user request at or near
476the event's specified run time.
477
478Important: Methods used for events should be marked C<Private> so that
479they can not be executed via the browser.
480
481=head3 auto_run
482
483The auto_run parameter specifies when the event is allowed to be executed.
484By default this option is set to 1, so the event will be executed during the
485first request that matches the specified time in C<at>.
486
487If set to 0, the event will only run when a request is made by a user from
488an authorized address. The purpose of this option is to allow long-running
489tasks to execute only for certain users.
490
491 MyApp->schedule(
492 at => '0 0 * * *',
493 event => '/cron/rebuild_search_index',
494 auto_run => 0,
495 );
496
497 package MyApp::Controller::Cron;
498
499 sub rebuild_search_index : Private {
500 my ( $self, $c ) = @_;
501
502 # rebuild the search index, this may take a long time
503 }
504
505Now, the search index will only be rebuilt when a request is made from a user
506whose IP address matches the list in the C<hosts_allow> config option. To
507run this event, you probably want to ping the app from a cron job.
508
f9d8e3cf 509 0 0 * * * wget -q http://www.myapp.com/
74e31b02 510
511=head2 MANUAL EVENTS
512
513To create an event that does not run on a set schedule and must be manually
514triggered, you can specify the C<trigger> option instead of C<at>.
515
516 __PACKAGE__->schedule(
517 trigger => 'send_email',
518 event => '/events/send_email',
519 );
520
521The event may then be triggered by a standard web request from an authorized
522user. The trigger to run is specified by using a special GET parameter,
523'schedule_trigger'; the path requested does not matter.
524
525 http://www.myapp.com/?schedule_trigger=send_email
526
527By default, manual events may only be triggered by requests made from
528localhost (127.0.0.1). To allow other addresses to run events, use the
68f800bd 529configuration option L</hosts_allow>.
530
531=head1 SCHEDULING USING A YAML FILE
532
533As an alternative to using the schedule() method, you may define scheduled
534events in an external YAML file. By default, the plugin looks for the
535existence of a file called C<schedule.yml> in your application's home
536directory. You can change the filename using the configuration option
537L</yaml_file>.
538
539Modifications to this file will be re-read once per minute during the normal
540event checking process.
541
542Here's an example YAML configuration file with 4 events. Each event is
543denoted with a '-' character, followed by the same parameters used by the
544C<schedule> method. Note that coderef events are not supported by the YAML
545file.
546
547 ---
548 - at: '* * * * *'
549 event: /cron/delete_sessions
550 - event: /cron/send_email
551 trigger: send_email
552 - at: '@hourly'
553 event: /cron/hourly
554 - at: 0 0 * * *
555 auto_run: 0
556 event: /cron/rebuild_search_index
74e31b02 557
558=head1 SECURITY
559
560All events are run inside of an eval container. This protects the user from
561receiving any error messages or page crashes if an event fails to run
562properly. All event errors are logged, even if logging is disabled.
563
74e31b02 564=head1 PLUGIN SUPPORT
565
566Other plugins may register scheduled events if they need to perform periodic
567maintenance. Plugin authors, B<be sure to inform your users> if you do this!
568Events should be registered from a plugin's C<setup> method.
569
570 sub setup {
571 my $c = shift;
572 $c->NEXT::setup(@_);
573
574 if ( $c->can('schedule') ) {
575 $c->schedule(
576 at => '0 * * * *',
577 event => \&cleanup,
578 );
579 }
580 }
f9d8e3cf 581
582=head1 CAVEATS
583
584The time at which an event will run is determined completely by the requests
585made to the application. Apps with heavy traffic may have events run at very
586close to the correct time, whereas apps with low levels of traffic may see
587events running much later than scheduled. If this is a problem, you can use
588a real cron entry that simply hits your application at the desired time.
589
590 0 * * * * wget -q http://www.myapp.com/
591
592Events which consume a lot of time will slow the request processing for the
593user who triggers the event. For these types of events, you should use
594auto_run => 0 or manual event triggering.
595
596=head1 PERFORMANCE
597
598The plugin only checks once per minute if any events need to be run, so the
599overhead on each request is minimal. On my test server, the difference
600between running with Scheduler and without was only around 0.02% (0.004
601seconds).
602
68f800bd 603Of course, when a scheduled event runs, performance will depend on what's
604being run in the event.
74e31b02 605
606=head1 SEE ALSO
607
608L<crontab(5)>
609
610=head1 AUTHOR
611
612Andy Grundman, <andy@hybridized.org>
613
614=head1 COPYRIGHT
615
616This program is free software, you can redistribute it and/or modify it
617under the same terms as Perl itself.
618
619=cut