fix warnings in test suite
[catagits/Catalyst-Plugin-Scheduler.git] / lib / Catalyst / Plugin / Scheduler.pm
CommitLineData
74e31b02 1package Catalyst::Plugin::Scheduler;
2
3use strict;
4use warnings;
bec4be8c 5use base qw/Class::Accessor::Fast Class::Data::Inheritable/;
6use DateTime;
7use DateTime::Event::Cron;
8use DateTime::TimeZone;
9use File::stat;
bec4be8c 10use Set::Scalar;
11use Storable qw/lock_store lock_retrieve/;
5cc5ef46 12use MRO::Compat;
74e31b02 13
b245b6f3 14our $VERSION = '0.10';
74e31b02 15
bec4be8c 16__PACKAGE__->mk_classdata( '_events' => [] );
17__PACKAGE__->mk_accessors('_event_state');
18
19sub schedule {
20 my ( $class, %args ) = @_;
21
22 unless ( $args{event} ) {
23 Catalyst::Exception->throw(
24 message => 'The schedule method requires an event parameter' );
25 }
26
27 my $conf = $class->config->{scheduler};
28
29 my $event = {
30 trigger => $args{trigger},
31 event => $args{event},
32 auto_run => ( defined $args{auto_run} ) ? $args{auto_run} : 1,
33 };
34
35 if ( $args{at} ) {
36
37 # replace keywords that Set::Crontab doesn't support
38 $args{at} = _prepare_cron( $args{at} );
39
40 # parse the cron entry into a DateTime::Set
41 my $set;
42 eval { $set = DateTime::Event::Cron->from_cron( $args{at} ) };
43 if ($@) {
44 Catalyst::Exception->throw(
45 "Scheduler: Unable to parse 'at' value "
46 . $args{at} . ': '
47 . $@ );
48 }
49 else {
50 $event->{at} = $args{at};
51 $event->{set} = $set;
52 }
53 }
54
55 push @{ $class->_events }, $event;
56}
57
58sub dispatch {
59 my $c = shift;
60
5cc5ef46 61 $c->maybe::next::method();
bec4be8c 62
63 $c->_get_event_state();
64
65 $c->_check_yaml();
66
67 # check if a minute has passed since our last check
68 # This check is not run if the user is manually triggering an event
69 if ( time - $c->_event_state->{last_check} < 60 ) {
70 return unless $c->req->params->{schedule_trigger};
71 }
72 my $last_check = $c->_event_state->{last_check};
73 $c->_event_state->{last_check} = time;
74 $c->_save_event_state();
75
76 my $conf = $c->config->{scheduler};
77 my $last_check_dt = DateTime->from_epoch(
78 epoch => $last_check,
79 time_zone => $conf->{time_zone}
80 );
81 my $now = DateTime->now( time_zone => $conf->{time_zone} );
82
83 EVENT:
84 for my $event ( @{ $c->_events } ) {
85 my $next_run;
86
87 if ( $event->{trigger} && $c->req->params->{schedule_trigger}
88 && $event->{trigger} eq $c->req->params->{schedule_trigger} )
89 {
90
91 # manual trigger, run it now
92 next EVENT unless $c->_event_authorized;
93 $next_run = $now;
94 }
95 else {
96 next EVENT unless $event->{set};
97 $next_run = $event->{set}->next($last_check_dt);
98 }
99
100 if ( $next_run <= $now ) {
101
102 # do some security checking for non-auto-run events
103 if ( !$event->{auto_run} ) {
104 next EVENT unless $c->_event_authorized;
105 }
106
107 # make sure we're the only process running this event
108 next EVENT unless $c->_mark_running($event);
109
110 my $event_name = $event->{trigger} || $event->{event};
111 $c->log->debug("Scheduler: Executing $event_name")
112 if $c->config->{scheduler}->{logging};
113
114 # trap errors
115 local $c->{error} = [];
116
117 # return value/output from the event, if any
118 my $output;
119
120 # run event
121 eval {
122
123 # do not allow the event to modify the response
124 local $c->res->{body};
125 local $c->res->{cookies};
126 local $c->res->{headers};
127 local $c->res->{location};
128 local $c->res->{status};
129
130 if ( ref $event->{event} eq 'CODE' ) {
131 $output = $event->{event}->($c);
132 }
133 else {
134 $output = $c->forward( $event->{event} );
135 }
136 };
137 my @errors = @{ $c->{error} };
138 push @errors, $@ if $@;
139 if (@errors) {
140 $c->log->error(
141 'Scheduler: Error executing ' . "$event_name: $_" )
142 for @errors;
143 $output = join '; ', @errors;
144 }
145
146 $c->_mark_finished( $event, $output );
147 }
148 }
149}
150
151sub setup {
152 my $c = shift;
153
154 # initial configuration
155 $c->config->{scheduler}->{logging} ||= ( $c->debug ) ? 1 : 0;
156 $c->config->{scheduler}->{time_zone} ||= $c->_detect_timezone();
157 $c->config->{scheduler}->{state_file} ||= $c->path_to('scheduler.state');
158 $c->config->{scheduler}->{hosts_allow} ||= '127.0.0.1';
159 $c->config->{scheduler}->{yaml_file} ||= $c->path_to('scheduler.yml');
8cefc921 160
161 # Always start with a clean state
162 if ( -e $c->config->{scheduler}->{state_file} ) {
163 $c->log->debug(
164 'Scheduler: Removing old state file ' .
165 $c->config->{scheduler}->{state_file}
166 ) if $c->config->{scheduler}->{logging};
167
168 unlink $c->config->{scheduler}->{state_file}
169 or Catalyst::Exception->throw(
170 message => 'Scheduler: Unable to remove old state file '
171 . $c->config->{scheduler}->{state_file} . " ($!)"
172 );
173 }
bec4be8c 174
5cc5ef46 175 $c->maybe::next::method(@_);
bec4be8c 176}
177
178sub dump_these {
179 my $c = shift;
180
5cc5ef46 181 return ( $c->maybe::next::method(@_) ) unless @{ $c->_events };
182
bec4be8c 183 # for debugging, we dump out a list of all events with their next
184 # scheduled run time
185 return (
5cc5ef46 186 $c->maybe::next::method(@_),
bec4be8c 187 [ 'Scheduled Events', $c->scheduler_state ],
188 );
189}
190
191sub scheduler_state {
192 my $c = shift;
193
194 $c->_get_event_state();
195
196 my $conf = $c->config->{scheduler};
197 my $now = DateTime->now( time_zone => $conf->{time_zone} );
198
199 my $last_check = $c->_event_state->{last_check};
200 my $last_check_dt = DateTime->from_epoch(
201 epoch => $last_check,
202 time_zone => $conf->{time_zone},
203 );
204
205 my $event_dump = [];
206 for my $event ( @{ $c->_events } ) {
207 my $dump = {};
208 for my $key ( qw/at trigger event auto_run/ ) {
209 $dump->{$key} = $event->{$key} if $event->{$key};
210 }
211
212 # display the next run time
213 if ( $event->{set} ) {
214 my $next_run = $event->{set}->next($last_check_dt);
215 $dump->{next_run}
216 = $next_run->ymd
217 . q{ } . $next_run->hms
218 . q{ } . $next_run->time_zone_short_name;
219 }
220
221 # display the last run time
222 my $last_run
223 = $c->_event_state->{events}->{ $event->{event} }->{last_run};
224 if ( $last_run ) {
225 $last_run = DateTime->from_epoch(
226 epoch => $last_run,
227 time_zone => $conf->{time_zone},
228 );
229 $dump->{last_run}
230 = $last_run->ymd
231 . q{ } . $last_run->hms
232 . q{ } . $last_run->time_zone_short_name;
233 }
234
235 # display the result of the last run
236 my $output
237 = $c->_event_state->{events}->{ $event->{event} }->{last_output};
238 if ( $output ) {
239 $dump->{last_output} = $output;
240 }
241
242 push @{$event_dump}, $dump;
243 }
244
245 return $event_dump;
246}
247
248# check and reload the YAML file with schedule data
249sub _check_yaml {
250 my ($c) = @_;
251
252 # each process needs to load the YAML file independently
253 if ( $c->_event_state->{yaml_mtime}->{$$} ||= 0 ) {
254 return if ( time - $c->_event_state->{last_check} < 60 );
255 }
256
257 return unless -e $c->config->{scheduler}->{yaml_file};
258
259 eval {
260 my $mtime = ( stat $c->config->{scheduler}->{yaml_file} )->mtime;
261 if ( $mtime > $c->_event_state->{yaml_mtime}->{$$} ) {
262 $c->_event_state->{yaml_mtime}->{$$} = $mtime;
263
264 # clean up old PIDs listed in yaml_mtime
265 foreach my $pid ( keys %{ $c->_event_state->{yaml_mtime} } ) {
266 if ( $c->_event_state->{yaml_mtime}->{$pid} < $mtime ) {
267 delete $c->_event_state->{yaml_mtime}->{$pid};
268 }
269 }
270 $c->_save_event_state();
271
272 # wipe out all current events and reload from YAML
273 $c->_events( [] );
274
275 my $file = $c->config->{scheduler}->{yaml_file};
276 my $yaml;
277
278 eval { require YAML::Syck; };
279 if( $@ ) {
280 require YAML;
281 $yaml = YAML::LoadFile( "$file" );
282 }
283 else {
284 open( my $fh, $file ) or die $!;
285 my $content = do { local $/; <$fh> };
286 close $fh;
287 $yaml = YAML::Syck::Load( $content );
288 }
289
290 foreach my $event ( @{$yaml} ) {
291 $c->schedule( %{$event} );
292 }
293
294 $c->log->info( "Scheduler: PID $$ loaded "
295 . scalar @{$yaml}
296 . ' events from YAML file' )
297 if $c->config->{scheduler}->{logging};
298 }
299 };
300 if ($@) {
301 $c->log->error("Scheduler: Error reading YAML file: $@");
302 }
303}
304
305# Detect the current time zone
306sub _detect_timezone {
307 my $c = shift;
308
309 my $tz;
310 eval { $tz = DateTime::TimeZone->new( name => 'local' ) };
311 if ($@) {
312 $c->log->warn(
313 'Scheduler: Unable to autodetect local time zone, using UTC')
314 if $c->config->{scheduler}->{logging};
315 return 'UTC';
316 }
317 else {
318 $c->log->debug(
319 'Scheduler: Using autodetected time zone: ' . $tz->name )
320 if $c->config->{scheduler}->{logging};
321 return $tz->name;
322 }
323}
324
325# Check for authorized users on non-auto events
326sub _event_authorized {
327 my $c = shift;
328
329 # this should never happen, but just in case...
330 return unless $c->req->address;
331
332 my $hosts_allow = $c->config->{scheduler}->{hosts_allow};
333 $hosts_allow = [$hosts_allow] unless ref($hosts_allow) eq 'ARRAY';
334 my $allowed = Set::Scalar->new( @{$hosts_allow} );
335 return $allowed->contains( $c->req->address );
336}
337
338# get the state from the state file
339sub _get_event_state {
340 my $c = shift;
341
342 if ( -e $c->config->{scheduler}->{state_file} ) {
343 $c->_event_state(
344 lock_retrieve $c->config->{scheduler}->{state_file} );
345 }
346 else {
347
348 # initialize the state file
349 $c->_event_state(
350 { last_check => time,
351 events => {},
352 yaml_mtime => {},
353 }
354 );
355 $c->_save_event_state();
356 }
357}
358
359# Check the state file to ensure we are the only process running an event
360sub _mark_running {
361 my ( $c, $event ) = @_;
362
363 $c->_get_event_state();
364
365 return if
366 $c->_event_state->{events}->{ $event->{event} }->{running};
367
368 # this is a 2-step process to prevent race conditions
369 # 1. write the state file with our PID
370 $c->_event_state->{events}->{ $event->{event} }->{running} = $$;
371 $c->_save_event_state();
372
373 # 2. re-read the state file and make sure it's got the same PID
374 $c->_get_event_state();
375 if ( $c->_event_state->{events}->{ $event->{event} }->{running} == $$ ) {
376 return 1;
377 }
378
379 return;
380}
381
382# Mark an event as finished
383sub _mark_finished {
384 my ( $c, $event, $output ) = @_;
385
386 $c->_event_state->{events}->{ $event->{event} }->{running} = 0;
387 $c->_event_state->{events}->{ $event->{event} }->{last_run} = time;
388 $c->_event_state->{events}->{ $event->{event} }->{last_output} = $output;
389 $c->_save_event_state();
390}
391
392# update the state file on disk
393sub _save_event_state {
394 my $c = shift;
395
396 lock_store $c->_event_state, $c->config->{scheduler}->{state_file};
397}
398
399# Set::Crontab does not support day names, or '@' shortcuts
400sub _prepare_cron {
401 my $cron = shift;
402
403 return $cron unless $cron =~ /\w/;
404
405 my %replace = (
406 jan => 1,
407 feb => 2,
408 mar => 3,
409 apr => 4,
410 may => 5,
411 jun => 6,
412 jul => 7,
413 aug => 8,
414 sep => 9,
415 'oct' => 10,
416 nov => 11,
417 dec => 12,
418
419 sun => 0,
420 mon => 1,
421 tue => 2,
422 wed => 3,
423 thu => 4,
424 fri => 5,
425 sat => 6,
426 );
427
428 my %replace_at = (
429 'yearly' => '0 0 1 1 *',
430 'annually' => '0 0 1 1 *',
431 'monthly' => '0 0 1 * *',
432 'weekly' => '0 0 * * 0',
433 'daily' => '0 0 * * *',
434 'midnight' => '0 0 * * *',
435 'hourly' => '0 * * * *',
436 );
437
438 if ( $cron =~ /^\@/ ) {
439 $cron =~ s/^\@//;
440 return $replace_at{ $cron };
441 }
442
443 for my $name ( keys %replace ) {
444 my $value = $replace{$name};
445 $cron =~ s/$name/$value/i;
446 last unless $cron =~ /\w/;
447 }
448 return $cron;
449}
450
4511;
452__END__
74e31b02 453
454=pod
455
456=head1 NAME
457
458Catalyst::Plugin::Scheduler - Schedule events to run in a cron-like fashion
459
460=head1 SYNOPSIS
461
462 use Catalyst qw/Scheduler/;
463
464 # run remove_sessions in the Cron controller every hour
465 __PACKAGE__->schedule(
466 at => '0 * * * *',
467 event => '/cron/remove_sessions'
468 );
469
470 # Run a subroutine at 4:05am every Sunday
471 __PACKAGE__->schedule(
472 at => '5 4 * * sun',
473 event => \&do_stuff,
474 );
475
68f800bd 476 # A long-running scheduled event that must be triggered
477 # manually by an authorized user
478 __PACKAGE__->schedule(
479 trigger => 'rebuild_search_index',
480 event => '/cron/rebuild_search_index',
481 );
482 $ wget -q http://www.myapp.com/?schedule_trigger=rebuild_search_index
f9d8e3cf 483
74e31b02 484=head1 DESCRIPTION
485
486This plugin allows you to schedule events to run at recurring intervals.
487Events will run during the first request which meets or exceeds the specified
488time. Depending on the level of traffic to the application, events may or may
489not run at exactly the correct time, but it should be enough to satisfy many
490basic scheduling needs.
491
492=head1 CONFIGURATION
493
494Configuration is optional and is specified in MyApp->config->{scheduler}.
495
496=head2 logging
497
498Set to 1 to enable logging of events as they are executed. This option is
499enabled by default when running under -Debug mode. Errors are always logged
500regardless of the value of this option.
501
cbf1ecfe 502=head2 time_zone
503
504The time zone of your system. This will be autodetected where possible, or
505will default to UTC (GMT). You can override the detection by providing a
506valid L<DateTime> time zone string, such as 'America/New_York'.
507
74e31b02 508=head2 state_file
509
510The current state of every event is stored in a file. By default this is
f9d8e3cf 511$APP_HOME/scheduler.state. This file is created on the first request if it
512does not already exist.
74e31b02 513
68f800bd 514=head2 yaml_file
515
516The location of the optional YAML event configuration file. By default this
517is $APP_HOME/scheduler.yml.
518
74e31b02 519=head2 hosts_allow
520
521This option specifies IP addresses for trusted users. This option defaults
522to 127.0.0.1. Multiple addresses can be specified by using an array
523reference. This option is used for both events where auto_run is set to 0
524and for manually-triggered events.
525
526 __PACKAGE__->config->{scheduler}->{hosts_allow} = '192.168.1.1';
527 __PACKAGE__->config->{scheduler}->{hosts_allow} = [
528 '127.0.0.1',
529 '192.168.1.1'
530 ];
531
bec4be8c 532=head1 SCHEDULING
74e31b02 533
bec4be8c 534=head2 AUTOMATED EVENTS
74e31b02 535
536Events are scheduled by calling the class method C<schedule>.
537
538 MyApp->schedule(
539 at => '0 * * * *',
540 event => '/cron/remove_sessions',
541 );
542
543 package MyApp::Controller::Cron;
544
545 sub remove_sessions : Private {
546 my ( $self, $c ) = @_;
547
548 $c->delete_expired_sessions;
549 }
550
bec4be8c 551=head3 at
74e31b02 552
553The time to run an event is specified using L<crontab(5)>-style syntax.
554
555 5 0 * * * # 5 minutes after midnight, every day
556 15 14 1 * * # run at 2:15pm on the first of every month
557 0 22 * * 1-5 # run at 10 pm on weekdays
558 5 4 * * sun # run at 4:05am every Sunday
559
560From crontab(5):
561
562 field allowed values
563 ----- --------------
564 minute 0-59
565 hour 0-23
566 day of month 1-31
567 month 0-12 (or names, see below)
568 day of week 0-7 (0 or 7 is Sun, or use names)
569
bec4be8c 570Instead of the first five fields, one of seven special strings may appear:
74e31b02 571
572 string meaning
573 ------ -------
574 @yearly Run once a year, "0 0 1 1 *".
575 @annually (same as @yearly)
576 @monthly Run once a month, "0 0 1 * *".
577 @weekly Run once a week, "0 0 * * 0".
578 @daily Run once a day, "0 0 * * *".
579 @midnight (same as @daily)
580 @hourly Run once an hour, "0 * * * *".
581
bec4be8c 582=head3 event
74e31b02 583
584The event to run at the specified time can be either a Catalyst private
585action path or a coderef. Both types of event methods will receive the $c
586object from the current request, but you must not rely on any request-specific
587information present in $c as it will be from a random user request at or near
588the event's specified run time.
589
590Important: Methods used for events should be marked C<Private> so that
591they can not be executed via the browser.
592
bec4be8c 593=head3 auto_run
74e31b02 594
595The auto_run parameter specifies when the event is allowed to be executed.
596By default this option is set to 1, so the event will be executed during the
597first request that matches the specified time in C<at>.
598
599If set to 0, the event will only run when a request is made by a user from
600an authorized address. The purpose of this option is to allow long-running
601tasks to execute only for certain users.
602
603 MyApp->schedule(
604 at => '0 0 * * *',
605 event => '/cron/rebuild_search_index',
606 auto_run => 0,
607 );
608
609 package MyApp::Controller::Cron;
610
611 sub rebuild_search_index : Private {
612 my ( $self, $c ) = @_;
613
614 # rebuild the search index, this may take a long time
615 }
616
617Now, the search index will only be rebuilt when a request is made from a user
618whose IP address matches the list in the C<hosts_allow> config option. To
619run this event, you probably want to ping the app from a cron job.
620
f9d8e3cf 621 0 0 * * * wget -q http://www.myapp.com/
74e31b02 622
bec4be8c 623=head2 MANUAL EVENTS
74e31b02 624
625To create an event that does not run on a set schedule and must be manually
626triggered, you can specify the C<trigger> option instead of C<at>.
627
628 __PACKAGE__->schedule(
629 trigger => 'send_email',
630 event => '/events/send_email',
631 );
632
633The event may then be triggered by a standard web request from an authorized
634user. The trigger to run is specified by using a special GET parameter,
635'schedule_trigger'; the path requested does not matter.
636
637 http://www.myapp.com/?schedule_trigger=send_email
638
639By default, manual events may only be triggered by requests made from
640localhost (127.0.0.1). To allow other addresses to run events, use the
68f800bd 641configuration option L</hosts_allow>.
642
643=head1 SCHEDULING USING A YAML FILE
644
645As an alternative to using the schedule() method, you may define scheduled
646events in an external YAML file. By default, the plugin looks for the
647existence of a file called C<schedule.yml> in your application's home
648directory. You can change the filename using the configuration option
649L</yaml_file>.
650
bec4be8c 651Modifications to this file will be re-read once per minute during the normal
652event checking process.
68f800bd 653
654Here's an example YAML configuration file with 4 events. Each event is
655denoted with a '-' character, followed by the same parameters used by the
656C<schedule> method. Note that coderef events are not supported by the YAML
657file.
658
659 ---
660 - at: '* * * * *'
661 event: /cron/delete_sessions
662 - event: /cron/send_email
663 trigger: send_email
664 - at: '@hourly'
665 event: /cron/hourly
666 - at: 0 0 * * *
667 auto_run: 0
668 event: /cron/rebuild_search_index
74e31b02 669
670=head1 SECURITY
671
672All events are run inside of an eval container. This protects the user from
673receiving any error messages or page crashes if an event fails to run
674properly. All event errors are logged, even if logging is disabled.
675
74e31b02 676=head1 PLUGIN SUPPORT
677
678Other plugins may register scheduled events if they need to perform periodic
679maintenance. Plugin authors, B<be sure to inform your users> if you do this!
680Events should be registered from a plugin's C<setup> method.
681
682 sub setup {
5cc5ef46 683 my $c = shift;
684 $c->maybe::next::method(@_);
74e31b02 685
686 if ( $c->can('schedule') ) {
687 $c->schedule(
688 at => '0 * * * *',
689 event => \&cleanup,
690 );
691 }
692 }
bec4be8c 693
f9d8e3cf 694=head1 CAVEATS
695
696The time at which an event will run is determined completely by the requests
697made to the application. Apps with heavy traffic may have events run at very
698close to the correct time, whereas apps with low levels of traffic may see
699events running much later than scheduled. If this is a problem, you can use
700a real cron entry that simply hits your application at the desired time.
701
702 0 * * * * wget -q http://www.myapp.com/
703
704Events which consume a lot of time will slow the request processing for the
705user who triggers the event. For these types of events, you should use
706auto_run => 0 or manual event triggering.
707
708=head1 PERFORMANCE
709
bec4be8c 710The plugin only checks once per minute if any events need to be run, so the
711overhead on each request is minimal. On my test server, the difference
712between running with Scheduler and without was only around 0.02% (0.004
713seconds).
f9d8e3cf 714
68f800bd 715Of course, when a scheduled event runs, performance will depend on what's
716being run in the event.
bec4be8c 717
718=head1 METHODS
719
720=head2 schedule
721
722Schedule is a class method for adding scheduled events. See the
723L<"/SCHEDULING"> section for more information.
724
725=head2 scheduler_state
726
727The current state of all scheduled events is available in an easy-to-use
728format by calling $c->scheduler_state. You can use this data to build an
729admin view into the scheduling engine, for example. This same data is also
730displayed on the Catalyst debug screen.
731
732This method returns an array reference containing a hash reference for each
733event.
734
735 [
736 {
737 'last_run' => '2005-12-29 16:29:33 EST',
738 'auto_run' => 1,
739 'last_output' => 1,
740 'at' => '0 0 * * *',
741 'next_run' => '2005-12-30 00:00:00 EST',
742 'event' => '/cron/session_cleanup'
743 },
744 {
745 'auto_run' => 1,
746 'at' => '0 0 * * *',
747 'next_run' => '2005-12-30 00:00:00 EST',
748 'event' => '/cron/build_rss'
749 },
750 ]
751
752=head1 INTERNAL METHODS
753
754The following methods are extended by this plugin.
755
756=over 4
757
758=item dispatch
759
760The main scheduling logic takes place during the dispatch phase.
761
762=item dump_these
763
764On the Catalyst debug screen, all scheduled events are displayed along with
765the next time they will be executed.
766
767=item setup
768
769=back
74e31b02 770
771=head1 SEE ALSO
772
bec4be8c 773L<crontab(5)>
74e31b02 774
bec4be8c 775=head1 AUTHOR
74e31b02 776
777Andy Grundman, <andy@hybridized.org>
778
779=head1 COPYRIGHT
780
781This program is free software, you can redistribute it and/or modify it
782under the same terms as Perl itself.
783
784=cut