use strict;
use warnings;
-use base 'Class::Data::Inheritable';
+use base qw/Class::Accessor::Fast Class::Data::Inheritable/;
use DateTime;
use DateTime::Event::Cron;
use DateTime::TimeZone;
+use File::stat;
use NEXT;
+use Set::Scalar;
+use Storable qw/lock_store lock_retrieve/;
+use YAML;
-our $VERSION = '0.01';
+our $VERSION = '0.05';
__PACKAGE__->mk_classdata( '_events' => [] );
-__PACKAGE__->mk_classdata( '_last_event_check' );
+__PACKAGE__->mk_accessors('_event_state');
sub schedule {
my ( $class, %args ) = @_;
+
+ unless ( $args{event} ) {
+ Catalyst::Exception->throw(
+ message => 'The schedule method requires an event parameter' );
+ }
+
+ my $conf = $class->config->{scheduler};
my $event = {
- event => $args{event},
trigger => $args{trigger},
+ event => $args{event},
auto_run => ( defined $args{auto_run} ) ? $args{auto_run} : 1,
};
-
+
if ( $args{at} ) {
+
# replace keywords that Set::Crontab doesn't support
$args{at} = _prepare_cron( $args{at} );
# parse the cron entry into a DateTime::Set
my $set;
eval { $set = DateTime::Event::Cron->from_cron( $args{at} ) };
- if ( $@ ) {
- Catalyst::Exception->throw(
- "Scheduler: Unable to parse 'at' value "
- . $args{at} . ': ' . $@
- );
+ if ($@) {
+ Catalyst::Exception->throw(
+ "Scheduler: Unable to parse 'at' value "
+ . $args{at} . ': '
+ . $@ );
}
else {
+ $event->{at} = $args{at};
$event->{set} = $set;
}
}
-
+
push @{ $class->_events }, $event;
}
sub dispatch {
my $c = shift;
-
+
$c->NEXT::dispatch(@_);
-
+
+ $c->_get_event_state();
+
+ $c->_check_yaml();
+
# check if a minute has passed since our last check
- if ( time - $c->_last_event_check < 60 ) {
- return;
+ # This check is not run if the user is manually triggering an event
+ if ( time - $c->_event_state->{last_check} < 60 ) {
+ return unless $c->req->params->{schedule_trigger};
}
-
- my $conf = $c->config->{scheduler};
-
- # check for events to execute
- my $last_check = DateTime->from_epoch(
- epoch => $c->_last_event_check,
+ my $last_check = $c->_event_state->{last_check};
+ $c->_event_state->{last_check} = time;
+ $c->_save_event_state();
+
+ my $conf = $c->config->{scheduler};
+ my $last_check_dt = DateTime->from_epoch(
+ epoch => $last_check,
time_zone => $conf->{time_zone}
);
my $now = DateTime->now( time_zone => $conf->{time_zone} );
- $c->_last_event_check( $now->epoch );
-
+
EVENT:
for my $event ( @{ $c->_events } ) {
- next EVENT unless $event->{set};
-
- my $next_run = $event->{set}->next( $last_check );
+ my $next_run;
+
+ if ( $event->{trigger}
+ && $event->{trigger} eq $c->req->params->{schedule_trigger} )
+ {
+
+ # manual trigger, run it now
+ next EVENT unless $c->_event_authorized;
+ $next_run = $now;
+ }
+ else {
+ next EVENT unless $event->{set};
+ $next_run = $event->{set}->next($last_check_dt);
+ }
+
if ( $next_run <= $now ) {
-
+
# do some security checking for non-auto-run events
if ( !$event->{auto_run} ) {
next EVENT unless $c->_event_authorized;
}
-
- # update the state file to make sure we're the only process
- # running this event
- next EVENT unless $c->_mark_state( $event, $next_run );
-
+
+ # make sure we're the only process running this event
+ next EVENT unless $c->_mark_running($event);
+
my $event_name = $event->{trigger} || $event->{event};
- $c->log->info( "Scheduler: Executing $event_name" )
- if $conf->{logging};
-
+ $c->log->debug("Scheduler: Executing $event_name")
+ if $c->config->{scheduler}->{logging};
+
# trap errors
local $c->{error} = [];
-
+
# run event
eval {
+
# do not allow the event to modify the response
local $c->res->{body};
local $c->res->{cookies};
local $c->res->{headers};
local $c->res->{location};
local $c->res->{status};
-
+
if ( ref $event->{event} eq 'CODE' ) {
- $event->{event}->( $c );
+ $event->{event}->($c);
}
else {
$c->forward( $event->{event} );
};
my @errors = @{ $c->{error} };
push @errors, $@ if $@;
- if ( @errors ) {
- $c->log->error( 'Scheduler: Error executing '
- . "$event_name: $_" ) for @errors;
+ if (@errors) {
+ $c->log->error(
+ 'Scheduler: Error executing ' . "$event_name: $_" )
+ for @errors;
}
+
+ $c->_mark_finished($event);
}
}
}
sub setup {
my $c = shift;
-
- $c->NEXT::setup(@_);
-
+
# initial configuration
$c->config->{scheduler}->{logging} ||= ( $c->debug ) ? 1 : 0;
- $c->config->{scheduler}->{time_zone} ||= $c->_detect_timezone();
+ $c->config->{scheduler}->{time_zone} ||= $c->_detect_timezone();
$c->config->{scheduler}->{state_file} ||= $c->path_to('scheduler.state');
$c->config->{scheduler}->{hosts_allow} ||= '127.0.0.1';
+ $c->config->{scheduler}->{yaml_file} ||= $c->path_to('scheduler.yml');
+
+ $c->NEXT::setup(@_);
+}
+
+sub dump_these {
+ my $c = shift;
+
+ return ( $c->NEXT::dump_these(@_) ) unless @{ $c->_events };
+
+ # for debugging, we dump out a list of all events with their next
+ # scheduled run time
+
+ my $conf = $c->config->{scheduler};
+ my $now = DateTime->now( time_zone => $conf->{time_zone} );
+
+ my $last_check = $c->_event_state->{last_check};
+ my $last_check_dt = DateTime->from_epoch(
+ epoch => $last_check,
+ time_zone => $conf->{time_zone}
+ );
+
+ my $event_dump = [];
+ for my $event ( @{ $c->_events } ) {
+ my $dump = {};
+ for my $key ( qw/at trigger event auto_run/ ) {
+ $dump->{$key} = $event->{$key} if $event->{$key};
+ }
+
+ if ( $event->{set} ) {
+ my $next_run = $event->{set}->next($last_check_dt);
+ $dump->{next_run}
+ = $next_run->ymd
+ . q{ } . $next_run->hms
+ . q{ } . $next_run->time_zone_short_name;
+ }
+
+ push @{$event_dump}, $dump;
+ }
- $c->_last_event_check(
- DateTime->now(
- time_zone => $c->config->{scheduler}->{time_zone}
- )->epoch
+ return (
+ $c->NEXT::dump_these(@_),
+ [ 'Scheduled Events', $event_dump ],
);
+}
+
+# check and reload the YAML file with schedule data
+sub _check_yaml {
+ my ($c) = @_;
+
+ # each process needs to load the YAML file independently
+ if ( $c->_event_state->{yaml_mtime}->{$$} ||= 0 ) {
+ return if ( time - $c->_event_state->{last_check} < 60 );
+ }
+
+ return unless -e $c->config->{scheduler}->{yaml_file};
+
+ eval {
+ my $mtime = ( stat $c->config->{scheduler}->{yaml_file} )->mtime;
+ if ( $mtime > $c->_event_state->{yaml_mtime}->{$$} ) {
+ $c->_event_state->{yaml_mtime}->{$$} = $mtime;
+ $c->_save_event_state();
+
+ # wipe out all current events and reload from YAML
+ $c->_events( [] );
+
+ my $yaml = YAML::LoadFile( $c->config->{scheduler}->{yaml_file} );
+
+ foreach my $event ( @{$yaml} ) {
+ $c->schedule( %{$event} );
+ }
+
+ $c->log->info( "Scheduler: PID $$ loaded "
+ . scalar @{$yaml}
+ . ' events from YAML file' )
+ if $c->config->{scheduler}->{logging};
+ }
+ };
+ if ($@) {
+ $c->log->error("Error reading YAML file: $@");
+ }
}
# Detect the current time zone
sub _detect_timezone {
my $c = shift;
-
+
my $tz;
eval { $tz = DateTime::TimeZone->new( name => 'local' ) };
if ($@) {
- $c->log->warn(
- 'Scheduler: Unable to determine local time zone, using UTC' );
+ $c->log->warn(
+ 'Scheduler: Unable to autodetect local time zone, using UTC')
+ if $c->config->{scheduler}->{logging};
return 'UTC';
}
else {
+ $c->log->debug(
+ 'Scheduler: Using autodetected time zone: ' . $tz->name )
+ if $c->config->{scheduler}->{logging};
return $tz->name;
}
}
# Check for authorized users on non-auto events
-# XXX: TODO
sub _event_authorized {
my $c = shift;
-
+
+ # this should never happen, but just in case...
+ return unless $c->req->address;
+
+ my $hosts_allow = $c->config->{scheduler}->{hosts_allow};
+ $hosts_allow = [$hosts_allow] unless ref($hosts_allow) eq 'ARRAY';
+ my $allowed = Set::Scalar->new( @{$hosts_allow} );
+ return $allowed->contains( $c->req->address );
}
-# Update the state file
-# XXX: TODO
-sub _mark_state {
- my ( $c, $event, $next_run ) = @_;
-
+# get the state from the state file
+sub _get_event_state {
+ my $c = shift;
+
+ if ( -e $c->config->{scheduler}->{state_file} ) {
+ $c->_event_state(
+ lock_retrieve $c->config->{scheduler}->{state_file} );
+ }
+ else {
+
+ # initialize the state file
+ $c->_event_state(
+ { last_check => time,
+ yaml_mtime => {},
+ }
+ );
+ $c->_save_event_state();
+ }
+}
+
+# Check the state file to ensure we are the only process running an event
+sub _mark_running {
+ my ( $c, $event ) = @_;
+
+ $c->_get_event_state();
+
+ return if $c->_event_state->{ $event->{event} };
+
+ # this is a 2-step process to prevent race conditions
+ # 1. write the state file with our PID
+ $c->_event_state->{ $event->{event} } = $$;
+ $c->_save_event_state();
+
+ # 2. re-read the state file and make sure it's got the same PID
+ $c->_get_event_state();
+ if ( $c->_event_state->{ $event->{event} } == $$ ) {
+ return 1;
+ }
+
+ return;
+}
+
+# Mark an event as finished
+sub _mark_finished {
+ my ( $c, $event ) = @_;
+
+ $c->_event_state->{ $event->{event} } = 0;
+ $c->_save_event_state();
+}
+
+# update the state file on disk
+sub _save_event_state {
+ my $c = shift;
+
+ lock_store $c->_event_state, $c->config->{scheduler}->{state_file};
}
# Set::Crontab does not support day names, or '@' shortcuts
sub _prepare_cron {
my $cron = shift;
-
+
return $cron unless $cron =~ /\w/;
-
+
my %replace = (
jan => 1,
feb => 2,
'oct' => 10,
nov => 11,
dec => 12,
-
+
sun => 0,
mon => 1,
tue => 2,
thu => 4,
fri => 5,
sat => 6,
-
+ );
+
+ my %replace_at = (
'yearly' => '0 0 1 1 *',
'annually' => '0 0 1 1 *',
'monthly' => '0 0 1 * *',
'hourly' => '0 * * * *',
);
+ if ( $cron =~ /^\@/ ) {
+ $cron =~ s/^\@//;
+ return $replace_at{ $cron };
+ }
+
for my $name ( keys %replace ) {
my $value = $replace{$name};
-
- if ( $cron =~ /^\@$name/ ) {
- $cron = $value;
- last;
- }
- else {
- $cron =~ s/$name/$value/i;
- last unless $cron =~ /\w/;
- }
+ $cron =~ s/$name/$value/i;
+ last unless $cron =~ /\w/;
}
-
return $cron;
}
event => \&do_stuff,
);
+ # A long-running scheduled event that must be triggered
+ # manually by an authorized user
+ __PACKAGE__->schedule(
+ trigger => 'rebuild_search_index',
+ event => '/cron/rebuild_search_index',
+ );
+ $ wget -q http://www.myapp.com/?schedule_trigger=rebuild_search_index
+
=head1 DESCRIPTION
This plugin allows you to schedule events to run at recurring intervals.
=head2 state_file
The current state of every event is stored in a file. By default this is
-$APP_HOME/scheduler.state. If this file cannot be read or created at
-startup, your app will die.
+$APP_HOME/scheduler.state. This file is created on the first request if it
+does not already exist.
+
+=head2 yaml_file
+
+The location of the optional YAML event configuration file. By default this
+is $APP_HOME/scheduler.yml.
=head2 hosts_allow
whose IP address matches the list in the C<hosts_allow> config option. To
run this event, you probably want to ping the app from a cron job.
- 0 0 * * * wget -q http://www.myapp.com/
+ 0 0 * * * wget -q http://www.myapp.com/
=head2 MANUAL EVENTS
By default, manual events may only be triggered by requests made from
localhost (127.0.0.1). To allow other addresses to run events, use the
-configuration option C<hosts_allow>.
+configuration option L</hosts_allow>.
+
+=head1 SCHEDULING USING A YAML FILE
+
+As an alternative to using the schedule() method, you may define scheduled
+events in an external YAML file. By default, the plugin looks for the
+existence of a file called C<schedule.yml> in your application's home
+directory. You can change the filename using the configuration option
+L</yaml_file>.
+
+Modifications to this file will be re-read once per minute during the normal
+event checking process.
+
+Here's an example YAML configuration file with 4 events. Each event is
+denoted with a '-' character, followed by the same parameters used by the
+C<schedule> method. Note that coderef events are not supported by the YAML
+file.
+
+ ---
+ - at: '* * * * *'
+ event: /cron/delete_sessions
+ - event: /cron/send_email
+ trigger: send_email
+ - at: '@hourly'
+ event: /cron/hourly
+ - at: 0 0 * * *
+ auto_run: 0
+ event: /cron/rebuild_search_index
=head1 SECURITY
receiving any error messages or page crashes if an event fails to run
properly. All event errors are logged, even if logging is disabled.
-=head1 CAVEATS
-
-The time at which an event will run is determined completely by the requests
-made to the application. Apps with heavy traffic may have events run at very
-close to the correct time, whereas apps with low levels of traffic may see
-events running much later than scheduled. If this is a problem, you can use
-a real cron entry that simply hits your application at the desired time.
-
- 0 * * * * wget -q http://www.myapp.com/
-
-Events which consume a lot of time will slow the request processing for the
-user who triggers the event. For these types of events, you should use
-auto_run => 0 or manual event triggering.
-
=head1 PLUGIN SUPPORT
Other plugins may register scheduled events if they need to perform periodic
);
}
}
+
+=head1 CAVEATS
+
+The time at which an event will run is determined completely by the requests
+made to the application. Apps with heavy traffic may have events run at very
+close to the correct time, whereas apps with low levels of traffic may see
+events running much later than scheduled. If this is a problem, you can use
+a real cron entry that simply hits your application at the desired time.
+
+ 0 * * * * wget -q http://www.myapp.com/
+
+Events which consume a lot of time will slow the request processing for the
+user who triggers the event. For these types of events, you should use
+auto_run => 0 or manual event triggering.
+
+=head1 PERFORMANCE
+
+The plugin only checks once per minute if any events need to be run, so the
+overhead on each request is minimal. On my test server, the difference
+between running with Scheduler and without was only around 0.02% (0.004
+seconds).
+
+Of course, when a scheduled event runs, performance will depend on what's
+being run in the event.
+
+=head1 METHODS
+
+=head2 schedule
+
+Schedule is a class method for adding scheduled events. See the
+L<"/SCHEDULING"> section for more information.
+
+=head1 INTERNAL METHODS
+
+The following methods are extended by this plugin.
+
+=over 4
+
+=item dispatch
+
+The main scheduling logic takes place during the dispatch phase.
+
+=item dump_these
+
+On the Catalyst debug screen, all scheduled events are displayed along with
+the next time they will be executed.
-=head1 TODO
+=item setup
-Support storing all scheduled events in an external YAML file. This would
-only allow private actions, not coderefs. It would also allow changes to
-the schedule to take effect in realtime.
+=back
=head1 SEE ALSO