X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?p=dbsrgits%2FDBIx-Class.git;a=blobdiff_plain;f=lib%2FDBIx%2FClass%2FStorage%2FDBI%2FReplicated.pm;h=549bc76c397cedc84a5b1e5e29a1cfc38e1fece2;hp=0846f85d9df40eea2a1c9a5c9497ca8fa6165633;hb=b2e4d52289df8386abc4b79ee68c31f9798a1181;hpb=4fafa779d4d14f5790164f515848d30087f24eb8 diff --git a/lib/DBIx/Class/Storage/DBI/Replicated.pm b/lib/DBIx/Class/Storage/DBI/Replicated.pm index 0846f85..549bc76 100644 --- a/lib/DBIx/Class/Storage/DBI/Replicated.pm +++ b/lib/DBIx/Class/Storage/DBI/Replicated.pm @@ -1,16 +1,44 @@ package DBIx::Class::Storage::DBI::Replicated; +BEGIN { + use Carp::Clan qw/^DBIx::Class/; + + ## Modules required for Replication support not required for general DBIC + ## use, so we explicitly test for these. + + my %replication_required = ( + Moose => '0.77', + MooseX::AttributeHelpers => '0.12', + MooseX::Types => '0.10', + namespace::clean => '0.11', + ); + + my @didnt_load; + + for my $module (keys %replication_required) { + eval "use $module $replication_required{$module}"; + push @didnt_load, "$module $replication_required{$module}" + if $@; + } + + croak("@{[ join ', ', @didnt_load ]} are missing and are required for Replication") + if @didnt_load; +} + use Moose; use DBIx::Class::Storage::DBI; use DBIx::Class::Storage::DBI::Replicated::Pool; use DBIx::Class::Storage::DBI::Replicated::Balancer; -use Scalar::Util qw(blessed); +use DBIx::Class::Storage::DBI::Replicated::Types 'BalancerClassNamePart'; +use MooseX::Types::Moose qw/ClassName HashRef Object/; +use Scalar::Util 'reftype'; +use Carp::Clan qw/^DBIx::Class/; -extends 'DBIx::Class::Storage::DBI', 'Moose::Object'; +use namespace::clean -except => 'meta'; =head1 NAME -DBIx::Class::Storage::DBI::Replicated - ALPHA Replicated database support +DBIx::Class::Storage::DBI::Replicated - BETA Replicated database support =head1 SYNOPSIS @@ -18,24 +46,40 @@ The Following example shows how to change an existing $schema to a replicated storage type, add some replicated (readonly) databases, and perform reporting tasks. - ## Change storage_type in your schema class - $schema->storage_type( ['::DBI::Replicated', {balancer=>'::Random'}] ); - - ## Add some slaves. Basically this is an array of arrayrefs, where each - ## arrayref is database connect information - - $schema->storage->connect_replicants( - [$dsn1, $user, $pass, \%opts], - [$dsn1, $user, $pass, \%opts], - [$dsn1, $user, $pass, \%opts], - ); - + ## Change storage_type in your schema class + $schema->storage_type( ['::DBI::Replicated', {balancer=>'::Random'}] ); + + ## Add some slaves. Basically this is an array of arrayrefs, where each + ## arrayref is database connect information + + $schema->storage->connect_replicants( + [$dsn1, $user, $pass, \%opts], + [$dsn2, $user, $pass, \%opts], + [$dsn3, $user, $pass, \%opts], + ); + + ## Now, just use the $schema as normal + $schema->resultset('Source')->search({name=>'etc'}); + + ## You can force a given query to use a particular storage using the search + ### attribute 'force_pool'. For example: + + my $RS = $schema->resultset('Source')->search(undef, {force_pool=>'master'}); + + ## Now $RS will force everything (both reads and writes) to use whatever was + ## setup as the master storage. 'master' is hardcoded to always point to the + ## Master, but you can also use any Replicant name. Please see: + ## L and the replicants attribute for + ## More. Also see transactions and L for alternative ways + ## to force read traffic to the master. + =head1 DESCRIPTION -Warning: This class is marked ALPHA. We are using this in development and have -some basic test coverage but the code hasn't yet been stressed by a variety -of databases. Individual DB's may have quirks we are not aware of. Please -use this in development and pass along your experiences/bug fixes. +Warning: This class is marked BETA. This has been running a production +website using MySQL native replication as its backend and we have some decent +test coverage but the code hasn't yet been stressed by a variety of databases. +Individual DB's may have quirks we are not aware of. Please use this in first +development and pass along your experiences/bug fixes. This class implements replicated data store for DBI. Currently you can define one master and numerous slave database connections. All write-type queries @@ -55,14 +99,38 @@ selected algorithm. The default algorithm is random weighted. The consistancy betweeen master and replicants is database specific. The Pool gives you a method to validate it's replicants, removing and replacing them -when they fail/pass predefined criteria. It is recommened that your application -define two schemas, one using the replicated storage and another that just -connects to the master. +when they fail/pass predefined criteria. Please make careful use of the ways +to force a query to run against Master when needed. + +=head1 REQUIREMENTS + +Replicated Storage has additional requirements not currently part of L + + Moose => 0.77 + MooseX::AttributeHelpers => 0.12 + MooseX::Types => 0.10 + namespace::clean => 0.11 + +You will need to install these modules manually via CPAN or make them part of the +Makefile for your distribution. =head1 ATTRIBUTES This class defines the following attributes. +=head2 schema + +The underlying L object this storage is attaching + +=cut + +has 'schema' => ( + is=>'rw', + isa=>'DBIx::Class::Schema', + weak_ref=>1, + required=>1, +); + =head2 pool_type Contains the classname which will instantiate the L object. Defaults @@ -71,12 +139,13 @@ to: L. =cut has 'pool_type' => ( - is=>'ro', - isa=>'ClassName', - lazy_build=>1, - handles=>{ - 'create_pool' => 'new', - }, + is=>'ro', + isa=>ClassName, + required=>1, + default=>'DBIx::Class::Storage::DBI::Replicated::Pool', + handles=>{ + 'create_pool' => 'new', + }, ); =head2 pool_args @@ -87,11 +156,11 @@ See L for available arguments. =cut has 'pool_args' => ( - is=>'ro', - isa=>'HashRef', - lazy=>1, - required=>1, - default=>sub { {} }, + is=>'ro', + isa=>HashRef, + lazy=>1, + required=>1, + default=>sub { {} }, ); @@ -103,12 +172,14 @@ choose how to spread the query load across each replicant in the pool. =cut has 'balancer_type' => ( - is=>'ro', - isa=>'ClassName', - lazy_build=>1, - handles=>{ - 'create_balancer' => 'new', - }, + is=>'ro', + isa=>BalancerClassNamePart, + coerce=>1, + required=>1, + default=> 'DBIx::Class::Storage::DBI::Replicated::Balancer::First', + handles=>{ + 'create_balancer' => 'new', + }, ); =head2 balancer_args @@ -119,11 +190,11 @@ See L for available arguments. =cut has 'balancer_args' => ( - is=>'ro', - isa=>'HashRef', - lazy=>1, - required=>1, - default=>sub { {} }, + is=>'ro', + isa=>HashRef, + lazy=>1, + required=>1, + default=>sub { {} }, ); =head2 pool @@ -134,14 +205,14 @@ container class for one or more replicated databases. =cut has 'pool' => ( - is=>'ro', - isa=>'DBIx::Class::Storage::DBI::Replicated::Pool', - lazy_build=>1, - handles=>[qw/ - connect_replicants - replicants - has_replicants - /], + is=>'ro', + isa=>'DBIx::Class::Storage::DBI::Replicated::Pool', + lazy_build=>1, + handles=>[qw/ + connect_replicants + replicants + has_replicants + /], ); =head2 balancer @@ -152,10 +223,10 @@ is a class that takes a pool () =cut has 'balancer' => ( - is=>'ro', - isa=>'DBIx::Class::Storage::DBI::Replicated::Balancer', - lazy_build=>1, - handles=>[qw/auto_validate_every/], + is=>'ro', + isa=>'DBIx::Class::Storage::DBI::Replicated::Balancer', + lazy_build=>1, + handles=>[qw/auto_validate_every/], ); =head2 master @@ -169,9 +240,9 @@ pool of databases that is allowed to handle write traffic. =cut has 'master' => ( - is=> 'ro', - isa=>'DBIx::Class::Storage::DBI', - lazy_build=>1, + is=> 'ro', + isa=>'DBIx::Class::Storage::DBI', + lazy_build=>1, ); =head1 ATTRIBUTES IMPLEMENTING THE DBIx::Storage::DBI INTERFACE @@ -186,14 +257,14 @@ Defines an object that implements the read side of L. =cut has 'read_handler' => ( - is=>'rw', - isa=>'Object', - lazy_build=>1, - handles=>[qw/ - select - select_single - columns_info_for - /], + is=>'rw', + isa=>Object, + lazy_build=>1, + handles=>[qw/ + select + select_single + columns_info_for + /], ); =head2 write_handler @@ -203,68 +274,87 @@ Defines an object that implements the write side of L. =cut has 'write_handler' => ( - is=>'ro', - isa=>'Object', - lazy_build=>1, - lazy_build=>1, - handles=>[qw/ - on_connect_do - on_disconnect_do - connect_info - throw_exception - sql_maker - sqlt_type - create_ddl_dir - deployment_statements - datetime_parser - datetime_parser_type - last_insert_id - insert - insert_bulk - update - delete - dbh - txn_commit - txn_rollback - sth - deploy - schema - reload_row - /], + is=>'ro', + isa=>Object, + lazy_build=>1, + lazy_build=>1, + handles=>[qw/ + on_connect_do + on_disconnect_do + connect_info + throw_exception + sql_maker + sqlt_type + create_ddl_dir + deployment_statements + datetime_parser + datetime_parser_type + last_insert_id + insert + insert_bulk + update + delete + dbh + txn_begin + txn_do + txn_commit + txn_rollback + txn_scope_guard + sth + deploy + with_deferred_fk_checks + + reload_row + _prep_for_execute + + /], ); +has _master_connect_info_opts => + (is => 'rw', isa => HashRef, default => sub { {} }); + +=head2 around: connect_info + +Preserve master's C options (for merging with replicants.) + +=cut + +around connect_info => sub { + my ($next, $self, $info, @extra) = @_; + + my %opts; + for my $arg (@$info) { + next unless (reftype($arg)||'') eq 'HASH'; + %opts = (%opts, %$arg); + } + + delete $opts{dsn}; + + $self->_master_connect_info_opts(\%opts); + + $self->$next($info, @extra); +}; + =head1 METHODS This class defines the following methods. -=head2 new +=head2 BUILDARGS L when instantiating it's storage passed itself as the -first argument. We need to invoke L on the underlying parent class, make -sure we properly give it a L meta class, and then correctly instantiate -our attributes. Basically we pass on whatever the schema has in it's class -data for 'storage_type_args' to our replicated storage type. +first argument. So we need to massage the arguments a bit so that all the +bits get put into the correct places. =cut -sub new { - my $class = shift @_; - my $schema = shift @_; - my $storage_type_args = shift @_; - my $obj = $class->SUPER::new($schema, $storage_type_args, @_); - - ## Hate to do it this way, but can't seem to get advice on the attribute working right - ## maybe we can do a type and coercion for it. - if( $storage_type_args->{balancer_type} && $storage_type_args->{balancer_type}=~m/^::/) { - $storage_type_args->{balancer_type} = 'DBIx::Class::Storage::DBI::Replicated::Balancer'.$storage_type_args->{balancer_type}; - eval "require $storage_type_args->{balancer_type}"; - } - - return $class->meta->new_object( - __INSTANCE__ => $obj, - %$storage_type_args, - @_, - ); +sub BUILDARGS { + my ($class, $schema, $storage_type_args, @args) = @_; + + return { + schema=>$schema, + %$storage_type_args, + @args + } } =head2 _build_master @@ -274,17 +364,8 @@ Lazy builder for the L attribute. =cut sub _build_master { - DBIx::Class::Storage::DBI->new; -} - -=head2 _build_pool_type - -Lazy builder for the L attribute. - -=cut - -sub _build_pool_type { - return 'DBIx::Class::Storage::DBI::Replicated::Pool'; + my $self = shift @_; + DBIx::Class::Storage::DBI->new($self->schema); } =head2 _build_pool @@ -294,18 +375,8 @@ Lazy builder for the L attribute. =cut sub _build_pool { - my $self = shift @_; - $self->create_pool(%{$self->pool_args}); -} - -=head2 _build_balancer_type - -Lazy builder for the L attribute. - -=cut - -sub _build_balancer_type { - return 'DBIx::Class::Storage::DBI::Replicated::Balancer::First'; + my $self = shift @_; + $self->create_pool(%{$self->pool_args}); } =head2 _build_balancer @@ -316,11 +387,12 @@ the balancer knows which pool it's balancing. =cut sub _build_balancer { - my $self = shift @_; - $self->create_balancer( - pool=>$self->pool, - master=>$self->master, - %{$self->balancer_args},); + my $self = shift @_; + $self->create_balancer( + pool=>$self->pool, + master=>$self->master, + %{$self->balancer_args}, + ); } =head2 _build_write_handler @@ -331,7 +403,7 @@ the L. =cut sub _build_write_handler { - return shift->master; + return shift->master; } =head2 _build_read_handler @@ -342,19 +414,45 @@ the L. =cut sub _build_read_handler { - return shift->balancer; + return shift->balancer; } =head2 around: connect_replicants All calls to connect_replicants needs to have an existing $schema tacked onto -top of the args, since L needs it. +top of the args, since L needs it, and any C +options merged with the master, with replicant opts having higher priority. =cut -around 'connect_replicants' => sub { - my ($method, $self, @args) = @_; - $self->$method($self->schema, @args); +around connect_replicants => sub { + my ($next, $self, @args) = @_; + + for my $r (@args) { + $r = [ $r ] unless reftype $r eq 'ARRAY'; + + croak "coderef replicant connect_info not supported" + if ref $r->[0] && reftype $r->[0] eq 'CODE'; + +# any connect_info options? + my $i = 0; + $i++ while $i < @$r && (reftype($r->[$i])||'') ne 'HASH'; + +# make one if none + $r->[$i] = {} unless $r->[$i]; + +# merge if two hashes + my %opts = map %$_, @$r[$i .. $#{$r}]; + splice @$r, $i+1, ($#{$r} - $i), (); + +# merge with master + %opts = (%{ $self->_master_connect_info_opts }, %opts); + +# update + $r->[$i] = \%opts; + } + + $self->$next($self->schema, @args); }; =head2 all_storages @@ -366,12 +464,11 @@ replicants. =cut sub all_storages { - my $self = shift @_; - - return grep {defined $_ && blessed $_} ( - $self->master, - $self->replicants, - ); + my $self = shift @_; + return grep {defined $_ && blessed $_} ( + $self->master, + values %{ $self->replicants }, + ); } =head2 execute_reliably ($coderef, ?@args) @@ -382,14 +479,14 @@ restores the original state. Example: - my $reliably = sub { - my $name = shift @_; - $schema->resultset('User')->create({name=>$name}); - my $user_rs = $schema->resultset('User')->find({name=>$name}); - return $user_rs; - }; + my $reliably = sub { + my $name = shift @_; + $schema->resultset('User')->create({name=>$name}); + my $user_rs = $schema->resultset('User')->find({name=>$name}); + return $user_rs; + }; - my $user_rs = $schema->storage->execute_reliably($reliably, 'John'); + my $user_rs = $schema->storage->execute_reliably($reliably, 'John'); Use this when you must be certain of your database state, such as when you just inserted something and need to get a resultset including it, etc. @@ -397,80 +494,79 @@ inserted something and need to get a resultset including it, etc. =cut sub execute_reliably { - my ($self, $coderef, @args) = @_; - - unless( ref $coderef eq 'CODE') { - $self->throw_exception('Second argument must be a coderef'); - } - - ##Get copy of master storage - my $master = $self->master; - - ##Get whatever the current read hander is - my $current = $self->read_handler; - - ##Set the read handler to master - $self->read_handler($master); - - ## do whatever the caller needs - my @result; - my $want_array = wantarray; - - eval { - if($want_array) { - @result = $coderef->(@args); - } - elsif(defined $want_array) { - ($result[0]) = ($coderef->(@args)); - } else { - $coderef->(@args); - } - }; - - ##Reset to the original state - $self->read_handler($current); - - ##Exception testing has to come last, otherwise you might leave the - ##read_handler set to master. - - if($@) { - $self->throw_exception("coderef returned an error: $@"); + my ($self, $coderef, @args) = @_; + + unless( ref $coderef eq 'CODE') { + $self->throw_exception('Second argument must be a coderef'); + } + + ##Get copy of master storage + my $master = $self->master; + + ##Get whatever the current read hander is + my $current = $self->read_handler; + + ##Set the read handler to master + $self->read_handler($master); + + ## do whatever the caller needs + my @result; + my $want_array = wantarray; + + eval { + if($want_array) { + @result = $coderef->(@args); + } elsif(defined $want_array) { + ($result[0]) = ($coderef->(@args)); } else { - return $want_array ? @result : $result[0]; - } + $coderef->(@args); + } + }; + + ##Reset to the original state + $self->read_handler($current); + + ##Exception testing has to come last, otherwise you might leave the + ##read_handler set to master. + + if($@) { + $self->throw_exception("coderef returned an error: $@"); + } else { + return $want_array ? @result : $result[0]; + } } =head2 set_reliable_storage Sets the current $schema to be 'reliable', that is all queries, both read and write are sent to the master - + =cut sub set_reliable_storage { - my $self = shift @_; - my $schema = $self->schema; - my $write_handler = $self->schema->storage->write_handler; - - $schema->storage->read_handler($write_handler); + my $self = shift @_; + my $schema = $self->schema; + my $write_handler = $self->schema->storage->write_handler; + + $schema->storage->read_handler($write_handler); } =head2 set_balanced_storage Sets the current $schema to be use the for all reads, while all writea are sent to the master only - + =cut sub set_balanced_storage { - my $self = shift @_; - my $schema = $self->schema; - my $write_handler = $self->schema->storage->balancer; - - $schema->storage->read_handler($write_handler); + my $self = shift @_; + my $schema = $self->schema; + my $write_handler = $self->schema->storage->balancer; + + $schema->storage->read_handler($write_handler); } -=head2 txn_do ($coderef) +=head2 around: txn_do ($coderef) Overload to the txn_do method, which is delegated to whatever the L is set to. We overload this in order to wrap in inside a @@ -478,23 +574,9 @@ L method. =cut -sub txn_do { - my($self, $coderef, @args) = @_; - $self->execute_reliably($coderef, @args); -} - -=head2 reload_row ($row) - -Overload to the reload_row method so that the reloading is always directed to -the master storage. - -=cut - -around 'reload_row' => sub { - my ($reload_row, $self, $row) = @_; - return $self->execute_reliably(sub { - return $self->$reload_row(shift); - }, $row); +around 'txn_do' => sub { + my($txn_do, $self, $coderef, @args) = @_; + $self->execute_reliably(sub {$self->$txn_do($coderef, @args)}); }; =head2 connected @@ -504,11 +586,10 @@ Check that the master and at least one of the replicants is connected. =cut sub connected { - my $self = shift @_; - - return - $self->master->connected && - $self->pool->connected_replicants; + my $self = shift @_; + return + $self->master->connected && + $self->pool->connected_replicants; } =head2 ensure_connected @@ -518,10 +599,10 @@ Make sure all the storages are connected. =cut sub ensure_connected { - my $self = shift @_; - foreach my $source ($self->all_storages) { - $source->ensure_connected(@_); - } + my $self = shift @_; + foreach my $source ($self->all_storages) { + $source->ensure_connected(@_); + } } =head2 limit_dialect @@ -531,10 +612,11 @@ Set the limit_dialect for all existing storages =cut sub limit_dialect { - my $self = shift @_; - foreach my $source ($self->all_storages) { - $source->limit_dialect(@_); - } + my $self = shift @_; + foreach my $source ($self->all_storages) { + $source->limit_dialect(@_); + } + return $self->master->quote_char; } =head2 quote_char @@ -544,10 +626,11 @@ Set the quote_char for all existing storages =cut sub quote_char { - my $self = shift @_; - foreach my $source ($self->all_storages) { - $source->quote_char(@_); - } + my $self = shift @_; + foreach my $source ($self->all_storages) { + $source->quote_char(@_); + } + return $self->master->quote_char; } =head2 name_sep @@ -557,10 +640,11 @@ Set the name_sep for all existing storages =cut sub name_sep { - my $self = shift @_; - foreach my $source ($self->all_storages) { - $source->name_sep(@_); - } + my $self = shift @_; + foreach my $source ($self->all_storages) { + $source->name_sep(@_); + } + return $self->master->name_sep; } =head2 set_schema @@ -570,10 +654,10 @@ Set the schema object for all existing storages =cut sub set_schema { - my $self = shift @_; - foreach my $source ($self->all_storages) { - $source->set_schema(@_); - } + my $self = shift @_; + foreach my $source ($self->all_storages) { + $source->set_schema(@_); + } } =head2 debug @@ -583,10 +667,13 @@ set a debug flag across all storages =cut sub debug { - my $self = shift @_; + my $self = shift @_; + if(@_) { foreach my $source ($self->all_storages) { - $source->debug(@_); - } + $source->debug(@_); + } + } + return $self->master->debug; } =head2 debugobj @@ -596,10 +683,13 @@ set a debug object across all storages =cut sub debugobj { - my $self = shift @_; + my $self = shift @_; + if(@_) { foreach my $source ($self->all_storages) { - $source->debugobj(@_); - } + $source->debugobj(@_); + } + } + return $self->master->debugobj; } =head2 debugfh @@ -609,10 +699,13 @@ set a debugfh object across all storages =cut sub debugfh { - my $self = shift @_; + my $self = shift @_; + if(@_) { foreach my $source ($self->all_storages) { - $source->debugfh(@_); - } + $source->debugfh(@_); + } + } + return $self->master->debugfh; } =head2 debugcb @@ -622,10 +715,13 @@ set a debug callback across all storages =cut sub debugcb { - my $self = shift @_; + my $self = shift @_; + if(@_) { foreach my $source ($self->all_storages) { - $source->debugcb(@_); - } + $source->debugcb(@_); + } + } + return $self->master->debugcb; } =head2 disconnect @@ -635,20 +731,72 @@ disconnect everything =cut sub disconnect { - my $self = shift @_; - foreach my $source ($self->all_storages) { - $source->disconnect(@_); - } + my $self = shift @_; + foreach my $source ($self->all_storages) { + $source->disconnect(@_); + } +} + +=head2 cursor_class + +set cursor class on all storages, or return master's + +=cut + +sub cursor_class { + my ($self, $cursor_class) = @_; + + if ($cursor_class) { + $_->cursor_class($cursor_class) for $self->all_storages; + } + $self->master->cursor_class; } + +=head1 GOTCHAS + +Due to the fact that replicants can lag behind a master, you must take care to +make sure you use one of the methods to force read queries to a master should +you need realtime data integrity. For example, if you insert a row, and then +immediately re-read it from the database (say, by doing $row->discard_changes) +or you insert a row and then immediately build a query that expects that row +to be an item, you should force the master to handle reads. Otherwise, due to +the lag, there is no certainty your data will be in the expected state. + +For data integrity, all transactions automatically use the master storage for +all read and write queries. Using a transaction is the preferred and recommended +method to force the master to handle all read queries. + +Otherwise, you can force a single query to use the master with the 'force_pool' +attribute: + + my $row = $resultset->search(undef, {force_pool=>'master'})->find($pk); + +This attribute will safely be ignore by non replicated storages, so you can use +the same code for both types of systems. + +Lastly, you can use the L method, which works very much like +a transaction. + +For debugging, you can turn replication on/off with the methods L +and L, however this operates at a global level and is not +suitable if you have a shared Schema object being used by multiple processes, +such as on a web application server. You can get around this limitation by +using the Schema clone method. + + my $new_schema = $schema->clone; + $new_schema->set_reliable_storage; + + ## $new_schema will use only the Master storage for all reads/writes while + ## the $schema object will use replicated storage. =head1 AUTHOR - John Napiorkowski + John Napiorkowski Based on code originated by: - Norbert Csongrádi - Peter Siklósi + Norbert Csongrádi + Peter Siklósi =head1 LICENSE @@ -656,4 +804,6 @@ You may distribute this code under the same terms as Perl itself. =cut +__PACKAGE__->meta->make_immutable; + 1;