From: Moritz Onken Date: Fri, 21 Aug 2009 12:19:41 +0000 (+0000) Subject: rewrite of _collapse_result to support prefetch of multiple has_many X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=27ffa6c0c4e12265ea84d2b0b954a3b8954e4d69;p=dbsrgits%2FDBIx-Class-Historic.git rewrite of _collapse_result to support prefetch of multiple has_many rels on the same level --- diff --git a/lib/DBIx/Class/ResultSet.pm b/lib/DBIx/Class/ResultSet.pm index 7fa5aa1..851e269 100644 --- a/lib/DBIx/Class/ResultSet.pm +++ b/lib/DBIx/Class/ResultSet.pm @@ -966,140 +966,183 @@ sub _construct_object { return @new; } -sub _collapse_result { - my ($self, $as_proto, $row) = @_; - - # if the first row that ever came in is totally empty - this means we got - # hit by a smooth^Wempty left-joined resultset. Just noop in that case - # instead of producing a {} - # - my $has_def; - for (@$row) { - if (defined $_) { - $has_def++; - last; +# _unflatten_result takes a row hashref which looks like this: +# $VAR1 = { +# 'cd.artist.artistid' => '1', +# 'cd.artist' => '1', +# 'cd_id' => '1', +# 'cd.genreid' => undef, +# 'cd.year' => '1999', +# 'cd.title' => 'Spoonful of bees', +# 'cd.single_track' => undef, +# 'cd.artist.name' => 'Caterwauler McCrae', +# 'cd.artist.rank' => '13', +# 'cd.artist.charfield' => undef, +# 'cd.cdid' => '1' +# }; + +# and generates the following structure: + +# $VAR1 = [ +# { +# 'cd_id' => '1' +# }, +# { +# 'cd' => [ +# { +# 'single_track' => undef, +# 'cdid' => '1', +# 'artist' => '1', +# 'title' => 'Spoonful of bees', +# 'year' => '1999', +# 'genreid' => undef +# }, +# { +# 'artist' => [ +# { +# 'artistid' => '1', +# 'charfield' => undef, +# 'name' => 'Caterwauler McCrae', +# 'rank' => '13' +# } +# ] +# } +# ] +# } +# ]; + +# It returns one row object which consists of an arrayref with two +# elements. The first contains the plain column data, the second +# contains the data of relationships. Those are row arrayrefs, themselves. + +# it's a recursive function. It needs to request the relationship_info +# to decide whether to put the data of a relationship in a hashref +# (i.e. belongs_to) or an arrayref (i.e. has_many). + +sub _unflatten_result { + my ( $self, $row ) = @_; + + my $columns = {}; + my $rels = {}; + + foreach my $column ( sort keys %$row ) { + if ( $column =~ /^(.*?)\.(.*)$/ ) { + $rels->{$1} ||= {}; + $rels->{$1}->{$2} = $row->{$column}; + } + else { + $columns->{$column} = $row->{$column}; + } } - } - return undef unless $has_def; - - my @copy = @$row; - - # 'foo' => [ undef, 'foo' ] - # 'foo.bar' => [ 'foo', 'bar' ] - # 'foo.bar.baz' => [ 'foo.bar', 'baz' ] - - my @construct_as = map { [ (/^(?:(.*)\.)?([^.]+)$/) ] } @$as_proto; - - my %collapse = %{$self->{_attrs}{collapse}||{}}; - - my @pri_index; - # if we're doing collapsing (has_many prefetch) we need to grab records - # until the PK changes, so fill @pri_index. if not, we leave it empty so - # we know we don't have to bother. - - # the reason for not using the collapse stuff directly is because if you - # had for e.g. two artists in a row with no cds, the collapse info for - # both would be NULL (undef) so you'd lose the second artist - - # store just the index so we can check the array positions from the row - # without having to contruct the full hash - - if (keys %collapse) { - my %pri = map { ($_ => 1) } $self->result_source->primary_columns; - foreach my $i (0 .. $#construct_as) { - next if defined($construct_as[$i][0]); # only self table - if (delete $pri{$construct_as[$i][1]}) { - push(@pri_index, $i); - } - last unless keys %pri; # short circuit (Johnny Five Is Alive!) + foreach my $rel ( sort keys %$rels ) { + my $rel_info = $self->result_source->relationship_info($rel); + $rels->{$rel} = + $self->related_resultset($rel)->_unflatten_result( $rels->{$rel} ); + $rels->{$rel} = [ $rels->{$rel} ] + if ( $rel_info->{attrs}->{accessor} eq 'multi' ); } - } - - # no need to do an if, it'll be empty if @pri_index is empty anyway - - my %pri_vals = map { ($_ => $copy[$_]) } @pri_index; - my @const_rows; + return keys %$rels ? [ $columns, $rels ] : [$columns]; +} - do { # no need to check anything at the front, we always want the first row +# two arguments: $as_proto is an arrayref of column names, +# $row_ref is an arrayref of the data. If none of the row data +# is defined we return undef (that's copied from the old +# _collapse_result). Next we decide whether we need to collapse +# the resultset (i.e. we prefetch something) or not. $collapse +# indicates that. The do-while loop will run once if we do not need +# to collapse the result and will run as long as _merge_result returns +# a true value. It will return undef if the current added row does not +# match the previous row. A bit of stashing and cursor magic is +# required so that the cursor is not mixed up. - my %const; +# "$rows" is a bit misleading. In the end, there should only be one +# element in this arrayref. - foreach my $this_as (@construct_as) { - $const{$this_as->[0]||''}{$this_as->[1]} = shift(@copy); +sub _collapse_result { + my ( $self, $as_proto, $row_ref ) = @_; + my $has_def; + for (@$row_ref) { + if ( defined $_ ) { + $has_def++; + last; + } } - - push(@const_rows, \%const); - - } until ( # no pri_index => no collapse => drop straight out - !@pri_index - or - do { # get another row, stash it, drop out if different PK - - @copy = $self->cursor->next; - $self->{stashed_row} = \@copy; - - # last thing in do block, counts as true if anything doesn't match - - # check xor defined first for NULL vs. NOT NULL then if one is - # defined the other must be so check string equality - - grep { - (defined $pri_vals{$_} ^ defined $copy[$_]) - || (defined $pri_vals{$_} && ($pri_vals{$_} ne $copy[$_])) - } @pri_index; - } - ); - - my $alias = $self->{attrs}{alias}; - my $info = []; - - my %collapse_pos; - - my @const_keys; - - foreach my $const (@const_rows) { - scalar @const_keys or do { - @const_keys = sort { length($a) <=> length($b) } keys %$const; - }; - foreach my $key (@const_keys) { - if (length $key) { - my $target = $info; - my @parts = split(/\./, $key); - my $cur = ''; - my $data = $const->{$key}; - foreach my $p (@parts) { - $target = $target->[1]->{$p} ||= []; - $cur .= ".${p}"; - if ($cur eq ".${key}" && (my @ckey = @{$collapse{$cur}||[]})) { - # collapsing at this point and on final part - my $pos = $collapse_pos{$cur}; - CK: foreach my $ck (@ckey) { - if (!defined $pos->{$ck} || $pos->{$ck} ne $data->{$ck}) { - $collapse_pos{$cur} = $data; - delete @collapse_pos{ # clear all positioning for sub-entries - grep { m/^\Q${cur}.\E/ } keys %collapse_pos - }; - push(@$target, []); - last CK; - } + return undef unless $has_def; + + my $collapse = keys %{ $self->{_attrs}{collapse} || {} }; + my $rows = []; + my @row = @$row_ref; + do { + my $i = 0; + my $row = { map { $_ => $row[ $i++ ] } @$as_proto }; + $row = $self->_unflatten_result($row); + unless ( scalar @$rows ) { + push( @$rows, $row ); + } + $collapse = undef unless ( $self->_merge_result( $rows, $row ) ); + } while ( + $collapse + && do { @row = $self->cursor->next; $self->{stashed_row} = \@row if @row; } + ); + + return $rows->[0]; + +} + +# _merge_result accepts an arrayref of rows objects (again, an arrayref of two elements) +# and a row object which should be merged into the first object. +# First we try to find out whether $row is already in $rows. If this is the case +# we try to merge them by iteration through their relationship data. We call +# _merge_result again on them, so they get merged. + +# If we don't find the $row in $rows, we append it to $rows and return undef. +# _merge_result returns 1 otherwise (i.e. $row has been found in $rows). + +sub _merge_result { + my ( $self, $rows, $row ) = @_; + my ( $columns, $rels ) = @$row; + my $found = undef; + foreach my $seen (@$rows) { + my $match = 1; + foreach my $column ( keys %$columns ) { + if ( defined $seen->[0]->{$column} ^ defined $columns->{$column} + or defined $columns->{$column} + && $seen->[0]->{$column} ne $columns->{$column} ) + { + + $match = 0; + last; } - } - if (exists $collapse{$cur}) { - $target = $target->[-1]; - } } - $target->[0] = $data; - } else { - $info->[0] = $const->{$key}; - } + if ($match) { + $found = $seen; + last; + } + } + if ($found) { + foreach my $rel ( keys %$rels ) { + my $old_rows = $found->[1]->{$rel}; + $self->_merge_result( + ref $found->[1]->{$rel}->[0] eq 'HASH' ? [ $found->[1]->{$rel} ] + : $found->[1]->{$rel}, + ref $rels->{$rel}->[0] eq 'HASH' ? [ $rels->{$rel}->[0], $rels->{$rel}->[1] ] + : $rels->{$rel}->[0] + ); + + } + + } + else { + push( @$rows, $row ); + return undef; } - } - return $info; + return 1; } + =head2 result_source =over 4 diff --git a/lib/DBIx/Class/ResultSource.pm b/lib/DBIx/Class/ResultSource.pm index dfa4c78..233e97f 100644 --- a/lib/DBIx/Class/ResultSource.pm +++ b/lib/DBIx/Class/ResultSource.pm @@ -1392,19 +1392,7 @@ sub resolve_prefetch { "Can't prefetch has_many ${pre} (join cond too complex)") unless ref($rel_info->{cond}) eq 'HASH'; my $dots = @{[$as_prefix =~ m/\./g]} + 1; # +1 to match the ".${as_prefix}" - if (my ($fail) = grep { @{[$_ =~ m/\./g]} == $dots } - keys %{$collapse}) { - my ($last) = ($fail =~ /([^\.]+)$/); - carp ( - "Prefetching multiple has_many rels ${last} and ${pre} " - .(length($as_prefix) - ? "at the same level (${as_prefix}) " - : "at top level " - ) - . 'will explode the number of row objects retrievable via ->next or ->all. ' - . 'Use at your own risk.' - ); - } + #my @col = map { (/^self\.(.+)$/ ? ("${as_prefix}.$1") : ()); } # values %{$rel_info->{cond}}; $collapse->{".${as_prefix}${pre}"} = [ $rel_source->primary_columns ]; @@ -1476,19 +1464,7 @@ sub _resolve_prefetch { "Can't prefetch has_many ${pre} (join cond too complex)") unless ref($rel_info->{cond}) eq 'HASH'; my $dots = @{[$as_prefix =~ m/\./g]} + 1; # +1 to match the ".${as_prefix}" - if (my ($fail) = grep { @{[$_ =~ m/\./g]} == $dots } - keys %{$collapse}) { - my ($last) = ($fail =~ /([^\.]+)$/); - carp ( - "Prefetching multiple has_many rels ${last} and ${pre} " - .(length($as_prefix) - ? "at the same level (${as_prefix}) " - : "at top level " - ) - . 'will explode the number of row objects retrievable via ->next or ->all. ' - . 'Use at your own risk.' - ); - } + #my @col = map { (/^self\.(.+)$/ ? ("${as_prefix}.$1") : ()); } # values %{$rel_info->{cond}}; $collapse->{".${as_prefix}${pre}"} = [ $rel_source->primary_columns ]; diff --git a/t/76select.t b/t/76select.t index 7560d2c..164f58e 100644 --- a/t/76select.t +++ b/t/76select.t @@ -9,7 +9,7 @@ use DBIC::SqlMakerTest; my $schema = DBICTest->init_schema(); -plan tests => 24; +plan tests => 23; my $rs = $schema->resultset('CD')->search({}, { @@ -165,34 +165,19 @@ my $sub_rs = $rs->search ({}, } ); -is_deeply ( - $sub_rs->single, - { - artist => 1, - track_position => 2, - tracks => - { - trackid => 17, - title => 'Apiary', - }, - }, - 'columns/select/as fold properly on sub-searches', -); - -TODO: { - local $TODO = "Multi-collapsing still doesn't work right - HRI should be getting an arrayref, not an individual hash"; - is_deeply ( +is_deeply( $sub_rs->single, { - artist => 1, - track_position => 2, - tracks => [ - { - trackid => 17, - title => 'Apiary', - }, - ], + artist => 1, + track_position => 2, + tracks => [ + { + trackid => 17, + title => 'Apiary', + }, + ], }, 'columns/select/as fold properly on sub-searches', - ); -} +); + + diff --git a/t/inflate/hri.t b/t/inflate/hri.t index 1d32dd6..79f96c6 100644 --- a/t/inflate/hri.t +++ b/t/inflate/hri.t @@ -110,8 +110,8 @@ for my $index (0 .. $#hashrefinf) { my $datahashref = $hashrefinf[$index]; is ($track->cd->artist->name, $datahashref->{name}, 'Brought back correct artist'); - for my $col (keys %{$datahashref->{cds}{tracks}}) { - is ($track->get_column ($col), $datahashref->{cds}{tracks}{$col}, "Correct track '$col'"); + for my $col (keys %{$datahashref->{cds}[0]{tracks}[0]}) { + is ($track->get_column ($col), $datahashref->{cds}[0]{tracks}[0]{$col}, "Correct track '$col'"); } } diff --git a/t/prefetch/diamond.t b/t/prefetch/diamond.t index fe30ac8..9c87919 100644 --- a/t/prefetch/diamond.t +++ b/t/prefetch/diamond.t @@ -104,4 +104,4 @@ foreach my $name (keys %tests) { is($artwork->cd->artist->artistid, 1, $name . ', correct artist_id over cd'); is($artwork->artwork_to_artist->first->artist->artistid, 2, $name . ', correct artist_id over A2A'); } -} \ No newline at end of file +} diff --git a/t/prefetch/multiple_hasmany.t b/t/prefetch/multiple_hasmany.t index 5607b0e..b9671c9 100644 --- a/t/prefetch/multiple_hasmany.t +++ b/t/prefetch/multiple_hasmany.t @@ -7,189 +7,79 @@ use lib qw(t/lib); use DBICTest; use IO::File; -plan tests => 10; - my $schema = DBICTest->init_schema(); my $sdebug = $schema->storage->debug; -# once the following TODO is complete, remove the 2 warning tests immediately -# after the TODO block -# (the TODO block itself contains tests ensuring that the warns are removed) -TODO: { - local $TODO = 'Prefetch of multiple has_many rels at the same level (currently warn to protect the clueless git)'; - - #( 1 -> M + M ) - my $cd_rs = $schema->resultset('CD')->search ({ 'me.title' => 'Forkful of bees' }); - my $pr_cd_rs = $cd_rs->search ({}, { - prefetch => [qw/tracks tags/], - }); - - my $tracks_rs = $cd_rs->first->tracks; - my $tracks_count = $tracks_rs->count; - - my ($pr_tracks_rs, $pr_tracks_count); +#( 1 -> M + M ) +my $cd_rs = $schema->resultset('CD')->search( { 'me.title' => 'Forkful of bees' } ); +my $pr_cd_rs = $cd_rs->search( {}, { prefetch => [qw/tracks tags/], } ); - my $queries = 0; - $schema->storage->debugcb(sub { $queries++ }); - $schema->storage->debug(1); +my $tracks_rs = $cd_rs->first->tracks; +my $tracks_count = $tracks_rs->count; - my $o_mm_warn; - { - local $SIG{__WARN__} = sub { $o_mm_warn = shift }; - $pr_tracks_rs = $pr_cd_rs->first->tracks; - }; - $pr_tracks_count = $pr_tracks_rs->count; +my ( $pr_tracks_rs, $pr_tracks_count ); - ok(! $o_mm_warn, 'no warning on attempt to prefetch several same level has_many\'s (1 -> M + M)'); +my $queries = 0; +$schema->storage->debugcb( sub { $queries++ } ); +$schema->storage->debug(1); - is($queries, 1, 'prefetch one->(has_many,has_many) ran exactly 1 query'); - $schema->storage->debugcb (undef); - $schema->storage->debug ($sdebug); - - is($pr_tracks_count, $tracks_count, 'equal count of prefetched relations over several same level has_many\'s (1 -> M + M)'); - is ($pr_tracks_rs->all, $tracks_rs->all, 'equal amount of objects returned with and without prefetch over several same level has_many\'s (1 -> M + M)'); - - #( M -> 1 -> M + M ) - my $note_rs = $schema->resultset('LinerNotes')->search ({ notes => 'Buy Whiskey!' }); - my $pr_note_rs = $note_rs->search ({}, { - prefetch => { - cd => [qw/tracks tags/] - }, - }); +my $o_mm_warn; +{ + local $SIG{__WARN__} = sub { $o_mm_warn = shift }; + $pr_tracks_rs = $pr_cd_rs->first->tracks; +}; +$pr_tracks_count = $pr_tracks_rs->count; - my $tags_rs = $note_rs->first->cd->tags; - my $tags_count = $tags_rs->count; +ok( !$o_mm_warn, +'no warning on attempt to prefetch several same level has_many\'s (1 -> M + M)' +); - my ($pr_tags_rs, $pr_tags_count); +is( $queries, 1, 'prefetch one->(has_many,has_many) ran exactly 1 query' ); +$schema->storage->debugcb(undef); +$schema->storage->debug($sdebug); - $queries = 0; - $schema->storage->debugcb(sub { $queries++ }); - $schema->storage->debug(1); +is( $pr_tracks_count, $tracks_count, +'equal count of prefetched relations over several same level has_many\'s (1 -> M + M)' +); +is( $pr_tracks_rs->all, $tracks_rs->all, +'equal amount of objects returned with and without prefetch over several same level has_many\'s (1 -> M + M)' +); - my $m_o_mm_warn; - { - local $SIG{__WARN__} = sub { $m_o_mm_warn = shift }; - $pr_tags_rs = $pr_note_rs->first->cd->tags; - }; - $pr_tags_count = $pr_tags_rs->count; +#( M -> 1 -> M + M ) +my $note_rs = + $schema->resultset('LinerNotes')->search( { notes => 'Buy Whiskey!' } ); +my $pr_note_rs = + $note_rs->search( {}, { prefetch => { cd => [qw/tracks tags/] }, } ); - ok(! $m_o_mm_warn, 'no warning on attempt to prefetch several same level has_many\'s (M -> 1 -> M + M)'); +my $tags_rs = $note_rs->first->cd->tags; +my $tags_count = $tags_rs->count; - is($queries, 1, 'prefetch one->(has_many,has_many) ran exactly 1 query'); - $schema->storage->debugcb (undef); - $schema->storage->debug ($sdebug); +my ( $pr_tags_rs, $pr_tags_count ); - is($pr_tags_count, $tags_count, 'equal count of prefetched relations over several same level has_many\'s (M -> 1 -> M + M)'); - is($pr_tags_rs->all, $tags_rs->all, 'equal amount of objects with and without prefetch over several same level has_many\'s (M -> 1 -> M + M)'); -} +$queries = 0; +$schema->storage->debugcb( sub { $queries++ } ); +$schema->storage->debug(1); -# remove this closure once the TODO above is working +my $m_o_mm_warn; { - my $warn_re = qr/will explode the number of row objects retrievable via/; - - my (@w, @dummy); - local $SIG{__WARN__} = sub { $_[0] =~ $warn_re ? push @w, @_ : warn @_ }; - - my $rs = $schema->resultset('CD')->search ({ 'me.title' => 'Forkful of bees' }, { prefetch => [qw/tracks tags/] }); - @w = (); - @dummy = $rs->first; - is (@w, 1, 'warning on attempt prefetching several same level has_manys (1 -> M + M)'); - - my $rs2 = $schema->resultset('LinerNotes')->search ({ notes => 'Buy Whiskey!' }, { prefetch => { cd => [qw/tags tracks/] } }); - @w = (); - @dummy = $rs2->first; - is (@w, 1, 'warning on attempt prefetching several same level has_manys (M -> 1 -> M + M)'); -} + local $SIG{__WARN__} = sub { $m_o_mm_warn = shift }; + $pr_tags_rs = $pr_note_rs->first->cd->tags; +}; +$pr_tags_count = $pr_tags_rs->count; +ok( !$m_o_mm_warn, +'no warning on attempt to prefetch several same level has_many\'s (M -> 1 -> M + M)' +); -# Illustration purposes only +is( $queries, 1, 'prefetch one->(has_many,has_many) ran exactly 1 query' ); +$schema->storage->debugcb(undef); +$schema->storage->debug($sdebug); -{ - package Inf::Dump; - sub inflate_result { - return [ @_[2,3] ]; - } -} - -my $cd = $schema->resultset ('CD')->create ({ - artist => 1, - title => 'bad cd', - year => 1313, - tags => [ map { { tag => "bad tag $_" } } (1 .. 3) ], - tracks => [ - { title => 'bad track 1', cd_single => { - artist => 1, - title => 'bad_single', - year => 1313, - }}, - map { { title => "bad track $_" } } (2 .. 3), - ], -}); - -my $rs = $schema->resultset ('CD')->search ( - { 'me.cdid' => $cd->id }, - { prefetch => [ 'tags', { tracks => 'cd_single' } ], result_class => 'Inf::Dump' }, +is( $pr_tags_count, $tags_count, +'equal count of prefetched relations over several same level has_many\'s (M -> 1 -> M + M)' +); +is( $pr_tags_rs->all, $tags_rs->all, +'equal amount of objects with and without prefetch over several same level has_many\'s (M -> 1 -> M + M)' ); -use Text::Table; -my $query = ${$rs->as_query}->[0]; -my ($cols) = ( $query =~ /SELECT (.+) FROM/); -my $tb = Text::Table->new (map { $_ => \ ' | ' } (split /,\s*/, $cols) ); - -my $c = $rs->cursor; -while (my @stuff = $c->next) { - $tb->add (map { defined $_ ? $_ : 'NULL' } (@stuff) ); -} - -$rs->reset; -use Data::Dumper; -note Dumper [ - "\n$query", - "\n$tb", - $rs->next -]; - - - - -__END__ -The solution is to rewrite ResultSet->_collapse_result() and -ResultSource->resolve_prefetch() to focus on the final results from the collapse -of the data. Right now, the code doesn't treat the columns from the various -tables as grouped entities. While there is a concept of hierarchy (so that -prefetching down relationships does work as expected), there is no idea of what -the final product should look like and how the various columns in the row would -play together. So, the actual prefetch datastructure from the search would be -very useful in working through this problem. We already have access to the PKs -and sundry for those. So, when collapsing the search result, we know we are -looking for 1 cd object. We also know we're looking for tracks and tags records --independently- of each other. So, we can grab the data for tracks and data for -tags separately, uniqueing on the PK as appropriate. Then, when we're done with -the given cd object's datastream, we know we're good. This should work for all -the various scenarios. - -My reccommendation is the row's data is preprocessed first, breaking it up into -the data for each of the component tables. (This could be done in the single -table case, too, but probably isn't necessary.) So, starting with something -like: - my $row = { - t1.col1 => 1, - t1.col2 => 2, - t2.col1 => 3, - t2.col2 => 4, - t3.col1 => 5, - t3.col2 => 6, - }; -it is massaged to look something like: - my $row_massaged = { - t1 => { col1 => 1, col2 => 2 }, - t2 => { col1 => 3, col2 => 4 }, - t3 => { col1 => 5, col2 => 6 }, - }; -At this point, find the stuff that's different is easy enough to do and slotting -things into the right spot is, likewise, pretty straightforward. Instead of -storing things in a AoH, store them in a HoH keyed on the PKs of the the table, -then convert to an AoH after all collapsing is done. - -This implies that the collapse attribute can probably disappear or, at the -least, be turned into a boolean (which is how it's used in every other place). +done_testing; diff --git a/t/prefetch/with_limit.t b/t/prefetch/with_limit.t index 1dd0829..24625ad 100644 --- a/t/prefetch/with_limit.t +++ b/t/prefetch/with_limit.t @@ -89,4 +89,3 @@ is($artist->cds->count, 1, "count on search limiting prefetched has_many"); # try with double limit my $artist2 = $use_prefetch->search({'cds.title' => { '!=' => $artist_many_cds->cds->first->title } })->slice (0,0)->next; is($artist2->cds->count, 2, "count on search limiting prefetched has_many"); -