From: Peter Rabbitson Date: Thu, 10 Jun 2010 09:17:18 +0000 (+0200) Subject: Collapse-calculator finally works (GPW++) X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=d4d8e97b;p=dbsrgits%2FDBIx-Class-Historic.git Collapse-calculator finally works (GPW++) --- diff --git a/lib/DBIx/Class/ResultSet.pm b/lib/DBIx/Class/ResultSet.pm index 02cba11..347cd0d 100644 --- a/lib/DBIx/Class/ResultSet.pm +++ b/lib/DBIx/Class/ResultSet.pm @@ -14,8 +14,12 @@ use DBIx::Class::ResultSetColumn; use DBIx::Class::ResultSourceHandle; use List::Util (); use Scalar::Util (); + use base qw/DBIx::Class/; +#use Test::Deep::NoTest (qw/eq_deeply/); +use Data::Dumper::Concise; + __PACKAGE__->mk_group_accessors('simple' => qw/_result_class _source_handle/); =head1 NAME @@ -976,6 +980,7 @@ sub _construct_object { return @new; } +=begin # two arguments: $as_proto is an arrayref of column names, # $row_ref is an arrayref of the data. If none of the row data # is defined we return undef (that's copied from the old @@ -1061,17 +1066,121 @@ sub _merge_result { : $rels->{$rel}->[0] ); - } + my $attrs = $self->_resolved_attrs; + my ($keep_collapsing, $set_ident) = @{$attrs}{qw/collapse _collapse_ident/}; + + # FIXME this is temporary, need to calculate in _resolved_attrs + $set_ident ||= { me => [ $self->result_source->_pri_cols ], pref => {} }; + + my @cur_row = @$row_ref; + my (@to_collapse, $last_ident); + + do { + my $row_hr = { map { $as_proto->[$_] => $cur_row[$_] } (0 .. $#$as_proto) }; + + # see if we are switching to another object + # this can be turned off and things will still work + # since _merge_prefetch knows about _collapse_ident +# my $cur_ident = [ @{$row_hr}{@$set_ident} ]; + my $cur_ident = []; + $last_ident ||= $cur_ident; + +# if ($keep_collapsing = Test::Deep::eq_deeply ($cur_ident, $last_ident)) { +# push @to_collapse, $self->result_source->_parse_row ( +# $row_hr, +# ); +# } + } while ( + $keep_collapsing + && + do { @cur_row = $self->cursor->next; $self->{stashed_row} = \@cur_row if @cur_row; } + ); + die Dumper \@to_collapse; + + + # attempt collapse all rows with same collapse identity + if (@to_collapse > 1) { + my @collapsed; + while (@to_collapse) { + $self->_merge_result(\@collapsed, shift @to_collapse); } - else { - push( @$rows, $row ); - return undef; + @to_collapse = @collapsed; + } + + # still didn't fully collapse + $self->throw_exception ('Resultset collapse failed (theoretically impossible). Maybe a wrong collapse_ident...?') + if (@to_collapse > 1); + + return $to_collapse[0]; +} +=cut + +# two arguments: $as_proto is an arrayref of 'as' column names, +# $row_ref is an arrayref of the data. The do-while loop will run +# once if we do not need to collapse the result and will run as long as +# _merge_result returns a true value. It will return undef if the +# current added row does not match the previous row, which in turn +# means we need to stash the row for the subsequent ->next call +sub _collapse_result { + my ( $self, $as_proto, $row_ref ) = @_; + + my $attrs = $self->_resolved_attrs; + my ($keep_collapsing, $set_ident) = @{$attrs}{qw/collapse _collapse_ident/}; + + die Dumper [$as_proto, $row_ref, $keep_collapsing, $set_ident ]; + + + my @cur_row = @$row_ref; + my (@to_collapse, $last_ident); + + do { + my $row_hr = { map { $as_proto->[$_] => $cur_row[$_] } (0 .. $#$as_proto) }; + + # see if we are switching to another object + # this can be turned off and things will still work + # since _merge_prefetch knows about _collapse_ident +# my $cur_ident = [ @{$row_hr}{@$set_ident} ]; + my $cur_ident = []; + $last_ident ||= $cur_ident; + +# if ($keep_collapsing = eq_deeply ($cur_ident, $last_ident)) { +# push @to_collapse, $self->result_source->_parse_row ( +# $row_hr, +# ); +# } + } while ( + $keep_collapsing + && + do { @cur_row = $self->cursor->next; $self->{stashed_row} = \@cur_row if @cur_row; } + ); + + # attempt collapse all rows with same collapse identity + if (@to_collapse > 1) { + my @collapsed; + while (@to_collapse) { + $self->_merge_result(\@collapsed, shift @to_collapse); } + } - return 1; + return 1; } +# Takes an arrayref of me/pref pairs and a new me/pref pair that should +# be merged on a preexisting matching me (or should be pushed into $merged +# as a new me/pref pair for further invocations). It should be possible to +# use this function to collapse complete ->all results, provided _collapse_result() is adjusted +# to provide everything to this sub not to barf when $merged contains more than one +# arrayref) +sub _merge_prefetch { + my ($self, $merged, $next_row) = @_; + + unless (@$merged) { + push @$merged, $next_row; + return; + } + +} =head2 result_source @@ -2931,11 +3040,10 @@ sub _resolved_attrs { } } + # generate selections based on the prefetch helper if ( my $prefetch = delete $attrs->{prefetch} ) { $attrs->{collapse} = 1; - my $prefetch_ordering = []; - # this is a separate structure (we don't look in {from} directly) # as the resolver needs to shift things off the lists to work # properly (identical-prefetches on different branches) @@ -2957,16 +3065,13 @@ sub _resolved_attrs { } } - my @prefetch = $source->_resolve_prefetch( $prefetch, $alias, $join_map, $prefetch_ordering ); + my @prefetch = $source->_resolve_prefetch( $prefetch, $alias, $join_map ); # we need to somehow mark which columns came from prefetch $attrs->{_prefetch_select} = [ map { $_->[0] } @prefetch ]; push @{ $attrs->{select} }, @{$attrs->{_prefetch_select}}; push @{ $attrs->{as} }, (map { $_->[1] } @prefetch); - - push( @{$attrs->{order_by}}, @$prefetch_ordering ); - $attrs->{_collapse_order_by} = \@$prefetch_ordering; } # run through the resulting joinstructure (starting from our current slot) @@ -2983,8 +3088,9 @@ sub _resolved_attrs { last if ($t->{-alias} && $t->{-alias} eq $alias); } - if (@fromlist) { - $attrs->{collapse} = scalar grep { ! $_->[0]{-is_single} } (@fromlist); + for (@fromlist) { + $attrs->{collapse} = ! $_->[0]{-is_single} + and last; } } else { @@ -2993,6 +3099,22 @@ sub _resolved_attrs { } } + # if collapsing (via prefetch or otherwise) calculate row-idents and necessary order_by + if ($attrs->{collapse}) { + + # only consider real columns (not functions) during collapse resolution + # this check shouldn't really be here, as fucktards are not supposed to + # alias random crap to declared columns anyway, but still - just in + # case + my @plain_selects = map + { ( ! ref $attrs->{select}[$_] && $attrs->{as}[$_] ) || () } + ( 0 .. $#{$attrs->{select}} ) + ; + + @{$attrs}{qw/_collapse_ident _collapse_order/} = + $source->_resolve_collapse( \@plain_selects ); + } + # if both page and offset are specified, produce a combined offset # even though it doesn't make much sense, this is what pre 081xx has # been doing diff --git a/lib/DBIx/Class/ResultSource.pm b/lib/DBIx/Class/ResultSource.pm index c611d28..4973b0d 100644 --- a/lib/DBIx/Class/ResultSource.pm +++ b/lib/DBIx/Class/ResultSource.pm @@ -508,7 +508,7 @@ sub _pri_cols { my @pcols = $self->primary_columns or $self->throw_exception (sprintf( 'Operation requires a primary key to be declared on %s via set_primary_key', - ref $self, + $self->source_name, )); return @pcols; } @@ -1453,6 +1453,199 @@ sub _resolve_prefetch { } } +# Takes a selection list and generates a collapse-map representing +# row-object fold-points. Every relationship is assigned a set of unique, +# non-nullable columns (which may *not even be* from the same resultset) +# and the collapser will use this information to correctly distinguish +# data of individual to-be-row-objects. Also returns a sort criteria +# for the entire resultset, such that when the resultset is sorted +# this way ->next will just work +sub _resolve_collapse { + my ($self, $as, $collapse_map, $rel_chain, $multi_join, $parent_underdefined) = @_; + + my ($my_cols, $rel_cols, $rel_col_idx); + for (@$as) { + if ($_ =~ /^ ([^\.]+) \. (.+) /x) { + push @{$rel_cols->{$1}}, $2; + $rel_col_idx->{$1}{$2}++; + } + else { + $my_cols->{$_} = {}; # important for ||= below + } + } + + my $relinfo; + # run through relationships, collect metadata, inject fk-bridges immediately (if any) + for my $rel (keys %$rel_cols) { + my $rel_src = $self->related_source ($rel); + my $inf = $self->relationship_info ($rel); + + $relinfo->{$rel}{is_single} = $inf->{attrs}{accessor} && $inf->{attrs}{accessor} ne 'multi'; + $relinfo->{$rel}{is_inner} = ( $inf->{attrs}{join_type} || '' ) !~ /^left/i; + $relinfo->{$rel}{rsrc} = $rel_src; + + my $cond = $inf->{cond}; + if ( + ref $cond eq 'HASH' + and + keys %$cond + and + ! List::Util::first { $_ !~ /^foreign\./ } (keys %$cond) + and + ! List::Util::first { $_ !~ /^self\./ } (values %$cond) + ) { + for my $f (keys %$cond) { + my $s = $cond->{$f}; + $_ =~ s/^ (?: foreign | self ) \.//x for ($f, $s); + $relinfo->{$rel}{fk_map}{$s} = $f; + + $my_cols->{$s} ||= { via_fk => "$rel.$f" } # need to know source from *our* pov + if $rel_col_idx->{$rel}{$f}; # only if it is in fact selected of course + } + } + } + + # get colinfo for everything + if ($my_cols) { + $my_cols->{$_}{colinfo} = ( + $self->has_column ($_) ? $self->column_info ($_) : undef + ) for keys %$my_cols; + } + + # if collapser not passed down try to resolve based on our columns + # (plus already inserted FK bridges) + if ( + $my_cols + and + ! $collapse_map->{-collapse_on} + and + my $uset = $self->_unique_column_set ($my_cols) + ) { + $collapse_map->{-collapse_on} = { map + { + join ('.', + @{$rel_chain||[]}, + ( $my_cols->{$_}{via_fk} || $_ ), + ) + => + 1 + } + keys %$uset + }; + } + + # still don't know how to collapse - keep descending down 1:1 chains - if + # a related non-LEFT (or not-yet-multijoined) 1:1 is resolvable - it will collapse us too + unless ($collapse_map->{-collapse_on}) { + for my $rel (keys %$relinfo) { + next unless $relinfo->{$rel}{is_single}; + next if ( $multi_join && ! $relinfo->{$rel}{is_inner} ); + + if ( my ($rel_collapse) = $relinfo->{$rel}{rsrc}->_resolve_collapse ( + $rel_cols->{$rel}, + undef, + [ @{$rel_chain||[]}, $rel], + $multi_join || ! $relinfo->{$rel}{is_single}, + 'parent_underdefined', + )) { + $collapse_map->{-collapse_on} = $rel_collapse->{-collapse_on}; + last; + } + } + } + + # nothing down the chain resolves - can't calculate a collapse-map + unless ($collapse_map->{-collapse_on}) { + # FIXME - error message is very vague + $self->throw_exception ( sprintf + "Unable to calculate a definitive collapse column set for %s%s - fetch more unique non-nullable columns", + $self->source_name, + $rel_chain ? sprintf (' (or a %s chain member)', join ' -> ', @$rel_chain ) : '', + ); + } + + return $collapse_map if $parent_underdefined; # we will come here again and go through the children then + + # now that we are collapsable - go down the entire chain a second time, + # and fill in the rest + for my $rel (keys %$relinfo) { + + # inject *all* FK columns (even if we do not directly define them) + # since us being defined means that we can cheat about having e.g. + # a particular PK, which in turn will re-assemble with a unique + # constraint on some related column and our bridged-fk + # when/if the resolution comes back - we take back out everything + # we injected and pass things back up the chain + + my $implied_defined = { map + { $rel_col_idx->{$rel}{$_} + ? () + : ( join ('.', @{$rel_chain||[]}, $rel, $_ ) => $_ ) + } + values %{$relinfo->{$rel}{fk_map}} + }; + + my ($rel_collapse) = $relinfo->{$rel}{rsrc}->_resolve_collapse ( + [ @{$rel_cols->{$rel}}, values %$implied_defined ], + + $relinfo->{$rel}{is_single} # if this is a 1:1 - we simply pass our collapser to it + ? { -collapse_on => { %{$collapse_map->{-collapse_on}} } } + : undef + , + + [ @{$rel_chain||[]}, $rel], + + $multi_join || ! $relinfo->{$rel}{is_single}, + ); + + # if we implied our definition - we inject our own collapser in addition to whatever is left + if (keys %$implied_defined) { + $rel_collapse->{-collapse_on} = { + ( map {( $_ => 1 )} keys %{$collapse_map->{-collapse_on}} ), + ( map + {( $_ => 1 )} + grep + { ! $implied_defined->{$_} } + keys %{$rel_collapse->{-collapse_on}} + ), + }; + }; + + $collapse_map->{$rel} = $rel_collapse; + + } + + # if no relchain (i.e. we are toplevel) - generate an order_by + # here we can take the easy route and compose an order_by out of + # actual unique column names, regardless of whether they were + # selected or not. If nothing ... maybe bad idea + my $order_by = do { + undef; + } if ! $rel_chain; + + return $collapse_map, ($order_by || () ); +} + +sub _unique_column_set { + my ($self, $cols) = @_; + + my %unique = $self->unique_constraints; + + # always prefer the PK first, and then shortest constraints first + USET: + for my $set (delete $unique{primary}, sort { @$a <=> @$b } (values %unique) ) { + next unless $set && @$set; + + for (@$set) { + next USET unless ($cols->{$_} && $cols->{$_}{colinfo} && !$cols->{$_}{colinfo}{is_nullable} ); + } + + return { map { $_ => 1 } @$set }; + } + + return undef; +} + # Takes a hashref of $sth->fetchrow values keyed to the corresponding # {as} dbic aliases, and splits it into a native columns hashref # (as in $row->get_columns), followed by any non-native (prefetched) @@ -1487,7 +1680,10 @@ sub _parse_row { $self->related_source($rel)->_parse_row( $pref->{$rel}, $will_collapse ); $pref->{$rel} = [ $pref->{$rel} ] - if ( $will_collapse && $rel_info->{attrs}{accessor} eq 'multi' ); + if ( $will_collapse + && $rel_info->{attrs}{accessor} + && $rel_info->{attrs}{accessor} eq 'multi' + ); } return [ $me||{}, $pref||() ]; @@ -1510,7 +1706,7 @@ Returns the result source object for the given relationship. sub related_source { my ($self, $rel) = @_; if( !$self->has_relationship( $rel ) ) { - $self->throw_exception("No such relationship '$rel'"); + $self->throw_exception("No such relationship '$rel' on " . $self->source_name); } return $self->schema->source($self->relationship_info($rel)->{source}); } diff --git a/t/prefetch/_internals.t b/t/prefetch/_internals.t index 8ccea90..d9e9575 100644 --- a/t/prefetch/_internals.t +++ b/t/prefetch/_internals.t @@ -99,4 +99,65 @@ is_deeply ( '_parse_row works over missing joins without collapse', ); +my ($collapse_map, $order) = $schema->source ('CD')->_resolve_collapse ( + [ + 'year', # non-unique + 'genreid', # nullable + 'tracks.title', # non-unique (no me.id) + 'single_track.cd.artist.cds.cdid', # to give uniquiness to ...tracks.title below + 'single_track.cd.artist.cds.artist', # non-unique + 'single_track.cd.artist.cds.year', # non-unique + 'single_track.cd.artist.cds.genreid', # nullable + 'single_track.cd.artist.cds.tracks.title',# unique when combined with ...cds.cdid above + 'latest_cd', # random function + ], +); + +is_deeply ( + $collapse_map, + { + -collapse_on => { + "single_track.cd.artist.cds.artist" => 1 + }, + + single_track => { + -collapse_on => { + "single_track.cd.artist.cds.artist" => 1 + }, + + cd => { + -collapse_on => { + "single_track.cd.artist.cds.artist" => 1 + }, + + artist => { + -collapse_on => { + "single_track.cd.artist.cds.artist" => 1 + }, + + cds => { + -collapse_on => { + "single_track.cd.artist.cds.cdid" => 1 + }, + + tracks => { + -collapse_on => { + "single_track.cd.artist.cds.cdid" => 1, + "single_track.cd.artist.cds.tracks.title" => 1 + } + } + } + } + } + }, + tracks => { + -collapse_on => { + "single_track.cd.artist.cds.artist" => 1, + "tracks.title" => 1 + } + } + }, + "Proper collapse map constructed", +); + done_testing;