) for keys %$to_scan;
+ # these will be used for matching in the loop below
+ my $all_aliases = join ' | ', map { quotemeta $_ } keys %$alias_list;
+ my $fq_col_re = qr/
+ $lquote ( $all_aliases ) $rquote $sep (?: $lquote ([^$rquote]+) $rquote )?
+ |
+ \b ( $all_aliases ) \. ( [^\s\)\($rquote]+ )?
+ /x;
+
+ my $all_unq_columns = join ' | ',
+ map
+ { quotemeta $_ }
+ grep
+ # using a regex here shows up on profiles, boggle
+ { index( $_, '.') < 0 }
+ keys %$colinfo
+ ;
+ my $unq_col_re = $all_unq_columns
+ ? qr/ $lquote ( $all_unq_columns ) $rquote /x
+ : undef
+ ;
+
+
# the actual scan, per type
for my $type (keys %$to_scan) {
}
}
+
# we will be bulk-scanning anyway - pieces will not matter in that case
# (unlike in the direct-equivalence above)
my $scan_string = join ' ', @{$to_scan->{$type}};
+
# now loop through all fully qualified columns and get the corresponding
# alias (should work even if they are in scalarrefs)
- for my $alias (keys %$alias_list) {
- my $al_re = qr/
- $lquote $alias $rquote $sep (?: $lquote ([^$rquote]+) $rquote )?
- |
- \b $alias \. ([^\s\)\($rquote]+)?
- /x;
-
- if (my @matches = $scan_string =~ /$al_re/g) {
- $aliases_by_type->{$type}{$alias} ||= { -parents => $alias_list->{$alias}{-join_path}||[] };
- $aliases_by_type->{$type}{$alias}{-seen_columns}{"$alias.$_"} = "$alias.$_"
- for grep { defined $_ } @matches;
+ #
+ # The regex matches in multiples of 4, with one of the two pairs being
+ # undef. There may be a *lot* of matches, hence the convoluted loop
+ my @matches = $scan_string =~ /$fq_col_re/g;
+ my $i = 0;
+ while( $i < $#matches ) {
+
+ if (
+ defined $matches[$i]
+ ) {
+ $aliases_by_type->{$type}{$matches[$i]} ||= { -parents => $alias_list->{$matches[$i]}{-join_path}||[] };
+
+ $aliases_by_type->{$type}{$matches[$i]}{-seen_columns}{"$matches[$i].$matches[$i+1]"} = "$matches[$i].$matches[$i+1]"
+ if defined $matches[$i+1];
+
+ $i += 2;
}
- }
- # now loop through unqualified column names, and try to locate them within
- # the chunks
- for my $col (keys %$colinfo) {
- next if $col =~ / \. /x; # if column is qualified it was caught by the above
+ $i += 2;
+ }
- my $col_re = qr/ $lquote ($col) $rquote /x;
- if ( my @matches = $scan_string =~ /$col_re/g) {
- my $alias = $colinfo->{$col}{-source_alias};
- $aliases_by_type->{$type}{$alias} ||= { -parents => $alias_list->{$alias}{-join_path}||[] };
- $aliases_by_type->{$type}{$alias}{-seen_columns}{"$alias.$_"} = $_
- for grep { defined $_ } @matches;
- }
+ # now loop through unqualified column names, and try to locate them within
+ # the chunks, if there are any unqualified columns in the 1st place
+ next unless $unq_col_re;
+ for ( $scan_string =~ /$unq_col_re/g ) {
+ my $alias = $colinfo->{$_}{-source_alias} or next;
+ $aliases_by_type->{$type}{$alias} ||= { -parents => $alias_list->{$alias}{-join_path}||[] };
+ $aliases_by_type->{$type}{$alias}{-seen_columns}{"$alias.$_"} = $_
}
}
+
# Add any non-left joins to the restriction list (such joins are indeed restrictions)
(
$_->{-alias}
$aliases_by_type->{restricting}{ $_->{-alias} } = { -parents => $_->{-join_path}||[] }
) for values %$alias_list;
+
# final cleanup
(
keys %{$aliases_by_type->{$_}}
delete $aliases_by_type->{$_}
) for keys %$aliases_by_type;
+
$aliases_by_type;
}
return {} if $colnames and ! @$colnames;
- my $alias2src = $self->_resolve_ident_sources($ident);
+ my $sources = $self->_resolve_ident_sources($ident);
+
+ $_ = { rsrc => $_, colinfos => $_->columns_info }
+ for values %$sources;
my (%seen_cols, @auto_colnames);
# compile a global list of column names, to be able to properly
# disambiguate unqualified column names (if at all possible)
- for my $alias (keys %$alias2src) {
- my $rsrc = $alias2src->{$alias};
- for my $colname ($rsrc->columns) {
- push @{$seen_cols{$colname}}, $alias;
- push @auto_colnames, "$alias.$colname" unless $colnames;
- }
+ for my $alias (keys %$sources) {
+ (
+ ++$seen_cols{$_}{$alias}
+ and
+ ! $colnames
+ and
+ push @auto_colnames, "$alias.$_"
+ ) for keys %{ $sources->{$alias}{colinfos} };
}
$colnames ||= [
@auto_colnames,
- grep { @{$seen_cols{$_}} == 1 } (keys %seen_cols),
+ ( grep { keys %{$seen_cols{$_}} == 1 } keys %seen_cols ),
];
- my (%return, $colinfos);
- foreach my $col (@$colnames) {
- my ($source_alias, $colname) = $col =~ m/^ (?: ([^\.]+) \. )? (.+) $/x;
+ my %return;
+ for (@$colnames) {
+ my ($colname, $source_alias) = reverse split /\./, $_;
- # if the column was seen exactly once - we know which rsrc it came from
- $source_alias ||= $seen_cols{$colname}[0]
- if ($seen_cols{$colname} and @{$seen_cols{$colname}} == 1);
-
- next unless $source_alias;
+ my $assumed_alias =
+ $source_alias
+ ||
+ # if the column was seen exactly once - we know which rsrc it came from
+ (
+ $seen_cols{$colname}
+ and
+ keys %{$seen_cols{$colname}} == 1
+ and
+ ( %{$seen_cols{$colname}} )[0]
+ )
+ ||
+ next
+ ;
- my $rsrc = $alias2src->{$source_alias}
- or next;
+ $self->throw_exception(
+ "No such column '$colname' on source " . $sources->{$assumed_alias}{rsrc}->source_name
+ ) unless $seen_cols{$colname}{$assumed_alias};
- $return{$col} = {
- %{
- ( $colinfos->{$source_alias} ||= $rsrc->columns_info )->{$colname}
- ||
- $self->throw_exception(
- "No such column '$colname' on source " . $rsrc->source_name
- );
- },
- -result_source => $rsrc,
- -source_alias => $source_alias,
- -fq_colname => $col eq $colname ? "$source_alias.$col" : $col,
+ $return{$_} = {
+ %{ $sources->{$assumed_alias}{colinfos}{$colname} },
+ -result_source => $sources->{$assumed_alias}{rsrc},
+ -source_alias => $assumed_alias,
+ -fq_colname => "$assumed_alias.$colname",
-colname => $colname,
};
- $return{"$source_alias.$colname"} = $return{$col} if $col eq $colname;
+ $return{"$assumed_alias.$colname"} = $return{$_}
+ unless $source_alias;
}
return \%return;