Instead of assembling many small regexes scan all the SQL in one pass

[dbsrgits/DBIx-Class-Historic.git] / lib / DBIx / Class / Storage / DBIHacks.pm
diff --git a/lib/DBIx/Class/Storage/DBIHacks.pm b/lib/DBIx/Class/Storage/DBIHacks.pm

index 937f771..d889752 100644 (file)
--- a/lib/DBIx/Class/Storage/DBIHacks.pm
+++ b/lib/DBIx/Class/Storage/DBIHacks.pm
@@ -469,6 +469,28 @@ sub _resolve_aliastypes_from_select_args {
   ) for keys %$to_scan;
 
 
+  # these will be used for matching in the loop below
+  my $all_aliases = join ' | ', map { quotemeta $_ } keys %$alias_list;
+  my $fq_col_re = qr/
+    $lquote ( $all_aliases ) $rquote $sep (?: $lquote ([^$rquote]+) $rquote )?
+         |
+    \b ( $all_aliases ) \. ( [^\s\)\($rquote]+ )?
+  /x;
+
+  my $all_unq_columns = join ' | ',
+    map
+      { quotemeta $_ }
+      grep
+        # using a regex here shows up on profiles, boggle
+        { index( $_, '.') < 0 }
+        keys %$colinfo
+  ;
+  my $unq_col_re = $all_unq_columns
+    ? qr/ $lquote ( $all_unq_columns ) $rquote /x
+    : undef
+  ;
+
+
   # the actual scan, per type
   for my $type (keys %$to_scan) {
 
@@ -480,42 +502,47 @@ sub _resolve_aliastypes_from_select_args {
       }
     }
 
+
     # we will be bulk-scanning anyway - pieces will not matter in that case
     # (unlike in the direct-equivalence above)
     my $scan_string = join ' ', @{$to_scan->{$type}};
 
+
     # now loop through all fully qualified columns and get the corresponding
     # alias (should work even if they are in scalarrefs)
-    for my $alias (keys %$alias_list) {
-      my $al_re = qr/
-        $lquote $alias $rquote $sep (?: $lquote ([^$rquote]+) $rquote )?
-          |
-        \b $alias \. ([^\s\)\($rquote]+)?
-      /x;
-
-      if (my @matches = $scan_string =~ /$al_re/g) {
-        $aliases_by_type->{$type}{$alias} ||= { -parents => $alias_list->{$alias}{-join_path}||[] };
-        $aliases_by_type->{$type}{$alias}{-seen_columns}{"$alias.$_"} = "$alias.$_"
-          for grep { defined $_ } @matches;
+    #
+    # The regex matches in multiples of 4, with one of the two pairs being
+    # undef. There may be a *lot* of matches, hence the convoluted loop
+    my @matches = $scan_string =~ /$fq_col_re/g;
+    my $i = 0;
+    while( $i < $#matches ) {
+
+      if (
+        defined $matches[$i]
+      ) {
+        $aliases_by_type->{$type}{$matches[$i]} ||= { -parents => $alias_list->{$matches[$i]}{-join_path}||[] };
+
+        $aliases_by_type->{$type}{$matches[$i]}{-seen_columns}{"$matches[$i].$matches[$i+1]"} = "$matches[$i].$matches[$i+1]"
+          if defined $matches[$i+1];
+
+        $i += 2;
       }
-    }
 
-    # now loop through unqualified column names, and try to locate them within
-    # the chunks
-    for my $col (keys %$colinfo) {
-      next if $col =~ / \. /x;   # if column is qualified it was caught by the above
+      $i += 2;
+    }
 
-      my $col_re = qr/ $lquote ($col) $rquote /x;
 
-      if ( my @matches = $scan_string =~ /$col_re/g) {
-        my $alias = $colinfo->{$col}{-source_alias};
-        $aliases_by_type->{$type}{$alias} ||= { -parents => $alias_list->{$alias}{-join_path}||[] };
-        $aliases_by_type->{$type}{$alias}{-seen_columns}{"$alias.$_"} = $_
-          for grep { defined $_ } @matches;
-      }
+    # now loop through unqualified column names, and try to locate them within
+    # the chunks, if there are any unqualified columns in the 1st place
+    next unless $unq_col_re;
+    for ( $scan_string =~ /$unq_col_re/g ) {
+      my $alias = $colinfo->{$_}{-source_alias} or next;
+      $aliases_by_type->{$type}{$alias} ||= { -parents => $alias_list->{$alias}{-join_path}||[] };
+      $aliases_by_type->{$type}{$alias}{-seen_columns}{"$alias.$_"} = $_
     }
   }
 
+
   # Add any non-left joins to the restriction list (such joins are indeed restrictions)
   (
     $_->{-alias}
@@ -531,6 +558,7 @@ sub _resolve_aliastypes_from_select_args {
     $aliases_by_type->{restricting}{ $_->{-alias} } = { -parents => $_->{-join_path}||[] }
   ) for values %$alias_list;
 
+
   # final cleanup
   (
     keys %{$aliases_by_type->{$_}}
@@ -538,6 +566,7 @@ sub _resolve_aliastypes_from_select_args {
     delete $aliases_by_type->{$_}
   ) for keys %$aliases_by_type;
 
+
   $aliases_by_type;
 }
 
@@ -719,53 +748,63 @@ sub _resolve_column_info {
 
   return {} if $colnames and ! @$colnames;
 
-  my $alias2src = $self->_resolve_ident_sources($ident);
+  my $sources = $self->_resolve_ident_sources($ident);
+
+  $_ = { rsrc => $_, colinfos => $_->columns_info }
+    for values %$sources;
 
   my (%seen_cols, @auto_colnames);
 
   # compile a global list of column names, to be able to properly
   # disambiguate unqualified column names (if at all possible)
-  for my $alias (keys %$alias2src) {
-    my $rsrc = $alias2src->{$alias};
-    for my $colname ($rsrc->columns) {
-      push @{$seen_cols{$colname}}, $alias;
-      push @auto_colnames, "$alias.$colname" unless $colnames;
-    }
+  for my $alias (keys %$sources) {
+    (
+      ++$seen_cols{$_}{$alias}
+        and
+      ! $colnames
+        and
+      push @auto_colnames, "$alias.$_"
+    ) for keys %{ $sources->{$alias}{colinfos} };
   }
 
   $colnames ||= [
     @auto_colnames,
-    grep { @{$seen_cols{$_}} == 1 } (keys %seen_cols),
+    ( grep { keys %{$seen_cols{$_}} == 1 } keys %seen_cols ),
   ];
 
-  my (%return, $colinfos);
-  foreach my $col (@$colnames) {
-    my ($source_alias, $colname) = $col =~ m/^ (?: ([^\.]+) \. )? (.+) $/x;
+  my %return;
+  for (@$colnames) {
+    my ($colname, $source_alias) = reverse split /\./, $_;
 
-    # if the column was seen exactly once - we know which rsrc it came from
-    $source_alias ||= $seen_cols{$colname}[0]
-      if ($seen_cols{$colname} and @{$seen_cols{$colname}} == 1);
-
-    next unless $source_alias;
+    my $assumed_alias =
+      $source_alias
+        ||
+      # if the column was seen exactly once - we know which rsrc it came from
+      (
+        $seen_cols{$colname}
+          and
+        keys %{$seen_cols{$colname}} == 1
+          and
+        ( %{$seen_cols{$colname}} )[0]
+      )
+        ||
+      next
+    ;
 
-    my $rsrc = $alias2src->{$source_alias}
-      or next;
+    $self->throw_exception(
+      "No such column '$colname' on source " . $sources->{$assumed_alias}{rsrc}->source_name
+    ) unless $seen_cols{$colname}{$assumed_alias};
 
-    $return{$col} = {
-      %{
-          ( $colinfos->{$source_alias} ||= $rsrc->columns_info )->{$colname}
-            ||
-          $self->throw_exception(
-            "No such column '$colname' on source " . $rsrc->source_name
-          );
-      },
-      -result_source => $rsrc,
-      -source_alias => $source_alias,
-      -fq_colname => $col eq $colname ? "$source_alias.$col" : $col,
+    $return{$_} = {
+      %{ $sources->{$assumed_alias}{colinfos}{$colname} },
+      -result_source => $sources->{$assumed_alias}{rsrc},
+      -source_alias => $assumed_alias,
+      -fq_colname => "$assumed_alias.$colname",
       -colname => $colname,
     };
 
-    $return{"$source_alias.$colname"} = $return{$col} if $col eq $colname;
+    $return{"$assumed_alias.$colname"} = $return{$_}
+      unless $source_alias;
   }
 
   return \%return;