From: Tara L Andrews Date: Sun, 20 Nov 2011 14:40:43 +0000 (+0100) Subject: a little light refactoring X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=f6c8ea0853f85e4f5a529f1174668a8b2c062728;p=scpubgit%2Fstemmatology.git a little light refactoring --- diff --git a/lib/Text/Tradition/Analysis.pm b/lib/Text/Tradition/Analysis.pm index ea01366..399dbab 100644 --- a/lib/Text/Tradition/Analysis.pm +++ b/lib/Text/Tradition/Analysis.pm @@ -48,63 +48,20 @@ sub run_analysis { # Also return the reading objects in the table, rather than just the words. my $wits = {}; map { $wits->{$_} = 1 } $stemma->witnesses; - my $all_wits_table = $tradition->collation->make_alignment_table( 'refs', $wits ); - # For each column in the alignment table, we want to see if the existing - # groupings of witnesses match our stemma hypothesis. We also want, at the - # end, to produce an HTML table with all the variants. - my $html_columns = 0; + # groupings of witnesses match our stemma hypothesis. We also need to keep + # track of the maximum number of variants at any one location. + my $max_variants = 0; my ( $total, $genealogical, $conflicts ) = ( 0, 0, 0 ); - # Strip the list of sigla and save it for correlation to the readings. - my $col_wits = shift @$all_wits_table; - # Any witness in the stemma that has no row should be noted. - foreach ( @$col_wits ) { - $wits->{$_}++; # Witnesses present in table and stemma now have value 2. - } - my @not_collated = grep { $wits->{$_} == 1 } keys %$wits; - - # We will return a data structure, an array for each row that looks like: - # { id = X, genealogical = Y, readings = [ text = X, group = Y], empty = N } - my $rank = 0; my $t0 = Benchmark->new(); - foreach my $i ( 0 .. $#$all_wits_table ) { - # For each column in the table, group the readings by witness. - my $rdg_wits = {}; - my $col_rdgs = shift @$all_wits_table; - # my $rank; - my $lacunose = [ @not_collated ]; - foreach my $j ( 0 .. $#{$col_rdgs} ) { - my $rdg = $col_rdgs->[$j]; - my $rdg_text = '(omitted)'; # Initialize in case of empty reading - if( $rdg ) { - if( $rdg->is_lacuna ) { - $rdg_text = undef; # Don't count lacunae - push( @$lacunose, $col_wits->[$j] ); - } else { - $rdg_text = $rdg->text; - # Get the rank from any real reading; they should be identical. - # $rank = $rdg->rank; - } - } - if( defined $rdg_text ) { - # Initialize the witness array if we haven't got one yet - $rdg_wits->{$rdg_text} = [] unless $rdg_wits->{$rdg_text}; - # Add the relevant witness, subject to a.c. logic - add_variant_wit( $rdg_wits->{$rdg_text}, $col_wits->[$j], - $tradition->collation->ac_label ); - } - } - - # See if this column has any potentially genealogical variants. - # If not, skip to the next. - $total++ unless scalar keys %$rdg_wits == 1; - my( $groups, $readings ) = useful_variant( $rdg_wits ); - next unless $groups && $readings; - $rank++; + my $variant_groups = group_variants( $tradition->collation, $wits ); + foreach my $rank ( 0 .. $#{$variant_groups} ) { + my $groups = $variant_groups->[$rank]->{'groups'}; + my $readings = $variant_groups->[$rank]->{'readings'}; + my $lacunose = $variant_groups->[$rank]->{'lacunose'}; - # Keep track of our widest row - $html_columns = scalar @$groups if scalar @$groups > $html_columns; + $max_variants = scalar @$groups if scalar @$groups > $max_variants; # We can already look up witnesses for a reading; we also want to look # up readings for a given witness. @@ -115,11 +72,11 @@ sub run_analysis { # For all the groups with more than one member, collect the list of all # contiguous vertices needed to connect them. - my $variant_row = analyze_variant_location( $group_readings, $groups, + my $variant_loc = analyze_variant_location( $group_readings, $groups, $stemma->graph, $lacunose ); - $variant_row->{'id'} = $rank; - $genealogical++ if $variant_row->{'genealogical'}; - $conflicts += grep { $_->{'conflict'} } @{$variant_row->{'readings'}}; + $variant_loc->{'id'} = $rank; + $genealogical++ if $variant_loc->{'genealogical'}; + $conflicts += grep { $_->{'conflict'} } @{$variant_loc->{'readings'}}; # Now run the same analysis given the calculated distance tree(s). # my @trees = @{$stemma->distance_trees}; @@ -134,16 +91,16 @@ sub run_analysis { # } # Record that we used this variant in an analysis - push( @$variants, $variant_row ); + push( @$variants, $variant_loc ); } my $t1 = Benchmark->new(); print STDERR "Analysis of graph for " . $tradition->name . " took " . timestr( timediff( $t1, $t0 ) ) . "seconds\n"; - # Go through our variant rows, after we have seen all of them once, + # Go through our variant locations, after we have seen all of them once, # and add the number of empty columns needed by each. foreach my $row ( @$variants ) { - my $empty = $html_columns - scalar @{$row->{'readings'}}; + my $empty = $max_variants - scalar @{$row->{'readings'}}; $row->{'empty'} = $empty; } @@ -159,6 +116,8 @@ sub group_variants { my( $c, $wits ) = @_; my $variant_groups = []; + # We have the collation, so get the alignment table with witnesses in rows. + # Also return the reading objects in the table, rather than just the words. my $all_wits_table = $c->make_alignment_table( 'refs', $wits ); # Strip the list of sigla and save it for correlation to the readings. my $col_wits = shift @$all_wits_table; @@ -166,12 +125,11 @@ sub group_variants { foreach ( @$col_wits ) { $wits->{$_}++; # Witnesses present in table and stemma now have value 2. } - my @not_collated = grep { $wits->{$_} == 1 } keys %$wits; + my @not_collated = grep { $wits->{$_} == 1 } keys %$wits; foreach my $i ( 0 .. $#$all_wits_table ) { # For each column in the table, group the readings by witness. my $rdg_wits = {}; my $col_rdgs = shift @$all_wits_table; - my $rank; my $lacunose = [ @not_collated ]; foreach my $j ( 0 .. $#{$col_rdgs} ) { my $rdg = $col_rdgs->[$j]; @@ -182,8 +140,6 @@ sub group_variants { push( @$lacunose, $col_wits->[$j] ); } else { $rdg_text = $rdg->text; - # Get the rank from any real reading; they should be identical. - $rank = $rdg->rank; } } if( defined $rdg_text ) { @@ -200,11 +156,14 @@ sub group_variants { my( $groups, $readings ) = useful_variant( $rdg_wits ); next unless $groups && $readings; - push( @$variant_groups, $groups ); + push( @$variant_groups, + { 'groups' => $groups, 'readings' => $readings, 'lacunose' => $lacunose } ); } return $variant_groups; } + + # variant_row -> genealogical # -> readings [ { text, group, conflict, missing } ]