my( $class, $args ) = @_;
my $self = {};
bless( $self, $class );
- $self->run_analysis( $args->{'file'}, $args->{'stemmadot'} );
+ $self->{'data'} = [];
+ foreach my $t ( @{$args->{'traditions'}} ) {
+ $self->run_analysis( $t->{'file'}, $t->{'stemmadot'} );
+ }
return $self;
}
# What we will return
my $svg;
my $variants = [];
+ my $data = {};
# Read in the file and stemma
my $tradition = Text::Tradition->new(
'file' => $file,
'linear' => 1,
);
- $self->{'title'} = $tradition->name;
+ $data->{'title'} = $tradition->name;
my $stemma = Text::Tradition::Stemma->new(
'collation' => $tradition->collation,
'dot' => $stemmadot,
);
# We will return the stemma picture
- $svg = $stemma->as_svg;
- ### DIRTY HACK
- $svg =~ s/transform=\"scale\(1 1\)/transform=\"scale\(0.7 0.7\)/;
- $self->{'svg'} = $svg;
+ $svg = $stemma->as_svg( { size => "8,7.5" } );;
+ $data->{'svg'} = $svg;
# We have the collation, so get the alignment table with witnesses in rows.
# Also return the reading objects in the table, rather than just the words.
-
- my $all_wits_table = $tradition->collation->make_alignment_table( 'refs' );
+ my $wits = {};
+ map { $wits->{$_} = 1 } $stemma->witnesses;
+ my $all_wits_table = $tradition->collation->make_alignment_table( 'refs', $wits );
# For each column in the alignment table, we want to see if the existing
# groupings of witnesses match our stemma hypothesis. We also want, at the
# For all the groups with more than one member, collect the list of all
# contiguous vertices needed to connect them.
# TODO: deal with a.c. reading logic
+ $DB::single = 1 if $rank == 25;
my $variant_row = analyze_variant_location( $group_readings, $groups,
- $stemma->apsp, $lacunose );
+ $stemma->graph, $lacunose );
$variant_row->{'id'} = $rank;
$genealogical++ if $variant_row->{'genealogical'};
$conflicts += grep { $_->{'conflict'} } @{$variant_row->{'readings'}};
# my @trees = @{$stemma->distance_trees};
# if( @trees ) {
# foreach my $tree ( 0 .. $#trees ) {
-# my $dc = analyze_variant_location( $group_readings, $groups,
-# $stemma->distance_apsps->[$tree] );
+# my $dc = analyze_variant_location( $group_readings, $groups, $tree );
# foreach my $rdg ( keys %$dc ) {
# my $var = $dc->{$rdg};
# # TODO Do something with this
}
# Populate self with our analysis data.
- $self->{'variants'} = $variants;
- $self->{'variant_count'} = $total;
- $self->{'conflict_count'} = $conflicts;
- $self->{'genealogical_count'} = $genealogical;
+ $data->{'variants'} = $variants;
+ $data->{'variant_count'} = $total;
+ $data->{'conflict_count'} = $conflicts;
+ $data->{'genealogical_count'} = $genealogical;
+ push( @{$self->{'data'}}, $data );
}
# analyze_variant_location( $group_readings, $groups, $graph, $lacunose )
#
# For one variant location, checks whether each group in $groups occupies a
# single connected region of $graph, and records a conflict when it does not.
#
#   $group_readings - hashref keyed by the stringified group (the value of
#                     wit_stringify( $g )); the value is stored as the
#                     reading's 'text'.
#   $groups         - arrayref of arrayrefs; each inner array lists the
#                     members of one group (presumably witness sigla that
#                     are also vertex names in $graph -- confirm with caller).
#   $graph          - graph object; must provide undirected_copy, vertices,
#                     delete_vertex and all_reachable (presumably a Graph.pm
#                     object -- confirm).
#   $lacunose       - arrayref of members to treat as missing; they are
#                     excluded from the reported group of non-conflicting
#                     readings.
#
# Returns a hashref shaped as follows:
# variant_row -> genealogical
#             -> readings [ { text, group, conflict, missing } ]
sub analyze_variant_location {
- my( $group_readings, $groups, $apsp, $lacunose ) = @_;
+ my( $group_readings, $groups, $graph, $lacunose ) = @_;
# %contig maps a vertex name to the stringified group it is assigned to.
my %contig;
# $conflict maps a reading (from $group_readings) to the reading it clashes with.
my $conflict = {};
# %missing is a lookup set built from @$lacunose.
my %missing;
map { $missing{$_} = 1 } @$lacunose;
my $variant_row = { 'readings' => [] };
+ # Mark each ms as in its own group, first.
+ foreach my $g ( @$groups ) {
+ my $gst = wit_stringify( $g );
+ map { $contig{$_} = $gst } @$g;
+ }
# Visit the groups in descending order of member count.
foreach my $g ( sort { scalar @$b <=> scalar @$a } @$groups ) {
- my @members = @$g;
- my $gst = wit_stringify( $g ); # $gst is now the name of this group.
- map { $contig{$_} = $gst } @members; # All members are in this group.
- while( @members ) {
- # Gather the list of vertices that are needed to join all members.
- my $curr = pop @members;
- foreach my $m ( @members ) {
- foreach my $v ( $apsp->path_vertices( $curr, $m ) ) {
- $contig{$v} = $gst unless exists $contig{$v};
- next if $contig{$v} eq $gst;
- # Record what is conflicting. TODO do we use this?
- $conflict->{$group_readings->{$gst}} = $group_readings->{$contig{$v}};
- }
+ my $gst = wit_stringify( $g );
+ # Copy the graph, and delete all non-members from the new graph.
# Only vertices already assigned to a different group are deleted; vertices
# with no %contig entry yet remain in the copy.
+ my $part = $graph->undirected_copy;
+ map { $part->delete_vertex( $_ )
+ if $contig{$_} && $contig{$_} ne $gst } $graph->vertices;
+ # Now all the members of the group should still be reachable
+ # from the first member.
+ my %reachable = ( $g->[0] => 1 );
+ map { $reachable{$_} = 1 } $part->all_reachable( $g->[0] );
+
+ # ...and none of these nodes should be marked as being in another
+ # group.
# Reachable vertices that are not claimed by another group are claimed for
# this group, so later (smaller) groups will see them as taken.
+ foreach ( keys %reachable ) {
+ if( $contig{$_} && $contig{$_} ne $gst ) {
+ $conflict->{$group_readings->{$gst}} = $group_readings->{$contig{$_}};
+ } else {
+ $contig{$_} = $gst;
}
}
+ # None of the unreachable nodes should be in our group either.
# NOTE(review): the value recorded below is this group's own reading, i.e. a
# member of the group could not be reached from the group's first member.
+ foreach ( $part->vertices ) {
+ next if $reachable{$_};
+ $conflict->{$group_readings->{$gst}} = $group_readings->{$gst}
+ if $contig{$_} && $contig{$_} eq $gst;
+ }
+
# Write the reading.
my $reading = { 'text' => $group_readings->{$gst},
'missing' => wit_stringify( $lacunose ),
# A conflicting reading reports just its original members; otherwise the
# group is widened to every reachable vertex that is not lacunose.
if( $reading->{'conflict'} ) {
$reading->{'group'} = $gst;
} else {
- my @all_vertices = grep { $contig{$_} eq $gst && !$missing{$_} } keys %contig;
+ my @all_vertices = grep { !$missing{$_} } keys %reachable;
$reading->{'group'} = wit_stringify( \@all_vertices );
}
push( @{$variant_row->{'readings'}}, $reading );
}
# The location counts as genealogical only if no conflict was recorded for
# any group.
- $variant_row->{'genealogical'} = keys %$conflict ? undef : 1;
+ $variant_row->{'genealogical'} = !( keys %$conflict );
return $variant_row;
}