use Graph::Easy;
use IPC::Run qw( run binary );
use Text::Tradition::Collation::Reading;
+use Text::Tradition::Collation::Path;
use Moose;
has 'graph' => (
default => 'base text',
);
+# Read-write boolean recording whether the per-witness paths are
+# currently merged for display.  collapse_graph_paths sets it to 1 and
+# expand_graph_paths resets it to 0; both check it before doing any work.
+has collapsed => ( is => 'rw', isa => 'Bool' );
+
+# Read-write boolean that defaults to true.
+# NOTE(review): presumably marks the collation graph as linear; nothing
+# visible here reads it -- confirm against the callers.
+has linear => ( is => 'rw', isa => 'Bool', default => 1 );
+
+
# The collation can be created two ways:
# 1. Collate a set of witnesses (with CollateX I guess) and process
# the results as in 2.
sub BUILD {
my( $self, $args ) = @_;
$self->graph->use_class('node', 'Text::Tradition::Collation::Reading');
+ $self->graph->use_class('edge', 'Text::Tradition::Collation::Path');
# Pass through any graph-specific options.
my $shape = exists( $args->{'shape'} ) ? $args->{'shape'} : 'ellipse';
$self->$orig( @_ );
};
+# Wrapper around paths
+around paths => sub {
+    # Method modifier: the wrapped method comes first, then the invocant;
+    # whatever remains in @_ is handed to the wrapped method untouched.
+    my( $orig, $self ) = ( shift, shift );
+
+    # Keep only the edges whose class is 'path'.
+    # NOTE(review): presumably this filters out relationship edges, which
+    # share the same graph -- confirm which class those edges report.
+    my @only_paths = grep { 'path' eq $_->class } $self->$orig( @_ );
+    return @only_paths;
+};
+
# Wrapper around merge_nodes
sub merge_readings {
return $self->graph->merge_nodes( @_ );
}
+# Extra graph-alike utility
+sub has_path {
+    # Count the paths leading from $source to $target that carry $label.
+    #   $source, $target - readings; $source must provide edges_to()
+    #   $label           - path label to look for; when it is omitted,
+    #                      every path between the two readings is counted
+    # Returns the number of matching paths, so the result also works as
+    # a boolean.
+    my( $self, $source, $target, $label ) = @_;
+    my @paths = $source->edges_to( $target );
+    return scalar @paths unless defined $label;
+
+    # Report the label matches only.  Counting every edge would claim the
+    # label is present as soon as anything links the two readings.
+    my @relevant = grep { $_->label eq $label } @paths;
+    return scalar @relevant;
+}
+
+## Dealing with relationships between readings. This is a different
+## sort of graph edge.
+
+sub add_relationship {
+    # Link two readings with a relationship edge.
+    #   $type    - stored as the 'sort' of the relationship
+    #   $source  - reading the relationship starts from
+    #   $target  - reading the relationship points to
+    #   $global  - when true, the same relationship is also applied to
+    #              every other reading that shares $source's label and has
+    #              a reading with $target's label at the same position
+    # Returns nothing useful.
+    my( $self, $type, $source, $target, $global ) = @_;
+
+    # Each edge gets its own Relationship object, but all of them record
+    # the pair that was originally requested.
+    my $new_relationship = sub {
+        return Text::Tradition::Collation::Relationship->new(
+            'sort' => $type,
+            'global' => $global,
+            'orig_relation' => [ $source, $target ],
+        );
+    };
+
+    my $rel = $new_relationship->();
+    print STDERR sprintf( "Setting relationship %s between readings %s (%s)"
+                          . " and %s (%s)\n", $type,
+                          $source->label, $source->name,
+                          $target->label, $target->name );
+    $self->graph->add_edge( $source, $target, $rel );
+    return unless $global;
+
+    # Look for all readings with the source label, and if there are
+    # colocated readings with the target label, join them too.
+    foreach my $r ( $self->readings() ) {
+        next unless $r->label eq $source->label;
+
+        # The requested pair was linked above.  Without this guard a
+        # colocated source/target pair would receive a second edge.
+        next if $r->name eq $source->name;
+
+        my @colocated = grep { $_->label eq $target->label }
+            $self->same_position_as( $r );
+        next unless @colocated;
+
+        warn "Multiple readings with same label at same position!"
+            if @colocated > 1;
+        $self->graph->add_edge( $r, $colocated[0], $new_relationship->() );
+    }
+    return;
+}
+
=head2 Output method(s)
=over
my( $self, $recalc ) = @_;
return $self->svg if $self->has_svg;
+ $self->collapse_graph_paths();
$self->_save_graphviz( $self->graph->as_graphviz() )
unless( $self->has_graphviz && !$recalc );
my $in = $self->graphviz;
run( \@cmd, \$in, ">", binary(), \$svg );
$self->{'svg'} = $svg;
+ $self->expand_graph_paths();
return $svg;
}
# Add the data keys for edges
my %wit_hash;
my $wit_ctr = 0;
- foreach my $wit ( $self->getWitnessList ) {
+ foreach my $wit ( @{$self->tradition->witnesses} ) {
my $wit_key = 'w' . $wit_ctr++;
$wit_hash{$wit} = $wit_key;
my $key = $root->addNewChild( $graphml_ns, 'key' );
}
# Add the graph, its nodes, and its edges
+ $self->collapse_graph_edges();
my $graph = $root->addNewChild( $graphml_ns, 'graph' );
$graph->setAttribute( 'edgedefault', 'directed' );
$graph->setAttribute( 'id', 'g0' ); # TODO make this meaningful
}
}
- foreach my $e ( $self->edges() ) {
+ foreach my $e ( $self->paths() ) {
my( $name, $from, $to ) = ( $e->name,
$node_hash{ $e->from()->name() },
$node_hash{ $e->to()->name() } );
# Return the thing
$self->_save_graphml( $graphml );
+ $self->expand_graph_edges();
return $graphml;
}
+sub collapse_graph_paths {
+    # Merge the per-witness paths into a single labelled path between
+    # each pair of readings.  One path per witness is what the
+    # calculations want, but it makes for an unreadable display.
+    # Does nothing when the graph is already collapsed.
+    my $self = shift;
+    return if $self->collapsed;
+
+    print STDERR "Collapsing witness paths in graph...\n";
+
+    # Once this many witnesses share a path they are summarised as
+    # 'majority' instead of being listed one by one.
+    my $witness_count = scalar( @{$self->tradition->witnesses} );
+    my $majority = int( $witness_count / 2 ) + 1;
+
+    # Matches witness labels carrying an "a.c." marker.
+    my $ante_corr = qr/^(.*?)(\s*\(?a\.\s*c\.\)?)$/;
+
+    foreach my $reading ( $self->readings ) {
+        # Every reading gets visited, so looking at outgoing paths only
+        # is enough.  Collect the path names per destination and drop
+        # the individual paths as we go.
+        my %wits_for_target;
+        foreach my $outgoing ( $reading->outgoing() ) {
+            add_hash_entry( \%wits_for_target, $outgoing->to->name,
+                            $outgoing->name );
+            $self->del_path( $outgoing );
+        }
+
+        foreach my $target_name ( keys %wits_for_target ) {
+            my @sigla = @{ $wits_for_target{$target_name} };
+            my( $label, @hidden );
+            if( @sigla >= $majority ) {
+                ## TODO FIX THIS HACK
+                # a.c. labels stay visible next to 'majority'; every
+                # other witness is tucked away on the new path.
+                my @shown = grep { $_ =~ $ante_corr } @sigla;
+                @hidden = grep { $_ !~ $ante_corr } @sigla;
+                $label = join( ', ', 'majority', @shown );
+            } else {
+                $label = join( ', ', @sigla );
+            }
+
+            my $merged = $self->add_path( $reading,
+                                          $self->reading( $target_name ),
+                                          $label );
+            $merged->hidden_witnesses( \@hidden ) if @hidden;
+        }
+    }
+
+    $self->collapsed( 1 );
+}
+
+sub expand_graph_paths {
+    # Undo collapse_graph_paths: replace each merged path with one path
+    # per witness between the same two readings.  A single path per pair
+    # is fine for display but not for analysis.
+    # Does nothing unless the graph is currently collapsed.
+    my $self = shift;
+    return unless $self->collapsed;
+
+    print STDERR "Expanding witness paths in graph...\n";
+    foreach my $path ( $self->paths ) {
+        my $from = $path->from;
+        my $to = $path->to;
+
+        # The label lists the visible witnesses; the rest were stored on
+        # the path when it was collapsed.
+        # NOTE(review): a collapsed label may start with the placeholder
+        # 'majority', which is expanded into a path of its own here --
+        # confirm whether that is intended.
+        my @wits = split( /, /, $path->label );
+        if( $path->has_hidden_witnesses ) {
+            push( @wits, @{$path->hidden_witnesses} );
+        }
+
+        $self->del_path( $path );
+        foreach my $wit ( @wits ) {
+            $self->add_path( $from, $to, $wit );
+        }
+    }
+    $self->collapsed( 0 );
+}
+
=back
=head2 Navigation methods
sub reading_sequence {
my( $self, $start, $end, $witness, $backup ) = @_;
- $witness = 'base text' unless $witness;
+ $witness = $self->baselabel unless $witness;
my @readings = ( $start );
my %seen;
my $n = $start;
- while( $n ne $end ) {
+ while( $n && $n ne $end ) {
if( exists( $seen{$n->name()} ) ) {
warn "Detected loop at " . $n->name();
last;
my $next_reading = $graph->next_reading( $reading, $witpath );
Returns the reading that follows the given reading along the given witness
-path. TODO These are badly named.
+path.
=cut
my $prior_reading = $graph->prior_reading( $reading, $witpath );
Returns the reading that precedes the given reading along the given witness
-path. TODO These are badly named.
+path.
=cut
my $base_le;
my $alt_le;
foreach my $le ( @linked_paths ) {
- if( $le->name eq 'base text' ) {
+ if( $le->name eq $self->baselabel ) {
$base_le = $le;
} else {
my @le_wits = $self->witnesses_of_label( $le->name );
$wit->path( \@wit_path );
# Detect the common readings.
- if( @common_readings ) {
- my @cn;
- foreach my $n ( @wit_path ) {
- push( @cn, $n ) if grep { $_ eq $n } @common_readings;
- }
- @common_readings = ();
- push( @common_readings, @cn );
- } else {
- push( @common_readings, @wit_path );
- }
+ @common_readings = _find_common( \@common_readings, \@wit_path );
}
# Mark all the nodes as either common or not.
return @common_readings;
}
+sub _find_common {
+    # Narrow a running list of common readings by one more path.
+    #   $common_readings - arrayref with the readings common so far
+    #   $new_path        - arrayref with the readings of the next path
+    # Returns the members of $new_path that also occur in
+    # $common_readings, in path order.  An empty $common_readings is
+    # treated as "nothing collected yet": the whole path is returned.
+    my( $common_readings, $new_path ) = @_;
+    return @$new_path unless @$common_readings;
+
+    return grep {
+        my $candidate = $_;
+        scalar grep { $_ eq $candidate } @$common_readings;
+    } @$new_path;
+}
+
+sub _remove_common {
+    # Drop from a list of common readings everything that diverged.
+    #   $common_readings - arrayref of readings currently held common
+    #   $divergence      - arrayref of readings to be removed from it
+    # Readings are matched by name.  Returns the remaining readings in
+    # their original order.
+    my( $common_readings, $divergence ) = @_;
+    my %diverged = map { ( $_->name => 1 ) } @$divergence;
+    return grep { !$diverged{ $_->name } } @$common_readings;
+}
+
+
# An alternative to walk_witness_paths, for use when a collation is
-# constructed from a base text and an apparatus. Also modifies the
-# collation graph to remove all 'base text' paths and replace them
-# with real witness paths.
+# constructed from a base text and an apparatus. We have the
+# sequences of readings and just need to add path edges.
-sub walk_and_expand_base {
- my( $self, $end ) = @_;
+# Add the path edges for every witness of the tradition and return the
+# list of readings that remain common to all the paths examined.
+sub make_witness_paths {
+ my( $self ) = @_;
+ # Running intersection of the readings seen on every path so far.
+ my @common_readings;
foreach my $wit ( @{$self->tradition->witnesses} ) {
- my $sig = $wit->sigil;
- my $post_sig;
- $post_sig = $wit->post_correctione
- if $wit->has_post_correctione;
- my @wit_path = $self->reading_sequence( $self->start, $end, $sig );
- $wit->path( \@wit_path );
- $self->connect_readings_for_witness( $wit );
-
- # If there is a post-correctio, get its path and compare.
- # Add a correction range for each divergence.
- if( $post_sig ) {
- # TODO this is looping
- my @corr_wit_path = $self->reading_sequence( $self->start, $end,
- "$sig$post_sig", $sig );
-
- # Map ante-corr readings to their indices
- my %in_orig;
- my $i = 0;
- map { $in_orig{$_->name} = $i++ } @wit_path;
-
- # Look for divergences
- my $diverged = 0;
- my $last_common;
- my @correction;
- foreach my $rdg ( @corr_wit_path ) {
- if( exists( $in_orig{$rdg->name} ) && !$diverged ) {
- # We are reading the same here
- $last_common = $in_orig{$rdg->name};
- next;
- } elsif ( exists( $in_orig{$rdg->name} ) ) {
- # We have been diverging but are reading the same again.
- # Add the correction to the witness.
- my $offset = $last_common;
- my $length = $in_orig{$rdg->name} - $last_common;
- $wit->add_correction( $offset, $length, \@correction );
- $diverged = 0;
- @correction = ();
- $last_common = $in_orig{$rdg->name};
- } elsif( $diverged ) {
- # We are in the middle of a divergence.
- push( @correction, $rdg );
- } else {
- # We have started to diverge. Note it.
- $diverged = 1;
- push( @correction, $rdg );
- }
- }
- # Add any divergence that is at the end of the line
- if( $diverged ) {
- $wit->add_correction( $last_common, $#wit_path, \@correction );
- }
- }
- }
-
- # Remove any 'base text' paths.
- foreach my $path ( $self->paths ) {
- $self->del_path( $path )
- if $path->label eq $self->baselabel;
+ print STDERR "Making path for " . $wit->sigil . "\n";
+ $self->make_witness_path( $wit );
+ # Intersect the running list with both the witness path and its
+ # uncorrected path.
+ # NOTE(review): _find_common returns an empty list when the running
+ # list is non-empty and the new path is empty, and treats an empty
+ # running list as "start over" -- confirm that uncorrected_path can
+ # never be empty here.
+ @common_readings = _find_common( \@common_readings, $wit->path );
+ @common_readings = _find_common( \@common_readings, $wit->uncorrected_path );
}
+ return @common_readings;
}
-sub connect_readings_for_witness {
+# Add the path edges for a single witness: one edge labelled with the
+# witness sigil between each pair of neighbouring readings on its path,
+# then "<sigil> (a.c.)" edges along its uncorrected path.
+sub make_witness_path {
my( $self, $wit ) = @_;
my @chain = @{$wit->path};
+ my $sig = $wit->sigil;
foreach my $idx ( 0 .. $#chain-1 ) {
- $self->add_path( $chain[$idx], $chain[$idx+1], $wit->sigil );
+ $self->add_path( $chain[$idx], $chain[$idx+1], $sig );
+ }
+ # NOTE(review): uncorrected_path is dereferenced unconditionally here;
+ # presumably it always returns an array reference -- confirm.
+ @chain = @{$wit->uncorrected_path};
+ foreach my $idx( 0 .. $#chain-1 ) {
+ my $source = $chain[$idx];
+ my $target = $chain[$idx+1];
+ # The a.c. edge is added only when has_path reports no path for
+ # the plain sigil between these two readings.
+ $self->add_path( $source, $target, "$sig (a.c.)" )
+ unless $self->has_path( $source, $target, $sig );
}
}
my $node_pos = {};
foreach my $wit ( @{$self->tradition->witnesses} ) {
- # First we walk each path, making a matrix for each witness that
- # corresponds to its eventual position identifier. Common nodes
- # always start a new row, and are thus always in the first column.
-
- my $wit_matrix = [];
- my $cn = 0; # We should hit the common readings in order.
- my $row = [];
- foreach my $wn ( @{$wit->path} ) {
- if( $wn eq $ordered_common[$cn] ) {
- # Set up to look for the next common node, and
- # start a new row of words.
- $cn++;
- push( @$wit_matrix, $row ) if scalar( @$row );
- $row = [];
- }
- push( @$row, $wn );
- }
- push( @$wit_matrix, $row ); # Push the last row onto the matrix
-
- # Now we have a matrix per witness, so that each row in the
- # matrix begins with a common node, and continues with all the
- # variant words that appear in the witness. We turn this into
- # real positions in row,cell format. But we need some
- # trickery in order to make sure that each node gets assigned
- # to only one position.
-
- foreach my $li ( 1..scalar(@$wit_matrix) ) {
- foreach my $di ( 1..scalar(@{$wit_matrix->[$li-1]}) ) {
- my $reading = $wit_matrix->[$li-1]->[$di-1];
- my $position = "$li,$di";
- # If we have seen this node before, we need to compare
- # its position with what went before.
- unless( $reading->has_position &&
- _cmp_position( $position, $reading->position ) < 1 ) {
- # The new position ID replaces the old one.
- $reading->position( $position );
- } # otherwise, the old position needs to stay.
- }
- }
+ print STDERR "Calculating positions in " . $wit->sigil . "\n";
+ _update_positions_from_path( $wit->path, @ordered_common );
+ _update_positions_from_path( $wit->uncorrected_path, @ordered_common )
+ if $wit->has_ante_corr;
+ }
+
+ # DEBUG
+ foreach my $r ( $self->readings() ) {
+ print STDERR "Reading " . $r->name . "/" . $r->label . " has no position\n"
+ unless( $r->has_position );
}
$self->init_lemmata();
}
+sub _update_positions_from_path {
+    # Assign "row,cell" position identifiers to the readings on one path.
+    #   $path           - arrayref of readings in path order
+    #   @ordered_common - the common readings, in the order the path is
+    #                     expected to meet them
+    # A reading that already has a position keeps it unless the newly
+    # computed one compares greater according to _cmp_position.
+    my( $path, @ordered_common ) = @_;
+
+    # Cut the path into rows.  Each common reading opens a new row, so
+    # it always ends up in the first cell of its row.
+    my @rows;
+    my @current;
+    my $next_common = 0;    # We should hit the common readings in order.
+    foreach my $reading ( @{$path} ) {
+        if( $reading eq $ordered_common[$next_common] ) {
+            $next_common++;
+            push( @rows, [ @current ] ) if @current;
+            @current = ();
+        }
+        push( @current, $reading );
+    }
+    push( @rows, [ @current ] );    # The last row is always recorded.
+
+    # Turn row and cell numbers (both counted from 1) into positions.  A
+    # reading may turn up on several paths, so an existing position is
+    # only replaced, never simply overwritten.
+    foreach my $row_idx ( 0 .. $#rows ) {
+        my @cells = @{ $rows[$row_idx] };
+        foreach my $cell_idx ( 0 .. $#cells ) {
+            my $reading = $cells[$cell_idx];
+            my $position = join( ',', $row_idx + 1, $cell_idx + 1 );
+
+            # The old position needs to stay when the new one does not
+            # compare greater.
+            next if $reading->has_position
+                && _cmp_position( $position, $reading->position ) < 1;
+            $reading->position( $position );
+        }
+    }
+}
+
sub _cmp_position {
my( $a, $b ) = @_;
if ( $a && $b ) {
return values( %h );
}
+sub add_hash_entry {
+    # Append $entry to the list stored under $key in the hashref $hash,
+    # creating that list when the key is not there yet.
+    my( $hash, $key, $entry ) = @_;
+    return exists $hash->{$key}
+        ? push( @{$hash->{$key}}, $entry )
+        : ( $hash->{$key} = [ $entry ] );
+}
+
no Moose;
__PACKAGE__->meta->make_immutable;