use Encode qw( decode_utf8 );
use File::Temp;
+use Graph;
use Graph::Easy;
use IPC::Run qw( run binary );
use Text::CSV_XS;
use Text::Tradition::Collation::Path;
use Text::Tradition::Collation::Reading;
use Text::Tradition::Collation::Relationship;
-use Text::Tradition::Collation::Segment;
use XML::LibXML;
use Moose;
add_reading => 'add_node',
add_lacuna => 'add_node',
del_reading => 'del_node',
- del_segment => 'del_node',
add_path => 'add_edge',
del_path => 'del_edge',
reading => 'node',
path => 'edge',
readings => 'nodes',
- segments => 'nodes',
paths => 'edges',
relationships => 'edges',
},
# Pass through any graph-specific options.
my $shape = exists( $args->{'shape'} ) ? $args->{'shape'} : 'ellipse';
$self->graph->set_attribute( 'node', 'shape', $shape );
+
+ # Start and end points for all texts
+ $self->start( 'INIT' );
+ $self->end( 'INIT' );
}
around add_lacuna => sub {
return @result;
};
-around readings => sub {
- my $orig = shift;
- my $self = shift;
- my @result = grep { $_->sub_class ne 'segment' } $self->$orig( @_ );
- return @result;
-};
-
-around segments => sub {
- my $orig = shift;
- my $self = shift;
- my @result = grep { $_->sub_class eq 'segment' } $self->$orig( @_ );
- return @result;
-};
-
# Wrapper around merge_nodes
-
sub merge_readings {
my $self = shift;
my $first_node = shift;
return scalar @relevant;
}
-## Dealing with groups of readings, i.e. segments.
-
-sub add_segment {
- my( $self, @items ) = @_;
- my $segment = Text::Tradition::Collation::Segment->new( 'members' => \@items );
- return $segment;
-}
-
## Dealing with relationships between readings. This is a different
## sort of graph edge. Return a success/failure value and a list of
## node pairs that have been linked.
my( $self, $source, $target, $options ) = @_;
# Make sure there is not another relationship between these two
- # readings or segments already
+ # readings already
$source = $self->reading( $source )
unless ref( $source ) && $source->isa( 'Graph::Easy::Node' );
$target = $self->reading( $target )
foreach my $reading ( $self->readings ) {
# Need not output nodes without separate labels
next if $reading->name eq $reading->label;
- # TODO output readings or segments, but not both
- next if $reading->class eq 'node.segment';
$dot .= sprintf( "\t\"%s\" [ label=\"%s\" ];\n", $reading->name, $reading->label );
}
$root->setNamespace( $xsi_ns, 'xsi', 0 );
$root->setAttributeNS( $xsi_ns, 'schemaLocation', $graphml_schema );
- # TODO Add some global graph data
+ # Add the data keys for the graph
+ my %graph_data_keys;
+ my $gdi = 0;
+ my @graph_attributes = qw/ wit_list_separator baselabel linear ac_label /;
+ foreach my $datum ( @graph_attributes ) {
+ $graph_data_keys{$datum} = 'dg'.$gdi++;
+ my $key = $root->addNewChild( $graphml_ns, 'key' );
+ $key->setAttribute( 'attr.name', $datum );
+ $key->setAttribute( 'attr.type', $key eq 'linear' ? 'boolean' : 'string' );
+ $key->setAttribute( 'for', 'graph' );
+ $key->setAttribute( 'id', $graph_data_keys{$datum} );
+ }
# Add the data keys for nodes
my %node_data_keys;
# Add the data keys for edges, i.e. witnesses
my $edi = 0;
my %edge_data_keys;
- foreach my $edge_key( qw/ witness_main witness_ante_corr relationship class / ) {
+ my @string_keys = qw/ class witness relationship /;
+ my @bool_keys = qw/ extra equal_rank non_correctable non_independent /;
+ foreach my $edge_key( @string_keys ) {
$edge_data_keys{$edge_key} = 'de'.$edi++;
my $key = $root->addNewChild( $graphml_ns, 'key' );
$key->setAttribute( 'attr.name', $edge_key );
$key->setAttribute( 'for', 'edge' );
$key->setAttribute( 'id', $edge_data_keys{$edge_key} );
}
+ foreach my $edge_key( @bool_keys ) {
+ $edge_data_keys{$edge_key} = 'de'.$edi++;
+ my $key = $root->addNewChild( $graphml_ns, 'key' );
+ $key->setAttribute( 'attr.name', $edge_key );
+ $key->setAttribute( 'attr.type', 'boolean' );
+ $key->setAttribute( 'for', 'edge' );
+ $key->setAttribute( 'id', $edge_data_keys{$edge_key} );
+ }
# Add the graph, its nodes, and its edges
my $graph = $root->addNewChild( $graphml_ns, 'graph' );
$graph->setAttribute( 'edgedefault', 'directed' );
- $graph->setAttribute( 'id', 'g0' ); # TODO make this meaningful
+ $graph->setAttribute( 'id', $self->tradition->name );
$graph->setAttribute( 'parse.edgeids', 'canonical' );
$graph->setAttribute( 'parse.edges', scalar($self->paths) );
$graph->setAttribute( 'parse.nodeids', 'canonical' );
$graph->setAttribute( 'parse.nodes', scalar($self->readings) );
$graph->setAttribute( 'parse.order', 'nodesfirst' );
+
+ # Collation attribute data
+ foreach my $datum ( @graph_attributes ) {
+ _add_graphml_data( $graph, $graph_data_keys{$datum}, $self->$datum );
+ }
my $node_ctr = 0;
my %node_hash;
if $n->has_rank;
_add_graphml_data( $node_el, $node_data_keys{'class'}, $n->sub_class );
_add_graphml_data( $node_el, $node_data_keys{'identical'}, $n->primary->name )
- if $n->has_primary;
- }
-
- # Add any segments we have
- foreach my $n ( sort { $a->name cmp $b->name } $self->segments ) {
- my $node_el = $graph->addNewChild( $graphml_ns, 'node' );
- my $node_xmlid = 'n' . $node_ctr++;
- $node_hash{ $n->name } = $node_xmlid;
- $node_el->setAttribute( 'id', $node_xmlid );
- _add_graphml_data( $node_el, $node_data_keys{'class'}, $n->sub_class );
- _add_graphml_data( $node_el, $node_data_keys{'name'}, $n->name );
+ if $n->has_primary && $n->primary ne $n;
}
- # Add the path, relationship, and segment edges
+ # Add the path and relationship edges
my $edge_ctr = 0;
foreach my $e ( sort { $a->from->name cmp $b->from->name } $self->graph->edges() ) {
my( $name, $from, $to ) = ( 'e'.$edge_ctr++,
$edge_el->setAttribute( 'id', $name );
# Add the edge class
_add_graphml_data( $edge_el, $edge_data_keys{'class'}, $e->sub_class );
+
+ # For some classes we have extra information to save.
if( $e->sub_class eq 'path' ) {
# It's a witness path, so add the witness
my $base = $e->label;
my $key = $edge_data_keys{'witness_main'};
- # TODO kind of hacky
- if( $e->label =~ /^(.*?)\s+(\(a\.c\.\))$/ ) {
+ # Is this an ante-corr witness?
+ my $aclabel = $self->ac_label;
+ if( $e->label =~ /^(.*)\Q$aclabel\E$/ ) {
+ # Keep the base witness
$base = $1;
- $key = $edge_data_keys{'witness_ante_corr'};
+ # ...and record that this is an 'extra' reading path
+ _add_graphml_data( $edge_el, $edge_data_keys{'extra'}, 'true' );
}
- _add_graphml_data( $edge_el, $key, $base );
+ _add_graphml_data( $edge_el, $edge_data_keys{'witness'}, $base );
} elsif( $e->sub_class eq 'relationship' ) {
- # It's a relationship
+ # It's a relationship, so save the relationship data
_add_graphml_data( $edge_el, $edge_data_keys{'relationship'}, $e->label );
- } # else a segment, nothing to record but source, target, class
+ _add_graphml_data( $edge_el, $edge_data_keys{'equal_rank'}, $e->equal_rank );
+ _add_graphml_data( $edge_el, $edge_data_keys{'non_correctable'}, $e->non_correctable );
+ _add_graphml_data( $edge_el, $edge_data_keys{'non_independent'}, $e->non_independent );
+ }
}
- # Return the thing
- $self->_save_graphml( $graphml->toString(1) );
- return $graphml->toString(1);
+ # Save and return the thing
+ my $result = decode_utf8( $graphml->toString(1) );
+ $self->_save_graphml( $result );
+ return $result;
}
sub _add_graphml_data {
my( $el, $key, $value ) = @_;
- my $data_el = $el->addNewChild( $el->namespaceURI, 'data' );
return unless defined $value;
+ my $data_el = $el->addNewChild( $el->namespaceURI, 'data' );
$data_el->setAttribute( 'key', $key );
$data_el->appendText( $value );
}
return $self->csv;
}
+# Make an alignment table - $noderefs controls whether the objects
+# in the table are the nodes or simply their readings.
+
sub make_alignment_table {
- my $self = shift;
+ my( $self, $noderefs ) = @_;
unless( $self->linear ) {
warn "Need a linear graph in order to make an alignment table";
return;
my @all_pos = sort { $a <=> $b } $self->possible_positions;
foreach my $wit ( $self->tradition->witnesses ) {
# print STDERR "Making witness row(s) for " . $wit->sigil . "\n";
- my @row = _make_witness_row( $wit->path, \@all_pos );
+ my @row = _make_witness_row( $wit->path, \@all_pos, $noderefs );
unshift( @row, $wit->sigil );
push( @$table, \@row );
if( $wit->has_ante_corr ) {
- my @ac_row = _make_witness_row( $wit->uncorrected_path, \@all_pos );
+ my @ac_row = _make_witness_row( $wit->uncorrected_path, \@all_pos, $noderefs );
unshift( @ac_row, $wit->sigil . $self->ac_label );
push( @$table, \@ac_row );
}
}
+
# Return a table where the witnesses read in columns rather than rows.
my $turned = _turn_table( $table );
return $turned;
}
sub _make_witness_row {
- my( $path, $positions ) = @_;
+ my( $path, $positions, $noderefs ) = @_;
my %char_hash;
map { $char_hash{$_} = undef } @$positions;
foreach my $rdg ( @$path ) {
my $rtext = $rdg->text;
$rtext = '#LACUNA#' if $rdg->is_lacuna;
- $char_hash{$rdg->rank} = $rtext;
+ $char_hash{$rdg->rank} = $noderefs ? $rdg : $rtext;
}
my @row = map { $char_hash{$_} } @$positions;
# Fill in lacuna markers for undef spots in the row
my $last_el = shift @row;
my @filled_row = ( $last_el );
foreach my $el ( @row ) {
- if( $last_el && $last_el eq '#LACUNA#' && !defined $el ) {
- $el = '#LACUNA#';
+ # If we are using node reference, make the lacuna node appear many times
+ # in the table. If not, use the lacuna tag.
+ if( $last_el && _el_is_lacuna( $last_el ) && !defined $el ) {
+ $el = $noderefs ? $last_el : '#LACUNA#';
}
push( @filled_row, $el );
$last_el = $el;
return @filled_row;
}
+# Tiny utility function to say if a table element is a lacuna
+sub _el_is_lacuna {
+ my $el = shift;
+ return 1 if $el eq '#LACUNA#';
+ return 1 if ref( $el ) eq 'Text::Tradition::Collation::Reading'
+ && $el->is_lacuna;
+ return 0;
+}
+
# Helper to turn the witnesses along columns rather than rows. Assumes
# equal-sized rows.
sub _turn_table {
sub start {
# Return the beginning reading of the graph.
- my $self = shift;
- my( $new_start ) = @_;
- if( $new_start ) {
+ my( $self, $new_start ) = @_;
+ my $start = $self->reading( '#START#' );
+ if( ref( $new_start ) eq 'Text::Tradition::Collation::Reading' ) {
+ # Replace the existing start node.
$self->del_reading( '#START#' );
$self->graph->rename_node( $new_start, '#START#' );
+ $start = $new_start;
+ } elsif ( $new_start && $new_start eq 'INIT' ) {
+ # Make a new start node.
+ $start = $self->add_reading( '#START#' );
}
+ # Make sure the start node is a meta node
+ $start->is_meta( 1 );
# Make sure the start node has a start position.
- unless( $self->reading( '#START#' )->has_rank ) {
- $self->reading( '#START#' )->rank( '0' );
+ unless( $start->has_rank ) {
+ $start->rank( '0' );
}
- return $self->reading('#START#');
+ return $start;
}
=item B<end>
sub end {
my $self = shift;
my( $new_end ) = @_;
- if( $new_end ) {
+ my $end = $self->reading( '#END#' );
+ if( ref( $new_end ) eq 'Text::Tradition::Collation::Reading' ) {
$self->del_reading( '#END#' );
$self->graph->rename_node( $new_end, '#END#' );
+ $end = $new_end
+ } elsif ( $new_end && $new_end eq 'INIT' ) {
+ # Make a new start node.
+ $end = $self->add_reading( '#END#' );
}
- return $self->reading('#END#');
+ # Make sure the start node is a meta node
+ $end->is_meta( 1 );
+ return $end;
}
=item B<reading_sequence>
# Walk a version of the graph where every node linked by a relationship
# edge is fundamentally the same node, and do a topological ranking on
# the nodes in this graph.
- my $topo_graph = Graph::Easy->new();
+ my $topo_graph = Graph->new();
my %rel_containers;
my $rel_ctr = 0;
# Add the nodes
if( @rels ) {
# Make a relationship container.
push( @rels, $r );
- my $rn = $topo_graph->add_node( 'rel_container_' . $rel_ctr++ );
+ my $rn = 'rel_container_' . $rel_ctr++;
+ $topo_graph->add_vertex( $rn );
foreach( @rels ) {
$rel_containers{$_->name} = $rn;
}
} else {
# Add a new node to mirror the old node.
- $rel_containers{$r->name} = $topo_graph->add_node( $r->name );
+ $rel_containers{$r->name} = $r->name;
+ $topo_graph->add_vertex( $r->name );
}
}
# Add the edges. Need only one edge between any pair of nodes.
foreach my $r ( $self->readings ) {
foreach my $n ( $r->neighbor_readings( 'forward' ) ) {
- $topo_graph->add_edge_once( $rel_containers{$r->name},
- $rel_containers{$n->name} );
+ my( $tfrom, $tto ) = ( $rel_containers{$r->name},
+ $rel_containers{$n->name} );
+ $topo_graph->add_edge( $tfrom, $tto )
+ unless $topo_graph->has_edge( $tfrom, $tto );
}
}
# Now do the rankings, starting with the start node.
my $topo_start = $rel_containers{$self->start->name};
- my $node_ranks = { $topo_start->name => 0 };
+ my $node_ranks = { $topo_start => 0 };
my @curr_origin = ( $topo_start );
# A little iterative function.
while( @curr_origin ) {
- @curr_origin = _assign_rank( $node_ranks, @curr_origin );
+ @curr_origin = _assign_rank( $topo_graph, $node_ranks, @curr_origin );
}
# Transfer our rankings from the topological graph to the real one.
foreach my $r ( $self->readings ) {
- $r->rank( $node_ranks->{$rel_containers{$r->name}->name} );
+ $r->rank( $node_ranks->{$rel_containers{$r->name}} );
}
}
sub _assign_rank {
- my( $node_ranks, @current_nodes ) = @_;
+ my( $graph, $node_ranks, @current_nodes ) = @_;
# Look at each of the children of @current_nodes. If all the child's
# parents have a rank, assign it the highest rank + 1 and add it to
- # @next_nodes. Otherwise skip it.
+ # @next_nodes. Otherwise skip it; we will return when the highest-ranked
+ # parent gets a rank.
my @next_nodes;
foreach my $c ( @current_nodes ) {
- warn "Current reading " . $c->name . "has no rank!"
- unless exists $node_ranks->{$c->name};
- # print STDERR "Looking at child of node " . $c->name . ", rank "
- # . $node_ranks->{$c->name} . "\n";
- my @children = map { $_->to } $c->outgoing;
- foreach my $child ( @children ) {
- next if exists $node_ranks->{$child->name};
+ warn "Current reading $c has no rank!"
+ unless exists $node_ranks->{$c};
+ # print STDERR "Looking at child of node $c, rank "
+ # . $node_ranks->{$c} . "\n";
+ foreach my $child ( $graph->successors( $c ) ) {
+ next if exists $node_ranks->{$child};
my $highest_rank = -1;
my $skip = 0;
- my @parents = map { $_->from } $child->incoming;
- foreach my $parent ( @parents ) {
- if( exists $node_ranks->{$parent->name} ) {
- $highest_rank = $node_ranks->{$parent->name}
- if $highest_rank <= $node_ranks->{$parent->name};
+ foreach my $parent ( $graph->predecessors( $child ) ) {
+ if( exists $node_ranks->{$parent} ) {
+ $highest_rank = $node_ranks->{$parent}
+ if $highest_rank <= $node_ranks->{$parent};
} else {
$skip = 1;
last;
}
}
next if $skip;
- # print STDERR "Assigning rank " . ( $highest_rank + 1 ) . " to node " . $child->name . "\n";
- $node_ranks->{$child->name} = $highest_rank + 1;
+ my $c_rank = $highest_rank + 1;
+ # print STDERR "Assigning rank $c_rank to node $child \n";
+ $node_ranks->{$child} = $c_rank;
push( @next_nodes, $child );
}
}
return @next_nodes;
}
+# Another method to make up for rough collation methods. If the same reading
+# appears multiple times at the same rank, collapse the nodes.
+sub flatten_ranks {
+ my $self = shift;
+ my %unique_rank_rdg;
+ foreach my $rdg ( $self->readings ) {
+ next unless $rdg->has_rank;
+ my $key = $rdg->rank . "||" . $rdg->text;
+ if( exists $unique_rank_rdg{$key} ) {
+ # Combine!
+ print STDERR "Combining readings at same rank: $key\n";
+ $self->merge_readings( $unique_rank_rdg{$key}, $rdg );
+ } else {
+ $unique_rank_rdg{$key} = $rdg;
+ }
+ }
+}
+
+
sub possible_positions {
my $self = shift;
my %all_pos;
no Moose;
__PACKAGE__->meta->make_immutable;
+
+=head1 BUGS / TODO
+
+=over
+
+=item * Rationalize edge classes
+
+=item * Port the internal graph from Graph::Easy to Graph
+
+=back