enable graphml export of partial traditions

[scpubgit/stemmatology.git] / lib / Text / Tradition / Collation.pm
diff --git a/lib/Text/Tradition/Collation.pm b/lib/Text/Tradition/Collation.pm

index 42a3d3b..7926844 100644 (file)
--- a/lib/Text/Tradition/Collation.pm
+++ b/lib/Text/Tradition/Collation.pm
@@ -5,7 +5,7 @@ use File::Temp;
 use File::Which;
 use Graph;
 use IPC::Run qw( run binary );
-use Text::CSV_XS;
+use Text::CSV;
 use Text::Tradition::Collation::Reading;
 use Text::Tradition::Collation::RelationshipStore;
 use Text::Tradition::Error;
@@ -37,6 +37,7 @@ has 'relations' => (
 has 'tradition' => (
     is => 'ro',
     isa => 'Text::Tradition',
+    writer => '_set_tradition',
     weak_ref => 1,
     );
 
@@ -338,15 +339,15 @@ $c->flatten_ranks();
 ok( $c->reading( 'n21p0' ), "New reading exists" );
 is( scalar $c->readings, $rno, "Reading add offset by flatten_ranks" );
 
-# Combine n3 and n4
+# Combine n3 and n4 ( with his )
 $c->merge_readings( 'n3', 'n4', 1 );
 ok( !$c->reading('n4'), "Reading n4 is gone" );
 is( $c->reading('n3')->text, 'with his', "Reading n3 has both words" );
 
-# Collapse n25 and n26
-$c->merge_readings( 'n25', 'n26' );
-ok( !$c->reading('n26'), "Reading n26 is gone" );
-is( $c->reading('n25')->text, 'rood', "Reading n25 has an unchanged word" );
+# Collapse n9 and n10 ( rood / root )
+$c->merge_readings( 'n9', 'n10' );
+ok( !$c->reading('n10'), "Reading n10 is gone" );
+is( $c->reading('n9')->text, 'rood', "Reading n9 has an unchanged word" );
 
 # Combine n21 and n21p0
 my $remaining = $c->reading('n21');
@@ -550,7 +551,7 @@ sub as_svg {
     throw( "Need GraphViz installed to output SVG" )
        unless File::Which::which( 'dot' );
     my $want_subgraph = exists $opts->{'from'} || exists $opts->{'to'};
-    $self->calculate_ranks() unless $self->_graphcalc_done;
+    $self->calculate_ranks() unless( $self->_graphcalc_done || $opts->{'nocalc'} );
     if( !$self->has_cached_svg || $opts->{'recalc'}    || $want_subgraph ) {        
                my @cmd = qw/dot -Tsvg/;
                my( $svg, $err );
@@ -594,6 +595,7 @@ sub as_dot {
     my $color_common = $opts->{'color_common'} if $opts;
     my $STRAIGHTENHACK = !$startrank && !$endrank && $self->end->rank 
        && $self->end->rank > 100;
+    $STRAIGHTENHACK = 1 if $opts->{'straight'}; # even for subgraphs or small graphs
 
     # Check the arguments
     if( $startrank ) {
@@ -638,7 +640,8 @@ sub as_dot {
        }
        if( $STRAIGHTENHACK ) {
                ## HACK part 1
-               $dot .= "\tsubgraph { rank=same \"#START#\" \"#SILENT#\" }\n";  
+               my $startlabel = $startrank ? 'SUBSTART' : 'START';
+               $dot .= "\tsubgraph { rank=same \"#$startlabel#\" \"#SILENT#\" }\n";  
                $dot .= "\t\"#SILENT#\" [ shape=diamond,color=white,penwidth=0,label=\"\" ];"
        }
        my %used;  # Keep track of the readings that actually appear in the graph
@@ -719,7 +722,8 @@ sub as_dot {
        }
        # HACK part 2
        if( $STRAIGHTENHACK ) {
-               $dot .= "\t\"#END#\" -> \"#SILENT#\" [ color=white,penwidth=0 ];\n";
+               my $endlabel = $endrank ? 'SUBEND' : 'END';
+               $dot .= "\t\"#$endlabel#\" -> \"#SILENT#\" [ color=white,penwidth=0 ];\n";
        }       
 
     $dot .= "}\n";
@@ -780,26 +784,55 @@ sub path_witnesses {
        return @wits;
 }
 
+# Helper function. Make a display label for the given witnesses, showing a.c.
+# witnesses only where the main witness is not also in the list.
 sub _path_display_label {
        my $self = shift;
-       my @wits = sort @_;
+       my %wits;
+       map { $wits{$_} = 1 } @_;
+
+       # If an a.c. wit is listed, remove it if the main wit is also listed.
+       # Otherwise keep it for explicit listing.
+       my $aclabel = $self->ac_label;
+       my @disp_ac;
+       foreach my $w ( sort keys %wits ) {
+               if( $w =~ /^(.*)\Q$aclabel\E$/ ) {
+                       if( exists $wits{$1} ) {
+                               delete $wits{$w};
+                       } else {
+                               push( @disp_ac, $w );
+                       }
+               }
+       }
+       
+       # See if we are in a majority situation.
        my $maj = scalar( $self->tradition->witnesses ) * 0.6;
-       if( scalar @wits > $maj ) {
-               # TODO break out a.c. wits
-               return 'majority';
+       if( scalar keys %wits > $maj ) {
+               unshift( @disp_ac, 'majority' );
+               return join( ', ', @disp_ac );
        } else {
-               return join( ', ', @wits );
+               return join( ', ', sort keys %wits );
        }
 }
 
-=head2 witnesses_at_rank
+=head2 readings_at_rank( $rank )
 
-Returns a list of witnesses that are not lacunose, for a given rank.
+Returns a list of readings at a given rank, taken from the alignment table.
 
 =cut
 
-sub witnesses_at_rank {
+sub readings_at_rank {
        my( $self, $rank ) = @_;
+       my $table = $self->alignment_table;
+       # Table rank is real rank - 1.
+       my @elements = map { $_->{'tokens'}->[$rank-1] } @{$table->{'alignment'}};
+       my %readings;
+       foreach my $e ( @elements ) {
+               next unless ref( $e ) eq 'HASH';
+               next unless exists $e->{'t'};
+               $readings{$e->{'t'}->id} = $e->{'t'};
+       }
+       return values %readings;
 }              
 
 =head2 as_graphml
@@ -849,9 +882,21 @@ is( scalar $st->collation->relationships, 3, "Reparsed collation has new relatio
 =cut
 
 sub as_graphml {
-    my( $self ) = @_;
+    my( $self, $options ) = @_;
        $self->calculate_ranks unless $self->_graphcalc_done;
        
+       my $start = $options->{'from'} 
+               ? $self->reading( $options->{'from'} ) : $self->start;
+       my $end = $options->{'to'} 
+               ? $self->reading( $options->{'to'} ) : $self->end;
+       if( $start->has_rank && $end->has_rank && $end->rank < $start->rank ) {
+               throw( 'Start node must be before end node' );
+       }
+       # The readings need to be ranked for this to work.
+       $start = $self->start unless $start->has_rank;
+       $end = $self->end unless $end->has_rank;
+       my %use_readings;
+       
     # Some namespaces
     my $graphml_ns = 'http://graphml.graphdrawing.org/xmlns';
     my $xsi_ns = 'http://www.w3.org/2001/XMLSchema-instance';
@@ -979,6 +1024,9 @@ sub as_graphml {
     my %node_hash;
     # Add our readings to the graph
     foreach my $n ( sort { $a->id cmp $b->id } $self->readings ) {
+       next if $n->has_rank && $n ne $self->start && $n ne $self->end &&
+               ( $n->rank < $start->rank || $n->rank > $end->rank );
+       $use_readings{$n->id} = 1;
        # Add to the main graph
         my $node_el = $sgraph->addNewChild( $graphml_ns, 'node' );
         my $node_xmlid = 'n' . $node_ctr++;
@@ -995,7 +1043,11 @@ sub as_graphml {
     my $edge_ctr = 0;
     foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->sequence->edges() ) {
        # We add an edge in the graphml for every witness in $e.
-       foreach my $wit ( sort $self->path_witnesses( $e ) ) {
+       next unless( $use_readings{$e->[0]} || $use_readings{$e->[1]} );
+       my @edge_wits = sort $self->path_witnesses( $e );
+       $e->[0] = $self->start unless $use_readings{$e->[0]};
+       $e->[1] = $self->end unless $use_readings{$e->[1]};
+       foreach my $wit ( @edge_wits ) {
                        my( $id, $from, $to ) = ( 'e'.$edge_ctr++,
                                                                                $node_hash{ $e->[0] },
                                                                                $node_hash{ $e->[1] } );
@@ -1047,7 +1099,7 @@ row per witness (or witness uncorrected.)
 sub as_csv {
     my( $self ) = @_;
     my $table = $self->alignment_table;
-    my $csv = Text::CSV_XS->new( { binary => 1, quote_null => 0 } );    
+    my $csv = Text::CSV->new( { binary => 1, quote_null => 0 } );    
     my @result;
     # Make the header row
     $csv->combine( map { $_->{'witness'} } @{$table->{'alignment'}} );
@@ -1293,7 +1345,7 @@ sub common_readings {
        return @common;
 }
 
-=head2 path_text( $sigil, $mainsigil [, $start, $end ] )
+=head2 path_text( $sigil, [, $start, $end ] )
 
 Returns the text of a witness (plus its backup, if we are using a layer)
 as stored in the collation.  The text is returned as a string, where the
@@ -1355,11 +1407,16 @@ sub make_witness_path {
     my( $self, $wit ) = @_;
     my @chain = @{$wit->path};
     my $sig = $wit->sigil;
+    # Add start and end if necessary
+    unshift( @chain, $self->start ) unless $chain[0] eq $self->start;
+    push( @chain, $self->end ) unless $chain[-1] eq $self->end;
     foreach my $idx ( 0 .. $#chain-1 ) {
         $self->add_path( $chain[$idx], $chain[$idx+1], $sig );
     }
     if( $wit->is_layered ) {
         @chain = @{$wit->uncorrected_path};
+               unshift( @chain, $self->start ) unless $chain[0] eq $self->start;
+               push( @chain, $self->end ) unless $chain[-1] eq $self->end;
         foreach my $idx( 0 .. $#chain-1 ) {
             my $source = $chain[$idx];
             my $target = $chain[$idx+1];
@@ -1394,7 +1451,7 @@ ok( $c->has_cached_table, "Alignment table was cached" );
 is( $c->alignment_table, $table, "Cached table returned upon second call" );
 $c->calculate_ranks;
 is( $c->alignment_table, $table, "Cached table retained with no rank change" );
-$c->add_relationship( 'n9', 'n23', { 'type' => 'spelling' } );
+$c->add_relationship( 'n24', 'n23', { 'type' => 'spelling' } );
 isnt( $c->alignment_table, $table, "Alignment table changed after relationship add" );
 
 =end testing
@@ -1455,8 +1512,8 @@ sub calculate_ranks {
             $r->rank( $node_ranks->{$rel_containers{$r->id}} );
         } else {
                # Die. Find the last rank we calculated.
-               my @all_defined = sort { $node_ranks->{$rel_containers{$a->id}}
-                                <=> $node_ranks->{$rel_containers{$b->id}} }
+               my @all_defined = sort { ( $node_ranks->{$rel_containers{$a->id}}||-1 )
+                                <=> ( $node_ranks->{$rel_containers{$b->id}}||-1 ) }
                        $self->readings;
                my $last = pop @all_defined;
             throw( "Ranks not calculated after $last - do you have a cycle in the graph?" );
@@ -1530,18 +1587,23 @@ with the same text at the same rank, and merges any that are found.
 sub flatten_ranks {
     my $self = shift;
     my %unique_rank_rdg;
+    my $changed;
     foreach my $rdg ( $self->readings ) {
         next unless $rdg->has_rank;
         my $key = $rdg->rank . "||" . $rdg->text;
         if( exists $unique_rank_rdg{$key} ) {
             # Combine!
                # print STDERR "Combining readings at same rank: $key\n";
+               $changed = 1;
             $self->merge_readings( $unique_rank_rdg{$key}, $rdg );
             # TODO see if this now makes a common point.
         } else {
             $unique_rank_rdg{$key} = $rdg;
         }
     }
+    # If we merged readings, the ranks are still fine but the alignment
+    # table is wrong. Wipe it.
+    $self->wipe_table() if $changed;
 }
        
 
@@ -1567,7 +1629,7 @@ my @common = $c->calculate_common_readings();
 is( scalar @common, 8, "Found correct number of common readings" );
 my @marked = sort $c->common_readings();
 is( scalar @common, 8, "All common readings got marked as such" );
-my @expected = qw/ n1 n12 n16 n19 n20 n5 n6 n7 /;
+my @expected = qw/ n1 n11 n16 n19 n20 n5 n6 n7 /;
 is_deeply( \@marked, \@expected, "Found correct list of common readings" );
 
 =end testing
@@ -1612,14 +1674,22 @@ original texts.
 sub text_from_paths {
        my $self = shift;
     foreach my $wit ( $self->tradition->witnesses ) {
-       my @text = split( /\s+/, 
-               $self->reading_sequence( $self->start, $self->end, $wit->sigil ) );
+       my @readings = $self->reading_sequence( $self->start, $self->end, $wit->sigil );
+       my @text;
+       foreach my $r ( @readings ) {
+               next if $r->is_meta;
+               push( @text, $r->text );
+       }
        $wit->text( \@text );
        if( $wit->is_layered ) {
-                       my @uctext = split( /\s+/, 
-                               $self->reading_sequence( $self->start, $self->end, 
-                                       $wit->sigil.$self->ac_label ) );
-                       $wit->text( \@uctext );
+                       my @ucrdgs = $self->reading_sequence( $self->start, $self->end, 
+                                                                                                 $wit->sigil.$self->ac_label );
+                       my @uctext;
+                       foreach my $r ( @ucrdgs ) {
+                               next if $r->is_meta;
+                               push( @uctext, $r->text );
+                       }
+                       $wit->layertext( \@uctext );
        }
     }    
 }
@@ -1646,14 +1716,14 @@ my $t = Text::Tradition->new(
     );
 my $c = $t->collation;
 
-is( $c->common_predecessor( 'n9', 'n23' )->id, 
+is( $c->common_predecessor( 'n24', 'n23' )->id, 
     'n20', "Found correct common predecessor" );
-is( $c->common_successor( 'n9', 'n23' )->id, 
+is( $c->common_successor( 'n24', 'n23' )->id, 
     '#END#', "Found correct common successor" );
 
 is( $c->common_predecessor( 'n19', 'n17' )->id, 
     'n16', "Found correct common predecessor for readings on same path" );
-is( $c->common_successor( 'n21', 'n26' )->id, 
+is( $c->common_successor( 'n21', 'n10' )->id, 
     '#END#', "Found correct common successor for readings on same path" );
 
 =end testing