X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=lib%2FText%2FTradition%2FCollation.pm;h=3a9b056a13d10c5cf144387926461b2d1c4a9b7c;hb=c12bb87843b49eee09f588126ac7d375046b29fd;hp=cc197ff7183966610743a9061f6b7dfcf8f7f707;hpb=ee801e17d007001be15c1b17d4942ffb234aa14f;p=scpubgit%2Fstemmatology.git

diff --git a/lib/Text/Tradition/Collation.pm b/lib/Text/Tradition/Collation.pm
index cc197ff..3a9b056 100644
--- a/lib/Text/Tradition/Collation.pm
+++ b/lib/Text/Tradition/Collation.pm
@@ -88,6 +88,20 @@ has 'end' => (
 	writer => '_set_end',
 	weak_ref => 1,
 	);
+	
+has 'cached_svg' => (	# SVG text stored by as_svg; sub-graph renderings are never stored
+	is => 'rw',
+	isa => 'Str',
+	predicate => 'has_cached_svg',	# true once an SVG has been stored
+	clearer => 'wipe_svg',	# used by _clear_cache and calculate_ranks to invalidate it
+	);
+	
+has 'cached_table' => (	# alignment table stored by alignment_table once it has been built
+	is => 'rw',
+	isa => 'HashRef',
+	predicate => 'has_cached_table',	# alignment_table returns the stored table when true
+	clearer => 'wipe_table',	# used by _clear_cache to invalidate it
+	);
 
 =head1 NAME
 
@@ -239,6 +253,12 @@ sub BUILD {
 
 ### Reading construct/destruct functions
 
+sub _clear_cache {	# Forget the cached SVG and alignment table, if either is set.
+	my( $self ) = @_;
+	$self->has_cached_svg and $self->wipe_svg;
+	$self->has_cached_table and $self->wipe_table;
+}
+
 sub add_reading {
 	my( $self, $reading ) = @_;
 	unless( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
@@ -251,6 +271,7 @@ sub add_reading {
 	if( $self->reading( $reading->id ) ) {
 		throw( "Collation already has a reading with id " . $reading->id );
 	}
+	$self->_clear_cache;
 	$self->_add_reading( $reading->id => $reading );
 	# Once the reading has been added, put it in both graphs.
 	$self->sequence->add_vertex( $reading->id );
@@ -267,6 +288,7 @@ around del_reading => sub {
 		$arg = $arg->id;
 	}
 	# Remove the reading from the graphs.
+	$self->_clear_cache;
 	$self->sequence->delete_vertex( $arg );
 	$self->relations->delete_reading( $arg );
 	
@@ -282,6 +304,7 @@ sub merge_readings {
 	# We only need the IDs for adding paths to the graph, not the reading
 	# objects themselves.
     my( $kept, $deleted, $combine_char ) = $self->_stringify_args( @_ );
+	$self->_clear_cache;
 
     # The kept reading should inherit the paths and the relationships
     # of the deleted reading.
@@ -337,6 +360,7 @@ sub add_path {
 	# objects themselves.
     my( $source, $target, $wit ) = $self->_stringify_args( @_ );
 
+	$self->_clear_cache;
 	# Connect the readings
     $self->sequence->add_edge( $source, $target );
     # Note the witness in question
@@ -357,6 +381,7 @@ sub del_path {
 	# objects themselves.
     my( $source, $target, $wit ) = $self->_stringify_args( @args );
 
+	$self->_clear_cache;
 	if( $self->sequence->has_edge_attribute( $source, $target, $wit ) ) {
 		$self->sequence->delete_edge_attribute( $source, $target, $wit );
 	}
@@ -385,6 +410,7 @@ be called via $tradition->del_witness.
 sub clear_witness {
 	my( $self, @sigils ) = @_;
 
+	$self->_clear_cache;
 	# Clear the witness(es) out of the paths
 	foreach my $e ( $self->paths ) {
 		foreach my $sig ( @sigils ) {
@@ -407,6 +433,7 @@ sub add_relationship {
     	$self->reading( $source ), $target, $self->reading( $target ), $opts );
     # Force a full rank recalculation every time. Yuck.
     $self->calculate_ranks() if $self->end->has_rank;
+	$self->_clear_cache;
     return @vectors;
 }
 
@@ -428,6 +455,10 @@ sub reading_witnesses {
 		my $wits = $self->sequence->get_edge_attributes( @$e );
 		@all_witnesses{ keys %$wits } = 1;
 	}
+	my $acstr = $self->ac_label;
+	foreach my $acwit ( grep { $_ =~ s/^(.*)\Q$acstr\E$/$1/ } keys %all_witnesses ) {
+		delete $all_witnesses{$acwit.$acstr} if exists $all_witnesses{$acwit};
+	}
 	return keys %all_witnesses;
 }
 
@@ -442,17 +473,23 @@ See as_dot for a list of options.
 
 sub as_svg {
     my( $self, $opts ) = @_;
-        
-    my @cmd = qw/dot -Tsvg/;
-    my( $svg, $err );
-    my $dotfile = File::Temp->new();
-    ## USE FOR DEBUGGING
-    # $dotfile->unlink_on_destroy(0);
-    binmode $dotfile, ':utf8';
-    print $dotfile $self->as_dot( $opts );
-    push( @cmd, $dotfile->filename );
-    run( \@cmd, ">", binary(), \$svg );
-    return decode_utf8( $svg );
+    # Any option besides 'recalc' (from, to, color_common...) changes the output.
+    my $cacheable = !grep { $_ ne 'recalc' } keys %{ $opts || {} };
+    return $self->cached_svg
+        if $cacheable && $self->has_cached_svg && !$opts->{'recalc'};
+    my @cmd = qw/dot -Tsvg/;
+    my $svg;
+    my $dotfile = File::Temp->new();
+    ## USE FOR DEBUGGING
+    # $dotfile->unlink_on_destroy(0);
+    binmode $dotfile, ':utf8';
+    print $dotfile $self->as_dot( $opts );
+    push( @cmd, $dotfile->filename );
+    # run() is true only if dot exited 0; a failed render must not be cached.
+    my $ok = run( \@cmd, ">", binary(), \$svg );
+    $svg = decode_utf8( $svg );
+    $self->cached_svg( $svg ) if $ok && $cacheable;
+    return $svg;
 }
 
 
@@ -478,7 +515,9 @@ sub as_dot {
     my $startrank = $opts->{'from'} if $opts;
     my $endrank = $opts->{'to'} if $opts;
     my $color_common = $opts->{'color_common'} if $opts;
-    
+    my $STRAIGHTENHACK = !$startrank && !$endrank && $self->end->rank 
+       && $self->end->rank > 100;
+
     # Check the arguments
     if( $startrank ) {
     	return if $endrank && $startrank > $endrank;
@@ -489,7 +528,6 @@ sub as_dot {
 		$endrank = undef if $endrank == $self->end->rank;
 	}
 	
-    # TODO consider making some of these things configurable
     my $graph_name = $self->tradition->name;
     $graph_name =~ s/[^\w\s]//g;
     $graph_name = join( '_', split( /\s+/, $graph_name ) );
@@ -521,12 +559,17 @@ sub as_dot {
 	if( $endrank ) {
 		$dot .= "\t\"#SUBEND#\" [ label=\"...\" ];\n";	
 	}
-
+	if( $STRAIGHTENHACK ) {
+		## HACK part 1
+		$dot .= "\tsubgraph { rank=same \"#START#\" \"#SILENT#\" }\n";  
+		$dot .= "\t\"#SILENT#\" [ shape=diamond,color=white,penwidth=0,label=\"\" ];"
+	}
 	my %used;  # Keep track of the readings that actually appear in the graph
 	# Sort the readings by rank if we have ranks; this speeds layout.
 	my @all_readings = $self->end->has_rank 
 		? sort { $a->rank <=> $b->rank } $self->readings
 		: $self->readings;
+	# TODO Refrain from outputting lacuna nodes - just grey out the edges.
     foreach my $reading ( @all_readings ) {
     	# Only output readings within our rank range.
     	next if $startrank && $reading->rank < $startrank;
@@ -544,7 +587,7 @@ sub as_dot {
     
 	# Add the real edges. Need to weight one edge per rank jump, in a
 	# continuous line.
-	my $weighted = $self->_add_edge_weights;
+	# my $weighted = $self->_add_edge_weights;
     my @edges = $self->paths;
 	my( %substart, %subend );
     foreach my $edge ( @edges ) {
@@ -563,11 +606,11 @@ sub as_dot {
 			}
 			
 			# Add the calculated edge weights
-			if( exists $weighted->{$edge->[0]} 
-				&& $weighted->{$edge->[0]} eq $edge->[1] ) {
-				# $variables->{'color'} = 'red';
-				$variables->{'weight'} = 3.0;
-			}
+			# if( exists $weighted->{$edge->[0]} 
+			# 	&& $weighted->{$edge->[0]} eq $edge->[1] ) {
+			# 	# $variables->{'color'} = 'red';
+			# 	$variables->{'weight'} = 3.0;
+			# }
 
 			# EXPERIMENTAL: make edge width reflect no. of witnesses
 			my $extrawidth = scalar( $self->path_witnesses( $edge ) ) * 0.2;
@@ -595,6 +638,10 @@ sub as_dot {
         my $varopts = _dot_attr_string( $variables );
         $dot .= "\t\"$node\" -> \"#SUBEND#\" $varopts;";
 	}
+	# HACK part 2
+	if( $STRAIGHTENHACK ) {
+		$dot .= "\t\"#END#\" -> \"#SILENT#\" [ color=white,penwidth=0 ];\n";
+	}       
 
     $dot .= "}\n";
     return $dot;
@@ -616,13 +663,18 @@ sub _add_edge_weights {
 	# the largest number of witness paths each time.
 	my $weighted = {};
 	my $curr = $self->start->id;
+	my $ranked = $self->end->has_rank;
 	while( $curr ne $self->end->id ) {
+		my $rank = $ranked ? $self->reading( $curr )->rank : 0;
 		my @succ = sort { $self->path_witnesses( $curr, $a )
 							<=> $self->path_witnesses( $curr, $b ) } 
 			$self->sequence->successors( $curr );
 		my $next = pop @succ;
+		my $nextrank = $ranked ? $self->reading( $next )->rank : 0;
 		# Try to avoid lacunae in the weighted path.
-		while( $self->reading( $next )->is_lacuna && @succ ) {
+		while( @succ && 
+			   ( $self->reading( $next )->is_lacuna ||
+			   	 $nextrank - $rank > 1 ) ){
 			$next = pop @succ;
 		}
 		$weighted->{$curr} = $next;
@@ -660,7 +712,16 @@ sub _path_display_label {
 		return join( ', ', @wits );
 	}
 }
-		
+
+=head2 witnesses_at_rank
+
+Returns a list of witnesses that are not lacunose, for a given rank.
+
+=cut
+
+sub witnesses_at_rank {	# TODO not implemented yet; currently reports no witnesses
+	my( $self, $rank ) = @_; return;	# bare return: else the list assignment is the sub's value
+}
 
 =head2 as_graphml
 
@@ -865,7 +926,7 @@ row per witness (or witness uncorrected.)
 
 sub as_csv {
     my( $self ) = @_;
-    my $table = $self->make_alignment_table;
+    my $table = $self->alignment_table;
     my $csv = Text::CSV_XS->new( { binary => 1, quote_null => 0 } );    
     my @result;
     # Make the header row
@@ -874,14 +935,14 @@ sub as_csv {
     # Make the rest of the rows
     foreach my $idx ( 0 .. $table->{'length'} - 1 ) {
     	my @rowobjs = map { $_->{'tokens'}->[$idx] } @{$table->{'alignment'}};
-    	my @row = map { $_ ? $_->{'t'} : $_ } @rowobjs;
+    	my @row = map { $_ ? $_->{'t'}->text : $_ } @rowobjs;
         $csv->combine( @row );
         push( @result, decode_utf8( $csv->string ) );
     }
     return join( "\n", @result );
 }
 
-=head2 make_alignment_table( $use_refs, $include_witnesses )
+=head2 alignment_table( $use_refs, $include_witnesses )
 
 Return a reference to an alignment table, in a slightly enhanced CollateX
 format which looks like this:
@@ -901,8 +962,11 @@ keys have a true hash value will be included.
 
 =cut
 
-sub make_alignment_table {
-    my( $self, $noderefs, $include ) = @_;
+sub alignment_table {
+    my( $self ) = @_;
+    my $include; # see if we can ditch this
+    return $self->cached_table if $self->has_cached_table;
+    
     # Make sure we can do this
 	throw( "Need a linear graph in order to make an alignment table" )
 		unless $self->linear;
@@ -916,22 +980,23 @@ sub make_alignment_table {
     	}
         # print STDERR "Making witness row(s) for " . $wit->sigil . "\n";
         my @wit_path = $self->reading_sequence( $self->start, $self->end, $wit->sigil );
-        my @row = _make_witness_row( \@wit_path, \@all_pos, $noderefs );
+        my @row = _make_witness_row( \@wit_path, \@all_pos );
         push( @{$table->{'alignment'}}, 
         	{ 'witness' => $wit->sigil, 'tokens' => \@row } );
         if( $wit->is_layered ) {
         	my @wit_ac_path = $self->reading_sequence( $self->start, $self->end, 
         		$wit->sigil.$self->ac_label );
-            my @ac_row = _make_witness_row( \@wit_ac_path, \@all_pos, $noderefs );
+            my @ac_row = _make_witness_row( \@wit_ac_path, \@all_pos );
 			push( @{$table->{'alignment'}},
 				{ 'witness' => $wit->sigil.$self->ac_label, 'tokens' => \@ac_row } );
         }           
     }
-	return $table;
+    $self->cached_table( $table );
+    return $table;
 }
 
 sub _make_witness_row {
-    my( $path, $positions, $noderefs ) = @_;
+    my( $path, $positions ) = @_;
     my %char_hash;
     map { $char_hash{$_} = undef } @$positions;
     my $debug = 0;
@@ -940,8 +1005,7 @@ sub _make_witness_row {
         $rtext = '#LACUNA#' if $rdg->is_lacuna;
         print STDERR "rank " . $rdg->rank . "\n" if $debug;
         # print STDERR "No rank for " . $rdg->id . "\n" unless defined $rdg->rank;
-        $char_hash{$rdg->rank} = $noderefs ? { 't' => $rdg } 
-        								   : { 't' => $rtext };
+        $char_hash{$rdg->rank} = { 't' => $rdg };
     }
     my @row = map { $char_hash{$_} } @$positions;
     # Fill in lacuna markers for undef spots in the row
@@ -950,8 +1014,8 @@ sub _make_witness_row {
     foreach my $el ( @row ) {
         # If we are using node reference, make the lacuna node appear many times
         # in the table.  If not, use the lacuna tag.
-        if( $last_el && _el_is_lacuna( $last_el ) && !defined $el ) {
-            $el = $noderefs ? $last_el : { 't' => '#LACUNA#' };
+        if( $last_el && $last_el->{'t'}->is_lacuna && !defined $el ) {
+            $el = $last_el;
         }
         push( @filled_row, $el );
         $last_el = $el;
@@ -959,30 +1023,6 @@ sub _make_witness_row {
     return @filled_row;
 }
 
-# Tiny utility function to say if a table element is a lacuna
-sub _el_is_lacuna {
-    my $el = shift;
-    return 1 if $el->{'t'} eq '#LACUNA#';
-    return 1 if ref( $el->{'t'} ) eq 'Text::Tradition::Collation::Reading'
-        && $el->{'t'}->is_lacuna;
-    return 0;
-}
-
-# Helper to turn the witnesses along columns rather than rows.  Assumes
-# equal-sized rows.
-sub _turn_table {
-    my( $table ) = @_;
-    my $result = [];
-    return $result unless scalar @$table;
-    my $nrows = scalar @{$table->[0]};
-    foreach my $idx ( 0 .. $nrows - 1 ) {
-        foreach my $wit ( 0 .. $#{$table} ) {
-            $result->[$idx]->[$wit] = $table->[$wit]->[$idx];
-        }
-    }
-    return $result;        
-}
-
 =head1 NAVIGATION METHODS
 
 =head2 reading_sequence( $first, $last, $sigil, $backup )
@@ -1209,10 +1249,36 @@ sub make_witness_path {
 Calculate the reading ranks (that is, their aligned positions relative
 to each other) for the graph.  This can only be called on linear collations.
 
+=begin testing
+
+use Text::Tradition;
+
+my $cxfile = 't/data/Collatex-16.xml';
+my $t = Text::Tradition->new( 
+    'name'  => 'inline', 
+    'input' => 'CollateX',
+    'file'  => $cxfile,
+    );
+my $c = $t->collation;
+
+# Make an svg of the whole graph; as_svg stores this rendering in cached_svg
+my $svg = $c->as_svg;
+is( substr( $svg, 0, 5 ), '<?xml', "Got XML doc for svg" );
+ok( $c->has_cached_svg, "SVG was cached" );
+is( $c->as_svg, $svg, "Cached SVG returned upon second call" );
+$c->calculate_ranks;	# wipes the cached SVG only if some reading's rank changed
+is( $c->as_svg, $svg, "Cached SVG retained with no rank change" );
+$c->add_relationship( 'n9', 'n23', { 'type' => 'spelling' } );	# add_relationship calls _clear_cache
+isnt( $c->as_svg, $svg, "SVG changed after relationship add" );
+
+=end testing
+
 =cut
 
 sub calculate_ranks {
     my $self = shift;
+    # Save the existing ranks, in case we need to invalidate the cached SVG.
+    my %existing_ranks;
     # Walk a version of the graph where every node linked by a relationship 
     # edge is fundamentally the same node, and do a topological ranking on
     # the nodes in this graph.
@@ -1240,6 +1306,7 @@ sub calculate_ranks {
 
     # Add the edges.
     foreach my $r ( $self->readings ) {
+		$existing_ranks{$r} = $r->rank;
         foreach my $n ( $self->sequence->successors( $r->id ) ) {
         	my( $tfrom, $tto ) = ( $rel_containers{$r->id},
         		$rel_containers{$n} );
@@ -1269,6 +1336,14 @@ sub calculate_ranks {
             throw( "Ranks not calculated after $last - do you have a cycle in the graph?" );
         }
     }
+    # Do we need to invalidate the cached SVG?
+    if( $self->has_cached_svg ) {
+    	foreach my $r ( $self->readings ) {
+    		next if $existing_ranks{$r} == $r->rank;
+    		$self->wipe_svg;
+    		last;
+    	}
+    }
 }
 
 sub _assign_rank {
@@ -1330,6 +1405,45 @@ sub flatten_ranks {
     }
 }
 
+=head2 remove_collations
+
+Another convenience method for parsing. Removes all 'collated' relationships
+that were defined in order to get the reading ranks to be correct.
+
+=begin testing
+
+use Text::Tradition;
+
+my $cxfile = 't/data/Collatex-16.xml';
+my $t = Text::Tradition->new( 
+    'name'  => 'inline', 
+    'input' => 'CollateX',
+    'file'  => $cxfile,
+    );
+my $c = $t->collation;
+# Link two readings of differing rank with a 'collated' relationship, then remove it.
+isnt( $c->reading('n23')->rank, $c->reading('n9')->rank, "Rank skew exists" );
+$c->add_relationship( 'n23', 'n9', { 'type' => 'collated', 'scope' => 'local' } );	# re-ranks if the end reading has a rank
+is( scalar $c->relationships, 4, "Found all expected relationships" );
+$c->remove_collations;	# deletes only relationships whose type is 'collated'
+is( scalar $c->relationships, 3, "Collated relationships now gone" );
+is( $c->reading('n23')->rank, $c->reading('n9')->rank, "Aligned ranks were preserved" );
+
+=end testing
+
+=cut
+
+sub remove_collations {
+	my( $self ) = @_;
+	# Look up every relationship edge and drop those typed 'collated'.
+	foreach my $edge ( $self->relationships ) {
+		my $rel = $self->relations->get_relationship( $edge );
+		next unless $rel && $rel->type eq 'collated';
+		$self->relations->delete_relationship( $edge );
+	}
+}
+	
+
 =head2 calculate_common_readings
 
 Goes through the graph identifying the readings that appear in every witness 
@@ -1362,9 +1476,11 @@ is_deeply( \@marked, \@expected, "Found correct list of common readings" );
 sub calculate_common_readings {
 	my $self = shift;
 	my @common;
-	my $table = $self->make_alignment_table( 1 );
+	my $table = $self->alignment_table;
 	foreach my $idx ( 0 .. $table->{'length'} - 1 ) {
-		my @row = map { $_->{'tokens'}->[$idx]->{'t'} } @{$table->{'alignment'}};
+		my @row = map { $_->{'tokens'}->[$idx] 
+							? $_->{'tokens'}->[$idx]->{'t'} : '' } 
+					@{$table->{'alignment'}};
 		my %hash;
 		foreach my $r ( @row ) {
 			if( $r ) {