X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=lib%2FText%2FTradition%2FCollation.pm;h=3a9b056a13d10c5cf144387926461b2d1c4a9b7c;hb=c12bb87843b49eee09f588126ac7d375046b29fd;hp=cc197ff7183966610743a9061f6b7dfcf8f7f707;hpb=ee801e17d007001be15c1b17d4942ffb234aa14f;p=scpubgit%2Fstemmatology.git diff --git a/lib/Text/Tradition/Collation.pm b/lib/Text/Tradition/Collation.pm index cc197ff..3a9b056 100644 --- a/lib/Text/Tradition/Collation.pm +++ b/lib/Text/Tradition/Collation.pm @@ -88,6 +88,20 @@ has 'end' => ( writer => '_set_end', weak_ref => 1, ); + +has 'cached_svg' => ( + is => 'rw', + isa => 'Str', + predicate => 'has_cached_svg', + clearer => 'wipe_svg', + ); + +has 'cached_table' => ( + is => 'rw', + isa => 'HashRef', + predicate => 'has_cached_table', + clearer => 'wipe_table', + ); =head1 NAME @@ -239,6 +253,12 @@ sub BUILD { ### Reading construct/destruct functions +sub _clear_cache { + my $self = shift; + $self->wipe_svg if $self->has_cached_svg; + $self->wipe_table if $self->has_cached_table; +} + sub add_reading { my( $self, $reading ) = @_; unless( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) { @@ -251,6 +271,7 @@ sub add_reading { if( $self->reading( $reading->id ) ) { throw( "Collation already has a reading with id " . $reading->id ); } + $self->_clear_cache; $self->_add_reading( $reading->id => $reading ); # Once the reading has been added, put it in both graphs. $self->sequence->add_vertex( $reading->id ); @@ -267,6 +288,7 @@ around del_reading => sub { $arg = $arg->id; } # Remove the reading from the graphs. + $self->_clear_cache; $self->sequence->delete_vertex( $arg ); $self->relations->delete_reading( $arg ); @@ -282,6 +304,7 @@ sub merge_readings { # We only need the IDs for adding paths to the graph, not the reading # objects themselves. my( $kept, $deleted, $combine_char ) = $self->_stringify_args( @_ ); + $self->_clear_cache; # The kept reading should inherit the paths and the relationships # of the deleted reading. @@ -337,6 +360,7 @@ sub add_path { # objects themselves. my( $source, $target, $wit ) = $self->_stringify_args( @_ ); + $self->_clear_cache; # Connect the readings $self->sequence->add_edge( $source, $target ); # Note the witness in question @@ -357,6 +381,7 @@ sub del_path { # objects themselves. my( $source, $target, $wit ) = $self->_stringify_args( @args ); + $self->_clear_cache; if( $self->sequence->has_edge_attribute( $source, $target, $wit ) ) { $self->sequence->delete_edge_attribute( $source, $target, $wit ); } @@ -385,6 +410,7 @@ be called via $tradition->del_witness. sub clear_witness { my( $self, @sigils ) = @_; + $self->_clear_cache; # Clear the witness(es) out of the paths foreach my $e ( $self->paths ) { foreach my $sig ( @sigils ) { @@ -407,6 +433,7 @@ sub add_relationship { $self->reading( $source ), $target, $self->reading( $target ), $opts ); # Force a full rank recalculation every time. Yuck. $self->calculate_ranks() if $self->end->has_rank; + $self->_clear_cache; return @vectors; } @@ -428,6 +455,10 @@ sub reading_witnesses { my $wits = $self->sequence->get_edge_attributes( @$e ); @all_witnesses{ keys %$wits } = 1; } + my $acstr = $self->ac_label; + foreach my $acwit ( grep { $_ =~ s/^(.*)\Q$acstr\E$/$1/ } keys %all_witnesses ) { + delete $all_witnesses{$acwit.$acstr} if exists $all_witnesses{$acwit}; + } return keys %all_witnesses; } @@ -442,17 +473,23 @@ See as_dot for a list of options. sub as_svg { my( $self, $opts ) = @_; - - my @cmd = qw/dot -Tsvg/; - my( $svg, $err ); - my $dotfile = File::Temp->new(); - ## USE FOR DEBUGGING - # $dotfile->unlink_on_destroy(0); - binmode $dotfile, ':utf8'; - print $dotfile $self->as_dot( $opts ); - push( @cmd, $dotfile->filename ); - run( \@cmd, ">", binary(), \$svg ); - return decode_utf8( $svg ); + my $want_subgraph = exists $opts->{'from'} || exists $opts->{'to'}; + if( !$self->has_cached_svg || $opts->{'recalc'} || $want_subgraph ) { + my @cmd = qw/dot -Tsvg/; + my( $svg, $err ); + my $dotfile = File::Temp->new(); + ## USE FOR DEBUGGING + # $dotfile->unlink_on_destroy(0); + binmode $dotfile, ':utf8'; + print $dotfile $self->as_dot( $opts ); + push( @cmd, $dotfile->filename ); + run( \@cmd, ">", binary(), \$svg ); + $svg = decode_utf8( $svg ); + $self->cached_svg( $svg ) unless $want_subgraph; + return $svg; + } else { + return $self->cached_svg; + } } @@ -478,7 +515,9 @@ sub as_dot { my $startrank = $opts->{'from'} if $opts; my $endrank = $opts->{'to'} if $opts; my $color_common = $opts->{'color_common'} if $opts; - + my $STRAIGHTENHACK = !$startrank && !$endrank && $self->end->rank + && $self->end->rank > 100; + # Check the arguments if( $startrank ) { return if $endrank && $startrank > $endrank; @@ -489,7 +528,6 @@ sub as_dot { $endrank = undef if $endrank == $self->end->rank; } - # TODO consider making some of these things configurable my $graph_name = $self->tradition->name; $graph_name =~ s/[^\w\s]//g; $graph_name = join( '_', split( /\s+/, $graph_name ) ); @@ -521,12 +559,17 @@ sub as_dot { if( $endrank ) { $dot .= "\t\"#SUBEND#\" [ label=\"...\" ];\n"; } - + if( $STRAIGHTENHACK ) { + ## HACK part 1 + $dot .= "\tsubgraph { rank=same \"#START#\" \"#SILENT#\" }\n"; + $dot .= "\t\"#SILENT#\" [ shape=diamond,color=white,penwidth=0,label=\"\" ];" + } my %used; # Keep track of the readings that actually appear in the graph # Sort the readings by rank if we have ranks; this speeds layout. my @all_readings = $self->end->has_rank ? sort { $a->rank <=> $b->rank } $self->readings : $self->readings; + # TODO Refrain from outputting lacuna nodes - just grey out the edges. foreach my $reading ( @all_readings ) { # Only output readings within our rank range. next if $startrank && $reading->rank < $startrank; @@ -544,7 +587,7 @@ sub as_dot { # Add the real edges. Need to weight one edge per rank jump, in a # continuous line. - my $weighted = $self->_add_edge_weights; + # my $weighted = $self->_add_edge_weights; my @edges = $self->paths; my( %substart, %subend ); foreach my $edge ( @edges ) { @@ -563,11 +606,11 @@ sub as_dot { } # Add the calculated edge weights - if( exists $weighted->{$edge->[0]} - && $weighted->{$edge->[0]} eq $edge->[1] ) { - # $variables->{'color'} = 'red'; - $variables->{'weight'} = 3.0; - } + # if( exists $weighted->{$edge->[0]} + # && $weighted->{$edge->[0]} eq $edge->[1] ) { + # # $variables->{'color'} = 'red'; + # $variables->{'weight'} = 3.0; + # } # EXPERIMENTAL: make edge width reflect no. of witnesses my $extrawidth = scalar( $self->path_witnesses( $edge ) ) * 0.2; @@ -595,6 +638,10 @@ sub as_dot { my $varopts = _dot_attr_string( $variables ); $dot .= "\t\"$node\" -> \"#SUBEND#\" $varopts;"; } + # HACK part 2 + if( $STRAIGHTENHACK ) { + $dot .= "\t\"#END#\" -> \"#SILENT#\" [ color=white,penwidth=0 ];\n"; + } $dot .= "}\n"; return $dot; @@ -616,13 +663,18 @@ sub _add_edge_weights { # the largest number of witness paths each time. my $weighted = {}; my $curr = $self->start->id; + my $ranked = $self->end->has_rank; while( $curr ne $self->end->id ) { + my $rank = $ranked ? $self->reading( $curr )->rank : 0; my @succ = sort { $self->path_witnesses( $curr, $a ) <=> $self->path_witnesses( $curr, $b ) } $self->sequence->successors( $curr ); my $next = pop @succ; + my $nextrank = $ranked ? $self->reading( $next )->rank : 0; # Try to avoid lacunae in the weighted path. - while( $self->reading( $next )->is_lacuna && @succ ) { + while( @succ && + ( $self->reading( $next )->is_lacuna || + $nextrank - $rank > 1 ) ){ $next = pop @succ; } $weighted->{$curr} = $next; @@ -660,7 +712,16 @@ sub _path_display_label { return join( ', ', @wits ); } } - + +=head2 witnesses_at_rank + +Returns a list of witnesses that are not lacunose, for a given rank. + +=cut + +sub witnesses_at_rank { + my( $self, $rank ) = @_; +} =head2 as_graphml @@ -865,7 +926,7 @@ row per witness (or witness uncorrected.) sub as_csv { my( $self ) = @_; - my $table = $self->make_alignment_table; + my $table = $self->alignment_table; my $csv = Text::CSV_XS->new( { binary => 1, quote_null => 0 } ); my @result; # Make the header row @@ -874,14 +935,14 @@ sub as_csv { # Make the rest of the rows foreach my $idx ( 0 .. $table->{'length'} - 1 ) { my @rowobjs = map { $_->{'tokens'}->[$idx] } @{$table->{'alignment'}}; - my @row = map { $_ ? $_->{'t'} : $_ } @rowobjs; + my @row = map { $_ ? $_->{'t'}->text : $_ } @rowobjs; $csv->combine( @row ); push( @result, decode_utf8( $csv->string ) ); } return join( "\n", @result ); } -=head2 make_alignment_table( $use_refs, $include_witnesses ) +=head2 alignment_table( $use_refs, $include_witnesses ) Return a reference to an alignment table, in a slightly enhanced CollateX format which looks like this: @@ -901,8 +962,11 @@ keys have a true hash value will be included. =cut -sub make_alignment_table { - my( $self, $noderefs, $include ) = @_; +sub alignment_table { + my( $self ) = @_; + my $include; # see if we can ditch this + return $self->cached_table if $self->has_cached_table; + # Make sure we can do this throw( "Need a linear graph in order to make an alignment table" ) unless $self->linear; @@ -916,22 +980,23 @@ sub make_alignment_table { } # print STDERR "Making witness row(s) for " . $wit->sigil . "\n"; my @wit_path = $self->reading_sequence( $self->start, $self->end, $wit->sigil ); - my @row = _make_witness_row( \@wit_path, \@all_pos, $noderefs ); + my @row = _make_witness_row( \@wit_path, \@all_pos ); push( @{$table->{'alignment'}}, { 'witness' => $wit->sigil, 'tokens' => \@row } ); if( $wit->is_layered ) { my @wit_ac_path = $self->reading_sequence( $self->start, $self->end, $wit->sigil.$self->ac_label ); - my @ac_row = _make_witness_row( \@wit_ac_path, \@all_pos, $noderefs ); + my @ac_row = _make_witness_row( \@wit_ac_path, \@all_pos ); push( @{$table->{'alignment'}}, { 'witness' => $wit->sigil.$self->ac_label, 'tokens' => \@ac_row } ); } } - return $table; + $self->cached_table( $table ); + return $table; } sub _make_witness_row { - my( $path, $positions, $noderefs ) = @_; + my( $path, $positions ) = @_; my %char_hash; map { $char_hash{$_} = undef } @$positions; my $debug = 0; @@ -940,8 +1005,7 @@ sub _make_witness_row { $rtext = '#LACUNA#' if $rdg->is_lacuna; print STDERR "rank " . $rdg->rank . "\n" if $debug; # print STDERR "No rank for " . $rdg->id . "\n" unless defined $rdg->rank; - $char_hash{$rdg->rank} = $noderefs ? { 't' => $rdg } - : { 't' => $rtext }; + $char_hash{$rdg->rank} = { 't' => $rdg }; } my @row = map { $char_hash{$_} } @$positions; # Fill in lacuna markers for undef spots in the row @@ -950,8 +1014,8 @@ sub _make_witness_row { foreach my $el ( @row ) { # If we are using node reference, make the lacuna node appear many times # in the table. If not, use the lacuna tag. - if( $last_el && _el_is_lacuna( $last_el ) && !defined $el ) { - $el = $noderefs ? $last_el : { 't' => '#LACUNA#' }; + if( $last_el && $last_el->{'t'}->is_lacuna && !defined $el ) { + $el = $last_el; } push( @filled_row, $el ); $last_el = $el; @@ -959,30 +1023,6 @@ sub _make_witness_row { return @filled_row; } -# Tiny utility function to say if a table element is a lacuna -sub _el_is_lacuna { - my $el = shift; - return 1 if $el->{'t'} eq '#LACUNA#'; - return 1 if ref( $el->{'t'} ) eq 'Text::Tradition::Collation::Reading' - && $el->{'t'}->is_lacuna; - return 0; -} - -# Helper to turn the witnesses along columns rather than rows. Assumes -# equal-sized rows. -sub _turn_table { - my( $table ) = @_; - my $result = []; - return $result unless scalar @$table; - my $nrows = scalar @{$table->[0]}; - foreach my $idx ( 0 .. $nrows - 1 ) { - foreach my $wit ( 0 .. $#{$table} ) { - $result->[$idx]->[$wit] = $table->[$wit]->[$idx]; - } - } - return $result; -} - =head1 NAVIGATION METHODS =head2 reading_sequence( $first, $last, $sigil, $backup ) @@ -1209,10 +1249,36 @@ sub make_witness_path { Calculate the reading ranks (that is, their aligned positions relative to each other) for the graph. This can only be called on linear collations. +=begin testing + +use Text::Tradition; + +my $cxfile = 't/data/Collatex-16.xml'; +my $t = Text::Tradition->new( + 'name' => 'inline', + 'input' => 'CollateX', + 'file' => $cxfile, + ); +my $c = $t->collation; + +# Make an svg +my $svg = $c->as_svg; +is( substr( $svg, 0, 5 ), 'has_cached_svg, "SVG was cached" ); +is( $c->as_svg, $svg, "Cached SVG returned upon second call" ); +$c->calculate_ranks; +is( $c->as_svg, $svg, "Cached SVG retained with no rank change" ); +$c->add_relationship( 'n9', 'n23', { 'type' => 'spelling' } ); +isnt( $c->as_svg, $svg, "SVG changed after relationship add" ); + +=end testing + =cut sub calculate_ranks { my $self = shift; + # Save the existing ranks, in case we need to invalidate the cached SVG. + my %existing_ranks; # Walk a version of the graph where every node linked by a relationship # edge is fundamentally the same node, and do a topological ranking on # the nodes in this graph. @@ -1240,6 +1306,7 @@ sub calculate_ranks { # Add the edges. foreach my $r ( $self->readings ) { + $existing_ranks{$r} = $r->rank; foreach my $n ( $self->sequence->successors( $r->id ) ) { my( $tfrom, $tto ) = ( $rel_containers{$r->id}, $rel_containers{$n} ); @@ -1269,6 +1336,14 @@ sub calculate_ranks { throw( "Ranks not calculated after $last - do you have a cycle in the graph?" ); } } + # Do we need to invalidate the cached SVG? + if( $self->has_cached_svg ) { + foreach my $r ( $self->readings ) { + next if $existing_ranks{$r} == $r->rank; + $self->wipe_svg; + last; + } + } } sub _assign_rank { @@ -1330,6 +1405,45 @@ sub flatten_ranks { } } +=head2 remove_collations + +Another convenience method for parsing. Removes all 'collation' relationships +that were defined in order to get the reading ranks to be correct. + +=begin testing + +use Text::Tradition; + +my $cxfile = 't/data/Collatex-16.xml'; +my $t = Text::Tradition->new( + 'name' => 'inline', + 'input' => 'CollateX', + 'file' => $cxfile, + ); +my $c = $t->collation; + +isnt( $c->reading('n23')->rank, $c->reading('n9')->rank, "Rank skew exists" ); +$c->add_relationship( 'n23', 'n9', { 'type' => 'collated', 'scope' => 'local' } ); +is( scalar $c->relationships, 4, "Found all expected relationships" ); +$c->remove_collations; +is( scalar $c->relationships, 3, "Collated relationships now gone" ); +is( $c->reading('n23')->rank, $c->reading('n9')->rank, "Aligned ranks were preserved" ); + +=end testing + +=cut + +sub remove_collations { + my $self = shift; + foreach my $reledge ( $self->relationships ) { + my $relobj = $self->relations->get_relationship( $reledge ); + if( $relobj && $relobj->type eq 'collated' ) { + $self->relations->delete_relationship( $reledge ); + } + } +} + + =head2 calculate_common_readings Goes through the graph identifying the readings that appear in every witness @@ -1362,9 +1476,11 @@ is_deeply( \@marked, \@expected, "Found correct list of common readings" ); sub calculate_common_readings { my $self = shift; my @common; - my $table = $self->make_alignment_table( 1 ); + my $table = $self->alignment_table; foreach my $idx ( 0 .. $table->{'length'} - 1 ) { - my @row = map { $_->{'tokens'}->[$idx]->{'t'} } @{$table->{'alignment'}}; + my @row = map { $_->{'tokens'}->[$idx] + ? $_->{'tokens'}->[$idx]->{'t'} : '' } + @{$table->{'alignment'}}; my %hash; foreach my $r ( @row ) { if( $r ) {