From: tla Date: Wed, 15 Jan 2014 12:24:31 +0000 (+0100) Subject: Rework Stemma / StemmaUtil so that utility functions are all in the latter. Fixes #14 X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=5873cf380e525c86f4d1af264b1810dc7321a3e2;p=scpubgit%2Fstemmatology.git Rework Stemma / StemmaUtil so that utility functions are all in the latter. Fixes #14 --- diff --git a/analysis/lib/Text/Tradition/HasStemma.pm b/analysis/lib/Text/Tradition/HasStemma.pm index 1a31c07..18ecc67 100644 --- a/analysis/lib/Text/Tradition/HasStemma.pm +++ b/analysis/lib/Text/Tradition/HasStemma.pm @@ -5,7 +5,6 @@ use warnings; use Moose::Role; use Date::Parse; use Text::Tradition::Stemma; -use Text::Tradition::StemmaUtil qw/ parse_newick /; =head1 NAME @@ -168,7 +167,7 @@ sub record_stemweb_result { if( $answer->{format} eq 'dot' ) { $self->add_stemma( dot => $answer->{result} ); } elsif( $answer->{format} eq 'newick' ) { - $stemmata = parse_newick( $answer->{result} ); + $stemmata = Text::Tradition::Stemma->new_from_newick( $answer->{result} ); my $title = sprintf( "%s %d", $answer->{algorithm}, str2time( $answer->{start_time} ) ); my $i = 0; diff --git a/analysis/lib/Text/Tradition/Stemma.pm b/analysis/lib/Text/Tradition/Stemma.pm index a5d75a7..9a35feb 100644 --- a/analysis/lib/Text/Tradition/Stemma.pm +++ b/analysis/lib/Text/Tradition/Stemma.pm @@ -6,6 +6,7 @@ use Graph; use Graph::Reader::Dot; use IPC::Run qw/ run binary /; use Text::Tradition::Error; +use Text::Tradition::StemmaUtil qw/ editable_graph display_graph parse_newick /; use Moose; =head1 NAME @@ -234,6 +235,23 @@ sub is_undirected { return $self->graph->is_undirected; } +=head2 new_from_newick( $newick_string ) + +A constructor that will read a Newick-format tree specification and return one +or more undirected Stemma objects. TODO test + +=cut + +sub new_from_newick { + my( $class, $nstring ) = @_; + my @stemmata; + foreach my $tree ( parse_newick( $nstring ) ) { + my $stemma = new( $class, graph => $tree ); + push( @stemmata, $stemma ); + } + return \@stemmata; +} + =head1 METHODS =head2 as_dot( \%options ) @@ -265,56 +283,10 @@ sub as_dot { map { $extant->{$_} = 1 } $self->witnesses; $graph = $self->situation_graph( $extant, $opts->{'layerwits'} ); } - - # Get default and specified options - my %graphopts = ( - # 'ratio' => 1, - 'bgcolor' => 'transparent', - ); - my %nodeopts = ( - 'fontsize' => 11, - 'style' => 'filled', - 'fillcolor' => 'white', - 'color' => 'white', - 'shape' => 'ellipse', # Shape for the extant nodes - ); - my %edgeopts = ( - 'arrowhead' => 'none', - ); - @graphopts{ keys %{$opts->{'graph'}} } = values %{$opts->{'graph'}} - if $opts->{'graph'}; - @nodeopts{ keys %{$opts->{'node'}} } = values %{$opts->{'node'}} - if $opts->{'node'}; - @edgeopts{ keys %{$opts->{'edge'}} } = values %{$opts->{'edge'}} - if $opts->{'edge'}; - - my $gdecl = $graph->is_directed ? 'digraph' : 'graph'; - my $gname = $self->has_identifier ? '"' . $self->identifier . '"' - : 'stemma'; - my @dotlines; - push( @dotlines, "$gdecl $gname {" ); - ## Print out the global attributes - push( @dotlines, _make_dotline( 'graph', %graphopts ) ) if keys %graphopts; - push( @dotlines, _make_dotline( 'edge', %edgeopts ) ) if keys %edgeopts; - push( @dotlines, _make_dotline( 'node', %nodeopts ) ) if keys %nodeopts; - - # Add each of the nodes. - foreach my $n ( $graph->vertices ) { - my %vattr = ( 'id' => $n ); # Set the SVG element ID to the sigil itself - if( $graph->has_vertex_attribute( $n, 'label' ) ) { - $vattr{'label'} = $graph->get_vertex_attribute( $n, 'label' ); - } - push( @dotlines, _make_dotline( $n, %vattr ) ); - } - # Add each of our edges. - foreach my $e ( $graph->edges ) { - my( $from, $to ) = map { _dotquote( $_ ) } @$e; - my $connector = $graph->is_directed ? '->' : '--'; - push( @dotlines, " $from $connector $to;" ); - } - push( @dotlines, '}' ); - - return join( "\n", @dotlines ); + if( $self->has_identifier ) { + $opts->{'name'} = $self->identifier; + } + return display_graph( $graph, $opts ); } =head2 alter_graph( $dotstring ) @@ -334,8 +306,6 @@ sub alter_graph { =head2 editable( $opts ) -=head2 editable_graph( $graph, $opts ) - Returns a version of the graph rendered in our definition format. The output separates statements with a newline; set $opts->{'linesep'} to the empty string or to a space if the result is to be sent via JSON. @@ -360,60 +330,6 @@ sub editable { return editable_graph( $graph, $opts ); } -sub editable_graph { - my( $graph, $opts ) = @_; - - # Create the graph - my $join = ( $opts && exists $opts->{'linesep'} ) ? $opts->{'linesep'} : "\n"; - my $gdecl = $graph->is_undirected ? 'graph' : 'digraph'; - my $gname = exists $opts->{'name'} ? '"' . $opts->{'name'} . '"' - : 'stemma'; - my @dotlines; - push( @dotlines, "$gdecl $gname {" ); - my @real; # A cheap sort - foreach my $n ( sort $graph->vertices ) { - my $c = $graph->get_vertex_attribute( $n, 'class' ); - $c = 'extant' unless $c; - if( $c eq 'extant' ) { - push( @real, $n ); - } else { - push( @dotlines, _make_dotline( $n, 'class' => $c ) ); - } - } - # Now do the real ones - foreach my $n ( @real ) { - push( @dotlines, _make_dotline( $n, 'class' => 'extant' ) ); - } - foreach my $e ( sort _by_vertex $graph->edges ) { - my( $from, $to ) = map { _dotquote( $_ ) } @$e; - my $conn = $graph->is_undirected ? '--' : '->'; - push( @dotlines, " $from $conn $to;" ); - } - push( @dotlines, '}' ); - return join( $join, @dotlines ); -} - -sub _make_dotline { - my( $obj, %attr ) = @_; - my @pairs; - foreach my $k ( keys %attr ) { - my $v = _dotquote( $attr{$k} ); - push( @pairs, "$k=$v" ); - } - return sprintf( " %s [ %s ];", _dotquote( $obj ), join( ', ', @pairs ) ); -} - -sub _dotquote { - my( $str ) = @_; - return $str if $str =~ /^[A-Za-z0-9]+$/; - $str =~ s/\"/\\\"/g; - $str = '"' . $str . '"'; - return $str; -} - -sub _by_vertex { - return $a->[0].$a->[1] cmp $b->[0].$b->[1]; -} =head2 situation_graph( $extant, $layered ) diff --git a/analysis/lib/Text/Tradition/StemmaUtil.pm b/analysis/lib/Text/Tradition/StemmaUtil.pm index 8007af6..865b88d 100644 --- a/analysis/lib/Text/Tradition/StemmaUtil.pm +++ b/analysis/lib/Text/Tradition/StemmaUtil.pm @@ -12,20 +12,147 @@ use File::Which; use Graph; use IPC::Run qw/ run binary /; use Text::Tradition::Error; -use Text::Tradition::Stemma; -@EXPORT_OK = qw/ character_input phylip_pars parse_newick newick_to_svg /; +@EXPORT_OK = qw/ display_graph editable_graph + character_input phylip_pars parse_newick newick_to_svg /; =head1 NAME -Text::Tradition::StemmaUtil - standalone utilities for distance tree calculations +Text::Tradition::StemmaUtil - standalone utilities for stemma graph display and +distance tree calculations =head1 DESCRIPTION -This package contains a set of utilities for running phylogenetic analysis on -text collations. +This package contains a set of utilities for displaying arbitrary stemmata and +running phylogenetic analysis on text collations. =head1 SUBROUTINES +=head2 display_graph( $graph, $opts ) + +Returns a dot specification intended for display, according to the logical +attributes of the witnesses. + +=cut + +sub display_graph { + my( $graph, $opts ) = @_; + + # Get default and specified options + my %graphopts = ( + # 'ratio' => 1, + 'bgcolor' => 'transparent', + ); + my %nodeopts = ( + 'fontsize' => 11, + 'style' => 'filled', + 'fillcolor' => 'white', + 'color' => 'white', + 'shape' => 'ellipse', # Shape for the extant nodes + ); + my %edgeopts = ( + 'arrowhead' => 'none', + ); + @graphopts{ keys %{$opts->{'graph'}} } = values %{$opts->{'graph'}} + if $opts->{'graph'}; + @nodeopts{ keys %{$opts->{'node'}} } = values %{$opts->{'node'}} + if $opts->{'node'}; + @edgeopts{ keys %{$opts->{'edge'}} } = values %{$opts->{'edge'}} + if $opts->{'edge'}; + + my $gdecl = $graph->is_directed ? 'digraph' : 'graph'; + my $gname = $opts->{'name'} ? '"' . $opts->{'name'} . '"' + : 'stemma'; + my @dotlines; + push( @dotlines, "$gdecl $gname {" ); + ## Print out the global attributes + push( @dotlines, _make_dotline( 'graph', %graphopts ) ) if keys %graphopts; + push( @dotlines, _make_dotline( 'edge', %edgeopts ) ) if keys %edgeopts; + push( @dotlines, _make_dotline( 'node', %nodeopts ) ) if keys %nodeopts; + + # Add each of the nodes. + foreach my $n ( $graph->vertices ) { + my %vattr = ( 'id' => $n ); # Set the SVG element ID to the sigil itself + if( $graph->has_vertex_attribute( $n, 'label' ) ) { + $vattr{'label'} = $graph->get_vertex_attribute( $n, 'label' ); + } + push( @dotlines, _make_dotline( $n, %vattr ) ); + } + # Add each of our edges. + foreach my $e ( $graph->edges ) { + my( $from, $to ) = map { _dotquote( $_ ) } @$e; + my $connector = $graph->is_directed ? '->' : '--'; + push( @dotlines, " $from $connector $to;" ); + } + push( @dotlines, '}' ); + + return join( "\n", @dotlines ); +} + + +=head2 editable_graph( $graph, $opts ) + +Returns a dot specification of a stemma graph with logical witness features, +intended for editing the stemma definition. + +=cut + +sub editable_graph { + my( $graph, $opts ) = @_; + + # Create the graph + my $join = ( $opts && exists $opts->{'linesep'} ) ? $opts->{'linesep'} : "\n"; + my $fq = $opts->{'forcequote'}; + my $gdecl = $graph->is_undirected ? 'graph' : 'digraph'; + my $gname = exists $opts->{'name'} ? '"' . $opts->{'name'} . '"' + : 'stemma'; + my @dotlines; + push( @dotlines, "$gdecl $gname {" ); + my @real; # A cheap sort + foreach my $n ( sort $graph->vertices ) { + my $c = $graph->get_vertex_attribute( $n, 'class' ); + $c = 'extant' unless $c; + if( $c eq 'extant' ) { + push( @real, $n ); + } else { + push( @dotlines, _make_dotline( $n, 'class' => $c, 'forcequote' => $fq ) ); + } + } + # Now do the real ones + foreach my $n ( @real ) { + push( @dotlines, _make_dotline( $n, 'class' => 'extant', 'forcequote' => $fq ) ); + } + foreach my $e ( sort _by_vertex $graph->edges ) { + my( $from, $to ) = map { _dotquote( $_ ) } @$e; + my $conn = $graph->is_undirected ? '--' : '->'; + push( @dotlines, " $from $conn $to;" ); + } + push( @dotlines, '}' ); + return join( $join, @dotlines ); +} + +sub _make_dotline { + my( $obj, %attr ) = @_; + my @pairs; + my $fq = delete $attr{forcequote}; + foreach my $k ( keys %attr ) { + my $v = _dotquote( $attr{$k}, $fq ); + push( @pairs, "$k=$v" ); + } + return sprintf( " %s [ %s ];", _dotquote( $obj ), join( ', ', @pairs ) ); +} + +sub _dotquote { + my( $str, $force ) = @_; + return $str if $str =~ /^[A-Za-z0-9]+$/; + $str =~ s/\"/\\\"/g; + $str = '"' . $str . '"'; + return $str; +} + +sub _by_vertex { + return $a->[0].$a->[1] cmp $b->[0].$b->[1]; +} + =head2 character_input( $tradition, $opts ) Returns a character matrix string suitable for Phylip programs, which @@ -222,19 +349,12 @@ undirected graphs. sub parse_newick { my $newick = shift; - my @stemmata; - # Parse the result into a tree + # Parse the result into a set of trees and return them. my $forest = Bio::Phylo::IO->parse( -format => 'newick', -string => $newick, ); - # Turn the tree into a graph, starting with the root node - foreach my $tree ( @{$forest->get_entities} ) { - my $stemma = Text::Tradition::Stemma->new( - graph => _graph_from_bio( $tree ) ); - push( @stemmata, $stemma ); - } - return \@stemmata; + return map { _graph_from_bio( $_ ) } @{$forest->get_entities}; } sub _graph_from_bio { diff --git a/analysis/t/stemma.t b/analysis/t/stemma.t index a4c7ea7..9d9526d 100644 --- a/analysis/t/stemma.t +++ b/analysis/t/stemma.t @@ -5,7 +5,7 @@ use File::Which; use Test::More; use lib 'lib'; use Text::Tradition; -use Text::Tradition::StemmaUtil qw/ character_input phylip_pars parse_newick /; +use Text::Tradition::StemmaUtil qw/ character_input phylip_pars /; use TryCatch; my $datafile = 't/data/Collatex-16.xml'; #TODO need other test data @@ -54,7 +54,7 @@ SKIP: { my $newick = phylip_pars( $mstr ); ok( $newick, "pars ran successfully" ); - my $trees = parse_newick( $newick ); + my $trees = Text::Tradition::Stemma->new_from_newick( $newick ); # Test that we get a tree is( scalar @$trees, 1, "Got a single tree" ); # Test that the tree has all our witnesses