From: Tara L Andrews Date: Fri, 13 Jan 2012 19:33:56 +0000 (+0100) Subject: move more procedural stuff out of Stemma.pm into StemmaUtil X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=b02332cafec34f8fd5af15ff7962a61d512bcf58;p=scpubgit%2Fstemmatology.git move more procedural stuff out of Stemma.pm into StemmaUtil --- diff --git a/TreeOfTexts/lib/TreeOfTexts/Controller/Stemmagraph.pm b/TreeOfTexts/lib/TreeOfTexts/Controller/Stemmagraph.pm index d192178..b440dfb 100644 --- a/TreeOfTexts/lib/TreeOfTexts/Controller/Stemmagraph.pm +++ b/TreeOfTexts/lib/TreeOfTexts/Controller/Stemmagraph.pm @@ -4,7 +4,7 @@ use namespace::autoclean; use File::Temp; use JSON; use Text::Tradition::Collation; -use Text::Tradition::StemmaUtil qw/ phylip_pars_input /; +use Text::Tradition::StemmaUtil qw/ character_input /; BEGIN { extends 'Catalyst::Controller' } @@ -65,7 +65,7 @@ sub character_matrix :Local { my $json = $c->request->params->{'alignment'}; $c->log->debug( $json ); my $table = from_json( $json ); - my $matrix = phylip_pars_input( $table ); + my $matrix = character_input( $table ); $c->stash->{'result'} = { 'matrix' => $matrix }; $c->forward( 'View::JSON' ); } diff --git a/lib/Text/Tradition/Stemma.pm b/lib/Text/Tradition/Stemma.pm index 8ee23d0..2c7b310 100644 --- a/lib/Text/Tradition/Stemma.pm +++ b/lib/Text/Tradition/Stemma.pm @@ -2,13 +2,11 @@ package Text::Tradition::Stemma; use Bio::Phylo::IO; use Encode qw( decode_utf8 ); -use File::chdir; use File::Temp; -use File::Which; use Graph; use Graph::Reader::Dot; use IPC::Run qw/ run binary /; -use Text::Tradition::StemmaUtil qw/ phylip_pars_input /; +use Text::Tradition::StemmaUtil qw/ character_input phylip_pars parse_newick /; use Moose; has collation => ( @@ -199,7 +197,7 @@ before 'distance_trees' => sub { my( $ok, $result ) = $self->$dsub(); if( $ok ) { # Save the resulting trees - my $trees = _parse_newick( $result ); + my $trees = parse_newick( $result ); $self->_save_distance_trees( $trees ); $self->distance_program( $args{'program'} ); } else { @@ -209,110 +207,9 @@ before 'distance_trees' => sub { }; sub run_phylip_pars { - my $self = shift; - - # Set up a temporary directory for all the default Phylip files. - my $phylip_dir = File::Temp->newdir(); - # $phylip_dir->unlink_on_destroy(0); - # We need an infile, and we need a command input file. - open( MATRIX, ">$phylip_dir/infile" ) or die "Could not write $phylip_dir/infile"; - print MATRIX phylip_pars_input( $self->collation->make_alignment_table() ); - close MATRIX; - - open( CMD, ">$phylip_dir/cmdfile" ) or die "Could not write $phylip_dir/cmdfile"; - ## TODO any configuration parameters we want to set here -# U Search for best tree? Yes -# S Search option? More thorough search -# V Number of trees to save? 100 -# J Randomize input order of species? No. Use input order -# O Outgroup root? No, use as outgroup species 1 -# T Use Threshold parsimony? No, use ordinary parsimony -# W Sites weighted? No -# M Analyze multiple data sets? No -# I Input species interleaved? Yes -# 0 Terminal type (IBM PC, ANSI, none)? ANSI -# 1 Print out the data at start of run No -# 2 Print indications of progress of run Yes -# 3 Print out tree Yes -# 4 Print out steps in each site No -# 5 Print character at all nodes of tree No -# 6 Write out trees onto tree file? Yes - print CMD "Y\n"; - close CMD; - - # And then we run the program. - my $program = File::Which::which( 'pars' ); - unless( -x $program ) { - return( undef, "Phylip pars not found in path" ); - } - - { - # We need to run it in our temporary directory where we have created - # all the expected files. - local $CWD = $phylip_dir; - my @cmd = ( $program ); - run \@cmd, '<', 'cmdfile', '>', '/dev/null'; - } - # Now our output should be in 'outfile' and our tree in 'outtree', - # both in the temp directory. - - my @outtree; - if( -f "$phylip_dir/outtree" ) { - open( TREE, "$phylip_dir/outtree" ) or die "Could not open outtree for read"; - @outtree = ; - close TREE; - } - return( 1, join( '', @outtree ) ) if @outtree; - - my @error; - if( -f "$phylip_dir/outfile" ) { - open( OUTPUT, "$phylip_dir/outfile" ) or die "Could not open output for read"; - @error = ; - close OUTPUT; - } else { - push( @error, "Neither outtree nor output file was produced!" ); - } - return( undef, join( '', @error ) ); -} - -sub _parse_newick { - my $newick = shift; - my @trees; - # Parse the result into a tree - my $forest = Bio::Phylo::IO->parse( - -format => 'newick', - -string => $newick, - ); - # Turn the tree into a graph, starting with the root node - foreach my $tree ( @{$forest->get_entities} ) { - push( @trees, _graph_from_bio( $tree ) ); - } - return \@trees; -} - -sub _graph_from_bio { - my $tree = shift; - my $graph = Graph->new( 'undirected' => 1 ); - # Give all the intermediate anonymous nodes a name. - my $i = 0; - foreach my $n ( @{$tree->get_entities} ) { - next if $n->get_name; - $n->set_name( $i++ ); - } - my $root = $tree->get_root->get_name; - $graph->add_vertex( $root ); - _add_tree_children( $graph, $root, $tree->get_root->get_children() ); - return $graph; -} - -sub _add_tree_children { - my( $graph, $parent, $tree_children ) = @_; - foreach my $c ( @$tree_children ) { - my $child = $c->get_name; - $graph->add_vertex( $child ); - $graph->add_path( $parent, $child ); - _add_tree_children( $graph, $child, $c->get_children() ); - } + my $self = shift; + my $cdata = character_input( $self->collation->make_alignment_table() ); + return phylip_pars( $cdata ); } no Moose; diff --git a/lib/Text/Tradition/StemmaUtil.pm b/lib/Text/Tradition/StemmaUtil.pm index efa65da..f9e9bb0 100644 --- a/lib/Text/Tradition/StemmaUtil.pm +++ b/lib/Text/Tradition/StemmaUtil.pm @@ -4,7 +4,14 @@ use strict; use warnings; use Exporter 'import'; use vars qw/ @EXPORT_OK /; -@EXPORT_OK = qw/ phylip_pars_input /; +use Bio::Phylo::IO; +use File::chdir; +use File::Temp; +use File::Which; +use Graph; +use Graph::Reader::Dot; +use IPC::Run qw/ run binary /; +@EXPORT_OK = qw/ make_character_matrix character_input phylip_pars parse_newick /; sub make_character_matrix { my( $table ) = @_; @@ -68,7 +75,7 @@ sub convert_characters { return @chars; } -sub phylip_pars_input { +sub character_input { my $table = shift; my $character_matrix = make_character_matrix( $table ); my $input = ''; @@ -81,3 +88,108 @@ sub phylip_pars_input { return $input; } +sub phylip_pars { + my( $charmatrix ) = @_; + # Set up a temporary directory for all the default Phylip files. + my $phylip_dir = File::Temp->newdir(); + # $phylip_dir->unlink_on_destroy(0); + # We need an infile, and we need a command input file. + open( MATRIX, ">$phylip_dir/infile" ) or die "Could not write $phylip_dir/infile"; + print MATRIX $charmatrix; + close MATRIX; + + open( CMD, ">$phylip_dir/cmdfile" ) or die "Could not write $phylip_dir/cmdfile"; + ## TODO any configuration parameters we want to set here +# U Search for best tree? Yes +# S Search option? More thorough search +# V Number of trees to save? 100 +# J Randomize input order of species? No. Use input order +# O Outgroup root? No, use as outgroup species 1 +# T Use Threshold parsimony? No, use ordinary parsimony +# W Sites weighted? No +# M Analyze multiple data sets? No +# I Input species interleaved? Yes +# 0 Terminal type (IBM PC, ANSI, none)? ANSI +# 1 Print out the data at start of run No +# 2 Print indications of progress of run Yes +# 3 Print out tree Yes +# 4 Print out steps in each site No +# 5 Print character at all nodes of tree No +# 6 Write out trees onto tree file? Yes + print CMD "Y\n"; + close CMD; + + # And then we run the program. + my $program = File::Which::which( 'pars' ); + unless( -x $program ) { + return( undef, "Phylip pars not found in path" ); + } + + { + # We need to run it in our temporary directory where we have created + # all the expected files. + local $CWD = $phylip_dir; + my @cmd = ( $program ); + run \@cmd, '<', 'cmdfile', '>', '/dev/null'; + } + # Now our output should be in 'outfile' and our tree in 'outtree', + # both in the temp directory. + + my @outtree; + if( -f "$phylip_dir/outtree" ) { + open( TREE, "$phylip_dir/outtree" ) or die "Could not open outtree for read"; + @outtree = ; + close TREE; + } + return( 1, join( '', @outtree ) ) if @outtree; + + my @error; + if( -f "$phylip_dir/outfile" ) { + open( OUTPUT, "$phylip_dir/outfile" ) or die "Could not open output for read"; + @error = ; + close OUTPUT; + } else { + push( @error, "Neither outtree nor output file was produced!" ); + } + return( undef, join( '', @error ) ); +} + +sub parse_newick { + my $newick = shift; + my @trees; + # Parse the result into a tree + my $forest = Bio::Phylo::IO->parse( + -format => 'newick', + -string => $newick, + ); + # Turn the tree into a graph, starting with the root node + foreach my $tree ( @{$forest->get_entities} ) { + push( @trees, _graph_from_bio( $tree ) ); + } + return \@trees; +} + +sub _graph_from_bio { + my $tree = shift; + my $graph = Graph->new( 'undirected' => 1 ); + # Give all the intermediate anonymous nodes a name. + my $i = 0; + foreach my $n ( @{$tree->get_entities} ) { + next if $n->get_name; + $n->set_name( $i++ ); + } + my $root = $tree->get_root->get_name; + $graph->add_vertex( $root ); + _add_tree_children( $graph, $root, $tree->get_root->get_children() ); + return $graph; +} + +sub _add_tree_children { + my( $graph, $parent, $tree_children ) = @_; + foreach my $c ( @$tree_children ) { + my $child = $c->get_name; + $graph->add_vertex( $child ); + $graph->add_path( $parent, $child ); + _add_tree_children( $graph, $child, $c->get_children() ); + } +} diff --git a/t/stemma.t b/t/stemma.t index 6f3e7db..8d2fd6e 100644 --- a/t/stemma.t +++ b/t/stemma.t @@ -5,7 +5,7 @@ use File::Which; use Test::More; use lib 'lib'; use Text::Tradition; -use Text::Tradition::StemmaUtil qw/ phylip_pars_input /; +use Text::Tradition::StemmaUtil qw/ make_character_matrix /; use XML::LibXML; use XML::LibXML::XPathContext; @@ -30,29 +30,23 @@ ok( $stemma->isa( 'Text::Tradition::Stemma' ), 'Got the right sort of object' ); is( $stemma->graph, '1-2,1-A,2-B,2-C', "Got the correct graph" ); # Test for character matrix creation -my $m = phylip_pars_input( $c->make_alignment_table() ); +my $m = make_character_matrix( $c->make_alignment_table() ); ## check number of rows -my $expected = "\t3\t18\n"; -$expected .= 'A AAAAAAAXAAAAAAAAAA -B AXXXAAAAAABABAABAA -C AXXXAAAAABAAAAAXBB'; -$expected .= "\n"; -is( $m, $expected, "Got the right pars input" ); -# is( scalar @$m, 3, "Found three witnesses in char matrix" ); -# ## check number of columns -# is( scalar( @{$m->[0]} ), 19, "Found 18 rows plus sigla in char matrix" ); -# ## check matrix -# my %expected = ( -# 'A' => 'AAAAAAAXAAAAAAAAAA', -# 'B' => 'AXXXAAAAAABABAABAA', -# 'C' => 'AXXXAAAAABAAAAAXBB', -# ); -# my @wits = map { shift @$_; } @$m; -# map { s/\s+//g } @wits; -# foreach my $i ( 0 .. $#wits ) { -# my $w = $wits[$i]; -# is( join( '', @{$m->[$i]} ), $expected{$w}, "Row for witness $w is correct" ); -# } +is( scalar @$m, 3, "Found three witnesses in char matrix" ); + ## check number of columns +is( scalar( @{$m->[0]} ), 19, "Found 18 rows plus sigla in char matrix" ); + ## check matrix +my %expected = ( + 'A' => 'AAAAAAAXAAAAAAAAAA', + 'B' => 'AXXXAAAAAABABAABAA', + 'C' => 'AXXXAAAAABAAAAAXBB', + ); +my @wits = map { shift @$_; } @$m; +map { s/\s+//g } @wits; +foreach my $i ( 0 .. $#wits ) { + my $w = $wits[$i]; + is( join( '', @{$m->[$i]} ), $expected{$w}, "Row for witness $w is correct" ); +} # Test that pars runs SKIP: {