X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=lib%2FText%2FTradition%2FStemmaUtil.pm;h=295110d37f3356f4321b105c51f77a5cbd02d64a;hb=339786dd0b0c493786af4df1ec3bc7ef2bf497e2;hp=f9e9bb020eaf2aa5dfd5cd4eab0206e10125fb26;hpb=b02332cafec34f8fd5af15ff7962a61d512bcf58;p=scpubgit%2Fstemmatology.git diff --git a/lib/Text/Tradition/StemmaUtil.pm b/lib/Text/Tradition/StemmaUtil.pm index f9e9bb0..295110d 100644 --- a/lib/Text/Tradition/StemmaUtil.pm +++ b/lib/Text/Tradition/StemmaUtil.pm @@ -5,15 +5,49 @@ use warnings; use Exporter 'import'; use vars qw/ @EXPORT_OK /; use Bio::Phylo::IO; +use Encode qw( decode_utf8 ); use File::chdir; use File::Temp; use File::Which; use Graph; use Graph::Reader::Dot; use IPC::Run qw/ run binary /; -@EXPORT_OK = qw/ make_character_matrix character_input phylip_pars parse_newick /; +use Text::Tradition::Error; +@EXPORT_OK = qw/ character_input phylip_pars parse_newick newick_to_svg /; -sub make_character_matrix { +=head1 NAME + +Text::Tradition::StemmaUtil - standalone utilities for distance tree calculations + +=head1 DESCRIPTION + +This package contains a set of utilities for running phylogenetic analysis on +text collations. + +=head1 SUBROUTINES + +=head2 character_input( $alignment_table ) + +Returns a character matrix string suitable for Phylip programs, which +corresponds to the given alignment table. See Text::Tradition::Collation +for a description of the alignment table format. + +=cut + +sub character_input { + my $table = shift; + my $character_matrix = _make_character_matrix( $table ); + my $input = ''; + my $rows = scalar @{$character_matrix}; + my $columns = scalar @{$character_matrix->[0]} - 1; + $input .= "\t$rows\t$columns\n"; + foreach my $row ( @{$character_matrix} ) { + $input .= join( '', @$row ) . "\n"; + } + return $input; +} + +sub _make_character_matrix { my( $table ) = @_; # Push the names of the witnesses to initialize the rows of the matrix. my @matrix = map { [ _normalize_witname( $_->{'witness'} ) ] } @@ -24,7 +58,7 @@ sub make_character_matrix { my @pos_readings = map { $_->{'tokens'}->[$token_index] } @{$table->{'alignment'}}; my @pos_text = map { $_ ? $_->{'t'} : $_ } @pos_readings; - my @chars = convert_characters( \@pos_text ); + my @chars = _convert_characters( \@pos_text ); foreach my $idx ( 0 .. $#matrix ) { push( @{$matrix[$idx]}, $chars[$idx] ); } @@ -42,7 +76,7 @@ sub _normalize_witname { return sprintf( "%-10s", $witname ); } -sub convert_characters { +sub _convert_characters { my $row = shift; # This is a simple algorithm that treats every reading as different. # Eventually we will want to be able to specify how relationships @@ -75,18 +109,11 @@ sub convert_characters { return @chars; } -sub character_input { - my $table = shift; - my $character_matrix = make_character_matrix( $table ); - my $input = ''; - my $rows = scalar @{$character_matrix}; - my $columns = scalar @{$character_matrix->[0]} - 1; - $input .= "\t$rows\t$columns\n"; - foreach my $row ( @{$character_matrix} ) { - $input .= join( '', @$row ) . "\n"; - } - return $input; -} +=head2 phylip_pars( $character_matrix ) + +Runs Phylip Pars on the given character matrix. Returns results in Newick format. + +=cut sub phylip_pars { my( $charmatrix ) = @_; @@ -122,7 +149,7 @@ sub phylip_pars { # And then we run the program. my $program = File::Which::which( 'pars' ); unless( -x $program ) { - return( undef, "Phylip pars not found in path" ); + throw( "Phylip pars not found in path" ); } { @@ -141,8 +168,9 @@ sub phylip_pars { @outtree = ; close TREE; } - return( 1, join( '', @outtree ) ) if @outtree; + return join( '', @outtree ) if @outtree; + # If we got this far, we are about to throw an error. my @error; if( -f "$phylip_dir/outfile" ) { open( OUTPUT, "$phylip_dir/outfile" ) or die "Could not open output for read"; @@ -151,9 +179,15 @@ sub phylip_pars { } else { push( @error, "Neither outtree nor output file was produced!" ); } - return( undef, join( '', @error ) ); + throw( join( '', @error ) ); } +=head2 parse_newick( $newick_string ) + +Parses the given Newick tree(s) into one or more undirected Graph objects. + +=cut + sub parse_newick { my $newick = shift; my @trees; @@ -169,6 +203,28 @@ sub parse_newick { return \@trees; } +=head2 newick_to_svg( $newick_string ) + +Uses the FigTree utility (if installed) to transform the given Newick tree(s) +into a graph visualization. + +=cut + +sub newick_to_svg { + my $newick = shift; + my $program = File::Which::which( 'figtree' ); + unless( -x $program ) { + throw( "FigTree commandline utility not found in path" ); + } + my $svg; + my $nfile = File::Temp->new(); + print $nfile $newick; + close $nfile; + my @cmd = ( $program, '-graphic', 'SVG', $nfile ); + run( \@cmd, ">", binary(), \$svg ); + return decode_utf8( $svg ); +} + sub _graph_from_bio { my $tree = shift; my $graph = Graph->new( 'undirected' => 1 ); @@ -193,3 +249,22 @@ sub _add_tree_children { _add_tree_children( $graph, $child, $c->get_children() ); } } + +sub throw { + Text::Tradition::Error->throw( + 'ident' => 'StemmaUtil error', + 'message' => $_[0], + ); +} + +1; + +=head1 LICENSE + +This package is free software and is provided "as is" without express +or implied warranty. You can redistribute it and/or modify it under +the same terms as Perl itself. + +=head1 AUTHOR + +Tara L Andrews Eaurum@cpan.orgE