From: Tara L Andrews Date: Fri, 13 Jan 2012 16:08:10 +0000 (+0100) Subject: split stemma lib into util and object; make phylip_input microservice X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=68454b71e5a66972515258d6312f6355ee0536f4;p=scpubgit%2Fstemmatology.git split stemma lib into util and object; make phylip_input microservice --- diff --git a/TreeOfTexts/lib/TreeOfTexts/Controller/Root.pm b/TreeOfTexts/lib/TreeOfTexts/Controller/Root.pm index 137d28a..1f6792d 100644 --- a/TreeOfTexts/lib/TreeOfTexts/Controller/Root.pm +++ b/TreeOfTexts/lib/TreeOfTexts/Controller/Root.pm @@ -62,8 +62,8 @@ sub relationships :Local { my $m = $c->model('Directory'); my $tradition = $m->tradition( $c->request->params->{'textid'} ); my $table = $tradition->collation->make_alignment_table(); - my $witlist = shift @$table; - $c->stash->{witnesses} = $wits; + my $witlist = map { $_->{'witness'} } @{$table->{'alignment'}}; + $c->stash->{witnesses} = $witlist; $c->stash->{alignment} = $table; $c->stash->{template} = 'relate.tt'; } @@ -102,7 +102,7 @@ sub alignment_table :Local { my $m = $c->model( 'Directory' ); my $tradition = $m->tradition( $c->request->params->{'textid'} ); my $table = $tradition->collation->make_alignment_table(); - $c->stash->{'result'} => $table; + $c->stash->{'result'} = $table; $c->forward-( 'View::JSON' ); } diff --git a/TreeOfTexts/lib/TreeOfTexts/Controller/Stemmagraph.pm b/TreeOfTexts/lib/TreeOfTexts/Controller/Stemmagraph.pm index 1d6a8e0..d192178 100644 --- a/TreeOfTexts/lib/TreeOfTexts/Controller/Stemmagraph.pm +++ b/TreeOfTexts/lib/TreeOfTexts/Controller/Stemmagraph.pm @@ -2,8 +2,9 @@ package TreeOfTexts::Controller::Stemmagraph; use Moose; use namespace::autoclean; use File::Temp; +use JSON; use Text::Tradition::Collation; -use Text::Tradition::Stemma; +use Text::Tradition::StemmaUtil qw/ phylip_pars_input /; BEGIN { extends 'Catalyst::Controller' } @@ -52,6 +53,22 @@ sub get_graph :Local { $c->forward( "View::SVG" ); } +=head2 character_matrix + +Given an alignment table in JSON form, in the parameter 'alignment', returns a +character matrix suitable for input to Phylip PARS. + +=cut + +sub character_matrix :Local { + my( $self, $c ) = @_; + my $json = $c->request->params->{'alignment'}; + $c->log->debug( $json ); + my $table = from_json( $json ); + my $matrix = phylip_pars_input( $table ); + $c->stash->{'result'} = { 'matrix' => $matrix }; + $c->forward( 'View::JSON' ); +} =head2 end Attempt to render a view, if needed. diff --git a/lib/Text/Tradition/Collation.pm b/lib/Text/Tradition/Collation.pm index c5bb6a1..4b15dd1 100644 --- a/lib/Text/Tradition/Collation.pm +++ b/lib/Text/Tradition/Collation.pm @@ -719,7 +719,7 @@ sub make_alignment_table { } my $table = { 'alignment' => [], 'length' => $self->end->rank - 1 }; my @all_pos = ( 1 .. $self->end->rank - 1 ); - foreach my $wit ( $self->tradition->witnesses ) { + foreach my $wit ( sort { $a->sigil cmp $b->sigil } $self->tradition->witnesses ) { if( $include ) { next unless $include->{$wit->sigil}; } diff --git a/lib/Text/Tradition/Stemma.pm b/lib/Text/Tradition/Stemma.pm index 19140be..8ee23d0 100644 --- a/lib/Text/Tradition/Stemma.pm +++ b/lib/Text/Tradition/Stemma.pm @@ -8,6 +8,7 @@ use File::Which; use Graph; use Graph::Reader::Dot; use IPC::Run qw/ run binary /; +use Text::Tradition::StemmaUtil qw/ phylip_pars_input /; use Moose; has collation => ( @@ -206,85 +207,6 @@ before 'distance_trees' => sub { } } }; - -sub make_character_matrix { - my $self = shift; - unless( $self->collation->linear ) { - warn "Need a linear graph in order to make an alignment table"; - return; - } - my $table = $self->collation->make_alignment_table; - # Push the names of the witnesses to initialize the rows of the matrix. - my @matrix = map { [ $self->_normalize_ac( $_->{'witness'} ) ] } - @{$table->{'alignment'}}; - foreach my $token_index ( 0 .. $table->{'length'} - 1) { - # First implementation: make dumb alignment table, caring about - # nothing except which reading is in which position. - my @pos_readings = map { $_->{'tokens'}->[$token_index] } - @{$table->{'alignment'}}; - my @pos_text = map { $_ ? $_->{'t'} : $_ } @pos_readings; - my @chars = convert_characters( \@pos_text ); - foreach my $idx ( 0 .. $#matrix ) { - push( @{$matrix[$idx]}, $chars[$idx] ); - } - } - return \@matrix; -} - -sub _normalize_ac { - my( $self, $witname ) = @_; - my $ac = $self->collation->ac_label; - if( $witname =~ /(.*)\Q$ac\E$/ ) { - $witname = $1 . '_ac'; - } - return sprintf( "%-10s", $witname ); -} - -sub convert_characters { - my $row = shift; - # This is a simple algorithm that treats every reading as different. - # Eventually we will want to be able to specify how relationships - # affect the character matrix. - my %unique = ( '__UNDEF__' => 'X', - '#LACUNA#' => '?', - ); - my %count; - my $ctr = 0; - foreach my $word ( @$row ) { - if( $word && !exists $unique{$word} ) { - $unique{$word} = chr( 65 + $ctr ); - $ctr++; - } - $count{$word}++ if $word; - } - # Try to keep variants under 8 by lacunizing any singletons. - if( scalar( keys %unique ) > 8 ) { - foreach my $word ( keys %count ) { - if( $count{$word} == 1 ) { - $unique{$word} = '?'; - } - } - } - my %u = reverse %unique; - if( scalar( keys %u ) > 8 ) { - warn "Have more than 8 variants on this location; phylip will break"; - } - my @chars = map { $_ ? $unique{$_} : $unique{'__UNDEF__' } } @$row; - return @chars; -} - -sub phylip_pars_input { - my $self = shift; - my $character_matrix = $self->make_character_matrix; - my $input = ''; - my $rows = scalar @{$character_matrix}; - my $columns = scalar @{$character_matrix->[0]} - 1; - $input .= "\t$rows\t$columns\n"; - foreach my $row ( @{$character_matrix} ) { - $input .= join( '', @$row ) . "\n"; - } - return $input; -} sub run_phylip_pars { my $self = shift; @@ -294,7 +216,7 @@ sub run_phylip_pars { # $phylip_dir->unlink_on_destroy(0); # We need an infile, and we need a command input file. open( MATRIX, ">$phylip_dir/infile" ) or die "Could not write $phylip_dir/infile"; - print MATRIX $self->phylip_pars_input(); + print MATRIX phylip_pars_input( $self->collation->make_alignment_table() ); close MATRIX; open( CMD, ">$phylip_dir/cmdfile" ) or die "Could not write $phylip_dir/cmdfile"; diff --git a/lib/Text/Tradition/StemmaUtil.pm b/lib/Text/Tradition/StemmaUtil.pm new file mode 100644 index 0000000..efa65da --- /dev/null +++ b/lib/Text/Tradition/StemmaUtil.pm @@ -0,0 +1,83 @@ +package Text::Tradition::StemmaUtil; + +use strict; +use warnings; +use Exporter 'import'; +use vars qw/ @EXPORT_OK /; +@EXPORT_OK = qw/ phylip_pars_input /; + +sub make_character_matrix { + my( $table ) = @_; + # Push the names of the witnesses to initialize the rows of the matrix. + my @matrix = map { [ _normalize_witname( $_->{'witness'} ) ] } + @{$table->{'alignment'}}; + foreach my $token_index ( 0 .. $table->{'length'} - 1) { + # First implementation: make dumb alignment table, caring about + # nothing except which reading is in which position. + my @pos_readings = map { $_->{'tokens'}->[$token_index] } + @{$table->{'alignment'}}; + my @pos_text = map { $_ ? $_->{'t'} : $_ } @pos_readings; + my @chars = convert_characters( \@pos_text ); + foreach my $idx ( 0 .. $#matrix ) { + push( @{$matrix[$idx]}, $chars[$idx] ); + } + } + return \@matrix; +} + +# Helper function to make the witness name something legal for pars + +sub _normalize_witname { + my( $witname ) = @_; + $witname =~ s/\s+/ /g; + $witname =~ s/[\[\]\(\)\:;,]//g; + $witname = substr( $witname, 0, 10 ); + return sprintf( "%-10s", $witname ); +} + +sub convert_characters { + my $row = shift; + # This is a simple algorithm that treats every reading as different. + # Eventually we will want to be able to specify how relationships + # affect the character matrix. + my %unique = ( '__UNDEF__' => 'X', + '#LACUNA#' => '?', + ); + my %count; + my $ctr = 0; + foreach my $word ( @$row ) { + if( $word && !exists $unique{$word} ) { + $unique{$word} = chr( 65 + $ctr ); + $ctr++; + } + $count{$word}++ if $word; + } + # Try to keep variants under 8 by lacunizing any singletons. + if( scalar( keys %unique ) > 8 ) { + foreach my $word ( keys %count ) { + if( $count{$word} == 1 ) { + $unique{$word} = '?'; + } + } + } + my %u = reverse %unique; + if( scalar( keys %u ) > 8 ) { + warn "Have more than 8 variants on this location; phylip will break"; + } + my @chars = map { $_ ? $unique{$_} : $unique{'__UNDEF__' } } @$row; + return @chars; +} + +sub phylip_pars_input { + my $table = shift; + my $character_matrix = make_character_matrix( $table ); + my $input = ''; + my $rows = scalar @{$character_matrix}; + my $columns = scalar @{$character_matrix->[0]} - 1; + $input .= "\t$rows\t$columns\n"; + foreach my $row ( @{$character_matrix} ) { + $input .= join( '', @$row ) . "\n"; + } + return $input; +} + diff --git a/t/stemma.t b/t/stemma.t index 67ded66..6f3e7db 100644 --- a/t/stemma.t +++ b/t/stemma.t @@ -5,6 +5,7 @@ use File::Which; use Test::More; use lib 'lib'; use Text::Tradition; +use Text::Tradition::StemmaUtil qw/ phylip_pars_input /; use XML::LibXML; use XML::LibXML::XPathContext; @@ -29,23 +30,29 @@ ok( $stemma->isa( 'Text::Tradition::Stemma' ), 'Got the right sort of object' ); is( $stemma->graph, '1-2,1-A,2-B,2-C', "Got the correct graph" ); # Test for character matrix creation -my $m = $stemma->make_character_matrix(); +my $m = phylip_pars_input( $c->make_alignment_table() ); ## check number of rows -is( scalar @$m, 3, "Found three witnesses in char matrix" ); - ## check number of columns -is( scalar( @{$m->[0]} ), 19, "Found 18 rows plus sigla in char matrix" ); - ## check matrix -my %expected = ( - 'A' => 'AAAAAAAXAAAAAAAAAA', - 'B' => 'AXXXAAAAAABABAABAA', - 'C' => 'AXXXAAAAABAAAAAXBB', - ); -my @wits = map { shift @$_; } @$m; -map { s/\s+//g } @wits; -foreach my $i ( 0 .. $#wits ) { - my $w = $wits[$i]; - is( join( '', @{$m->[$i]} ), $expected{$w}, "Row for witness $w is correct" ); -} +my $expected = "\t3\t18\n"; +$expected .= 'A AAAAAAAXAAAAAAAAAA +B AXXXAAAAAABABAABAA +C AXXXAAAAABAAAAAXBB'; +$expected .= "\n"; +is( $m, $expected, "Got the right pars input" ); +# is( scalar @$m, 3, "Found three witnesses in char matrix" ); +# ## check number of columns +# is( scalar( @{$m->[0]} ), 19, "Found 18 rows plus sigla in char matrix" ); +# ## check matrix +# my %expected = ( +# 'A' => 'AAAAAAAXAAAAAAAAAA', +# 'B' => 'AXXXAAAAAABABAABAA', +# 'C' => 'AXXXAAAAABAAAAAXBB', +# ); +# my @wits = map { shift @$_; } @$m; +# map { s/\s+//g } @wits; +# foreach my $i ( 0 .. $#wits ) { +# my $w = $wits[$i]; +# is( join( '', @{$m->[$i]} ), $expected{$w}, "Row for witness $w is correct" ); +# } # Test that pars runs SKIP: {