add first-cut module for stemma analysis
[scpubgit/stemmatology.git] / lib / Text / Tradition / Stemma.pm
CommitLineData
9463b0bf 1package Text::Tradition::Stemma;
2
3use File::chdir;
4use File::Temp;
5use IPC::Run qw/ run /;
6use Moose;
7use Text::Tradition::Collation::Position;
8
# The collation that this stemma analysis works on.  It must be supplied
# when the object is constructed and is read-only afterwards.
has 'collation' => (
    isa      => 'Text::Tradition::Collation',
    is       => 'ro',
    required => 1,
);
14
# The character matrix: one array ref per witness row, holding the row
# label followed by one character per position.  It is read-only from
# outside; the private writer _save_character_matrix stores it and the
# predicate has_character_matrix reports whether it has been set yet.
has 'character_matrix' => (
    isa       => 'ArrayRef[ArrayRef[Str]]',
    is        => 'ro',
    predicate => 'has_character_matrix',
    writer    => '_save_character_matrix',
);
21
# Build the character matrix for the collation and store it in the
# character_matrix attribute.
#
# Each witness contributes one row of the form [ sigil, char, char, ... ],
# with one character per possible position in the collation.  A witness
# that has an ante-correctionem layer contributes a second row labelled
# "<sigil>_ac", built from its uncorrected path.
#
# Warns and returns nothing if the collation is not linear; otherwise
# returns whatever the attribute writer returns.
sub make_character_matrix {
    my $self = shift;

    if( !$self->collation->linear ) {
        warn "Need a linear graph in order to make an alignment table";
        return;
    }

    my @positions =
        sort { Text::Tradition::Collation::Position::str_cmp( $a, $b ) }
        $self->collation->possible_positions;

    # One initially empty "reading text => character" map per position;
    # the row builder fills these in as it meets new readings.
    my %states_at = map { $_ => {} } @positions;

    # First implementation: make dumb alignment table, caring about
    # nothing except which reading is in which position.
    my @rows;
    foreach my $witness ( @{ $self->collation->tradition->witnesses } ) {
        push @rows,
            [ $witness->sigil,
              make_witness_row( \%states_at, $witness->path, \@positions ) ];

        next unless $witness->has_ante_corr;
        push @rows,
            [ $witness->sigil . "_ac",
              make_witness_row( \%states_at, $witness->uncorrected_path,
                                \@positions ) ];
    }

    return $self->_save_character_matrix( \@rows );
}
46
# Build the list of matrix characters for a single witness.
#
# Arguments:
#   $characters - hash ref of per-position state maps, handed on to
#                 get_character
#   $path       - array ref of the witness's readings, in order; each
#                 reading must answer ->position->minref
#   $positions  - array ref of all position identifiers, in order
#
# Returns a list with one entry per element of $positions: the character
# assigned by get_character where the witness's next unconsumed reading
# sits at that position, or 'X' where it does not.
sub make_witness_row {
    my( $characters, $path, $positions ) = @_;
    my @row;

    # Index of the next reading in $path that has not yet been placed.
    # It only advances when a reading matches the current position.
    my $next_reading = 0;

    foreach my $position ( @$positions ) {
        my $reading = $path->[$next_reading];

        # A path that ends before the last position simply has no reading
        # for the remaining columns; mark those as gaps rather than
        # calling a method on undef.
        if( defined $reading && $reading->position->minref eq $position ) {
            push( @row, get_character( $reading, $characters ) );
            $next_reading++;
        } else {
            push( @row, 'X' );
        }
    }
    return @row;
}
62
63
# Return the matrix character for a reading at its position, allocating a
# new one if this reading text has not been seen at that position before.
#
# Arguments:
#   $reading    - object answering ->position->minref and ->text
#   $characters - hash ref mapping a position identifier to a hash ref of
#                 reading text => assigned character; updated in place
#
# Returns the character (0, 1, 2, ... in order of first appearance), or
# '?' with a warning once the position already holds eight variants.
#
# This is a simple algorithm that treats every reading as different.
# Eventually we will want to be able to specify how relationships
# affect the character matrix.
sub get_character {
    my( $reading, $characters ) = @_;
    my $position = $reading->position->minref;
    my $text     = $reading->text;

    # Register the position in the shared table if the caller has not
    # done so; otherwise the assignments would be made in a throwaway
    # hash and every reading at this position would come back as 0.
    my $this_pos = ( $characters->{$position} ||= {} );

    unless( exists $this_pos->{$text} ) {
        # Characters are handed out sequentially, so the next free one is
        # simply the number of readings already recorded here.
        my $seen = scalar keys %$this_pos;
        if( $seen >= 8 ) {
            warn "Already have eight variants at position "
                . $position . "; not adding " . $text;
            return '?';
        }
        $this_pos->{$text} = $seen;
    }
    return $this_pos->{$text};
}
83
# Run the Phylip 'pars' program on the character matrix.
#
# The matrix is built first if it does not exist yet.  It is written as
# 'infile' into a fresh temporary directory together with a 'cmdfile'
# that is fed to the program on standard input, and the program is run
# with that directory as its working directory.
#
# Returns a two-element list:
#   ( 1, $tree )      - on success; $tree is the content of 'outtree'
#   ( undef, $error ) - on failure; $error is the content of 'outfile'
#                       if the program wrote one, or a short explanation
#
# Dies if one of the temporary files cannot be written or read.
sub run_pars {
    my $self = shift;
    $self->make_character_matrix unless $self->has_character_matrix;

    # make_character_matrix stores nothing when the collation is not
    # linear, and a tradition may have no witnesses; report either case
    # through the normal failure return instead of dereferencing undef.
    my $matrix = $self->character_matrix;
    unless( $matrix && @$matrix ) {
        return( undef, "No character matrix is available for analysis" );
    }

    # Set up a temporary directory for all the default Phylip files.
    my $phylip_dir = File::Temp->newdir();

    # We need an infile, and we need a command input file.
    my $infile = "$phylip_dir/infile";
    open( my $matrix_fh, '>', $infile ) or die "Could not write $infile";
    my $rows    = scalar @$matrix;
    my $columns = scalar( @{ $matrix->[0] } ) - 1;   # first cell is the label
    print $matrix_fh "\t$rows\t$columns\n";
    foreach my $row ( @$matrix ) {
        # Copy the row apart; shifting the label off the row itself would
        # damage the stored matrix for every later use.
        my( $wit, @chars ) = @$row;
        print $matrix_fh sprintf( "%-10s%s\n", $wit, join( '', @chars ) );
    }
    close( $matrix_fh ) or die "Could not finish writing $infile";

    my $cmdfile = "$phylip_dir/cmdfile";
    open( my $cmd_fh, '>', $cmdfile ) or die "Could not write $cmdfile";
    ## TODO any configuration parameters we want to set here
    # U Search for best tree? Yes
    # S Search option? More thorough search
    # V Number of trees to save? 100
    # J Randomize input order of species? No. Use input order
    # O Outgroup root? No, use as outgroup species 1
    # T Use Threshold parsimony? No, use ordinary parsimony
    # W Sites weighted? No
    # M Analyze multiple data sets? No
    # I Input species interleaved? Yes
    # 0 Terminal type (IBM PC, ANSI, none)? ANSI
    # 1 Print out the data at start of run No
    # 2 Print indications of progress of run Yes
    # 3 Print out tree Yes
    # 4 Print out steps in each site No
    # 5 Print character at all nodes of tree No
    # 6 Write out trees onto tree file? Yes
    print $cmd_fh "Y\n";
    close( $cmd_fh ) or die "Could not finish writing $cmdfile";

    # And then we run the program.
    ### HACKY HACKY: the install location is hard-coded.
    my $PHYLIP_PATH = '/Users/tla/Projects/phylip-3.69/exe';
    my $program = "pars";
    if( $^O eq 'darwin' ) {
        $program = "$PHYLIP_PATH/$program.app/Contents/MacOS/$program";
    } else {
        $program = "$PHYLIP_PATH/$program";
    }

    {
        # We need to run it in our temporary directory where we have created
        # all the expected files.
        local $CWD = $phylip_dir;
        my @cmd = ( $program );
        run \@cmd, '<', 'cmdfile', '>', '/dev/null';
    }
    # Now our output should be in 'outfile' and our tree in 'outtree',
    # both in the temp directory.

    my @outtree;
    my $treefile = "$phylip_dir/outtree";
    if( -f $treefile ) {
        open( my $tree_fh, '<', $treefile )
            or die "Could not open outtree for read";
        @outtree = <$tree_fh>;
        close $tree_fh;
    }
    return( 1, join( '', @outtree ) ) if @outtree;

    # No tree: hand back the program's report, which it writes to
    # 'outfile', so that the caller can see what went wrong.
    my @error;
    my $reportfile = "$phylip_dir/outfile";
    if( -f $reportfile ) {
        open( my $report_fh, '<', $reportfile )
            or die "Could not open output for read";
        @error = <$report_fh>;
        close $report_fh;
    } else {
        push( @error, "Neither outtree nor output file was produced!" );
    }
    return( undef, join( '', @error ) );
}
163
# Drop the keywords imported from Moose from this package and finalize
# the class definition.
no Moose;
__PACKAGE__->meta->make_immutable;

# A module file has to end with a true value.
1;