CHECKPOINT for laptop migration
[scpubgit/stemmatology.git] / lib / Text / Tradition / Stemma.pm
CommitLineData
9463b0bf 1package Text::Tradition::Stemma;
2
3use File::chdir;
4use File::Temp;
5use IPC::Run qw/ run /;
6use Moose;
7use Text::Tradition::Collation::Position;
8
9has collation => (
10 is => 'ro',
11 isa => 'Text::Tradition::Collation',
12 required => 1,
13 );
14
15has character_matrix => (
16 is => 'ro',
17 isa => 'ArrayRef[ArrayRef[Str]]',
18 writer => '_save_character_matrix',
19 predicate => 'has_character_matrix',
20 );
21
22sub make_character_matrix {
23 my $self = shift;
24 unless( $self->collation->linear ) {
25 warn "Need a linear graph in order to make an alignment table";
26 return;
27 }
28 my @all_pos = sort { Text::Tradition::Collation::Position::str_cmp( $a, $b ) }
29 $self->collation->possible_positions;
30 my $table = [];
31 my $characters = {};
32 map { $characters->{$_} = {} } @all_pos;
33 foreach my $wit ( @{$self->collation->tradition->witnesses} ) {
34 # First implementation: make dumb alignment table, caring about
35 # nothing except which reading is in which position.
f6066bac 36 my $sigilfield = sprintf( "%-10s", $wit->sigil );
37 push( @$table, [ $sigilfield, make_witness_row( $characters, $wit->path,
9463b0bf 38 \@all_pos ) ] );
39 if( $wit->has_ante_corr ) {
f6066bac 40 $sigilfield = sprintf( "%-10s", $wit->sigil . "_ac" );
41 push( @$table, [ $sigilfield,
9463b0bf 42 make_witness_row( $characters, $wit->uncorrected_path,
43 \@all_pos ) ] );
44 }
45 }
46 $self->_save_character_matrix( $table );
47}
48
49sub make_witness_row {
50 my( $characters, $path, $positions ) = @_;
f6066bac 51 my %char_hash;
52 map { $char_hash{$_} = 'X' } @$positions;
53 foreach my $rdg( @$path ) {
54 $char_hash{$rdg->position->minref} = get_character( $rdg, $characters );
9463b0bf 55 }
f6066bac 56 my @row = map { $char_hash{$_} } @$positions;
9463b0bf 57 return @row;
58}
59
60
61sub get_character {
62 my( $reading, $characters ) = @_;
63 my $this_pos = $characters->{$reading->position->minref};
64 # This is a simple algorithm that treats every reading as different.
65 # Eventually we will want to be able to specify how relationships
66 # affect the character matrix.
67 my $text = $reading->text;
68 unless( exists $this_pos->{$text} ) {
69 # We need to find what the next character is here, and record it.
70 my @all_chr = sort { $a <=> $b } values( %$this_pos );
71 if( @all_chr == 8 ) {
72 warn "Already have eight variants at position "
73 . $reading->position->minref . "; not adding " . $reading->text;
74 return '?';
75 }
76 $this_pos->{$text} = scalar @all_chr;
77 }
78 return $this_pos->{$text};
79}
80
f6066bac 81sub pars_input {
9463b0bf 82 my $self = shift;
83 $self->make_character_matrix unless $self->has_character_matrix;
f6066bac 84 my $matrix = '';
85 my $rows = scalar @{$self->character_matrix};
86 my $columns = scalar @{$self->character_matrix->[0]} - 1;
87 $matrix .= "\t$rows\t$columns\n";
88 foreach my $row ( @{$self->character_matrix} ) {
89 $matrix .= join( '', @$row ) . "\n";
90 }
91 return $matrix;
92}
93
94sub run_pars {
95 my $self = shift;
9463b0bf 96
97 # Set up a temporary directory for all the default Phylip files.
98 my $phylip_dir = File::Temp->newdir();
9463b0bf 99 $DB::single = 1;
f6066bac 100 # We need an infile, and we need a command input file.
9463b0bf 101 open( MATRIX, ">$phylip_dir/infile" ) or die "Could not write $phylip_dir/infile";
f6066bac 102 print MATRIX $self->pars_input();
9463b0bf 103 close MATRIX;
104
105 open( CMD, ">$phylip_dir/cmdfile" ) or die "Could not write $phylip_dir/cmdfile";
106 ## TODO any configuration parameters we want to set here
107# U Search for best tree? Yes
108# S Search option? More thorough search
109# V Number of trees to save? 100
110# J Randomize input order of species? No. Use input order
111# O Outgroup root? No, use as outgroup species 1
112# T Use Threshold parsimony? No, use ordinary parsimony
113# W Sites weighted? No
114# M Analyze multiple data sets? No
115# I Input species interleaved? Yes
116# 0 Terminal type (IBM PC, ANSI, none)? ANSI
117# 1 Print out the data at start of run No
118# 2 Print indications of progress of run Yes
119# 3 Print out tree Yes
120# 4 Print out steps in each site No
121# 5 Print character at all nodes of tree No
122# 6 Write out trees onto tree file? Yes
123 print CMD "Y\n";
124 close CMD;
125
126 # And then we run the program.
127 ### HACKY HACKY
128 my $PHYLIP_PATH = '/Users/tla/Projects/phylip-3.69/exe';
129 my $program = "pars";
130 if( $^O eq 'darwin' ) {
131 $program = "$PHYLIP_PATH/$program.app/Contents/MacOS/$program";
132 } else {
133 $program = "$PHYLIP_PATH/$program";
134 }
135
136 {
137 # We need to run it in our temporary directory where we have created
138 # all the expected files.
139 local $CWD = $phylip_dir;
140 my @cmd = ( $program );
141 run \@cmd, '<', 'cmdfile', '>', '/dev/null';
142 }
143 # Now our output should be in 'outfile' and our tree in 'outtree',
144 # both in the temp directory.
145
146 my @outtree;
147 if( -f "$phylip_dir/outtree" ) {
148 open( TREE, "$phylip_dir/outtree" ) or die "Could not open outtree for read";
149 @outtree = <TREE>;
150 close TREE;
151 }
152 return( 1, join( '', @outtree ) ) if @outtree;
153
154 my @error;
f6066bac 155 if( -f "$phylip_dir/outfile" ) {
156 open( OUTPUT, "$phylip_dir/outfile" ) or die "Could not open output for read";
9463b0bf 157 @error = <OUTPUT>;
158 close OUTPUT;
159 } else {
160 push( @error, "Neither outtree nor output file was produced!" );
161 }
162 return( undef, join( '', @error ) );
163}
164
165no Moose;
166__PACKAGE__->meta->make_immutable;
167
1681;