huge pile of pod updates
[scpubgit/stemmatology.git] / lib / Text / Tradition / Stemma.pm
CommitLineData
9463b0bf 1package Text::Tradition::Stemma;
2
40f19742 3use Bio::Phylo::IO;
e79c23c7 4use Encode qw( decode_utf8 );
9463b0bf 5use File::Temp;
e05997e2 6use Graph;
7use Graph::Reader::Dot;
e79c23c7 8use IPC::Run qw/ run binary /;
63778331 9use Text::Tradition::Error;
b02332ca 10use Text::Tradition::StemmaUtil qw/ character_input phylip_pars parse_newick /;
40f19742 11use Moose;
9463b0bf 12
027d819c 13=head1 NAME
14
15Text::Tradition::Stemma - a representation of a I<stemma codicum> for a Text::Tradition
16
17=head1 SYNOPSIS
18
19 use Text::Tradition;
20 my $t = Text::Tradition->new(
21 'name' => 'this is a text',
22 'input' => 'TEI',
23 'file' => '/path/to/tei_parallel_seg_file.xml' );
24
25 my $s = $tradition->add_stemma( dotfile => '/path/to/stemma.dot' );
26
27=head1 DESCRIPTION
28
29Text::Tradition is a library for representation and analysis of collated
30texts, particularly medieval ones. The Collation is the central feature of
31a Tradition, where the text, its sequence of readings, and its relationships
32between readings are actually kept.
33
34=head1 DOT SYNTAX
35
36The easiest way to define a stemma (which is a directed acyclic graph, denoting
37the scholar's hypothesis concerning which text(s) were copied from which other(s))
38is to use a special form of the 'dot' syntax of GraphViz.
39
40Each stemma opens with the line
41
42 digraph Stemma {
43
44and continues with a list of all manuscript witnesses in the stemma, whether
45extant witnesses or missing archetypes or hyparchetypes. Each of these is
46listed by its sigil on its own line, e.g.:
47
48 alpha [ class=hypothetical ]
49 1 [ class=hypothetical,label=* ]
50 Ms4 [ class=extant ]
51
52Extant witnesses are listed with class=extant; missing or postulated witnesses
53are listed with class=hypothetical. Anonymous hyparchetypes must be given a
54unique name or number, but can be represented as anonymous with the addition
55of 'label=*' to their lines. Greek letters or other special characters may be
56used as names, but they must always be wrapped in double quotes.
57
58Links between manuscripts are then listed with arrow notation, as below. These
59lines show the direction of copying, one step at a time, for the entire stemma.
60
61 alpha -> 1
62 1 -> Ms4
63
64The final line in the definition should be the closing brace:
65
66 }
67
68Thus for a set of extant manuscripts A, B, and C, where A and B were copied
69from the archetype O and C was copied from B, the definition would be:
70
71 digraph Stemma {
72 O [ class=hypothetical]
73 A [ class=extant ]
74 B [ class=extant ]
75 C [ class=extant ]
76 O -> A
77 O -> B
78 B -> C
79 }
80
81=head1 CONSTRUCTOR
82
83=head2 new
84
85The constructor. This should generally be called from Text::Tradition, but
86if called directly it takes the following options:
87
88=over
89
90=item * collation - The collation with which the stemma is associated.
91
92=item * dot - A filehandle open to a DOT representation of the stemma graph.
93
94=back
95
96=cut
97
9463b0bf 98has collation => (
99 is => 'ro',
100 isa => 'Text::Tradition::Collation',
101 required => 1,
8d9a1cd8 102 weak_ref => 1,
9463b0bf 103 );
104
e05997e2 105has graph => (
106 is => 'rw',
107 isa => 'Graph',
108 predicate => 'has_graph',
109 );
110
40f19742 111has distance_trees => (
112 is => 'ro',
113 isa => 'ArrayRef[Graph]',
114 writer => '_save_distance_trees',
115 predicate => 'has_distance_trees',
116 );
c0ccdb62 117
0f5d05c6 118has distance_program => (
119 is => 'rw',
120 isa => 'Str',
121 default => '',
122 );
123
e05997e2 124sub BUILD {
125 my( $self, $args ) = @_;
126 # If we have been handed a dotfile, initialize it into a graph.
127 if( exists $args->{'dot'} ) {
027d819c 128 $self->_graph_from_dot( $args->{'dot'} );
e05997e2 129 }
c0ccdb62 130}
131
027d819c 132sub _graph_from_dot {
8d9a1cd8 133 my( $self, $dotfh ) = @_;
8d9a1cd8 134 my $reader = Graph::Reader::Dot->new();
135 my $graph = $reader->read_graph( $dotfh );
7a7c249c 136 if( $graph ) {
137 $self->graph( $graph );
138 # Go through the nodes and set any non-hypothetical node to extant.
139 foreach my $v ( $self->graph->vertices ) {
140 $self->graph->set_vertex_attribute( $v, 'class', 'extant' )
141 unless $self->graph->has_vertex_attribute( $v, 'class' );
142 }
143 } else {
63778331 144 throw( "Failed to parse dot in $dotfh" );
7a7c249c 145 }
8d9a1cd8 146}
147
027d819c 148=head1 METHODS
149
150=head2 as_dot( \%options )
151
152Returns a normal dot representation of the stemma layout, suitable for rendering
153with GraphViz. Options include:
154
155=over
156
157=item * graph - A hashref of global graph options.
158
159=item * node - A hashref of global node options.
160
161=item * edge - A hashref of global edge options.
162
163=back
164
165See the GraphViz documentation for the list of available options.
166
167=cut
168
8d9a1cd8 169sub as_dot {
e367f5c0 170 my( $self, $opts ) = @_;
7a7c249c 171
172 # Get default and specified options
e02340f3 173 my %graphopts = (
174 # 'ratio' => 1,
175 );
7a7c249c 176 my %nodeopts = (
177 'fontsize' => 11,
7a7c249c 178 'style' => 'filled',
179 'fillcolor' => 'white',
e02340f3 180 'color' => 'white',
7a7c249c 181 'shape' => 'ellipse', # Shape for the extant nodes
182 );
183 my %edgeopts = (
e02340f3 184 'arrowhead' => 'none',
7a7c249c 185 );
186 @graphopts{ keys %{$opts->{'graph'}} } = values %{$opts->{'graph'}}
187 if $opts->{'graph'};
188 @nodeopts{ keys %{$opts->{'node'}} } = values %{$opts->{'node'}}
189 if $opts->{'node'};
190 @edgeopts{ keys %{$opts->{'edge'}} } = values %{$opts->{'edge'}}
191 if $opts->{'edge'};
192
193 my @dotlines;
194 push( @dotlines, 'digraph stemma {' );
195 ## Print out the global attributes
196 push( @dotlines, _make_dotline( 'graph', %graphopts ) ) if keys %graphopts;
197 push( @dotlines, _make_dotline( 'edge', %edgeopts ) ) if keys %edgeopts;
7a7c249c 198 push( @dotlines, _make_dotline( 'node', %nodeopts ) ) if keys %nodeopts;
199
200 # Add each of the nodes.
201 foreach my $n ( $self->graph->vertices ) {
e02340f3 202 if( $self->graph->has_vertex_attribute( $n, 'label' ) ) {
203 my $ltext = $self->graph->get_vertex_attribute( $n, 'label' );
204 push( @dotlines, _make_dotline( $n, 'label' => $ltext ) );
e79c23c7 205 } else {
7a7c249c 206 # Use the default display settings.
207 push( @dotlines, " $n;" );
e79c23c7 208 }
209 }
7a7c249c 210 # Add each of our edges.
211 foreach my $e ( $self->graph->edges ) {
212 my( $from, $to ) = @$e;
213 push( @dotlines, " $from -> $to;" );
214 }
215 push( @dotlines, '}' );
e79c23c7 216
7a7c249c 217 return join( "\n", @dotlines );
218}
219
027d819c 220=head2 editable
221
222Returns a version of the graph rendered in our definition format.
223
224=cut
7a7c249c 225
7a7c249c 226sub editable {
227 my $self = shift;
228 my @dotlines;
229 push( @dotlines, 'digraph stemma {' );
230 my @real; # A cheap sort
231 foreach my $n ( sort $self->graph->vertices ) {
232 my $c = $self->graph->get_vertex_attribute( $n, 'class' );
233 $c = 'extant' unless $c;
234 if( $c eq 'extant' ) {
235 push( @real, $n );
236 } else {
237 push( @dotlines, _make_dotline( $n, 'class' => $c ) );
238 }
e367f5c0 239 }
7a7c249c 240 # Now do the real ones
241 foreach my $n ( @real ) {
242 push( @dotlines, _make_dotline( $n, 'class' => 'extant' ) );
243 }
244 foreach my $e ( sort _by_vertex $self->graph->edges ) {
245 my( $from, $to ) = @$e;
246 push( @dotlines, " $from -> $to;" );
247 }
248 push( @dotlines, '}' );
249 return join( "\n", @dotlines );
250}
251
252sub _make_dotline {
253 my( $obj, %attr ) = @_;
254 my @pairs;
255 foreach my $k ( keys %attr ) {
256 my $v = $attr{$k};
257 $v =~ s/\"/\\\"/g;
258 push( @pairs, "$k=\"$v\"" );
259 }
260 return sprintf( " %s [ %s ];", $obj, join( ', ', @pairs ) );
8d9a1cd8 261}
262
7a7c249c 263sub _by_vertex {
264 return $a->[0].$a->[1] cmp $b->[0].$b->[1];
265}
8d9a1cd8 266
027d819c 267=head2 as_svg
268
269Returns an SVG representation of the graph, calling as_dot first.
270
271=cut
272
8d9a1cd8 273sub as_svg {
274 my( $self, $opts ) = @_;
275 my $dot = $self->as_dot( $opts );
e79c23c7 276 my @cmd = qw/dot -Tsvg/;
277 my( $svg, $err );
278 my $dotfile = File::Temp->new();
279 ## TODO REMOVE
280 # $dotfile->unlink_on_destroy(0);
281 binmode $dotfile, ':utf8';
8d9a1cd8 282 print $dotfile $dot;
e79c23c7 283 push( @cmd, $dotfile->filename );
284 run( \@cmd, ">", binary(), \$svg );
285 $svg = decode_utf8( $svg );
286 return $svg;
287}
288
027d819c 289=head2 witnesses
290
291Returns a list of the extant witnesses represented in the stemma.
292
293=cut
294
08e0fb85 295sub witnesses {
296 my $self = shift;
297 my @wits = grep { $self->graph->get_vertex_attribute( $_, 'class' ) eq 'extant' }
298 $self->graph->vertices;
299 return @wits;
300}
301
027d819c 302=head2 distance_trees( program => $program )
303
304Returns a set of undirected graphs, which are the result of running a distance
305tree calculation program on the collation. Currently the only supported
306program is phylip_pars.
307
308=cut
309
e79c23c7 310#### Methods for calculating phylogenetic trees ####
311
40f19742 312before 'distance_trees' => sub {
313 my $self = shift;
0f5d05c6 314 my %args = (
315 'program' => 'phylip_pars',
316 @_ );
40f19742 317 # TODO allow specification of method for calculating distance tree
0f5d05c6 318 if( !$self->has_distance_trees
319 || $args{'program'} ne $self->distance_program ) {
40f19742 320 # We need to make a tree before we can return it.
0f5d05c6 321 my $dsub = 'run_' . $args{'program'};
69403daa 322 my $result = $self->$dsub(); # this might throw an error - catch it?
323 # Save the resulting trees
324 my $trees = parse_newick( $result );
325 $self->_save_distance_trees( $trees );
326 $self->distance_program( $args{'program'} );
40f19742 327 }
328};
f6066bac 329
027d819c 330=head2 run_phylip_pars
331
332Runs Phylip Pars on the collation, returning the results in Newick format.
333Used for the distance_trees calculation.
334
335=cut
336
40f19742 337sub run_phylip_pars {
b02332ca 338 my $self = shift;
339 my $cdata = character_input( $self->collation->make_alignment_table() );
340 return phylip_pars( $cdata );
40f19742 341}
342
63778331 343sub throw {
344 Text::Tradition::Error->throw(
345 'ident' => 'Stemma error',
346 'message' => $_[0],
347 );
348}
349
350
9463b0bf 351no Moose;
352__PACKAGE__->meta->make_immutable;
353
3541;
027d819c 355
356=head1 LICENSE
357
358This package is free software and is provided "as is" without express
359or implied warranty. You can redistribute it and/or modify it under
360the same terms as Perl itself.
361
362=head1 AUTHOR
363
364Tara L Andrews E<lt>aurum@cpan.orgE<gt>