working fuller analysis plus tests
[scpubgit/stemmatology.git] / lib / Text / Tradition / Stemma.pm
CommitLineData
9463b0bf 1package Text::Tradition::Stemma;
2
40f19742 3use Bio::Phylo::IO;
e79c23c7 4use Encode qw( decode_utf8 );
9463b0bf 5use File::Temp;
e05997e2 6use Graph;
7use Graph::Reader::Dot;
e79c23c7 8use IPC::Run qw/ run binary /;
63778331 9use Text::Tradition::Error;
b02332ca 10use Text::Tradition::StemmaUtil qw/ character_input phylip_pars parse_newick /;
40f19742 11use Moose;
9463b0bf 12
027d819c 13=head1 NAME
14
15Text::Tradition::Stemma - a representation of a I<stemma codicum> for a Text::Tradition
16
17=head1 SYNOPSIS
18
19 use Text::Tradition;
20 my $t = Text::Tradition->new(
21 'name' => 'this is a text',
22 'input' => 'TEI',
23 'file' => '/path/to/tei_parallel_seg_file.xml' );
24
25 my $s = $tradition->add_stemma( dotfile => '/path/to/stemma.dot' );
26
27=head1 DESCRIPTION
28
29Text::Tradition is a library for representation and analysis of collated
30texts, particularly medieval ones. The Collation is the central feature of
31a Tradition, where the text, its sequence of readings, and its relationships
32between readings are actually kept.
33
34=head1 DOT SYNTAX
35
36The easiest way to define a stemma (which is a directed acyclic graph, denoting
37the scholar's hypothesis concerning which text(s) were copied from which other(s))
38is to use a special form of the 'dot' syntax of GraphViz.
39
40Each stemma opens with the line
41
42 digraph Stemma {
43
44and continues with a list of all manuscript witnesses in the stemma, whether
45extant witnesses or missing archetypes or hyparchetypes. Each of these is
46listed by its sigil on its own line, e.g.:
47
48 alpha [ class=hypothetical ]
49 1 [ class=hypothetical,label=* ]
50 Ms4 [ class=extant ]
51
52Extant witnesses are listed with class=extant; missing or postulated witnesses
53are listed with class=hypothetical. Anonymous hyparchetypes must be given a
54unique name or number, but can be represented as anonymous with the addition
55of 'label=*' to their lines. Greek letters or other special characters may be
56used as names, but they must always be wrapped in double quotes.
57
58Links between manuscripts are then listed with arrow notation, as below. These
59lines show the direction of copying, one step at a time, for the entire stemma.
60
61 alpha -> 1
62 1 -> Ms4
63
64The final line in the definition should be the closing brace:
65
66 }
67
68Thus for a set of extant manuscripts A, B, and C, where A and B were copied
69from the archetype O and C was copied from B, the definition would be:
70
71 digraph Stemma {
72 O [ class=hypothetical]
73 A [ class=extant ]
74 B [ class=extant ]
75 C [ class=extant ]
76 O -> A
77 O -> B
78 B -> C
79 }
80
81=head1 CONSTRUCTOR
82
83=head2 new
84
85The constructor. This should generally be called from Text::Tradition, but
86if called directly it takes the following options:
87
88=over
89
90=item * collation - The collation with which the stemma is associated.
91
92=item * dot - A filehandle open to a DOT representation of the stemma graph.
93
94=back
95
96=cut
97
9463b0bf 98has collation => (
99 is => 'ro',
100 isa => 'Text::Tradition::Collation',
101 required => 1,
8d9a1cd8 102 weak_ref => 1,
9463b0bf 103 );
104
e05997e2 105has graph => (
106 is => 'rw',
107 isa => 'Graph',
108 predicate => 'has_graph',
109 );
9457207b 110
e05997e2 111sub BUILD {
112 my( $self, $args ) = @_;
113 # If we have been handed a dotfile, initialize it into a graph.
114 if( exists $args->{'dot'} ) {
027d819c 115 $self->_graph_from_dot( $args->{'dot'} );
e05997e2 116 }
c0ccdb62 117}
118
027d819c 119sub _graph_from_dot {
8d9a1cd8 120 my( $self, $dotfh ) = @_;
8d9a1cd8 121 my $reader = Graph::Reader::Dot->new();
122 my $graph = $reader->read_graph( $dotfh );
7a7c249c 123 if( $graph ) {
124 $self->graph( $graph );
125 # Go through the nodes and set any non-hypothetical node to extant.
126 foreach my $v ( $self->graph->vertices ) {
127 $self->graph->set_vertex_attribute( $v, 'class', 'extant' )
128 unless $self->graph->has_vertex_attribute( $v, 'class' );
129 }
130 } else {
63778331 131 throw( "Failed to parse dot in $dotfh" );
7a7c249c 132 }
8d9a1cd8 133}
134
027d819c 135=head1 METHODS
136
137=head2 as_dot( \%options )
138
139Returns a normal dot representation of the stemma layout, suitable for rendering
140with GraphViz. Options include:
141
142=over
143
144=item * graph - A hashref of global graph options.
145
146=item * node - A hashref of global node options.
147
148=item * edge - A hashref of global edge options.
149
150=back
151
152See the GraphViz documentation for the list of available options.
153
154=cut
155
8d9a1cd8 156sub as_dot {
e367f5c0 157 my( $self, $opts ) = @_;
7a7c249c 158
159 # Get default and specified options
e02340f3 160 my %graphopts = (
161 # 'ratio' => 1,
162 );
7a7c249c 163 my %nodeopts = (
164 'fontsize' => 11,
7a7c249c 165 'style' => 'filled',
166 'fillcolor' => 'white',
e02340f3 167 'color' => 'white',
7a7c249c 168 'shape' => 'ellipse', # Shape for the extant nodes
169 );
170 my %edgeopts = (
e02340f3 171 'arrowhead' => 'none',
7a7c249c 172 );
173 @graphopts{ keys %{$opts->{'graph'}} } = values %{$opts->{'graph'}}
174 if $opts->{'graph'};
175 @nodeopts{ keys %{$opts->{'node'}} } = values %{$opts->{'node'}}
176 if $opts->{'node'};
177 @edgeopts{ keys %{$opts->{'edge'}} } = values %{$opts->{'edge'}}
178 if $opts->{'edge'};
179
180 my @dotlines;
181 push( @dotlines, 'digraph stemma {' );
182 ## Print out the global attributes
183 push( @dotlines, _make_dotline( 'graph', %graphopts ) ) if keys %graphopts;
184 push( @dotlines, _make_dotline( 'edge', %edgeopts ) ) if keys %edgeopts;
7a7c249c 185 push( @dotlines, _make_dotline( 'node', %nodeopts ) ) if keys %nodeopts;
186
187 # Add each of the nodes.
188 foreach my $n ( $self->graph->vertices ) {
e02340f3 189 if( $self->graph->has_vertex_attribute( $n, 'label' ) ) {
190 my $ltext = $self->graph->get_vertex_attribute( $n, 'label' );
191 push( @dotlines, _make_dotline( $n, 'label' => $ltext ) );
e79c23c7 192 } else {
7a7c249c 193 # Use the default display settings.
194 push( @dotlines, " $n;" );
e79c23c7 195 }
196 }
7a7c249c 197 # Add each of our edges.
198 foreach my $e ( $self->graph->edges ) {
199 my( $from, $to ) = @$e;
200 push( @dotlines, " $from -> $to;" );
201 }
202 push( @dotlines, '}' );
e79c23c7 203
7a7c249c 204 return join( "\n", @dotlines );
205}
206
027d819c 207=head2 editable
208
209Returns a version of the graph rendered in our definition format.
210
211=cut
7a7c249c 212
7a7c249c 213sub editable {
214 my $self = shift;
215 my @dotlines;
216 push( @dotlines, 'digraph stemma {' );
217 my @real; # A cheap sort
218 foreach my $n ( sort $self->graph->vertices ) {
219 my $c = $self->graph->get_vertex_attribute( $n, 'class' );
220 $c = 'extant' unless $c;
221 if( $c eq 'extant' ) {
222 push( @real, $n );
223 } else {
224 push( @dotlines, _make_dotline( $n, 'class' => $c ) );
225 }
e367f5c0 226 }
7a7c249c 227 # Now do the real ones
228 foreach my $n ( @real ) {
229 push( @dotlines, _make_dotline( $n, 'class' => 'extant' ) );
230 }
231 foreach my $e ( sort _by_vertex $self->graph->edges ) {
232 my( $from, $to ) = @$e;
233 push( @dotlines, " $from -> $to;" );
234 }
235 push( @dotlines, '}' );
236 return join( "\n", @dotlines );
237}
238
239sub _make_dotline {
240 my( $obj, %attr ) = @_;
241 my @pairs;
242 foreach my $k ( keys %attr ) {
243 my $v = $attr{$k};
244 $v =~ s/\"/\\\"/g;
245 push( @pairs, "$k=\"$v\"" );
246 }
247 return sprintf( " %s [ %s ];", $obj, join( ', ', @pairs ) );
8d9a1cd8 248}
249
7a7c249c 250sub _by_vertex {
251 return $a->[0].$a->[1] cmp $b->[0].$b->[1];
252}
8d9a1cd8 253
027d819c 254=head2 as_svg
255
256Returns an SVG representation of the graph, calling as_dot first.
257
258=cut
259
8d9a1cd8 260sub as_svg {
261 my( $self, $opts ) = @_;
262 my $dot = $self->as_dot( $opts );
e79c23c7 263 my @cmd = qw/dot -Tsvg/;
264 my( $svg, $err );
265 my $dotfile = File::Temp->new();
266 ## TODO REMOVE
267 # $dotfile->unlink_on_destroy(0);
268 binmode $dotfile, ':utf8';
8d9a1cd8 269 print $dotfile $dot;
e79c23c7 270 push( @cmd, $dotfile->filename );
271 run( \@cmd, ">", binary(), \$svg );
272 $svg = decode_utf8( $svg );
273 return $svg;
274}
275
027d819c 276=head2 witnesses
277
278Returns a list of the extant witnesses represented in the stemma.
279
280=cut
281
08e0fb85 282sub witnesses {
283 my $self = shift;
284 my @wits = grep { $self->graph->get_vertex_attribute( $_, 'class' ) eq 'extant' }
285 $self->graph->vertices;
286 return @wits;
287}
288
bebec0e9 289sub hypotheticals {
290 my $self = shift;
291 my @wits = grep
292 { $self->graph->get_vertex_attribute( $_, 'class' ) eq 'hypothetical' }
293 $self->graph->vertices;
294 return @wits;
295}
296
63778331 297sub throw {
298 Text::Tradition::Error->throw(
299 'ident' => 'Stemma error',
300 'message' => $_[0],
301 );
302}
303
304
9463b0bf 305no Moose;
306__PACKAGE__->meta->make_immutable;
307
3081;
027d819c 309
310=head1 LICENSE
311
312This package is free software and is provided "as is" without express
313or implied warranty. You can redistribute it and/or modify it under
314the same terms as Perl itself.
315
316=head1 AUTHOR
317
318Tara L Andrews E<lt>aurum@cpan.orgE<gt>