load XML::LibXML only when required; handle global relationships more correctly;...
[scpubgit/stemmatology.git] / lib / Text / Tradition / Stemma.pm
CommitLineData
9463b0bf 1package Text::Tradition::Stemma;
2
40f19742 3use Bio::Phylo::IO;
e79c23c7 4use Encode qw( decode_utf8 );
9463b0bf 5use File::Temp;
e05997e2 6use Graph;
7use Graph::Reader::Dot;
e79c23c7 8use IPC::Run qw/ run binary /;
63778331 9use Text::Tradition::Error;
b02332ca 10use Text::Tradition::StemmaUtil qw/ character_input phylip_pars parse_newick /;
40f19742 11use Moose;
9463b0bf 12
027d819c 13=head1 NAME
14
15Text::Tradition::Stemma - a representation of a I<stemma codicum> for a Text::Tradition
16
17=head1 SYNOPSIS
18
19 use Text::Tradition;
20 my $t = Text::Tradition->new(
21 'name' => 'this is a text',
22 'input' => 'TEI',
23 'file' => '/path/to/tei_parallel_seg_file.xml' );
24
25 my $s = $tradition->add_stemma( dotfile => '/path/to/stemma.dot' );
26
27=head1 DESCRIPTION
28
29Text::Tradition is a library for representation and analysis of collated
335a62ef 30texts, particularly medieval ones. The Stemma is a representation of the
31copying relationships between the witnesses in a Tradition, modelled with
32a connected rooted directed acyclic graph (CRDAG).
027d819c 33
34=head1 DOT SYNTAX
35
335a62ef 36The easiest way to define a stemma is to use a special form of the 'dot'
37syntax of GraphViz.
027d819c 38
39Each stemma opens with the line
40
41 digraph Stemma {
42
43and continues with a list of all manuscript witnesses in the stemma, whether
44extant witnesses or missing archetypes or hyparchetypes. Each of these is
45listed by its sigil on its own line, e.g.:
46
47 alpha [ class=hypothetical ]
48 1 [ class=hypothetical,label=* ]
49 Ms4 [ class=extant ]
50
51Extant witnesses are listed with class=extant; missing or postulated witnesses
52are listed with class=hypothetical. Anonymous hyparchetypes must be given a
53unique name or number, but can be represented as anonymous with the addition
54of 'label=*' to their lines. Greek letters or other special characters may be
55used as names, but they must always be wrapped in double quotes.
56
57Links between manuscripts are then listed with arrow notation, as below. These
58lines show the direction of copying, one step at a time, for the entire stemma.
59
60 alpha -> 1
61 1 -> Ms4
62
63The final line in the definition should be the closing brace:
64
65 }
66
67Thus for a set of extant manuscripts A, B, and C, where A and B were copied
68from the archetype O and C was copied from B, the definition would be:
69
70 digraph Stemma {
71 O [ class=hypothetical]
72 A [ class=extant ]
73 B [ class=extant ]
74 C [ class=extant ]
75 O -> A
76 O -> B
77 B -> C
78 }
79
80=head1 CONSTRUCTOR
81
82=head2 new
83
84The constructor. This should generally be called from Text::Tradition, but
85if called directly it takes the following options:
86
87=over
88
89=item * collation - The collation with which the stemma is associated.
90
91=item * dot - A filehandle open to a DOT representation of the stemma graph.
92
93=back
94
95=cut
96
9463b0bf 97has collation => (
98 is => 'ro',
99 isa => 'Text::Tradition::Collation',
100 required => 1,
8d9a1cd8 101 weak_ref => 1,
9463b0bf 102 );
103
e05997e2 104has graph => (
105 is => 'rw',
106 isa => 'Graph',
107 predicate => 'has_graph',
108 );
c57be097 109
e05997e2 110sub BUILD {
111 my( $self, $args ) = @_;
112 # If we have been handed a dotfile, initialize it into a graph.
113 if( exists $args->{'dot'} ) {
027d819c 114 $self->_graph_from_dot( $args->{'dot'} );
e05997e2 115 }
c0ccdb62 116}
117
027d819c 118sub _graph_from_dot {
8d9a1cd8 119 my( $self, $dotfh ) = @_;
8d9a1cd8 120 my $reader = Graph::Reader::Dot->new();
121 my $graph = $reader->read_graph( $dotfh );
7a7c249c 122 if( $graph ) {
123 $self->graph( $graph );
124 # Go through the nodes and set any non-hypothetical node to extant.
125 foreach my $v ( $self->graph->vertices ) {
126 $self->graph->set_vertex_attribute( $v, 'class', 'extant' )
127 unless $self->graph->has_vertex_attribute( $v, 'class' );
128 }
129 } else {
63778331 130 throw( "Failed to parse dot in $dotfh" );
7a7c249c 131 }
8d9a1cd8 132}
133
027d819c 134=head1 METHODS
135
136=head2 as_dot( \%options )
137
138Returns a normal dot representation of the stemma layout, suitable for rendering
139with GraphViz. Options include:
140
141=over
142
143=item * graph - A hashref of global graph options.
144
145=item * node - A hashref of global node options.
146
147=item * edge - A hashref of global edge options.
148
149=back
150
151See the GraphViz documentation for the list of available options.
152
153=cut
154
8d9a1cd8 155sub as_dot {
e367f5c0 156 my( $self, $opts ) = @_;
7a7c249c 157
335a62ef 158 ## See if we are including any a.c. witnesses in this graph.
159 my $graph = $self->graph;
160 if( exists $opts->{'layerwits'} ) {
161 $graph = $self->extend_graph( $opts->{'layerwits'} );
162 }
163
7a7c249c 164 # Get default and specified options
e02340f3 165 my %graphopts = (
166 # 'ratio' => 1,
167 );
7a7c249c 168 my %nodeopts = (
169 'fontsize' => 11,
7a7c249c 170 'style' => 'filled',
171 'fillcolor' => 'white',
e02340f3 172 'color' => 'white',
7a7c249c 173 'shape' => 'ellipse', # Shape for the extant nodes
174 );
175 my %edgeopts = (
e02340f3 176 'arrowhead' => 'none',
7a7c249c 177 );
178 @graphopts{ keys %{$opts->{'graph'}} } = values %{$opts->{'graph'}}
179 if $opts->{'graph'};
180 @nodeopts{ keys %{$opts->{'node'}} } = values %{$opts->{'node'}}
181 if $opts->{'node'};
182 @edgeopts{ keys %{$opts->{'edge'}} } = values %{$opts->{'edge'}}
183 if $opts->{'edge'};
335a62ef 184
7a7c249c 185 my @dotlines;
186 push( @dotlines, 'digraph stemma {' );
187 ## Print out the global attributes
188 push( @dotlines, _make_dotline( 'graph', %graphopts ) ) if keys %graphopts;
189 push( @dotlines, _make_dotline( 'edge', %edgeopts ) ) if keys %edgeopts;
7a7c249c 190 push( @dotlines, _make_dotline( 'node', %nodeopts ) ) if keys %nodeopts;
191
192 # Add each of the nodes.
335a62ef 193 foreach my $n ( $graph->vertices ) {
194 if( $graph->has_vertex_attribute( $n, 'label' ) ) {
195 my $ltext = $graph->get_vertex_attribute( $n, 'label' );
e02340f3 196 push( @dotlines, _make_dotline( $n, 'label' => $ltext ) );
e79c23c7 197 } else {
7a7c249c 198 # Use the default display settings.
986bbd1b 199 $n = _dotquote( $n );
7a7c249c 200 push( @dotlines, " $n;" );
e79c23c7 201 }
202 }
7a7c249c 203 # Add each of our edges.
335a62ef 204 foreach my $e ( $graph->edges ) {
986bbd1b 205 my( $from, $to ) = map { _dotquote( $_ ) } @$e;
7a7c249c 206 push( @dotlines, " $from -> $to;" );
207 }
208 push( @dotlines, '}' );
e79c23c7 209
7a7c249c 210 return join( "\n", @dotlines );
211}
212
335a62ef 213=head2 editable( $opts )
027d819c 214
88a6bac5 215Returns a version of the graph rendered in our definition format. The
335a62ef 216output separates statements with a newline; set $opts->{'linesep'} to the
217empty string or to a space if the result is to be sent via JSON.
218
219Any layer witnesses to be included should be passed via $opts->{'layerwits'}.
027d819c 220
221=cut
7a7c249c 222
7a7c249c 223sub editable {
335a62ef 224 my( $self, $opts ) = @_;
225
226 ## See if we are including any a.c. witnesses in this graph.
227 my $graph = $self->graph;
228 if( exists $opts->{'layerwits'} ) {
229 $graph = $self->extend_graph( $opts->{'layerwits'} );
230 }
231
232 # Create the graph
233 my $join = ( $opts && exists $opts->{'linesep'} ) ? $opts->{'linesep'} : "\n";
7a7c249c 234 my @dotlines;
235 push( @dotlines, 'digraph stemma {' );
236 my @real; # A cheap sort
237 foreach my $n ( sort $self->graph->vertices ) {
238 my $c = $self->graph->get_vertex_attribute( $n, 'class' );
239 $c = 'extant' unless $c;
240 if( $c eq 'extant' ) {
241 push( @real, $n );
242 } else {
243 push( @dotlines, _make_dotline( $n, 'class' => $c ) );
244 }
e367f5c0 245 }
7a7c249c 246 # Now do the real ones
247 foreach my $n ( @real ) {
248 push( @dotlines, _make_dotline( $n, 'class' => 'extant' ) );
249 }
250 foreach my $e ( sort _by_vertex $self->graph->edges ) {
986bbd1b 251 my( $from, $to ) = map { _dotquote( $_ ) } @$e;
7a7c249c 252 push( @dotlines, " $from -> $to;" );
253 }
254 push( @dotlines, '}' );
88a6bac5 255 return join( $join, @dotlines );
7a7c249c 256}
257
258sub _make_dotline {
259 my( $obj, %attr ) = @_;
260 my @pairs;
261 foreach my $k ( keys %attr ) {
986bbd1b 262 my $v = _dotquote( $attr{$k} );
263 push( @pairs, "$k=$v" );
7a7c249c 264 }
986bbd1b 265 return sprintf( " %s [ %s ];", _dotquote( $obj ), join( ', ', @pairs ) );
8d9a1cd8 266}
267
986bbd1b 268sub _dotquote {
269 my( $str ) = @_;
270 return $str if $str =~ /^[A-Za-z0-9]+$/;
271 $str =~ s/\"/\\\"/g;
272 $str = '"' . $str . '"';
273 return $str;
274}
275
7a7c249c 276sub _by_vertex {
277 return $a->[0].$a->[1] cmp $b->[0].$b->[1];
278}
8d9a1cd8 279
335a62ef 280=head2 extend_graph( $layered_witnesses )
281
282Returns a graph which is the original stemma with witness layers added for the
283list in @$layered_witnesses. A layered (a.c.) witness is added as a parent
284of its main version, and additionally shares all other parents and children with
285that version.
286
287=cut
288
289sub extend_graph {
290 my( $self, $layerwits ) = @_;
291 # For each 'layered' witness in the layerwits array, add it to the graph
292 # as an ancestor of the 'main' witness, and otherwise with the same parent/
293 # child links as its main analogue.
294 # TOOD Handle case where B is copied from A but corrected from C
295
296 # Iterate through, adding a.c. witnesses
297 my $actag = $self->collation->ac_label;
94654e27 298 my $graph = $self->graph->deep_copy;
335a62ef 299 foreach my $lw ( @$layerwits ) {
300 # Add the layered witness and set it with the same attributes as
301 # its 'main' analogue
302 my $lwac = $lw . $self->collation->ac_label;
303 $graph->add_vertex( $lwac );
304 $graph->set_vertex_attributes( $lwac,
305 $graph->get_vertex_attributes( $lw ) );
306
307 # Set it as ancestor to the main witness
308 $graph->add_edge( $lwac, $lw );
309
310 # Give it the same ancestors and descendants as the main witness has,
311 # bearing in mind that those ancestors and descendants might also just
312 # have had a layered witness defined.
313 foreach my $v ( $graph->predecessors( $lw ) ) {
314 next if $v eq $lwac; # Don't add a loop
315 $graph->add_edge( $v, $lwac );
316 $graph->add_edge( $v.$self->collation->ac_label, $lwac )
317 if $graph->has_vertex( $v.$self->collation->ac_label );
318 }
319 foreach my $v ( $graph->successors( $lw ) ) {
320 next if $v eq $lwac; # but this shouldn't occur
321 $graph->add_edge( $lwac, $v );
322 $graph->add_edge( $lwac, $v.$self->collation->ac_label )
323 if $graph->has_vertex( $v.$self->collation->ac_label );
324 }
325 }
326 return $graph;
327}
328
027d819c 329=head2 as_svg
330
331Returns an SVG representation of the graph, calling as_dot first.
332
333=cut
334
8d9a1cd8 335sub as_svg {
336 my( $self, $opts ) = @_;
337 my $dot = $self->as_dot( $opts );
e79c23c7 338 my @cmd = qw/dot -Tsvg/;
3bf5d6f1 339 my $svg;
e79c23c7 340 my $dotfile = File::Temp->new();
341 ## TODO REMOVE
342 # $dotfile->unlink_on_destroy(0);
343 binmode $dotfile, ':utf8';
8d9a1cd8 344 print $dotfile $dot;
e79c23c7 345 push( @cmd, $dotfile->filename );
346 run( \@cmd, ">", binary(), \$svg );
3bf5d6f1 347 # HACK: Parse the SVG and change the dimensions.
5a7e26a9 348 # Get rid of width and height attributes to allow scaling.
c57be097 349 if( $opts->{'size'} ) {
428bcf0b 350 require XML::LibXML;
351 my $parser = XML::LibXML->new();
352 my $svgdoc = $parser->parse_string( decode_utf8( $svg ) );
c57be097 353 my( $ew, $eh ) = @{$opts->{'size'}};
354 # If the graph is wider than it is tall, set width to ew and remove height.
355 # Otherwise set height to eh and remove width.
356 my $width = $svgdoc->documentElement->getAttribute('width');
357 my $height = $svgdoc->documentElement->getAttribute('height');
358 $width =~ s/\D+//g;
359 $height =~ s/\D+//g;
360 my( $remove, $keep, $val );
361 if( $width > $height ) {
362 $remove = 'height';
363 $keep = 'width';
364 $val = $ew . 'px';
365 } else {
366 $remove = 'width';
367 $keep = 'height';
368 $val = $eh . 'px';
369 }
370 $svgdoc->documentElement->removeAttribute( $remove );
371 $svgdoc->documentElement->setAttribute( $keep, $val );
428bcf0b 372 $svg = $svgdoc->toString();
c57be097 373 }
3bf5d6f1 374 # Return the result
428bcf0b 375 return decode_utf8( $svg );
e79c23c7 376}
377
027d819c 378=head2 witnesses
379
380Returns a list of the extant witnesses represented in the stemma.
381
382=cut
383
08e0fb85 384sub witnesses {
385 my $self = shift;
386 my @wits = grep { $self->graph->get_vertex_attribute( $_, 'class' ) eq 'extant' }
387 $self->graph->vertices;
388 return @wits;
389}
390
06e7cbc7 391=head2 hypotheticals
392
393Returns a list of the hypothetical witnesses represented in the stemma.
394
395=cut
396
bebec0e9 397sub hypotheticals {
398 my $self = shift;
399 my @wits = grep
400 { $self->graph->get_vertex_attribute( $_, 'class' ) eq 'hypothetical' }
401 $self->graph->vertices;
402 return @wits;
403}
404
63778331 405sub throw {
406 Text::Tradition::Error->throw(
407 'ident' => 'Stemma error',
408 'message' => $_[0],
409 );
410}
411
412
9463b0bf 413no Moose;
414__PACKAGE__->meta->make_immutable;
415
4161;
027d819c 417
418=head1 LICENSE
419
420This package is free software and is provided "as is" without express
421or implied warranty. You can redistribute it and/or modify it under
422the same terms as Perl itself.
423
424=head1 AUTHOR
425
426Tara L Andrews E<lt>aurum@cpan.orgE<gt>