let stexaminer run without IDP server reachable
[scpubgit/stemmatology.git] / lib / Text / Tradition / Stemma.pm
CommitLineData
9463b0bf 1package Text::Tradition::Stemma;
2
40f19742 3use Bio::Phylo::IO;
e79c23c7 4use Encode qw( decode_utf8 );
9463b0bf 5use File::Temp;
e05997e2 6use Graph;
7use Graph::Reader::Dot;
e79c23c7 8use IPC::Run qw/ run binary /;
63778331 9use Text::Tradition::Error;
b02332ca 10use Text::Tradition::StemmaUtil qw/ character_input phylip_pars parse_newick /;
40f19742 11use Moose;
9463b0bf 12
027d819c 13=head1 NAME
14
15Text::Tradition::Stemma - a representation of a I<stemma codicum> for a Text::Tradition
16
17=head1 SYNOPSIS
18
19 use Text::Tradition;
20 my $t = Text::Tradition->new(
21 'name' => 'this is a text',
22 'input' => 'TEI',
23 'file' => '/path/to/tei_parallel_seg_file.xml' );
24
25 my $s = $tradition->add_stemma( dotfile => '/path/to/stemma.dot' );
26
27=head1 DESCRIPTION
28
29Text::Tradition is a library for representation and analysis of collated
335a62ef 30texts, particularly medieval ones. The Stemma is a representation of the
31copying relationships between the witnesses in a Tradition, modelled with
32a connected rooted directed acyclic graph (CRDAG).
027d819c 33
34=head1 DOT SYNTAX
35
335a62ef 36The easiest way to define a stemma is to use a special form of the 'dot'
37syntax of GraphViz.
027d819c 38
39Each stemma opens with the line
40
41 digraph Stemma {
42
43and continues with a list of all manuscript witnesses in the stemma, whether
44extant witnesses or missing archetypes or hyparchetypes. Each of these is
45listed by its sigil on its own line, e.g.:
46
47 alpha [ class=hypothetical ]
48 1 [ class=hypothetical,label=* ]
49 Ms4 [ class=extant ]
50
51Extant witnesses are listed with class=extant; missing or postulated witnesses
52are listed with class=hypothetical. Anonymous hyparchetypes must be given a
53unique name or number, but can be represented as anonymous with the addition
54of 'label=*' to their lines. Greek letters or other special characters may be
55used as names, but they must always be wrapped in double quotes.
56
57Links between manuscripts are then listed with arrow notation, as below. These
58lines show the direction of copying, one step at a time, for the entire stemma.
59
60 alpha -> 1
61 1 -> Ms4
62
63The final line in the definition should be the closing brace:
64
65 }
66
67Thus for a set of extant manuscripts A, B, and C, where A and B were copied
68from the archetype O and C was copied from B, the definition would be:
69
70 digraph Stemma {
71 O [ class=hypothetical]
72 A [ class=extant ]
73 B [ class=extant ]
74 C [ class=extant ]
75 O -> A
76 O -> B
77 B -> C
78 }
79
80=head1 CONSTRUCTOR
81
82=head2 new
83
84The constructor. This should generally be called from Text::Tradition, but
85if called directly it takes the following options:
86
87=over
88
89=item * collation - The collation with which the stemma is associated.
90
91=item * dot - A filehandle open to a DOT representation of the stemma graph.
92
93=back
94
64a36834 95=begin testing
96
97use Text::Tradition::Collation;
98use TryCatch;
99
100use_ok( 'Text::Tradition::Stemma' );
101
102# Placeholder collation to use in tests
103my $c = Text::Tradition::Collation->new();
104
105# Try to create a bad graph
106my $baddotfh;
107open( $baddotfh, 't/data/besoin_bad.dot' ) or die "Could not open test dotfile";
108try {
109 my $stemma = Text::Tradition::Stemma->new( collation => $c, dot => $baddotfh );
110 ok( 0, "Created broken stemma from dotfile with syntax error" );
111} catch( Text::Tradition::Error $e ) {
112 like( $e->message, qr/^Error trying to parse/, "Syntax error in dot threw exception" );
113}
114
115# Create a good graph
116my $dotfh;
117open( $dotfh, 't/data/florilegium.dot' ) or die "Could not open test dotfile";
118binmode( $dotfh, ':utf8' );
119my $stemma = Text::Tradition::Stemma->new( collation => $c, dot => $dotfh );
120is( ref( $stemma ), 'Text::Tradition::Stemma', "Created stemma from good dotfile" );
121is( scalar $stemma->witnesses, 13, "Found correct number of extant witnesses" );
122is( scalar $stemma->hypotheticals, 8, "Found correct number of extant hypotheticals" );
123my $found_unicode_sigil;
124foreach my $h ( $stemma->hypotheticals ) {
125 $found_unicode_sigil = 1 if $h eq "\x{3b1}";
126}
127ok( $found_unicode_sigil, "Found a correctly encoded Unicode sigil" );
128
129=end testing
130
027d819c 131=cut
132
9463b0bf 133has collation => (
134 is => 'ro',
135 isa => 'Text::Tradition::Collation',
136 required => 1,
8d9a1cd8 137 weak_ref => 1,
9463b0bf 138 );
139
e05997e2 140has graph => (
141 is => 'rw',
142 isa => 'Graph',
143 predicate => 'has_graph',
144 );
c57be097 145
e05997e2 146sub BUILD {
147 my( $self, $args ) = @_;
148 # If we have been handed a dotfile, initialize it into a graph.
149 if( exists $args->{'dot'} ) {
027d819c 150 $self->_graph_from_dot( $args->{'dot'} );
e05997e2 151 }
c0ccdb62 152}
153
027d819c 154sub _graph_from_dot {
8d9a1cd8 155 my( $self, $dotfh ) = @_;
8d9a1cd8 156 my $reader = Graph::Reader::Dot->new();
64a36834 157 # Redirect STDOUT in order to trap any error messages - syntax errors
158 # are evidently not fatal.
159 my $reader_out;
160 my $saved_stderr;
161 open $saved_stderr, ">&STDOUT";
162 close STDOUT;
163 open STDOUT, ">", \$reader_out;
8d9a1cd8 164 my $graph = $reader->read_graph( $dotfh );
64a36834 165 close STDOUT;
166 open STDOUT, ">", \$saved_stderr;
167 if( $reader_out && $reader_out =~ /error/s ) {
168 throw( "Error trying to parse dot: $reader_out" );
169 } elsif( !$graph ) {
170 throw( "Failed to create graph from dot" );
171 }
172 $self->graph( $graph );
173 # Go through the nodes and set any non-hypothetical node to extant.
174 foreach my $v ( $self->graph->vertices ) {
175 $self->graph->set_vertex_attribute( $v, 'class', 'extant' )
176 unless $self->graph->has_vertex_attribute( $v, 'class' );
7a7c249c 177 }
8d9a1cd8 178}
179
027d819c 180=head1 METHODS
181
182=head2 as_dot( \%options )
183
184Returns a normal dot representation of the stemma layout, suitable for rendering
185with GraphViz. Options include:
186
187=over
188
189=item * graph - A hashref of global graph options.
190
191=item * node - A hashref of global node options.
192
193=item * edge - A hashref of global edge options.
194
195=back
196
197See the GraphViz documentation for the list of available options.
198
199=cut
200
8d9a1cd8 201sub as_dot {
e367f5c0 202 my( $self, $opts ) = @_;
7a7c249c 203
335a62ef 204 ## See if we are including any a.c. witnesses in this graph.
205 my $graph = $self->graph;
206 if( exists $opts->{'layerwits'} ) {
5c44c598 207 my $extant = {};
208 map { $extant->{$_} = 1 } $self->witnesses;
209 $graph = $self->situation_graph( $extant, $opts->{'layerwits'} );
335a62ef 210 }
211
7a7c249c 212 # Get default and specified options
e02340f3 213 my %graphopts = (
214 # 'ratio' => 1,
215 );
7a7c249c 216 my %nodeopts = (
217 'fontsize' => 11,
7a7c249c 218 'style' => 'filled',
219 'fillcolor' => 'white',
e02340f3 220 'color' => 'white',
7a7c249c 221 'shape' => 'ellipse', # Shape for the extant nodes
222 );
223 my %edgeopts = (
e02340f3 224 'arrowhead' => 'none',
7a7c249c 225 );
226 @graphopts{ keys %{$opts->{'graph'}} } = values %{$opts->{'graph'}}
227 if $opts->{'graph'};
228 @nodeopts{ keys %{$opts->{'node'}} } = values %{$opts->{'node'}}
229 if $opts->{'node'};
230 @edgeopts{ keys %{$opts->{'edge'}} } = values %{$opts->{'edge'}}
231 if $opts->{'edge'};
335a62ef 232
7a7c249c 233 my @dotlines;
234 push( @dotlines, 'digraph stemma {' );
235 ## Print out the global attributes
236 push( @dotlines, _make_dotline( 'graph', %graphopts ) ) if keys %graphopts;
237 push( @dotlines, _make_dotline( 'edge', %edgeopts ) ) if keys %edgeopts;
7a7c249c 238 push( @dotlines, _make_dotline( 'node', %nodeopts ) ) if keys %nodeopts;
239
240 # Add each of the nodes.
335a62ef 241 foreach my $n ( $graph->vertices ) {
242 if( $graph->has_vertex_attribute( $n, 'label' ) ) {
243 my $ltext = $graph->get_vertex_attribute( $n, 'label' );
e02340f3 244 push( @dotlines, _make_dotline( $n, 'label' => $ltext ) );
e79c23c7 245 } else {
7a7c249c 246 # Use the default display settings.
986bbd1b 247 $n = _dotquote( $n );
7a7c249c 248 push( @dotlines, " $n;" );
e79c23c7 249 }
250 }
7a7c249c 251 # Add each of our edges.
335a62ef 252 foreach my $e ( $graph->edges ) {
986bbd1b 253 my( $from, $to ) = map { _dotquote( $_ ) } @$e;
7a7c249c 254 push( @dotlines, " $from -> $to;" );
255 }
256 push( @dotlines, '}' );
e79c23c7 257
7a7c249c 258 return join( "\n", @dotlines );
259}
260
0bded693 261=head2 alter_graph( $dotstring )
262
263Alters the graph of this stemma according to the definition specified
264in $dotstring.
265
266=cut
267
268sub alter_graph {
269 my( $self, $dotstring ) = @_;
270 my $dotfh;
271 open $dotfh, '<', \$dotstring;
272 $self->_graph_from_dot( $dotfh );
273}
274
335a62ef 275=head2 editable( $opts )
027d819c 276
5c44c598 277=head2 editable_graph( $graph, $opts )
278
88a6bac5 279Returns a version of the graph rendered in our definition format. The
335a62ef 280output separates statements with a newline; set $opts->{'linesep'} to the
281empty string or to a space if the result is to be sent via JSON.
282
5c44c598 283If a situational version of the stemma is required, the arguments for
284situation_graph should be passed via $opts->{'extant'} and $opts->{'layerwits'}.
027d819c 285
286=cut
7a7c249c 287
7a7c249c 288sub editable {
5c44c598 289 my( $self, $opts ) = @_;
335a62ef 290 my $graph = $self->graph;
5c44c598 291 ## See if we need an editable version of a situational graph.
292 if( exists $opts->{'layerwits'} || exists $opts->{'extant'} ) {
293 my $extant = delete $opts->{'extant'} || {};
294 my $layerwits = delete $opts->{'layerwits'} || [];
295 $graph = $self->situation_graph( $extant, $layerwits );
335a62ef 296 }
5c44c598 297 return editable_graph( $graph, $opts );
298}
299
300sub editable_graph {
301 my( $graph, $opts ) = @_;
335a62ef 302
303 # Create the graph
304 my $join = ( $opts && exists $opts->{'linesep'} ) ? $opts->{'linesep'} : "\n";
7a7c249c 305 my @dotlines;
306 push( @dotlines, 'digraph stemma {' );
307 my @real; # A cheap sort
5c44c598 308 foreach my $n ( sort $graph->vertices ) {
309 my $c = $graph->get_vertex_attribute( $n, 'class' );
7a7c249c 310 $c = 'extant' unless $c;
311 if( $c eq 'extant' ) {
312 push( @real, $n );
313 } else {
314 push( @dotlines, _make_dotline( $n, 'class' => $c ) );
315 }
e367f5c0 316 }
7a7c249c 317 # Now do the real ones
318 foreach my $n ( @real ) {
319 push( @dotlines, _make_dotline( $n, 'class' => 'extant' ) );
320 }
5c44c598 321 foreach my $e ( sort _by_vertex $graph->edges ) {
986bbd1b 322 my( $from, $to ) = map { _dotquote( $_ ) } @$e;
7a7c249c 323 push( @dotlines, " $from -> $to;" );
324 }
325 push( @dotlines, '}' );
88a6bac5 326 return join( $join, @dotlines );
7a7c249c 327}
328
329sub _make_dotline {
330 my( $obj, %attr ) = @_;
331 my @pairs;
332 foreach my $k ( keys %attr ) {
986bbd1b 333 my $v = _dotquote( $attr{$k} );
334 push( @pairs, "$k=$v" );
7a7c249c 335 }
986bbd1b 336 return sprintf( " %s [ %s ];", _dotquote( $obj ), join( ', ', @pairs ) );
8d9a1cd8 337}
338
986bbd1b 339sub _dotquote {
340 my( $str ) = @_;
341 return $str if $str =~ /^[A-Za-z0-9]+$/;
342 $str =~ s/\"/\\\"/g;
343 $str = '"' . $str . '"';
344 return $str;
345}
346
7a7c249c 347sub _by_vertex {
348 return $a->[0].$a->[1] cmp $b->[0].$b->[1];
349}
8d9a1cd8 350
5c44c598 351=head2 situation_graph( $extant, $layered )
335a62ef 352
5c44c598 353Returns a graph which is the original stemma with all witnesses not in the
354%$extant hash marked as hypothetical, and witness layers added to the graph
355according to the list in @$layered. A layered (a.c.) witness is added as a
356parent of its main version, and additionally shares all other parents and
357children with that version.
335a62ef 358
359=cut
360
5c44c598 361sub situation_graph {
362 my( $self, $extant, $layerwits ) = @_;
363
364 my $graph = $self->graph->copy;
365 foreach my $vertex ( $graph->vertices ) {
366 # Set as extant any vertex that is extant in the stemma AND
367 # exists in the $extant hash.
368 my $class = 'hypothetical';
369 $class = 'extant' if exists $extant->{$vertex} && $extant->{$vertex} &&
370 $self->graph->get_vertex_attribute( $vertex, 'class' ) ne 'hypothetical';
371 $graph->set_vertex_attribute( $vertex, 'class', $class );
372 }
373
335a62ef 374 # For each 'layered' witness in the layerwits array, add it to the graph
375 # as an ancestor of the 'main' witness, and otherwise with the same parent/
376 # child links as its main analogue.
377 # TOOD Handle case where B is copied from A but corrected from C
5c44c598 378 my $aclabel = $self->collation->ac_label;
335a62ef 379 foreach my $lw ( @$layerwits ) {
380 # Add the layered witness and set it with the same attributes as
381 # its 'main' analogue
5c44c598 382 throw( "Cannot add a layer to a hypothetical witness $lw" )
383 unless $graph->get_vertex_attribute( $lw, 'class' ) eq 'extant';
384 my $lwac = $lw . $aclabel;
335a62ef 385 $graph->add_vertex( $lwac );
386 $graph->set_vertex_attributes( $lwac,
387 $graph->get_vertex_attributes( $lw ) );
388
389 # Set it as ancestor to the main witness
390 $graph->add_edge( $lwac, $lw );
391
392 # Give it the same ancestors and descendants as the main witness has,
393 # bearing in mind that those ancestors and descendants might also just
394 # have had a layered witness defined.
395 foreach my $v ( $graph->predecessors( $lw ) ) {
396 next if $v eq $lwac; # Don't add a loop
397 $graph->add_edge( $v, $lwac );
5c44c598 398 $graph->add_edge( $v.$aclabel, $lwac )
399 if $graph->has_vertex( $v.$aclabel );
335a62ef 400 }
401 foreach my $v ( $graph->successors( $lw ) ) {
402 next if $v eq $lwac; # but this shouldn't occur
403 $graph->add_edge( $lwac, $v );
5c44c598 404 $graph->add_edge( $lwac, $v.$aclabel )
405 if $graph->has_vertex( $v.$aclabel );
335a62ef 406 }
407 }
408 return $graph;
409}
410
027d819c 411=head2 as_svg
412
413Returns an SVG representation of the graph, calling as_dot first.
414
415=cut
416
8d9a1cd8 417sub as_svg {
418 my( $self, $opts ) = @_;
419 my $dot = $self->as_dot( $opts );
e79c23c7 420 my @cmd = qw/dot -Tsvg/;
3bf5d6f1 421 my $svg;
e79c23c7 422 my $dotfile = File::Temp->new();
423 ## TODO REMOVE
424 # $dotfile->unlink_on_destroy(0);
425 binmode $dotfile, ':utf8';
8d9a1cd8 426 print $dotfile $dot;
459c39b3 427 close $dotfile;
e79c23c7 428 push( @cmd, $dotfile->filename );
429 run( \@cmd, ">", binary(), \$svg );
3bf5d6f1 430 # HACK: Parse the SVG and change the dimensions.
5a7e26a9 431 # Get rid of width and height attributes to allow scaling.
c57be097 432 if( $opts->{'size'} ) {
428bcf0b 433 require XML::LibXML;
459c39b3 434 my $parser = XML::LibXML->new( load_ext_dtd => 0 );
435 my $svgdoc;
436 eval {
437 $svgdoc = $parser->parse_string( decode_utf8( $svg ) );
438 };
439 throw( "Could not reparse SVG: $@" ) if $@;
c57be097 440 my( $ew, $eh ) = @{$opts->{'size'}};
441 # If the graph is wider than it is tall, set width to ew and remove height.
442 # Otherwise set height to eh and remove width.
443 my $width = $svgdoc->documentElement->getAttribute('width');
444 my $height = $svgdoc->documentElement->getAttribute('height');
445 $width =~ s/\D+//g;
446 $height =~ s/\D+//g;
447 my( $remove, $keep, $val );
448 if( $width > $height ) {
449 $remove = 'height';
450 $keep = 'width';
451 $val = $ew . 'px';
452 } else {
453 $remove = 'width';
454 $keep = 'height';
455 $val = $eh . 'px';
456 }
457 $svgdoc->documentElement->removeAttribute( $remove );
458 $svgdoc->documentElement->setAttribute( $keep, $val );
428bcf0b 459 $svg = $svgdoc->toString();
c57be097 460 }
3bf5d6f1 461 # Return the result
428bcf0b 462 return decode_utf8( $svg );
e79c23c7 463}
464
027d819c 465=head2 witnesses
466
467Returns a list of the extant witnesses represented in the stemma.
468
469=cut
470
08e0fb85 471sub witnesses {
472 my $self = shift;
473 my @wits = grep { $self->graph->get_vertex_attribute( $_, 'class' ) eq 'extant' }
474 $self->graph->vertices;
475 return @wits;
476}
477
06e7cbc7 478=head2 hypotheticals
479
480Returns a list of the hypothetical witnesses represented in the stemma.
481
482=cut
483
bebec0e9 484sub hypotheticals {
485 my $self = shift;
486 my @wits = grep
487 { $self->graph->get_vertex_attribute( $_, 'class' ) eq 'hypothetical' }
488 $self->graph->vertices;
489 return @wits;
490}
491
63778331 492sub throw {
493 Text::Tradition::Error->throw(
494 'ident' => 'Stemma error',
495 'message' => $_[0],
496 );
497}
498
499
9463b0bf 500no Moose;
501__PACKAGE__->meta->make_immutable;
502
5031;
027d819c 504
505=head1 LICENSE
506
507This package is free software and is provided "as is" without express
508or implied warranty. You can redistribute it and/or modify it under
509the same terms as Perl itself.
510
511=head1 AUTHOR
512
513Tara L Andrews E<lt>aurum@cpan.orgE<gt>