initial phylogeny generation work
[scpubgit/stemmatology.git] / lib / Text / Tradition / Stemma.pm
CommitLineData
9463b0bf 1package Text::Tradition::Stemma;
2
40f19742 3use Bio::Phylo::IO;
e79c23c7 4use Encode qw( decode_utf8 );
9463b0bf 5use File::Temp;
e05997e2 6use Graph;
7use Graph::Reader::Dot;
e79c23c7 8use IPC::Run qw/ run binary /;
63778331 9use Text::Tradition::Error;
40f19742 10use Moose;
9463b0bf 11
027d819c 12=head1 NAME
13
14Text::Tradition::Stemma - a representation of a I<stemma codicum> for a Text::Tradition
15
16=head1 SYNOPSIS
17
18 use Text::Tradition;
19 my $t = Text::Tradition->new(
20 'name' => 'this is a text',
21 'input' => 'TEI',
22 'file' => '/path/to/tei_parallel_seg_file.xml' );
23
24 my $s = $tradition->add_stemma( dotfile => '/path/to/stemma.dot' );
25
26=head1 DESCRIPTION
27
28Text::Tradition is a library for representation and analysis of collated
335a62ef 29texts, particularly medieval ones. The Stemma is a representation of the
30copying relationships between the witnesses in a Tradition, modelled with
31a connected rooted directed acyclic graph (CRDAG).
027d819c 32
33=head1 DOT SYNTAX
34
335a62ef 35The easiest way to define a stemma is to use a special form of the 'dot'
36syntax of GraphViz.
027d819c 37
38Each stemma opens with the line
39
40 digraph Stemma {
41
42and continues with a list of all manuscript witnesses in the stemma, whether
43extant witnesses or missing archetypes or hyparchetypes. Each of these is
44listed by its sigil on its own line, e.g.:
45
46 alpha [ class=hypothetical ]
47 1 [ class=hypothetical,label=* ]
48 Ms4 [ class=extant ]
49
50Extant witnesses are listed with class=extant; missing or postulated witnesses
51are listed with class=hypothetical. Anonymous hyparchetypes must be given a
52unique name or number, but can be represented as anonymous with the addition
53of 'label=*' to their lines. Greek letters or other special characters may be
54used as names, but they must always be wrapped in double quotes.
55
56Links between manuscripts are then listed with arrow notation, as below. These
57lines show the direction of copying, one step at a time, for the entire stemma.
58
59 alpha -> 1
60 1 -> Ms4
61
62The final line in the definition should be the closing brace:
63
64 }
65
66Thus for a set of extant manuscripts A, B, and C, where A and B were copied
67from the archetype O and C was copied from B, the definition would be:
68
69 digraph Stemma {
70 O [ class=hypothetical]
71 A [ class=extant ]
72 B [ class=extant ]
73 C [ class=extant ]
74 O -> A
75 O -> B
76 B -> C
77 }
78
79=head1 CONSTRUCTOR
80
81=head2 new
82
83The constructor. This should generally be called from Text::Tradition, but
84if called directly it takes the following options:
85
86=over
87
027d819c 88=item * dot - A filehandle open to a DOT representation of the stemma graph.
89
ea45d2a6 90=item * graph - If no DOT specification is given, you can pass a Graph object
91instead. The vertices of the graph should have an attribute 'class' set to
92either of the values 'extant' or 'hypothetical'.
93
94=item * is_undirected - If the graph specification (or graph object) is for an
95undirected graph (e.g. a phylogenetic tree), this should be set.
96
027d819c 97=back
98
64a36834 99=begin testing
100
64a36834 101use TryCatch;
102
103use_ok( 'Text::Tradition::Stemma' );
104
64a36834 105# Try to create a bad graph
ea45d2a6 106TODO: {
107 local $TODO = "cannot use stdout redirection trick with FastCGI";
64a36834 108my $baddotfh;
ea45d2a6 109 open( $baddotfh, 't/data/besoin_bad.dot' ) or die "Could not open test dotfile";
110 try {
111 my $stemma = Text::Tradition::Stemma->new( dot => $baddotfh );
112 ok( 0, "Created broken stemma from dotfile with syntax error" );
113 } catch( Text::Tradition::Error $e ) {
114 like( $e->message, qr/^Error trying to parse/, "Syntax error in dot threw exception" );
115 }
64a36834 116}
117
118# Create a good graph
119my $dotfh;
120open( $dotfh, 't/data/florilegium.dot' ) or die "Could not open test dotfile";
121binmode( $dotfh, ':utf8' );
ace5fce5 122my $stemma = Text::Tradition::Stemma->new( dot => $dotfh );
64a36834 123is( ref( $stemma ), 'Text::Tradition::Stemma', "Created stemma from good dotfile" );
124is( scalar $stemma->witnesses, 13, "Found correct number of extant witnesses" );
125is( scalar $stemma->hypotheticals, 8, "Found correct number of extant hypotheticals" );
126my $found_unicode_sigil;
127foreach my $h ( $stemma->hypotheticals ) {
128 $found_unicode_sigil = 1 if $h eq "\x{3b1}";
129}
130ok( $found_unicode_sigil, "Found a correctly encoded Unicode sigil" );
131
ea45d2a6 132# TODO Create stemma from graph, create stemma from undirected graph,
133# create stemma from incompletely-specified graph
134
64a36834 135=end testing
136
027d819c 137=cut
138
9463b0bf 139has collation => (
140 is => 'ro',
141 isa => 'Text::Tradition::Collation',
ea45d2a6 142 clearer => 'clear_collation', # interim measure to remove refs in DB
8d9a1cd8 143 weak_ref => 1,
9463b0bf 144 );
145
e05997e2 146has graph => (
147 is => 'rw',
148 isa => 'Graph',
149 predicate => 'has_graph',
150 );
ea45d2a6 151
152has is_undirected => (
153 is => 'ro',
154 isa => 'Bool',
155 default => undef,
156 writer => 'set_undirected',
157 );
c57be097 158
e05997e2 159sub BUILD {
160 my( $self, $args ) = @_;
161 # If we have been handed a dotfile, initialize it into a graph.
162 if( exists $args->{'dot'} ) {
027d819c 163 $self->_graph_from_dot( $args->{'dot'} );
ea45d2a6 164 } else {
165 }
c0ccdb62 166}
167
ea45d2a6 168before 'graph' => sub {
169 my $self = shift;
170 if( @_ ) {
171 # Make sure all unclassed graph nodes are marked extant.
172 my $g = $_[0];
173 throw( "Cannot set graph to a non-Graph object" )
174 unless ref( $g ) eq 'Graph';
175 foreach my $v ( $g->vertices ) {
176 unless( $g->has_vertex_attribute( $v, 'class' ) ) {
177 $g->set_vertex_attribute( $v, 'class', 'extant' );
178 }
179 }
180 $self->set_undirected( $g->is_undirected );
181 }
182};
183
027d819c 184sub _graph_from_dot {
8d9a1cd8 185 my( $self, $dotfh ) = @_;
8d9a1cd8 186 my $reader = Graph::Reader::Dot->new();
64a36834 187 # Redirect STDOUT in order to trap any error messages - syntax errors
188 # are evidently not fatal.
189 my $reader_out;
190 my $saved_stderr;
191 open $saved_stderr, ">&STDOUT";
192 close STDOUT;
193 open STDOUT, ">", \$reader_out;
8d9a1cd8 194 my $graph = $reader->read_graph( $dotfh );
64a36834 195 close STDOUT;
196 open STDOUT, ">", \$saved_stderr;
197 if( $reader_out && $reader_out =~ /error/s ) {
198 throw( "Error trying to parse dot: $reader_out" );
199 } elsif( !$graph ) {
200 throw( "Failed to create graph from dot" );
201 }
202 $self->graph( $graph );
8d9a1cd8 203}
204
027d819c 205=head1 METHODS
206
207=head2 as_dot( \%options )
208
209Returns a normal dot representation of the stemma layout, suitable for rendering
210with GraphViz. Options include:
211
212=over
213
214=item * graph - A hashref of global graph options.
215
216=item * node - A hashref of global node options.
217
218=item * edge - A hashref of global edge options.
219
220=back
221
222See the GraphViz documentation for the list of available options.
223
224=cut
225
8d9a1cd8 226sub as_dot {
e367f5c0 227 my( $self, $opts ) = @_;
7a7c249c 228
335a62ef 229 ## See if we are including any a.c. witnesses in this graph.
230 my $graph = $self->graph;
231 if( exists $opts->{'layerwits'} ) {
5c44c598 232 my $extant = {};
233 map { $extant->{$_} = 1 } $self->witnesses;
234 $graph = $self->situation_graph( $extant, $opts->{'layerwits'} );
335a62ef 235 }
236
7a7c249c 237 # Get default and specified options
e02340f3 238 my %graphopts = (
239 # 'ratio' => 1,
240 );
7a7c249c 241 my %nodeopts = (
242 'fontsize' => 11,
7a7c249c 243 'style' => 'filled',
244 'fillcolor' => 'white',
e02340f3 245 'color' => 'white',
7a7c249c 246 'shape' => 'ellipse', # Shape for the extant nodes
247 );
248 my %edgeopts = (
e02340f3 249 'arrowhead' => 'none',
7a7c249c 250 );
251 @graphopts{ keys %{$opts->{'graph'}} } = values %{$opts->{'graph'}}
252 if $opts->{'graph'};
253 @nodeopts{ keys %{$opts->{'node'}} } = values %{$opts->{'node'}}
254 if $opts->{'node'};
255 @edgeopts{ keys %{$opts->{'edge'}} } = values %{$opts->{'edge'}}
256 if $opts->{'edge'};
335a62ef 257
ea45d2a6 258 my $gdecl = $graph->is_directed ? 'digraph' : 'graph';
7a7c249c 259 my @dotlines;
ea45d2a6 260 push( @dotlines, "$gdecl stemma {" );
7a7c249c 261 ## Print out the global attributes
262 push( @dotlines, _make_dotline( 'graph', %graphopts ) ) if keys %graphopts;
263 push( @dotlines, _make_dotline( 'edge', %edgeopts ) ) if keys %edgeopts;
7a7c249c 264 push( @dotlines, _make_dotline( 'node', %nodeopts ) ) if keys %nodeopts;
265
266 # Add each of the nodes.
335a62ef 267 foreach my $n ( $graph->vertices ) {
268 if( $graph->has_vertex_attribute( $n, 'label' ) ) {
269 my $ltext = $graph->get_vertex_attribute( $n, 'label' );
e02340f3 270 push( @dotlines, _make_dotline( $n, 'label' => $ltext ) );
e79c23c7 271 } else {
7a7c249c 272 # Use the default display settings.
986bbd1b 273 $n = _dotquote( $n );
7a7c249c 274 push( @dotlines, " $n;" );
e79c23c7 275 }
276 }
7a7c249c 277 # Add each of our edges.
335a62ef 278 foreach my $e ( $graph->edges ) {
986bbd1b 279 my( $from, $to ) = map { _dotquote( $_ ) } @$e;
ea45d2a6 280 my $connector = $graph->is_directed ? '->' : '--';
281 push( @dotlines, " $from $connector $to;" );
7a7c249c 282 }
283 push( @dotlines, '}' );
e79c23c7 284
7a7c249c 285 return join( "\n", @dotlines );
286}
287
0bded693 288=head2 alter_graph( $dotstring )
289
290Alters the graph of this stemma according to the definition specified
291in $dotstring.
292
293=cut
294
295sub alter_graph {
296 my( $self, $dotstring ) = @_;
297 my $dotfh;
298 open $dotfh, '<', \$dotstring;
f90b2bde 299 binmode $dotfh, ':utf8';
0bded693 300 $self->_graph_from_dot( $dotfh );
301}
302
335a62ef 303=head2 editable( $opts )
027d819c 304
5c44c598 305=head2 editable_graph( $graph, $opts )
306
88a6bac5 307Returns a version of the graph rendered in our definition format. The
335a62ef 308output separates statements with a newline; set $opts->{'linesep'} to the
309empty string or to a space if the result is to be sent via JSON.
310
5c44c598 311If a situational version of the stemma is required, the arguments for
312situation_graph should be passed via $opts->{'extant'} and $opts->{'layerwits'}.
027d819c 313
314=cut
7a7c249c 315
7a7c249c 316sub editable {
5c44c598 317 my( $self, $opts ) = @_;
335a62ef 318 my $graph = $self->graph;
5c44c598 319 ## See if we need an editable version of a situational graph.
320 if( exists $opts->{'layerwits'} || exists $opts->{'extant'} ) {
321 my $extant = delete $opts->{'extant'} || {};
322 my $layerwits = delete $opts->{'layerwits'} || [];
323 $graph = $self->situation_graph( $extant, $layerwits );
335a62ef 324 }
5c44c598 325 return editable_graph( $graph, $opts );
326}
327
328sub editable_graph {
329 my( $graph, $opts ) = @_;
335a62ef 330
331 # Create the graph
332 my $join = ( $opts && exists $opts->{'linesep'} ) ? $opts->{'linesep'} : "\n";
ea45d2a6 333 my $gdecl = $graph->is_undirected ? 'graph' : 'digraph';
7a7c249c 334 my @dotlines;
ea45d2a6 335 push( @dotlines, "$gdecl stemma {" );
7a7c249c 336 my @real; # A cheap sort
5c44c598 337 foreach my $n ( sort $graph->vertices ) {
338 my $c = $graph->get_vertex_attribute( $n, 'class' );
7a7c249c 339 $c = 'extant' unless $c;
340 if( $c eq 'extant' ) {
341 push( @real, $n );
342 } else {
343 push( @dotlines, _make_dotline( $n, 'class' => $c ) );
344 }
e367f5c0 345 }
7a7c249c 346 # Now do the real ones
347 foreach my $n ( @real ) {
348 push( @dotlines, _make_dotline( $n, 'class' => 'extant' ) );
349 }
5c44c598 350 foreach my $e ( sort _by_vertex $graph->edges ) {
986bbd1b 351 my( $from, $to ) = map { _dotquote( $_ ) } @$e;
ea45d2a6 352 my $conn = $graph->is_undirected ? '--' : '->';
353 push( @dotlines, " $from $conn $to;" );
7a7c249c 354 }
355 push( @dotlines, '}' );
88a6bac5 356 return join( $join, @dotlines );
7a7c249c 357}
358
359sub _make_dotline {
360 my( $obj, %attr ) = @_;
361 my @pairs;
362 foreach my $k ( keys %attr ) {
986bbd1b 363 my $v = _dotquote( $attr{$k} );
364 push( @pairs, "$k=$v" );
7a7c249c 365 }
986bbd1b 366 return sprintf( " %s [ %s ];", _dotquote( $obj ), join( ', ', @pairs ) );
8d9a1cd8 367}
368
986bbd1b 369sub _dotquote {
370 my( $str ) = @_;
371 return $str if $str =~ /^[A-Za-z0-9]+$/;
372 $str =~ s/\"/\\\"/g;
373 $str = '"' . $str . '"';
374 return $str;
375}
376
7a7c249c 377sub _by_vertex {
378 return $a->[0].$a->[1] cmp $b->[0].$b->[1];
379}
8d9a1cd8 380
5c44c598 381=head2 situation_graph( $extant, $layered )
335a62ef 382
ea45d2a6 383Returns a graph which is the original stemma graph with all witnesses not
384in the %$extant hash marked as hypothetical, and witness layers added to
385the graph according to the list in @$layered. A layered (a.c.) witness is
386added as a parent of its main version, and additionally shares all other
387parents and children with that version.
335a62ef 388
389=cut
390
5c44c598 391sub situation_graph {
ace5fce5 392 my( $self, $extant, $layerwits, $layerlabel ) = @_;
5c44c598 393
394 my $graph = $self->graph->copy;
395 foreach my $vertex ( $graph->vertices ) {
396 # Set as extant any vertex that is extant in the stemma AND
397 # exists in the $extant hash.
398 my $class = 'hypothetical';
399 $class = 'extant' if exists $extant->{$vertex} && $extant->{$vertex} &&
400 $self->graph->get_vertex_attribute( $vertex, 'class' ) ne 'hypothetical';
401 $graph->set_vertex_attribute( $vertex, 'class', $class );
402 }
403
335a62ef 404 # For each 'layered' witness in the layerwits array, add it to the graph
405 # as an ancestor of the 'main' witness, and otherwise with the same parent/
406 # child links as its main analogue.
407 # TOOD Handle case where B is copied from A but corrected from C
ace5fce5 408 $layerlabel = ' (a.c.)' unless $layerlabel;
335a62ef 409 foreach my $lw ( @$layerwits ) {
410 # Add the layered witness and set it with the same attributes as
411 # its 'main' analogue
5c44c598 412 throw( "Cannot add a layer to a hypothetical witness $lw" )
413 unless $graph->get_vertex_attribute( $lw, 'class' ) eq 'extant';
ace5fce5 414 my $lwac = $lw . $layerlabel;
335a62ef 415 $graph->add_vertex( $lwac );
416 $graph->set_vertex_attributes( $lwac,
417 $graph->get_vertex_attributes( $lw ) );
418
419 # Set it as ancestor to the main witness
420 $graph->add_edge( $lwac, $lw );
421
422 # Give it the same ancestors and descendants as the main witness has,
423 # bearing in mind that those ancestors and descendants might also just
424 # have had a layered witness defined.
425 foreach my $v ( $graph->predecessors( $lw ) ) {
426 next if $v eq $lwac; # Don't add a loop
427 $graph->add_edge( $v, $lwac );
ace5fce5 428 $graph->add_edge( $v.$layerlabel, $lwac )
429 if $graph->has_vertex( $v.$layerlabel );
335a62ef 430 }
431 foreach my $v ( $graph->successors( $lw ) ) {
432 next if $v eq $lwac; # but this shouldn't occur
433 $graph->add_edge( $lwac, $v );
ace5fce5 434 $graph->add_edge( $lwac, $v.$layerlabel )
435 if $graph->has_vertex( $v.$layerlabel );
335a62ef 436 }
437 }
438 return $graph;
439}
440
027d819c 441=head2 as_svg
442
443Returns an SVG representation of the graph, calling as_dot first.
444
445=cut
446
8d9a1cd8 447sub as_svg {
448 my( $self, $opts ) = @_;
449 my $dot = $self->as_dot( $opts );
ea45d2a6 450 my @cmd = ( '-Tsvg' );
451 unshift( @cmd, $self->is_undirected ? 'neato' : 'dot' );
3bf5d6f1 452 my $svg;
e79c23c7 453 my $dotfile = File::Temp->new();
454 ## TODO REMOVE
455 # $dotfile->unlink_on_destroy(0);
456 binmode $dotfile, ':utf8';
8d9a1cd8 457 print $dotfile $dot;
459c39b3 458 close $dotfile;
e79c23c7 459 push( @cmd, $dotfile->filename );
460 run( \@cmd, ">", binary(), \$svg );
3bf5d6f1 461 # HACK: Parse the SVG and change the dimensions.
5a7e26a9 462 # Get rid of width and height attributes to allow scaling.
c57be097 463 if( $opts->{'size'} ) {
428bcf0b 464 require XML::LibXML;
459c39b3 465 my $parser = XML::LibXML->new( load_ext_dtd => 0 );
466 my $svgdoc;
467 eval {
468 $svgdoc = $parser->parse_string( decode_utf8( $svg ) );
469 };
470 throw( "Could not reparse SVG: $@" ) if $@;
c57be097 471 my( $ew, $eh ) = @{$opts->{'size'}};
472 # If the graph is wider than it is tall, set width to ew and remove height.
473 # Otherwise set height to eh and remove width.
ea45d2a6 474 # TODO Also scale the viewbox
c57be097 475 my $width = $svgdoc->documentElement->getAttribute('width');
476 my $height = $svgdoc->documentElement->getAttribute('height');
477 $width =~ s/\D+//g;
478 $height =~ s/\D+//g;
14e6a110 479 my( $remove, $keep, $val, $viewbox );
c57be097 480 if( $width > $height ) {
481 $remove = 'height';
482 $keep = 'width';
483 $val = $ew . 'px';
14e6a110 484 my $vbheight = $width / $ew * $height;
485 $viewbox = "0.00 0.00 $width.00" . sprintf( "%.2f", $vbheight );
c57be097 486 } else {
487 $remove = 'width';
488 $keep = 'height';
489 $val = $eh . 'px';
14e6a110 490 my $vbwidth = $height / $eh * $width;
491 $viewbox = "0.00 0.00 " . sprintf( "%.2f", $vbwidth ) . " $height.00";
c57be097 492 }
493 $svgdoc->documentElement->removeAttribute( $remove );
494 $svgdoc->documentElement->setAttribute( $keep, $val );
14e6a110 495 $svgdoc->documentElement->removeAttribute( 'viewBox' );
496 $svgdoc->documentElement->setAttribute( 'viewBox', $viewbox );
428bcf0b 497 $svg = $svgdoc->toString();
c57be097 498 }
3bf5d6f1 499 # Return the result
428bcf0b 500 return decode_utf8( $svg );
e79c23c7 501}
502
027d819c 503=head2 witnesses
504
505Returns a list of the extant witnesses represented in the stemma.
506
507=cut
508
08e0fb85 509sub witnesses {
510 my $self = shift;
511 my @wits = grep { $self->graph->get_vertex_attribute( $_, 'class' ) eq 'extant' }
512 $self->graph->vertices;
513 return @wits;
514}
515
06e7cbc7 516=head2 hypotheticals
517
518Returns a list of the hypothetical witnesses represented in the stemma.
519
520=cut
521
bebec0e9 522sub hypotheticals {
523 my $self = shift;
524 my @wits = grep
525 { $self->graph->get_vertex_attribute( $_, 'class' ) eq 'hypothetical' }
526 $self->graph->vertices;
527 return @wits;
528}
529
ea45d2a6 530=head2 root( $root_vertex ) {
531
532If the stemma graph is undirected, make it directed with $root_vertex at the root.
533If it is directed, re-root it.
534
535=cut
536
537sub root_graph {
538 my( $self, $rootvertex ) = @_;
539 my $graph;
540 if( $self->is_undirected ) {
541 $graph = $self->graph;
542 } else {
543 # Make an undirected version of this graph.
544 $graph = $self->graph->undirected_copy();
545 }
546 my $rooted = Graph->new();
547 $rooted->add_vertex( $rootvertex );
548 my @next = ( $rootvertex );
549 while( @next ) {
550 my @children;
551 foreach my $v ( @next ) {
552 # Place its not-placed neighbors (ergo children) in the tree
553 # and connect them
554 foreach my $n ( grep { !$rooted->has_vertex( $_ ) }
555 $graph->neighbors( $v ) ) {
556 $rooted->add_vertex( $n );
557 $rooted->add_edge( $v, $n );
558 push( @children, $n );
559 }
560 }
561 @next = @children;
562 }
563 # Set the vertex classes
564 map { $rooted->set_vertex_attribute( $_, 'class', 'hypothetical' ) }
565 $self->graph->hypotheticals;
566 map { $rooted->set_vertex_class( $_, 'class', 'extant' ) }
567 $self->graph->witnesses;
568 return $rooted;
569}
570
571
63778331 572sub throw {
573 Text::Tradition::Error->throw(
574 'ident' => 'Stemma error',
575 'message' => $_[0],
576 );
577}
578
579
9463b0bf 580no Moose;
581__PACKAGE__->meta->make_immutable;
582
5831;
027d819c 584
585=head1 LICENSE
586
587This package is free software and is provided "as is" without express
588or implied warranty. You can redistribute it and/or modify it under
589the same terms as Perl itself.
590
591=head1 AUTHOR
592
593Tara L Andrews E<lt>aurum@cpan.orgE<gt>