fix stemma test
[scpubgit/stemmatology.git] / lib / Text / Tradition / Stemma.pm
CommitLineData
9463b0bf 1package Text::Tradition::Stemma;
2
40f19742 3use Bio::Phylo::IO;
e79c23c7 4use Encode qw( decode_utf8 );
9463b0bf 5use File::Temp;
e05997e2 6use Graph;
7use Graph::Reader::Dot;
e79c23c7 8use IPC::Run qw/ run binary /;
63778331 9use Text::Tradition::Error;
40f19742 10use Moose;
9463b0bf 11
027d819c 12=head1 NAME
13
14Text::Tradition::Stemma - a representation of a I<stemma codicum> for a Text::Tradition
15
16=head1 SYNOPSIS
17
18 use Text::Tradition;
19 my $t = Text::Tradition->new(
20 'name' => 'this is a text',
21 'input' => 'TEI',
22 'file' => '/path/to/tei_parallel_seg_file.xml' );
23
24 my $s = $tradition->add_stemma( dotfile => '/path/to/stemma.dot' );
25
26=head1 DESCRIPTION
27
28Text::Tradition is a library for representation and analysis of collated
335a62ef 29texts, particularly medieval ones. The Stemma is a representation of the
30copying relationships between the witnesses in a Tradition, modelled with
31a connected rooted directed acyclic graph (CRDAG).
027d819c 32
33=head1 DOT SYNTAX
34
335a62ef 35The easiest way to define a stemma is to use a special form of the 'dot'
36syntax of GraphViz.
027d819c 37
38Each stemma opens with the line
39
40 digraph Stemma {
41
42and continues with a list of all manuscript witnesses in the stemma, whether
43extant witnesses or missing archetypes or hyparchetypes. Each of these is
44listed by its sigil on its own line, e.g.:
45
46 alpha [ class=hypothetical ]
47 1 [ class=hypothetical,label=* ]
48 Ms4 [ class=extant ]
49
50Extant witnesses are listed with class=extant; missing or postulated witnesses
51are listed with class=hypothetical. Anonymous hyparchetypes must be given a
52unique name or number, but can be represented as anonymous with the addition
53of 'label=*' to their lines. Greek letters or other special characters may be
54used as names, but they must always be wrapped in double quotes.
55
56Links between manuscripts are then listed with arrow notation, as below. These
57lines show the direction of copying, one step at a time, for the entire stemma.
58
59 alpha -> 1
60 1 -> Ms4
61
62The final line in the definition should be the closing brace:
63
64 }
65
66Thus for a set of extant manuscripts A, B, and C, where A and B were copied
67from the archetype O and C was copied from B, the definition would be:
68
69 digraph Stemma {
70 O [ class=hypothetical]
71 A [ class=extant ]
72 B [ class=extant ]
73 C [ class=extant ]
74 O -> A
75 O -> B
76 B -> C
77 }
78
79=head1 CONSTRUCTOR
80
81=head2 new
82
83The constructor. This should generally be called from Text::Tradition, but
84if called directly it takes the following options:
85
86=over
87
027d819c 88=item * dot - A filehandle open to a DOT representation of the stemma graph.
89
ea45d2a6 90=item * graph - If no DOT specification is given, you can pass a Graph object
91instead. The vertices of the graph should have an attribute 'class' set to
92either of the values 'extant' or 'hypothetical'.
93
94=item * is_undirected - If the graph specification (or graph object) is for an
95undirected graph (e.g. a phylogenetic tree), this should be set.
96
027d819c 97=back
98
64a36834 99=begin testing
100
64a36834 101use TryCatch;
102
103use_ok( 'Text::Tradition::Stemma' );
104
64a36834 105# Try to create a bad graph
ea45d2a6 106TODO: {
107 local $TODO = "cannot use stdout redirection trick with FastCGI";
836e0546 108 my $baddotfh;
ea45d2a6 109 open( $baddotfh, 't/data/besoin_bad.dot' ) or die "Could not open test dotfile";
110 try {
111 my $stemma = Text::Tradition::Stemma->new( dot => $baddotfh );
112 ok( 0, "Created broken stemma from dotfile with syntax error" );
113 } catch( Text::Tradition::Error $e ) {
114 like( $e->message, qr/^Error trying to parse/, "Syntax error in dot threw exception" );
115 }
64a36834 116}
117
118# Create a good graph
119my $dotfh;
120open( $dotfh, 't/data/florilegium.dot' ) or die "Could not open test dotfile";
121binmode( $dotfh, ':utf8' );
ace5fce5 122my $stemma = Text::Tradition::Stemma->new( dot => $dotfh );
64a36834 123is( ref( $stemma ), 'Text::Tradition::Stemma', "Created stemma from good dotfile" );
124is( scalar $stemma->witnesses, 13, "Found correct number of extant witnesses" );
125is( scalar $stemma->hypotheticals, 8, "Found correct number of extant hypotheticals" );
126my $found_unicode_sigil;
127foreach my $h ( $stemma->hypotheticals ) {
128 $found_unicode_sigil = 1 if $h eq "\x{3b1}";
129}
130ok( $found_unicode_sigil, "Found a correctly encoded Unicode sigil" );
131
ea45d2a6 132# TODO Create stemma from graph, create stemma from undirected graph,
133# create stemma from incompletely-specified graph
134
64a36834 135=end testing
136
027d819c 137=cut
138
9463b0bf 139has collation => (
140 is => 'ro',
141 isa => 'Text::Tradition::Collation',
ea45d2a6 142 clearer => 'clear_collation', # interim measure to remove refs in DB
8d9a1cd8 143 weak_ref => 1,
9463b0bf 144 );
145
e05997e2 146has graph => (
147 is => 'rw',
148 isa => 'Graph',
149 predicate => 'has_graph',
150 );
ea45d2a6 151
152has is_undirected => (
153 is => 'ro',
154 isa => 'Bool',
155 default => undef,
156 writer => 'set_undirected',
157 );
c57be097 158
e05997e2 159sub BUILD {
160 my( $self, $args ) = @_;
161 # If we have been handed a dotfile, initialize it into a graph.
162 if( exists $args->{'dot'} ) {
027d819c 163 $self->_graph_from_dot( $args->{'dot'} );
ea45d2a6 164 } else {
165 }
c0ccdb62 166}
167
ea45d2a6 168before 'graph' => sub {
169 my $self = shift;
170 if( @_ ) {
171 # Make sure all unclassed graph nodes are marked extant.
172 my $g = $_[0];
173 throw( "Cannot set graph to a non-Graph object" )
174 unless ref( $g ) eq 'Graph';
175 foreach my $v ( $g->vertices ) {
176 unless( $g->has_vertex_attribute( $v, 'class' ) ) {
177 $g->set_vertex_attribute( $v, 'class', 'extant' );
178 }
179 }
180 $self->set_undirected( $g->is_undirected );
181 }
182};
183
027d819c 184sub _graph_from_dot {
8d9a1cd8 185 my( $self, $dotfh ) = @_;
8d9a1cd8 186 my $reader = Graph::Reader::Dot->new();
64a36834 187 # Redirect STDOUT in order to trap any error messages - syntax errors
188 # are evidently not fatal.
d34fdf7b 189 # TODO This breaks under FastCGI/Apache; reconsider.
64a36834 190 my $reader_out;
d34fdf7b 191 #my $saved_stderr;
192 #open $saved_stderr, ">&STDOUT";
193 #close STDOUT;
194 #open STDOUT, ">", \$reader_out;
8d9a1cd8 195 my $graph = $reader->read_graph( $dotfh );
d34fdf7b 196 #close STDOUT;
197 #open STDOUT, ">", \$saved_stderr;
64a36834 198 if( $reader_out && $reader_out =~ /error/s ) {
199 throw( "Error trying to parse dot: $reader_out" );
200 } elsif( !$graph ) {
201 throw( "Failed to create graph from dot" );
202 }
203 $self->graph( $graph );
8d9a1cd8 204}
205
027d819c 206=head1 METHODS
207
208=head2 as_dot( \%options )
209
210Returns a normal dot representation of the stemma layout, suitable for rendering
211with GraphViz. Options include:
212
213=over
214
215=item * graph - A hashref of global graph options.
216
217=item * node - A hashref of global node options.
218
219=item * edge - A hashref of global edge options.
220
221=back
222
223See the GraphViz documentation for the list of available options.
224
225=cut
226
8d9a1cd8 227sub as_dot {
e367f5c0 228 my( $self, $opts ) = @_;
7a7c249c 229
335a62ef 230 ## See if we are including any a.c. witnesses in this graph.
231 my $graph = $self->graph;
232 if( exists $opts->{'layerwits'} ) {
5c44c598 233 my $extant = {};
234 map { $extant->{$_} = 1 } $self->witnesses;
235 $graph = $self->situation_graph( $extant, $opts->{'layerwits'} );
335a62ef 236 }
237
7a7c249c 238 # Get default and specified options
e02340f3 239 my %graphopts = (
240 # 'ratio' => 1,
241 );
7a7c249c 242 my %nodeopts = (
243 'fontsize' => 11,
7a7c249c 244 'style' => 'filled',
245 'fillcolor' => 'white',
e02340f3 246 'color' => 'white',
7a7c249c 247 'shape' => 'ellipse', # Shape for the extant nodes
248 );
249 my %edgeopts = (
e02340f3 250 'arrowhead' => 'none',
7a7c249c 251 );
252 @graphopts{ keys %{$opts->{'graph'}} } = values %{$opts->{'graph'}}
253 if $opts->{'graph'};
254 @nodeopts{ keys %{$opts->{'node'}} } = values %{$opts->{'node'}}
255 if $opts->{'node'};
256 @edgeopts{ keys %{$opts->{'edge'}} } = values %{$opts->{'edge'}}
257 if $opts->{'edge'};
335a62ef 258
ea45d2a6 259 my $gdecl = $graph->is_directed ? 'digraph' : 'graph';
7a7c249c 260 my @dotlines;
ea45d2a6 261 push( @dotlines, "$gdecl stemma {" );
7a7c249c 262 ## Print out the global attributes
263 push( @dotlines, _make_dotline( 'graph', %graphopts ) ) if keys %graphopts;
264 push( @dotlines, _make_dotline( 'edge', %edgeopts ) ) if keys %edgeopts;
7a7c249c 265 push( @dotlines, _make_dotline( 'node', %nodeopts ) ) if keys %nodeopts;
266
267 # Add each of the nodes.
335a62ef 268 foreach my $n ( $graph->vertices ) {
269 if( $graph->has_vertex_attribute( $n, 'label' ) ) {
270 my $ltext = $graph->get_vertex_attribute( $n, 'label' );
e02340f3 271 push( @dotlines, _make_dotline( $n, 'label' => $ltext ) );
e79c23c7 272 } else {
7a7c249c 273 # Use the default display settings.
986bbd1b 274 $n = _dotquote( $n );
7a7c249c 275 push( @dotlines, " $n;" );
e79c23c7 276 }
277 }
7a7c249c 278 # Add each of our edges.
335a62ef 279 foreach my $e ( $graph->edges ) {
986bbd1b 280 my( $from, $to ) = map { _dotquote( $_ ) } @$e;
ea45d2a6 281 my $connector = $graph->is_directed ? '->' : '--';
282 push( @dotlines, " $from $connector $to;" );
7a7c249c 283 }
284 push( @dotlines, '}' );
e79c23c7 285
7a7c249c 286 return join( "\n", @dotlines );
287}
288
0bded693 289=head2 alter_graph( $dotstring )
290
291Alters the graph of this stemma according to the definition specified
292in $dotstring.
293
294=cut
295
296sub alter_graph {
297 my( $self, $dotstring ) = @_;
298 my $dotfh;
299 open $dotfh, '<', \$dotstring;
f90b2bde 300 binmode $dotfh, ':utf8';
0bded693 301 $self->_graph_from_dot( $dotfh );
302}
303
335a62ef 304=head2 editable( $opts )
027d819c 305
5c44c598 306=head2 editable_graph( $graph, $opts )
307
88a6bac5 308Returns a version of the graph rendered in our definition format. The
335a62ef 309output separates statements with a newline; set $opts->{'linesep'} to the
310empty string or to a space if the result is to be sent via JSON.
311
5c44c598 312If a situational version of the stemma is required, the arguments for
313situation_graph should be passed via $opts->{'extant'} and $opts->{'layerwits'}.
027d819c 314
315=cut
7a7c249c 316
7a7c249c 317sub editable {
5c44c598 318 my( $self, $opts ) = @_;
335a62ef 319 my $graph = $self->graph;
5c44c598 320 ## See if we need an editable version of a situational graph.
321 if( exists $opts->{'layerwits'} || exists $opts->{'extant'} ) {
322 my $extant = delete $opts->{'extant'} || {};
323 my $layerwits = delete $opts->{'layerwits'} || [];
324 $graph = $self->situation_graph( $extant, $layerwits );
335a62ef 325 }
5c44c598 326 return editable_graph( $graph, $opts );
327}
328
329sub editable_graph {
330 my( $graph, $opts ) = @_;
335a62ef 331
332 # Create the graph
333 my $join = ( $opts && exists $opts->{'linesep'} ) ? $opts->{'linesep'} : "\n";
ea45d2a6 334 my $gdecl = $graph->is_undirected ? 'graph' : 'digraph';
7a7c249c 335 my @dotlines;
ea45d2a6 336 push( @dotlines, "$gdecl stemma {" );
7a7c249c 337 my @real; # A cheap sort
5c44c598 338 foreach my $n ( sort $graph->vertices ) {
339 my $c = $graph->get_vertex_attribute( $n, 'class' );
7a7c249c 340 $c = 'extant' unless $c;
341 if( $c eq 'extant' ) {
342 push( @real, $n );
343 } else {
344 push( @dotlines, _make_dotline( $n, 'class' => $c ) );
345 }
e367f5c0 346 }
7a7c249c 347 # Now do the real ones
348 foreach my $n ( @real ) {
349 push( @dotlines, _make_dotline( $n, 'class' => 'extant' ) );
350 }
5c44c598 351 foreach my $e ( sort _by_vertex $graph->edges ) {
986bbd1b 352 my( $from, $to ) = map { _dotquote( $_ ) } @$e;
ea45d2a6 353 my $conn = $graph->is_undirected ? '--' : '->';
354 push( @dotlines, " $from $conn $to;" );
7a7c249c 355 }
356 push( @dotlines, '}' );
88a6bac5 357 return join( $join, @dotlines );
7a7c249c 358}
359
360sub _make_dotline {
361 my( $obj, %attr ) = @_;
362 my @pairs;
363 foreach my $k ( keys %attr ) {
986bbd1b 364 my $v = _dotquote( $attr{$k} );
365 push( @pairs, "$k=$v" );
7a7c249c 366 }
986bbd1b 367 return sprintf( " %s [ %s ];", _dotquote( $obj ), join( ', ', @pairs ) );
8d9a1cd8 368}
369
986bbd1b 370sub _dotquote {
371 my( $str ) = @_;
372 return $str if $str =~ /^[A-Za-z0-9]+$/;
373 $str =~ s/\"/\\\"/g;
374 $str = '"' . $str . '"';
375 return $str;
376}
377
7a7c249c 378sub _by_vertex {
379 return $a->[0].$a->[1] cmp $b->[0].$b->[1];
380}
8d9a1cd8 381
5c44c598 382=head2 situation_graph( $extant, $layered )
335a62ef 383
ea45d2a6 384Returns a graph which is the original stemma graph with all witnesses not
385in the %$extant hash marked as hypothetical, and witness layers added to
386the graph according to the list in @$layered. A layered (a.c.) witness is
387added as a parent of its main version, and additionally shares all other
388parents and children with that version.
335a62ef 389
390=cut
391
5c44c598 392sub situation_graph {
ace5fce5 393 my( $self, $extant, $layerwits, $layerlabel ) = @_;
5c44c598 394
395 my $graph = $self->graph->copy;
396 foreach my $vertex ( $graph->vertices ) {
397 # Set as extant any vertex that is extant in the stemma AND
398 # exists in the $extant hash.
399 my $class = 'hypothetical';
400 $class = 'extant' if exists $extant->{$vertex} && $extant->{$vertex} &&
401 $self->graph->get_vertex_attribute( $vertex, 'class' ) ne 'hypothetical';
402 $graph->set_vertex_attribute( $vertex, 'class', $class );
403 }
404
335a62ef 405 # For each 'layered' witness in the layerwits array, add it to the graph
406 # as an ancestor of the 'main' witness, and otherwise with the same parent/
407 # child links as its main analogue.
408 # TOOD Handle case where B is copied from A but corrected from C
ace5fce5 409 $layerlabel = ' (a.c.)' unless $layerlabel;
335a62ef 410 foreach my $lw ( @$layerwits ) {
411 # Add the layered witness and set it with the same attributes as
412 # its 'main' analogue
5c44c598 413 throw( "Cannot add a layer to a hypothetical witness $lw" )
414 unless $graph->get_vertex_attribute( $lw, 'class' ) eq 'extant';
ace5fce5 415 my $lwac = $lw . $layerlabel;
335a62ef 416 $graph->add_vertex( $lwac );
417 $graph->set_vertex_attributes( $lwac,
418 $graph->get_vertex_attributes( $lw ) );
419
420 # Set it as ancestor to the main witness
421 $graph->add_edge( $lwac, $lw );
422
423 # Give it the same ancestors and descendants as the main witness has,
424 # bearing in mind that those ancestors and descendants might also just
425 # have had a layered witness defined.
426 foreach my $v ( $graph->predecessors( $lw ) ) {
427 next if $v eq $lwac; # Don't add a loop
428 $graph->add_edge( $v, $lwac );
ace5fce5 429 $graph->add_edge( $v.$layerlabel, $lwac )
430 if $graph->has_vertex( $v.$layerlabel );
335a62ef 431 }
432 foreach my $v ( $graph->successors( $lw ) ) {
433 next if $v eq $lwac; # but this shouldn't occur
434 $graph->add_edge( $lwac, $v );
ace5fce5 435 $graph->add_edge( $lwac, $v.$layerlabel )
436 if $graph->has_vertex( $v.$layerlabel );
335a62ef 437 }
438 }
439 return $graph;
440}
441
027d819c 442=head2 as_svg
443
444Returns an SVG representation of the graph, calling as_dot first.
445
446=cut
447
8d9a1cd8 448sub as_svg {
449 my( $self, $opts ) = @_;
450 my $dot = $self->as_dot( $opts );
ea45d2a6 451 my @cmd = ( '-Tsvg' );
452 unshift( @cmd, $self->is_undirected ? 'neato' : 'dot' );
3bf5d6f1 453 my $svg;
e79c23c7 454 my $dotfile = File::Temp->new();
455 ## TODO REMOVE
456 # $dotfile->unlink_on_destroy(0);
457 binmode $dotfile, ':utf8';
8d9a1cd8 458 print $dotfile $dot;
459c39b3 459 close $dotfile;
e79c23c7 460 push( @cmd, $dotfile->filename );
461 run( \@cmd, ">", binary(), \$svg );
3bf5d6f1 462 # HACK: Parse the SVG and change the dimensions.
5a7e26a9 463 # Get rid of width and height attributes to allow scaling.
c57be097 464 if( $opts->{'size'} ) {
428bcf0b 465 require XML::LibXML;
459c39b3 466 my $parser = XML::LibXML->new( load_ext_dtd => 0 );
467 my $svgdoc;
468 eval {
469 $svgdoc = $parser->parse_string( decode_utf8( $svg ) );
470 };
471 throw( "Could not reparse SVG: $@" ) if $@;
c57be097 472 my( $ew, $eh ) = @{$opts->{'size'}};
473 # If the graph is wider than it is tall, set width to ew and remove height.
474 # Otherwise set height to eh and remove width.
ea45d2a6 475 # TODO Also scale the viewbox
c57be097 476 my $width = $svgdoc->documentElement->getAttribute('width');
477 my $height = $svgdoc->documentElement->getAttribute('height');
478 $width =~ s/\D+//g;
479 $height =~ s/\D+//g;
14e6a110 480 my( $remove, $keep, $val, $viewbox );
c57be097 481 if( $width > $height ) {
482 $remove = 'height';
483 $keep = 'width';
484 $val = $ew . 'px';
14e6a110 485 my $vbheight = $width / $ew * $height;
486 $viewbox = "0.00 0.00 $width.00" . sprintf( "%.2f", $vbheight );
c57be097 487 } else {
488 $remove = 'width';
489 $keep = 'height';
490 $val = $eh . 'px';
14e6a110 491 my $vbwidth = $height / $eh * $width;
492 $viewbox = "0.00 0.00 " . sprintf( "%.2f", $vbwidth ) . " $height.00";
c57be097 493 }
494 $svgdoc->documentElement->removeAttribute( $remove );
495 $svgdoc->documentElement->setAttribute( $keep, $val );
14e6a110 496 $svgdoc->documentElement->removeAttribute( 'viewBox' );
497 $svgdoc->documentElement->setAttribute( 'viewBox', $viewbox );
428bcf0b 498 $svg = $svgdoc->toString();
c57be097 499 }
3bf5d6f1 500 # Return the result
428bcf0b 501 return decode_utf8( $svg );
e79c23c7 502}
503
027d819c 504=head2 witnesses
505
506Returns a list of the extant witnesses represented in the stemma.
507
508=cut
509
08e0fb85 510sub witnesses {
511 my $self = shift;
512 my @wits = grep { $self->graph->get_vertex_attribute( $_, 'class' ) eq 'extant' }
513 $self->graph->vertices;
514 return @wits;
515}
516
06e7cbc7 517=head2 hypotheticals
518
519Returns a list of the hypothetical witnesses represented in the stemma.
520
521=cut
522
bebec0e9 523sub hypotheticals {
524 my $self = shift;
525 my @wits = grep
526 { $self->graph->get_vertex_attribute( $_, 'class' ) eq 'hypothetical' }
527 $self->graph->vertices;
528 return @wits;
529}
530
ea45d2a6 531=head2 root( $root_vertex ) {
532
533If the stemma graph is undirected, make it directed with $root_vertex at the root.
534If it is directed, re-root it.
535
536=cut
537
538sub root_graph {
539 my( $self, $rootvertex ) = @_;
540 my $graph;
541 if( $self->is_undirected ) {
542 $graph = $self->graph;
543 } else {
544 # Make an undirected version of this graph.
545 $graph = $self->graph->undirected_copy();
546 }
547 my $rooted = Graph->new();
548 $rooted->add_vertex( $rootvertex );
549 my @next = ( $rootvertex );
550 while( @next ) {
551 my @children;
552 foreach my $v ( @next ) {
553 # Place its not-placed neighbors (ergo children) in the tree
554 # and connect them
555 foreach my $n ( grep { !$rooted->has_vertex( $_ ) }
556 $graph->neighbors( $v ) ) {
557 $rooted->add_vertex( $n );
558 $rooted->add_edge( $v, $n );
559 push( @children, $n );
560 }
561 }
562 @next = @children;
563 }
564 # Set the vertex classes
565 map { $rooted->set_vertex_attribute( $_, 'class', 'hypothetical' ) }
566 $self->graph->hypotheticals;
567 map { $rooted->set_vertex_class( $_, 'class', 'extant' ) }
568 $self->graph->witnesses;
569 return $rooted;
570}
571
572
63778331 573sub throw {
574 Text::Tradition::Error->throw(
575 'ident' => 'Stemma error',
576 'message' => $_[0],
577 );
578}
579
580
9463b0bf 581no Moose;
582__PACKAGE__->meta->make_immutable;
583
5841;
027d819c 585
586=head1 LICENSE
587
588This package is free software and is provided "as is" without express
589or implied warranty. You can redistribute it and/or modify it under
590the same terms as Perl itself.
591
592=head1 AUTHOR
593
594Tara L Andrews E<lt>aurum@cpan.orgE<gt>