prevent Tradition from association collation with stemma
[scpubgit/stemmatology.git] / lib / Text / Tradition / Stemma.pm
CommitLineData
9463b0bf 1package Text::Tradition::Stemma;
2
40f19742 3use Bio::Phylo::IO;
e79c23c7 4use Encode qw( decode_utf8 );
9463b0bf 5use File::Temp;
e05997e2 6use Graph;
7use Graph::Reader::Dot;
e79c23c7 8use IPC::Run qw/ run binary /;
63778331 9use Text::Tradition::Error;
b02332ca 10use Text::Tradition::StemmaUtil qw/ character_input phylip_pars parse_newick /;
40f19742 11use Moose;
9463b0bf 12
027d819c 13=head1 NAME
14
15Text::Tradition::Stemma - a representation of a I<stemma codicum> for a Text::Tradition
16
17=head1 SYNOPSIS
18
19 use Text::Tradition;
20 my $t = Text::Tradition->new(
21 'name' => 'this is a text',
22 'input' => 'TEI',
23 'file' => '/path/to/tei_parallel_seg_file.xml' );
24
25 my $s = $tradition->add_stemma( dotfile => '/path/to/stemma.dot' );
26
27=head1 DESCRIPTION
28
29Text::Tradition is a library for representation and analysis of collated
335a62ef 30texts, particularly medieval ones. The Stemma is a representation of the
31copying relationships between the witnesses in a Tradition, modelled with
32a connected rooted directed acyclic graph (CRDAG).
027d819c 33
34=head1 DOT SYNTAX
35
335a62ef 36The easiest way to define a stemma is to use a special form of the 'dot'
37syntax of GraphViz.
027d819c 38
39Each stemma opens with the line
40
41 digraph Stemma {
42
43and continues with a list of all manuscript witnesses in the stemma, whether
44extant witnesses or missing archetypes or hyparchetypes. Each of these is
45listed by its sigil on its own line, e.g.:
46
47 alpha [ class=hypothetical ]
48 1 [ class=hypothetical,label=* ]
49 Ms4 [ class=extant ]
50
51Extant witnesses are listed with class=extant; missing or postulated witnesses
52are listed with class=hypothetical. Anonymous hyparchetypes must be given a
53unique name or number, but can be represented as anonymous with the addition
54of 'label=*' to their lines. Greek letters or other special characters may be
55used as names, but they must always be wrapped in double quotes.
56
57Links between manuscripts are then listed with arrow notation, as below. These
58lines show the direction of copying, one step at a time, for the entire stemma.
59
60 alpha -> 1
61 1 -> Ms4
62
63The final line in the definition should be the closing brace:
64
65 }
66
67Thus for a set of extant manuscripts A, B, and C, where A and B were copied
68from the archetype O and C was copied from B, the definition would be:
69
70 digraph Stemma {
71 O [ class=hypothetical]
72 A [ class=extant ]
73 B [ class=extant ]
74 C [ class=extant ]
75 O -> A
76 O -> B
77 B -> C
78 }
79
80=head1 CONSTRUCTOR
81
82=head2 new
83
84The constructor. This should generally be called from Text::Tradition, but
85if called directly it takes the following options:
86
87=over
88
027d819c 89=item * dot - A filehandle open to a DOT representation of the stemma graph.
90
91=back
92
64a36834 93=begin testing
94
64a36834 95use TryCatch;
96
97use_ok( 'Text::Tradition::Stemma' );
98
64a36834 99# Try to create a bad graph
100my $baddotfh;
101open( $baddotfh, 't/data/besoin_bad.dot' ) or die "Could not open test dotfile";
102try {
ace5fce5 103 my $stemma = Text::Tradition::Stemma->new( dot => $baddotfh );
64a36834 104 ok( 0, "Created broken stemma from dotfile with syntax error" );
105} catch( Text::Tradition::Error $e ) {
106 like( $e->message, qr/^Error trying to parse/, "Syntax error in dot threw exception" );
107}
108
109# Create a good graph
110my $dotfh;
111open( $dotfh, 't/data/florilegium.dot' ) or die "Could not open test dotfile";
112binmode( $dotfh, ':utf8' );
ace5fce5 113my $stemma = Text::Tradition::Stemma->new( dot => $dotfh );
64a36834 114is( ref( $stemma ), 'Text::Tradition::Stemma', "Created stemma from good dotfile" );
115is( scalar $stemma->witnesses, 13, "Found correct number of extant witnesses" );
116is( scalar $stemma->hypotheticals, 8, "Found correct number of extant hypotheticals" );
117my $found_unicode_sigil;
118foreach my $h ( $stemma->hypotheticals ) {
119 $found_unicode_sigil = 1 if $h eq "\x{3b1}";
120}
121ok( $found_unicode_sigil, "Found a correctly encoded Unicode sigil" );
122
123=end testing
124
027d819c 125=cut
126
9463b0bf 127has collation => (
128 is => 'ro',
129 isa => 'Text::Tradition::Collation',
ace5fce5 130 clearer => 'clear_collation',
8d9a1cd8 131 weak_ref => 1,
9463b0bf 132 );
133
e05997e2 134has graph => (
135 is => 'rw',
136 isa => 'Graph',
137 predicate => 'has_graph',
138 );
c57be097 139
e05997e2 140sub BUILD {
141 my( $self, $args ) = @_;
142 # If we have been handed a dotfile, initialize it into a graph.
143 if( exists $args->{'dot'} ) {
027d819c 144 $self->_graph_from_dot( $args->{'dot'} );
e05997e2 145 }
c0ccdb62 146}
147
027d819c 148sub _graph_from_dot {
8d9a1cd8 149 my( $self, $dotfh ) = @_;
8d9a1cd8 150 my $reader = Graph::Reader::Dot->new();
64a36834 151 # Redirect STDOUT in order to trap any error messages - syntax errors
152 # are evidently not fatal.
153 my $reader_out;
154 my $saved_stderr;
155 open $saved_stderr, ">&STDOUT";
156 close STDOUT;
157 open STDOUT, ">", \$reader_out;
8d9a1cd8 158 my $graph = $reader->read_graph( $dotfh );
64a36834 159 close STDOUT;
160 open STDOUT, ">", \$saved_stderr;
161 if( $reader_out && $reader_out =~ /error/s ) {
162 throw( "Error trying to parse dot: $reader_out" );
163 } elsif( !$graph ) {
164 throw( "Failed to create graph from dot" );
165 }
166 $self->graph( $graph );
167 # Go through the nodes and set any non-hypothetical node to extant.
168 foreach my $v ( $self->graph->vertices ) {
169 $self->graph->set_vertex_attribute( $v, 'class', 'extant' )
170 unless $self->graph->has_vertex_attribute( $v, 'class' );
7a7c249c 171 }
8d9a1cd8 172}
173
027d819c 174=head1 METHODS
175
176=head2 as_dot( \%options )
177
178Returns a normal dot representation of the stemma layout, suitable for rendering
179with GraphViz. Options include:
180
181=over
182
183=item * graph - A hashref of global graph options.
184
185=item * node - A hashref of global node options.
186
187=item * edge - A hashref of global edge options.
188
189=back
190
191See the GraphViz documentation for the list of available options.
192
193=cut
194
8d9a1cd8 195sub as_dot {
e367f5c0 196 my( $self, $opts ) = @_;
7a7c249c 197
335a62ef 198 ## See if we are including any a.c. witnesses in this graph.
199 my $graph = $self->graph;
200 if( exists $opts->{'layerwits'} ) {
5c44c598 201 my $extant = {};
202 map { $extant->{$_} = 1 } $self->witnesses;
203 $graph = $self->situation_graph( $extant, $opts->{'layerwits'} );
335a62ef 204 }
205
7a7c249c 206 # Get default and specified options
e02340f3 207 my %graphopts = (
208 # 'ratio' => 1,
209 );
7a7c249c 210 my %nodeopts = (
211 'fontsize' => 11,
7a7c249c 212 'style' => 'filled',
213 'fillcolor' => 'white',
e02340f3 214 'color' => 'white',
7a7c249c 215 'shape' => 'ellipse', # Shape for the extant nodes
216 );
217 my %edgeopts = (
e02340f3 218 'arrowhead' => 'none',
7a7c249c 219 );
220 @graphopts{ keys %{$opts->{'graph'}} } = values %{$opts->{'graph'}}
221 if $opts->{'graph'};
222 @nodeopts{ keys %{$opts->{'node'}} } = values %{$opts->{'node'}}
223 if $opts->{'node'};
224 @edgeopts{ keys %{$opts->{'edge'}} } = values %{$opts->{'edge'}}
225 if $opts->{'edge'};
335a62ef 226
7a7c249c 227 my @dotlines;
228 push( @dotlines, 'digraph stemma {' );
229 ## Print out the global attributes
230 push( @dotlines, _make_dotline( 'graph', %graphopts ) ) if keys %graphopts;
231 push( @dotlines, _make_dotline( 'edge', %edgeopts ) ) if keys %edgeopts;
7a7c249c 232 push( @dotlines, _make_dotline( 'node', %nodeopts ) ) if keys %nodeopts;
233
234 # Add each of the nodes.
335a62ef 235 foreach my $n ( $graph->vertices ) {
236 if( $graph->has_vertex_attribute( $n, 'label' ) ) {
237 my $ltext = $graph->get_vertex_attribute( $n, 'label' );
e02340f3 238 push( @dotlines, _make_dotline( $n, 'label' => $ltext ) );
e79c23c7 239 } else {
7a7c249c 240 # Use the default display settings.
986bbd1b 241 $n = _dotquote( $n );
7a7c249c 242 push( @dotlines, " $n;" );
e79c23c7 243 }
244 }
7a7c249c 245 # Add each of our edges.
335a62ef 246 foreach my $e ( $graph->edges ) {
986bbd1b 247 my( $from, $to ) = map { _dotquote( $_ ) } @$e;
7a7c249c 248 push( @dotlines, " $from -> $to;" );
249 }
250 push( @dotlines, '}' );
e79c23c7 251
7a7c249c 252 return join( "\n", @dotlines );
253}
254
0bded693 255=head2 alter_graph( $dotstring )
256
257Alters the graph of this stemma according to the definition specified
258in $dotstring.
259
260=cut
261
262sub alter_graph {
263 my( $self, $dotstring ) = @_;
264 my $dotfh;
265 open $dotfh, '<', \$dotstring;
f90b2bde 266 binmode $dotfh, ':utf8';
0bded693 267 $self->_graph_from_dot( $dotfh );
268}
269
335a62ef 270=head2 editable( $opts )
027d819c 271
5c44c598 272=head2 editable_graph( $graph, $opts )
273
88a6bac5 274Returns a version of the graph rendered in our definition format. The
335a62ef 275output separates statements with a newline; set $opts->{'linesep'} to the
276empty string or to a space if the result is to be sent via JSON.
277
5c44c598 278If a situational version of the stemma is required, the arguments for
279situation_graph should be passed via $opts->{'extant'} and $opts->{'layerwits'}.
027d819c 280
281=cut
7a7c249c 282
7a7c249c 283sub editable {
5c44c598 284 my( $self, $opts ) = @_;
335a62ef 285 my $graph = $self->graph;
5c44c598 286 ## See if we need an editable version of a situational graph.
287 if( exists $opts->{'layerwits'} || exists $opts->{'extant'} ) {
288 my $extant = delete $opts->{'extant'} || {};
289 my $layerwits = delete $opts->{'layerwits'} || [];
290 $graph = $self->situation_graph( $extant, $layerwits );
335a62ef 291 }
5c44c598 292 return editable_graph( $graph, $opts );
293}
294
295sub editable_graph {
296 my( $graph, $opts ) = @_;
335a62ef 297
298 # Create the graph
299 my $join = ( $opts && exists $opts->{'linesep'} ) ? $opts->{'linesep'} : "\n";
7a7c249c 300 my @dotlines;
301 push( @dotlines, 'digraph stemma {' );
302 my @real; # A cheap sort
5c44c598 303 foreach my $n ( sort $graph->vertices ) {
304 my $c = $graph->get_vertex_attribute( $n, 'class' );
7a7c249c 305 $c = 'extant' unless $c;
306 if( $c eq 'extant' ) {
307 push( @real, $n );
308 } else {
309 push( @dotlines, _make_dotline( $n, 'class' => $c ) );
310 }
e367f5c0 311 }
7a7c249c 312 # Now do the real ones
313 foreach my $n ( @real ) {
314 push( @dotlines, _make_dotline( $n, 'class' => 'extant' ) );
315 }
5c44c598 316 foreach my $e ( sort _by_vertex $graph->edges ) {
986bbd1b 317 my( $from, $to ) = map { _dotquote( $_ ) } @$e;
7a7c249c 318 push( @dotlines, " $from -> $to;" );
319 }
320 push( @dotlines, '}' );
88a6bac5 321 return join( $join, @dotlines );
7a7c249c 322}
323
324sub _make_dotline {
325 my( $obj, %attr ) = @_;
326 my @pairs;
327 foreach my $k ( keys %attr ) {
986bbd1b 328 my $v = _dotquote( $attr{$k} );
329 push( @pairs, "$k=$v" );
7a7c249c 330 }
986bbd1b 331 return sprintf( " %s [ %s ];", _dotquote( $obj ), join( ', ', @pairs ) );
8d9a1cd8 332}
333
986bbd1b 334sub _dotquote {
335 my( $str ) = @_;
336 return $str if $str =~ /^[A-Za-z0-9]+$/;
337 $str =~ s/\"/\\\"/g;
338 $str = '"' . $str . '"';
339 return $str;
340}
341
7a7c249c 342sub _by_vertex {
343 return $a->[0].$a->[1] cmp $b->[0].$b->[1];
344}
8d9a1cd8 345
5c44c598 346=head2 situation_graph( $extant, $layered )
335a62ef 347
5c44c598 348Returns a graph which is the original stemma with all witnesses not in the
349%$extant hash marked as hypothetical, and witness layers added to the graph
350according to the list in @$layered. A layered (a.c.) witness is added as a
351parent of its main version, and additionally shares all other parents and
352children with that version.
335a62ef 353
354=cut
355
5c44c598 356sub situation_graph {
ace5fce5 357 my( $self, $extant, $layerwits, $layerlabel ) = @_;
5c44c598 358
359 my $graph = $self->graph->copy;
360 foreach my $vertex ( $graph->vertices ) {
361 # Set as extant any vertex that is extant in the stemma AND
362 # exists in the $extant hash.
363 my $class = 'hypothetical';
364 $class = 'extant' if exists $extant->{$vertex} && $extant->{$vertex} &&
365 $self->graph->get_vertex_attribute( $vertex, 'class' ) ne 'hypothetical';
366 $graph->set_vertex_attribute( $vertex, 'class', $class );
367 }
368
335a62ef 369 # For each 'layered' witness in the layerwits array, add it to the graph
370 # as an ancestor of the 'main' witness, and otherwise with the same parent/
371 # child links as its main analogue.
372 # TOOD Handle case where B is copied from A but corrected from C
ace5fce5 373 $layerlabel = ' (a.c.)' unless $layerlabel;
335a62ef 374 foreach my $lw ( @$layerwits ) {
375 # Add the layered witness and set it with the same attributes as
376 # its 'main' analogue
5c44c598 377 throw( "Cannot add a layer to a hypothetical witness $lw" )
378 unless $graph->get_vertex_attribute( $lw, 'class' ) eq 'extant';
ace5fce5 379 my $lwac = $lw . $layerlabel;
335a62ef 380 $graph->add_vertex( $lwac );
381 $graph->set_vertex_attributes( $lwac,
382 $graph->get_vertex_attributes( $lw ) );
383
384 # Set it as ancestor to the main witness
385 $graph->add_edge( $lwac, $lw );
386
387 # Give it the same ancestors and descendants as the main witness has,
388 # bearing in mind that those ancestors and descendants might also just
389 # have had a layered witness defined.
390 foreach my $v ( $graph->predecessors( $lw ) ) {
391 next if $v eq $lwac; # Don't add a loop
392 $graph->add_edge( $v, $lwac );
ace5fce5 393 $graph->add_edge( $v.$layerlabel, $lwac )
394 if $graph->has_vertex( $v.$layerlabel );
335a62ef 395 }
396 foreach my $v ( $graph->successors( $lw ) ) {
397 next if $v eq $lwac; # but this shouldn't occur
398 $graph->add_edge( $lwac, $v );
ace5fce5 399 $graph->add_edge( $lwac, $v.$layerlabel )
400 if $graph->has_vertex( $v.$layerlabel );
335a62ef 401 }
402 }
403 return $graph;
404}
405
027d819c 406=head2 as_svg
407
408Returns an SVG representation of the graph, calling as_dot first.
409
410=cut
411
8d9a1cd8 412sub as_svg {
413 my( $self, $opts ) = @_;
414 my $dot = $self->as_dot( $opts );
e79c23c7 415 my @cmd = qw/dot -Tsvg/;
3bf5d6f1 416 my $svg;
e79c23c7 417 my $dotfile = File::Temp->new();
418 ## TODO REMOVE
419 # $dotfile->unlink_on_destroy(0);
420 binmode $dotfile, ':utf8';
8d9a1cd8 421 print $dotfile $dot;
459c39b3 422 close $dotfile;
e79c23c7 423 push( @cmd, $dotfile->filename );
424 run( \@cmd, ">", binary(), \$svg );
3bf5d6f1 425 # HACK: Parse the SVG and change the dimensions.
5a7e26a9 426 # Get rid of width and height attributes to allow scaling.
c57be097 427 if( $opts->{'size'} ) {
428bcf0b 428 require XML::LibXML;
459c39b3 429 my $parser = XML::LibXML->new( load_ext_dtd => 0 );
430 my $svgdoc;
431 eval {
432 $svgdoc = $parser->parse_string( decode_utf8( $svg ) );
433 };
434 throw( "Could not reparse SVG: $@" ) if $@;
c57be097 435 my( $ew, $eh ) = @{$opts->{'size'}};
436 # If the graph is wider than it is tall, set width to ew and remove height.
437 # Otherwise set height to eh and remove width.
438 my $width = $svgdoc->documentElement->getAttribute('width');
439 my $height = $svgdoc->documentElement->getAttribute('height');
440 $width =~ s/\D+//g;
441 $height =~ s/\D+//g;
14e6a110 442 my( $remove, $keep, $val, $viewbox );
c57be097 443 if( $width > $height ) {
444 $remove = 'height';
445 $keep = 'width';
446 $val = $ew . 'px';
14e6a110 447 my $vbheight = $width / $ew * $height;
448 $viewbox = "0.00 0.00 $width.00" . sprintf( "%.2f", $vbheight );
c57be097 449 } else {
450 $remove = 'width';
451 $keep = 'height';
452 $val = $eh . 'px';
14e6a110 453 my $vbwidth = $height / $eh * $width;
454 $viewbox = "0.00 0.00 " . sprintf( "%.2f", $vbwidth ) . " $height.00";
c57be097 455 }
456 $svgdoc->documentElement->removeAttribute( $remove );
457 $svgdoc->documentElement->setAttribute( $keep, $val );
14e6a110 458 $svgdoc->documentElement->removeAttribute( 'viewBox' );
459 $svgdoc->documentElement->setAttribute( 'viewBox', $viewbox );
428bcf0b 460 $svg = $svgdoc->toString();
c57be097 461 }
3bf5d6f1 462 # Return the result
428bcf0b 463 return decode_utf8( $svg );
e79c23c7 464}
465
027d819c 466=head2 witnesses
467
468Returns a list of the extant witnesses represented in the stemma.
469
470=cut
471
08e0fb85 472sub witnesses {
473 my $self = shift;
474 my @wits = grep { $self->graph->get_vertex_attribute( $_, 'class' ) eq 'extant' }
475 $self->graph->vertices;
476 return @wits;
477}
478
06e7cbc7 479=head2 hypotheticals
480
481Returns a list of the hypothetical witnesses represented in the stemma.
482
483=cut
484
bebec0e9 485sub hypotheticals {
486 my $self = shift;
487 my @wits = grep
488 { $self->graph->get_vertex_attribute( $_, 'class' ) eq 'hypothetical' }
489 $self->graph->vertices;
490 return @wits;
491}
492
63778331 493sub throw {
494 Text::Tradition::Error->throw(
495 'ident' => 'Stemma error',
496 'message' => $_[0],
497 );
498}
499
500
9463b0bf 501no Moose;
502__PACKAGE__->meta->make_immutable;
503
5041;
027d819c 505
506=head1 LICENSE
507
508This package is free software and is provided "as is" without express
509or implied warranty. You can redistribute it and/or modify it under
510the same terms as Perl itself.
511
512=head1 AUTHOR
513
514Tara L Andrews E<lt>aurum@cpan.orgE<gt>