include the about screen template
[scpubgit/stemmatology.git] / lib / Text / Tradition / Stemma.pm
CommitLineData
9463b0bf 1package Text::Tradition::Stemma;
2
40f19742 3use Bio::Phylo::IO;
e79c23c7 4use Encode qw( decode_utf8 );
9463b0bf 5use File::Temp;
e05997e2 6use Graph;
7use Graph::Reader::Dot;
e79c23c7 8use IPC::Run qw/ run binary /;
63778331 9use Text::Tradition::Error;
b02332ca 10use Text::Tradition::StemmaUtil qw/ character_input phylip_pars parse_newick /;
40f19742 11use Moose;
9463b0bf 12
027d819c 13=head1 NAME
14
15Text::Tradition::Stemma - a representation of a I<stemma codicum> for a Text::Tradition
16
17=head1 SYNOPSIS
18
19 use Text::Tradition;
20 my $t = Text::Tradition->new(
21 'name' => 'this is a text',
22 'input' => 'TEI',
23 'file' => '/path/to/tei_parallel_seg_file.xml' );
24
25 my $s = $tradition->add_stemma( dotfile => '/path/to/stemma.dot' );
26
27=head1 DESCRIPTION
28
29Text::Tradition is a library for representation and analysis of collated
335a62ef 30texts, particularly medieval ones. The Stemma is a representation of the
31copying relationships between the witnesses in a Tradition, modelled with
32a connected rooted directed acyclic graph (CRDAG).
027d819c 33
34=head1 DOT SYNTAX
35
335a62ef 36The easiest way to define a stemma is to use a special form of the 'dot'
37syntax of GraphViz.
027d819c 38
39Each stemma opens with the line
40
41 digraph Stemma {
42
43and continues with a list of all manuscript witnesses in the stemma, whether
44extant witnesses or missing archetypes or hyparchetypes. Each of these is
45listed by its sigil on its own line, e.g.:
46
47 alpha [ class=hypothetical ]
48 1 [ class=hypothetical,label=* ]
49 Ms4 [ class=extant ]
50
51Extant witnesses are listed with class=extant; missing or postulated witnesses
52are listed with class=hypothetical. Anonymous hyparchetypes must be given a
53unique name or number, but can be represented as anonymous with the addition
54of 'label=*' to their lines. Greek letters or other special characters may be
55used as names, but they must always be wrapped in double quotes.
56
57Links between manuscripts are then listed with arrow notation, as below. These
58lines show the direction of copying, one step at a time, for the entire stemma.
59
60 alpha -> 1
61 1 -> Ms4
62
63The final line in the definition should be the closing brace:
64
65 }
66
67Thus for a set of extant manuscripts A, B, and C, where A and B were copied
68from the archetype O and C was copied from B, the definition would be:
69
70 digraph Stemma {
71 O [ class=hypothetical]
72 A [ class=extant ]
73 B [ class=extant ]
74 C [ class=extant ]
75 O -> A
76 O -> B
77 B -> C
78 }
79
80=head1 CONSTRUCTOR
81
82=head2 new
83
84The constructor. This should generally be called from Text::Tradition, but
85if called directly it takes the following options:
86
87=over
88
027d819c 89=item * dot - A filehandle open to a DOT representation of the stemma graph.
90
91=back
92
64a36834 93=begin testing
94
64a36834 95use TryCatch;
96
97use_ok( 'Text::Tradition::Stemma' );
98
64a36834 99# Try to create a bad graph
100my $baddotfh;
101open( $baddotfh, 't/data/besoin_bad.dot' ) or die "Could not open test dotfile";
102try {
ace5fce5 103 my $stemma = Text::Tradition::Stemma->new( dot => $baddotfh );
64a36834 104 ok( 0, "Created broken stemma from dotfile with syntax error" );
105} catch( Text::Tradition::Error $e ) {
106 like( $e->message, qr/^Error trying to parse/, "Syntax error in dot threw exception" );
107}
108
109# Create a good graph
110my $dotfh;
111open( $dotfh, 't/data/florilegium.dot' ) or die "Could not open test dotfile";
112binmode( $dotfh, ':utf8' );
ace5fce5 113my $stemma = Text::Tradition::Stemma->new( dot => $dotfh );
64a36834 114is( ref( $stemma ), 'Text::Tradition::Stemma', "Created stemma from good dotfile" );
115is( scalar $stemma->witnesses, 13, "Found correct number of extant witnesses" );
116is( scalar $stemma->hypotheticals, 8, "Found correct number of extant hypotheticals" );
117my $found_unicode_sigil;
118foreach my $h ( $stemma->hypotheticals ) {
119 $found_unicode_sigil = 1 if $h eq "\x{3b1}";
120}
121ok( $found_unicode_sigil, "Found a correctly encoded Unicode sigil" );
122
123=end testing
124
027d819c 125=cut
126
9463b0bf 127has collation => (
128 is => 'ro',
129 isa => 'Text::Tradition::Collation',
ace5fce5 130 clearer => 'clear_collation',
8d9a1cd8 131 weak_ref => 1,
9463b0bf 132 );
133
e05997e2 134has graph => (
135 is => 'rw',
136 isa => 'Graph',
137 predicate => 'has_graph',
138 );
c57be097 139
e05997e2 140sub BUILD {
141 my( $self, $args ) = @_;
142 # If we have been handed a dotfile, initialize it into a graph.
143 if( exists $args->{'dot'} ) {
027d819c 144 $self->_graph_from_dot( $args->{'dot'} );
e05997e2 145 }
c0ccdb62 146}
147
027d819c 148sub _graph_from_dot {
8d9a1cd8 149 my( $self, $dotfh ) = @_;
8d9a1cd8 150 my $reader = Graph::Reader::Dot->new();
64a36834 151 # Redirect STDOUT in order to trap any error messages - syntax errors
152 # are evidently not fatal.
d34fdf7b 153 # TODO This breaks under FastCGI/Apache; reconsider.
64a36834 154 my $reader_out;
d34fdf7b 155 #my $saved_stderr;
156 #open $saved_stderr, ">&STDOUT";
157 #close STDOUT;
158 #open STDOUT, ">", \$reader_out;
8d9a1cd8 159 my $graph = $reader->read_graph( $dotfh );
d34fdf7b 160 #close STDOUT;
161 #open STDOUT, ">", \$saved_stderr;
64a36834 162 if( $reader_out && $reader_out =~ /error/s ) {
163 throw( "Error trying to parse dot: $reader_out" );
164 } elsif( !$graph ) {
165 throw( "Failed to create graph from dot" );
166 }
167 $self->graph( $graph );
168 # Go through the nodes and set any non-hypothetical node to extant.
169 foreach my $v ( $self->graph->vertices ) {
170 $self->graph->set_vertex_attribute( $v, 'class', 'extant' )
171 unless $self->graph->has_vertex_attribute( $v, 'class' );
7a7c249c 172 }
8d9a1cd8 173}
174
027d819c 175=head1 METHODS
176
177=head2 as_dot( \%options )
178
179Returns a normal dot representation of the stemma layout, suitable for rendering
180with GraphViz. Options include:
181
182=over
183
184=item * graph - A hashref of global graph options.
185
186=item * node - A hashref of global node options.
187
188=item * edge - A hashref of global edge options.
189
190=back
191
192See the GraphViz documentation for the list of available options.
193
194=cut
195
8d9a1cd8 196sub as_dot {
e367f5c0 197 my( $self, $opts ) = @_;
7a7c249c 198
335a62ef 199 ## See if we are including any a.c. witnesses in this graph.
200 my $graph = $self->graph;
201 if( exists $opts->{'layerwits'} ) {
5c44c598 202 my $extant = {};
203 map { $extant->{$_} = 1 } $self->witnesses;
204 $graph = $self->situation_graph( $extant, $opts->{'layerwits'} );
335a62ef 205 }
206
7a7c249c 207 # Get default and specified options
e02340f3 208 my %graphopts = (
209 # 'ratio' => 1,
210 );
7a7c249c 211 my %nodeopts = (
212 'fontsize' => 11,
7a7c249c 213 'style' => 'filled',
214 'fillcolor' => 'white',
e02340f3 215 'color' => 'white',
7a7c249c 216 'shape' => 'ellipse', # Shape for the extant nodes
217 );
218 my %edgeopts = (
e02340f3 219 'arrowhead' => 'none',
7a7c249c 220 );
221 @graphopts{ keys %{$opts->{'graph'}} } = values %{$opts->{'graph'}}
222 if $opts->{'graph'};
223 @nodeopts{ keys %{$opts->{'node'}} } = values %{$opts->{'node'}}
224 if $opts->{'node'};
225 @edgeopts{ keys %{$opts->{'edge'}} } = values %{$opts->{'edge'}}
226 if $opts->{'edge'};
335a62ef 227
7a7c249c 228 my @dotlines;
229 push( @dotlines, 'digraph stemma {' );
230 ## Print out the global attributes
231 push( @dotlines, _make_dotline( 'graph', %graphopts ) ) if keys %graphopts;
232 push( @dotlines, _make_dotline( 'edge', %edgeopts ) ) if keys %edgeopts;
7a7c249c 233 push( @dotlines, _make_dotline( 'node', %nodeopts ) ) if keys %nodeopts;
234
235 # Add each of the nodes.
335a62ef 236 foreach my $n ( $graph->vertices ) {
237 if( $graph->has_vertex_attribute( $n, 'label' ) ) {
238 my $ltext = $graph->get_vertex_attribute( $n, 'label' );
e02340f3 239 push( @dotlines, _make_dotline( $n, 'label' => $ltext ) );
e79c23c7 240 } else {
7a7c249c 241 # Use the default display settings.
986bbd1b 242 $n = _dotquote( $n );
7a7c249c 243 push( @dotlines, " $n;" );
e79c23c7 244 }
245 }
7a7c249c 246 # Add each of our edges.
335a62ef 247 foreach my $e ( $graph->edges ) {
986bbd1b 248 my( $from, $to ) = map { _dotquote( $_ ) } @$e;
7a7c249c 249 push( @dotlines, " $from -> $to;" );
250 }
251 push( @dotlines, '}' );
e79c23c7 252
7a7c249c 253 return join( "\n", @dotlines );
254}
255
0bded693 256=head2 alter_graph( $dotstring )
257
258Alters the graph of this stemma according to the definition specified
259in $dotstring.
260
261=cut
262
263sub alter_graph {
264 my( $self, $dotstring ) = @_;
265 my $dotfh;
266 open $dotfh, '<', \$dotstring;
f90b2bde 267 binmode $dotfh, ':utf8';
0bded693 268 $self->_graph_from_dot( $dotfh );
269}
270
335a62ef 271=head2 editable( $opts )
027d819c 272
5c44c598 273=head2 editable_graph( $graph, $opts )
274
88a6bac5 275Returns a version of the graph rendered in our definition format. The
335a62ef 276output separates statements with a newline; set $opts->{'linesep'} to the
277empty string or to a space if the result is to be sent via JSON.
278
5c44c598 279If a situational version of the stemma is required, the arguments for
280situation_graph should be passed via $opts->{'extant'} and $opts->{'layerwits'}.
027d819c 281
282=cut
7a7c249c 283
7a7c249c 284sub editable {
5c44c598 285 my( $self, $opts ) = @_;
335a62ef 286 my $graph = $self->graph;
5c44c598 287 ## See if we need an editable version of a situational graph.
288 if( exists $opts->{'layerwits'} || exists $opts->{'extant'} ) {
289 my $extant = delete $opts->{'extant'} || {};
290 my $layerwits = delete $opts->{'layerwits'} || [];
291 $graph = $self->situation_graph( $extant, $layerwits );
335a62ef 292 }
5c44c598 293 return editable_graph( $graph, $opts );
294}
295
296sub editable_graph {
297 my( $graph, $opts ) = @_;
335a62ef 298
299 # Create the graph
300 my $join = ( $opts && exists $opts->{'linesep'} ) ? $opts->{'linesep'} : "\n";
7a7c249c 301 my @dotlines;
302 push( @dotlines, 'digraph stemma {' );
303 my @real; # A cheap sort
5c44c598 304 foreach my $n ( sort $graph->vertices ) {
305 my $c = $graph->get_vertex_attribute( $n, 'class' );
7a7c249c 306 $c = 'extant' unless $c;
307 if( $c eq 'extant' ) {
308 push( @real, $n );
309 } else {
310 push( @dotlines, _make_dotline( $n, 'class' => $c ) );
311 }
e367f5c0 312 }
7a7c249c 313 # Now do the real ones
314 foreach my $n ( @real ) {
315 push( @dotlines, _make_dotline( $n, 'class' => 'extant' ) );
316 }
5c44c598 317 foreach my $e ( sort _by_vertex $graph->edges ) {
986bbd1b 318 my( $from, $to ) = map { _dotquote( $_ ) } @$e;
7a7c249c 319 push( @dotlines, " $from -> $to;" );
320 }
321 push( @dotlines, '}' );
88a6bac5 322 return join( $join, @dotlines );
7a7c249c 323}
324
325sub _make_dotline {
326 my( $obj, %attr ) = @_;
327 my @pairs;
328 foreach my $k ( keys %attr ) {
986bbd1b 329 my $v = _dotquote( $attr{$k} );
330 push( @pairs, "$k=$v" );
7a7c249c 331 }
986bbd1b 332 return sprintf( " %s [ %s ];", _dotquote( $obj ), join( ', ', @pairs ) );
8d9a1cd8 333}
334
986bbd1b 335sub _dotquote {
336 my( $str ) = @_;
337 return $str if $str =~ /^[A-Za-z0-9]+$/;
338 $str =~ s/\"/\\\"/g;
339 $str = '"' . $str . '"';
340 return $str;
341}
342
7a7c249c 343sub _by_vertex {
344 return $a->[0].$a->[1] cmp $b->[0].$b->[1];
345}
8d9a1cd8 346
5c44c598 347=head2 situation_graph( $extant, $layered )
335a62ef 348
5c44c598 349Returns a graph which is the original stemma with all witnesses not in the
350%$extant hash marked as hypothetical, and witness layers added to the graph
351according to the list in @$layered. A layered (a.c.) witness is added as a
352parent of its main version, and additionally shares all other parents and
353children with that version.
335a62ef 354
355=cut
356
5c44c598 357sub situation_graph {
ace5fce5 358 my( $self, $extant, $layerwits, $layerlabel ) = @_;
5c44c598 359
360 my $graph = $self->graph->copy;
361 foreach my $vertex ( $graph->vertices ) {
362 # Set as extant any vertex that is extant in the stemma AND
363 # exists in the $extant hash.
364 my $class = 'hypothetical';
365 $class = 'extant' if exists $extant->{$vertex} && $extant->{$vertex} &&
366 $self->graph->get_vertex_attribute( $vertex, 'class' ) ne 'hypothetical';
367 $graph->set_vertex_attribute( $vertex, 'class', $class );
368 }
369
335a62ef 370 # For each 'layered' witness in the layerwits array, add it to the graph
371 # as an ancestor of the 'main' witness, and otherwise with the same parent/
372 # child links as its main analogue.
373 # TOOD Handle case where B is copied from A but corrected from C
ace5fce5 374 $layerlabel = ' (a.c.)' unless $layerlabel;
335a62ef 375 foreach my $lw ( @$layerwits ) {
376 # Add the layered witness and set it with the same attributes as
377 # its 'main' analogue
5c44c598 378 throw( "Cannot add a layer to a hypothetical witness $lw" )
379 unless $graph->get_vertex_attribute( $lw, 'class' ) eq 'extant';
ace5fce5 380 my $lwac = $lw . $layerlabel;
335a62ef 381 $graph->add_vertex( $lwac );
382 $graph->set_vertex_attributes( $lwac,
383 $graph->get_vertex_attributes( $lw ) );
384
385 # Set it as ancestor to the main witness
386 $graph->add_edge( $lwac, $lw );
387
388 # Give it the same ancestors and descendants as the main witness has,
389 # bearing in mind that those ancestors and descendants might also just
390 # have had a layered witness defined.
391 foreach my $v ( $graph->predecessors( $lw ) ) {
392 next if $v eq $lwac; # Don't add a loop
393 $graph->add_edge( $v, $lwac );
ace5fce5 394 $graph->add_edge( $v.$layerlabel, $lwac )
395 if $graph->has_vertex( $v.$layerlabel );
335a62ef 396 }
397 foreach my $v ( $graph->successors( $lw ) ) {
398 next if $v eq $lwac; # but this shouldn't occur
399 $graph->add_edge( $lwac, $v );
ace5fce5 400 $graph->add_edge( $lwac, $v.$layerlabel )
401 if $graph->has_vertex( $v.$layerlabel );
335a62ef 402 }
403 }
404 return $graph;
405}
406
027d819c 407=head2 as_svg
408
409Returns an SVG representation of the graph, calling as_dot first.
410
411=cut
412
8d9a1cd8 413sub as_svg {
414 my( $self, $opts ) = @_;
415 my $dot = $self->as_dot( $opts );
e79c23c7 416 my @cmd = qw/dot -Tsvg/;
3bf5d6f1 417 my $svg;
e79c23c7 418 my $dotfile = File::Temp->new();
419 ## TODO REMOVE
420 # $dotfile->unlink_on_destroy(0);
421 binmode $dotfile, ':utf8';
8d9a1cd8 422 print $dotfile $dot;
459c39b3 423 close $dotfile;
e79c23c7 424 push( @cmd, $dotfile->filename );
425 run( \@cmd, ">", binary(), \$svg );
3bf5d6f1 426 # HACK: Parse the SVG and change the dimensions.
5a7e26a9 427 # Get rid of width and height attributes to allow scaling.
c57be097 428 if( $opts->{'size'} ) {
428bcf0b 429 require XML::LibXML;
459c39b3 430 my $parser = XML::LibXML->new( load_ext_dtd => 0 );
431 my $svgdoc;
432 eval {
433 $svgdoc = $parser->parse_string( decode_utf8( $svg ) );
434 };
435 throw( "Could not reparse SVG: $@" ) if $@;
c57be097 436 my( $ew, $eh ) = @{$opts->{'size'}};
437 # If the graph is wider than it is tall, set width to ew and remove height.
438 # Otherwise set height to eh and remove width.
439 my $width = $svgdoc->documentElement->getAttribute('width');
440 my $height = $svgdoc->documentElement->getAttribute('height');
441 $width =~ s/\D+//g;
442 $height =~ s/\D+//g;
14e6a110 443 my( $remove, $keep, $val, $viewbox );
c57be097 444 if( $width > $height ) {
445 $remove = 'height';
446 $keep = 'width';
447 $val = $ew . 'px';
14e6a110 448 my $vbheight = $width / $ew * $height;
449 $viewbox = "0.00 0.00 $width.00" . sprintf( "%.2f", $vbheight );
c57be097 450 } else {
451 $remove = 'width';
452 $keep = 'height';
453 $val = $eh . 'px';
14e6a110 454 my $vbwidth = $height / $eh * $width;
455 $viewbox = "0.00 0.00 " . sprintf( "%.2f", $vbwidth ) . " $height.00";
c57be097 456 }
457 $svgdoc->documentElement->removeAttribute( $remove );
458 $svgdoc->documentElement->setAttribute( $keep, $val );
14e6a110 459 $svgdoc->documentElement->removeAttribute( 'viewBox' );
460 $svgdoc->documentElement->setAttribute( 'viewBox', $viewbox );
428bcf0b 461 $svg = $svgdoc->toString();
c57be097 462 }
3bf5d6f1 463 # Return the result
428bcf0b 464 return decode_utf8( $svg );
e79c23c7 465}
466
027d819c 467=head2 witnesses
468
469Returns a list of the extant witnesses represented in the stemma.
470
471=cut
472
08e0fb85 473sub witnesses {
474 my $self = shift;
475 my @wits = grep { $self->graph->get_vertex_attribute( $_, 'class' ) eq 'extant' }
476 $self->graph->vertices;
477 return @wits;
478}
479
06e7cbc7 480=head2 hypotheticals
481
482Returns a list of the hypothetical witnesses represented in the stemma.
483
484=cut
485
bebec0e9 486sub hypotheticals {
487 my $self = shift;
488 my @wits = grep
489 { $self->graph->get_vertex_attribute( $_, 'class' ) eq 'hypothetical' }
490 $self->graph->vertices;
491 return @wits;
492}
493
63778331 494sub throw {
495 Text::Tradition::Error->throw(
496 'ident' => 'Stemma error',
497 'message' => $_[0],
498 );
499}
500
501
9463b0bf 502no Moose;
503__PACKAGE__->meta->make_immutable;
504
5051;
027d819c 506
507=head1 LICENSE
508
509This package is free software and is provided "as is" without express
510or implied warranty. You can redistribute it and/or modify it under
511the same terms as Perl itself.
512
513=head1 AUTHOR
514
515Tara L Andrews E<lt>aurum@cpan.orgE<gt>