correct URLs for AJAX / img requests in stexaminer
[scpubgit/stemmatology.git] / lib / Text / Tradition / Stemma.pm
CommitLineData
9463b0bf 1package Text::Tradition::Stemma;
2
40f19742 3use Bio::Phylo::IO;
e79c23c7 4use Encode qw( decode_utf8 );
9463b0bf 5use File::Temp;
e05997e2 6use Graph;
7use Graph::Reader::Dot;
e79c23c7 8use IPC::Run qw/ run binary /;
63778331 9use Text::Tradition::Error;
b02332ca 10use Text::Tradition::StemmaUtil qw/ character_input phylip_pars parse_newick /;
40f19742 11use Moose;
9463b0bf 12
027d819c 13=head1 NAME
14
15Text::Tradition::Stemma - a representation of a I<stemma codicum> for a Text::Tradition
16
17=head1 SYNOPSIS
18
19 use Text::Tradition;
20 my $t = Text::Tradition->new(
21 'name' => 'this is a text',
22 'input' => 'TEI',
23 'file' => '/path/to/tei_parallel_seg_file.xml' );
24
25 my $s = $tradition->add_stemma( dotfile => '/path/to/stemma.dot' );
26
27=head1 DESCRIPTION
28
29Text::Tradition is a library for representation and analysis of collated
335a62ef 30texts, particularly medieval ones. The Stemma is a representation of the
31copying relationships between the witnesses in a Tradition, modelled with
32a connected rooted directed acyclic graph (CRDAG).
027d819c 33
34=head1 DOT SYNTAX
35
335a62ef 36The easiest way to define a stemma is to use a special form of the 'dot'
37syntax of GraphViz.
027d819c 38
39Each stemma opens with the line
40
41 digraph Stemma {
42
43and continues with a list of all manuscript witnesses in the stemma, whether
44extant witnesses or missing archetypes or hyparchetypes. Each of these is
45listed by its sigil on its own line, e.g.:
46
47 alpha [ class=hypothetical ]
48 1 [ class=hypothetical,label=* ]
49 Ms4 [ class=extant ]
50
51Extant witnesses are listed with class=extant; missing or postulated witnesses
52are listed with class=hypothetical. Anonymous hyparchetypes must be given a
53unique name or number, but can be represented as anonymous with the addition
54of 'label=*' to their lines. Greek letters or other special characters may be
55used as names, but they must always be wrapped in double quotes.
56
57Links between manuscripts are then listed with arrow notation, as below. These
58lines show the direction of copying, one step at a time, for the entire stemma.
59
60 alpha -> 1
61 1 -> Ms4
62
63The final line in the definition should be the closing brace:
64
65 }
66
67Thus for a set of extant manuscripts A, B, and C, where A and B were copied
68from the archetype O and C was copied from B, the definition would be:
69
70 digraph Stemma {
71 O [ class=hypothetical]
72 A [ class=extant ]
73 B [ class=extant ]
74 C [ class=extant ]
75 O -> A
76 O -> B
77 B -> C
78 }
79
80=head1 CONSTRUCTOR
81
82=head2 new
83
84The constructor. This should generally be called from Text::Tradition, but
85if called directly it takes the following options:
86
87=over
88
89=item * collation - The collation with which the stemma is associated.
90
91=item * dot - A filehandle open to a DOT representation of the stemma graph.
92
93=back
94
64a36834 95=begin testing
96
97use Text::Tradition::Collation;
98use TryCatch;
99
100use_ok( 'Text::Tradition::Stemma' );
101
102# Placeholder collation to use in tests
103my $c = Text::Tradition::Collation->new();
104
105# Try to create a bad graph
106my $baddotfh;
107open( $baddotfh, 't/data/besoin_bad.dot' ) or die "Could not open test dotfile";
108try {
109 my $stemma = Text::Tradition::Stemma->new( collation => $c, dot => $baddotfh );
110 ok( 0, "Created broken stemma from dotfile with syntax error" );
111} catch( Text::Tradition::Error $e ) {
112 like( $e->message, qr/^Error trying to parse/, "Syntax error in dot threw exception" );
113}
114
115# Create a good graph
116my $dotfh;
117open( $dotfh, 't/data/florilegium.dot' ) or die "Could not open test dotfile";
118binmode( $dotfh, ':utf8' );
119my $stemma = Text::Tradition::Stemma->new( collation => $c, dot => $dotfh );
120is( ref( $stemma ), 'Text::Tradition::Stemma', "Created stemma from good dotfile" );
121is( scalar $stemma->witnesses, 13, "Found correct number of extant witnesses" );
122is( scalar $stemma->hypotheticals, 8, "Found correct number of extant hypotheticals" );
123my $found_unicode_sigil;
124foreach my $h ( $stemma->hypotheticals ) {
125 $found_unicode_sigil = 1 if $h eq "\x{3b1}";
126}
127ok( $found_unicode_sigil, "Found a correctly encoded Unicode sigil" );
128
129=end testing
130
027d819c 131=cut
132
9463b0bf 133has collation => (
134 is => 'ro',
135 isa => 'Text::Tradition::Collation',
136 required => 1,
8d9a1cd8 137 weak_ref => 1,
9463b0bf 138 );
139
e05997e2 140has graph => (
141 is => 'rw',
142 isa => 'Graph',
143 predicate => 'has_graph',
144 );
c57be097 145
e05997e2 146sub BUILD {
147 my( $self, $args ) = @_;
148 # If we have been handed a dotfile, initialize it into a graph.
149 if( exists $args->{'dot'} ) {
027d819c 150 $self->_graph_from_dot( $args->{'dot'} );
e05997e2 151 }
c0ccdb62 152}
153
027d819c 154sub _graph_from_dot {
8d9a1cd8 155 my( $self, $dotfh ) = @_;
8d9a1cd8 156 my $reader = Graph::Reader::Dot->new();
64a36834 157 # Redirect STDOUT in order to trap any error messages - syntax errors
158 # are evidently not fatal.
159 my $reader_out;
160 my $saved_stderr;
161 open $saved_stderr, ">&STDOUT";
162 close STDOUT;
163 open STDOUT, ">", \$reader_out;
8d9a1cd8 164 my $graph = $reader->read_graph( $dotfh );
64a36834 165 close STDOUT;
166 open STDOUT, ">", \$saved_stderr;
167 if( $reader_out && $reader_out =~ /error/s ) {
168 throw( "Error trying to parse dot: $reader_out" );
169 } elsif( !$graph ) {
170 throw( "Failed to create graph from dot" );
171 }
172 $self->graph( $graph );
173 # Go through the nodes and set any non-hypothetical node to extant.
174 foreach my $v ( $self->graph->vertices ) {
175 $self->graph->set_vertex_attribute( $v, 'class', 'extant' )
176 unless $self->graph->has_vertex_attribute( $v, 'class' );
7a7c249c 177 }
8d9a1cd8 178}
179
027d819c 180=head1 METHODS
181
182=head2 as_dot( \%options )
183
184Returns a normal dot representation of the stemma layout, suitable for rendering
185with GraphViz. Options include:
186
187=over
188
189=item * graph - A hashref of global graph options.
190
191=item * node - A hashref of global node options.
192
193=item * edge - A hashref of global edge options.
194
195=back
196
197See the GraphViz documentation for the list of available options.
198
199=cut
200
8d9a1cd8 201sub as_dot {
e367f5c0 202 my( $self, $opts ) = @_;
7a7c249c 203
335a62ef 204 ## See if we are including any a.c. witnesses in this graph.
205 my $graph = $self->graph;
206 if( exists $opts->{'layerwits'} ) {
5c44c598 207 my $extant = {};
208 map { $extant->{$_} = 1 } $self->witnesses;
209 $graph = $self->situation_graph( $extant, $opts->{'layerwits'} );
335a62ef 210 }
211
7a7c249c 212 # Get default and specified options
e02340f3 213 my %graphopts = (
214 # 'ratio' => 1,
215 );
7a7c249c 216 my %nodeopts = (
217 'fontsize' => 11,
7a7c249c 218 'style' => 'filled',
219 'fillcolor' => 'white',
e02340f3 220 'color' => 'white',
7a7c249c 221 'shape' => 'ellipse', # Shape for the extant nodes
222 );
223 my %edgeopts = (
e02340f3 224 'arrowhead' => 'none',
7a7c249c 225 );
226 @graphopts{ keys %{$opts->{'graph'}} } = values %{$opts->{'graph'}}
227 if $opts->{'graph'};
228 @nodeopts{ keys %{$opts->{'node'}} } = values %{$opts->{'node'}}
229 if $opts->{'node'};
230 @edgeopts{ keys %{$opts->{'edge'}} } = values %{$opts->{'edge'}}
231 if $opts->{'edge'};
335a62ef 232
7a7c249c 233 my @dotlines;
234 push( @dotlines, 'digraph stemma {' );
235 ## Print out the global attributes
236 push( @dotlines, _make_dotline( 'graph', %graphopts ) ) if keys %graphopts;
237 push( @dotlines, _make_dotline( 'edge', %edgeopts ) ) if keys %edgeopts;
7a7c249c 238 push( @dotlines, _make_dotline( 'node', %nodeopts ) ) if keys %nodeopts;
239
240 # Add each of the nodes.
335a62ef 241 foreach my $n ( $graph->vertices ) {
242 if( $graph->has_vertex_attribute( $n, 'label' ) ) {
243 my $ltext = $graph->get_vertex_attribute( $n, 'label' );
e02340f3 244 push( @dotlines, _make_dotline( $n, 'label' => $ltext ) );
e79c23c7 245 } else {
7a7c249c 246 # Use the default display settings.
986bbd1b 247 $n = _dotquote( $n );
7a7c249c 248 push( @dotlines, " $n;" );
e79c23c7 249 }
250 }
7a7c249c 251 # Add each of our edges.
335a62ef 252 foreach my $e ( $graph->edges ) {
986bbd1b 253 my( $from, $to ) = map { _dotquote( $_ ) } @$e;
7a7c249c 254 push( @dotlines, " $from -> $to;" );
255 }
256 push( @dotlines, '}' );
e79c23c7 257
7a7c249c 258 return join( "\n", @dotlines );
259}
260
0bded693 261=head2 alter_graph( $dotstring )
262
263Alters the graph of this stemma according to the definition specified
264in $dotstring.
265
266=cut
267
268sub alter_graph {
269 my( $self, $dotstring ) = @_;
270 my $dotfh;
271 open $dotfh, '<', \$dotstring;
f90b2bde 272 binmode $dotfh, ':utf8';
0bded693 273 $self->_graph_from_dot( $dotfh );
274}
275
335a62ef 276=head2 editable( $opts )
027d819c 277
5c44c598 278=head2 editable_graph( $graph, $opts )
279
88a6bac5 280Returns a version of the graph rendered in our definition format. The
335a62ef 281output separates statements with a newline; set $opts->{'linesep'} to the
282empty string or to a space if the result is to be sent via JSON.
283
5c44c598 284If a situational version of the stemma is required, the arguments for
285situation_graph should be passed via $opts->{'extant'} and $opts->{'layerwits'}.
027d819c 286
287=cut
7a7c249c 288
7a7c249c 289sub editable {
5c44c598 290 my( $self, $opts ) = @_;
335a62ef 291 my $graph = $self->graph;
5c44c598 292 ## See if we need an editable version of a situational graph.
293 if( exists $opts->{'layerwits'} || exists $opts->{'extant'} ) {
294 my $extant = delete $opts->{'extant'} || {};
295 my $layerwits = delete $opts->{'layerwits'} || [];
296 $graph = $self->situation_graph( $extant, $layerwits );
335a62ef 297 }
5c44c598 298 return editable_graph( $graph, $opts );
299}
300
301sub editable_graph {
302 my( $graph, $opts ) = @_;
335a62ef 303
304 # Create the graph
305 my $join = ( $opts && exists $opts->{'linesep'} ) ? $opts->{'linesep'} : "\n";
7a7c249c 306 my @dotlines;
307 push( @dotlines, 'digraph stemma {' );
308 my @real; # A cheap sort
5c44c598 309 foreach my $n ( sort $graph->vertices ) {
310 my $c = $graph->get_vertex_attribute( $n, 'class' );
7a7c249c 311 $c = 'extant' unless $c;
312 if( $c eq 'extant' ) {
313 push( @real, $n );
314 } else {
315 push( @dotlines, _make_dotline( $n, 'class' => $c ) );
316 }
e367f5c0 317 }
7a7c249c 318 # Now do the real ones
319 foreach my $n ( @real ) {
320 push( @dotlines, _make_dotline( $n, 'class' => 'extant' ) );
321 }
5c44c598 322 foreach my $e ( sort _by_vertex $graph->edges ) {
986bbd1b 323 my( $from, $to ) = map { _dotquote( $_ ) } @$e;
7a7c249c 324 push( @dotlines, " $from -> $to;" );
325 }
326 push( @dotlines, '}' );
88a6bac5 327 return join( $join, @dotlines );
7a7c249c 328}
329
330sub _make_dotline {
331 my( $obj, %attr ) = @_;
332 my @pairs;
333 foreach my $k ( keys %attr ) {
986bbd1b 334 my $v = _dotquote( $attr{$k} );
335 push( @pairs, "$k=$v" );
7a7c249c 336 }
986bbd1b 337 return sprintf( " %s [ %s ];", _dotquote( $obj ), join( ', ', @pairs ) );
8d9a1cd8 338}
339
986bbd1b 340sub _dotquote {
341 my( $str ) = @_;
342 return $str if $str =~ /^[A-Za-z0-9]+$/;
343 $str =~ s/\"/\\\"/g;
344 $str = '"' . $str . '"';
345 return $str;
346}
347
7a7c249c 348sub _by_vertex {
349 return $a->[0].$a->[1] cmp $b->[0].$b->[1];
350}
8d9a1cd8 351
5c44c598 352=head2 situation_graph( $extant, $layered )
335a62ef 353
5c44c598 354Returns a graph which is the original stemma with all witnesses not in the
355%$extant hash marked as hypothetical, and witness layers added to the graph
356according to the list in @$layered. A layered (a.c.) witness is added as a
357parent of its main version, and additionally shares all other parents and
358children with that version.
335a62ef 359
360=cut
361
5c44c598 362sub situation_graph {
363 my( $self, $extant, $layerwits ) = @_;
364
365 my $graph = $self->graph->copy;
366 foreach my $vertex ( $graph->vertices ) {
367 # Set as extant any vertex that is extant in the stemma AND
368 # exists in the $extant hash.
369 my $class = 'hypothetical';
370 $class = 'extant' if exists $extant->{$vertex} && $extant->{$vertex} &&
371 $self->graph->get_vertex_attribute( $vertex, 'class' ) ne 'hypothetical';
372 $graph->set_vertex_attribute( $vertex, 'class', $class );
373 }
374
335a62ef 375 # For each 'layered' witness in the layerwits array, add it to the graph
376 # as an ancestor of the 'main' witness, and otherwise with the same parent/
377 # child links as its main analogue.
378 # TOOD Handle case where B is copied from A but corrected from C
5c44c598 379 my $aclabel = $self->collation->ac_label;
335a62ef 380 foreach my $lw ( @$layerwits ) {
381 # Add the layered witness and set it with the same attributes as
382 # its 'main' analogue
5c44c598 383 throw( "Cannot add a layer to a hypothetical witness $lw" )
384 unless $graph->get_vertex_attribute( $lw, 'class' ) eq 'extant';
385 my $lwac = $lw . $aclabel;
335a62ef 386 $graph->add_vertex( $lwac );
387 $graph->set_vertex_attributes( $lwac,
388 $graph->get_vertex_attributes( $lw ) );
389
390 # Set it as ancestor to the main witness
391 $graph->add_edge( $lwac, $lw );
392
393 # Give it the same ancestors and descendants as the main witness has,
394 # bearing in mind that those ancestors and descendants might also just
395 # have had a layered witness defined.
396 foreach my $v ( $graph->predecessors( $lw ) ) {
397 next if $v eq $lwac; # Don't add a loop
398 $graph->add_edge( $v, $lwac );
5c44c598 399 $graph->add_edge( $v.$aclabel, $lwac )
400 if $graph->has_vertex( $v.$aclabel );
335a62ef 401 }
402 foreach my $v ( $graph->successors( $lw ) ) {
403 next if $v eq $lwac; # but this shouldn't occur
404 $graph->add_edge( $lwac, $v );
5c44c598 405 $graph->add_edge( $lwac, $v.$aclabel )
406 if $graph->has_vertex( $v.$aclabel );
335a62ef 407 }
408 }
409 return $graph;
410}
411
027d819c 412=head2 as_svg
413
414Returns an SVG representation of the graph, calling as_dot first.
415
416=cut
417
8d9a1cd8 418sub as_svg {
419 my( $self, $opts ) = @_;
420 my $dot = $self->as_dot( $opts );
e79c23c7 421 my @cmd = qw/dot -Tsvg/;
3bf5d6f1 422 my $svg;
e79c23c7 423 my $dotfile = File::Temp->new();
424 ## TODO REMOVE
425 # $dotfile->unlink_on_destroy(0);
426 binmode $dotfile, ':utf8';
8d9a1cd8 427 print $dotfile $dot;
459c39b3 428 close $dotfile;
e79c23c7 429 push( @cmd, $dotfile->filename );
430 run( \@cmd, ">", binary(), \$svg );
3bf5d6f1 431 # HACK: Parse the SVG and change the dimensions.
5a7e26a9 432 # Get rid of width and height attributes to allow scaling.
c57be097 433 if( $opts->{'size'} ) {
428bcf0b 434 require XML::LibXML;
459c39b3 435 my $parser = XML::LibXML->new( load_ext_dtd => 0 );
436 my $svgdoc;
437 eval {
438 $svgdoc = $parser->parse_string( decode_utf8( $svg ) );
439 };
440 throw( "Could not reparse SVG: $@" ) if $@;
c57be097 441 my( $ew, $eh ) = @{$opts->{'size'}};
442 # If the graph is wider than it is tall, set width to ew and remove height.
443 # Otherwise set height to eh and remove width.
444 my $width = $svgdoc->documentElement->getAttribute('width');
445 my $height = $svgdoc->documentElement->getAttribute('height');
446 $width =~ s/\D+//g;
447 $height =~ s/\D+//g;
14e6a110 448 my( $remove, $keep, $val, $viewbox );
c57be097 449 if( $width > $height ) {
450 $remove = 'height';
451 $keep = 'width';
452 $val = $ew . 'px';
14e6a110 453 my $vbheight = $width / $ew * $height;
454 $viewbox = "0.00 0.00 $width.00" . sprintf( "%.2f", $vbheight );
c57be097 455 } else {
456 $remove = 'width';
457 $keep = 'height';
458 $val = $eh . 'px';
14e6a110 459 my $vbwidth = $height / $eh * $width;
460 $viewbox = "0.00 0.00 " . sprintf( "%.2f", $vbwidth ) . " $height.00";
c57be097 461 }
462 $svgdoc->documentElement->removeAttribute( $remove );
463 $svgdoc->documentElement->setAttribute( $keep, $val );
14e6a110 464 $svgdoc->documentElement->removeAttribute( 'viewBox' );
465 $svgdoc->documentElement->setAttribute( 'viewBox', $viewbox );
428bcf0b 466 $svg = $svgdoc->toString();
c57be097 467 }
3bf5d6f1 468 # Return the result
428bcf0b 469 return decode_utf8( $svg );
e79c23c7 470}
471
027d819c 472=head2 witnesses
473
474Returns a list of the extant witnesses represented in the stemma.
475
476=cut
477
08e0fb85 478sub witnesses {
479 my $self = shift;
480 my @wits = grep { $self->graph->get_vertex_attribute( $_, 'class' ) eq 'extant' }
481 $self->graph->vertices;
482 return @wits;
483}
484
06e7cbc7 485=head2 hypotheticals
486
487Returns a list of the hypothetical witnesses represented in the stemma.
488
489=cut
490
bebec0e9 491sub hypotheticals {
492 my $self = shift;
493 my @wits = grep
494 { $self->graph->get_vertex_attribute( $_, 'class' ) eq 'hypothetical' }
495 $self->graph->vertices;
496 return @wits;
497}
498
63778331 499sub throw {
500 Text::Tradition::Error->throw(
501 'ident' => 'Stemma error',
502 'message' => $_[0],
503 );
504}
505
506
9463b0bf 507no Moose;
508__PACKAGE__->meta->make_immutable;
509
5101;
027d819c 511
512=head1 LICENSE
513
514This package is free software and is provided "as is" without express
515or implied warranty. You can redistribute it and/or modify it under
516the same terms as Perl itself.
517
518=head1 AUTHOR
519
520Tara L Andrews E<lt>aurum@cpan.orgE<gt>