ensure graph names come through for undirected graphs too (tla/stemmaweb#28)
[scpubgit/stemmatology.git] / analysis / lib / Text / Tradition / Stemma.pm
CommitLineData
9463b0bf 1package Text::Tradition::Stemma;
2
e79c23c7 3use Encode qw( decode_utf8 );
9463b0bf 4use File::Temp;
e05997e2 5use Graph;
6use Graph::Reader::Dot;
e79c23c7 7use IPC::Run qw/ run binary /;
63778331 8use Text::Tradition::Error;
40f19742 9use Moose;
9463b0bf 10
027d819c 11=head1 NAME
12
13Text::Tradition::Stemma - a representation of a I<stemma codicum> for a Text::Tradition
14
15=head1 SYNOPSIS
16
17 use Text::Tradition;
18 my $t = Text::Tradition->new(
19 'name' => 'this is a text',
20 'input' => 'TEI',
21 'file' => '/path/to/tei_parallel_seg_file.xml' );
22
23 my $s = $tradition->add_stemma( dotfile => '/path/to/stemma.dot' );
24
25=head1 DESCRIPTION
26
27Text::Tradition is a library for representation and analysis of collated
335a62ef 28texts, particularly medieval ones. The Stemma is a representation of the
29copying relationships between the witnesses in a Tradition, modelled with
30a connected rooted directed acyclic graph (CRDAG).
027d819c 31
32=head1 DOT SYNTAX
33
335a62ef 34The easiest way to define a stemma is to use a special form of the 'dot'
35syntax of GraphViz.
027d819c 36
37Each stemma opens with the line
38
f96630e3 39 digraph "Name of Stemma" {
027d819c 40
41and continues with a list of all manuscript witnesses in the stemma, whether
42extant witnesses or missing archetypes or hyparchetypes. Each of these is
43listed by its sigil on its own line, e.g.:
44
45 alpha [ class=hypothetical ]
46 1 [ class=hypothetical,label=* ]
47 Ms4 [ class=extant ]
48
49Extant witnesses are listed with class=extant; missing or postulated witnesses
50are listed with class=hypothetical. Anonymous hyparchetypes must be given a
51unique name or number, but can be represented as anonymous with the addition
52of 'label=*' to their lines. Greek letters or other special characters may be
53used as names, but they must always be wrapped in double quotes.
54
55Links between manuscripts are then listed with arrow notation, as below. These
56lines show the direction of copying, one step at a time, for the entire stemma.
57
58 alpha -> 1
59 1 -> Ms4
60
61The final line in the definition should be the closing brace:
62
63 }
64
65Thus for a set of extant manuscripts A, B, and C, where A and B were copied
66from the archetype O and C was copied from B, the definition would be:
67
f96630e3 68 digraph "Test stemma 1" {
027d819c 69 O [ class=hypothetical]
70 A [ class=extant ]
71 B [ class=extant ]
72 C [ class=extant ]
73 O -> A
74 O -> B
75 B -> C
76 }
77
78=head1 CONSTRUCTOR
79
80=head2 new
81
82The constructor. This should generally be called from Text::Tradition, but
83if called directly it takes the following options:
84
85=over
86
027d819c 87=item * dot - A filehandle open to a DOT representation of the stemma graph.
88
ea45d2a6 89=item * graph - If no DOT specification is given, you can pass a Graph object
90instead. The vertices of the graph should have an attribute 'class' set to
91either of the values 'extant' or 'hypothetical'.
92
93=item * is_undirected - If the graph specification (or graph object) is for an
94undirected graph (e.g. a phylogenetic tree), this should be set.
95
027d819c 96=back
97
64a36834 98=begin testing
99
64a36834 100use TryCatch;
101
102use_ok( 'Text::Tradition::Stemma' );
103
64a36834 104# Try to create a bad graph
02b6340e 105my $baddotfh;
106open( $baddotfh, 't/data/besoin_bad.dot' ) or die "Could not open test dotfile";
107try {
108 my $stemma = Text::Tradition::Stemma->new( dot => $baddotfh );
109 ok( 0, "Created broken stemma from dotfile with syntax error" );
110} catch( Text::Tradition::Error $e ) {
111 like( $e->message, qr/^Error trying to parse/, "Syntax error in dot threw exception" );
64a36834 112}
113
114# Create a good graph
115my $dotfh;
116open( $dotfh, 't/data/florilegium.dot' ) or die "Could not open test dotfile";
117binmode( $dotfh, ':utf8' );
ace5fce5 118my $stemma = Text::Tradition::Stemma->new( dot => $dotfh );
64a36834 119is( ref( $stemma ), 'Text::Tradition::Stemma', "Created stemma from good dotfile" );
120is( scalar $stemma->witnesses, 13, "Found correct number of extant witnesses" );
121is( scalar $stemma->hypotheticals, 8, "Found correct number of extant hypotheticals" );
f96630e3 122ok( $stemma->has_identifier, "Stemma identifier was found in dot" );
123is( $stemma->identifier, 'Coislinianum lineage', "Correct stemma identifier was found in dot" );
64a36834 124my $found_unicode_sigil;
125foreach my $h ( $stemma->hypotheticals ) {
126 $found_unicode_sigil = 1 if $h eq "\x{3b1}";
127}
128ok( $found_unicode_sigil, "Found a correctly encoded Unicode sigil" );
129
cb741417 130# Create an undirected graph
131my $undirdotfh;
132open( $undirdotfh, 't/data/besoin_undirected.dot' ) or die "Could not open test dotfile";
133binmode( $undirdotfh, ':utf8' );
134my $udstemma = Text::Tradition::Stemma->new( dot => $undirdotfh );
135is( ref( $udstemma ), 'Text::Tradition::Stemma', "Created stemma from undirected dotfile" );
136is( scalar $udstemma->witnesses, 13, "Found correct number of extant witnesses" );
137is( scalar $udstemma->hypotheticals, 12, "Found correct number of hypotheticals" );
138ok( $udstemma->is_undirected, "Stemma was recorded as undirected" );
6665a327 139is( $udstemma->identifier, "RHM stemma", "Undirected graph retained its name" );
ea45d2a6 140
64a36834 141=end testing
142
027d819c 143=cut
144
9463b0bf 145has collation => (
146 is => 'ro',
147 isa => 'Text::Tradition::Collation',
ea45d2a6 148 clearer => 'clear_collation', # interim measure to remove refs in DB
8d9a1cd8 149 weak_ref => 1,
9463b0bf 150 );
151
e05997e2 152has graph => (
153 is => 'rw',
154 isa => 'Graph',
155 predicate => 'has_graph',
156 );
ea45d2a6 157
98f22390 158has identifier => (
ea45d2a6 159 is => 'ro',
98f22390 160 isa => 'Str',
161 writer => 'set_identifier',
162 predicate => 'has_identifier',
ea45d2a6 163 );
98f22390 164
e05997e2 165sub BUILD {
166 my( $self, $args ) = @_;
167 # If we have been handed a dotfile, initialize it into a graph.
168 if( exists $args->{'dot'} ) {
027d819c 169 $self->_graph_from_dot( $args->{'dot'} );
98f22390 170 }
c0ccdb62 171}
172
ea45d2a6 173before 'graph' => sub {
174 my $self = shift;
175 if( @_ ) {
176 # Make sure all unclassed graph nodes are marked extant.
177 my $g = $_[0];
178 throw( "Cannot set graph to a non-Graph object" )
cb741417 179 unless $g->isa( 'Graph' );
ea45d2a6 180 foreach my $v ( $g->vertices ) {
181 unless( $g->has_vertex_attribute( $v, 'class' ) ) {
182 $g->set_vertex_attribute( $v, 'class', 'extant' );
183 }
184 }
98f22390 185 }
186};
187
027d819c 188sub _graph_from_dot {
8d9a1cd8 189 my( $self, $dotfh ) = @_;
8d9a1cd8 190 my $reader = Graph::Reader::Dot->new();
64a36834 191 # Redirect STDOUT in order to trap any error messages - syntax errors
192 # are evidently not fatal.
02b6340e 193 my $graph;
194 my $reader_out;
cb741417 195 my $reader_err;
02b6340e 196 {
197 local(*STDOUT);
198 open( STDOUT, ">", \$reader_out );
cb741417 199 local(*STDERR);
200 open( STDERR, ">", \$reader_err );
02b6340e 201 $graph = $reader->read_graph( $dotfh );
202 close STDOUT;
cb741417 203 close STDERR;
02b6340e 204 }
64a36834 205 if( $reader_out && $reader_out =~ /error/s ) {
206 throw( "Error trying to parse dot: $reader_out" );
207 } elsif( !$graph ) {
208 throw( "Failed to create graph from dot" );
209 }
6665a327 210 ## HORRIBLE HACK but there is no API access to graph attributes!
211 my $graph_id = exists $graph->[4]->{'name'} ? $graph->[4]->{'name'} : 'stemma';
cb741417 212 # Correct for implicit graph -> digraph quirk of reader
213 if( $reader_err && $reader_err =~ /graph will be treated as digraph/ ) {
214 my $udgraph = $graph->undirected_copy;
215 foreach my $v ( $graph->vertices ) {
216 $udgraph->set_vertex_attributes( $v, $graph->get_vertex_attributes( $v ) );
217 }
218 $graph = $udgraph;
219 }
64a36834 220 $self->graph( $graph );
6665a327 221 $self->set_identifier( $graph_id );
8d9a1cd8 222}
223
98f22390 224sub is_undirected {
225 my( $self ) = @_;
226 return undef unless $self->has_graph;
227 return $self->graph->is_undirected;
228}
229
027d819c 230=head1 METHODS
231
232=head2 as_dot( \%options )
233
234Returns a normal dot representation of the stemma layout, suitable for rendering
235with GraphViz. Options include:
236
237=over
238
239=item * graph - A hashref of global graph options.
240
241=item * node - A hashref of global node options.
242
243=item * edge - A hashref of global edge options.
244
245=back
246
247See the GraphViz documentation for the list of available options.
248
249=cut
250
8d9a1cd8 251sub as_dot {
e367f5c0 252 my( $self, $opts ) = @_;
7a7c249c 253
335a62ef 254 ## See if we are including any a.c. witnesses in this graph.
255 my $graph = $self->graph;
256 if( exists $opts->{'layerwits'} ) {
5c44c598 257 my $extant = {};
258 map { $extant->{$_} = 1 } $self->witnesses;
259 $graph = $self->situation_graph( $extant, $opts->{'layerwits'} );
335a62ef 260 }
261
7a7c249c 262 # Get default and specified options
e02340f3 263 my %graphopts = (
264 # 'ratio' => 1,
f05e891c 265 'bgcolor' => 'transparent',
e02340f3 266 );
7a7c249c 267 my %nodeopts = (
268 'fontsize' => 11,
7a7c249c 269 'style' => 'filled',
270 'fillcolor' => 'white',
e02340f3 271 'color' => 'white',
7a7c249c 272 'shape' => 'ellipse', # Shape for the extant nodes
273 );
274 my %edgeopts = (
e02340f3 275 'arrowhead' => 'none',
7a7c249c 276 );
277 @graphopts{ keys %{$opts->{'graph'}} } = values %{$opts->{'graph'}}
278 if $opts->{'graph'};
279 @nodeopts{ keys %{$opts->{'node'}} } = values %{$opts->{'node'}}
280 if $opts->{'node'};
281 @edgeopts{ keys %{$opts->{'edge'}} } = values %{$opts->{'edge'}}
282 if $opts->{'edge'};
335a62ef 283
ea45d2a6 284 my $gdecl = $graph->is_directed ? 'digraph' : 'graph';
f96630e3 285 my $gname = $self->has_identifier ? '"' . $self->identifier . '"'
286 : 'stemma';
7a7c249c 287 my @dotlines;
f96630e3 288 push( @dotlines, "$gdecl $gname {" );
7a7c249c 289 ## Print out the global attributes
290 push( @dotlines, _make_dotline( 'graph', %graphopts ) ) if keys %graphopts;
291 push( @dotlines, _make_dotline( 'edge', %edgeopts ) ) if keys %edgeopts;
7a7c249c 292 push( @dotlines, _make_dotline( 'node', %nodeopts ) ) if keys %nodeopts;
293
294 # Add each of the nodes.
335a62ef 295 foreach my $n ( $graph->vertices ) {
c753ea40 296 my %vattr = ( 'id' => $n ); # Set the SVG element ID to the sigil itself
335a62ef 297 if( $graph->has_vertex_attribute( $n, 'label' ) ) {
c753ea40 298 $vattr{'label'} = $graph->get_vertex_attribute( $n, 'label' );
e79c23c7 299 }
c753ea40 300 push( @dotlines, _make_dotline( $n, %vattr ) );
e79c23c7 301 }
7a7c249c 302 # Add each of our edges.
335a62ef 303 foreach my $e ( $graph->edges ) {
986bbd1b 304 my( $from, $to ) = map { _dotquote( $_ ) } @$e;
ea45d2a6 305 my $connector = $graph->is_directed ? '->' : '--';
306 push( @dotlines, " $from $connector $to;" );
7a7c249c 307 }
308 push( @dotlines, '}' );
e79c23c7 309
7a7c249c 310 return join( "\n", @dotlines );
311}
312
0bded693 313=head2 alter_graph( $dotstring )
314
315Alters the graph of this stemma according to the definition specified
316in $dotstring.
317
318=cut
319
320sub alter_graph {
321 my( $self, $dotstring ) = @_;
322 my $dotfh;
323 open $dotfh, '<', \$dotstring;
f90b2bde 324 binmode $dotfh, ':utf8';
0bded693 325 $self->_graph_from_dot( $dotfh );
326}
327
335a62ef 328=head2 editable( $opts )
027d819c 329
5c44c598 330=head2 editable_graph( $graph, $opts )
331
88a6bac5 332Returns a version of the graph rendered in our definition format. The
335a62ef 333output separates statements with a newline; set $opts->{'linesep'} to the
334empty string or to a space if the result is to be sent via JSON.
335
5c44c598 336If a situational version of the stemma is required, the arguments for
337situation_graph should be passed via $opts->{'extant'} and $opts->{'layerwits'}.
027d819c 338
339=cut
7a7c249c 340
7a7c249c 341sub editable {
5c44c598 342 my( $self, $opts ) = @_;
335a62ef 343 my $graph = $self->graph;
f96630e3 344 if( $self->has_identifier ) {
345 $opts->{'name'} = $self->identifier;
346 }
5c44c598 347 ## See if we need an editable version of a situational graph.
348 if( exists $opts->{'layerwits'} || exists $opts->{'extant'} ) {
349 my $extant = delete $opts->{'extant'} || {};
350 my $layerwits = delete $opts->{'layerwits'} || [];
351 $graph = $self->situation_graph( $extant, $layerwits );
335a62ef 352 }
5c44c598 353 return editable_graph( $graph, $opts );
354}
355
356sub editable_graph {
357 my( $graph, $opts ) = @_;
335a62ef 358
359 # Create the graph
360 my $join = ( $opts && exists $opts->{'linesep'} ) ? $opts->{'linesep'} : "\n";
ea45d2a6 361 my $gdecl = $graph->is_undirected ? 'graph' : 'digraph';
f96630e3 362 my $gname = exists $opts->{'name'} ? '"' . $opts->{'name'} . '"'
363 : 'stemma';
7a7c249c 364 my @dotlines;
f96630e3 365 push( @dotlines, "$gdecl $gname {" );
7a7c249c 366 my @real; # A cheap sort
5c44c598 367 foreach my $n ( sort $graph->vertices ) {
368 my $c = $graph->get_vertex_attribute( $n, 'class' );
7a7c249c 369 $c = 'extant' unless $c;
370 if( $c eq 'extant' ) {
371 push( @real, $n );
372 } else {
373 push( @dotlines, _make_dotline( $n, 'class' => $c ) );
374 }
e367f5c0 375 }
7a7c249c 376 # Now do the real ones
377 foreach my $n ( @real ) {
378 push( @dotlines, _make_dotline( $n, 'class' => 'extant' ) );
379 }
5c44c598 380 foreach my $e ( sort _by_vertex $graph->edges ) {
986bbd1b 381 my( $from, $to ) = map { _dotquote( $_ ) } @$e;
ea45d2a6 382 my $conn = $graph->is_undirected ? '--' : '->';
383 push( @dotlines, " $from $conn $to;" );
7a7c249c 384 }
385 push( @dotlines, '}' );
88a6bac5 386 return join( $join, @dotlines );
7a7c249c 387}
388
389sub _make_dotline {
390 my( $obj, %attr ) = @_;
391 my @pairs;
392 foreach my $k ( keys %attr ) {
986bbd1b 393 my $v = _dotquote( $attr{$k} );
394 push( @pairs, "$k=$v" );
7a7c249c 395 }
986bbd1b 396 return sprintf( " %s [ %s ];", _dotquote( $obj ), join( ', ', @pairs ) );
8d9a1cd8 397}
398
986bbd1b 399sub _dotquote {
400 my( $str ) = @_;
401 return $str if $str =~ /^[A-Za-z0-9]+$/;
402 $str =~ s/\"/\\\"/g;
403 $str = '"' . $str . '"';
404 return $str;
405}
406
7a7c249c 407sub _by_vertex {
408 return $a->[0].$a->[1] cmp $b->[0].$b->[1];
409}
8d9a1cd8 410
5c44c598 411=head2 situation_graph( $extant, $layered )
335a62ef 412
ea45d2a6 413Returns a graph which is the original stemma graph with all witnesses not
414in the %$extant hash marked as hypothetical, and witness layers added to
415the graph according to the list in @$layered. A layered (a.c.) witness is
416added as a parent of its main version, and additionally shares all other
417parents and children with that version.
335a62ef 418
419=cut
420
5c44c598 421sub situation_graph {
ace5fce5 422 my( $self, $extant, $layerwits, $layerlabel ) = @_;
5c44c598 423
424 my $graph = $self->graph->copy;
425 foreach my $vertex ( $graph->vertices ) {
426 # Set as extant any vertex that is extant in the stemma AND
427 # exists in the $extant hash.
428 my $class = 'hypothetical';
429 $class = 'extant' if exists $extant->{$vertex} && $extant->{$vertex} &&
430 $self->graph->get_vertex_attribute( $vertex, 'class' ) ne 'hypothetical';
431 $graph->set_vertex_attribute( $vertex, 'class', $class );
432 }
433
335a62ef 434 # For each 'layered' witness in the layerwits array, add it to the graph
435 # as an ancestor of the 'main' witness, and otherwise with the same parent/
436 # child links as its main analogue.
437 # TOOD Handle case where B is copied from A but corrected from C
ace5fce5 438 $layerlabel = ' (a.c.)' unless $layerlabel;
335a62ef 439 foreach my $lw ( @$layerwits ) {
440 # Add the layered witness and set it with the same attributes as
441 # its 'main' analogue
5c44c598 442 throw( "Cannot add a layer to a hypothetical witness $lw" )
443 unless $graph->get_vertex_attribute( $lw, 'class' ) eq 'extant';
ace5fce5 444 my $lwac = $lw . $layerlabel;
335a62ef 445 $graph->add_vertex( $lwac );
446 $graph->set_vertex_attributes( $lwac,
447 $graph->get_vertex_attributes( $lw ) );
448
449 # Set it as ancestor to the main witness
450 $graph->add_edge( $lwac, $lw );
451
452 # Give it the same ancestors and descendants as the main witness has,
453 # bearing in mind that those ancestors and descendants might also just
454 # have had a layered witness defined.
455 foreach my $v ( $graph->predecessors( $lw ) ) {
456 next if $v eq $lwac; # Don't add a loop
457 $graph->add_edge( $v, $lwac );
ace5fce5 458 $graph->add_edge( $v.$layerlabel, $lwac )
459 if $graph->has_vertex( $v.$layerlabel );
335a62ef 460 }
461 foreach my $v ( $graph->successors( $lw ) ) {
462 next if $v eq $lwac; # but this shouldn't occur
463 $graph->add_edge( $lwac, $v );
ace5fce5 464 $graph->add_edge( $lwac, $v.$layerlabel )
465 if $graph->has_vertex( $v.$layerlabel );
335a62ef 466 }
467 }
468 return $graph;
469}
470
027d819c 471=head2 as_svg
472
473Returns an SVG representation of the graph, calling as_dot first.
474
475=cut
476
8d9a1cd8 477sub as_svg {
478 my( $self, $opts ) = @_;
479 my $dot = $self->as_dot( $opts );
ea45d2a6 480 my @cmd = ( '-Tsvg' );
481 unshift( @cmd, $self->is_undirected ? 'neato' : 'dot' );
3bf5d6f1 482 my $svg;
e79c23c7 483 my $dotfile = File::Temp->new();
e79c23c7 484 binmode $dotfile, ':utf8';
8d9a1cd8 485 print $dotfile $dot;
459c39b3 486 close $dotfile;
e79c23c7 487 push( @cmd, $dotfile->filename );
488 run( \@cmd, ">", binary(), \$svg );
428bcf0b 489 return decode_utf8( $svg );
e79c23c7 490}
491
027d819c 492=head2 witnesses
493
494Returns a list of the extant witnesses represented in the stemma.
495
496=cut
497
08e0fb85 498sub witnesses {
499 my $self = shift;
500 my @wits = grep { $self->graph->get_vertex_attribute( $_, 'class' ) eq 'extant' }
501 $self->graph->vertices;
502 return @wits;
503}
504
06e7cbc7 505=head2 hypotheticals
506
507Returns a list of the hypothetical witnesses represented in the stemma.
508
509=cut
510
bebec0e9 511sub hypotheticals {
512 my $self = shift;
513 my @wits = grep
514 { $self->graph->get_vertex_attribute( $_, 'class' ) eq 'hypothetical' }
515 $self->graph->vertices;
516 return @wits;
517}
518
37bf09f4 519=head2 root_graph( $root_vertex )
ea45d2a6 520
521If the stemma graph is undirected, make it directed with $root_vertex at the root.
522If it is directed, re-root it.
523
524=cut
525
526sub root_graph {
527 my( $self, $rootvertex ) = @_;
528 my $graph;
907f6671 529 my $ident = $self->identifier; # will have to restore this at the end
ea45d2a6 530 if( $self->is_undirected ) {
531 $graph = $self->graph;
532 } else {
533 # Make an undirected version of this graph.
534 $graph = $self->graph->undirected_copy();
535 }
1cf6dd32 536 # First, ensure that the requested root is actually a vertex in the graph.
537 unless( $graph->has_vertex( $rootvertex ) ) {
538 throw( "Cannot orient graph $graph on nonexistent vertex $rootvertex" );
539 }
540
541 # Now make a directed version of the graph.
ea45d2a6 542 my $rooted = Graph->new();
543 $rooted->add_vertex( $rootvertex );
544 my @next = ( $rootvertex );
545 while( @next ) {
546 my @children;
547 foreach my $v ( @next ) {
548 # Place its not-placed neighbors (ergo children) in the tree
549 # and connect them
550 foreach my $n ( grep { !$rooted->has_vertex( $_ ) }
551 $graph->neighbors( $v ) ) {
552 $rooted->add_vertex( $n );
553 $rooted->add_edge( $v, $n );
554 push( @children, $n );
555 }
556 }
557 @next = @children;
558 }
559 # Set the vertex classes
560 map { $rooted->set_vertex_attribute( $_, 'class', 'hypothetical' ) }
1cf6dd32 561 $self->hypotheticals;
562 map { $rooted->set_vertex_attribute( $_, 'class', 'extant' ) }
563 $self->witnesses;
564 $self->graph( $rooted );
907f6671 565 $self->set_identifier( $ident );
ea45d2a6 566}
567
568
63778331 569sub throw {
570 Text::Tradition::Error->throw(
571 'ident' => 'Stemma error',
572 'message' => $_[0],
573 );
574}
575
576
9463b0bf 577no Moose;
578__PACKAGE__->meta->make_immutable;
579
5801;
027d819c 581
582=head1 LICENSE
583
584This package is free software and is provided "as is" without express
585or implied warranty. You can redistribute it and/or modify it under
586the same terms as Perl itself.
587
588=head1 AUTHOR
589
590Tara L Andrews E<lt>aurum@cpan.orgE<gt>