Rework Stemma / StemmaUtil so that utility functions are all in the latter. Fixes #14
[scpubgit/stemmatology.git] / analysis / lib / Text / Tradition / Stemma.pm
CommitLineData
9463b0bf 1package Text::Tradition::Stemma;
2
e79c23c7 3use Encode qw( decode_utf8 );
9463b0bf 4use File::Temp;
e05997e2 5use Graph;
6use Graph::Reader::Dot;
e79c23c7 7use IPC::Run qw/ run binary /;
63778331 8use Text::Tradition::Error;
5873cf38 9use Text::Tradition::StemmaUtil qw/ editable_graph display_graph parse_newick /;
40f19742 10use Moose;
9463b0bf 11
027d819c 12=head1 NAME
13
14Text::Tradition::Stemma - a representation of a I<stemma codicum> for a Text::Tradition
15
16=head1 SYNOPSIS
17
18 use Text::Tradition;
19 my $t = Text::Tradition->new(
20 'name' => 'this is a text',
21 'input' => 'TEI',
22 'file' => '/path/to/tei_parallel_seg_file.xml' );
23
24 my $s = $tradition->add_stemma( dotfile => '/path/to/stemma.dot' );
25
26=head1 DESCRIPTION
27
28Text::Tradition is a library for representation and analysis of collated
335a62ef 29texts, particularly medieval ones. The Stemma is a representation of the
30copying relationships between the witnesses in a Tradition, modelled with
31a connected rooted directed acyclic graph (CRDAG).
027d819c 32
33=head1 DOT SYNTAX
34
335a62ef 35The easiest way to define a stemma is to use a special form of the 'dot'
36syntax of GraphViz.
027d819c 37
38Each stemma opens with the line
39
f96630e3 40 digraph "Name of Stemma" {
027d819c 41
42and continues with a list of all manuscript witnesses in the stemma, whether
43extant witnesses or missing archetypes or hyparchetypes. Each of these is
44listed by its sigil on its own line, e.g.:
45
46 alpha [ class=hypothetical ]
47 1 [ class=hypothetical,label=* ]
48 Ms4 [ class=extant ]
49
50Extant witnesses are listed with class=extant; missing or postulated witnesses
51are listed with class=hypothetical. Anonymous hyparchetypes must be given a
52unique name or number, but can be represented as anonymous with the addition
53of 'label=*' to their lines. Greek letters or other special characters may be
54used as names, but they must always be wrapped in double quotes.
55
56Links between manuscripts are then listed with arrow notation, as below. These
57lines show the direction of copying, one step at a time, for the entire stemma.
58
59 alpha -> 1
60 1 -> Ms4
61
62The final line in the definition should be the closing brace:
63
64 }
65
66Thus for a set of extant manuscripts A, B, and C, where A and B were copied
67from the archetype O and C was copied from B, the definition would be:
68
f96630e3 69 digraph "Test stemma 1" {
027d819c 70 O [ class=hypothetical]
71 A [ class=extant ]
72 B [ class=extant ]
73 C [ class=extant ]
74 O -> A
75 O -> B
76 B -> C
77 }
78
79=head1 CONSTRUCTOR
80
81=head2 new
82
83The constructor. This should generally be called from Text::Tradition, but
84if called directly it takes the following options:
85
86=over
87
027d819c 88=item * dot - A filehandle open to a DOT representation of the stemma graph.
89
ea45d2a6 90=item * graph - If no DOT specification is given, you can pass a Graph object
91instead. The vertices of the graph should have an attribute 'class' set to
92either of the values 'extant' or 'hypothetical'.
93
94=item * is_undirected - If the graph specification (or graph object) is for an
95undirected graph (e.g. a phylogenetic tree), this should be set.
96
027d819c 97=back
98
64a36834 99=begin testing
100
64a36834 101use TryCatch;
102
103use_ok( 'Text::Tradition::Stemma' );
104
64a36834 105# Try to create a bad graph
02b6340e 106my $baddotfh;
107open( $baddotfh, 't/data/besoin_bad.dot' ) or die "Could not open test dotfile";
108try {
109 my $stemma = Text::Tradition::Stemma->new( dot => $baddotfh );
110 ok( 0, "Created broken stemma from dotfile with syntax error" );
111} catch( Text::Tradition::Error $e ) {
112 like( $e->message, qr/^Error trying to parse/, "Syntax error in dot threw exception" );
64a36834 113}
114
115# Create a good graph
116my $dotfh;
117open( $dotfh, 't/data/florilegium.dot' ) or die "Could not open test dotfile";
118binmode( $dotfh, ':utf8' );
ace5fce5 119my $stemma = Text::Tradition::Stemma->new( dot => $dotfh );
64a36834 120is( ref( $stemma ), 'Text::Tradition::Stemma', "Created stemma from good dotfile" );
121is( scalar $stemma->witnesses, 13, "Found correct number of extant witnesses" );
122is( scalar $stemma->hypotheticals, 8, "Found correct number of extant hypotheticals" );
f96630e3 123ok( $stemma->has_identifier, "Stemma identifier was found in dot" );
124is( $stemma->identifier, 'Coislinianum lineage', "Correct stemma identifier was found in dot" );
64a36834 125my $found_unicode_sigil;
126foreach my $h ( $stemma->hypotheticals ) {
127 $found_unicode_sigil = 1 if $h eq "\x{3b1}";
128}
129ok( $found_unicode_sigil, "Found a correctly encoded Unicode sigil" );
130
cb741417 131# Create an undirected graph
132my $undirdotfh;
133open( $undirdotfh, 't/data/besoin_undirected.dot' ) or die "Could not open test dotfile";
134binmode( $undirdotfh, ':utf8' );
135my $udstemma = Text::Tradition::Stemma->new( dot => $undirdotfh );
136is( ref( $udstemma ), 'Text::Tradition::Stemma', "Created stemma from undirected dotfile" );
137is( scalar $udstemma->witnesses, 13, "Found correct number of extant witnesses" );
138is( scalar $udstemma->hypotheticals, 12, "Found correct number of hypotheticals" );
139ok( $udstemma->is_undirected, "Stemma was recorded as undirected" );
6665a327 140is( $udstemma->identifier, "RHM stemma", "Undirected graph retained its name" );
ea45d2a6 141
64a36834 142=end testing
143
027d819c 144=cut
145
9463b0bf 146has collation => (
147 is => 'ro',
148 isa => 'Text::Tradition::Collation',
ea45d2a6 149 clearer => 'clear_collation', # interim measure to remove refs in DB
8d9a1cd8 150 weak_ref => 1,
9463b0bf 151 );
152
e05997e2 153has graph => (
154 is => 'rw',
155 isa => 'Graph',
156 predicate => 'has_graph',
157 );
ea45d2a6 158
98f22390 159has identifier => (
ea45d2a6 160 is => 'ro',
98f22390 161 isa => 'Str',
162 writer => 'set_identifier',
163 predicate => 'has_identifier',
ea45d2a6 164 );
738620c7 165
166has from_jobid => (
167 is => 'ro',
168 isa => 'Str',
169 predicate => 'came_from_jobid',
170 writer => '_set_from_jobid',
171 );
98f22390 172
e05997e2 173sub BUILD {
174 my( $self, $args ) = @_;
175 # If we have been handed a dotfile, initialize it into a graph.
176 if( exists $args->{'dot'} ) {
027d819c 177 $self->_graph_from_dot( $args->{'dot'} );
98f22390 178 }
c0ccdb62 179}
180
ea45d2a6 181before 'graph' => sub {
182 my $self = shift;
183 if( @_ ) {
184 # Make sure all unclassed graph nodes are marked extant.
185 my $g = $_[0];
186 throw( "Cannot set graph to a non-Graph object" )
cb741417 187 unless $g->isa( 'Graph' );
ea45d2a6 188 foreach my $v ( $g->vertices ) {
189 unless( $g->has_vertex_attribute( $v, 'class' ) ) {
190 $g->set_vertex_attribute( $v, 'class', 'extant' );
191 }
192 }
98f22390 193 }
194};
195
027d819c 196sub _graph_from_dot {
8d9a1cd8 197 my( $self, $dotfh ) = @_;
8d9a1cd8 198 my $reader = Graph::Reader::Dot->new();
64a36834 199 # Redirect STDOUT in order to trap any error messages - syntax errors
200 # are evidently not fatal.
02b6340e 201 my $graph;
202 my $reader_out;
cb741417 203 my $reader_err;
02b6340e 204 {
205 local(*STDOUT);
206 open( STDOUT, ">", \$reader_out );
cb741417 207 local(*STDERR);
208 open( STDERR, ">", \$reader_err );
02b6340e 209 $graph = $reader->read_graph( $dotfh );
210 close STDOUT;
cb741417 211 close STDERR;
02b6340e 212 }
64a36834 213 if( $reader_out && $reader_out =~ /error/s ) {
214 throw( "Error trying to parse dot: $reader_out" );
215 } elsif( !$graph ) {
216 throw( "Failed to create graph from dot" );
217 }
6665a327 218 ## HORRIBLE HACK but there is no API access to graph attributes!
219 my $graph_id = exists $graph->[4]->{'name'} ? $graph->[4]->{'name'} : 'stemma';
cb741417 220 # Correct for implicit graph -> digraph quirk of reader
221 if( $reader_err && $reader_err =~ /graph will be treated as digraph/ ) {
222 my $udgraph = $graph->undirected_copy;
223 foreach my $v ( $graph->vertices ) {
224 $udgraph->set_vertex_attributes( $v, $graph->get_vertex_attributes( $v ) );
225 }
226 $graph = $udgraph;
227 }
64a36834 228 $self->graph( $graph );
6665a327 229 $self->set_identifier( $graph_id );
8d9a1cd8 230}
231
98f22390 232sub is_undirected {
233 my( $self ) = @_;
234 return undef unless $self->has_graph;
235 return $self->graph->is_undirected;
236}
237
5873cf38 238=head2 new_from_newick( $newick_string )
239
240A constructor that will read a Newick-format tree specification and return one
241or more undirected Stemma objects. TODO test
242
243=cut
244
245sub new_from_newick {
246 my( $class, $nstring ) = @_;
247 my @stemmata;
248 foreach my $tree ( parse_newick( $nstring ) ) {
249 my $stemma = new( $class, graph => $tree );
250 push( @stemmata, $stemma );
251 }
252 return \@stemmata;
253}
254
027d819c 255=head1 METHODS
256
257=head2 as_dot( \%options )
258
259Returns a normal dot representation of the stemma layout, suitable for rendering
260with GraphViz. Options include:
261
262=over
263
264=item * graph - A hashref of global graph options.
265
266=item * node - A hashref of global node options.
267
268=item * edge - A hashref of global edge options.
269
270=back
271
272See the GraphViz documentation for the list of available options.
273
274=cut
275
8d9a1cd8 276sub as_dot {
e367f5c0 277 my( $self, $opts ) = @_;
7a7c249c 278
335a62ef 279 ## See if we are including any a.c. witnesses in this graph.
280 my $graph = $self->graph;
281 if( exists $opts->{'layerwits'} ) {
5c44c598 282 my $extant = {};
283 map { $extant->{$_} = 1 } $self->witnesses;
284 $graph = $self->situation_graph( $extant, $opts->{'layerwits'} );
335a62ef 285 }
5873cf38 286 if( $self->has_identifier ) {
287 $opts->{'name'} = $self->identifier;
288 }
289 return display_graph( $graph, $opts );
7a7c249c 290}
291
0bded693 292=head2 alter_graph( $dotstring )
293
294Alters the graph of this stemma according to the definition specified
295in $dotstring.
296
297=cut
298
299sub alter_graph {
300 my( $self, $dotstring ) = @_;
301 my $dotfh;
302 open $dotfh, '<', \$dotstring;
f90b2bde 303 binmode $dotfh, ':utf8';
0bded693 304 $self->_graph_from_dot( $dotfh );
305}
306
335a62ef 307=head2 editable( $opts )
027d819c 308
88a6bac5 309Returns a version of the graph rendered in our definition format. The
335a62ef 310output separates statements with a newline; set $opts->{'linesep'} to the
311empty string or to a space if the result is to be sent via JSON.
312
5c44c598 313If a situational version of the stemma is required, the arguments for
314situation_graph should be passed via $opts->{'extant'} and $opts->{'layerwits'}.
027d819c 315
316=cut
7a7c249c 317
7a7c249c 318sub editable {
5c44c598 319 my( $self, $opts ) = @_;
335a62ef 320 my $graph = $self->graph;
f96630e3 321 if( $self->has_identifier ) {
322 $opts->{'name'} = $self->identifier;
323 }
5c44c598 324 ## See if we need an editable version of a situational graph.
325 if( exists $opts->{'layerwits'} || exists $opts->{'extant'} ) {
326 my $extant = delete $opts->{'extant'} || {};
327 my $layerwits = delete $opts->{'layerwits'} || [];
328 $graph = $self->situation_graph( $extant, $layerwits );
335a62ef 329 }
5c44c598 330 return editable_graph( $graph, $opts );
331}
332
8d9a1cd8 333
5c44c598 334=head2 situation_graph( $extant, $layered )
335a62ef 335
ea45d2a6 336Returns a graph which is the original stemma graph with all witnesses not
337in the %$extant hash marked as hypothetical, and witness layers added to
338the graph according to the list in @$layered. A layered (a.c.) witness is
339added as a parent of its main version, and additionally shares all other
340parents and children with that version.
335a62ef 341
342=cut
343
5c44c598 344sub situation_graph {
ace5fce5 345 my( $self, $extant, $layerwits, $layerlabel ) = @_;
5c44c598 346
347 my $graph = $self->graph->copy;
348 foreach my $vertex ( $graph->vertices ) {
349 # Set as extant any vertex that is extant in the stemma AND
350 # exists in the $extant hash.
351 my $class = 'hypothetical';
352 $class = 'extant' if exists $extant->{$vertex} && $extant->{$vertex} &&
353 $self->graph->get_vertex_attribute( $vertex, 'class' ) ne 'hypothetical';
354 $graph->set_vertex_attribute( $vertex, 'class', $class );
355 }
356
335a62ef 357 # For each 'layered' witness in the layerwits array, add it to the graph
358 # as an ancestor of the 'main' witness, and otherwise with the same parent/
359 # child links as its main analogue.
360 # TOOD Handle case where B is copied from A but corrected from C
ace5fce5 361 $layerlabel = ' (a.c.)' unless $layerlabel;
335a62ef 362 foreach my $lw ( @$layerwits ) {
363 # Add the layered witness and set it with the same attributes as
364 # its 'main' analogue
5c44c598 365 throw( "Cannot add a layer to a hypothetical witness $lw" )
366 unless $graph->get_vertex_attribute( $lw, 'class' ) eq 'extant';
ace5fce5 367 my $lwac = $lw . $layerlabel;
335a62ef 368 $graph->add_vertex( $lwac );
369 $graph->set_vertex_attributes( $lwac,
370 $graph->get_vertex_attributes( $lw ) );
371
372 # Set it as ancestor to the main witness
373 $graph->add_edge( $lwac, $lw );
374
375 # Give it the same ancestors and descendants as the main witness has,
376 # bearing in mind that those ancestors and descendants might also just
377 # have had a layered witness defined.
378 foreach my $v ( $graph->predecessors( $lw ) ) {
379 next if $v eq $lwac; # Don't add a loop
380 $graph->add_edge( $v, $lwac );
ace5fce5 381 $graph->add_edge( $v.$layerlabel, $lwac )
382 if $graph->has_vertex( $v.$layerlabel );
335a62ef 383 }
384 foreach my $v ( $graph->successors( $lw ) ) {
385 next if $v eq $lwac; # but this shouldn't occur
386 $graph->add_edge( $lwac, $v );
ace5fce5 387 $graph->add_edge( $lwac, $v.$layerlabel )
388 if $graph->has_vertex( $v.$layerlabel );
335a62ef 389 }
390 }
391 return $graph;
392}
393
027d819c 394=head2 as_svg
395
396Returns an SVG representation of the graph, calling as_dot first.
397
398=cut
399
8d9a1cd8 400sub as_svg {
401 my( $self, $opts ) = @_;
402 my $dot = $self->as_dot( $opts );
ea45d2a6 403 my @cmd = ( '-Tsvg' );
404 unshift( @cmd, $self->is_undirected ? 'neato' : 'dot' );
3bf5d6f1 405 my $svg;
e79c23c7 406 my $dotfile = File::Temp->new();
e79c23c7 407 binmode $dotfile, ':utf8';
8d9a1cd8 408 print $dotfile $dot;
459c39b3 409 close $dotfile;
e79c23c7 410 push( @cmd, $dotfile->filename );
411 run( \@cmd, ">", binary(), \$svg );
428bcf0b 412 return decode_utf8( $svg );
e79c23c7 413}
414
027d819c 415=head2 witnesses
416
417Returns a list of the extant witnesses represented in the stemma.
418
419=cut
420
08e0fb85 421sub witnesses {
422 my $self = shift;
423 my @wits = grep { $self->graph->get_vertex_attribute( $_, 'class' ) eq 'extant' }
424 $self->graph->vertices;
425 return @wits;
426}
427
06e7cbc7 428=head2 hypotheticals
429
430Returns a list of the hypothetical witnesses represented in the stemma.
431
432=cut
433
bebec0e9 434sub hypotheticals {
435 my $self = shift;
436 my @wits = grep
437 { $self->graph->get_vertex_attribute( $_, 'class' ) eq 'hypothetical' }
438 $self->graph->vertices;
439 return @wits;
440}
441
37bf09f4 442=head2 root_graph( $root_vertex )
ea45d2a6 443
444If the stemma graph is undirected, make it directed with $root_vertex at the root.
445If it is directed, re-root it.
446
447=cut
448
449sub root_graph {
450 my( $self, $rootvertex ) = @_;
451 my $graph;
907f6671 452 my $ident = $self->identifier; # will have to restore this at the end
ea45d2a6 453 if( $self->is_undirected ) {
454 $graph = $self->graph;
455 } else {
456 # Make an undirected version of this graph.
457 $graph = $self->graph->undirected_copy();
458 }
1cf6dd32 459 # First, ensure that the requested root is actually a vertex in the graph.
460 unless( $graph->has_vertex( $rootvertex ) ) {
461 throw( "Cannot orient graph $graph on nonexistent vertex $rootvertex" );
462 }
463
464 # Now make a directed version of the graph.
ea45d2a6 465 my $rooted = Graph->new();
466 $rooted->add_vertex( $rootvertex );
467 my @next = ( $rootvertex );
468 while( @next ) {
469 my @children;
470 foreach my $v ( @next ) {
471 # Place its not-placed neighbors (ergo children) in the tree
472 # and connect them
473 foreach my $n ( grep { !$rooted->has_vertex( $_ ) }
474 $graph->neighbors( $v ) ) {
475 $rooted->add_vertex( $n );
476 $rooted->add_edge( $v, $n );
477 push( @children, $n );
478 }
479 }
480 @next = @children;
481 }
482 # Set the vertex classes
483 map { $rooted->set_vertex_attribute( $_, 'class', 'hypothetical' ) }
1cf6dd32 484 $self->hypotheticals;
485 map { $rooted->set_vertex_attribute( $_, 'class', 'extant' ) }
486 $self->witnesses;
487 $self->graph( $rooted );
907f6671 488 $self->set_identifier( $ident );
ea45d2a6 489}
490
491
63778331 492sub throw {
493 Text::Tradition::Error->throw(
494 'ident' => 'Stemma error',
495 'message' => $_[0],
496 );
497}
498
499
9463b0bf 500no Moose;
501__PACKAGE__->meta->make_immutable;
502
5031;
027d819c 504
505=head1 LICENSE
506
507This package is free software and is provided "as is" without express
508or implied warranty. You can redistribute it and/or modify it under
509the same terms as Perl itself.
510
511=head1 AUTHOR
512
513Tara L Andrews E<lt>aurum@cpan.orgE<gt>