Collect all hacks for Graph::Reader::Dot into a single utility. Fixes #15
[scpubgit/stemmatology.git] / analysis / lib / Text / Tradition / Stemma.pm
CommitLineData
9463b0bf 1package Text::Tradition::Stemma;
2
e79c23c7 3use Encode qw( decode_utf8 );
9463b0bf 4use File::Temp;
e05997e2 5use Graph;
6use Graph::Reader::Dot;
e79c23c7 7use IPC::Run qw/ run binary /;
63778331 8use Text::Tradition::Error;
11954259 9use Text::Tradition::StemmaUtil qw/ read_graph editable_graph display_graph
10 parse_newick /;
40f19742 11use Moose;
9463b0bf 12
027d819c 13=head1 NAME
14
15Text::Tradition::Stemma - a representation of a I<stemma codicum> for a Text::Tradition
16
17=head1 SYNOPSIS
18
19 use Text::Tradition;
20 my $t = Text::Tradition->new(
21 'name' => 'this is a text',
22 'input' => 'TEI',
23 'file' => '/path/to/tei_parallel_seg_file.xml' );
24
25 my $s = $tradition->add_stemma( dotfile => '/path/to/stemma.dot' );
26
27=head1 DESCRIPTION
28
29Text::Tradition is a library for representation and analysis of collated
335a62ef 30texts, particularly medieval ones. The Stemma is a representation of the
31copying relationships between the witnesses in a Tradition, modelled with
32a connected rooted directed acyclic graph (CRDAG).
027d819c 33
34=head1 DOT SYNTAX
35
335a62ef 36The easiest way to define a stemma is to use a special form of the 'dot'
37syntax of GraphViz.
027d819c 38
39Each stemma opens with the line
40
f96630e3 41 digraph "Name of Stemma" {
027d819c 42
43and continues with a list of all manuscript witnesses in the stemma, whether
44extant witnesses or missing archetypes or hyparchetypes. Each of these is
45listed by its sigil on its own line, e.g.:
46
47 alpha [ class=hypothetical ]
48 1 [ class=hypothetical,label=* ]
49 Ms4 [ class=extant ]
50
51Extant witnesses are listed with class=extant; missing or postulated witnesses
52are listed with class=hypothetical. Anonymous hyparchetypes must be given a
53unique name or number, but can be represented as anonymous with the addition
54of 'label=*' to their lines. Greek letters or other special characters may be
55used as names, but they must always be wrapped in double quotes.
56
57Links between manuscripts are then listed with arrow notation, as below. These
58lines show the direction of copying, one step at a time, for the entire stemma.
59
60 alpha -> 1
61 1 -> Ms4
62
63The final line in the definition should be the closing brace:
64
65 }
66
67Thus for a set of extant manuscripts A, B, and C, where A and B were copied
68from the archetype O and C was copied from B, the definition would be:
69
f96630e3 70 digraph "Test stemma 1" {
027d819c 71 O [ class=hypothetical]
72 A [ class=extant ]
73 B [ class=extant ]
74 C [ class=extant ]
75 O -> A
76 O -> B
77 B -> C
78 }
79
80=head1 CONSTRUCTOR
81
82=head2 new
83
84The constructor. This should generally be called from Text::Tradition, but
85if called directly it takes the following options:
86
87=over
88
027d819c 89=item * dot - A filehandle open to a DOT representation of the stemma graph.
90
ea45d2a6 91=item * graph - If no DOT specification is given, you can pass a Graph object
92instead. The vertices of the graph should have an attribute 'class' set to
93either of the values 'extant' or 'hypothetical'.
94
95=item * is_undirected - If the graph specification (or graph object) is for an
96undirected graph (e.g. a phylogenetic tree), this should be set.
97
027d819c 98=back
99
64a36834 100=begin testing
101
64a36834 102use TryCatch;
103
104use_ok( 'Text::Tradition::Stemma' );
105
64a36834 106# Try to create a bad graph
02b6340e 107try {
11954259 108 my $stemma = Text::Tradition::Stemma->new( dotfile => 't/data/besoin_bad.dot' );
02b6340e 109 ok( 0, "Created broken stemma from dotfile with syntax error" );
110} catch( Text::Tradition::Error $e ) {
111 like( $e->message, qr/^Error trying to parse/, "Syntax error in dot threw exception" );
64a36834 112}
113
114# Create a good graph
11954259 115my $stemma = Text::Tradition::Stemma->new( dotfile => 't/data/florilegium.dot' );
64a36834 116is( ref( $stemma ), 'Text::Tradition::Stemma', "Created stemma from good dotfile" );
117is( scalar $stemma->witnesses, 13, "Found correct number of extant witnesses" );
118is( scalar $stemma->hypotheticals, 8, "Found correct number of extant hypotheticals" );
f96630e3 119ok( $stemma->has_identifier, "Stemma identifier was found in dot" );
120is( $stemma->identifier, 'Coislinianum lineage', "Correct stemma identifier was found in dot" );
64a36834 121my $found_unicode_sigil;
122foreach my $h ( $stemma->hypotheticals ) {
123 $found_unicode_sigil = 1 if $h eq "\x{3b1}";
124}
125ok( $found_unicode_sigil, "Found a correctly encoded Unicode sigil" );
126
cb741417 127# Create an undirected graph
11954259 128my $udstemma = Text::Tradition::Stemma->new( dotfile => 't/data/besoin_undirected.dot' );
cb741417 129is( ref( $udstemma ), 'Text::Tradition::Stemma', "Created stemma from undirected dotfile" );
130is( scalar $udstemma->witnesses, 13, "Found correct number of extant witnesses" );
131is( scalar $udstemma->hypotheticals, 12, "Found correct number of hypotheticals" );
132ok( $udstemma->is_undirected, "Stemma was recorded as undirected" );
6665a327 133is( $udstemma->identifier, "RHM stemma", "Undirected graph retained its name" );
ea45d2a6 134
64a36834 135=end testing
136
027d819c 137=cut
138
9463b0bf 139has collation => (
140 is => 'ro',
141 isa => 'Text::Tradition::Collation',
ea45d2a6 142 clearer => 'clear_collation', # interim measure to remove refs in DB
8d9a1cd8 143 weak_ref => 1,
9463b0bf 144 );
145
e05997e2 146has graph => (
147 is => 'rw',
148 isa => 'Graph',
149 predicate => 'has_graph',
150 );
ea45d2a6 151
98f22390 152has identifier => (
ea45d2a6 153 is => 'ro',
98f22390 154 isa => 'Str',
155 writer => 'set_identifier',
156 predicate => 'has_identifier',
ea45d2a6 157 );
738620c7 158
159has from_jobid => (
160 is => 'ro',
161 isa => 'Str',
162 predicate => 'came_from_jobid',
163 writer => '_set_from_jobid',
164 );
98f22390 165
e05997e2 166sub BUILD {
167 my( $self, $args ) = @_;
168 # If we have been handed a dotfile, initialize it into a graph.
11954259 169 my $dotstring;
e05997e2 170 if( exists $args->{'dot'} ) {
11954259 171 $dotstring = $args->{'dot'};
172 } elsif( exists $args->{'dotfile'} ) {
173 # Read the file into a string.
174 my @dotlines;
175 open( DOTFH, $args->{'dotfile'} )
176 or throw( "Could not read specified dot file " . $args->{'dotfile'} );
177 binmode( DOTFH, ':encoding(UTF-8)' );
178 @dotlines = <DOTFH>;
179 close DOTFH;
180 $dotstring = join( '', @dotlines );
181 }
182 $self->_graph_from_dot( $dotstring ) if $dotstring;
c0ccdb62 183}
184
ea45d2a6 185before 'graph' => sub {
186 my $self = shift;
187 if( @_ ) {
188 # Make sure all unclassed graph nodes are marked extant.
189 my $g = $_[0];
190 throw( "Cannot set graph to a non-Graph object" )
cb741417 191 unless $g->isa( 'Graph' );
ea45d2a6 192 foreach my $v ( $g->vertices ) {
193 unless( $g->has_vertex_attribute( $v, 'class' ) ) {
194 $g->set_vertex_attribute( $v, 'class', 'extant' );
195 }
196 }
98f22390 197 }
198};
199
027d819c 200sub _graph_from_dot {
11954259 201 my( $self, $dotstring ) = @_;
202 my $graph = read_graph( $dotstring );
203
6665a327 204 ## HORRIBLE HACK but there is no API access to graph attributes!
11954259 205 my $graph_id = $graph->has_graph_attribute( 'name' )
206 ? $graph->get_graph_attribute( 'name' ) : 'stemma';
64a36834 207 $self->graph( $graph );
6665a327 208 $self->set_identifier( $graph_id );
8d9a1cd8 209}
210
98f22390 211sub is_undirected {
212 my( $self ) = @_;
213 return undef unless $self->has_graph;
214 return $self->graph->is_undirected;
215}
216
5873cf38 217=head2 new_from_newick( $newick_string )
218
219A constructor that will read a Newick-format tree specification and return one
220or more undirected Stemma objects. TODO test
221
222=cut
223
224sub new_from_newick {
225 my( $class, $nstring ) = @_;
226 my @stemmata;
227 foreach my $tree ( parse_newick( $nstring ) ) {
228 my $stemma = new( $class, graph => $tree );
229 push( @stemmata, $stemma );
230 }
231 return \@stemmata;
232}
233
027d819c 234=head1 METHODS
235
236=head2 as_dot( \%options )
237
238Returns a normal dot representation of the stemma layout, suitable for rendering
239with GraphViz. Options include:
240
241=over
242
243=item * graph - A hashref of global graph options.
244
245=item * node - A hashref of global node options.
246
247=item * edge - A hashref of global edge options.
248
249=back
250
251See the GraphViz documentation for the list of available options.
252
253=cut
254
8d9a1cd8 255sub as_dot {
e367f5c0 256 my( $self, $opts ) = @_;
7a7c249c 257
335a62ef 258 ## See if we are including any a.c. witnesses in this graph.
259 my $graph = $self->graph;
260 if( exists $opts->{'layerwits'} ) {
5c44c598 261 my $extant = {};
262 map { $extant->{$_} = 1 } $self->witnesses;
263 $graph = $self->situation_graph( $extant, $opts->{'layerwits'} );
335a62ef 264 }
5873cf38 265 if( $self->has_identifier ) {
266 $opts->{'name'} = $self->identifier;
267 }
268 return display_graph( $graph, $opts );
7a7c249c 269}
270
0bded693 271=head2 alter_graph( $dotstring )
272
273Alters the graph of this stemma according to the definition specified
274in $dotstring.
275
276=cut
277
278sub alter_graph {
279 my( $self, $dotstring ) = @_;
11954259 280 $self->_graph_from_dot( $dotstring );
0bded693 281}
282
335a62ef 283=head2 editable( $opts )
027d819c 284
88a6bac5 285Returns a version of the graph rendered in our definition format. The
335a62ef 286output separates statements with a newline; set $opts->{'linesep'} to the
287empty string or to a space if the result is to be sent via JSON.
288
5c44c598 289If a situational version of the stemma is required, the arguments for
290situation_graph should be passed via $opts->{'extant'} and $opts->{'layerwits'}.
027d819c 291
292=cut
7a7c249c 293
7a7c249c 294sub editable {
5c44c598 295 my( $self, $opts ) = @_;
335a62ef 296 my $graph = $self->graph;
f96630e3 297 if( $self->has_identifier ) {
298 $opts->{'name'} = $self->identifier;
299 }
5c44c598 300 ## See if we need an editable version of a situational graph.
301 if( exists $opts->{'layerwits'} || exists $opts->{'extant'} ) {
302 my $extant = delete $opts->{'extant'} || {};
303 my $layerwits = delete $opts->{'layerwits'} || [];
304 $graph = $self->situation_graph( $extant, $layerwits );
335a62ef 305 }
5c44c598 306 return editable_graph( $graph, $opts );
307}
308
8d9a1cd8 309
5c44c598 310=head2 situation_graph( $extant, $layered )
335a62ef 311
ea45d2a6 312Returns a graph which is the original stemma graph with all witnesses not
313in the %$extant hash marked as hypothetical, and witness layers added to
314the graph according to the list in @$layered. A layered (a.c.) witness is
315added as a parent of its main version, and additionally shares all other
316parents and children with that version.
335a62ef 317
318=cut
319
5c44c598 320sub situation_graph {
ace5fce5 321 my( $self, $extant, $layerwits, $layerlabel ) = @_;
5c44c598 322
323 my $graph = $self->graph->copy;
324 foreach my $vertex ( $graph->vertices ) {
325 # Set as extant any vertex that is extant in the stemma AND
326 # exists in the $extant hash.
327 my $class = 'hypothetical';
328 $class = 'extant' if exists $extant->{$vertex} && $extant->{$vertex} &&
329 $self->graph->get_vertex_attribute( $vertex, 'class' ) ne 'hypothetical';
330 $graph->set_vertex_attribute( $vertex, 'class', $class );
331 }
332
335a62ef 333 # For each 'layered' witness in the layerwits array, add it to the graph
334 # as an ancestor of the 'main' witness, and otherwise with the same parent/
335 # child links as its main analogue.
336 # TOOD Handle case where B is copied from A but corrected from C
ace5fce5 337 $layerlabel = ' (a.c.)' unless $layerlabel;
335a62ef 338 foreach my $lw ( @$layerwits ) {
339 # Add the layered witness and set it with the same attributes as
340 # its 'main' analogue
5c44c598 341 throw( "Cannot add a layer to a hypothetical witness $lw" )
342 unless $graph->get_vertex_attribute( $lw, 'class' ) eq 'extant';
ace5fce5 343 my $lwac = $lw . $layerlabel;
335a62ef 344 $graph->add_vertex( $lwac );
345 $graph->set_vertex_attributes( $lwac,
346 $graph->get_vertex_attributes( $lw ) );
347
348 # Set it as ancestor to the main witness
349 $graph->add_edge( $lwac, $lw );
350
351 # Give it the same ancestors and descendants as the main witness has,
352 # bearing in mind that those ancestors and descendants might also just
353 # have had a layered witness defined.
354 foreach my $v ( $graph->predecessors( $lw ) ) {
355 next if $v eq $lwac; # Don't add a loop
356 $graph->add_edge( $v, $lwac );
ace5fce5 357 $graph->add_edge( $v.$layerlabel, $lwac )
358 if $graph->has_vertex( $v.$layerlabel );
335a62ef 359 }
360 foreach my $v ( $graph->successors( $lw ) ) {
361 next if $v eq $lwac; # but this shouldn't occur
362 $graph->add_edge( $lwac, $v );
ace5fce5 363 $graph->add_edge( $lwac, $v.$layerlabel )
364 if $graph->has_vertex( $v.$layerlabel );
335a62ef 365 }
366 }
367 return $graph;
368}
369
027d819c 370=head2 as_svg
371
372Returns an SVG representation of the graph, calling as_dot first.
373
374=cut
375
8d9a1cd8 376sub as_svg {
377 my( $self, $opts ) = @_;
378 my $dot = $self->as_dot( $opts );
ea45d2a6 379 my @cmd = ( '-Tsvg' );
380 unshift( @cmd, $self->is_undirected ? 'neato' : 'dot' );
3bf5d6f1 381 my $svg;
e79c23c7 382 my $dotfile = File::Temp->new();
e79c23c7 383 binmode $dotfile, ':utf8';
8d9a1cd8 384 print $dotfile $dot;
459c39b3 385 close $dotfile;
e79c23c7 386 push( @cmd, $dotfile->filename );
387 run( \@cmd, ">", binary(), \$svg );
428bcf0b 388 return decode_utf8( $svg );
e79c23c7 389}
390
027d819c 391=head2 witnesses
392
393Returns a list of the extant witnesses represented in the stemma.
394
395=cut
396
08e0fb85 397sub witnesses {
398 my $self = shift;
399 my @wits = grep { $self->graph->get_vertex_attribute( $_, 'class' ) eq 'extant' }
400 $self->graph->vertices;
401 return @wits;
402}
403
06e7cbc7 404=head2 hypotheticals
405
406Returns a list of the hypothetical witnesses represented in the stemma.
407
408=cut
409
bebec0e9 410sub hypotheticals {
411 my $self = shift;
412 my @wits = grep
413 { $self->graph->get_vertex_attribute( $_, 'class' ) eq 'hypothetical' }
414 $self->graph->vertices;
415 return @wits;
416}
417
37bf09f4 418=head2 root_graph( $root_vertex )
ea45d2a6 419
420If the stemma graph is undirected, make it directed with $root_vertex at the root.
421If it is directed, re-root it.
422
423=cut
424
425sub root_graph {
426 my( $self, $rootvertex ) = @_;
427 my $graph;
907f6671 428 my $ident = $self->identifier; # will have to restore this at the end
ea45d2a6 429 if( $self->is_undirected ) {
430 $graph = $self->graph;
431 } else {
432 # Make an undirected version of this graph.
433 $graph = $self->graph->undirected_copy();
434 }
1cf6dd32 435 # First, ensure that the requested root is actually a vertex in the graph.
436 unless( $graph->has_vertex( $rootvertex ) ) {
437 throw( "Cannot orient graph $graph on nonexistent vertex $rootvertex" );
438 }
439
440 # Now make a directed version of the graph.
ea45d2a6 441 my $rooted = Graph->new();
442 $rooted->add_vertex( $rootvertex );
443 my @next = ( $rootvertex );
444 while( @next ) {
445 my @children;
446 foreach my $v ( @next ) {
447 # Place its not-placed neighbors (ergo children) in the tree
448 # and connect them
449 foreach my $n ( grep { !$rooted->has_vertex( $_ ) }
450 $graph->neighbors( $v ) ) {
451 $rooted->add_vertex( $n );
452 $rooted->add_edge( $v, $n );
453 push( @children, $n );
454 }
455 }
456 @next = @children;
457 }
458 # Set the vertex classes
459 map { $rooted->set_vertex_attribute( $_, 'class', 'hypothetical' ) }
1cf6dd32 460 $self->hypotheticals;
461 map { $rooted->set_vertex_attribute( $_, 'class', 'extant' ) }
462 $self->witnesses;
463 $self->graph( $rooted );
907f6671 464 $self->set_identifier( $ident );
ea45d2a6 465}
466
467
63778331 468sub throw {
469 Text::Tradition::Error->throw(
470 'ident' => 'Stemma error',
471 'message' => $_[0],
472 );
473}
474
475
9463b0bf 476no Moose;
477__PACKAGE__->meta->make_immutable;
478
4791;
027d819c 480
481=head1 LICENSE
482
483This package is free software and is provided "as is" without express
484or implied warranty. You can redistribute it and/or modify it under
485the same terms as Perl itself.
486
487=head1 AUTHOR
488
489Tara L Andrews E<lt>aurum@cpan.orgE<gt>