Commit | Line | Data |
9463b0bf |
1 | package Text::Tradition::Stemma; |
2 | |
e79c23c7 |
3 | use Encode qw( decode_utf8 ); |
9463b0bf |
4 | use File::Temp; |
e05997e2 |
5 | use Graph; |
6 | use Graph::Reader::Dot; |
e79c23c7 |
7 | use IPC::Run qw/ run binary /; |
63778331 |
8 | use Text::Tradition::Error; |
5873cf38 |
9 | use Text::Tradition::StemmaUtil qw/ editable_graph display_graph parse_newick /; |
40f19742 |
10 | use Moose; |
9463b0bf |
11 | |
027d819c |
12 | =head1 NAME |
13 | |
14 | Text::Tradition::Stemma - a representation of a I<stemma codicum> for a Text::Tradition |
15 | |
16 | =head1 SYNOPSIS |
17 | |
18 | use Text::Tradition; |
19 | my $t = Text::Tradition->new( |
20 | 'name' => 'this is a text', |
21 | 'input' => 'TEI', |
22 | 'file' => '/path/to/tei_parallel_seg_file.xml' ); |
23 | |
24 | my $s = $tradition->add_stemma( dotfile => '/path/to/stemma.dot' ); |
25 | |
26 | =head1 DESCRIPTION |
27 | |
28 | Text::Tradition is a library for representation and analysis of collated |
335a62ef |
29 | texts, particularly medieval ones. The Stemma is a representation of the |
30 | copying relationships between the witnesses in a Tradition, modelled with |
31 | a connected rooted directed acyclic graph (CRDAG). |
027d819c |
32 | |
33 | =head1 DOT SYNTAX |
34 | |
335a62ef |
35 | The easiest way to define a stemma is to use a special form of the 'dot' |
36 | syntax of GraphViz. |
027d819c |
37 | |
38 | Each stemma opens with the line |
39 | |
f96630e3 |
40 | digraph "Name of Stemma" { |
027d819c |
41 | |
42 | and continues with a list of all manuscript witnesses in the stemma, whether |
43 | extant witnesses or missing archetypes or hyparchetypes. Each of these is |
44 | listed by its sigil on its own line, e.g.: |
45 | |
46 | alpha [ class=hypothetical ] |
47 | 1 [ class=hypothetical,label=* ] |
48 | Ms4 [ class=extant ] |
49 | |
50 | Extant witnesses are listed with class=extant; missing or postulated witnesses |
51 | are listed with class=hypothetical. Anonymous hyparchetypes must be given a |
52 | unique name or number, but can be represented as anonymous with the addition |
53 | of 'label=*' to their lines. Greek letters or other special characters may be |
54 | used as names, but they must always be wrapped in double quotes. |
55 | |
56 | Links between manuscripts are then listed with arrow notation, as below. These |
57 | lines show the direction of copying, one step at a time, for the entire stemma. |
58 | |
59 | alpha -> 1 |
60 | 1 -> Ms4 |
61 | |
62 | The final line in the definition should be the closing brace: |
63 | |
64 | } |
65 | |
66 | Thus for a set of extant manuscripts A, B, and C, where A and B were copied |
67 | from the archetype O and C was copied from B, the definition would be: |
68 | |
f96630e3 |
69 | digraph "Test stemma 1" { |
027d819c |
70 | O [ class=hypothetical] |
71 | A [ class=extant ] |
72 | B [ class=extant ] |
73 | C [ class=extant ] |
74 | O -> A |
75 | O -> B |
76 | B -> C |
77 | } |
78 | |
79 | =head1 CONSTRUCTOR |
80 | |
81 | =head2 new |
82 | |
83 | The constructor. This should generally be called from Text::Tradition, but |
84 | if called directly it takes the following options: |
85 | |
86 | =over |
87 | |
027d819c |
88 | =item * dot - A filehandle open to a DOT representation of the stemma graph. |
89 | |
ea45d2a6 |
90 | =item * graph - If no DOT specification is given, you can pass a Graph object |
91 | instead. The vertices of the graph should have an attribute 'class' set to |
92 | either of the values 'extant' or 'hypothetical'. |
93 | |
94 | =item * is_undirected - If the graph specification (or graph object) is for an |
95 | undirected graph (e.g. a phylogenetic tree), this should be set. |
96 | |
027d819c |
97 | =back |
98 | |
64a36834 |
99 | =begin testing |
100 | |
64a36834 |
101 | use TryCatch; |
102 | |
103 | use_ok( 'Text::Tradition::Stemma' ); |
104 | |
64a36834 |
105 | # Try to create a bad graph |
02b6340e |
106 | my $baddotfh; |
107 | open( $baddotfh, 't/data/besoin_bad.dot' ) or die "Could not open test dotfile"; |
108 | try { |
109 | my $stemma = Text::Tradition::Stemma->new( dot => $baddotfh ); |
110 | ok( 0, "Created broken stemma from dotfile with syntax error" ); |
111 | } catch( Text::Tradition::Error $e ) { |
112 | like( $e->message, qr/^Error trying to parse/, "Syntax error in dot threw exception" ); |
64a36834 |
113 | } |
114 | |
115 | # Create a good graph |
116 | my $dotfh; |
117 | open( $dotfh, 't/data/florilegium.dot' ) or die "Could not open test dotfile"; |
118 | binmode( $dotfh, ':utf8' ); |
ace5fce5 |
119 | my $stemma = Text::Tradition::Stemma->new( dot => $dotfh ); |
64a36834 |
120 | is( ref( $stemma ), 'Text::Tradition::Stemma', "Created stemma from good dotfile" ); |
121 | is( scalar $stemma->witnesses, 13, "Found correct number of extant witnesses" ); |
122 | is( scalar $stemma->hypotheticals, 8, "Found correct number of extant hypotheticals" ); |
f96630e3 |
123 | ok( $stemma->has_identifier, "Stemma identifier was found in dot" ); |
124 | is( $stemma->identifier, 'Coislinianum lineage', "Correct stemma identifier was found in dot" ); |
64a36834 |
125 | my $found_unicode_sigil; |
126 | foreach my $h ( $stemma->hypotheticals ) { |
127 | $found_unicode_sigil = 1 if $h eq "\x{3b1}"; |
128 | } |
129 | ok( $found_unicode_sigil, "Found a correctly encoded Unicode sigil" ); |
130 | |
cb741417 |
131 | # Create an undirected graph |
132 | my $undirdotfh; |
133 | open( $undirdotfh, 't/data/besoin_undirected.dot' ) or die "Could not open test dotfile"; |
134 | binmode( $undirdotfh, ':utf8' ); |
135 | my $udstemma = Text::Tradition::Stemma->new( dot => $undirdotfh ); |
136 | is( ref( $udstemma ), 'Text::Tradition::Stemma', "Created stemma from undirected dotfile" ); |
137 | is( scalar $udstemma->witnesses, 13, "Found correct number of extant witnesses" ); |
138 | is( scalar $udstemma->hypotheticals, 12, "Found correct number of hypotheticals" ); |
139 | ok( $udstemma->is_undirected, "Stemma was recorded as undirected" ); |
6665a327 |
140 | is( $udstemma->identifier, "RHM stemma", "Undirected graph retained its name" ); |
ea45d2a6 |
141 | |
64a36834 |
142 | =end testing |
143 | |
027d819c |
144 | =cut |
145 | |
9463b0bf |
146 | has collation => ( |
147 | is => 'ro', |
148 | isa => 'Text::Tradition::Collation', |
ea45d2a6 |
149 | clearer => 'clear_collation', # interim measure to remove refs in DB |
8d9a1cd8 |
150 | weak_ref => 1, |
9463b0bf |
151 | ); |
152 | |
e05997e2 |
153 | has graph => ( |
154 | is => 'rw', |
155 | isa => 'Graph', |
156 | predicate => 'has_graph', |
157 | ); |
ea45d2a6 |
158 | |
98f22390 |
159 | has identifier => ( |
ea45d2a6 |
160 | is => 'ro', |
98f22390 |
161 | isa => 'Str', |
162 | writer => 'set_identifier', |
163 | predicate => 'has_identifier', |
ea45d2a6 |
164 | ); |
738620c7 |
165 | |
166 | has from_jobid => ( |
167 | is => 'ro', |
168 | isa => 'Str', |
169 | predicate => 'came_from_jobid', |
170 | writer => '_set_from_jobid', |
171 | ); |
98f22390 |
172 | |
e05997e2 |
173 | sub BUILD { |
174 | my( $self, $args ) = @_; |
175 | # If we have been handed a dotfile, initialize it into a graph. |
176 | if( exists $args->{'dot'} ) { |
027d819c |
177 | $self->_graph_from_dot( $args->{'dot'} ); |
98f22390 |
178 | } |
c0ccdb62 |
179 | } |
180 | |
ea45d2a6 |
181 | before 'graph' => sub { |
182 | my $self = shift; |
183 | if( @_ ) { |
184 | # Make sure all unclassed graph nodes are marked extant. |
185 | my $g = $_[0]; |
186 | throw( "Cannot set graph to a non-Graph object" ) |
cb741417 |
187 | unless $g->isa( 'Graph' ); |
ea45d2a6 |
188 | foreach my $v ( $g->vertices ) { |
189 | unless( $g->has_vertex_attribute( $v, 'class' ) ) { |
190 | $g->set_vertex_attribute( $v, 'class', 'extant' ); |
191 | } |
192 | } |
98f22390 |
193 | } |
194 | }; |
195 | |
027d819c |
196 | sub _graph_from_dot { |
8d9a1cd8 |
197 | my( $self, $dotfh ) = @_; |
8d9a1cd8 |
198 | my $reader = Graph::Reader::Dot->new(); |
64a36834 |
199 | # Redirect STDOUT in order to trap any error messages - syntax errors |
200 | # are evidently not fatal. |
02b6340e |
201 | my $graph; |
202 | my $reader_out; |
cb741417 |
203 | my $reader_err; |
02b6340e |
204 | { |
205 | local(*STDOUT); |
206 | open( STDOUT, ">", \$reader_out ); |
cb741417 |
207 | local(*STDERR); |
208 | open( STDERR, ">", \$reader_err ); |
02b6340e |
209 | $graph = $reader->read_graph( $dotfh ); |
210 | close STDOUT; |
cb741417 |
211 | close STDERR; |
02b6340e |
212 | } |
64a36834 |
213 | if( $reader_out && $reader_out =~ /error/s ) { |
214 | throw( "Error trying to parse dot: $reader_out" ); |
215 | } elsif( !$graph ) { |
216 | throw( "Failed to create graph from dot" ); |
217 | } |
6665a327 |
218 | ## HORRIBLE HACK but there is no API access to graph attributes! |
219 | my $graph_id = exists $graph->[4]->{'name'} ? $graph->[4]->{'name'} : 'stemma'; |
cb741417 |
220 | # Correct for implicit graph -> digraph quirk of reader |
221 | if( $reader_err && $reader_err =~ /graph will be treated as digraph/ ) { |
222 | my $udgraph = $graph->undirected_copy; |
223 | foreach my $v ( $graph->vertices ) { |
224 | $udgraph->set_vertex_attributes( $v, $graph->get_vertex_attributes( $v ) ); |
225 | } |
226 | $graph = $udgraph; |
227 | } |
64a36834 |
228 | $self->graph( $graph ); |
6665a327 |
229 | $self->set_identifier( $graph_id ); |
8d9a1cd8 |
230 | } |
231 | |
98f22390 |
232 | sub is_undirected { |
233 | my( $self ) = @_; |
234 | return undef unless $self->has_graph; |
235 | return $self->graph->is_undirected; |
236 | } |
237 | |
5873cf38 |
238 | =head2 new_from_newick( $newick_string ) |
239 | |
240 | A constructor that will read a Newick-format tree specification and return one |
241 | or more undirected Stemma objects. TODO test |
242 | |
243 | =cut |
244 | |
245 | sub new_from_newick { |
246 | my( $class, $nstring ) = @_; |
247 | my @stemmata; |
248 | foreach my $tree ( parse_newick( $nstring ) ) { |
249 | my $stemma = new( $class, graph => $tree ); |
250 | push( @stemmata, $stemma ); |
251 | } |
252 | return \@stemmata; |
253 | } |
254 | |
027d819c |
255 | =head1 METHODS |
256 | |
257 | =head2 as_dot( \%options ) |
258 | |
259 | Returns a normal dot representation of the stemma layout, suitable for rendering |
260 | with GraphViz. Options include: |
261 | |
262 | =over |
263 | |
264 | =item * graph - A hashref of global graph options. |
265 | |
266 | =item * node - A hashref of global node options. |
267 | |
268 | =item * edge - A hashref of global edge options. |
269 | |
270 | =back |
271 | |
272 | See the GraphViz documentation for the list of available options. |
273 | |
274 | =cut |
275 | |
8d9a1cd8 |
276 | sub as_dot { |
e367f5c0 |
277 | my( $self, $opts ) = @_; |
7a7c249c |
278 | |
335a62ef |
279 | ## See if we are including any a.c. witnesses in this graph. |
280 | my $graph = $self->graph; |
281 | if( exists $opts->{'layerwits'} ) { |
5c44c598 |
282 | my $extant = {}; |
283 | map { $extant->{$_} = 1 } $self->witnesses; |
284 | $graph = $self->situation_graph( $extant, $opts->{'layerwits'} ); |
335a62ef |
285 | } |
5873cf38 |
286 | if( $self->has_identifier ) { |
287 | $opts->{'name'} = $self->identifier; |
288 | } |
289 | return display_graph( $graph, $opts ); |
7a7c249c |
290 | } |
291 | |
0bded693 |
292 | =head2 alter_graph( $dotstring ) |
293 | |
294 | Alters the graph of this stemma according to the definition specified |
295 | in $dotstring. |
296 | |
297 | =cut |
298 | |
299 | sub alter_graph { |
300 | my( $self, $dotstring ) = @_; |
301 | my $dotfh; |
302 | open $dotfh, '<', \$dotstring; |
f90b2bde |
303 | binmode $dotfh, ':utf8'; |
0bded693 |
304 | $self->_graph_from_dot( $dotfh ); |
305 | } |
306 | |
335a62ef |
307 | =head2 editable( $opts ) |
027d819c |
308 | |
88a6bac5 |
309 | Returns a version of the graph rendered in our definition format. The |
335a62ef |
310 | output separates statements with a newline; set $opts->{'linesep'} to the |
311 | empty string or to a space if the result is to be sent via JSON. |
312 | |
5c44c598 |
313 | If a situational version of the stemma is required, the arguments for |
314 | situation_graph should be passed via $opts->{'extant'} and $opts->{'layerwits'}. |
027d819c |
315 | |
316 | =cut |
7a7c249c |
317 | |
7a7c249c |
318 | sub editable { |
5c44c598 |
319 | my( $self, $opts ) = @_; |
335a62ef |
320 | my $graph = $self->graph; |
f96630e3 |
321 | if( $self->has_identifier ) { |
322 | $opts->{'name'} = $self->identifier; |
323 | } |
5c44c598 |
324 | ## See if we need an editable version of a situational graph. |
325 | if( exists $opts->{'layerwits'} || exists $opts->{'extant'} ) { |
326 | my $extant = delete $opts->{'extant'} || {}; |
327 | my $layerwits = delete $opts->{'layerwits'} || []; |
328 | $graph = $self->situation_graph( $extant, $layerwits ); |
335a62ef |
329 | } |
5c44c598 |
330 | return editable_graph( $graph, $opts ); |
331 | } |
332 | |
8d9a1cd8 |
333 | |
5c44c598 |
334 | =head2 situation_graph( $extant, $layered ) |
335a62ef |
335 | |
ea45d2a6 |
336 | Returns a graph which is the original stemma graph with all witnesses not |
337 | in the %$extant hash marked as hypothetical, and witness layers added to |
338 | the graph according to the list in @$layered. A layered (a.c.) witness is |
339 | added as a parent of its main version, and additionally shares all other |
340 | parents and children with that version. |
335a62ef |
341 | |
342 | =cut |
343 | |
5c44c598 |
344 | sub situation_graph { |
ace5fce5 |
345 | my( $self, $extant, $layerwits, $layerlabel ) = @_; |
5c44c598 |
346 | |
347 | my $graph = $self->graph->copy; |
348 | foreach my $vertex ( $graph->vertices ) { |
349 | # Set as extant any vertex that is extant in the stemma AND |
350 | # exists in the $extant hash. |
351 | my $class = 'hypothetical'; |
352 | $class = 'extant' if exists $extant->{$vertex} && $extant->{$vertex} && |
353 | $self->graph->get_vertex_attribute( $vertex, 'class' ) ne 'hypothetical'; |
354 | $graph->set_vertex_attribute( $vertex, 'class', $class ); |
355 | } |
356 | |
335a62ef |
357 | # For each 'layered' witness in the layerwits array, add it to the graph |
358 | # as an ancestor of the 'main' witness, and otherwise with the same parent/ |
359 | # child links as its main analogue. |
360 | # TOOD Handle case where B is copied from A but corrected from C |
ace5fce5 |
361 | $layerlabel = ' (a.c.)' unless $layerlabel; |
335a62ef |
362 | foreach my $lw ( @$layerwits ) { |
363 | # Add the layered witness and set it with the same attributes as |
364 | # its 'main' analogue |
5c44c598 |
365 | throw( "Cannot add a layer to a hypothetical witness $lw" ) |
366 | unless $graph->get_vertex_attribute( $lw, 'class' ) eq 'extant'; |
ace5fce5 |
367 | my $lwac = $lw . $layerlabel; |
335a62ef |
368 | $graph->add_vertex( $lwac ); |
369 | $graph->set_vertex_attributes( $lwac, |
370 | $graph->get_vertex_attributes( $lw ) ); |
371 | |
372 | # Set it as ancestor to the main witness |
373 | $graph->add_edge( $lwac, $lw ); |
374 | |
375 | # Give it the same ancestors and descendants as the main witness has, |
376 | # bearing in mind that those ancestors and descendants might also just |
377 | # have had a layered witness defined. |
378 | foreach my $v ( $graph->predecessors( $lw ) ) { |
379 | next if $v eq $lwac; # Don't add a loop |
380 | $graph->add_edge( $v, $lwac ); |
ace5fce5 |
381 | $graph->add_edge( $v.$layerlabel, $lwac ) |
382 | if $graph->has_vertex( $v.$layerlabel ); |
335a62ef |
383 | } |
384 | foreach my $v ( $graph->successors( $lw ) ) { |
385 | next if $v eq $lwac; # but this shouldn't occur |
386 | $graph->add_edge( $lwac, $v ); |
ace5fce5 |
387 | $graph->add_edge( $lwac, $v.$layerlabel ) |
388 | if $graph->has_vertex( $v.$layerlabel ); |
335a62ef |
389 | } |
390 | } |
391 | return $graph; |
392 | } |
393 | |
027d819c |
394 | =head2 as_svg |
395 | |
396 | Returns an SVG representation of the graph, calling as_dot first. |
397 | |
398 | =cut |
399 | |
8d9a1cd8 |
400 | sub as_svg { |
401 | my( $self, $opts ) = @_; |
402 | my $dot = $self->as_dot( $opts ); |
ea45d2a6 |
403 | my @cmd = ( '-Tsvg' ); |
404 | unshift( @cmd, $self->is_undirected ? 'neato' : 'dot' ); |
3bf5d6f1 |
405 | my $svg; |
e79c23c7 |
406 | my $dotfile = File::Temp->new(); |
e79c23c7 |
407 | binmode $dotfile, ':utf8'; |
8d9a1cd8 |
408 | print $dotfile $dot; |
459c39b3 |
409 | close $dotfile; |
e79c23c7 |
410 | push( @cmd, $dotfile->filename ); |
411 | run( \@cmd, ">", binary(), \$svg ); |
428bcf0b |
412 | return decode_utf8( $svg ); |
e79c23c7 |
413 | } |
414 | |
027d819c |
415 | =head2 witnesses |
416 | |
417 | Returns a list of the extant witnesses represented in the stemma. |
418 | |
419 | =cut |
420 | |
08e0fb85 |
421 | sub witnesses { |
422 | my $self = shift; |
423 | my @wits = grep { $self->graph->get_vertex_attribute( $_, 'class' ) eq 'extant' } |
424 | $self->graph->vertices; |
425 | return @wits; |
426 | } |
427 | |
06e7cbc7 |
428 | =head2 hypotheticals |
429 | |
430 | Returns a list of the hypothetical witnesses represented in the stemma. |
431 | |
432 | =cut |
433 | |
bebec0e9 |
434 | sub hypotheticals { |
435 | my $self = shift; |
436 | my @wits = grep |
437 | { $self->graph->get_vertex_attribute( $_, 'class' ) eq 'hypothetical' } |
438 | $self->graph->vertices; |
439 | return @wits; |
440 | } |
441 | |
37bf09f4 |
442 | =head2 root_graph( $root_vertex ) |
ea45d2a6 |
443 | |
444 | If the stemma graph is undirected, make it directed with $root_vertex at the root. |
445 | If it is directed, re-root it. |
446 | |
447 | =cut |
448 | |
449 | sub root_graph { |
450 | my( $self, $rootvertex ) = @_; |
451 | my $graph; |
907f6671 |
452 | my $ident = $self->identifier; # will have to restore this at the end |
ea45d2a6 |
453 | if( $self->is_undirected ) { |
454 | $graph = $self->graph; |
455 | } else { |
456 | # Make an undirected version of this graph. |
457 | $graph = $self->graph->undirected_copy(); |
458 | } |
1cf6dd32 |
459 | # First, ensure that the requested root is actually a vertex in the graph. |
460 | unless( $graph->has_vertex( $rootvertex ) ) { |
461 | throw( "Cannot orient graph $graph on nonexistent vertex $rootvertex" ); |
462 | } |
463 | |
464 | # Now make a directed version of the graph. |
ea45d2a6 |
465 | my $rooted = Graph->new(); |
466 | $rooted->add_vertex( $rootvertex ); |
467 | my @next = ( $rootvertex ); |
468 | while( @next ) { |
469 | my @children; |
470 | foreach my $v ( @next ) { |
471 | # Place its not-placed neighbors (ergo children) in the tree |
472 | # and connect them |
473 | foreach my $n ( grep { !$rooted->has_vertex( $_ ) } |
474 | $graph->neighbors( $v ) ) { |
475 | $rooted->add_vertex( $n ); |
476 | $rooted->add_edge( $v, $n ); |
477 | push( @children, $n ); |
478 | } |
479 | } |
480 | @next = @children; |
481 | } |
482 | # Set the vertex classes |
483 | map { $rooted->set_vertex_attribute( $_, 'class', 'hypothetical' ) } |
1cf6dd32 |
484 | $self->hypotheticals; |
485 | map { $rooted->set_vertex_attribute( $_, 'class', 'extant' ) } |
486 | $self->witnesses; |
487 | $self->graph( $rooted ); |
907f6671 |
488 | $self->set_identifier( $ident ); |
ea45d2a6 |
489 | } |
490 | |
491 | |
63778331 |
492 | sub throw { |
493 | Text::Tradition::Error->throw( |
494 | 'ident' => 'Stemma error', |
495 | 'message' => $_[0], |
496 | ); |
497 | } |
498 | |
499 | |
9463b0bf |
500 | no Moose; |
501 | __PACKAGE__->meta->make_immutable; |
502 | |
503 | 1; |
027d819c |
504 | |
505 | =head1 LICENSE |
506 | |
507 | This package is free software and is provided "as is" without express |
508 | or implied warranty. You can redistribute it and/or modify it under |
509 | the same terms as Perl itself. |
510 | |
511 | =head1 AUTHOR |
512 | |
513 | Tara L Andrews E<lt>aurum@cpan.orgE<gt> |