1 package Text::Tradition::Stemma;
3 use Encode qw( decode_utf8 );
6 use Graph::Reader::Dot;
7 use IPC::Run qw/ run binary /;
8 use Text::Tradition::Error;
9 use Text::Tradition::StemmaUtil qw/ read_graph editable_graph display_graph
15 Text::Tradition::Stemma - a representation of a I<stemma codicum> for a Text::Tradition
20 my $t = Text::Tradition->new(
21 'name' => 'this is a text',
23 'file' => '/path/to/tei_parallel_seg_file.xml' );
25 my $s = $tradition->add_stemma( dotfile => '/path/to/stemma.dot' );
29 Text::Tradition is a library for representation and analysis of collated
30 texts, particularly medieval ones. The Stemma is a representation of the
31 copying relationships between the witnesses in a Tradition, modelled with
32 a connected rooted directed acyclic graph (CRDAG).
36 The easiest way to define a stemma is to use a special form of the 'dot'
39 Each stemma opens with the line
41 digraph "Name of Stemma" {
43 and continues with a list of all manuscript witnesses in the stemma, whether
44 extant witnesses or missing archetypes or hyparchetypes. Each of these is
45 listed by its sigil on its own line, e.g.:
47 alpha [ class=hypothetical ]
48 1 [ class=hypothetical,label=* ]
51 Extant witnesses are listed with class=extant; missing or postulated witnesses
52 are listed with class=hypothetical. Anonymous hyparchetypes must be given a
53 unique name or number, but can be represented as anonymous with the addition
54 of 'label=*' to their lines. Greek letters or other special characters may be
55 used as names, but they must always be wrapped in double quotes.
57 Links between manuscripts are then listed with arrow notation, as below. These
58 lines show the direction of copying, one step at a time, for the entire stemma.
63 The final line in the definition should be the closing brace:
67 Thus for a set of extant manuscripts A, B, and C, where A and B were copied
68 from the archetype O and C was copied from B, the definition would be:
70 digraph "Test stemma 1" {
71 O [ class=hypothetical]
84 The constructor. This should generally be called from Text::Tradition, but
85 if called directly it takes the following options:
89 =item * dot - A filehandle open to a DOT representation of the stemma graph.
91 =item * graph - If no DOT specification is given, you can pass a Graph object
92 instead. The vertices of the graph should have an attribute 'class' set to
93 either of the values 'extant' or 'hypothetical'.
95 =item * is_undirected - If the graph specification (or graph object) is for an
96 undirected graph (e.g. a phylogenetic tree), this should be set.
104 use_ok( 'Text::Tradition::Stemma' );
106 # Try to create a bad graph
108 my $stemma = Text::Tradition::Stemma->new( dotfile => 't/data/besoin_bad.dot' );
109 ok( 0, "Created broken stemma from dotfile with syntax error" );
110 } catch( Text::Tradition::Error $e ) {
111 like( $e->message, qr/^Error trying to parse/, "Syntax error in dot threw exception" );
114 # Create a good graph
115 my $stemma = Text::Tradition::Stemma->new( dotfile => 't/data/florilegium.dot' );
116 is( ref( $stemma ), 'Text::Tradition::Stemma', "Created stemma from good dotfile" );
117 is( scalar $stemma->witnesses, 13, "Found correct number of extant witnesses" );
118 is( scalar $stemma->hypotheticals, 8, "Found correct number of extant hypotheticals" );
119 ok( $stemma->has_identifier, "Stemma identifier was found in dot" );
120 is( $stemma->identifier, 'Coislinianum lineage', "Correct stemma identifier was found in dot" );
121 my $found_unicode_sigil;
122 foreach my $h ( $stemma->hypotheticals ) {
123 $found_unicode_sigil = 1 if $h eq "\x{3b1}";
125 ok( $found_unicode_sigil, "Found a correctly encoded Unicode sigil" );
127 # Create an undirected graph
128 my $udstemma = Text::Tradition::Stemma->new( dotfile => 't/data/besoin_undirected.dot' );
129 is( ref( $udstemma ), 'Text::Tradition::Stemma', "Created stemma from undirected dotfile" );
130 is( scalar $udstemma->witnesses, 13, "Found correct number of extant witnesses" );
131 is( scalar $udstemma->hypotheticals, 12, "Found correct number of hypotheticals" );
132 ok( $udstemma->is_undirected, "Stemma was recorded as undirected" );
133 is( $udstemma->identifier, "RHM stemma", "Undirected graph retained its name" );
141 isa => 'Text::Tradition::Collation',
142 clearer => 'clear_collation', # interim measure to remove refs in DB
149 predicate => 'has_graph',
155 writer => 'set_identifier',
156 predicate => 'has_identifier',
162 predicate => 'came_from_jobid',
163 writer => '_set_from_jobid',
167 my( $self, $args ) = @_;
168 # If we have been handed a dotfile, initialize it into a graph.
170 if( exists $args->{'dot'} ) {
171 $dotstring = $args->{'dot'};
172 } elsif( exists $args->{'dotfile'} ) {
173 # Read the file into a string.
175 open( DOTFH, $args->{'dotfile'} )
176 or throw( "Could not read specified dot file " . $args->{'dotfile'} );
177 binmode( DOTFH, ':encoding(UTF-8)' );
180 $dotstring = join( '', @dotlines );
182 $self->_graph_from_dot( $dotstring ) if $dotstring;
185 before 'graph' => sub {
188 # Make sure all unclassed graph nodes are marked extant.
190 throw( "Cannot set graph to a non-Graph object" )
191 unless $g->isa( 'Graph' );
192 foreach my $v ( $g->vertices ) {
193 unless( $g->has_vertex_attribute( $v, 'class' ) ) {
194 $g->set_vertex_attribute( $v, 'class', 'extant' );
200 sub _graph_from_dot {
201 my( $self, $dotstring ) = @_;
202 my $graph = read_graph( $dotstring );
204 ## HORRIBLE HACK but there is no API access to graph attributes!
205 my $graph_id = $graph->has_graph_attribute( 'name' )
206 ? $graph->get_graph_attribute( 'name' ) : 'stemma';
207 $self->graph( $graph );
208 $self->set_identifier( $graph_id );
213 return undef unless $self->has_graph;
214 return $self->graph->is_undirected;
217 =head2 new_from_newick( $newick_string )
219 A constructor that will read a Newick-format tree specification and return one
220 or more undirected Stemma objects. TODO test
224 sub new_from_newick {
225 my( $class, $nstring ) = @_;
227 foreach my $tree ( parse_newick( $nstring ) ) {
228 my $stemma = new( $class, graph => $tree );
229 push( @stemmata, $stemma );
236 =head2 as_dot( \%options )
238 Returns a normal dot representation of the stemma layout, suitable for rendering
239 with GraphViz. Options include:
243 =item * graph - A hashref of global graph options.
245 =item * node - A hashref of global node options.
247 =item * edge - A hashref of global edge options.
251 See the GraphViz documentation for the list of available options.
256 my( $self, $opts ) = @_;
258 ## See if we are including any a.c. witnesses in this graph.
259 my $graph = $self->graph;
260 if( exists $opts->{'layerwits'} ) {
262 map { $extant->{$_} = 1 } $self->witnesses;
263 $graph = $self->situation_graph( $extant, $opts->{'layerwits'} );
265 if( $self->has_identifier ) {
266 $opts->{'name'} = $self->identifier;
268 return display_graph( $graph, $opts );
271 =head2 alter_graph( $dotstring )
273 Alters the graph of this stemma according to the definition specified
279 my( $self, $dotstring ) = @_;
280 $self->_graph_from_dot( $dotstring );
283 =head2 editable( $opts )
285 Returns a version of the graph rendered in our definition format. The
286 output separates statements with a newline; set $opts->{'linesep'} to the
287 empty string or to a space if the result is to be sent via JSON.
289 If a situational version of the stemma is required, the arguments for
290 situation_graph should be passed via $opts->{'extant'} and $opts->{'layerwits'}.
295 my( $self, $opts ) = @_;
296 my $graph = $self->graph;
297 if( $self->has_identifier ) {
298 $opts->{'name'} = $self->identifier;
300 ## See if we need an editable version of a situational graph.
301 if( exists $opts->{'layerwits'} || exists $opts->{'extant'} ) {
302 my $extant = delete $opts->{'extant'} || {};
303 my $layerwits = delete $opts->{'layerwits'} || [];
304 $graph = $self->situation_graph( $extant, $layerwits );
306 return editable_graph( $graph, $opts );
310 =head2 situation_graph( $extant, $layered )
312 Returns a graph which is the original stemma graph with all witnesses not
313 in the %$extant hash marked as hypothetical, and witness layers added to
314 the graph according to the list in @$layered. A layered (a.c.) witness is
315 added as a parent of its main version, and additionally shares all other
316 parents and children with that version.
320 sub situation_graph {
321 my( $self, $extant, $layerwits, $layerlabel ) = @_;
323 my $graph = $self->graph->copy;
324 foreach my $vertex ( $graph->vertices ) {
325 # Set as extant any vertex that is extant in the stemma AND
326 # exists in the $extant hash.
327 my $class = 'hypothetical';
328 $class = 'extant' if exists $extant->{$vertex} && $extant->{$vertex} &&
329 $self->graph->get_vertex_attribute( $vertex, 'class' ) ne 'hypothetical';
330 $graph->set_vertex_attribute( $vertex, 'class', $class );
333 # For each 'layered' witness in the layerwits array, add it to the graph
334 # as an ancestor of the 'main' witness, and otherwise with the same parent/
335 # child links as its main analogue.
336 # TOOD Handle case where B is copied from A but corrected from C
337 $layerlabel = ' (a.c.)' unless $layerlabel;
338 foreach my $lw ( @$layerwits ) {
339 # Add the layered witness and set it with the same attributes as
340 # its 'main' analogue
341 throw( "Cannot add a layer to a hypothetical witness $lw" )
342 unless $graph->get_vertex_attribute( $lw, 'class' ) eq 'extant';
343 my $lwac = $lw . $layerlabel;
344 $graph->add_vertex( $lwac );
345 $graph->set_vertex_attributes( $lwac,
346 $graph->get_vertex_attributes( $lw ) );
348 # Set it as ancestor to the main witness
349 $graph->add_edge( $lwac, $lw );
351 # Give it the same ancestors and descendants as the main witness has,
352 # bearing in mind that those ancestors and descendants might also just
353 # have had a layered witness defined.
354 foreach my $v ( $graph->predecessors( $lw ) ) {
355 next if $v eq $lwac; # Don't add a loop
356 $graph->add_edge( $v, $lwac );
357 $graph->add_edge( $v.$layerlabel, $lwac )
358 if $graph->has_vertex( $v.$layerlabel );
360 foreach my $v ( $graph->successors( $lw ) ) {
361 next if $v eq $lwac; # but this shouldn't occur
362 $graph->add_edge( $lwac, $v );
363 $graph->add_edge( $lwac, $v.$layerlabel )
364 if $graph->has_vertex( $v.$layerlabel );
372 Returns an SVG representation of the graph, calling as_dot first.
377 my( $self, $opts ) = @_;
378 my $dot = $self->as_dot( $opts );
379 my @cmd = ( '-Tsvg' );
380 unshift( @cmd, $self->is_undirected ? 'neato' : 'dot' );
382 my $dotfile = File::Temp->new();
383 binmode $dotfile, ':utf8';
386 push( @cmd, $dotfile->filename );
387 run( \@cmd, ">", binary(), \$svg );
388 return decode_utf8( $svg );
393 Returns a list of the extant witnesses represented in the stemma.
399 my @wits = grep { $self->graph->get_vertex_attribute( $_, 'class' ) eq 'extant' }
400 $self->graph->vertices;
406 Returns a list of the hypothetical witnesses represented in the stemma.
413 { $self->graph->get_vertex_attribute( $_, 'class' ) eq 'hypothetical' }
414 $self->graph->vertices;
418 =head2 root_graph( $root_vertex )
420 If the stemma graph is undirected, make it directed with $root_vertex at the root.
421 If it is directed, re-root it.
426 my( $self, $rootvertex ) = @_;
428 my $ident = $self->identifier; # will have to restore this at the end
429 if( $self->is_undirected ) {
430 $graph = $self->graph;
432 # Make an undirected version of this graph.
433 $graph = $self->graph->undirected_copy();
435 # First, ensure that the requested root is actually a vertex in the graph.
436 unless( $graph->has_vertex( $rootvertex ) ) {
437 throw( "Cannot orient graph $graph on nonexistent vertex $rootvertex" );
440 # Now make a directed version of the graph.
441 my $rooted = Graph->new();
442 $rooted->add_vertex( $rootvertex );
443 my @next = ( $rootvertex );
446 foreach my $v ( @next ) {
447 # Place its not-placed neighbors (ergo children) in the tree
449 foreach my $n ( grep { !$rooted->has_vertex( $_ ) }
450 $graph->neighbors( $v ) ) {
451 $rooted->add_vertex( $n );
452 $rooted->add_edge( $v, $n );
453 push( @children, $n );
458 # Set the vertex classes
459 map { $rooted->set_vertex_attribute( $_, 'class', 'hypothetical' ) }
460 $self->hypotheticals;
461 map { $rooted->set_vertex_attribute( $_, 'class', 'extant' ) }
463 $self->graph( $rooted );
464 $self->set_identifier( $ident );
469 Text::Tradition::Error->throw(
470 'ident' => 'Stemma error',
477 __PACKAGE__->meta->make_immutable;
483 This package is free software and is provided "as is" without express
484 or implied warranty. You can redistribute it and/or modify it under
485 the same terms as Perl itself.
489 Tara L Andrews E<lt>aurum@cpan.orgE<gt>