1 package Text::Tradition::Stemma;
3 use Encode qw( decode_utf8 );
6 use Graph::Reader::Dot;
7 use IPC::Run qw/ run binary /;
8 use Text::Tradition::Error;
9 use Text::Tradition::StemmaUtil qw/ editable_graph display_graph parse_newick /;
14 Text::Tradition::Stemma - a representation of a I<stemma codicum> for a Text::Tradition
19 my $t = Text::Tradition->new(
20 'name' => 'this is a text',
22 'file' => '/path/to/tei_parallel_seg_file.xml' );
24 my $s = $tradition->add_stemma( dotfile => '/path/to/stemma.dot' );
28 Text::Tradition is a library for representation and analysis of collated
29 texts, particularly medieval ones. The Stemma is a representation of the
30 copying relationships between the witnesses in a Tradition, modelled with
31 a connected rooted directed acyclic graph (CRDAG).
35 The easiest way to define a stemma is to use a special form of the 'dot'
38 Each stemma opens with the line
40 digraph "Name of Stemma" {
42 and continues with a list of all manuscript witnesses in the stemma, whether
43 extant witnesses or missing archetypes or hyparchetypes. Each of these is
44 listed by its sigil on its own line, e.g.:
46 alpha [ class=hypothetical ]
47 1 [ class=hypothetical,label=* ]
50 Extant witnesses are listed with class=extant; missing or postulated witnesses
51 are listed with class=hypothetical. Anonymous hyparchetypes must be given a
52 unique name or number, but can be represented as anonymous with the addition
53 of 'label=*' to their lines. Greek letters or other special characters may be
54 used as names, but they must always be wrapped in double quotes.
56 Links between manuscripts are then listed with arrow notation, as below. These
57 lines show the direction of copying, one step at a time, for the entire stemma.
62 The final line in the definition should be the closing brace:
66 Thus for a set of extant manuscripts A, B, and C, where A and B were copied
67 from the archetype O and C was copied from B, the definition would be:
69 digraph "Test stemma 1" {
70 O [ class=hypothetical]
83 The constructor. This should generally be called from Text::Tradition, but
84 if called directly it takes the following options:
88 =item * dot - A filehandle open to a DOT representation of the stemma graph.
90 =item * graph - If no DOT specification is given, you can pass a Graph object
91 instead. The vertices of the graph should have an attribute 'class' set to
92 either of the values 'extant' or 'hypothetical'.
94 =item * is_undirected - If the graph specification (or graph object) is for an
95 undirected graph (e.g. a phylogenetic tree), this should be set.
103 use_ok( 'Text::Tradition::Stemma' );
105 # Try to create a bad graph
107 open( $baddotfh, 't/data/besoin_bad.dot' ) or die "Could not open test dotfile";
109 my $stemma = Text::Tradition::Stemma->new( dot => $baddotfh );
110 ok( 0, "Created broken stemma from dotfile with syntax error" );
111 } catch( Text::Tradition::Error $e ) {
112 like( $e->message, qr/^Error trying to parse/, "Syntax error in dot threw exception" );
115 # Create a good graph
117 open( $dotfh, 't/data/florilegium.dot' ) or die "Could not open test dotfile";
118 binmode( $dotfh, ':utf8' );
119 my $stemma = Text::Tradition::Stemma->new( dot => $dotfh );
120 is( ref( $stemma ), 'Text::Tradition::Stemma', "Created stemma from good dotfile" );
121 is( scalar $stemma->witnesses, 13, "Found correct number of extant witnesses" );
122 is( scalar $stemma->hypotheticals, 8, "Found correct number of extant hypotheticals" );
123 ok( $stemma->has_identifier, "Stemma identifier was found in dot" );
124 is( $stemma->identifier, 'Coislinianum lineage', "Correct stemma identifier was found in dot" );
125 my $found_unicode_sigil;
126 foreach my $h ( $stemma->hypotheticals ) {
127 $found_unicode_sigil = 1 if $h eq "\x{3b1}";
129 ok( $found_unicode_sigil, "Found a correctly encoded Unicode sigil" );
131 # Create an undirected graph
133 open( $undirdotfh, 't/data/besoin_undirected.dot' ) or die "Could not open test dotfile";
134 binmode( $undirdotfh, ':utf8' );
135 my $udstemma = Text::Tradition::Stemma->new( dot => $undirdotfh );
136 is( ref( $udstemma ), 'Text::Tradition::Stemma', "Created stemma from undirected dotfile" );
137 is( scalar $udstemma->witnesses, 13, "Found correct number of extant witnesses" );
138 is( scalar $udstemma->hypotheticals, 12, "Found correct number of hypotheticals" );
139 ok( $udstemma->is_undirected, "Stemma was recorded as undirected" );
140 is( $udstemma->identifier, "RHM stemma", "Undirected graph retained its name" );
148 isa => 'Text::Tradition::Collation',
149 clearer => 'clear_collation', # interim measure to remove refs in DB
156 predicate => 'has_graph',
162 writer => 'set_identifier',
163 predicate => 'has_identifier',
169 predicate => 'came_from_jobid',
170 writer => '_set_from_jobid',
174 my( $self, $args ) = @_;
175 # If we have been handed a dotfile, initialize it into a graph.
176 if( exists $args->{'dot'} ) {
177 $self->_graph_from_dot( $args->{'dot'} );
181 before 'graph' => sub {
184 # Make sure all unclassed graph nodes are marked extant.
186 throw( "Cannot set graph to a non-Graph object" )
187 unless $g->isa( 'Graph' );
188 foreach my $v ( $g->vertices ) {
189 unless( $g->has_vertex_attribute( $v, 'class' ) ) {
190 $g->set_vertex_attribute( $v, 'class', 'extant' );
196 sub _graph_from_dot {
197 my( $self, $dotfh ) = @_;
198 my $reader = Graph::Reader::Dot->new();
199 # Redirect STDOUT in order to trap any error messages - syntax errors
200 # are evidently not fatal.
206 open( STDOUT, ">", \$reader_out );
208 open( STDERR, ">", \$reader_err );
209 $graph = $reader->read_graph( $dotfh );
213 if( $reader_out && $reader_out =~ /error/s ) {
214 throw( "Error trying to parse dot: $reader_out" );
216 throw( "Failed to create graph from dot" );
218 ## HORRIBLE HACK but there is no API access to graph attributes!
219 my $graph_id = exists $graph->[4]->{'name'} ? $graph->[4]->{'name'} : 'stemma';
220 # Correct for implicit graph -> digraph quirk of reader
221 if( $reader_err && $reader_err =~ /graph will be treated as digraph/ ) {
222 my $udgraph = $graph->undirected_copy;
223 foreach my $v ( $graph->vertices ) {
224 $udgraph->set_vertex_attributes( $v, $graph->get_vertex_attributes( $v ) );
228 $self->graph( $graph );
229 $self->set_identifier( $graph_id );
234 return undef unless $self->has_graph;
235 return $self->graph->is_undirected;
238 =head2 new_from_newick( $newick_string )
240 A constructor that will read a Newick-format tree specification and return one
241 or more undirected Stemma objects. TODO test
245 sub new_from_newick {
246 my( $class, $nstring ) = @_;
248 foreach my $tree ( parse_newick( $nstring ) ) {
249 my $stemma = new( $class, graph => $tree );
250 push( @stemmata, $stemma );
257 =head2 as_dot( \%options )
259 Returns a normal dot representation of the stemma layout, suitable for rendering
260 with GraphViz. Options include:
264 =item * graph - A hashref of global graph options.
266 =item * node - A hashref of global node options.
268 =item * edge - A hashref of global edge options.
272 See the GraphViz documentation for the list of available options.
277 my( $self, $opts ) = @_;
279 ## See if we are including any a.c. witnesses in this graph.
280 my $graph = $self->graph;
281 if( exists $opts->{'layerwits'} ) {
283 map { $extant->{$_} = 1 } $self->witnesses;
284 $graph = $self->situation_graph( $extant, $opts->{'layerwits'} );
286 if( $self->has_identifier ) {
287 $opts->{'name'} = $self->identifier;
289 return display_graph( $graph, $opts );
292 =head2 alter_graph( $dotstring )
294 Alters the graph of this stemma according to the definition specified
300 my( $self, $dotstring ) = @_;
302 open $dotfh, '<', \$dotstring;
303 binmode $dotfh, ':utf8';
304 $self->_graph_from_dot( $dotfh );
307 =head2 editable( $opts )
309 Returns a version of the graph rendered in our definition format. The
310 output separates statements with a newline; set $opts->{'linesep'} to the
311 empty string or to a space if the result is to be sent via JSON.
313 If a situational version of the stemma is required, the arguments for
314 situation_graph should be passed via $opts->{'extant'} and $opts->{'layerwits'}.
319 my( $self, $opts ) = @_;
320 my $graph = $self->graph;
321 if( $self->has_identifier ) {
322 $opts->{'name'} = $self->identifier;
324 ## See if we need an editable version of a situational graph.
325 if( exists $opts->{'layerwits'} || exists $opts->{'extant'} ) {
326 my $extant = delete $opts->{'extant'} || {};
327 my $layerwits = delete $opts->{'layerwits'} || [];
328 $graph = $self->situation_graph( $extant, $layerwits );
330 return editable_graph( $graph, $opts );
334 =head2 situation_graph( $extant, $layered )
336 Returns a graph which is the original stemma graph with all witnesses not
337 in the %$extant hash marked as hypothetical, and witness layers added to
338 the graph according to the list in @$layered. A layered (a.c.) witness is
339 added as a parent of its main version, and additionally shares all other
340 parents and children with that version.
344 sub situation_graph {
345 my( $self, $extant, $layerwits, $layerlabel ) = @_;
347 my $graph = $self->graph->copy;
348 foreach my $vertex ( $graph->vertices ) {
349 # Set as extant any vertex that is extant in the stemma AND
350 # exists in the $extant hash.
351 my $class = 'hypothetical';
352 $class = 'extant' if exists $extant->{$vertex} && $extant->{$vertex} &&
353 $self->graph->get_vertex_attribute( $vertex, 'class' ) ne 'hypothetical';
354 $graph->set_vertex_attribute( $vertex, 'class', $class );
357 # For each 'layered' witness in the layerwits array, add it to the graph
358 # as an ancestor of the 'main' witness, and otherwise with the same parent/
359 # child links as its main analogue.
360 # TOOD Handle case where B is copied from A but corrected from C
361 $layerlabel = ' (a.c.)' unless $layerlabel;
362 foreach my $lw ( @$layerwits ) {
363 # Add the layered witness and set it with the same attributes as
364 # its 'main' analogue
365 throw( "Cannot add a layer to a hypothetical witness $lw" )
366 unless $graph->get_vertex_attribute( $lw, 'class' ) eq 'extant';
367 my $lwac = $lw . $layerlabel;
368 $graph->add_vertex( $lwac );
369 $graph->set_vertex_attributes( $lwac,
370 $graph->get_vertex_attributes( $lw ) );
372 # Set it as ancestor to the main witness
373 $graph->add_edge( $lwac, $lw );
375 # Give it the same ancestors and descendants as the main witness has,
376 # bearing in mind that those ancestors and descendants might also just
377 # have had a layered witness defined.
378 foreach my $v ( $graph->predecessors( $lw ) ) {
379 next if $v eq $lwac; # Don't add a loop
380 $graph->add_edge( $v, $lwac );
381 $graph->add_edge( $v.$layerlabel, $lwac )
382 if $graph->has_vertex( $v.$layerlabel );
384 foreach my $v ( $graph->successors( $lw ) ) {
385 next if $v eq $lwac; # but this shouldn't occur
386 $graph->add_edge( $lwac, $v );
387 $graph->add_edge( $lwac, $v.$layerlabel )
388 if $graph->has_vertex( $v.$layerlabel );
396 Returns an SVG representation of the graph, calling as_dot first.
401 my( $self, $opts ) = @_;
402 my $dot = $self->as_dot( $opts );
403 my @cmd = ( '-Tsvg' );
404 unshift( @cmd, $self->is_undirected ? 'neato' : 'dot' );
406 my $dotfile = File::Temp->new();
407 binmode $dotfile, ':utf8';
410 push( @cmd, $dotfile->filename );
411 run( \@cmd, ">", binary(), \$svg );
412 return decode_utf8( $svg );
417 Returns a list of the extant witnesses represented in the stemma.
423 my @wits = grep { $self->graph->get_vertex_attribute( $_, 'class' ) eq 'extant' }
424 $self->graph->vertices;
430 Returns a list of the hypothetical witnesses represented in the stemma.
437 { $self->graph->get_vertex_attribute( $_, 'class' ) eq 'hypothetical' }
438 $self->graph->vertices;
442 =head2 root_graph( $root_vertex )
444 If the stemma graph is undirected, make it directed with $root_vertex at the root.
445 If it is directed, re-root it.
450 my( $self, $rootvertex ) = @_;
452 my $ident = $self->identifier; # will have to restore this at the end
453 if( $self->is_undirected ) {
454 $graph = $self->graph;
456 # Make an undirected version of this graph.
457 $graph = $self->graph->undirected_copy();
459 # First, ensure that the requested root is actually a vertex in the graph.
460 unless( $graph->has_vertex( $rootvertex ) ) {
461 throw( "Cannot orient graph $graph on nonexistent vertex $rootvertex" );
464 # Now make a directed version of the graph.
465 my $rooted = Graph->new();
466 $rooted->add_vertex( $rootvertex );
467 my @next = ( $rootvertex );
470 foreach my $v ( @next ) {
471 # Place its not-placed neighbors (ergo children) in the tree
473 foreach my $n ( grep { !$rooted->has_vertex( $_ ) }
474 $graph->neighbors( $v ) ) {
475 $rooted->add_vertex( $n );
476 $rooted->add_edge( $v, $n );
477 push( @children, $n );
482 # Set the vertex classes
483 map { $rooted->set_vertex_attribute( $_, 'class', 'hypothetical' ) }
484 $self->hypotheticals;
485 map { $rooted->set_vertex_attribute( $_, 'class', 'extant' ) }
487 $self->graph( $rooted );
488 $self->set_identifier( $ident );
493 Text::Tradition::Error->throw(
494 'ident' => 'Stemma error',
501 __PACKAGE__->meta->make_immutable;
507 This package is free software and is provided "as is" without express
508 or implied warranty. You can redistribute it and/or modify it under
509 the same terms as Perl itself.
513 Tara L Andrews E<lt>aurum@cpan.orgE<gt>