1 package Text::Tradition::Stemma;
3 use Encode qw( decode_utf8 );
6 use Graph::Reader::Dot;
7 use IPC::Run qw/ run binary /;
8 use Text::Tradition::Error;
13 Text::Tradition::Stemma - a representation of a I<stemma codicum> for a Text::Tradition
18 my $t = Text::Tradition->new(
19 'name' => 'this is a text',
21 'file' => '/path/to/tei_parallel_seg_file.xml' );
23 my $s = $tradition->add_stemma( dotfile => '/path/to/stemma.dot' );
27 Text::Tradition is a library for representation and analysis of collated
28 texts, particularly medieval ones. The Stemma is a representation of the
29 copying relationships between the witnesses in a Tradition, modelled with
30 a connected rooted directed acyclic graph (CRDAG).
34 The easiest way to define a stemma is to use a special form of the 'dot'
37 Each stemma opens with the line
39 digraph "Name of Stemma" {
41 and continues with a list of all manuscript witnesses in the stemma, whether
42 extant witnesses or missing archetypes or hyparchetypes. Each of these is
43 listed by its sigil on its own line, e.g.:
45 alpha [ class=hypothetical ]
46 1 [ class=hypothetical,label=* ]
49 Extant witnesses are listed with class=extant; missing or postulated witnesses
50 are listed with class=hypothetical. Anonymous hyparchetypes must be given a
51 unique name or number, but can be represented as anonymous with the addition
52 of 'label=*' to their lines. Greek letters or other special characters may be
53 used as names, but they must always be wrapped in double quotes.
55 Links between manuscripts are then listed with arrow notation, as below. These
56 lines show the direction of copying, one step at a time, for the entire stemma.
61 The final line in the definition should be the closing brace:
65 Thus for a set of extant manuscripts A, B, and C, where A and B were copied
66 from the archetype O and C was copied from B, the definition would be:
68 digraph "Test stemma 1" {
69 O [ class=hypothetical]
82 The constructor. This should generally be called from Text::Tradition, but
83 if called directly it takes the following options:
87 =item * dot - A filehandle open to a DOT representation of the stemma graph.
89 =item * graph - If no DOT specification is given, you can pass a Graph object
90 instead. The vertices of the graph should have an attribute 'class' set to
91 either of the values 'extant' or 'hypothetical'.
93 =item * is_undirected - If the graph specification (or graph object) is for an
94 undirected graph (e.g. a phylogenetic tree), this should be set.
102 use_ok( 'Text::Tradition::Stemma' );
104 # Try to create a bad graph
106 open( $baddotfh, 't/data/besoin_bad.dot' ) or die "Could not open test dotfile";
108 my $stemma = Text::Tradition::Stemma->new( dot => $baddotfh );
109 ok( 0, "Created broken stemma from dotfile with syntax error" );
110 } catch( Text::Tradition::Error $e ) {
111 like( $e->message, qr/^Error trying to parse/, "Syntax error in dot threw exception" );
114 # Create a good graph
116 open( $dotfh, 't/data/florilegium.dot' ) or die "Could not open test dotfile";
117 binmode( $dotfh, ':utf8' );
118 my $stemma = Text::Tradition::Stemma->new( dot => $dotfh );
119 is( ref( $stemma ), 'Text::Tradition::Stemma', "Created stemma from good dotfile" );
120 is( scalar $stemma->witnesses, 13, "Found correct number of extant witnesses" );
121 is( scalar $stemma->hypotheticals, 8, "Found correct number of extant hypotheticals" );
122 ok( $stemma->has_identifier, "Stemma identifier was found in dot" );
123 is( $stemma->identifier, 'Coislinianum lineage', "Correct stemma identifier was found in dot" );
124 my $found_unicode_sigil;
125 foreach my $h ( $stemma->hypotheticals ) {
126 $found_unicode_sigil = 1 if $h eq "\x{3b1}";
128 ok( $found_unicode_sigil, "Found a correctly encoded Unicode sigil" );
130 # Create an undirected graph
132 open( $undirdotfh, 't/data/besoin_undirected.dot' ) or die "Could not open test dotfile";
133 binmode( $undirdotfh, ':utf8' );
134 my $udstemma = Text::Tradition::Stemma->new( dot => $undirdotfh );
135 is( ref( $udstemma ), 'Text::Tradition::Stemma', "Created stemma from undirected dotfile" );
136 is( scalar $udstemma->witnesses, 13, "Found correct number of extant witnesses" );
137 is( scalar $udstemma->hypotheticals, 12, "Found correct number of hypotheticals" );
138 ok( $udstemma->is_undirected, "Stemma was recorded as undirected" );
139 is( $udstemma->identifier, "RHM stemma", "Undirected graph retained its name" );
147 isa => 'Text::Tradition::Collation',
148 clearer => 'clear_collation', # interim measure to remove refs in DB
155 predicate => 'has_graph',
161 writer => 'set_identifier',
162 predicate => 'has_identifier',
168 predicate => 'came_from_jobid',
169 writer => '_set_from_jobid',
173 my( $self, $args ) = @_;
174 # If we have been handed a dotfile, initialize it into a graph.
175 if( exists $args->{'dot'} ) {
176 $self->_graph_from_dot( $args->{'dot'} );
180 before 'graph' => sub {
183 # Make sure all unclassed graph nodes are marked extant.
185 throw( "Cannot set graph to a non-Graph object" )
186 unless $g->isa( 'Graph' );
187 foreach my $v ( $g->vertices ) {
188 unless( $g->has_vertex_attribute( $v, 'class' ) ) {
189 $g->set_vertex_attribute( $v, 'class', 'extant' );
195 sub _graph_from_dot {
196 my( $self, $dotfh ) = @_;
197 my $reader = Graph::Reader::Dot->new();
198 # Redirect STDOUT in order to trap any error messages - syntax errors
199 # are evidently not fatal.
205 open( STDOUT, ">", \$reader_out );
207 open( STDERR, ">", \$reader_err );
208 $graph = $reader->read_graph( $dotfh );
212 if( $reader_out && $reader_out =~ /error/s ) {
213 throw( "Error trying to parse dot: $reader_out" );
215 throw( "Failed to create graph from dot" );
217 ## HORRIBLE HACK but there is no API access to graph attributes!
218 my $graph_id = exists $graph->[4]->{'name'} ? $graph->[4]->{'name'} : 'stemma';
219 # Correct for implicit graph -> digraph quirk of reader
220 if( $reader_err && $reader_err =~ /graph will be treated as digraph/ ) {
221 my $udgraph = $graph->undirected_copy;
222 foreach my $v ( $graph->vertices ) {
223 $udgraph->set_vertex_attributes( $v, $graph->get_vertex_attributes( $v ) );
227 $self->graph( $graph );
228 $self->set_identifier( $graph_id );
233 return undef unless $self->has_graph;
234 return $self->graph->is_undirected;
239 =head2 as_dot( \%options )
241 Returns a normal dot representation of the stemma layout, suitable for rendering
242 with GraphViz. Options include:
246 =item * graph - A hashref of global graph options.
248 =item * node - A hashref of global node options.
250 =item * edge - A hashref of global edge options.
254 See the GraphViz documentation for the list of available options.
259 my( $self, $opts ) = @_;
261 ## See if we are including any a.c. witnesses in this graph.
262 my $graph = $self->graph;
263 if( exists $opts->{'layerwits'} ) {
265 map { $extant->{$_} = 1 } $self->witnesses;
266 $graph = $self->situation_graph( $extant, $opts->{'layerwits'} );
269 # Get default and specified options
272 'bgcolor' => 'transparent',
277 'fillcolor' => 'white',
279 'shape' => 'ellipse', # Shape for the extant nodes
282 'arrowhead' => 'none',
284 @graphopts{ keys %{$opts->{'graph'}} } = values %{$opts->{'graph'}}
286 @nodeopts{ keys %{$opts->{'node'}} } = values %{$opts->{'node'}}
288 @edgeopts{ keys %{$opts->{'edge'}} } = values %{$opts->{'edge'}}
291 my $gdecl = $graph->is_directed ? 'digraph' : 'graph';
292 my $gname = $self->has_identifier ? '"' . $self->identifier . '"'
295 push( @dotlines, "$gdecl $gname {" );
296 ## Print out the global attributes
297 push( @dotlines, _make_dotline( 'graph', %graphopts ) ) if keys %graphopts;
298 push( @dotlines, _make_dotline( 'edge', %edgeopts ) ) if keys %edgeopts;
299 push( @dotlines, _make_dotline( 'node', %nodeopts ) ) if keys %nodeopts;
301 # Add each of the nodes.
302 foreach my $n ( $graph->vertices ) {
303 my %vattr = ( 'id' => $n ); # Set the SVG element ID to the sigil itself
304 if( $graph->has_vertex_attribute( $n, 'label' ) ) {
305 $vattr{'label'} = $graph->get_vertex_attribute( $n, 'label' );
307 push( @dotlines, _make_dotline( $n, %vattr ) );
309 # Add each of our edges.
310 foreach my $e ( $graph->edges ) {
311 my( $from, $to ) = map { _dotquote( $_ ) } @$e;
312 my $connector = $graph->is_directed ? '->' : '--';
313 push( @dotlines, " $from $connector $to;" );
315 push( @dotlines, '}' );
317 return join( "\n", @dotlines );
320 =head2 alter_graph( $dotstring )
322 Alters the graph of this stemma according to the definition specified
328 my( $self, $dotstring ) = @_;
330 open $dotfh, '<', \$dotstring;
331 binmode $dotfh, ':utf8';
332 $self->_graph_from_dot( $dotfh );
335 =head2 editable( $opts )
337 =head2 editable_graph( $graph, $opts )
339 Returns a version of the graph rendered in our definition format. The
340 output separates statements with a newline; set $opts->{'linesep'} to the
341 empty string or to a space if the result is to be sent via JSON.
343 If a situational version of the stemma is required, the arguments for
344 situation_graph should be passed via $opts->{'extant'} and $opts->{'layerwits'}.
349 my( $self, $opts ) = @_;
350 my $graph = $self->graph;
351 if( $self->has_identifier ) {
352 $opts->{'name'} = $self->identifier;
354 ## See if we need an editable version of a situational graph.
355 if( exists $opts->{'layerwits'} || exists $opts->{'extant'} ) {
356 my $extant = delete $opts->{'extant'} || {};
357 my $layerwits = delete $opts->{'layerwits'} || [];
358 $graph = $self->situation_graph( $extant, $layerwits );
360 return editable_graph( $graph, $opts );
364 my( $graph, $opts ) = @_;
367 my $join = ( $opts && exists $opts->{'linesep'} ) ? $opts->{'linesep'} : "\n";
368 my $gdecl = $graph->is_undirected ? 'graph' : 'digraph';
369 my $gname = exists $opts->{'name'} ? '"' . $opts->{'name'} . '"'
372 push( @dotlines, "$gdecl $gname {" );
373 my @real; # A cheap sort
374 foreach my $n ( sort $graph->vertices ) {
375 my $c = $graph->get_vertex_attribute( $n, 'class' );
376 $c = 'extant' unless $c;
377 if( $c eq 'extant' ) {
380 push( @dotlines, _make_dotline( $n, 'class' => $c ) );
383 # Now do the real ones
384 foreach my $n ( @real ) {
385 push( @dotlines, _make_dotline( $n, 'class' => 'extant' ) );
387 foreach my $e ( sort _by_vertex $graph->edges ) {
388 my( $from, $to ) = map { _dotquote( $_ ) } @$e;
389 my $conn = $graph->is_undirected ? '--' : '->';
390 push( @dotlines, " $from $conn $to;" );
392 push( @dotlines, '}' );
393 return join( $join, @dotlines );
397 my( $obj, %attr ) = @_;
399 foreach my $k ( keys %attr ) {
400 my $v = _dotquote( $attr{$k} );
401 push( @pairs, "$k=$v" );
403 return sprintf( " %s [ %s ];", _dotquote( $obj ), join( ', ', @pairs ) );
408 return $str if $str =~ /^[A-Za-z0-9]+$/;
410 $str = '"' . $str . '"';
415 return $a->[0].$a->[1] cmp $b->[0].$b->[1];
418 =head2 situation_graph( $extant, $layered )
420 Returns a graph which is the original stemma graph with all witnesses not
421 in the %$extant hash marked as hypothetical, and witness layers added to
422 the graph according to the list in @$layered. A layered (a.c.) witness is
423 added as a parent of its main version, and additionally shares all other
424 parents and children with that version.
428 sub situation_graph {
429 my( $self, $extant, $layerwits, $layerlabel ) = @_;
431 my $graph = $self->graph->copy;
432 foreach my $vertex ( $graph->vertices ) {
433 # Set as extant any vertex that is extant in the stemma AND
434 # exists in the $extant hash.
435 my $class = 'hypothetical';
436 $class = 'extant' if exists $extant->{$vertex} && $extant->{$vertex} &&
437 $self->graph->get_vertex_attribute( $vertex, 'class' ) ne 'hypothetical';
438 $graph->set_vertex_attribute( $vertex, 'class', $class );
441 # For each 'layered' witness in the layerwits array, add it to the graph
442 # as an ancestor of the 'main' witness, and otherwise with the same parent/
443 # child links as its main analogue.
444 # TOOD Handle case where B is copied from A but corrected from C
445 $layerlabel = ' (a.c.)' unless $layerlabel;
446 foreach my $lw ( @$layerwits ) {
447 # Add the layered witness and set it with the same attributes as
448 # its 'main' analogue
449 throw( "Cannot add a layer to a hypothetical witness $lw" )
450 unless $graph->get_vertex_attribute( $lw, 'class' ) eq 'extant';
451 my $lwac = $lw . $layerlabel;
452 $graph->add_vertex( $lwac );
453 $graph->set_vertex_attributes( $lwac,
454 $graph->get_vertex_attributes( $lw ) );
456 # Set it as ancestor to the main witness
457 $graph->add_edge( $lwac, $lw );
459 # Give it the same ancestors and descendants as the main witness has,
460 # bearing in mind that those ancestors and descendants might also just
461 # have had a layered witness defined.
462 foreach my $v ( $graph->predecessors( $lw ) ) {
463 next if $v eq $lwac; # Don't add a loop
464 $graph->add_edge( $v, $lwac );
465 $graph->add_edge( $v.$layerlabel, $lwac )
466 if $graph->has_vertex( $v.$layerlabel );
468 foreach my $v ( $graph->successors( $lw ) ) {
469 next if $v eq $lwac; # but this shouldn't occur
470 $graph->add_edge( $lwac, $v );
471 $graph->add_edge( $lwac, $v.$layerlabel )
472 if $graph->has_vertex( $v.$layerlabel );
480 Returns an SVG representation of the graph, calling as_dot first.
485 my( $self, $opts ) = @_;
486 my $dot = $self->as_dot( $opts );
487 my @cmd = ( '-Tsvg' );
488 unshift( @cmd, $self->is_undirected ? 'neato' : 'dot' );
490 my $dotfile = File::Temp->new();
491 binmode $dotfile, ':utf8';
494 push( @cmd, $dotfile->filename );
495 run( \@cmd, ">", binary(), \$svg );
496 return decode_utf8( $svg );
501 Returns a list of the extant witnesses represented in the stemma.
507 my @wits = grep { $self->graph->get_vertex_attribute( $_, 'class' ) eq 'extant' }
508 $self->graph->vertices;
514 Returns a list of the hypothetical witnesses represented in the stemma.
521 { $self->graph->get_vertex_attribute( $_, 'class' ) eq 'hypothetical' }
522 $self->graph->vertices;
526 =head2 root_graph( $root_vertex )
528 If the stemma graph is undirected, make it directed with $root_vertex at the root.
529 If it is directed, re-root it.
534 my( $self, $rootvertex ) = @_;
536 my $ident = $self->identifier; # will have to restore this at the end
537 if( $self->is_undirected ) {
538 $graph = $self->graph;
540 # Make an undirected version of this graph.
541 $graph = $self->graph->undirected_copy();
543 # First, ensure that the requested root is actually a vertex in the graph.
544 unless( $graph->has_vertex( $rootvertex ) ) {
545 throw( "Cannot orient graph $graph on nonexistent vertex $rootvertex" );
548 # Now make a directed version of the graph.
549 my $rooted = Graph->new();
550 $rooted->add_vertex( $rootvertex );
551 my @next = ( $rootvertex );
554 foreach my $v ( @next ) {
555 # Place its not-placed neighbors (ergo children) in the tree
557 foreach my $n ( grep { !$rooted->has_vertex( $_ ) }
558 $graph->neighbors( $v ) ) {
559 $rooted->add_vertex( $n );
560 $rooted->add_edge( $v, $n );
561 push( @children, $n );
566 # Set the vertex classes
567 map { $rooted->set_vertex_attribute( $_, 'class', 'hypothetical' ) }
568 $self->hypotheticals;
569 map { $rooted->set_vertex_attribute( $_, 'class', 'extant' ) }
571 $self->graph( $rooted );
572 $self->set_identifier( $ident );
577 Text::Tradition::Error->throw(
578 'ident' => 'Stemma error',
585 __PACKAGE__->meta->make_immutable;
591 This package is free software and is provided "as is" without express
592 or implied warranty. You can redistribute it and/or modify it under
593 the same terms as Perl itself.
597 Tara L Andrews E<lt>aurum@cpan.orgE<gt>