1 package Text::Tradition::Stemma;
3 use Encode qw( decode_utf8 );
6 use Graph::Reader::Dot;
7 use IPC::Run qw/ run binary /;
8 use Text::Tradition::Error;
13 Text::Tradition::Stemma - a representation of a I<stemma codicum> for a Text::Tradition
18 my $t = Text::Tradition->new(
19 'name' => 'this is a text',
21 'file' => '/path/to/tei_parallel_seg_file.xml' );
23 my $s = $tradition->add_stemma( dotfile => '/path/to/stemma.dot' );
27 Text::Tradition is a library for representation and analysis of collated
28 texts, particularly medieval ones. The Stemma is a representation of the
29 copying relationships between the witnesses in a Tradition, modelled with
30 a connected rooted directed acyclic graph (CRDAG).
34 The easiest way to define a stemma is to use a special form of the 'dot'
37 Each stemma opens with the line
39 digraph "Name of Stemma" {
41 and continues with a list of all manuscript witnesses in the stemma, whether
42 extant witnesses or missing archetypes or hyparchetypes. Each of these is
43 listed by its sigil on its own line, e.g.:
45 alpha [ class=hypothetical ]
46 1 [ class=hypothetical,label=* ]
49 Extant witnesses are listed with class=extant; missing or postulated witnesses
50 are listed with class=hypothetical. Anonymous hyparchetypes must be given a
51 unique name or number, but can be represented as anonymous with the addition
52 of 'label=*' to their lines. Greek letters or other special characters may be
53 used as names, but they must always be wrapped in double quotes.
55 Links between manuscripts are then listed with arrow notation, as below. These
56 lines show the direction of copying, one step at a time, for the entire stemma.
61 The final line in the definition should be the closing brace:
65 Thus for a set of extant manuscripts A, B, and C, where A and B were copied
66 from the archetype O and C was copied from B, the definition would be:
68 digraph "Test stemma 1" {
69 O [ class=hypothetical]
82 The constructor. This should generally be called from Text::Tradition, but
83 if called directly it takes the following options:
87 =item * dot - A filehandle open to a DOT representation of the stemma graph.
89 =item * graph - If no DOT specification is given, you can pass a Graph object
90 instead. The vertices of the graph should have an attribute 'class' set to
91 either of the values 'extant' or 'hypothetical'.
93 =item * is_undirected - If the graph specification (or graph object) is for an
94 undirected graph (e.g. a phylogenetic tree), this should be set.
102 use_ok( 'Text::Tradition::Stemma' );
104 # Try to create a bad graph
106 open( $baddotfh, 't/data/besoin_bad.dot' ) or die "Could not open test dotfile";
108 my $stemma = Text::Tradition::Stemma->new( dot => $baddotfh );
109 ok( 0, "Created broken stemma from dotfile with syntax error" );
110 } catch( Text::Tradition::Error $e ) {
111 like( $e->message, qr/^Error trying to parse/, "Syntax error in dot threw exception" );
114 # Create a good graph
116 open( $dotfh, 't/data/florilegium.dot' ) or die "Could not open test dotfile";
117 binmode( $dotfh, ':utf8' );
118 my $stemma = Text::Tradition::Stemma->new( dot => $dotfh );
119 is( ref( $stemma ), 'Text::Tradition::Stemma', "Created stemma from good dotfile" );
120 is( scalar $stemma->witnesses, 13, "Found correct number of extant witnesses" );
121 is( scalar $stemma->hypotheticals, 8, "Found correct number of extant hypotheticals" );
122 ok( $stemma->has_identifier, "Stemma identifier was found in dot" );
123 is( $stemma->identifier, 'Coislinianum lineage', "Correct stemma identifier was found in dot" );
124 my $found_unicode_sigil;
125 foreach my $h ( $stemma->hypotheticals ) {
126 $found_unicode_sigil = 1 if $h eq "\x{3b1}";
128 ok( $found_unicode_sigil, "Found a correctly encoded Unicode sigil" );
130 # TODO Create stemma from graph, create stemma from undirected graph,
131 # create stemma from incompletely-specified graph
139 isa => 'Text::Tradition::Collation',
140 clearer => 'clear_collation', # interim measure to remove refs in DB
147 predicate => 'has_graph',
153 writer => 'set_identifier',
154 predicate => 'has_identifier',
158 my( $self, $args ) = @_;
159 # If we have been handed a dotfile, initialize it into a graph.
160 if( exists $args->{'dot'} ) {
161 $self->_graph_from_dot( $args->{'dot'} );
165 before 'graph' => sub {
168 # Make sure all unclassed graph nodes are marked extant.
170 throw( "Cannot set graph to a non-Graph object" )
171 unless ref( $g ) eq 'Graph';
172 foreach my $v ( $g->vertices ) {
173 unless( $g->has_vertex_attribute( $v, 'class' ) ) {
174 $g->set_vertex_attribute( $v, 'class', 'extant' );
180 after 'graph' => sub {
183 unless( $self->has_identifier ) {
184 ## HORRIBLE HACK but there is no API access to graph attributes!
185 if( exists $_[0]->[4]->{'name'} ) {
186 $self->set_identifier( $_[0]->[4]->{'name'} );
191 sub _graph_from_dot {
192 my( $self, $dotfh ) = @_;
193 my $reader = Graph::Reader::Dot->new();
194 # Redirect STDOUT in order to trap any error messages - syntax errors
195 # are evidently not fatal.
200 open( STDOUT, ">", \$reader_out );
201 $graph = $reader->read_graph( $dotfh );
204 if( $reader_out && $reader_out =~ /error/s ) {
205 throw( "Error trying to parse dot: $reader_out" );
207 throw( "Failed to create graph from dot" );
209 $self->graph( $graph );
214 return undef unless $self->has_graph;
215 return $self->graph->is_undirected;
220 =head2 as_dot( \%options )
222 Returns a normal dot representation of the stemma layout, suitable for rendering
223 with GraphViz. Options include:
227 =item * graph - A hashref of global graph options.
229 =item * node - A hashref of global node options.
231 =item * edge - A hashref of global edge options.
235 See the GraphViz documentation for the list of available options.
240 my( $self, $opts ) = @_;
242 ## See if we are including any a.c. witnesses in this graph.
243 my $graph = $self->graph;
244 if( exists $opts->{'layerwits'} ) {
246 map { $extant->{$_} = 1 } $self->witnesses;
247 $graph = $self->situation_graph( $extant, $opts->{'layerwits'} );
250 # Get default and specified options
253 'bgcolor' => 'transparent',
258 'fillcolor' => 'white',
260 'shape' => 'ellipse', # Shape for the extant nodes
263 'arrowhead' => 'none',
265 @graphopts{ keys %{$opts->{'graph'}} } = values %{$opts->{'graph'}}
267 @nodeopts{ keys %{$opts->{'node'}} } = values %{$opts->{'node'}}
269 @edgeopts{ keys %{$opts->{'edge'}} } = values %{$opts->{'edge'}}
272 my $gdecl = $graph->is_directed ? 'digraph' : 'graph';
273 my $gname = $self->has_identifier ? '"' . $self->identifier . '"'
276 push( @dotlines, "$gdecl $gname {" );
277 ## Print out the global attributes
278 push( @dotlines, _make_dotline( 'graph', %graphopts ) ) if keys %graphopts;
279 push( @dotlines, _make_dotline( 'edge', %edgeopts ) ) if keys %edgeopts;
280 push( @dotlines, _make_dotline( 'node', %nodeopts ) ) if keys %nodeopts;
282 # Add each of the nodes.
283 foreach my $n ( $graph->vertices ) {
284 my %vattr = ( 'id' => $n ); # Set the SVG element ID to the sigil itself
285 if( $graph->has_vertex_attribute( $n, 'label' ) ) {
286 $vattr{'label'} = $graph->get_vertex_attribute( $n, 'label' );
288 push( @dotlines, _make_dotline( $n, %vattr ) );
290 # Add each of our edges.
291 foreach my $e ( $graph->edges ) {
292 my( $from, $to ) = map { _dotquote( $_ ) } @$e;
293 my $connector = $graph->is_directed ? '->' : '--';
294 push( @dotlines, " $from $connector $to;" );
296 push( @dotlines, '}' );
298 return join( "\n", @dotlines );
301 =head2 alter_graph( $dotstring )
303 Alters the graph of this stemma according to the definition specified
309 my( $self, $dotstring ) = @_;
311 open $dotfh, '<', \$dotstring;
312 binmode $dotfh, ':utf8';
313 $self->_graph_from_dot( $dotfh );
316 =head2 editable( $opts )
318 =head2 editable_graph( $graph, $opts )
320 Returns a version of the graph rendered in our definition format. The
321 output separates statements with a newline; set $opts->{'linesep'} to the
322 empty string or to a space if the result is to be sent via JSON.
324 If a situational version of the stemma is required, the arguments for
325 situation_graph should be passed via $opts->{'extant'} and $opts->{'layerwits'}.
330 my( $self, $opts ) = @_;
331 my $graph = $self->graph;
332 if( $self->has_identifier ) {
333 $opts->{'name'} = $self->identifier;
335 ## See if we need an editable version of a situational graph.
336 if( exists $opts->{'layerwits'} || exists $opts->{'extant'} ) {
337 my $extant = delete $opts->{'extant'} || {};
338 my $layerwits = delete $opts->{'layerwits'} || [];
339 $graph = $self->situation_graph( $extant, $layerwits );
341 return editable_graph( $graph, $opts );
345 my( $graph, $opts ) = @_;
348 my $join = ( $opts && exists $opts->{'linesep'} ) ? $opts->{'linesep'} : "\n";
349 my $gdecl = $graph->is_undirected ? 'graph' : 'digraph';
350 my $gname = exists $opts->{'name'} ? '"' . $opts->{'name'} . '"'
353 push( @dotlines, "$gdecl $gname {" );
354 my @real; # A cheap sort
355 foreach my $n ( sort $graph->vertices ) {
356 my $c = $graph->get_vertex_attribute( $n, 'class' );
357 $c = 'extant' unless $c;
358 if( $c eq 'extant' ) {
361 push( @dotlines, _make_dotline( $n, 'class' => $c ) );
364 # Now do the real ones
365 foreach my $n ( @real ) {
366 push( @dotlines, _make_dotline( $n, 'class' => 'extant' ) );
368 foreach my $e ( sort _by_vertex $graph->edges ) {
369 my( $from, $to ) = map { _dotquote( $_ ) } @$e;
370 my $conn = $graph->is_undirected ? '--' : '->';
371 push( @dotlines, " $from $conn $to;" );
373 push( @dotlines, '}' );
374 return join( $join, @dotlines );
378 my( $obj, %attr ) = @_;
380 foreach my $k ( keys %attr ) {
381 my $v = _dotquote( $attr{$k} );
382 push( @pairs, "$k=$v" );
384 return sprintf( " %s [ %s ];", _dotquote( $obj ), join( ', ', @pairs ) );
389 return $str if $str =~ /^[A-Za-z0-9]+$/;
391 $str = '"' . $str . '"';
396 return $a->[0].$a->[1] cmp $b->[0].$b->[1];
399 =head2 situation_graph( $extant, $layered )
401 Returns a graph which is the original stemma graph with all witnesses not
402 in the %$extant hash marked as hypothetical, and witness layers added to
403 the graph according to the list in @$layered. A layered (a.c.) witness is
404 added as a parent of its main version, and additionally shares all other
405 parents and children with that version.
409 sub situation_graph {
410 my( $self, $extant, $layerwits, $layerlabel ) = @_;
412 my $graph = $self->graph->copy;
413 foreach my $vertex ( $graph->vertices ) {
414 # Set as extant any vertex that is extant in the stemma AND
415 # exists in the $extant hash.
416 my $class = 'hypothetical';
417 $class = 'extant' if exists $extant->{$vertex} && $extant->{$vertex} &&
418 $self->graph->get_vertex_attribute( $vertex, 'class' ) ne 'hypothetical';
419 $graph->set_vertex_attribute( $vertex, 'class', $class );
422 # For each 'layered' witness in the layerwits array, add it to the graph
423 # as an ancestor of the 'main' witness, and otherwise with the same parent/
424 # child links as its main analogue.
425 # TOOD Handle case where B is copied from A but corrected from C
426 $layerlabel = ' (a.c.)' unless $layerlabel;
427 foreach my $lw ( @$layerwits ) {
428 # Add the layered witness and set it with the same attributes as
429 # its 'main' analogue
430 throw( "Cannot add a layer to a hypothetical witness $lw" )
431 unless $graph->get_vertex_attribute( $lw, 'class' ) eq 'extant';
432 my $lwac = $lw . $layerlabel;
433 $graph->add_vertex( $lwac );
434 $graph->set_vertex_attributes( $lwac,
435 $graph->get_vertex_attributes( $lw ) );
437 # Set it as ancestor to the main witness
438 $graph->add_edge( $lwac, $lw );
440 # Give it the same ancestors and descendants as the main witness has,
441 # bearing in mind that those ancestors and descendants might also just
442 # have had a layered witness defined.
443 foreach my $v ( $graph->predecessors( $lw ) ) {
444 next if $v eq $lwac; # Don't add a loop
445 $graph->add_edge( $v, $lwac );
446 $graph->add_edge( $v.$layerlabel, $lwac )
447 if $graph->has_vertex( $v.$layerlabel );
449 foreach my $v ( $graph->successors( $lw ) ) {
450 next if $v eq $lwac; # but this shouldn't occur
451 $graph->add_edge( $lwac, $v );
452 $graph->add_edge( $lwac, $v.$layerlabel )
453 if $graph->has_vertex( $v.$layerlabel );
461 Returns an SVG representation of the graph, calling as_dot first.
466 my( $self, $opts ) = @_;
467 my $dot = $self->as_dot( $opts );
468 my @cmd = ( '-Tsvg' );
469 unshift( @cmd, $self->is_undirected ? 'neato' : 'dot' );
471 my $dotfile = File::Temp->new();
472 binmode $dotfile, ':utf8';
475 push( @cmd, $dotfile->filename );
476 run( \@cmd, ">", binary(), \$svg );
477 return decode_utf8( $svg );
482 Returns a list of the extant witnesses represented in the stemma.
488 my @wits = grep { $self->graph->get_vertex_attribute( $_, 'class' ) eq 'extant' }
489 $self->graph->vertices;
495 Returns a list of the hypothetical witnesses represented in the stemma.
502 { $self->graph->get_vertex_attribute( $_, 'class' ) eq 'hypothetical' }
503 $self->graph->vertices;
507 =head2 root_graph( $root_vertex )
509 If the stemma graph is undirected, make it directed with $root_vertex at the root.
510 If it is directed, re-root it.
515 my( $self, $rootvertex ) = @_;
517 if( $self->is_undirected ) {
518 $graph = $self->graph;
520 # Make an undirected version of this graph.
521 $graph = $self->graph->undirected_copy();
523 # First, ensure that the requested root is actually a vertex in the graph.
524 unless( $graph->has_vertex( $rootvertex ) ) {
525 throw( "Cannot orient graph $graph on nonexistent vertex $rootvertex" );
528 # Now make a directed version of the graph.
529 my $rooted = Graph->new();
530 $rooted->add_vertex( $rootvertex );
531 my @next = ( $rootvertex );
534 foreach my $v ( @next ) {
535 # Place its not-placed neighbors (ergo children) in the tree
537 foreach my $n ( grep { !$rooted->has_vertex( $_ ) }
538 $graph->neighbors( $v ) ) {
539 $rooted->add_vertex( $n );
540 $rooted->add_edge( $v, $n );
541 push( @children, $n );
546 # Set the vertex classes
547 map { $rooted->set_vertex_attribute( $_, 'class', 'hypothetical' ) }
548 $self->hypotheticals;
549 map { $rooted->set_vertex_attribute( $_, 'class', 'extant' ) }
551 $self->graph( $rooted );
556 Text::Tradition::Error->throw(
557 'ident' => 'Stemma error',
564 __PACKAGE__->meta->make_immutable;
570 This package is free software and is provided "as is" without express
571 or implied warranty. You can redistribute it and/or modify it under
572 the same terms as Perl itself.
576 Tara L Andrews E<lt>aurum@cpan.orgE<gt>