1 package Text::Tradition::Stemma;
3 use Encode qw( decode_utf8 );
6 use Graph::Reader::Dot;
7 use IPC::Run qw/ run binary /;
8 use Text::Tradition::Error;
13 Text::Tradition::Stemma - a representation of a I<stemma codicum> for a Text::Tradition
18 my $t = Text::Tradition->new(
19 'name' => 'this is a text',
21 'file' => '/path/to/tei_parallel_seg_file.xml' );
23 my $s = $tradition->add_stemma( dotfile => '/path/to/stemma.dot' );
27 Text::Tradition is a library for representation and analysis of collated
28 texts, particularly medieval ones. The Stemma is a representation of the
29 copying relationships between the witnesses in a Tradition, modelled with
30 a connected rooted directed acyclic graph (CRDAG).
34 The easiest way to define a stemma is to use a special form of the 'dot'
37 Each stemma opens with the line
39 digraph "Name of Stemma" {
41 and continues with a list of all manuscript witnesses in the stemma, whether
42 extant witnesses or missing archetypes or hyparchetypes. Each of these is
43 listed by its sigil on its own line, e.g.:
45 alpha [ class=hypothetical ]
46 1 [ class=hypothetical,label=* ]
49 Extant witnesses are listed with class=extant; missing or postulated witnesses
50 are listed with class=hypothetical. Anonymous hyparchetypes must be given a
51 unique name or number, but can be represented as anonymous with the addition
52 of 'label=*' to their lines. Greek letters or other special characters may be
53 used as names, but they must always be wrapped in double quotes.
55 Links between manuscripts are then listed with arrow notation, as below. These
56 lines show the direction of copying, one step at a time, for the entire stemma.
61 The final line in the definition should be the closing brace:
65 Thus for a set of extant manuscripts A, B, and C, where A and B were copied
66 from the archetype O and C was copied from B, the definition would be:
68 digraph "Test stemma 1" {
69 O [ class=hypothetical]
82 The constructor. This should generally be called from Text::Tradition, but
83 if called directly it takes the following options:
87 =item * dot - A filehandle open to a DOT representation of the stemma graph.
89 =item * graph - If no DOT specification is given, you can pass a Graph object
90 instead. The vertices of the graph should have an attribute 'class' set to
91 either of the values 'extant' or 'hypothetical'.
93 =item * is_undirected - If the graph specification (or graph object) is for an
94 undirected graph (e.g. a phylogenetic tree), this should be set.
102 use_ok( 'Text::Tradition::Stemma' );
104 # Try to create a bad graph
106 local $TODO = "cannot use stdout redirection trick with FastCGI";
108 open( $baddotfh, 't/data/besoin_bad.dot' ) or die "Could not open test dotfile";
110 my $stemma = Text::Tradition::Stemma->new( dot => $baddotfh );
111 ok( 0, "Created broken stemma from dotfile with syntax error" );
112 } catch( Text::Tradition::Error $e ) {
113 like( $e->message, qr/^Error trying to parse/, "Syntax error in dot threw exception" );
117 # Create a good graph
119 open( $dotfh, 't/data/florilegium.dot' ) or die "Could not open test dotfile";
120 binmode( $dotfh, ':utf8' );
121 my $stemma = Text::Tradition::Stemma->new( dot => $dotfh );
122 is( ref( $stemma ), 'Text::Tradition::Stemma', "Created stemma from good dotfile" );
123 is( scalar $stemma->witnesses, 13, "Found correct number of extant witnesses" );
124 is( scalar $stemma->hypotheticals, 8, "Found correct number of extant hypotheticals" );
125 ok( $stemma->has_identifier, "Stemma identifier was found in dot" );
126 is( $stemma->identifier, 'Coislinianum lineage', "Correct stemma identifier was found in dot" );
127 my $found_unicode_sigil;
128 foreach my $h ( $stemma->hypotheticals ) {
129 $found_unicode_sigil = 1 if $h eq "\x{3b1}";
131 ok( $found_unicode_sigil, "Found a correctly encoded Unicode sigil" );
133 # TODO Create stemma from graph, create stemma from undirected graph,
134 # create stemma from incompletely-specified graph
142 isa => 'Text::Tradition::Collation',
143 clearer => 'clear_collation', # interim measure to remove refs in DB
150 predicate => 'has_graph',
156 writer => 'set_identifier',
157 predicate => 'has_identifier',
161 my( $self, $args ) = @_;
162 # If we have been handed a dotfile, initialize it into a graph.
163 if( exists $args->{'dot'} ) {
164 $self->_graph_from_dot( $args->{'dot'} );
168 before 'graph' => sub {
171 # Make sure all unclassed graph nodes are marked extant.
173 throw( "Cannot set graph to a non-Graph object" )
174 unless ref( $g ) eq 'Graph';
175 foreach my $v ( $g->vertices ) {
176 unless( $g->has_vertex_attribute( $v, 'class' ) ) {
177 $g->set_vertex_attribute( $v, 'class', 'extant' );
183 after 'graph' => sub {
186 unless( $self->has_identifier ) {
187 ## HORRIBLE HACK but there is no API access to graph attributes!
188 if( exists $_[0]->[4]->{'name'} ) {
189 $self->set_identifier( $_[0]->[4]->{'name'} );
194 sub _graph_from_dot {
195 my( $self, $dotfh ) = @_;
196 my $reader = Graph::Reader::Dot->new();
197 # Redirect STDOUT in order to trap any error messages - syntax errors
198 # are evidently not fatal.
199 # TODO This breaks under FastCGI/Apache; reconsider.
202 #open $saved_stderr, ">&STDOUT";
204 #open STDOUT, ">", \$reader_out;
205 my $graph = $reader->read_graph( $dotfh );
207 #open STDOUT, ">", \$saved_stderr;
208 if( $reader_out && $reader_out =~ /error/s ) {
209 throw( "Error trying to parse dot: $reader_out" );
211 throw( "Failed to create graph from dot" );
213 $self->graph( $graph );
218 return undef unless $self->has_graph;
219 return $self->graph->is_undirected;
224 =head2 as_dot( \%options )
226 Returns a normal dot representation of the stemma layout, suitable for rendering
227 with GraphViz. Options include:
231 =item * graph - A hashref of global graph options.
233 =item * node - A hashref of global node options.
235 =item * edge - A hashref of global edge options.
239 See the GraphViz documentation for the list of available options.
244 my( $self, $opts ) = @_;
246 ## See if we are including any a.c. witnesses in this graph.
247 my $graph = $self->graph;
248 if( exists $opts->{'layerwits'} ) {
250 map { $extant->{$_} = 1 } $self->witnesses;
251 $graph = $self->situation_graph( $extant, $opts->{'layerwits'} );
254 # Get default and specified options
257 'bgcolor' => 'transparent',
262 'fillcolor' => 'white',
264 'shape' => 'ellipse', # Shape for the extant nodes
267 'arrowhead' => 'none',
269 @graphopts{ keys %{$opts->{'graph'}} } = values %{$opts->{'graph'}}
271 @nodeopts{ keys %{$opts->{'node'}} } = values %{$opts->{'node'}}
273 @edgeopts{ keys %{$opts->{'edge'}} } = values %{$opts->{'edge'}}
276 my $gdecl = $graph->is_directed ? 'digraph' : 'graph';
277 my $gname = $self->has_identifier ? '"' . $self->identifier . '"'
280 push( @dotlines, "$gdecl $gname {" );
281 ## Print out the global attributes
282 push( @dotlines, _make_dotline( 'graph', %graphopts ) ) if keys %graphopts;
283 push( @dotlines, _make_dotline( 'edge', %edgeopts ) ) if keys %edgeopts;
284 push( @dotlines, _make_dotline( 'node', %nodeopts ) ) if keys %nodeopts;
286 # Add each of the nodes.
287 foreach my $n ( $graph->vertices ) {
288 my %vattr = ( 'id' => $n ); # Set the SVG element ID to the sigil itself
289 if( $graph->has_vertex_attribute( $n, 'label' ) ) {
290 $vattr{'label'} = $graph->get_vertex_attribute( $n, 'label' );
292 push( @dotlines, _make_dotline( $n, %vattr ) );
294 # Add each of our edges.
295 foreach my $e ( $graph->edges ) {
296 my( $from, $to ) = map { _dotquote( $_ ) } @$e;
297 my $connector = $graph->is_directed ? '->' : '--';
298 push( @dotlines, " $from $connector $to;" );
300 push( @dotlines, '}' );
302 return join( "\n", @dotlines );
305 =head2 alter_graph( $dotstring )
307 Alters the graph of this stemma according to the definition specified
313 my( $self, $dotstring ) = @_;
315 open $dotfh, '<', \$dotstring;
316 binmode $dotfh, ':utf8';
317 $self->_graph_from_dot( $dotfh );
320 =head2 editable( $opts )
322 =head2 editable_graph( $graph, $opts )
324 Returns a version of the graph rendered in our definition format. The
325 output separates statements with a newline; set $opts->{'linesep'} to the
326 empty string or to a space if the result is to be sent via JSON.
328 If a situational version of the stemma is required, the arguments for
329 situation_graph should be passed via $opts->{'extant'} and $opts->{'layerwits'}.
334 my( $self, $opts ) = @_;
335 my $graph = $self->graph;
336 if( $self->has_identifier ) {
337 $opts->{'name'} = $self->identifier;
339 ## See if we need an editable version of a situational graph.
340 if( exists $opts->{'layerwits'} || exists $opts->{'extant'} ) {
341 my $extant = delete $opts->{'extant'} || {};
342 my $layerwits = delete $opts->{'layerwits'} || [];
343 $graph = $self->situation_graph( $extant, $layerwits );
345 return editable_graph( $graph, $opts );
349 my( $graph, $opts ) = @_;
352 my $join = ( $opts && exists $opts->{'linesep'} ) ? $opts->{'linesep'} : "\n";
353 my $gdecl = $graph->is_undirected ? 'graph' : 'digraph';
354 my $gname = exists $opts->{'name'} ? '"' . $opts->{'name'} . '"'
357 push( @dotlines, "$gdecl $gname {" );
358 my @real; # A cheap sort
359 foreach my $n ( sort $graph->vertices ) {
360 my $c = $graph->get_vertex_attribute( $n, 'class' );
361 $c = 'extant' unless $c;
362 if( $c eq 'extant' ) {
365 push( @dotlines, _make_dotline( $n, 'class' => $c ) );
368 # Now do the real ones
369 foreach my $n ( @real ) {
370 push( @dotlines, _make_dotline( $n, 'class' => 'extant' ) );
372 foreach my $e ( sort _by_vertex $graph->edges ) {
373 my( $from, $to ) = map { _dotquote( $_ ) } @$e;
374 my $conn = $graph->is_undirected ? '--' : '->';
375 push( @dotlines, " $from $conn $to;" );
377 push( @dotlines, '}' );
378 return join( $join, @dotlines );
382 my( $obj, %attr ) = @_;
384 foreach my $k ( keys %attr ) {
385 my $v = _dotquote( $attr{$k} );
386 push( @pairs, "$k=$v" );
388 return sprintf( " %s [ %s ];", _dotquote( $obj ), join( ', ', @pairs ) );
393 return $str if $str =~ /^[A-Za-z0-9]+$/;
395 $str = '"' . $str . '"';
400 return $a->[0].$a->[1] cmp $b->[0].$b->[1];
403 =head2 situation_graph( $extant, $layered )
405 Returns a graph which is the original stemma graph with all witnesses not
406 in the %$extant hash marked as hypothetical, and witness layers added to
407 the graph according to the list in @$layered. A layered (a.c.) witness is
408 added as a parent of its main version, and additionally shares all other
409 parents and children with that version.
413 sub situation_graph {
414 my( $self, $extant, $layerwits, $layerlabel ) = @_;
416 my $graph = $self->graph->copy;
417 foreach my $vertex ( $graph->vertices ) {
418 # Set as extant any vertex that is extant in the stemma AND
419 # exists in the $extant hash.
420 my $class = 'hypothetical';
421 $class = 'extant' if exists $extant->{$vertex} && $extant->{$vertex} &&
422 $self->graph->get_vertex_attribute( $vertex, 'class' ) ne 'hypothetical';
423 $graph->set_vertex_attribute( $vertex, 'class', $class );
426 # For each 'layered' witness in the layerwits array, add it to the graph
427 # as an ancestor of the 'main' witness, and otherwise with the same parent/
428 # child links as its main analogue.
429 # TOOD Handle case where B is copied from A but corrected from C
430 $layerlabel = ' (a.c.)' unless $layerlabel;
431 foreach my $lw ( @$layerwits ) {
432 # Add the layered witness and set it with the same attributes as
433 # its 'main' analogue
434 throw( "Cannot add a layer to a hypothetical witness $lw" )
435 unless $graph->get_vertex_attribute( $lw, 'class' ) eq 'extant';
436 my $lwac = $lw . $layerlabel;
437 $graph->add_vertex( $lwac );
438 $graph->set_vertex_attributes( $lwac,
439 $graph->get_vertex_attributes( $lw ) );
441 # Set it as ancestor to the main witness
442 $graph->add_edge( $lwac, $lw );
444 # Give it the same ancestors and descendants as the main witness has,
445 # bearing in mind that those ancestors and descendants might also just
446 # have had a layered witness defined.
447 foreach my $v ( $graph->predecessors( $lw ) ) {
448 next if $v eq $lwac; # Don't add a loop
449 $graph->add_edge( $v, $lwac );
450 $graph->add_edge( $v.$layerlabel, $lwac )
451 if $graph->has_vertex( $v.$layerlabel );
453 foreach my $v ( $graph->successors( $lw ) ) {
454 next if $v eq $lwac; # but this shouldn't occur
455 $graph->add_edge( $lwac, $v );
456 $graph->add_edge( $lwac, $v.$layerlabel )
457 if $graph->has_vertex( $v.$layerlabel );
465 Returns an SVG representation of the graph, calling as_dot first.
470 my( $self, $opts ) = @_;
471 my $dot = $self->as_dot( $opts );
472 my @cmd = ( '-Tsvg' );
473 unshift( @cmd, $self->is_undirected ? 'neato' : 'dot' );
475 my $dotfile = File::Temp->new();
476 binmode $dotfile, ':utf8';
479 push( @cmd, $dotfile->filename );
480 run( \@cmd, ">", binary(), \$svg );
481 return decode_utf8( $svg );
486 Returns a list of the extant witnesses represented in the stemma.
492 my @wits = grep { $self->graph->get_vertex_attribute( $_, 'class' ) eq 'extant' }
493 $self->graph->vertices;
499 Returns a list of the hypothetical witnesses represented in the stemma.
506 { $self->graph->get_vertex_attribute( $_, 'class' ) eq 'hypothetical' }
507 $self->graph->vertices;
511 =head2 root_graph( $root_vertex )
513 If the stemma graph is undirected, make it directed with $root_vertex at the root.
514 If it is directed, re-root it.
519 my( $self, $rootvertex ) = @_;
521 if( $self->is_undirected ) {
522 $graph = $self->graph;
524 # Make an undirected version of this graph.
525 $graph = $self->graph->undirected_copy();
527 my $rooted = Graph->new();
528 $rooted->add_vertex( $rootvertex );
529 my @next = ( $rootvertex );
532 foreach my $v ( @next ) {
533 # Place its not-placed neighbors (ergo children) in the tree
535 foreach my $n ( grep { !$rooted->has_vertex( $_ ) }
536 $graph->neighbors( $v ) ) {
537 $rooted->add_vertex( $n );
538 $rooted->add_edge( $v, $n );
539 push( @children, $n );
544 # Set the vertex classes
545 map { $rooted->set_vertex_attribute( $_, 'class', 'hypothetical' ) }
546 $self->graph->hypotheticals;
547 map { $rooted->set_vertex_class( $_, 'class', 'extant' ) }
548 $self->graph->witnesses;
554 Text::Tradition::Error->throw(
555 'ident' => 'Stemma error',
562 __PACKAGE__->meta->make_immutable;
568 This package is free software and is provided "as is" without express
569 or implied warranty. You can redistribute it and/or modify it under
570 the same terms as Perl itself.
574 Tara L Andrews E<lt>aurum@cpan.orgE<gt>