1 package Text::Tradition::Stemma;
3 use Encode qw( decode_utf8 );
6 use Graph::Reader::Dot;
7 use IPC::Run qw/ run binary /;
8 use Text::Tradition::Error;
13 Text::Tradition::Stemma - a representation of a I<stemma codicum> for a Text::Tradition
18 my $t = Text::Tradition->new(
19 'name' => 'this is a text',
21 'file' => '/path/to/tei_parallel_seg_file.xml' );
23 my $s = $tradition->add_stemma( dotfile => '/path/to/stemma.dot' );
27 Text::Tradition is a library for representation and analysis of collated
28 texts, particularly medieval ones. The Stemma is a representation of the
29 copying relationships between the witnesses in a Tradition, modelled with
30 a connected rooted directed acyclic graph (CRDAG).
34 The easiest way to define a stemma is to use a special form of the 'dot'
37 Each stemma opens with the line
41 and continues with a list of all manuscript witnesses in the stemma, whether
42 extant witnesses or missing archetypes or hyparchetypes. Each of these is
43 listed by its sigil on its own line, e.g.:
45 alpha [ class=hypothetical ]
46 1 [ class=hypothetical,label=* ]
49 Extant witnesses are listed with class=extant; missing or postulated witnesses
50 are listed with class=hypothetical. Anonymous hyparchetypes must be given a
51 unique name or number, but can be represented as anonymous with the addition
52 of 'label=*' to their lines. Greek letters or other special characters may be
53 used as names, but they must always be wrapped in double quotes.
55 Links between manuscripts are then listed with arrow notation, as below. These
56 lines show the direction of copying, one step at a time, for the entire stemma.
61 The final line in the definition should be the closing brace:
65 Thus for a set of extant manuscripts A, B, and C, where A and B were copied
66 from the archetype O and C was copied from B, the definition would be:
69 O [ class=hypothetical]
82 The constructor. This should generally be called from Text::Tradition, but
83 if called directly it takes the following options:
87 =item * dot - A filehandle open to a DOT representation of the stemma graph.
89 =item * graph - If no DOT specification is given, you can pass a Graph object
90 instead. The vertices of the graph should have an attribute 'class' set to
91 either of the values 'extant' or 'hypothetical'.
93 =item * is_undirected - If the graph specification (or graph object) is for an
94 undirected graph (e.g. a phylogenetic tree), this should be set.
102 use_ok( 'Text::Tradition::Stemma' );
104 # Try to create a bad graph
106 local $TODO = "cannot use stdout redirection trick with FastCGI";
108 open( $baddotfh, 't/data/besoin_bad.dot' ) or die "Could not open test dotfile";
110 my $stemma = Text::Tradition::Stemma->new( dot => $baddotfh );
111 ok( 0, "Created broken stemma from dotfile with syntax error" );
112 } catch( Text::Tradition::Error $e ) {
113 like( $e->message, qr/^Error trying to parse/, "Syntax error in dot threw exception" );
117 # Create a good graph
119 open( $dotfh, 't/data/florilegium.dot' ) or die "Could not open test dotfile";
120 binmode( $dotfh, ':utf8' );
121 my $stemma = Text::Tradition::Stemma->new( dot => $dotfh );
122 is( ref( $stemma ), 'Text::Tradition::Stemma', "Created stemma from good dotfile" );
123 is( scalar $stemma->witnesses, 13, "Found correct number of extant witnesses" );
124 is( scalar $stemma->hypotheticals, 8, "Found correct number of extant hypotheticals" );
125 my $found_unicode_sigil;
126 foreach my $h ( $stemma->hypotheticals ) {
127 $found_unicode_sigil = 1 if $h eq "\x{3b1}";
129 ok( $found_unicode_sigil, "Found a correctly encoded Unicode sigil" );
131 # TODO Create stemma from graph, create stemma from undirected graph,
132 # create stemma from incompletely-specified graph
140 isa => 'Text::Tradition::Collation',
141 clearer => 'clear_collation', # interim measure to remove refs in DB
148 predicate => 'has_graph',
151 has is_undirected => (
155 writer => 'set_undirected',
159 my( $self, $args ) = @_;
160 # If we have been handed a dotfile, initialize it into a graph.
161 if( exists $args->{'dot'} ) {
162 $self->_graph_from_dot( $args->{'dot'} );
167 before 'graph' => sub {
170 # Make sure all unclassed graph nodes are marked extant.
172 throw( "Cannot set graph to a non-Graph object" )
173 unless ref( $g ) eq 'Graph';
174 foreach my $v ( $g->vertices ) {
175 unless( $g->has_vertex_attribute( $v, 'class' ) ) {
176 $g->set_vertex_attribute( $v, 'class', 'extant' );
179 $self->set_undirected( $g->is_undirected );
183 sub _graph_from_dot {
184 my( $self, $dotfh ) = @_;
185 my $reader = Graph::Reader::Dot->new();
186 # Redirect STDOUT in order to trap any error messages - syntax errors
187 # are evidently not fatal.
188 # TODO This breaks under FastCGI/Apache; reconsider.
191 #open $saved_stderr, ">&STDOUT";
193 #open STDOUT, ">", \$reader_out;
194 my $graph = $reader->read_graph( $dotfh );
196 #open STDOUT, ">", \$saved_stderr;
197 if( $reader_out && $reader_out =~ /error/s ) {
198 throw( "Error trying to parse dot: $reader_out" );
200 throw( "Failed to create graph from dot" );
202 $self->graph( $graph );
207 =head2 as_dot( \%options )
209 Returns a normal dot representation of the stemma layout, suitable for rendering
210 with GraphViz. Options include:
214 =item * graph - A hashref of global graph options.
216 =item * node - A hashref of global node options.
218 =item * edge - A hashref of global edge options.
222 See the GraphViz documentation for the list of available options.
227 my( $self, $opts ) = @_;
229 ## See if we are including any a.c. witnesses in this graph.
230 my $graph = $self->graph;
231 if( exists $opts->{'layerwits'} ) {
233 map { $extant->{$_} = 1 } $self->witnesses;
234 $graph = $self->situation_graph( $extant, $opts->{'layerwits'} );
237 # Get default and specified options
240 'bgcolor' => 'transparent',
245 'fillcolor' => 'white',
247 'shape' => 'ellipse', # Shape for the extant nodes
250 'arrowhead' => 'none',
252 @graphopts{ keys %{$opts->{'graph'}} } = values %{$opts->{'graph'}}
254 @nodeopts{ keys %{$opts->{'node'}} } = values %{$opts->{'node'}}
256 @edgeopts{ keys %{$opts->{'edge'}} } = values %{$opts->{'edge'}}
259 my $gdecl = $graph->is_directed ? 'digraph' : 'graph';
261 push( @dotlines, "$gdecl stemma {" );
262 ## Print out the global attributes
263 push( @dotlines, _make_dotline( 'graph', %graphopts ) ) if keys %graphopts;
264 push( @dotlines, _make_dotline( 'edge', %edgeopts ) ) if keys %edgeopts;
265 push( @dotlines, _make_dotline( 'node', %nodeopts ) ) if keys %nodeopts;
267 # Add each of the nodes.
268 foreach my $n ( $graph->vertices ) {
269 my %vattr = ( 'id' => $n ); # Set the SVG element ID to the sigil itself
270 if( $graph->has_vertex_attribute( $n, 'label' ) ) {
271 $vattr{'label'} = $graph->get_vertex_attribute( $n, 'label' );
273 push( @dotlines, _make_dotline( $n, %vattr ) );
275 # Add each of our edges.
276 foreach my $e ( $graph->edges ) {
277 my( $from, $to ) = map { _dotquote( $_ ) } @$e;
278 my $connector = $graph->is_directed ? '->' : '--';
279 push( @dotlines, " $from $connector $to;" );
281 push( @dotlines, '}' );
283 return join( "\n", @dotlines );
286 =head2 alter_graph( $dotstring )
288 Alters the graph of this stemma according to the definition specified
294 my( $self, $dotstring ) = @_;
296 open $dotfh, '<', \$dotstring;
297 binmode $dotfh, ':utf8';
298 $self->_graph_from_dot( $dotfh );
301 =head2 editable( $opts )
303 =head2 editable_graph( $graph, $opts )
305 Returns a version of the graph rendered in our definition format. The
306 output separates statements with a newline; set $opts->{'linesep'} to the
307 empty string or to a space if the result is to be sent via JSON.
309 If a situational version of the stemma is required, the arguments for
310 situation_graph should be passed via $opts->{'extant'} and $opts->{'layerwits'}.
315 my( $self, $opts ) = @_;
316 my $graph = $self->graph;
317 ## See if we need an editable version of a situational graph.
318 if( exists $opts->{'layerwits'} || exists $opts->{'extant'} ) {
319 my $extant = delete $opts->{'extant'} || {};
320 my $layerwits = delete $opts->{'layerwits'} || [];
321 $graph = $self->situation_graph( $extant, $layerwits );
323 return editable_graph( $graph, $opts );
327 my( $graph, $opts ) = @_;
330 my $join = ( $opts && exists $opts->{'linesep'} ) ? $opts->{'linesep'} : "\n";
331 my $gdecl = $graph->is_undirected ? 'graph' : 'digraph';
333 push( @dotlines, "$gdecl stemma {" );
334 my @real; # A cheap sort
335 foreach my $n ( sort $graph->vertices ) {
336 my $c = $graph->get_vertex_attribute( $n, 'class' );
337 $c = 'extant' unless $c;
338 if( $c eq 'extant' ) {
341 push( @dotlines, _make_dotline( $n, 'class' => $c ) );
344 # Now do the real ones
345 foreach my $n ( @real ) {
346 push( @dotlines, _make_dotline( $n, 'class' => 'extant' ) );
348 foreach my $e ( sort _by_vertex $graph->edges ) {
349 my( $from, $to ) = map { _dotquote( $_ ) } @$e;
350 my $conn = $graph->is_undirected ? '--' : '->';
351 push( @dotlines, " $from $conn $to;" );
353 push( @dotlines, '}' );
354 return join( $join, @dotlines );
358 my( $obj, %attr ) = @_;
360 foreach my $k ( keys %attr ) {
361 my $v = _dotquote( $attr{$k} );
362 push( @pairs, "$k=$v" );
364 return sprintf( " %s [ %s ];", _dotquote( $obj ), join( ', ', @pairs ) );
369 return $str if $str =~ /^[A-Za-z0-9]+$/;
371 $str = '"' . $str . '"';
376 return $a->[0].$a->[1] cmp $b->[0].$b->[1];
379 =head2 situation_graph( $extant, $layered )
381 Returns a graph which is the original stemma graph with all witnesses not
382 in the %$extant hash marked as hypothetical, and witness layers added to
383 the graph according to the list in @$layered. A layered (a.c.) witness is
384 added as a parent of its main version, and additionally shares all other
385 parents and children with that version.
389 sub situation_graph {
390 my( $self, $extant, $layerwits, $layerlabel ) = @_;
392 my $graph = $self->graph->copy;
393 foreach my $vertex ( $graph->vertices ) {
394 # Set as extant any vertex that is extant in the stemma AND
395 # exists in the $extant hash.
396 my $class = 'hypothetical';
397 $class = 'extant' if exists $extant->{$vertex} && $extant->{$vertex} &&
398 $self->graph->get_vertex_attribute( $vertex, 'class' ) ne 'hypothetical';
399 $graph->set_vertex_attribute( $vertex, 'class', $class );
402 # For each 'layered' witness in the layerwits array, add it to the graph
403 # as an ancestor of the 'main' witness, and otherwise with the same parent/
404 # child links as its main analogue.
405 # TOOD Handle case where B is copied from A but corrected from C
406 $layerlabel = ' (a.c.)' unless $layerlabel;
407 foreach my $lw ( @$layerwits ) {
408 # Add the layered witness and set it with the same attributes as
409 # its 'main' analogue
410 throw( "Cannot add a layer to a hypothetical witness $lw" )
411 unless $graph->get_vertex_attribute( $lw, 'class' ) eq 'extant';
412 my $lwac = $lw . $layerlabel;
413 $graph->add_vertex( $lwac );
414 $graph->set_vertex_attributes( $lwac,
415 $graph->get_vertex_attributes( $lw ) );
417 # Set it as ancestor to the main witness
418 $graph->add_edge( $lwac, $lw );
420 # Give it the same ancestors and descendants as the main witness has,
421 # bearing in mind that those ancestors and descendants might also just
422 # have had a layered witness defined.
423 foreach my $v ( $graph->predecessors( $lw ) ) {
424 next if $v eq $lwac; # Don't add a loop
425 $graph->add_edge( $v, $lwac );
426 $graph->add_edge( $v.$layerlabel, $lwac )
427 if $graph->has_vertex( $v.$layerlabel );
429 foreach my $v ( $graph->successors( $lw ) ) {
430 next if $v eq $lwac; # but this shouldn't occur
431 $graph->add_edge( $lwac, $v );
432 $graph->add_edge( $lwac, $v.$layerlabel )
433 if $graph->has_vertex( $v.$layerlabel );
441 Returns an SVG representation of the graph, calling as_dot first.
446 my( $self, $opts ) = @_;
447 my $dot = $self->as_dot( $opts );
448 my @cmd = ( '-Tsvg' );
449 unshift( @cmd, $self->is_undirected ? 'neato' : 'dot' );
451 my $dotfile = File::Temp->new();
452 binmode $dotfile, ':utf8';
455 push( @cmd, $dotfile->filename );
456 run( \@cmd, ">", binary(), \$svg );
457 return decode_utf8( $svg );
462 Returns a list of the extant witnesses represented in the stemma.
468 my @wits = grep { $self->graph->get_vertex_attribute( $_, 'class' ) eq 'extant' }
469 $self->graph->vertices;
475 Returns a list of the hypothetical witnesses represented in the stemma.
482 { $self->graph->get_vertex_attribute( $_, 'class' ) eq 'hypothetical' }
483 $self->graph->vertices;
487 =head2 root_graph( $root_vertex )
489 If the stemma graph is undirected, make it directed with $root_vertex at the root.
490 If it is directed, re-root it.
495 my( $self, $rootvertex ) = @_;
497 if( $self->is_undirected ) {
498 $graph = $self->graph;
500 # Make an undirected version of this graph.
501 $graph = $self->graph->undirected_copy();
503 my $rooted = Graph->new();
504 $rooted->add_vertex( $rootvertex );
505 my @next = ( $rootvertex );
508 foreach my $v ( @next ) {
509 # Place its not-placed neighbors (ergo children) in the tree
511 foreach my $n ( grep { !$rooted->has_vertex( $_ ) }
512 $graph->neighbors( $v ) ) {
513 $rooted->add_vertex( $n );
514 $rooted->add_edge( $v, $n );
515 push( @children, $n );
520 # Set the vertex classes
521 map { $rooted->set_vertex_attribute( $_, 'class', 'hypothetical' ) }
522 $self->graph->hypotheticals;
523 map { $rooted->set_vertex_class( $_, 'class', 'extant' ) }
524 $self->graph->witnesses;
530 Text::Tradition::Error->throw(
531 'ident' => 'Stemma error',
538 __PACKAGE__->meta->make_immutable;
544 This package is free software and is provided "as is" without express
545 or implied warranty. You can redistribute it and/or modify it under
546 the same terms as Perl itself.
550 Tara L Andrews E<lt>aurum@cpan.orgE<gt>