1 package Text::Tradition::Stemma;
4 use Encode qw( decode_utf8 );
7 use Graph::Reader::Dot;
8 use IPC::Run qw/ run binary /;
9 use Text::Tradition::Error;
10 use Text::Tradition::StemmaUtil qw/ character_input phylip_pars parse_newick /;
15 Text::Tradition::Stemma - a representation of a I<stemma codicum> for a Text::Tradition
20 my $t = Text::Tradition->new(
21 'name' => 'this is a text',
23 'file' => '/path/to/tei_parallel_seg_file.xml' );
25 my $s = $tradition->add_stemma( dotfile => '/path/to/stemma.dot' );
29 Text::Tradition is a library for representation and analysis of collated
30 texts, particularly medieval ones. The Stemma is a representation of the
31 copying relationships between the witnesses in a Tradition, modelled with
32 a connected rooted directed acyclic graph (CRDAG).
36 The easiest way to define a stemma is to use a special form of the 'dot'
39 Each stemma opens with the line
43 and continues with a list of all manuscript witnesses in the stemma, whether
44 extant witnesses or missing archetypes or hyparchetypes. Each of these is
45 listed by its sigil on its own line, e.g.:
47 alpha [ class=hypothetical ]
48 1 [ class=hypothetical,label=* ]
51 Extant witnesses are listed with class=extant; missing or postulated witnesses
52 are listed with class=hypothetical. Anonymous hyparchetypes must be given a
53 unique name or number, but can be represented as anonymous with the addition
54 of 'label=*' to their lines. Greek letters or other special characters may be
55 used as names, but they must always be wrapped in double quotes.
57 Links between manuscripts are then listed with arrow notation, as below. These
58 lines show the direction of copying, one step at a time, for the entire stemma.
63 The final line in the definition should be the closing brace:
67 Thus for a set of extant manuscripts A, B, and C, where A and B were copied
68 from the archetype O and C was copied from B, the definition would be:
71 O [ class=hypothetical]
84 The constructor. This should generally be called from Text::Tradition, but
85 if called directly it takes the following options:
89 =item * dot - A filehandle open to a DOT representation of the stemma graph.
97 use_ok( 'Text::Tradition::Stemma' );
99 # Try to create a bad graph
101 open( $baddotfh, 't/data/besoin_bad.dot' ) or die "Could not open test dotfile";
103 my $stemma = Text::Tradition::Stemma->new( dot => $baddotfh );
104 ok( 0, "Created broken stemma from dotfile with syntax error" );
105 } catch( Text::Tradition::Error $e ) {
106 like( $e->message, qr/^Error trying to parse/, "Syntax error in dot threw exception" );
109 # Create a good graph
111 open( $dotfh, 't/data/florilegium.dot' ) or die "Could not open test dotfile";
112 binmode( $dotfh, ':utf8' );
113 my $stemma = Text::Tradition::Stemma->new( dot => $dotfh );
114 is( ref( $stemma ), 'Text::Tradition::Stemma', "Created stemma from good dotfile" );
115 is( scalar $stemma->witnesses, 13, "Found correct number of extant witnesses" );
116 is( scalar $stemma->hypotheticals, 8, "Found correct number of extant hypotheticals" );
117 my $found_unicode_sigil;
118 foreach my $h ( $stemma->hypotheticals ) {
119 $found_unicode_sigil = 1 if $h eq "\x{3b1}";
121 ok( $found_unicode_sigil, "Found a correctly encoded Unicode sigil" );
129 isa => 'Text::Tradition::Collation',
130 clearer => 'clear_collation',
137 predicate => 'has_graph',
141 my( $self, $args ) = @_;
142 # If we have been handed a dotfile, initialize it into a graph.
143 if( exists $args->{'dot'} ) {
144 $self->_graph_from_dot( $args->{'dot'} );
148 sub _graph_from_dot {
149 my( $self, $dotfh ) = @_;
150 my $reader = Graph::Reader::Dot->new();
151 # Redirect STDOUT in order to trap any error messages - syntax errors
152 # are evidently not fatal.
153 # TODO This breaks under FastCGI/Apache; reconsider.
156 #open $saved_stderr, ">&STDOUT";
158 #open STDOUT, ">", \$reader_out;
159 my $graph = $reader->read_graph( $dotfh );
161 #open STDOUT, ">", \$saved_stderr;
162 if( $reader_out && $reader_out =~ /error/s ) {
163 throw( "Error trying to parse dot: $reader_out" );
165 throw( "Failed to create graph from dot" );
167 $self->graph( $graph );
168 # Go through the nodes and set any non-hypothetical node to extant.
169 foreach my $v ( $self->graph->vertices ) {
170 $self->graph->set_vertex_attribute( $v, 'class', 'extant' )
171 unless $self->graph->has_vertex_attribute( $v, 'class' );
177 =head2 as_dot( \%options )
179 Returns a normal dot representation of the stemma layout, suitable for rendering
180 with GraphViz. Options include:
184 =item * graph - A hashref of global graph options.
186 =item * node - A hashref of global node options.
188 =item * edge - A hashref of global edge options.
192 See the GraphViz documentation for the list of available options.
197 my( $self, $opts ) = @_;
199 ## See if we are including any a.c. witnesses in this graph.
200 my $graph = $self->graph;
201 if( exists $opts->{'layerwits'} ) {
203 map { $extant->{$_} = 1 } $self->witnesses;
204 $graph = $self->situation_graph( $extant, $opts->{'layerwits'} );
207 # Get default and specified options
214 'fillcolor' => 'white',
216 'shape' => 'ellipse', # Shape for the extant nodes
219 'arrowhead' => 'none',
221 @graphopts{ keys %{$opts->{'graph'}} } = values %{$opts->{'graph'}}
223 @nodeopts{ keys %{$opts->{'node'}} } = values %{$opts->{'node'}}
225 @edgeopts{ keys %{$opts->{'edge'}} } = values %{$opts->{'edge'}}
229 push( @dotlines, 'digraph stemma {' );
230 ## Print out the global attributes
231 push( @dotlines, _make_dotline( 'graph', %graphopts ) ) if keys %graphopts;
232 push( @dotlines, _make_dotline( 'edge', %edgeopts ) ) if keys %edgeopts;
233 push( @dotlines, _make_dotline( 'node', %nodeopts ) ) if keys %nodeopts;
235 # Add each of the nodes.
236 foreach my $n ( $graph->vertices ) {
237 if( $graph->has_vertex_attribute( $n, 'label' ) ) {
238 my $ltext = $graph->get_vertex_attribute( $n, 'label' );
239 push( @dotlines, _make_dotline( $n, 'label' => $ltext ) );
241 # Use the default display settings.
242 $n = _dotquote( $n );
243 push( @dotlines, " $n;" );
246 # Add each of our edges.
247 foreach my $e ( $graph->edges ) {
248 my( $from, $to ) = map { _dotquote( $_ ) } @$e;
249 push( @dotlines, " $from -> $to;" );
251 push( @dotlines, '}' );
253 return join( "\n", @dotlines );
256 =head2 alter_graph( $dotstring )
258 Alters the graph of this stemma according to the definition specified
264 my( $self, $dotstring ) = @_;
266 open $dotfh, '<', \$dotstring;
267 binmode $dotfh, ':utf8';
268 $self->_graph_from_dot( $dotfh );
271 =head2 editable( $opts )
273 =head2 editable_graph( $graph, $opts )
275 Returns a version of the graph rendered in our definition format. The
276 output separates statements with a newline; set $opts->{'linesep'} to the
277 empty string or to a space if the result is to be sent via JSON.
279 If a situational version of the stemma is required, the arguments for
280 situation_graph should be passed via $opts->{'extant'} and $opts->{'layerwits'}.
285 my( $self, $opts ) = @_;
286 my $graph = $self->graph;
287 ## See if we need an editable version of a situational graph.
288 if( exists $opts->{'layerwits'} || exists $opts->{'extant'} ) {
289 my $extant = delete $opts->{'extant'} || {};
290 my $layerwits = delete $opts->{'layerwits'} || [];
291 $graph = $self->situation_graph( $extant, $layerwits );
293 return editable_graph( $graph, $opts );
297 my( $graph, $opts ) = @_;
300 my $join = ( $opts && exists $opts->{'linesep'} ) ? $opts->{'linesep'} : "\n";
302 push( @dotlines, 'digraph stemma {' );
303 my @real; # A cheap sort
304 foreach my $n ( sort $graph->vertices ) {
305 my $c = $graph->get_vertex_attribute( $n, 'class' );
306 $c = 'extant' unless $c;
307 if( $c eq 'extant' ) {
310 push( @dotlines, _make_dotline( $n, 'class' => $c ) );
313 # Now do the real ones
314 foreach my $n ( @real ) {
315 push( @dotlines, _make_dotline( $n, 'class' => 'extant' ) );
317 foreach my $e ( sort _by_vertex $graph->edges ) {
318 my( $from, $to ) = map { _dotquote( $_ ) } @$e;
319 push( @dotlines, " $from -> $to;" );
321 push( @dotlines, '}' );
322 return join( $join, @dotlines );
326 my( $obj, %attr ) = @_;
328 foreach my $k ( keys %attr ) {
329 my $v = _dotquote( $attr{$k} );
330 push( @pairs, "$k=$v" );
332 return sprintf( " %s [ %s ];", _dotquote( $obj ), join( ', ', @pairs ) );
337 return $str if $str =~ /^[A-Za-z0-9]+$/;
339 $str = '"' . $str . '"';
344 return $a->[0].$a->[1] cmp $b->[0].$b->[1];
347 =head2 situation_graph( $extant, $layered )
349 Returns a graph which is the original stemma with all witnesses not in the
350 %$extant hash marked as hypothetical, and witness layers added to the graph
351 according to the list in @$layered. A layered (a.c.) witness is added as a
352 parent of its main version, and additionally shares all other parents and
353 children with that version.
357 sub situation_graph {
358 my( $self, $extant, $layerwits, $layerlabel ) = @_;
360 my $graph = $self->graph->copy;
361 foreach my $vertex ( $graph->vertices ) {
362 # Set as extant any vertex that is extant in the stemma AND
363 # exists in the $extant hash.
364 my $class = 'hypothetical';
365 $class = 'extant' if exists $extant->{$vertex} && $extant->{$vertex} &&
366 $self->graph->get_vertex_attribute( $vertex, 'class' ) ne 'hypothetical';
367 $graph->set_vertex_attribute( $vertex, 'class', $class );
370 # For each 'layered' witness in the layerwits array, add it to the graph
371 # as an ancestor of the 'main' witness, and otherwise with the same parent/
372 # child links as its main analogue.
373 # TOOD Handle case where B is copied from A but corrected from C
374 $layerlabel = ' (a.c.)' unless $layerlabel;
375 foreach my $lw ( @$layerwits ) {
376 # Add the layered witness and set it with the same attributes as
377 # its 'main' analogue
378 throw( "Cannot add a layer to a hypothetical witness $lw" )
379 unless $graph->get_vertex_attribute( $lw, 'class' ) eq 'extant';
380 my $lwac = $lw . $layerlabel;
381 $graph->add_vertex( $lwac );
382 $graph->set_vertex_attributes( $lwac,
383 $graph->get_vertex_attributes( $lw ) );
385 # Set it as ancestor to the main witness
386 $graph->add_edge( $lwac, $lw );
388 # Give it the same ancestors and descendants as the main witness has,
389 # bearing in mind that those ancestors and descendants might also just
390 # have had a layered witness defined.
391 foreach my $v ( $graph->predecessors( $lw ) ) {
392 next if $v eq $lwac; # Don't add a loop
393 $graph->add_edge( $v, $lwac );
394 $graph->add_edge( $v.$layerlabel, $lwac )
395 if $graph->has_vertex( $v.$layerlabel );
397 foreach my $v ( $graph->successors( $lw ) ) {
398 next if $v eq $lwac; # but this shouldn't occur
399 $graph->add_edge( $lwac, $v );
400 $graph->add_edge( $lwac, $v.$layerlabel )
401 if $graph->has_vertex( $v.$layerlabel );
409 Returns an SVG representation of the graph, calling as_dot first.
414 my( $self, $opts ) = @_;
415 my $dot = $self->as_dot( $opts );
416 my @cmd = qw/dot -Tsvg/;
418 my $dotfile = File::Temp->new();
420 # $dotfile->unlink_on_destroy(0);
421 binmode $dotfile, ':utf8';
424 push( @cmd, $dotfile->filename );
425 run( \@cmd, ">", binary(), \$svg );
426 # HACK: Parse the SVG and change the dimensions.
427 # Get rid of width and height attributes to allow scaling.
428 if( $opts->{'size'} ) {
430 my $parser = XML::LibXML->new( load_ext_dtd => 0 );
433 $svgdoc = $parser->parse_string( decode_utf8( $svg ) );
435 throw( "Could not reparse SVG: $@" ) if $@;
436 my( $ew, $eh ) = @{$opts->{'size'}};
437 # If the graph is wider than it is tall, set width to ew and remove height.
438 # Otherwise set height to eh and remove width.
439 my $width = $svgdoc->documentElement->getAttribute('width');
440 my $height = $svgdoc->documentElement->getAttribute('height');
443 my( $remove, $keep, $val, $viewbox );
444 if( $width > $height ) {
448 my $vbheight = $width / $ew * $height;
449 $viewbox = "0.00 0.00 $width.00" . sprintf( "%.2f", $vbheight );
454 my $vbwidth = $height / $eh * $width;
455 $viewbox = "0.00 0.00 " . sprintf( "%.2f", $vbwidth ) . " $height.00";
457 $svgdoc->documentElement->removeAttribute( $remove );
458 $svgdoc->documentElement->setAttribute( $keep, $val );
459 $svgdoc->documentElement->removeAttribute( 'viewBox' );
460 $svgdoc->documentElement->setAttribute( 'viewBox', $viewbox );
461 $svg = $svgdoc->toString();
464 return decode_utf8( $svg );
469 Returns a list of the extant witnesses represented in the stemma.
475 my @wits = grep { $self->graph->get_vertex_attribute( $_, 'class' ) eq 'extant' }
476 $self->graph->vertices;
482 Returns a list of the hypothetical witnesses represented in the stemma.
489 { $self->graph->get_vertex_attribute( $_, 'class' ) eq 'hypothetical' }
490 $self->graph->vertices;
495 Text::Tradition::Error->throw(
496 'ident' => 'Stemma error',
503 __PACKAGE__->meta->make_immutable;
509 This package is free software and is provided "as is" without express
510 or implied warranty. You can redistribute it and/or modify it under
511 the same terms as Perl itself.
515 Tara L Andrews E<lt>aurum@cpan.orgE<gt>