1 package Text::Tradition::Stemma;
4 use Encode qw( decode_utf8 );
7 use Graph::Reader::Dot;
8 use IPC::Run qw/ run binary /;
9 use Text::Tradition::Error;
10 use Text::Tradition::StemmaUtil qw/ character_input phylip_pars parse_newick /;
15 Text::Tradition::Stemma - a representation of a I<stemma codicum> for a Text::Tradition
20 my $t = Text::Tradition->new(
21 'name' => 'this is a text',
23 'file' => '/path/to/tei_parallel_seg_file.xml' );
25 my $s = $tradition->add_stemma( dotfile => '/path/to/stemma.dot' );
29 Text::Tradition is a library for representation and analysis of collated
30 texts, particularly medieval ones. The Stemma is a representation of the
31 copying relationships between the witnesses in a Tradition, modelled with
32 a connected rooted directed acyclic graph (CRDAG).
36 The easiest way to define a stemma is to use a special form of the 'dot'
39 Each stemma opens with the line
43 and continues with a list of all manuscript witnesses in the stemma, whether
44 extant witnesses or missing archetypes or hyparchetypes. Each of these is
45 listed by its sigil on its own line, e.g.:
47 alpha [ class=hypothetical ]
48 1 [ class=hypothetical,label=* ]
51 Extant witnesses are listed with class=extant; missing or postulated witnesses
52 are listed with class=hypothetical. Anonymous hyparchetypes must be given a
53 unique name or number, but can be represented as anonymous with the addition
54 of 'label=*' to their lines. Greek letters or other special characters may be
55 used as names, but they must always be wrapped in double quotes.
57 Links between manuscripts are then listed with arrow notation, as below. These
58 lines show the direction of copying, one step at a time, for the entire stemma.
63 The final line in the definition should be the closing brace:
67 Thus for a set of extant manuscripts A, B, and C, where A and B were copied
68 from the archetype O and C was copied from B, the definition would be:
71 O [ class=hypothetical]
84 The constructor. This should generally be called from Text::Tradition, but
85 if called directly it takes the following options:
89 =item * collation - The collation with which the stemma is associated.
91 =item * dot - A filehandle open to a DOT representation of the stemma graph.
97 use Text::Tradition::Collation;
100 use_ok( 'Text::Tradition::Stemma' );
102 # Placeholder collation to use in tests
103 my $c = Text::Tradition::Collation->new();
105 # Try to create a bad graph
107 open( $baddotfh, 't/data/besoin_bad.dot' ) or die "Could not open test dotfile";
109 my $stemma = Text::Tradition::Stemma->new( collation => $c, dot => $baddotfh );
110 ok( 0, "Created broken stemma from dotfile with syntax error" );
111 } catch( Text::Tradition::Error $e ) {
112 like( $e->message, qr/^Error trying to parse/, "Syntax error in dot threw exception" );
115 # Create a good graph
117 open( $dotfh, 't/data/florilegium.dot' ) or die "Could not open test dotfile";
118 binmode( $dotfh, ':utf8' );
119 my $stemma = Text::Tradition::Stemma->new( collation => $c, dot => $dotfh );
120 is( ref( $stemma ), 'Text::Tradition::Stemma', "Created stemma from good dotfile" );
121 is( scalar $stemma->witnesses, 13, "Found correct number of extant witnesses" );
122 is( scalar $stemma->hypotheticals, 8, "Found correct number of extant hypotheticals" );
123 my $found_unicode_sigil;
124 foreach my $h ( $stemma->hypotheticals ) {
125 $found_unicode_sigil = 1 if $h eq "\x{3b1}";
127 ok( $found_unicode_sigil, "Found a correctly encoded Unicode sigil" );
135 isa => 'Text::Tradition::Collation',
143 predicate => 'has_graph',
147 my( $self, $args ) = @_;
148 # If we have been handed a dotfile, initialize it into a graph.
149 if( exists $args->{'dot'} ) {
150 $self->_graph_from_dot( $args->{'dot'} );
154 sub _graph_from_dot {
155 my( $self, $dotfh ) = @_;
156 my $reader = Graph::Reader::Dot->new();
157 # Redirect STDOUT in order to trap any error messages - syntax errors
158 # are evidently not fatal.
161 open $saved_stderr, ">&STDOUT";
163 open STDOUT, ">", \$reader_out;
164 my $graph = $reader->read_graph( $dotfh );
166 open STDOUT, ">", \$saved_stderr;
167 if( $reader_out && $reader_out =~ /error/s ) {
168 throw( "Error trying to parse dot: $reader_out" );
170 throw( "Failed to create graph from dot" );
172 $self->graph( $graph );
173 # Go through the nodes and set any non-hypothetical node to extant.
174 foreach my $v ( $self->graph->vertices ) {
175 $self->graph->set_vertex_attribute( $v, 'class', 'extant' )
176 unless $self->graph->has_vertex_attribute( $v, 'class' );
182 =head2 as_dot( \%options )
184 Returns a normal dot representation of the stemma layout, suitable for rendering
185 with GraphViz. Options include:
189 =item * graph - A hashref of global graph options.
191 =item * node - A hashref of global node options.
193 =item * edge - A hashref of global edge options.
197 See the GraphViz documentation for the list of available options.
202 my( $self, $opts ) = @_;
204 ## See if we are including any a.c. witnesses in this graph.
205 my $graph = $self->graph;
206 if( exists $opts->{'layerwits'} ) {
208 map { $extant->{$_} = 1 } $self->witnesses;
209 $graph = $self->situation_graph( $extant, $opts->{'layerwits'} );
212 # Get default and specified options
219 'fillcolor' => 'white',
221 'shape' => 'ellipse', # Shape for the extant nodes
224 'arrowhead' => 'none',
226 @graphopts{ keys %{$opts->{'graph'}} } = values %{$opts->{'graph'}}
228 @nodeopts{ keys %{$opts->{'node'}} } = values %{$opts->{'node'}}
230 @edgeopts{ keys %{$opts->{'edge'}} } = values %{$opts->{'edge'}}
234 push( @dotlines, 'digraph stemma {' );
235 ## Print out the global attributes
236 push( @dotlines, _make_dotline( 'graph', %graphopts ) ) if keys %graphopts;
237 push( @dotlines, _make_dotline( 'edge', %edgeopts ) ) if keys %edgeopts;
238 push( @dotlines, _make_dotline( 'node', %nodeopts ) ) if keys %nodeopts;
240 # Add each of the nodes.
241 foreach my $n ( $graph->vertices ) {
242 if( $graph->has_vertex_attribute( $n, 'label' ) ) {
243 my $ltext = $graph->get_vertex_attribute( $n, 'label' );
244 push( @dotlines, _make_dotline( $n, 'label' => $ltext ) );
246 # Use the default display settings.
247 $n = _dotquote( $n );
248 push( @dotlines, " $n;" );
251 # Add each of our edges.
252 foreach my $e ( $graph->edges ) {
253 my( $from, $to ) = map { _dotquote( $_ ) } @$e;
254 push( @dotlines, " $from -> $to;" );
256 push( @dotlines, '}' );
258 return join( "\n", @dotlines );
261 =head2 alter_graph( $dotstring )
263 Alters the graph of this stemma according to the definition specified
269 my( $self, $dotstring ) = @_;
271 open $dotfh, '<', \$dotstring;
272 binmode $dotfh, ':utf8';
273 $self->_graph_from_dot( $dotfh );
276 =head2 editable( $opts )
278 =head2 editable_graph( $graph, $opts )
280 Returns a version of the graph rendered in our definition format. The
281 output separates statements with a newline; set $opts->{'linesep'} to the
282 empty string or to a space if the result is to be sent via JSON.
284 If a situational version of the stemma is required, the arguments for
285 situation_graph should be passed via $opts->{'extant'} and $opts->{'layerwits'}.
290 my( $self, $opts ) = @_;
291 my $graph = $self->graph;
292 ## See if we need an editable version of a situational graph.
293 if( exists $opts->{'layerwits'} || exists $opts->{'extant'} ) {
294 my $extant = delete $opts->{'extant'} || {};
295 my $layerwits = delete $opts->{'layerwits'} || [];
296 $graph = $self->situation_graph( $extant, $layerwits );
298 return editable_graph( $graph, $opts );
302 my( $graph, $opts ) = @_;
305 my $join = ( $opts && exists $opts->{'linesep'} ) ? $opts->{'linesep'} : "\n";
307 push( @dotlines, 'digraph stemma {' );
308 my @real; # A cheap sort
309 foreach my $n ( sort $graph->vertices ) {
310 my $c = $graph->get_vertex_attribute( $n, 'class' );
311 $c = 'extant' unless $c;
312 if( $c eq 'extant' ) {
315 push( @dotlines, _make_dotline( $n, 'class' => $c ) );
318 # Now do the real ones
319 foreach my $n ( @real ) {
320 push( @dotlines, _make_dotline( $n, 'class' => 'extant' ) );
322 foreach my $e ( sort _by_vertex $graph->edges ) {
323 my( $from, $to ) = map { _dotquote( $_ ) } @$e;
324 push( @dotlines, " $from -> $to;" );
326 push( @dotlines, '}' );
327 return join( $join, @dotlines );
331 my( $obj, %attr ) = @_;
333 foreach my $k ( keys %attr ) {
334 my $v = _dotquote( $attr{$k} );
335 push( @pairs, "$k=$v" );
337 return sprintf( " %s [ %s ];", _dotquote( $obj ), join( ', ', @pairs ) );
342 return $str if $str =~ /^[A-Za-z0-9]+$/;
344 $str = '"' . $str . '"';
349 return $a->[0].$a->[1] cmp $b->[0].$b->[1];
352 =head2 situation_graph( $extant, $layered )
354 Returns a graph which is the original stemma with all witnesses not in the
355 %$extant hash marked as hypothetical, and witness layers added to the graph
356 according to the list in @$layered. A layered (a.c.) witness is added as a
357 parent of its main version, and additionally shares all other parents and
358 children with that version.
362 sub situation_graph {
363 my( $self, $extant, $layerwits ) = @_;
365 my $graph = $self->graph->copy;
366 foreach my $vertex ( $graph->vertices ) {
367 # Set as extant any vertex that is extant in the stemma AND
368 # exists in the $extant hash.
369 my $class = 'hypothetical';
370 $class = 'extant' if exists $extant->{$vertex} && $extant->{$vertex} &&
371 $self->graph->get_vertex_attribute( $vertex, 'class' ) ne 'hypothetical';
372 $graph->set_vertex_attribute( $vertex, 'class', $class );
375 # For each 'layered' witness in the layerwits array, add it to the graph
376 # as an ancestor of the 'main' witness, and otherwise with the same parent/
377 # child links as its main analogue.
378 # TOOD Handle case where B is copied from A but corrected from C
379 my $aclabel = $self->collation->ac_label;
380 foreach my $lw ( @$layerwits ) {
381 # Add the layered witness and set it with the same attributes as
382 # its 'main' analogue
383 throw( "Cannot add a layer to a hypothetical witness $lw" )
384 unless $graph->get_vertex_attribute( $lw, 'class' ) eq 'extant';
385 my $lwac = $lw . $aclabel;
386 $graph->add_vertex( $lwac );
387 $graph->set_vertex_attributes( $lwac,
388 $graph->get_vertex_attributes( $lw ) );
390 # Set it as ancestor to the main witness
391 $graph->add_edge( $lwac, $lw );
393 # Give it the same ancestors and descendants as the main witness has,
394 # bearing in mind that those ancestors and descendants might also just
395 # have had a layered witness defined.
396 foreach my $v ( $graph->predecessors( $lw ) ) {
397 next if $v eq $lwac; # Don't add a loop
398 $graph->add_edge( $v, $lwac );
399 $graph->add_edge( $v.$aclabel, $lwac )
400 if $graph->has_vertex( $v.$aclabel );
402 foreach my $v ( $graph->successors( $lw ) ) {
403 next if $v eq $lwac; # but this shouldn't occur
404 $graph->add_edge( $lwac, $v );
405 $graph->add_edge( $lwac, $v.$aclabel )
406 if $graph->has_vertex( $v.$aclabel );
414 Returns an SVG representation of the graph, calling as_dot first.
419 my( $self, $opts ) = @_;
420 my $dot = $self->as_dot( $opts );
421 my @cmd = qw/dot -Tsvg/;
423 my $dotfile = File::Temp->new();
425 # $dotfile->unlink_on_destroy(0);
426 binmode $dotfile, ':utf8';
429 push( @cmd, $dotfile->filename );
430 run( \@cmd, ">", binary(), \$svg );
431 # HACK: Parse the SVG and change the dimensions.
432 # Get rid of width and height attributes to allow scaling.
433 if( $opts->{'size'} ) {
435 my $parser = XML::LibXML->new( load_ext_dtd => 0 );
438 $svgdoc = $parser->parse_string( decode_utf8( $svg ) );
440 throw( "Could not reparse SVG: $@" ) if $@;
441 my( $ew, $eh ) = @{$opts->{'size'}};
442 # If the graph is wider than it is tall, set width to ew and remove height.
443 # Otherwise set height to eh and remove width.
444 my $width = $svgdoc->documentElement->getAttribute('width');
445 my $height = $svgdoc->documentElement->getAttribute('height');
448 my( $remove, $keep, $val );
449 if( $width > $height ) {
458 $svgdoc->documentElement->removeAttribute( $remove );
459 $svgdoc->documentElement->setAttribute( $keep, $val );
460 $svg = $svgdoc->toString();
463 return decode_utf8( $svg );
468 Returns a list of the extant witnesses represented in the stemma.
474 my @wits = grep { $self->graph->get_vertex_attribute( $_, 'class' ) eq 'extant' }
475 $self->graph->vertices;
481 Returns a list of the hypothetical witnesses represented in the stemma.
488 { $self->graph->get_vertex_attribute( $_, 'class' ) eq 'hypothetical' }
489 $self->graph->vertices;
494 Text::Tradition::Error->throw(
495 'ident' => 'Stemma error',
502 __PACKAGE__->meta->make_immutable;
508 This package is free software and is provided "as is" without express
509 or implied warranty. You can redistribute it and/or modify it under
510 the same terms as Perl itself.
514 Tara L Andrews E<lt>aurum@cpan.orgE<gt>