1 package Text::Tradition::Stemma;
4 use Encode qw( decode_utf8 );
7 use Graph::Reader::Dot;
8 use IPC::Run qw/ run binary /;
9 use Text::Tradition::Error;
10 use Text::Tradition::StemmaUtil qw/ character_input phylip_pars parse_newick /;
15 Text::Tradition::Stemma - a representation of a I<stemma codicum> for a Text::Tradition
20 my $t = Text::Tradition->new(
21 'name' => 'this is a text',
23 'file' => '/path/to/tei_parallel_seg_file.xml' );
25 my $s = $tradition->add_stemma( dotfile => '/path/to/stemma.dot' );
29 Text::Tradition is a library for representation and analysis of collated
30 texts, particularly medieval ones. The Stemma is a representation of the
31 copying relationships between the witnesses in a Tradition, modelled with
32 a connected rooted directed acyclic graph (CRDAG).
36 The easiest way to define a stemma is to use a special form of the 'dot'
39 Each stemma opens with the line
43 and continues with a list of all manuscript witnesses in the stemma, whether
44 extant witnesses or missing archetypes or hyparchetypes. Each of these is
45 listed by its sigil on its own line, e.g.:
47 alpha [ class=hypothetical ]
48 1 [ class=hypothetical,label=* ]
51 Extant witnesses are listed with class=extant; missing or postulated witnesses
52 are listed with class=hypothetical. Anonymous hyparchetypes must be given a
53 unique name or number, but can be represented as anonymous with the addition
54 of 'label=*' to their lines. Greek letters or other special characters may be
55 used as names, but they must always be wrapped in double quotes.
57 Links between manuscripts are then listed with arrow notation, as below. These
58 lines show the direction of copying, one step at a time, for the entire stemma.
63 The final line in the definition should be the closing brace:
67 Thus for a set of extant manuscripts A, B, and C, where A and B were copied
68 from the archetype O and C was copied from B, the definition would be:
71 O [ class=hypothetical]
84 The constructor. This should generally be called from Text::Tradition, but
85 if called directly it takes the following options:
89 =item * collation - The collation with which the stemma is associated.
91 =item * dot - A filehandle open to a DOT representation of the stemma graph.
97 use Text::Tradition::Collation;
100 use_ok( 'Text::Tradition::Stemma' );
102 # Placeholder collation to use in tests
103 my $c = Text::Tradition::Collation->new();
105 # Try to create a bad graph
107 open( $baddotfh, 't/data/besoin_bad.dot' ) or die "Could not open test dotfile";
109 my $stemma = Text::Tradition::Stemma->new( collation => $c, dot => $baddotfh );
110 ok( 0, "Created broken stemma from dotfile with syntax error" );
111 } catch( Text::Tradition::Error $e ) {
112 like( $e->message, qr/^Error trying to parse/, "Syntax error in dot threw exception" );
115 # Create a good graph
117 open( $dotfh, 't/data/florilegium.dot' ) or die "Could not open test dotfile";
118 binmode( $dotfh, ':utf8' );
119 my $stemma = Text::Tradition::Stemma->new( collation => $c, dot => $dotfh );
120 is( ref( $stemma ), 'Text::Tradition::Stemma', "Created stemma from good dotfile" );
121 is( scalar $stemma->witnesses, 13, "Found correct number of extant witnesses" );
122 is( scalar $stemma->hypotheticals, 8, "Found correct number of extant hypotheticals" );
123 my $found_unicode_sigil;
124 foreach my $h ( $stemma->hypotheticals ) {
125 $found_unicode_sigil = 1 if $h eq "\x{3b1}";
127 ok( $found_unicode_sigil, "Found a correctly encoded Unicode sigil" );
135 isa => 'Text::Tradition::Collation',
143 predicate => 'has_graph',
147 my( $self, $args ) = @_;
148 # If we have been handed a dotfile, initialize it into a graph.
149 if( exists $args->{'dot'} ) {
150 $self->_graph_from_dot( $args->{'dot'} );
154 sub _graph_from_dot {
155 my( $self, $dotfh ) = @_;
156 my $reader = Graph::Reader::Dot->new();
157 # Redirect STDOUT in order to trap any error messages - syntax errors
158 # are evidently not fatal.
161 open $saved_stderr, ">&STDOUT";
163 open STDOUT, ">", \$reader_out;
164 my $graph = $reader->read_graph( $dotfh );
166 open STDOUT, ">", \$saved_stderr;
167 if( $reader_out && $reader_out =~ /error/s ) {
168 throw( "Error trying to parse dot: $reader_out" );
170 throw( "Failed to create graph from dot" );
172 $self->graph( $graph );
173 # Go through the nodes and set any non-hypothetical node to extant.
174 foreach my $v ( $self->graph->vertices ) {
175 $self->graph->set_vertex_attribute( $v, 'class', 'extant' )
176 unless $self->graph->has_vertex_attribute( $v, 'class' );
182 =head2 as_dot( \%options )
184 Returns a normal dot representation of the stemma layout, suitable for rendering
185 with GraphViz. Options include:
189 =item * graph - A hashref of global graph options.
191 =item * node - A hashref of global node options.
193 =item * edge - A hashref of global edge options.
197 See the GraphViz documentation for the list of available options.
202 my( $self, $opts ) = @_;
204 ## See if we are including any a.c. witnesses in this graph.
205 my $graph = $self->graph;
206 if( exists $opts->{'layerwits'} ) {
208 map { $extant->{$_} = 1 } $self->witnesses;
209 $graph = $self->situation_graph( $extant, $opts->{'layerwits'} );
212 # Get default and specified options
219 'fillcolor' => 'white',
221 'shape' => 'ellipse', # Shape for the extant nodes
224 'arrowhead' => 'none',
226 @graphopts{ keys %{$opts->{'graph'}} } = values %{$opts->{'graph'}}
228 @nodeopts{ keys %{$opts->{'node'}} } = values %{$opts->{'node'}}
230 @edgeopts{ keys %{$opts->{'edge'}} } = values %{$opts->{'edge'}}
234 push( @dotlines, 'digraph stemma {' );
235 ## Print out the global attributes
236 push( @dotlines, _make_dotline( 'graph', %graphopts ) ) if keys %graphopts;
237 push( @dotlines, _make_dotline( 'edge', %edgeopts ) ) if keys %edgeopts;
238 push( @dotlines, _make_dotline( 'node', %nodeopts ) ) if keys %nodeopts;
240 # Add each of the nodes.
241 foreach my $n ( $graph->vertices ) {
242 if( $graph->has_vertex_attribute( $n, 'label' ) ) {
243 my $ltext = $graph->get_vertex_attribute( $n, 'label' );
244 push( @dotlines, _make_dotline( $n, 'label' => $ltext ) );
246 # Use the default display settings.
247 $n = _dotquote( $n );
248 push( @dotlines, " $n;" );
251 # Add each of our edges.
252 foreach my $e ( $graph->edges ) {
253 my( $from, $to ) = map { _dotquote( $_ ) } @$e;
254 push( @dotlines, " $from -> $to;" );
256 push( @dotlines, '}' );
258 return join( "\n", @dotlines );
261 =head2 alter_graph( $dotstring )
263 Alters the graph of this stemma according to the definition specified
269 my( $self, $dotstring ) = @_;
271 open $dotfh, '<', \$dotstring;
272 $self->_graph_from_dot( $dotfh );
275 =head2 editable( $opts )
277 =head2 editable_graph( $graph, $opts )
279 Returns a version of the graph rendered in our definition format. The
280 output separates statements with a newline; set $opts->{'linesep'} to the
281 empty string or to a space if the result is to be sent via JSON.
283 If a situational version of the stemma is required, the arguments for
284 situation_graph should be passed via $opts->{'extant'} and $opts->{'layerwits'}.
289 my( $self, $opts ) = @_;
290 my $graph = $self->graph;
291 ## See if we need an editable version of a situational graph.
292 if( exists $opts->{'layerwits'} || exists $opts->{'extant'} ) {
293 my $extant = delete $opts->{'extant'} || {};
294 my $layerwits = delete $opts->{'layerwits'} || [];
295 $graph = $self->situation_graph( $extant, $layerwits );
297 return editable_graph( $graph, $opts );
301 my( $graph, $opts ) = @_;
304 my $join = ( $opts && exists $opts->{'linesep'} ) ? $opts->{'linesep'} : "\n";
306 push( @dotlines, 'digraph stemma {' );
307 my @real; # A cheap sort
308 foreach my $n ( sort $graph->vertices ) {
309 my $c = $graph->get_vertex_attribute( $n, 'class' );
310 $c = 'extant' unless $c;
311 if( $c eq 'extant' ) {
314 push( @dotlines, _make_dotline( $n, 'class' => $c ) );
317 # Now do the real ones
318 foreach my $n ( @real ) {
319 push( @dotlines, _make_dotline( $n, 'class' => 'extant' ) );
321 foreach my $e ( sort _by_vertex $graph->edges ) {
322 my( $from, $to ) = map { _dotquote( $_ ) } @$e;
323 push( @dotlines, " $from -> $to;" );
325 push( @dotlines, '}' );
326 return join( $join, @dotlines );
330 my( $obj, %attr ) = @_;
332 foreach my $k ( keys %attr ) {
333 my $v = _dotquote( $attr{$k} );
334 push( @pairs, "$k=$v" );
336 return sprintf( " %s [ %s ];", _dotquote( $obj ), join( ', ', @pairs ) );
341 return $str if $str =~ /^[A-Za-z0-9]+$/;
343 $str = '"' . $str . '"';
348 return $a->[0].$a->[1] cmp $b->[0].$b->[1];
351 =head2 situation_graph( $extant, $layered )
353 Returns a graph which is the original stemma with all witnesses not in the
354 %$extant hash marked as hypothetical, and witness layers added to the graph
355 according to the list in @$layered. A layered (a.c.) witness is added as a
356 parent of its main version, and additionally shares all other parents and
357 children with that version.
361 sub situation_graph {
362 my( $self, $extant, $layerwits ) = @_;
364 my $graph = $self->graph->copy;
365 foreach my $vertex ( $graph->vertices ) {
366 # Set as extant any vertex that is extant in the stemma AND
367 # exists in the $extant hash.
368 my $class = 'hypothetical';
369 $class = 'extant' if exists $extant->{$vertex} && $extant->{$vertex} &&
370 $self->graph->get_vertex_attribute( $vertex, 'class' ) ne 'hypothetical';
371 $graph->set_vertex_attribute( $vertex, 'class', $class );
374 # For each 'layered' witness in the layerwits array, add it to the graph
375 # as an ancestor of the 'main' witness, and otherwise with the same parent/
376 # child links as its main analogue.
377 # TOOD Handle case where B is copied from A but corrected from C
378 my $aclabel = $self->collation->ac_label;
379 foreach my $lw ( @$layerwits ) {
380 # Add the layered witness and set it with the same attributes as
381 # its 'main' analogue
382 throw( "Cannot add a layer to a hypothetical witness $lw" )
383 unless $graph->get_vertex_attribute( $lw, 'class' ) eq 'extant';
384 my $lwac = $lw . $aclabel;
385 $graph->add_vertex( $lwac );
386 $graph->set_vertex_attributes( $lwac,
387 $graph->get_vertex_attributes( $lw ) );
389 # Set it as ancestor to the main witness
390 $graph->add_edge( $lwac, $lw );
392 # Give it the same ancestors and descendants as the main witness has,
393 # bearing in mind that those ancestors and descendants might also just
394 # have had a layered witness defined.
395 foreach my $v ( $graph->predecessors( $lw ) ) {
396 next if $v eq $lwac; # Don't add a loop
397 $graph->add_edge( $v, $lwac );
398 $graph->add_edge( $v.$aclabel, $lwac )
399 if $graph->has_vertex( $v.$aclabel );
401 foreach my $v ( $graph->successors( $lw ) ) {
402 next if $v eq $lwac; # but this shouldn't occur
403 $graph->add_edge( $lwac, $v );
404 $graph->add_edge( $lwac, $v.$aclabel )
405 if $graph->has_vertex( $v.$aclabel );
413 Returns an SVG representation of the graph, calling as_dot first.
418 my( $self, $opts ) = @_;
419 my $dot = $self->as_dot( $opts );
420 my @cmd = qw/dot -Tsvg/;
422 my $dotfile = File::Temp->new();
424 # $dotfile->unlink_on_destroy(0);
425 binmode $dotfile, ':utf8';
428 push( @cmd, $dotfile->filename );
429 run( \@cmd, ">", binary(), \$svg );
430 # HACK: Parse the SVG and change the dimensions.
431 # Get rid of width and height attributes to allow scaling.
432 if( $opts->{'size'} ) {
434 my $parser = XML::LibXML->new( load_ext_dtd => 0 );
437 $svgdoc = $parser->parse_string( decode_utf8( $svg ) );
439 throw( "Could not reparse SVG: $@" ) if $@;
440 my( $ew, $eh ) = @{$opts->{'size'}};
441 # If the graph is wider than it is tall, set width to ew and remove height.
442 # Otherwise set height to eh and remove width.
443 my $width = $svgdoc->documentElement->getAttribute('width');
444 my $height = $svgdoc->documentElement->getAttribute('height');
447 my( $remove, $keep, $val );
448 if( $width > $height ) {
457 $svgdoc->documentElement->removeAttribute( $remove );
458 $svgdoc->documentElement->setAttribute( $keep, $val );
459 $svg = $svgdoc->toString();
462 return decode_utf8( $svg );
467 Returns a list of the extant witnesses represented in the stemma.
473 my @wits = grep { $self->graph->get_vertex_attribute( $_, 'class' ) eq 'extant' }
474 $self->graph->vertices;
480 Returns a list of the hypothetical witnesses represented in the stemma.
487 { $self->graph->get_vertex_attribute( $_, 'class' ) eq 'hypothetical' }
488 $self->graph->vertices;
493 Text::Tradition::Error->throw(
494 'ident' => 'Stemma error',
501 __PACKAGE__->meta->make_immutable;
507 This package is free software and is provided "as is" without express
508 or implied warranty. You can redistribute it and/or modify it under
509 the same terms as Perl itself.
513 Tara L Andrews E<lt>aurum@cpan.orgE<gt>