1 package Text::Tradition::Stemma;
4 use Encode qw( decode_utf8 );
7 use Graph::Reader::Dot;
8 use IPC::Run qw/ run binary /;
9 use Text::Tradition::Error;
10 use Text::Tradition::StemmaUtil qw/ character_input phylip_pars parse_newick /;
15 Text::Tradition::Stemma - a representation of a I<stemma codicum> for a Text::Tradition
20 my $t = Text::Tradition->new(
21 'name' => 'this is a text',
23 'file' => '/path/to/tei_parallel_seg_file.xml' );
25 my $s = $tradition->add_stemma( dotfile => '/path/to/stemma.dot' );
29 Text::Tradition is a library for representation and analysis of collated
30 texts, particularly medieval ones. The Stemma is a representation of the
31 copying relationships between the witnesses in a Tradition, modelled with
32 a connected rooted directed acyclic graph (CRDAG).
36 The easiest way to define a stemma is to use a special form of the 'dot'
39 Each stemma opens with the line
43 and continues with a list of all manuscript witnesses in the stemma, whether
44 extant witnesses or missing archetypes or hyparchetypes. Each of these is
45 listed by its sigil on its own line, e.g.:
47 alpha [ class=hypothetical ]
48 1 [ class=hypothetical,label=* ]
51 Extant witnesses are listed with class=extant; missing or postulated witnesses
52 are listed with class=hypothetical. Anonymous hyparchetypes must be given a
53 unique name or number, but can be represented as anonymous with the addition
54 of 'label=*' to their lines. Greek letters or other special characters may be
55 used as names, but they must always be wrapped in double quotes.
57 Links between manuscripts are then listed with arrow notation, as below. These
58 lines show the direction of copying, one step at a time, for the entire stemma.
63 The final line in the definition should be the closing brace:
67 Thus for a set of extant manuscripts A, B, and C, where A and B were copied
68 from the archetype O and C was copied from B, the definition would be:
71 O [ class=hypothetical]
84 The constructor. This should generally be called from Text::Tradition, but
85 if called directly it takes the following options:
89 =item * dot - A filehandle open to a DOT representation of the stemma graph.
97 use_ok( 'Text::Tradition::Stemma' );
99 # Try to create a bad graph
101 open( $baddotfh, 't/data/besoin_bad.dot' ) or die "Could not open test dotfile";
103 my $stemma = Text::Tradition::Stemma->new( dot => $baddotfh );
104 ok( 0, "Created broken stemma from dotfile with syntax error" );
105 } catch( Text::Tradition::Error $e ) {
106 like( $e->message, qr/^Error trying to parse/, "Syntax error in dot threw exception" );
109 # Create a good graph
111 open( $dotfh, 't/data/florilegium.dot' ) or die "Could not open test dotfile";
112 binmode( $dotfh, ':utf8' );
113 my $stemma = Text::Tradition::Stemma->new( dot => $dotfh );
114 is( ref( $stemma ), 'Text::Tradition::Stemma', "Created stemma from good dotfile" );
115 is( scalar $stemma->witnesses, 13, "Found correct number of extant witnesses" );
116 is( scalar $stemma->hypotheticals, 8, "Found correct number of extant hypotheticals" );
117 my $found_unicode_sigil;
118 foreach my $h ( $stemma->hypotheticals ) {
119 $found_unicode_sigil = 1 if $h eq "\x{3b1}";
121 ok( $found_unicode_sigil, "Found a correctly encoded Unicode sigil" );
129 isa => 'Text::Tradition::Collation',
130 clearer => 'clear_collation',
137 predicate => 'has_graph',
141 my( $self, $args ) = @_;
142 # If we have been handed a dotfile, initialize it into a graph.
143 if( exists $args->{'dot'} ) {
144 $self->_graph_from_dot( $args->{'dot'} );
148 sub _graph_from_dot {
149 my( $self, $dotfh ) = @_;
150 my $reader = Graph::Reader::Dot->new();
151 # Redirect STDOUT in order to trap any error messages - syntax errors
152 # are evidently not fatal.
155 open $saved_stderr, ">&STDOUT";
157 open STDOUT, ">", \$reader_out;
158 my $graph = $reader->read_graph( $dotfh );
160 open STDOUT, ">", \$saved_stderr;
161 if( $reader_out && $reader_out =~ /error/s ) {
162 throw( "Error trying to parse dot: $reader_out" );
164 throw( "Failed to create graph from dot" );
166 $self->graph( $graph );
167 # Go through the nodes and set any non-hypothetical node to extant.
168 foreach my $v ( $self->graph->vertices ) {
169 $self->graph->set_vertex_attribute( $v, 'class', 'extant' )
170 unless $self->graph->has_vertex_attribute( $v, 'class' );
176 =head2 as_dot( \%options )
178 Returns a normal dot representation of the stemma layout, suitable for rendering
179 with GraphViz. Options include:
183 =item * graph - A hashref of global graph options.
185 =item * node - A hashref of global node options.
187 =item * edge - A hashref of global edge options.
191 See the GraphViz documentation for the list of available options.
196 my( $self, $opts ) = @_;
198 ## See if we are including any a.c. witnesses in this graph.
199 my $graph = $self->graph;
200 if( exists $opts->{'layerwits'} ) {
202 map { $extant->{$_} = 1 } $self->witnesses;
203 $graph = $self->situation_graph( $extant, $opts->{'layerwits'} );
206 # Get default and specified options
213 'fillcolor' => 'white',
215 'shape' => 'ellipse', # Shape for the extant nodes
218 'arrowhead' => 'none',
220 @graphopts{ keys %{$opts->{'graph'}} } = values %{$opts->{'graph'}}
222 @nodeopts{ keys %{$opts->{'node'}} } = values %{$opts->{'node'}}
224 @edgeopts{ keys %{$opts->{'edge'}} } = values %{$opts->{'edge'}}
228 push( @dotlines, 'digraph stemma {' );
229 ## Print out the global attributes
230 push( @dotlines, _make_dotline( 'graph', %graphopts ) ) if keys %graphopts;
231 push( @dotlines, _make_dotline( 'edge', %edgeopts ) ) if keys %edgeopts;
232 push( @dotlines, _make_dotline( 'node', %nodeopts ) ) if keys %nodeopts;
234 # Add each of the nodes.
235 foreach my $n ( $graph->vertices ) {
236 if( $graph->has_vertex_attribute( $n, 'label' ) ) {
237 my $ltext = $graph->get_vertex_attribute( $n, 'label' );
238 push( @dotlines, _make_dotline( $n, 'label' => $ltext ) );
240 # Use the default display settings.
241 $n = _dotquote( $n );
242 push( @dotlines, " $n;" );
245 # Add each of our edges.
246 foreach my $e ( $graph->edges ) {
247 my( $from, $to ) = map { _dotquote( $_ ) } @$e;
248 push( @dotlines, " $from -> $to;" );
250 push( @dotlines, '}' );
252 return join( "\n", @dotlines );
255 =head2 alter_graph( $dotstring )
257 Alters the graph of this stemma according to the definition specified
263 my( $self, $dotstring ) = @_;
265 open $dotfh, '<', \$dotstring;
266 binmode $dotfh, ':utf8';
267 $self->_graph_from_dot( $dotfh );
270 =head2 editable( $opts )
272 =head2 editable_graph( $graph, $opts )
274 Returns a version of the graph rendered in our definition format. The
275 output separates statements with a newline; set $opts->{'linesep'} to the
276 empty string or to a space if the result is to be sent via JSON.
278 If a situational version of the stemma is required, the arguments for
279 situation_graph should be passed via $opts->{'extant'} and $opts->{'layerwits'}.
284 my( $self, $opts ) = @_;
285 my $graph = $self->graph;
286 ## See if we need an editable version of a situational graph.
287 if( exists $opts->{'layerwits'} || exists $opts->{'extant'} ) {
288 my $extant = delete $opts->{'extant'} || {};
289 my $layerwits = delete $opts->{'layerwits'} || [];
290 $graph = $self->situation_graph( $extant, $layerwits );
292 return editable_graph( $graph, $opts );
296 my( $graph, $opts ) = @_;
299 my $join = ( $opts && exists $opts->{'linesep'} ) ? $opts->{'linesep'} : "\n";
301 push( @dotlines, 'digraph stemma {' );
302 my @real; # A cheap sort
303 foreach my $n ( sort $graph->vertices ) {
304 my $c = $graph->get_vertex_attribute( $n, 'class' );
305 $c = 'extant' unless $c;
306 if( $c eq 'extant' ) {
309 push( @dotlines, _make_dotline( $n, 'class' => $c ) );
312 # Now do the real ones
313 foreach my $n ( @real ) {
314 push( @dotlines, _make_dotline( $n, 'class' => 'extant' ) );
316 foreach my $e ( sort _by_vertex $graph->edges ) {
317 my( $from, $to ) = map { _dotquote( $_ ) } @$e;
318 push( @dotlines, " $from -> $to;" );
320 push( @dotlines, '}' );
321 return join( $join, @dotlines );
325 my( $obj, %attr ) = @_;
327 foreach my $k ( keys %attr ) {
328 my $v = _dotquote( $attr{$k} );
329 push( @pairs, "$k=$v" );
331 return sprintf( " %s [ %s ];", _dotquote( $obj ), join( ', ', @pairs ) );
336 return $str if $str =~ /^[A-Za-z0-9]+$/;
338 $str = '"' . $str . '"';
343 return $a->[0].$a->[1] cmp $b->[0].$b->[1];
346 =head2 situation_graph( $extant, $layered )
348 Returns a graph which is the original stemma with all witnesses not in the
349 %$extant hash marked as hypothetical, and witness layers added to the graph
350 according to the list in @$layered. A layered (a.c.) witness is added as a
351 parent of its main version, and additionally shares all other parents and
352 children with that version.
356 sub situation_graph {
357 my( $self, $extant, $layerwits, $layerlabel ) = @_;
359 my $graph = $self->graph->copy;
360 foreach my $vertex ( $graph->vertices ) {
361 # Set as extant any vertex that is extant in the stemma AND
362 # exists in the $extant hash.
363 my $class = 'hypothetical';
364 $class = 'extant' if exists $extant->{$vertex} && $extant->{$vertex} &&
365 $self->graph->get_vertex_attribute( $vertex, 'class' ) ne 'hypothetical';
366 $graph->set_vertex_attribute( $vertex, 'class', $class );
369 # For each 'layered' witness in the layerwits array, add it to the graph
370 # as an ancestor of the 'main' witness, and otherwise with the same parent/
371 # child links as its main analogue.
372 # TOOD Handle case where B is copied from A but corrected from C
373 $layerlabel = ' (a.c.)' unless $layerlabel;
374 foreach my $lw ( @$layerwits ) {
375 # Add the layered witness and set it with the same attributes as
376 # its 'main' analogue
377 throw( "Cannot add a layer to a hypothetical witness $lw" )
378 unless $graph->get_vertex_attribute( $lw, 'class' ) eq 'extant';
379 my $lwac = $lw . $layerlabel;
380 $graph->add_vertex( $lwac );
381 $graph->set_vertex_attributes( $lwac,
382 $graph->get_vertex_attributes( $lw ) );
384 # Set it as ancestor to the main witness
385 $graph->add_edge( $lwac, $lw );
387 # Give it the same ancestors and descendants as the main witness has,
388 # bearing in mind that those ancestors and descendants might also just
389 # have had a layered witness defined.
390 foreach my $v ( $graph->predecessors( $lw ) ) {
391 next if $v eq $lwac; # Don't add a loop
392 $graph->add_edge( $v, $lwac );
393 $graph->add_edge( $v.$layerlabel, $lwac )
394 if $graph->has_vertex( $v.$layerlabel );
396 foreach my $v ( $graph->successors( $lw ) ) {
397 next if $v eq $lwac; # but this shouldn't occur
398 $graph->add_edge( $lwac, $v );
399 $graph->add_edge( $lwac, $v.$layerlabel )
400 if $graph->has_vertex( $v.$layerlabel );
408 Returns an SVG representation of the graph, calling as_dot first.
413 my( $self, $opts ) = @_;
414 my $dot = $self->as_dot( $opts );
415 my @cmd = qw/dot -Tsvg/;
417 my $dotfile = File::Temp->new();
419 # $dotfile->unlink_on_destroy(0);
420 binmode $dotfile, ':utf8';
423 push( @cmd, $dotfile->filename );
424 run( \@cmd, ">", binary(), \$svg );
425 # HACK: Parse the SVG and change the dimensions.
426 # Get rid of width and height attributes to allow scaling.
427 if( $opts->{'size'} ) {
429 my $parser = XML::LibXML->new( load_ext_dtd => 0 );
432 $svgdoc = $parser->parse_string( decode_utf8( $svg ) );
434 throw( "Could not reparse SVG: $@" ) if $@;
435 my( $ew, $eh ) = @{$opts->{'size'}};
436 # If the graph is wider than it is tall, set width to ew and remove height.
437 # Otherwise set height to eh and remove width.
438 my $width = $svgdoc->documentElement->getAttribute('width');
439 my $height = $svgdoc->documentElement->getAttribute('height');
442 my( $remove, $keep, $val, $viewbox );
443 if( $width > $height ) {
447 my $vbheight = $width / $ew * $height;
448 $viewbox = "0.00 0.00 $width.00" . sprintf( "%.2f", $vbheight );
453 my $vbwidth = $height / $eh * $width;
454 $viewbox = "0.00 0.00 " . sprintf( "%.2f", $vbwidth ) . " $height.00";
456 $svgdoc->documentElement->removeAttribute( $remove );
457 $svgdoc->documentElement->setAttribute( $keep, $val );
458 $svgdoc->documentElement->removeAttribute( 'viewBox' );
459 $svgdoc->documentElement->setAttribute( 'viewBox', $viewbox );
460 $svg = $svgdoc->toString();
463 return decode_utf8( $svg );
468 Returns a list of the extant witnesses represented in the stemma.
474 my @wits = grep { $self->graph->get_vertex_attribute( $_, 'class' ) eq 'extant' }
475 $self->graph->vertices;
481 Returns a list of the hypothetical witnesses represented in the stemma.
488 { $self->graph->get_vertex_attribute( $_, 'class' ) eq 'hypothetical' }
489 $self->graph->vertices;
494 Text::Tradition::Error->throw(
495 'ident' => 'Stemma error',
502 __PACKAGE__->meta->make_immutable;
508 This package is free software and is provided "as is" without express
509 or implied warranty. You can redistribute it and/or modify it under
510 the same terms as Perl itself.
514 Tara L Andrews E<lt>aurum@cpan.orgE<gt>